scheme-sdk 0.3.6__tar.gz → 0.3.8__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: scheme_sdk
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: The Scheme SDK provides connectors for ingesting conversations, messages, and files across communication platforms.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -203,6 +203,7 @@ License: Apache License
203
203
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
204
204
  See the License for the specific language governing permissions and
205
205
  limitations under the License.
206
+ Requires-Dist: html-sanitizer>=2.6.0
206
207
  Requires-Dist: requests>=2.32.5
207
208
  Requires-Python: >=3.11
208
209
  Project-URL: Homepage, https://www.schemebig.com/
@@ -4,12 +4,13 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "scheme_sdk"
7
- version = "0.3.6"
7
+ version = "0.3.8"
8
8
  description = "The Scheme SDK provides connectors for ingesting conversations, messages, and files across communication platforms."
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
11
11
  requires-python = ">=3.11"
12
12
  dependencies = [
13
+ "html-sanitizer>=2.6.0",
13
14
  "requests>=2.32.5",
14
15
  ]
15
16
 
@@ -13,12 +13,17 @@ from .errors import (
13
13
  ConnectorRateLimitError,
14
14
  )
15
15
  from .message import MessageConnector
16
+ from .storage import FileContent, FileMetadata, FolderMetadata, StorageConnector
16
17
 
17
18
  __all__ = [
18
19
  # Base classes
19
20
  "BaseConnector",
20
21
  "MessageConnector",
22
+ "StorageConnector",
21
23
  "ConnectorContext",
24
+ "FileMetadata",
25
+ "FolderMetadata",
26
+ "FileContent",
22
27
  # Exceptions
23
28
  "ConnectorError",
24
29
  "ConnectorAuthError",
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
2
2
  from datetime import datetime
3
3
  from dataclasses import dataclass, field
4
4
  from typing import Any, Dict, Iterable, Optional
5
+ from html_sanitizer import Sanitizer
5
6
 
6
7
  from .base import BaseConnector
7
8
 
@@ -11,6 +12,38 @@ class MessageConnector(BaseConnector, ABC):
11
12
  Abstract base class for all message connectors.
12
13
  """
13
14
 
15
+ _sanitizer_settings = {
16
+ "tags": {
17
+ "a",
18
+ "strong",
19
+ "em",
20
+ "p",
21
+ "ul",
22
+ "ol",
23
+ "li",
24
+ "br",
25
+ "blockquote",
26
+ "code",
27
+ "pre",
28
+ "span",
29
+ "sub",
30
+ "sup",
31
+ "hr",
32
+ },
33
+ "attributes": {"a": {"href", "title", "rel", "target"}},
34
+ "empty": {"hr", "a", "br"},
35
+ "separate": {"a", "p", "li", "blockquote", "pre"},
36
+ "whitespace": {"br"},
37
+ "add_nofollow": False,
38
+ "autolink": False,
39
+ "keep_typographic_whitespace": False,
40
+ }
41
+
42
+ _sanitizer: Sanitizer
43
+
44
+ def __init__(self):
45
+ self._sanitizer = Sanitizer(settings=self._sanitizer_settings)
46
+
14
47
  @abstractmethod
15
48
  def fetch_conversations(self) -> Iterable[Dict[str, Any]]:
16
49
  """
@@ -85,6 +118,7 @@ class MessageConnector(BaseConnector, ABC):
85
118
  """
86
119
  ...
87
120
 
121
+ @abstractmethod
88
122
  def normalize_message(self, raw_message: Dict[str, Any]) -> Dict[str, Any]:
89
123
  """
90
124
  Transform a platform-specific message into canonical format.
@@ -100,6 +134,7 @@ class MessageConnector(BaseConnector, ABC):
100
134
  """
101
135
  ...
102
136
 
137
+ @abstractmethod
103
138
  def normalize_conversation(
104
139
  self, raw_conversation: Dict[str, Any]
105
140
  ) -> Dict[str, Any]:
@@ -134,6 +169,18 @@ class MessageConnector(BaseConnector, ABC):
134
169
  self._logger.warning(f"search_messages not implemented for {self.platform}")
135
170
  return iter([])
136
171
 
172
+ def _sanitize_html(self, html: str) -> str:
173
+ """
174
+ Sanitize HTML content.
175
+
176
+ Args:
177
+ html: HTML content to sanitize
178
+
179
+ Returns:
180
+ Sanitized HTML content
181
+ """
182
+ return self._sanitizer.sanitize(html)
183
+
137
184
 
138
185
  @dataclass
139
186
  class Conversation:
@@ -0,0 +1,159 @@
1
+ """
2
+ SeedVault Connector SDK - Storage Connector Interfaces
3
+
4
+ This module defines the base interface for storage connectors (e.g. Drive,
5
+ OneDrive, Dropbox). Storage connectors provide file and folder metadata,
6
+ search capabilities, and content export.
7
+ """
8
+
9
+ from abc import ABC, abstractmethod
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ from typing import Any, Dict, Iterable, Optional
13
+
14
+ from .base import BaseConnector
15
+
16
+
17
+ class StorageConnector(BaseConnector, ABC):
18
+ """
19
+ Abstract base class for all storage connectors.
20
+ """
21
+
22
+ @abstractmethod
23
+ def list_root_items(self) -> Iterable[Dict[str, Any]]:
24
+ """
25
+ List items at the storage root.
26
+
27
+ Yields:
28
+ Dict: Raw file or folder objects from the platform API.
29
+ """
30
+ ...
31
+
32
+ @abstractmethod
33
+ def list_children(self, folder_id: str) -> Iterable[Dict[str, Any]]:
34
+ """
35
+ List child items within a folder.
36
+
37
+ Args:
38
+ folder_id: Unique identifier of the parent folder.
39
+
40
+ Yields:
41
+ Dict: Raw file or folder objects from the platform API.
42
+ """
43
+ ...
44
+
45
+ @abstractmethod
46
+ def get_item(self, item_id: str) -> Dict[str, Any]:
47
+ """
48
+ Fetch a single file or folder by ID.
49
+
50
+ Args:
51
+ item_id: Unique identifier of the item.
52
+
53
+ Returns:
54
+ Dict: Raw file or folder object from the platform API.
55
+ """
56
+ ...
57
+
58
+ @abstractmethod
59
+ def search_items(
60
+ self, query: str, folder_id: Optional[str] = None, limit: int = 100
61
+ ) -> Iterable[Dict[str, Any]]:
62
+ """
63
+ Search for files or folders by query string.
64
+
65
+ Args:
66
+ query: Search query (platform-specific syntax).
67
+ folder_id: Optional folder scope for the search.
68
+ limit: Maximum number of results.
69
+
70
+ Yields:
71
+ Dict: Raw file or folder objects from the platform API.
72
+ """
73
+ ...
74
+
75
+ @abstractmethod
76
+ def fetch_content(self, file_id: str) -> Dict[str, Any]:
77
+ """
78
+ Fetch file content by file ID.
79
+
80
+ Args:
81
+ file_id: Unique identifier of the file.
82
+
83
+ Returns:
84
+ Dict: Raw content payload from the platform API.
85
+ Expected keys may include:
86
+ - content_bytes: bytes
87
+ - content_text: str
88
+ - mime_type: str
89
+ - size: int
90
+ - checksum: str
91
+ - encoding: str
92
+ """
93
+ ...
94
+
95
+ @abstractmethod
96
+ def normalize_file(self, raw_file: Dict[str, Any]) -> Dict[str, Any]:
97
+ """
98
+ Transform a platform-specific file into canonical format.
99
+
100
+ Args:
101
+ raw_file: Raw file object from the platform API.
102
+
103
+ Returns:
104
+ Normalized file dictionary in canonical format.
105
+ """
106
+ ...
107
+
108
+ @abstractmethod
109
+ def normalize_folder(self, raw_folder: Dict[str, Any]) -> Dict[str, Any]:
110
+ """
111
+ Transform a platform-specific folder into canonical format.
112
+
113
+ Args:
114
+ raw_folder: Raw folder object from the platform API.
115
+
116
+ Returns:
117
+ Normalized folder dictionary in canonical format.
118
+ """
119
+ ...
120
+
121
+
122
+ @dataclass
123
+ class FileMetadata:
124
+ id: str
125
+ name: str
126
+ path: Optional[str] = None
127
+ mime_type: Optional[str] = None
128
+ size: Optional[int] = None
129
+ modified_at: Optional[datetime] = None
130
+ created_at: Optional[datetime] = None
131
+ checksum: Optional[str] = None
132
+ direct_link: Optional[str] = None
133
+ metadata: Dict[str, Any] = field(default_factory=dict)
134
+
135
+
136
+ @dataclass
137
+ class FolderMetadata:
138
+ id: str
139
+ name: str
140
+ path: Optional[str] = None
141
+ parent_id: Optional[str] = None
142
+ created_at: Optional[datetime] = None
143
+ modified_at: Optional[datetime] = None
144
+ item_count: Optional[int] = None
145
+ direct_link: Optional[str] = None
146
+ metadata: Dict[str, Any] = field(default_factory=dict)
147
+
148
+
149
+ @dataclass
150
+ class FileContent:
151
+ file_id: str
152
+ name: Optional[str] = None
153
+ mime_type: Optional[str] = None
154
+ size: Optional[int] = None
155
+ content_bytes: Optional[bytes] = None
156
+ content_text: Optional[str] = None
157
+ checksum: Optional[str] = None
158
+ encoding: Optional[str] = None
159
+ metadata: Dict[str, Any] = field(default_factory=dict)
@@ -4,6 +4,12 @@ from typing import Any, Dict, List, Optional
4
4
 
5
5
  import requests
6
6
 
7
+ from pprint import pprint as pp
8
+ from dotenv import load_dotenv
9
+ import os
10
+
11
+ load_dotenv()
12
+
7
13
  from .base import MessageConnector
8
14
 
9
15
 
@@ -14,6 +20,7 @@ class OutlookConnector(MessageConnector):
14
20
  _backoff_cap_seconds = 30
15
21
 
16
22
  def __init__(self, token: str):
23
+ super().__init__()
17
24
  self.token = token
18
25
  self.base = "https://graph.microsoft.com/v1.0"
19
26
 
@@ -118,7 +125,7 @@ class OutlookConnector(MessageConnector):
118
125
  return {
119
126
  "title": message["subject"],
120
127
  "platform": self.platform,
121
- "text": message["body"]["content"],
128
+ "text": self._sanitize_html(message["body"]["content"]),
122
129
  "direct_link": message["webLink"],
123
130
  "metadata": {
124
131
  "platform_conversation_id": message["conversationId"],
@@ -212,3 +219,11 @@ class OutlookConnector(MessageConnector):
212
219
  next_params = None # nextLink already includes the query string
213
220
 
214
221
  return items
222
+
223
+
224
+ if __name__ == "__main__":
225
+ connector = OutlookConnector(os.getenv("OUTLOOK_TOKEN"))
226
+ conversations = connector.fetch_conversations(top=1)
227
+ pp(conversations)
228
+ messages = connector.fetch_messages(conversations[0]["id"])
229
+ pp(messages)
File without changes
File without changes