scheme-sdk 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scheme_sdk/connectors/base/__init__.py +5 -0
- scheme_sdk/connectors/base/message.py +47 -0
- scheme_sdk/connectors/base/storage.py +159 -0
- scheme_sdk/connectors/outlook.py +16 -1
- {scheme_sdk-0.3.6.dist-info → scheme_sdk-0.3.8.dist-info}/METADATA +2 -1
- {scheme_sdk-0.3.6.dist-info → scheme_sdk-0.3.8.dist-info}/RECORD +7 -6
- {scheme_sdk-0.3.6.dist-info → scheme_sdk-0.3.8.dist-info}/WHEEL +1 -1
|
@@ -13,12 +13,17 @@ from .errors import (
|
|
|
13
13
|
ConnectorRateLimitError,
|
|
14
14
|
)
|
|
15
15
|
from .message import MessageConnector
|
|
16
|
+
from .storage import FileContent, FileMetadata, FolderMetadata, StorageConnector
|
|
16
17
|
|
|
17
18
|
__all__ = [
|
|
18
19
|
# Base classes
|
|
19
20
|
"BaseConnector",
|
|
20
21
|
"MessageConnector",
|
|
22
|
+
"StorageConnector",
|
|
21
23
|
"ConnectorContext",
|
|
24
|
+
"FileMetadata",
|
|
25
|
+
"FolderMetadata",
|
|
26
|
+
"FileContent",
|
|
22
27
|
# Exceptions
|
|
23
28
|
"ConnectorError",
|
|
24
29
|
"ConnectorAuthError",
|
|
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
|
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
4
|
from typing import Any, Dict, Iterable, Optional
|
|
5
|
+
from html_sanitizer import Sanitizer
|
|
5
6
|
|
|
6
7
|
from .base import BaseConnector
|
|
7
8
|
|
|
@@ -11,6 +12,38 @@ class MessageConnector(BaseConnector, ABC):
|
|
|
11
12
|
Abstract base class for all message connectors.
|
|
12
13
|
"""
|
|
13
14
|
|
|
15
|
+
_sanitizer_settings = {
|
|
16
|
+
"tags": {
|
|
17
|
+
"a",
|
|
18
|
+
"strong",
|
|
19
|
+
"em",
|
|
20
|
+
"p",
|
|
21
|
+
"ul",
|
|
22
|
+
"ol",
|
|
23
|
+
"li",
|
|
24
|
+
"br",
|
|
25
|
+
"blockquote",
|
|
26
|
+
"code",
|
|
27
|
+
"pre",
|
|
28
|
+
"span",
|
|
29
|
+
"sub",
|
|
30
|
+
"sup",
|
|
31
|
+
"hr",
|
|
32
|
+
},
|
|
33
|
+
"attributes": {"a": {"href", "title", "rel", "target"}},
|
|
34
|
+
"empty": {"hr", "a", "br"},
|
|
35
|
+
"separate": {"a", "p", "li", "blockquote", "pre"},
|
|
36
|
+
"whitespace": {"br"},
|
|
37
|
+
"add_nofollow": False,
|
|
38
|
+
"autolink": False,
|
|
39
|
+
"keep_typographic_whitespace": False,
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
_sanitizer: Sanitizer
|
|
43
|
+
|
|
44
|
+
def __init__(self):
|
|
45
|
+
self._sanitizer = Sanitizer(settings=self._sanitizer_settings)
|
|
46
|
+
|
|
14
47
|
@abstractmethod
|
|
15
48
|
def fetch_conversations(self) -> Iterable[Dict[str, Any]]:
|
|
16
49
|
"""
|
|
@@ -85,6 +118,7 @@ class MessageConnector(BaseConnector, ABC):
|
|
|
85
118
|
"""
|
|
86
119
|
...
|
|
87
120
|
|
|
121
|
+
@abstractmethod
|
|
88
122
|
def normalize_message(self, raw_message: Dict[str, Any]) -> Dict[str, Any]:
|
|
89
123
|
"""
|
|
90
124
|
Transform a platform-specific message into canonical format.
|
|
@@ -100,6 +134,7 @@ class MessageConnector(BaseConnector, ABC):
|
|
|
100
134
|
"""
|
|
101
135
|
...
|
|
102
136
|
|
|
137
|
+
@abstractmethod
|
|
103
138
|
def normalize_conversation(
|
|
104
139
|
self, raw_conversation: Dict[str, Any]
|
|
105
140
|
) -> Dict[str, Any]:
|
|
@@ -134,6 +169,18 @@ class MessageConnector(BaseConnector, ABC):
|
|
|
134
169
|
self._logger.warning(f"search_messages not implemented for {self.platform}")
|
|
135
170
|
return iter([])
|
|
136
171
|
|
|
172
|
+
def _sanitize_html(self, html: str) -> str:
|
|
173
|
+
"""
|
|
174
|
+
Sanitize HTML content.
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
html: HTML content to sanitize
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
Sanitized HTML content
|
|
181
|
+
"""
|
|
182
|
+
return self._sanitizer.sanitize(html)
|
|
183
|
+
|
|
137
184
|
|
|
138
185
|
@dataclass
|
|
139
186
|
class Conversation:
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SeedVault Connector SDK - Storage Connector Interfaces
|
|
3
|
+
|
|
4
|
+
This module defines the base interface for storage connectors (e.g. Drive,
|
|
5
|
+
OneDrive, Dropbox). Storage connectors provide file and folder metadata,
|
|
6
|
+
search capabilities, and content export.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from typing import Any, Dict, Iterable, Optional
|
|
13
|
+
|
|
14
|
+
from .base import BaseConnector
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StorageConnector(BaseConnector, ABC):
|
|
18
|
+
"""
|
|
19
|
+
Abstract base class for all storage connectors.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
@abstractmethod
|
|
23
|
+
def list_root_items(self) -> Iterable[Dict[str, Any]]:
|
|
24
|
+
"""
|
|
25
|
+
List items at the storage root.
|
|
26
|
+
|
|
27
|
+
Yields:
|
|
28
|
+
Dict: Raw file or folder objects from the platform API.
|
|
29
|
+
"""
|
|
30
|
+
...
|
|
31
|
+
|
|
32
|
+
@abstractmethod
|
|
33
|
+
def list_children(self, folder_id: str) -> Iterable[Dict[str, Any]]:
|
|
34
|
+
"""
|
|
35
|
+
List child items within a folder.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
folder_id: Unique identifier of the parent folder.
|
|
39
|
+
|
|
40
|
+
Yields:
|
|
41
|
+
Dict: Raw file or folder objects from the platform API.
|
|
42
|
+
"""
|
|
43
|
+
...
|
|
44
|
+
|
|
45
|
+
@abstractmethod
|
|
46
|
+
def get_item(self, item_id: str) -> Dict[str, Any]:
|
|
47
|
+
"""
|
|
48
|
+
Fetch a single file or folder by ID.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
item_id: Unique identifier of the item.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
Dict: Raw file or folder object from the platform API.
|
|
55
|
+
"""
|
|
56
|
+
...
|
|
57
|
+
|
|
58
|
+
@abstractmethod
|
|
59
|
+
def search_items(
|
|
60
|
+
self, query: str, folder_id: Optional[str] = None, limit: int = 100
|
|
61
|
+
) -> Iterable[Dict[str, Any]]:
|
|
62
|
+
"""
|
|
63
|
+
Search for files or folders by query string.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
query: Search query (platform-specific syntax).
|
|
67
|
+
folder_id: Optional folder scope for the search.
|
|
68
|
+
limit: Maximum number of results.
|
|
69
|
+
|
|
70
|
+
Yields:
|
|
71
|
+
Dict: Raw file or folder objects from the platform API.
|
|
72
|
+
"""
|
|
73
|
+
...
|
|
74
|
+
|
|
75
|
+
@abstractmethod
|
|
76
|
+
def fetch_content(self, file_id: str) -> Dict[str, Any]:
|
|
77
|
+
"""
|
|
78
|
+
Fetch file content by file ID.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
file_id: Unique identifier of the file.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
Dict: Raw content payload from the platform API.
|
|
85
|
+
Expected keys may include:
|
|
86
|
+
- content_bytes: bytes
|
|
87
|
+
- content_text: str
|
|
88
|
+
- mime_type: str
|
|
89
|
+
- size: int
|
|
90
|
+
- checksum: str
|
|
91
|
+
- encoding: str
|
|
92
|
+
"""
|
|
93
|
+
...
|
|
94
|
+
|
|
95
|
+
@abstractmethod
|
|
96
|
+
def normalize_file(self, raw_file: Dict[str, Any]) -> Dict[str, Any]:
|
|
97
|
+
"""
|
|
98
|
+
Transform a platform-specific file into canonical format.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
raw_file: Raw file object from the platform API.
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
Normalized file dictionary in canonical format.
|
|
105
|
+
"""
|
|
106
|
+
...
|
|
107
|
+
|
|
108
|
+
@abstractmethod
|
|
109
|
+
def normalize_folder(self, raw_folder: Dict[str, Any]) -> Dict[str, Any]:
|
|
110
|
+
"""
|
|
111
|
+
Transform a platform-specific folder into canonical format.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
raw_folder: Raw folder object from the platform API.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
Normalized folder dictionary in canonical format.
|
|
118
|
+
"""
|
|
119
|
+
...
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
|
|
123
|
+
class FileMetadata:
|
|
124
|
+
id: str
|
|
125
|
+
name: str
|
|
126
|
+
path: Optional[str] = None
|
|
127
|
+
mime_type: Optional[str] = None
|
|
128
|
+
size: Optional[int] = None
|
|
129
|
+
modified_at: Optional[datetime] = None
|
|
130
|
+
created_at: Optional[datetime] = None
|
|
131
|
+
checksum: Optional[str] = None
|
|
132
|
+
direct_link: Optional[str] = None
|
|
133
|
+
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@dataclass
|
|
137
|
+
class FolderMetadata:
|
|
138
|
+
id: str
|
|
139
|
+
name: str
|
|
140
|
+
path: Optional[str] = None
|
|
141
|
+
parent_id: Optional[str] = None
|
|
142
|
+
created_at: Optional[datetime] = None
|
|
143
|
+
modified_at: Optional[datetime] = None
|
|
144
|
+
item_count: Optional[int] = None
|
|
145
|
+
direct_link: Optional[str] = None
|
|
146
|
+
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass
|
|
150
|
+
class FileContent:
|
|
151
|
+
file_id: str
|
|
152
|
+
name: Optional[str] = None
|
|
153
|
+
mime_type: Optional[str] = None
|
|
154
|
+
size: Optional[int] = None
|
|
155
|
+
content_bytes: Optional[bytes] = None
|
|
156
|
+
content_text: Optional[str] = None
|
|
157
|
+
checksum: Optional[str] = None
|
|
158
|
+
encoding: Optional[str] = None
|
|
159
|
+
metadata: Dict[str, Any] = field(default_factory=dict)
|
scheme_sdk/connectors/outlook.py
CHANGED
|
@@ -4,6 +4,12 @@ from typing import Any, Dict, List, Optional
|
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
6
|
|
|
7
|
+
from pprint import pprint as pp
|
|
8
|
+
from dotenv import load_dotenv
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
load_dotenv()
|
|
12
|
+
|
|
7
13
|
from .base import MessageConnector
|
|
8
14
|
|
|
9
15
|
|
|
@@ -14,6 +20,7 @@ class OutlookConnector(MessageConnector):
|
|
|
14
20
|
_backoff_cap_seconds = 30
|
|
15
21
|
|
|
16
22
|
def __init__(self, token: str):
|
|
23
|
+
super().__init__()
|
|
17
24
|
self.token = token
|
|
18
25
|
self.base = "https://graph.microsoft.com/v1.0"
|
|
19
26
|
|
|
@@ -118,7 +125,7 @@ class OutlookConnector(MessageConnector):
|
|
|
118
125
|
return {
|
|
119
126
|
"title": message["subject"],
|
|
120
127
|
"platform": self.platform,
|
|
121
|
-
"text": message["body"]["content"],
|
|
128
|
+
"text": self._sanitize_html(message["body"]["content"]),
|
|
122
129
|
"direct_link": message["webLink"],
|
|
123
130
|
"metadata": {
|
|
124
131
|
"platform_conversation_id": message["conversationId"],
|
|
@@ -212,3 +219,11 @@ class OutlookConnector(MessageConnector):
|
|
|
212
219
|
next_params = None # nextLink already includes the query string
|
|
213
220
|
|
|
214
221
|
return items
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
if __name__ == "__main__":
|
|
225
|
+
connector = OutlookConnector(os.getenv("OUTLOOK_TOKEN"))
|
|
226
|
+
conversations = connector.fetch_conversations(top=1)
|
|
227
|
+
pp(conversations)
|
|
228
|
+
messages = connector.fetch_messages(conversations[0]["id"])
|
|
229
|
+
pp(messages)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: scheme_sdk
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.8
|
|
4
4
|
Summary: The Scheme SDK provides connectors for ingesting conversations, messages, and files across communication platforms.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -203,6 +203,7 @@ License: Apache License
|
|
|
203
203
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
204
204
|
See the License for the specific language governing permissions and
|
|
205
205
|
limitations under the License.
|
|
206
|
+
Requires-Dist: html-sanitizer>=2.6.0
|
|
206
207
|
Requires-Dist: requests>=2.32.5
|
|
207
208
|
Requires-Python: >=3.11
|
|
208
209
|
Project-URL: Homepage, https://www.schemebig.com/
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
scheme_sdk/__init__.py,sha256=bQ_i10iSwabzAhS5CapMVplDhMXptJB0qhcgxW_-lnU,26
|
|
2
2
|
scheme_sdk/connectors/__init__.py,sha256=d7XyCKbx0QklcAFyhTbNxTg0PakQfTs3eQSayYUonqs,144
|
|
3
|
-
scheme_sdk/connectors/base/__init__.py,sha256=
|
|
3
|
+
scheme_sdk/connectors/base/__init__.py,sha256=_1iMndo5AbzvPiVoJHlSQbDgXYoKxoz65FMRloolJxM,805
|
|
4
4
|
scheme_sdk/connectors/base/base.py,sha256=JtsD-HBdjC_Jl22M9j-ihe0tyXJw6JpgWsZp_ozCwDA,8855
|
|
5
5
|
scheme_sdk/connectors/base/errors.py,sha256=53Dhz5qImXJe0q0cO7IqG4yja0OC0hnxMNMMmgdCtW0,1152
|
|
6
|
-
scheme_sdk/connectors/base/message.py,sha256=
|
|
6
|
+
scheme_sdk/connectors/base/message.py,sha256=xD1M1Wslsr6c5no4G4FozXNzUWzgPifZHI_qveYU2YY,6529
|
|
7
|
+
scheme_sdk/connectors/base/storage.py,sha256=OpcIub4wLoIGZfL5wi1yc_05RInYb61xEpzXqik2Eyg,4263
|
|
7
8
|
scheme_sdk/connectors/discord.py,sha256=CdtzVlwT0aHcUayhLGno_vmfhdt_WRtGOeV2zj7Ye7I,844
|
|
8
9
|
scheme_sdk/connectors/gmail.py,sha256=wZCj917qujXSE9jexqvkDgdmz02y6QST8q9KNRwEvg0,866
|
|
9
|
-
scheme_sdk/connectors/outlook.py,sha256=
|
|
10
|
+
scheme_sdk/connectors/outlook.py,sha256=udANT0wZNOla_u0Tp8ih-HVLeJeGp7P7GHux6l2SMgs,7798
|
|
10
11
|
scheme_sdk/connectors/slack.py,sha256=_g--XxS4_ImT0fs0HX9vNquTkUlaikiylBhsgJzCC-4,1434
|
|
11
|
-
scheme_sdk-0.3.
|
|
12
|
-
scheme_sdk-0.3.
|
|
13
|
-
scheme_sdk-0.3.
|
|
12
|
+
scheme_sdk-0.3.8.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
|
|
13
|
+
scheme_sdk-0.3.8.dist-info/METADATA,sha256=TOAFe1QG8coyBGbgn1JZtTSXxxtW97sZUbcv2AvZbug,15121
|
|
14
|
+
scheme_sdk-0.3.8.dist-info/RECORD,,
|