scheme-sdk 0.3.6__tar.gz → 0.3.7__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: scheme_sdk
3
- Version: 0.3.6
3
+ Version: 0.3.7
4
4
  Summary: The Scheme SDK provides connectors for ingesting conversations, messages, and files across communication platforms.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -203,6 +203,7 @@ License: Apache License
203
203
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
204
204
  See the License for the specific language governing permissions and
205
205
  limitations under the License.
206
+ Requires-Dist: html-sanitizer>=2.6.0
206
207
  Requires-Dist: requests>=2.32.5
207
208
  Requires-Python: >=3.11
208
209
  Project-URL: Homepage, https://www.schemebig.com/
@@ -4,12 +4,13 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "scheme_sdk"
7
- version = "0.3.6"
7
+ version = "0.3.7"
8
8
  description = "The Scheme SDK provides connectors for ingesting conversations, messages, and files across communication platforms."
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
11
11
  requires-python = ">=3.11"
12
12
  dependencies = [
13
+ "html-sanitizer>=2.6.0",
13
14
  "requests>=2.32.5",
14
15
  ]
15
16
 
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
2
2
  from datetime import datetime
3
3
  from dataclasses import dataclass, field
4
4
  from typing import Any, Dict, Iterable, Optional
5
+ from html_sanitizer import Sanitizer
5
6
 
6
7
  from .base import BaseConnector
7
8
 
@@ -11,6 +12,11 @@ class MessageConnector(BaseConnector, ABC):
11
12
  Abstract base class for all message connectors.
12
13
  """
13
14
 
15
+ _sanitizer: Sanitizer
16
+
17
+ def __init__(self):
18
+ self._sanitizer = Sanitizer()
19
+
14
20
  @abstractmethod
15
21
  def fetch_conversations(self) -> Iterable[Dict[str, Any]]:
16
22
  """
@@ -85,6 +91,7 @@ class MessageConnector(BaseConnector, ABC):
85
91
  """
86
92
  ...
87
93
 
94
+ @abstractmethod
88
95
  def normalize_message(self, raw_message: Dict[str, Any]) -> Dict[str, Any]:
89
96
  """
90
97
  Transform a platform-specific message into canonical format.
@@ -100,6 +107,7 @@ class MessageConnector(BaseConnector, ABC):
100
107
  """
101
108
  ...
102
109
 
110
+ @abstractmethod
103
111
  def normalize_conversation(
104
112
  self, raw_conversation: Dict[str, Any]
105
113
  ) -> Dict[str, Any]:
@@ -134,6 +142,18 @@ class MessageConnector(BaseConnector, ABC):
134
142
  self._logger.warning(f"search_messages not implemented for {self.platform}")
135
143
  return iter([])
136
144
 
145
+ def _sanitize_html(self, html: str) -> str:
146
+ """
147
+ Sanitize HTML content.
148
+
149
+ Args:
150
+ html: HTML content to sanitize
151
+
152
+ Returns:
153
+ Sanitized HTML content
154
+ """
155
+ return self._sanitizer.sanitize(html)
156
+
137
157
 
138
158
  @dataclass
139
159
  class Conversation:
@@ -4,6 +4,12 @@ from typing import Any, Dict, List, Optional
4
4
 
5
5
  import requests
6
6
 
7
+ from pprint import pprint as pp
8
+ from dotenv import load_dotenv
9
+ import os
10
+
11
+ load_dotenv()
12
+
7
13
  from .base import MessageConnector
8
14
 
9
15
 
@@ -14,6 +20,7 @@ class OutlookConnector(MessageConnector):
14
20
  _backoff_cap_seconds = 30
15
21
 
16
22
  def __init__(self, token: str):
23
+ super().__init__()
17
24
  self.token = token
18
25
  self.base = "https://graph.microsoft.com/v1.0"
19
26
 
@@ -118,7 +125,7 @@ class OutlookConnector(MessageConnector):
118
125
  return {
119
126
  "title": message["subject"],
120
127
  "platform": self.platform,
121
- "text": message["body"]["content"],
128
+ "text": self._sanitize_html(message["body"]["content"]),
122
129
  "direct_link": message["webLink"],
123
130
  "metadata": {
124
131
  "platform_conversation_id": message["conversationId"],
@@ -212,3 +219,11 @@ class OutlookConnector(MessageConnector):
212
219
  next_params = None # nextLink already includes the query string
213
220
 
214
221
  return items
222
+
223
+
224
+ if __name__ == "__main__":
225
+ connector = OutlookConnector(os.getenv("OUTLOOK_TOKEN"))
226
+ conversations = connector.fetch_conversations(top=1)
227
+ pp(conversations)
228
+ messages = connector.fetch_messages(conversations[0]["id"])
229
+ pp(messages)
File without changes
File without changes