sayou-connector 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sayou/connector/__init__.py +11 -0
- sayou/connector/core/exceptions.py +38 -0
- sayou/connector/fetcher/file_fetcher.py +42 -0
- sayou/connector/fetcher/requests_fetcher.py +77 -0
- sayou/connector/fetcher/sqlite_fetcher.py +50 -0
- sayou/connector/generator/file_generator.py +124 -0
- sayou/connector/generator/requests_generator.py +113 -0
- sayou/connector/generator/sqlite_generator.py +140 -0
- sayou/connector/interfaces/base_fetcher.py +81 -0
- sayou/connector/interfaces/base_generator.py +99 -0
- sayou/connector/pipeline.py +304 -0
- sayou/connector/plugins/gmail_fetcher.py +127 -0
- sayou/connector/plugins/gmail_generator.py +79 -0
- sayou/connector/plugins/google_calendar_fetcher.py +89 -0
- sayou/connector/plugins/google_calendar_generator.py +46 -0
- sayou/connector/plugins/google_drive_fetcher.py +151 -0
- sayou/connector/plugins/google_drive_generator.py +107 -0
- sayou/connector/plugins/imap_email_fetcher.py +140 -0
- sayou/connector/plugins/imap_email_generator.py +93 -0
- sayou/connector/plugins/notion_fetcher.py +301 -0
- sayou/connector/plugins/notion_generator.py +73 -0
- sayou/connector/plugins/public_youtube_fetcher.py +134 -0
- sayou/connector/plugins/public_youtube_generator.py +60 -0
- sayou_connector-0.3.12.dist-info/METADATA +303 -0
- sayou_connector-0.3.12.dist-info/RECORD +26 -0
- sayou_connector-0.3.12.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import Iterator
|
|
3
|
+
|
|
4
|
+
from sayou.core.base_component import BaseComponent
|
|
5
|
+
from sayou.core.decorators import measure_time
|
|
6
|
+
from sayou.core.schemas import SayouPacket, SayouTask
|
|
7
|
+
|
|
8
|
+
from ..core.exceptions import GeneratorError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseGenerator(BaseComponent):
    """
    (Tier 1) Abstract base class for all task generators.

    Generators are responsible for discovering resources (e.g., listing files,
    crawling links) and creating `SayouTask` objects for the Fetcher.

    Subclasses must implement `_do_generate`; they may also override
    `can_handle` (auto-detection score) and `_do_feedback` (result feedback).
    """

    component_name = "BaseGenerator"
    # Additional strategy keys for this generator. ConnectorPipeline registers
    # the class under each entry in addition to its registry name.
    SUPPORTED_TYPES = []

    @classmethod
    def can_handle(cls, source: str) -> float:
        """
        Evaluates whether this generator can handle the given source.

        Analyzes the source string to determine if it matches the pattern or format
        supported by this generator. Returns a confidence score between 0.0 and 1.0.
        The base implementation never claims a source.

        Args:
            source (str): The input source string to evaluate.

        Returns:
            float: A confidence score where 1.0 means full confidence,
                0.0 means the source is incompatible, and intermediate values
                indicate partial matches or heuristics.
        """
        return 0.0

    # NOTE(review): `generate` is a generator function, so depending on how
    # `measure_time` is implemented it may only time the creation of the
    # generator object rather than the iteration - confirm.
    @measure_time
    def generate(self, source: str, **kwargs) -> Iterator[SayouTask]:
        """
        Execute the generation strategy and yield tasks one by one.

        This method handles the lifecycle of the generation process, including
        logging and error boundary protection: it emits `on_start`, streams the
        tasks produced by `_do_generate`, then emits `on_finish`. Any exception
        raised while generating is logged, reported through `on_error` and
        re-raised as a `GeneratorError` chained to the original exception.

        Args:
            source (str): The source string to generate tasks from.
            **kwargs: Forwarded unchanged to `_do_generate`.

        Yields:
            Iterator[SayouTask]: An iterator of tasks to be processed by Fetchers.

        Raises:
            GeneratorError: If the underlying strategy raises any `Exception`.
        """
        self._emit("on_start", input_data={"component": self.component_name})
        self._log(f"Starting generation strategy: {self.component_name}")
        count = 0
        try:
            for task in self._do_generate(source, **kwargs):
                count += 1
                yield task
            self._emit("on_finish", result_data={"total_tasks": count}, success=True)
        except Exception as e:
            wrapped_error = GeneratorError(
                f"[{self.component_name}] Strategy crashed: {e}"
            )
            self.logger.error(wrapped_error, exc_info=True)
            self._emit("on_error", error=wrapped_error)
            # Chain explicitly so the original failure stays attached as
            # `__cause__` for callers and tracebacks.
            raise wrapped_error from e
        finally:
            # Runs on success, on failure and when the consumer stops early.
            self._log(f"Generator finished. Total tasks yielded: {count}")

    @abstractmethod
    def _do_generate(self, source: str, **kwargs) -> Iterator[SayouTask]:
        """
        [Abstract Hook] Implement the logic to discover resources.

        Args:
            source (str): The source string to generate tasks from.
            **kwargs: Additional keyword arguments.

        Yields:
            SayouTask: A task object representing a unit of work.
        """
        raise NotImplementedError

    def feedback(self, packet: SayouPacket):
        """
        Receive feedback from the execution result of a task.

        This allows the generator to adjust its strategy dynamically
        (e.g., adding new links found in a crawled page). The call is
        delegated to the `_do_feedback` hook.

        Args:
            packet (SayouPacket): The result packet from the Fetcher.
        """
        self._do_feedback(packet)

    def _do_feedback(self, packet: SayouPacket):
        """
        [Optional Hook] Override this to handle feedback logic.

        The default implementation ignores the packet.
        """
        pass
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import pkgutil
|
|
3
|
+
from typing import Dict, Iterator, List, Optional, Type
|
|
4
|
+
|
|
5
|
+
from sayou.core.base_component import BaseComponent
|
|
6
|
+
from sayou.core.decorators import safe_run
|
|
7
|
+
from sayou.core.registry import COMPONENT_REGISTRY
|
|
8
|
+
from sayou.core.schemas import SayouPacket, SayouTask
|
|
9
|
+
|
|
10
|
+
from .interfaces.base_fetcher import BaseFetcher
|
|
11
|
+
from .interfaces.base_generator import BaseGenerator
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ConnectorPipeline(BaseComponent):
    """
    Orchestrates the data collection process by connecting Generators and Fetchers.

    A Generator turns a root source into a stream of `SayouTask` objects. Each
    task is routed by its `source_type` to a Fetcher, and the resulting
    `SayouPacket` is yielded to the caller and fed back to the Generator.
    """

    component_name = "ConnectorPipeline"

    def __init__(
        self,
        extra_generators: Optional[List[Type[BaseGenerator]]] = None,
        extra_fetchers: Optional[List[Type[BaseFetcher]]] = None,
        **kwargs,
    ):
        """
        Initializes the pipeline and discovers available components.

        Sets up the internal storage for generators and fetchers, scans specific
        package paths to automatically discover plugins, and loads them from the
        global registry into the local mapping. User-supplied classes are
        registered last, so they override built-ins that share a key.

        Args:
            extra_generators: List of custom generator classes to register.
            extra_fetchers: List of custom fetcher classes to register.
            **kwargs: Configuration stored as `global_config` and passed to
                `initialize`.

        Raises:
            TypeError: If an entry of `extra_generators` / `extra_fetchers` is
                an instance rather than a class.
        """
        super().__init__()

        # Generators are kept as classes (a fresh instance is built per run);
        # fetchers are kept as ready-to-use instances keyed by source type.
        self.generator_cls_map: Dict[str, Type[BaseGenerator]] = {}
        self.fetcher_cls_map: Dict[str, BaseFetcher] = {}

        self._register("sayou.connector.generator")
        self._register("sayou.connector.fetcher")
        self._register("sayou.connector.plugins")

        self._load_from_registry()

        for cls in extra_generators or []:
            self._register_manual(cls)

        for cls in extra_fetchers or []:
            self._register_manual(cls)

        self.global_config = dict(kwargs)

        self.initialize(**kwargs)

    def _add_generator(self, name: str, cls: Type[BaseGenerator]) -> None:
        """Store a generator class under `name` and each of its SUPPORTED_TYPES."""
        self.generator_cls_map[name] = cls
        for alias in getattr(cls, "SUPPORTED_TYPES", []):
            self.generator_cls_map[alias] = cls

    def _add_fetcher(self, cls: Type[BaseFetcher]) -> None:
        """Instantiate a fetcher class and store it under each of its SUPPORTED_TYPES."""
        instance = cls()
        for source_type in getattr(instance, "SUPPORTED_TYPES", []):
            self.fetcher_cls_map[source_type] = instance

    def _attach_callbacks(self, component) -> None:
        """
        Forward this pipeline's callbacks to `component` without duplicating them.

        Fetcher instances are shared across tasks and runs, so a callback that
        the component already holds is skipped.
        """
        for cb in getattr(self, "_callbacks", None) or ():
            # Assumes components keep registered callbacks in `_callbacks`, as
            # the pipeline itself does; if the attribute is absent the callback
            # is simply added.
            if cb not in (getattr(component, "_callbacks", None) or ()):
                component.add_callback(cb)

    def _register_manual(self, cls):
        """
        Safely registers a user-provided generator or fetcher class.

        Fetcher classes (subclasses of `BaseFetcher`) are instantiated and
        mapped by their SUPPORTED_TYPES; any other class is treated as a
        generator and mapped by its `component_name` and SUPPORTED_TYPES.

        Args:
            cls: The component class (not an instance) to register.

        Raises:
            TypeError: If `cls` is not a class.
        """
        if not isinstance(cls, type):
            raise TypeError(
                f"Invalid component: {cls}. "
                f"Please pass the CLASS itself (e.g., MyGenerator), not an instance (MyGenerator())."
            )

        if issubclass(cls, BaseFetcher):
            self._add_fetcher(cls)
            return

        name = getattr(cls, "component_name", cls.__name__)
        self._add_generator(name, cls)

    @classmethod
    def process(
        cls,
        source: str,
        strategy: str = "auto",
        **kwargs,
    ) -> Iterator[SayouPacket]:
        """
        [Facade] 1-Line Execution Method.

        Creates a pipeline instance and returns the iterator produced by
        `run`. The same `kwargs` are passed to both the constructor and `run`.
        Because `run` is a generator, no work happens until the result is
        iterated.
        """
        instance = cls(**kwargs)
        return instance.run(source, strategy, **kwargs)

    def _register(self, package_name: str):
        """
        Automatically discovers and registers plugins from the specified package.

        Scans the directory of the given package name and attempts to import all
        submodules found. Importing these modules triggers the `@register_component`
        decorator attached to the classes, effectively registering them into the
        global `COMPONENT_REGISTRY`. Import failures are logged as warnings and
        never propagate, so one broken plugin cannot block the others.

        Args:
            package_name (str): The dot-separated Python package path to scan
                (e.g., "sayou.connector.generator").
        """
        try:
            package = importlib.import_module(package_name)
            if hasattr(package, "__path__"):
                for _, name, _ in pkgutil.iter_modules(package.__path__):
                    full_name = f"{package_name}.{name}"
                    try:
                        importlib.import_module(full_name)
                        self._log(f"Discovered module: {full_name}", level="debug")
                    except Exception as e:
                        self._log(
                            f"Failed to import module {full_name}: {e}", level="warning"
                        )
        except ImportError as e:
            self._log(f"Package not found: {package_name} ({e})", level="warning")

    def _load_from_registry(self):
        """
        Populates local component maps from the global registry.

        Iterates through the global `COMPONENT_REGISTRY` to retrieve registered
        generator and fetcher classes. Generator classes are stored in
        `self.generator_cls_map` under their registry name and SUPPORTED_TYPES;
        fetchers are instantiated and stored in `self.fetcher_cls_map` under
        their SUPPORTED_TYPES.
        """
        for name, cls in COMPONENT_REGISTRY["generator"].items():
            self._add_generator(name, cls)

        for _name, cls in COMPONENT_REGISTRY["fetcher"].items():
            self._add_fetcher(cls)

    @safe_run(default_return=None)
    def initialize(self, **kwargs):
        """
        Perform global initialization for the pipeline.

        Merges `kwargs` into `self.global_config`. The method is wrapped in
        `safe_run(default_return=None)`, presumably so that a failure here does
        not crash construction (confirm against `sayou.core.decorators`).

        Args:
            **kwargs: Global configuration parameters.
        """
        self.global_config.update(kwargs)
        self._log("ConnectorPipeline initialized.")

    def run(
        self,
        source: str,
        strategy: str = "auto",
        **kwargs,
    ) -> Iterator[SayouPacket]:
        """
        Execute the collection pipeline.

        This is the main entry point. It selects a Generator based on the strategy,
        produces Tasks, routes them to the appropriate Fetcher, and yields the results.
        This is a generator function: nothing runs (including strategy
        resolution) until the returned iterator is consumed. Only successful
        packets are yielded; failed ones are logged and still fed back to the
        generator.

        Args:
            source (str): The root source (e.g., file path, URL, connection string).
            strategy (str): The name of the generator strategy to use (default: "auto").
            **kwargs: Additional arguments passed to the Generator's `initialize`
                and `generate` methods.

        Yields:
            Iterator[SayouPacket]: A stream of packets containing fetched data.

        Raises:
            ValueError: If the specified strategy is not registered.
        """
        self._emit("on_start", input_data={"source": source, "strategy": strategy})

        # 1. Select the generator class and build a fresh instance for this run.
        generator_cls = self._resolve_generator(source, strategy)
        generator = generator_cls()
        self._attach_callbacks(generator)

        # 2. Initialize the generator.
        generator.initialize(source=source, **kwargs)
        self._log(f"Connector started using strategy '{strategy}' on '{source}'")

        # 3. Execution loop.
        count = 0
        success_count = 0

        try:
            for task in generator.generate(source, **kwargs):
                if not isinstance(task, SayouTask):
                    self._log(
                        f"Invalid task type from generator: {type(task)}",
                        level="warning",
                    )
                    continue

                # 4. Route the task to the fetcher registered for its type.
                fetcher = self.fetcher_cls_map.get(task.source_type)
                if not fetcher:
                    self._log(
                        f"Skipping task {task.uri}: No fetcher for type '{task.source_type}'"
                    )
                    continue

                # Fetchers are shared instances, so callbacks are attached
                # at most once instead of once per task.
                self._attach_callbacks(fetcher)

                # 5. Perform the fetch.
                packet = fetcher.fetch(task)

                # 6. Handle the result.
                if packet.success:
                    success_count += 1
                    yield packet
                else:
                    self._log(f"Fetch failed: {packet.error}")

                # 7. Feedback loop.
                generator.feedback(packet)
                count += 1

            self._emit("on_finish", result_data={"count": count}, success=True)

        except Exception as e:
            self._emit("on_error", error=e)
            raise

        self._log(f"Connector finished. Processed: {count}, Success: {success_count}")

    def _resolve_generator(
        self,
        source: str,
        strategy: str,
    ) -> Type[BaseGenerator]:
        """
        Determines the appropriate generator strategy to use.

        Prioritizes the strategy explicitly specified by the user. If the strategy
        is set to 'auto' or None, it attempts to detect the most suitable generator
        based on the source string using the `can_handle` method of registered generators.
        The highest score wins; ties go to the generator registered first. If no
        generator scores above 0.0, the 'file' generator is used as a fallback.

        Args:
            source (str): The input source string (e.g., file path, URL, connection string).
            strategy (str): The name of the strategy to use (e.g., 'file', 'sqlite').
                            If 'auto' or None, automatic detection is performed.

        Returns:
            Type[BaseGenerator]: The generator CLASS to instantiate (not an instance).

        Raises:
            ValueError: If a specific strategy is requested but not found in the registry.
            KeyError: If auto-detection fails and no 'file' generator is registered.
        """
        if strategy and strategy != "auto":
            gen = self.generator_cls_map.get(strategy)
            if not gen:
                raise ValueError(f"Unknown strategy: {strategy}")
            return gen

        best_score = 0.0
        best_cls = None

        # Diagnostics only: describe the item being scored in the log output.
        obj_type = getattr(source, "type", type(source).__name__)
        content_len = 0
        if hasattr(source, "content"):
            c = source.content
            if hasattr(c, "__len__"):
                content_len = len(c)
        elif isinstance(source, (str, bytes, list, dict)):
            content_len = len(source)

        log_lines = [f"Scoring for Item (Type: {obj_type}, Len: {content_len}):"]
        if hasattr(source, "content") and isinstance(source.content, str):
            log_lines.append(f"Content Preview: {source.content[:50]}...")
        elif isinstance(source, str):
            log_lines.append(f"Content Preview: {source[:50]}...")

        # A class may be registered under several keys. dict.fromkeys removes
        # the duplicates while keeping registration order, so equal scores are
        # resolved deterministically (a set would iterate in arbitrary order).
        for cls in dict.fromkeys(self.generator_cls_map.values()):
            try:
                score = cls.can_handle(source)

                mark = ""
                if score > best_score:
                    best_score = score
                    best_cls = cls
                    mark = "👑"

                log_lines.append(f"   - {cls.__name__}: {score} {mark}")

            except Exception as e:
                # A faulty can_handle must not abort detection for the others.
                log_lines.append(f"   - {cls.__name__}: Error ({e})")

        self._log("\n".join(log_lines))

        if best_cls and best_score > 0.0:
            return best_cls

        self._log(
            "Auto-detection failed. Falling back to default strategy 'file'.",
            level="warning",
        )
        return self.generator_cls_map["file"]
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
from typing import Dict, Any
|
|
3
|
+
|
|
4
|
+
from sayou.core.registry import register_component
|
|
5
|
+
from sayou.core.schemas import SayouTask
|
|
6
|
+
from ..interfaces.base_fetcher import BaseFetcher
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from google.oauth2.credentials import Credentials
|
|
10
|
+
from googleapiclient.discovery import build
|
|
11
|
+
except ImportError:
|
|
12
|
+
build = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@register_component("fetcher")
class GmailFetcher(BaseFetcher):
    """
    Fetches specific email content using Gmail API.
    Reconstructs the email into a standardized HTML format suitable for Refinery.
    """

    component_name = "GmailFetcher"
    SUPPORTED_TYPES = ["gmail"]

    @classmethod
    def can_handle(cls, uri: str) -> float:
        """Return 1.0 for `gmail-msg://` URIs and 0.0 for anything else."""
        return 1.0 if isinstance(uri, str) and uri.startswith("gmail-msg://") else 0.0

    def _do_fetch(self, task: SayouTask) -> str:
        """
        Download one Gmail message and rebuild it as a standalone HTML document.

        Reads `token_path` and `msg_id` from `task.params`, fetches the message
        with `format="full"`, and renders Subject/From/Date plus the body into
        an HTML page. Header values are HTML-escaped because they come from the
        email itself and are placed inside markup and attribute values.

        Args:
            task (SayouTask): Task whose `params` carry `token_path` and `msg_id`.

        Returns:
            str: The reconstructed HTML document.

        Raises:
            ImportError: If the Google API client libraries are not installed.
            ValueError: If `token_path` or `msg_id` is missing from the task.
        """
        from html import escape

        token_path = task.params.get("token_path")
        msg_id = task.params.get("msg_id")

        if not build:
            raise ImportError("Please install google-api-python-client")

        if not token_path:
            raise ValueError("GmailFetcher requires 'token_path' in task params.")
        if not msg_id:
            raise ValueError("GmailFetcher requires 'msg_id' in task params.")

        creds = Credentials.from_authorized_user_file(token_path)
        service = build("gmail", "v1", credentials=creds)

        # 1. Fetch email details (format='full')
        message = (
            service.users()
            .messages()
            .get(userId="me", id=msg_id, format="full")
            .execute()
        )

        payload = message.get("payload", {})
        headers = payload.get("headers", [])

        # 2. Parse headers (Subject, From, Date)
        subject = self._get_header(headers, "Subject", "(No Subject)")
        sender = self._get_header(headers, "From", "Unknown")
        date = self._get_header(headers, "Date", "")

        # 3. Extract body (Recursive)
        body_content = self._extract_body(payload)

        # 4. Reconstruct HTML (User Request Format)
        # Escape every header-derived value: a subject such as "</title><script>"
        # or a sender containing '"' would otherwise corrupt the document.
        def safe(value) -> str:
            return escape(str(value), quote=True)

        html_doc = f"""<!DOCTYPE html>
<html>
<head>
<title>{safe(subject)}</title>
<meta name="sender" content="{safe(sender)}">
<meta name="date" content="{safe(date)}">
<meta name="msg_id" content="{safe(msg_id)}">
<meta name="source" content="gmail">
</head>
<body>
{body_content}
</body>
</html>"""

        return html_doc.strip()

    def _get_header(self, headers: list, name: str, default: str) -> str:
        """
        Return the value of the first header matching `name` (case-insensitive).

        Args:
            headers (list): Header dicts with "name" and "value" keys.
            name (str): Header name to look for.
            default (str): Value returned when the header is absent or has no value.
        """
        wanted = name.lower()
        for h in headers:
            if h.get("name", "").lower() == wanted:
                return h.get("value", default)
        return default

    def _extract_body(self, payload: dict) -> str:
        """
        Recursively extract the best displayable body from a message payload.

        A single part with inline data is returned as-is for `text/html`, or
        HTML-escaped and wrapped in `<pre>` for `text/plain`. For multipart
        payloads, the first non-empty HTML (or nested multipart) result is
        preferred over the first non-empty plain-text result, so later parts
        such as attachments cannot replace the body.

        Args:
            payload (dict): A Gmail API message payload or message part.

        Returns:
            str: An HTML fragment, or "" when no textual body is found.
        """
        from html import escape

        # Case A: Single Part
        data = (payload.get("body") or {}).get("data")
        if data:
            mime_type = payload.get("mimeType", "")
            decoded_text = self._decode_base64url(data)

            if mime_type == "text/html":
                return decoded_text
            if mime_type == "text/plain":
                # Plain text may contain '<' or '&'; escape it so it renders
                # literally inside the <pre> element.
                return f"<pre>{escape(decoded_text, quote=False)}</pre>"

        # Case B: Multi Part
        html_part = None
        text_part = None

        for part in payload.get("parts") or []:
            mime_type = part.get("mimeType", "")
            content = self._extract_body(part)
            if not content:
                continue

            if mime_type == "text/html" or "multipart" in mime_type:
                html_part = html_part or content
            elif mime_type == "text/plain":
                text_part = text_part or content

        return html_part or text_part or ""

    def _decode_base64url(self, data: str) -> str:
        """
        Decode URL-safe base64 text (padding optional) into a UTF-8 string.

        Undecodable bytes are replaced rather than raising; malformed input
        yields "" so one bad part does not fail the whole fetch.
        NOTE(review): the part's declared charset is not consulted - bodies in
        other encodings will contain replacement characters.
        """
        try:
            padded = data + "=" * (-len(data) % 4)
            return base64.urlsafe_b64decode(padded).decode("utf-8", errors="replace")
        except (ValueError, TypeError):
            # binascii.Error is a ValueError; TypeError covers non-string input.
            return ""
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from typing import Iterator
|
|
2
|
+
from sayou.core.registry import register_component
|
|
3
|
+
from sayou.core.schemas import SayouTask
|
|
4
|
+
from ..interfaces.base_generator import BaseGenerator
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from google.oauth2.credentials import Credentials
|
|
8
|
+
from googleapiclient.discovery import build
|
|
9
|
+
except ImportError:
|
|
10
|
+
build = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@register_component("generator")
class GmailGenerator(BaseGenerator):
    """
    Scans Gmail inbox using Gmail API (OAuth) and generates tasks.
    """

    component_name = "GmailGenerator"
    SUPPORTED_TYPES = ["gmail"]

    @classmethod
    def can_handle(cls, source: str) -> float:
        """Return 1.0 for `gmail://` sources and 0.0 for anything else."""
        return 1.0 if isinstance(source, str) and source.startswith("gmail://") else 0.0

    def _do_generate(self, source: str, **kwargs) -> Iterator[SayouTask]:
        """
        Connects to Gmail API -> Search (List) -> Yield Tasks.
        source example: gmail://me (default) or gmail://me?q=is:unread

        The search query is taken from the `query` keyword argument when given,
        otherwise from the `q` parameter of the source URI, otherwise it
        defaults to "is:inbox". Only a single result page of at most `limit`
        messages is listed.

        Args:
            source (str): The `gmail://` source URI.
            **kwargs: `token_path` (required, path to the authorized-user token
                file), `query` (optional Gmail search string) and `limit`
                (optional maximum number of messages, default 10).

        Yields:
            SayouTask: One task per message, with URI `gmail-msg://<id>` and
            params `token_path`, `msg_id` and `thread_id`.

        Raises:
            ImportError: If the Google API client libraries are not installed.
            ValueError: If `token_path` is not provided.
        """
        from urllib.parse import parse_qs, urlparse

        if not build:
            raise ImportError(
                "Please install google-api-python-client google-auth-oauthlib"
            )

        token_path = kwargs.get("token_path")
        if not token_path:
            raise ValueError("GmailGenerator requires 'token_path' in kwargs.")

        # 1. Parsing Parameters
        if "query" in kwargs:
            # An explicit keyword argument always wins over the source URI.
            query = kwargs["query"]
        else:
            source_query = None
            if isinstance(source, str):
                source_query = parse_qs(urlparse(source).query).get("q", [None])[0]
            query = source_query or "is:inbox"
        max_results = int(kwargs.get("limit", 10))

        # 2. Connect to Gmail API
        creds = Credentials.from_authorized_user_file(token_path)
        service = build("gmail", "v1", credentials=creds)

        # 3. Fetch email list
        # Only the API call sits inside the try block, so errors raised while
        # the consumer processes yielded tasks are not reported as list failures.
        try:
            results = (
                service.users()
                .messages()
                .list(userId="me", q=query, maxResults=max_results)
                .execute()
            )
        except Exception as e:
            self._log(f"Gmail API List failed: {e}", level="error")
            raise

        messages = results.get("messages", [])

        self._log(f"📧 Found {len(messages)} emails. Generating tasks...")

        # 4. Generate tasks
        for msg in messages:
            msg_id = msg["id"]

            yield SayouTask(
                uri=f"gmail-msg://{msg_id}",
                source_type="gmail",
                params={
                    "token_path": token_path,
                    "msg_id": msg_id,
                    "thread_id": msg.get("threadId"),
                },
            )
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from sayou.core.registry import register_component
|
|
6
|
+
from sayou.core.schemas import SayouTask
|
|
7
|
+
|
|
8
|
+
from ..interfaces.base_fetcher import BaseFetcher
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from google.oauth2.credentials import Credentials
|
|
12
|
+
from googleapiclient.discovery import build
|
|
13
|
+
except ImportError:
|
|
14
|
+
build = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@register_component("fetcher")
class GoogleCalendarFetcher(BaseFetcher):
    """
    Fetches events using Google API with User OAuth Token.
    Works for Workspace (Corporate) & Personal accounts.
    """

    component_name = "GoogleCalendarFetcher"
    SUPPORTED_TYPES = ["google_calendar"]

    @classmethod
    def can_handle(cls, uri: str) -> float:
        """Return 1.0 for `gcal://` URIs and 0.0 for anything else."""
        return 1.0 if isinstance(uri, str) and uri.startswith("gcal://") else 0.0

    def _do_fetch(self, task: SayouTask) -> Dict[str, Any]:
        """
        Fetch events of the primary calendar from 30 days ago to 30 days ahead.

        Recurring events are expanded into single instances and results are
        ordered by start time. All result pages are followed, so calendars with
        more events than one page holds are not truncated.

        Args:
            task (SayouTask): Task whose `params` carry `token_path`, the path
                to the authorized-user token file.

        Returns:
            Dict[str, Any]: `{"content": [event dicts], "meta": {...}}`, where
            each event dict has id, summary, description, start, end, location,
            htmlLink and attendees.

        Raises:
            ImportError: If the Google API client libraries are not installed.
            ValueError: If `token_path` is missing or does not exist.
        """
        if not build:
            raise ImportError("Google API libraries required.")

        token_path = task.params.get("token_path")
        if not token_path or not os.path.exists(token_path):
            raise ValueError("Token path invalid.")

        creds = Credentials.from_authorized_user_file(token_path)

        service = build("calendar", "v3", credentials=creds)

        # Same value as the deprecated datetime.utcnow(): take an aware UTC
        # timestamp, then drop tzinfo so isoformat() + "Z" keeps its format.
        now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        time_min = (now - datetime.timedelta(days=30)).isoformat() + "Z"
        time_max = (now + datetime.timedelta(days=30)).isoformat() + "Z"

        events = []
        page_token = None
        while True:
            events_result = (
                service.events()
                .list(
                    calendarId="primary",
                    timeMin=time_min,
                    timeMax=time_max,
                    singleEvents=True,
                    orderBy="startTime",
                    pageToken=page_token,
                )
                .execute()
            )
            events.extend(events_result.get("items", []))

            # The API signals further pages through nextPageToken.
            page_token = events_result.get("nextPageToken")
            if not page_token:
                break

        parsed_events = [self._parse_event(event) for event in events]

        return {
            "content": parsed_events,
            "meta": {
                "source": "google_calendar",
                "account": "authenticated_user",
                "count": len(parsed_events),
            },
        }

    def _parse_event(self, event: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a raw Calendar API event to the fields exposed by this fetcher."""
        # Timed events carry "dateTime"; all-day events carry only "date".
        start_info = event.get("start") or {}
        end_info = event.get("end") or {}

        return {
            "id": event.get("id"),
            "summary": event.get("summary", "No Title"),
            "description": event.get("description", ""),
            "start": start_info.get("dateTime", start_info.get("date")),
            "end": end_info.get("dateTime", end_info.get("date")),
            "location": event.get("location", ""),
            "htmlLink": event.get("htmlLink", ""),
            "attendees": [
                {"email": a.get("email"), "status": a.get("responseStatus")}
                for a in event.get("attendees", [])
            ],
        }
|