sayou-connector 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sayou/connector/__init__.py +11 -0
- sayou/connector/core/exceptions.py +38 -0
- sayou/connector/fetcher/file_fetcher.py +42 -0
- sayou/connector/fetcher/requests_fetcher.py +77 -0
- sayou/connector/fetcher/sqlite_fetcher.py +50 -0
- sayou/connector/generator/file_generator.py +124 -0
- sayou/connector/generator/requests_generator.py +113 -0
- sayou/connector/generator/sqlite_generator.py +140 -0
- sayou/connector/interfaces/base_fetcher.py +81 -0
- sayou/connector/interfaces/base_generator.py +99 -0
- sayou/connector/pipeline.py +304 -0
- sayou/connector/plugins/gmail_fetcher.py +127 -0
- sayou/connector/plugins/gmail_generator.py +79 -0
- sayou/connector/plugins/google_calendar_fetcher.py +89 -0
- sayou/connector/plugins/google_calendar_generator.py +46 -0
- sayou/connector/plugins/google_drive_fetcher.py +151 -0
- sayou/connector/plugins/google_drive_generator.py +107 -0
- sayou/connector/plugins/imap_email_fetcher.py +140 -0
- sayou/connector/plugins/imap_email_generator.py +93 -0
- sayou/connector/plugins/notion_fetcher.py +301 -0
- sayou/connector/plugins/notion_generator.py +73 -0
- sayou/connector/plugins/public_youtube_fetcher.py +134 -0
- sayou/connector/plugins/public_youtube_generator.py +60 -0
- sayou_connector-0.3.12.dist-info/METADATA +303 -0
- sayou_connector-0.3.12.dist-info/RECORD +26 -0
- sayou_connector-0.3.12.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Iterator
|
|
3
|
+
|
|
4
|
+
from sayou.core.registry import register_component
|
|
5
|
+
from sayou.core.schemas import SayouTask
|
|
6
|
+
|
|
7
|
+
from ..interfaces.base_generator import BaseGenerator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@register_component("generator")
class GoogleCalendarGenerator(BaseGenerator):
    """
    Generate a single fetch task for one Google Calendar.

    The generator itself performs no Google API call: it only validates that
    the OAuth 2.0 token file exists and forwards its path, together with the
    calendar id, to the fetcher through the task parameters.
    Requires 'sayou_google_token.json' (generated by the auth script).
    """

    component_name = "GoogleCalendarGenerator"
    SUPPORTED_TYPES = ["google_calendar"]

    @classmethod
    def can_handle(cls, uri: str) -> float:
        """Return 1.0 for ``gcal://`` URIs and 0.0 for anything else."""
        return 1.0 if uri.startswith("gcal://") else 0.0

    def _do_generate(self, source: str, **kwargs) -> Iterator[SayouTask]:
        """
        Yield exactly one task describing the calendar named by ``source``.

        Args:
            source: ``gcal://<calendar_id>``; an empty id means ``primary``.
            **kwargs: must contain ``google_token_path``, the path of the
                OAuth token file.

        Raises:
            FileNotFoundError: if ``google_token_path`` is missing/empty or
                does not point to an existing file.
        """
        token_path = kwargs.get("google_token_path")

        # A missing kwarg would otherwise reach os.path.exists(None) and
        # surface as an unrelated TypeError.
        if not token_path or not os.path.exists(token_path):
            raise FileNotFoundError(
                f"Google Token not found at {token_path}. Run authentication script first."
            )

        calendar_id = source.replace("gcal://", "") or "primary"

        yield SayouTask(
            uri=source,
            source_type="google_calendar",
            params={
                "calendar_id": calendar_id,
                "token_path": token_path,
            },
            meta={
                "source": "google_calendar",
                "calendar_id": calendar_id,
                # '@' is replaced so the id can be used as a file name stem.
                "filename": f"calendar_{calendar_id.replace('@', '_')}",
            },
        )
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from sayou.core.registry import register_component
|
|
6
|
+
from sayou.core.schemas import SayouTask
|
|
7
|
+
|
|
8
|
+
from ..interfaces.base_fetcher import BaseFetcher
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import chardet
|
|
12
|
+
from google.oauth2.credentials import Credentials
|
|
13
|
+
from googleapiclient.discovery import build
|
|
14
|
+
from googleapiclient.errors import HttpError
|
|
15
|
+
from googleapiclient.http import MediaIoBaseDownload
|
|
16
|
+
except ImportError:
|
|
17
|
+
build = None
|
|
18
|
+
chardet = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_component("fetcher")
class GoogleDriveFetcher(BaseFetcher):
    """
    Fetches content from Google Drive files.

    - Google native formats (Docs, Sheets, Slides) are exported to the
      matching MS Office format (.docx, .xlsx, .pptx).
    - Every other file is downloaded as its original bytes.
    - Text-like downloads that chardet identifies (confidence > 0.6) as
      something other than UTF-8/ASCII are transcoded to UTF-8 bytes.
    """

    component_name = "GoogleDriveFetcher"
    SUPPORTED_TYPES = ["drive"]

    # Google-native MIME type -> (export MIME type, file extension).
    _EXPORT_FORMATS = {
        "application/vnd.google-apps.document": (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            ".docx",
        ),
        "application/vnd.google-apps.spreadsheet": (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            ".xlsx",
        ),
        "application/vnd.google-apps.presentation": (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            ".pptx",
        ),
    }

    # Extensions treated as text even when the MIME type does not say so.
    _TEXT_EXTENSIONS = (".csv", ".txt", ".json", ".md", ".py", ".html", ".xml")

    @classmethod
    def can_handle(cls, uri: str) -> float:
        """Return 1.0 for ``gdrive://file/`` URIs and 0.0 for anything else."""
        return 1.0 if uri.startswith("gdrive://file/") else 0.0

    def _do_fetch(self, task: SayouTask) -> Dict[str, Any]:
        """
        Download (or export) the Drive file described by ``task``.

        Reads ``token_path``, ``file_id`` and ``mime_type`` from
        ``task.params`` and ``filename`` from ``task.meta``.

        Returns:
            ``{"content": bytes, "meta": {...}}``. When the Drive API answers
            with an ``HttpError`` the content is ``b""`` and ``meta`` carries
            the error text instead of the file details.

        Raises:
            ImportError: if the optional Google client libraries / chardet
                could not be imported at module load time.
        """
        # The module-level import guard only defines `build` and `chardet` on
        # failure; without this check the first use of `Credentials` would
        # fail with an opaque NameError.
        if build is None or chardet is None:
            raise ImportError(
                "GoogleDriveFetcher requires 'google-api-python-client', "
                "'google-auth' and 'chardet' to be installed."
            )

        token_path = task.params.get("token_path")
        file_id = task.params.get("file_id")
        mime_type = task.params.get("mime_type")
        original_name = task.meta.get("filename", "unknown_file")

        creds = Credentials.from_authorized_user_file(token_path)
        service = build("drive", "v3", credentials=creds)

        # 1. Pick export (Google native formats) or plain download.
        export = self._EXPORT_FORMATS.get(mime_type)
        is_google_doc = export is not None
        if is_google_doc:
            export_mime, extension = export
            request = service.files().export_media(
                fileId=file_id, mimeType=export_mime
            )
        else:
            request = service.files().get_media(fileId=file_id)
            extension = os.path.splitext(original_name)[1]

        # 2. Execute the chunked download.
        try:
            buffer = io.BytesIO()
            downloader = MediaIoBaseDownload(buffer, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
        except HttpError as e:
            self._log(f"Drive Download Failed ({file_id}): {e}", level="error")
            return {
                "content": b"",
                "meta": {"source": "google_drive", "error": str(e), "file_id": file_id},
            }

        raw_bytes = buffer.getvalue()
        final_content = raw_bytes

        # mime_type may be absent from the task; treat that as "unknown"
        # instead of crashing on None.startswith().
        mime = mime_type or ""
        is_text_candidate = (
            mime.startswith("text/")
            or mime == "application/json"
            or extension.lower() in self._TEXT_EXTENSIONS
        )
        if not is_google_doc and is_text_candidate:
            final_content = self._to_utf8_bytes(raw_bytes, original_name)

        # 3. Return
        if original_name.endswith(extension):
            suggested_name = original_name
        else:
            suggested_name = f"{original_name}{extension}"

        return {
            "content": final_content,
            "meta": {
                "source": "google_drive",
                "file_id": file_id,
                "mime_type": mime_type,
                "original_filename": original_name,
                "suggested_filename": suggested_name,
                "extension": extension,
                "is_binary": isinstance(final_content, bytes),
            },
        }

    def _to_utf8_bytes(self, raw_bytes: bytes, name: str) -> bytes:
        """Transcode text bytes (e.g. EUC-KR) to UTF-8; return them unchanged on doubt."""
        detected = chardet.detect(raw_bytes)
        encoding = detected.get("encoding")
        confidence = detected.get("confidence") or 0

        if (
            not encoding
            or encoding.lower() in ("utf-8", "ascii")
            or confidence <= 0.6
        ):
            return raw_bytes

        try:
            # Bytes -> str in the detected encoding, then str -> UTF-8 bytes.
            converted = raw_bytes.decode(encoding).encode("utf-8")
        except (LookupError, UnicodeError) as e:
            self._log(
                f"Encoding conversion failed: {e}. Keeping raw bytes.",
                level="warning",
            )
            return raw_bytes

        self._log(f"Transcoded {name} from {encoding} to utf-8 bytes.")
        return converted
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Iterator
|
|
3
|
+
|
|
4
|
+
from sayou.core.registry import register_component
|
|
5
|
+
from sayou.core.schemas import SayouTask
|
|
6
|
+
|
|
7
|
+
from ..interfaces.base_generator import BaseGenerator
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from google.oauth2.credentials import Credentials
|
|
11
|
+
from googleapiclient.discovery import build
|
|
12
|
+
except ImportError:
|
|
13
|
+
build = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@register_component("generator")
class GoogleDriveGenerator(BaseGenerator):
    """
    Generates tasks for files in Google Drive.

    URI Schema:
    - gdrive://root (My Drive Root)
    - gdrive://{folderID} (Specific Folder)

    Only the direct children of the folder are listed; sub-folders are
    skipped, not descended into.
    """

    component_name = "GoogleDriveGenerator"
    SUPPORTED_TYPES = ["drive"]

    # Google-native MIME type -> (task URI prefix, task source type).
    _NATIVE_TARGETS = {
        "application/vnd.google-apps.document": ("gdocs://document/", "docs"),
        "application/vnd.google-apps.spreadsheet": ("gsheets://spreadsheet/", "sheets"),
        "application/vnd.google-apps.presentation": ("gslides://presentation/", "slides"),
    }
    _FOLDER_MIME = "application/vnd.google-apps.folder"

    @classmethod
    def can_handle(cls, uri: str) -> float:
        """Return 1.0 for ``gdrive://`` URIs and 0.0 for anything else."""
        return 1.0 if uri.startswith("gdrive://") else 0.0

    def _do_generate(self, source: str, **kwargs) -> Iterator[SayouTask]:
        """
        Yield one task per non-folder file directly inside the given folder.

        Args:
            source: ``gdrive://<folder_id>``; an empty id means ``root``.
                Anything after a ``?`` is currently ignored.
            **kwargs: must contain ``google_token_path``, the path of the
                OAuth token file.

        Raises:
            FileNotFoundError: if ``google_token_path`` is missing/empty or
                does not point to an existing file.
            ImportError: if the Google client libraries could not be
                imported at module load time.
        """
        # 1. Authentication
        token_path = kwargs.get("google_token_path")

        # A missing kwarg would otherwise reach os.path.exists(None) and
        # surface as an unrelated TypeError.
        if not token_path or not os.path.exists(token_path):
            raise FileNotFoundError(
                f"Google Token not found at {token_path}. Run authentication script first."
            )

        # The module-level import guard only defines `build` on failure, so
        # `Credentials` would otherwise raise a NameError.
        if build is None:
            raise ImportError(
                "GoogleDriveGenerator requires 'google-api-python-client' "
                "and 'google-auth' to be installed."
            )

        creds = Credentials.from_authorized_user_file(token_path)
        service = build("drive", "v3", credentials=creds)

        # 2. Search query: strip the scheme, drop any "?..." suffix, and only
        #    then apply the default so "gdrive://?x" still means the root.
        folder_part, _, _ = source.replace("gdrive://", "").partition("?")
        root_id = folder_part or "root"
        query = f"'{root_id}' in parents and trashed = false"

        # 3. File search (flat), following nextPageToken so folders with more
        #    than one page of children are listed completely.
        page_token = None
        while True:
            response = (
                service.files()
                .list(
                    q=query,
                    pageSize=100,
                    fields="nextPageToken, files(id, name, mimeType, webViewLink, createdTime, modifiedTime)",
                    pageToken=page_token,
                )
                .execute()
            )

            for entry in response.get("files", []):
                mime_type = entry.get("mimeType")
                if mime_type == self._FOLDER_MIME:
                    continue

                file_id = entry["id"]
                uri_prefix, source_type = self._NATIVE_TARGETS.get(
                    mime_type, ("gdrive://file/", "drive")
                )

                yield SayouTask(
                    uri=f"{uri_prefix}{file_id}",
                    source_type=source_type,
                    params={
                        "file_id": file_id,
                        "mime_type": mime_type,
                        "token_path": token_path,
                    },
                    meta={
                        "source": source_type,
                        "filename": entry["name"],
                        "file_id": file_id,
                        "mime_type": mime_type,
                        "link": entry.get("webViewLink"),
                    },
                )

            page_token = response.get("nextPageToken")
            if not page_token:
                break
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import email
|
|
2
|
+
import imaplib
|
|
3
|
+
from email.header import decode_header
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
from sayou.core.registry import register_component
|
|
7
|
+
from sayou.core.schemas import SayouTask
|
|
8
|
+
|
|
9
|
+
from ..interfaces.base_fetcher import BaseFetcher
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import html2text
|
|
13
|
+
except ImportError:
|
|
14
|
+
html2text = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@register_component("fetcher")
class ImapEmailFetcher(BaseFetcher):
    """
    Fetches one email from an IMAP server and wraps it in an HTML document.

    The headers (subject, sender, date) are placed in ``<title>``/``<meta>``
    tags and the message body (HTML part preferred, plain text otherwise) is
    placed inside ``<body>``.
    """

    component_name = "ImapEmailFetcher"
    SUPPORTED_TYPES = ["imap", "email"]

    @classmethod
    def can_handle(cls, uri: str) -> float:
        """Return 1.0 for ``imap-msg://`` URIs and 0.0 for anything else."""
        return 1.0 if uri.startswith("imap-msg://") else 0.0

    def _do_fetch(self, task: SayouTask) -> str:
        """
        Connect, fetch the message named in ``task.params`` and return HTML.

        Reads ``uid``, ``username`` and ``password`` (required) plus
        ``folder`` (default ``INBOX``) and ``imap_server`` (default
        ``imap.gmail.com``) from ``task.params``.

        Returns:
            The generated HTML document as a string.

        Raises:
            RuntimeError: for any failure after the connection was opened
                (login, select, fetch, parsing); the original exception is
                chained as ``__cause__``.
        """
        # Function-scope import keeps the module's import header untouched.
        from html import escape

        params = task.params
        uid = params["uid"]
        folder = params.get("folder", "INBOX")
        imap_server = params.get("imap_server") or "imap.gmail.com"

        mail = imaplib.IMAP4_SSL(imap_server)

        try:
            mail.login(params["username"], params["password"])
            mail.select(folder)

            # NOTE(review): IMAP4.fetch addresses messages by sequence number,
            # not by IMAP UID, despite the parameter name. Sequence numbers can
            # shift between sessions; switching to mail.uid("fetch", ...) is
            # only safe if whatever produces `uid` uses UID SEARCH as well.
            status, msg_data = mail.fetch(uid, "(RFC822)")

            # A nonexistent message can come back as "OK" with [None].
            if status != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                raise ValueError(
                    f"Email UID {uid} not found or fetch failed on {imap_server}."
                )

            msg = email.message_from_bytes(msg_data[0][1])
            parsed_content = self._parse_email(msg)

            # Header values come from the (untrusted) email; escape them so
            # they cannot break out of <title> or the meta attributes.
            subject = escape(str(parsed_content["subject"]))
            sender = escape(str(parsed_content["sender"]))
            date = escape(str(parsed_content["date"] or ""))

            html_doc = "\n".join(
                [
                    "<!DOCTYPE html>",
                    "<html>",
                    "<head>",
                    f"<title>{subject}</title>",
                    f'<meta name="sender" content="{sender}">',
                    f'<meta name="date" content="{date}">',
                    f'<meta name="uid" content="{escape(str(uid))}">',
                    '<meta name="source" content="imap">',
                    f'<meta name="server" content="{escape(str(imap_server))}">',
                    "</head>",
                    "<body>",
                    # The body is inserted verbatim (it is usually HTML already).
                    f"{parsed_content['body']}",
                    "</body>",
                    "</html>",
                ]
            )

            return html_doc.strip()

        except Exception as e:
            raise RuntimeError(f"Failed to fetch email from {imap_server}: {e}") from e

        finally:
            try:
                mail.logout()
            except Exception:
                # Best-effort cleanup: a failed logout must not mask the
                # result or the original error.
                pass

    def _parse_email(self, msg) -> Dict[str, Any]:
        """
        Extract subject, sender, date and the best body text from ``msg``.

        For multipart messages the last inline ``text/html`` part wins;
        ``text/plain`` is used only while no HTML part has been seen.
        Attachment parts are ignored.
        """
        subject = self._decode_header(msg["Subject"])
        sender = self._decode_header(msg["From"], default="(Unknown Sender)")
        date = msg["Date"]

        body_content = ""
        html_found = False

        if msg.is_multipart():
            for part in msg.walk():
                ctype = part.get_content_type()
                if ctype not in ("text/html", "text/plain"):
                    continue
                # A text file sent as an attachment is not the message body.
                if part.get_content_disposition() == "attachment":
                    continue

                payload = part.get_payload(decode=True)
                if not payload:
                    continue

                text = self._decode_bytes(payload, part.get_content_charset())

                if ctype == "text/html":
                    body_content = text
                    html_found = True
                elif not html_found:
                    body_content = text
        else:
            payload = msg.get_payload(decode=True) or b""
            # Honour the declared charset instead of always assuming UTF-8.
            body_content = self._decode_bytes(payload, msg.get_content_charset())

        return {
            "subject": subject,
            "sender": sender,
            "date": date,
            "body": body_content,
        }

    @staticmethod
    def _decode_bytes(data: bytes, charset) -> str:
        """Decode ``data`` with ``charset``, falling back to UTF-8 for unknown codecs."""
        try:
            return data.decode(charset or "utf-8", errors="ignore")
        except LookupError:
            return data.decode("utf-8", errors="ignore")

    def _decode_header(self, header_text, default="(No Subject)"):
        """
        Decode a MIME header (e.g. ``=?utf-8?b?...``) into plain text.

        Args:
            header_text: raw header value, or ``None`` when the header is absent.
            default: text returned when the header is missing or empty.
        """
        if not header_text:
            return default

        fragments = []
        for value, encoding in decode_header(header_text):
            if isinstance(value, bytes):
                fragments.append(self._decode_bytes(value, encoding))
            else:
                fragments.append(str(value))
        return "".join(fragments)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import imaplib
|
|
2
|
+
from typing import Iterator
|
|
3
|
+
|
|
4
|
+
from sayou.core.registry import register_component
|
|
5
|
+
from sayou.core.schemas import SayouTask
|
|
6
|
+
|
|
7
|
+
from ..interfaces.base_generator import BaseGenerator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@register_component("generator")
class ImapEmailGenerator(BaseGenerator):
    """
    Scans a generic IMAP mailbox and generates one task per email.

    Supports Gmail, Naver, Daum, Outlook, etc. (any server reachable through
    ``imaplib.IMAP4_SSL``).
    """

    component_name = "ImapEmailGenerator"
    SUPPORTED_TYPES = ["imap", "email"]

    @classmethod
    def can_handle(cls, source: str) -> float:
        """Return 1.0 for ``imap://`` sources and 0.0 for anything else."""
        return 1.0 if source.startswith("imap://") else 0.0

    def _do_generate(self, source: str, **kwargs) -> Iterator[SayouTask]:
        """
        Connect to the IMAP server, search the folder and yield tasks.

        Args:
            source: ``imap://<host>``; when the host is empty the
                ``imap_server`` kwarg (default ``imap.gmail.com``) is used.
            **kwargs: ``username`` and ``password`` (required), ``folder``
                (default ``INBOX``), ``limit`` (default 10, newest messages
                first) and ``search_criteria`` (default ``ALL``).

        Raises:
            ValueError: if ``username`` or ``password`` is missing.
            RuntimeError: for any failure while talking to the server; the
                original exception is chained as ``__cause__``.
        """
        # 1. Parse connection information (tolerate "imap://host/").
        parsed_host = source.replace("imap://", "").strip().rstrip("/")
        imap_server = (
            parsed_host if parsed_host else kwargs.get("imap_server", "imap.gmail.com")
        )

        username = kwargs.get("username")
        password = kwargs.get("password")

        if not username or not password:
            raise ValueError(
                "IMAP credentials (username, password) required in kwargs."
            )

        folder = kwargs.get("folder", "INBOX")
        limit = int(kwargs.get("limit", 10))

        # 2. IMAP connection and search. The connection is opened inside the
        #    try so exactly one socket exists and it is always cleaned up.
        mail = None
        try:
            mail = imaplib.IMAP4_SSL(imap_server)
            mail.login(username, password)
            mail.select(folder)

            # Search criteria (e.g., '(UNSEEN)' or 'ALL')
            criteria = kwargs.get("search_criteria", "ALL")
            # NOTE(review): IMAP4.search returns message sequence numbers, not
            # IMAP UIDs, although they are published as "uid" below. Sequence
            # numbers can shift between sessions; moving to
            # mail.uid("search", ...) requires the consumer to use UID FETCH.
            status, messages = mail.search(None, criteria)

            if status != "OK":
                return

            mail_ids = messages[0].split()
            # NOTE(review): limit == 0 selects every message (slice [-0:]) and
            # a negative limit skips the oldest ones — confirm whether that is
            # intended.
            target_ids = mail_ids[-limit:]

            self._log(
                f"📧 [{imap_server}] Found {len(mail_ids)} emails. Generating tasks for last {len(target_ids)}."
            )

            # 3. Task generation (one task per email, newest first)
            for b_id in reversed(target_ids):
                uid = b_id.decode()

                # Fetcher will process this internal protocol
                task_uri = f"imap-msg://{imap_server}/{folder}/{uid}"

                yield SayouTask(
                    uri=task_uri,
                    source_type="imap",
                    # NOTE(review): the plaintext password travels inside the
                    # task parameters — make sure tasks are not logged or
                    # persisted unprotected.
                    params={
                        "imap_server": imap_server,
                        "username": username,
                        "password": password,
                        "uid": uid,
                        "folder": folder,
                    },
                    meta={"source": "imap", "server": imap_server, "email_id": uid},
                )

        except Exception as e:
            raise RuntimeError(f"IMAP connection failed to {imap_server}: {e}") from e
        finally:
            if mail is not None:
                # Run both steps independently: close() fails when no folder
                # is selected, which must not prevent logout().
                for cleanup in (mail.close, mail.logout):
                    try:
                        cleanup()
                    except Exception:
                        # Best-effort cleanup; never mask the real outcome.
                        pass
|