abstractgateway 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractgateway/__init__.py +1 -2
- abstractgateway/__main__.py +7 -0
- abstractgateway/app.py +4 -4
- abstractgateway/cli.py +568 -8
- abstractgateway/config.py +15 -5
- abstractgateway/embeddings_config.py +45 -0
- abstractgateway/host_metrics.py +274 -0
- abstractgateway/hosts/bundle_host.py +528 -55
- abstractgateway/hosts/visualflow_host.py +30 -3
- abstractgateway/integrations/__init__.py +2 -0
- abstractgateway/integrations/email_bridge.py +782 -0
- abstractgateway/integrations/telegram_bridge.py +534 -0
- abstractgateway/maintenance/__init__.py +5 -0
- abstractgateway/maintenance/action_tokens.py +100 -0
- abstractgateway/maintenance/backlog_exec_runner.py +1592 -0
- abstractgateway/maintenance/backlog_parser.py +184 -0
- abstractgateway/maintenance/draft_generator.py +451 -0
- abstractgateway/maintenance/llm_assist.py +212 -0
- abstractgateway/maintenance/notifier.py +109 -0
- abstractgateway/maintenance/process_manager.py +1064 -0
- abstractgateway/maintenance/report_models.py +81 -0
- abstractgateway/maintenance/report_parser.py +219 -0
- abstractgateway/maintenance/text_similarity.py +123 -0
- abstractgateway/maintenance/triage.py +507 -0
- abstractgateway/maintenance/triage_queue.py +142 -0
- abstractgateway/migrate.py +155 -0
- abstractgateway/routes/__init__.py +2 -2
- abstractgateway/routes/gateway.py +10817 -179
- abstractgateway/routes/triage.py +118 -0
- abstractgateway/runner.py +689 -14
- abstractgateway/security/gateway_security.py +425 -110
- abstractgateway/service.py +213 -6
- abstractgateway/stores.py +64 -4
- abstractgateway/workflow_deprecations.py +225 -0
- abstractgateway-0.1.1.dist-info/METADATA +135 -0
- abstractgateway-0.1.1.dist-info/RECORD +40 -0
- abstractgateway-0.1.0.dist-info/METADATA +0 -101
- abstractgateway-0.1.0.dist-info/RECORD +0 -18
- {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/WHEEL +0 -0
- {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,782 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import email
|
|
4
|
+
from email.header import decode_header
|
|
5
|
+
from email.message import Message
|
|
6
|
+
import hashlib
|
|
7
|
+
import imaplib
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import threading
|
|
12
|
+
import time
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
17
|
+
|
|
18
|
+
from abstractruntime.integrations.abstractcore.session_attachments import session_memory_owner_run_id
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _utc_now_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO-8601 string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _as_bool(raw: Any, default: bool) -> bool:
    """Interpret *raw* as a boolean flag, falling back to *default*.

    Real ``bool`` values are returned unchanged.  Anything else is
    stringified, trimmed and lower-cased, then matched against the accepted
    "on"/"off" spellings.  ``None``, blank and unrecognized values yield
    *default*.
    """
    if isinstance(raw, bool):
        return raw
    if raw is None:
        return default

    token = str(raw).strip().lower()
    if token in ("1", "true", "yes", "y", "on"):
        return True
    if token in ("0", "false", "no", "n", "off"):
        return False
    # Blank or unknown spelling.
    return default
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _as_int(raw: Any, default: int) -> int:
    """Parse *raw* as an integer, returning *default* when that is not possible.

    The value is stringified and trimmed before parsing, so ``" 42 "`` works
    while ``"3.9"`` (not an integer literal) falls back to *default*.
    """
    if raw is None:
        return default
    try:
        parsed = int(str(raw).strip())
    except Exception:
        return default
    return parsed
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Matches one angle-bracketed token such as "<id@host>" (anything between "<"
# and the next ">"); used to pull message ids out of decoded header text.
_MSG_ID_RE = re.compile(r"<[^>]+>")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _decode_mime_header(value: Any) -> str:
    """Decode an RFC 2047 encoded header value into plain text.

    Non-string or blank input gives ``""``.  If the header cannot be split
    into encoded words, the trimmed input is returned unchanged.  Byte
    fragments are decoded with their declared charset (UTF-8 when none is
    declared), falling back to UTF-8 when the charset is unusable; undecodable
    bytes are replaced rather than raising.
    """
    if not isinstance(value, str):
        return ""
    if not value.strip():
        return ""

    try:
        fragments = decode_header(value)
    except Exception:
        return value.strip()

    def _to_text(fragment: Any, charset: Any) -> str:
        if not isinstance(fragment, bytes):
            return str(fragment)
        try:
            return fragment.decode(charset or "utf-8", errors="replace")
        except Exception:
            # Unknown or invalid charset name: best-effort UTF-8.
            return fragment.decode("utf-8", errors="replace")

    return "".join(_to_text(fragment, charset) for fragment, charset in fragments).strip()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _parse_message_ids(value: Any) -> list[str]:
    """Return the distinct ``<...>`` message ids found in a header value.

    The header is MIME-decoded first.  Ids are returned in order of first
    appearance with their original casing (message ids are typically
    case-sensitive opaque identifiers).
    """
    decoded = _decode_mime_header(value)
    if not decoded:
        return []

    # A dict keeps insertion order, giving order-preserving de-duplication.
    unique: dict[str, None] = {}
    for candidate in _MSG_ID_RE.findall(decoded):
        token = str(candidate).strip()
        if token:
            unique.setdefault(token, None)
    return list(unique)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _safe_id_component(value: str, *, max_len: int = 48) -> str:
    """Turn an arbitrary label into a short token made of ``[A-Za-z0-9_-]``.

    Blank input becomes ``"default"``.  Runs of other characters collapse to
    a single underscore so common values (emails, simple ids) stay readable.
    If nothing readable remains, or the result is longer than *max_len*, a
    SHA-256 hex digest prefix of the trimmed input is used instead.
    """
    text = str(value or "").strip()
    if not text:
        return "default"

    def _digest_prefix(length: int) -> str:
        # Computed lazily: only needed for odd or over-long inputs.
        return hashlib.sha256(text.encode("utf-8")).hexdigest()[:length]

    cleaned = re.sub(r"[^a-zA-Z0-9_-]+", "_", text).strip("_")
    if not cleaned:
        cleaned = _digest_prefix(16)
    return _digest_prefix(max_len) if len(cleaned) > max_len else cleaned
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _extract_text_bodies(msg: Message) -> tuple[str, str]:
    """Collect the decoded ``text/plain`` and ``text/html`` bodies of *msg*.

    Returns ``(plain, html)``.  For multipart messages every leaf part that is
    not marked ``Content-Disposition: attachment`` is considered; a
    non-multipart message is considered as-is.  Multiple bodies of the same
    type are joined with a blank line.  Decoding is best-effort: the declared
    charset is tried first, then UTF-8, replacing undecodable bytes.
    """
    if msg is None:
        return "", ""

    def _payload_text(leaf: Message) -> str:
        data = leaf.get_payload(decode=True)
        if data is None:
            return ""
        encoding = leaf.get_content_charset() or "utf-8"
        try:
            return data.decode(encoding, errors="replace")
        except Exception:
            # Unknown charset name: fall back to UTF-8.
            return data.decode("utf-8", errors="replace")

    buckets: Dict[str, List[str]] = {"text/plain": [], "text/html": []}

    def _collect(leaf: Message) -> None:
        bucket = buckets.get(str(leaf.get_content_type() or ""))
        if bucket is None:
            return
        body = _payload_text(leaf).strip()
        if body:
            bucket.append(body)

    if not msg.is_multipart():
        _collect(msg)
    else:
        for leaf in msg.walk():
            # Containers carry no body of their own; attachments are handled elsewhere.
            if leaf.is_multipart() or leaf.get_content_disposition() == "attachment":
                continue
            _collect(leaf)

    plain = "\n\n".join(buckets["text/plain"]).strip()
    html = "\n\n".join(buckets["text/html"]).strip()
    return (plain, html)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _clamp_text(text: str, *, max_chars: int) -> str:
    """Limit *text* to *max_chars* characters, appending "…" when it is cut.

    A limit that is zero, negative or not convertible to ``int`` disables
    clamping.  ``None``/empty input yields ``""``.
    """
    content = str(text or "")
    try:
        cap = int(max_chars)
    except Exception:
        cap = 0

    if cap <= 0 or len(content) <= cap:
        return content
    #[WARNING:TRUNCATION] inbound email body text is bounded before entering durable state
    return f"{content[:cap]}…"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@dataclass(frozen=True)
class EmailBridgeConfig:
    """Immutable settings for the inbound email (IMAP) bridge.

    Instances are normally built with :meth:`from_env`, which reads the
    ``ABSTRACT_EMAIL_*`` environment variables and clamps numeric values to
    sane minimums.
    """

    # Master switch for the bridge.
    enabled: bool

    # Name of the event emitted per message, and the prefix of generated session ids.
    event_name: str
    session_prefix: str
    # Account label; used in session ids, cursor keys and artifact tags.
    account: str

    # IMAP connection settings.  ``imap_password_env_var`` names the environment
    # variable that holds the password (it is not the password itself).
    imap_host: str
    imap_username: str
    imap_password_env_var: str
    imap_folder: str
    imap_port: int = 993
    imap_timeout_s: float = 30.0

    # Polling cadence and the maximum number of messages handled per poll.
    poll_seconds: float = 60.0
    max_messages_per_poll: int = 50

    # Optional: start a workflow per email thread/session (Telegram-bridge-style).
    autostart_flow_id: Optional[str] = None
    autostart_bundle_id: Optional[str] = None

    # Storage and normalization caps (treat email as untrusted input).
    store_raw_message: bool = True
    store_attachments: bool = True
    max_raw_bytes: int = 2 * 1024 * 1024
    max_body_chars: int = 20_000
    max_html_chars: int = 20_000
    max_attachments: int = 20
    max_attachment_bytes: int = 5 * 1024 * 1024
    max_total_attachment_bytes: int = 15 * 1024 * 1024

    # Directory holding the bridge's persisted state.
    state_dir: Path = Path("./runtime/email_bridge")

    @staticmethod
    def from_env(*, base_dir: Path) -> "EmailBridgeConfig":
        """Build a configuration from ``ABSTRACT_EMAIL_*`` environment variables.

        Defaults for the IMAP host/username/password-variable/folder and for
        the fallback account label come from the AbstractCore config system
        when it is importable; explicit environment variables take precedence.
        Malformed numeric values fall back to their defaults instead of
        raising, and every numeric setting is clamped to its minimum.

        Args:
            base_dir: Directory under which the ``email_bridge`` state
                directory is placed.

        Returns:
            A fully populated, frozen :class:`EmailBridgeConfig`.
        """

        def _env_float(name: str, default: float) -> float:
            # Tolerant float parsing, mirroring how integer settings are read:
            # blank, malformed or non-finite (nan/inf) values yield the default
            # rather than crashing startup or producing an unusable timeout.
            import math

            raw = str(os.getenv(name, "") or "").strip()
            if not raw:
                return default
            try:
                value = float(raw)
            except ValueError:
                return default
            return value if math.isfinite(value) else default

        enabled = _as_bool(os.getenv("ABSTRACT_EMAIL_BRIDGE"), False)

        # Defaults from AbstractCore config system (best-effort).
        cfg_smtp_username = ""
        cfg_imap_host = ""
        cfg_imap_username = ""
        cfg_imap_password_env_var = "EMAIL_PASSWORD"
        cfg_imap_folder = "INBOX"
        try:
            from abstractcore.config.manager import get_config_manager  # type: ignore

            core_cfg = get_config_manager().config
            email_cfg = getattr(core_cfg, "email", None)
            if email_cfg is not None:
                cfg_smtp_username = str(getattr(email_cfg, "smtp_username", "") or "")
                cfg_imap_host = str(getattr(email_cfg, "imap_host", "") or "")
                cfg_imap_username = str(getattr(email_cfg, "imap_username", "") or "")
                cfg_imap_password_env_var = str(getattr(email_cfg, "imap_password_env_var", "") or "") or "EMAIL_PASSWORD"
                cfg_imap_folder = str(getattr(email_cfg, "imap_folder", "") or "") or "INBOX"
        except Exception:
            # AbstractCore is optional here: keep the built-in defaults.
            pass

        event_name = str(os.getenv("ABSTRACT_EMAIL_EVENT_NAME", "") or "").strip() or "email.message"
        session_prefix = str(os.getenv("ABSTRACT_EMAIL_SESSION_PREFIX", "") or "").strip() or "email:"

        account = str(os.getenv("ABSTRACT_EMAIL_ACCOUNT", "") or "").strip()
        imap_host = str(os.getenv("ABSTRACT_EMAIL_IMAP_HOST", "") or cfg_imap_host or "").strip()
        imap_username = str(os.getenv("ABSTRACT_EMAIL_IMAP_USERNAME", "") or cfg_imap_username or "").strip()
        imap_password_env_var = str(os.getenv("ABSTRACT_EMAIL_IMAP_PASSWORD_ENV_VAR", "") or cfg_imap_password_env_var or "").strip() or "EMAIL_PASSWORD"
        imap_folder = str(os.getenv("ABSTRACT_EMAIL_IMAP_FOLDER", "") or cfg_imap_folder or "").strip() or "INBOX"

        poll_seconds = _env_float("ABSTRACT_EMAIL_POLL_SECONDS", 60.0)
        imap_port = _as_int(os.getenv("ABSTRACT_EMAIL_IMAP_PORT"), 993)
        imap_timeout_s = _env_float("ABSTRACT_EMAIL_IMAP_TIMEOUT_S", 30.0)
        max_messages_per_poll = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_MESSAGES_PER_POLL"), 50)

        autostart_flow_id = str(os.getenv("ABSTRACT_EMAIL_FLOW_ID", "") or "").strip() or None
        autostart_bundle_id = str(os.getenv("ABSTRACT_EMAIL_BUNDLE_ID", "") or "").strip() or None

        store_raw_message = _as_bool(os.getenv("ABSTRACT_EMAIL_STORE_RAW_MESSAGE"), True)
        store_attachments = _as_bool(os.getenv("ABSTRACT_EMAIL_STORE_ATTACHMENTS"), True)

        max_raw_bytes = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_RAW_BYTES"), 2 * 1024 * 1024)
        max_body_chars = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_BODY_CHARS"), 20_000)
        max_html_chars = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_HTML_CHARS"), 20_000)
        max_attachments = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_ATTACHMENTS"), 20)
        max_attachment_bytes = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_ATTACHMENT_BYTES"), 5 * 1024 * 1024)
        max_total_attachment_bytes = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_TOTAL_ATTACHMENT_BYTES"), 15 * 1024 * 1024)

        state_dir = Path(base_dir) / "email_bridge"

        # Use outbound defaults as a fallback account label if no explicit account is set.
        if not account:
            account = str(os.getenv("ABSTRACT_EMAIL_FROM", "") or cfg_smtp_username or imap_username or "default").strip()

        return EmailBridgeConfig(
            enabled=bool(enabled),
            event_name=event_name,
            session_prefix=session_prefix,
            account=account,
            imap_host=imap_host,
            imap_username=imap_username,
            imap_password_env_var=imap_password_env_var,
            imap_folder=imap_folder,
            imap_port=max(1, int(imap_port)),
            imap_timeout_s=max(1.0, float(imap_timeout_s)),
            poll_seconds=max(1.0, float(poll_seconds)),
            max_messages_per_poll=max(1, int(max_messages_per_poll)),
            autostart_flow_id=autostart_flow_id,
            autostart_bundle_id=autostart_bundle_id,
            store_raw_message=bool(store_raw_message),
            store_attachments=bool(store_attachments),
            max_raw_bytes=max(1, int(max_raw_bytes)),
            max_body_chars=max(0, int(max_body_chars)),
            max_html_chars=max(0, int(max_html_chars)),
            max_attachments=max(0, int(max_attachments)),
            max_attachment_bytes=max(1, int(max_attachment_bytes)),
            max_total_attachment_bytes=max(1, int(max_total_attachment_bytes)),
            state_dir=state_dir,
        )
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class EmailBridge:
    """Bridge inbound IMAP email messages to AbstractGateway events.

    A background thread polls one IMAP folder (opened read-only), turns each
    new message into a JSON-friendly payload (decoded headers, bounded bodies,
    references to stored artifacts) and emits it through
    ``runner.emit_event``.  A per-account/folder UID cursor and the optional
    session -> run bindings are persisted in ``state.json`` under the
    configured state directory.
    """

    def __init__(self, *, config: "EmailBridgeConfig", host: Any, runner: Any, artifact_store: Any) -> None:
        """Store collaborators; no I/O happens until :meth:`start` or :meth:`poll_once`.

        Args:
            config: Bridge settings.
            host: Object providing ``start_run(...)`` (used for autostart).
            runner: Object providing ``emit_event(...)``.
            artifact_store: Object providing ``store(...)`` and ``list_by_run(...)``.
        """
        self._cfg = config
        self._host = host
        self._runner = runner
        self._artifact_store = artifact_store

        # Guards reads/writes of ``_state`` shared with the poll thread.
        self._lock = threading.Lock()
        self._state: Dict[str, Any] = {}

        self._stop = threading.Event()
        self._thread: Optional[threading.Thread] = None

    @staticmethod
    def _log() -> Any:
        """Return this module's logger (imported lazily to keep the module's imports unchanged)."""
        import logging

        return logging.getLogger(__name__)

    @property
    def enabled(self) -> bool:
        """Whether the bridge is enabled in its configuration."""
        return bool(self._cfg.enabled)

    @property
    def state_path(self) -> Path:
        """Location of the persisted state file."""
        return Path(self._cfg.state_dir) / "state.json"

    def start(self) -> None:
        """Load persisted state and start the polling thread.

        Does nothing when the bridge is disabled or already running.

        Raises:
            ValueError: If the bridge is enabled but the IMAP host or username
                is missing.
        """
        if not self._cfg.enabled:
            return
        if not self._cfg.imap_host or not self._cfg.imap_username:
            raise ValueError("Email bridge is enabled but IMAP host/username are missing (ABSTRACT_EMAIL_IMAP_HOST/USERNAME)")

        # Check for a live thread *before* touching state: reloading from disk
        # while the poll thread is running would race with its cursor updates.
        if self._thread is not None and self._thread.is_alive():
            return

        self._load_state()

        self._stop.clear()
        self._thread = threading.Thread(target=self._loop, name="email-bridge", daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Signal the polling thread to stop and wait briefly (3 s) for it to exit."""
        self._stop.set()
        if self._thread is not None:
            try:
                self._thread.join(timeout=3.0)
            except RuntimeError:
                # e.g. stop() invoked from the poll thread itself.
                pass
        self._thread = None

    # ---------------------------------------------------------------------
    # State (cursor + optional bindings)
    # ---------------------------------------------------------------------

    def _load_state(self) -> None:
        """Read ``state.json`` into memory and make sure the expected keys exist."""
        path = self.state_path
        try:
            if path.exists():
                obj = json.loads(path.read_text(encoding="utf-8"))
                if isinstance(obj, dict):
                    self._state = obj
        except Exception:
            # An unreadable state file resets the cursor, so say so loudly.
            self._log().warning("email bridge: could not read state file %s; starting with empty state", path, exc_info=True)
            self._state = {}
        self._state.setdefault("version", 1)
        self._state.setdefault("cursors", {})
        self._state.setdefault("bindings", {})  # session_id -> {run_id, ...}

    def _save_state(self) -> None:
        """Persist the in-memory state (best-effort; failures are logged, not raised)."""
        path = self.state_path
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            # Write to a sibling file and rename so readers never see a partial file.
            tmp = path.with_suffix(".tmp")
            tmp.write_text(json.dumps(self._state, ensure_ascii=False, indent=2), encoding="utf-8")
            tmp.replace(path)
        except Exception:
            self._log().warning("email bridge: could not write state file %s", path, exc_info=True)

    def _cursor_key(self) -> str:
        """Return the state key under which this bridge's UID cursor is stored."""
        # Keyed by account+folder so multiple bridges/accounts can share a state dir safely.
        account = _safe_id_component(self._cfg.account)
        folder = _safe_id_component(self._cfg.imap_folder, max_len=64)
        return f"{account}:{folder}"

    def _get_last_uid(self) -> int:
        """Return the last processed UID for this account/folder (0 when unknown)."""
        cursors = self._state.get("cursors")
        if not isinstance(cursors, dict):
            return 0
        entry = cursors.get(self._cursor_key())
        # Entries are normally {"last_uid": ...}; a bare value is accepted too.
        raw = entry.get("last_uid") if isinstance(entry, dict) else entry
        try:
            return int(raw or 0)
        except Exception:
            return 0

    def _set_last_uid(self, uid: int) -> None:
        """Record *uid* as the last processed UID (in memory only; see ``_save_state``)."""
        cursors = self._state.setdefault("cursors", {})
        if not isinstance(cursors, dict):
            self._state["cursors"] = {}
            cursors = self._state["cursors"]
        cursors[self._cursor_key()] = {"last_uid": int(uid), "updated_at": _utc_now_iso()}

    def _binding_for_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored binding for *session_id*, or ``None``."""
        b = self._state.get("bindings")
        if not isinstance(b, dict):
            return None
        entry = b.get(str(session_id))
        return entry if isinstance(entry, dict) else None

    def _ensure_binding(self, *, session_id: str, thread_key: str) -> Optional[Dict[str, Any]]:
        """Return the run binding for a session, starting a run when none exists.

        Returns ``None`` when autostart is not configured or the run could not
        be started.
        """
        if not self._cfg.autostart_flow_id:
            return None
        with self._lock:
            existing = self._binding_for_session(session_id)
            if isinstance(existing, dict):
                return existing

        # Start the run without holding the state lock (it is an external call).
        try:
            run_id = self._host.start_run(
                flow_id=self._cfg.autostart_flow_id,
                bundle_id=self._cfg.autostart_bundle_id,
                input_data={"email": {"thread_key": thread_key, "session_id": session_id}},
                actor_id="email",
                session_id=session_id,
            )
        except Exception:
            self._log().warning("email bridge: could not start run for session %s", session_id, exc_info=True)
            return None

        binding = {
            "session_id": session_id,
            "thread_key": thread_key,
            "run_id": str(run_id),
            "flow_id": self._cfg.autostart_flow_id,
            "bundle_id": self._cfg.autostart_bundle_id,
            "created_at": _utc_now_iso(),
            "updated_at": _utc_now_iso(),
        }
        with self._lock:
            bindings = self._state.setdefault("bindings", {})
            if isinstance(bindings, dict):
                bindings[str(session_id)] = binding
            self._save_state()
        return binding

    # ---------------------------------------------------------------------
    # Polling loop
    # ---------------------------------------------------------------------

    def _resolve_password(self) -> tuple[Optional[str], Optional[str]]:
        """Resolve the IMAP password; returns ``(password, None)`` or ``(None, error)``."""
        ref = str(self._cfg.imap_password_env_var or "").strip() or "EMAIL_PASSWORD"
        v = os.getenv(ref)
        if v is not None and str(v).strip():
            return str(v).strip(), None

        # Fail fast for conventional env var names (avoid silently using a name as a password).
        if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", ref):
            return None, f"Missing IMAP password env var {ref}"

        # Otherwise: treat the reference as a literal secret.
        return ref, None

    @staticmethod
    def _close_client(client: Any) -> None:
        """Release an IMAP connection without raising."""
        try:
            client.logout()
        except Exception:
            # LOGOUT could not be sent (e.g. broken socket): close the transport directly.
            try:
                client.shutdown()
            except Exception:
                pass

    def _open_imap_client(self) -> imaplib.IMAP4_SSL:
        """Open the TLS connection, bounding both connect and later I/O by the timeout."""
        host = self._cfg.imap_host
        port = int(self._cfg.imap_port)
        timeout = float(self._cfg.imap_timeout_s)
        try:
            # Passing the timeout here also bounds the initial TCP/TLS connect.
            client = imaplib.IMAP4_SSL(host, port, timeout=timeout)
        except TypeError:
            # Interpreters whose IMAP4_SSL has no ``timeout`` parameter.
            client = imaplib.IMAP4_SSL(host, port)
        try:
            sock = getattr(client, "sock", None)
            if sock is not None:
                sock.settimeout(timeout)
        except Exception:
            pass
        return client

    def _connect_imap(self) -> tuple[Optional[imaplib.IMAP4_SSL], Optional[str]]:
        """Connect, log in and select the folder read-only.

        Returns ``(client, None)`` on success or ``(None, error_message)`` on
        failure.  On failure any connection that was opened is closed again.
        """
        password, err = self._resolve_password()
        if err is not None:
            # ``err`` echoes the configured reference, which could be a misplaced
            # literal secret, so it is deliberately kept out of the log.
            self._log().warning("email bridge: IMAP password is not configured")
            return None, err

        client: Optional[imaplib.IMAP4_SSL] = None
        try:
            client = self._open_imap_client()
            client.login(self._cfg.imap_username, password)
            typ, _ = client.select(self._cfg.imap_folder, readonly=True)
            if typ != "OK":
                self._close_client(client)
                self._log().warning("email bridge: failed to select mailbox %s", self._cfg.imap_folder)
                return None, f"Failed to select mailbox: {self._cfg.imap_folder}"
            return client, None
        except Exception as e:
            # Do not leak the socket when login/select fails part-way.
            if client is not None:
                self._close_client(client)
            self._log().warning("email bridge: IMAP connection to %s failed: %s", self._cfg.imap_host, e)
            return None, str(e)

    def _loop(self) -> None:
        """Thread body: poll until stopped, sleeping ``poll_seconds`` between polls."""
        while not self._stop.is_set():
            try:
                self.poll_once()
            except Exception:
                # Best-effort: never crash the runner thread.
                self._log().exception("email bridge: poll failed")
            self._stop.wait(timeout=float(self._cfg.poll_seconds))

    def poll_once(self) -> int:
        """Poll IMAP once and process new messages (deterministic; used by tests).

        Returns:
            The number of messages emitted during this poll.
        """
        if not self._cfg.enabled:
            return 0

        with self._lock:
            last_uid = self._get_last_uid()

        client, err = self._connect_imap()
        if err is not None or client is None:
            return 0

        def _as_uid(u: str) -> int:
            try:
                return int(str(u).strip())
            except Exception:
                return -1

        processed = 0
        try:
            # Search new UIDs.
            search_query = f"UID {int(last_uid) + 1}:*"
            typ, data = client.uid("search", None, search_query)
            if typ != "OK" or not data:
                return 0
            raw_uids = data[0] if isinstance(data, list) and data else b""
            if not isinstance(raw_uids, (bytes, bytearray)):
                raw_uids = str(raw_uids).encode("utf-8", errors="replace")
            uids = [u.decode("utf-8", errors="replace") for u in bytes(raw_uids).split() if u]

            # "n:*" can return the newest message even when its UID is below n,
            # so filter on the cursor again.
            uids_sorted = sorted({u for u in uids if _as_uid(u) > int(last_uid)}, key=_as_uid)
            if not uids_sorted:
                return 0

            limit = max(1, int(self._cfg.max_messages_per_poll))
            uids_sorted = uids_sorted[:limit]

            for uid in uids_sorted:
                uid_i = _as_uid(uid)
                if uid_i <= int(last_uid):
                    continue
                ok = self._process_uid(client, uid=uid, uid_i=uid_i)
                if ok:
                    processed += 1
                    last_uid = uid_i
                    # Persist after every message so a crash does not replay the batch.
                    with self._lock:
                        self._set_last_uid(uid_i)
                        self._save_state()
        finally:
            try:
                client.logout()
            except Exception:
                pass

        return processed

    def _process_uid(self, client: imaplib.IMAP4_SSL, *, uid: str, uid_i: int) -> bool:
        """Fetch one message by UID, normalize it and emit the event.

        Returns ``False`` when the message could not be fetched, ``True`` once
        the event has been emitted.
        """
        # Fetch full message bytes (PEEK: does not set the \\Seen flag).
        typ, fetched = client.uid("fetch", str(uid), "(FLAGS BODY.PEEK[])")
        if typ != "OK" or not fetched:
            return False

        raw_bytes: Optional[bytes] = None
        flags: list[str] = []
        for item in fetched:
            if not isinstance(item, tuple) or len(item) < 2:
                continue
            meta, payload = item[0], item[1]
            if isinstance(payload, (bytes, bytearray)) and payload:
                raw_bytes = bytes(payload)
            if isinstance(meta, (bytes, bytearray)):
                try:
                    flags_bytes = imaplib.ParseFlags(meta)
                    flags = [fb.decode("utf-8", errors="replace") for fb in flags_bytes]
                except Exception:
                    flags = []

        if raw_bytes is None:
            return False

        event_payload, session_id, thread_key = self._normalize_message(uid=str(uid), raw_bytes=raw_bytes, flags=flags)

        # Optional: start the per-thread workflow run.
        binding = self._ensure_binding(session_id=session_id, thread_key=thread_key)
        if isinstance(binding, dict):
            event_payload["binding"] = {"run_id": str(binding.get("run_id") or "")}

        self._runner.emit_event(
            name=self._cfg.event_name,
            session_id=session_id,
            scope="session",
            payload={"email": event_payload},
            client_id="email",
        )

        return True

    @staticmethod
    def _safe_attachment_filename(name: str, *, fallback: str) -> str:
        """Reduce an untrusted attachment name to a single safe path segment.

        Control characters are removed and only the part after the last ``/``
        or ``\\`` is kept, so names such as ``../../x`` cannot add segments to
        the attachment handle.  Empty, ``.`` and ``..`` results yield
        *fallback*.  Ordinary file names are returned unchanged.
        """
        text = re.sub(r"[\x00-\x1f\x7f]", "", str(name or ""))
        leaf = re.split(r"[\\/]", text)[-1].strip()
        if not leaf or leaf in {".", ".."}:
            return fallback
        return leaf

    def _store_raw_message(self, *, raw_bytes: bytes, rid: Any, tags: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Store the (possibly truncated) raw message; return its metadata or ``None`` on failure."""
        raw_cap = max(1, int(self._cfg.max_raw_bytes))
        raw_truncated = len(raw_bytes) > raw_cap
        raw_to_store = raw_bytes[:raw_cap] if raw_truncated else raw_bytes
        try:
            meta = self._artifact_store.store(bytes(raw_to_store), content_type="message/rfc822", run_id=str(rid), tags=tags)
            return {
                "artifact_id": str(getattr(meta, "artifact_id", "") or ""),
                "blob_id": str(getattr(meta, "blob_id", "") or ""),
                "size_bytes": int(getattr(meta, "size_bytes", 0) or 0),
                "truncated": bool(raw_truncated),
                "total_bytes": int(len(raw_bytes)),
            }
        except Exception:
            self._log().warning("email bridge: could not store raw message", exc_info=True)
            return None

    def _store_attachments(
        self,
        *,
        msg: Any,
        rid: Any,
        uid: str,
        session_id: str,
        account_key: str,
        thread_key: str,
    ) -> tuple[list[Dict[str, Any]], list[Dict[str, Any]]]:
        """Store the attachments of *msg*; return ``(stored, skipped)`` descriptors.

        Parts are skipped (and reported with a ``reason``) when a count or
        size cap is hit or when the store fails.  An attachment already
        stored for the same run with the same handle and SHA-256 is reused
        instead of being stored again.
        """
        attachments: list[Dict[str, Any]] = []
        skipped_attachments: list[Dict[str, Any]] = []

        # Dedupe by (handle, sha256) within the session attachment registry.
        existing: list[Any]
        try:
            existing = self._artifact_store.list_by_run(str(rid)) or []
        except Exception:
            existing = []
        existing_index: Dict[Tuple[str, str], Any] = {}
        for m in existing:
            tags = getattr(m, "tags", None)
            if not isinstance(tags, dict):
                continue
            if str(tags.get("kind") or "") != "attachment":
                continue
            handle = str(tags.get("path") or "").strip()
            sha256 = str(tags.get("sha256") or "").strip().lower()
            if handle and sha256:
                existing_index[(handle, sha256)] = m

        max_count = max(0, int(self._cfg.max_attachments))
        max_each = max(1, int(self._cfg.max_attachment_bytes))
        max_total = max(1, int(self._cfg.max_total_attachment_bytes))
        total = 0
        idx = 0

        for part in msg.walk():
            if part.is_multipart():
                continue
            disp = part.get_content_disposition()
            filename = _decode_mime_header(part.get_filename())
            if disp != "attachment" and not filename:
                continue

            idx += 1
            # A cap of 0 means "no count limit".
            if max_count and len(attachments) >= max_count:
                skipped_attachments.append({"reason": "max_attachments", "filename": filename or "", "content_type": str(part.get_content_type() or "")})
                continue

            payload = part.get_payload(decode=True)
            if payload is None:
                continue
            content = bytes(payload)
            if not content:
                continue

            if len(content) > max_each:
                skipped_attachments.append(
                    {
                        "reason": "max_attachment_bytes",
                        "filename": filename or "",
                        "content_type": str(part.get_content_type() or ""),
                        "size_bytes": int(len(content)),
                    }
                )
                continue

            if total + len(content) > max_total:
                skipped_attachments.append(
                    {
                        "reason": "max_total_attachment_bytes",
                        "filename": filename or "",
                        "content_type": str(part.get_content_type() or ""),
                        "size_bytes": int(len(content)),
                    }
                )
                continue

            # The name comes from the email itself: keep only a safe leaf name
            # before embedding it in the path-like handle.
            filename2 = self._safe_attachment_filename(filename, fallback=f"attachment_{idx}")
            handle = f"email/{account_key}/{thread_key}/{filename2}"
            sha256 = hashlib.sha256(content).hexdigest()

            existing_meta = existing_index.get((handle, sha256))
            if existing_meta is not None:
                attachments.append(
                    {
                        "artifact_id": str(getattr(existing_meta, "artifact_id", "") or ""),
                        "blob_id": str(getattr(existing_meta, "blob_id", "") or ""),
                        "sha256": sha256,
                        "handle": handle,
                        "filename": filename2,
                        "content_type": str(getattr(existing_meta, "content_type", "") or str(part.get_content_type() or "")),
                        "size_bytes": int(getattr(existing_meta, "size_bytes", 0) or len(content)),
                        "deduped": True,
                    }
                )
                total += len(content)
                continue

            tags2 = {
                "kind": "attachment",
                "source": "email",
                "path": handle,
                "filename": filename2,
                "session_id": session_id,
                "sha256": sha256,
                "email_uid": str(uid),
                "thread_key": thread_key,
            }
            try:
                meta2 = self._artifact_store.store(content, content_type=str(part.get_content_type() or "application/octet-stream"), run_id=str(rid), tags=tags2)
            except Exception:
                skipped_attachments.append(
                    {"reason": "store_failed", "filename": filename2, "content_type": str(part.get_content_type() or ""), "size_bytes": int(len(content))}
                )
                continue

            attachments.append(
                {
                    "artifact_id": str(getattr(meta2, "artifact_id", "") or ""),
                    "blob_id": str(getattr(meta2, "blob_id", "") or ""),
                    "sha256": sha256,
                    "handle": handle,
                    "filename": filename2,
                    "content_type": str(getattr(meta2, "content_type", "") or ""),
                    "size_bytes": int(getattr(meta2, "size_bytes", 0) or 0),
                    "deduped": False,
                }
            )
            total += len(content)

        return attachments, skipped_attachments

    def _normalize_message(self, *, uid: str, raw_bytes: bytes, flags: list[str]) -> tuple[Dict[str, Any], str, str]:
        """Parse a raw message into ``(event_payload, session_id, thread_key)``.

        The thread key is a 16-hex-character SHA-256 prefix of the thread
        root: the first ``References`` id, else the first ``In-Reply-To`` id,
        else the ``Message-ID``, else ``From`` + ``Subject`` (or the UID).
        The session id is ``<prefix><account>:<thread_key>``.  Depending on
        configuration, the raw message and its attachments are written to the
        artifact store and referenced from the payload.
        """
        msg = email.message_from_bytes(raw_bytes)
        subject_v = _decode_mime_header(msg.get("Subject"))
        from_v = _decode_mime_header(msg.get("From"))
        to_v = _decode_mime_header(msg.get("To"))
        cc_v = _decode_mime_header(msg.get("Cc"))
        date_v = _decode_mime_header(msg.get("Date"))
        message_id = _decode_mime_header(msg.get("Message-ID"))

        in_reply_to = _parse_message_ids(msg.get("In-Reply-To"))
        references = _parse_message_ids(msg.get("References"))

        if references:
            thread_root = references[0]
        elif in_reply_to:
            thread_root = in_reply_to[0]
        elif message_id:
            thread_root = message_id
        else:
            thread_root = f"{from_v}\n{subject_v}".strip() or uid

        thread_key = hashlib.sha256(str(thread_root).encode("utf-8")).hexdigest()[:16]

        account_key = _safe_id_component(self._cfg.account)
        prefix = str(self._cfg.session_prefix or "email:").strip() or "email:"
        if not prefix.endswith(":"):
            prefix = prefix + ":"
        session_id = f"{prefix}{account_key}:{thread_key}"

        # Artifacts are stored under the run id derived from the session id.
        rid = session_memory_owner_run_id(session_id)

        raw_meta: Optional[Dict[str, Any]] = None
        if self._cfg.store_raw_message:
            raw_meta = self._store_raw_message(
                raw_bytes=raw_bytes,
                rid=rid,
                tags={
                    "kind": "email_raw",
                    "source": "email_bridge",
                    "session_id": session_id,
                    "account": account_key,
                    "mailbox": str(self._cfg.imap_folder or ""),
                    "uid": str(uid),
                    "thread_key": thread_key,
                    "message_id": str(message_id or ""),
                },
            )

        body_text, body_html = _extract_text_bodies(msg)
        body_text = _clamp_text(body_text, max_chars=int(self._cfg.max_body_chars))
        body_html = _clamp_text(body_html, max_chars=int(self._cfg.max_html_chars))

        attachments: list[Dict[str, Any]] = []
        skipped_attachments: list[Dict[str, Any]] = []
        if self._cfg.store_attachments and msg.is_multipart():
            attachments, skipped_attachments = self._store_attachments(
                msg=msg,
                rid=rid,
                uid=uid,
                session_id=session_id,
                account_key=account_key,
                thread_key=thread_key,
            )

        payload: Dict[str, Any] = {
            "bridge": {"version": 1, "received_at": _utc_now_iso()},
            "account": account_key,
            "mailbox": str(self._cfg.imap_folder or ""),
            "uid": str(uid),
            "message_id": message_id,
            "thread_key": thread_key,
            "thread_root_message_id": thread_root,
            "in_reply_to": in_reply_to,
            "references": references,
            "from": from_v,
            "to": to_v,
            "cc": cc_v,
            "subject": subject_v,
            "date": date_v,
            "flags": list(flags),
            "seen": any(str(f).lstrip("\\").lower() == "seen" for f in flags or []),
            "body_text": body_text,
            "body_html": body_html,
            "artifacts": {"raw": raw_meta, "attachments": attachments, "skipped_attachments": skipped_attachments},
        }

        return payload, session_id, thread_key
|