abstractgateway 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. abstractgateway/__init__.py +1 -2
  2. abstractgateway/__main__.py +7 -0
  3. abstractgateway/app.py +4 -4
  4. abstractgateway/cli.py +568 -8
  5. abstractgateway/config.py +15 -5
  6. abstractgateway/embeddings_config.py +45 -0
  7. abstractgateway/host_metrics.py +274 -0
  8. abstractgateway/hosts/bundle_host.py +528 -55
  9. abstractgateway/hosts/visualflow_host.py +30 -3
  10. abstractgateway/integrations/__init__.py +2 -0
  11. abstractgateway/integrations/email_bridge.py +782 -0
  12. abstractgateway/integrations/telegram_bridge.py +534 -0
  13. abstractgateway/maintenance/__init__.py +5 -0
  14. abstractgateway/maintenance/action_tokens.py +100 -0
  15. abstractgateway/maintenance/backlog_exec_runner.py +1592 -0
  16. abstractgateway/maintenance/backlog_parser.py +184 -0
  17. abstractgateway/maintenance/draft_generator.py +451 -0
  18. abstractgateway/maintenance/llm_assist.py +212 -0
  19. abstractgateway/maintenance/notifier.py +109 -0
  20. abstractgateway/maintenance/process_manager.py +1064 -0
  21. abstractgateway/maintenance/report_models.py +81 -0
  22. abstractgateway/maintenance/report_parser.py +219 -0
  23. abstractgateway/maintenance/text_similarity.py +123 -0
  24. abstractgateway/maintenance/triage.py +507 -0
  25. abstractgateway/maintenance/triage_queue.py +142 -0
  26. abstractgateway/migrate.py +155 -0
  27. abstractgateway/routes/__init__.py +2 -2
  28. abstractgateway/routes/gateway.py +10817 -179
  29. abstractgateway/routes/triage.py +118 -0
  30. abstractgateway/runner.py +689 -14
  31. abstractgateway/security/gateway_security.py +425 -110
  32. abstractgateway/service.py +213 -6
  33. abstractgateway/stores.py +64 -4
  34. abstractgateway/workflow_deprecations.py +225 -0
  35. abstractgateway-0.1.1.dist-info/METADATA +135 -0
  36. abstractgateway-0.1.1.dist-info/RECORD +40 -0
  37. abstractgateway-0.1.0.dist-info/METADATA +0 -101
  38. abstractgateway-0.1.0.dist-info/RECORD +0 -18
  39. {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/WHEEL +0 -0
  40. {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,782 @@
1
+ from __future__ import annotations
2
+
3
+ import email
4
+ from email.header import decode_header
5
+ from email.message import Message
6
+ import hashlib
7
+ import imaplib
8
+ import json
9
+ import os
10
+ import re
11
+ import threading
12
+ import time
13
+ from dataclasses import dataclass
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Optional, Tuple
17
+
18
+ from abstractruntime.integrations.abstractcore.session_attachments import session_memory_owner_run_id
19
+
20
+
21
+ def _utc_now_iso() -> str:
22
+ return datetime.now(timezone.utc).isoformat()
23
+
24
+
25
+ def _as_bool(raw: Any, default: bool) -> bool:
26
+ if raw is None:
27
+ return default
28
+ if isinstance(raw, bool):
29
+ return raw
30
+ s = str(raw).strip().lower()
31
+ if not s:
32
+ return default
33
+ if s in {"1", "true", "yes", "y", "on"}:
34
+ return True
35
+ if s in {"0", "false", "no", "n", "off"}:
36
+ return False
37
+ return default
38
+
39
+
40
+ def _as_int(raw: Any, default: int) -> int:
41
+ if raw is None:
42
+ return default
43
+ try:
44
+ return int(str(raw).strip())
45
+ except Exception:
46
+ return default
47
+
48
+
49
+ _MSG_ID_RE = re.compile(r"<[^>]+>")
50
+
51
+
52
+ def _decode_mime_header(value: Any) -> str:
53
+ if not isinstance(value, str) or not value.strip():
54
+ return ""
55
+ try:
56
+ chunks = decode_header(value)
57
+ except Exception:
58
+ return value.strip()
59
+
60
+ out: list[str] = []
61
+ for part, charset in chunks:
62
+ if isinstance(part, bytes):
63
+ enc = charset or "utf-8"
64
+ try:
65
+ out.append(part.decode(enc, errors="replace"))
66
+ except Exception:
67
+ out.append(part.decode("utf-8", errors="replace"))
68
+ else:
69
+ out.append(str(part))
70
+ return "".join(out).strip()
71
+
72
+
73
+ def _parse_message_ids(value: Any) -> list[str]:
74
+ text = _decode_mime_header(value)
75
+ if not text:
76
+ return []
77
+ ids = _MSG_ID_RE.findall(text)
78
+ # Keep original casing; message ids are typically case-sensitive opaque identifiers.
79
+ out = []
80
+ seen: set[str] = set()
81
+ for mid in ids:
82
+ mid2 = str(mid).strip()
83
+ if not mid2 or mid2 in seen:
84
+ continue
85
+ seen.add(mid2)
86
+ out.append(mid2)
87
+ return out
88
+
89
+
90
+ def _safe_id_component(value: str, *, max_len: int = 48) -> str:
91
+ raw = str(value or "").strip()
92
+ if not raw:
93
+ return "default"
94
+ # Keep readable for common cases (emails, simple ids); fallback to hash for very long/odd strings.
95
+ safe = re.sub(r"[^a-zA-Z0-9_-]+", "_", raw).strip("_")
96
+ if not safe:
97
+ safe = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
98
+ if len(safe) > max_len:
99
+ safe = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:max_len]
100
+ return safe
101
+
102
+
103
+ def _extract_text_bodies(msg: Message) -> tuple[str, str]:
104
+ """Return (text/plain, text/html) bodies, best-effort decoded."""
105
+ if msg is None:
106
+ return "", ""
107
+
108
+ text_parts: list[str] = []
109
+ html_parts: list[str] = []
110
+
111
+ def _decode_part(part: Message) -> str:
112
+ payload = part.get_payload(decode=True)
113
+ if payload is None:
114
+ return ""
115
+ charset = part.get_content_charset() or "utf-8"
116
+ try:
117
+ return payload.decode(charset, errors="replace")
118
+ except Exception:
119
+ return payload.decode("utf-8", errors="replace")
120
+
121
+ if msg.is_multipart():
122
+ for part in msg.walk():
123
+ if part.is_multipart():
124
+ continue
125
+ disp = part.get_content_disposition()
126
+ if disp == "attachment":
127
+ continue
128
+ ctype = str(part.get_content_type() or "")
129
+ if ctype == "text/plain":
130
+ text = _decode_part(part).strip()
131
+ if text:
132
+ text_parts.append(text)
133
+ elif ctype == "text/html":
134
+ html = _decode_part(part).strip()
135
+ if html:
136
+ html_parts.append(html)
137
+ else:
138
+ ctype = str(msg.get_content_type() or "")
139
+ if ctype == "text/plain":
140
+ t = _decode_part(msg).strip()
141
+ if t:
142
+ text_parts.append(t)
143
+ elif ctype == "text/html":
144
+ h = _decode_part(msg).strip()
145
+ if h:
146
+ html_parts.append(h)
147
+
148
+ return ("\n\n".join(text_parts).strip(), "\n\n".join(html_parts).strip())
149
+
150
+
151
+ def _clamp_text(text: str, *, max_chars: int) -> str:
152
+ t = str(text or "")
153
+ try:
154
+ limit = int(max_chars)
155
+ except Exception:
156
+ limit = 0
157
+ if limit <= 0:
158
+ return t
159
+ if len(t) <= limit:
160
+ return t
161
+ #[WARNING:TRUNCATION] inbound email body text is bounded before entering durable state
162
+ return t[:limit] + "…"
163
+
164
+
165
@dataclass(frozen=True)
class EmailBridgeConfig:
    """Immutable settings for the inbound IMAP email bridge.

    Instances are normally built with :meth:`from_env`, which reads
    ``ABSTRACT_EMAIL_*`` environment variables and, when importable, uses the
    AbstractCore email config as a source of defaults.
    """

    # Master switch (ABSTRACT_EMAIL_BRIDGE).
    enabled: bool

    # Name of the emitted event, session id prefix, and account label.
    event_name: str
    session_prefix: str
    account: str

    # IMAP connection settings. The password is resolved at connect time from
    # the environment variable named by imap_password_env_var.
    imap_host: str
    imap_username: str
    imap_password_env_var: str
    imap_folder: str
    imap_port: int = 993
    imap_timeout_s: float = 30.0

    # Polling cadence and per-poll batch size.
    poll_seconds: float = 60.0
    max_messages_per_poll: int = 50

    # Optional: start a workflow per email thread/session (Telegram-bridge-style).
    autostart_flow_id: Optional[str] = None
    autostart_bundle_id: Optional[str] = None

    # Storage and normalization caps (treat email as untrusted input).
    store_raw_message: bool = True
    store_attachments: bool = True
    max_raw_bytes: int = 2 * 1024 * 1024
    max_body_chars: int = 20_000
    max_html_chars: int = 20_000
    max_attachments: int = 20
    max_attachment_bytes: int = 5 * 1024 * 1024
    max_total_attachment_bytes: int = 15 * 1024 * 1024

    # Directory holding the bridge's persisted state file.
    state_dir: Path = Path("./runtime/email_bridge")

    @staticmethod
    def _float_env(name: str, default: float) -> float:
        """Read env var *name* as a finite float, returning *default* otherwise.

        Unset, blank, malformed, NaN and infinite values all fall back to
        *default*, so a typo in the environment cannot abort startup.
        """
        raw = str(os.getenv(name, "") or "").strip()
        if not raw:
            return default
        try:
            parsed = float(raw)
        except ValueError:
            return default
        # NaN is the only value unequal to itself; infinities are rejected too.
        if parsed != parsed or parsed in (float("inf"), float("-inf")):
            return default
        return parsed

    @staticmethod
    def from_env(*, base_dir: Path) -> "EmailBridgeConfig":
        """Build a config from ``ABSTRACT_EMAIL_*`` environment variables.

        Args:
            base_dir: Directory under which the ``email_bridge`` state
                directory is placed.

        Returns:
            A config whose numeric settings are clamped to their minimums
            (ports, timeouts, poll interval and byte caps >= 1; character and
            attachment-count caps >= 0).
        """
        enabled = _as_bool(os.getenv("ABSTRACT_EMAIL_BRIDGE"), False)

        # Defaults from AbstractCore config system (best-effort).
        cfg_smtp_username = ""
        cfg_imap_host = ""
        cfg_imap_username = ""
        cfg_imap_password_env_var = "EMAIL_PASSWORD"
        cfg_imap_folder = "INBOX"
        try:
            from abstractcore.config.manager import get_config_manager  # type: ignore

            core_cfg = get_config_manager().config
            email_cfg = getattr(core_cfg, "email", None)
            if email_cfg is not None:
                cfg_smtp_username = str(getattr(email_cfg, "smtp_username", "") or "")
                cfg_imap_host = str(getattr(email_cfg, "imap_host", "") or "")
                cfg_imap_username = str(getattr(email_cfg, "imap_username", "") or "")
                cfg_imap_password_env_var = str(getattr(email_cfg, "imap_password_env_var", "") or "") or "EMAIL_PASSWORD"
                cfg_imap_folder = str(getattr(email_cfg, "imap_folder", "") or "") or "INBOX"
        except Exception:
            # AbstractCore is optional here; without it only env vars apply.
            pass

        event_name = str(os.getenv("ABSTRACT_EMAIL_EVENT_NAME", "") or "").strip() or "email.message"
        session_prefix = str(os.getenv("ABSTRACT_EMAIL_SESSION_PREFIX", "") or "").strip() or "email:"

        account = str(os.getenv("ABSTRACT_EMAIL_ACCOUNT", "") or "").strip()
        imap_host = str(os.getenv("ABSTRACT_EMAIL_IMAP_HOST", "") or cfg_imap_host or "").strip()
        imap_username = str(os.getenv("ABSTRACT_EMAIL_IMAP_USERNAME", "") or cfg_imap_username or "").strip()
        imap_password_env_var = (
            str(os.getenv("ABSTRACT_EMAIL_IMAP_PASSWORD_ENV_VAR", "") or cfg_imap_password_env_var or "").strip()
            or "EMAIL_PASSWORD"
        )
        imap_folder = str(os.getenv("ABSTRACT_EMAIL_IMAP_FOLDER", "") or cfg_imap_folder or "").strip() or "INBOX"

        # Float settings use the same "fall back to the default" policy as the
        # integer settings parsed by _as_int.
        poll_seconds = EmailBridgeConfig._float_env("ABSTRACT_EMAIL_POLL_SECONDS", 60.0)
        imap_port = _as_int(os.getenv("ABSTRACT_EMAIL_IMAP_PORT"), 993)
        imap_timeout_s = EmailBridgeConfig._float_env("ABSTRACT_EMAIL_IMAP_TIMEOUT_S", 30.0)
        max_messages_per_poll = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_MESSAGES_PER_POLL"), 50)

        autostart_flow_id = str(os.getenv("ABSTRACT_EMAIL_FLOW_ID", "") or "").strip() or None
        autostart_bundle_id = str(os.getenv("ABSTRACT_EMAIL_BUNDLE_ID", "") or "").strip() or None

        store_raw_message = _as_bool(os.getenv("ABSTRACT_EMAIL_STORE_RAW_MESSAGE"), True)
        store_attachments = _as_bool(os.getenv("ABSTRACT_EMAIL_STORE_ATTACHMENTS"), True)

        max_raw_bytes = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_RAW_BYTES"), 2 * 1024 * 1024)
        max_body_chars = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_BODY_CHARS"), 20_000)
        max_html_chars = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_HTML_CHARS"), 20_000)
        max_attachments = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_ATTACHMENTS"), 20)
        max_attachment_bytes = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_ATTACHMENT_BYTES"), 5 * 1024 * 1024)
        max_total_attachment_bytes = _as_int(os.getenv("ABSTRACT_EMAIL_MAX_TOTAL_ATTACHMENT_BYTES"), 15 * 1024 * 1024)

        state_dir = Path(base_dir) / "email_bridge"

        # Use outbound defaults as a fallback account label if no explicit account is set.
        if not account:
            account = str(os.getenv("ABSTRACT_EMAIL_FROM", "") or cfg_smtp_username or imap_username or "default").strip()

        return EmailBridgeConfig(
            enabled=bool(enabled),
            event_name=event_name,
            session_prefix=session_prefix,
            account=account,
            imap_host=imap_host,
            imap_username=imap_username,
            imap_password_env_var=imap_password_env_var,
            imap_folder=imap_folder,
            imap_port=max(1, int(imap_port)),
            imap_timeout_s=max(1.0, float(imap_timeout_s)),
            poll_seconds=max(1.0, float(poll_seconds)),
            max_messages_per_poll=max(1, int(max_messages_per_poll)),
            autostart_flow_id=autostart_flow_id,
            autostart_bundle_id=autostart_bundle_id,
            store_raw_message=bool(store_raw_message),
            store_attachments=bool(store_attachments),
            max_raw_bytes=max(1, int(max_raw_bytes)),
            max_body_chars=max(0, int(max_body_chars)),
            max_html_chars=max(0, int(max_html_chars)),
            max_attachments=max(0, int(max_attachments)),
            max_attachment_bytes=max(1, int(max_attachment_bytes)),
            max_total_attachment_bytes=max(1, int(max_total_attachment_bytes)),
            state_dir=state_dir,
        )
285
+
286
+
287
class EmailBridge:
    """Bridge inbound IMAP email messages to AbstractGateway events.

    A daemon thread polls one IMAP folder (opened read-only). Every message
    whose UID is above the persisted cursor is normalized, optionally stored
    (raw message and attachments) in the artifact store, and emitted as one
    event through ``runner.emit_event``. When an autostart flow is configured,
    one run per email thread is started through ``host.start_run``.
    """

    def __init__(self, *, config: EmailBridgeConfig, host: Any, runner: Any, artifact_store: Any) -> None:
        self._cfg = config
        self._host = host
        self._runner = runner
        self._artifact_store = artifact_store

        # Guards self._state (cursor and bindings).
        self._lock = threading.Lock()
        self._state: Dict[str, Any] = {}

        self._stop = threading.Event()
        self._thread: Optional[threading.Thread] = None

    @property
    def enabled(self) -> bool:
        """Whether the bridge is switched on in its config."""
        return bool(self._cfg.enabled)

    @property
    def state_path(self) -> Path:
        """Location of the JSON file holding cursors and bindings."""
        return Path(self._cfg.state_dir) / "state.json"

    @staticmethod
    def _warn(message: str, *args: Any) -> None:
        """Log a warning for a failure the bridge otherwise tolerates."""
        import logging  # local import: the module does not import logging at file level

        logging.getLogger(__name__).warning(message, *args)

    def start(self) -> None:
        """Load persisted state and start the polling thread.

        No-op when the bridge is disabled or already running.

        Raises:
            ValueError: if the bridge is enabled but the IMAP host or
                username is missing.
        """
        if not self._cfg.enabled:
            return
        if not self._cfg.imap_host or not self._cfg.imap_username:
            raise ValueError("Email bridge is enabled but IMAP host/username are missing (ABSTRACT_EMAIL_IMAP_HOST/USERNAME)")

        # Check liveness BEFORE touching state: reloading from disk while the
        # poller is running would overwrite its in-memory cursor and bindings.
        if self._thread is not None and self._thread.is_alive():
            return

        with self._lock:
            self._load_state()

        self._stop.clear()
        self._thread = threading.Thread(target=self._loop, name="email-bridge", daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Signal the polling thread to stop and wait up to 3 seconds for it."""
        self._stop.set()
        if self._thread is not None:
            try:
                self._thread.join(timeout=3.0)
            except Exception:
                pass
        self._thread = None

    # ---------------------------------------------------------------------
    # State (cursor + optional bindings)
    # ---------------------------------------------------------------------

    def _load_state(self) -> None:
        """Read the state file (if any) and ensure the expected top-level keys exist."""
        path = self.state_path
        try:
            if path.exists():
                obj = json.loads(path.read_text(encoding="utf-8"))
                if isinstance(obj, dict):
                    self._state = obj
        except Exception as exc:
            # Unreadable or corrupt state: start from scratch instead of failing.
            self._warn("email bridge: could not load state from %s: %s", path, exc)
            self._state = {}
        self._state.setdefault("version", 1)
        self._state.setdefault("cursors", {})
        self._state.setdefault("bindings", {})  # session_id -> {run_id, ...}

    def _save_state(self) -> None:
        """Persist the state via a temp file plus rename; failures are logged, not raised."""
        path = self.state_path
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            tmp = path.with_suffix(".tmp")
            tmp.write_text(json.dumps(self._state, ensure_ascii=False, indent=2), encoding="utf-8")
            tmp.replace(path)
        except Exception as exc:
            # Best-effort: an unsaved cursor means messages may be re-emitted after a restart.
            self._warn("email bridge: could not save state to %s: %s", path, exc)

    def _cursor_key(self) -> str:
        """Return the key under which this bridge's cursor is stored."""
        # Keyed by account+folder so multiple bridges/accounts can share a state dir safely.
        account = _safe_id_component(self._cfg.account)
        folder = _safe_id_component(self._cfg.imap_folder, max_len=64)
        return f"{account}:{folder}"

    def _get_last_uid(self) -> int:
        """Return the last processed UID for this account/folder (0 when unknown)."""
        cursors = self._state.get("cursors")
        if not isinstance(cursors, dict):
            return 0
        entry = cursors.get(self._cursor_key())
        # An entry is either {"last_uid": N, ...} or a bare number.
        raw = entry.get("last_uid") if isinstance(entry, dict) else entry
        try:
            return int(raw or 0)
        except Exception:
            return 0

    def _set_last_uid(self, uid: int) -> None:
        """Record *uid* as the cursor for this account/folder (in memory only)."""
        cursors = self._state.setdefault("cursors", {})
        if not isinstance(cursors, dict):
            self._state["cursors"] = {}
            cursors = self._state["cursors"]
        cursors[self._cursor_key()] = {"last_uid": int(uid), "updated_at": _utc_now_iso()}

    def _binding_for_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored binding dict for *session_id*, if there is one."""
        b = self._state.get("bindings")
        if not isinstance(b, dict):
            return None
        entry = b.get(str(session_id))
        return entry if isinstance(entry, dict) else None

    def _ensure_binding(self, *, session_id: str, thread_key: str) -> Optional[Dict[str, Any]]:
        """Return the session's binding, starting the autostart flow on first use.

        Returns ``None`` when no autostart flow is configured or when
        ``host.start_run`` fails.
        """
        if not self._cfg.autostart_flow_id:
            return None
        with self._lock:
            existing = self._binding_for_session(session_id)
            if isinstance(existing, dict):
                return existing

            try:
                run_id = self._host.start_run(
                    flow_id=self._cfg.autostart_flow_id,
                    bundle_id=self._cfg.autostart_bundle_id,
                    input_data={"email": {"thread_key": thread_key, "session_id": session_id}},
                    actor_id="email",
                    session_id=session_id,
                )
            except Exception as exc:
                self._warn("email bridge: could not start run for session %s: %s", session_id, exc)
                return None

            binding = {
                "session_id": session_id,
                "thread_key": thread_key,
                "run_id": str(run_id),
                "flow_id": self._cfg.autostart_flow_id,
                "bundle_id": self._cfg.autostart_bundle_id,
                "created_at": _utc_now_iso(),
                "updated_at": _utc_now_iso(),
            }
            bindings = self._state.setdefault("bindings", {})
            if isinstance(bindings, dict):
                bindings[str(session_id)] = binding
            self._save_state()
            return binding

    # ---------------------------------------------------------------------
    # Polling loop
    # ---------------------------------------------------------------------

    def _resolve_password(self) -> tuple[Optional[str], Optional[str]]:
        """Resolve the IMAP password.

        Returns:
            ``(password, None)`` on success or ``(None, error_message)``.
        """
        ref = str(self._cfg.imap_password_env_var or "").strip() or "EMAIL_PASSWORD"
        v = os.getenv(ref)
        if v is not None and str(v).strip():
            return str(v).strip(), None

        # Fail fast for conventional env var names (avoid silently using a name as a password).
        if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", ref):
            return None, f"Missing IMAP password env var {ref}"

        # Otherwise: treat the reference as a literal secret.
        return ref, None

    @staticmethod
    def _close_client(client: Any) -> None:
        """Close an IMAP client, tolerating any error along the way."""
        try:
            client.logout()
        except Exception:
            # logout() only closes the socket after a successful LOGOUT
            # exchange; force the close when that exchange failed.
            try:
                client.shutdown()
            except Exception:
                pass

    def _connect_imap(self) -> tuple[Optional[imaplib.IMAP4_SSL], Optional[str]]:
        """Open an authenticated connection with the folder selected read-only.

        Returns:
            ``(client, None)`` on success or ``(None, error_message)``. On
            failure any half-open connection is closed before returning.
        """
        password, err = self._resolve_password()
        if err is not None:
            return None, err

        host = self._cfg.imap_host
        port = int(self._cfg.imap_port)
        timeout_s = float(self._cfg.imap_timeout_s)
        client: Optional[imaplib.IMAP4_SSL] = None
        try:
            try:
                # Bound the connect/handshake as well, not just later reads.
                client = imaplib.IMAP4_SSL(host, port, timeout=timeout_s)
            except TypeError:
                # Constructor without a ``timeout`` parameter (Python < 3.9).
                client = imaplib.IMAP4_SSL(host, port)
            try:
                if getattr(client, "sock", None) is not None:
                    client.sock.settimeout(timeout_s)  # type: ignore[attr-defined]
            except Exception:
                pass
            client.login(self._cfg.imap_username, password)
            typ, _ = client.select(self._cfg.imap_folder, readonly=True)
            if typ != "OK":
                self._close_client(client)
                return None, f"Failed to select mailbox: {self._cfg.imap_folder}"
            return client, None
        except Exception as e:
            if client is not None:
                # Do not leak the socket when login/select raised.
                self._close_client(client)
            return None, str(e)

    def _loop(self) -> None:
        """Thread body: poll until stopped, sleeping ``poll_seconds`` between polls."""
        while not self._stop.is_set():
            try:
                self.poll_once()
            except Exception as exc:
                # Best-effort: never crash the runner thread.
                self._warn("email bridge: poll failed: %s", exc)
            self._stop.wait(timeout=float(self._cfg.poll_seconds))

    def poll_once(self) -> int:
        """Poll IMAP once and process new messages (deterministic; used by tests).

        Returns:
            The number of messages emitted during this poll.

        Note:
            The cursor advances to each successfully processed UID. A UID
            whose fetch yields no message is skipped, and is left behind once
            a later UID succeeds.
        """
        if not self._cfg.enabled:
            return 0

        with self._lock:
            last_uid = self._get_last_uid()

        client, err = self._connect_imap()
        if err is not None or client is None:
            self._warn("email bridge: IMAP connection failed: %s", err)
            return 0

        processed = 0
        try:
            # Search new UIDs.
            search_query = f"UID {int(last_uid) + 1}:*"
            typ, data = client.uid("search", None, search_query)
            if typ != "OK" or not data:
                return 0
            raw_uids = data[0] if isinstance(data, list) and data else b""
            if not isinstance(raw_uids, (bytes, bytearray)):
                raw_uids = str(raw_uids).encode("utf-8", errors="replace")
            uids = [u.decode("utf-8", errors="replace") for u in bytes(raw_uids).split() if u]

            def _as_uid(u: str) -> int:
                try:
                    return int(str(u).strip())
                except Exception:
                    return -1

            # "N:*" always matches the newest message even when its UID is
            # below N, so results at or below the cursor are filtered out.
            uids_sorted = sorted({u for u in uids if _as_uid(u) > int(last_uid)}, key=_as_uid)
            if not uids_sorted:
                return 0

            limit = max(1, int(self._cfg.max_messages_per_poll))
            for uid in uids_sorted[:limit]:
                uid_i = _as_uid(uid)
                if uid_i <= int(last_uid):
                    continue
                if not self._process_uid(client, uid=uid, uid_i=uid_i):
                    continue
                processed += 1
                last_uid = uid_i
                # Persist after every message so a crash re-emits at most one.
                with self._lock:
                    self._set_last_uid(uid_i)
                    self._save_state()
        finally:
            self._close_client(client)

        return processed

    @staticmethod
    def _parse_fetch_response(fetched: Any) -> tuple[Optional[bytes], list[str]]:
        """Split a FETCH response into ``(message_bytes, flags)``.

        Message bytes come from the literal of a ``(meta, literal)`` tuple.
        Flags are looked for in every metadata fragment, including bare bytes
        items, because a server may place ``FLAGS (...)`` after the literal.
        A fragment without flags never clears flags already found.
        """
        raw_bytes: Optional[bytes] = None
        flags: list[str] = []
        for item in fetched or []:
            if isinstance(item, tuple) and len(item) >= 2:
                meta, literal = item[0], item[1]
                if isinstance(literal, (bytes, bytearray)) and literal:
                    raw_bytes = bytes(literal)
            elif isinstance(item, (bytes, bytearray)):
                meta = item
            else:
                continue
            if not isinstance(meta, (bytes, bytearray)):
                continue
            try:
                parsed = [fb.decode("utf-8", errors="replace") for fb in imaplib.ParseFlags(bytes(meta))]
            except Exception:
                parsed = []
            if parsed:
                flags = parsed
        return raw_bytes, flags

    def _process_uid(self, client: imaplib.IMAP4_SSL, *, uid: str, uid_i: int) -> bool:
        """Fetch one message by UID, normalize it and emit its event.

        Returns:
            ``True`` when an event was emitted, ``False`` when the fetch
            produced no message bytes.
        """
        # Fetch full message bytes (PEEK leaves the \Seen flag untouched).
        typ, fetched = client.uid("fetch", str(uid), "(FLAGS BODY.PEEK[])")
        if typ != "OK" or not fetched:
            return False

        raw_bytes, flags = self._parse_fetch_response(fetched)
        if raw_bytes is None:
            return False

        event_payload, session_id, thread_key = self._normalize_message(uid=str(uid), raw_bytes=raw_bytes, flags=flags)

        # Optional: start the per-thread workflow run.
        binding = self._ensure_binding(session_id=session_id, thread_key=thread_key)
        if isinstance(binding, dict):
            event_payload["binding"] = {"run_id": str(binding.get("run_id") or "")}

        self._runner.emit_event(
            name=self._cfg.event_name,
            session_id=session_id,
            scope="session",
            payload={"email": event_payload},
            client_id="email",
        )

        return True

    @staticmethod
    def _safe_attachment_name(filename: str, *, index: int) -> str:
        """Reduce an untrusted attachment filename to a single safe path component.

        Directory parts (``/`` or ``\\`` separated) and non-printable
        characters are dropped. Empty, ``.`` and ``..`` results become
        ``attachment_<index>``.
        """
        name = str(filename or "").replace("\\", "/").rsplit("/", 1)[-1]
        name = "".join(ch for ch in name if ch.isprintable()).strip()
        if name in {"", ".", ".."}:
            return f"attachment_{index}"
        return name

    def _store_raw_message(self, *, raw_bytes: bytes, rid: Any, tags: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Store the raw message (cut to ``max_raw_bytes``) and describe the artifact.

        Returns ``None`` when storing fails.
        """
        raw_cap = max(1, int(self._cfg.max_raw_bytes))
        raw_truncated = len(raw_bytes) > raw_cap
        raw_to_store = raw_bytes[:raw_cap] if raw_truncated else raw_bytes
        try:
            meta = self._artifact_store.store(bytes(raw_to_store), content_type="message/rfc822", run_id=str(rid), tags=tags)
            return {
                "artifact_id": str(getattr(meta, "artifact_id", "") or ""),
                "blob_id": str(getattr(meta, "blob_id", "") or ""),
                "size_bytes": int(getattr(meta, "size_bytes", 0) or 0),
                "truncated": bool(raw_truncated),
                "total_bytes": int(len(raw_bytes)),
            }
        except Exception as exc:
            self._warn("email bridge: could not store raw message: %s", exc)
            return None

    def _store_attachments(
        self,
        msg: Message,
        *,
        uid: str,
        rid: Any,
        session_id: str,
        account_key: str,
        thread_key: str,
    ) -> tuple[list[Dict[str, Any]], list[Dict[str, Any]]]:
        """Store the message's attachments within the configured caps.

        Returns:
            ``(attachments, skipped_attachments)``: descriptors of stored (or
            de-duplicated) attachments, and of the ones skipped with a reason.
        """
        attachments: list[Dict[str, Any]] = []
        skipped: list[Dict[str, Any]] = []
        if not (self._cfg.store_attachments and msg.is_multipart()):
            return attachments, skipped

        # Dedupe by (handle, sha256) within the session attachment registry.
        try:
            existing = self._artifact_store.list_by_run(str(rid)) or []
        except Exception:
            existing = []
        existing_index: Dict[Tuple[str, str], Any] = {}
        for m in existing:
            tags = getattr(m, "tags", None)
            if not isinstance(tags, dict):
                continue
            if str(tags.get("kind") or "") != "attachment":
                continue
            known_handle = str(tags.get("path") or "").strip()
            known_sha = str(tags.get("sha256") or "").strip().lower()
            if known_handle and known_sha:
                existing_index[(known_handle, known_sha)] = m

        max_count = max(0, int(self._cfg.max_attachments))  # 0 disables the count cap
        max_each = max(1, int(self._cfg.max_attachment_bytes))
        max_total = max(1, int(self._cfg.max_total_attachment_bytes))
        total = 0
        idx = 0

        for part in msg.walk():
            if part.is_multipart():
                continue
            disp = part.get_content_disposition()
            filename = _decode_mime_header(part.get_filename())
            if disp != "attachment" and not filename:
                continue

            idx += 1
            ctype = str(part.get_content_type() or "")
            if max_count and len(attachments) >= max_count:
                skipped.append({"reason": "max_attachments", "filename": filename or "", "content_type": ctype})
                continue

            decoded = part.get_payload(decode=True)
            if decoded is None:
                continue
            content = bytes(decoded)
            if not content:
                continue

            if len(content) > max_each:
                skipped.append(
                    {
                        "reason": "max_attachment_bytes",
                        "filename": filename or "",
                        "content_type": ctype,
                        "size_bytes": int(len(content)),
                    }
                )
                continue

            if total + len(content) > max_total:
                skipped.append(
                    {
                        "reason": "max_total_attachment_bytes",
                        "filename": filename or "",
                        "content_type": ctype,
                        "size_bytes": int(len(content)),
                    }
                )
                continue

            # The filename is attacker-controlled: keep it from contributing
            # path components ("../", absolute paths) to the handle.
            filename2 = self._safe_attachment_name(filename, index=idx)
            handle = f"email/{account_key}/{thread_key}/{filename2}"
            sha256 = hashlib.sha256(content).hexdigest()

            existing_meta = existing_index.get((handle, sha256))
            if existing_meta is not None:
                attachments.append(
                    {
                        "artifact_id": str(getattr(existing_meta, "artifact_id", "") or ""),
                        "blob_id": str(getattr(existing_meta, "blob_id", "") or ""),
                        "sha256": sha256,
                        "handle": handle,
                        "filename": filename2,
                        "content_type": str(getattr(existing_meta, "content_type", "") or ctype),
                        "size_bytes": int(getattr(existing_meta, "size_bytes", 0) or len(content)),
                        "deduped": True,
                    }
                )
                total += len(content)
                continue

            tags2 = {
                "kind": "attachment",
                "source": "email",
                "path": handle,
                "filename": filename2,
                "session_id": session_id,
                "sha256": sha256,
                "email_uid": str(uid),
                "thread_key": thread_key,
            }
            try:
                meta2 = self._artifact_store.store(content, content_type=ctype or "application/octet-stream", run_id=str(rid), tags=tags2)
            except Exception:
                skipped.append(
                    {"reason": "store_failed", "filename": filename2, "content_type": ctype, "size_bytes": int(len(content))}
                )
                continue

            attachments.append(
                {
                    "artifact_id": str(getattr(meta2, "artifact_id", "") or ""),
                    "blob_id": str(getattr(meta2, "blob_id", "") or ""),
                    "sha256": sha256,
                    "handle": handle,
                    "filename": filename2,
                    "content_type": str(getattr(meta2, "content_type", "") or ""),
                    "size_bytes": int(getattr(meta2, "size_bytes", 0) or 0),
                    "deduped": False,
                }
            )
            total += len(content)

        return attachments, skipped

    def _normalize_message(self, *, uid: str, raw_bytes: bytes, flags: list[str]) -> tuple[Dict[str, Any], str, str]:
        """Parse a raw message into the event payload.

        Returns:
            ``(payload, session_id, thread_key)``. The thread key is a hash of
            the first available of: first ``References`` id, first
            ``In-Reply-To`` id, ``Message-ID``, ``From`` + ``Subject``, UID.
        """
        msg = email.message_from_bytes(raw_bytes)
        subject_v = _decode_mime_header(msg.get("Subject"))
        from_v = _decode_mime_header(msg.get("From"))
        to_v = _decode_mime_header(msg.get("To"))
        cc_v = _decode_mime_header(msg.get("Cc"))
        date_v = _decode_mime_header(msg.get("Date"))
        message_id = _decode_mime_header(msg.get("Message-ID"))

        in_reply_to = _parse_message_ids(msg.get("In-Reply-To"))
        references = _parse_message_ids(msg.get("References"))

        if references:
            thread_root = references[0]
        elif in_reply_to:
            thread_root = in_reply_to[0]
        elif message_id:
            thread_root = message_id
        else:
            thread_root = f"{from_v}\n{subject_v}".strip() or uid

        thread_key = hashlib.sha256(str(thread_root).encode("utf-8")).hexdigest()[:16]

        account_key = _safe_id_component(self._cfg.account)
        prefix = str(self._cfg.session_prefix or "email:").strip() or "email:"
        if not prefix.endswith(":"):
            prefix = prefix + ":"
        session_id = f"{prefix}{account_key}:{thread_key}"

        # Artifacts are stored under the run id derived from the session id.
        rid = session_memory_owner_run_id(session_id)

        raw_meta: Optional[Dict[str, Any]] = None
        if self._cfg.store_raw_message:
            raw_meta = self._store_raw_message(
                raw_bytes=raw_bytes,
                rid=rid,
                tags={
                    "kind": "email_raw",
                    "source": "email_bridge",
                    "session_id": session_id,
                    "account": account_key,
                    "mailbox": str(self._cfg.imap_folder or ""),
                    "uid": str(uid),
                    "thread_key": thread_key,
                    "message_id": str(message_id or ""),
                },
            )

        body_text, body_html = _extract_text_bodies(msg)
        body_text = _clamp_text(body_text, max_chars=int(self._cfg.max_body_chars))
        body_html = _clamp_text(body_html, max_chars=int(self._cfg.max_html_chars))

        attachments, skipped_attachments = self._store_attachments(
            msg,
            uid=uid,
            rid=rid,
            session_id=session_id,
            account_key=account_key,
            thread_key=thread_key,
        )

        event: Dict[str, Any] = {
            "bridge": {"version": 1, "received_at": _utc_now_iso()},
            "account": account_key,
            "mailbox": str(self._cfg.imap_folder or ""),
            "uid": str(uid),
            "message_id": message_id,
            "thread_key": thread_key,
            "thread_root_message_id": thread_root,
            "in_reply_to": in_reply_to,
            "references": references,
            "from": from_v,
            "to": to_v,
            "cc": cc_v,
            "subject": subject_v,
            "date": date_v,
            "flags": list(flags),
            "seen": any(str(f).lstrip("\\").lower() == "seen" for f in flags or []),
            "body_text": body_text,
            "body_html": body_html,
            "artifacts": {"raw": raw_meta, "attachments": attachments, "skipped_attachments": skipped_attachments},
        }

        return event, session_id, thread_key