superlocalmemory 3.4.0 → 3.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +7 -8
  2. package/docs/screenshots/01-dashboard-main.png +0 -0
  3. package/docs/screenshots/02-knowledge-graph.png +0 -0
  4. package/docs/screenshots/03-patterns-learning.png +0 -0
  5. package/docs/screenshots/04-learning-dashboard.png +0 -0
  6. package/docs/screenshots/05-behavioral-analysis.png +0 -0
  7. package/docs/screenshots/06-graph-communities.png +0 -0
  8. package/package.json +2 -2
  9. package/pyproject.toml +11 -2
  10. package/scripts/postinstall.js +26 -7
  11. package/src/superlocalmemory/cli/commands.py +42 -60
  12. package/src/superlocalmemory/cli/daemon.py +107 -47
  13. package/src/superlocalmemory/cli/main.py +10 -0
  14. package/src/superlocalmemory/cli/setup_wizard.py +137 -9
  15. package/src/superlocalmemory/core/config.py +28 -0
  16. package/src/superlocalmemory/core/consolidation_engine.py +38 -1
  17. package/src/superlocalmemory/core/engine.py +9 -0
  18. package/src/superlocalmemory/core/engine_wiring.py +5 -1
  19. package/src/superlocalmemory/core/graph_analyzer.py +254 -12
  20. package/src/superlocalmemory/core/health_monitor.py +313 -0
  21. package/src/superlocalmemory/core/reranker_worker.py +19 -5
  22. package/src/superlocalmemory/ingestion/__init__.py +13 -0
  23. package/src/superlocalmemory/ingestion/adapter_manager.py +234 -0
  24. package/src/superlocalmemory/ingestion/base_adapter.py +177 -0
  25. package/src/superlocalmemory/ingestion/calendar_adapter.py +340 -0
  26. package/src/superlocalmemory/ingestion/credentials.py +118 -0
  27. package/src/superlocalmemory/ingestion/gmail_adapter.py +369 -0
  28. package/src/superlocalmemory/ingestion/parsers.py +100 -0
  29. package/src/superlocalmemory/ingestion/transcript_adapter.py +156 -0
  30. package/src/superlocalmemory/learning/consolidation_worker.py +287 -53
  31. package/src/superlocalmemory/learning/entity_compiler.py +377 -0
  32. package/src/superlocalmemory/mesh/__init__.py +12 -0
  33. package/src/superlocalmemory/mesh/broker.py +344 -0
  34. package/src/superlocalmemory/retrieval/entity_channel.py +141 -4
  35. package/src/superlocalmemory/retrieval/spreading_activation.py +45 -0
  36. package/src/superlocalmemory/server/api.py +15 -8
  37. package/src/superlocalmemory/server/routes/behavioral.py +8 -4
  38. package/src/superlocalmemory/server/routes/chat.py +320 -0
  39. package/src/superlocalmemory/server/routes/entity.py +95 -0
  40. package/src/superlocalmemory/server/routes/ingest.py +110 -0
  41. package/src/superlocalmemory/server/routes/insights.py +368 -0
  42. package/src/superlocalmemory/server/routes/learning.py +106 -6
  43. package/src/superlocalmemory/server/routes/memories.py +20 -9
  44. package/src/superlocalmemory/server/routes/mesh.py +186 -0
  45. package/src/superlocalmemory/server/routes/stats.py +25 -3
  46. package/src/superlocalmemory/server/routes/timeline.py +252 -0
  47. package/src/superlocalmemory/server/routes/v3_api.py +161 -0
  48. package/src/superlocalmemory/server/ui.py +8 -0
  49. package/src/superlocalmemory/server/unified_daemon.py +691 -0
  50. package/src/superlocalmemory/storage/schema_v343.py +229 -0
  51. package/src/superlocalmemory/ui/index.html +168 -58
  52. package/src/superlocalmemory/ui/js/graph-event-bus.js +83 -0
  53. package/src/superlocalmemory/ui/js/graph-filters.js +1 -1
  54. package/src/superlocalmemory/ui/js/knowledge-graph.js +942 -0
  55. package/src/superlocalmemory/ui/js/memory-chat.js +344 -0
  56. package/src/superlocalmemory/ui/js/memory-timeline.js +265 -0
  57. package/src/superlocalmemory/ui/js/quick-actions.js +334 -0
  58. package/src/superlocalmemory.egg-info/PKG-INFO +0 -594
  59. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -279
  60. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  61. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  62. package/src/superlocalmemory.egg-info/requires.txt +0 -47
  63. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,369 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the Elastic License 2.0 - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Gmail ingestion adapter — 3 tiers of Gmail access.
6
+
7
+ Tier 1: File import (.mbox from Google Takeout) — zero setup
8
+ Tier 1.5: IMAP polling — no GCP, just email/password
9
+ Tier 2: Gmail API with OAuth polling — needs GCP OAuth client, no Pub/Sub
10
+ Tier 3: Gmail API with Pub/Sub push — full GCP (future)
11
+
12
+ OPT-IN only. Enabled via: slm adapters enable gmail
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ License: Elastic-2.0
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import logging
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ from superlocalmemory.ingestion.base_adapter import BaseAdapter, AdapterConfig, IngestItem
26
+
27
+ logger = logging.getLogger("superlocalmemory.ingestion.gmail")
28
+
29
+
30
class GmailAdapter(BaseAdapter):
    """Gmail ingestion with automatic tier detection.

    Supported tiers (resolved in :meth:`_detect_tier` when ``tier="auto"``):

    * ``mbox``  — one-shot import of a Google Takeout ``.mbox`` file
    * ``imap``  — periodic IMAP polling (email + password credentials)
    * ``oauth`` — Gmail API polling with a stored OAuth refresh token
    """

    source_type = "gmail"

    def __init__(self, config: AdapterConfig | None = None, tier: str = "auto"):
        """Initialize the adapter.

        Args:
            config: Optional adapter configuration forwarded to the base class.
            tier: One of ``"auto"``, ``"mbox"``, ``"imap"``, ``"oauth"``.
                ``"auto"`` defers detection to :meth:`run`.
        """
        super().__init__(config)
        self._tier = tier
        self._mbox_path: str | None = None
        self._mbox_processed = False
        # Gmail API history cursor; None until the first OAuth sync completes.
        self._history_id: str | None = None
        self._poll_interval = 300  # seconds between IMAP/OAuth poll cycles

    def run(self) -> None:
        """Resolve the active tier, then enter the base adapter loop."""
        self._detect_tier()
        logger.info("Gmail adapter starting (tier=%s)", self._tier)
        super().run()

    def fetch_items(self) -> list[IngestItem]:
        """Fetch new items using the fetcher for the active tier."""
        fetchers = {
            "mbox": self._fetch_mbox,
            "imap": self._fetch_imap,
            "oauth": self._fetch_oauth,
        }
        fetcher = fetchers.get(self._tier)
        return fetcher() if fetcher else []

    def wait_for_next_cycle(self) -> None:
        """Tier 1 (mbox): run once then stop. Others: sleep the poll interval."""
        if self._tier == "mbox" and self._mbox_processed:
            logger.info("MBOX import complete, adapter stopping")
            self.stop()
            return
        self._stop_event.wait(self._poll_interval)

    # -- Tier detection --

    def _detect_tier(self) -> None:
        """Auto-detect the best available tier (no-op unless tier is 'auto').

        Priority: explicit config in adapters.json, then stored OAuth
        credentials, then any .mbox file in ~/.superlocalmemory/import/.
        """
        if self._tier != "auto":
            return

        adapters_path = Path.home() / ".superlocalmemory" / "adapters.json"
        cfg = {}
        if adapters_path.exists():
            try:
                cfg = json.loads(adapters_path.read_text()).get("gmail", {})
            except (OSError, ValueError) as exc:
                # A corrupt/unreadable user-edited config must not crash the
                # adapter; fall back to filesystem-based detection below.
                logger.warning("Could not read adapters.json: %s", exc)

        # Explicit tier from config wins.
        if cfg.get("tier") == "mbox" or cfg.get("mbox_path"):
            self._tier = "mbox"
            self._mbox_path = cfg.get("mbox_path", "")
            return

        if cfg.get("tier") == "imap":
            self._tier = "imap"
            return

        # A stored refresh token implies OAuth (tier 2).
        from superlocalmemory.ingestion.credentials import has_credential
        if has_credential("gmail", "refresh_token"):
            self._tier = "oauth"
            return

        # Default: look for an .mbox file dropped into the import directory.
        mbox_dir = Path.home() / ".superlocalmemory" / "import"
        mbox_files = list(mbox_dir.glob("*.mbox")) if mbox_dir.exists() else []
        if mbox_files:
            self._tier = "mbox"
            self._mbox_path = str(mbox_files[0])
            return

        logger.warning("No Gmail credentials or MBOX file found. "
                       "Place .mbox in ~/.superlocalmemory/import/ or run setup.")
        self._tier = "mbox"  # Will return empty if no file

    # -- Tier 1: MBOX file import --

    def _fetch_mbox(self) -> list[IngestItem]:
        """Parse the configured .mbox file (Google Takeout) into items.

        Runs at most once; subsequent calls return an empty list.
        """
        if self._mbox_processed or not self._mbox_path:
            return []

        path = Path(self._mbox_path)
        if not path.exists():
            logger.warning("MBOX file not found: %s", path)
            self._mbox_processed = True
            return []

        import mailbox
        items: list[IngestItem] = []
        mbox = mailbox.mbox(str(path))
        try:
            total = len(mbox)
            logger.info("Parsing MBOX: %d messages", total)

            for i, message in enumerate(mbox):
                if self._stop_event.is_set():
                    break
                try:
                    msg_id = message.get("Message-ID", f"mbox-{i}")
                    subject = message.get("Subject", "(no subject)")
                    from_addr = message.get("From", "unknown")
                    date = message.get("Date", "")
                    body = self._extract_rfc822_body(message)
                    content = f"Email: {subject}\nFrom: {from_addr}\nDate: {date}\n\n{body}"

                    items.append(IngestItem(
                        content=content,
                        # Angle brackets stripped so keys match the other tiers.
                        dedup_key=str(msg_id).strip("<>"),
                        metadata={
                            "subject": subject,
                            "from": from_addr,
                            "date": date,
                            "source": "mbox_import",
                        },
                    ))

                    # Progress logging for large Takeout archives.
                    if (i + 1) % 100 == 0:
                        logger.info("MBOX progress: %d/%d messages", i + 1, total)
                except Exception as exc:
                    # One malformed message must not abort the whole import.
                    logger.debug("Failed to parse message %d: %s", i, exc)
        finally:
            # Release the mbox file handle even if iteration fails.
            mbox.close()

        self._mbox_processed = True
        logger.info("MBOX import: %d messages extracted", len(items))
        return items

    # -- Tier 1.5: IMAP polling --

    def _fetch_imap(self) -> list[IngestItem]:
        """Poll up to 20 unseen INBOX messages via IMAP.

        Requires 'email' and 'password' credentials; 'imap_host' is optional
        and defaults to imap.gmail.com.
        """
        try:
            # Hoisted out of the per-message loop (was imported per message).
            import email as email_lib
            import imaplib
            from superlocalmemory.ingestion.credentials import load_credential

            host = load_credential("gmail", "imap_host") or "imap.gmail.com"
            address = load_credential("gmail", "email")
            password = load_credential("gmail", "password")

            if not address or not password:
                logger.warning("IMAP credentials not found. Run: slm adapters enable gmail --setup")
                return []

            items: list[IngestItem] = []
            conn = imaplib.IMAP4_SSL(host)
            try:
                conn.login(address, password)
                conn.select("INBOX")

                # Fetch last 20 unseen messages
                _, msg_nums = conn.search(None, "UNSEEN")

                for num in msg_nums[0].split()[-20:]:
                    if self._stop_event.is_set():
                        break
                    try:
                        _, data = conn.fetch(num, "(RFC822)")
                        msg = email_lib.message_from_bytes(data[0][1])
                        msg_id = msg.get("Message-ID", f"imap-{num.decode()}")
                        subject = msg.get("Subject", "(no subject)")
                        from_addr = msg.get("From", "unknown")
                        body = self._extract_rfc822_body(msg)
                        content = f"Email: {subject}\nFrom: {from_addr}\n\n{body}"

                        items.append(IngestItem(
                            content=content,
                            dedup_key=str(msg_id).strip("<>"),
                            metadata={"subject": subject, "from": from_addr, "source": "imap"},
                        ))
                    except Exception as exc:
                        logger.debug("IMAP fetch error: %s", exc)
            finally:
                # Always release the connection, even on a mid-poll failure
                # (previously it leaked whenever login/select/fetch raised).
                try:
                    conn.logout()
                except Exception:
                    pass
            return items

        except Exception as exc:
            logger.warning("IMAP polling failed: %s", exc)
            return []

    @staticmethod
    def _extract_rfc822_body(message, limit: int = 3000) -> str:
        """Return the (truncated) text/plain body of an email.message.Message.

        Shared by the mbox and IMAP tiers (previously duplicated inline).
        Stops at the first text/plain part of a multipart message; returns
        "" when no decodable body is found.
        """
        body = ""
        if message.is_multipart():
            for part in message.walk():
                if part.get_content_type() == "text/plain":
                    payload = part.get_payload(decode=True)
                    if payload:
                        body = payload.decode("utf-8", errors="replace")
                    break
        else:
            payload = message.get_payload(decode=True)
            if payload:
                body = payload.decode("utf-8", errors="replace")
        return body[:limit] if body else ""

    # -- Tier 2: OAuth API polling --

    def _fetch_oauth(self) -> list[IngestItem]:
        """Poll the Gmail API with OAuth.

        Requires google-api-python-client plus stored refresh_token,
        client_id and client_secret credentials. Uses the history API for
        incremental sync after the first cycle.
        """
        try:
            from superlocalmemory.ingestion.credentials import load_credential

            refresh_token = load_credential("gmail", "refresh_token")
            client_id = load_credential("gmail", "client_id")
            client_secret = load_credential("gmail", "client_secret")

            if not all([refresh_token, client_id, client_secret]):
                logger.warning("Gmail OAuth credentials incomplete. Run setup.")
                return []

            # Build credentials (access token is refreshed lazily by the client).
            from google.oauth2.credentials import Credentials
            from googleapiclient.discovery import build

            creds = Credentials(
                token=None,
                refresh_token=refresh_token,
                client_id=client_id,
                client_secret=client_secret,
                token_uri="https://oauth2.googleapis.com/token",
            )

            service = build("gmail", "v1", credentials=creds)

            # Incremental: list history since the last known cursor.
            if self._history_id:
                results = service.users().history().list(
                    userId="me",
                    startHistoryId=self._history_id,
                    historyTypes=["messageAdded"],
                ).execute()
                history = results.get("history", [])
                msg_ids = []
                for h in history:
                    for added in h.get("messagesAdded", []):
                        msg_ids.append(added["message"]["id"])
            else:
                # Initial: get last 20 messages
                results = service.users().messages().list(
                    userId="me", maxResults=20,
                ).execute()
                msg_ids = [m["id"] for m in results.get("messages", [])]

            # Update history ID for next cycle
            profile = service.users().getProfile(userId="me").execute()
            self._history_id = profile.get("historyId")

            items = []
            for msg_id in msg_ids:
                if self._stop_event.is_set():
                    break
                try:
                    msg = service.users().messages().get(
                        userId="me", id=msg_id, format="full",
                    ).execute()
                    headers = {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
                    subject = headers.get("Subject", "(no subject)")
                    from_addr = headers.get("From", "unknown")
                    date = headers.get("Date", "")

                    # Extract body from payload
                    body = self._extract_gmail_body(msg.get("payload", {}))
                    body = body[:3000] if body else ""
                    content = f"Email: {subject}\nFrom: {from_addr}\nDate: {date}\n\n{body}"

                    items.append(IngestItem(
                        content=content,
                        dedup_key=msg_id,
                        metadata={"subject": subject, "from": from_addr, "date": date, "source": "oauth"},
                    ))
                except Exception as exc:
                    logger.debug("Gmail API fetch error for %s: %s", msg_id, exc)

            return items

        except ImportError:
            logger.warning("Gmail OAuth requires: pip install 'superlocalmemory[ingestion]'")
            return []
        except Exception as exc:
            logger.warning("Gmail OAuth polling failed: %s", exc)
            return []

    @staticmethod
    def _extract_gmail_body(payload: dict) -> str:
        """Extract the plain text body from a Gmail API message payload.

        Checks the top-level part first, then searches (recursively) through
        nested multipart parts for the first text/plain body.
        """
        import base64

        if payload.get("mimeType") == "text/plain":
            data = payload.get("body", {}).get("data", "")
            if data:
                return base64.urlsafe_b64decode(data).decode("utf-8", errors="replace")

        for part in payload.get("parts", []):
            if part.get("mimeType") == "text/plain":
                data = part.get("body", {}).get("data", "")
                if data:
                    return base64.urlsafe_b64decode(data).decode("utf-8", errors="replace")
            # Recurse into nested parts
            if "parts" in part:
                result = GmailAdapter._extract_gmail_body(part)
                if result:
                    return result

        return ""
354
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    import logging as _logging
    _logging.basicConfig(level=_logging.INFO, format="%(asctime)s %(message)s")

    # Resolve the configured tier; fall back to auto-detection.
    config_file = Path.home() / ".superlocalmemory" / "adapters.json"
    selected_tier = "auto"
    if config_file.exists():
        gmail_cfg = json.loads(config_file.read_text()).get("gmail", {})
        selected_tier = gmail_cfg.get("tier", "auto")

    GmailAdapter(tier=selected_tier).run()
@@ -0,0 +1,100 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the Elastic License 2.0 - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Parsers for ingestion file formats: SRT, VTT, MBOX, ICS."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ import re
11
+ from pathlib import Path
12
+ from typing import NamedTuple
13
+
14
+
15
class Utterance(NamedTuple):
    """A single speaker turn extracted from a transcript file."""

    speaker: str  # speaker name, or "unknown" when no prefix was found
    text: str  # spoken text with any "Speaker:" prefix removed
    timestamp: str  # raw timestamp line from the source cue ("" if absent)
20
+
21
def parse_srt(filepath: str | Path) -> list[Utterance]:
    """Parse a SubRip (.srt) file into a list of utterances.

    Each SRT cue is a block of: sequence number, timestamp line, then one
    or more text lines. Cues with fewer than three lines are skipped.
    """
    raw = Path(filepath).read_text(encoding="utf-8", errors="replace")
    parsed: list[Utterance] = []
    for cue in re.split(r"\n\n+", raw.strip()):
        cue_lines = cue.strip().split("\n")
        if len(cue_lines) < 3:
            continue
        # cue_lines[0] is the sequence number; [1] the timestamps; rest is text.
        stamp = cue_lines[1].strip()
        joined = " ".join(cue_lines[2:]).strip()
        if not joined:
            continue
        # A leading "Speaker: text" pattern becomes the speaker field.
        who, said = _extract_speaker(joined)
        parsed.append(Utterance(speaker=who, text=said, timestamp=stamp))
    return parsed
40
def parse_vtt(filepath: str | Path) -> list[Utterance]:
    """Parse a WebVTT (.vtt) file into a list of utterances."""
    raw = Path(filepath).read_text(encoding="utf-8", errors="replace")
    # Strip the leading "WEBVTT" header block (through the first blank line).
    raw = re.sub(r"^WEBVTT.*?\n\n", "", raw, flags=re.DOTALL)
    parsed: list[Utterance] = []
    for cue in re.split(r"\n\n+", raw.strip()):
        stamp = ""
        fragments: list[str] = []
        for line in cue.strip().split("\n"):
            stripped = line.strip()
            if "-->" in line:
                stamp = stripped
            elif stripped and not stripped.isdigit():
                # Turn "<v Speaker>" voice tags into a "Speaker: " prefix,
                # then drop any remaining markup tags.
                no_voice = re.sub(r"<v\s+([^>]+)>", r"\1: ", line)
                plain = re.sub(r"<[^>]+>", "", no_voice).strip()
                if plain:
                    fragments.append(plain)
        joined = " ".join(fragments)
        if joined:
            who, said = _extract_speaker(joined)
            parsed.append(Utterance(speaker=who, text=said, timestamp=stamp))
    return parsed
67
+ def parse_transcript_file(filepath: str | Path) -> tuple[str, list[str]]:
68
+ """Parse any transcript file (.srt, .vtt, .txt).
69
+
70
+ Returns (combined_text, list_of_speakers).
71
+ """
72
+ path = Path(filepath)
73
+ suffix = path.suffix.lower()
74
+
75
+ if suffix == ".srt":
76
+ utterances = parse_srt(path)
77
+ elif suffix == ".vtt":
78
+ utterances = parse_vtt(path)
79
+ else:
80
+ # Plain text — treat entire file as one utterance
81
+ text = path.read_text(encoding="utf-8", errors="replace")
82
+ return text[:5000], []
83
+
84
+ speakers = list({u.speaker for u in utterances if u.speaker != "unknown"})
85
+ combined = "\n".join(f"[{u.speaker}] {u.text}" for u in utterances)
86
+ return combined[:5000], speakers
87
+
88
+
89
+ def content_hash(filepath: str | Path) -> str:
90
+ """SHA256 of file content (first 32 chars). Path-independent for dedup."""
91
+ content = Path(filepath).read_bytes()
92
+ return hashlib.sha256(content).hexdigest()[:32]
93
+
94
+
95
+ def _extract_speaker(text: str) -> tuple[str, str]:
96
+ """Extract speaker from 'Speaker: text' or 'Speaker Name: text' pattern."""
97
+ match = re.match(r"^([A-Z][a-zA-Z\s]{0,30}):\s*(.+)", text)
98
+ if match:
99
+ return match.group(1).strip(), match.group(2).strip()
100
+ return "unknown", text
@@ -0,0 +1,156 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the Elastic License 2.0 - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Transcript ingestion adapter — watches for .srt/.vtt/.txt files.
6
+
7
+ Uses watchdog (cross-platform file watcher) to detect new transcript files.
8
+ Parses them, extracts speaker diarization, propagates entities, and POSTs
9
+ to the daemon's /ingest endpoint.
10
+
11
+ OPT-IN only. Enabled via: slm adapters enable transcript
12
+
13
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
14
+ License: Elastic-2.0
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import sys
22
+ import time
23
+ from pathlib import Path
24
+
25
+ from superlocalmemory.ingestion.base_adapter import BaseAdapter, AdapterConfig, IngestItem
26
+
27
+ logger = logging.getLogger("superlocalmemory.ingestion.transcript")
28
+
29
+ _WATCH_EXTENSIONS = {".srt", ".vtt", ".txt"}
30
+
31
+
32
class TranscriptAdapter(BaseAdapter):
    """Watches a directory for transcript files (.srt/.vtt/.txt) and ingests them.

    New files are detected by a watchdog observer when available; files
    already present at startup are picked up by an initial directory scan.
    """

    source_type = "transcript"

    def __init__(self, watch_dir: str | Path, config: AdapterConfig | None = None):
        """Initialize the adapter.

        Args:
            watch_dir: Directory to watch for transcript files (not recursive).
            config: Optional adapter configuration forwarded to the base class.
        """
        super().__init__(config)
        self._watch_dir = Path(watch_dir)
        # Appended by the watchdog observer thread, drained by fetch_items()
        # on the adapter thread — only atomic list ops (append/pop) are used.
        self._pending_files: list[Path] = []
        self._observer = None

    def run(self) -> None:
        """Start the file watcher, scan for existing files, then loop."""
        if not self._watch_dir.exists():
            logger.error("Watch directory does not exist: %s", self._watch_dir)
            return

        # Start watchdog observer (optional dependency; polling still works).
        try:
            from watchdog.observers import Observer
            from watchdog.events import FileSystemEventHandler

            class _Handler(FileSystemEventHandler):
                def __init__(self, adapter: TranscriptAdapter):
                    self._adapter = adapter

                def on_created(self, event):
                    # NOTE(review): only on_created is handled — files moved
                    # into the directory may surface as on_moved; confirm.
                    if event.is_directory:
                        return
                    path = Path(event.src_path)
                    if path.suffix.lower() in _WATCH_EXTENSIONS:
                        self._adapter._pending_files.append(path)

            self._observer = Observer()
            self._observer.schedule(_Handler(self), str(self._watch_dir), recursive=False)
            self._observer.start()
            logger.info("Watching for transcripts in: %s", self._watch_dir)
        except ImportError:
            logger.warning("watchdog not installed — polling mode only")

        # Pick up files that were already present before watching started.
        for path in self._watch_dir.iterdir():
            if path.suffix.lower() in _WATCH_EXTENSIONS and path.is_file():
                self._pending_files.append(path)

        super().run()

        # Cleanup after the base adapter loop exits.
        if self._observer:
            self._observer.stop()
            self._observer.join()

    def fetch_items(self) -> list[IngestItem]:
        """Convert pending transcript files into IngestItems.

        Emits one item per transcript plus one per detected speaker
        (entity propagation).
        """
        if not self._pending_files:
            return []

        # Drain one element at a time with atomic pop(0): the previous
        # list(...) snapshot followed by clear() could silently drop a file
        # appended by the watchdog thread between those two statements.
        batch: list[Path] = []
        while self._pending_files:
            batch.append(self._pending_files.pop(0))

        # Hoisted out of the per-file loop (was re-imported per file).
        from superlocalmemory.ingestion.parsers import (
            parse_transcript_file, content_hash,
        )

        items: list[IngestItem] = []
        for filepath in batch:
            try:
                combined_text, speakers = parse_transcript_file(filepath)
                dedup = content_hash(filepath)

                # Main transcript ingestion
                items.append(IngestItem(
                    content=f"Meeting transcript ({filepath.name}):\n{combined_text}",
                    dedup_key=dedup,
                    metadata={
                        "filename": filepath.name,
                        "speakers": speakers,
                        "source": "file_watcher",
                    },
                ))

                # Entity propagation: each speaker gets a timeline entry
                for speaker in speakers:
                    items.append(IngestItem(
                        content=f"{speaker} participated in meeting: {filepath.stem}. "
                                f"Transcript file: {filepath.name}",
                        dedup_key=f"speaker-{speaker}-{dedup}",
                        metadata={
                            "entity_name": speaker,
                            "meeting_file": filepath.name,
                            "source": "entity_propagation",
                        },
                    ))

            except Exception as exc:
                # One unparseable file must not abort the batch.
                logger.warning("Failed to parse %s: %s", filepath, exc)

        return items

    def wait_for_next_cycle(self) -> None:
        """Wait 30s for new files (watchdog handles detection in between)."""
        self._stop_event.wait(30)
135
# ---------------------------------------------------------------------------
# CLI entry point: python -m superlocalmemory.ingestion.transcript_adapter
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    import logging as _logging
    _logging.basicConfig(level=_logging.INFO, format="%(asctime)s %(message)s")

    # Read the watch directory from the adapters config file.
    config_file = Path.home() / ".superlocalmemory" / "adapters.json"
    target_dir = ""
    if config_file.exists():
        parsed_cfg = json.loads(config_file.read_text())
        target_dir = parsed_cfg.get("transcript", {}).get("watch_dir", "")

    if not target_dir:
        print("No watch_dir configured. Set it in ~/.superlocalmemory/adapters.json")
        print(' {"transcript": {"enabled": true, "watch_dir": "/path/to/transcripts"}}')
        sys.exit(1)

    TranscriptAdapter(watch_dir=target_dir).run()