omnimemory-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,826 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import os
6
+ import sqlite3
7
+ import subprocess
8
+ import time
9
+ import uuid
10
+ from dataclasses import dataclass
11
+ from datetime import datetime, timezone
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
# Version stamp written into every envelope and every memories row.
SCHEMA_VERSION = "0.1.0"
# Valid retention layers for a memory (also the top-level markdown folders).
LAYER_SET = {"instant", "short", "long", "archive"}
# Valid memory kinds.
KIND_SET = {"note", "decision", "task", "checkpoint", "summary", "evidence"}
# Event types accepted in the JSONL log and the memory_events table.
EVENT_SET = {
    "memory.write",
    "memory.update",
    "memory.checkpoint",
    "memory.promote",
    "memory.verify",
    "memory.sync",
}
26
+
27
+
28
@dataclass
class MemoryPaths:
    """Resolved filesystem locations of one omnimemory store."""

    root: Path  # home directory of the store (also used as the git working tree)
    markdown_root: Path  # markdown bodies under <layer>/<YYYY>/<MM>/<id>.md
    jsonl_root: Path  # append-only events-YYYY-MM.jsonl logs
    sqlite_path: Path  # derived SQLite index database
34
+
35
+
36
def utc_now() -> str:
    """Return the current UTC time as a second-resolution ISO-8601 string."""
    now = datetime.now(timezone.utc)
    return now.replace(microsecond=0).isoformat()
38
+
39
+
40
def make_id() -> str:
    """Return a random 32-character lowercase hex identifier (UUID4, no dashes)."""
    fresh = uuid.uuid4()
    return fresh.hex
42
+
43
+
44
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
46
+
47
+
48
def parse_list_csv(raw: str | None) -> list[str]:
    """Split a comma-separated option value into trimmed, non-empty items."""
    if raw is None or raw == "":
        return []
    items: list[str] = []
    for piece in raw.split(","):
        piece = piece.strip()
        if piece:
            items.append(piece)
    return items
52
+
53
+
54
def parse_ref(raw: str) -> dict[str, str]:
    """Parse a ``type:target[:note]`` reference string into a dict.

    Raises:
        ValueError: when no ``:`` separator is present.
    """
    pieces = raw.split(":", 2)
    if len(pieces) == 1:
        raise ValueError(f"invalid --ref format: {raw}")
    ref: dict[str, str] = {"type": pieces[0], "target": pieces[1]}
    # A trailing empty note (e.g. "a:b:") is treated as absent.
    if len(pieces) > 2 and pieces[2]:
        ref["note"] = pieces[2]
    return ref
63
+
64
+
65
def load_config(path: Path | None) -> dict[str, Any]:
    """Load the omnimemory configuration.

    Resolution order:
      1. *path*, when given and it exists;
      2. ``$OMNIMEMORY_HOME/omnimemory.config.json``;
      3. ``~/.omnimemory/omnimemory.config.json``;
      4. a freshly built in-memory default config rooted at (2)/(3).

    The default location is now expanded and resolved, matching
    ``default_config_path()``; previously a value such as
    ``OMNIMEMORY_HOME=~/mem`` produced a literal ``~`` path whose
    ``exists()`` check always failed.
    """
    if path and path.exists():
        return json.loads(path.read_text(encoding="utf-8"))

    env_home = os.getenv("OMNIMEMORY_HOME")
    if env_home:
        home_dir = Path(env_home).expanduser().resolve()
    else:
        home_dir = Path.home().expanduser().resolve() / ".omnimemory"
    default_cfg = home_dir / "omnimemory.config.json"

    if default_cfg.exists():
        return json.loads(default_cfg.read_text(encoding="utf-8"))

    # No config file anywhere: synthesize defaults rooted next to where the
    # config file would live.
    root = default_cfg.parent
    return {
        "version": SCHEMA_VERSION,
        "home": str(root),
        "storage": {
            "markdown": str(root / "data" / "markdown"),
            "jsonl": str(root / "data" / "jsonl"),
            "sqlite": str(root / "data" / "omnimemory.db"),
        },
    }
88
+
89
+
90
def default_config_path() -> Path:
    """Absolute path of the config file, honoring ``$OMNIMEMORY_HOME``."""
    env_home = os.getenv("OMNIMEMORY_HOME")
    if env_home:
        base = Path(env_home).expanduser().resolve()
    else:
        base = Path.home().expanduser().resolve() / ".omnimemory"
    return base / "omnimemory.config.json"
95
+
96
+
97
def load_config_with_path(path: Path | None) -> tuple[dict[str, Any], Path]:
    """Return ``(config, config_path)``.

    When the chosen file is missing, fall back to ``load_config(None)``
    (defaults) but still report the path that was checked.
    """
    candidate = path.expanduser().resolve() if path else default_config_path()
    if candidate.exists():
        parsed = json.loads(candidate.read_text(encoding="utf-8"))
        return parsed, candidate
    return load_config(None), candidate
108
+
109
+
110
def save_config(path: Path, cfg: dict[str, Any]) -> None:
    """Write *cfg* as pretty-printed JSON (trailing newline), creating parents."""
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(cfg, ensure_ascii=False, indent=2)
    path.write_text(rendered + "\n", encoding="utf-8")
113
+
114
+
115
def resolve_paths(cfg: dict[str, Any]) -> MemoryPaths:
    """Turn a config mapping into absolute, expanded MemoryPaths."""
    home = Path(cfg.get("home", Path.cwd())).expanduser().resolve()
    storage = cfg.get("storage", {})

    def _resolve(key: str, fallback: Path) -> Path:
        # Each storage entry may be overridden in the config; otherwise it
        # defaults to a location under <home>/data.
        return Path(storage.get(key, fallback)).expanduser().resolve()

    return MemoryPaths(
        root=home,
        markdown_root=_resolve("markdown", home / "data" / "markdown"),
        jsonl_root=_resolve("jsonl", home / "data" / "jsonl"),
        sqlite_path=_resolve("sqlite", home / "data" / "omnimemory.db"),
    )
122
+
123
+
124
def ensure_storage(paths: MemoryPaths, schema_sql_path: Path) -> None:
    """Create the on-disk layout and (re)apply the SQLite schema script."""
    for layer in sorted(LAYER_SET):
        layer_dir = paths.markdown_root / layer
        layer_dir.mkdir(parents=True, exist_ok=True)
    paths.jsonl_root.mkdir(parents=True, exist_ok=True)
    paths.sqlite_path.parent.mkdir(parents=True, exist_ok=True)

    # The schema script is assumed idempotent (re-run on every call).
    schema_sql = schema_sql_path.read_text(encoding="utf-8")
    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.executescript(schema_sql)
132
+
133
+
134
def ensure_system_memory(paths: MemoryPaths, schema_sql_path: Path) -> str:
    """Ensure the reserved "system" memory row and markdown file exist.

    System-level audit events (verify/sync/reindex) are attached to this
    fixed memory id so the memory_events foreign key always has a target.
    Returns the fixed system memory id.
    """
    ensure_storage(paths, schema_sql_path)
    system_id = "system000"
    rel_path = "archive/system/system000.md"
    md_path = paths.markdown_root / rel_path
    if not md_path.exists():
        md_path.parent.mkdir(parents=True, exist_ok=True)
        body = "# system\n\nreserved memory for system audit events\n"
        md_path.write_text(body, encoding="utf-8")
    else:
        # Reuse the existing body so the stored integrity hash matches disk.
        body = md_path.read_text(encoding="utf-8")

    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.execute("PRAGMA foreign_keys = ON")
        # INSERT OR IGNORE: the row is created once and never overwritten,
        # keeping its original created_at timestamp.
        conn.execute(
            """
            INSERT OR IGNORE INTO memories(
                id, schema_version, created_at, updated_at, layer, kind, summary, body_md_path, body_text,
                tags_json, importance_score, confidence_score, stability_score, reuse_count, volatility_score,
                cred_refs_json, source_json, scope_json, integrity_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                system_id,
                SCHEMA_VERSION,
                utc_now(),
                utc_now(),
                "archive",
                "summary",
                "system",
                rel_path,
                body,
                "[]",
                1.0,
                1.0,
                1.0,
                0,
                0.0,
                "[]",
                '{"tool":"system","session_id":"system"}',
                '{"project_id":"global","workspace":""}',
                json.dumps({"content_sha256": sha256_text(body), "envelope_version": 1}),
            ),
        )
        conn.commit()

    return system_id
181
+
182
+
183
def event_file_path(paths: MemoryPaths, when: datetime) -> Path:
    """Monthly JSONL event-log path for the month containing *when*."""
    bucket = when.strftime("%Y-%m")
    return paths.jsonl_root / f"events-{bucket}.jsonl"
185
+
186
+
187
def md_rel_path(layer: str, mem_id: str, when: datetime) -> str:
    """Relative markdown path ``<layer>/<YYYY>/<MM>/<id>.md``."""
    return "/".join([layer, when.strftime("%Y/%m"), f"{mem_id}.md"])
189
+
190
+
191
def write_markdown(paths: MemoryPaths, rel_path: str, content: str) -> Path:
    """Write *content* under the markdown root and return the full path."""
    target = paths.markdown_root / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
    return target
196
+
197
+
198
def append_jsonl(path: Path, obj: dict[str, Any]) -> None:
    """Append *obj* as one JSON line to *path* (UTF-8, no ASCII escaping)."""
    line = json.dumps(obj, ensure_ascii=False)
    with path.open("a", encoding="utf-8") as handle:
        handle.write(line + "\n")
201
+
202
+
203
def insert_memory(conn: sqlite3.Connection, envelope: dict[str, Any], body_text: str) -> None:
    """Upsert one memory row and replace its outgoing refs.

    *envelope* must be a dict produced by build_envelope(); nested structures
    (tags, cred_refs, source, scope, integrity) are stored as JSON text.
    KeyError is raised if a required envelope field is missing.  Does not
    commit — the caller owns the transaction.
    """
    sig = envelope["signals"]
    # Positional tuple order must match the column list exactly (19 columns).
    conn.execute(
        """
        INSERT OR REPLACE INTO memories(
            id, schema_version, created_at, updated_at, layer, kind, summary, body_md_path, body_text,
            tags_json, importance_score, confidence_score, stability_score, reuse_count, volatility_score,
            cred_refs_json, source_json, scope_json, integrity_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            envelope["id"],
            envelope["schema_version"],
            envelope["created_at"],
            envelope["updated_at"],
            envelope["layer"],
            envelope["kind"],
            envelope["summary"],
            envelope["body_md_path"],
            body_text,
            json.dumps(envelope["tags"], ensure_ascii=False),
            float(sig["importance_score"]),
            float(sig["confidence_score"]),
            float(sig["stability_score"]),
            int(sig["reuse_count"]),
            float(sig["volatility_score"]),
            json.dumps(envelope["cred_refs"], ensure_ascii=False),
            json.dumps(envelope["source"], ensure_ascii=False),
            json.dumps(envelope["scope"], ensure_ascii=False),
            json.dumps(envelope["integrity"], ensure_ascii=False),
        ),
    )

    # Refs are fully replaced so re-indexing the same envelope is idempotent.
    conn.execute("DELETE FROM memory_refs WHERE memory_id = ?", (envelope["id"],))
    for ref in envelope["refs"]:
        conn.execute(
            "INSERT INTO memory_refs(memory_id, ref_type, target, note) VALUES (?, ?, ?, ?)",
            (envelope["id"], ref.get("type", "memory"), ref.get("target", ""), ref.get("note")),
        )
242
+
243
+
244
def insert_event(conn: sqlite3.Connection, evt: dict[str, Any]) -> None:
    """Upsert one event row; the payload is stored as serialized JSON.

    Does not commit — the caller owns the transaction.
    """
    payload_json = json.dumps(evt["payload"], ensure_ascii=False)
    row = (evt["event_id"], evt["event_type"], evt["event_time"], evt["memory_id"], payload_json)
    conn.execute(
        "INSERT OR REPLACE INTO memory_events(event_id, event_type, event_time, memory_id, payload_json) VALUES (?, ?, ?, ?, ?)",
        row,
    )
249
+
250
+
251
def log_system_event(paths: MemoryPaths, schema_sql_path: Path, event_type: str, payload: dict[str, Any]) -> None:
    """Record an audit event against the reserved system memory row."""
    system_id = ensure_system_memory(paths, schema_sql_path)
    evt = {
        "event_id": make_id(),
        "event_type": event_type,
        "event_time": utc_now(),
        "memory_id": system_id,
        "payload": payload,
    }
    # Append to the JSONL log first, then mirror into the SQLite index.
    append_jsonl(event_file_path(paths, datetime.now(timezone.utc)), evt)
    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.execute("PRAGMA foreign_keys = ON")
        insert_event(conn, evt)
        conn.commit()
265
+
266
+
267
def reindex_from_jsonl(paths: MemoryPaths, schema_sql_path: Path, reset: bool = True) -> dict[str, Any]:
    """Rebuild the SQLite index by replaying every JSONL event log.

    The JSONL files are treated as the source of truth.  With ``reset=True``
    all derived rows (except the reserved system memory) are deleted first.
    Returns a summary dict with parse/index/skip counters; also logs a
    ``memory.update`` system event with the same summary.
    """
    ensure_storage(paths, schema_sql_path)
    system_id = ensure_system_memory(paths, schema_sql_path)
    files = sorted(paths.jsonl_root.glob("events-*.jsonl"))
    parsed_events = 0
    indexed_memories = 0
    skipped_events = 0

    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.execute("PRAGMA foreign_keys = ON")
        if reset:
            # Children first so foreign keys stay satisfied during the wipe.
            conn.execute("DELETE FROM memory_events")
            conn.execute("DELETE FROM memory_refs")
            conn.execute("DELETE FROM memories WHERE id != ?", (system_id,))

        for fp in files:
            for line in fp.read_text(encoding="utf-8").splitlines():
                if not line.strip():
                    continue
                parsed_events += 1
                try:
                    evt = json.loads(line)
                except json.JSONDecodeError:
                    skipped_events += 1
                    continue

                memory_id = evt.get("memory_id", system_id)
                # Unknown event types are counted but never indexed.
                if evt.get("event_type") not in EVENT_SET:
                    skipped_events += 1
                    continue

                payload = evt.get("payload", {})
                env = payload.get("envelope")
                if isinstance(env, dict):
                    # Reload the markdown body from disk so body_text matches
                    # the current file, not the state at event time.
                    rel = env.get("body_md_path", "")
                    body = ""
                    if rel:
                        mdp = paths.markdown_root / rel
                        if mdp.exists():
                            body = mdp.read_text(encoding="utf-8")
                    try:
                        insert_memory(conn, env, body)
                        indexed_memories += 1
                    except Exception:
                        # Malformed envelope: skip the whole event so the
                        # memory_events FK cannot dangle.
                        skipped_events += 1
                        continue

                # Keep foreign key intact for system-level events or legacy lines.
                evt["memory_id"] = memory_id if memory_id else system_id
                try:
                    insert_event(conn, evt)
                except Exception:
                    skipped_events += 1
                    continue

        conn.commit()

    result = {
        "ok": True,
        "reset": reset,
        "jsonl_files": len(files),
        "events_parsed": parsed_events,
        "memories_indexed": indexed_memories,
        "events_skipped": skipped_events,
    }
    log_system_event(paths, schema_sql_path, "memory.update", {"action": "reindex", **result})
    return result
334
+
335
+
336
def build_envelope(
    *,
    mem_id: str,
    when_iso: str,
    layer: str,
    kind: str,
    summary: str,
    body_md_path: str,
    tags: list[str],
    refs: list[dict[str, str]],
    cred_refs: list[str],
    tool: str,
    account: str,
    device: str,
    session_id: str,
    project_id: str,
    workspace: str,
    importance: float,
    confidence: float,
    stability: float,
    reuse_count: int,
    volatility: float,
    content_sha256: str,
) -> dict[str, Any]:
    """Assemble the canonical memory envelope dict.

    Raises:
        ValueError: when *layer* or *kind* is not one of the allowed values.
    """
    if layer not in LAYER_SET:
        raise ValueError(f"invalid layer: {layer}")
    if kind not in KIND_SET:
        raise ValueError(f"invalid kind: {kind}")

    signals = {
        "importance_score": importance,
        "confidence_score": confidence,
        "stability_score": stability,
        "reuse_count": reuse_count,
        "volatility_score": volatility,
    }
    source = {
        "tool": tool,
        "account": account,
        "device": device,
        "session_id": session_id,
    }
    scope = {
        "project_id": project_id,
        "workspace": workspace,
    }
    integrity = {
        "content_sha256": content_sha256,
        "envelope_version": 1,
    }

    # A new envelope starts with created_at == updated_at.
    return {
        "id": mem_id,
        "schema_version": SCHEMA_VERSION,
        "created_at": when_iso,
        "updated_at": when_iso,
        "layer": layer,
        "kind": kind,
        "summary": summary,
        "body_md_path": body_md_path,
        "tags": tags,
        "refs": refs,
        "signals": signals,
        "cred_refs": cred_refs,
        "source": source,
        "scope": scope,
        "integrity": integrity,
    }
399
+
400
+
401
def write_memory(
    *,
    paths: MemoryPaths,
    schema_sql_path: Path,
    layer: str,
    kind: str,
    summary: str,
    body: str,
    tags: list[str],
    refs: list[dict[str, str]],
    cred_refs: list[str],
    tool: str,
    account: str,
    device: str,
    session_id: str,
    project_id: str,
    workspace: str,
    importance: float,
    confidence: float,
    stability: float,
    reuse_count: int,
    volatility: float,
    event_type: str,
) -> dict[str, Any]:
    """Create a new memory: markdown file, JSONL event, and SQLite rows.

    Write order is markdown -> JSONL -> SQLite, so the append-only JSONL log
    always contains the envelope used to build the index.  Returns
    ``{"memory": envelope, "event": event}``.

    Raises:
        ValueError: invalid *event_type* (or invalid layer/kind, via
            build_envelope).
    """
    ensure_storage(paths, schema_sql_path)
    if event_type not in EVENT_SET:
        raise ValueError(f"invalid event_type: {event_type}")

    when_dt = datetime.now(timezone.utc)
    when_iso = when_dt.replace(microsecond=0).isoformat()
    mem_id = make_id()
    rel_path = md_rel_path(layer, mem_id, when_dt)
    # The stored body always gets a "# <summary>" heading; the integrity hash
    # below covers this rendered form, not the raw input.
    body_md = f"# {summary}\n\n{body.strip()}\n"
    write_markdown(paths, rel_path, body_md)

    env = build_envelope(
        mem_id=mem_id,
        when_iso=when_iso,
        layer=layer,
        kind=kind,
        summary=summary,
        body_md_path=rel_path,
        tags=tags,
        refs=refs,
        cred_refs=cred_refs,
        tool=tool,
        account=account,
        device=device,
        session_id=session_id,
        project_id=project_id,
        workspace=workspace,
        importance=importance,
        confidence=confidence,
        stability=stability,
        reuse_count=reuse_count,
        volatility=volatility,
        content_sha256=sha256_text(body_md),
    )

    evt = {
        "event_id": make_id(),
        "event_type": event_type,
        "event_time": when_iso,
        "memory_id": mem_id,
        "payload": {
            "summary": summary,
            "layer": layer,
            "kind": kind,
            "body_md_path": rel_path,
            # Full envelope is embedded so reindex_from_jsonl can rebuild
            # the SQLite row from the log alone.
            "envelope": env,
        },
    }

    append_jsonl(event_file_path(paths, when_dt), evt)

    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.execute("PRAGMA foreign_keys = ON")
        insert_memory(conn, env, body_md)
        insert_event(conn, evt)
        conn.commit()

    return {"memory": env, "event": evt}
483
+
484
+
485
def find_memories(paths: MemoryPaths, schema_sql_path: Path, query: str, layer: str | None, limit: int) -> list[dict[str, Any]]:
    """Search the memory index.

    With a non-empty *query*, rows are matched via the FTS5 table and ranked
    by bm25 relevance then recency; otherwise the most recently updated rows
    are returned.  *layer* optionally filters results; *limit* caps the row
    count.  Returns a list of plain dicts.

    The four near-duplicate SQL branches of the original are consolidated
    into one dynamically assembled statement per mode; the generated SQL is
    equivalent clause-for-clause.
    """
    ensure_storage(paths, schema_sql_path)
    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.row_factory = sqlite3.Row
        params: list[Any] = []
        if query:
            sql = (
                "SELECT m.id, m.layer, m.kind, m.summary, m.updated_at, m.body_md_path "
                "FROM memories_fts f "
                "JOIN memories m ON m.id = f.id "
                "WHERE f.memories_fts MATCH ?"
            )
            params.append(query)
            if layer:
                sql += " AND m.layer = ?"
                params.append(layer)
            sql += " ORDER BY bm25(memories_fts), m.updated_at DESC LIMIT ?"
        else:
            sql = "SELECT id, layer, kind, summary, updated_at, body_md_path FROM memories"
            if layer:
                sql += " WHERE layer = ?"
                params.append(layer)
            sql += " ORDER BY updated_at DESC LIMIT ?"
        params.append(limit)
        rows = conn.execute(sql, params).fetchall()

        return [dict(r) for r in rows]
527
+
528
+
529
def build_brief(paths: MemoryPaths, schema_sql_path: Path, project_id: str, limit: int) -> dict[str, Any]:
    """Build a session-start brief: recent memories plus latest checkpoints.

    An empty *project_id* matches all projects (via the ``? = ''`` clause);
    otherwise rows are filtered on ``scope_json.$.project_id``.
    """
    ensure_storage(paths, schema_sql_path)
    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.row_factory = sqlite3.Row
        recent = conn.execute(
            """
            SELECT id, layer, kind, summary, updated_at, body_md_path
            FROM memories
            WHERE json_extract(scope_json, '$.project_id') = ? OR ? = ''
            ORDER BY updated_at DESC
            LIMIT ?
            """,
            (project_id, project_id, limit),
        ).fetchall()

        # Checkpoints are capped at 3 regardless of *limit*.
        checkpoints = conn.execute(
            """
            SELECT id, summary, updated_at
            FROM memories
            WHERE kind = 'checkpoint' AND (json_extract(scope_json, '$.project_id') = ? OR ? = '')
            ORDER BY updated_at DESC
            LIMIT 3
            """,
            (project_id, project_id),
        ).fetchall()

        return {
            "project_id": project_id,
            "recent": [dict(r) for r in recent],
            "checkpoints": [dict(r) for r in checkpoints],
        }
560
+
561
+
562
def verify_storage(paths: MemoryPaths, schema_sql_path: Path) -> dict[str, Any]:
    """Audit consistency between SQLite, markdown bodies, and JSONL logs.

    Checks each memory row's markdown file exists and its SHA-256 matches the
    stored integrity hash, and that every JSONL line parses with a known
    event type.  Returns a summary dict and logs a ``memory.verify`` event.
    """
    ensure_storage(paths, schema_sql_path)
    ensure_system_memory(paths, schema_sql_path)
    issues: list[str] = []

    with sqlite3.connect(paths.sqlite_path) as conn:
        conn.row_factory = sqlite3.Row
        table_count = conn.execute("SELECT count(*) FROM sqlite_master WHERE type IN ('table','view')").fetchone()[0]
        rows = conn.execute("SELECT id, body_md_path, integrity_json FROM memories ORDER BY updated_at DESC").fetchall()

    checked = 0
    for row in rows:
        checked += 1
        md_path = paths.markdown_root / row["body_md_path"]
        if not md_path.exists():
            issues.append(f"missing_markdown:{row['id']}:{row['body_md_path']}")
            continue

        # Hash of the on-disk body must match the stored content_sha256.
        data = md_path.read_text(encoding="utf-8")
        expected = json.loads(row["integrity_json"]).get("content_sha256", "")
        actual = sha256_text(data)
        if expected != actual:
            issues.append(f"hash_mismatch:{row['id']}")

    jsonl_count = 0
    bad_jsonl = 0
    for fp in sorted(paths.jsonl_root.glob("events-*.jsonl")):
        for line in fp.read_text(encoding="utf-8").splitlines():
            if not line.strip():
                continue
            jsonl_count += 1
            try:
                obj = json.loads(line)
                if obj.get("event_type") not in EVENT_SET:
                    bad_jsonl += 1
            except json.JSONDecodeError:
                bad_jsonl += 1

    if bad_jsonl:
        # Only the count is reported for JSONL, not individual line numbers.
        issues.append(f"jsonl_invalid_lines:{bad_jsonl}")

    result = {
        "ok": len(issues) == 0,
        "sqlite_table_view_count": table_count,
        "memory_rows_checked": checked,
        "jsonl_events_checked": jsonl_count,
        "issues": issues,
    }

    log_system_event(
        paths,
        schema_sql_path,
        "memory.verify",
        {
            "ok": result["ok"],
            "issues": issues,
            "memory_rows_checked": checked,
            "jsonl_events_checked": jsonl_count,
        },
    )

    return result
624
+
625
+
626
def _run_git(paths: MemoryPaths, args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run a git command inside the memory root.

    Raises subprocess.CalledProcessError on a non-zero exit (check=True).
    """
    cmd = ["git", "-C", str(paths.root)]
    cmd.extend(args)
    return subprocess.run(cmd, check=True, capture_output=True, text=True)
633
+
634
+
635
def _ensure_git_repo(paths: MemoryPaths) -> None:
    """Initialise a git repository in the root unless one already exists."""
    git_dir = paths.root / ".git"
    if git_dir.exists():
        return
    _run_git(paths, ["init"])
638
+
639
+
640
def _ensure_remote(paths: MemoryPaths, remote_name: str, remote_url: str | None) -> None:
    """Point *remote_name* at *remote_url*, adding the remote if missing.

    Without a URL this only lists remotes (which still verifies the repo is
    usable) and changes nothing.
    """
    existing = _run_git(paths, ["remote"]).stdout.split()
    if not remote_url:
        return
    if remote_name in existing:
        _run_git(paths, ["remote", "set-url", remote_name, remote_url])
    else:
        _run_git(paths, ["remote", "add", remote_name, remote_url])
647
+
648
+
649
def sync_placeholder(
    paths: MemoryPaths,
    schema_sql_path: Path,
    mode: str,
    remote_name: str = "origin",
    branch: str = "main",
    remote_url: str | None = None,
    commit_message: str = "chore(memory): sync snapshot",
    log_event: bool = True,
) -> dict[str, Any]:
    """Run one sync operation against the store's git repository.

    Modes: ``noop``, ``git``/``github-status`` (status only), ``github-push``
    (commit + push when a remote is configured), ``github-pull``
    (fetch + rebase), ``github-bootstrap`` (pull, reindex, push — implemented
    by calling this function recursively with log_event=False).

    Returns ``{"ok", "mode", "message"}`` plus ``detail`` for git modes
    (a status string, or a dict of sub-results for bootstrap).  Git failures
    are reported via ``ok=False`` rather than raised.

    Raises:
        ValueError: unknown *mode*.
    """
    ensure_system_memory(paths, schema_sql_path)
    if mode == "noop":
        message = "sync placeholder: noop"
        ok = True
        detail = ""
    elif mode in {"git", "github-status"}:
        try:
            _ensure_git_repo(paths)
            proc = _run_git(paths, ["status", "--short"])
            message = "github status ok"
            ok = True
            detail = proc.stdout.strip()
        except Exception as exc:  # pragma: no cover
            message = f"github status failed ({exc})"
            ok = False
            detail = ""
    elif mode == "github-push":
        try:
            _ensure_git_repo(paths)
            _ensure_remote(paths, remote_name, remote_url)
            _run_git(paths, ["add", "-A"])
            # check=False: "nothing to commit" exits non-zero but is benign.
            commit_proc = subprocess.run(
                ["git", "-C", str(paths.root), "commit", "-m", commit_message],
                check=False,
                capture_output=True,
                text=True,
            )
            if commit_proc.returncode != 0 and "nothing to commit" not in commit_proc.stdout + commit_proc.stderr:
                raise RuntimeError(commit_proc.stderr.strip() or commit_proc.stdout.strip() or "git commit failed")
            # Push only when a remote URL was supplied or already configured.
            if remote_url or remote_name in _run_git(paths, ["remote"]).stdout.split():
                _run_git(paths, ["push", "-u", remote_name, branch])
                message = "github push ok"
            else:
                message = "local commit ok; remote not configured"
            ok = True
            detail = _run_git(paths, ["status", "--short"]).stdout.strip()
        except Exception as exc:  # pragma: no cover
            message = f"github push failed ({exc})"
            ok = False
            detail = ""
    elif mode == "github-pull":
        try:
            _ensure_git_repo(paths)
            _ensure_remote(paths, remote_name, remote_url)
            _run_git(paths, ["fetch", remote_name, branch])
            _run_git(paths, ["pull", "--rebase", remote_name, branch])
            message = "github pull ok"
            ok = True
            detail = _run_git(paths, ["status", "--short"]).stdout.strip()
        except Exception as exc:  # pragma: no cover
            message = f"github pull failed ({exc})"
            ok = False
            detail = ""
    elif mode == "github-bootstrap":
        pull_out = sync_placeholder(
            paths,
            schema_sql_path,
            "github-pull",
            remote_name=remote_name,
            branch=branch,
            remote_url=remote_url,
            commit_message=commit_message,
            log_event=False,
        )
        reindex_out = reindex_from_jsonl(paths, schema_sql_path, reset=True)
        push_out = sync_placeholder(
            paths,
            schema_sql_path,
            "github-push",
            remote_name=remote_name,
            branch=branch,
            remote_url=remote_url,
            commit_message=commit_message,
            log_event=False,
        )
        ok = bool(pull_out.get("ok") and reindex_out.get("ok") and push_out.get("ok"))
        message = "github bootstrap ok" if ok else "github bootstrap finished with errors"
        # NOTE: detail is a dict here, a string in the other git modes.
        detail = {"pull": pull_out, "reindex": reindex_out, "push": push_out}
    else:
        raise ValueError("mode must be one of: noop, git, github-status, github-push, github-pull, github-bootstrap")

    # Only read-only modes log a memory.sync event — presumably so that
    # push/pull do not create new content that would retrigger a sync;
    # TODO(review): confirm intent.
    should_log_event = log_event and mode in {"noop", "git", "github-status"}
    if should_log_event:
        log_system_event(
            paths,
            schema_sql_path,
            "memory.sync",
            {"mode": mode, "ok": ok, "message": message, "remote_name": remote_name, "branch": branch},
        )

    out: dict[str, Any] = {"ok": ok, "mode": mode, "message": message}
    if mode in {"git", "github-status", "github-push", "github-pull", "github-bootstrap"}:
        out["detail"] = detail
    return out
753
+
754
+
755
def latest_content_mtime(paths: MemoryPaths) -> float:
    """Newest file mtime under the markdown and jsonl roots (0.0 when empty)."""
    latest = 0.0
    for root in (paths.markdown_root, paths.jsonl_root):
        if not root.exists():
            continue
        for base, _, files in os.walk(root):
            for name in files:
                try:
                    stamp = (Path(base) / name).stat().st_mtime
                except FileNotFoundError:
                    # File vanished between listing and stat; ignore it.
                    continue
                latest = max(latest, stamp)
    return latest
770
+
771
+
772
def run_sync_daemon(
    *,
    paths: MemoryPaths,
    schema_sql_path: Path,
    remote_name: str,
    branch: str,
    remote_url: str | None,
    scan_interval: int,
    pull_interval: int,
    once: bool = False,
) -> dict[str, Any]:
    """Loop forever (or once) pulling remote changes and pushing local ones.

    Every *pull_interval* seconds: pull + full reindex.  Every cycle: push
    when any file under the markdown/jsonl roots has a newer mtime than last
    seen.  Cycles sleep *scan_interval* seconds (minimum 1).  With
    ``once=True`` a single cycle runs and a summary is returned; otherwise
    the loop never exits normally.
    """
    ensure_storage(paths, schema_sql_path)
    ensure_system_memory(paths, schema_sql_path)
    last_seen = latest_content_mtime(paths)
    last_pull = 0.0  # epoch 0 forces a pull on the first cycle
    cycles = 0

    while True:
        cycles += 1
        now = time.time()

        if now - last_pull >= pull_interval:
            sync_placeholder(
                paths,
                schema_sql_path,
                "github-pull",
                remote_name=remote_name,
                branch=branch,
                remote_url=remote_url,
                log_event=False,
            )
            # Rebuild the index from the (possibly updated) JSONL logs.
            reindex_from_jsonl(paths, schema_sql_path, reset=True)
            last_pull = now
            # Re-baseline so the pull itself doesn't look like a local edit.
            last_seen = latest_content_mtime(paths)

        current_seen = latest_content_mtime(paths)
        if current_seen > last_seen:
            sync_placeholder(
                paths,
                schema_sql_path,
                "github-push",
                remote_name=remote_name,
                branch=branch,
                remote_url=remote_url,
                log_event=False,
            )
            last_seen = current_seen

        if once:
            break
        time.sleep(max(1, scan_interval))

    result = {"ok": True, "cycles": cycles, "mode": "once" if once else "daemon"}
    log_system_event(paths, schema_sql_path, "memory.sync", {"daemon": result})
    return result