@simbimbo/memory-ocmemog 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/CHANGELOG.md +59 -0
  2. package/LICENSE +21 -0
  3. package/README.md +223 -0
  4. package/brain/__init__.py +1 -0
  5. package/brain/runtime/__init__.py +13 -0
  6. package/brain/runtime/config.py +21 -0
  7. package/brain/runtime/inference.py +83 -0
  8. package/brain/runtime/instrumentation.py +17 -0
  9. package/brain/runtime/memory/__init__.py +13 -0
  10. package/brain/runtime/memory/api.py +152 -0
  11. package/brain/runtime/memory/artifacts.py +33 -0
  12. package/brain/runtime/memory/candidate.py +89 -0
  13. package/brain/runtime/memory/context_builder.py +87 -0
  14. package/brain/runtime/memory/conversation_state.py +1825 -0
  15. package/brain/runtime/memory/distill.py +198 -0
  16. package/brain/runtime/memory/embedding_engine.py +94 -0
  17. package/brain/runtime/memory/freshness.py +91 -0
  18. package/brain/runtime/memory/health.py +42 -0
  19. package/brain/runtime/memory/integrity.py +170 -0
  20. package/brain/runtime/memory/interaction_memory.py +57 -0
  21. package/brain/runtime/memory/memory_consolidation.py +60 -0
  22. package/brain/runtime/memory/memory_gate.py +38 -0
  23. package/brain/runtime/memory/memory_graph.py +54 -0
  24. package/brain/runtime/memory/memory_links.py +109 -0
  25. package/brain/runtime/memory/memory_salience.py +235 -0
  26. package/brain/runtime/memory/memory_synthesis.py +33 -0
  27. package/brain/runtime/memory/memory_taxonomy.py +35 -0
  28. package/brain/runtime/memory/person_identity.py +83 -0
  29. package/brain/runtime/memory/person_memory.py +138 -0
  30. package/brain/runtime/memory/pondering_engine.py +577 -0
  31. package/brain/runtime/memory/promote.py +237 -0
  32. package/brain/runtime/memory/provenance.py +356 -0
  33. package/brain/runtime/memory/reinforcement.py +73 -0
  34. package/brain/runtime/memory/retrieval.py +153 -0
  35. package/brain/runtime/memory/semantic_search.py +66 -0
  36. package/brain/runtime/memory/sentiment_memory.py +67 -0
  37. package/brain/runtime/memory/store.py +400 -0
  38. package/brain/runtime/memory/tool_catalog.py +68 -0
  39. package/brain/runtime/memory/unresolved_state.py +93 -0
  40. package/brain/runtime/memory/vector_index.py +270 -0
  41. package/brain/runtime/model_roles.py +11 -0
  42. package/brain/runtime/model_router.py +22 -0
  43. package/brain/runtime/providers.py +59 -0
  44. package/brain/runtime/security/__init__.py +3 -0
  45. package/brain/runtime/security/redaction.py +14 -0
  46. package/brain/runtime/state_store.py +25 -0
  47. package/brain/runtime/storage_paths.py +41 -0
  48. package/docs/architecture/memory.md +118 -0
  49. package/docs/release-checklist.md +34 -0
  50. package/docs/reports/ocmemog-code-audit-2026-03-14.md +155 -0
  51. package/docs/usage.md +223 -0
  52. package/index.ts +726 -0
  53. package/ocmemog/__init__.py +1 -0
  54. package/ocmemog/sidecar/__init__.py +1 -0
  55. package/ocmemog/sidecar/app.py +1068 -0
  56. package/ocmemog/sidecar/compat.py +74 -0
  57. package/ocmemog/sidecar/transcript_watcher.py +425 -0
  58. package/openclaw.plugin.json +18 -0
  59. package/package.json +60 -0
  60. package/scripts/install-ocmemog.sh +277 -0
  61. package/scripts/launchagents/com.openclaw.ocmemog.guard.plist +22 -0
  62. package/scripts/launchagents/com.openclaw.ocmemog.ponder.plist +22 -0
  63. package/scripts/launchagents/com.openclaw.ocmemog.sidecar.plist +27 -0
  64. package/scripts/ocmemog-context.sh +15 -0
  65. package/scripts/ocmemog-continuity-benchmark.py +178 -0
  66. package/scripts/ocmemog-demo.py +122 -0
  67. package/scripts/ocmemog-failover-test.sh +17 -0
  68. package/scripts/ocmemog-guard.sh +11 -0
  69. package/scripts/ocmemog-install.sh +93 -0
  70. package/scripts/ocmemog-load-test.py +106 -0
  71. package/scripts/ocmemog-ponder.sh +30 -0
  72. package/scripts/ocmemog-recall-test.py +58 -0
  73. package/scripts/ocmemog-reindex-vectors.py +14 -0
  74. package/scripts/ocmemog-reliability-soak.py +177 -0
  75. package/scripts/ocmemog-sidecar.sh +46 -0
  76. package/scripts/ocmemog-soak-report.py +58 -0
  77. package/scripts/ocmemog-soak-test.py +44 -0
  78. package/scripts/ocmemog-test-rig.py +345 -0
  79. package/scripts/ocmemog-transcript-append.py +45 -0
  80. package/scripts/ocmemog-transcript-watcher.py +8 -0
  81. package/scripts/ocmemog-transcript-watcher.sh +7 -0
@@ -0,0 +1,345 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import random
8
+ import re
9
+ import sys
10
+ import time
11
+ from pathlib import Path
12
+ from typing import Iterable
13
+ from urllib import request as urlrequest
14
+
15
# Local ocmemog sidecar HTTP API the test rig talks to.
DEFAULT_ENDPOINT = "http://127.0.0.1:17890"
# File extensions considered ingestable source material.
DEFAULT_EXTS = {".md", ".txt", ".log", ".jsonl"}
# Directory names pruned from the filesystem walk in _walk_sources.
DEFAULT_SKIP_DIRS = {
    ".git",
    "node_modules",
    ".venv",
    "dist",
    "build",
    "__pycache__",
    ".DS_Store",  # NOTE(review): this is a file name, not a directory — harmless but likely dead weight
    "Library",
    ".Trash",
    ".cache",
    ".openclaw/logs",  # NOTE(review): contains a path separator, so it can never equal a single walk component — confirm intent
}

# Use the async ingest endpoint unless explicitly disabled via environment.
ASYNC_DEFAULT = os.environ.get("OCMEMOG_INGEST_ASYNC_DEFAULT", "true").lower() in {"1", "true", "yes"}
INGEST_PATH = "/memory/ingest_async" if ASYNC_DEFAULT else "/memory/ingest"
# Repository root: parent of the scripts/ directory that holds this file.
REPO_ROOT = Path(__file__).resolve().parent.parent
34
+
35
+
36
def _post_json(endpoint: str, path: str, payload: dict, *, timeout: int = 20) -> dict:
    """POST *payload* as JSON to ``endpoint + path`` and return the decoded reply.

    A response body that is not valid JSON is wrapped as
    ``{"ok": False, "raw": body}`` so callers can always use dict access.
    Network-level failures (URLError, timeout) propagate to the caller.

    Fix: the original caught bare ``Exception`` around the decode, which
    would also swallow unrelated bugs; only ``json.JSONDecodeError`` means
    "body was not JSON".
    """
    data = json.dumps(payload).encode("utf-8")
    req = urlrequest.Request(endpoint.rstrip("/") + path, data=data, method="POST")
    req.add_header("Content-Type", "application/json")
    with urlrequest.urlopen(req, timeout=timeout) as resp:
        body = resp.read().decode("utf-8")
    try:
        return json.loads(body)
    except json.JSONDecodeError:
        # Keep the raw body for debugging instead of raising on a bad payload.
        return {"ok": False, "raw": body}
46
+
47
+
48
+ def _chunk_text(text: str, max_len: int = 800) -> list[str]:
49
+ chunks: list[str] = []
50
+ buf: list[str] = []
51
+ for line in text.splitlines():
52
+ if line.strip().startswith("```"):
53
+ continue
54
+ if not line.strip():
55
+ if buf:
56
+ chunk = " ".join(buf).strip()
57
+ if len(chunk) >= 40:
58
+ chunks.append(chunk)
59
+ buf = []
60
+ continue
61
+ buf.append(line.strip())
62
+ if buf:
63
+ chunk = " ".join(buf).strip()
64
+ if len(chunk) >= 40:
65
+ chunks.append(chunk)
66
+ trimmed: list[str] = []
67
+ for c in chunks:
68
+ if len(c) > max_len:
69
+ mid = len(c) // 2
70
+ trimmed.append(c[:mid].strip())
71
+ trimmed.append(c[mid:].strip())
72
+ else:
73
+ trimmed.append(c)
74
+ return [c for c in trimmed if c]
75
+
76
+
77
+ def _classify_bucket(text: str) -> str:
78
+ head = text.lower()
79
+ if "runbook" in head or "procedure" in head or "steps" in head:
80
+ return "runbooks"
81
+ if "lesson" in head or "postmortem" in head or "learned" in head:
82
+ return "lessons"
83
+ if "todo" in head or "next steps" in head or "task" in head:
84
+ return "tasks"
85
+ if "directive" in head or "rule" in head:
86
+ return "directives"
87
+ if "reflection" in head:
88
+ return "reflections"
89
+ return "knowledge"
90
+
91
+
92
def _walk_sources(roots: Iterable[Path], exts: set[str], max_files: int, max_size_kb: int) -> list[Path]:
    """Collect up to *max_files* files under *roots* whose suffix is in *exts*.

    Skip-listed directory names are pruned from the walk, files larger than
    *max_size_kb* KiB (or that cannot be stat-ed) are ignored, and the scan
    stops as soon as the file budget is reached.
    """
    collected: list[Path] = []
    for root in roots:
        if not root.exists():
            continue
        for dirpath, dirnames, filenames in os.walk(root):
            rel = os.path.relpath(dirpath, root)
            # Prune skip-listed names in place so os.walk never descends into them.
            dirnames[:] = [d for d in dirnames if d not in DEFAULT_SKIP_DIRS]
            # Belt-and-braces: skip the current dir if any ancestor component is skip-listed.
            if any(part in DEFAULT_SKIP_DIRS for part in rel.split(os.sep)):
                continue
            for name in filenames:
                candidate = Path(dirpath) / name
                if candidate.suffix.lower() not in exts:
                    continue
                try:
                    size_kb = candidate.stat().st_size / 1024
                except Exception:
                    continue  # broken symlink / permission error — skip silently
                if size_kb > max_size_kb:
                    continue
                collected.append(candidate)
                if len(collected) >= max_files:
                    return collected  # budget hit: stop the whole walk early
    return collected
116
+
117
+
118
+ def _sample_query(text: str) -> str:
119
+ words = re.findall(r"[A-Za-z][A-Za-z0-9\-]{3,}", text)
120
+ if not words:
121
+ return "memory"
122
+ pick = random.sample(words, k=min(3, len(words)))
123
+ return " ".join(pick)
124
+
125
+
126
+ def _choose_threshold(confidences: list[float], reject_fraction: float) -> float | None:
127
+ if not confidences:
128
+ return None
129
+ ordered = sorted(confidences)
130
+ idx = max(0, min(len(ordered) - 1, int((1.0 - reject_fraction) * len(ordered))))
131
+ return float(ordered[idx])
132
+
133
+
134
def _distill_batches(endpoint: str, target: int, batch_sizes: list[int], timeout: int, budget_s: int) -> dict:
    """Run /memory/distill in batches until *target* items or *budget_s* seconds.

    Returns a summary dict with per-batch attempts, the distilled total,
    and elapsed wall-clock time.  Request failures never raise — they are
    recorded as failed attempts instead.
    """
    started = time.time()
    attempts: list[dict] = []
    distilled = 0
    for size in batch_sizes:
        if time.time() - started > budget_s:
            break  # stay inside the wall-clock budget
        try:
            resp = _post_json(endpoint, "/memory/distill", {"limit": size}, timeout=timeout)
        except Exception as exc:
            # Network/HTTP failure becomes a synthetic error response.
            resp = {"ok": False, "error": str(exc)}
        count = resp.get("count") if isinstance(resp, dict) else None
        if isinstance(count, int):
            distilled += count
        attempts.append({"batch": size, "ok": resp.get("ok"), "count": count, "error": resp.get("error")})
        if distilled >= target:
            break  # reached the distillation target
    return {"attempts": attempts, "total": distilled, "elapsed_s": round(time.time() - started, 3)}
152
+
153
+
154
+ def _enable_local_embeddings() -> None:
155
+ os.environ.setdefault("BRAIN_EMBED_MODEL_LOCAL", "")
156
+ os.environ.setdefault("BRAIN_EMBED_MODEL_PROVIDER", "ollama")
157
+ os.environ.setdefault("OCMEMOG_OLLAMA_EMBED_MODEL", "nomic-embed-text:latest")
158
+
159
+
160
def main() -> int:
    """End-to-end ocmemog test rig.

    Scans workspace files, ingests them as memories/experiences through the
    sidecar HTTP API, runs distillation batches, samples search queries, then
    drives the local promote/ponder/synthesis pipeline directly via the
    ``brain.runtime.memory`` modules.  Writes and prints a JSON report.
    Always returns 0; sidecar failures surface inside the report.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--endpoint", default=DEFAULT_ENDPOINT)
    parser.add_argument("--max-files", type=int, default=600)
    parser.add_argument("--max-chunks", type=int, default=1200)
    parser.add_argument("--max-size-kb", type=int, default=512)
    parser.add_argument("--experience-count", type=int, default=250)
    parser.add_argument("--distill-target", type=int, default=60)
    parser.add_argument("--distill-batches", default="10")
    parser.add_argument("--distill-timeout", type=int, default=45)
    parser.add_argument("--distill-budget", type=int, default=120)
    parser.add_argument("--query-samples", type=int, default=40)
    parser.add_argument("--promote-limit", type=int, default=60)
    parser.add_argument("--promotion-reject-rate", type=float, default=0.1)
    parser.add_argument("--promotion-shadow", action="store_true")
    parser.add_argument("--demote-limit", type=int, default=20)
    parser.add_argument("--demote-threshold", type=float, default=0.2)
    parser.add_argument("--demote-force", action="store_true")
    parser.add_argument("--ponder-limit", type=int, default=5)
    parser.add_argument("--report", default=str(REPO_ROOT / "reports" / "test-rig-latest.json"))
    args = parser.parse_args()

    _enable_local_embeddings()

    # Source roots: default workspace, overridable via OCMEMOG_TEST_RIG_ROOTS
    # (os.pathsep-separated list of paths).
    default_workspace = Path.home() / ".openclaw" / "workspace"
    roots = [default_workspace]
    env_roots = os.environ.get("OCMEMOG_TEST_RIG_ROOTS", "").strip()
    if env_roots:
        roots = [Path(p).expanduser() for p in env_roots.split(os.pathsep) if p.strip()]

    files = _walk_sources(roots, DEFAULT_EXTS, args.max_files, args.max_size_kb)

    # Chunk every readable file until the chunk budget is hit.
    chunks: list[dict] = []
    for path in files:
        try:
            text = path.read_text(encoding="utf-8", errors="ignore")
        except Exception:
            continue  # unreadable file — skip silently
        for chunk in _chunk_text(text):
            chunks.append({"source": str(path), "content": chunk})
            if len(chunks) >= args.max_chunks:
                break
        if len(chunks) >= args.max_chunks:
            break

    # Shuffle so ingestion order does not mirror directory order.
    random.shuffle(chunks)

    # Phase 1: ingest every chunk as a "memory", bucketed by keyword.
    ingest_start = time.time()
    mem_count = 0
    for entry in chunks:
        bucket = _classify_bucket(entry["content"])
        source_path = str(entry["source"])
        # Transcript-derived content always lands in "reflections".
        # NOTE(review): second clause is subsumed by the first ("/memory/").
        if "/memory/" in source_path or "/memory/transcripts/" in source_path:
            bucket = "reflections"
        payload = {
            "content": entry["content"],
            "kind": "memory",
            "memory_type": bucket,
            "source": f"rig:{entry['source']}",
            "session_id": f"rig-session:{Path(entry['source']).name}",
            "thread_id": f"rig-thread:{Path(entry['source']).stem}",
            "message_id": None,
            "transcript_path": entry["source"],
            "transcript_offset": None,
        }
        _post_json(args.endpoint, INGEST_PATH, payload)
        mem_count += 1
    ingest_elapsed = time.time() - ingest_start

    # Phase 2: re-ingest a prefix of the chunks as "experience" items.
    exp_count = 0
    for entry in chunks[: args.experience_count]:
        payload = {
            "content": entry["content"],
            "kind": "experience",
            "source": f"rig:{entry['source']}",
        }
        _post_json(args.endpoint, INGEST_PATH, payload)
        exp_count += 1

    # Phase 3: distill in batches (comma-separated sizes; non-numeric entries dropped).
    distill_result = _distill_batches(
        args.endpoint,
        args.distill_target,
        [int(x) for x in args.distill_batches.split(",") if x.strip().isdigit()],
        args.distill_timeout,
        args.distill_budget,
    )

    # Phase 4: query sampling (search) — a hit means any query token appears
    # (case-insensitively) in a returned result's content.
    query_samples = random.sample(chunks, k=min(args.query_samples, len(chunks))) if chunks else []
    query_results = []
    for entry in query_samples:
        query = _sample_query(entry["content"])
        t0 = time.time()
        resp = _post_json(args.endpoint, "/memory/search", {"query": query, "limit": 5})
        elapsed = time.time() - t0
        hits = 0
        for item in resp.get("results", []) or []:
            content = str(item.get("content") or "")
            if any(token.lower() in content.lower() for token in query.split()):
                hits += 1
        query_results.append({"query": query, "elapsed": elapsed, "hits": hits})

    # Phase 5: local pipeline: promote + ponder + research synthesis.
    # Runs in-process against the project modules, so the repo root must be
    # importable; any failure collapses into error summaries below.
    repo_root = Path(__file__).resolve().parents[1]
    sys.path.insert(0, str(repo_root))
    promote_summary = {}
    demote_summary = {}
    ponder_summary = {}
    research_summary = {}
    try:
        from brain.runtime.memory import promote, store, pondering_engine, memory_synthesis, semantic_search

        # Pull the newest pending candidates and derive a confidence threshold.
        conn = store.connect()
        rows = conn.execute(
            "SELECT candidate_id, confidence_score FROM candidates WHERE status='pending' ORDER BY created_at DESC LIMIT ?",
            (args.promote_limit,),
        ).fetchall()
        conn.close()
        confidences = [float(r[1] or 0.0) for r in rows]
        threshold = _choose_threshold(confidences, args.promotion_reject_rate)
        # Shadow mode only *reports* how many would be rejected; outside shadow
        # mode the threshold is applied to the promotion config for real.
        shadow_rejects = sum(1 for c in confidences if threshold is not None and c < threshold)
        if not args.promotion_shadow and threshold is not None:
            promote.config.OCMEMOG_PROMOTION_THRESHOLD = threshold

        promoted = 0
        rejected = 0
        for row in rows:
            cid = row[0]
            res = promote.promote_candidate_by_id(cid)
            if res.get("decision") == "promote":
                promoted += 1
            elif res.get("decision") == "reject":
                rejected += 1
        promote_summary = {
            "attempted": len(rows),
            "promoted": promoted,
            "rejected": rejected,
            "shadow_threshold": threshold,
            "shadow_rejects": shadow_rejects,
            "shadow_mode": bool(args.promotion_shadow),
        }

        demote = promote.demote_by_confidence(limit=args.demote_limit, threshold=args.demote_threshold, force=args.demote_force)
        demote_summary = {"count": demote.get("count"), "threshold": demote.get("threshold")}

        ponder = pondering_engine.run_ponder_cycle(max_items=args.ponder_limit)
        ponder_summary = {
            "unresolved": len(ponder.get("unresolved", []) or []),
            "insights": len(ponder.get("insights", []) or []),
            "links": len(ponder.get("links", []) or []),
        }

        # Research synthesis: pattern synthesis plus semantic search over the
        # first few sampled queries.
        synth = memory_synthesis.synthesize_memory_patterns(limit=5)
        semantic_queries = [q["query"] for q in query_results[:5]]
        semantic = []
        for q in semantic_queries:
            semantic.append({"query": q, "results": semantic_search.semantic_search(q, limit=3)})
        research_summary = {"synthesis": synth, "semantic": semantic}
    except Exception as exc:
        # NOTE(review): only promote/demote summaries record the error; a
        # failure leaves ponder/research summaries as empty dicts — confirm
        # that is intended.
        promote_summary = {"error": str(exc)}
        demote_summary = {"error": str(exc)}

    # Assemble the final report; hit rate is the fraction of sampled queries
    # with at least one hit (guarded against division by zero).
    report = {
        "files_scanned": len(files),
        "chunks_ingested": mem_count,
        "experiences_ingested": exp_count,
        "ingest_elapsed_s": round(ingest_elapsed, 3),
        "distill": distill_result,
        "query_samples": query_results,
        "query_hit_rate": round(sum(1 for q in query_results if q["hits"] > 0) / max(1, len(query_results)), 3),
        "promote": promote_summary,
        "demote": demote_summary,
        "ponder": ponder_summary,
        "research": research_summary,
    }

    report_path = Path(args.report)
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(json.dumps(report, indent=2))

    print(json.dumps(report, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,45 @@
1
#!/usr/bin/env python3
"""Append chat messages from stdin JSON to a per-day transcript log.

Reads ``{"messages": [...]}`` from stdin, appends one line per message to
``~/.openclaw/workspace/memory/transcripts/YYYY-MM-DD.log``, and records the
last message id in ``transcript-state.json``.  Exits silently when there are
no messages.
"""
from __future__ import annotations

import json
from pathlib import Path
from datetime import datetime
import sys

# Transcript directory and watermark file; created on import (side effect).
ROOT = Path.home() / ".openclaw" / "workspace" / "memory" / "transcripts"
STATE = ROOT / "transcript-state.json"
ROOT.mkdir(parents=True, exist_ok=True)

# Empty stdin is tolerated and treated as an empty payload.
payload = json.loads(sys.stdin.read() or "{}")
messages = payload.get("messages", [])

if not messages:
    sys.exit(0)

# Determine the log file from the newest message's timestamp.
# NOTE(review): this keeps the *last* timestamp in list order, not the
# maximum — assumes messages arrive chronologically sorted; confirm.
latest_ts = None
for m in messages:
    ts = m.get("timestamp") or m.get("createdAt")
    if ts:
        latest_ts = ts

if latest_ts:
    try:
        # Accept ISO-8601 with a trailing "Z" by rewriting it as "+00:00".
        dt = datetime.fromisoformat(latest_ts.replace("Z", "+00:00"))
    except Exception:
        # Unparseable timestamp: fall back to the current time.
        # NOTE(review): datetime.utcnow() is naive and deprecated in 3.12 —
        # consider datetime.now(timezone.utc) when this file can change.
        dt = datetime.utcnow()
else:
    dt = datetime.utcnow()

log_path = ROOT / f"{dt.strftime('%Y-%m-%d')}.log"

# Append one flattened line per message: "<ts> [<role>] <content>".
with log_path.open("a", encoding="utf-8") as handle:
    for m in messages:
        role = m.get("role", "")
        # NOTE(review): assumes content is a string (or falsy); a structured
        # content payload (e.g. list of parts) would crash on .replace — verify.
        content = (m.get("content") or "").replace("\n", " ")
        ts = m.get("timestamp") or m.get("createdAt") or ""
        handle.write(f"{ts} [{role}] {content}\n")

# Update the watermark with the last message id (either key spelling).
last_id = messages[-1].get("id") or messages[-1].get("message_id")
STATE.write_text(json.dumps({"last_id": last_id, "updated": datetime.utcnow().isoformat()}), encoding="utf-8")
@@ -0,0 +1,8 @@
1
#!/usr/bin/env python3
"""Thin CLI shim that runs the sidecar transcript watcher loop.

Requires the repo root on PYTHONPATH (see ocmemog-transcript-watcher.sh);
``watch_forever`` is expected to block indefinitely.
"""
from __future__ import annotations

from ocmemog.sidecar.transcript_watcher import watch_forever


if __name__ == "__main__":
    watch_forever()
@@ -0,0 +1,7 @@
1
#!/usr/bin/env bash
# Launch the transcript watcher with the repo root on PYTHONPATH.
set -euo pipefail

# Resolve the repository root (the parent of this scripts/ directory).
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Prepend the repo root so `ocmemog.sidecar` imports resolve; preserve any
# PYTHONPATH the caller already exported.
export PYTHONPATH="${ROOT_DIR}${PYTHONPATH:+:${PYTHONPATH}}"

# exec replaces this shell so signals reach the Python process directly.
exec python3 "${ROOT_DIR}/scripts/ocmemog-transcript-watcher.py"