@simbimbo/memory-ocmemog 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +85 -18
  3. package/brain/runtime/__init__.py +2 -12
  4. package/brain/runtime/config.py +1 -24
  5. package/brain/runtime/inference.py +1 -151
  6. package/brain/runtime/instrumentation.py +1 -15
  7. package/brain/runtime/memory/__init__.py +3 -13
  8. package/brain/runtime/memory/api.py +1 -1219
  9. package/brain/runtime/memory/candidate.py +1 -185
  10. package/brain/runtime/memory/conversation_state.py +1 -1823
  11. package/brain/runtime/memory/distill.py +1 -344
  12. package/brain/runtime/memory/embedding_engine.py +1 -92
  13. package/brain/runtime/memory/freshness.py +1 -112
  14. package/brain/runtime/memory/health.py +1 -40
  15. package/brain/runtime/memory/integrity.py +1 -186
  16. package/brain/runtime/memory/memory_consolidation.py +1 -58
  17. package/brain/runtime/memory/memory_links.py +1 -107
  18. package/brain/runtime/memory/memory_salience.py +1 -233
  19. package/brain/runtime/memory/memory_synthesis.py +1 -31
  20. package/brain/runtime/memory/memory_taxonomy.py +1 -33
  21. package/brain/runtime/memory/pondering_engine.py +1 -654
  22. package/brain/runtime/memory/promote.py +1 -277
  23. package/brain/runtime/memory/provenance.py +1 -406
  24. package/brain/runtime/memory/reinforcement.py +1 -71
  25. package/brain/runtime/memory/retrieval.py +1 -210
  26. package/brain/runtime/memory/semantic_search.py +1 -64
  27. package/brain/runtime/memory/store.py +1 -429
  28. package/brain/runtime/memory/unresolved_state.py +1 -91
  29. package/brain/runtime/memory/vector_index.py +1 -323
  30. package/brain/runtime/model_roles.py +1 -9
  31. package/brain/runtime/model_router.py +1 -22
  32. package/brain/runtime/providers.py +1 -66
  33. package/brain/runtime/security/redaction.py +1 -12
  34. package/brain/runtime/state_store.py +1 -23
  35. package/brain/runtime/storage_paths.py +1 -39
  36. package/docs/architecture/memory.md +20 -24
  37. package/docs/release-checklist.md +19 -6
  38. package/docs/usage.md +33 -17
  39. package/index.ts +8 -1
  40. package/ocmemog/__init__.py +11 -0
  41. package/ocmemog/doctor.py +1255 -0
  42. package/ocmemog/runtime/__init__.py +18 -0
  43. package/ocmemog/runtime/_compat_bridge.py +28 -0
  44. package/ocmemog/runtime/config.py +35 -0
  45. package/ocmemog/runtime/identity.py +115 -0
  46. package/ocmemog/runtime/inference.py +164 -0
  47. package/ocmemog/runtime/instrumentation.py +20 -0
  48. package/ocmemog/runtime/memory/__init__.py +91 -0
  49. package/ocmemog/runtime/memory/api.py +1431 -0
  50. package/ocmemog/runtime/memory/candidate.py +192 -0
  51. package/ocmemog/runtime/memory/conversation_state.py +1831 -0
  52. package/ocmemog/runtime/memory/distill.py +282 -0
  53. package/ocmemog/runtime/memory/embedding_engine.py +151 -0
  54. package/ocmemog/runtime/memory/freshness.py +114 -0
  55. package/ocmemog/runtime/memory/health.py +57 -0
  56. package/ocmemog/runtime/memory/integrity.py +208 -0
  57. package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
  58. package/ocmemog/runtime/memory/memory_links.py +109 -0
  59. package/ocmemog/runtime/memory/memory_salience.py +235 -0
  60. package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
  61. package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
  62. package/ocmemog/runtime/memory/pondering_engine.py +681 -0
  63. package/ocmemog/runtime/memory/promote.py +279 -0
  64. package/ocmemog/runtime/memory/provenance.py +408 -0
  65. package/ocmemog/runtime/memory/reinforcement.py +73 -0
  66. package/ocmemog/runtime/memory/retrieval.py +224 -0
  67. package/ocmemog/runtime/memory/semantic_search.py +66 -0
  68. package/ocmemog/runtime/memory/store.py +433 -0
  69. package/ocmemog/runtime/memory/unresolved_state.py +93 -0
  70. package/ocmemog/runtime/memory/vector_index.py +411 -0
  71. package/ocmemog/runtime/model_roles.py +16 -0
  72. package/ocmemog/runtime/model_router.py +29 -0
  73. package/ocmemog/runtime/providers.py +79 -0
  74. package/ocmemog/runtime/roles.py +92 -0
  75. package/ocmemog/runtime/security/__init__.py +8 -0
  76. package/ocmemog/runtime/security/redaction.py +17 -0
  77. package/ocmemog/runtime/state_store.py +34 -0
  78. package/ocmemog/runtime/storage_paths.py +70 -0
  79. package/ocmemog/sidecar/app.py +311 -23
  80. package/ocmemog/sidecar/compat.py +50 -13
  81. package/ocmemog/sidecar/transcript_watcher.py +391 -190
  82. package/openclaw.plugin.json +4 -0
  83. package/package.json +1 -1
  84. package/scripts/ocmemog-backfill-vectors.py +5 -3
  85. package/scripts/ocmemog-continuity-benchmark.py +1 -1
  86. package/scripts/ocmemog-demo.py +1 -1
  87. package/scripts/ocmemog-doctor.py +15 -0
  88. package/scripts/ocmemog-install.sh +29 -7
  89. package/scripts/ocmemog-integrated-proof.py +373 -0
  90. package/scripts/ocmemog-reindex-vectors.py +5 -3
  91. package/scripts/ocmemog-release-check.sh +330 -0
  92. package/scripts/ocmemog-sidecar.sh +4 -2
  93. package/scripts/ocmemog-test-rig.py +5 -3
  94. package/brain/runtime/memory/artifacts.py +0 -33
  95. package/brain/runtime/memory/context_builder.py +0 -112
  96. package/brain/runtime/memory/interaction_memory.py +0 -57
  97. package/brain/runtime/memory/memory_gate.py +0 -38
  98. package/brain/runtime/memory/memory_graph.py +0 -54
  99. package/brain/runtime/memory/person_identity.py +0 -83
  100. package/brain/runtime/memory/person_memory.py +0 -138
  101. package/brain/runtime/memory/sentiment_memory.py +0 -67
  102. package/brain/runtime/memory/tool_catalog.py +0 -68
@@ -0,0 +1,411 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import math
6
+ import re
7
+ import os
8
+ import threading
9
+ from typing import Any, Dict, List, Iterable
10
+
11
+ from ocmemog.runtime.security import redaction
12
+ from ocmemog.runtime import state_store
13
+ from ocmemog.runtime.instrumentation import emit_event
14
+ from . import embedding_engine, memory_links, store
15
+
16
# Destination for all instrumentation events emitted by this module.
LOGFILE = state_store.report_log_path()

# Tables whose rows are eligible for embedding; mirrors store.MEMORY_TABLES so
# the two modules cannot drift apart.
EMBEDDING_TABLES: tuple[str, ...] = tuple(store.MEMORY_TABLES)
# Ensures only one bulk job (rebuild or backfill) runs at a time.
_REBUILD_LOCK = threading.Lock()
# Rows per batch handed to store.submit_write during bulk indexing.
_WRITE_CHUNK_SIZE = 64
# Character caps applied to embedding input text; see _embedding_input for the
# tier selection logic.
_EMBEDDING_TEXT_LIMIT = 1000
_EMBEDDING_KNOWLEDGE_ARTIFACT_LIMIT = 500
_EMBEDDING_REFLECTION_LIMIT = 1200
_EMBEDDING_EXTENDED_LIMIT = 2000
_EMBEDDING_ULTRA_LIMIT = 4000
# Strip HTML-ish markup and collapse runs of whitespace before embedding.
_HTML_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")
28
+
29
+
30
+ def _ensure_vector_table(conn) -> None:
31
+ conn.execute(
32
+ """
33
+ CREATE TABLE IF NOT EXISTS vector_embeddings (
34
+ id TEXT PRIMARY KEY,
35
+ source_type TEXT NOT NULL,
36
+ source_id TEXT NOT NULL,
37
+ embedding TEXT NOT NULL,
38
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
39
+ )
40
+ """
41
+ )
42
+ conn.execute(
43
+ "CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings (source_type, source_id)"
44
+ )
45
+
46
+
47
+ def _cosine_similarity(a: List[float], b: List[float]) -> float:
48
+ if not a or not b:
49
+ return 0.0
50
+ size = min(len(a), len(b))
51
+ if size == 0:
52
+ return 0.0
53
+ a2 = a[:size]
54
+ b2 = b[:size]
55
+ dot = sum(x * y for x, y in zip(a2, b2))
56
+ mag_a = math.sqrt(sum(x * x for x in a2))
57
+ mag_b = math.sqrt(sum(x * x for x in b2))
58
+ if mag_a == 0.0 or mag_b == 0.0:
59
+ return 0.0
60
+ return dot / (mag_a * mag_b)
61
+
62
+
63
def _normalized_tables(tables: Iterable[str] | None) -> List[str]:
    """Restrict *tables* to known embedding tables, deduplicated in first-seen order.

    None means "all embedding tables"; an empty iterable yields an empty list.
    """
    candidates = EMBEDDING_TABLES if tables is None else tables
    return [name for name in dict.fromkeys(candidates) if name in EMBEDDING_TABLES]
72
+
73
+
74
def insert_memory(
    memory_id: int,
    content: str,
    confidence: float,
    *,
    source_type: str = "knowledge",
    skip_provider: bool = False,
) -> None:
    """Persist one redacted memory row plus its embedding (when one is produced).

    Unknown source types are coerced to "knowledge". Redaction and embedding
    happen up front; the database work itself runs through store.submit_write
    so writes stay serialized on the store's writer.
    """
    if source_type not in EMBEDDING_TABLES:
        source_type = "knowledge"
    redacted_content, changed = redaction.redact_text(content)
    embedding = embedding_engine.generate_embedding(redacted_content, skip_provider=skip_provider)
    metadata_json = json.dumps({"redacted": changed, "source_type": source_type})
    entry_ref = f"{source_type}:{memory_id}"

    def _persist() -> None:
        conn = store.connect()
        try:
            _ensure_vector_table(conn)
            conn.execute(
                "INSERT INTO memory_index (source, confidence, metadata_json, content, schema_version) VALUES (?, ?, ?, ?, ?)",
                (entry_ref, confidence, metadata_json, redacted_content, store.SCHEMA_VERSION),
            )
            # Empty embeddings (provider skipped or failed) still record the row above.
            if embedding:
                emit_event(LOGFILE, "brain_memory_embedding_generated", status="ok", source_id=str(memory_id))
                conn.execute(
                    """
                    INSERT INTO vector_embeddings (id, source_type, source_id, embedding)
                    VALUES (?, ?, ?, ?)
                    ON CONFLICT(id) DO UPDATE SET embedding=excluded.embedding
                    """,
                    (entry_ref, source_type, str(memory_id), json.dumps(embedding)),
                )
            conn.commit()
        finally:
            conn.close()

    store.submit_write(_persist, timeout=30.0)
116
+
117
+
118
def _load_table_rows(table: str, *, limit: int | None = None, descending: bool = False, missing_only: bool = False) -> List[Dict[str, Any]]:
    """Fetch id/content/confidence/metadata rows from one memory table.

    ``missing_only`` restricts results to rows that have no entry in
    vector_embeddings. The table name is interpolated into the SQL text, so
    callers must only pass names validated against EMBEDDING_TABLES.
    """
    direction = "DESC" if descending else "ASC"
    where = ""
    params: list[Any] = []
    if missing_only:
        where = (
            " WHERE NOT EXISTS ("
            "SELECT 1 FROM vector_embeddings AS ve "
            "WHERE ve.source_type = ? AND ve.source_id = CAST(tbl.id AS TEXT)"
            ")"
        )
        params.append(table)
    sql = f"SELECT tbl.id, tbl.content, tbl.confidence, tbl.metadata_json FROM {table} AS tbl{where} ORDER BY tbl.id {direction}"
    if limit is not None:
        sql += " LIMIT ?"
        params.append(limit)
    conn = store.connect()
    try:
        fetched = conn.execute(sql, tuple(params)).fetchall()
    finally:
        conn.close()
    return [dict(row) for row in fetched]
145
+
146
+
147
def _embedding_input(text: str, *, table: str = "knowledge") -> str:
    """Normalize and hard-cap embedding input text.

    Keep output deterministic and bounded for embedded calls that may have
    conservative token windows.

    Cap tiers are deliberately non-monotonic: artifact-like knowledge rows get
    the tightest cap (500 chars), while very long text (>12k / >20k chars)
    keeps a larger slice (2000 / 4000 chars) than mid-sized text (1000 chars).
    NOTE(review): confirm this tiering is intentional — shorter oversized
    inputs currently end up with smaller caps than much longer ones.
    """
    # Collapse whitespace, strip HTML-ish tags, collapse again.
    raw = _WHITESPACE_RE.sub(" ", str(text or "")).strip()
    cleaned = _HTML_TAG_RE.sub(" ", raw)
    cleaned = _WHITESPACE_RE.sub(" ", cleaned).strip()
    if not cleaned:
        # Tag stripping consumed everything; fall back to the pre-strip text.
        cleaned = raw
    lowered = cleaned.lower()
    # Heuristics for machine-generated artifacts (chunked exports, SQL dumps,
    # topology paths, long VALUES lists) that embed poorly at full length.
    artifactish = (
        "| chunk " in lowered
        or ".sql" in lowered
        or "topology/" in lowered
        or cleaned.count("),(") >= 8
    )
    if table == "knowledge" and artifactish:
        return cleaned[:_EMBEDDING_KNOWLEDGE_ARTIFACT_LIMIT]
    if table == "knowledge" and len(cleaned) > 9000:
        return cleaned[:_EMBEDDING_TEXT_LIMIT]
    if table == "reflections" and len(cleaned) > 8000:
        return cleaned[:_EMBEDDING_REFLECTION_LIMIT]
    if len(cleaned) > 20000:
        return cleaned[:_EMBEDDING_ULTRA_LIMIT]
    if len(cleaned) > 12000:
        return cleaned[:_EMBEDDING_EXTENDED_LIMIT]
    # Local llama.cpp embedding runtime currently rejects inputs above its effective
    # token window (~512 tokens physical batch). Keep a conservative character cap so
    # backfill and live embedding stay deterministic instead of failing with HTTP 500s.
    return cleaned[:_EMBEDDING_TEXT_LIMIT]
179
+
180
+
181
def _prepare_embedding_rows(rows: Iterable[Dict[str, Any]], *, table: str) -> List[Dict[str, Any]]:
    """Redact, trim, and embed raw table rows into write-ready payloads.

    Embeddings are memoized per call by a SHA-256 of the trimmed input so
    duplicate content is embedded only once. Rows whose embedding comes back
    empty are dropped.
    """
    cache: Dict[str, List[float] | None] = {}
    prepared: List[Dict[str, Any]] = []
    for row in rows:
        original_text = str(row.get("content") or "")
        redacted, was_redacted = redaction.redact_text(original_text)
        trimmed = _embedding_input(redacted, table=table)
        digest = hashlib.sha256(trimmed.encode("utf-8", errors="ignore")).hexdigest()
        if digest not in cache:
            cache[digest] = embedding_engine.generate_embedding(trimmed)
        vector = cache[digest]
        if not vector:
            continue
        try:
            metadata = json.loads(row.get("metadata_json") or "{}")
        except Exception:
            metadata = {}
        metadata["redacted"] = was_redacted
        prepared.append(
            {
                "id": int(row["id"]),
                "content": redacted,
                "confidence": float(row.get("confidence") or 0.0),
                "metadata_json": json.dumps(metadata),
                "embedding": json.dumps(vector),
                "source_type": table,
            }
        )
    return prepared
212
+
213
+
214
def _write_embedding_chunk(table: str, rows: List[Dict[str, Any]]) -> int:
    """Write one prepared batch (redacted content + embeddings); returns rows written.

    Both the content update and the embedding upsert run inside a single
    store.submit_write job so the batch commits atomically.
    """
    if not rows:
        return 0

    def _apply() -> int:
        conn = store.connect()
        try:
            _ensure_vector_table(conn)
            update_sql = f"UPDATE {table} SET content=?, metadata_json=? WHERE id=?"
            upsert_sql = (
                "INSERT INTO vector_embeddings (id, source_type, source_id, embedding) "
                "VALUES (?, ?, ?, ?) "
                "ON CONFLICT(id) DO UPDATE SET embedding=excluded.embedding"
            )
            for entry in rows:
                conn.execute(update_sql, (entry["content"], entry["metadata_json"], entry["id"]))
                conn.execute(
                    upsert_sql,
                    (f"{table}:{entry['id']}", table, str(entry["id"]), entry["embedding"]),
                )
            conn.commit()
            return len(rows)
        finally:
            conn.close()

    return int(store.submit_write(_apply, timeout=60.0))
241
+
242
+
243
def index_memory(limit: int = 100, *, tables: Iterable[str] | None = None) -> int:
    """Embed and index up to *limit* of the most recent rows per table.

    Fix: table filtering now goes through ``_normalized_tables`` like
    ``rebuild_vector_index`` and ``backfill_missing_vectors`` do, instead of a
    hand-rolled inline filter. Previously an explicitly-empty ``tables``
    iterable was falsy and silently fell back to indexing ALL tables; now it
    indexes nothing, matching the other bulk entry points. ``tables=None``
    still means "all embedding tables".

    Returns the number of rows written.
    """
    emit_event(LOGFILE, "brain_memory_vector_index_start", status="ok")
    count = 0
    for table in _normalized_tables(tables):
        prepared = _prepare_embedding_rows(
            _load_table_rows(table, limit=limit, descending=True), table=table
        )
        for offset in range(0, len(prepared), _WRITE_CHUNK_SIZE):
            count += _write_embedding_chunk(table, prepared[offset: offset + _WRITE_CHUNK_SIZE])
    emit_event(LOGFILE, "brain_memory_vector_index_complete", status="ok", indexed=count)
    return count
254
+
255
+
256
def rebuild_vector_index(*, tables: Iterable[str] | None = None) -> int:
    """Drop and regenerate embeddings for the requested tables.

    Only one rebuild/backfill may run at a time (non-blocking lock; a second
    caller gets 0 with a "skipped" event rather than waiting). Existing
    vectors for the requested tables are deleted first, then every row is
    re-embedded in _WRITE_CHUNK_SIZE batches. Returns rows written.
    """
    emit_event(LOGFILE, "brain_memory_vector_rebuild_start", status="ok")
    requested_tables = _normalized_tables(tables)
    if not requested_tables:
        emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="skipped", reason="no_valid_tables")
        return 0
    # Non-blocking: a concurrent rebuild/backfill wins and this call no-ops.
    if not _REBUILD_LOCK.acquire(blocking=False):
        emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="skipped", reason="already_running")
        return 0
    count = 0
    try:
        def _clear() -> None:
            # Delete old vectors for the requested tables inside the write queue.
            conn = store.connect()
            try:
                _ensure_vector_table(conn)
                if requested_tables:
                    conn.executemany(
                        "DELETE FROM vector_embeddings WHERE source_type = ?",
                        [(table,) for table in requested_tables],
                    )
                conn.commit()
            finally:
                conn.close()

        store.submit_write(_clear, timeout=60.0)
        # Re-embed every row (no limit) table by table, writing in chunks.
        for table in requested_tables:
            prepared = _prepare_embedding_rows(_load_table_rows(table), table=table)
            for offset in range(0, len(prepared), _WRITE_CHUNK_SIZE):
                count += _write_embedding_chunk(table, prepared[offset: offset + _WRITE_CHUNK_SIZE])
    finally:
        _REBUILD_LOCK.release()
    emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="ok", indexed=count)
    return count
289
+
290
+
291
def backfill_missing_vectors(*, tables: Iterable[str] | None = None, limit_per_table: int | None = None) -> int:
    """Embed only rows that have no vector yet; returns how many were written.

    Shares _REBUILD_LOCK with rebuild_vector_index, so at most one bulk job
    runs at a time; a concurrent caller gets 0 with a "skipped" event.
    """
    emit_event(LOGFILE, "brain_memory_vector_backfill_start", status="ok")
    targets = _normalized_tables(tables)
    if not targets:
        emit_event(LOGFILE, "brain_memory_vector_backfill_complete", status="skipped", reason="no_valid_tables")
        return 0
    if limit_per_table is not None and limit_per_table <= 0:
        emit_event(LOGFILE, "brain_memory_vector_backfill_complete", status="skipped", reason="invalid_limit")
        return 0
    if not _REBUILD_LOCK.acquire(blocking=False):
        emit_event(LOGFILE, "brain_memory_vector_backfill_complete", status="skipped", reason="already_running")
        return 0
    written = 0
    try:
        for table in targets:
            payloads = _prepare_embedding_rows(
                _load_table_rows(table, limit=limit_per_table, missing_only=True),
                table=table,
            )
            for start in range(0, len(payloads), _WRITE_CHUNK_SIZE):
                written += _write_embedding_chunk(table, payloads[start: start + _WRITE_CHUNK_SIZE])
    finally:
        _REBUILD_LOCK.release()
    emit_event(LOGFILE, "brain_memory_vector_backfill_complete", status="ok", indexed=written)
    return written
316
+
317
+
318
def search_memory(
    query: str,
    limit: int = 5,
    *,
    skip_provider: bool = False,
    source_types: Iterable[str] | None = None,
) -> List[Dict[str, Any]]:
    """Vector-similarity search with a substring fallback over memory_index.

    Embeds *query*, scans the most recent vector rows (bounded by the
    OCMEMOG_SEARCH_VECTOR_SCAN_LIMIT env var, default 1200), and ranks them by
    cosine similarity. When the vector path yields nothing (embedding
    unavailable or no scored rows), falls back to a LIKE search over
    memory_index filtered to the requested source types.

    Fix: the connection is now closed in a ``finally`` block — previously any
    exception between ``store.connect()`` and the trailing ``conn.close()``
    leaked the connection.
    """
    emit_event(LOGFILE, "brain_memory_vector_search_start", status="ok")
    results: List[Dict[str, Any]] = []
    conn = store.connect()
    try:
        # NOTE(review): DDL on a read connection, outside the write queue —
        # presumably safe because both statements are IF NOT EXISTS; confirm.
        _ensure_vector_table(conn)
        query_embedding = embedding_engine.generate_embedding(query, skip_provider=skip_provider)

        try:
            scan_limit = int(os.environ.get("OCMEMOG_SEARCH_VECTOR_SCAN_LIMIT", 1200))
        except Exception:
            scan_limit = 1200
        if scan_limit <= 0:
            scan_limit = max(1, limit * 8)
        scan_limit = max(limit, scan_limit)

        # Deduplicate and validate requested source types; None means "all".
        if source_types is None:
            filtered_source_types = tuple(EMBEDDING_TABLES)
        else:
            filtered_source_types = tuple(
                dict.fromkeys(
                    source_type for source_type in source_types if source_type in EMBEDDING_TABLES
                )
            )
        if filtered_source_types:
            placeholders = ",".join("?" for _ in filtered_source_types)
            vector_query = (
                "SELECT id, source_type, source_id, embedding "
                f"FROM vector_embeddings WHERE source_type IN ({placeholders}) "
                "ORDER BY rowid DESC LIMIT ?"
            )
            scan_params = (*filtered_source_types, scan_limit)
        else:
            vector_query = "SELECT id, source_type, source_id, embedding FROM vector_embeddings ORDER BY rowid DESC LIMIT ?"
            scan_params = (scan_limit,)

        if query_embedding:
            scored: List[Dict[str, Any]] = []
            for row in conn.execute(vector_query, scan_params).fetchall():
                try:
                    emb_list = [float(x) for x in json.loads(row["embedding"])]
                except Exception:
                    # Skip rows whose stored embedding payload is corrupt.
                    continue
                scored.append(
                    {
                        "entry_id": row["id"],
                        "source_type": row["source_type"],
                        "source_id": row["source_id"],
                        "score": round(_cosine_similarity(query_embedding, emb_list), 6),
                    }
                )
            scored.sort(key=lambda item: item["score"], reverse=True)
            results = scored[:limit]

        if not results:
            # Fallback: plain substring search over the flat memory_index table.
            fallback_where = ""
            fallback_params: List[Any] = [f"%{query}%"]
            if filtered_source_types:
                patterns = [f"{source_type}:%" for source_type in filtered_source_types]
                fallback_where = f" AND ({' OR '.join(['source LIKE ?'] * len(patterns))})"
                fallback_params.extend(patterns)
            rows = conn.execute(
                f"SELECT id, source, content, confidence, metadata_json FROM memory_index WHERE content LIKE ?{fallback_where} ORDER BY id DESC LIMIT ?",
                tuple(fallback_params + [limit]),
            ).fetchall()
            fallback_results: List[Dict[str, Any]] = []
            for row in rows:
                source_ref = str(row["source"] or "")
                source_type, _, source_id = source_ref.partition(":")
                # Unknown prefixes are canonicalized to "knowledge".
                canonical_type = source_type if source_type in EMBEDDING_TABLES else "knowledge"
                canonical_ref = f"{canonical_type}:{source_id}" if source_id else source_ref
                fallback_results.append(
                    {
                        "entry_id": canonical_ref,
                        "source_type": canonical_type,
                        "source_id": source_id or str(row["id"]),
                        "score": float(row["confidence"] or 0.0),
                        "content": str(row["content"] or "")[:240],
                        "links": memory_links.get_memory_links(canonical_ref),
                    }
                )
            results = fallback_results
    finally:
        conn.close()
    emit_event(LOGFILE, "brain_memory_vector_search_complete", status="ok", result_count=len(results))
    return results
@@ -0,0 +1,16 @@
1
+ """Model-role mapping helpers owned by ocmemog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from . import config
6
+
7
+ __wrapped_from__ = "brain.runtime.model_roles"
8
+
9
+
10
def get_model_for_role(role: str) -> str:
    """Map a runtime role to its configured model name.

    Only "embedding" gets a dedicated model; "memory" and any unrecognized
    role both resolve to the memory model.
    """
    if role == "embedding":
        return config.OCMEMOG_OPENAI_EMBED_MODEL
    return config.OCMEMOG_MEMORY_MODEL
16
+
@@ -0,0 +1,29 @@
1
+ """Model-provider routing helpers owned by ocmemog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from . import config
8
+
9
+ __wrapped_from__ = "brain.runtime.model_router"
10
+
11
+
12
@dataclass(frozen=True)
class ModelSelection:
    """Immutable (provider, model) pair resolved for a runtime role.

    Empty strings are the "nothing selected" sentinel: get_provider_for_role
    returns a default ModelSelection() when no provider is configured.
    """

    # Provider identifier ("openai", "local-openai", "ollama"); "" = none.
    provider_id: str = ""
    # Model name to request from that provider; "" = none.
    model: str = ""
16
+
17
+
18
def get_provider_for_role(role: str) -> ModelSelection:
    """Resolve the embedding provider and model from configuration.

    Only the "embedding" role is routed; any other role, or an unknown
    provider string, yields an empty ModelSelection().
    """
    if role != "embedding":
        return ModelSelection()
    configured = (config.OCMEMOG_EMBED_PROVIDER or config.BRAIN_EMBED_MODEL_PROVIDER or "").strip().lower()
    # Provider aliases accepted in configuration, mapped to canonical ids.
    alias_to_provider = {
        "openai": "openai",
        "openai_compatible": "openai",
        "openai-compatible": "openai",
        "local-openai": "local-openai",
        "local_openai": "local-openai",
        "llamacpp": "local-openai",
        "llama.cpp": "local-openai",
        "ollama": "ollama",
        "local-ollama": "ollama",
    }
    provider_id = alias_to_provider.get(configured)
    if provider_id == "openai":
        return ModelSelection(provider_id="openai", model=config.OCMEMOG_OPENAI_EMBED_MODEL)
    if provider_id == "local-openai":
        return ModelSelection(provider_id="local-openai", model=config.OCMEMOG_LOCAL_EMBED_MODEL)
    if provider_id == "ollama":
        return ModelSelection(provider_id="ollama", model=config.OCMEMOG_OLLAMA_EMBED_MODEL)
    return ModelSelection()
29
+
@@ -0,0 +1,79 @@
1
+ """Embedding provider execution wrapper owned by ocmemog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import urllib.request
8
+
9
+ from . import config, instrumentation, state_store
10
+
11
+ __wrapped_from__ = "brain.runtime.providers"
12
+
13
+ LOGFILE = state_store.report_log_path()
14
+
15
+
16
class ProviderExecute:
    """Executes embedding HTTP calls against the configured provider.

    Returns ``{"embedding": [...]}`` on success or ``{}`` on any failure;
    every failure path also emits a brain_embedding_provider_error event.
    """

    def execute_embedding_call(self, selection, text: str) -> dict[str, object]:
        """Embed *text* using the provider/model described by *selection*.

        ``selection`` is duck-typed: only ``provider_id`` and ``model``
        attributes are read (see ModelSelection). Unknown providers return {}.
        """
        provider_id = getattr(selection, "provider_id", "") or ""
        model = getattr(selection, "model", "") or config.OCMEMOG_OPENAI_EMBED_MODEL
        # OpenAI-compatible path (hosted OpenAI or a local OpenAI-shaped server).
        if provider_id in {"openai", "local-openai"}:
            api_key = None
            url_base = config.OCMEMOG_OPENAI_API_BASE
            provider_label = "openai"
            if provider_id == "openai":
                api_key = os.environ.get("OCMEMOG_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
                if not api_key:
                    # Hosted OpenAI without a key cannot succeed; bail quietly.
                    return {}
            else:
                url_base = config.OCMEMOG_LOCAL_EMBED_BASE_URL
                # Local servers may run without auth; the key is optional here.
                api_key = os.environ.get("OCMEMOG_LOCAL_EMBED_API_KEY") or os.environ.get("LOCAL_EMBED_API_KEY")
                provider_label = "local-openai"
            url = f"{url_base.rstrip('/')}/embeddings"
            payload = json.dumps({"model": model, "input": text}).encode("utf-8")
            req = urllib.request.Request(url, data=payload, method="POST")
            if api_key:
                req.add_header("Authorization", f"Bearer {api_key}")
            req.add_header("Content-Type", "application/json")
            try:
                with urllib.request.urlopen(req, timeout=20) as resp:
                    data = json.loads(resp.read().decode("utf-8"))
            except Exception as exc:
                instrumentation.emit_event(
                    LOGFILE, "brain_embedding_provider_error", status="error", provider=provider_label, error=str(exc)
                )
                return {}
            try:
                # OpenAI response shape: {"data": [{"embedding": [...]}, ...]}.
                embedding = data["data"][0]["embedding"]
            except Exception as exc:
                instrumentation.emit_event(
                    LOGFILE, "brain_embedding_provider_error", status="error", provider=provider_label, error=str(exc)
                )
                return {}
            return {"embedding": embedding}

        # Ollama path: different endpoint and payload key ("prompt", not "input").
        if provider_id == "ollama":
            url = f"{config.OCMEMOG_OLLAMA_HOST.rstrip('/')}/api/embeddings"
            payload = json.dumps({"model": model, "prompt": text}).encode("utf-8")
            req = urllib.request.Request(url, data=payload, method="POST")
            req.add_header("Content-Type", "application/json")
            try:
                with urllib.request.urlopen(req, timeout=20) as resp:
                    data = json.loads(resp.read().decode("utf-8"))
            except Exception as exc:
                instrumentation.emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider="ollama", error=str(exc))
                return {}
            embedding = data.get("embedding")
            if not isinstance(embedding, list):
                instrumentation.emit_event(
                    LOGFILE, "brain_embedding_provider_error", status="error", provider="ollama", error="invalid_embedding"
                )
                return {}
            return {"embedding": embedding}

        # Unknown or empty provider_id: nothing to call.
        return {}


# Module-level singleton used by callers.
provider_execute = ProviderExecute()
# NOTE(review): presumably marks this as the native implementation rather than
# a compat shim — confirm against the _compat_bridge module.
provider_execute.__shim__ = False
79
+
@@ -0,0 +1,92 @@
1
+ """Native role registry surfaced as an engine-like runtime surface.
2
+
3
+ The upstream brAIn project exposes role-prioritized context behavior through a
4
+ `roles` module. This repo keeps a compatible shape in `ocmemog` so callers can
5
+ query role metadata without being coupled to legacy implementation details.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Dict, List
11
+
12
# Core roles are intentionally conservative and deterministic so they can be used
# by fallback context builders and lightweight policy checks.
# Each profile carries: priority (higher = sorted first by sorted_roles),
# description, ordered_buckets (lookup order over memory buckets), and
# soft_window (NOTE(review): presumably a per-role result-count hint — confirm
# against the context builders that consume it).
ROLE_REGISTRY: Dict[str, Dict[str, object]] = {
    # Fallback profile when the caller's role is unknown or unspecified.
    "default": {
        "priority": 90,
        "description": "Balanced fallback role for generic memory lookup",
        "ordered_buckets": (
            "knowledge",
            "preferences",
            "identity",
            "reflections",
            "directives",
            "tasks",
            "runbooks",
            "lessons",
        ),
        "soft_window": 5,
    },
    # Highest priority: user-originated prompts.
    "user": {
        "priority": 100,
        "description": "User-sourced prompts should prioritize reflective and identity buckets",
        "ordered_buckets": (
            "reflections",
            "identity",
            "preferences",
            "knowledge",
            "tasks",
            "directives",
            "runbooks",
            "lessons",
        ),
        "soft_window": 6,
    },
    # Lowest priority: assistant-originated prompts favor operational buckets.
    "assistant": {
        "priority": 80,
        "description": "Assistant-sourced prompts should prioritize concise operational memory",
        "ordered_buckets": (
            "tasks",
            "directives",
            "knowledge",
            "runbooks",
            "lessons",
            "preferences",
            "reflections",
            "identity",
        ),
        "soft_window": 4,
    },
}

__all__ = ["ROLE_REGISTRY", "role_registry", "role_profile", "sorted_roles"]
63
+
64
+
65
def role_registry() -> Dict[str, Dict[str, object]]:
    """Return a normalized copy of ROLE_REGISTRY, safe for callers to mutate."""

    normalized: Dict[str, Dict[str, object]] = {}
    for role_id, profile in ROLE_REGISTRY.items():
        normalized[role_id] = {
            "role": role_id,
            "buckets": tuple(profile.get("ordered_buckets") or ()),
            "priority": int(profile.get("priority", 0)),
            "description": str(profile.get("description", "")),
            "soft_window": int(profile.get("soft_window", 0)),
        }
    return normalized
80
+
81
+
82
def role_profile(role: str) -> Dict[str, object] | None:
    """Return the normalized profile for one role; blank input maps to 'default'."""

    cleaned = str(role or "").strip().lower()
    return role_registry().get(cleaned or "default")
87
+
88
+
89
def sorted_roles() -> List[str]:
    """Return role ids ordered by descending priority.

    Fix: the registry is built once and captured, instead of calling
    role_registry() inside the sort key — which rebuilt and re-normalized the
    entire registry for every element being sorted.
    """

    registry = role_registry()
    return sorted(registry, key=lambda role_id: registry[role_id]["priority"], reverse=True)
@@ -0,0 +1,8 @@
1
+ """ocmemog-native security namespace."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from . import redaction
6
+
7
+ __all__ = ["redaction"]
8
+
@@ -0,0 +1,17 @@
1
+ """Input redaction helpers owned by ocmemog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
# Patterns for the two PII shapes this module scrubs.
EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}\b")
PHONE_RE = re.compile(r"\b(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?){2}\d{4}\b")


def redact_text(text: str) -> tuple[str, bool]:
    """Mask email addresses and US-style phone numbers in *text*.

    Returns the scrubbed string plus a flag telling whether anything changed.
    Non-string input (including None) yields ("", False).
    """
    if not isinstance(text, str):
        return "", False
    # Emails first, then phones — matches the original pass order.
    scrubbed = PHONE_RE.sub("[redacted-phone]", EMAIL_RE.sub("[redacted-email]", text))
    return scrubbed, scrubbed != text
17
+
@@ -0,0 +1,34 @@
1
+ """Runtime state persistence surface owned by ocmemog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from . import storage_paths
8
+
9
+ __wrapped_from__ = "brain.runtime.state_store"
10
+ __wrapped_by__ = "ocmemog-runtime-bridge"
11
+
12
+
13
def root_dir() -> Path:
    """Root of the ocmemog on-disk state tree (delegates to storage_paths)."""
    return storage_paths.root_dir()


def data_dir() -> Path:
    """Directory for general runtime data files."""
    return storage_paths.data_dir()


def memory_dir() -> Path:
    """Directory holding memory artifacts."""
    return storage_paths.memory_dir()


def reports_dir() -> Path:
    """Directory where report/log output is written."""
    return storage_paths.reports_dir()


def report_log_path() -> Path:
    """Path of the event log file used by instrumentation emitters."""
    return storage_paths.report_log_path()


def memory_db_path() -> Path:
    """Path of the memory database file."""
    return storage_paths.memory_db_path()