@simbimbo/memory-ocmemog 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +83 -18
- package/brain/runtime/__init__.py +2 -12
- package/brain/runtime/config.py +1 -24
- package/brain/runtime/inference.py +1 -151
- package/brain/runtime/instrumentation.py +1 -15
- package/brain/runtime/memory/__init__.py +3 -13
- package/brain/runtime/memory/api.py +1 -1219
- package/brain/runtime/memory/candidate.py +1 -185
- package/brain/runtime/memory/conversation_state.py +1 -1823
- package/brain/runtime/memory/distill.py +1 -344
- package/brain/runtime/memory/embedding_engine.py +1 -92
- package/brain/runtime/memory/freshness.py +1 -112
- package/brain/runtime/memory/health.py +1 -40
- package/brain/runtime/memory/integrity.py +1 -186
- package/brain/runtime/memory/memory_consolidation.py +1 -58
- package/brain/runtime/memory/memory_links.py +1 -107
- package/brain/runtime/memory/memory_salience.py +1 -233
- package/brain/runtime/memory/memory_synthesis.py +1 -31
- package/brain/runtime/memory/memory_taxonomy.py +1 -33
- package/brain/runtime/memory/pondering_engine.py +1 -654
- package/brain/runtime/memory/promote.py +1 -277
- package/brain/runtime/memory/provenance.py +1 -406
- package/brain/runtime/memory/reinforcement.py +1 -71
- package/brain/runtime/memory/retrieval.py +1 -210
- package/brain/runtime/memory/semantic_search.py +1 -64
- package/brain/runtime/memory/store.py +1 -429
- package/brain/runtime/memory/unresolved_state.py +1 -91
- package/brain/runtime/memory/vector_index.py +1 -323
- package/brain/runtime/model_roles.py +1 -9
- package/brain/runtime/model_router.py +1 -22
- package/brain/runtime/providers.py +1 -66
- package/brain/runtime/security/redaction.py +1 -12
- package/brain/runtime/state_store.py +1 -23
- package/brain/runtime/storage_paths.py +1 -39
- package/docs/architecture/memory.md +20 -24
- package/docs/release-checklist.md +19 -6
- package/docs/usage.md +33 -17
- package/index.ts +8 -1
- package/ocmemog/__init__.py +11 -0
- package/ocmemog/doctor.py +1255 -0
- package/ocmemog/runtime/__init__.py +18 -0
- package/ocmemog/runtime/_compat_bridge.py +28 -0
- package/ocmemog/runtime/config.py +35 -0
- package/ocmemog/runtime/identity.py +115 -0
- package/ocmemog/runtime/inference.py +164 -0
- package/ocmemog/runtime/instrumentation.py +20 -0
- package/ocmemog/runtime/memory/__init__.py +91 -0
- package/ocmemog/runtime/memory/api.py +1431 -0
- package/ocmemog/runtime/memory/candidate.py +192 -0
- package/ocmemog/runtime/memory/conversation_state.py +1831 -0
- package/ocmemog/runtime/memory/distill.py +282 -0
- package/ocmemog/runtime/memory/embedding_engine.py +151 -0
- package/ocmemog/runtime/memory/freshness.py +114 -0
- package/ocmemog/runtime/memory/health.py +57 -0
- package/ocmemog/runtime/memory/integrity.py +208 -0
- package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
- package/ocmemog/runtime/memory/memory_links.py +109 -0
- package/ocmemog/runtime/memory/memory_salience.py +235 -0
- package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
- package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
- package/ocmemog/runtime/memory/pondering_engine.py +681 -0
- package/ocmemog/runtime/memory/promote.py +279 -0
- package/ocmemog/runtime/memory/provenance.py +408 -0
- package/ocmemog/runtime/memory/reinforcement.py +73 -0
- package/ocmemog/runtime/memory/retrieval.py +224 -0
- package/ocmemog/runtime/memory/semantic_search.py +66 -0
- package/ocmemog/runtime/memory/store.py +433 -0
- package/ocmemog/runtime/memory/unresolved_state.py +93 -0
- package/ocmemog/runtime/memory/vector_index.py +411 -0
- package/ocmemog/runtime/model_roles.py +16 -0
- package/ocmemog/runtime/model_router.py +29 -0
- package/ocmemog/runtime/providers.py +79 -0
- package/ocmemog/runtime/roles.py +92 -0
- package/ocmemog/runtime/security/__init__.py +8 -0
- package/ocmemog/runtime/security/redaction.py +17 -0
- package/ocmemog/runtime/state_store.py +34 -0
- package/ocmemog/runtime/storage_paths.py +70 -0
- package/ocmemog/sidecar/app.py +310 -23
- package/ocmemog/sidecar/compat.py +50 -13
- package/ocmemog/sidecar/transcript_watcher.py +318 -240
- package/openclaw.plugin.json +4 -0
- package/package.json +1 -1
- package/scripts/ocmemog-backfill-vectors.py +5 -3
- package/scripts/ocmemog-continuity-benchmark.py +1 -1
- package/scripts/ocmemog-demo.py +1 -1
- package/scripts/ocmemog-doctor.py +15 -0
- package/scripts/ocmemog-install.sh +29 -7
- package/scripts/ocmemog-integrated-proof.py +373 -0
- package/scripts/ocmemog-reindex-vectors.py +5 -3
- package/scripts/ocmemog-release-check.sh +330 -0
- package/scripts/ocmemog-sidecar.sh +4 -2
- package/scripts/ocmemog-test-rig.py +5 -3
- package/brain/runtime/memory/artifacts.py +0 -33
- package/brain/runtime/memory/context_builder.py +0 -112
- package/brain/runtime/memory/interaction_memory.py +0 -57
- package/brain/runtime/memory/memory_gate.py +0 -38
- package/brain/runtime/memory/memory_graph.py +0 -54
- package/brain/runtime/memory/person_identity.py +0 -83
- package/brain/runtime/memory/person_memory.py +0 -138
- package/brain/runtime/memory/sentiment_memory.py +0 -67
- package/brain/runtime/memory/tool_catalog.py +0 -68
|
@@ -1,325 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
import json
|
|
5
|
-
import math
|
|
6
|
-
import re
|
|
7
|
-
import threading
|
|
8
|
-
from typing import Any, Dict, List, Iterable
|
|
9
|
-
|
|
10
|
-
from brain.runtime import state_store
|
|
11
|
-
from brain.runtime.instrumentation import emit_event
|
|
12
|
-
from brain.runtime.memory import embedding_engine, store, memory_links
|
|
13
|
-
from brain.runtime.security import redaction
|
|
14
|
-
|
|
15
|
-
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
|
|
16
|
-
|
|
17
|
-
EMBEDDING_TABLES: tuple[str, ...] = tuple(store.MEMORY_TABLES)
|
|
18
|
-
_REBUILD_LOCK = threading.Lock()
|
|
19
|
-
_WRITE_CHUNK_SIZE = 64
|
|
20
|
-
_EMBEDDING_TEXT_LIMIT = 8000
|
|
21
|
-
_HTML_TAG_RE = re.compile(r"<[^>]+>")
|
|
22
|
-
_WHITESPACE_RE = re.compile(r"\s+")
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _ensure_vector_table(conn) -> None:
|
|
26
|
-
conn.execute(
|
|
27
|
-
"""
|
|
28
|
-
CREATE TABLE IF NOT EXISTS vector_embeddings (
|
|
29
|
-
id TEXT PRIMARY KEY,
|
|
30
|
-
source_type TEXT NOT NULL,
|
|
31
|
-
source_id TEXT NOT NULL,
|
|
32
|
-
embedding TEXT NOT NULL,
|
|
33
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
34
|
-
)
|
|
35
|
-
"""
|
|
36
|
-
)
|
|
37
|
-
conn.execute(
|
|
38
|
-
"CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings (source_type, source_id)"
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _cosine_similarity(a: List[float], b: List[float]) -> float:
|
|
43
|
-
if not a or not b:
|
|
44
|
-
return 0.0
|
|
45
|
-
size = min(len(a), len(b))
|
|
46
|
-
if size == 0:
|
|
47
|
-
return 0.0
|
|
48
|
-
a2 = a[:size]
|
|
49
|
-
b2 = b[:size]
|
|
50
|
-
dot = sum(x * y for x, y in zip(a2, b2))
|
|
51
|
-
mag_a = math.sqrt(sum(x * x for x in a2))
|
|
52
|
-
mag_b = math.sqrt(sum(x * x for x in b2))
|
|
53
|
-
if mag_a == 0.0 or mag_b == 0.0:
|
|
54
|
-
return 0.0
|
|
55
|
-
return dot / (mag_a * mag_b)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def insert_memory(memory_id: int, content: str, confidence: float, *, source_type: str = "knowledge") -> None:
|
|
59
|
-
source_type = source_type if source_type in EMBEDDING_TABLES else "knowledge"
|
|
60
|
-
redacted_content, changed = redaction.redact_text(content)
|
|
61
|
-
embedding = embedding_engine.generate_embedding(redacted_content)
|
|
62
|
-
metadata_json = json.dumps({"redacted": changed, "source_type": source_type})
|
|
63
|
-
|
|
64
|
-
def _write() -> None:
|
|
65
|
-
conn = store.connect()
|
|
66
|
-
try:
|
|
67
|
-
_ensure_vector_table(conn)
|
|
68
|
-
conn.execute(
|
|
69
|
-
"INSERT INTO memory_index (source, confidence, metadata_json, content, schema_version) VALUES (?, ?, ?, ?, ?)",
|
|
70
|
-
(
|
|
71
|
-
f"{source_type}:{memory_id}",
|
|
72
|
-
confidence,
|
|
73
|
-
metadata_json,
|
|
74
|
-
redacted_content,
|
|
75
|
-
store.SCHEMA_VERSION,
|
|
76
|
-
),
|
|
77
|
-
)
|
|
78
|
-
if embedding:
|
|
79
|
-
emit_event(LOGFILE, "brain_memory_embedding_generated", status="ok", source_id=str(memory_id))
|
|
80
|
-
conn.execute(
|
|
81
|
-
"""
|
|
82
|
-
INSERT INTO vector_embeddings (id, source_type, source_id, embedding)
|
|
83
|
-
VALUES (?, ?, ?, ?)
|
|
84
|
-
ON CONFLICT(id) DO UPDATE SET embedding=excluded.embedding
|
|
85
|
-
""",
|
|
86
|
-
(f"{source_type}:{memory_id}", source_type, str(memory_id), json.dumps(embedding)),
|
|
87
|
-
)
|
|
88
|
-
conn.commit()
|
|
89
|
-
finally:
|
|
90
|
-
conn.close()
|
|
91
|
-
|
|
92
|
-
store.submit_write(_write, timeout=30.0)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def _load_table_rows(table: str, *, limit: int | None = None, descending: bool = False, missing_only: bool = False) -> List[Dict[str, Any]]:
|
|
96
|
-
conn = store.connect()
|
|
97
|
-
try:
|
|
98
|
-
order = "DESC" if descending else "ASC"
|
|
99
|
-
where = ""
|
|
100
|
-
params: list[Any] = []
|
|
101
|
-
if missing_only:
|
|
102
|
-
where = " WHERE CAST(id AS TEXT) NOT IN (SELECT source_id FROM vector_embeddings WHERE source_type = ?)"
|
|
103
|
-
params.append(table)
|
|
104
|
-
if limit is None:
|
|
105
|
-
rows = conn.execute(
|
|
106
|
-
f"SELECT id, content, confidence, metadata_json FROM {table}{where} ORDER BY id {order}",
|
|
107
|
-
tuple(params),
|
|
108
|
-
).fetchall()
|
|
109
|
-
else:
|
|
110
|
-
rows = conn.execute(
|
|
111
|
-
f"SELECT id, content, confidence, metadata_json FROM {table}{where} ORDER BY id {order} LIMIT ?",
|
|
112
|
-
tuple(params + [limit]),
|
|
113
|
-
).fetchall()
|
|
114
|
-
finally:
|
|
115
|
-
conn.close()
|
|
116
|
-
return [dict(row) for row in rows]
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def _embedding_input(text: str, *, table: str = "knowledge") -> str:
|
|
120
|
-
cleaned = _HTML_TAG_RE.sub(" ", text)
|
|
121
|
-
cleaned = _WHITESPACE_RE.sub(" ", cleaned).strip()
|
|
122
|
-
lowered = cleaned.lower()
|
|
123
|
-
artifactish = (
|
|
124
|
-
"| chunk " in lowered
|
|
125
|
-
or ".sql" in lowered
|
|
126
|
-
or "topology/" in lowered
|
|
127
|
-
or cleaned.count("),(") >= 8
|
|
128
|
-
)
|
|
129
|
-
if table == "knowledge" and artifactish:
|
|
130
|
-
return cleaned[:500]
|
|
131
|
-
if table == "knowledge" and len(cleaned) > 9000:
|
|
132
|
-
return cleaned[:1000]
|
|
133
|
-
if table == "reflections" and len(cleaned) > 8000:
|
|
134
|
-
return cleaned[:1200]
|
|
135
|
-
if len(cleaned) > 20000:
|
|
136
|
-
return cleaned[:2000]
|
|
137
|
-
if len(cleaned) > 12000:
|
|
138
|
-
return cleaned[:4000]
|
|
139
|
-
return cleaned[:_EMBEDDING_TEXT_LIMIT]
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def _prepare_embedding_rows(rows: Iterable[Dict[str, Any]], *, table: str) -> List[Dict[str, Any]]:
|
|
143
|
-
prepared: List[Dict[str, Any]] = []
|
|
144
|
-
embedding_cache: Dict[str, List[float] | None] = {}
|
|
145
|
-
for row in rows:
|
|
146
|
-
content = str(row.get("content") or "")
|
|
147
|
-
redacted_content, changed = redaction.redact_text(content)
|
|
148
|
-
embedding_input = _embedding_input(redacted_content, table=table)
|
|
149
|
-
cache_key = hashlib.sha256(embedding_input.encode("utf-8", errors="ignore")).hexdigest()
|
|
150
|
-
if cache_key in embedding_cache:
|
|
151
|
-
embedding = embedding_cache[cache_key]
|
|
152
|
-
else:
|
|
153
|
-
embedding = embedding_engine.generate_embedding(embedding_input)
|
|
154
|
-
embedding_cache[cache_key] = embedding
|
|
155
|
-
if not embedding:
|
|
156
|
-
continue
|
|
157
|
-
try:
|
|
158
|
-
metadata = json.loads(row.get("metadata_json") or "{}")
|
|
159
|
-
except Exception:
|
|
160
|
-
metadata = {}
|
|
161
|
-
metadata["redacted"] = changed
|
|
162
|
-
prepared.append(
|
|
163
|
-
{
|
|
164
|
-
"id": int(row["id"]),
|
|
165
|
-
"content": redacted_content,
|
|
166
|
-
"confidence": float(row.get("confidence") or 0.0),
|
|
167
|
-
"metadata_json": json.dumps(metadata),
|
|
168
|
-
"embedding": json.dumps(embedding),
|
|
169
|
-
"source_type": table,
|
|
170
|
-
}
|
|
171
|
-
)
|
|
172
|
-
return prepared
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def _write_embedding_chunk(table: str, rows: List[Dict[str, Any]]) -> int:
|
|
176
|
-
if not rows:
|
|
177
|
-
return 0
|
|
178
|
-
|
|
179
|
-
def _write() -> int:
|
|
180
|
-
conn = store.connect()
|
|
181
|
-
try:
|
|
182
|
-
_ensure_vector_table(conn)
|
|
183
|
-
for row in rows:
|
|
184
|
-
conn.execute(
|
|
185
|
-
f"UPDATE {table} SET content=?, metadata_json=? WHERE id=?",
|
|
186
|
-
(row["content"], row["metadata_json"], row["id"]),
|
|
187
|
-
)
|
|
188
|
-
conn.execute(
|
|
189
|
-
"""
|
|
190
|
-
INSERT INTO vector_embeddings (id, source_type, source_id, embedding)
|
|
191
|
-
VALUES (?, ?, ?, ?)
|
|
192
|
-
ON CONFLICT(id) DO UPDATE SET embedding=excluded.embedding
|
|
193
|
-
""",
|
|
194
|
-
(f"{table}:{row['id']}", table, str(row["id"]), row["embedding"]),
|
|
195
|
-
)
|
|
196
|
-
conn.commit()
|
|
197
|
-
return len(rows)
|
|
198
|
-
finally:
|
|
199
|
-
conn.close()
|
|
200
|
-
|
|
201
|
-
return int(store.submit_write(_write, timeout=60.0))
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
def index_memory(limit: int = 100, *, tables: Iterable[str] | None = None) -> int:
|
|
205
|
-
emit_event(LOGFILE, "brain_memory_vector_index_start", status="ok")
|
|
206
|
-
count = 0
|
|
207
|
-
for table in (tables or EMBEDDING_TABLES):
|
|
208
|
-
if table not in EMBEDDING_TABLES:
|
|
209
|
-
continue
|
|
210
|
-
prepared = _prepare_embedding_rows(_load_table_rows(table, limit=limit, descending=True), table=table)
|
|
211
|
-
for offset in range(0, len(prepared), _WRITE_CHUNK_SIZE):
|
|
212
|
-
count += _write_embedding_chunk(table, prepared[offset: offset + _WRITE_CHUNK_SIZE])
|
|
213
|
-
emit_event(LOGFILE, "brain_memory_vector_index_complete", status="ok", indexed=count)
|
|
214
|
-
return count
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def rebuild_vector_index(*, tables: Iterable[str] | None = None) -> int:
|
|
218
|
-
emit_event(LOGFILE, "brain_memory_vector_rebuild_start", status="ok")
|
|
219
|
-
if not _REBUILD_LOCK.acquire(blocking=False):
|
|
220
|
-
emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="skipped", reason="already_running")
|
|
221
|
-
return 0
|
|
222
|
-
count = 0
|
|
223
|
-
try:
|
|
224
|
-
requested_tables = [table for table in (tables or EMBEDDING_TABLES) if table in EMBEDDING_TABLES]
|
|
225
|
-
|
|
226
|
-
def _clear() -> None:
|
|
227
|
-
conn = store.connect()
|
|
228
|
-
try:
|
|
229
|
-
_ensure_vector_table(conn)
|
|
230
|
-
if requested_tables:
|
|
231
|
-
conn.executemany(
|
|
232
|
-
"DELETE FROM vector_embeddings WHERE source_type = ?",
|
|
233
|
-
[(table,) for table in requested_tables],
|
|
234
|
-
)
|
|
235
|
-
conn.commit()
|
|
236
|
-
finally:
|
|
237
|
-
conn.close()
|
|
238
|
-
|
|
239
|
-
store.submit_write(_clear, timeout=60.0)
|
|
240
|
-
for table in requested_tables:
|
|
241
|
-
prepared = _prepare_embedding_rows(_load_table_rows(table), table=table)
|
|
242
|
-
for offset in range(0, len(prepared), _WRITE_CHUNK_SIZE):
|
|
243
|
-
count += _write_embedding_chunk(table, prepared[offset: offset + _WRITE_CHUNK_SIZE])
|
|
244
|
-
finally:
|
|
245
|
-
_REBUILD_LOCK.release()
|
|
246
|
-
emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="ok", indexed=count)
|
|
247
|
-
return count
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def backfill_missing_vectors(*, tables: Iterable[str] | None = None, limit_per_table: int | None = None) -> int:
|
|
251
|
-
emit_event(LOGFILE, "brain_memory_vector_backfill_start", status="ok")
|
|
252
|
-
if not _REBUILD_LOCK.acquire(blocking=False):
|
|
253
|
-
emit_event(LOGFILE, "brain_memory_vector_backfill_complete", status="skipped", reason="already_running")
|
|
254
|
-
return 0
|
|
255
|
-
count = 0
|
|
256
|
-
try:
|
|
257
|
-
requested_tables = [table for table in (tables or EMBEDDING_TABLES) if table in EMBEDDING_TABLES]
|
|
258
|
-
for table in requested_tables:
|
|
259
|
-
prepared = _prepare_embedding_rows(
|
|
260
|
-
_load_table_rows(table, limit=limit_per_table, missing_only=True),
|
|
261
|
-
table=table,
|
|
262
|
-
)
|
|
263
|
-
for offset in range(0, len(prepared), _WRITE_CHUNK_SIZE):
|
|
264
|
-
count += _write_embedding_chunk(table, prepared[offset: offset + _WRITE_CHUNK_SIZE])
|
|
265
|
-
finally:
|
|
266
|
-
_REBUILD_LOCK.release()
|
|
267
|
-
emit_event(LOGFILE, "brain_memory_vector_backfill_complete", status="ok", indexed=count)
|
|
268
|
-
return count
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
def search_memory(query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
|
272
|
-
emit_event(LOGFILE, "brain_memory_vector_search_start", status="ok")
|
|
273
|
-
conn = store.connect()
|
|
274
|
-
_ensure_vector_table(conn)
|
|
275
|
-
|
|
276
|
-
query_embedding = embedding_engine.generate_embedding(query)
|
|
277
|
-
results: List[Dict[str, Any]] = []
|
|
278
|
-
|
|
279
|
-
if query_embedding:
|
|
280
|
-
rows = conn.execute("SELECT id, source_type, source_id, embedding FROM vector_embeddings").fetchall()
|
|
281
|
-
scored: List[Dict[str, Any]] = []
|
|
282
|
-
for row in rows:
|
|
283
|
-
try:
|
|
284
|
-
emb = json.loads(row["embedding"])
|
|
285
|
-
emb_list = [float(x) for x in emb]
|
|
286
|
-
except Exception:
|
|
287
|
-
continue
|
|
288
|
-
score = _cosine_similarity(query_embedding, emb_list)
|
|
289
|
-
scored.append(
|
|
290
|
-
{
|
|
291
|
-
"entry_id": row["id"],
|
|
292
|
-
"source_type": row["source_type"],
|
|
293
|
-
"source_id": row["source_id"],
|
|
294
|
-
"score": round(score, 6),
|
|
295
|
-
}
|
|
296
|
-
)
|
|
297
|
-
scored.sort(key=lambda item: item["score"], reverse=True)
|
|
298
|
-
results = scored[:limit]
|
|
299
|
-
|
|
300
|
-
if not results:
|
|
301
|
-
rows = conn.execute(
|
|
302
|
-
"SELECT id, source, content, confidence, metadata_json FROM memory_index WHERE content LIKE ? ORDER BY id DESC LIMIT ?",
|
|
303
|
-
(f"%{query}%", limit),
|
|
304
|
-
).fetchall()
|
|
305
|
-
fallback_results: List[Dict[str, Any]] = []
|
|
306
|
-
for row in rows:
|
|
307
|
-
source_ref = str(row["source"] or "")
|
|
308
|
-
source_type, _, source_id = source_ref.partition(":")
|
|
309
|
-
canonical_type = source_type if source_type in EMBEDDING_TABLES else "knowledge"
|
|
310
|
-
canonical_ref = f"{canonical_type}:{source_id}" if source_id else source_ref
|
|
311
|
-
fallback_results.append(
|
|
312
|
-
{
|
|
313
|
-
"entry_id": canonical_ref,
|
|
314
|
-
"source_type": canonical_type,
|
|
315
|
-
"source_id": source_id or str(row["id"]),
|
|
316
|
-
"score": float(row["confidence"] or 0.0),
|
|
317
|
-
"content": str(row["content"] or "")[:240],
|
|
318
|
-
"links": memory_links.get_memory_links(canonical_ref),
|
|
319
|
-
}
|
|
320
|
-
)
|
|
321
|
-
results = fallback_results
|
|
322
|
-
|
|
323
|
-
conn.close()
|
|
324
|
-
emit_event(LOGFILE, "brain_memory_vector_search_complete", status="ok", result_count=len(results))
|
|
325
|
-
return results
|
|
3
|
+
from ocmemog.runtime.memory.vector_index import * # noqa: F401,F403
|
|
@@ -1,11 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def get_model_for_role(role: str) -> str:
|
|
7
|
-
if role == "memory":
|
|
8
|
-
return config.OCMEMOG_MEMORY_MODEL
|
|
9
|
-
if role == "embedding":
|
|
10
|
-
return config.OCMEMOG_OPENAI_EMBED_MODEL
|
|
11
|
-
return config.OCMEMOG_MEMORY_MODEL
|
|
3
|
+
from ocmemog.runtime.model_roles import * # noqa: F401,F403
|
|
@@ -1,24 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
from brain.runtime import config
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@dataclass(frozen=True)
|
|
9
|
-
class ModelSelection:
|
|
10
|
-
provider_id: str = ""
|
|
11
|
-
model: str = ""
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def get_provider_for_role(role: str) -> ModelSelection:
|
|
15
|
-
if role != "embedding":
|
|
16
|
-
return ModelSelection()
|
|
17
|
-
provider = (config.BRAIN_EMBED_MODEL_PROVIDER or "").strip().lower()
|
|
18
|
-
if provider in {"openai", "openai_compatible", "openai-compatible"}:
|
|
19
|
-
return ModelSelection(provider_id="openai", model=config.OCMEMOG_OPENAI_EMBED_MODEL)
|
|
20
|
-
if provider in {"local-openai", "local_openai", "llamacpp", "llama.cpp"}:
|
|
21
|
-
return ModelSelection(provider_id="local-openai", model=config.OCMEMOG_LOCAL_EMBED_MODEL)
|
|
22
|
-
if provider in {"ollama", "local-ollama"}:
|
|
23
|
-
return ModelSelection(provider_id="ollama", model=config.OCMEMOG_OLLAMA_EMBED_MODEL)
|
|
24
|
-
return ModelSelection()
|
|
3
|
+
from ocmemog.runtime.model_router import * # noqa: F401,F403
|
|
@@ -1,68 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
import os
|
|
5
|
-
import urllib.request
|
|
6
|
-
|
|
7
|
-
from brain.runtime import config, state_store
|
|
8
|
-
from brain.runtime.instrumentation import emit_event
|
|
9
|
-
|
|
10
|
-
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class ProviderExecute:
|
|
14
|
-
def execute_embedding_call(self, selection, text: str) -> dict[str, object]:
|
|
15
|
-
provider_id = getattr(selection, "provider_id", "") or ""
|
|
16
|
-
model = getattr(selection, "model", "") or config.OCMEMOG_OPENAI_EMBED_MODEL
|
|
17
|
-
if provider_id in {"openai", "local-openai"}:
|
|
18
|
-
api_key = None
|
|
19
|
-
url_base = config.OCMEMOG_OPENAI_API_BASE
|
|
20
|
-
provider_label = "openai"
|
|
21
|
-
if provider_id == "openai":
|
|
22
|
-
api_key = os.environ.get("OCMEMOG_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
|
|
23
|
-
if not api_key:
|
|
24
|
-
return {}
|
|
25
|
-
else:
|
|
26
|
-
url_base = config.OCMEMOG_LOCAL_EMBED_BASE_URL
|
|
27
|
-
api_key = os.environ.get("OCMEMOG_LOCAL_EMBED_API_KEY") or os.environ.get("LOCAL_EMBED_API_KEY")
|
|
28
|
-
provider_label = "local-openai"
|
|
29
|
-
url = f"{url_base.rstrip('/')}/embeddings"
|
|
30
|
-
payload = json.dumps({"model": model, "input": text}).encode("utf-8")
|
|
31
|
-
req = urllib.request.Request(url, data=payload, method="POST")
|
|
32
|
-
if api_key:
|
|
33
|
-
req.add_header("Authorization", f"Bearer {api_key}")
|
|
34
|
-
req.add_header("Content-Type", "application/json")
|
|
35
|
-
try:
|
|
36
|
-
with urllib.request.urlopen(req, timeout=20) as resp:
|
|
37
|
-
data = json.loads(resp.read().decode("utf-8"))
|
|
38
|
-
except Exception as exc:
|
|
39
|
-
emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider=provider_label, error=str(exc))
|
|
40
|
-
return {}
|
|
41
|
-
try:
|
|
42
|
-
embedding = data["data"][0]["embedding"]
|
|
43
|
-
except Exception as exc:
|
|
44
|
-
emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider=provider_label, error=str(exc))
|
|
45
|
-
return {}
|
|
46
|
-
return {"embedding": embedding}
|
|
47
|
-
|
|
48
|
-
if provider_id == "ollama":
|
|
49
|
-
url = f"{config.OCMEMOG_OLLAMA_HOST.rstrip('/')}/api/embeddings"
|
|
50
|
-
payload = json.dumps({"model": model, "prompt": text}).encode("utf-8")
|
|
51
|
-
req = urllib.request.Request(url, data=payload, method="POST")
|
|
52
|
-
req.add_header("Content-Type", "application/json")
|
|
53
|
-
try:
|
|
54
|
-
with urllib.request.urlopen(req, timeout=20) as resp:
|
|
55
|
-
data = json.loads(resp.read().decode("utf-8"))
|
|
56
|
-
except Exception as exc:
|
|
57
|
-
emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider="ollama", error=str(exc))
|
|
58
|
-
return {}
|
|
59
|
-
embedding = data.get("embedding")
|
|
60
|
-
if not isinstance(embedding, list):
|
|
61
|
-
emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider="ollama", error="invalid_embedding")
|
|
62
|
-
return {}
|
|
63
|
-
return {"embedding": embedding}
|
|
64
|
-
|
|
65
|
-
return {}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
provider_execute = ProviderExecute()
|
|
3
|
+
from ocmemog.runtime.providers import * # noqa: F401,F403
|
|
@@ -1,14 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
|
|
5
|
-
EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}\b")
|
|
6
|
-
PHONE_RE = re.compile(r"\b(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?){2}\d{4}\b")
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def redact_text(text: str) -> tuple[str, bool]:
|
|
10
|
-
if not isinstance(text, str):
|
|
11
|
-
return "", False
|
|
12
|
-
redacted = EMAIL_RE.sub("[redacted-email]", text)
|
|
13
|
-
redacted = PHONE_RE.sub("[redacted-phone]", redacted)
|
|
14
|
-
return redacted, redacted != text
|
|
3
|
+
from ocmemog.runtime.security.redaction import * # noqa: F401,F403
|
|
@@ -1,25 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
from . import storage_paths
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def root_dir() -> Path:
|
|
9
|
-
return storage_paths.root_dir()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def data_dir() -> Path:
|
|
13
|
-
return storage_paths.data_dir()
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def memory_dir() -> Path:
|
|
17
|
-
return storage_paths.memory_dir()
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def reports_dir() -> Path:
|
|
21
|
-
return storage_paths.reports_dir()
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def memory_db_path() -> Path:
|
|
25
|
-
return storage_paths.memory_db_path()
|
|
3
|
+
from ocmemog.runtime.state_store import * # noqa: F401,F403
|
|
@@ -1,41 +1,3 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def root_dir() -> Path:
|
|
8
|
-
configured = os.environ.get("OCMEMOG_STATE_DIR") or os.environ.get("BRAIN_STATE_DIR")
|
|
9
|
-
if configured:
|
|
10
|
-
base = Path(configured).expanduser()
|
|
11
|
-
else:
|
|
12
|
-
base = Path(__file__).resolve().parents[2] / ".ocmemog-state"
|
|
13
|
-
base.mkdir(parents=True, exist_ok=True)
|
|
14
|
-
return base
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def data_dir() -> Path:
|
|
18
|
-
path = root_dir() / "data"
|
|
19
|
-
path.mkdir(parents=True, exist_ok=True)
|
|
20
|
-
return path
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def memory_dir() -> Path:
|
|
24
|
-
path = root_dir() / "memory"
|
|
25
|
-
path.mkdir(parents=True, exist_ok=True)
|
|
26
|
-
return path
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def reports_dir() -> Path:
|
|
30
|
-
path = root_dir() / "reports"
|
|
31
|
-
path.mkdir(parents=True, exist_ok=True)
|
|
32
|
-
return path
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def memory_db_path() -> Path:
|
|
36
|
-
override = os.environ.get("OCMEMOG_DB_PATH")
|
|
37
|
-
if override:
|
|
38
|
-
path = Path(override).expanduser()
|
|
39
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
40
|
-
return path
|
|
41
|
-
return memory_dir() / "brain_memory.sqlite3"
|
|
3
|
+
from ocmemog.runtime.storage_paths import * # noqa: F401,F403
|
|
@@ -2,28 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
## What this repo actually ships
|
|
4
4
|
|
|
5
|
-
ocmemog
|
|
5
|
+
ocmemog ships a repo-local memory implementation with a FastAPI sidecar, while still carrying some compatibility residue from earlier brAIn-derived runtime surfaces. The authoritative local implementation lives in:
|
|
6
6
|
|
|
7
|
-
- `
|
|
8
|
-
- `
|
|
9
|
-
- `
|
|
7
|
+
- `ocmemog/runtime/memory/store.py` for the main SQLite schema
|
|
8
|
+
- `ocmemog/runtime/memory/retrieval.py` for keyword-first retrieval
|
|
9
|
+
- `ocmemog/runtime/memory/vector_index.py` for embeddings and fallback semantic lookup
|
|
10
10
|
- `ocmemog/sidecar/app.py` for the plugin-facing HTTP API
|
|
11
11
|
|
|
12
|
-
Unlike brAIn, this repo does not ship the full cognition/runtime stack. Several modules under `brain/runtime/*` are compatibility shims so
|
|
12
|
+
Unlike brAIn, this repo does not ship the full cognition/runtime stack. Several modules under `brain/runtime/*` are compatibility shims so `ocmemog/runtime/*` can import cleanly.
|
|
13
13
|
|
|
14
14
|
## Storage layout
|
|
15
15
|
|
|
16
|
-
By default, ocmemog stores state under `.ocmemog-state/` at the repo root unless `OCMEMOG_STATE_DIR`
|
|
16
|
+
By default, ocmemog stores state under `.ocmemog-state/` at the repo root unless `OCMEMOG_STATE_DIR` overrides it. `BRAIN_STATE_DIR` remains as a legacy compatibility alias and should not be used for new deployments.
|
|
17
17
|
|
|
18
18
|
Primary files:
|
|
19
19
|
|
|
20
|
-
- `.ocmemog-state/memory/
|
|
21
|
-
- `.ocmemog-state/reports/
|
|
22
|
-
- `.ocmemog-state/data/person_memory.db`
|
|
23
|
-
- `.ocmemog-state/data/interaction_memory.db`
|
|
24
|
-
- `.ocmemog-state/data/sentiment_memory.db`
|
|
20
|
+
- `.ocmemog-state/memory/ocmemog_memory.sqlite3`
|
|
21
|
+
- `.ocmemog-state/reports/ocmemog_memory.log.jsonl`
|
|
25
22
|
- `.ocmemog-state/data/unresolved_state.db`
|
|
26
|
-
- `.ocmemog-state/data/memory_graph.db`
|
|
27
23
|
|
|
28
24
|
The main SQLite database owns these tables:
|
|
29
25
|
|
|
@@ -48,7 +44,7 @@ The current sidecar behavior is simpler than brAIn's full memory architecture:
|
|
|
48
44
|
Operational limits:
|
|
49
45
|
|
|
50
46
|
- Semantic fallback now rehydrates any embedded bucket (`knowledge`, `runbooks`, `lessons`) when there are no keyword hits.
|
|
51
|
-
- Default embeddings are local hash vectors (`
|
|
47
|
+
- Default embeddings are local hash vectors (`OCMEMOG_EMBED_MODEL_LOCAL=simple`; legacy alias: `BRAIN_EMBED_MODEL_LOCAL`), which are deterministic but weak.
|
|
52
48
|
- `runbooks`, `lessons`, `directives`, `reflections`, and `tasks` are now included in the default searchable categories and embedding index.
|
|
53
49
|
|
|
54
50
|
## Write paths
|
|
@@ -60,14 +56,14 @@ The main repo-local write paths are:
|
|
|
60
56
|
- `promote.promote_candidate()` writes to `promotions` plus one of `knowledge`, `runbooks`, or `lessons`
|
|
61
57
|
- `vector_index.insert_memory()` writes to `memory_index` and `vector_embeddings`
|
|
62
58
|
- `memory_links.add_memory_link()` writes link metadata inside the main memory DB
|
|
63
|
-
- `
|
|
59
|
+
- `unresolved_state` writes to a separate SQLite file under `.ocmemog-state/data`; core memory relationships and provenance now live in the main memory DB.
|
|
64
60
|
|
|
65
61
|
## Distillation and promotion
|
|
66
62
|
|
|
67
63
|
The brAIn docs describe a richer distill/promote pipeline. In ocmemog today:
|
|
68
64
|
|
|
69
|
-
- Distillation exists in `
|
|
70
|
-
- Model-backed distillation
|
|
65
|
+
- Distillation exists in `ocmemog/runtime/memory/distill.py`
|
|
66
|
+
- Model-backed distillation depends on the configured runtime inference provider and may fall back to heuristics when no usable provider is available
|
|
71
67
|
- The practical fallback is a first-line heuristic summary plus generated verification prompts
|
|
72
68
|
- Promotion is available locally and writes promoted summaries into `knowledge`, `runbooks`, or `lessons`
|
|
73
69
|
- Successful promotion also logs a reinforcement event and attempts vector indexing
|
|
@@ -80,11 +76,11 @@ Available support paths:
|
|
|
80
76
|
|
|
81
77
|
- `integrity.run_integrity_check()` checks for missing tables, orphan candidates, duplicate promotions, missing memory references, and index mismatches
|
|
82
78
|
- `health.get_memory_health()` reports counts and a coarse integrity summary
|
|
83
|
-
- `
|
|
79
|
+
- `ocmemog_memory.log.jsonl` captures retrieval, embedding, integrity, and promotion events
|
|
84
80
|
|
|
85
81
|
Known caveat:
|
|
86
82
|
|
|
87
|
-
- health/integrity
|
|
83
|
+
- health/integrity now use source coverage against `vector_embeddings`, but operator interpretation still depends on the active embedding backend and any compatibility-shim surfaces reported by runtime probe
|
|
88
84
|
|
|
89
85
|
## Sidecar contract
|
|
90
86
|
|
|
@@ -100,19 +96,19 @@ The sidecar also reports runtime readiness through `mode`, `missingDeps`, `todo`
|
|
|
100
96
|
|
|
101
97
|
## Runtime adapters
|
|
102
98
|
|
|
103
|
-
ocmemog
|
|
99
|
+
ocmemog now uses repo-local runtime adapters for inference + embeddings, with some compatibility residue still present behind the runtime boundary. The primary active surfaces are under `ocmemog/runtime/*` and require environment configuration:
|
|
104
100
|
|
|
105
|
-
- `
|
|
106
|
-
- `
|
|
107
|
-
- `
|
|
101
|
+
- `ocmemog/runtime/inference.py` → chat/inference routing (OpenAI or local-openai depending on configured provider)
|
|
102
|
+
- `ocmemog/runtime/providers.py` → embedding provider routing
|
|
103
|
+
- `ocmemog/runtime/model_roles.py` + `model_router.py` → role-to-model and provider routing
|
|
108
104
|
|
|
109
105
|
Effect on behavior:
|
|
110
106
|
|
|
111
107
|
- Distillation uses OpenAI when API key is set, otherwise falls back to heuristics
|
|
112
108
|
- Embeddings use OpenAI when configured, otherwise fall back to local hash or sentence-transformers
|
|
113
|
-
- Role-aware context selection is
|
|
109
|
+
- Role-aware context selection is now supported via `ocmemog.runtime.roles`, with native ownership tracked in runtime compatibility reporting.
|
|
114
110
|
|
|
115
111
|
## TODO: Missing runtime dependencies
|
|
116
112
|
|
|
117
|
-
-
|
|
113
|
+
- DONE: add a repo-local `brain.runtime.roles` implementation.
|
|
118
114
|
- TODO: decide whether to add additional provider backends beyond OpenAI
|
|
@@ -2,20 +2,33 @@
|
|
|
2
2
|
|
|
3
3
|
Use this checklist before publishing an ocmemog release.
|
|
4
4
|
|
|
5
|
+
The release gate is now codified by:
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
./scripts/ocmemog-release-check.sh
|
|
9
|
+
```
|
|
10
|
+
|
|
5
11
|
## Versioning
|
|
6
12
|
- [ ] Update `package.json` version
|
|
7
13
|
- [ ] Ensure release tag matches package version
|
|
8
14
|
- [ ] Update `CHANGELOG.md`
|
|
9
|
-
- [ ] Confirm README
|
|
15
|
+
- [ ] Confirm README/release docs reflect current versioned package identity and release workflow
|
|
10
16
|
|
|
11
17
|
## Validation
|
|
12
|
-
- [ ] `
|
|
13
|
-
- [ ]
|
|
14
|
-
- [ ]
|
|
15
|
-
- [ ]
|
|
16
|
-
-
|
|
18
|
+
- [ ] Install test deps for sidecar route tests: `python3 -m pip install -r requirements-test.txt`
|
|
19
|
+
- [ ] `./scripts/ocmemog-release-check.sh`
|
|
20
|
+
- [ ] Verify `tests/test_doctor.py` still passes for doctor health surfaces if you changed check coverage
|
|
21
|
+
- [ ] Verify `reports/release-gate-proof.json` exists after a passing gate and documents:
|
|
22
|
+
- live ingest/search/get/hydrate verification
|
|
23
|
+
- capped response selection (`memory/search` and `conversation/hydrate`)
|
|
24
|
+
- reference recall for distinctive injected memory
|
|
25
|
+
- [ ] If testing against a protected sidecar, confirm auth-bearing requests succeed (`x-ocmemog-token` or `Authorization: Bearer ...`)
|
|
17
26
|
- [ ] `npm pack --dry-run`
|
|
18
27
|
|
|
28
|
+
The `ocmemog-release-check` command enforces strict doctor mode for repo-locally safe checks, runs a focused pytest subset, validates explicit sidecar route behavior, runs live `/healthz`, `/memory/ingest`, `/memory/search`, `/memory/get`, and `/conversation/hydrate` smoke checks, and executes a full integrated proof in fresh state.
|
|
29
|
+
Legacy-state verification is optional and can be enabled with `OCMEMOG_RELEASE_LEGACY_ENDPOINT`.
|
|
30
|
+
GitHub CI runs the same release check command so local and CI validation remain aligned.
|
|
31
|
+
|
|
19
32
|
## Install flow
|
|
20
33
|
- [ ] Verify default installer path still works: `./scripts/install-ocmemog.sh`
|
|
21
34
|
- [ ] Verify optional prereq install path is documented correctly
|