@simbimbo/memory-ocmemog 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -0
- package/LICENSE +21 -0
- package/README.md +223 -0
- package/brain/__init__.py +1 -0
- package/brain/runtime/__init__.py +13 -0
- package/brain/runtime/config.py +21 -0
- package/brain/runtime/inference.py +83 -0
- package/brain/runtime/instrumentation.py +17 -0
- package/brain/runtime/memory/__init__.py +13 -0
- package/brain/runtime/memory/api.py +152 -0
- package/brain/runtime/memory/artifacts.py +33 -0
- package/brain/runtime/memory/candidate.py +89 -0
- package/brain/runtime/memory/context_builder.py +87 -0
- package/brain/runtime/memory/conversation_state.py +1825 -0
- package/brain/runtime/memory/distill.py +198 -0
- package/brain/runtime/memory/embedding_engine.py +94 -0
- package/brain/runtime/memory/freshness.py +91 -0
- package/brain/runtime/memory/health.py +42 -0
- package/brain/runtime/memory/integrity.py +170 -0
- package/brain/runtime/memory/interaction_memory.py +57 -0
- package/brain/runtime/memory/memory_consolidation.py +60 -0
- package/brain/runtime/memory/memory_gate.py +38 -0
- package/brain/runtime/memory/memory_graph.py +54 -0
- package/brain/runtime/memory/memory_links.py +109 -0
- package/brain/runtime/memory/memory_salience.py +235 -0
- package/brain/runtime/memory/memory_synthesis.py +33 -0
- package/brain/runtime/memory/memory_taxonomy.py +35 -0
- package/brain/runtime/memory/person_identity.py +83 -0
- package/brain/runtime/memory/person_memory.py +138 -0
- package/brain/runtime/memory/pondering_engine.py +577 -0
- package/brain/runtime/memory/promote.py +237 -0
- package/brain/runtime/memory/provenance.py +356 -0
- package/brain/runtime/memory/reinforcement.py +73 -0
- package/brain/runtime/memory/retrieval.py +153 -0
- package/brain/runtime/memory/semantic_search.py +66 -0
- package/brain/runtime/memory/sentiment_memory.py +67 -0
- package/brain/runtime/memory/store.py +400 -0
- package/brain/runtime/memory/tool_catalog.py +68 -0
- package/brain/runtime/memory/unresolved_state.py +93 -0
- package/brain/runtime/memory/vector_index.py +270 -0
- package/brain/runtime/model_roles.py +11 -0
- package/brain/runtime/model_router.py +22 -0
- package/brain/runtime/providers.py +59 -0
- package/brain/runtime/security/__init__.py +3 -0
- package/brain/runtime/security/redaction.py +14 -0
- package/brain/runtime/state_store.py +25 -0
- package/brain/runtime/storage_paths.py +41 -0
- package/docs/architecture/memory.md +118 -0
- package/docs/release-checklist.md +34 -0
- package/docs/reports/ocmemog-code-audit-2026-03-14.md +155 -0
- package/docs/usage.md +223 -0
- package/index.ts +726 -0
- package/ocmemog/__init__.py +1 -0
- package/ocmemog/sidecar/__init__.py +1 -0
- package/ocmemog/sidecar/app.py +1068 -0
- package/ocmemog/sidecar/compat.py +74 -0
- package/ocmemog/sidecar/transcript_watcher.py +425 -0
- package/openclaw.plugin.json +18 -0
- package/package.json +60 -0
- package/scripts/install-ocmemog.sh +277 -0
- package/scripts/launchagents/com.openclaw.ocmemog.guard.plist +22 -0
- package/scripts/launchagents/com.openclaw.ocmemog.ponder.plist +22 -0
- package/scripts/launchagents/com.openclaw.ocmemog.sidecar.plist +27 -0
- package/scripts/ocmemog-context.sh +15 -0
- package/scripts/ocmemog-continuity-benchmark.py +178 -0
- package/scripts/ocmemog-demo.py +122 -0
- package/scripts/ocmemog-failover-test.sh +17 -0
- package/scripts/ocmemog-guard.sh +11 -0
- package/scripts/ocmemog-install.sh +93 -0
- package/scripts/ocmemog-load-test.py +106 -0
- package/scripts/ocmemog-ponder.sh +30 -0
- package/scripts/ocmemog-recall-test.py +58 -0
- package/scripts/ocmemog-reindex-vectors.py +14 -0
- package/scripts/ocmemog-reliability-soak.py +177 -0
- package/scripts/ocmemog-sidecar.sh +46 -0
- package/scripts/ocmemog-soak-report.py +58 -0
- package/scripts/ocmemog-soak-test.py +44 -0
- package/scripts/ocmemog-test-rig.py +345 -0
- package/scripts/ocmemog-transcript-append.py +45 -0
- package/scripts/ocmemog-transcript-watcher.py +8 -0
- package/scripts/ocmemog-transcript-watcher.sh +7 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import Dict, Any
|
|
5
|
+
|
|
6
|
+
from brain.runtime.instrumentation import emit_event
|
|
7
|
+
from brain.runtime import state_store
|
|
8
|
+
from brain.runtime.memory import store
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _ensure_table(conn) -> None:
    """Create the tool_catalog table on *conn* if it does not already exist.

    Idempotent; callers invoke it before every read/write. *conn* is assumed
    to be a sqlite3 connection from store.connect() — TODO confirm.
    """
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS tool_catalog (
            tool_id TEXT PRIMARY KEY,
            description TEXT,
            permission_class TEXT,
            capability_tags TEXT,
            first_seen TEXT DEFAULT (datetime('now')),
            last_used TEXT
        )
        """
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def record_tool_metadata(metadata: Dict[str, Any]) -> None:
    """Upsert a tool's descriptive metadata into the tool_catalog table.

    Fix over the original: the connection is now closed in a ``finally``
    block, so it is not leaked when the INSERT or commit raises.

    Args:
        metadata: mapping with optional keys "tool_id", "description",
            "permission_class" (defaults to "restricted") and
            "capability_tags" (list; stored as a JSON string).
    """
    conn = store.connect()
    try:
        _ensure_table(conn)
        tool_id = metadata.get("tool_id")
        description = metadata.get("description", "")
        permission_class = metadata.get("permission_class", "restricted")
        # "or []" also normalises an explicit None value to an empty list.
        capability_tags = json.dumps(metadata.get("capability_tags", []) or [])
        conn.execute(
            """
            INSERT INTO tool_catalog (tool_id, description, permission_class, capability_tags)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(tool_id) DO UPDATE SET
                description=excluded.description,
                permission_class=excluded.permission_class,
                capability_tags=excluded.capability_tags
            """,
            (tool_id, description, permission_class, capability_tags),
        )
        conn.commit()
    finally:
        conn.close()
    emit_event(
        state_store.reports_dir() / "brain_memory.log.jsonl",
        "brain_memory_tool_catalog_update",
        status="ok",
        tool_id=tool_id,
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def record_tool_usage(tool_id: str) -> None:
    """Stamp last_used with the current datetime('now') for *tool_id*.

    A no-op for unknown tool ids (the UPDATE matches zero rows); the log
    event is emitted regardless. Fix over the original: the connection is
    closed in a ``finally`` block so it cannot leak on exceptions.
    """
    conn = store.connect()
    try:
        _ensure_table(conn)
        conn.execute(
            "UPDATE tool_catalog SET last_used=datetime('now') WHERE tool_id=?",
            (tool_id,),
        )
        conn.commit()
    finally:
        conn.close()
    emit_event(
        state_store.reports_dir() / "brain_memory.log.jsonl",
        "brain_memory_tool_catalog_update",
        status="ok",
        tool_id=tool_id,
    )
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import time
|
|
5
|
+
from typing import Dict, List
|
|
6
|
+
|
|
7
|
+
from brain.runtime import state_store
|
|
8
|
+
from brain.runtime.instrumentation import emit_event
|
|
9
|
+
|
|
10
|
+
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
|
|
11
|
+
|
|
12
|
+
# Closed set of accepted state categories. add_unresolved_state coerces any
# unknown category to "unresolved_question".
TYPES = {
    "unresolved_question",
    "paused_task",
    "interrupted_thread",
    "pending_decision",
    "incomplete_hypothesis",
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _connect() -> sqlite3.Connection:
    """Open the unresolved-state database, creating file and schema on demand.

    Rows come back as sqlite3.Row so callers can turn them into dicts.
    """
    db_path = state_store.data_dir() / "unresolved_state.db"
    db_path.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(str(db_path))
    connection.row_factory = sqlite3.Row
    # Schema is created lazily on every connect; CREATE IF NOT EXISTS makes
    # this idempotent.
    connection.execute(
        """
        CREATE TABLE IF NOT EXISTS unresolved_state (
            state_id INTEGER PRIMARY KEY AUTOINCREMENT,
            state_type TEXT NOT NULL,
            reference TEXT,
            summary TEXT,
            created_at TEXT NOT NULL DEFAULT (datetime('now')),
            resolved INTEGER NOT NULL DEFAULT 0
        )
        """
    )
    return connection
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def add_unresolved_state(state_type: str, reference: str, summary: str) -> int:
    """Insert one open item and return its new state_id (0 if unavailable).

    Unknown *state_type* values are coerced to "unresolved_question".
    Fixes over the original: uses ``cursor.lastrowid`` instead of a second
    ``SELECT last_insert_rowid()`` round-trip, and closes the connection in
    a ``finally`` block so it cannot leak on exceptions.

    NOTE(review): the timestamp is local time via time.strftime, while the
    column default uses SQLite datetime('now') — confirm whether the mix of
    timezones is intended.
    """
    if state_type not in TYPES:
        state_type = "unresolved_question"
    conn = _connect()
    try:
        cur = conn.execute(
            "INSERT INTO unresolved_state (state_type, reference, summary, created_at, resolved) VALUES (?, ?, ?, ?, 0)",
            (state_type, reference, summary, time.strftime("%Y-%m-%d %H:%M:%S")),
        )
        conn.commit()
        new_id = int(cur.lastrowid or 0)
    finally:
        conn.close()
    emit_event(LOGFILE, "brain_unresolved_state_added", status="ok", state_type=state_type)
    return new_id
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def list_unresolved_state(limit: int = 20) -> List[Dict[str, object]]:
    """Return up to *limit* open (resolved=0) items, newest first.

    Fix over the original: the connection is closed in a ``finally`` block
    so it is not leaked when the query raises.
    """
    conn = _connect()
    try:
        rows = conn.execute(
            "SELECT state_id, state_type, reference, summary, created_at FROM unresolved_state WHERE resolved=0 ORDER BY created_at DESC LIMIT ?",
            (limit,),
        ).fetchall()
    finally:
        conn.close()
    return [dict(row) for row in rows]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def resolve_unresolved_state(state_id: int) -> bool:
    """Mark one item resolved; return True only if a row was actually updated.

    Fixes over the original: it unconditionally returned True even for
    unknown ids — now the cursor's rowcount is checked — and the connection
    is closed in a ``finally`` block so it cannot leak on exceptions.
    """
    conn = _connect()
    try:
        cur = conn.execute("UPDATE unresolved_state SET resolved=1 WHERE state_id=?", (state_id,))
        conn.commit()
        changed = cur.rowcount > 0
    finally:
        conn.close()
    emit_event(LOGFILE, "brain_unresolved_state_resolved", status="ok", state_id=state_id)
    return changed
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def list_unresolved_state_for_references(references: List[str], limit: int = 20) -> List[Dict[str, object]]:
    """Return open items whose reference matches any entry in *references*.

    Blank/whitespace-only references are dropped; an empty effective list
    short-circuits to []. Results are newest first, capped at *limit*.
    Fix over the original: the connection is closed in a ``finally`` block
    so it is not leaked when the query raises.
    """
    refs = [str(ref).strip() for ref in references if str(ref).strip()]
    if not refs:
        return []
    # Build one "?" placeholder per reference; values stay parameterized.
    placeholders = ",".join("?" for _ in refs)
    conn = _connect()
    try:
        rows = conn.execute(
            f"SELECT state_id, state_type, reference, summary, created_at FROM unresolved_state WHERE resolved=0 AND reference IN ({placeholders}) ORDER BY created_at DESC LIMIT ?",
            (*refs, limit),
        ).fetchall()
    finally:
        conn.close()
    return [dict(row) for row in rows]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def count_unresolved_state() -> int:
    """Return the number of still-open (resolved=0) items.

    Fix over the original: the connection is closed in a ``finally`` block
    so it is not leaked when the query raises.
    """
    conn = _connect()
    try:
        row = conn.execute("SELECT COUNT(*) FROM unresolved_state WHERE resolved=0").fetchone()
    finally:
        conn.close()
    return int(row[0]) if row else 0
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Any, Dict, List, Iterable
|
|
7
|
+
|
|
8
|
+
from brain.runtime import state_store
|
|
9
|
+
from brain.runtime.instrumentation import emit_event
|
|
10
|
+
from brain.runtime.memory import embedding_engine, store, memory_links
|
|
11
|
+
from brain.runtime.security import redaction
|
|
12
|
+
|
|
13
|
+
# Shared event-log destination for every function in this module.
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"

# Tables whose rows are eligible for embedding; entry points below skip or
# coerce any table name outside this tuple.
EMBEDDING_TABLES: tuple[str, ...] = (
    "knowledge",
    "runbooks",
    "lessons",
    "directives",
    "reflections",
    "tasks",
)
# Single-flight guard: rebuild_vector_index skips instead of queueing when
# another rebuild already holds this lock.
_REBUILD_LOCK = threading.Lock()
# Number of prepared rows persisted per submit_write batch.
_WRITE_CHUNK_SIZE = 64
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _ensure_vector_table(conn) -> None:
    """Idempotently create the vector_embeddings table and its source index.

    Embeddings are stored as JSON text; ``id`` is "<source_type>:<source_id>".
    """
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS vector_embeddings (
            id TEXT PRIMARY KEY,
            source_type TEXT NOT NULL,
            source_id TEXT NOT NULL,
            embedding TEXT NOT NULL,
            created_at TEXT NOT NULL DEFAULT (datetime('now'))
        )
        """
    )
    # Secondary index supports the DELETE-by-source_type done during rebuilds.
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings (source_type, source_id)"
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _cosine_similarity(a: List[float], b: List[float]) -> float:
    """Cosine similarity over the overlapping prefix of *a* and *b*.

    Vectors of different lengths are truncated to the shorter one; any
    degenerate input (empty or zero-magnitude) yields 0.0.
    """
    overlap = min(len(a), len(b))
    if overlap == 0:
        return 0.0
    dot = 0.0
    sq_a = 0.0
    sq_b = 0.0
    for x, y in zip(a[:overlap], b[:overlap]):
        dot += x * y
        sq_a += x * x
        sq_b += y * y
    if sq_a == 0.0 or sq_b == 0.0:
        return 0.0
    return dot / (math.sqrt(sq_a) * math.sqrt(sq_b))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def insert_memory(memory_id: int, content: str, confidence: float, *, source_type: str = "knowledge") -> None:
    """Redact, embed, and persist one memory row plus its vector embedding.

    The content is PII-redacted before embedding or storage; the redaction
    flag is recorded in the row's metadata. The actual DB work is packaged
    in a closure handed to store.submit_write (presumably serialising all
    writers — confirm in store.submit_write).
    """
    # Unknown source types fall back to the default "knowledge" table.
    source_type = source_type if source_type in EMBEDDING_TABLES else "knowledge"
    redacted_content, changed = redaction.redact_text(content)
    embedding = embedding_engine.generate_embedding(redacted_content)
    metadata_json = json.dumps({"redacted": changed, "source_type": source_type})

    def _write() -> None:
        # Executed via store.submit_write below; opens its own connection.
        conn = store.connect()
        try:
            _ensure_vector_table(conn)
            conn.execute(
                "INSERT INTO memory_index (source, confidence, metadata_json, content, schema_version) VALUES (?, ?, ?, ?, ?)",
                (
                    f"{source_type}:{memory_id}",
                    confidence,
                    metadata_json,
                    redacted_content,
                    store.SCHEMA_VERSION,
                ),
            )
            # Embedding is optional: an empty embedding still records the
            # memory_index row, just without a vector entry.
            if embedding:
                emit_event(LOGFILE, "brain_memory_embedding_generated", status="ok", source_id=str(memory_id))
                conn.execute(
                    """
                    INSERT INTO vector_embeddings (id, source_type, source_id, embedding)
                    VALUES (?, ?, ?, ?)
                    ON CONFLICT(id) DO UPDATE SET embedding=excluded.embedding
                    """,
                    (f"{source_type}:{memory_id}", source_type, str(memory_id), json.dumps(embedding)),
                )
            # Single commit covers both statements.
            conn.commit()
        finally:
            conn.close()

    store.submit_write(_write, timeout=30.0)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _load_table_rows(table: str, *, limit: int | None = None, descending: bool = False) -> List[Dict[str, Any]]:
    """Fetch id/content/confidence/metadata rows from *table*, ordered by id.

    NOTE(review): *table* is interpolated into the SQL; callers restrict it
    to EMBEDDING_TABLES — confirm no untrusted value reaches here.
    """
    direction = "DESC" if descending else "ASC"
    base_query = f"SELECT id, content, confidence, metadata_json FROM {table} ORDER BY id {direction}"
    conn = store.connect()
    try:
        if limit is None:
            fetched = conn.execute(base_query).fetchall()
        else:
            fetched = conn.execute(base_query + " LIMIT ?", (limit,)).fetchall()
    finally:
        conn.close()
    return [dict(item) for item in fetched]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _prepare_embedding_rows(rows: Iterable[Dict[str, Any]], *, table: str) -> List[Dict[str, Any]]:
    """Redact and embed raw table rows into write-ready dicts.

    Rows for which no embedding is produced are dropped; unparsable
    metadata_json is replaced with an empty dict. The redaction flag is
    folded back into the row's metadata.
    """
    prepared: List[Dict[str, Any]] = []
    for raw in rows:
        original_text = str(raw.get("content") or "")
        clean_text, was_redacted = redaction.redact_text(original_text)
        vector = embedding_engine.generate_embedding(clean_text)
        if not vector:
            continue
        try:
            meta = json.loads(raw.get("metadata_json") or "{}")
        except Exception:
            meta = {}
        meta["redacted"] = was_redacted
        prepared.append(
            {
                "id": int(raw["id"]),
                "content": clean_text,
                "confidence": float(raw.get("confidence") or 0.0),
                "metadata_json": json.dumps(meta),
                "embedding": json.dumps(vector),
                "source_type": table,
            }
        )
    return prepared
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _write_embedding_chunk(table: str, rows: List[Dict[str, Any]]) -> int:
    """Persist one chunk of prepared rows and return how many were written.

    Each row's redacted content/metadata is written back to *table* and its
    embedding is upserted into vector_embeddings, all in one transaction
    submitted through the store's writer.
    """
    if not rows:
        return 0

    def _apply() -> int:
        conn = store.connect()
        try:
            _ensure_vector_table(conn)
            for item in rows:
                conn.execute(
                    f"UPDATE {table} SET content=?, metadata_json=? WHERE id=?",
                    (item["content"], item["metadata_json"], item["id"]),
                )
                conn.execute(
                    """
                    INSERT INTO vector_embeddings (id, source_type, source_id, embedding)
                    VALUES (?, ?, ?, ?)
                    ON CONFLICT(id) DO UPDATE SET embedding=excluded.embedding
                    """,
                    (f"{table}:{item['id']}", table, str(item["id"]), item["embedding"]),
                )
            conn.commit()
            return len(rows)
        finally:
            conn.close()

    return int(store.submit_write(_apply, timeout=60.0))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def index_memory(limit: int = 100, *, tables: Iterable[str] | None = None) -> int:
    """Embed the most recent *limit* rows of each requested table.

    Names outside EMBEDDING_TABLES are silently skipped. Returns the total
    number of rows written to the vector store.
    """
    emit_event(LOGFILE, "brain_memory_vector_index_start", status="ok")
    written = 0
    for name in (tables or EMBEDDING_TABLES):
        if name not in EMBEDDING_TABLES:
            continue
        recent = _load_table_rows(name, limit=limit, descending=True)
        prepared = _prepare_embedding_rows(recent, table=name)
        for start in range(0, len(prepared), _WRITE_CHUNK_SIZE):
            written += _write_embedding_chunk(name, prepared[start : start + _WRITE_CHUNK_SIZE])
    emit_event(LOGFILE, "brain_memory_vector_index_complete", status="ok", indexed=written)
    return written
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def rebuild_vector_index(*, tables: Iterable[str] | None = None) -> int:
    """Drop and regenerate embeddings for the requested tables (single-flight).

    If another rebuild already holds the lock, this call logs a skip and
    returns 0 instead of waiting. Returns the number of rows re-indexed.
    """
    emit_event(LOGFILE, "brain_memory_vector_rebuild_start", status="ok")
    if not _REBUILD_LOCK.acquire(blocking=False):
        emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="skipped", reason="already_running")
        return 0
    total = 0
    try:
        wanted = [name for name in (tables or EMBEDDING_TABLES) if name in EMBEDDING_TABLES]

        def _clear() -> None:
            # Remove existing embeddings for the requested tables only.
            conn = store.connect()
            try:
                _ensure_vector_table(conn)
                if wanted:
                    conn.executemany(
                        "DELETE FROM vector_embeddings WHERE source_type = ?",
                        [(name,) for name in wanted],
                    )
                conn.commit()
            finally:
                conn.close()

        store.submit_write(_clear, timeout=60.0)
        for name in wanted:
            prepared = _prepare_embedding_rows(_load_table_rows(name), table=name)
            for start in range(0, len(prepared), _WRITE_CHUNK_SIZE):
                total += _write_embedding_chunk(name, prepared[start : start + _WRITE_CHUNK_SIZE])
    finally:
        _REBUILD_LOCK.release()
    emit_event(LOGFILE, "brain_memory_vector_rebuild_complete", status="ok", indexed=total)
    return total
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def search_memory(query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Semantic search over stored embeddings with a LIKE-based fallback.

    First scores every stored embedding against the query's embedding by
    cosine similarity; if no vector results are produced (no query
    embedding, or an empty index), falls back to a substring match over
    memory_index. Fix over the original: the connection is closed in a
    ``finally`` block, so it is not leaked when anything raises mid-search.
    """
    emit_event(LOGFILE, "brain_memory_vector_search_start", status="ok")
    conn = store.connect()
    results: List[Dict[str, Any]] = []
    try:
        _ensure_vector_table(conn)
        query_embedding = embedding_engine.generate_embedding(query)

        if query_embedding:
            rows = conn.execute("SELECT id, source_type, source_id, embedding FROM vector_embeddings").fetchall()
            scored: List[Dict[str, Any]] = []
            for row in rows:
                try:
                    emb_list = [float(x) for x in json.loads(row["embedding"])]
                except Exception:
                    # Skip rows whose stored embedding payload is corrupt.
                    continue
                scored.append(
                    {
                        "entry_id": row["id"],
                        "source_type": row["source_type"],
                        "source_id": row["source_id"],
                        "score": round(_cosine_similarity(query_embedding, emb_list), 6),
                    }
                )
            scored.sort(key=lambda item: item["score"], reverse=True)
            results = scored[:limit]

        if not results:
            rows = conn.execute(
                "SELECT id, source, content, confidence, metadata_json FROM memory_index WHERE content LIKE ? ORDER BY id DESC LIMIT ?",
                (f"%{query}%", limit),
            ).fetchall()
            fallback_results: List[Dict[str, Any]] = []
            for row in rows:
                source_ref = str(row["source"] or "")
                source_type, _, source_id = source_ref.partition(":")
                # Normalise unknown source types to "knowledge".
                canonical_type = source_type if source_type in EMBEDDING_TABLES else "knowledge"
                canonical_ref = f"{canonical_type}:{source_id}" if source_id else source_ref
                fallback_results.append(
                    {
                        "entry_id": canonical_ref,
                        "source_type": canonical_type,
                        "source_id": source_id or str(row["id"]),
                        "score": float(row["confidence"] or 0.0),
                        "content": str(row["content"] or "")[:240],
                        "links": memory_links.get_memory_links(canonical_ref),
                    }
                )
            results = fallback_results
    finally:
        conn.close()
    emit_event(LOGFILE, "brain_memory_vector_search_complete", status="ok", result_count=len(results))
    return results
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from brain.runtime import config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_model_for_role(role: str) -> str:
    """Return the configured model identifier for *role*.

    "embedding" maps to the embed model; "memory" and every unrecognised
    role fall back to the memory model.
    """
    if role == "embedding":
        return config.OCMEMOG_OPENAI_EMBED_MODEL
    return config.OCMEMOG_MEMORY_MODEL
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from brain.runtime import config
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class ModelSelection:
    """Immutable (provider, model) pair; empty strings mean "no selection"."""

    # Registry id of the provider ("openai", "ollama", or "" when unresolved).
    provider_id: str = ""
    # Provider-specific model identifier; "" when unresolved.
    model: str = ""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_provider_for_role(role: str) -> ModelSelection:
    """Resolve the embedding provider/model configured for *role*.

    Only the "embedding" role is routed; every other role — and any
    unrecognised provider name — yields an empty ModelSelection.
    """
    if role != "embedding":
        return ModelSelection()
    configured = (config.BRAIN_EMBED_MODEL_PROVIDER or "").strip().lower()
    openai_aliases = {"openai", "openai_compatible", "openai-compatible"}
    ollama_aliases = {"ollama", "local-ollama"}
    if configured in openai_aliases:
        return ModelSelection(provider_id="openai", model=config.OCMEMOG_OPENAI_EMBED_MODEL)
    if configured in ollama_aliases:
        return ModelSelection(provider_id="ollama", model=config.OCMEMOG_OLLAMA_EMBED_MODEL)
    return ModelSelection()
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import urllib.request
|
|
6
|
+
|
|
7
|
+
from brain.runtime import config, state_store
|
|
8
|
+
from brain.runtime.instrumentation import emit_event
|
|
9
|
+
|
|
10
|
+
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ProviderExecute:
    """Thin HTTP client for embedding providers (OpenAI-compatible, Ollama).

    Improvement over the original: the duplicated request/decode/log logic
    for the two providers is factored into the _post_json helper; behavior
    (URLs, payload shapes, headers, timeouts, logged events) is unchanged.
    """

    @staticmethod
    def _post_json(url: str, payload: Dict[str, object], headers: Dict[str, str], provider: str) -> Dict[str, object] | None:
        """POST *payload* as JSON to *url* and decode the JSON response.

        Returns the decoded body, or None after logging a provider error on
        any network or decode failure.
        """
        body = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(url, data=body, method="POST")
        req.add_header("Content-Type", "application/json")
        for key, value in headers.items():
            req.add_header(key, value)
        try:
            with urllib.request.urlopen(req, timeout=20) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except Exception as exc:
            emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider=provider, error=str(exc))
            return None

    def execute_embedding_call(self, selection, text: str) -> dict[str, object]:
        """Request an embedding for *text* from the provider in *selection*.

        Returns {"embedding": [...]} on success, or {} on any failure
        (missing API key, transport error, malformed response) or when the
        provider id is unrecognised.

        NOTE(review): an empty model falls back to the OpenAI embed model
        even on the ollama branch — confirm that is intended.
        """
        provider_id = getattr(selection, "provider_id", "") or ""
        model = getattr(selection, "model", "") or config.OCMEMOG_OPENAI_EMBED_MODEL

        if provider_id == "openai":
            api_key = os.environ.get("OCMEMOG_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
            if not api_key:
                return {}
            url = f"{config.OCMEMOG_OPENAI_API_BASE.rstrip('/')}/embeddings"
            data = self._post_json(
                url,
                {"model": model, "input": text},
                {"Authorization": f"Bearer {api_key}"},
                "openai",
            )
            if data is None:
                return {}
            try:
                embedding = data["data"][0]["embedding"]
            except Exception as exc:
                emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider="openai", error=str(exc))
                return {}
            return {"embedding": embedding}

        if provider_id == "ollama":
            url = f"{config.OCMEMOG_OLLAMA_HOST.rstrip('/')}/api/embeddings"
            data = self._post_json(url, {"model": model, "prompt": text}, {}, "ollama")
            if data is None:
                return {}
            embedding = data.get("embedding")
            if not isinstance(embedding, list):
                emit_event(LOGFILE, "brain_embedding_provider_error", status="error", provider="ollama", error="invalid_embedding")
                return {}
            return {"embedding": embedding}

        return {}


# Module-level singleton used by callers of this provider layer.
provider_execute = ProviderExecute()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Email addresses such as "user.name+tag@example.com".
EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}\b")
# North-American-style phone numbers, e.g. "555-123-4567" or "+1 (555) 123 4567".
PHONE_RE = re.compile(r"\b(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?){2}\d{4}\b")


def redact_text(text: str) -> tuple[str, bool]:
    """Replace emails and phone numbers with redaction placeholders.

    Returns the scrubbed text plus a flag saying whether anything changed.
    Non-string input yields ("", False).
    """
    if not isinstance(text, str):
        return "", False
    # Emails are substituted first, then phone numbers, matching the
    # established redaction order.
    scrubbed = PHONE_RE.sub("[redacted-phone]", EMAIL_RE.sub("[redacted-email]", text))
    return scrubbed, scrubbed != text
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from . import storage_paths
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def root_dir() -> Path:
    """Root of the on-disk state tree (delegates to storage_paths)."""
    return storage_paths.root_dir()


def data_dir() -> Path:
    """Directory for structured data files (delegates to storage_paths)."""
    return storage_paths.data_dir()


def memory_dir() -> Path:
    """Directory holding the memory database (delegates to storage_paths)."""
    return storage_paths.memory_dir()


def reports_dir() -> Path:
    """Directory for report/log output (delegates to storage_paths)."""
    return storage_paths.reports_dir()


def memory_db_path() -> Path:
    """Path of the memory SQLite database file (delegates to storage_paths)."""
    return storage_paths.memory_db_path()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def root_dir() -> Path:
    """Resolve (and create) the root state directory.

    Honours OCMEMOG_STATE_DIR first, then BRAIN_STATE_DIR, falling back to
    a ".ocmemog-state" directory two levels above this module.
    """
    override = os.environ.get("OCMEMOG_STATE_DIR") or os.environ.get("BRAIN_STATE_DIR")
    if override:
        root = Path(override).expanduser()
    else:
        root = Path(__file__).resolve().parents[2] / ".ocmemog-state"
    root.mkdir(parents=True, exist_ok=True)
    return root


def _subdir(name: str) -> Path:
    """Create and return a named child directory of the state root."""
    child = root_dir() / name
    child.mkdir(parents=True, exist_ok=True)
    return child


def data_dir() -> Path:
    """Directory for structured data files."""
    return _subdir("data")


def memory_dir() -> Path:
    """Directory holding the memory database."""
    return _subdir("memory")


def reports_dir() -> Path:
    """Directory for report/log output."""
    return _subdir("reports")


def memory_db_path() -> Path:
    """Location of the memory SQLite file; OCMEMOG_DB_PATH overrides it."""
    configured = os.environ.get("OCMEMOG_DB_PATH")
    if configured:
        db_path = Path(configured).expanduser()
        db_path.parent.mkdir(parents=True, exist_ok=True)
        return db_path
    return memory_dir() / "brain_memory.sqlite3"
|