@simbimbo/memory-ocmemog 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/README.md +85 -18
- package/brain/runtime/__init__.py +2 -12
- package/brain/runtime/config.py +1 -24
- package/brain/runtime/inference.py +1 -151
- package/brain/runtime/instrumentation.py +1 -15
- package/brain/runtime/memory/__init__.py +3 -13
- package/brain/runtime/memory/api.py +1 -1219
- package/brain/runtime/memory/candidate.py +1 -185
- package/brain/runtime/memory/conversation_state.py +1 -1823
- package/brain/runtime/memory/distill.py +1 -344
- package/brain/runtime/memory/embedding_engine.py +1 -92
- package/brain/runtime/memory/freshness.py +1 -112
- package/brain/runtime/memory/health.py +1 -40
- package/brain/runtime/memory/integrity.py +1 -186
- package/brain/runtime/memory/memory_consolidation.py +1 -58
- package/brain/runtime/memory/memory_links.py +1 -107
- package/brain/runtime/memory/memory_salience.py +1 -233
- package/brain/runtime/memory/memory_synthesis.py +1 -31
- package/brain/runtime/memory/memory_taxonomy.py +1 -33
- package/brain/runtime/memory/pondering_engine.py +1 -654
- package/brain/runtime/memory/promote.py +1 -277
- package/brain/runtime/memory/provenance.py +1 -406
- package/brain/runtime/memory/reinforcement.py +1 -71
- package/brain/runtime/memory/retrieval.py +1 -210
- package/brain/runtime/memory/semantic_search.py +1 -64
- package/brain/runtime/memory/store.py +1 -429
- package/brain/runtime/memory/unresolved_state.py +1 -91
- package/brain/runtime/memory/vector_index.py +1 -323
- package/brain/runtime/model_roles.py +1 -9
- package/brain/runtime/model_router.py +1 -22
- package/brain/runtime/providers.py +1 -66
- package/brain/runtime/security/redaction.py +1 -12
- package/brain/runtime/state_store.py +1 -23
- package/brain/runtime/storage_paths.py +1 -39
- package/docs/architecture/memory.md +20 -24
- package/docs/release-checklist.md +19 -6
- package/docs/usage.md +33 -17
- package/index.ts +8 -1
- package/ocmemog/__init__.py +11 -0
- package/ocmemog/doctor.py +1255 -0
- package/ocmemog/runtime/__init__.py +18 -0
- package/ocmemog/runtime/_compat_bridge.py +28 -0
- package/ocmemog/runtime/config.py +35 -0
- package/ocmemog/runtime/identity.py +115 -0
- package/ocmemog/runtime/inference.py +164 -0
- package/ocmemog/runtime/instrumentation.py +20 -0
- package/ocmemog/runtime/memory/__init__.py +91 -0
- package/ocmemog/runtime/memory/api.py +1431 -0
- package/ocmemog/runtime/memory/candidate.py +192 -0
- package/ocmemog/runtime/memory/conversation_state.py +1831 -0
- package/ocmemog/runtime/memory/distill.py +282 -0
- package/ocmemog/runtime/memory/embedding_engine.py +151 -0
- package/ocmemog/runtime/memory/freshness.py +114 -0
- package/ocmemog/runtime/memory/health.py +57 -0
- package/ocmemog/runtime/memory/integrity.py +208 -0
- package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
- package/ocmemog/runtime/memory/memory_links.py +109 -0
- package/ocmemog/runtime/memory/memory_salience.py +235 -0
- package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
- package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
- package/ocmemog/runtime/memory/pondering_engine.py +681 -0
- package/ocmemog/runtime/memory/promote.py +279 -0
- package/ocmemog/runtime/memory/provenance.py +408 -0
- package/ocmemog/runtime/memory/reinforcement.py +73 -0
- package/ocmemog/runtime/memory/retrieval.py +224 -0
- package/ocmemog/runtime/memory/semantic_search.py +66 -0
- package/ocmemog/runtime/memory/store.py +433 -0
- package/ocmemog/runtime/memory/unresolved_state.py +93 -0
- package/ocmemog/runtime/memory/vector_index.py +411 -0
- package/ocmemog/runtime/model_roles.py +16 -0
- package/ocmemog/runtime/model_router.py +29 -0
- package/ocmemog/runtime/providers.py +79 -0
- package/ocmemog/runtime/roles.py +92 -0
- package/ocmemog/runtime/security/__init__.py +8 -0
- package/ocmemog/runtime/security/redaction.py +17 -0
- package/ocmemog/runtime/state_store.py +34 -0
- package/ocmemog/runtime/storage_paths.py +70 -0
- package/ocmemog/sidecar/app.py +311 -23
- package/ocmemog/sidecar/compat.py +50 -13
- package/ocmemog/sidecar/transcript_watcher.py +391 -190
- package/openclaw.plugin.json +4 -0
- package/package.json +1 -1
- package/scripts/ocmemog-backfill-vectors.py +5 -3
- package/scripts/ocmemog-continuity-benchmark.py +1 -1
- package/scripts/ocmemog-demo.py +1 -1
- package/scripts/ocmemog-doctor.py +15 -0
- package/scripts/ocmemog-install.sh +29 -7
- package/scripts/ocmemog-integrated-proof.py +373 -0
- package/scripts/ocmemog-reindex-vectors.py +5 -3
- package/scripts/ocmemog-release-check.sh +330 -0
- package/scripts/ocmemog-sidecar.sh +4 -2
- package/scripts/ocmemog-test-rig.py +5 -3
- package/brain/runtime/memory/artifacts.py +0 -33
- package/brain/runtime/memory/context_builder.py +0 -112
- package/brain/runtime/memory/interaction_memory.py +0 -57
- package/brain/runtime/memory/memory_gate.py +0 -38
- package/brain/runtime/memory/memory_graph.py +0 -54
- package/brain/runtime/memory/person_identity.py +0 -83
- package/brain/runtime/memory/person_memory.py +0 -138
- package/brain/runtime/memory/sentiment_memory.py +0 -67
- package/brain/runtime/memory/tool_catalog.py +0 -68
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Dict, List
|
|
7
|
+
|
|
8
|
+
from ocmemog.runtime import inference, model_roles, state_store
|
|
9
|
+
from ocmemog.runtime.instrumentation import emit_event
|
|
10
|
+
from ocmemog.runtime.memory import candidate, provenance, store
|
|
11
|
+
from ocmemog.runtime.security import redaction
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _normalize(text: str) -> str:
|
|
15
|
+
return re.sub(r"\s+", " ", text.lower()).strip()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _heuristic_summary(text: str) -> str:
|
|
19
|
+
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
|
20
|
+
if not lines:
|
|
21
|
+
return ""
|
|
22
|
+
return lines[0][:240]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _should_skip_local_distill(text: str) -> bool:
    """Return True when *text* is too short or too trivial to distill locally."""
    cleaned = _normalize(text)
    if len(cleaned) < 24:  # also covers the empty string
        return True
    return cleaned in {"ok", "okay", "done", "fixed", "working", "success", "positive feedback"}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _local_distill_summary(text: str) -> str:
    """Ask the local ponder model for a distilled summary; '' on any failure."""
    if _should_skip_local_distill(text):
        return ""
    model = os.environ.get("OCMEMOG_PONDER_MODEL", "local-openai:qwen2.5-7b-instruct")
    prompt = (
        "Distill this experience into one concise operational summary. "
        "Prefer concrete cause/effect, decision, or reusable takeaway. "
        "Keep it under 220 characters. Return NONE if there is no meaningful takeaway.\n\n"
        f"Experience:\n{text}\n\n"
        "Summary:"
    )
    try:
        response = inference.infer(prompt, provider_name=model)
    except Exception:
        return ""
    if response.get("status") != "ok":
        return ""
    raw_output = str(response.get("output", "")).strip()
    cleaned = re.sub(r"^(Summary|Sentence|Lesson):\s*", "", raw_output, flags=re.IGNORECASE).strip()
    if not cleaned:
        return ""
    if cleaned.upper().startswith("NONE"):
        return ""
    return cleaned[:240]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _frontier_distill_summary(text: str) -> str:
    """Distill via the frontier 'memory'-role model; '' on any failure."""
    try:
        frontier_model = model_roles.get_model_for_role("memory")
        response = inference.infer(
            f"Distill this experience into a concise summary:\n\n{text}".strip(),
            provider_name=frontier_model,
        )
        if response.get("status") == "ok":
            return str(response.get("output", "")).strip()[:240]
    except Exception:
        return ""
    return ""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _needs_frontier_refine(summary: str, source: str) -> bool:
    """Decide whether the local summary is weak enough to warrant a frontier pass."""
    if not summary:
        return True
    generic_openers = ("be ", "always ", "remember ", "good job", "be careful")
    if summary.lower().strip().startswith(generic_openers):
        return True
    if len(summary) < 24 or len(summary) > len(source):
        return True
    # A summary indistinguishable from the first-line heuristic adds nothing.
    return _normalize(summary) == _normalize(_heuristic_summary(source))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _reject_distilled_summary(summary: str, source: str) -> bool:
    """Return True when *summary* is too trivial or generic to become a candidate.

    Rejects summaries that are empty, stock acknowledgements, very short,
    generic filler openers, or that merely parrot a long/multi-line source.
    """
    lowered = _normalize(summary)
    if not lowered:
        return True
    if lowered in {"ok", "okay", "done", "fixed", "working", "positive feedback", "success", "passed"}:
        return True
    if len(lowered) < 16:
        return True
    if lowered.startswith(("good job", "be proactive", "be thorough", "always check", "always remember")):
        return True
    if source and lowered == _normalize(source):
        # BUG FIX: the newline check previously ran against the whitespace-collapsed
        # copy, where "\n" can never appear, making it dead code. Check the raw
        # source so a summary that parrots any multi-line source is rejected too.
        if "\n" in str(source):
            return True
        compact_source = re.sub(r"\s+", " ", str(source or "")).strip()
        if len(compact_source) > 120:
            return True
    return False
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _verification_points(text: str) -> List[str]:
|
|
105
|
+
points = []
|
|
106
|
+
if "verify" in text.lower():
|
|
107
|
+
points.append("Verify referenced assumptions")
|
|
108
|
+
if "risk" in text.lower():
|
|
109
|
+
points.append("Validate risk and mitigation")
|
|
110
|
+
if not points:
|
|
111
|
+
points.append("Confirm key facts before promotion")
|
|
112
|
+
return points[:3]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _candidate_score(summary: str, source: str) -> float:
|
|
116
|
+
if not source:
|
|
117
|
+
return 0.0
|
|
118
|
+
ratio = len(summary) / max(1, len(source))
|
|
119
|
+
score = 1.0 - min(1.0, ratio * 0.5)
|
|
120
|
+
return round(max(0.1, min(1.0, score)), 3)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _row_value(row: Any, key: str, fallback_index: int | None = None) -> Any:
|
|
124
|
+
if isinstance(row, dict):
|
|
125
|
+
return row.get(key)
|
|
126
|
+
try:
|
|
127
|
+
return row[key]
|
|
128
|
+
except Exception:
|
|
129
|
+
if fallback_index is None:
|
|
130
|
+
return None
|
|
131
|
+
try:
|
|
132
|
+
return row[fallback_index]
|
|
133
|
+
except Exception:
|
|
134
|
+
return None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def distill_experiences(limit: int = 10) -> List[Dict[str, Any]]:
    """Distill the most recent experiences into promotion candidates.

    Reads up to *limit* rows from the ``experiences`` table, summarizes each via
    the local model (escalating to the frontier model when the local summary is
    weak), drops trivial/duplicate summaries, and records the survivors through
    ``candidate.create_candidate``.

    Returns the list of created candidate payloads (possibly empty).
    """
    emit_event(state_store.report_log_path(), "brain_memory_distill_start", status="ok")
    conn = store.connect()
    try:
        rows = conn.execute(
            "SELECT id, task_id, outcome, source_module, metadata_json FROM experiences ORDER BY id DESC LIMIT ?",
            (limit,),
        ).fetchall()
    finally:
        # BUG FIX: previously the connection leaked when the query raised.
        conn.close()

    distilled: List[Dict[str, Any]] = []
    seen: set = set()  # normalized summaries already emitted in this batch

    for row in rows:
        source_id = _row_value(row, "id", 0)
        task_id = _row_value(row, "task_id", 1)
        content = _row_value(row, "outcome", 2) or ""
        source_module = _row_value(row, "source_module", 3)
        raw_metadata = _row_value(row, "metadata_json", 4) or "{}"
        try:
            experience_metadata = json.loads(raw_metadata) if isinstance(raw_metadata, str) else dict(raw_metadata or {})
        except Exception:
            experience_metadata = {}
        content, _ = redaction.redact_text(content)

        heuristic_summary = _heuristic_summary(content)
        summary = _local_distill_summary(content)
        if _needs_frontier_refine(summary, content):
            refined = _frontier_distill_summary(content)
            if refined:
                summary = refined

        # Fall back to the first-line heuristic when distillation failed or grew.
        if not summary or len(summary) > len(content):
            summary = heuristic_summary

        summary, _ = redaction.redact_text(summary)
        norm = _normalize(summary)
        if _reject_distilled_summary(summary, content):
            emit_event(state_store.report_log_path(), "brain_memory_distill_rejected", status="ok")
            continue
        if not norm or norm in seen:
            emit_event(state_store.report_log_path(), "brain_memory_distill_rejected", status="ok")
            continue

        seen.add(norm)
        verification = _verification_points(content)
        score = _candidate_score(summary, content)
        ratio = len(summary) / max(1, len(content))

        if score <= 0.1:
            emit_event(state_store.report_log_path(), "brain_memory_distill_rejected", status="ok")
            continue

        candidate_metadata = provenance.normalize_metadata(
            {
                **experience_metadata,
                "compression_ratio": round(ratio, 3),
                "task_id": task_id,
                "source_event_id": source_id,
                "experience_reference": f"experiences:{source_id}",
                "derived_via": "distill",
                "kind": "distilled_candidate",
                "source_labels": [*(experience_metadata.get("source_labels") or []), *([source_module] if source_module else [])],
            },
            source=source_module,
        )
        candidate_result = candidate.create_candidate(
            source_event_id=source_id,
            distilled_summary=summary,
            verification_points=verification,
            confidence_score=score,
            metadata=candidate_metadata,
        )

        distilled.append({
            "source_event_id": source_id,
            "distilled_summary": summary,
            "verification_points": verification,
            "confidence_score": score,
            "compression_ratio": round(ratio, 3),
            "candidate_id": candidate_result.get("candidate_id"),
            "duplicate": candidate_result.get("duplicate"),
            "provenance": provenance.preview_from_metadata(candidate_metadata),
        })
        emit_event(state_store.report_log_path(), "brain_memory_distill_success", status="ok")

    return distilled
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def distill_artifact(artifact: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Distill one artifact's text into at most one promotion candidate.

    Returns a single-element list describing the created candidate, or an
    empty list when the text is missing or the summary is rejected.
    """
    raw_text = artifact.get("content_text", "")
    if not isinstance(raw_text, str) or not raw_text.strip():
        return []

    def _rejected() -> List[Dict[str, Any]]:
        emit_event(state_store.report_log_path(), "brain_memory_distill_rejected", status="ok")
        return []

    text, _ = redaction.redact_text(raw_text)
    summary = _local_distill_summary(text)
    if _needs_frontier_refine(summary, text):
        frontier_summary = _frontier_distill_summary(text)
        if frontier_summary:
            summary = frontier_summary
    if not summary or len(summary) > len(text):
        summary = _heuristic_summary(text)
    summary, _ = redaction.redact_text(summary)
    if _reject_distilled_summary(summary, text):
        return _rejected()
    if not _normalize(summary):
        return _rejected()

    points = _verification_points(text)
    score = _candidate_score(summary, text)
    compression = len(summary) / max(1, len(text))
    if score <= 0.1:
        return _rejected()

    artifact_metadata = provenance.normalize_metadata(
        {
            "compression_ratio": round(compression, 3),
            "artifact_id": artifact.get("artifact_id"),
            "derived_via": "artifact_distill",
            "kind": "distilled_candidate",
            "source_labels": ["artifact"],
        }
    )
    candidate_result = candidate.create_candidate(
        source_event_id=0,
        distilled_summary=summary,
        verification_points=points,
        confidence_score=score,
        metadata=artifact_metadata,
    )

    emit_event(state_store.report_log_path(), "brain_memory_distill_success", status="ok")
    return [{
        "source_event_id": 0,
        "distilled_summary": summary,
        "verification_points": points,
        "confidence_score": score,
        "compression_ratio": round(compression, 3),
        "candidate_id": candidate_result.get("candidate_id"),
        "duplicate": candidate_result.get("duplicate"),
        "provenance": provenance.preview_from_metadata(artifact_metadata),
    }]
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from typing import List, Any
|
|
5
|
+
|
|
6
|
+
from ocmemog.runtime import config, model_router, state_store
|
|
7
|
+
from ocmemog.runtime.instrumentation import emit_event
|
|
8
|
+
from ocmemog.runtime.providers import provider_execute
|
|
9
|
+
|
|
10
|
+
# Shared event-log destination for all embedding instrumentation in this module.
LOGFILE = state_store.report_log_path()

# Process-wide cache of loaded SentenceTransformer models, keyed by model name,
# so repeated embedding calls do not re-load model weights.
_MODEL_CACHE: dict[str, Any] = {}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _local_embedding(text: str, local_model: str) -> List[float] | None:
    """Embed *text* locally: hash-based for 'simple'/'hash', else a SentenceTransformer."""
    if local_model in {"simple", "hash"}:
        return _simple_embedding(text)
    transformer = _load_sentence_transformer(local_model)
    if transformer is None:
        return None
    vector = transformer.encode([text])[0]
    return [float(component) for component in vector]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _simple_embedding(text: str, dims: int = 8) -> List[float]:
|
|
24
|
+
digest = hashlib.sha256(text.encode("utf-8")).digest()
|
|
25
|
+
values = [digest[i] / 255.0 for i in range(dims)]
|
|
26
|
+
return values
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _load_sentence_transformer(model_name: str) -> Any | None:
    """Load and cache a SentenceTransformer model; return None when unavailable.

    Returns None both when the sentence_transformers package is not installed
    and when model construction fails (bad model name, download failure), so
    callers can fall back gracefully instead of crashing the embedding path.
    """
    if model_name in _MODEL_CACHE:
        return _MODEL_CACHE[model_name]
    try:
        from sentence_transformers import SentenceTransformer
    except Exception:
        return None
    try:
        model = SentenceTransformer(model_name)
    except Exception:
        # BUG FIX: construction failures previously propagated, breaking the
        # module's otherwise graceful local-fallback behavior.
        return None
    _MODEL_CACHE[model_name] = model
    return model
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _provider_embedding(text: str, model_name: str) -> tuple[List[float] | None, dict[str, str]]:
    """Request an embedding from the routed provider.

    Returns (vector, meta); the vector is None when no provider is selected or
    the response carries no usable embedding list.
    """
    selection = model_router.get_provider_for_role("embedding")
    if not selection.provider_id:
        return None, {}
    response = provider_execute.execute_embedding_call(selection, text)
    raw_vector = response.get("embedding") if isinstance(response, dict) else None
    meta = {
        "provider_id": str(getattr(selection, "provider_id", "") or ""),
        "model": str(model_name or getattr(selection, "model", "") or ""),
    }
    if not isinstance(raw_vector, list):
        return None, meta
    return [float(component) for component in raw_vector], meta
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def generate_embedding(
    text: str,
    *,
    skip_provider: bool = False,
) -> List[float] | None:
    """Generate an embedding for *text*, preferring the configured provider.

    Order of attempts:
      1. Remote provider (unless *skip_provider* is set or none is configured).
      2. Local fallback (hash-based "simple"/"hash" or a SentenceTransformer).

    Emits instrumentation events for every outcome. Returns the embedding
    vector, or None when every path fails. Raises only when the provider
    times out and no local fallback model is configured.
    """
    emit_event(LOGFILE, "brain_embedding_start", status="ok")
    if not isinstance(text, str) or not text.strip():
        emit_event(LOGFILE, "brain_embedding_failed", status="error", reason="empty_text")
        return None
    # Resolve the local fallback model name, honoring legacy config aliases.
    local_model = str(
        getattr(config, "OCMEMOG_EMBED_MODEL_LOCAL", "")
        or getattr(config, "BRAIN_EMBED_MODEL_LOCAL", getattr(config, "OCMEMOG_EMBED_LOCAL_MODEL", "simple"))
        or ""
    )
    # Resolve the provider-backed model name, also across legacy aliases.
    provider_model = (
        getattr(config, "OCMEMOG_EMBED_PROVIDER", "")
        or getattr(config, "OCMEMOG_EMBED_MODEL_PROVIDER", "")
        or getattr(config, "BRAIN_EMBED_MODEL_PROVIDER", "")
    )
    embedding: List[float] | None = None
    provider_meta: dict[str, str] = {}

    if provider_model and not skip_provider:
        try:
            embedding, provider_meta = _provider_embedding(text, provider_model)
        except TimeoutError as exc:
            emit_event(
                LOGFILE,
                "brain_embedding_failed",
                status="error",
                reason="provider_timeout",
                provider=provider_model,
                model=provider_model,
                error=str(exc),
            )
            # A timeout is fatal only when there is no local fallback model.
            if not local_model:
                raise
            embedding, provider_meta = None, {}
        except Exception as exc:
            emit_event(
                LOGFILE,
                "brain_embedding_failed",
                status="error",
                reason="provider_error",
                provider=provider_model,
                model=provider_model,
                error=str(exc),
            )
            embedding, provider_meta = None, {}
        if not embedding:
            # Provider yielded no usable vector; record the planned fallback.
            emit_event(
                LOGFILE,
                "brain_embedding_failed",
                status="error",
                reason="provider_no_embedding",
                provider=provider_model,
                model=provider_meta.get("model", ""),
                fallback="local" if local_model else "disabled",
            )
        elif embedding:  # NOTE(review): equivalent to a plain `else` here
            emit_event(
                LOGFILE,
                "brain_embedding_complete",
                status="ok",
                provider="provider",
                provider_id=provider_meta.get("provider_id", ""),
                model=provider_meta.get("model", ""),
            )
            emit_event(
                LOGFILE,
                "brain_embedding_generated",
                status="ok",
                provider="provider",
                dimensions=len(embedding),
                provider_id=provider_meta.get("provider_id", ""),
                model=provider_meta.get("model", ""),
            )
            return embedding
    elif provider_model:
        # Provider is configured but explicitly skipped by the caller.
        emit_event(
            LOGFILE,
            "brain_embedding_start",
            status="ok",
            provider=provider_model,
            provider_skipped="true",
        )

    if local_model:
        embedding = _local_embedding(text, local_model)
        if embedding:
            provider = "local_simple" if local_model in {"simple", "hash"} else "local_model"
            emit_event(LOGFILE, "brain_embedding_complete", status="ok", provider=provider)
            emit_event(LOGFILE, "brain_embedding_generated", status="ok", provider=provider, dimensions=len(embedding))
            return embedding
    emit_event(LOGFILE, "brain_embedding_failed", status="error", reason="no_embedding")
    return None
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
from ocmemog.runtime import state_store
|
|
7
|
+
from ocmemog.runtime.instrumentation import emit_event
|
|
8
|
+
from ocmemog.runtime.memory import store
|
|
9
|
+
|
|
10
|
+
# Age (in days) after which a knowledge row is reported as stale.
DEFAULT_STALE_DAYS = 30
# Rows below this confidence are surfaced as low-confidence advisories.
DEFAULT_CONFIDENCE_THRESHOLD = 0.6
# Maximum rows fetched per category in a single scan.
DEFAULT_LIMIT = 25
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
_BAD_SUMMARY_PREFIXES = (
|
|
16
|
+
"promoted",
|
|
17
|
+
"candidate_promoted",
|
|
18
|
+
"no local memory summary available",
|
|
19
|
+
"summary",
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _summary_from_content(content: Any, limit: int = 120) -> str:
|
|
24
|
+
text = str(content or "").strip()
|
|
25
|
+
if not text:
|
|
26
|
+
return "(empty memory content)"
|
|
27
|
+
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
|
28
|
+
for line in lines:
|
|
29
|
+
lowered = line.lower()
|
|
30
|
+
if lowered in _BAD_SUMMARY_PREFIXES:
|
|
31
|
+
continue
|
|
32
|
+
if any(lowered.startswith(prefix + ":") for prefix in _BAD_SUMMARY_PREFIXES):
|
|
33
|
+
continue
|
|
34
|
+
return line[:limit]
|
|
35
|
+
return "(needs summary cleanup)"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def scan_freshness(
    stale_days: int = DEFAULT_STALE_DAYS,
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    limit: int = DEFAULT_LIMIT,
) -> Dict[str, Any]:
    """Scan the knowledge table for stale and low-confidence memories.

    Advisory-only: nothing is mutated. Returns a dict with ``advisories`` and
    ``refresh_candidates`` lists describing rows that may need refreshing.
    """
    emit_event(
        state_store.report_log_path(),
        "brain_memory_freshness_scan_start",
        status="ok",
        stale_days=stale_days,
        confidence_threshold=confidence_threshold,
    )
    conn = store.connect()
    try:
        stale_rows = conn.execute(
            """
            SELECT 'knowledge' AS memory_type, id, timestamp, confidence, content
            FROM knowledge
            WHERE timestamp <= datetime('now', ?)
            ORDER BY timestamp ASC
            LIMIT ?
            """,
            (f"-{max(1, stale_days)} days", limit),
        ).fetchall()
        low_conf_rows = conn.execute(
            """
            SELECT 'knowledge' AS memory_type, id, timestamp, confidence, content
            FROM knowledge
            WHERE confidence < ?
            ORDER BY confidence ASC, timestamp ASC
            LIMIT ?
            """,
            (confidence_threshold, limit),
        ).fetchall()
    finally:
        # BUG FIX: close the connection even when a query raises.
        conn.close()
    advisories: List[Dict[str, Any]] = []
    refresh_candidates: List[Dict[str, Any]] = []
    now_ts = time.time()
    # BUG FIX: clamp stale_days to >= 1 (matching the SQL above) so a
    # stale_days=0 argument no longer raises ZeroDivisionError in the score.
    stale_window_seconds = max(1, stale_days) * 86400
    for category, rows in (("stale", stale_rows), ("low_confidence", low_conf_rows)):
        for row in rows:
            age_seconds = 0.0
            if row["timestamp"]:
                try:
                    age_seconds = max(0.0, now_ts - time.mktime(time.strptime(row["timestamp"], "%Y-%m-%d %H:%M:%S")))
                except Exception:
                    age_seconds = 0.0
            confidence = float(row["confidence"] or 0.0)
            # Linear age decay scaled by confidence: fresh + confident -> near 1.0.
            freshness_score = max(0.0, 1.0 - min(age_seconds / stale_window_seconds, 1.0)) * (0.5 + confidence / 2.0)
            refresh_recommended = freshness_score < 0.4 or category == "stale"
            entry = {
                "category": category,
                "memory_type": row["memory_type"],
                "memory_id": row["id"],
                "timestamp": row["timestamp"],
                "confidence": confidence,
                "summary": _summary_from_content(row["content"]),
                "freshness_score": round(freshness_score, 3),
                "refresh_recommended": refresh_recommended,
            }
            advisories.append(entry)
            refresh_candidates.append(entry)
    emit_event(
        state_store.report_log_path(),
        "brain_memory_freshness_scan_complete",
        status="ok",
        advisory_count=len(advisories),
        refresh_candidates=len(refresh_candidates),
    )
    return {
        "ok": True,
        "advisory_only": True,
        "advisories": advisories,
        "refresh_candidates": refresh_candidates,
    }
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def freshness_weight(score: float) -> float:
    """Clamp *score* into the [0.0, 1.0] range."""
    value = float(score)
    if value < 0.0:
        return 0.0
    if value > 1.0:
        return 1.0
    return value
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from ocmemog.runtime.memory import integrity, store
|
|
6
|
+
|
|
7
|
+
# Tables whose rows are expected to carry vector embeddings; used as the
# denominator when computing vector-index coverage below.
EMBED_TABLES = tuple(store.MEMORY_TABLES)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_memory_health() -> Dict[str, Any]:
    """Report row counts, vector-index coverage, and integrity status.

    Counting is best-effort: a missing table contributes a count of 0 rather
    than raising, and any failure in the coverage queries zeroes both vector
    metrics. Coverage is covered-source-rows / total-source-rows across
    ``EMBED_TABLES``.
    """
    conn = store.connect()
    try:
        counts: Dict[str, int] = {}
        for table in ["experiences", "candidates", "promotions", "memory_index", *store.MEMORY_TABLES, "vector_embeddings"]:
            try:
                counts[table] = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
            except Exception:
                counts[table] = 0

        vector_index_count = 0
        vector_covered_sources = 0
        try:
            for table in EMBED_TABLES:
                vector_index_count += conn.execute(
                    "SELECT COUNT(*) FROM vector_embeddings WHERE source_type=?",
                    (table,),
                ).fetchone()[0]
                # A source row is "covered" when at least one embedding points at it.
                vector_covered_sources += conn.execute(
                    f"""
                    SELECT COUNT(*) FROM {table} AS source
                    WHERE EXISTS (
                        SELECT 1
                        FROM vector_embeddings AS embeddings
                        WHERE embeddings.source_type = ?
                        AND CAST(embeddings.source_id AS TEXT) = CAST(source.id AS TEXT)
                    )
                    """,
                    (table,),
                ).fetchone()[0]
        except Exception:
            vector_covered_sources = 0
            vector_index_count = 0

        total_embed_sources = sum(counts.get(table, 0) for table in EMBED_TABLES)
    finally:
        # BUG FIX: previously the connection leaked if anything raised outside
        # the narrow per-query try/except blocks.
        conn.close()
    integrity_result = integrity.run_integrity_check()

    coverage = 0.0
    if total_embed_sources:
        coverage = round(vector_covered_sources / total_embed_sources, 3)

    return {
        "counts": counts,
        "vector_index_count": vector_index_count,
        "vector_index_coverage": coverage,
        "vector_index_integrity_status": integrity_result.get("ok"),
        "integrity": integrity_result,
    }
|