@simbimbo/memory-ocmemog 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,87 +1,112 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Dict, Iterable, List
3
+ import json
4
+ import os
5
+ import re
6
+ from typing import Any, Dict, List
4
7
 
5
8
  from brain.runtime.instrumentation import emit_event
6
- from brain.runtime import state_store
9
+ from brain.runtime import state_store, inference
7
10
  from brain.runtime.memory import retrieval
8
11
 
12
+ LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
9
13
 
10
- def build_context(
11
- prompt: str,
12
- max_context_blocks: int = 5,
13
- *,
14
- memory_queries: Iterable[str] | None = None,
15
- memory_priorities: Iterable[str] | None = None,
16
- role_id: str | None = None,
17
- ) -> Dict[str, List[str]]:
18
- emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_context_build_start", status="ok")
19
- queries = [query for query in (memory_queries or ()) if isinstance(query, str) and query.strip()]
20
- categories = [category for category in (memory_priorities or ()) if isinstance(category, str) and category.strip()]
21
- role_priorities: List[str] = []
22
- if role_id:
23
- try:
24
- from brain.runtime.roles import role_registry
25
- role = role_registry.get_role(role_id)
26
- role_priorities = list(role.memory_priority) if role else []
27
- except Exception:
28
- role_priorities = []
29
- combined_priorities = [*categories, *role_priorities]
30
- if queries:
31
- mem = retrieval.retrieve_for_queries(queries, categories=combined_priorities or None)
32
- else:
33
- mem = retrieval.retrieve(prompt, categories=combined_priorities or None)
34
14
 
35
- ranked_blocks: List[Dict[str, str | float]] = []
36
- for item in mem.get("knowledge", []):
37
- ranked_blocks.append(
38
- {
39
- "content": item.get("content"),
40
- "source": "knowledge",
41
- "score": float(item.get("score") or item.get("confidence") or 0.0),
42
- }
43
- )
44
- for item in mem.get("tasks", []):
45
- ranked_blocks.append(
46
- {
47
- "content": item.get("content"),
48
- "source": "tasks",
49
- "score": float(item.get("score") or item.get("confidence") or 0.0),
50
- }
51
- )
52
- if role_priorities:
53
- for item in ranked_blocks:
54
- if item.get("source") in role_priorities:
55
- item["score"] = float(item.get("score", 0.0)) + 0.2
56
- emit_event(
57
- state_store.reports_dir() / "brain_memory.log.jsonl",
58
- "brain_role_context_weighted",
59
- status="ok",
60
- role_id=role_id,
61
- priorities=len(role_priorities),
62
- )
63
- ranked_blocks.sort(key=lambda item: item.get("score", 0.0), reverse=True)
64
- if len(ranked_blocks) > max_context_blocks:
65
- ranked_blocks = ranked_blocks[:max_context_blocks]
66
- emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_context_trim", status="ok")
15
+ def _heuristic_queries(prompt: str, limit: int = 3) -> List[str]:
16
+ cleaned = re.sub(r"\s+", " ", prompt or "").strip()
17
+ parts = re.split(r",| and | then | also ", cleaned)
18
+ queries = []
19
+ for part in parts:
20
+ q = part.strip(" .")
21
+ if len(q) >= 8 and q.lower() not in {cleaned.lower()}:
22
+ queries.append(q)
23
+ if cleaned and cleaned not in queries:
24
+ queries.insert(0, cleaned)
25
+ deduped: List[str] = []
26
+ seen = set()
27
+ for q in queries:
28
+ key = q.lower()
29
+ if key in seen:
30
+ continue
31
+ seen.add(key)
32
+ deduped.append(q)
33
+ if len(deduped) >= limit:
34
+ break
35
+ return deduped
67
36
 
68
- context_blocks = [item["content"] for item in ranked_blocks if item.get("content")]
69
- context_scores = [item.get("score", 0.0) for item in ranked_blocks]
70
- synthesis = mem.get("synthesis", []) if isinstance(mem, dict) else []
71
- for item in synthesis[:2]:
72
- summary = item.get("summary") if isinstance(item, dict) else None
73
- if summary:
74
- context_blocks.append(str(summary))
75
37
 
76
- context = {
77
- "context_blocks": context_blocks,
78
- "context_scores": context_scores,
79
- "ranked_blocks": ranked_blocks,
80
- "knowledge": mem.get("knowledge", []),
81
- "tasks": mem.get("tasks", []),
82
- "directives": [item["content"] if isinstance(item, dict) else item for item in mem.get("directives", [])],
83
- "reflections": [item["content"] if isinstance(item, dict) else item for item in mem.get("reflections", [])],
84
- "used_queries": queries,
38
+ def _should_skip_query_grooming(prompt: str) -> bool:
39
+ cleaned = re.sub(r"\s+", " ", prompt or "").strip()
40
+ if not cleaned:
41
+ return True
42
+ if len(cleaned) <= 32 and ',' not in cleaned and ' and ' not in cleaned.lower():
43
+ return True
44
+ words = cleaned.split()
45
+ if 1 <= len(words) <= 5 and all(len(w) >= 3 for w in words):
46
+ return True
47
+ return False
48
+
49
+
50
+ def _groom_queries(prompt: str, limit: int = 3) -> List[str]:
51
+ cleaned = re.sub(r"\s+", " ", prompt or "").strip()
52
+ if not cleaned:
53
+ return []
54
+ if _should_skip_query_grooming(cleaned):
55
+ return _heuristic_queries(cleaned, limit=limit)
56
+ model = os.environ.get("OCMEMOG_PONDER_MODEL", "local-openai:qwen2.5-7b-instruct")
57
+ ask = (
58
+ "Rewrite this raw memory request into up to 3 short search queries. "
59
+ "Return strict JSON as {\"queries\":[\"...\"]}. "
60
+ "Prefer compact entity/topic phrases, not full sentences.\n\n"
61
+ f"Request: {cleaned}\n"
62
+ )
63
+ try:
64
+ result = inference.infer(ask, provider_name=model)
65
+ except Exception:
66
+ return _heuristic_queries(cleaned, limit=limit)
67
+ if result.get("status") != "ok":
68
+ return _heuristic_queries(cleaned, limit=limit)
69
+ output = str(result.get("output") or "").strip()
70
+ try:
71
+ payload = json.loads(output)
72
+ raw_queries = payload.get("queries") or []
73
+ queries = [str(q).strip() for q in raw_queries if str(q).strip()]
74
+ except Exception:
75
+ queries = []
76
+ cleaned_queries: List[str] = []
77
+ seen = set()
78
+ for q in queries:
79
+ key = q.lower()
80
+ if len(q) < 4 or key in seen:
81
+ continue
82
+ seen.add(key)
83
+ cleaned_queries.append(q)
84
+ if len(cleaned_queries) >= limit:
85
+ break
86
+ return cleaned_queries or _heuristic_queries(cleaned, limit=limit)
87
+
88
+
89
def build_context(prompt: str, memory_queries: List[str] | None = None, limit: int = 5) -> Dict[str, Any]:
    """Collect up to ``limit`` memories relevant to ``prompt``.

    Caller-supplied ``memory_queries`` are used when at least one of them is a
    non-blank string; otherwise queries are derived from ``prompt`` via
    ``_groom_queries``. Each query is passed to ``retrieval.retrieve_memories``
    and the results are merged in query order, skipping items whose
    ``reference`` (or, failing that, ``id``) was already collected. Items with
    neither field are never treated as duplicates.

    Args:
        prompt: The raw request, echoed back in the result.
        memory_queries: Optional explicit search queries.
        limit: Maximum number of memories to return; values <= 0 return none.

    Returns:
        A dict with the keys ``prompt``, ``queries`` and ``memories``.
    """
    emit_event(LOGFILE, "brain_memory_context_build_start", status="ok")
    # Blank or non-string entries would only trigger pointless retrievals.
    queries = [q for q in (memory_queries or ()) if isinstance(q, str) and q.strip()]
    if not queries:
        queries = _groom_queries(prompt, limit=3)

    memories: List[Dict[str, Any]] = []
    seen: set[str] = set()
    for query in queries:
        # Checked before retrieving so a full (or zero-sized) budget costs no
        # further retrieval calls.
        if len(memories) >= limit:
            break
        for item in retrieval.retrieve_memories(query, limit=limit):
            if len(memories) >= limit:
                break
            ref = str(item.get("reference") or item.get("id") or "")
            if ref:
                if ref in seen:
                    continue
                seen.add(ref)
            memories.append(item)

    emit_event(LOGFILE, "brain_memory_context_build_complete", status="ok", item_count=len(memories), query_count=len(queries))
    return {
        "prompt": prompt,
        "queries": queries,
        "memories": memories,
    }
86
- emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_context_build_complete", status="ok")
87
- return context
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import os
4
5
  import re
5
6
  from typing import Dict, Any, List
6
7
 
@@ -23,6 +24,83 @@ def _heuristic_summary(text: str) -> str:
23
24
  return lines[0][:240]
24
25
 
25
26
 
27
def _should_skip_local_distill(text: str) -> bool:
    """Report whether ``text`` is too trivial to send to the local model.

    The text is passed through ``_normalize`` first. It is skipped when the
    normalized form is empty, shorter than 24 characters, or exactly one of a
    small set of filler acknowledgements.
    """
    normalized = _normalize(text)
    if not normalized:
        return True

    filler = {"ok", "okay", "done", "fixed", "working", "success", "positive feedback"}
    too_short = len(normalized) < 24
    return too_short or normalized in filler
34
+
35
+
36
def _local_distill_summary(text: str) -> str:
    """Ask the local model for a one-line operational summary of ``text``.

    The model is chosen by the ``OCMEMOG_PONDER_MODEL`` environment variable.
    The call is best-effort: trivial input (per ``_should_skip_local_distill``),
    an inference error, a non-"ok" status, an empty answer, or the model's
    explicit ``NONE`` sentinel all produce ``""``.

    Args:
        text: The experience text to distill.

    Returns:
        The summary truncated to 240 characters, or ``""`` when none is
        available.
    """
    if _should_skip_local_distill(text):
        return ""
    prompt = (
        "Distill this experience into one concise operational summary. "
        "Prefer concrete cause/effect, decision, or reusable takeaway. "
        "Keep it under 220 characters. Return NONE if there is no meaningful takeaway.\n\n"
        f"Experience:\n{text}\n\n"
        "Summary:"
    )
    model = os.environ.get("OCMEMOG_PONDER_MODEL", "local-openai:qwen2.5-7b-instruct")
    try:
        result = inference.infer(prompt, provider_name=model)
    except Exception:
        # Best-effort: callers fall back to other summarizers on "".
        return ""
    if not isinstance(result, dict) or result.get("status") != "ok":
        return ""
    # ``or ""`` keeps a None output from being stringified to "None".
    output = str(result.get("output") or "").strip()
    output = re.sub(r"^(Summary|Sentence|Lesson):\s*", "", output, flags=re.IGNORECASE).strip()
    if not output:
        return ""
    # Treat NONE as a sentinel only when it stands alone or is followed by
    # punctuation ("NONE", "None.", "NONE - nothing useful"). A plain prefix
    # test would also throw away real summaries such as "None of the retries
    # succeeded ..." or "Nonetheless ...".
    if re.match(r"none\b(?!\s+\w)", output, flags=re.IGNORECASE):
        return ""
    return output[:240]
58
+
59
+
60
def _frontier_distill_summary(text: str) -> str:
    """Ask the model assigned to the "memory" role to summarize ``text``.

    The call is best-effort: any exception raised while resolving the model
    or running inference, a non-"ok" status, or a missing output produces
    ``""`` so the caller can fall back to another summarizer.

    Args:
        text: The experience text to distill.

    Returns:
        The model's summary truncated to 240 characters, or ``""``.
    """
    try:
        model = model_roles.get_model_for_role("memory")
        result = inference.infer(
            f"Distill this experience into a concise summary:\n\n{text}".strip(),
            provider_name=model,
        )
    except Exception:
        return ""
    if not isinstance(result, dict) or result.get("status") != "ok":
        return ""
    # ``or ""`` rather than a .get default: an explicit None output would
    # otherwise be stringified into the literal summary "None".
    return str(result.get("output") or "").strip()[:240]
72
+
73
+
74
+ def _needs_frontier_refine(summary: str, source: str) -> bool:
75
+ if not summary:
76
+ return True
77
+ lowered = summary.lower().strip()
78
+ if lowered.startswith(("be ", "always ", "remember ", "good job", "be careful")):
79
+ return True
80
+ if len(summary) < 24:
81
+ return True
82
+ if len(summary) > len(source):
83
+ return True
84
+ if _normalize(summary) == _normalize(_heuristic_summary(source)):
85
+ return True
86
+ return False
87
+
88
+
89
def _reject_distilled_summary(summary: str, source: str) -> bool:
    """Report whether a distilled ``summary`` should be discarded.

    The summary is compared in its ``_normalize``-d form. It is rejected when
    that form is empty, is one of a few filler acknowledgements, is shorter
    than 16 characters, opens with a stock platitude, or is identical to the
    normalized ``source`` (when a source is given).
    """
    normalized = _normalize(summary)
    if not normalized:
        return True

    filler = {"ok", "okay", "done", "fixed", "working", "positive feedback", "success", "passed"}
    platitudes = ("good job", "be proactive", "be thorough", "always check", "always remember")
    if normalized in filler or len(normalized) < 16 or normalized.startswith(platitudes):
        return True

    # Only normalize the source when one was actually supplied.
    return bool(source) and normalized == _normalize(source)
102
+
103
+
26
104
  def _verification_points(text: str) -> List[str]:
27
105
  points = []
28
106
  if "verify" in text.lower():
@@ -81,23 +159,21 @@ def distill_experiences(limit: int = 10) -> List[Dict[str, Any]]:
81
159
  experience_metadata = {}
82
160
  content, _ = redaction.redact_text(content)
83
161
 
84
- summary = ""
85
- try:
86
- model = model_roles.get_model_for_role("memory")
87
- result = inference.infer(
88
- f"Distill this experience into a concise summary:\n\n{content}".strip(),
89
- provider_name=model,
90
- )
91
- if result.get("status") == "ok":
92
- summary = str(result.get("output", "")).strip()
93
- except Exception:
94
- summary = ""
162
+ heuristic_summary = _heuristic_summary(content)
163
+ summary = _local_distill_summary(content)
164
+ if _needs_frontier_refine(summary, content):
165
+ refined = _frontier_distill_summary(content)
166
+ if refined:
167
+ summary = refined
95
168
 
96
169
  if not summary or len(summary) > len(content):
97
- summary = _heuristic_summary(content)
170
+ summary = heuristic_summary
98
171
 
99
172
  summary, _ = redaction.redact_text(summary)
100
173
  norm = _normalize(summary)
174
+ if _reject_distilled_summary(summary, content):
175
+ emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_distill_rejected", status="ok")
176
+ continue
101
177
  if not norm or norm in seen:
102
178
  emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_distill_rejected", status="ok")
103
179
  continue
@@ -153,8 +229,17 @@ def distill_artifact(artifact: Dict[str, Any]) -> List[Dict[str, Any]]:
153
229
  return []
154
230
 
155
231
  text, _ = redaction.redact_text(text)
156
- summary = _heuristic_summary(text)
232
+ summary = _local_distill_summary(text)
233
+ if _needs_frontier_refine(summary, text):
234
+ refined = _frontier_distill_summary(text)
235
+ if refined:
236
+ summary = refined
237
+ if not summary or len(summary) > len(text):
238
+ summary = _heuristic_summary(text)
157
239
  summary, _ = redaction.redact_text(summary)
240
+ if _reject_distilled_summary(summary, text):
241
+ emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_distill_rejected", status="ok")
242
+ return []
158
243
  norm = _normalize(summary)
159
244
  if not norm:
160
245
  emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_distill_rejected", status="ok")
@@ -196,3 +281,61 @@ def distill_artifact(artifact: Dict[str, Any]) -> List[Dict[str, Any]]:
196
281
  "duplicate": candidate_result.get("duplicate"),
197
282
  "provenance": provenance.preview_from_metadata(candidate_metadata),
198
283
  }]
284
+
285
+ candidate_metadata = provenance.normalize_metadata(
286
+ {
287
+ "compression_ratio": round(ratio, 3),
288
+ "artifact_id": artifact.get("artifact_id"),
289
+ "derived_via": "artifact_distill",
290
+ "kind": "distilled_candidate",
291
+ "source_labels": ["artifact"],
292
+ }
293
+ )
294
+ candidate_result = candidate.create_candidate(
295
+ source_event_id=0,
296
+ distilled_summary=summary,
297
+ verification_points=verification,
298
+ confidence_score=score,
299
+ metadata=candidate_metadata,
300
+ )
301
+
302
+ emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_distill_success", status="ok")
303
+ return [{
304
+ "source_event_id": 0,
305
+ "distilled_summary": summary,
306
+ "verification_points": verification,
307
+ "confidence_score": score,
308
+ "compression_ratio": round(ratio, 3),
309
+ "candidate_id": candidate_result.get("candidate_id"),
310
+ "duplicate": candidate_result.get("duplicate"),
311
+ "provenance": provenance.preview_from_metadata(candidate_metadata),
312
+ }]
313
+
314
+ candidate_metadata = provenance.normalize_metadata(
315
+ {
316
+ "compression_ratio": round(ratio, 3),
317
+ "artifact_id": artifact.get("artifact_id"),
318
+ "derived_via": "artifact_distill",
319
+ "kind": "distilled_candidate",
320
+ "source_labels": ["artifact"],
321
+ }
322
+ )
323
+ candidate_result = candidate.create_candidate(
324
+ source_event_id=0,
325
+ distilled_summary=summary,
326
+ verification_points=verification,
327
+ confidence_score=score,
328
+ metadata=candidate_metadata,
329
+ )
330
+
331
+ emit_event(state_store.reports_dir() / "brain_memory.log.jsonl", "brain_memory_distill_success", status="ok")
332
+ return [{
333
+ "source_event_id": 0,
334
+ "distilled_summary": summary,
335
+ "verification_points": verification,
336
+ "confidence_score": score,
337
+ "compression_ratio": round(ratio, 3),
338
+ "candidate_id": candidate_result.get("candidate_id"),
339
+ "duplicate": candidate_result.get("duplicate"),
340
+ "provenance": provenance.preview_from_metadata(candidate_metadata),
341
+ }]
@@ -297,6 +297,8 @@ def _refine_unresolved_summary(summary: str, reference: str = "") -> str:
297
297
  raw = _heuristic_summary(summary, limit=500)
298
298
  if not _needs_unresolved_refine(raw):
299
299
  return _heuristic_summary(raw)
300
+ if raw and not raw.startswith(("#", "*", "1)", "2)", "TODO:")) and len(raw.split()) >= 5:
301
+ return _heuristic_summary(raw, limit=180)
300
302
  prompt = (
301
303
  "Rewrite this unresolved item as one concise actionable unresolved summary. "
302
304
  "Keep it under 180 characters. Focus on the decision, blocker, or next action. "
@@ -140,6 +140,12 @@ def promote_candidate(candidate: Dict[str, Any]) -> Dict[str, Any]:
140
140
  emit_event(LOGFILE, "brain_memory_reinforcement_created", status="ok")
141
141
  if memory_id:
142
142
  vector_index.insert_memory(memory_id, candidate.get("distilled_summary", ""), confidence)
143
+ try:
144
+ from brain.runtime.memory import api as memory_api
145
+
146
+ memory_api._auto_attach_governance_candidates(promoted_reference)
147
+ except Exception as exc:
148
+ emit_event(LOGFILE, "brain_memory_promotion_governance_failed", status="error", error=str(exc), reference=promoted_reference)
143
149
 
144
150
  return {"decision": decision, "confidence": confidence, "promotion_id": promotion_id, "destination": destination}
145
151
 
@@ -106,6 +106,16 @@ def normalize_metadata(metadata: Optional[Dict[str, Any]], *, source: Optional[s
106
106
  "derived_from_promotion_id",
107
107
  "derived_via",
108
108
  "kind",
109
+ "memory_status",
110
+ "superseded_by",
111
+ "supersedes",
112
+ "duplicate_of",
113
+ "duplicate_candidates",
114
+ "contradicts",
115
+ "contradiction_candidates",
116
+ "contradiction_status",
117
+ "canonical_reference",
118
+ "supersession_recommendation",
109
119
  ):
110
120
  if raw.get(key) is not None and provenance.get(key) is None:
111
121
  provenance[key] = raw.get(key)
@@ -177,6 +187,20 @@ def apply_links(reference: str, metadata: Optional[Dict[str, Any]]) -> None:
177
187
  _link_once(reference, "candidate", f"candidate:{provenance['derived_from_candidate_id']}")
178
188
  if provenance.get("derived_from_promotion_id"):
179
189
  _link_once(reference, "promotion", f"promotions:{provenance['derived_from_promotion_id']}")
190
+ if provenance.get("superseded_by"):
191
+ _link_once(reference, "superseded_by", str(provenance.get("superseded_by")))
192
+ if provenance.get("supersedes"):
193
+ _link_once(reference, "supersedes", str(provenance.get("supersedes")))
194
+ if provenance.get("duplicate_of"):
195
+ _link_once(reference, "duplicate_of", str(provenance.get("duplicate_of")))
196
+ for candidate in provenance.get("duplicate_candidates") or []:
197
+ _link_once(reference, "duplicate_candidate", str(candidate))
198
+ for target in provenance.get("contradicts") or []:
199
+ _link_once(reference, "contradicts", str(target))
200
+ for target in provenance.get("contradiction_candidates") or []:
201
+ _link_once(reference, "contradiction_candidate", str(target))
202
+ if provenance.get("canonical_reference"):
203
+ _link_once(reference, "canonical", str(provenance.get("canonical_reference")))
180
204
 
181
205
 
182
206
  def update_memory_metadata(reference: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
@@ -201,6 +225,34 @@ def update_memory_metadata(reference: str, updates: Dict[str, Any]) -> Optional[
201
225
  return merged
202
226
 
203
227
 
228
def force_update_memory_metadata(reference: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Overwrite provenance fields stored in a memory row's ``metadata_json``.

    ``reference`` has the form ``"<table>:<id>"``. Each entry of ``updates``
    is written into the row's ``provenance`` mapping, replacing any existing
    value; an update value of ``None`` or ``""`` removes that key instead.
    After the row is saved, ``apply_links`` is called with the reference and
    the updated metadata.

    Args:
        reference: Row reference; the table must be listed in
            ``_MEMORY_TABLES`` and the id must be a decimal integer.
        updates: Provenance keys to set or clear. ``None`` is treated as empty.

    Returns:
        The full updated metadata dict, or ``None`` when the reference is
        malformed, names an unknown table, or no such row exists.
    """
    table, sep, raw_id = reference.partition(":")
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" that int() rejects with ValueError.
    if not sep or table not in _MEMORY_TABLES or not raw_id.isdecimal():
        return None
    row_id = int(raw_id)

    conn = store.connect()
    try:
        # ``table`` is interpolated only after the _MEMORY_TABLES check above;
        # the id is always bound as a parameter.
        row = conn.execute(f"SELECT metadata_json FROM {table} WHERE id = ?", (row_id,)).fetchone()
        if not row:
            return None
        current = _load_json(row["metadata_json"], {})
        provenance_meta = current.get("provenance") if isinstance(current.get("provenance"), dict) else {}
        for key, value in (updates or {}).items():
            if value is None or value == "":
                provenance_meta.pop(key, None)
            else:
                provenance_meta[key] = value
        current["provenance"] = provenance_meta
        conn.execute(
            f"UPDATE {table} SET metadata_json = ? WHERE id = ?",
            (json.dumps(current, ensure_ascii=False), row_id),
        )
        conn.commit()
    finally:
        conn.close()
    apply_links(reference, current)
    return current
254
+
255
+
204
256
  def fetch_reference(reference: str) -> Optional[Dict[str, Any]]:
205
257
  prefix, sep, raw_id = reference.partition(":")
206
258
  if not sep or not prefix: