@simbimbo/memory-ocmemog 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.9 — 2026-03-19
4
+
5
+ Memory quality, governance, and review release.
6
+
7
+ ### Highlights
8
+ added near-duplicate candidate collapse during candidate generation, so double-ingested transcript/session content resolves to the existing candidate
9
+ - added conservative reflection reclassification and new durable buckets for `preferences` and `identity`
10
+ - wired new buckets through storage, retrieval, embeddings, health, integrity, and promotion/demotion paths
11
+ - hardened governance auto-promotion for duplicates and supersessions with stricter thresholds and guardrails
12
+ - added governance review endpoints plus dashboard review panel with filters and approve/reject actions
13
+ - fixed release-blocking distill fallback behavior in no-model environments and removed stale hard-coded bucket drift
14
+
15
+ ## 0.1.8 — 2026-03-19
16
+
17
+ Documentation and release follow-through after the llama.cpp migration and repo grooming pass.
18
+
19
+ ### Highlights
20
+ - documented the stable local runtime architecture (gateway/sidecar/text/embed split)
21
+ - published the repo in a llama.cpp-first state with fixed ports and cleaned installers/scripts
22
+ - kept compatibility hooks only where still useful instead of leaving Ollama as the implied primary path
23
+
3
24
  ## 0.1.7 — 2026-03-19
4
25
 
5
26
  llama.cpp-first cleanup after the 0.1.6 runtime cutover.
package/README.md CHANGED
@@ -14,6 +14,9 @@ Architecture at a glance:
14
14
  - **FastAPI sidecar (`ocmemog/sidecar/`)** exposes memory and continuity APIs
15
15
  - **SQLite-backed runtime (`brain/runtime/memory/`)** powers storage, hydration, checkpoints, salience ranking, and pondering
16
16
 
17
+ Current local runtime architecture note:
18
+ - `docs/architecture/local-runtime-2026-03-19.md`
19
+
17
20
  ## Repo layout
18
21
 
19
22
  - `openclaw.plugin.json`, `index.ts`, `package.json`: OpenClaw plugin package and manifest.
@@ -9,6 +9,27 @@ from brain.runtime import inference
9
9
  from brain.runtime.instrumentation import emit_event
10
10
  from brain.runtime.security import redaction
11
11
 
12
+ _REVIEW_KIND_METADATA: Dict[str, Dict[str, str]] = {
13
+ "duplicate_candidate": {
14
+ "relationship": "duplicate_of",
15
+ "label": "Duplicate candidate",
16
+ "approve_label": "Approve duplicate merge",
17
+ "reject_label": "Reject duplicate merge",
18
+ },
19
+ "contradiction_candidate": {
20
+ "relationship": "contradicts",
21
+ "label": "Contradiction candidate",
22
+ "approve_label": "Mark as contradiction",
23
+ "reject_label": "Dismiss contradiction",
24
+ },
25
+ "supersession_recommendation": {
26
+ "relationship": "supersedes",
27
+ "label": "Supersession recommendation",
28
+ "approve_label": "Approve supersession",
29
+ "reject_label": "Dismiss supersession",
30
+ },
31
+ }
32
+
12
33
 
13
34
  def _sanitize(text: str) -> str:
14
35
  redacted, _ = redaction.redact_text(text)
@@ -72,8 +93,6 @@ def _recommend_supersession_from_contradictions(
72
93
 
73
94
  signal_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_RECOMMEND_SIGNAL", "0.9") or 0.9)
74
95
  model_conf_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_MODEL_CONFIDENCE", "0.9") or 0.9)
75
- auto_apply = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_SUPERSESSION", "false").strip().lower() in {"1", "true", "yes"}
76
-
77
96
  ranked = sorted(contradiction_candidates, key=lambda item: float(item.get("signal") or 0.0), reverse=True)
78
97
  top = ranked[0]
79
98
  signal = float(top.get("signal") or 0.0)
@@ -105,28 +124,38 @@ def _recommend_supersession_from_contradictions(
105
124
  "model_hint": model_hint,
106
125
  })
107
126
 
108
- if auto_apply:
109
- merged = mark_memory_relationship(reference, relationship="supersedes", target_reference=target, status="active")
110
- recommendation["auto_applied"] = merged is not None
111
- recommendation["reason"] = "auto_applied_supersession" if merged is not None else "auto_apply_failed"
112
-
113
127
  return recommendation
114
128
 
115
129
 
116
- def _auto_promote_governance_candidates(
130
+ def _canonicalize_duplicate_target(reference: str) -> str:
131
+ payload = provenance.fetch_reference(reference) or {}
132
+ metadata = payload.get("metadata") or {}
133
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
134
+ canonical = str(prov.get("canonical_reference") or prov.get("duplicate_of") or reference).strip()
135
+ return canonical or reference
136
+
137
+
138
+ def _token_signature(text: str) -> frozenset[str]:
139
+ return frozenset(_tokenize(text))
140
+
141
+
142
+ def _auto_promote_duplicate_candidate(
117
143
  reference: str,
118
144
  *,
119
145
  duplicate_candidates: List[Dict[str, Any]],
120
146
  contradiction_candidates: List[Dict[str, Any]],
121
147
  ) -> Dict[str, Any]:
122
148
  auto_promote_enabled = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE", "true").strip().lower() in {"1", "true", "yes"}
123
- duplicate_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_SIMILARITY", "0.92") or 0.92)
149
+ allow_with_contradictions = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_ALLOW_CONTRADICTIONS", "false").strip().lower() in {"1", "true", "yes"}
150
+ duplicate_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_SIMILARITY", "0.98") or 0.98)
151
+ duplicate_margin = float(os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_MARGIN", "0.02") or 0.02)
152
+ require_exact_tokens = os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_REQUIRE_EXACT_TOKENS", "true").strip().lower() in {"1", "true", "yes"}
124
153
  promoted: Dict[str, Any] = {"duplicate_of": None, "promoted": False, "reason": "disabled" if not auto_promote_enabled else "none"}
125
154
 
126
155
  if not auto_promote_enabled:
127
156
  return promoted
128
157
 
129
- if contradiction_candidates:
158
+ if contradiction_candidates and not allow_with_contradictions:
130
159
  promoted["reason"] = "blocked_by_contradiction_candidates"
131
160
  return promoted
132
161
 
@@ -134,13 +163,29 @@ def _auto_promote_governance_candidates(
134
163
  promoted["reason"] = "no_duplicate_candidates"
135
164
  return promoted
136
165
 
137
- top = sorted(duplicate_candidates, key=lambda item: float(item.get("similarity") or 0.0), reverse=True)[0]
166
+ payload = provenance.fetch_reference(reference) or {}
167
+ reference_content = str(payload.get("content") or "")
168
+ reference_signature = _token_signature(reference_content)
169
+ ranked = sorted(duplicate_candidates, key=lambda item: float(item.get("similarity") or 0.0), reverse=True)
170
+ top = ranked[0]
138
171
  similarity = float(top.get("similarity") or 0.0)
139
- target = str(top.get("reference") or "")
140
- if not target or similarity < duplicate_threshold:
172
+ target = _canonicalize_duplicate_target(str(top.get("reference") or ""))
173
+ if not target or target == reference or similarity < duplicate_threshold:
141
174
  promoted["reason"] = "similarity_below_threshold"
142
175
  return promoted
143
176
 
177
+ if len(ranked) > 1:
178
+ runner_up = float(ranked[1].get("similarity") or 0.0)
179
+ if similarity - runner_up < duplicate_margin:
180
+ promoted["reason"] = "ambiguous_duplicate_candidates"
181
+ return promoted
182
+
183
+ target_payload = provenance.fetch_reference(target) or {}
184
+ target_content = str(target_payload.get("content") or "")
185
+ if require_exact_tokens and _token_signature(target_content) != reference_signature:
186
+ promoted["reason"] = "token_signature_mismatch"
187
+ return promoted
188
+
144
189
  merged = mark_memory_relationship(reference, relationship="duplicate_of", target_reference=target, status="duplicate")
145
190
  promoted.update({
146
191
  "duplicate_of": target,
@@ -151,17 +196,70 @@ def _auto_promote_governance_candidates(
151
196
  return promoted
152
197
 
153
198
 
199
+ def _auto_apply_supersession_recommendation(
200
+ reference: str,
201
+ *,
202
+ contradiction_candidates: List[Dict[str, Any]],
203
+ supersession_recommendation: Dict[str, Any],
204
+ ) -> Dict[str, Any]:
205
+ recommendation = dict(supersession_recommendation or {})
206
+ if not recommendation:
207
+ return {"recommended": False, "auto_applied": False, "reason": "missing_recommendation", "target_reference": None, "signal": 0.0}
208
+
209
+ auto_apply = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_SUPERSESSION", "false").strip().lower() in {"1", "true", "yes"}
210
+ allow_with_contradictions = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_ALLOW_CONTRADICTIONS", "false").strip().lower() in {"1", "true", "yes"}
211
+ auto_apply_signal = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_AUTOPROMOTE_SIGNAL", "0.97") or 0.97)
212
+ model_conf_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_AUTOPROMOTE_MODEL_CONFIDENCE", "0.97") or 0.97)
213
+
214
+ recommendation.setdefault("auto_applied", False)
215
+ if not recommendation.get("recommended"):
216
+ recommendation["reason"] = recommendation.get("reason") or "not_recommended"
217
+ return recommendation
218
+
219
+ if not auto_apply:
220
+ return recommendation
221
+
222
+ if contradiction_candidates and not allow_with_contradictions:
223
+ recommendation["reason"] = "blocked_by_contradiction_candidates"
224
+ return recommendation
225
+
226
+ signal = float(recommendation.get("signal") or 0.0)
227
+ if signal < auto_apply_signal:
228
+ recommendation["reason"] = "signal_below_autopromote_threshold"
229
+ return recommendation
230
+
231
+ model_hint = recommendation.get("model_hint") if isinstance(recommendation.get("model_hint"), dict) else {}
232
+ if not model_hint or not model_hint.get("contradiction") or float(model_hint.get("confidence") or 0.0) < model_conf_threshold:
233
+ recommendation["reason"] = "model_hint_below_autopromote_threshold"
234
+ return recommendation
235
+
236
+ target = str(recommendation.get("target_reference") or "").strip()
237
+ if not target or target == reference:
238
+ recommendation["reason"] = "missing_target"
239
+ return recommendation
240
+
241
+ merged = mark_memory_relationship(reference, relationship="supersedes", target_reference=target, status="active")
242
+ recommendation["auto_applied"] = merged is not None
243
+ recommendation["reason"] = "auto_applied_supersession" if merged is not None else "auto_apply_failed"
244
+ return recommendation
245
+
246
+
154
247
  def _auto_attach_governance_candidates(reference: str) -> Dict[str, Any]:
155
248
  duplicate_candidates = find_duplicate_candidates(reference, limit=5, min_similarity=0.72)
156
249
  contradiction_candidates = find_contradiction_candidates(reference, limit=5, min_signal=0.55, use_model=True)
157
- auto_promotion = _auto_promote_governance_candidates(
250
+ supersession_recommendation = _recommend_supersession_from_contradictions(
251
+ reference,
252
+ contradiction_candidates=contradiction_candidates,
253
+ )
254
+ auto_promotion = _auto_promote_duplicate_candidate(
158
255
  reference,
159
256
  duplicate_candidates=duplicate_candidates,
160
257
  contradiction_candidates=contradiction_candidates,
161
258
  )
162
- supersession_recommendation = _recommend_supersession_from_contradictions(
259
+ supersession_recommendation = _auto_apply_supersession_recommendation(
163
260
  reference,
164
261
  contradiction_candidates=contradiction_candidates,
262
+ supersession_recommendation=supersession_recommendation,
165
263
  )
166
264
  payload = {
167
265
  "duplicate_candidates": [item.get("reference") for item in duplicate_candidates if item.get("reference")],
@@ -196,7 +294,7 @@ def store_memory(
196
294
  ) -> int:
197
295
  content = _sanitize(content)
198
296
  table = memory_type.strip().lower() if memory_type else "knowledge"
199
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
297
+ allowed = set(store.MEMORY_TABLES)
200
298
  if table not in allowed:
201
299
  table = "knowledge"
202
300
  normalized_metadata = provenance.normalize_metadata(metadata, source=source)
@@ -344,7 +442,7 @@ def find_duplicate_candidates(
344
442
  payload = provenance.fetch_reference(reference) or {}
345
443
  table = str(payload.get("table") or payload.get("type") or "")
346
444
  content = str(payload.get("content") or "")
347
- if table not in {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}:
445
+ if table not in set(store.MEMORY_TABLES):
348
446
  return []
349
447
  row_id = payload.get("id")
350
448
  conn = store.connect()
@@ -395,7 +493,7 @@ def find_contradiction_candidates(
395
493
  payload = provenance.fetch_reference(reference) or {}
396
494
  table = str(payload.get("table") or payload.get("type") or "")
397
495
  content = str(payload.get("content") or "")
398
- if table not in {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}:
496
+ if table not in set(store.MEMORY_TABLES):
399
497
  return []
400
498
  row_id = payload.get("id")
401
499
  conn = store.connect()
@@ -494,7 +592,7 @@ def list_governance_candidates(
494
592
  categories: Optional[List[str]] = None,
495
593
  limit: int = 50,
496
594
  ) -> List[Dict[str, Any]]:
497
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
595
+ allowed = set(store.MEMORY_TABLES)
498
596
  tables = [table for table in (categories or list(allowed)) if table in allowed]
499
597
  conn = store.connect()
500
598
  try:
@@ -532,6 +630,95 @@ def _remove_from_list(values: Any, target: str) -> List[str]:
532
630
  return [str(item) for item in (values or []) if str(item) and str(item) != target]
533
631
 
534
632
 
633
+ def _review_item_context(reference: str, *, depth: int = 1) -> Dict[str, Any]:
634
+ payload = provenance.hydrate_reference(reference, depth=depth) or {"reference": reference}
635
+ metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
636
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
637
+ return {
638
+ "reference": reference,
639
+ "bucket": payload.get("table"),
640
+ "id": payload.get("id"),
641
+ "timestamp": payload.get("timestamp"),
642
+ "content": payload.get("content"),
643
+ "memory_status": prov.get("memory_status") or metadata.get("memory_status") or "active",
644
+ "provenance_preview": payload.get("provenance_preview") or provenance.preview_from_metadata(metadata),
645
+ "metadata": metadata,
646
+ "links": payload.get("links") or [],
647
+ "backlinks": payload.get("backlinks") or [],
648
+ }
649
+
650
+
651
+ def _review_item_summary(kind: str, reference: str, target_reference: str) -> str:
652
+ if kind == "duplicate_candidate":
653
+ return f"{reference} may duplicate {target_reference}"
654
+ if kind == "contradiction_candidate":
655
+ return f"{reference} may contradict {target_reference}"
656
+ if kind == "supersession_recommendation":
657
+ return f"{reference} may supersede {target_reference}"
658
+ return f"{reference} requires review against {target_reference}"
659
+
660
+
661
+ def _review_actions(kind: str, relationship: str) -> List[Dict[str, Any]]:
662
+ meta = _REVIEW_KIND_METADATA.get(kind, {})
663
+ return [
664
+ {
665
+ "decision": "approve",
666
+ "approved": True,
667
+ "relationship": relationship,
668
+ "label": meta.get("approve_label") or "Approve",
669
+ },
670
+ {
671
+ "decision": "reject",
672
+ "approved": False,
673
+ "relationship": relationship,
674
+ "label": meta.get("reject_label") or "Reject",
675
+ },
676
+ ]
677
+
678
+
679
+ def _relationship_for_review(kind: str | None = None, relationship: str | None = None) -> str:
680
+ resolved = (relationship or "").strip().lower()
681
+ if resolved:
682
+ return resolved
683
+ kind_key = (kind or "").strip().lower()
684
+ return _REVIEW_KIND_METADATA.get(kind_key, {}).get("relationship", "")
685
+
686
+
687
+ def list_governance_review_items(
688
+ *,
689
+ categories: Optional[List[str]] = None,
690
+ limit: int = 100,
691
+ context_depth: int = 1,
692
+ ) -> List[Dict[str, Any]]:
693
+ items = governance_queue(categories=categories, limit=limit)
694
+ review_items: List[Dict[str, Any]] = []
695
+ for item in items:
696
+ kind = str(item.get("kind") or "")
697
+ relationship = _relationship_for_review(kind=kind)
698
+ reference = str(item.get("reference") or "")
699
+ target_reference = str(item.get("target_reference") or "")
700
+ if not reference or not target_reference or not relationship:
701
+ continue
702
+ review_items.append({
703
+ "review_id": f"{kind}:{reference}->{target_reference}",
704
+ "kind": kind,
705
+ "kind_label": _REVIEW_KIND_METADATA.get(kind, {}).get("label") or kind.replace("_", " "),
706
+ "relationship": relationship,
707
+ "priority": int(item.get("priority") or 0),
708
+ "timestamp": item.get("timestamp"),
709
+ "bucket": item.get("bucket"),
710
+ "signal": float(item.get("signal") or 0.0),
711
+ "reason": item.get("reason"),
712
+ "reference": reference,
713
+ "target_reference": target_reference,
714
+ "summary": _review_item_summary(kind, reference, target_reference),
715
+ "actions": _review_actions(kind, relationship),
716
+ "source": _review_item_context(reference, depth=context_depth),
717
+ "target": _review_item_context(target_reference, depth=context_depth),
718
+ })
719
+ return review_items
720
+
721
+
535
722
  def apply_governance_decision(
536
723
  reference: str,
537
724
  *,
@@ -541,7 +728,26 @@ def apply_governance_decision(
541
728
  ) -> Dict[str, Any] | None:
542
729
  relationship = (relationship or "").strip().lower()
543
730
  if approved:
544
- return mark_memory_relationship(reference, relationship=relationship, target_reference=target_reference)
731
+ merged = mark_memory_relationship(reference, relationship=relationship, target_reference=target_reference)
732
+ if merged is None:
733
+ return None
734
+ updates: Dict[str, Any] = {}
735
+ if relationship == "duplicate_of":
736
+ current = provenance.fetch_reference(reference) or {}
737
+ metadata = current.get("metadata") or {}
738
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
739
+ updates["duplicate_candidates"] = _remove_from_list(prov.get("duplicate_candidates"), target_reference)
740
+ elif relationship == "contradicts":
741
+ current = provenance.fetch_reference(reference) or {}
742
+ metadata = current.get("metadata") or {}
743
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
744
+ updates["contradiction_candidates"] = _remove_from_list(prov.get("contradiction_candidates"), target_reference)
745
+ elif relationship == "supersedes":
746
+ updates["supersession_recommendation"] = None
747
+ if updates:
748
+ merged = provenance.force_update_memory_metadata(reference, updates) or merged
749
+ _emit(f"apply_governance_decision_{relationship}_approved")
750
+ return merged
545
751
 
546
752
  current = provenance.fetch_reference(reference) or {}
547
753
  metadata = current.get("metadata") or {}
@@ -552,14 +758,55 @@ def apply_governance_decision(
552
758
  elif relationship == "contradicts":
553
759
  updates["contradiction_candidates"] = _remove_from_list(prov.get("contradiction_candidates"), target_reference)
554
760
  elif relationship == "supersedes":
761
+ recommendation = prov.get("supersession_recommendation") if isinstance(prov.get("supersession_recommendation"), dict) else {}
762
+ if not recommendation or str(recommendation.get("target_reference") or "") == target_reference:
763
+ updates["supersession_recommendation"] = None
555
764
  updates["supersedes"] = None
556
765
  else:
557
766
  return None
558
- merged = provenance.update_memory_metadata(reference, updates)
767
+ merged = provenance.force_update_memory_metadata(reference, updates)
559
768
  _emit(f"apply_governance_decision_{relationship}_{'approved' if approved else 'rejected'}")
560
769
  return merged
561
770
 
562
771
 
772
+ def apply_governance_review_decision(
773
+ reference: str,
774
+ *,
775
+ target_reference: str,
776
+ approved: bool = True,
777
+ kind: str | None = None,
778
+ relationship: str | None = None,
779
+ context_depth: int = 1,
780
+ ) -> Dict[str, Any] | None:
781
+ resolved_relationship = _relationship_for_review(kind=kind, relationship=relationship)
782
+ if not resolved_relationship:
783
+ return None
784
+ result = apply_governance_decision(
785
+ reference,
786
+ relationship=resolved_relationship,
787
+ target_reference=target_reference,
788
+ approved=approved,
789
+ )
790
+ if result is None:
791
+ return None
792
+ resolved_kind = (kind or "").strip().lower()
793
+ if not resolved_kind:
794
+ for candidate_kind, meta in _REVIEW_KIND_METADATA.items():
795
+ if meta.get("relationship") == resolved_relationship:
796
+ resolved_kind = candidate_kind
797
+ break
798
+ return {
799
+ "reference": reference,
800
+ "target_reference": target_reference,
801
+ "approved": bool(approved),
802
+ "kind": resolved_kind or None,
803
+ "relationship": resolved_relationship,
804
+ "result": result,
805
+ "source": _review_item_context(reference, depth=context_depth),
806
+ "target": _review_item_context(target_reference, depth=context_depth),
807
+ }
808
+
809
+
563
810
  def rollback_governance_decision(
564
811
  reference: str,
565
812
  *,
@@ -617,7 +864,7 @@ def rollback_governance_decision(
617
864
 
618
865
 
619
866
  def governance_queue(*, categories: Optional[List[str]] = None, limit: int = 100) -> List[Dict[str, Any]]:
620
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
867
+ allowed = set(store.MEMORY_TABLES)
621
868
  tables = [table for table in (categories or list(allowed)) if table in allowed]
622
869
  conn = store.connect()
623
870
  try:
@@ -883,7 +1130,7 @@ def governance_audit(*, limit: int = 100, kinds: Optional[List[str]] = None) ->
883
1130
 
884
1131
 
885
1132
  def governance_summary(*, categories: Optional[List[str]] = None) -> Dict[str, Any]:
886
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
1133
+ allowed = set(store.MEMORY_TABLES)
887
1134
  tables = [table for table in (categories or list(allowed)) if table in allowed]
888
1135
  conn = store.connect()
889
1136
  try:
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import uuid
4
4
  import json
5
+ import re
6
+ from difflib import SequenceMatcher
5
7
  from typing import Dict, Any
6
8
 
7
9
  from brain.runtime.instrumentation import emit_event
@@ -11,6 +13,96 @@ from brain.runtime.security import redaction
11
13
 
12
14
 
13
15
  LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
16
+ _NEAR_DUPLICATE_SIMILARITY = 0.85
17
+
18
+
19
+ def _normalize_summary(text: str) -> str:
20
+ return re.sub(r"\s+", " ", str(text or "").strip().lower())
21
+
22
+
23
+ def _tokenize(text: str) -> set[str]:
24
+ return {token for token in re.findall(r"[a-z0-9]+", _normalize_summary(text))}
25
+
26
+
27
+ def _summary_similarity(left: str, right: str) -> float:
28
+ left_tokens = _tokenize(left)
29
+ right_tokens = _tokenize(right)
30
+ token_similarity = 0.0
31
+ if left_tokens and right_tokens:
32
+ overlap = len(left_tokens & right_tokens)
33
+ union = len(left_tokens | right_tokens)
34
+ token_similarity = overlap / max(1, union)
35
+ sequence_similarity = SequenceMatcher(None, _normalize_summary(left), _normalize_summary(right)).ratio()
36
+ return max(token_similarity, sequence_similarity)
37
+
38
+
39
+ def _ranges_overlap(left: Dict[str, Any], right: Dict[str, Any]) -> bool:
40
+ if str(left.get("path") or "") != str(right.get("path") or ""):
41
+ return False
42
+
43
+ def _as_int(value: Any) -> int | None:
44
+ try:
45
+ return int(value) if value is not None else None
46
+ except Exception:
47
+ return None
48
+
49
+ left_start = _as_int(left.get("start_line"))
50
+ left_end = _as_int(left.get("end_line")) or left_start
51
+ right_start = _as_int(right.get("start_line"))
52
+ right_end = _as_int(right.get("end_line")) or right_start
53
+
54
+ if left_start is None and right_start is None:
55
+ return True
56
+ if left_start is None or right_start is None:
57
+ return False
58
+ return max(left_start, right_start) <= min(left_end or left_start, right_end or right_start)
59
+
60
+
61
+ def _shares_provenance_anchor(left: Dict[str, Any], right: Dict[str, Any]) -> bool:
62
+ left_meta = provenance.normalize_metadata(left)
63
+ right_meta = provenance.normalize_metadata(right)
64
+ left_prov = left_meta.get("provenance") if isinstance(left_meta.get("provenance"), dict) else {}
65
+ right_prov = right_meta.get("provenance") if isinstance(right_meta.get("provenance"), dict) else {}
66
+
67
+ left_conv = left_prov.get("conversation") if isinstance(left_prov.get("conversation"), dict) else {}
68
+ right_conv = right_prov.get("conversation") if isinstance(right_prov.get("conversation"), dict) else {}
69
+ if left_conv.get("message_id") and left_conv.get("message_id") == right_conv.get("message_id"):
70
+ return True
71
+
72
+ left_transcript = left_prov.get("transcript_anchor") if isinstance(left_prov.get("transcript_anchor"), dict) else {}
73
+ right_transcript = right_prov.get("transcript_anchor") if isinstance(right_prov.get("transcript_anchor"), dict) else {}
74
+ if left_transcript.get("path") and right_transcript.get("path") and _ranges_overlap(left_transcript, right_transcript):
75
+ return True
76
+
77
+ left_refs = {str(item) for item in left_prov.get("source_references") or [] if str(item).strip()}
78
+ right_refs = {str(item) for item in right_prov.get("source_references") or [] if str(item).strip()}
79
+ return bool(left_refs & right_refs)
80
+
81
+
82
+ def _find_near_duplicate_candidate(conn, source_event_id: int, summary: str, metadata: Dict[str, Any]) -> str | None:
83
+ rows = conn.execute(
84
+ """
85
+ SELECT candidate_id, distilled_summary, metadata_json
86
+ FROM candidates
87
+ WHERE source_event_id != ?
88
+ ORDER BY created_at DESC, candidate_id DESC
89
+ LIMIT 250
90
+ """,
91
+ (source_event_id,),
92
+ ).fetchall()
93
+ normalized_summary = _normalize_summary(summary)
94
+ for row in rows:
95
+ existing_summary = str(row["distilled_summary"] if isinstance(row, dict) else row[1] or "")
96
+ similarity = _summary_similarity(normalized_summary, existing_summary)
97
+ if similarity < _NEAR_DUPLICATE_SIMILARITY:
98
+ continue
99
+ try:
100
+ existing_metadata = json.loads(row["metadata_json"] if isinstance(row, dict) else row[2] or "{}")
101
+ except Exception:
102
+ existing_metadata = {}
103
+ if _shares_provenance_anchor(metadata, existing_metadata):
104
+ return str(row["candidate_id"] if isinstance(row, dict) else row[0])
105
+ return None
14
106
 
15
107
 
16
108
  def create_candidate(
@@ -29,14 +121,20 @@ def create_candidate(
29
121
  normalized_metadata = provenance.normalize_metadata(metadata, source="candidate")
30
122
 
31
123
  conn = store.connect()
32
- row = conn.execute(
124
+ exact_row = conn.execute(
33
125
  "SELECT candidate_id FROM candidates WHERE source_event_id=? AND distilled_summary=?",
34
126
  (source_event_id, summary),
35
127
  ).fetchone()
36
- if row:
128
+ if exact_row:
37
129
  conn.close()
38
130
  emit_event(LOGFILE, "brain_memory_candidate_duplicate", status="ok", source_event_id=source_event_id)
39
- return {"candidate_id": row[0], "duplicate": True}
131
+ return {"candidate_id": exact_row[0], "duplicate": True}
132
+
133
+ near_duplicate_id = _find_near_duplicate_candidate(conn, source_event_id, summary, normalized_metadata)
134
+ if near_duplicate_id:
135
+ conn.close()
136
+ emit_event(LOGFILE, "brain_memory_candidate_duplicate", status="ok", source_event_id=source_event_id, duplicate_kind="near")
137
+ return {"candidate_id": near_duplicate_id, "duplicate": True}
40
138
 
41
139
  candidate_id = str(uuid.uuid4())
42
140
  verification_status = "verified" if verification_lines else "unverified"
@@ -9,7 +9,7 @@ from brain.runtime import state_store
9
9
  from brain.runtime.instrumentation import emit_event
10
10
  from brain.runtime.memory import memory_links, memory_salience, provenance, store, unresolved_state
11
11
 
12
- _ALLOWED_MEMORY_TABLES = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons", "candidates", "promotions"}
12
+ _ALLOWED_MEMORY_TABLES = {*store.MEMORY_TABLES, "candidates", "promotions"}
13
13
  LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
14
14
  _COMMITMENT_RE = re.compile(
15
15
  r"\b(i(?:'m| am)? going to|i will|i'll|let me|i can(?:\s+now)?|next,? i(?:'ll| will)|i should be able to)\b",
@@ -97,7 +97,12 @@ def _reject_distilled_summary(summary: str, source: str) -> bool:
97
97
  if lowered.startswith(("good job", "be proactive", "be thorough", "always check", "always remember")):
98
98
  return True
99
99
  if source and lowered == _normalize(source):
100
- return True
100
+ # In no-model environments the best available summary can be the
101
+ # original one-line experience. Keep rejecting verbose/source-equal
102
+ # fallbacks, but allow concise operational statements through.
103
+ compact_source = re.sub(r"\s+", " ", str(source or "")).strip()
104
+ if "\n" in compact_source or len(compact_source) > 120:
105
+ return True
101
106
  return False
102
107
 
103
108
 
@@ -5,13 +5,13 @@ from typing import Dict, Any
5
5
  from brain.runtime.memory import store, integrity
6
6
 
7
7
 
8
- EMBED_TABLES = ("knowledge", "runbooks", "lessons", "directives", "reflections", "tasks")
8
+ EMBED_TABLES = tuple(store.MEMORY_TABLES)
9
9
 
10
10
 
11
11
  def get_memory_health() -> Dict[str, Any]:
12
12
  conn = store.connect()
13
13
  counts: Dict[str, int] = {}
14
- for table in ["experiences", "candidates", "promotions", "memory_index", "knowledge", "runbooks", "lessons", "directives", "reflections", "tasks", "vector_embeddings"]:
14
+ for table in ["experiences", "candidates", "promotions", "memory_index", *store.MEMORY_TABLES, "vector_embeddings"]:
15
15
  try:
16
16
  counts[table] = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
17
17
  except Exception:
@@ -20,7 +20,7 @@ def get_memory_health() -> Dict[str, Any]:
20
20
  vector_count = 0
21
21
  try:
22
22
  vector_count = conn.execute(
23
- "SELECT COUNT(*) FROM vector_embeddings WHERE source_type IN ('knowledge','runbooks','lessons','directives','reflections','tasks')"
23
+ "SELECT COUNT(*) FROM vector_embeddings WHERE source_type IN ('knowledge','preferences','identity','reflections','directives','tasks','runbooks','lessons')"
24
24
  ).fetchone()[0]
25
25
  except Exception:
26
26
  vector_count = 0
@@ -7,7 +7,7 @@ from brain.runtime import state_store
7
7
  from brain.runtime.memory import store
8
8
 
9
9
 
10
- EMBED_TABLES = ("knowledge", "runbooks", "lessons", "directives", "reflections", "tasks")
10
+ EMBED_TABLES = tuple(store.MEMORY_TABLES)
11
11
 
12
12
 
13
13
  def run_integrity_check() -> Dict[str, Any]:
@@ -21,6 +21,8 @@ def run_integrity_check() -> Dict[str, Any]:
21
21
  required = {
22
22
  "experiences",
23
23
  "knowledge",
24
+ "preferences",
25
+ "identity",
24
26
  "reflections",
25
27
  "tasks",
26
28
  "directives",
@@ -12,7 +12,7 @@ from brain.runtime.instrumentation import emit_event
12
12
  from brain.runtime.memory import api, integrity, memory_consolidation, memory_links, provenance, store, unresolved_state, vector_index
13
13
 
14
14
  LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
15
- _WRITABLE_MEMORY_TABLES = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
15
+ _WRITABLE_MEMORY_TABLES = set(store.MEMORY_TABLES)
16
16
  _SUMMARY_PREFIX_RE = re.compile(r"^(?:insight|recommendation|lesson)\s*:\s*", re.IGNORECASE)
17
17
 
18
18