@simbimbo/memory-ocmemog 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.9 — 2026-03-19
4
+
5
+ Memory quality, governance, and review release.
6
+
7
+ ### Highlights
8
+ added near-duplicate candidate collapse during candidate generation, so double-ingested transcript/session content resolves to the existing candidate
9
+ - added conservative reflection reclassification and new durable buckets for `preferences` and `identity`
10
+ - wired new buckets through storage, retrieval, embeddings, health, integrity, and promotion/demotion paths
11
+ - hardened governance auto-promotion for duplicates and supersessions with stricter thresholds and guardrails
12
+ - added governance review endpoints plus dashboard review panel with filters and approve/reject actions
13
+ - fixed release-blocking distill fallback behavior in no-model environments and removed stale hard-coded bucket drift
14
+
15
+ ## 0.1.8 — 2026-03-19
16
+
17
+ Documentation and release follow-through after the llama.cpp migration and repo grooming pass.
18
+
19
+ ### Highlights
20
+ - documented the stable local runtime architecture (gateway/sidecar/text/embed split)
21
+ - published the repo in a llama.cpp-first state with fixed ports and cleaned installers/scripts
22
+ - kept compatibility hooks only where still useful instead of leaving Ollama as the implied primary path
23
+
3
24
  ## 0.1.7 — 2026-03-19
4
25
 
5
26
  llama.cpp-first cleanup after the 0.1.6 runtime cutover.
package/README.md CHANGED
@@ -14,6 +14,9 @@ Architecture at a glance:
14
14
  - **FastAPI sidecar (`ocmemog/sidecar/`)** exposes memory and continuity APIs
15
15
  - **SQLite-backed runtime (`brain/runtime/memory/`)** powers storage, hydration, checkpoints, salience ranking, and pondering
16
16
 
17
+ Current local runtime architecture note:
18
+ - `docs/architecture/local-runtime-2026-03-19.md`
19
+
17
20
  ## Repo layout
18
21
 
19
22
  - `openclaw.plugin.json`, `index.ts`, `package.json`: OpenClaw plugin package and manifest.
@@ -9,6 +9,27 @@ from brain.runtime import inference
9
9
  from brain.runtime.instrumentation import emit_event
10
10
  from brain.runtime.security import redaction
11
11
 
12
+ _REVIEW_KIND_METADATA: Dict[str, Dict[str, str]] = {
13
+ "duplicate_candidate": {
14
+ "relationship": "duplicate_of",
15
+ "label": "Duplicate candidate",
16
+ "approve_label": "Approve duplicate merge",
17
+ "reject_label": "Reject duplicate merge",
18
+ },
19
+ "contradiction_candidate": {
20
+ "relationship": "contradicts",
21
+ "label": "Contradiction candidate",
22
+ "approve_label": "Mark as contradiction",
23
+ "reject_label": "Dismiss contradiction",
24
+ },
25
+ "supersession_recommendation": {
26
+ "relationship": "supersedes",
27
+ "label": "Supersession recommendation",
28
+ "approve_label": "Approve supersession",
29
+ "reject_label": "Dismiss supersession",
30
+ },
31
+ }
32
+
12
33
 
13
34
  def _sanitize(text: str) -> str:
14
35
  redacted, _ = redaction.redact_text(text)
@@ -72,8 +93,6 @@ def _recommend_supersession_from_contradictions(
72
93
 
73
94
  signal_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_RECOMMEND_SIGNAL", "0.9") or 0.9)
74
95
  model_conf_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_MODEL_CONFIDENCE", "0.9") or 0.9)
75
- auto_apply = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_SUPERSESSION", "false").strip().lower() in {"1", "true", "yes"}
76
-
77
96
  ranked = sorted(contradiction_candidates, key=lambda item: float(item.get("signal") or 0.0), reverse=True)
78
97
  top = ranked[0]
79
98
  signal = float(top.get("signal") or 0.0)
@@ -105,28 +124,38 @@ def _recommend_supersession_from_contradictions(
105
124
  "model_hint": model_hint,
106
125
  })
107
126
 
108
- if auto_apply:
109
- merged = mark_memory_relationship(reference, relationship="supersedes", target_reference=target, status="active")
110
- recommendation["auto_applied"] = merged is not None
111
- recommendation["reason"] = "auto_applied_supersession" if merged is not None else "auto_apply_failed"
112
-
113
127
  return recommendation
114
128
 
115
129
 
116
- def _auto_promote_governance_candidates(
130
+ def _canonicalize_duplicate_target(reference: str) -> str:
131
+ payload = provenance.fetch_reference(reference) or {}
132
+ metadata = payload.get("metadata") or {}
133
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
134
+ canonical = str(prov.get("canonical_reference") or prov.get("duplicate_of") or reference).strip()
135
+ return canonical or reference
136
+
137
+
138
+ def _token_signature(text: str) -> frozenset[str]:
139
+ return frozenset(_tokenize(text))
140
+
141
+
142
+ def _auto_promote_duplicate_candidate(
117
143
  reference: str,
118
144
  *,
119
145
  duplicate_candidates: List[Dict[str, Any]],
120
146
  contradiction_candidates: List[Dict[str, Any]],
121
147
  ) -> Dict[str, Any]:
122
148
  auto_promote_enabled = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE", "true").strip().lower() in {"1", "true", "yes"}
123
- duplicate_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_SIMILARITY", "0.92") or 0.92)
149
+ allow_with_contradictions = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_ALLOW_CONTRADICTIONS", "false").strip().lower() in {"1", "true", "yes"}
150
+ duplicate_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_SIMILARITY", "0.98") or 0.98)
151
+ duplicate_margin = float(os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_MARGIN", "0.02") or 0.02)
152
+ require_exact_tokens = os.environ.get("OCMEMOG_GOVERNANCE_DUPLICATE_AUTOPROMOTE_REQUIRE_EXACT_TOKENS", "true").strip().lower() in {"1", "true", "yes"}
124
153
  promoted: Dict[str, Any] = {"duplicate_of": None, "promoted": False, "reason": "disabled" if not auto_promote_enabled else "none"}
125
154
 
126
155
  if not auto_promote_enabled:
127
156
  return promoted
128
157
 
129
- if contradiction_candidates:
158
+ if contradiction_candidates and not allow_with_contradictions:
130
159
  promoted["reason"] = "blocked_by_contradiction_candidates"
131
160
  return promoted
132
161
 
@@ -134,13 +163,29 @@ def _auto_promote_governance_candidates(
134
163
  promoted["reason"] = "no_duplicate_candidates"
135
164
  return promoted
136
165
 
137
- top = sorted(duplicate_candidates, key=lambda item: float(item.get("similarity") or 0.0), reverse=True)[0]
166
+ payload = provenance.fetch_reference(reference) or {}
167
+ reference_content = str(payload.get("content") or "")
168
+ reference_signature = _token_signature(reference_content)
169
+ ranked = sorted(duplicate_candidates, key=lambda item: float(item.get("similarity") or 0.0), reverse=True)
170
+ top = ranked[0]
138
171
  similarity = float(top.get("similarity") or 0.0)
139
- target = str(top.get("reference") or "")
140
- if not target or similarity < duplicate_threshold:
172
+ target = _canonicalize_duplicate_target(str(top.get("reference") or ""))
173
+ if not target or target == reference or similarity < duplicate_threshold:
141
174
  promoted["reason"] = "similarity_below_threshold"
142
175
  return promoted
143
176
 
177
+ if len(ranked) > 1:
178
+ runner_up = float(ranked[1].get("similarity") or 0.0)
179
+ if similarity - runner_up < duplicate_margin:
180
+ promoted["reason"] = "ambiguous_duplicate_candidates"
181
+ return promoted
182
+
183
+ target_payload = provenance.fetch_reference(target) or {}
184
+ target_content = str(target_payload.get("content") or "")
185
+ if require_exact_tokens and _token_signature(target_content) != reference_signature:
186
+ promoted["reason"] = "token_signature_mismatch"
187
+ return promoted
188
+
144
189
  merged = mark_memory_relationship(reference, relationship="duplicate_of", target_reference=target, status="duplicate")
145
190
  promoted.update({
146
191
  "duplicate_of": target,
@@ -151,17 +196,70 @@ def _auto_promote_governance_candidates(
151
196
  return promoted
152
197
 
153
198
 
199
+ def _auto_apply_supersession_recommendation(
200
+ reference: str,
201
+ *,
202
+ contradiction_candidates: List[Dict[str, Any]],
203
+ supersession_recommendation: Dict[str, Any],
204
+ ) -> Dict[str, Any]:
205
+ recommendation = dict(supersession_recommendation or {})
206
+ if not recommendation:
207
+ return {"recommended": False, "auto_applied": False, "reason": "missing_recommendation", "target_reference": None, "signal": 0.0}
208
+
209
+ auto_apply = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_SUPERSESSION", "false").strip().lower() in {"1", "true", "yes"}
210
+ allow_with_contradictions = os.environ.get("OCMEMOG_GOVERNANCE_AUTOPROMOTE_ALLOW_CONTRADICTIONS", "false").strip().lower() in {"1", "true", "yes"}
211
+ auto_apply_signal = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_AUTOPROMOTE_SIGNAL", "0.97") or 0.97)
212
+ model_conf_threshold = float(os.environ.get("OCMEMOG_GOVERNANCE_SUPERSESSION_AUTOPROMOTE_MODEL_CONFIDENCE", "0.97") or 0.97)
213
+
214
+ recommendation.setdefault("auto_applied", False)
215
+ if not recommendation.get("recommended"):
216
+ recommendation["reason"] = recommendation.get("reason") or "not_recommended"
217
+ return recommendation
218
+
219
+ if not auto_apply:
220
+ return recommendation
221
+
222
+ if contradiction_candidates and not allow_with_contradictions:
223
+ recommendation["reason"] = "blocked_by_contradiction_candidates"
224
+ return recommendation
225
+
226
+ signal = float(recommendation.get("signal") or 0.0)
227
+ if signal < auto_apply_signal:
228
+ recommendation["reason"] = "signal_below_autopromote_threshold"
229
+ return recommendation
230
+
231
+ model_hint = recommendation.get("model_hint") if isinstance(recommendation.get("model_hint"), dict) else {}
232
+ if not model_hint or not model_hint.get("contradiction") or float(model_hint.get("confidence") or 0.0) < model_conf_threshold:
233
+ recommendation["reason"] = "model_hint_below_autopromote_threshold"
234
+ return recommendation
235
+
236
+ target = str(recommendation.get("target_reference") or "").strip()
237
+ if not target or target == reference:
238
+ recommendation["reason"] = "missing_target"
239
+ return recommendation
240
+
241
+ merged = mark_memory_relationship(reference, relationship="supersedes", target_reference=target, status="active")
242
+ recommendation["auto_applied"] = merged is not None
243
+ recommendation["reason"] = "auto_applied_supersession" if merged is not None else "auto_apply_failed"
244
+ return recommendation
245
+
246
+
154
247
  def _auto_attach_governance_candidates(reference: str) -> Dict[str, Any]:
155
248
  duplicate_candidates = find_duplicate_candidates(reference, limit=5, min_similarity=0.72)
156
249
  contradiction_candidates = find_contradiction_candidates(reference, limit=5, min_signal=0.55, use_model=True)
157
- auto_promotion = _auto_promote_governance_candidates(
250
+ supersession_recommendation = _recommend_supersession_from_contradictions(
251
+ reference,
252
+ contradiction_candidates=contradiction_candidates,
253
+ )
254
+ auto_promotion = _auto_promote_duplicate_candidate(
158
255
  reference,
159
256
  duplicate_candidates=duplicate_candidates,
160
257
  contradiction_candidates=contradiction_candidates,
161
258
  )
162
- supersession_recommendation = _recommend_supersession_from_contradictions(
259
+ supersession_recommendation = _auto_apply_supersession_recommendation(
163
260
  reference,
164
261
  contradiction_candidates=contradiction_candidates,
262
+ supersession_recommendation=supersession_recommendation,
165
263
  )
166
264
  payload = {
167
265
  "duplicate_candidates": [item.get("reference") for item in duplicate_candidates if item.get("reference")],
@@ -196,7 +294,7 @@ def store_memory(
196
294
  ) -> int:
197
295
  content = _sanitize(content)
198
296
  table = memory_type.strip().lower() if memory_type else "knowledge"
199
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
297
+ allowed = set(store.MEMORY_TABLES)
200
298
  if table not in allowed:
201
299
  table = "knowledge"
202
300
  normalized_metadata = provenance.normalize_metadata(metadata, source=source)
@@ -344,7 +442,7 @@ def find_duplicate_candidates(
344
442
  payload = provenance.fetch_reference(reference) or {}
345
443
  table = str(payload.get("table") or payload.get("type") or "")
346
444
  content = str(payload.get("content") or "")
347
- if table not in {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}:
445
+ if table not in set(store.MEMORY_TABLES):
348
446
  return []
349
447
  row_id = payload.get("id")
350
448
  conn = store.connect()
@@ -395,7 +493,7 @@ def find_contradiction_candidates(
395
493
  payload = provenance.fetch_reference(reference) or {}
396
494
  table = str(payload.get("table") or payload.get("type") or "")
397
495
  content = str(payload.get("content") or "")
398
- if table not in {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}:
496
+ if table not in set(store.MEMORY_TABLES):
399
497
  return []
400
498
  row_id = payload.get("id")
401
499
  conn = store.connect()
@@ -494,7 +592,7 @@ def list_governance_candidates(
494
592
  categories: Optional[List[str]] = None,
495
593
  limit: int = 50,
496
594
  ) -> List[Dict[str, Any]]:
497
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
595
+ allowed = set(store.MEMORY_TABLES)
498
596
  tables = [table for table in (categories or list(allowed)) if table in allowed]
499
597
  conn = store.connect()
500
598
  try:
@@ -532,6 +630,95 @@ def _remove_from_list(values: Any, target: str) -> List[str]:
532
630
  return [str(item) for item in (values or []) if str(item) and str(item) != target]
533
631
 
534
632
 
633
+ def _review_item_context(reference: str, *, depth: int = 1) -> Dict[str, Any]:
634
+ payload = provenance.hydrate_reference(reference, depth=depth) or {"reference": reference}
635
+ metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
636
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
637
+ return {
638
+ "reference": reference,
639
+ "bucket": payload.get("table"),
640
+ "id": payload.get("id"),
641
+ "timestamp": payload.get("timestamp"),
642
+ "content": payload.get("content"),
643
+ "memory_status": prov.get("memory_status") or metadata.get("memory_status") or "active",
644
+ "provenance_preview": payload.get("provenance_preview") or provenance.preview_from_metadata(metadata),
645
+ "metadata": metadata,
646
+ "links": payload.get("links") or [],
647
+ "backlinks": payload.get("backlinks") or [],
648
+ }
649
+
650
+
651
+ def _review_item_summary(kind: str, reference: str, target_reference: str) -> str:
652
+ if kind == "duplicate_candidate":
653
+ return f"{reference} may duplicate {target_reference}"
654
+ if kind == "contradiction_candidate":
655
+ return f"{reference} may contradict {target_reference}"
656
+ if kind == "supersession_recommendation":
657
+ return f"{reference} may supersede {target_reference}"
658
+ return f"{reference} requires review against {target_reference}"
659
+
660
+
661
+ def _review_actions(kind: str, relationship: str) -> List[Dict[str, Any]]:
662
+ meta = _REVIEW_KIND_METADATA.get(kind, {})
663
+ return [
664
+ {
665
+ "decision": "approve",
666
+ "approved": True,
667
+ "relationship": relationship,
668
+ "label": meta.get("approve_label") or "Approve",
669
+ },
670
+ {
671
+ "decision": "reject",
672
+ "approved": False,
673
+ "relationship": relationship,
674
+ "label": meta.get("reject_label") or "Reject",
675
+ },
676
+ ]
677
+
678
+
679
+ def _relationship_for_review(kind: str | None = None, relationship: str | None = None) -> str:
680
+ resolved = (relationship or "").strip().lower()
681
+ if resolved:
682
+ return resolved
683
+ kind_key = (kind or "").strip().lower()
684
+ return _REVIEW_KIND_METADATA.get(kind_key, {}).get("relationship", "")
685
+
686
+
687
+ def list_governance_review_items(
688
+ *,
689
+ categories: Optional[List[str]] = None,
690
+ limit: int = 100,
691
+ context_depth: int = 1,
692
+ ) -> List[Dict[str, Any]]:
693
+ items = governance_queue(categories=categories, limit=limit)
694
+ review_items: List[Dict[str, Any]] = []
695
+ for item in items:
696
+ kind = str(item.get("kind") or "")
697
+ relationship = _relationship_for_review(kind=kind)
698
+ reference = str(item.get("reference") or "")
699
+ target_reference = str(item.get("target_reference") or "")
700
+ if not reference or not target_reference or not relationship:
701
+ continue
702
+ review_items.append({
703
+ "review_id": f"{kind}:{reference}->{target_reference}",
704
+ "kind": kind,
705
+ "kind_label": _REVIEW_KIND_METADATA.get(kind, {}).get("label") or kind.replace("_", " "),
706
+ "relationship": relationship,
707
+ "priority": int(item.get("priority") or 0),
708
+ "timestamp": item.get("timestamp"),
709
+ "bucket": item.get("bucket"),
710
+ "signal": float(item.get("signal") or 0.0),
711
+ "reason": item.get("reason"),
712
+ "reference": reference,
713
+ "target_reference": target_reference,
714
+ "summary": _review_item_summary(kind, reference, target_reference),
715
+ "actions": _review_actions(kind, relationship),
716
+ "source": _review_item_context(reference, depth=context_depth),
717
+ "target": _review_item_context(target_reference, depth=context_depth),
718
+ })
719
+ return review_items
720
+
721
+
535
722
  def apply_governance_decision(
536
723
  reference: str,
537
724
  *,
@@ -541,7 +728,26 @@ def apply_governance_decision(
541
728
  ) -> Dict[str, Any] | None:
542
729
  relationship = (relationship or "").strip().lower()
543
730
  if approved:
544
- return mark_memory_relationship(reference, relationship=relationship, target_reference=target_reference)
731
+ merged = mark_memory_relationship(reference, relationship=relationship, target_reference=target_reference)
732
+ if merged is None:
733
+ return None
734
+ updates: Dict[str, Any] = {}
735
+ if relationship == "duplicate_of":
736
+ current = provenance.fetch_reference(reference) or {}
737
+ metadata = current.get("metadata") or {}
738
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
739
+ updates["duplicate_candidates"] = _remove_from_list(prov.get("duplicate_candidates"), target_reference)
740
+ elif relationship == "contradicts":
741
+ current = provenance.fetch_reference(reference) or {}
742
+ metadata = current.get("metadata") or {}
743
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
744
+ updates["contradiction_candidates"] = _remove_from_list(prov.get("contradiction_candidates"), target_reference)
745
+ elif relationship == "supersedes":
746
+ updates["supersession_recommendation"] = None
747
+ if updates:
748
+ merged = provenance.force_update_memory_metadata(reference, updates) or merged
749
+ _emit(f"apply_governance_decision_{relationship}_approved")
750
+ return merged
545
751
 
546
752
  current = provenance.fetch_reference(reference) or {}
547
753
  metadata = current.get("metadata") or {}
@@ -552,14 +758,55 @@ def apply_governance_decision(
552
758
  elif relationship == "contradicts":
553
759
  updates["contradiction_candidates"] = _remove_from_list(prov.get("contradiction_candidates"), target_reference)
554
760
  elif relationship == "supersedes":
761
+ recommendation = prov.get("supersession_recommendation") if isinstance(prov.get("supersession_recommendation"), dict) else {}
762
+ if not recommendation or str(recommendation.get("target_reference") or "") == target_reference:
763
+ updates["supersession_recommendation"] = None
555
764
  updates["supersedes"] = None
556
765
  else:
557
766
  return None
558
- merged = provenance.update_memory_metadata(reference, updates)
767
+ merged = provenance.force_update_memory_metadata(reference, updates)
559
768
  _emit(f"apply_governance_decision_{relationship}_{'approved' if approved else 'rejected'}")
560
769
  return merged
561
770
 
562
771
 
772
+ def apply_governance_review_decision(
773
+ reference: str,
774
+ *,
775
+ target_reference: str,
776
+ approved: bool = True,
777
+ kind: str | None = None,
778
+ relationship: str | None = None,
779
+ context_depth: int = 1,
780
+ ) -> Dict[str, Any] | None:
781
+ resolved_relationship = _relationship_for_review(kind=kind, relationship=relationship)
782
+ if not resolved_relationship:
783
+ return None
784
+ result = apply_governance_decision(
785
+ reference,
786
+ relationship=resolved_relationship,
787
+ target_reference=target_reference,
788
+ approved=approved,
789
+ )
790
+ if result is None:
791
+ return None
792
+ resolved_kind = (kind or "").strip().lower()
793
+ if not resolved_kind:
794
+ for candidate_kind, meta in _REVIEW_KIND_METADATA.items():
795
+ if meta.get("relationship") == resolved_relationship:
796
+ resolved_kind = candidate_kind
797
+ break
798
+ return {
799
+ "reference": reference,
800
+ "target_reference": target_reference,
801
+ "approved": bool(approved),
802
+ "kind": resolved_kind or None,
803
+ "relationship": resolved_relationship,
804
+ "result": result,
805
+ "source": _review_item_context(reference, depth=context_depth),
806
+ "target": _review_item_context(target_reference, depth=context_depth),
807
+ }
808
+
809
+
563
810
  def rollback_governance_decision(
564
811
  reference: str,
565
812
  *,
@@ -617,7 +864,7 @@ def rollback_governance_decision(
617
864
 
618
865
 
619
866
  def governance_queue(*, categories: Optional[List[str]] = None, limit: int = 100) -> List[Dict[str, Any]]:
620
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
867
+ allowed = set(store.MEMORY_TABLES)
621
868
  tables = [table for table in (categories or list(allowed)) if table in allowed]
622
869
  conn = store.connect()
623
870
  try:
@@ -883,7 +1130,7 @@ def governance_audit(*, limit: int = 100, kinds: Optional[List[str]] = None) ->
883
1130
 
884
1131
 
885
1132
  def governance_summary(*, categories: Optional[List[str]] = None) -> Dict[str, Any]:
886
- allowed = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
1133
+ allowed = set(store.MEMORY_TABLES)
887
1134
  tables = [table for table in (categories or list(allowed)) if table in allowed]
888
1135
  conn = store.connect()
889
1136
  try:
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import uuid
4
4
  import json
5
+ import re
6
+ from difflib import SequenceMatcher
5
7
  from typing import Dict, Any
6
8
 
7
9
  from brain.runtime.instrumentation import emit_event
@@ -11,6 +13,96 @@ from brain.runtime.security import redaction
11
13
 
12
14
 
13
15
  LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
16
+ _NEAR_DUPLICATE_SIMILARITY = 0.85
17
+
18
+
19
+ def _normalize_summary(text: str) -> str:
20
+ return re.sub(r"\s+", " ", str(text or "").strip().lower())
21
+
22
+
23
+ def _tokenize(text: str) -> set[str]:
24
+ return {token for token in re.findall(r"[a-z0-9]+", _normalize_summary(text))}
25
+
26
+
27
+ def _summary_similarity(left: str, right: str) -> float:
28
+ left_tokens = _tokenize(left)
29
+ right_tokens = _tokenize(right)
30
+ token_similarity = 0.0
31
+ if left_tokens and right_tokens:
32
+ overlap = len(left_tokens & right_tokens)
33
+ union = len(left_tokens | right_tokens)
34
+ token_similarity = overlap / max(1, union)
35
+ sequence_similarity = SequenceMatcher(None, _normalize_summary(left), _normalize_summary(right)).ratio()
36
+ return max(token_similarity, sequence_similarity)
37
+
38
+
39
+ def _ranges_overlap(left: Dict[str, Any], right: Dict[str, Any]) -> bool:
40
+ if str(left.get("path") or "") != str(right.get("path") or ""):
41
+ return False
42
+
43
+ def _as_int(value: Any) -> int | None:
44
+ try:
45
+ return int(value) if value is not None else None
46
+ except Exception:
47
+ return None
48
+
49
+ left_start = _as_int(left.get("start_line"))
50
+ left_end = _as_int(left.get("end_line")) or left_start
51
+ right_start = _as_int(right.get("start_line"))
52
+ right_end = _as_int(right.get("end_line")) or right_start
53
+
54
+ if left_start is None and right_start is None:
55
+ return True
56
+ if left_start is None or right_start is None:
57
+ return False
58
+ return max(left_start, right_start) <= min(left_end or left_start, right_end or right_start)
59
+
60
+
61
+ def _shares_provenance_anchor(left: Dict[str, Any], right: Dict[str, Any]) -> bool:
62
+ left_meta = provenance.normalize_metadata(left)
63
+ right_meta = provenance.normalize_metadata(right)
64
+ left_prov = left_meta.get("provenance") if isinstance(left_meta.get("provenance"), dict) else {}
65
+ right_prov = right_meta.get("provenance") if isinstance(right_meta.get("provenance"), dict) else {}
66
+
67
+ left_conv = left_prov.get("conversation") if isinstance(left_prov.get("conversation"), dict) else {}
68
+ right_conv = right_prov.get("conversation") if isinstance(right_prov.get("conversation"), dict) else {}
69
+ if left_conv.get("message_id") and left_conv.get("message_id") == right_conv.get("message_id"):
70
+ return True
71
+
72
+ left_transcript = left_prov.get("transcript_anchor") if isinstance(left_prov.get("transcript_anchor"), dict) else {}
73
+ right_transcript = right_prov.get("transcript_anchor") if isinstance(right_prov.get("transcript_anchor"), dict) else {}
74
+ if left_transcript.get("path") and right_transcript.get("path") and _ranges_overlap(left_transcript, right_transcript):
75
+ return True
76
+
77
+ left_refs = {str(item) for item in left_prov.get("source_references") or [] if str(item).strip()}
78
+ right_refs = {str(item) for item in right_prov.get("source_references") or [] if str(item).strip()}
79
+ return bool(left_refs & right_refs)
80
+
81
+
82
+ def _find_near_duplicate_candidate(conn, source_event_id: int, summary: str, metadata: Dict[str, Any]) -> str | None:
83
+ rows = conn.execute(
84
+ """
85
+ SELECT candidate_id, distilled_summary, metadata_json
86
+ FROM candidates
87
+ WHERE source_event_id != ?
88
+ ORDER BY created_at DESC, candidate_id DESC
89
+ LIMIT 250
90
+ """,
91
+ (source_event_id,),
92
+ ).fetchall()
93
+ normalized_summary = _normalize_summary(summary)
94
+ for row in rows:
95
+ existing_summary = str(row["distilled_summary"] if isinstance(row, dict) else row[1] or "")
96
+ similarity = _summary_similarity(normalized_summary, existing_summary)
97
+ if similarity < _NEAR_DUPLICATE_SIMILARITY:
98
+ continue
99
+ try:
100
+ existing_metadata = json.loads(row["metadata_json"] if isinstance(row, dict) else row[2] or "{}")
101
+ except Exception:
102
+ existing_metadata = {}
103
+ if _shares_provenance_anchor(metadata, existing_metadata):
104
+ return str(row["candidate_id"] if isinstance(row, dict) else row[0])
105
+ return None
14
106
 
15
107
 
16
108
  def create_candidate(
@@ -29,14 +121,20 @@ def create_candidate(
29
121
  normalized_metadata = provenance.normalize_metadata(metadata, source="candidate")
30
122
 
31
123
  conn = store.connect()
32
- row = conn.execute(
124
+ exact_row = conn.execute(
33
125
  "SELECT candidate_id FROM candidates WHERE source_event_id=? AND distilled_summary=?",
34
126
  (source_event_id, summary),
35
127
  ).fetchone()
36
- if row:
128
+ if exact_row:
37
129
  conn.close()
38
130
  emit_event(LOGFILE, "brain_memory_candidate_duplicate", status="ok", source_event_id=source_event_id)
39
- return {"candidate_id": row[0], "duplicate": True}
131
+ return {"candidate_id": exact_row[0], "duplicate": True}
132
+
133
+ near_duplicate_id = _find_near_duplicate_candidate(conn, source_event_id, summary, normalized_metadata)
134
+ if near_duplicate_id:
135
+ conn.close()
136
+ emit_event(LOGFILE, "brain_memory_candidate_duplicate", status="ok", source_event_id=source_event_id, duplicate_kind="near")
137
+ return {"candidate_id": near_duplicate_id, "duplicate": True}
40
138
 
41
139
  candidate_id = str(uuid.uuid4())
42
140
  verification_status = "verified" if verification_lines else "unverified"
@@ -9,7 +9,7 @@ from brain.runtime import state_store
9
9
  from brain.runtime.instrumentation import emit_event
10
10
  from brain.runtime.memory import memory_links, memory_salience, provenance, store, unresolved_state
11
11
 
12
- _ALLOWED_MEMORY_TABLES = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons", "candidates", "promotions"}
12
+ _ALLOWED_MEMORY_TABLES = {*store.MEMORY_TABLES, "candidates", "promotions"}
13
13
  LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
14
14
  _COMMITMENT_RE = re.compile(
15
15
  r"\b(i(?:'m| am)? going to|i will|i'll|let me|i can(?:\s+now)?|next,? i(?:'ll| will)|i should be able to)\b",
@@ -97,7 +97,12 @@ def _reject_distilled_summary(summary: str, source: str) -> bool:
97
97
  if lowered.startswith(("good job", "be proactive", "be thorough", "always check", "always remember")):
98
98
  return True
99
99
  if source and lowered == _normalize(source):
100
- return True
100
+ # In no-model environments the best available summary can be the
101
+ # original one-line experience. Keep rejecting verbose/source-equal
102
+ # fallbacks, but allow concise operational statements through.
103
+ compact_source = re.sub(r"\s+", " ", str(source or "")).strip()
104
+ if "\n" in compact_source or len(compact_source) > 120:
105
+ return True
101
106
  return False
102
107
 
103
108
 
@@ -5,13 +5,13 @@ from typing import Dict, Any
5
5
  from brain.runtime.memory import store, integrity
6
6
 
7
7
 
8
- EMBED_TABLES = ("knowledge", "runbooks", "lessons", "directives", "reflections", "tasks")
8
+ EMBED_TABLES = tuple(store.MEMORY_TABLES)
9
9
 
10
10
 
11
11
  def get_memory_health() -> Dict[str, Any]:
12
12
  conn = store.connect()
13
13
  counts: Dict[str, int] = {}
14
- for table in ["experiences", "candidates", "promotions", "memory_index", "knowledge", "runbooks", "lessons", "directives", "reflections", "tasks", "vector_embeddings"]:
14
+ for table in ["experiences", "candidates", "promotions", "memory_index", *store.MEMORY_TABLES, "vector_embeddings"]:
15
15
  try:
16
16
  counts[table] = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
17
17
  except Exception:
@@ -20,7 +20,7 @@ def get_memory_health() -> Dict[str, Any]:
20
20
  vector_count = 0
21
21
  try:
22
22
  vector_count = conn.execute(
23
- "SELECT COUNT(*) FROM vector_embeddings WHERE source_type IN ('knowledge','runbooks','lessons','directives','reflections','tasks')"
23
+ "SELECT COUNT(*) FROM vector_embeddings WHERE source_type IN ('knowledge','preferences','identity','reflections','directives','tasks','runbooks','lessons')"
24
24
  ).fetchone()[0]
25
25
  except Exception:
26
26
  vector_count = 0
@@ -7,7 +7,7 @@ from brain.runtime import state_store
7
7
  from brain.runtime.memory import store
8
8
 
9
9
 
10
- EMBED_TABLES = ("knowledge", "runbooks", "lessons", "directives", "reflections", "tasks")
10
+ EMBED_TABLES = tuple(store.MEMORY_TABLES)
11
11
 
12
12
 
13
13
  def run_integrity_check() -> Dict[str, Any]:
@@ -21,6 +21,8 @@ def run_integrity_check() -> Dict[str, Any]:
21
21
  required = {
22
22
  "experiences",
23
23
  "knowledge",
24
+ "preferences",
25
+ "identity",
24
26
  "reflections",
25
27
  "tasks",
26
28
  "directives",
@@ -12,7 +12,7 @@ from brain.runtime.instrumentation import emit_event
12
12
  from brain.runtime.memory import api, integrity, memory_consolidation, memory_links, provenance, store, unresolved_state, vector_index
13
13
 
14
14
  LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
15
- _WRITABLE_MEMORY_TABLES = {"knowledge", "reflections", "directives", "tasks", "runbooks", "lessons"}
15
+ _WRITABLE_MEMORY_TABLES = set(store.MEMORY_TABLES)
16
16
  _SUMMARY_PREFIX_RE = re.compile(r"^(?:insight|recommendation|lesson)\s*:\s*", re.IGNORECASE)
17
17
 
18
18