@simbimbo/memory-ocmemog 0.1.18 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.1.20 — 2026-03-29
6
+
7
+ Operational-artifact canonicalization, dead-lane retrieval hardening, and rehydratable-memory recall fixes.
8
+
9
+ ### Highlights
10
+ - added first-class operational artifact canonicalization with idempotent upsert semantics for durable canonical memories keyed by artifact identity
11
+ - added a sidecar API route to canonicalize operational artifacts explicitly, including aliases, ownership, status, kind, and provenance/source references
12
+ - taught retrieval to recognize operational-artifact / dead-lane queries and strongly reward exact operational literals such as cron/job names and CLI module tokens
13
+ - reduced transcript/reflection noise for operational-artifact queries so canonical memories win over repeated run logs and scattered debug fragments
14
+ - added transcript claim-search as a bounded fallback surface for exact prior-claim recovery, alongside larger `/memory/context` transcript windows with provenance-anchor fallback
15
+ - added focused regression coverage for operational artifact canonicalization, claim-search/memory-context rehydration, and canonical top-answer retrieval behavior
16
+ - validated the architecture fix with a targeted release-gate run reporting `30 passed`
17
+
18
+ ## 0.1.19 — 2026-03-29
19
+
20
+ Hydrate/resume hardening, unresolved-state main-DB consolidation, and retrieval/rehydration source-of-truth completion.
21
+
22
+ ### Highlights
23
+ - eliminated expensive hydrate hot-path scans by adding release-critical indexes for linked-memory and unresolved-state lookups
24
+ - moved unresolved state into the main SQLite memory DB with compatibility import from legacy `unresolved_state.db`
25
+ - removed inline self-heal from `/conversation/hydrate` and kept hydrate read-mostly and fast by default
26
+ - disabled predictive brief generation on hydrate by default and added long-session hydrate guardrails for oversized scopes
27
+ - surfaced hydrate budget/warning metadata for long sessions and added doctor visibility for stale legacy unresolved-state DB residue
28
+ - completed the stranded retrieval/rehydration hardening lane: preserved canonical operator-facing `selected_because` semantics while retaining richer ranking signals, and validated canonical source-of-truth retrieval behavior
29
+ - added regression coverage for resume-latency query plans, long-session guardrails, unresolved-state main-DB migration, and updated retrieval explanation semantics
30
+ - validation passed across the combined retrieval + hydrate + migration + doctor suite, and the canonical `./scripts/ocmemog-release-check.sh` gate succeeded
31
+
5
32
  ## 0.1.17 — 2026-03-26
6
33
 
7
34
  Promotion/governance observability, anti-cruft hardening, queue/runtime summary parity, and release validation recovery.
package/ocmemog/doctor.py CHANGED
@@ -279,6 +279,8 @@ def _run_sqlite_schema(_: None) -> CheckResult:
279
279
  tables = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()}
280
280
  missing = sorted(required - tables)
281
281
  quick = str(conn.execute("PRAGMA quick_check(1)").fetchone()[0] or "unknown")
282
+ legacy_unresolved_state_path = state_store.data_dir() / "unresolved_state.db"
283
+ legacy_unresolved_state_exists = legacy_unresolved_state_path.exists()
282
284
  for table in sorted(required):
283
285
  if table in missing:
284
286
  continue
@@ -319,6 +321,7 @@ def _run_sqlite_schema(_: None) -> CheckResult:
319
321
  "schema_version_expected": store.SCHEMA_VERSION,
320
322
  "schema_versions": version_map,
321
323
  "schema_version_issues": version_issues,
324
+ "legacy_unresolved_state_db": str(legacy_unresolved_state_path) if legacy_unresolved_state_exists else None,
322
325
  }
323
326
  else:
324
327
  details = {
@@ -329,6 +332,7 @@ def _run_sqlite_schema(_: None) -> CheckResult:
329
332
  "schema_version_expected": store.SCHEMA_VERSION,
330
333
  "schema_versions": version_map,
331
334
  "schema_version_issues": version_issues,
335
+ "legacy_unresolved_state_db": str(legacy_unresolved_state_path) if legacy_unresolved_state_exists else None,
332
336
  }
333
337
  if version_issues:
334
338
  details["schema_version_issues"] = version_issues
@@ -361,11 +365,14 @@ def _run_sqlite_schema(_: None) -> CheckResult:
361
365
  message="Schema metadata includes unexpected versions or schema column issues.",
362
366
  details=details,
363
367
  )
368
+ message = "SQLite schema and DB open state are healthy."
369
+ if legacy_unresolved_state_exists:
370
+ message = "SQLite schema and DB open state are healthy, but legacy unresolved_state.db still exists and should be cleaned up after migration verification."
364
371
  return CheckResult(
365
372
  key="sqlite/schema-access",
366
373
  label="sqlite and schema",
367
374
  status="ok",
368
- message="SQLite schema and DB open state are healthy.",
375
+ message=message,
369
376
  details=details,
370
377
  )
371
378
 
@@ -789,6 +789,87 @@ def find_contradiction_candidates(
789
789
  return top
790
790
 
791
791
 
792
+ def canonicalize_operational_artifact(
793
+ *,
794
+ key: str,
795
+ summary: str,
796
+ aliases: Optional[List[str]] = None,
797
+ status: str = "active",
798
+ owner: Optional[str] = None,
799
+ artifact_kind: Optional[str] = None,
800
+ source_references: Optional[List[str]] = None,
801
+ source_labels: Optional[List[str]] = None,
802
+ ) -> Dict[str, Any]:
803
+ normalized_key = str(key or "").strip().lower()
804
+ if not normalized_key:
805
+ return {"ok": False, "error": "missing_key"}
806
+
807
+ aliases = [str(item).strip() for item in (aliases or []) if str(item).strip()]
808
+ source_references = [str(item).strip() for item in (source_references or []) if str(item).strip()]
809
+ source_labels = [str(item).strip() for item in (source_labels or []) if str(item).strip()]
810
+
811
+ conn = store.connect()
812
+ existing_reference: Optional[str] = None
813
+ existing_row: Optional[Dict[str, Any]] = None
814
+ try:
815
+ rows = conn.execute(
816
+ "SELECT id, content, metadata_json FROM knowledge ORDER BY id DESC LIMIT 500"
817
+ ).fetchall()
818
+ for row in rows:
819
+ memory_id = int(row["id"] if isinstance(row, dict) else row[0])
820
+ content = str(row["content"] if isinstance(row, dict) else row[1] or "")
821
+ raw_metadata = row["metadata_json"] if isinstance(row, dict) else row[2]
822
+ metadata = json.loads(raw_metadata or "{}")
823
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
824
+ artifact_key = str(prov.get("artifact_key") or metadata.get("artifact_key") or "").strip().lower()
825
+ haystack = f"{content}\n{json.dumps(metadata, ensure_ascii=False)}".lower()
826
+ if artifact_key == normalized_key or normalized_key in haystack:
827
+ existing_reference = f"knowledge:{memory_id}"
828
+ existing_row = {"content": content, "metadata": metadata}
829
+ break
830
+ finally:
831
+ conn.close()
832
+
833
+ metadata = provenance.normalize_metadata({
834
+ "artifact_key": normalized_key,
835
+ "artifact_aliases": aliases,
836
+ "artifact_kind": artifact_kind or "operational_artifact",
837
+ "owner": owner or "openclaw",
838
+ "memory_status": status,
839
+ "canonical": True,
840
+ "source_references": source_references,
841
+ "source_labels": list(dict.fromkeys(source_labels + ["canonical-operational-artifact"])),
842
+ "derived_via": "operational_artifact_canonicalize",
843
+ }, source="operational_artifact")
844
+
845
+ if existing_reference:
846
+ provenance.force_update_memory_metadata(existing_reference, metadata)
847
+ parsed = _parse_memory_reference(existing_reference)
848
+ if parsed:
849
+ table, identifier = parsed
850
+ conn = store.connect()
851
+ try:
852
+ conn.execute(
853
+ f"UPDATE {table} SET content=?, metadata_json=? WHERE id=?",
854
+ (summary, json.dumps(provenance.normalize_metadata({**metadata, **(existing_row.get('metadata') if existing_row else {})}, source='operational_artifact'), ensure_ascii=False), int(identifier)),
855
+ )
856
+ conn.commit()
857
+ finally:
858
+ conn.close()
859
+ return {"ok": True, "reference": existing_reference, "created": False}
860
+
861
+ memory_id = store_memory(
862
+ "knowledge",
863
+ summary,
864
+ source="operational_artifact",
865
+ metadata=metadata,
866
+ post_process=True,
867
+ )
868
+ reference = f"knowledge:{memory_id}"
869
+ provenance.force_update_memory_metadata(reference, {"canonical_reference": reference, "memory_status": status, "artifact_key": normalized_key})
870
+ return {"ok": True, "reference": reference, "created": True}
871
+
872
+
792
873
  def mark_memory_relationship(
793
874
  reference: str,
794
875
  *,
@@ -454,10 +454,17 @@ def _enrich_turn_metadata(
454
454
  enriched["resolution"] = resolution
455
455
  if reply_target is None:
456
456
  reply_target = _get_turn_by_id(resolution.get("resolved_turn_id"))
457
+ lane_pivot = role == "user" and _looks_like_lane_pivot(content)
457
458
  if reply_target:
458
459
  reply_meta = _turn_meta(reply_target)
459
- branch_root_turn_id = int(reply_meta.get("branch_root_turn_id") or reply_target.get("id") or 0) or None
460
- branch_id = str(reply_meta.get("branch_id") or f"branch:{branch_root_turn_id or reply_target.get('id')}")
460
+ if lane_pivot:
461
+ branch_root_turn_id = int(reply_target.get("id") or 0) or None
462
+ branch_id = f"pivot:{branch_root_turn_id or (message_id or 'unknown')}"
463
+ enriched["lane_pivot"] = True
464
+ enriched["lane_pivot_from_turn_id"] = int(reply_target.get("id") or 0) or None
465
+ else:
466
+ branch_root_turn_id = int(reply_meta.get("branch_root_turn_id") or reply_target.get("id") or 0) or None
467
+ branch_id = str(reply_meta.get("branch_id") or f"branch:{branch_root_turn_id or reply_target.get('id')}")
461
468
  enriched["reply_to_turn_id"] = int(reply_target.get("id") or 0) or None
462
469
  enriched["reply_to_reference"] = reply_target.get("reference")
463
470
  if reply_target.get("message_id"):
@@ -465,7 +472,7 @@ def _enrich_turn_metadata(
465
472
  if branch_root_turn_id:
466
473
  enriched["branch_root_turn_id"] = branch_root_turn_id
467
474
  enriched["branch_id"] = branch_id
468
- enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1
475
+ enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1 if not lane_pivot else 1
469
476
  elif message_id and "branch_id" not in enriched:
470
477
  enriched["branch_id"] = f"message:{message_id}"
471
478
  enriched["branch_depth"] = 0
@@ -485,6 +492,30 @@ def _effective_turn_content(turn: Optional[Dict[str, Any]]) -> Optional[str]:
485
492
  return content or None
486
493
 
487
494
 
495
+ def _looks_like_lane_pivot(text: str) -> bool:
496
+ lowered = _normalize_conversation_text(text).lower()
497
+ if not lowered:
498
+ return False
499
+ return any(
500
+ token in lowered
501
+ for token in (
502
+ "before we continue",
503
+ "let's pause",
504
+ "lets pause",
505
+ "back to",
506
+ "move back",
507
+ "return to",
508
+ "failing us",
509
+ "before we move back",
510
+ "pause for one second",
511
+ "task list for",
512
+ "can you show me",
513
+ "what did we just fix",
514
+ "what is the task list",
515
+ )
516
+ )
517
+
518
+
488
519
  def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[str, Any]], *, limit: int = 6) -> List[Dict[str, Any]]:
489
520
  if not turn:
490
521
  return []
@@ -501,7 +532,21 @@ def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[s
501
532
  break
502
533
  seen.add(reply_to_turn_id)
503
534
  current = lookup.get(reply_to_turn_id) or _get_turn_by_id(reply_to_turn_id)
504
- return list(reversed(chain))
535
+ chain = list(reversed(chain))
536
+
537
+ # Trim temporary side-answer prefixes when the later cluster clearly returns to the foreground lane.
538
+ if len(chain) >= 4:
539
+ for idx in range(len(chain) - 3):
540
+ first = chain[idx]
541
+ second = chain[idx + 1]
542
+ third = chain[idx + 2]
543
+ first_text = _normalize_conversation_text(str(first.get("content") or "").strip()).lower()
544
+ second_text = _normalize_conversation_text(str(second.get("content") or "").strip()).lower()
545
+ third_text = _normalize_conversation_text(str(third.get("content") or "").strip()).lower()
546
+ if first.get("role") == "assistant" and second.get("role") == "user" and third.get("role") == "assistant":
547
+ if any(token in first_text for token in ("recent", "repo work", "list", "includes", "task list", "show me")) and any(token in second_text for token in ("great", "proceed", "task list", "back to", "continue")):
548
+ return chain[idx + 1 :]
549
+ return chain
505
550
 
506
551
 
507
552
  def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
@@ -519,13 +564,56 @@ def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str
519
564
  ]
520
565
  if not branch_turns:
521
566
  branch_turns = [latest_turn]
567
+
568
+ reply_chain = _reply_chain_for_turn(latest_turn, turns_list, limit=8)
569
+ reply_chain_ids = [int(item.get("id") or 0) for item in reply_chain if int(item.get("id") or 0) > 0]
570
+ latest_turn_id = int(latest_turn.get("id") or 0)
571
+
572
+ # When a later user/assistant cluster explicitly pivots or returns to a lane,
573
+ # prefer the suffix of the branch starting at the most recent non-reply turn that
574
+ # follows the earlier adjacent lane. This keeps fluid topic switches from dragging
575
+ # previous foreground work into the active branch payload.
576
+ suffix_start_id = reply_chain_ids[0] if reply_chain_ids else latest_turn_id
577
+ for turn in reversed(branch_turns):
578
+ turn_id = int(turn.get("id") or 0)
579
+ if turn_id <= 0 or turn_id >= latest_turn_id:
580
+ continue
581
+ meta = _turn_meta(turn)
582
+ if meta.get("reply_to_turn_id"):
583
+ continue
584
+ if turn.get("role") != "user":
585
+ continue
586
+ turn_text = _normalize_conversation_text(str(turn.get("content") or "").strip()).lower()
587
+ if any(token in turn_text for token in ("before we continue", "let's pause", "move back", "back to", "failing us", "return to", "resume", "task list for", "can you show me", "what did we just fix", "what is the task list")):
588
+ suffix_start_id = turn_id
589
+ break
590
+
591
+ # If the latest reply chain is the user explicitly returning after a temporary side answer,
592
+ # do not keep the side-answer assistant turn as foreground branch context.
593
+ if len(reply_chain_ids) >= 2:
594
+ first_reply_id = reply_chain_ids[0]
595
+ first_reply_turn = next((turn for turn in branch_turns if int(turn.get("id") or 0) == first_reply_id), None)
596
+ if first_reply_turn and first_reply_turn.get("role") == "assistant":
597
+ first_reply_text = _normalize_conversation_text(str(first_reply_turn.get("content") or "").strip()).lower()
598
+ if any(token in first_reply_text for token in ("recent", "repo work", "list", "includes", "task list", "show me")):
599
+ user_followup_id = reply_chain_ids[1] if len(reply_chain_ids) > 1 else None
600
+ if user_followup_id:
601
+ suffix_start_id = max(suffix_start_id, user_followup_id)
602
+
603
+ filtered_branch_turns = [
604
+ turn for turn in branch_turns
605
+ if int(turn.get("id") or 0) >= suffix_start_id or int(turn.get("id") or 0) in reply_chain_ids
606
+ ]
607
+ if filtered_branch_turns:
608
+ branch_turns = filtered_branch_turns
609
+
522
610
  return {
523
611
  "branch_id": branch_id,
524
612
  "root_turn_id": root_turn_id or latest_turn.get("id"),
525
613
  "latest_turn": _turn_anchor(latest_turn),
526
614
  "turn_ids": [int(turn.get("id") or 0) for turn in branch_turns],
527
615
  "turns": [_turn_anchor(turn) for turn in branch_turns[-8:]],
528
- "reply_chain": _reply_chain_for_turn(latest_turn, turns_list, limit=8),
616
+ "reply_chain": reply_chain,
529
617
  }
530
618
 
531
619
 
@@ -46,6 +46,12 @@ def _ensure_table(conn) -> None:
46
46
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_links_unique ON memory_links(source_reference, link_type, target_reference)"
47
47
  )
48
48
  conn.commit()
49
+ conn.execute(
50
+ "CREATE INDEX IF NOT EXISTS idx_memory_links_target_created_source ON memory_links(target_reference, created_at DESC, source_reference DESC)"
51
+ )
52
+ conn.execute(
53
+ "CREATE INDEX IF NOT EXISTS idx_memory_links_source_created_target ON memory_links(source_reference, created_at DESC, target_reference)"
54
+ )
49
55
 
50
56
 
51
57
  def add_memory_link(source_reference: str, link_type: str, target_reference: str) -> None:
@@ -49,7 +49,26 @@ def _should_promote(confidence: float, threshold: float | None = None) -> bool:
49
49
 
50
50
  def _destination_table(summary: str) -> str:
51
51
  lowered = summary.lower()
52
- if "runbook" in lowered or "procedure" in lowered or "steps" in lowered:
52
+ procedural_markers = (
53
+ "runbook",
54
+ "procedure",
55
+ "steps",
56
+ "checklist",
57
+ "how to",
58
+ "how-do-i",
59
+ "upgrade",
60
+ "recover",
61
+ "recovery",
62
+ "rollback",
63
+ "restart",
64
+ "validate",
65
+ "verification",
66
+ "diagnose",
67
+ "troubleshoot",
68
+ "fix by",
69
+ "safe way",
70
+ )
71
+ if any(marker in lowered for marker in procedural_markers):
53
72
  return "runbooks"
54
73
  if "lesson" in lowered or "postmortem" in lowered or "learned" in lowered:
55
74
  return "lessons"
@@ -64,6 +83,46 @@ def _normalized_text(text: str) -> str:
64
83
  return " ".join((text or "").strip().lower().split())
65
84
 
66
85
 
86
+ def _looks_like_changelog_or_release_notes(text: str) -> bool:
87
+ lowered = _normalized_text(text)
88
+ if not lowered:
89
+ return False
90
+ changelog_markers = (
91
+ "thanks @",
92
+ "(#",
93
+ "ghsa-",
94
+ "release notes",
95
+ "changelog",
96
+ "breaking change",
97
+ "bootstrap:",
98
+ "security/",
99
+ "agents/",
100
+ "telegram:",
101
+ "discord/",
102
+ "slack/",
103
+ "providers/",
104
+ "install/",
105
+ "docker/",
106
+ )
107
+ bulletish = lowered.count(" - ") >= 2 or lowered.startswith("-")
108
+ return bulletish and any(marker in lowered for marker in changelog_markers)
109
+
110
+
111
+ def _looks_like_docs_index_link_list(text: str) -> bool:
112
+ lowered = _normalized_text(text)
113
+ if not lowered:
114
+ return False
115
+ markers = (
116
+ "start with the docs index",
117
+ "architecture overview",
118
+ "full configuration reference",
119
+ "run the gateway by the book",
120
+ "learn how the control ui/web surfaces work",
121
+ "https://docs.openclaw.ai",
122
+ )
123
+ return lowered.count("https://docs.openclaw.ai") >= 2 and any(marker in lowered for marker in markers)
124
+
125
+
67
126
  def _is_redundant_generic_candidate(summary_text: str) -> bool:
68
127
  normalized = _normalized_text(summary_text)
69
128
  if not normalized:
@@ -83,6 +142,8 @@ def _is_redundant_generic_candidate(summary_text: str) -> bool:
83
142
 
84
143
 
85
144
  def _should_reject_as_cruft(*, confidence: float, threshold: float, destination: str, summary_text: str) -> bool:
145
+ if destination == "runbooks" and (_looks_like_changelog_or_release_notes(summary_text) or _looks_like_docs_index_link_list(summary_text)):
146
+ return True
86
147
  if destination != "knowledge" or confidence >= threshold:
87
148
  return False
88
149
  return bool(_normalized_text(summary_text))
@@ -189,6 +250,17 @@ def promote_candidate(candidate: Dict[str, Any]) -> Dict[str, Any]:
189
250
  candidate_metadata["candidate_id"] = candidate_id
190
251
  candidate_metadata["derived_from_candidate_id"] = candidate_id
191
252
  candidate_metadata["derived_via"] = "promotion"
253
+ # Preserve rehydration-critical anchors whenever available.
254
+ transcript_anchor = candidate_metadata.get("transcript_anchor")
255
+ if isinstance(transcript_anchor, dict):
256
+ path_value = transcript_anchor.get("path")
257
+ if path_value and not candidate_metadata.get("source_path"):
258
+ candidate_metadata["source_path"] = path_value
259
+ if not candidate_metadata.get("source_type"):
260
+ candidate_metadata["source_type"] = "transcript"
261
+ source_refs = candidate_metadata.get("source_references")
262
+ if isinstance(source_refs, list) and source_refs and not candidate_metadata.get("source_type"):
263
+ candidate_metadata["source_type"] = "derived"
192
264
 
193
265
  conn = store.connect()
194
266
  promotion_id = None