@simbimbo/memory-ocmemog 0.1.18 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.1.20 — 2026-03-29
6
+
7
+ Operational-artifact canonicalization, dead-lane retrieval hardening, and rehydratable-memory recall fixes.
8
+
9
+ ### Highlights
10
+ - added first-class operational artifact canonicalization with idempotent upsert semantics for durable canonical memories keyed by artifact identity
11
+ - added a sidecar API route to canonicalize operational artifacts explicitly, including aliases, ownership, status, kind, and provenance/source references
12
+ - taught retrieval to recognize operational-artifact / dead-lane queries and strongly reward exact operational literals such as cron/job names and CLI module tokens
13
+ - reduced transcript/reflection noise for operational-artifact queries so canonical memories win over repeated run logs and scattered debug fragments
14
+ - added transcript claim-search as a bounded fallback surface for exact prior-claim recovery, alongside larger `/memory/context` transcript windows with provenance-anchor fallback
15
+ - added focused regression coverage for operational artifact canonicalization, claim-search/memory-context rehydration, and canonical top-answer retrieval behavior
16
+ - validated the architecture fix with a targeted release-gate run reporting `30 passed`
17
+
18
+ ## 0.1.19 — 2026-03-29
19
+
20
+ Hydrate/resume hardening, unresolved-state main-DB consolidation, and retrieval/rehydration source-of-truth completion.
21
+
22
+ ### Highlights
23
+ - eliminated expensive hydrate hot-path scans by adding release-critical indexes for linked-memory and unresolved-state lookups
24
+ - moved unresolved state into the main SQLite memory DB with compatibility import from legacy `unresolved_state.db`
25
+ - removed inline self-heal from `/conversation/hydrate` and kept hydrate read-mostly and fast by default
26
+ - disabled predictive brief generation on hydrate by default and added long-session hydrate guardrails for oversized scopes
27
+ - surfaced hydrate budget/warning metadata for long sessions and added doctor visibility for stale legacy unresolved-state DB residue
28
+ - completed the stranded retrieval/rehydration hardening lane: preserved canonical operator-facing `selected_because` semantics while retaining richer ranking signals, and validated canonical source-of-truth retrieval behavior
29
+ - added regression coverage for resume-latency query plans, long-session guardrails, unresolved-state main-DB migration, and updated retrieval explanation semantics
30
+ - validation passed across the combined retrieval + hydrate + migration + doctor suite, and the canonical `./scripts/ocmemog-release-check.sh` gate succeeded
31
+
5
32
  ## 0.1.17 — 2026-03-26
6
33
 
7
34
  Promotion/governance observability, anti-cruft hardening, queue/runtime summary parity, and release validation recovery.
package/ocmemog/doctor.py CHANGED
@@ -279,6 +279,8 @@ def _run_sqlite_schema(_: None) -> CheckResult:
279
279
  tables = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()}
280
280
  missing = sorted(required - tables)
281
281
  quick = str(conn.execute("PRAGMA quick_check(1)").fetchone()[0] or "unknown")
282
+ legacy_unresolved_state_path = state_store.data_dir() / "unresolved_state.db"
283
+ legacy_unresolved_state_exists = legacy_unresolved_state_path.exists()
282
284
  for table in sorted(required):
283
285
  if table in missing:
284
286
  continue
@@ -319,6 +321,7 @@ def _run_sqlite_schema(_: None) -> CheckResult:
319
321
  "schema_version_expected": store.SCHEMA_VERSION,
320
322
  "schema_versions": version_map,
321
323
  "schema_version_issues": version_issues,
324
+ "legacy_unresolved_state_db": str(legacy_unresolved_state_path) if legacy_unresolved_state_exists else None,
322
325
  }
323
326
  else:
324
327
  details = {
@@ -329,6 +332,7 @@ def _run_sqlite_schema(_: None) -> CheckResult:
329
332
  "schema_version_expected": store.SCHEMA_VERSION,
330
333
  "schema_versions": version_map,
331
334
  "schema_version_issues": version_issues,
335
+ "legacy_unresolved_state_db": str(legacy_unresolved_state_path) if legacy_unresolved_state_exists else None,
332
336
  }
333
337
  if version_issues:
334
338
  details["schema_version_issues"] = version_issues
@@ -361,11 +365,14 @@ def _run_sqlite_schema(_: None) -> CheckResult:
361
365
  message="Schema metadata includes unexpected versions or schema column issues.",
362
366
  details=details,
363
367
  )
368
+ message = "SQLite schema and DB open state are healthy."
369
+ if legacy_unresolved_state_exists:
370
+ message = "SQLite schema and DB open state are healthy, but legacy unresolved_state.db still exists and should be cleaned up after migration verification."
364
371
  return CheckResult(
365
372
  key="sqlite/schema-access",
366
373
  label="sqlite and schema",
367
374
  status="ok",
368
- message="SQLite schema and DB open state are healthy.",
375
+ message=message,
369
376
  details=details,
370
377
  )
371
378
 
@@ -789,6 +789,87 @@ def find_contradiction_candidates(
789
789
  return top
790
790
 
791
791
 
792
+ def canonicalize_operational_artifact(
793
+ *,
794
+ key: str,
795
+ summary: str,
796
+ aliases: Optional[List[str]] = None,
797
+ status: str = "active",
798
+ owner: Optional[str] = None,
799
+ artifact_kind: Optional[str] = None,
800
+ source_references: Optional[List[str]] = None,
801
+ source_labels: Optional[List[str]] = None,
802
+ ) -> Dict[str, Any]:
803
+ normalized_key = str(key or "").strip().lower()
804
+ if not normalized_key:
805
+ return {"ok": False, "error": "missing_key"}
806
+
807
+ aliases = [str(item).strip() for item in (aliases or []) if str(item).strip()]
808
+ source_references = [str(item).strip() for item in (source_references or []) if str(item).strip()]
809
+ source_labels = [str(item).strip() for item in (source_labels or []) if str(item).strip()]
810
+
811
+ conn = store.connect()
812
+ existing_reference: Optional[str] = None
813
+ existing_row: Optional[Dict[str, Any]] = None
814
+ try:
815
+ rows = conn.execute(
816
+ "SELECT id, content, metadata_json FROM knowledge ORDER BY id DESC LIMIT 500"
817
+ ).fetchall()
818
+ for row in rows:
819
+ memory_id = int(row["id"] if isinstance(row, dict) else row[0])
820
+ content = str(row["content"] if isinstance(row, dict) else row[1] or "")
821
+ raw_metadata = row["metadata_json"] if isinstance(row, dict) else row[2]
822
+ metadata = json.loads(raw_metadata or "{}")
823
+ prov = metadata.get("provenance") if isinstance(metadata.get("provenance"), dict) else {}
824
+ artifact_key = str(prov.get("artifact_key") or metadata.get("artifact_key") or "").strip().lower()
825
+ haystack = f"{content}\n{json.dumps(metadata, ensure_ascii=False)}".lower()
826
+ if artifact_key == normalized_key or normalized_key in haystack:
827
+ existing_reference = f"knowledge:{memory_id}"
828
+ existing_row = {"content": content, "metadata": metadata}
829
+ break
830
+ finally:
831
+ conn.close()
832
+
833
+ metadata = provenance.normalize_metadata({
834
+ "artifact_key": normalized_key,
835
+ "artifact_aliases": aliases,
836
+ "artifact_kind": artifact_kind or "operational_artifact",
837
+ "owner": owner or "openclaw",
838
+ "memory_status": status,
839
+ "canonical": True,
840
+ "source_references": source_references,
841
+ "source_labels": list(dict.fromkeys(source_labels + ["canonical-operational-artifact"])),
842
+ "derived_via": "operational_artifact_canonicalize",
843
+ }, source="operational_artifact")
844
+
845
+ if existing_reference:
846
+ provenance.force_update_memory_metadata(existing_reference, metadata)
847
+ parsed = _parse_memory_reference(existing_reference)
848
+ if parsed:
849
+ table, identifier = parsed
850
+ conn = store.connect()
851
+ try:
852
+ conn.execute(
853
+ f"UPDATE {table} SET content=?, metadata_json=? WHERE id=?",
854
+ (summary, json.dumps(provenance.normalize_metadata({**metadata, **(existing_row.get('metadata') if existing_row else {})}, source='operational_artifact'), ensure_ascii=False), int(identifier)),
855
+ )
856
+ conn.commit()
857
+ finally:
858
+ conn.close()
859
+ return {"ok": True, "reference": existing_reference, "created": False}
860
+
861
+ memory_id = store_memory(
862
+ "knowledge",
863
+ summary,
864
+ source="operational_artifact",
865
+ metadata=metadata,
866
+ post_process=True,
867
+ )
868
+ reference = f"knowledge:{memory_id}"
869
+ provenance.force_update_memory_metadata(reference, {"canonical_reference": reference, "memory_status": status, "artifact_key": normalized_key})
870
+ return {"ok": True, "reference": reference, "created": True}
871
+
872
+
792
873
  def mark_memory_relationship(
793
874
  reference: str,
794
875
  *,
@@ -454,10 +454,17 @@ def _enrich_turn_metadata(
454
454
  enriched["resolution"] = resolution
455
455
  if reply_target is None:
456
456
  reply_target = _get_turn_by_id(resolution.get("resolved_turn_id"))
457
+ lane_pivot = role == "user" and _looks_like_lane_pivot(content)
457
458
  if reply_target:
458
459
  reply_meta = _turn_meta(reply_target)
459
- branch_root_turn_id = int(reply_meta.get("branch_root_turn_id") or reply_target.get("id") or 0) or None
460
- branch_id = str(reply_meta.get("branch_id") or f"branch:{branch_root_turn_id or reply_target.get('id')}")
460
+ if lane_pivot:
461
+ branch_root_turn_id = int(reply_target.get("id") or 0) or None
462
+ branch_id = f"pivot:{branch_root_turn_id or (message_id or 'unknown')}"
463
+ enriched["lane_pivot"] = True
464
+ enriched["lane_pivot_from_turn_id"] = int(reply_target.get("id") or 0) or None
465
+ else:
466
+ branch_root_turn_id = int(reply_meta.get("branch_root_turn_id") or reply_target.get("id") or 0) or None
467
+ branch_id = str(reply_meta.get("branch_id") or f"branch:{branch_root_turn_id or reply_target.get('id')}")
461
468
  enriched["reply_to_turn_id"] = int(reply_target.get("id") or 0) or None
462
469
  enriched["reply_to_reference"] = reply_target.get("reference")
463
470
  if reply_target.get("message_id"):
@@ -465,7 +472,7 @@ def _enrich_turn_metadata(
465
472
  if branch_root_turn_id:
466
473
  enriched["branch_root_turn_id"] = branch_root_turn_id
467
474
  enriched["branch_id"] = branch_id
468
- enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1
475
+ enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1 if not lane_pivot else 1
469
476
  elif message_id and "branch_id" not in enriched:
470
477
  enriched["branch_id"] = f"message:{message_id}"
471
478
  enriched["branch_depth"] = 0
@@ -485,6 +492,30 @@ def _effective_turn_content(turn: Optional[Dict[str, Any]]) -> Optional[str]:
485
492
  return content or None
486
493
 
487
494
 
495
+ def _looks_like_lane_pivot(text: str) -> bool:
496
+ lowered = _normalize_conversation_text(text).lower()
497
+ if not lowered:
498
+ return False
499
+ return any(
500
+ token in lowered
501
+ for token in (
502
+ "before we continue",
503
+ "let's pause",
504
+ "lets pause",
505
+ "back to",
506
+ "move back",
507
+ "return to",
508
+ "failing us",
509
+ "before we move back",
510
+ "pause for one second",
511
+ "task list for",
512
+ "can you show me",
513
+ "what did we just fix",
514
+ "what is the task list",
515
+ )
516
+ )
517
+
518
+
488
519
  def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[str, Any]], *, limit: int = 6) -> List[Dict[str, Any]]:
489
520
  if not turn:
490
521
  return []
@@ -501,7 +532,21 @@ def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[s
501
532
  break
502
533
  seen.add(reply_to_turn_id)
503
534
  current = lookup.get(reply_to_turn_id) or _get_turn_by_id(reply_to_turn_id)
504
- return list(reversed(chain))
535
+ chain = list(reversed(chain))
536
+
537
+ # Trim temporary side-answer prefixes when the later cluster clearly returns to the foreground lane.
538
+ if len(chain) >= 4:
539
+ for idx in range(len(chain) - 3):
540
+ first = chain[idx]
541
+ second = chain[idx + 1]
542
+ third = chain[idx + 2]
543
+ first_text = _normalize_conversation_text(str(first.get("content") or "").strip()).lower()
544
+ second_text = _normalize_conversation_text(str(second.get("content") or "").strip()).lower()
545
+ third_text = _normalize_conversation_text(str(third.get("content") or "").strip()).lower()
546
+ if first.get("role") == "assistant" and second.get("role") == "user" and third.get("role") == "assistant":
547
+ if any(token in first_text for token in ("recent", "repo work", "list", "includes", "task list", "show me")) and any(token in second_text for token in ("great", "proceed", "task list", "back to", "continue")):
548
+ return chain[idx + 1 :]
549
+ return chain
505
550
 
506
551
 
507
552
  def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
@@ -519,13 +564,56 @@ def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str
519
564
  ]
520
565
  if not branch_turns:
521
566
  branch_turns = [latest_turn]
567
+
568
+ reply_chain = _reply_chain_for_turn(latest_turn, turns_list, limit=8)
569
+ reply_chain_ids = [int(item.get("id") or 0) for item in reply_chain if int(item.get("id") or 0) > 0]
570
+ latest_turn_id = int(latest_turn.get("id") or 0)
571
+
572
+ # When a later user/assistant cluster explicitly pivots or returns to a lane,
573
+ # prefer the suffix of the branch starting at the most recent non-reply turn that
574
+ # follows the earlier adjacent lane. This keeps fluid topic switches from dragging
575
+ # previous foreground work into the active branch payload.
576
+ suffix_start_id = reply_chain_ids[0] if reply_chain_ids else latest_turn_id
577
+ for turn in reversed(branch_turns):
578
+ turn_id = int(turn.get("id") or 0)
579
+ if turn_id <= 0 or turn_id >= latest_turn_id:
580
+ continue
581
+ meta = _turn_meta(turn)
582
+ if meta.get("reply_to_turn_id"):
583
+ continue
584
+ if turn.get("role") != "user":
585
+ continue
586
+ turn_text = _normalize_conversation_text(str(turn.get("content") or "").strip()).lower()
587
+ if any(token in turn_text for token in ("before we continue", "let's pause", "move back", "back to", "failing us", "return to", "resume", "task list for", "can you show me", "what did we just fix", "what is the task list")):
588
+ suffix_start_id = turn_id
589
+ break
590
+
591
+ # If the latest reply chain is the user explicitly returning after a temporary side answer,
592
+ # do not keep the side-answer assistant turn as foreground branch context.
593
+ if len(reply_chain_ids) >= 2:
594
+ first_reply_id = reply_chain_ids[0]
595
+ first_reply_turn = next((turn for turn in branch_turns if int(turn.get("id") or 0) == first_reply_id), None)
596
+ if first_reply_turn and first_reply_turn.get("role") == "assistant":
597
+ first_reply_text = _normalize_conversation_text(str(first_reply_turn.get("content") or "").strip()).lower()
598
+ if any(token in first_reply_text for token in ("recent", "repo work", "list", "includes", "task list", "show me")):
599
+ user_followup_id = reply_chain_ids[1] if len(reply_chain_ids) > 1 else None
600
+ if user_followup_id:
601
+ suffix_start_id = max(suffix_start_id, user_followup_id)
602
+
603
+ filtered_branch_turns = [
604
+ turn for turn in branch_turns
605
+ if int(turn.get("id") or 0) >= suffix_start_id or int(turn.get("id") or 0) in reply_chain_ids
606
+ ]
607
+ if filtered_branch_turns:
608
+ branch_turns = filtered_branch_turns
609
+
522
610
  return {
523
611
  "branch_id": branch_id,
524
612
  "root_turn_id": root_turn_id or latest_turn.get("id"),
525
613
  "latest_turn": _turn_anchor(latest_turn),
526
614
  "turn_ids": [int(turn.get("id") or 0) for turn in branch_turns],
527
615
  "turns": [_turn_anchor(turn) for turn in branch_turns[-8:]],
528
- "reply_chain": _reply_chain_for_turn(latest_turn, turns_list, limit=8),
616
+ "reply_chain": reply_chain,
529
617
  }
530
618
 
531
619
 
@@ -46,6 +46,12 @@ def _ensure_table(conn) -> None:
46
46
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_links_unique ON memory_links(source_reference, link_type, target_reference)"
47
47
  )
48
48
  conn.commit()
49
+ conn.execute(
50
+ "CREATE INDEX IF NOT EXISTS idx_memory_links_target_created_source ON memory_links(target_reference, created_at DESC, source_reference DESC)"
51
+ )
52
+ conn.execute(
53
+ "CREATE INDEX IF NOT EXISTS idx_memory_links_source_created_target ON memory_links(source_reference, created_at DESC, target_reference)"
54
+ )
49
55
 
50
56
 
51
57
  def add_memory_link(source_reference: str, link_type: str, target_reference: str) -> None:
@@ -49,7 +49,26 @@ def _should_promote(confidence: float, threshold: float | None = None) -> bool:
49
49
 
50
50
  def _destination_table(summary: str) -> str:
51
51
  lowered = summary.lower()
52
- if "runbook" in lowered or "procedure" in lowered or "steps" in lowered:
52
+ procedural_markers = (
53
+ "runbook",
54
+ "procedure",
55
+ "steps",
56
+ "checklist",
57
+ "how to",
58
+ "how-do-i",
59
+ "upgrade",
60
+ "recover",
61
+ "recovery",
62
+ "rollback",
63
+ "restart",
64
+ "validate",
65
+ "verification",
66
+ "diagnose",
67
+ "troubleshoot",
68
+ "fix by",
69
+ "safe way",
70
+ )
71
+ if any(marker in lowered for marker in procedural_markers):
53
72
  return "runbooks"
54
73
  if "lesson" in lowered or "postmortem" in lowered or "learned" in lowered:
55
74
  return "lessons"
@@ -64,6 +83,46 @@ def _normalized_text(text: str) -> str:
64
83
  return " ".join((text or "").strip().lower().split())
65
84
 
66
85
 
86
+ def _looks_like_changelog_or_release_notes(text: str) -> bool:
87
+ lowered = _normalized_text(text)
88
+ if not lowered:
89
+ return False
90
+ changelog_markers = (
91
+ "thanks @",
92
+ "(#",
93
+ "ghsa-",
94
+ "release notes",
95
+ "changelog",
96
+ "breaking change",
97
+ "bootstrap:",
98
+ "security/",
99
+ "agents/",
100
+ "telegram:",
101
+ "discord/",
102
+ "slack/",
103
+ "providers/",
104
+ "install/",
105
+ "docker/",
106
+ )
107
+ bulletish = lowered.count(" - ") >= 2 or lowered.startswith("-")
108
+ return bulletish and any(marker in lowered for marker in changelog_markers)
109
+
110
+
111
+ def _looks_like_docs_index_link_list(text: str) -> bool:
112
+ lowered = _normalized_text(text)
113
+ if not lowered:
114
+ return False
115
+ markers = (
116
+ "start with the docs index",
117
+ "architecture overview",
118
+ "full configuration reference",
119
+ "run the gateway by the book",
120
+ "learn how the control ui/web surfaces work",
121
+ "https://docs.openclaw.ai",
122
+ )
123
+ return lowered.count("https://docs.openclaw.ai") >= 2 and any(marker in lowered for marker in markers)
124
+
125
+
67
126
  def _is_redundant_generic_candidate(summary_text: str) -> bool:
68
127
  normalized = _normalized_text(summary_text)
69
128
  if not normalized:
@@ -83,6 +142,8 @@ def _is_redundant_generic_candidate(summary_text: str) -> bool:
83
142
 
84
143
 
85
144
  def _should_reject_as_cruft(*, confidence: float, threshold: float, destination: str, summary_text: str) -> bool:
145
+ if destination == "runbooks" and (_looks_like_changelog_or_release_notes(summary_text) or _looks_like_docs_index_link_list(summary_text)):
146
+ return True
86
147
  if destination != "knowledge" or confidence >= threshold:
87
148
  return False
88
149
  return bool(_normalized_text(summary_text))
@@ -189,6 +250,17 @@ def promote_candidate(candidate: Dict[str, Any]) -> Dict[str, Any]:
189
250
  candidate_metadata["candidate_id"] = candidate_id
190
251
  candidate_metadata["derived_from_candidate_id"] = candidate_id
191
252
  candidate_metadata["derived_via"] = "promotion"
253
+ # Preserve rehydration-critical anchors whenever available.
254
+ transcript_anchor = candidate_metadata.get("transcript_anchor")
255
+ if isinstance(transcript_anchor, dict):
256
+ path_value = transcript_anchor.get("path")
257
+ if path_value and not candidate_metadata.get("source_path"):
258
+ candidate_metadata["source_path"] = path_value
259
+ if not candidate_metadata.get("source_type"):
260
+ candidate_metadata["source_type"] = "transcript"
261
+ source_refs = candidate_metadata.get("source_references")
262
+ if isinstance(source_refs, list) and source_refs and not candidate_metadata.get("source_type"):
263
+ candidate_metadata["source_type"] = "derived"
192
264
 
193
265
  conn = store.connect()
194
266
  promotion_id = None