@simbimbo/memory-ocmemog 0.1.18 → 0.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/ocmemog/doctor.py +8 -1
- package/ocmemog/runtime/memory/api.py +81 -0
- package/ocmemog/runtime/memory/conversation_state.py +93 -5
- package/ocmemog/runtime/memory/memory_links.py +6 -0
- package/ocmemog/runtime/memory/promote.py +73 -1
- package/ocmemog/runtime/memory/retrieval.py +321 -4
- package/ocmemog/runtime/memory/store.py +11 -0
- package/ocmemog/runtime/memory/unresolved_state.py +91 -26
- package/ocmemog/sidecar/app.py +200 -21
- package/package.json +1 -1
- package/scripts/ocmemog-continuity-benchmark.py +20 -3
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,33 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 0.1.20 — 2026-03-29
|
|
6
|
+
|
|
7
|
+
Operational-artifact canonicalization, dead-lane retrieval hardening, and rehydratable-memory recall fixes.
|
|
8
|
+
|
|
9
|
+
### Highlights
|
|
10
|
+
- added first-class operational artifact canonicalization with idempotent upsert semantics for durable canonical memories keyed by artifact identity
|
|
11
|
+
- added a sidecar API route to canonicalize operational artifacts explicitly, including aliases, ownership, status, kind, and provenance/source references
|
|
12
|
+
- taught retrieval to recognize operational-artifact / dead-lane queries and strongly reward exact operational literals such as cron/job names and CLI module tokens
|
|
13
|
+
- reduced transcript/reflection noise for operational-artifact queries so canonical memories win over repeated run logs and scattered debug fragments
|
|
14
|
+
- added transcript claim-search as a bounded fallback surface for exact prior-claim recovery, alongside larger `/memory/context` transcript windows with provenance-anchor fallback
|
|
15
|
+
- added focused regression coverage for operational artifact canonicalization, claim-search/memory-context rehydration, and canonical top-answer retrieval behavior
|
|
16
|
+
- validated the architecture fix with a targeted release gate: `30 passed`
|
|
17
|
+
|
|
18
|
+
## 0.1.19 — 2026-03-29
|
|
19
|
+
|
|
20
|
+
Hydrate/resume hardening, unresolved-state main-DB consolidation, and retrieval/rehydration source-of-truth completion.
|
|
21
|
+
|
|
22
|
+
### Highlights
|
|
23
|
+
- eliminated expensive hydrate hot-path scans by adding release-critical indexes for linked-memory and unresolved-state lookups
|
|
24
|
+
- moved unresolved state into the main SQLite memory DB with compatibility import from legacy `unresolved_state.db`
|
|
25
|
+
- removed inline self-heal from `/conversation/hydrate` and kept hydrate read-mostly/fast by default
|
|
26
|
+
- disabled predictive brief generation on hydrate by default and added long-session hydrate guardrails for oversized scopes
|
|
27
|
+
- surfaced hydrate budget/warning metadata for long sessions and added doctor visibility for stale legacy unresolved-state DB residue
|
|
28
|
+
- completed the stranded retrieval/rehydration hardening lane: preserved canonical operator-facing `selected_because` semantics while retaining richer ranking signals, and validated canonical source-of-truth retrieval behavior
|
|
29
|
+
- added regression coverage for resume-latency query plans, long-session guardrails, unresolved-state main-DB migration, and updated retrieval explanation semantics
|
|
30
|
+
- validation passed across the combined retrieval + hydrate + migration + doctor suite, and the canonical `./scripts/ocmemog-release-check.sh` gate passed
|
|
31
|
+
|
|
5
32
|
## 0.1.17 — 2026-03-26
|
|
6
33
|
|
|
7
34
|
Promotion/governance observability, anti-cruft hardening, queue/runtime summary parity, and release validation recovery.
|
package/ocmemog/doctor.py
CHANGED
|
@@ -279,6 +279,8 @@ def _run_sqlite_schema(_: None) -> CheckResult:
|
|
|
279
279
|
tables = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()}
|
|
280
280
|
missing = sorted(required - tables)
|
|
281
281
|
quick = str(conn.execute("PRAGMA quick_check(1)").fetchone()[0] or "unknown")
|
|
282
|
+
legacy_unresolved_state_path = state_store.data_dir() / "unresolved_state.db"
|
|
283
|
+
legacy_unresolved_state_exists = legacy_unresolved_state_path.exists()
|
|
282
284
|
for table in sorted(required):
|
|
283
285
|
if table in missing:
|
|
284
286
|
continue
|
|
@@ -319,6 +321,7 @@ def _run_sqlite_schema(_: None) -> CheckResult:
|
|
|
319
321
|
"schema_version_expected": store.SCHEMA_VERSION,
|
|
320
322
|
"schema_versions": version_map,
|
|
321
323
|
"schema_version_issues": version_issues,
|
|
324
|
+
"legacy_unresolved_state_db": str(legacy_unresolved_state_path) if legacy_unresolved_state_exists else None,
|
|
322
325
|
}
|
|
323
326
|
else:
|
|
324
327
|
details = {
|
|
@@ -329,6 +332,7 @@ def _run_sqlite_schema(_: None) -> CheckResult:
|
|
|
329
332
|
"schema_version_expected": store.SCHEMA_VERSION,
|
|
330
333
|
"schema_versions": version_map,
|
|
331
334
|
"schema_version_issues": version_issues,
|
|
335
|
+
"legacy_unresolved_state_db": str(legacy_unresolved_state_path) if legacy_unresolved_state_exists else None,
|
|
332
336
|
}
|
|
333
337
|
if version_issues:
|
|
334
338
|
details["schema_version_issues"] = version_issues
|
|
@@ -361,11 +365,14 @@ def _run_sqlite_schema(_: None) -> CheckResult:
|
|
|
361
365
|
message="Schema metadata includes unexpected versions or schema column issues.",
|
|
362
366
|
details=details,
|
|
363
367
|
)
|
|
368
|
+
message = "SQLite schema and DB open state are healthy."
|
|
369
|
+
if legacy_unresolved_state_exists:
|
|
370
|
+
message = "SQLite schema and DB open state are healthy, but legacy unresolved_state.db still exists and should be cleaned up after migration verification."
|
|
364
371
|
return CheckResult(
|
|
365
372
|
key="sqlite/schema-access",
|
|
366
373
|
label="sqlite and schema",
|
|
367
374
|
status="ok",
|
|
368
|
-
message=
|
|
375
|
+
message=message,
|
|
369
376
|
details=details,
|
|
370
377
|
)
|
|
371
378
|
|
|
@@ -789,6 +789,87 @@ def find_contradiction_candidates(
|
|
|
789
789
|
return top
|
|
790
790
|
|
|
791
791
|
|
|
792
|
+
def canonicalize_operational_artifact(
    *,
    key: str,
    summary: str,
    aliases: Optional[List[str]] = None,
    status: str = "active",
    owner: Optional[str] = None,
    artifact_kind: Optional[str] = None,
    source_references: Optional[List[str]] = None,
    source_labels: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Create or update the canonical ``knowledge`` memory for an operational artifact.

    The 500 most recent ``knowledge`` rows are scanned for one whose
    ``artifact_key`` equals the normalized key, or whose content/metadata
    mentions it.  A match is updated in place; otherwise a new memory is
    stored through ``store_memory``.

    Args:
        key: Artifact identity; stripped and lower-cased before use.
        summary: Text written as the memory content.
        aliases: Alternate names; blank entries are dropped.
        status: Value stored as ``memory_status``.
        owner: Stored as ``owner``; falls back to ``"openclaw"``.
        artifact_kind: Stored as ``artifact_kind``; falls back to
            ``"operational_artifact"``.
        source_references: Provenance references; blank entries are dropped.
        source_labels: Extra labels; ``"canonical-operational-artifact"`` is
            always appended and duplicates are removed, keeping first-seen order.

    Returns:
        ``{"ok": False, "error": "missing_key"}`` when the key is blank,
        otherwise ``{"ok": True, "reference": "knowledge:<id>", "created": bool}``.
    """
    normalized_key = str(key or "").strip().lower()
    if not normalized_key:
        return {"ok": False, "error": "missing_key"}

    def _clean(values: Optional[List[str]]) -> List[str]:
        # Stringify, strip, and drop entries that end up empty.
        return [text for text in (str(item).strip() for item in (values or [])) if text]

    aliases = _clean(aliases)
    source_references = _clean(source_references)
    source_labels = _clean(source_labels)

    existing_reference: Optional[str] = None
    existing_metadata: Dict[str, Any] = {}
    conn = store.connect()
    try:
        rows = conn.execute(
            "SELECT id, content, metadata_json FROM knowledge ORDER BY id DESC LIMIT 500"
        ).fetchall()
        for row in rows:
            as_mapping = isinstance(row, dict)
            memory_id = int(row["id"] if as_mapping else row[0])
            content = str((row["content"] if as_mapping else row[1]) or "")
            raw_metadata = row["metadata_json"] if as_mapping else row[2]
            # One corrupt or non-object metadata blob must not abort the whole
            # canonicalization; treat such rows as having no metadata.
            try:
                row_metadata = json.loads(raw_metadata or "{}")
            except (TypeError, ValueError):
                row_metadata = {}
            if not isinstance(row_metadata, dict):
                row_metadata = {}
            prov = row_metadata.get("provenance")
            if not isinstance(prov, dict):
                prov = {}
            artifact_key = str(
                prov.get("artifact_key") or row_metadata.get("artifact_key") or ""
            ).strip().lower()
            haystack = f"{content}\n{json.dumps(row_metadata, ensure_ascii=False)}".lower()
            # NOTE(review): the substring test adopts any recent row that merely
            # mentions the key, so very short keys can match unrelated memories.
            if artifact_key == normalized_key or normalized_key in haystack:
                existing_reference = f"knowledge:{memory_id}"
                existing_metadata = row_metadata
                break
    finally:
        conn.close()

    metadata = provenance.normalize_metadata({
        "artifact_key": normalized_key,
        "artifact_aliases": aliases,
        "artifact_kind": artifact_kind or "operational_artifact",
        "owner": owner or "openclaw",
        "memory_status": status,
        "canonical": True,
        "source_references": source_references,
        "source_labels": list(dict.fromkeys(source_labels + ["canonical-operational-artifact"])),
        "derived_via": "operational_artifact_canonicalize",
    }, source="operational_artifact")

    if existing_reference:
        provenance.force_update_memory_metadata(existing_reference, metadata)
        parsed = _parse_memory_reference(existing_reference)
        if parsed:
            table, identifier = parsed
            # New canonical fields must win over the snapshot read before the
            # update; the reverse order silently restored the old status,
            # aliases and owner.  Keys only present on the old row are kept.
            # NOTE(review): the merge is shallow, so a nested "provenance" dict
            # from the new metadata replaces the old one wholesale -- confirm.
            merged = provenance.normalize_metadata(
                {**existing_metadata, **metadata},
                source="operational_artifact",
            )
            conn = store.connect()
            try:
                # ``table`` comes from the reference built above, not from
                # caller input, so interpolating it into the SQL is safe here.
                conn.execute(
                    f"UPDATE {table} SET content=?, metadata_json=? WHERE id=?",
                    (summary, json.dumps(merged, ensure_ascii=False), int(identifier)),
                )
                conn.commit()
            finally:
                conn.close()
        return {"ok": True, "reference": existing_reference, "created": False}

    memory_id = store_memory(
        "knowledge",
        summary,
        source="operational_artifact",
        metadata=metadata,
        post_process=True,
    )
    reference = f"knowledge:{memory_id}"
    # Stamp the new row with its own reference so later lookups can find it.
    provenance.force_update_memory_metadata(
        reference,
        {"canonical_reference": reference, "memory_status": status, "artifact_key": normalized_key},
    )
    return {"ok": True, "reference": reference, "created": True}
|
|
871
|
+
|
|
872
|
+
|
|
792
873
|
def mark_memory_relationship(
|
|
793
874
|
reference: str,
|
|
794
875
|
*,
|
|
@@ -454,10 +454,17 @@ def _enrich_turn_metadata(
|
|
|
454
454
|
enriched["resolution"] = resolution
|
|
455
455
|
if reply_target is None:
|
|
456
456
|
reply_target = _get_turn_by_id(resolution.get("resolved_turn_id"))
|
|
457
|
+
lane_pivot = role == "user" and _looks_like_lane_pivot(content)
|
|
457
458
|
if reply_target:
|
|
458
459
|
reply_meta = _turn_meta(reply_target)
|
|
459
|
-
|
|
460
|
-
|
|
460
|
+
if lane_pivot:
|
|
461
|
+
branch_root_turn_id = int(reply_target.get("id") or 0) or None
|
|
462
|
+
branch_id = f"pivot:{branch_root_turn_id or (message_id or 'unknown')}"
|
|
463
|
+
enriched["lane_pivot"] = True
|
|
464
|
+
enriched["lane_pivot_from_turn_id"] = int(reply_target.get("id") or 0) or None
|
|
465
|
+
else:
|
|
466
|
+
branch_root_turn_id = int(reply_meta.get("branch_root_turn_id") or reply_target.get("id") or 0) or None
|
|
467
|
+
branch_id = str(reply_meta.get("branch_id") or f"branch:{branch_root_turn_id or reply_target.get('id')}")
|
|
461
468
|
enriched["reply_to_turn_id"] = int(reply_target.get("id") or 0) or None
|
|
462
469
|
enriched["reply_to_reference"] = reply_target.get("reference")
|
|
463
470
|
if reply_target.get("message_id"):
|
|
@@ -465,7 +472,7 @@ def _enrich_turn_metadata(
|
|
|
465
472
|
if branch_root_turn_id:
|
|
466
473
|
enriched["branch_root_turn_id"] = branch_root_turn_id
|
|
467
474
|
enriched["branch_id"] = branch_id
|
|
468
|
-
enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1
|
|
475
|
+
enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1 if not lane_pivot else 1
|
|
469
476
|
elif message_id and "branch_id" not in enriched:
|
|
470
477
|
enriched["branch_id"] = f"message:{message_id}"
|
|
471
478
|
enriched["branch_depth"] = 0
|
|
@@ -485,6 +492,30 @@ def _effective_turn_content(turn: Optional[Dict[str, Any]]) -> Optional[str]:
|
|
|
485
492
|
return content or None
|
|
486
493
|
|
|
487
494
|
|
|
495
|
+
def _looks_like_lane_pivot(text: str) -> bool:
    """Report whether *text* contains one of the known lane-pivot phrases.

    The text is run through ``_normalize_conversation_text`` and lower-cased
    before matching; text that normalizes to an empty string never matches.
    """
    pivot_phrases = (
        "before we continue",
        "let's pause",
        "lets pause",
        "back to",
        "move back",
        "return to",
        "failing us",
        "before we move back",
        "pause for one second",
        "task list for",
        "can you show me",
        "what did we just fix",
        "what is the task list",
    )
    normalized = _normalize_conversation_text(text).lower()
    if normalized:
        # Plain substring containment: a phrase may match anywhere in the text.
        for phrase in pivot_phrases:
            if phrase in normalized:
                return True
    return False
|
|
517
|
+
|
|
518
|
+
|
|
488
519
|
def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[str, Any]], *, limit: int = 6) -> List[Dict[str, Any]]:
|
|
489
520
|
if not turn:
|
|
490
521
|
return []
|
|
@@ -501,7 +532,21 @@ def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[s
|
|
|
501
532
|
break
|
|
502
533
|
seen.add(reply_to_turn_id)
|
|
503
534
|
current = lookup.get(reply_to_turn_id) or _get_turn_by_id(reply_to_turn_id)
|
|
504
|
-
|
|
535
|
+
chain = list(reversed(chain))
|
|
536
|
+
|
|
537
|
+
# Trim temporary side-answer prefixes when the later cluster clearly returns to the foreground lane.
|
|
538
|
+
if len(chain) >= 4:
|
|
539
|
+
for idx in range(len(chain) - 3):
|
|
540
|
+
first = chain[idx]
|
|
541
|
+
second = chain[idx + 1]
|
|
542
|
+
third = chain[idx + 2]
|
|
543
|
+
first_text = _normalize_conversation_text(str(first.get("content") or "").strip()).lower()
|
|
544
|
+
second_text = _normalize_conversation_text(str(second.get("content") or "").strip()).lower()
|
|
545
|
+
third_text = _normalize_conversation_text(str(third.get("content") or "").strip()).lower()
|
|
546
|
+
if first.get("role") == "assistant" and second.get("role") == "user" and third.get("role") == "assistant":
|
|
547
|
+
if any(token in first_text for token in ("recent", "repo work", "list", "includes", "task list", "show me")) and any(token in second_text for token in ("great", "proceed", "task list", "back to", "continue")):
|
|
548
|
+
return chain[idx + 1 :]
|
|
549
|
+
return chain
|
|
505
550
|
|
|
506
551
|
|
|
507
552
|
def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
|
@@ -519,13 +564,56 @@ def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str
|
|
|
519
564
|
]
|
|
520
565
|
if not branch_turns:
|
|
521
566
|
branch_turns = [latest_turn]
|
|
567
|
+
|
|
568
|
+
reply_chain = _reply_chain_for_turn(latest_turn, turns_list, limit=8)
|
|
569
|
+
reply_chain_ids = [int(item.get("id") or 0) for item in reply_chain if int(item.get("id") or 0) > 0]
|
|
570
|
+
latest_turn_id = int(latest_turn.get("id") or 0)
|
|
571
|
+
|
|
572
|
+
# When a later user/assistant cluster explicitly pivots or returns to a lane,
|
|
573
|
+
# prefer the suffix of the branch starting at the most recent non-reply turn that
|
|
574
|
+
# follows the earlier adjacent lane. This keeps fluid topic switches from dragging
|
|
575
|
+
# previous foreground work into the active branch payload.
|
|
576
|
+
suffix_start_id = reply_chain_ids[0] if reply_chain_ids else latest_turn_id
|
|
577
|
+
for turn in reversed(branch_turns):
|
|
578
|
+
turn_id = int(turn.get("id") or 0)
|
|
579
|
+
if turn_id <= 0 or turn_id >= latest_turn_id:
|
|
580
|
+
continue
|
|
581
|
+
meta = _turn_meta(turn)
|
|
582
|
+
if meta.get("reply_to_turn_id"):
|
|
583
|
+
continue
|
|
584
|
+
if turn.get("role") != "user":
|
|
585
|
+
continue
|
|
586
|
+
turn_text = _normalize_conversation_text(str(turn.get("content") or "").strip()).lower()
|
|
587
|
+
if any(token in turn_text for token in ("before we continue", "let's pause", "move back", "back to", "failing us", "return to", "resume", "task list for", "can you show me", "what did we just fix", "what is the task list")):
|
|
588
|
+
suffix_start_id = turn_id
|
|
589
|
+
break
|
|
590
|
+
|
|
591
|
+
# If the latest reply chain is the user explicitly returning after a temporary side answer,
|
|
592
|
+
# do not keep the side-answer assistant turn as foreground branch context.
|
|
593
|
+
if len(reply_chain_ids) >= 2:
|
|
594
|
+
first_reply_id = reply_chain_ids[0]
|
|
595
|
+
first_reply_turn = next((turn for turn in branch_turns if int(turn.get("id") or 0) == first_reply_id), None)
|
|
596
|
+
if first_reply_turn and first_reply_turn.get("role") == "assistant":
|
|
597
|
+
first_reply_text = _normalize_conversation_text(str(first_reply_turn.get("content") or "").strip()).lower()
|
|
598
|
+
if any(token in first_reply_text for token in ("recent", "repo work", "list", "includes", "task list", "show me")):
|
|
599
|
+
user_followup_id = reply_chain_ids[1] if len(reply_chain_ids) > 1 else None
|
|
600
|
+
if user_followup_id:
|
|
601
|
+
suffix_start_id = max(suffix_start_id, user_followup_id)
|
|
602
|
+
|
|
603
|
+
filtered_branch_turns = [
|
|
604
|
+
turn for turn in branch_turns
|
|
605
|
+
if int(turn.get("id") or 0) >= suffix_start_id or int(turn.get("id") or 0) in reply_chain_ids
|
|
606
|
+
]
|
|
607
|
+
if filtered_branch_turns:
|
|
608
|
+
branch_turns = filtered_branch_turns
|
|
609
|
+
|
|
522
610
|
return {
|
|
523
611
|
"branch_id": branch_id,
|
|
524
612
|
"root_turn_id": root_turn_id or latest_turn.get("id"),
|
|
525
613
|
"latest_turn": _turn_anchor(latest_turn),
|
|
526
614
|
"turn_ids": [int(turn.get("id") or 0) for turn in branch_turns],
|
|
527
615
|
"turns": [_turn_anchor(turn) for turn in branch_turns[-8:]],
|
|
528
|
-
"reply_chain":
|
|
616
|
+
"reply_chain": reply_chain,
|
|
529
617
|
}
|
|
530
618
|
|
|
531
619
|
|
|
@@ -46,6 +46,12 @@ def _ensure_table(conn) -> None:
|
|
|
46
46
|
"CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_links_unique ON memory_links(source_reference, link_type, target_reference)"
|
|
47
47
|
)
|
|
48
48
|
conn.commit()
|
|
49
|
+
conn.execute(
|
|
50
|
+
"CREATE INDEX IF NOT EXISTS idx_memory_links_target_created_source ON memory_links(target_reference, created_at DESC, source_reference DESC)"
|
|
51
|
+
)
|
|
52
|
+
conn.execute(
|
|
53
|
+
"CREATE INDEX IF NOT EXISTS idx_memory_links_source_created_target ON memory_links(source_reference, created_at DESC, target_reference)"
|
|
54
|
+
)
|
|
49
55
|
|
|
50
56
|
|
|
51
57
|
def add_memory_link(source_reference: str, link_type: str, target_reference: str) -> None:
|
|
@@ -49,7 +49,26 @@ def _should_promote(confidence: float, threshold: float | None = None) -> bool:
|
|
|
49
49
|
|
|
50
50
|
def _destination_table(summary: str) -> str:
|
|
51
51
|
lowered = summary.lower()
|
|
52
|
-
|
|
52
|
+
procedural_markers = (
|
|
53
|
+
"runbook",
|
|
54
|
+
"procedure",
|
|
55
|
+
"steps",
|
|
56
|
+
"checklist",
|
|
57
|
+
"how to",
|
|
58
|
+
"how-do-i",
|
|
59
|
+
"upgrade",
|
|
60
|
+
"recover",
|
|
61
|
+
"recovery",
|
|
62
|
+
"rollback",
|
|
63
|
+
"restart",
|
|
64
|
+
"validate",
|
|
65
|
+
"verification",
|
|
66
|
+
"diagnose",
|
|
67
|
+
"troubleshoot",
|
|
68
|
+
"fix by",
|
|
69
|
+
"safe way",
|
|
70
|
+
)
|
|
71
|
+
if any(marker in lowered for marker in procedural_markers):
|
|
53
72
|
return "runbooks"
|
|
54
73
|
if "lesson" in lowered or "postmortem" in lowered or "learned" in lowered:
|
|
55
74
|
return "lessons"
|
|
@@ -64,6 +83,46 @@ def _normalized_text(text: str) -> str:
|
|
|
64
83
|
return " ".join((text or "").strip().lower().split())
|
|
65
84
|
|
|
66
85
|
|
|
86
|
+
def _looks_like_changelog_or_release_notes(text: str) -> bool:
    """Report whether *text* reads like a bulleted changelog or release notes.

    Both conditions must hold on the ``_normalized_text`` form of the input:
    it is bullet-shaped (starts with ``-`` or has at least two `` - ``
    separators) and it contains at least one changelog marker.
    """
    normalized = _normalized_text(text)
    if not normalized:
        return False

    has_bullets = normalized.startswith("-") or normalized.count(" - ") >= 2
    if not has_bullets:
        return False

    for marker in (
        "thanks @",
        "(#",
        "ghsa-",
        "release notes",
        "changelog",
        "breaking change",
        "bootstrap:",
        "security/",
        "agents/",
        "telegram:",
        "discord/",
        "slack/",
        "providers/",
        "install/",
        "docker/",
    ):
        if marker in normalized:
            return True
    return False
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _looks_like_docs_index_link_list(text: str) -> bool:
    """Report whether *text* looks like a docs-index list of links.

    Works on the ``_normalized_text`` form of the input and requires at least
    two occurrences of the docs URL plus one of the marker phrases.
    """
    docs_url = "https://docs.openclaw.ai"
    # NOTE(review): the docs URL is itself one of the markers, so once two
    # links are present the marker check below always succeeds.  Confirm
    # whether the URL was meant to be part of this tuple.
    index_phrases = (
        "start with the docs index",
        "architecture overview",
        "full configuration reference",
        "run the gateway by the book",
        "learn how the control ui/web surfaces work",
        docs_url,
    )
    normalized = _normalized_text(text)
    if not normalized or normalized.count(docs_url) < 2:
        return False
    return any(phrase in normalized for phrase in index_phrases)
|
|
124
|
+
|
|
125
|
+
|
|
67
126
|
def _is_redundant_generic_candidate(summary_text: str) -> bool:
|
|
68
127
|
normalized = _normalized_text(summary_text)
|
|
69
128
|
if not normalized:
|
|
@@ -83,6 +142,8 @@ def _is_redundant_generic_candidate(summary_text: str) -> bool:
|
|
|
83
142
|
|
|
84
143
|
|
|
85
144
|
def _should_reject_as_cruft(*, confidence: float, threshold: float, destination: str, summary_text: str) -> bool:
|
|
145
|
+
if destination == "runbooks" and (_looks_like_changelog_or_release_notes(summary_text) or _looks_like_docs_index_link_list(summary_text)):
|
|
146
|
+
return True
|
|
86
147
|
if destination != "knowledge" or confidence >= threshold:
|
|
87
148
|
return False
|
|
88
149
|
return bool(_normalized_text(summary_text))
|
|
@@ -189,6 +250,17 @@ def promote_candidate(candidate: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
189
250
|
candidate_metadata["candidate_id"] = candidate_id
|
|
190
251
|
candidate_metadata["derived_from_candidate_id"] = candidate_id
|
|
191
252
|
candidate_metadata["derived_via"] = "promotion"
|
|
253
|
+
# Preserve rehydration-critical anchors whenever available.
|
|
254
|
+
transcript_anchor = candidate_metadata.get("transcript_anchor")
|
|
255
|
+
if isinstance(transcript_anchor, dict):
|
|
256
|
+
path_value = transcript_anchor.get("path")
|
|
257
|
+
if path_value and not candidate_metadata.get("source_path"):
|
|
258
|
+
candidate_metadata["source_path"] = path_value
|
|
259
|
+
if not candidate_metadata.get("source_type"):
|
|
260
|
+
candidate_metadata["source_type"] = "transcript"
|
|
261
|
+
source_refs = candidate_metadata.get("source_references")
|
|
262
|
+
if isinstance(source_refs, list) and source_refs and not candidate_metadata.get("source_type"):
|
|
263
|
+
candidate_metadata["source_type"] = "derived"
|
|
192
264
|
|
|
193
265
|
conn = store.connect()
|
|
194
266
|
promotion_id = None
|