@simbimbo/memory-ocmemog 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +83 -18
  3. package/brain/runtime/__init__.py +2 -12
  4. package/brain/runtime/config.py +1 -24
  5. package/brain/runtime/inference.py +1 -151
  6. package/brain/runtime/instrumentation.py +1 -15
  7. package/brain/runtime/memory/__init__.py +3 -13
  8. package/brain/runtime/memory/api.py +1 -1219
  9. package/brain/runtime/memory/candidate.py +1 -185
  10. package/brain/runtime/memory/conversation_state.py +1 -1823
  11. package/brain/runtime/memory/distill.py +1 -344
  12. package/brain/runtime/memory/embedding_engine.py +1 -92
  13. package/brain/runtime/memory/freshness.py +1 -112
  14. package/brain/runtime/memory/health.py +1 -40
  15. package/brain/runtime/memory/integrity.py +1 -186
  16. package/brain/runtime/memory/memory_consolidation.py +1 -58
  17. package/brain/runtime/memory/memory_links.py +1 -107
  18. package/brain/runtime/memory/memory_salience.py +1 -233
  19. package/brain/runtime/memory/memory_synthesis.py +1 -31
  20. package/brain/runtime/memory/memory_taxonomy.py +1 -33
  21. package/brain/runtime/memory/pondering_engine.py +1 -654
  22. package/brain/runtime/memory/promote.py +1 -277
  23. package/brain/runtime/memory/provenance.py +1 -406
  24. package/brain/runtime/memory/reinforcement.py +1 -71
  25. package/brain/runtime/memory/retrieval.py +1 -210
  26. package/brain/runtime/memory/semantic_search.py +1 -64
  27. package/brain/runtime/memory/store.py +1 -429
  28. package/brain/runtime/memory/unresolved_state.py +1 -91
  29. package/brain/runtime/memory/vector_index.py +1 -323
  30. package/brain/runtime/model_roles.py +1 -9
  31. package/brain/runtime/model_router.py +1 -22
  32. package/brain/runtime/providers.py +1 -66
  33. package/brain/runtime/security/redaction.py +1 -12
  34. package/brain/runtime/state_store.py +1 -23
  35. package/brain/runtime/storage_paths.py +1 -39
  36. package/docs/architecture/memory.md +20 -24
  37. package/docs/release-checklist.md +19 -6
  38. package/docs/usage.md +33 -17
  39. package/index.ts +8 -1
  40. package/ocmemog/__init__.py +11 -0
  41. package/ocmemog/doctor.py +1255 -0
  42. package/ocmemog/runtime/__init__.py +18 -0
  43. package/ocmemog/runtime/_compat_bridge.py +28 -0
  44. package/ocmemog/runtime/config.py +34 -0
  45. package/ocmemog/runtime/identity.py +115 -0
  46. package/ocmemog/runtime/inference.py +163 -0
  47. package/ocmemog/runtime/instrumentation.py +20 -0
  48. package/ocmemog/runtime/memory/__init__.py +91 -0
  49. package/ocmemog/runtime/memory/api.py +1594 -0
  50. package/ocmemog/runtime/memory/candidate.py +192 -0
  51. package/ocmemog/runtime/memory/conversation_state.py +1831 -0
  52. package/ocmemog/runtime/memory/distill.py +282 -0
  53. package/ocmemog/runtime/memory/embedding_engine.py +151 -0
  54. package/ocmemog/runtime/memory/freshness.py +114 -0
  55. package/ocmemog/runtime/memory/health.py +93 -0
  56. package/ocmemog/runtime/memory/integrity.py +208 -0
  57. package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
  58. package/ocmemog/runtime/memory/memory_links.py +109 -0
  59. package/ocmemog/runtime/memory/memory_salience.py +235 -0
  60. package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
  61. package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
  62. package/ocmemog/runtime/memory/pondering_engine.py +681 -0
  63. package/ocmemog/runtime/memory/promote.py +279 -0
  64. package/ocmemog/runtime/memory/provenance.py +408 -0
  65. package/ocmemog/runtime/memory/reinforcement.py +73 -0
  66. package/ocmemog/runtime/memory/retrieval.py +224 -0
  67. package/ocmemog/runtime/memory/semantic_search.py +66 -0
  68. package/ocmemog/runtime/memory/store.py +433 -0
  69. package/ocmemog/runtime/memory/unresolved_state.py +93 -0
  70. package/ocmemog/runtime/memory/vector_index.py +411 -0
  71. package/ocmemog/runtime/model_roles.py +15 -0
  72. package/ocmemog/runtime/model_router.py +28 -0
  73. package/ocmemog/runtime/providers.py +78 -0
  74. package/ocmemog/runtime/roles.py +92 -0
  75. package/ocmemog/runtime/security/__init__.py +8 -0
  76. package/ocmemog/runtime/security/redaction.py +17 -0
  77. package/ocmemog/runtime/state_store.py +32 -0
  78. package/ocmemog/runtime/storage_paths.py +70 -0
  79. package/ocmemog/sidecar/app.py +421 -60
  80. package/ocmemog/sidecar/compat.py +50 -13
  81. package/ocmemog/sidecar/transcript_watcher.py +327 -242
  82. package/openclaw.plugin.json +4 -0
  83. package/package.json +1 -1
  84. package/scripts/ocmemog-backfill-vectors.py +5 -3
  85. package/scripts/ocmemog-continuity-benchmark.py +1 -1
  86. package/scripts/ocmemog-demo.py +1 -1
  87. package/scripts/ocmemog-doctor.py +15 -0
  88. package/scripts/ocmemog-install.sh +29 -7
  89. package/scripts/ocmemog-integrated-proof.py +374 -0
  90. package/scripts/ocmemog-reindex-vectors.py +5 -3
  91. package/scripts/ocmemog-release-check.sh +330 -0
  92. package/scripts/ocmemog-sidecar.sh +4 -2
  93. package/scripts/ocmemog-test-rig.py +5 -3
  94. package/brain/runtime/memory/artifacts.py +0 -33
  95. package/brain/runtime/memory/context_builder.py +0 -112
  96. package/brain/runtime/memory/interaction_memory.py +0 -57
  97. package/brain/runtime/memory/memory_gate.py +0 -38
  98. package/brain/runtime/memory/memory_graph.py +0 -54
  99. package/brain/runtime/memory/person_identity.py +0 -83
  100. package/brain/runtime/memory/person_memory.py +0 -138
  101. package/brain/runtime/memory/sentiment_memory.py +0 -67
  102. package/brain/runtime/memory/tool_catalog.py +0 -68
@@ -1,1825 +1,3 @@
1
1
from __future__ import annotations

import json
import os
import re
from typing import Any, Dict, List, Optional, Sequence, Tuple

from brain.runtime import state_store
from brain.runtime.instrumentation import emit_event
from brain.runtime.memory import memory_links, memory_salience, provenance, store, unresolved_state

# Tables this module may reference directly, beyond the core memory tables.
_ALLOWED_MEMORY_TABLES = {*store.MEMORY_TABLES, "candidates", "promotions"}
# JSONL event log used by emit_event() calls throughout this module.
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
# Phrases that signal the assistant committed to a follow-up action
# ("i'll", "let me", "next i will", ...).
_COMMITMENT_RE = re.compile(
    r"\b(i(?:'m| am)? going to|i will|i'll|let me|i can(?:\s+now)?|next,? i(?:'ll| will)|i should be able to)\b",
    re.IGNORECASE,
)
# Create a rolling checkpoint every N recorded turns (0 disables).
_CHECKPOINT_EVERY = max(0, int(os.environ.get("OCMEMOG_CONVERSATION_CHECKPOINT_EVERY", "6") or "6"))
# Size of the recent-turn window used when deriving conversation state (min 6).
_MAX_STATE_TURNS = max(6, int(os.environ.get("OCMEMOG_CONVERSATION_STATE_TURNS", "24") or "24"))
# Normalized short replies that affirm the previous assistant message.
_SHORT_REPLY_NORMALIZED = {
    "yes",
    "yeah",
    "yep",
    "sure",
    "ok",
    "okay",
    "do it",
    "go ahead",
    "sounds good",
    "lets do it",
    "let us do it",
}
# Normalized short replies that decline the previous assistant message.
_NEGATIVE_SHORT_REPLY_NORMALIZED = {"no", "nope", "not now", "dont", "do not"}
# Lines matching these patterns are ocmemog-generated continuity boilerplate,
# not genuine conversation content.
_INTERNAL_CONTINUITY_PATTERNS = [
    re.compile(r"^Memory continuity \(auto-hydrated by ocmemog\):", re.IGNORECASE),
    re.compile(r"^Pre-compaction memory flush\.", re.IGNORECASE),
    re.compile(r"^Current time:", re.IGNORECASE),
]
# Line prefixes emitted by the continuity-hydration formatter.
_INTERNAL_CONTINUITY_LINE_PREFIXES = (
    "Latest user ask:",
    "Last assistant commitment:",
    "Open loops:",
    "Pending actions:",
    "Recent turns:",
    "Linked memories:",
    "Checkpoint:",
    "Sender (untrusted metadata):",
)
# Transport artifacts stripped from turn text before storage/analysis.
_REPLY_TAG_RE = re.compile(r"\[\[\s*reply_to(?::[^\]]+|_current)?\s*\]\]", re.IGNORECASE)
_SENDER_BLOCK_RE = re.compile(r"Sender \(untrusted metadata\):\s*```[\s\S]*?```", re.IGNORECASE)
_TIMESTAMP_PREFIX_RE = re.compile(r"^\[[^\]]+\]\s*")
_TIMESTAMP_MARKER_RE = re.compile(r"\[[^\]]+\]\s*")
53
-
54
-
55
- def _looks_like_internal_continuity_text(text: str) -> bool:
56
- raw = (text or "").strip()
57
- if not raw:
58
- return False
59
- if any(pattern.search(raw) for pattern in _INTERNAL_CONTINUITY_PATTERNS):
60
- return True
61
- if raw.startswith("- Checkpoint:") or raw.startswith("Checkpoint:"):
62
- return True
63
- marker_hits = sum(1 for prefix in _INTERNAL_CONTINUITY_LINE_PREFIXES if prefix in raw)
64
- return marker_hits >= 2
65
-
66
-
67
- def _strip_internal_continuity_text(text: str) -> str:
68
- raw = (text or "").strip()
69
- if not raw:
70
- return ""
71
- lines = []
72
- for line in raw.splitlines():
73
- stripped = line.strip()
74
- if not stripped:
75
- continue
76
- if any(pattern.search(stripped) for pattern in _INTERNAL_CONTINUITY_PATTERNS):
77
- continue
78
- if any(stripped.startswith(prefix) or stripped.startswith(f"- {prefix}") for prefix in _INTERNAL_CONTINUITY_LINE_PREFIXES):
79
- continue
80
- if stripped.startswith("```") or stripped == "```":
81
- continue
82
- lines.append(stripped)
83
- cleaned = re.sub(r"\s+", " ", " ".join(lines)).strip()
84
- return cleaned
85
-
86
-
87
- def _normalize_conversation_text(text: str) -> str:
88
- cleaned = (text or "").strip()
89
- if not cleaned:
90
- return ""
91
- cleaned = _SENDER_BLOCK_RE.sub(" ", cleaned)
92
- cleaned = _REPLY_TAG_RE.sub(" ", cleaned)
93
- cleaned = _strip_internal_continuity_text(cleaned)
94
- cleaned = re.sub(r"```[\s\S]*?```", " ", cleaned)
95
- timestamp_matches = list(_TIMESTAMP_MARKER_RE.finditer(cleaned))
96
- if timestamp_matches:
97
- cleaned = cleaned[timestamp_matches[-1].end():]
98
- cleaned = _TIMESTAMP_PREFIX_RE.sub("", cleaned).strip()
99
- cleaned = re.sub(r"^[\-:\s]+", "", cleaned)
100
- cleaned = re.sub(r"\s+", " ", cleaned).strip()
101
- return cleaned
102
-
103
-
104
- def _checkpoint_summary_is_polluted(summary: str) -> bool:
105
- text = (summary or "").strip()
106
- if not text:
107
- return False
108
- if _REPLY_TAG_RE.search(text) or "Sender (untrusted metadata):" in text:
109
- return True
110
- if len(_TIMESTAMP_MARKER_RE.findall(text)) >= 2:
111
- return True
112
- if '{ "label": "openclaw-tui' in text or '{"label":"openclaw-tui' in text:
113
- return True
114
- if len(text) > 700:
115
- return True
116
- if "assistant committed:" in text and len(text) > 280:
117
- return True
118
- return False
119
-
120
-
121
- def _build_memory_layers(
122
- *,
123
- turns: Sequence[Dict[str, Any]],
124
- latest_user_turn: Optional[Dict[str, Any]],
125
- latest_commitment_turn: Optional[Dict[str, Any]],
126
- linked_memories: Sequence[Dict[str, Any]],
127
- ) -> Dict[str, Any]:
128
- transcript_layer = {
129
- "kind": "raw_transcript",
130
- "turn_count": len(turns),
131
- "latest_turn_reference": f"conversation_turns:{turns[-1]['id']}" if turns else None,
132
- }
133
- working_state_layer = {
134
- "kind": "working_state",
135
- "latest_user_ask": _effective_turn_content(latest_user_turn) if latest_user_turn else None,
136
- "last_assistant_commitment": _effective_turn_content(latest_commitment_turn) if latest_commitment_turn else None,
137
- }
138
- durable_memory_layer = {
139
- "kind": "durable_memory",
140
- "linked_memory_count": len(linked_memories),
141
- "references": [str(item.get("reference") or "") for item in linked_memories[:5] if item.get("reference")],
142
- }
143
- return {
144
- "raw_transcript": transcript_layer,
145
- "working_state": working_state_layer,
146
- "durable_memory": durable_memory_layer,
147
- }
148
-
149
-
150
def _context_quality(
    *,
    turns: Sequence[Dict[str, Any]],
    latest_checkpoint: Optional[Dict[str, Any]],
    linked_memories: Sequence[Dict[str, Any]],
    summary_text: str,
) -> Dict[str, Any]:
    """Score how trustworthy the assembled context is (1.0 = clean) and name each issue."""
    issues: list[str] = []
    score = 1.0
    # Count turns whose normalized text repeats an earlier turn verbatim.
    seen_texts: set[str] = set()
    duplicates = 0
    for turn in turns:
        normalized = (_effective_turn_content(turn) or "").strip().lower()
        if not normalized:
            continue
        if normalized in seen_texts:
            duplicates += 1
        else:
            seen_texts.add(normalized)
    if duplicates:
        issues.append("duplicate_turn_text")
        score -= min(0.2, duplicates * 0.05)
    if latest_checkpoint and _checkpoint_summary_is_polluted(str(latest_checkpoint.get("summary") or "")):
        issues.append("polluted_checkpoint")
        score -= 0.35
    if summary_text and len(summary_text) > 280:
        issues.append("oversized_summary")
        score -= 0.15
    if len(linked_memories) > 5:
        issues.append("memory_overlinking")
        score -= 0.1
    if score < 0.45:
        band = "poor"
    elif score < 0.75:
        band = "degraded"
    else:
        band = "good"
    return {
        "score": round(max(0.0, score), 3),
        "band": band,
        "issues": issues,
    }
191
-
192
-
193
def _state_from_payload(
    state_payload: Dict[str, Any],
    *,
    conversation_id: Optional[str],
    session_id: Optional[str],
    thread_id: Optional[str],
) -> Dict[str, Any]:
    """Shape a derived (never persisted) conversation-state record from *state_payload*."""
    scope_type, scope_id = _scope_parts(
        conversation_id=conversation_id, session_id=session_id, thread_id=thread_id
    )
    user_turn = state_payload.get("latest_user_turn") or {}
    assistant_turn = state_payload.get("latest_assistant_turn") or {}
    # Prefer the distilled intent record over the raw ask when both exist.
    user_ask = state_payload.get("latest_user_intent") or state_payload.get("latest_user_ask") or {}
    checkpoint = state_payload.get("latest_checkpoint") or {}
    return {
        "id": None,
        "scope_type": scope_type,
        "scope_id": scope_id,
        "conversation_id": conversation_id,
        "session_id": session_id,
        "thread_id": thread_id,
        "latest_user_turn_id": user_turn.get("id"),
        "latest_assistant_turn_id": assistant_turn.get("id"),
        "latest_user_ask": user_ask.get("effective_content") or user_ask.get("content"),
        "last_assistant_commitment": (state_payload.get("last_assistant_commitment") or {}).get("content"),
        "open_loops": state_payload.get("open_loops") or [],
        "pending_actions": state_payload.get("pending_actions") or [],
        "unresolved_state": state_payload.get("unresolved_state") or [],
        "latest_checkpoint_id": checkpoint.get("id"),
        "metadata": {
            "summary_text": state_payload.get("summary_text"),
            "active_branch": state_payload.get("active_branch"),
            "latest_user_intent": state_payload.get("latest_user_intent"),
            "state_status": "derived_not_persisted",
        },
        "updated_at": None,
    }
227
-
228
-
229
-
230
- def _scope_parts(
231
- *,
232
- conversation_id: Optional[str] = None,
233
- session_id: Optional[str] = None,
234
- thread_id: Optional[str] = None,
235
- ) -> tuple[Optional[str], Optional[str]]:
236
- if thread_id:
237
- return "thread", thread_id
238
- if session_id:
239
- return "session", session_id
240
- if conversation_id:
241
- return "conversation", conversation_id
242
- return None, None
243
-
244
-
245
- def _scope_target_refs(
246
- *,
247
- conversation_id: Optional[str] = None,
248
- session_id: Optional[str] = None,
249
- thread_id: Optional[str] = None,
250
- ) -> List[str]:
251
- refs: List[str] = []
252
- if thread_id:
253
- refs.append(f"thread:{thread_id}")
254
- if session_id:
255
- refs.append(f"session:{session_id}")
256
- if conversation_id:
257
- refs.append(f"conversation:{conversation_id}")
258
- return refs
259
-
260
-
261
- def _scope_where(
262
- *,
263
- conversation_id: Optional[str] = None,
264
- session_id: Optional[str] = None,
265
- thread_id: Optional[str] = None,
266
- upto_turn_id: Optional[int] = None,
267
- ) -> tuple[str, List[Any]]:
268
- filters = []
269
- params: List[Any] = []
270
- if thread_id:
271
- filters.append("thread_id = ?")
272
- params.append(thread_id)
273
- elif session_id:
274
- filters.append("session_id = ?")
275
- params.append(session_id)
276
- elif conversation_id:
277
- filters.append("conversation_id = ?")
278
- params.append(conversation_id)
279
- if upto_turn_id is not None:
280
- filters.append("id <= ?")
281
- params.append(int(upto_turn_id))
282
- where = f" WHERE {' AND '.join(filters)}" if filters else ""
283
- return where, params
284
-
285
-
286
- def _normalized_reply_text(text: str) -> str:
287
- return re.sub(r"[^a-z0-9]+", " ", (text or "").strip().lower()).strip()
288
-
289
-
290
- def _turn_meta(turn: Optional[Dict[str, Any]]) -> Dict[str, Any]:
291
- if not turn:
292
- return {}
293
- meta = turn.get("metadata") or {}
294
- return dict(meta) if isinstance(meta, dict) else {}
295
-
296
-
297
def _is_ambiguous_short_reply(text: str) -> bool:
    """True when *text* is a short yes/no-style reply whose referent must be inferred."""
    normalized = _normalized_reply_text(text)
    if not normalized:
        return False
    # Anything longer than four tokens or 24 chars is a substantive message.
    if len(normalized) > 24 or len(normalized.split()) > 4:
        return False
    return normalized in _SHORT_REPLY_NORMALIZED or normalized in _NEGATIVE_SHORT_REPLY_NORMALIZED
305
-
306
-
307
def _find_turn_by_message_id(
    message_id: Optional[str],
    *,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
    upto_turn_id: Optional[int] = None,
) -> Optional[Dict[str, Any]]:
    """Look up the most recent turn with *message_id* within the given scope.

    Returns a turn dict (see ``_rows_to_turns``) or ``None`` when no row matches
    or *message_id* is falsy.
    """
    if not message_id:
        return None
    where, params = _scope_where(
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
        upto_turn_id=upto_turn_id,
    )
    # _scope_where may return an empty clause, so the connective flips
    # between AND (clause exists) and WHERE (no clause).
    query = f"SELECT * FROM conversation_turns{where}{' AND ' if where else ' WHERE '}message_id = ? ORDER BY id DESC LIMIT 1"
    conn = store.connect()
    try:
        row = conn.execute(query, (*params, message_id)).fetchone()
    finally:
        conn.close()
    turns = _rows_to_turns([row] if row else [])
    return turns[0] if turns else None
331
-
332
-
333
def _get_turn_by_id(turn_id: Optional[int]) -> Optional[Dict[str, Any]]:
    """Fetch a single turn by primary key, or ``None`` when missing or *turn_id* is falsy."""
    if not turn_id:
        return None
    conn = store.connect()
    try:
        row = conn.execute("SELECT * FROM conversation_turns WHERE id = ?", (int(turn_id),)).fetchone()
    finally:
        conn.close()
    turns = _rows_to_turns([row] if row else [])
    return turns[0] if turns else None
343
-
344
-
345
def _resolve_explicit_reply_target(
    metadata: Dict[str, Any],
    prior_turns: Sequence[Dict[str, Any]],
    *,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Find the turn this message explicitly replies to, or ``None``.

    Tries ``reply_to_turn_id`` first, then ``reply_to_message_id`` /
    ``parent_message_id``; each attempt scans the in-memory window before
    falling back to a database lookup.
    """
    reply_to_turn_id = metadata.get("reply_to_turn_id")
    if isinstance(reply_to_turn_id, int):
        # Cheap path: newest-first scan of the already-loaded window.
        for turn in reversed(prior_turns):
            if int(turn.get("id") or 0) == reply_to_turn_id:
                return turn
        found = _get_turn_by_id(reply_to_turn_id)
        if found:
            return found
    reply_to_message_id = metadata.get("reply_to_message_id") or metadata.get("parent_message_id")
    if isinstance(reply_to_message_id, str) and reply_to_message_id.strip():
        for turn in reversed(prior_turns):
            if turn.get("message_id") == reply_to_message_id:
                return turn
        found = _find_turn_by_message_id(
            reply_to_message_id,
            conversation_id=conversation_id,
            session_id=session_id,
            thread_id=thread_id,
        )
        if found:
            return found
    return None
375
-
376
-
377
def _infer_short_reply_resolution(
    turn_content: str,
    prior_turns: Sequence[Dict[str, Any]],
    *,
    reply_target: Optional[Dict[str, Any]] = None,
) -> Optional[Dict[str, Any]]:
    """Resolve what an ambiguous short reply ("yes", "no", ...) refers to.

    Returns a resolution record describing the referent turn and the
    confirm/decline decision, or ``None`` when the content is not a short
    reply or no referent can be found.
    """
    if not _is_ambiguous_short_reply(turn_content):
        return None
    normalized = _normalized_reply_text(turn_content)
    referent = reply_target
    if referent is None:
        # NOTE(review): _assistant_commitment / _latest_turn_by_role are defined
        # elsewhere in this module; presumably they pick the last commitment,
        # then any last assistant turn.
        referent = _assistant_commitment(prior_turns) or _latest_turn_by_role(prior_turns, "assistant")
    if not referent:
        return None
    referent_content = _normalize_conversation_text(str(referent.get("content") or "").strip())
    if not referent_content:
        return None
    # Negative short replies decline the referent; everything else confirms it.
    decision = "decline" if normalized in _NEGATIVE_SHORT_REPLY_NORMALIZED else "confirm"
    effective_summary = turn_content.strip()
    return {
        "kind": "short_reply_reference",
        "decision": decision,
        "reply_text": turn_content,
        "normalized_reply": normalized,
        "resolved_turn_id": referent.get("id"),
        "resolved_reference": referent.get("reference"),
        "resolved_message_id": referent.get("message_id"),
        "resolved_content": referent_content,
        "effective_summary": effective_summary,
        "user_intent_compact": True,
    }
408
-
409
-
410
def _enrich_turn_metadata(
    *,
    role: str,
    content: str,
    conversation_id: Optional[str],
    session_id: Optional[str],
    thread_id: Optional[str],
    message_id: Optional[str],
    metadata: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
    """Augment caller-supplied turn metadata with reply linkage and branch lineage.

    Resolves an explicit reply target (turn id / message id), falls back to the
    immediately preceding opposite-role turn, records a short-reply resolution
    for ambiguous user replies, and threads branch id/root/depth so branches
    can be reconstructed later.
    """
    enriched = dict(metadata or {})
    prior_turns = get_recent_turns(
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
        limit=max(8, min(_MAX_STATE_TURNS, 32)),
    )
    reply_target = _resolve_explicit_reply_target(
        enriched,
        prior_turns,
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
    )
    # Implicit pairing: without an explicit target, an assistant turn answers
    # the latest user turn and vice versa.
    if reply_target is None and prior_turns:
        last_turn = prior_turns[-1]
        if role == "assistant" and last_turn.get("role") == "user":
            reply_target = last_turn
        elif role == "user" and last_turn.get("role") == "assistant":
            reply_target = last_turn
    resolution = None
    if role == "user":
        resolution = _infer_short_reply_resolution(content, prior_turns, reply_target=reply_target)
        if resolution:
            enriched["resolution"] = resolution
            if reply_target is None:
                reply_target = _get_turn_by_id(resolution.get("resolved_turn_id"))
    if reply_target:
        reply_meta = _turn_meta(reply_target)
        # Inherit branch lineage from the target; the target itself becomes the
        # branch root when it carries none.
        branch_root_turn_id = int(reply_meta.get("branch_root_turn_id") or reply_target.get("id") or 0) or None
        branch_id = str(reply_meta.get("branch_id") or f"branch:{branch_root_turn_id or reply_target.get('id')}")
        enriched["reply_to_turn_id"] = int(reply_target.get("id") or 0) or None
        enriched["reply_to_reference"] = reply_target.get("reference")
        if reply_target.get("message_id"):
            enriched["reply_to_message_id"] = reply_target.get("message_id")
        if branch_root_turn_id:
            enriched["branch_root_turn_id"] = branch_root_turn_id
        enriched["branch_id"] = branch_id
        enriched["branch_depth"] = int(reply_meta.get("branch_depth") or 0) + 1
    elif message_id and "branch_id" not in enriched:
        # No target at all: start a fresh branch rooted at this message.
        enriched["branch_id"] = f"message:{message_id}"
        enriched["branch_depth"] = 0
    return enriched
463
-
464
-
465
- def _effective_turn_content(turn: Optional[Dict[str, Any]]) -> Optional[str]:
466
- if not turn:
467
- return None
468
- resolution = _turn_meta(turn).get("resolution") or {}
469
- effective = str(resolution.get("effective_summary") or "").strip()
470
- if effective and not _looks_like_internal_continuity_text(effective):
471
- normalized = _normalize_conversation_text(effective)
472
- if normalized:
473
- return normalized
474
- content = _normalize_conversation_text(str(turn.get("content") or "").strip())
475
- return content or None
476
-
477
-
478
def _reply_chain_for_turn(turn: Optional[Dict[str, Any]], turns: Sequence[Dict[str, Any]], *, limit: int = 6) -> List[Dict[str, Any]]:
    """Walk reply_to_turn_id links backwards from *turn*, oldest-first, up to *limit* anchors.

    Prefers the in-memory *turns* window for lookups, falling back to the
    database; a seen-set guards against reply cycles.
    """
    if not turn:
        return []
    lookup = {int(item.get("id") or 0): item for item in turns if item.get("id") is not None}
    chain: List[Dict[str, Any]] = []
    current = turn
    seen: set[int] = set()
    while current and len(chain) < max(1, limit):
        # NOTE(review): _turn_anchor is defined elsewhere in this module;
        # it appears to build a compact reference for a turn.
        anchor = _turn_anchor(current)
        if anchor:
            chain.append(anchor)
        reply_to_turn_id = _turn_meta(current).get("reply_to_turn_id")
        if not isinstance(reply_to_turn_id, int) or reply_to_turn_id in seen:
            break
        seen.add(reply_to_turn_id)
        current = lookup.get(reply_to_turn_id) or _get_turn_by_id(reply_to_turn_id)
    # Collected newest-to-oldest; return in chronological order.
    return list(reversed(chain))
495
-
496
-
497
def _active_branch_payload(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Describe the branch containing the newest turn, or ``None`` when *turns* is empty.

    The payload carries the branch id, its root turn, the branch's turn ids,
    anchors for its last eight turns, and the newest turn's reply chain.
    """
    turns_list = list(turns)
    if not turns_list:
        return None
    latest_turn = turns_list[-1]
    latest_meta = _turn_meta(latest_turn)
    root_turn_id = int(latest_meta.get("branch_root_turn_id") or latest_turn.get("id") or 0) or None
    branch_id = str(latest_meta.get("branch_id") or f"turn:{latest_turn.get('id')}")
    # A turn belongs to the branch when its (possibly defaulted) branch_id
    # matches, or when it is the branch root itself.
    branch_turns = [
        turn for turn in turns_list
        if str(_turn_meta(turn).get("branch_id") or f"turn:{turn.get('id')}") == branch_id
        or (root_turn_id and int(turn.get("id") or 0) == root_turn_id)
    ]
    if not branch_turns:
        branch_turns = [latest_turn]
    return {
        "branch_id": branch_id,
        "root_turn_id": root_turn_id or latest_turn.get("id"),
        "latest_turn": _turn_anchor(latest_turn),
        "turn_ids": [int(turn.get("id") or 0) for turn in branch_turns],
        "turns": [_turn_anchor(turn) for turn in branch_turns[-8:]],
        "reply_chain": _reply_chain_for_turn(latest_turn, turns_list, limit=8),
    }
520
-
521
-
522
def _ranked_turn_expansion(turns: Sequence[Dict[str, Any]], active_branch: Optional[Dict[str, Any]], *, limit: int = 12) -> List[Dict[str, Any]]:
    """Rank *turns* by salience, biased toward the active branch and its reply chain.

    *limit* is clamped to 1..50 before delegating to
    ``memory_salience.rank_turns_by_salience``.

    Fix: ``active_branch.get("reply_chain")`` can legitimately return ``None``
    (key absent or explicitly null); the previous code then iterated ``None``
    in the comprehension below and raised ``TypeError``. Coalesce to ``[]``.
    """
    branch_id = str((active_branch or {}).get("branch_id") or "") or None
    reply_chain = (active_branch.get("reply_chain") or []) if isinstance(active_branch, dict) else []
    reply_chain_turn_ids = [int(item.get("id") or 0) for item in reply_chain if int(item.get("id") or 0) > 0]
    return memory_salience.rank_turns_by_salience(
        turns,
        active_branch_id=branch_id,
        reply_chain_turn_ids=reply_chain_turn_ids,
        limit=min(max(limit, 1), 50),
    )
532
-
533
-
534
def _ranked_checkpoint_expansion(
    checkpoints: Sequence[Dict[str, Any]],
    active_branch: Optional[Dict[str, Any]],
    *,
    limit: int = 12,
) -> List[Dict[str, Any]]:
    """Rank checkpoints by salience, biased toward the active branch; limit clamped to 1..50."""
    branch_id = str((active_branch or {}).get("branch_id") or "") or None
    clamped_limit = min(max(limit, 1), 50)
    return memory_salience.rank_checkpoints_by_salience(
        checkpoints,
        active_branch_id=branch_id,
        limit=clamped_limit,
    )
546
-
547
-
548
- def _checkpoint_scope_filter(checkpoint: Dict[str, Any]) -> Dict[str, Optional[str]]:
549
- return {
550
- "conversation_id": checkpoint.get("conversation_id"),
551
- "session_id": checkpoint.get("session_id"),
552
- "thread_id": checkpoint.get("thread_id"),
553
- }
554
-
555
-
556
def _get_turns_between_ids(
    start_id: int,
    end_id: int,
    *,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
    limit: int = 200,
) -> List[Dict[str, Any]]:
    """Fetch turns with ids in ``[start_id, end_id]`` (inclusive) for the given scope.

    Returns turns oldest-first; *limit* is clamped to 1..500. An inverted range
    yields an empty list.
    """
    if end_id < start_id:
        return []
    where, params = _scope_where(
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
    )
    # Connective flips between AND and WHERE depending on whether the scope
    # clause is empty.
    query = f"SELECT * FROM conversation_turns{where}{' AND ' if where else ' WHERE '}id BETWEEN ? AND ? ORDER BY id ASC LIMIT ?"
    conn = store.connect()
    try:
        rows = conn.execute(query, (*params, int(start_id), int(end_id), min(max(limit, 1), 500))).fetchall()
    finally:
        conn.close()
    return _rows_to_turns(rows)
579
-
580
-
581
def record_turn(
    *,
    role: str,
    content: str,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
    message_id: Optional[str] = None,
    transcript_path: Optional[str] = None,
    transcript_offset: Optional[int] = None,
    transcript_end_offset: Optional[int] = None,
    source: Optional[str] = None,
    timestamp: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> int:
    """Persist one conversation turn and run best-effort post-write maintenance.

    Normalizes and validates *content*, enriches *metadata* with reply/branch
    linkage, then upserts by (role, message_id, scope) when *message_id* is
    given or inserts a new row otherwise. After the write, derived state is
    refreshed and a rolling checkpoint is created every ``_CHECKPOINT_EVERY``
    turns; failures there are logged, never raised. Returns the turn row id.

    Raises:
        ValueError: ``empty_turn_content`` when nothing survives normalization,
            or ``internal_continuity_turn`` when *content* is ocmemog-generated
            boilerplate that must not be stored as a real turn.
    """
    turn_role = (role or "unknown").strip().lower() or "unknown"
    raw_turn_content = (content or "").strip()
    if not raw_turn_content:
        raise ValueError("empty_turn_content")
    # Refuse to store our own auto-hydrated continuity text as conversation.
    if _looks_like_internal_continuity_text(raw_turn_content):
        raise ValueError("internal_continuity_turn")
    turn_content = _normalize_conversation_text(raw_turn_content)
    if not turn_content:
        raise ValueError("empty_turn_content")
    enriched_metadata = _enrich_turn_metadata(
        role=turn_role,
        content=turn_content,
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
        message_id=message_id,
        metadata=metadata,
    )

    def _write() -> int:
        # Executed via store.submit_write (single-writer); returns the row id.
        conn = store.connect()
        try:
            if message_id:
                # Upsert path: a repeated (role, message_id, scope) updates the
                # existing row instead of inserting a duplicate.
                row = conn.execute(
                    """
                    SELECT id, metadata_json, transcript_path, transcript_offset, transcript_end_offset
                    FROM conversation_turns
                    WHERE role = ? AND message_id = ?
                    AND COALESCE(conversation_id, '') = COALESCE(?, '')
                    AND COALESCE(session_id, '') = COALESCE(?, '')
                    AND COALESCE(thread_id, '') = COALESCE(?, '')
                    ORDER BY id DESC LIMIT 1
                    """,
                    (turn_role, message_id, conversation_id, session_id, thread_id),
                ).fetchone()
                if row is not None:
                    try:
                        existing_meta = json.loads(row["metadata_json"] or "{}")
                    except Exception:
                        existing_meta = {}
                    # New metadata wins on key collisions.
                    merged_meta = {**existing_meta, **enriched_metadata}
                    conn.execute(
                        """
                        UPDATE conversation_turns
                        SET content = ?,
                            transcript_path = COALESCE(?, transcript_path),
                            transcript_offset = COALESCE(?, transcript_offset),
                            transcript_end_offset = COALESCE(?, transcript_end_offset),
                            source = COALESCE(?, source),
                            metadata_json = ?
                        WHERE id = ?
                        """,
                        (
                            turn_content,
                            transcript_path,
                            transcript_offset,
                            transcript_end_offset,
                            source,
                            json.dumps(merged_meta, ensure_ascii=False),
                            int(row["id"]),
                        ),
                    )
                    conn.commit()
                    return int(row["id"])
            if timestamp:
                # Caller-supplied timestamp: include it explicitly.
                cur = conn.execute(
                    """
                    INSERT INTO conversation_turns (
                        timestamp, conversation_id, session_id, thread_id, message_id,
                        role, content, transcript_path, transcript_offset, transcript_end_offset,
                        source, metadata_json, schema_version
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        timestamp,
                        conversation_id,
                        session_id,
                        thread_id,
                        message_id,
                        turn_role,
                        turn_content,
                        transcript_path,
                        transcript_offset,
                        transcript_end_offset,
                        source,
                        json.dumps(enriched_metadata, ensure_ascii=False),
                        store.SCHEMA_VERSION,
                    ),
                )
            else:
                # No timestamp: let the table default apply.
                cur = conn.execute(
                    """
                    INSERT INTO conversation_turns (
                        conversation_id, session_id, thread_id, message_id,
                        role, content, transcript_path, transcript_offset, transcript_end_offset,
                        source, metadata_json, schema_version
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        conversation_id,
                        session_id,
                        thread_id,
                        message_id,
                        turn_role,
                        turn_content,
                        transcript_path,
                        transcript_offset,
                        transcript_end_offset,
                        source,
                        json.dumps(enriched_metadata, ensure_ascii=False),
                        store.SCHEMA_VERSION,
                    ),
                )
            conn.commit()
            return int(cur.lastrowid)
        finally:
            conn.close()

    turn_id = int(store.submit_write(_write, timeout=30.0))
    # Post-write maintenance is best-effort: failures are logged, not raised.
    try:
        refresh_state(
            conversation_id=conversation_id,
            session_id=session_id,
            thread_id=thread_id,
        )
        if _CHECKPOINT_EVERY > 0:
            counts = get_turn_counts(conversation_id=conversation_id, session_id=session_id, thread_id=thread_id)
            if counts["total"] > 0 and counts["total"] % _CHECKPOINT_EVERY == 0:
                latest = get_latest_checkpoint(conversation_id=conversation_id, session_id=session_id, thread_id=thread_id)
                # Only checkpoint when this turn advances past the last one.
                if not latest or int(latest.get("turn_end_id") or 0) < turn_id:
                    create_checkpoint(
                        conversation_id=conversation_id,
                        session_id=session_id,
                        thread_id=thread_id,
                        upto_turn_id=turn_id,
                        checkpoint_kind="rolling",
                    )
    except Exception as exc:
        emit_event(
            LOGFILE,
            "brain_conversation_turn_post_write_maintenance_failed",
            status="warn",
            error=str(exc),
            turn_id=turn_id,
        )
    emit_event(LOGFILE, "brain_conversation_turn_recorded", status="ok", role=turn_role, turn_id=turn_id)
    return turn_id
743
-
744
-
745
- def _rows_to_turns(rows) -> List[Dict[str, Any]]:
746
- items: List[Dict[str, Any]] = []
747
- for row in rows:
748
- try:
749
- meta = json.loads(row["metadata_json"] or "{}")
750
- except Exception:
751
- meta = {}
752
- items.append(
753
- {
754
- "id": int(row["id"]),
755
- "reference": f"conversation_turns:{row['id']}",
756
- "timestamp": row["timestamp"],
757
- "conversation_id": row["conversation_id"],
758
- "session_id": row["session_id"],
759
- "thread_id": row["thread_id"],
760
- "message_id": row["message_id"],
761
- "role": row["role"],
762
- "content": row["content"],
763
- "transcript_path": row["transcript_path"],
764
- "transcript_offset": row["transcript_offset"],
765
- "transcript_end_offset": row["transcript_end_offset"],
766
- "source": row["source"],
767
- "metadata": meta,
768
- }
769
- )
770
- return items
771
-
772
-
773
- def get_recent_turns(
774
- *,
775
- conversation_id: Optional[str] = None,
776
- session_id: Optional[str] = None,
777
- thread_id: Optional[str] = None,
778
- limit: int = 20,
779
- upto_turn_id: Optional[int] = None,
780
- ) -> List[Dict[str, Any]]:
781
- where, params = _scope_where(
782
- conversation_id=conversation_id,
783
- session_id=session_id,
784
- thread_id=thread_id,
785
- upto_turn_id=upto_turn_id,
786
- )
787
- query = f"SELECT * FROM conversation_turns{where} ORDER BY id DESC LIMIT ?"
788
- params.append(min(max(limit, 1), 200))
789
-
790
- conn = store.connect()
791
- try:
792
- rows = conn.execute(query, tuple(params)).fetchall()
793
- finally:
794
- conn.close()
795
- return list(reversed(_rows_to_turns(rows)))
796
-
797
-
798
- def get_turn_counts(
799
- *,
800
- conversation_id: Optional[str] = None,
801
- session_id: Optional[str] = None,
802
- thread_id: Optional[str] = None,
803
- ) -> Dict[str, int]:
804
- where, params = _scope_where(
805
- conversation_id=conversation_id,
806
- session_id=session_id,
807
- thread_id=thread_id,
808
- )
809
- conn = store.connect()
810
- try:
811
- row = conn.execute(
812
- f"""
813
- SELECT
814
- COUNT(*) AS total,
815
- SUM(CASE WHEN role='user' THEN 1 ELSE 0 END) AS user_count,
816
- SUM(CASE WHEN role='assistant' THEN 1 ELSE 0 END) AS assistant_count
817
- FROM conversation_turns{where}
818
- """,
819
- tuple(params),
820
- ).fetchone()
821
- finally:
822
- conn.close()
823
-
824
- return {
825
- "total": int(row["total"] or 0),
826
- "user": int(row["user_count"] or 0),
827
- "assistant": int(row["assistant_count"] or 0),
828
- }
829
-
830
-
831
- def get_linked_memories(
832
- *,
833
- conversation_id: Optional[str] = None,
834
- session_id: Optional[str] = None,
835
- thread_id: Optional[str] = None,
836
- limit: int = 10,
837
- ) -> List[Dict[str, Any]]:
838
- targets = _scope_target_refs(
839
- conversation_id=conversation_id,
840
- session_id=session_id,
841
- thread_id=thread_id,
842
- )
843
- if not targets:
844
- return []
845
-
846
- placeholders = ",".join("?" for _ in targets)
847
- conn = store.connect()
848
- try:
849
- memory_links._ensure_table(conn)
850
- rows = conn.execute(
851
- f"""
852
- SELECT source_reference, link_type, target_reference, created_at
853
- FROM memory_links
854
- WHERE target_reference IN ({placeholders})
855
- ORDER BY created_at DESC, source_reference DESC
856
- LIMIT ?
857
- """,
858
- (*targets, min(max(limit, 1), 100)),
859
- ).fetchall()
860
-
861
- items: List[Dict[str, Any]] = []
862
- seen: set[str] = set()
863
- for row in rows:
864
- source_reference = str(row["source_reference"])
865
- if source_reference in seen:
866
- continue
867
- table, sep, raw_id = source_reference.partition(":")
868
- if not sep or table not in _ALLOWED_MEMORY_TABLES or not raw_id.isdigit():
869
- continue
870
- memory_row = conn.execute(
871
- f"SELECT id, timestamp, content, metadata_json FROM {table} WHERE id = ?",
872
- (int(raw_id),),
873
- ).fetchone()
874
- if not memory_row:
875
- continue
876
- linked_rows = conn.execute(
877
- "SELECT link_type, target_reference FROM memory_links WHERE source_reference = ? ORDER BY created_at ASC",
878
- (source_reference,),
879
- ).fetchall()
880
- try:
881
- meta = json.loads(memory_row["metadata_json"] or "{}")
882
- except Exception:
883
- meta = {}
884
- hydrated = provenance.hydrate_reference(source_reference, depth=1) or {}
885
- items.append(
886
- {
887
- "reference": source_reference,
888
- "timestamp": memory_row["timestamp"],
889
- "content": memory_row["content"],
890
- "metadata": meta,
891
- "links": [
892
- {"link_type": linked_row["link_type"], "target_reference": linked_row["target_reference"]}
893
- for linked_row in linked_rows
894
- ],
895
- "provenance_preview": hydrated.get("provenance_preview") or provenance.preview_from_metadata(meta),
896
- "provenance": hydrated.get("provenance") or {},
897
- }
898
- )
899
- seen.add(source_reference)
900
- return items
901
- finally:
902
- conn.close()
903
-
904
-
905
- def _latest_turn_by_role(turns: Sequence[Dict[str, Any]], role: str) -> Optional[Dict[str, Any]]:
906
- return next((turn for turn in reversed(turns) if turn.get("role") == role), None)
907
-
908
-
909
- def _assistant_commitment(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
910
- for turn in reversed(turns):
911
- if turn.get("role") != "assistant":
912
- continue
913
- content = str(turn.get("content") or "").strip()
914
- if _COMMITMENT_RE.search(content):
915
- return turn
916
- return None
917
-
918
-
919
- def _assistant_turn_creates_user_reply_loop(turn: Optional[Dict[str, Any]]) -> bool:
920
- if turn is None:
921
- return False
922
- if hasattr(turn, "get"):
923
- role = turn.get("role")
924
- content_value = turn.get("content")
925
- else:
926
- role = None
927
- content_value = None
928
- try:
929
- role = turn["role"]
930
- except Exception:
931
- role = None
932
- try:
933
- content_value = turn["content"]
934
- except Exception:
935
- content_value = None
936
- if role != "assistant":
937
- return False
938
- content = str(content_value or "").strip()
939
- if not content:
940
- return False
941
- lowered = content.lower()
942
- explicit_question = "?" in content
943
- explicit_prompt = any(token in lowered for token in ("let me know", "which do you want", "should i", "want me to"))
944
- optional_tail_only = "if you want" in lowered and not explicit_question and not explicit_prompt
945
- if optional_tail_only:
946
- return False
947
- return explicit_question or explicit_prompt
948
-
949
-
950
- def _assistant_question(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
951
- for turn in reversed(turns):
952
- if _assistant_turn_creates_user_reply_loop(turn):
953
- return turn
954
- return None
955
-
956
-
957
- def _looks_complete(text: str) -> bool:
958
- normalized = (text or "").lower()
959
- return any(token in normalized for token in ("done", "completed", "finished", "shipped", "implemented", "added", "fixed", "sent"))
960
-
961
-
962
- def _has_later_assistant_turn(turns: Sequence[Dict[str, Any]], turn_id: int) -> bool:
963
- return any(turn.get("role") == "assistant" and int(turn.get("id") or 0) > turn_id for turn in turns)
964
-
965
-
966
- def _latest_unresolved_commitment(turns: Sequence[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
967
- for turn in reversed(turns):
968
- if turn.get("role") != "assistant":
969
- continue
970
- content = str(turn.get("content") or "").strip()
971
- if not content or not _COMMITMENT_RE.search(content):
972
- continue
973
- turn_id = int(turn.get("id") or 0)
974
- if not _has_later_assistant_turn(turns, turn_id):
975
- return turn
976
- break
977
- return None
978
-
979
-
980
- def _turn_anchor(turn: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
981
- if not turn:
982
- return None
983
- meta = _turn_meta(turn)
984
- return {
985
- "id": turn.get("id"),
986
- "reference": turn.get("reference"),
987
- "message_id": turn.get("message_id"),
988
- "role": turn.get("role"),
989
- "timestamp": turn.get("timestamp"),
990
- "content": turn.get("content"),
991
- "effective_content": _effective_turn_content(turn),
992
- "transcript_path": turn.get("transcript_path"),
993
- "transcript_offset": turn.get("transcript_offset"),
994
- "transcript_end_offset": turn.get("transcript_end_offset"),
995
- "reply_to_turn_id": meta.get("reply_to_turn_id"),
996
- "reply_to_reference": meta.get("reply_to_reference"),
997
- "reply_to_message_id": meta.get("reply_to_message_id"),
998
- "branch_id": meta.get("branch_id"),
999
- "branch_root_turn_id": meta.get("branch_root_turn_id"),
1000
- "resolution": meta.get("resolution"),
1001
- }
1002
-
1003
-
1004
- def _pending_from_turns(turns: Sequence[Dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
1005
- pending_actions: list[dict[str, Any]] = []
1006
- open_loops: list[dict[str, Any]] = []
1007
- filtered_turns = [turn for turn in turns if not _looks_like_internal_continuity_text(str(turn.get("content") or ""))]
1008
- last_user = _latest_turn_by_role(filtered_turns, "user")
1009
- last_assistant = _latest_turn_by_role(filtered_turns, "assistant")
1010
- commitment = _latest_unresolved_commitment(filtered_turns)
1011
- assistant_question = _assistant_question(filtered_turns)
1012
-
1013
- if last_user and (not last_assistant or int(last_user.get("id") or 0) > int(last_assistant.get("id") or 0)):
1014
- content = _effective_turn_content(last_user) or _strip_internal_continuity_text(str(last_user.get("content") or "").strip())
1015
- resolution = _turn_meta(last_user).get("resolution") or {}
1016
- if content:
1017
- open_loops.append(
1018
- {
1019
- "kind": "awaiting_assistant_reply",
1020
- "summary": content,
1021
- "source_reference": last_user.get("reference"),
1022
- "related_reference": resolution.get("resolved_reference"),
1023
- }
1024
- )
1025
- pending_actions.append(
1026
- {
1027
- "kind": "respond_to_latest_user",
1028
- "summary": content,
1029
- "source_reference": last_user.get("reference"),
1030
- "related_reference": resolution.get("resolved_reference"),
1031
- }
1032
- )
1033
- if resolution:
1034
- pending_actions.append(
1035
- {
1036
- "kind": "fulfill_confirmed_branch",
1037
- "summary": content,
1038
- "source_reference": last_user.get("reference"),
1039
- "related_reference": resolution.get("resolved_reference"),
1040
- }
1041
- )
1042
-
1043
- if assistant_question and (not last_user or int(assistant_question.get("id") or 0) > int(last_user.get("id") or 0)):
1044
- content = _strip_internal_continuity_text(str(assistant_question.get("content") or "").strip())
1045
- if content:
1046
- open_loops.append(
1047
- {
1048
- "kind": "awaiting_user_reply",
1049
- "summary": content,
1050
- "source_reference": assistant_question.get("reference"),
1051
- }
1052
- )
1053
- pending_actions.append(
1054
- {
1055
- "kind": "await_user_clarification",
1056
- "summary": content,
1057
- "source_reference": assistant_question.get("reference"),
1058
- }
1059
- )
1060
-
1061
- if commitment:
1062
- content = _strip_internal_continuity_text(str(commitment.get("content") or "").strip())
1063
- if content and not _looks_complete(content):
1064
- pending_actions.append(
1065
- {
1066
- "kind": "assistant_commitment",
1067
- "summary": content,
1068
- "source_reference": commitment.get("reference"),
1069
- }
1070
- )
1071
- open_loops.append(
1072
- {
1073
- "kind": "assistant_commitment",
1074
- "summary": content,
1075
- "source_reference": commitment.get("reference"),
1076
- }
1077
- )
1078
-
1079
- deduped_pending: list[dict[str, Any]] = []
1080
- seen = set()
1081
- for item in pending_actions:
1082
- key = (item.get("kind"), item.get("summary"), item.get("source_reference"), item.get("related_reference"))
1083
- if key in seen:
1084
- continue
1085
- seen.add(key)
1086
- deduped_pending.append(item)
1087
-
1088
- deduped_loops: list[dict[str, Any]] = []
1089
- seen = set()
1090
- for item in open_loops:
1091
- key = (item.get("kind"), item.get("summary"), item.get("source_reference"), item.get("related_reference"))
1092
- if key in seen:
1093
- continue
1094
- seen.add(key)
1095
- deduped_loops.append(item)
1096
-
1097
- return deduped_pending, deduped_loops
1098
-
1099
-
1100
- def list_relevant_unresolved_state(
1101
- *,
1102
- conversation_id: Optional[str] = None,
1103
- session_id: Optional[str] = None,
1104
- thread_id: Optional[str] = None,
1105
- limit: int = 10,
1106
- ) -> List[Dict[str, Any]]:
1107
- target_refs = set(
1108
- _scope_target_refs(
1109
- conversation_id=conversation_id,
1110
- session_id=session_id,
1111
- thread_id=thread_id,
1112
- )
1113
- )
1114
- if not target_refs:
1115
- return []
1116
- return unresolved_state.list_unresolved_state_for_references(list(target_refs), limit=limit)
1117
-
1118
-
1119
- def infer_hydration_payload(
1120
- turns: Sequence[Dict[str, Any]],
1121
- *,
1122
- conversation_id: Optional[str] = None,
1123
- session_id: Optional[str] = None,
1124
- thread_id: Optional[str] = None,
1125
- unresolved_items: Optional[Sequence[Dict[str, Any]]] = None,
1126
- latest_checkpoint: Optional[Dict[str, Any]] = None,
1127
- linked_memories: Optional[Sequence[Dict[str, Any]]] = None,
1128
- ) -> Dict[str, Any]:
1129
- turns_list = list(turns)
1130
- filtered_user_turns = [turn for turn in turns_list if turn.get("role") == "user" and not _looks_like_internal_continuity_text(str(turn.get("content") or ""))]
1131
- filtered_assistant_turns = [turn for turn in turns_list if turn.get("role") == "assistant" and not _looks_like_internal_continuity_text(str(turn.get("content") or ""))]
1132
- latest_user_turn = filtered_user_turns[-1] if filtered_user_turns else None
1133
- latest_assistant_turn = filtered_assistant_turns[-1] if filtered_assistant_turns else None
1134
- latest_commitment_turn = _latest_unresolved_commitment(filtered_assistant_turns)
1135
- last_turn = turns_list[-1] if turns_list else None
1136
- pending_actions, open_loops = _pending_from_turns(turns_list)
1137
- unresolved_payload = list(unresolved_items or [])
1138
- active_branch = _active_branch_payload(turns_list)
1139
- checkpoint_lineage = get_checkpoint_lineage(latest_checkpoint.get("id")) if latest_checkpoint else []
1140
-
1141
- for item in unresolved_payload:
1142
- summary = str(item.get("summary") or "").strip()
1143
- if not summary:
1144
- continue
1145
- open_loops.append(
1146
- {
1147
- "kind": item.get("state_type") or "unresolved_state",
1148
- "summary": summary,
1149
- "source_reference": item.get("reference"),
1150
- "state_id": item.get("state_id"),
1151
- }
1152
- )
1153
-
1154
- deduped_open_loops: list[dict[str, Any]] = []
1155
- seen = set()
1156
- for item in open_loops:
1157
- key = (item.get("kind"), item.get("summary"), item.get("source_reference"), item.get("state_id"), item.get("related_reference"))
1158
- if key in seen:
1159
- continue
1160
- seen.add(key)
1161
- deduped_open_loops.append(item)
1162
-
1163
- summary_text_parts: list[str] = []
1164
- if latest_checkpoint and latest_checkpoint.get("summary"):
1165
- summary_text_parts.append(str(latest_checkpoint["summary"]).strip())
1166
- if latest_user_turn:
1167
- summary_text_parts.append(f"Latest user ask: {_effective_turn_content(latest_user_turn) or _normalize_conversation_text(str(latest_user_turn.get('content') or '').strip())}")
1168
- if latest_commitment_turn:
1169
- summary_text_parts.append(f"Last assistant commitment: {_effective_turn_content(latest_commitment_turn) or _normalize_conversation_text(str(latest_commitment_turn.get('content') or '').strip())}")
1170
- summary_text = " | ".join(part for part in summary_text_parts if part)
1171
- linked_memories_list = list(linked_memories or [])
1172
- memory_layers = _build_memory_layers(
1173
- turns=turns_list,
1174
- latest_user_turn=latest_user_turn,
1175
- latest_commitment_turn=latest_commitment_turn,
1176
- linked_memories=linked_memories_list,
1177
- )
1178
- context_quality = _context_quality(
1179
- turns=turns_list,
1180
- latest_checkpoint=latest_checkpoint,
1181
- linked_memories=linked_memories_list,
1182
- summary_text=summary_text,
1183
- )
1184
-
1185
- return {
1186
- "turn_count": len(turns_list),
1187
- "latest_user_turn": _turn_anchor(latest_user_turn),
1188
- "latest_assistant_turn": _turn_anchor(latest_assistant_turn),
1189
- "latest_user_ask": _turn_anchor(latest_user_turn),
1190
- "latest_user_intent": {
1191
- "literal": _turn_anchor(latest_user_turn),
1192
- "effective_content": _effective_turn_content(latest_user_turn),
1193
- "resolution": _turn_meta(latest_user_turn).get("resolution") if latest_user_turn else None,
1194
- } if latest_user_turn else None,
1195
- "last_assistant_commitment": _turn_anchor(latest_commitment_turn),
1196
- "latest_transcript_anchor": {
1197
- "path": last_turn.get("transcript_path") if last_turn else None,
1198
- "start_line": last_turn.get("transcript_offset") if last_turn else None,
1199
- "end_line": last_turn.get("transcript_end_offset") if last_turn else None,
1200
- },
1201
- "open_loops": deduped_open_loops,
1202
- "pending_actions": pending_actions,
1203
- "unresolved_state": unresolved_payload,
1204
- "latest_checkpoint": latest_checkpoint,
1205
- "checkpoint_graph": {
1206
- "latest": latest_checkpoint,
1207
- "lineage": checkpoint_lineage,
1208
- } if latest_checkpoint else None,
1209
- "active_branch": active_branch,
1210
- "summary_text": summary_text,
1211
- "summary_status": "derived",
1212
- "memory_layers": memory_layers,
1213
- "context_quality": context_quality,
1214
- "scope": {
1215
- "conversation_id": conversation_id,
1216
- "session_id": session_id,
1217
- "thread_id": thread_id,
1218
- },
1219
- }
1220
-
1221
-
1222
- def _upsert_state(
1223
- *,
1224
- conversation_id: Optional[str],
1225
- session_id: Optional[str],
1226
- thread_id: Optional[str],
1227
- state_payload: Dict[str, Any],
1228
- ) -> Optional[Dict[str, Any]]:
1229
- scope_type, scope_id = _scope_parts(
1230
- conversation_id=conversation_id,
1231
- session_id=session_id,
1232
- thread_id=thread_id,
1233
- )
1234
- if not scope_type or not scope_id:
1235
- return None
1236
-
1237
- def _write() -> None:
1238
- conn = store.connect()
1239
- try:
1240
- conn.execute(
1241
- """
1242
- INSERT INTO conversation_state (
1243
- scope_type, scope_id, conversation_id, session_id, thread_id,
1244
- latest_user_turn_id, latest_assistant_turn_id,
1245
- latest_user_ask, last_assistant_commitment,
1246
- open_loops_json, pending_actions_json, unresolved_state_json,
1247
- latest_checkpoint_id, metadata_json, schema_version
1248
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1249
- ON CONFLICT(scope_type, scope_id) DO UPDATE SET
1250
- conversation_id=excluded.conversation_id,
1251
- session_id=excluded.session_id,
1252
- thread_id=excluded.thread_id,
1253
- latest_user_turn_id=excluded.latest_user_turn_id,
1254
- latest_assistant_turn_id=excluded.latest_assistant_turn_id,
1255
- latest_user_ask=excluded.latest_user_ask,
1256
- last_assistant_commitment=excluded.last_assistant_commitment,
1257
- open_loops_json=excluded.open_loops_json,
1258
- pending_actions_json=excluded.pending_actions_json,
1259
- unresolved_state_json=excluded.unresolved_state_json,
1260
- latest_checkpoint_id=excluded.latest_checkpoint_id,
1261
- metadata_json=excluded.metadata_json,
1262
- updated_at=datetime('now'),
1263
- schema_version=excluded.schema_version
1264
- """,
1265
- (
1266
- scope_type,
1267
- scope_id,
1268
- conversation_id,
1269
- session_id,
1270
- thread_id,
1271
- (state_payload.get("latest_user_turn") or {}).get("id"),
1272
- (state_payload.get("latest_assistant_turn") or {}).get("id"),
1273
- (state_payload.get("latest_user_ask") or {}).get("effective_content") or (state_payload.get("latest_user_ask") or {}).get("content"),
1274
- (state_payload.get("last_assistant_commitment") or {}).get("content"),
1275
- json.dumps(state_payload.get("open_loops") or [], ensure_ascii=False),
1276
- json.dumps(state_payload.get("pending_actions") or [], ensure_ascii=False),
1277
- json.dumps(state_payload.get("unresolved_state") or [], ensure_ascii=False),
1278
- (state_payload.get("latest_checkpoint") or {}).get("id"),
1279
- json.dumps({"summary_text": state_payload.get("summary_text"), "active_branch": state_payload.get("active_branch"), "latest_user_intent": state_payload.get("latest_user_intent")}, ensure_ascii=False),
1280
- store.SCHEMA_VERSION,
1281
- ),
1282
- )
1283
- conn.commit()
1284
- finally:
1285
- conn.close()
1286
-
1287
- store.submit_write(_write, timeout=30.0)
1288
- emit_event(LOGFILE, "brain_conversation_state_upserted", status="ok", scope_type=scope_type)
1289
- return get_state(conversation_id=conversation_id, session_id=session_id, thread_id=thread_id)
1290
-
1291
-
1292
- def get_state(
1293
- *,
1294
- conversation_id: Optional[str] = None,
1295
- session_id: Optional[str] = None,
1296
- thread_id: Optional[str] = None,
1297
- ) -> Optional[Dict[str, Any]]:
1298
- scope_type, scope_id = _scope_parts(
1299
- conversation_id=conversation_id,
1300
- session_id=session_id,
1301
- thread_id=thread_id,
1302
- )
1303
- if not scope_type or not scope_id:
1304
- return None
1305
-
1306
- conn = store.connect()
1307
- try:
1308
- row = conn.execute(
1309
- "SELECT * FROM conversation_state WHERE scope_type = ? AND scope_id = ?",
1310
- (scope_type, scope_id),
1311
- ).fetchone()
1312
- finally:
1313
- conn.close()
1314
- if not row:
1315
- return None
1316
-
1317
- def _load(value: Any) -> Any:
1318
- try:
1319
- return json.loads(value or "[]")
1320
- except Exception:
1321
- return []
1322
-
1323
- try:
1324
- meta = json.loads(row["metadata_json"] or "{}")
1325
- except Exception:
1326
- meta = {}
1327
-
1328
- return {
1329
- "id": int(row["id"]),
1330
- "scope_type": row["scope_type"],
1331
- "scope_id": row["scope_id"],
1332
- "conversation_id": row["conversation_id"],
1333
- "session_id": row["session_id"],
1334
- "thread_id": row["thread_id"],
1335
- "latest_user_turn_id": row["latest_user_turn_id"],
1336
- "latest_assistant_turn_id": row["latest_assistant_turn_id"],
1337
- "latest_user_ask": row["latest_user_ask"],
1338
- "last_assistant_commitment": row["last_assistant_commitment"],
1339
- "open_loops": _load(row["open_loops_json"]),
1340
- "pending_actions": _load(row["pending_actions_json"]),
1341
- "unresolved_state": _load(row["unresolved_state_json"]),
1342
- "latest_checkpoint_id": row["latest_checkpoint_id"],
1343
- "metadata": meta,
1344
- "updated_at": row["updated_at"],
1345
- }
1346
-
1347
-
1348
- def create_checkpoint(
1349
- *,
1350
- conversation_id: Optional[str] = None,
1351
- session_id: Optional[str] = None,
1352
- thread_id: Optional[str] = None,
1353
- upto_turn_id: Optional[int] = None,
1354
- turns_limit: int = 24,
1355
- checkpoint_kind: str = "manual",
1356
- ) -> Optional[Dict[str, Any]]:
1357
- turns = get_recent_turns(
1358
- conversation_id=conversation_id,
1359
- session_id=session_id,
1360
- thread_id=thread_id,
1361
- limit=turns_limit,
1362
- upto_turn_id=upto_turn_id,
1363
- )
1364
- if not turns:
1365
- return None
1366
- unresolved_items = list_relevant_unresolved_state(
1367
- conversation_id=conversation_id,
1368
- session_id=session_id,
1369
- thread_id=thread_id,
1370
- limit=10,
1371
- )
1372
- latest_existing_checkpoint = get_latest_checkpoint(
1373
- conversation_id=conversation_id,
1374
- session_id=session_id,
1375
- thread_id=thread_id,
1376
- )
1377
- derived = infer_hydration_payload(
1378
- turns,
1379
- conversation_id=conversation_id,
1380
- session_id=session_id,
1381
- thread_id=thread_id,
1382
- unresolved_items=unresolved_items,
1383
- latest_checkpoint=latest_existing_checkpoint,
1384
- )
1385
- summary_parts = [
1386
- f"{len(turns)} recent turns captured",
1387
- ]
1388
- latest_user = derived.get("latest_user_ask") or {}
1389
- latest_user_text = _strip_internal_continuity_text(str(latest_user.get("effective_content") or latest_user.get("content") or "").strip())
1390
- if latest_user_text:
1391
- summary_parts.append(f"user asked: {latest_user_text}")
1392
- commitment = derived.get("last_assistant_commitment") or {}
1393
- commitment_text = _strip_internal_continuity_text(str(commitment.get("content") or "").strip())
1394
- if commitment_text:
1395
- summary_parts.append(f"assistant committed: {commitment_text}")
1396
- if derived.get("open_loops"):
1397
- summary_parts.append(f"open loops: {len(derived['open_loops'])}")
1398
- summary_text = " | ".join(summary_parts)
1399
- turn_start_id = int(turns[0]["id"])
1400
- turn_end_id = int(turns[-1]["id"])
1401
- parent_checkpoint = None
1402
- if latest_existing_checkpoint and int(latest_existing_checkpoint.get("turn_end_id") or 0) < turn_end_id:
1403
- parent_checkpoint = latest_existing_checkpoint
1404
- parent_checkpoint_id = (int(parent_checkpoint.get("id") or 0) or None) if parent_checkpoint else None
1405
- root_checkpoint_id = (
1406
- int(parent_checkpoint.get("root_checkpoint_id") or parent_checkpoint_id or 0) or None
1407
- if parent_checkpoint
1408
- else None
1409
- )
1410
- checkpoint_depth = int(parent_checkpoint.get("depth") or 0) + 1 if parent_checkpoint else 0
1411
- supporting_turn_ids = [int(turn.get("id") or 0) for turn in turns]
1412
-
1413
- def _write() -> int:
1414
- conn = store.connect()
1415
- try:
1416
- cur = conn.execute(
1417
- """
1418
- INSERT INTO conversation_checkpoints (
1419
- conversation_id, session_id, thread_id,
1420
- turn_start_id, turn_end_id, checkpoint_kind, summary,
1421
- latest_user_ask, last_assistant_commitment,
1422
- open_loops_json, pending_actions_json,
1423
- parent_checkpoint_id, root_checkpoint_id, depth, metadata_json, schema_version
1424
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1425
- """,
1426
- (
1427
- conversation_id,
1428
- session_id,
1429
- thread_id,
1430
- turn_start_id,
1431
- turn_end_id,
1432
- checkpoint_kind,
1433
- summary_text,
1434
- latest_user.get("effective_content") or latest_user.get("content"),
1435
- commitment.get("content"),
1436
- json.dumps(derived.get("open_loops") or [], ensure_ascii=False),
1437
- json.dumps(derived.get("pending_actions") or [], ensure_ascii=False),
1438
- parent_checkpoint_id,
1439
- root_checkpoint_id,
1440
- checkpoint_depth,
1441
- json.dumps(
1442
- {
1443
- "turn_count": len(turns),
1444
- "latest_turn_reference": turns[-1].get("reference"),
1445
- "latest_turn_timestamp": turns[-1].get("timestamp"),
1446
- "unresolved_count": len(unresolved_items),
1447
- "supporting_turn_ids": supporting_turn_ids,
1448
- "active_branch": derived.get("active_branch"),
1449
- },
1450
- ensure_ascii=False,
1451
- ),
1452
- store.SCHEMA_VERSION,
1453
- ),
1454
- )
1455
- conn.commit()
1456
- checkpoint_id = int(cur.lastrowid)
1457
- if root_checkpoint_id is None:
1458
- conn.execute(
1459
- "UPDATE conversation_checkpoints SET root_checkpoint_id = ? WHERE id = ? AND root_checkpoint_id IS NULL",
1460
- (checkpoint_id, checkpoint_id),
1461
- )
1462
- conn.commit()
1463
- return checkpoint_id
1464
- finally:
1465
- conn.close()
1466
-
1467
- checkpoint_id = int(store.submit_write(_write, timeout=30.0))
1468
- emit_event(LOGFILE, "brain_conversation_checkpoint_created", status="ok", checkpoint_id=checkpoint_id, checkpoint_kind=checkpoint_kind)
1469
- payload = get_checkpoint_by_id(checkpoint_id)
1470
- try:
1471
- refresh_state(conversation_id=conversation_id, session_id=session_id, thread_id=thread_id)
1472
- except Exception as exc:
1473
- emit_event(
1474
- LOGFILE,
1475
- "brain_conversation_checkpoint_post_write_maintenance_failed",
1476
- status="warn",
1477
- error=str(exc),
1478
- checkpoint_id=checkpoint_id,
1479
- )
1480
- return payload
1481
-
1482
-
1483
- def _row_to_checkpoint(row) -> Optional[Dict[str, Any]]:
1484
- if not row:
1485
- return None
1486
- try:
1487
- loops = json.loads(row["open_loops_json"] or "[]")
1488
- except Exception:
1489
- loops = []
1490
- try:
1491
- pending = json.loads(row["pending_actions_json"] or "[]")
1492
- except Exception:
1493
- pending = []
1494
- try:
1495
- meta = json.loads(row["metadata_json"] or "{}")
1496
- except Exception:
1497
- meta = {}
1498
- return {
1499
- "id": int(row["id"]),
1500
- "reference": f"conversation_checkpoints:{row['id']}",
1501
- "timestamp": row["timestamp"],
1502
- "conversation_id": row["conversation_id"],
1503
- "session_id": row["session_id"],
1504
- "thread_id": row["thread_id"],
1505
- "turn_start_id": row["turn_start_id"],
1506
- "turn_end_id": row["turn_end_id"],
1507
- "checkpoint_kind": row["checkpoint_kind"],
1508
- "summary": row["summary"],
1509
- "latest_user_ask": row["latest_user_ask"],
1510
- "last_assistant_commitment": row["last_assistant_commitment"],
1511
- "parent_checkpoint_id": row["parent_checkpoint_id"] if "parent_checkpoint_id" in row.keys() else None,
1512
- "root_checkpoint_id": row["root_checkpoint_id"] if "root_checkpoint_id" in row.keys() else None,
1513
- "depth": row["depth"] if "depth" in row.keys() else 0,
1514
- "open_loops": loops,
1515
- "pending_actions": pending,
1516
- "metadata": meta,
1517
- }
1518
-
1519
-
1520
- def get_checkpoint_by_id(checkpoint_id: int) -> Optional[Dict[str, Any]]:
1521
- conn = store.connect()
1522
- try:
1523
- row = conn.execute("SELECT * FROM conversation_checkpoints WHERE id = ?", (int(checkpoint_id),)).fetchone()
1524
- finally:
1525
- conn.close()
1526
- return _row_to_checkpoint(row)
1527
-
1528
-
1529
- def list_checkpoints(
1530
- *,
1531
- conversation_id: Optional[str] = None,
1532
- session_id: Optional[str] = None,
1533
- thread_id: Optional[str] = None,
1534
- limit: int = 20,
1535
- ) -> List[Dict[str, Any]]:
1536
- where, params = _scope_where(
1537
- conversation_id=conversation_id,
1538
- session_id=session_id,
1539
- thread_id=thread_id,
1540
- )
1541
- conn = store.connect()
1542
- try:
1543
- rows = conn.execute(
1544
- f"SELECT * FROM conversation_checkpoints{where} ORDER BY id DESC LIMIT ?",
1545
- (*params, min(max(limit, 1), 200)),
1546
- ).fetchall()
1547
- finally:
1548
- conn.close()
1549
- return [_row_to_checkpoint(row) for row in rows if row]
1550
-
1551
-
1552
- def get_checkpoint_lineage(checkpoint_id: Optional[int]) -> List[Dict[str, Any]]:
1553
- if not checkpoint_id:
1554
- return []
1555
- lineage: List[Dict[str, Any]] = []
1556
- current = get_checkpoint_by_id(int(checkpoint_id))
1557
- seen: set[int] = set()
1558
- while current:
1559
- current_id = int(current.get("id") or 0)
1560
- if not current_id or current_id in seen:
1561
- break
1562
- seen.add(current_id)
1563
- lineage.append(current)
1564
- parent_id = current.get("parent_checkpoint_id")
1565
- current = get_checkpoint_by_id(int(parent_id)) if parent_id else None
1566
- return list(reversed(lineage))
1567
-
1568
-
1569
- def get_checkpoint_children(checkpoint_id: int, *, limit: int = 20) -> List[Dict[str, Any]]:
1570
- conn = store.connect()
1571
- try:
1572
- rows = conn.execute(
1573
- "SELECT * FROM conversation_checkpoints WHERE parent_checkpoint_id = ? ORDER BY id ASC LIMIT ?",
1574
- (int(checkpoint_id), min(max(limit, 1), 100)),
1575
- ).fetchall()
1576
- finally:
1577
- conn.close()
1578
- return [_row_to_checkpoint(row) for row in rows if row]
1579
-
1580
-
1581
def expand_checkpoint(checkpoint_id: int, *, radius_turns: int = 0, turns_limit: int = 200) -> Optional[Dict[str, Any]]:
    """Hydrate a checkpoint with its lineage, children, and surrounding turns.

    Returns None when the checkpoint does not exist.
    """
    checkpoint = get_checkpoint_by_id(checkpoint_id)
    if not checkpoint:
        return None
    pad = max(0, radius_turns)
    start_id = max(1, int(checkpoint.get("turn_start_id") or 0) - pad)
    end_id = int(checkpoint.get("turn_end_id") or 0) + pad
    scope = _checkpoint_scope_filter(checkpoint)
    turns = _get_turns_between_ids(start_id, end_id, limit=turns_limit, **scope)
    lineage = get_checkpoint_lineage(checkpoint_id)
    children = get_checkpoint_children(checkpoint_id, limit=20)
    active_branch = _active_branch_payload(turns)
    # De-duplicate lineage + self + children by checkpoint id, preserving order.
    candidates: List[Dict[str, Any]] = []
    known_ids: set[int] = set()
    for entry in [*lineage, checkpoint, *children]:
        entry_id = int(entry.get("id") or 0)
        if not entry_id or entry_id in known_ids:
            continue
        known_ids.add(entry_id)
        candidates.append(entry)
    return {
        "checkpoint": checkpoint,
        "lineage": lineage,
        "children": children,
        "supporting_turns": turns,
        "active_branch": active_branch,
        "salience_ranked_turns": _ranked_turn_expansion(turns, active_branch, limit=min(turns_limit, 12)),
        "salience_ranked_checkpoints": _ranked_checkpoint_expansion(candidates, active_branch, limit=12),
    }
1611
def expand_turn(turn_id: int, *, radius_turns: int = 4, turns_limit: int = 80) -> Optional[Dict[str, Any]]:
    """Hydrate one turn with its neighbours, reply chain, and covering checkpoints.

    Returns None when the turn does not exist.
    """
    turn = _get_turn_by_id(turn_id)
    if not turn:
        return None
    scope = {
        "conversation_id": turn.get("conversation_id"),
        "session_id": turn.get("session_id"),
        "thread_id": turn.get("thread_id"),
    }
    center = int(turn.get("id") or 0)
    pad = max(0, radius_turns)
    turns = _get_turns_between_ids(max(1, center - pad), center + pad, limit=turns_limit, **scope)
    active_branch = _active_branch_payload(turns)
    # Keep only checkpoints whose [turn_start_id, turn_end_id] span covers this turn.
    covering: List[Dict[str, Any]] = []
    for candidate in list_checkpoints(limit=20, **scope):
        span_start = int(candidate.get("turn_start_id") or 0)
        span_end = int(candidate.get("turn_end_id") or 0)
        if span_start <= center <= span_end:
            covering.append(candidate)
    return {
        "turn": turn,
        "reply_chain": _reply_chain_for_turn(turn, turns, limit=8),
        "supporting_turns": turns,
        "active_branch": active_branch,
        "related_checkpoints": covering,
        "salience_ranked_turns": _ranked_turn_expansion(turns, active_branch, limit=min(turns_limit, 12)),
        "salience_ranked_checkpoints": _ranked_checkpoint_expansion(covering, active_branch, limit=12),
    }
1642
def get_latest_checkpoint(
    *,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Return the most recent checkpoint in the given scope, or None if absent."""
    where, params = _scope_where(
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
    )
    conn = store.connect()
    try:
        newest = conn.execute(
            f"SELECT * FROM conversation_checkpoints{where} ORDER BY id DESC LIMIT 1",
            tuple(params),
        ).fetchone()
    finally:
        conn.close()
    return _row_to_checkpoint(newest)
1664
def _self_heal_legacy_continuity_artifacts(
    *,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
) -> int:
    """Remove legacy continuity pollution from the scoped conversation tables.

    Deletes turns whose content looks like internal continuity text, deletes
    checkpoints whose summary is polluted or whose "awaiting_user_reply" open
    loops point at turns that no longer justify the loop, and then wipes the
    scoped checkpoint/state rows so they can be rebuilt.  Emits a telemetry
    event on any removal.

    Returns the number of polluted rows found (0 when nothing was touched).
    """
    where, params = _scope_where(
        conversation_id=conversation_id,
        session_id=session_id,
        thread_id=thread_id,
    )
    conn = store.connect()
    try:
        rows = conn.execute(f"SELECT id, content FROM conversation_turns{where}", tuple(params)).fetchall()
        bad_ids = [
            int(row["id"])
            for row in rows
            if _looks_like_internal_continuity_text(str(row["content"] or ""))
        ]
        # The same scope filter applies to both tables; reuse it rather than
        # rebuilding an identical (where, params) pair a second time.
        checkpoint_rows = conn.execute(
            f"SELECT * FROM conversation_checkpoints{where}",
            tuple(params),
        ).fetchall()
        turn_lookup = {int(row["id"]): row for row in rows}
        bad_checkpoint_ids = []
        for row in checkpoint_rows:
            polluted = _checkpoint_summary_is_polluted(str(row["summary"] or ""))
            if not polluted:
                try:
                    loops = json.loads(row["open_loops_json"] or "[]")
                except Exception:
                    loops = []
                for item in loops:
                    # Legacy rows may store non-dict loop entries; skip them
                    # instead of crashing on `item.get`.
                    if not isinstance(item, dict):
                        continue
                    if item.get("kind") != "awaiting_user_reply":
                        continue
                    source_reference = str(item.get("source_reference") or "")
                    if not source_reference.startswith("conversation_turns:"):
                        continue
                    try:
                        source_id = int(source_reference.split(":", 1)[1])
                    except Exception:
                        continue
                    turn = turn_lookup.get(source_id)
                    if turn is None:
                        # The referenced turn may live outside this scope.
                        turn = conn.execute("SELECT * FROM conversation_turns WHERE id = ?", (source_id,)).fetchone()
                    if turn is not None and not _assistant_turn_creates_user_reply_loop(turn):
                        polluted = True
                        break
            if polluted:
                bad_checkpoint_ids.append(int(row["id"]))
        if not bad_ids and not bad_checkpoint_ids:
            return 0
        if bad_ids:
            placeholders = ",".join("?" for _ in bad_ids)
            conn.execute(f"DELETE FROM conversation_turns WHERE id IN ({placeholders})", tuple(bad_ids))
        if bad_checkpoint_ids:
            placeholders = ",".join("?" for _ in bad_checkpoint_ids)
            conn.execute(f"DELETE FROM conversation_checkpoints WHERE id IN ({placeholders})", tuple(bad_checkpoint_ids))
        # Any pollution invalidates derived state for the whole scope; the most
        # specific identifier wins (thread > session > conversation).
        if thread_id:
            conn.execute("DELETE FROM conversation_checkpoints WHERE thread_id = ?", (thread_id,))
            conn.execute("DELETE FROM conversation_state WHERE thread_id = ? OR (scope_type = 'thread' AND scope_id = ?)", (thread_id, thread_id))
        elif session_id:
            conn.execute("DELETE FROM conversation_checkpoints WHERE session_id = ?", (session_id,))
            conn.execute("DELETE FROM conversation_state WHERE session_id = ? OR (scope_type = 'session' AND scope_id = ?)", (session_id, session_id))
        elif conversation_id:
            conn.execute("DELETE FROM conversation_checkpoints WHERE conversation_id = ?", (conversation_id,))
            conn.execute("DELETE FROM conversation_state WHERE conversation_id = ? OR (scope_type = 'conversation' AND scope_id = ?)", (conversation_id, conversation_id))
        conn.commit()
        emit_event(
            LOGFILE,
            "brain_conversation_state_self_healed",
            status="ok",
            removed_turns=len(bad_ids),
            removed_checkpoints=len(bad_checkpoint_ids),
            conversation_id=conversation_id,
            session_id=session_id,
            thread_id=thread_id,
        )
        return len(bad_ids) + len(bad_checkpoint_ids)
    finally:
        conn.close()
1752
def refresh_state(
    *,
    conversation_id: Optional[str] = None,
    session_id: Optional[str] = None,
    thread_id: Optional[str] = None,
    tolerate_write_failure: bool = False,
) -> Optional[Dict[str, Any]]:
    """Rebuild and persist the conversation state for the given scope.

    Self-heals legacy artifacts first, infers a fresh hydration payload from
    recent turns, unresolved items, and the latest (unpolluted) checkpoint,
    then upserts it.  When the write fails and *tolerate_write_failure* is
    set, falls back to the previously stored state (marked stale) or to an
    unpersisted state built from the payload; otherwise the error propagates.
    """
    scope = {
        "conversation_id": conversation_id,
        "session_id": session_id,
        "thread_id": thread_id,
    }
    _self_heal_legacy_continuity_artifacts(**scope)
    turns = get_recent_turns(limit=_MAX_STATE_TURNS, **scope)
    unresolved_items = list_relevant_unresolved_state(limit=10, **scope)
    latest_checkpoint = get_latest_checkpoint(**scope)
    # Never hydrate from a checkpoint whose summary carries legacy pollution.
    if latest_checkpoint and _checkpoint_summary_is_polluted(str(latest_checkpoint.get("summary") or "")):
        latest_checkpoint = None
    payload = infer_hydration_payload(
        turns,
        unresolved_items=unresolved_items,
        latest_checkpoint=latest_checkpoint,
        linked_memories=[],
        **scope,
    )
    try:
        return _upsert_state(state_payload=payload, **scope)
    except Exception as exc:
        if not tolerate_write_failure:
            raise
        emit_event(
            LOGFILE,
            "brain_conversation_state_refresh_degraded",
            status="warn",
            error=str(exc),
            **scope,
        )
        existing = get_state(**scope)
        if existing:
            metadata = existing.get("metadata") if isinstance(existing.get("metadata"), dict) else {}
            existing["metadata"] = {**metadata, "state_status": "stale_persisted"}
            return existing
        return _state_from_payload(payload, **scope)
3
+ from ocmemog.runtime.memory.conversation_state import * # noqa: F401,F403