@simbimbo/memory-ocmemog 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +85 -18
  3. package/brain/runtime/__init__.py +2 -12
  4. package/brain/runtime/config.py +1 -24
  5. package/brain/runtime/inference.py +1 -151
  6. package/brain/runtime/instrumentation.py +1 -15
  7. package/brain/runtime/memory/__init__.py +3 -13
  8. package/brain/runtime/memory/api.py +1 -1219
  9. package/brain/runtime/memory/candidate.py +1 -185
  10. package/brain/runtime/memory/conversation_state.py +1 -1823
  11. package/brain/runtime/memory/distill.py +1 -344
  12. package/brain/runtime/memory/embedding_engine.py +1 -92
  13. package/brain/runtime/memory/freshness.py +1 -112
  14. package/brain/runtime/memory/health.py +1 -40
  15. package/brain/runtime/memory/integrity.py +1 -186
  16. package/brain/runtime/memory/memory_consolidation.py +1 -58
  17. package/brain/runtime/memory/memory_links.py +1 -107
  18. package/brain/runtime/memory/memory_salience.py +1 -233
  19. package/brain/runtime/memory/memory_synthesis.py +1 -31
  20. package/brain/runtime/memory/memory_taxonomy.py +1 -33
  21. package/brain/runtime/memory/pondering_engine.py +1 -654
  22. package/brain/runtime/memory/promote.py +1 -277
  23. package/brain/runtime/memory/provenance.py +1 -406
  24. package/brain/runtime/memory/reinforcement.py +1 -71
  25. package/brain/runtime/memory/retrieval.py +1 -210
  26. package/brain/runtime/memory/semantic_search.py +1 -64
  27. package/brain/runtime/memory/store.py +1 -429
  28. package/brain/runtime/memory/unresolved_state.py +1 -91
  29. package/brain/runtime/memory/vector_index.py +1 -323
  30. package/brain/runtime/model_roles.py +1 -9
  31. package/brain/runtime/model_router.py +1 -22
  32. package/brain/runtime/providers.py +1 -66
  33. package/brain/runtime/security/redaction.py +1 -12
  34. package/brain/runtime/state_store.py +1 -23
  35. package/brain/runtime/storage_paths.py +1 -39
  36. package/docs/architecture/memory.md +20 -24
  37. package/docs/release-checklist.md +19 -6
  38. package/docs/usage.md +33 -17
  39. package/index.ts +8 -1
  40. package/ocmemog/__init__.py +11 -0
  41. package/ocmemog/doctor.py +1255 -0
  42. package/ocmemog/runtime/__init__.py +18 -0
  43. package/ocmemog/runtime/_compat_bridge.py +28 -0
  44. package/ocmemog/runtime/config.py +35 -0
  45. package/ocmemog/runtime/identity.py +115 -0
  46. package/ocmemog/runtime/inference.py +164 -0
  47. package/ocmemog/runtime/instrumentation.py +20 -0
  48. package/ocmemog/runtime/memory/__init__.py +91 -0
  49. package/ocmemog/runtime/memory/api.py +1431 -0
  50. package/ocmemog/runtime/memory/candidate.py +192 -0
  51. package/ocmemog/runtime/memory/conversation_state.py +1831 -0
  52. package/ocmemog/runtime/memory/distill.py +282 -0
  53. package/ocmemog/runtime/memory/embedding_engine.py +151 -0
  54. package/ocmemog/runtime/memory/freshness.py +114 -0
  55. package/ocmemog/runtime/memory/health.py +57 -0
  56. package/ocmemog/runtime/memory/integrity.py +208 -0
  57. package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
  58. package/ocmemog/runtime/memory/memory_links.py +109 -0
  59. package/ocmemog/runtime/memory/memory_salience.py +235 -0
  60. package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
  61. package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
  62. package/ocmemog/runtime/memory/pondering_engine.py +681 -0
  63. package/ocmemog/runtime/memory/promote.py +279 -0
  64. package/ocmemog/runtime/memory/provenance.py +408 -0
  65. package/ocmemog/runtime/memory/reinforcement.py +73 -0
  66. package/ocmemog/runtime/memory/retrieval.py +224 -0
  67. package/ocmemog/runtime/memory/semantic_search.py +66 -0
  68. package/ocmemog/runtime/memory/store.py +433 -0
  69. package/ocmemog/runtime/memory/unresolved_state.py +93 -0
  70. package/ocmemog/runtime/memory/vector_index.py +411 -0
  71. package/ocmemog/runtime/model_roles.py +16 -0
  72. package/ocmemog/runtime/model_router.py +29 -0
  73. package/ocmemog/runtime/providers.py +79 -0
  74. package/ocmemog/runtime/roles.py +92 -0
  75. package/ocmemog/runtime/security/__init__.py +8 -0
  76. package/ocmemog/runtime/security/redaction.py +17 -0
  77. package/ocmemog/runtime/state_store.py +34 -0
  78. package/ocmemog/runtime/storage_paths.py +70 -0
  79. package/ocmemog/sidecar/app.py +311 -23
  80. package/ocmemog/sidecar/compat.py +50 -13
  81. package/ocmemog/sidecar/transcript_watcher.py +391 -190
  82. package/openclaw.plugin.json +4 -0
  83. package/package.json +1 -1
  84. package/scripts/ocmemog-backfill-vectors.py +5 -3
  85. package/scripts/ocmemog-continuity-benchmark.py +1 -1
  86. package/scripts/ocmemog-demo.py +1 -1
  87. package/scripts/ocmemog-doctor.py +15 -0
  88. package/scripts/ocmemog-install.sh +29 -7
  89. package/scripts/ocmemog-integrated-proof.py +373 -0
  90. package/scripts/ocmemog-reindex-vectors.py +5 -3
  91. package/scripts/ocmemog-release-check.sh +330 -0
  92. package/scripts/ocmemog-sidecar.sh +4 -2
  93. package/scripts/ocmemog-test-rig.py +5 -3
  94. package/brain/runtime/memory/artifacts.py +0 -33
  95. package/brain/runtime/memory/context_builder.py +0 -112
  96. package/brain/runtime/memory/interaction_memory.py +0 -57
  97. package/brain/runtime/memory/memory_gate.py +0 -38
  98. package/brain/runtime/memory/memory_graph.py +0 -54
  99. package/brain/runtime/memory/person_identity.py +0 -83
  100. package/brain/runtime/memory/person_memory.py +0 -138
  101. package/brain/runtime/memory/sentiment_memory.py +0 -67
  102. package/brain/runtime/memory/tool_catalog.py +0 -68
@@ -1,12 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import sys
4
5
  import os
5
6
  import time
7
+ from collections import deque
8
+ import threading
6
9
  from pathlib import Path
7
10
  from typing import Optional
8
11
  from urllib import request as urlrequest
9
12
 
13
+ from ocmemog.runtime import state_store
14
+
10
15
  DEFAULT_ENDPOINT = "http://127.0.0.1:17891/memory/ingest_async"
11
16
  DEFAULT_GLOB = "*.log"
12
17
  DEFAULT_SESSION_GLOB = "*.jsonl"
@@ -30,6 +35,82 @@ DEFAULT_REINFORCE_NEGATIVE = [
30
35
  "disappointed",
31
36
  "frustrated",
32
37
  ]
38
+ WATCHER_ERROR_LOG = state_store.reports_dir() / "ocmemog_transcript_watcher_errors.jsonl"
39
+ _SHUTDOWN_TRACE = os.environ.get("OCMEMOG_SHUTDOWN_TIMING", "true").lower() in {"1", "true", "yes", "on"}
40
+ _WATCHER_REQUEST_TIMEOUT_SECONDS = 10.0
41
+ _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = 1.0
42
+ _WATCHER_STOP_EVENT: threading.Event | None = None
43
+
44
+ try:
45
+ _WATCHER_REQUEST_TIMEOUT_SECONDS = float(os.environ.get("OCMEMOG_INGEST_REQUEST_TIMEOUT_SECONDS", "10"))
46
+ except Exception:
47
+ pass
48
+ try:
49
+ _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = float(
50
+ os.environ.get("OCMEMOG_SHUTDOWN_INGEST_REQUEST_TIMEOUT_SECONDS", "1")
51
+ )
52
+ except Exception:
53
+ pass
54
+
55
+
56
+ def _watcher_timeout(stop_event: threading.Event | None) -> float:
57
+ timeout = _WATCHER_REQUEST_TIMEOUT_SECONDS
58
+ if stop_event is not None and stop_event.is_set():
59
+ timeout = min(timeout, _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS)
60
+ return max(0.05, timeout)
61
+
62
+
63
+ def _post_json_payload(endpoint: str, payload: dict, *, stop_event: threading.Event | None, kind: str) -> bool:
64
+ data = json.dumps(payload).encode("utf-8")
65
+ req = urlrequest.Request(endpoint, data=data, method="POST")
66
+ req.add_header("Content-Type", "application/json")
67
+ _apply_auth_headers(req)
68
+ timeout = _watcher_timeout(stop_event)
69
+ start = time.perf_counter()
70
+ status = "ok"
71
+ try:
72
+ with urlrequest.urlopen(req, timeout=timeout) as resp:
73
+ resp.read()
74
+ return True
75
+ except Exception as exc:
76
+ status = f"error={type(exc).__name__}"
77
+ _log_watcher_error(kind, endpoint, payload, exc)
78
+ if _SHUTDOWN_TRACE:
79
+ print(
80
+ f"[ocmemog][watcher-request] {kind} failed timeout={timeout:.3f}s elapsed={time.perf_counter()-start:.3f}s",
81
+ file=sys.stderr,
82
+ )
83
+ return False
84
+ finally:
85
+ if _SHUTDOWN_TRACE:
86
+ elapsed = time.perf_counter() - start
87
+ if stop_event is None or not stop_event.is_set():
88
+ if elapsed >= timeout * 0.95:
89
+ print(
90
+ f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
91
+ file=sys.stderr,
92
+ )
93
+ else:
94
+ print(
95
+ f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
96
+ file=sys.stderr,
97
+ )
98
+
99
+
100
+ def _log_watcher_error(kind: str, endpoint: str, payload: dict, exc: Exception) -> None:
101
+ try:
102
+ WATCHER_ERROR_LOG.parent.mkdir(parents=True, exist_ok=True)
103
+ with WATCHER_ERROR_LOG.open("a", encoding="utf-8") as handle:
104
+ handle.write(json.dumps({
105
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
106
+ "kind": kind,
107
+ "endpoint": endpoint,
108
+ "error_type": type(exc).__name__,
109
+ "error": str(exc),
110
+ "payload_preview": str(payload)[:500],
111
+ }, ensure_ascii=False) + "\n")
112
+ except Exception:
113
+ return
33
114
 
34
115
 
35
116
  def _pick_latest(path: Path, pattern: str) -> Optional[Path]:
@@ -41,30 +122,22 @@ def _pick_latest(path: Path, pattern: str) -> Optional[Path]:
41
122
  return files[-1] if files else None
42
123
 
43
124
 
44
- def _post_ingest(endpoint: str, payload: dict) -> None:
45
- data = json.dumps(payload).encode("utf-8")
46
- req = urlrequest.Request(endpoint, data=data, method="POST")
47
- req.add_header("Content-Type", "application/json")
48
- try:
49
- with urlrequest.urlopen(req, timeout=10) as resp:
50
- resp.read()
51
- except Exception:
52
- return
125
+ def _apply_auth_headers(req: urlrequest.Request) -> None:
126
+ token = os.environ.get("OCMEMOG_API_TOKEN", "").strip()
127
+ if token:
128
+ req.add_header("x-ocmemog-token", token)
53
129
 
54
130
 
55
- def _post_json(endpoint: str, payload: dict) -> None:
56
- data = json.dumps(payload).encode("utf-8")
57
- req = urlrequest.Request(endpoint, data=data, method="POST")
58
- req.add_header("Content-Type", "application/json")
59
- try:
60
- with urlrequest.urlopen(req, timeout=10) as resp:
61
- resp.read()
62
- except Exception:
63
- return
131
+ def _post_ingest(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
132
+ return _post_json_payload(endpoint, payload, stop_event=stop_event, kind="ingest")
133
+
134
+
135
+ def _post_json(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
136
+ return _post_json_payload(endpoint, payload, stop_event=stop_event, kind="json")
64
137
 
65
138
 
66
- def _post_turn(endpoint: str, payload: dict) -> None:
67
- _post_json(endpoint, payload)
139
+ def _post_turn(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
140
+ return _post_json(endpoint, payload, stop_event=stop_event)
68
141
 
69
142
 
70
143
  def _extract_user_text(text: str) -> str:
@@ -101,6 +174,21 @@ def _extract_conversation_info(text: str) -> dict:
101
174
  return payload if isinstance(payload, dict) else {}
102
175
 
103
176
 
177
+ def _extract_message_text(content: object) -> str:
178
+ if isinstance(content, list):
179
+ parts: list[str] = []
180
+ for item in content:
181
+ if not isinstance(item, dict):
182
+ continue
183
+ if item.get("type") != "text":
184
+ continue
185
+ text = str(item.get("text") or "").strip()
186
+ if text:
187
+ parts.append(text)
188
+ return "\n".join(parts)
189
+ return str(content or "")
190
+
191
+
104
192
  def _parse_transcript_line(text: str) -> tuple[Optional[str], str]:
105
193
  stripped = text.strip()
106
194
  if not stripped:
@@ -122,17 +210,22 @@ def _count_lines(path: Path) -> int:
122
210
 
123
211
 
124
212
 
125
- def _append_transcript(transcripts_dir: Path, timestamp: str, role: str, text: str) -> tuple[Path, int]:
126
- date = timestamp.split("T")[0] if "T" in timestamp else time.strftime("%Y-%m-%d")
127
- path = transcripts_dir / f"{date}.log"
128
- transcripts_dir.mkdir(parents=True, exist_ok=True)
213
+ def _append_transcript(transcript_target: Path, timestamp: str, role: str, text: str) -> tuple[Path, int]:
214
+ if transcript_target.suffix:
215
+ path = transcript_target
216
+ path.parent.mkdir(parents=True, exist_ok=True)
217
+ else:
218
+ date = timestamp.split("T")[0] if "T" in timestamp else time.strftime("%Y-%m-%d")
219
+ path = transcript_target / f"{date}.log"
220
+ transcript_target.mkdir(parents=True, exist_ok=True)
129
221
  line_no = _count_lines(path) + 1
130
222
  with path.open("a", encoding="utf-8") as handle:
131
223
  handle.write(f"{timestamp} [{role}] {text}\n")
132
224
  return path, line_no
133
225
 
134
226
 
135
- def watch_forever() -> None:
227
+ def watch_forever(stop_event: Optional[threading.Event] = None) -> None:
228
+ global _WATCHER_STOP_EVENT
136
229
  transcript_path = os.environ.get("OCMEMOG_TRANSCRIPT_PATH", "").strip()
137
230
  transcript_dir = os.environ.get("OCMEMOG_TRANSCRIPT_DIR", "").strip()
138
231
  glob_pattern = os.environ.get("OCMEMOG_TRANSCRIPT_GLOB", DEFAULT_GLOB)
@@ -185,8 +278,17 @@ def watch_forever() -> None:
185
278
  transcript_end_line: Optional[int] = None
186
279
  session_start_line: Optional[int] = None
187
280
  session_end_line: Optional[int] = None
281
+ recent_session_transcript_lines: deque[tuple[str, int]] = deque(maxlen=max(batch_max * 8, 128))
282
+ pending_session_turns: dict[tuple[str, int], dict[str, object]] = {}
188
283
  last_transcript_flush = time.time()
189
284
  last_session_flush = time.time()
285
+ stopper: threading.Event
286
+ if isinstance(stop_event, threading.Event):
287
+ stopper = stop_event
288
+ else:
289
+ stopper = threading.Event()
290
+ stopper.clear()
291
+ _WATCHER_STOP_EVENT = stopper
190
292
 
191
293
  def _flush_buffer(
192
294
  buffer: list[str],
@@ -196,9 +298,12 @@ def watch_forever() -> None:
196
298
  timestamp: Optional[str],
197
299
  start_line: Optional[int],
198
300
  end_line: Optional[int],
199
- ) -> None:
301
+ stop_event: threading.Event,
302
+ ) -> bool:
200
303
  if not buffer:
201
- return
304
+ return True
305
+ if stop_event.is_set():
306
+ return False
202
307
  payload = {
203
308
  "content": "\n".join(buffer),
204
309
  "kind": kind,
@@ -213,8 +318,10 @@ def watch_forever() -> None:
213
318
  payload["transcript_end_offset"] = end_line
214
319
  if timestamp:
215
320
  payload["timestamp"] = timestamp.replace("T", " ")[:19]
216
- _post_ingest(endpoint, payload)
217
- buffer.clear()
321
+ ok = _post_ingest(endpoint, payload, stop_event=stop_event)
322
+ if ok:
323
+ buffer.clear()
324
+ return ok
218
325
 
219
326
  def _maybe_reinforce(text: str, timestamp: str) -> None:
220
327
  if not reinforce_enabled:
@@ -231,7 +338,7 @@ def watch_forever() -> None:
231
338
  "source_module": "sentiment",
232
339
  "note": text,
233
340
  }
234
- _post_json(reinforce_endpoint, payload)
341
+ _post_json(reinforce_endpoint, payload, stop_event=stopper)
235
342
  elif any(term in lowered for term in negative_terms):
236
343
  payload = {
237
344
  "task_id": f"feedback:{timestamp}",
@@ -243,183 +350,277 @@ def watch_forever() -> None:
243
350
  "source_module": "sentiment",
244
351
  "note": text,
245
352
  }
246
- _post_json(reinforce_endpoint, payload)
247
-
248
- while True:
249
- # 1) Watch transcript logs (if any)
250
- latest = _pick_latest(transcript_target, glob_pattern)
251
- if latest is not None:
252
- if current_file is None or latest != current_file:
253
- current_file = latest
254
- position = 0
255
- current_line_number = 0
256
- if start_at_end:
257
- try:
258
- position = current_file.stat().st_size
259
- except Exception:
260
- position = 0
261
- try:
262
- current_line_number = _count_lines(current_file)
263
- except Exception:
264
- current_line_number = 0
265
-
266
- try:
267
- with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
268
- handle.seek(position)
269
- for line in handle:
270
- text = line.rstrip("\n")
271
- current_line_number += 1
272
- if not text.strip():
273
- continue
274
- transcript_buffer.append(text)
275
- transcript_last_path = current_file
276
- if transcript_start_line is None:
277
- transcript_start_line = current_line_number
278
- transcript_end_line = current_line_number
279
- timestamp_value = None
280
- if text and " " in text:
281
- timestamp_value = text.split(" ", 1)[0]
282
- transcript_last_timestamp = timestamp_value
283
- role, turn_text = _parse_transcript_line(text)
284
- if role and turn_text:
285
- _post_turn(
286
- turn_endpoint,
287
- {
288
- "role": role,
289
- "content": turn_text,
290
- "source": source,
291
- "transcript_path": str(current_file),
292
- "transcript_offset": current_line_number,
293
- "transcript_end_offset": current_line_number,
294
- "timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
295
- },
296
- )
297
- if len(transcript_buffer) >= batch_max:
298
- _flush_buffer(
299
- transcript_buffer,
300
- source_label=source,
301
- transcript_path=transcript_last_path,
302
- timestamp=transcript_last_timestamp,
303
- start_line=transcript_start_line,
304
- end_line=transcript_end_line,
305
- )
306
- transcript_start_line = None
307
- transcript_end_line = None
308
- last_transcript_flush = time.time()
309
- position = handle.tell()
310
- except Exception:
311
- pass
312
-
313
- # 2) Watch OpenClaw session jsonl (verbatim capture)
314
- session_latest = _pick_latest(session_target, session_glob)
315
- if session_latest is not None:
316
- if session_file is None or session_latest != session_file:
317
- session_file = session_latest
318
- session_pos = 0
319
- if start_at_end:
320
- try:
321
- session_pos = session_file.stat().st_size
322
- except Exception:
323
- session_pos = 0
324
- try:
325
- with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
326
- handle.seek(session_pos)
327
- for line in handle:
353
+ _post_json(reinforce_endpoint, payload, stop_event=stopper)
354
+
355
+ try:
356
+ while not stopper.is_set():
357
+ # 1) Watch transcript logs (if any)
358
+ latest = _pick_latest(transcript_target, glob_pattern)
359
+ if latest is not None:
360
+ if current_file is None or latest != current_file:
361
+ current_file = latest
362
+ position = 0
363
+ current_line_number = 0
364
+ if start_at_end:
365
+ try:
366
+ position = current_file.stat().st_size
367
+ except Exception:
368
+ position = 0
328
369
  try:
329
- entry = json.loads(line)
370
+ current_line_number = _count_lines(current_file)
330
371
  except Exception:
331
- continue
332
- if entry.get("type") != "message":
333
- continue
334
- msg = entry.get("message") or {}
335
- role = msg.get("role")
336
- if role not in {"user", "assistant"}:
337
- continue
338
- content = msg.get("content")
339
- if isinstance(content, list):
340
- text = next((c.get("text") for c in content if c.get("type") == "text"), "")
341
- else:
342
- text = content or ""
343
- text = str(text).strip()
344
- conversation_info = _extract_conversation_info(text)
345
- if role == "user":
346
- text = _extract_user_text(text)
347
- text = text.replace("\n", " ").strip()
348
- if not text:
349
- continue
350
- timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
351
- if role == "user":
352
- _maybe_reinforce(text, timestamp)
353
- transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
354
- session_id = session_file.stem if session_file is not None else None
355
- message_id = entry.get("id") or conversation_info.get("message_id")
356
- conversation_id = conversation_info.get("conversation_id") or session_id
357
- thread_id = conversation_info.get("thread_id") or session_id
358
- _post_turn(
359
- turn_endpoint,
360
- {
361
- "role": role,
362
- "content": text,
363
- "conversation_id": conversation_id,
364
- "session_id": session_id,
365
- "thread_id": thread_id,
366
- "message_id": message_id,
367
- "source": "session",
368
- "transcript_path": str(transcript_path),
369
- "transcript_offset": transcript_line_no,
370
- "transcript_end_offset": transcript_line_no,
371
- "timestamp": timestamp.replace("T", " ")[:19],
372
- "metadata": {
373
- "parent_message_id": entry.get("parentId"),
374
- },
375
- },
376
- )
377
- session_buffer.append(f"{timestamp} [{role}] {text}")
378
- session_last_path = transcript_path
379
- session_last_timestamp = timestamp
380
- if session_start_line is None:
381
- session_start_line = transcript_line_no
382
- session_end_line = transcript_line_no
383
- if len(session_buffer) >= batch_max:
384
- _flush_buffer(
385
- session_buffer,
386
- source_label="session",
387
- transcript_path=session_last_path,
388
- timestamp=session_last_timestamp,
389
- start_line=session_start_line,
390
- end_line=session_end_line,
391
- )
392
- session_start_line = None
393
- session_end_line = None
394
- last_session_flush = time.time()
395
- session_pos = handle.tell()
396
- except Exception:
397
- pass
372
+ current_line_number = 0
373
+
374
+ try:
375
+ with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
376
+ handle.seek(position)
377
+ committed_position = position
378
+ committed_line_number = current_line_number
379
+ while True:
380
+ if stopper.is_set():
381
+ break
382
+ line_start = handle.tell()
383
+ line = handle.readline()
384
+ if not line:
385
+ position = committed_position
386
+ current_line_number = committed_line_number
387
+ break
388
+ text = line.rstrip("\n")
389
+ next_line_number = committed_line_number + 1
390
+ if not text.strip():
391
+ committed_position = handle.tell()
392
+ committed_line_number = next_line_number
393
+ position = committed_position
394
+ current_line_number = committed_line_number
395
+ continue
396
+ current_marker = (str(current_file), next_line_number)
397
+ if current_marker in recent_session_transcript_lines:
398
+ committed_position = handle.tell()
399
+ committed_line_number = next_line_number
400
+ position = committed_position
401
+ current_line_number = committed_line_number
402
+ continue
403
+ transcript_buffer.append(text)
404
+ transcript_last_path = current_file
405
+ if transcript_start_line is None:
406
+ transcript_start_line = next_line_number
407
+ transcript_end_line = next_line_number
408
+ timestamp_value = None
409
+ if text and " " in text:
410
+ timestamp_value = text.split(" ", 1)[0]
411
+ transcript_last_timestamp = timestamp_value
412
+ role, turn_text = _parse_transcript_line(text)
413
+ if role and turn_text:
414
+ if stopper.is_set():
415
+ break
416
+ ok = _post_turn(
417
+ turn_endpoint,
418
+ {
419
+ "role": role,
420
+ "content": turn_text,
421
+ "source": source,
422
+ "transcript_path": str(current_file),
423
+ "transcript_offset": next_line_number,
424
+ "transcript_end_offset": next_line_number,
425
+ "timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
426
+ },
427
+ stop_event=stopper,
428
+ )
429
+ if not ok:
430
+ if transcript_buffer:
431
+ transcript_buffer.pop()
432
+ if transcript_start_line == next_line_number:
433
+ transcript_start_line = None
434
+ transcript_end_line = committed_line_number if transcript_start_line is not None else None
435
+ position = line_start
436
+ current_line_number = committed_line_number
437
+ break
438
+ if len(transcript_buffer) >= batch_max:
439
+ ok = _flush_buffer(
440
+ transcript_buffer,
441
+ source_label=source,
442
+ transcript_path=transcript_last_path,
443
+ timestamp=transcript_last_timestamp,
444
+ start_line=transcript_start_line,
445
+ end_line=transcript_end_line,
446
+ stop_event=stopper,
447
+ )
448
+ if not ok:
449
+ position = line_start
450
+ current_line_number = committed_line_number
451
+ break
452
+ transcript_start_line = None
453
+ transcript_end_line = None
454
+ last_transcript_flush = time.time()
455
+ committed_position = handle.tell()
456
+ committed_line_number = next_line_number
457
+ position = committed_position
458
+ current_line_number = committed_line_number
459
+ except Exception:
460
+ pass
461
+
462
+ # 2) Watch OpenClaw session jsonl (verbatim capture)
463
+ session_latest = _pick_latest(session_target, session_glob)
464
+ if session_latest is not None:
465
+ if session_file is None or session_latest != session_file:
466
+ session_file = session_latest
467
+ session_pos = 0
468
+ if start_at_end:
469
+ try:
470
+ session_pos = session_file.stat().st_size
471
+ except Exception:
472
+ session_pos = 0
473
+ try:
474
+ with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
475
+ handle.seek(session_pos)
476
+ committed_session_pos = session_pos
477
+ while True:
478
+ if stopper.is_set():
479
+ break
480
+ line_start = handle.tell()
481
+ line = handle.readline()
482
+ if not line:
483
+ session_pos = committed_session_pos
484
+ break
485
+ try:
486
+ entry = json.loads(line)
487
+ except Exception:
488
+ committed_session_pos = handle.tell()
489
+ session_pos = committed_session_pos
490
+ continue
491
+ if entry.get("type") != "message":
492
+ committed_session_pos = handle.tell()
493
+ session_pos = committed_session_pos
494
+ continue
495
+ msg = entry.get("message") or {}
496
+ role = msg.get("role")
497
+ if role not in {"user", "assistant"}:
498
+ committed_session_pos = handle.tell()
499
+ session_pos = committed_session_pos
500
+ continue
501
+ content = msg.get("content")
502
+ text = _extract_message_text(content).strip()
503
+ conversation_info = _extract_conversation_info(text)
504
+ if role == "user":
505
+ text = _extract_user_text(text)
506
+ text = text.replace("\n", " ").strip()
507
+ if not text:
508
+ committed_session_pos = handle.tell()
509
+ session_pos = committed_session_pos
510
+ continue
511
+ timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
512
+ if role == "user":
513
+ _maybe_reinforce(text, timestamp)
514
+ session_id = session_file.stem if session_file is not None else None
515
+ message_id = entry.get("id") or conversation_info.get("message_id")
516
+ conversation_id = conversation_info.get("conversation_id") or session_id
517
+ thread_id = conversation_info.get("thread_id") or session_id
518
+ transcript_line = f"{timestamp} [{role}] {text}"
519
+ retry_key = (str(session_file), line_start)
520
+ pending = pending_session_turns.get(retry_key)
521
+ if pending is None:
522
+ transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
523
+ turn_payload = {
524
+ "role": role,
525
+ "content": text,
526
+ "conversation_id": conversation_id,
527
+ "session_id": session_id,
528
+ "thread_id": thread_id,
529
+ "message_id": message_id,
530
+ "source": "session",
531
+ "timestamp": timestamp.replace("T", " ")[:19],
532
+ "transcript_path": str(transcript_path),
533
+ "transcript_offset": transcript_line_no,
534
+ "transcript_end_offset": transcript_line_no,
535
+ "metadata": {
536
+ "parent_message_id": entry.get("parentId"),
537
+ },
538
+ }
539
+ pending_session_turns[retry_key] = {
540
+ "payload": dict(turn_payload),
541
+ "transcript_line": transcript_line,
542
+ "transcript_path": transcript_path,
543
+ "transcript_line_no": transcript_line_no,
544
+ }
545
+ else:
546
+ turn_payload = dict(pending["payload"])
547
+ transcript_line = str(pending["transcript_line"])
548
+ transcript_path = Path(str(pending["transcript_path"]))
549
+ transcript_line_no = int(pending["transcript_line_no"])
550
+ if stopper.is_set():
551
+ break
552
+ if not _post_turn(turn_endpoint, turn_payload, stop_event=stopper):
553
+ session_pos = line_start
554
+ break
555
+ pending_session_turns.pop(retry_key, None)
556
+ recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
557
+ session_buffer.append(transcript_line)
558
+ session_last_path = transcript_path
559
+ session_last_timestamp = timestamp
560
+ if session_start_line is None:
561
+ session_start_line = transcript_line_no
562
+ session_end_line = transcript_line_no
563
+ if len(session_buffer) >= batch_max:
564
+ ok = _flush_buffer(
565
+ session_buffer,
566
+ source_label="session",
567
+ transcript_path=session_last_path,
568
+ timestamp=session_last_timestamp,
569
+ start_line=session_start_line,
570
+ end_line=session_end_line,
571
+ stop_event=stopper,
572
+ )
573
+ if not ok:
574
+ session_pos = line_start
575
+ break
576
+ session_start_line = None
577
+ session_end_line = None
578
+ last_session_flush = time.time()
579
+ committed_session_pos = handle.tell()
580
+ session_pos = committed_session_pos
581
+ except Exception:
582
+ pass
398
583
 
399
584
  now = time.time()
400
585
  if transcript_buffer and (now - last_transcript_flush) >= batch_seconds:
401
- _flush_buffer(
586
+ ok = _flush_buffer(
402
587
  transcript_buffer,
403
588
  source_label=source,
404
589
  transcript_path=transcript_last_path,
405
590
  timestamp=transcript_last_timestamp,
406
591
  start_line=transcript_start_line,
407
592
  end_line=transcript_end_line,
593
+ stop_event=stopper,
408
594
  )
409
- transcript_start_line = None
410
- transcript_end_line = None
411
- last_transcript_flush = now
595
+ if ok:
596
+ transcript_start_line = None
597
+ transcript_end_line = None
598
+ last_transcript_flush = now
412
599
  if session_buffer and (now - last_session_flush) >= batch_seconds:
413
- _flush_buffer(
600
+ ok = _flush_buffer(
414
601
  session_buffer,
415
602
  source_label="session",
416
603
  transcript_path=session_last_path,
417
604
  timestamp=session_last_timestamp,
418
605
  start_line=session_start_line,
419
606
  end_line=session_end_line,
607
+ stop_event=stopper,
420
608
  )
421
- session_start_line = None
422
- session_end_line = None
423
- last_session_flush = now
424
-
425
- time.sleep(poll_seconds)
609
+ if ok:
610
+ session_start_line = None
611
+ session_end_line = None
612
+ last_session_flush = now
613
+
614
+ poll_started = time.perf_counter()
615
+ if stopper.wait(poll_seconds):
616
+ if _SHUTDOWN_TRACE:
617
+ print(
618
+ f"[ocmemog][watcher-poll] stop_wait timeout={poll_seconds:.3f}s elapsed={time.perf_counter()-poll_started:.3f}s",
619
+ file=sys.stderr,
620
+ )
621
+ return
622
+ finally:
623
+ _WATCHER_STOP_EVENT = None
624
+ if _SHUTDOWN_TRACE:
625
+ print("[ocmemog][watcher] shutdown loop exiting", file=sys.stderr)
626
+ # no return value