@simbimbo/memory-ocmemog 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +83 -18
  3. package/brain/runtime/__init__.py +2 -12
  4. package/brain/runtime/config.py +1 -24
  5. package/brain/runtime/inference.py +1 -151
  6. package/brain/runtime/instrumentation.py +1 -15
  7. package/brain/runtime/memory/__init__.py +3 -13
  8. package/brain/runtime/memory/api.py +1 -1219
  9. package/brain/runtime/memory/candidate.py +1 -185
  10. package/brain/runtime/memory/conversation_state.py +1 -1823
  11. package/brain/runtime/memory/distill.py +1 -344
  12. package/brain/runtime/memory/embedding_engine.py +1 -92
  13. package/brain/runtime/memory/freshness.py +1 -112
  14. package/brain/runtime/memory/health.py +1 -40
  15. package/brain/runtime/memory/integrity.py +1 -186
  16. package/brain/runtime/memory/memory_consolidation.py +1 -58
  17. package/brain/runtime/memory/memory_links.py +1 -107
  18. package/brain/runtime/memory/memory_salience.py +1 -233
  19. package/brain/runtime/memory/memory_synthesis.py +1 -31
  20. package/brain/runtime/memory/memory_taxonomy.py +1 -33
  21. package/brain/runtime/memory/pondering_engine.py +1 -654
  22. package/brain/runtime/memory/promote.py +1 -277
  23. package/brain/runtime/memory/provenance.py +1 -406
  24. package/brain/runtime/memory/reinforcement.py +1 -71
  25. package/brain/runtime/memory/retrieval.py +1 -210
  26. package/brain/runtime/memory/semantic_search.py +1 -64
  27. package/brain/runtime/memory/store.py +1 -429
  28. package/brain/runtime/memory/unresolved_state.py +1 -91
  29. package/brain/runtime/memory/vector_index.py +1 -323
  30. package/brain/runtime/model_roles.py +1 -9
  31. package/brain/runtime/model_router.py +1 -22
  32. package/brain/runtime/providers.py +1 -66
  33. package/brain/runtime/security/redaction.py +1 -12
  34. package/brain/runtime/state_store.py +1 -23
  35. package/brain/runtime/storage_paths.py +1 -39
  36. package/docs/architecture/memory.md +20 -24
  37. package/docs/release-checklist.md +19 -6
  38. package/docs/usage.md +33 -17
  39. package/index.ts +8 -1
  40. package/ocmemog/__init__.py +11 -0
  41. package/ocmemog/doctor.py +1255 -0
  42. package/ocmemog/runtime/__init__.py +18 -0
  43. package/ocmemog/runtime/_compat_bridge.py +28 -0
  44. package/ocmemog/runtime/config.py +35 -0
  45. package/ocmemog/runtime/identity.py +115 -0
  46. package/ocmemog/runtime/inference.py +164 -0
  47. package/ocmemog/runtime/instrumentation.py +20 -0
  48. package/ocmemog/runtime/memory/__init__.py +91 -0
  49. package/ocmemog/runtime/memory/api.py +1431 -0
  50. package/ocmemog/runtime/memory/candidate.py +192 -0
  51. package/ocmemog/runtime/memory/conversation_state.py +1831 -0
  52. package/ocmemog/runtime/memory/distill.py +282 -0
  53. package/ocmemog/runtime/memory/embedding_engine.py +151 -0
  54. package/ocmemog/runtime/memory/freshness.py +114 -0
  55. package/ocmemog/runtime/memory/health.py +57 -0
  56. package/ocmemog/runtime/memory/integrity.py +208 -0
  57. package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
  58. package/ocmemog/runtime/memory/memory_links.py +109 -0
  59. package/ocmemog/runtime/memory/memory_salience.py +235 -0
  60. package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
  61. package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
  62. package/ocmemog/runtime/memory/pondering_engine.py +681 -0
  63. package/ocmemog/runtime/memory/promote.py +279 -0
  64. package/ocmemog/runtime/memory/provenance.py +408 -0
  65. package/ocmemog/runtime/memory/reinforcement.py +73 -0
  66. package/ocmemog/runtime/memory/retrieval.py +224 -0
  67. package/ocmemog/runtime/memory/semantic_search.py +66 -0
  68. package/ocmemog/runtime/memory/store.py +433 -0
  69. package/ocmemog/runtime/memory/unresolved_state.py +93 -0
  70. package/ocmemog/runtime/memory/vector_index.py +411 -0
  71. package/ocmemog/runtime/model_roles.py +16 -0
  72. package/ocmemog/runtime/model_router.py +29 -0
  73. package/ocmemog/runtime/providers.py +79 -0
  74. package/ocmemog/runtime/roles.py +92 -0
  75. package/ocmemog/runtime/security/__init__.py +8 -0
  76. package/ocmemog/runtime/security/redaction.py +17 -0
  77. package/ocmemog/runtime/state_store.py +34 -0
  78. package/ocmemog/runtime/storage_paths.py +70 -0
  79. package/ocmemog/sidecar/app.py +310 -23
  80. package/ocmemog/sidecar/compat.py +50 -13
  81. package/ocmemog/sidecar/transcript_watcher.py +318 -240
  82. package/openclaw.plugin.json +4 -0
  83. package/package.json +1 -1
  84. package/scripts/ocmemog-backfill-vectors.py +5 -3
  85. package/scripts/ocmemog-continuity-benchmark.py +1 -1
  86. package/scripts/ocmemog-demo.py +1 -1
  87. package/scripts/ocmemog-doctor.py +15 -0
  88. package/scripts/ocmemog-install.sh +29 -7
  89. package/scripts/ocmemog-integrated-proof.py +373 -0
  90. package/scripts/ocmemog-reindex-vectors.py +5 -3
  91. package/scripts/ocmemog-release-check.sh +330 -0
  92. package/scripts/ocmemog-sidecar.sh +4 -2
  93. package/scripts/ocmemog-test-rig.py +5 -3
  94. package/brain/runtime/memory/artifacts.py +0 -33
  95. package/brain/runtime/memory/context_builder.py +0 -112
  96. package/brain/runtime/memory/interaction_memory.py +0 -57
  97. package/brain/runtime/memory/memory_gate.py +0 -38
  98. package/brain/runtime/memory/memory_graph.py +0 -54
  99. package/brain/runtime/memory/person_identity.py +0 -83
  100. package/brain/runtime/memory/person_memory.py +0 -138
  101. package/brain/runtime/memory/sentiment_memory.py +0 -67
  102. package/brain/runtime/memory/tool_catalog.py +0 -68
@@ -1,14 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import sys
4
5
  import os
5
6
  import time
6
7
  from collections import deque
8
+ import threading
7
9
  from pathlib import Path
8
10
  from typing import Optional
9
11
  from urllib import request as urlrequest
10
12
 
11
- from brain.runtime import state_store
13
+ from ocmemog.runtime import state_store
12
14
 
13
15
  DEFAULT_ENDPOINT = "http://127.0.0.1:17891/memory/ingest_async"
14
16
  DEFAULT_GLOB = "*.log"
@@ -34,6 +36,65 @@ DEFAULT_REINFORCE_NEGATIVE = [
34
36
  "frustrated",
35
37
  ]
36
38
  WATCHER_ERROR_LOG = state_store.reports_dir() / "ocmemog_transcript_watcher_errors.jsonl"
39
+ _SHUTDOWN_TRACE = os.environ.get("OCMEMOG_SHUTDOWN_TIMING", "true").lower() in {"1", "true", "yes", "on"}
40
+ _WATCHER_REQUEST_TIMEOUT_SECONDS = 10.0
41
+ _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = 1.0
42
+ _WATCHER_STOP_EVENT: threading.Event | None = None
43
+
44
+ try:
45
+ _WATCHER_REQUEST_TIMEOUT_SECONDS = float(os.environ.get("OCMEMOG_INGEST_REQUEST_TIMEOUT_SECONDS", "10"))
46
+ except Exception:
47
+ pass
48
+ try:
49
+ _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = float(
50
+ os.environ.get("OCMEMOG_SHUTDOWN_INGEST_REQUEST_TIMEOUT_SECONDS", "1")
51
+ )
52
+ except Exception:
53
+ pass
54
+
55
+
56
+ def _watcher_timeout(stop_event: threading.Event | None) -> float:
57
+ timeout = _WATCHER_REQUEST_TIMEOUT_SECONDS
58
+ if stop_event is not None and stop_event.is_set():
59
+ timeout = min(timeout, _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS)
60
+ return max(0.05, timeout)
61
+
62
+
63
+ def _post_json_payload(endpoint: str, payload: dict, *, stop_event: threading.Event | None, kind: str) -> bool:
64
+ data = json.dumps(payload).encode("utf-8")
65
+ req = urlrequest.Request(endpoint, data=data, method="POST")
66
+ req.add_header("Content-Type", "application/json")
67
+ _apply_auth_headers(req)
68
+ timeout = _watcher_timeout(stop_event)
69
+ start = time.perf_counter()
70
+ status = "ok"
71
+ try:
72
+ with urlrequest.urlopen(req, timeout=timeout) as resp:
73
+ resp.read()
74
+ return True
75
+ except Exception as exc:
76
+ status = f"error={type(exc).__name__}"
77
+ _log_watcher_error(kind, endpoint, payload, exc)
78
+ if _SHUTDOWN_TRACE:
79
+ print(
80
+ f"[ocmemog][watcher-request] {kind} failed timeout={timeout:.3f}s elapsed={time.perf_counter()-start:.3f}s",
81
+ file=sys.stderr,
82
+ )
83
+ return False
84
+ finally:
85
+ if _SHUTDOWN_TRACE:
86
+ elapsed = time.perf_counter() - start
87
+ if stop_event is None or not stop_event.is_set():
88
+ if elapsed >= timeout * 0.95:
89
+ print(
90
+ f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
91
+ file=sys.stderr,
92
+ )
93
+ else:
94
+ print(
95
+ f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
96
+ file=sys.stderr,
97
+ )
37
98
 
38
99
 
39
100
  def _log_watcher_error(kind: str, endpoint: str, payload: dict, exc: Exception) -> None:
@@ -67,36 +128,16 @@ def _apply_auth_headers(req: urlrequest.Request) -> None:
67
128
  req.add_header("x-ocmemog-token", token)
68
129
 
69
130
 
70
- def _post_ingest(endpoint: str, payload: dict) -> bool:
71
- data = json.dumps(payload).encode("utf-8")
72
- req = urlrequest.Request(endpoint, data=data, method="POST")
73
- req.add_header("Content-Type", "application/json")
74
- _apply_auth_headers(req)
75
- try:
76
- with urlrequest.urlopen(req, timeout=10) as resp:
77
- resp.read()
78
- return True
79
- except Exception as exc:
80
- _log_watcher_error("ingest", endpoint, payload, exc)
81
- return False
131
+ def _post_ingest(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
132
+ return _post_json_payload(endpoint, payload, stop_event=stop_event, kind="ingest")
82
133
 
83
134
 
84
- def _post_json(endpoint: str, payload: dict) -> bool:
85
- data = json.dumps(payload).encode("utf-8")
86
- req = urlrequest.Request(endpoint, data=data, method="POST")
87
- req.add_header("Content-Type", "application/json")
88
- _apply_auth_headers(req)
89
- try:
90
- with urlrequest.urlopen(req, timeout=10) as resp:
91
- resp.read()
92
- return True
93
- except Exception as exc:
94
- _log_watcher_error("json", endpoint, payload, exc)
95
- return False
135
+ def _post_json(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
136
+ return _post_json_payload(endpoint, payload, stop_event=stop_event, kind="json")
96
137
 
97
138
 
98
- def _post_turn(endpoint: str, payload: dict) -> bool:
99
- return _post_json(endpoint, payload)
139
+ def _post_turn(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
140
+ return _post_json(endpoint, payload, stop_event=stop_event)
100
141
 
101
142
 
102
143
  def _extract_user_text(text: str) -> str:
@@ -183,7 +224,8 @@ def _append_transcript(transcript_target: Path, timestamp: str, role: str, text:
183
224
  return path, line_no
184
225
 
185
226
 
186
- def watch_forever() -> None:
227
+ def watch_forever(stop_event: Optional[threading.Event] = None) -> None:
228
+ global _WATCHER_STOP_EVENT
187
229
  transcript_path = os.environ.get("OCMEMOG_TRANSCRIPT_PATH", "").strip()
188
230
  transcript_dir = os.environ.get("OCMEMOG_TRANSCRIPT_DIR", "").strip()
189
231
  glob_pattern = os.environ.get("OCMEMOG_TRANSCRIPT_GLOB", DEFAULT_GLOB)
@@ -240,6 +282,13 @@ def watch_forever() -> None:
240
282
  pending_session_turns: dict[tuple[str, int], dict[str, object]] = {}
241
283
  last_transcript_flush = time.time()
242
284
  last_session_flush = time.time()
285
+ stopper: threading.Event
286
+ if isinstance(stop_event, threading.Event):
287
+ stopper = stop_event
288
+ else:
289
+ stopper = threading.Event()
290
+ stopper.clear()
291
+ _WATCHER_STOP_EVENT = stopper
243
292
 
244
293
  def _flush_buffer(
245
294
  buffer: list[str],
@@ -249,9 +298,12 @@ def watch_forever() -> None:
249
298
  timestamp: Optional[str],
250
299
  start_line: Optional[int],
251
300
  end_line: Optional[int],
301
+ stop_event: threading.Event,
252
302
  ) -> bool:
253
303
  if not buffer:
254
304
  return True
305
+ if stop_event.is_set():
306
+ return False
255
307
  payload = {
256
308
  "content": "\n".join(buffer),
257
309
  "kind": kind,
@@ -266,7 +318,7 @@ def watch_forever() -> None:
266
318
  payload["transcript_end_offset"] = end_line
267
319
  if timestamp:
268
320
  payload["timestamp"] = timestamp.replace("T", " ")[:19]
269
- ok = _post_ingest(endpoint, payload)
321
+ ok = _post_ingest(endpoint, payload, stop_event=stop_event)
270
322
  if ok:
271
323
  buffer.clear()
272
324
  return ok
@@ -286,7 +338,7 @@ def watch_forever() -> None:
286
338
  "source_module": "sentiment",
287
339
  "note": text,
288
340
  }
289
- _post_json(reinforce_endpoint, payload)
341
+ _post_json(reinforce_endpoint, payload, stop_event=stopper)
290
342
  elif any(term in lowered for term in negative_terms):
291
343
  payload = {
292
344
  "task_id": f"feedback:{timestamp}",
@@ -298,224 +350,236 @@ def watch_forever() -> None:
298
350
  "source_module": "sentiment",
299
351
  "note": text,
300
352
  }
301
- _post_json(reinforce_endpoint, payload)
302
-
303
- while True:
304
- # 1) Watch transcript logs (if any)
305
- latest = _pick_latest(transcript_target, glob_pattern)
306
- if latest is not None:
307
- if current_file is None or latest != current_file:
308
- current_file = latest
309
- position = 0
310
- current_line_number = 0
311
- if start_at_end:
312
- try:
313
- position = current_file.stat().st_size
314
- except Exception:
315
- position = 0
316
- try:
317
- current_line_number = _count_lines(current_file)
318
- except Exception:
319
- current_line_number = 0
320
-
321
- try:
322
- with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
323
- handle.seek(position)
324
- committed_position = position
325
- committed_line_number = current_line_number
326
- while True:
327
- line_start = handle.tell()
328
- line = handle.readline()
329
- if not line:
330
- position = committed_position
331
- current_line_number = committed_line_number
332
- break
333
- text = line.rstrip("\n")
334
- next_line_number = committed_line_number + 1
335
- if not text.strip():
336
- committed_position = handle.tell()
337
- committed_line_number = next_line_number
338
- position = committed_position
339
- current_line_number = committed_line_number
340
- continue
341
- current_marker = (str(current_file), next_line_number)
342
- if current_marker in recent_session_transcript_lines:
353
+ _post_json(reinforce_endpoint, payload, stop_event=stopper)
354
+
355
+ try:
356
+ while not stopper.is_set():
357
+ # 1) Watch transcript logs (if any)
358
+ latest = _pick_latest(transcript_target, glob_pattern)
359
+ if latest is not None:
360
+ if current_file is None or latest != current_file:
361
+ current_file = latest
362
+ position = 0
363
+ current_line_number = 0
364
+ if start_at_end:
365
+ try:
366
+ position = current_file.stat().st_size
367
+ except Exception:
368
+ position = 0
369
+ try:
370
+ current_line_number = _count_lines(current_file)
371
+ except Exception:
372
+ current_line_number = 0
373
+
374
+ try:
375
+ with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
376
+ handle.seek(position)
377
+ committed_position = position
378
+ committed_line_number = current_line_number
379
+ while True:
380
+ if stopper.is_set():
381
+ break
382
+ line_start = handle.tell()
383
+ line = handle.readline()
384
+ if not line:
385
+ position = committed_position
386
+ current_line_number = committed_line_number
387
+ break
388
+ text = line.rstrip("\n")
389
+ next_line_number = committed_line_number + 1
390
+ if not text.strip():
391
+ committed_position = handle.tell()
392
+ committed_line_number = next_line_number
393
+ position = committed_position
394
+ current_line_number = committed_line_number
395
+ continue
396
+ current_marker = (str(current_file), next_line_number)
397
+ if current_marker in recent_session_transcript_lines:
398
+ committed_position = handle.tell()
399
+ committed_line_number = next_line_number
400
+ position = committed_position
401
+ current_line_number = committed_line_number
402
+ continue
403
+ transcript_buffer.append(text)
404
+ transcript_last_path = current_file
405
+ if transcript_start_line is None:
406
+ transcript_start_line = next_line_number
407
+ transcript_end_line = next_line_number
408
+ timestamp_value = None
409
+ if text and " " in text:
410
+ timestamp_value = text.split(" ", 1)[0]
411
+ transcript_last_timestamp = timestamp_value
412
+ role, turn_text = _parse_transcript_line(text)
413
+ if role and turn_text:
414
+ if stopper.is_set():
415
+ break
416
+ ok = _post_turn(
417
+ turn_endpoint,
418
+ {
419
+ "role": role,
420
+ "content": turn_text,
421
+ "source": source,
422
+ "transcript_path": str(current_file),
423
+ "transcript_offset": next_line_number,
424
+ "transcript_end_offset": next_line_number,
425
+ "timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
426
+ },
427
+ stop_event=stopper,
428
+ )
429
+ if not ok:
430
+ if transcript_buffer:
431
+ transcript_buffer.pop()
432
+ if transcript_start_line == next_line_number:
433
+ transcript_start_line = None
434
+ transcript_end_line = committed_line_number if transcript_start_line is not None else None
435
+ position = line_start
436
+ current_line_number = committed_line_number
437
+ break
438
+ if len(transcript_buffer) >= batch_max:
439
+ ok = _flush_buffer(
440
+ transcript_buffer,
441
+ source_label=source,
442
+ transcript_path=transcript_last_path,
443
+ timestamp=transcript_last_timestamp,
444
+ start_line=transcript_start_line,
445
+ end_line=transcript_end_line,
446
+ stop_event=stopper,
447
+ )
448
+ if not ok:
449
+ position = line_start
450
+ current_line_number = committed_line_number
451
+ break
452
+ transcript_start_line = None
453
+ transcript_end_line = None
454
+ last_transcript_flush = time.time()
343
455
  committed_position = handle.tell()
344
456
  committed_line_number = next_line_number
345
457
  position = committed_position
346
458
  current_line_number = committed_line_number
347
- continue
348
- transcript_buffer.append(text)
349
- transcript_last_path = current_file
350
- if transcript_start_line is None:
351
- transcript_start_line = next_line_number
352
- transcript_end_line = next_line_number
353
- timestamp_value = None
354
- if text and " " in text:
355
- timestamp_value = text.split(" ", 1)[0]
356
- transcript_last_timestamp = timestamp_value
357
- role, turn_text = _parse_transcript_line(text)
358
- if role and turn_text:
359
- ok = _post_turn(
360
- turn_endpoint,
361
- {
459
+ except Exception:
460
+ pass
461
+
462
+ # 2) Watch OpenClaw session jsonl (verbatim capture)
463
+ session_latest = _pick_latest(session_target, session_glob)
464
+ if session_latest is not None:
465
+ if session_file is None or session_latest != session_file:
466
+ session_file = session_latest
467
+ session_pos = 0
468
+ if start_at_end:
469
+ try:
470
+ session_pos = session_file.stat().st_size
471
+ except Exception:
472
+ session_pos = 0
473
+ try:
474
+ with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
475
+ handle.seek(session_pos)
476
+ committed_session_pos = session_pos
477
+ while True:
478
+ if stopper.is_set():
479
+ break
480
+ line_start = handle.tell()
481
+ line = handle.readline()
482
+ if not line:
483
+ session_pos = committed_session_pos
484
+ break
485
+ try:
486
+ entry = json.loads(line)
487
+ except Exception:
488
+ committed_session_pos = handle.tell()
489
+ session_pos = committed_session_pos
490
+ continue
491
+ if entry.get("type") != "message":
492
+ committed_session_pos = handle.tell()
493
+ session_pos = committed_session_pos
494
+ continue
495
+ msg = entry.get("message") or {}
496
+ role = msg.get("role")
497
+ if role not in {"user", "assistant"}:
498
+ committed_session_pos = handle.tell()
499
+ session_pos = committed_session_pos
500
+ continue
501
+ content = msg.get("content")
502
+ text = _extract_message_text(content).strip()
503
+ conversation_info = _extract_conversation_info(text)
504
+ if role == "user":
505
+ text = _extract_user_text(text)
506
+ text = text.replace("\n", " ").strip()
507
+ if not text:
508
+ committed_session_pos = handle.tell()
509
+ session_pos = committed_session_pos
510
+ continue
511
+ timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
512
+ if role == "user":
513
+ _maybe_reinforce(text, timestamp)
514
+ session_id = session_file.stem if session_file is not None else None
515
+ message_id = entry.get("id") or conversation_info.get("message_id")
516
+ conversation_id = conversation_info.get("conversation_id") or session_id
517
+ thread_id = conversation_info.get("thread_id") or session_id
518
+ transcript_line = f"{timestamp} [{role}] {text}"
519
+ retry_key = (str(session_file), line_start)
520
+ pending = pending_session_turns.get(retry_key)
521
+ if pending is None:
522
+ transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
523
+ turn_payload = {
362
524
  "role": role,
363
- "content": turn_text,
364
- "source": source,
365
- "transcript_path": str(current_file),
366
- "transcript_offset": next_line_number,
367
- "transcript_end_offset": next_line_number,
368
- "timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
369
- },
370
- )
371
- if not ok:
372
- if transcript_buffer:
373
- transcript_buffer.pop()
374
- if transcript_start_line == next_line_number:
375
- transcript_start_line = None
376
- transcript_end_line = committed_line_number if transcript_start_line is not None else None
377
- position = line_start
378
- current_line_number = committed_line_number
525
+ "content": text,
526
+ "conversation_id": conversation_id,
527
+ "session_id": session_id,
528
+ "thread_id": thread_id,
529
+ "message_id": message_id,
530
+ "source": "session",
531
+ "timestamp": timestamp.replace("T", " ")[:19],
532
+ "transcript_path": str(transcript_path),
533
+ "transcript_offset": transcript_line_no,
534
+ "transcript_end_offset": transcript_line_no,
535
+ "metadata": {
536
+ "parent_message_id": entry.get("parentId"),
537
+ },
538
+ }
539
+ pending_session_turns[retry_key] = {
540
+ "payload": dict(turn_payload),
541
+ "transcript_line": transcript_line,
542
+ "transcript_path": transcript_path,
543
+ "transcript_line_no": transcript_line_no,
544
+ }
545
+ else:
546
+ turn_payload = dict(pending["payload"])
547
+ transcript_line = str(pending["transcript_line"])
548
+ transcript_path = Path(str(pending["transcript_path"]))
549
+ transcript_line_no = int(pending["transcript_line_no"])
550
+ if stopper.is_set():
379
551
  break
380
- if len(transcript_buffer) >= batch_max:
381
- ok = _flush_buffer(
382
- transcript_buffer,
383
- source_label=source,
384
- transcript_path=transcript_last_path,
385
- timestamp=transcript_last_timestamp,
386
- start_line=transcript_start_line,
387
- end_line=transcript_end_line,
388
- )
389
- if not ok:
390
- position = line_start
391
- current_line_number = committed_line_number
552
+ if not _post_turn(turn_endpoint, turn_payload, stop_event=stopper):
553
+ session_pos = line_start
392
554
  break
393
- transcript_start_line = None
394
- transcript_end_line = None
395
- last_transcript_flush = time.time()
396
- committed_position = handle.tell()
397
- committed_line_number = next_line_number
398
- position = committed_position
399
- current_line_number = committed_line_number
400
- except Exception:
401
- pass
402
-
403
- # 2) Watch OpenClaw session jsonl (verbatim capture)
404
- session_latest = _pick_latest(session_target, session_glob)
405
- if session_latest is not None:
406
- if session_file is None or session_latest != session_file:
407
- session_file = session_latest
408
- session_pos = 0
409
- if start_at_end:
410
- try:
411
- session_pos = session_file.stat().st_size
412
- except Exception:
413
- session_pos = 0
414
- try:
415
- with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
416
- handle.seek(session_pos)
417
- committed_session_pos = session_pos
418
- while True:
419
- line_start = handle.tell()
420
- line = handle.readline()
421
- if not line:
422
- session_pos = committed_session_pos
423
- break
424
- try:
425
- entry = json.loads(line)
426
- except Exception:
427
- committed_session_pos = handle.tell()
428
- session_pos = committed_session_pos
429
- continue
430
- if entry.get("type") != "message":
431
- committed_session_pos = handle.tell()
432
- session_pos = committed_session_pos
433
- continue
434
- msg = entry.get("message") or {}
435
- role = msg.get("role")
436
- if role not in {"user", "assistant"}:
437
- committed_session_pos = handle.tell()
438
- session_pos = committed_session_pos
439
- continue
440
- content = msg.get("content")
441
- text = _extract_message_text(content).strip()
442
- conversation_info = _extract_conversation_info(text)
443
- if role == "user":
444
- text = _extract_user_text(text)
445
- text = text.replace("\n", " ").strip()
446
- if not text:
555
+ pending_session_turns.pop(retry_key, None)
556
+ recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
557
+ session_buffer.append(transcript_line)
558
+ session_last_path = transcript_path
559
+ session_last_timestamp = timestamp
560
+ if session_start_line is None:
561
+ session_start_line = transcript_line_no
562
+ session_end_line = transcript_line_no
563
+ if len(session_buffer) >= batch_max:
564
+ ok = _flush_buffer(
565
+ session_buffer,
566
+ source_label="session",
567
+ transcript_path=session_last_path,
568
+ timestamp=session_last_timestamp,
569
+ start_line=session_start_line,
570
+ end_line=session_end_line,
571
+ stop_event=stopper,
572
+ )
573
+ if not ok:
574
+ session_pos = line_start
575
+ break
576
+ session_start_line = None
577
+ session_end_line = None
578
+ last_session_flush = time.time()
447
579
  committed_session_pos = handle.tell()
448
580
  session_pos = committed_session_pos
449
- continue
450
- timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
451
- if role == "user":
452
- _maybe_reinforce(text, timestamp)
453
- session_id = session_file.stem if session_file is not None else None
454
- message_id = entry.get("id") or conversation_info.get("message_id")
455
- conversation_id = conversation_info.get("conversation_id") or session_id
456
- thread_id = conversation_info.get("thread_id") or session_id
457
- transcript_line = f"{timestamp} [{role}] {text}"
458
- retry_key = (str(session_file), line_start)
459
- pending = pending_session_turns.get(retry_key)
460
- if pending is None:
461
- transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
462
- turn_payload = {
463
- "role": role,
464
- "content": text,
465
- "conversation_id": conversation_id,
466
- "session_id": session_id,
467
- "thread_id": thread_id,
468
- "message_id": message_id,
469
- "source": "session",
470
- "timestamp": timestamp.replace("T", " ")[:19],
471
- "transcript_path": str(transcript_path),
472
- "transcript_offset": transcript_line_no,
473
- "transcript_end_offset": transcript_line_no,
474
- "metadata": {
475
- "parent_message_id": entry.get("parentId"),
476
- },
477
- }
478
- pending_session_turns[retry_key] = {
479
- "payload": dict(turn_payload),
480
- "transcript_line": transcript_line,
481
- "transcript_path": transcript_path,
482
- "transcript_line_no": transcript_line_no,
483
- }
484
- else:
485
- turn_payload = dict(pending["payload"])
486
- transcript_line = str(pending["transcript_line"])
487
- transcript_path = Path(str(pending["transcript_path"]))
488
- transcript_line_no = int(pending["transcript_line_no"])
489
- if not _post_turn(turn_endpoint, turn_payload):
490
- session_pos = line_start
491
- break
492
- pending_session_turns.pop(retry_key, None)
493
- recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
494
- session_buffer.append(transcript_line)
495
- session_last_path = transcript_path
496
- session_last_timestamp = timestamp
497
- if session_start_line is None:
498
- session_start_line = transcript_line_no
499
- session_end_line = transcript_line_no
500
- if len(session_buffer) >= batch_max:
501
- ok = _flush_buffer(
502
- session_buffer,
503
- source_label="session",
504
- transcript_path=session_last_path,
505
- timestamp=session_last_timestamp,
506
- start_line=session_start_line,
507
- end_line=session_end_line,
508
- )
509
- if not ok:
510
- session_pos = line_start
511
- break
512
- session_start_line = None
513
- session_end_line = None
514
- last_session_flush = time.time()
515
- committed_session_pos = handle.tell()
516
- session_pos = committed_session_pos
517
- except Exception:
518
- pass
581
+ except Exception:
582
+ pass
519
583
 
520
584
  now = time.time()
521
585
  if transcript_buffer and (now - last_transcript_flush) >= batch_seconds:
@@ -526,6 +590,7 @@ def watch_forever() -> None:
526
590
  timestamp=transcript_last_timestamp,
527
591
  start_line=transcript_start_line,
528
592
  end_line=transcript_end_line,
593
+ stop_event=stopper,
529
594
  )
530
595
  if ok:
531
596
  transcript_start_line = None
@@ -539,10 +604,23 @@ def watch_forever() -> None:
539
604
  timestamp=session_last_timestamp,
540
605
  start_line=session_start_line,
541
606
  end_line=session_end_line,
607
+ stop_event=stopper,
542
608
  )
543
609
  if ok:
544
610
  session_start_line = None
545
611
  session_end_line = None
546
612
  last_session_flush = now
547
613
 
548
- time.sleep(poll_seconds)
614
+ poll_started = time.perf_counter()
615
+ if stopper.wait(poll_seconds):
616
+ if _SHUTDOWN_TRACE:
617
+ print(
618
+ f"[ocmemog][watcher-poll] stop_wait timeout={poll_seconds:.3f}s elapsed={time.perf_counter()-poll_started:.3f}s",
619
+ file=sys.stderr,
620
+ )
621
+ return
622
+ finally:
623
+ _WATCHER_STOP_EVENT = None
624
+ if _SHUTDOWN_TRACE:
625
+ print("[ocmemog][watcher] shutdown loop exiting", file=sys.stderr)
626
+ # no return value