@simbimbo/memory-ocmemog 0.1.11 → 0.1.13

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +83 -18
  3. package/brain/runtime/__init__.py +2 -12
  4. package/brain/runtime/config.py +1 -24
  5. package/brain/runtime/inference.py +1 -151
  6. package/brain/runtime/instrumentation.py +1 -15
  7. package/brain/runtime/memory/__init__.py +3 -13
  8. package/brain/runtime/memory/api.py +1 -1219
  9. package/brain/runtime/memory/candidate.py +1 -185
  10. package/brain/runtime/memory/conversation_state.py +1 -1823
  11. package/brain/runtime/memory/distill.py +1 -344
  12. package/brain/runtime/memory/embedding_engine.py +1 -92
  13. package/brain/runtime/memory/freshness.py +1 -112
  14. package/brain/runtime/memory/health.py +1 -40
  15. package/brain/runtime/memory/integrity.py +1 -186
  16. package/brain/runtime/memory/memory_consolidation.py +1 -58
  17. package/brain/runtime/memory/memory_links.py +1 -107
  18. package/brain/runtime/memory/memory_salience.py +1 -233
  19. package/brain/runtime/memory/memory_synthesis.py +1 -31
  20. package/brain/runtime/memory/memory_taxonomy.py +1 -33
  21. package/brain/runtime/memory/pondering_engine.py +1 -654
  22. package/brain/runtime/memory/promote.py +1 -277
  23. package/brain/runtime/memory/provenance.py +1 -406
  24. package/brain/runtime/memory/reinforcement.py +1 -71
  25. package/brain/runtime/memory/retrieval.py +1 -210
  26. package/brain/runtime/memory/semantic_search.py +1 -64
  27. package/brain/runtime/memory/store.py +1 -429
  28. package/brain/runtime/memory/unresolved_state.py +1 -91
  29. package/brain/runtime/memory/vector_index.py +1 -323
  30. package/brain/runtime/model_roles.py +1 -9
  31. package/brain/runtime/model_router.py +1 -22
  32. package/brain/runtime/providers.py +1 -66
  33. package/brain/runtime/security/redaction.py +1 -12
  34. package/brain/runtime/state_store.py +1 -23
  35. package/brain/runtime/storage_paths.py +1 -39
  36. package/docs/architecture/memory.md +20 -24
  37. package/docs/release-checklist.md +19 -6
  38. package/docs/usage.md +33 -17
  39. package/index.ts +8 -1
  40. package/ocmemog/__init__.py +11 -0
  41. package/ocmemog/doctor.py +1255 -0
  42. package/ocmemog/runtime/__init__.py +18 -0
  43. package/ocmemog/runtime/_compat_bridge.py +28 -0
  44. package/ocmemog/runtime/config.py +34 -0
  45. package/ocmemog/runtime/identity.py +115 -0
  46. package/ocmemog/runtime/inference.py +163 -0
  47. package/ocmemog/runtime/instrumentation.py +20 -0
  48. package/ocmemog/runtime/memory/__init__.py +91 -0
  49. package/ocmemog/runtime/memory/api.py +1594 -0
  50. package/ocmemog/runtime/memory/candidate.py +192 -0
  51. package/ocmemog/runtime/memory/conversation_state.py +1831 -0
  52. package/ocmemog/runtime/memory/distill.py +282 -0
  53. package/ocmemog/runtime/memory/embedding_engine.py +151 -0
  54. package/ocmemog/runtime/memory/freshness.py +114 -0
  55. package/ocmemog/runtime/memory/health.py +93 -0
  56. package/ocmemog/runtime/memory/integrity.py +208 -0
  57. package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
  58. package/ocmemog/runtime/memory/memory_links.py +109 -0
  59. package/ocmemog/runtime/memory/memory_salience.py +235 -0
  60. package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
  61. package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
  62. package/ocmemog/runtime/memory/pondering_engine.py +681 -0
  63. package/ocmemog/runtime/memory/promote.py +279 -0
  64. package/ocmemog/runtime/memory/provenance.py +408 -0
  65. package/ocmemog/runtime/memory/reinforcement.py +73 -0
  66. package/ocmemog/runtime/memory/retrieval.py +224 -0
  67. package/ocmemog/runtime/memory/semantic_search.py +66 -0
  68. package/ocmemog/runtime/memory/store.py +433 -0
  69. package/ocmemog/runtime/memory/unresolved_state.py +93 -0
  70. package/ocmemog/runtime/memory/vector_index.py +411 -0
  71. package/ocmemog/runtime/model_roles.py +15 -0
  72. package/ocmemog/runtime/model_router.py +28 -0
  73. package/ocmemog/runtime/providers.py +78 -0
  74. package/ocmemog/runtime/roles.py +92 -0
  75. package/ocmemog/runtime/security/__init__.py +8 -0
  76. package/ocmemog/runtime/security/redaction.py +17 -0
  77. package/ocmemog/runtime/state_store.py +32 -0
  78. package/ocmemog/runtime/storage_paths.py +70 -0
  79. package/ocmemog/sidecar/app.py +421 -60
  80. package/ocmemog/sidecar/compat.py +50 -13
  81. package/ocmemog/sidecar/transcript_watcher.py +327 -242
  82. package/openclaw.plugin.json +4 -0
  83. package/package.json +1 -1
  84. package/scripts/ocmemog-backfill-vectors.py +5 -3
  85. package/scripts/ocmemog-continuity-benchmark.py +1 -1
  86. package/scripts/ocmemog-demo.py +1 -1
  87. package/scripts/ocmemog-doctor.py +15 -0
  88. package/scripts/ocmemog-install.sh +29 -7
  89. package/scripts/ocmemog-integrated-proof.py +374 -0
  90. package/scripts/ocmemog-reindex-vectors.py +5 -3
  91. package/scripts/ocmemog-release-check.sh +330 -0
  92. package/scripts/ocmemog-sidecar.sh +4 -2
  93. package/scripts/ocmemog-test-rig.py +5 -3
  94. package/brain/runtime/memory/artifacts.py +0 -33
  95. package/brain/runtime/memory/context_builder.py +0 -112
  96. package/brain/runtime/memory/interaction_memory.py +0 -57
  97. package/brain/runtime/memory/memory_gate.py +0 -38
  98. package/brain/runtime/memory/memory_graph.py +0 -54
  99. package/brain/runtime/memory/person_identity.py +0 -83
  100. package/brain/runtime/memory/person_memory.py +0 -138
  101. package/brain/runtime/memory/sentiment_memory.py +0 -67
  102. package/brain/runtime/memory/tool_catalog.py +0 -68
@@ -1,14 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import sys
4
5
  import os
5
6
  import time
6
7
  from collections import deque
8
+ import threading
7
9
  from pathlib import Path
8
10
  from typing import Optional
9
11
  from urllib import request as urlrequest
10
12
 
11
- from brain.runtime import state_store
13
+ from ocmemog.runtime import state_store
12
14
 
13
15
  DEFAULT_ENDPOINT = "http://127.0.0.1:17891/memory/ingest_async"
14
16
  DEFAULT_GLOB = "*.log"
@@ -34,6 +36,65 @@ DEFAULT_REINFORCE_NEGATIVE = [
34
36
  "frustrated",
35
37
  ]
36
38
  WATCHER_ERROR_LOG = state_store.reports_dir() / "ocmemog_transcript_watcher_errors.jsonl"
39
+ _SHUTDOWN_TRACE = os.environ.get("OCMEMOG_SHUTDOWN_TIMING", "true").lower() in {"1", "true", "yes", "on"}
40
+ _WATCHER_REQUEST_TIMEOUT_SECONDS = 10.0
41
+ _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = 1.0
42
+ _WATCHER_STOP_EVENT: threading.Event | None = None
43
+
44
+ try:
45
+ _WATCHER_REQUEST_TIMEOUT_SECONDS = float(os.environ.get("OCMEMOG_INGEST_REQUEST_TIMEOUT_SECONDS", "10"))
46
+ except Exception:
47
+ pass
48
+ try:
49
+ _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = float(
50
+ os.environ.get("OCMEMOG_SHUTDOWN_INGEST_REQUEST_TIMEOUT_SECONDS", "1")
51
+ )
52
+ except Exception:
53
+ pass
54
+
55
+
56
+ def _watcher_timeout(stop_event: threading.Event | None) -> float:
57
+ timeout = _WATCHER_REQUEST_TIMEOUT_SECONDS
58
+ if stop_event is not None and stop_event.is_set():
59
+ timeout = min(timeout, _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS)
60
+ return max(0.05, timeout)
61
+
62
+
63
+ def _post_json_payload(endpoint: str, payload: dict, *, stop_event: threading.Event | None, kind: str) -> bool:
64
+ data = json.dumps(payload).encode("utf-8")
65
+ req = urlrequest.Request(endpoint, data=data, method="POST")
66
+ req.add_header("Content-Type", "application/json")
67
+ _apply_auth_headers(req)
68
+ timeout = _watcher_timeout(stop_event)
69
+ start = time.perf_counter()
70
+ status = "ok"
71
+ try:
72
+ with urlrequest.urlopen(req, timeout=timeout) as resp:
73
+ resp.read()
74
+ return True
75
+ except Exception as exc:
76
+ status = f"error={type(exc).__name__}"
77
+ _log_watcher_error(kind, endpoint, payload, exc)
78
+ if _SHUTDOWN_TRACE:
79
+ print(
80
+ f"[ocmemog][watcher-request] {kind} failed timeout={timeout:.3f}s elapsed={time.perf_counter()-start:.3f}s",
81
+ file=sys.stderr,
82
+ )
83
+ return False
84
+ finally:
85
+ if _SHUTDOWN_TRACE:
86
+ elapsed = time.perf_counter() - start
87
+ if stop_event is None or not stop_event.is_set():
88
+ if elapsed >= timeout * 0.95:
89
+ print(
90
+ f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
91
+ file=sys.stderr,
92
+ )
93
+ else:
94
+ print(
95
+ f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
96
+ file=sys.stderr,
97
+ )
37
98
 
38
99
 
39
100
  def _log_watcher_error(kind: str, endpoint: str, payload: dict, exc: Exception) -> None:
@@ -57,8 +118,15 @@ def _pick_latest(path: Path, pattern: str) -> Optional[Path]:
57
118
  return path
58
119
  if not path.exists():
59
120
  return None
60
- files = sorted(path.glob(pattern), key=lambda p: p.stat().st_mtime)
61
- return files[-1] if files else None
121
+ files = []
122
+ for candidate in path.glob(pattern):
123
+ try:
124
+ mtime = candidate.stat().st_mtime
125
+ except FileNotFoundError:
126
+ continue
127
+ files.append((mtime, candidate))
128
+ files.sort(key=lambda item: item[0])
129
+ return files[-1][1] if files else None
62
130
 
63
131
 
64
132
  def _apply_auth_headers(req: urlrequest.Request) -> None:
@@ -67,36 +135,16 @@ def _apply_auth_headers(req: urlrequest.Request) -> None:
67
135
  req.add_header("x-ocmemog-token", token)
68
136
 
69
137
 
70
- def _post_ingest(endpoint: str, payload: dict) -> bool:
71
- data = json.dumps(payload).encode("utf-8")
72
- req = urlrequest.Request(endpoint, data=data, method="POST")
73
- req.add_header("Content-Type", "application/json")
74
- _apply_auth_headers(req)
75
- try:
76
- with urlrequest.urlopen(req, timeout=10) as resp:
77
- resp.read()
78
- return True
79
- except Exception as exc:
80
- _log_watcher_error("ingest", endpoint, payload, exc)
81
- return False
138
+ def _post_ingest(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
139
+ return _post_json_payload(endpoint, payload, stop_event=stop_event, kind="ingest")
82
140
 
83
141
 
84
- def _post_json(endpoint: str, payload: dict) -> bool:
85
- data = json.dumps(payload).encode("utf-8")
86
- req = urlrequest.Request(endpoint, data=data, method="POST")
87
- req.add_header("Content-Type", "application/json")
88
- _apply_auth_headers(req)
89
- try:
90
- with urlrequest.urlopen(req, timeout=10) as resp:
91
- resp.read()
92
- return True
93
- except Exception as exc:
94
- _log_watcher_error("json", endpoint, payload, exc)
95
- return False
142
+ def _post_json(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
143
+ return _post_json_payload(endpoint, payload, stop_event=stop_event, kind="json")
96
144
 
97
145
 
98
- def _post_turn(endpoint: str, payload: dict) -> bool:
99
- return _post_json(endpoint, payload)
146
+ def _post_turn(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
147
+ return _post_json(endpoint, payload, stop_event=stop_event)
100
148
 
101
149
 
102
150
  def _extract_user_text(text: str) -> str:
@@ -183,7 +231,8 @@ def _append_transcript(transcript_target: Path, timestamp: str, role: str, text:
183
231
  return path, line_no
184
232
 
185
233
 
186
- def watch_forever() -> None:
234
+ def watch_forever(stop_event: Optional[threading.Event] = None) -> None:
235
+ global _WATCHER_STOP_EVENT
187
236
  transcript_path = os.environ.get("OCMEMOG_TRANSCRIPT_PATH", "").strip()
188
237
  transcript_dir = os.environ.get("OCMEMOG_TRANSCRIPT_DIR", "").strip()
189
238
  glob_pattern = os.environ.get("OCMEMOG_TRANSCRIPT_GLOB", DEFAULT_GLOB)
@@ -240,6 +289,13 @@ def watch_forever() -> None:
240
289
  pending_session_turns: dict[tuple[str, int], dict[str, object]] = {}
241
290
  last_transcript_flush = time.time()
242
291
  last_session_flush = time.time()
292
+ stopper: threading.Event
293
+ if isinstance(stop_event, threading.Event):
294
+ stopper = stop_event
295
+ else:
296
+ stopper = threading.Event()
297
+ stopper.clear()
298
+ _WATCHER_STOP_EVENT = stopper
243
299
 
244
300
  def _flush_buffer(
245
301
  buffer: list[str],
@@ -249,9 +305,12 @@ def watch_forever() -> None:
249
305
  timestamp: Optional[str],
250
306
  start_line: Optional[int],
251
307
  end_line: Optional[int],
308
+ stop_event: threading.Event,
252
309
  ) -> bool:
253
310
  if not buffer:
254
311
  return True
312
+ if stop_event.is_set():
313
+ return False
255
314
  payload = {
256
315
  "content": "\n".join(buffer),
257
316
  "kind": kind,
@@ -266,7 +325,7 @@ def watch_forever() -> None:
266
325
  payload["transcript_end_offset"] = end_line
267
326
  if timestamp:
268
327
  payload["timestamp"] = timestamp.replace("T", " ")[:19]
269
- ok = _post_ingest(endpoint, payload)
328
+ ok = _post_ingest(endpoint, payload, stop_event=stop_event)
270
329
  if ok:
271
330
  buffer.clear()
272
331
  return ok
@@ -286,7 +345,7 @@ def watch_forever() -> None:
286
345
  "source_module": "sentiment",
287
346
  "note": text,
288
347
  }
289
- _post_json(reinforce_endpoint, payload)
348
+ _post_json(reinforce_endpoint, payload, stop_event=stopper)
290
349
  elif any(term in lowered for term in negative_terms):
291
350
  payload = {
292
351
  "task_id": f"feedback:{timestamp}",
@@ -298,224 +357,236 @@ def watch_forever() -> None:
298
357
  "source_module": "sentiment",
299
358
  "note": text,
300
359
  }
301
- _post_json(reinforce_endpoint, payload)
302
-
303
- while True:
304
- # 1) Watch transcript logs (if any)
305
- latest = _pick_latest(transcript_target, glob_pattern)
306
- if latest is not None:
307
- if current_file is None or latest != current_file:
308
- current_file = latest
309
- position = 0
310
- current_line_number = 0
311
- if start_at_end:
312
- try:
313
- position = current_file.stat().st_size
314
- except Exception:
315
- position = 0
316
- try:
317
- current_line_number = _count_lines(current_file)
318
- except Exception:
319
- current_line_number = 0
320
-
321
- try:
322
- with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
323
- handle.seek(position)
324
- committed_position = position
325
- committed_line_number = current_line_number
326
- while True:
327
- line_start = handle.tell()
328
- line = handle.readline()
329
- if not line:
330
- position = committed_position
331
- current_line_number = committed_line_number
332
- break
333
- text = line.rstrip("\n")
334
- next_line_number = committed_line_number + 1
335
- if not text.strip():
336
- committed_position = handle.tell()
337
- committed_line_number = next_line_number
338
- position = committed_position
339
- current_line_number = committed_line_number
340
- continue
341
- current_marker = (str(current_file), next_line_number)
342
- if current_marker in recent_session_transcript_lines:
360
+ _post_json(reinforce_endpoint, payload, stop_event=stopper)
361
+
362
+ try:
363
+ while not stopper.is_set():
364
+ # 1) Watch transcript logs (if any)
365
+ latest = _pick_latest(transcript_target, glob_pattern)
366
+ if latest is not None:
367
+ if current_file is None or latest != current_file:
368
+ current_file = latest
369
+ position = 0
370
+ current_line_number = 0
371
+ if start_at_end:
372
+ try:
373
+ position = current_file.stat().st_size
374
+ except Exception:
375
+ position = 0
376
+ try:
377
+ current_line_number = _count_lines(current_file)
378
+ except Exception:
379
+ current_line_number = 0
380
+
381
+ try:
382
+ with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
383
+ handle.seek(position)
384
+ committed_position = position
385
+ committed_line_number = current_line_number
386
+ while True:
387
+ if stopper.is_set():
388
+ break
389
+ line_start = handle.tell()
390
+ line = handle.readline()
391
+ if not line:
392
+ position = committed_position
393
+ current_line_number = committed_line_number
394
+ break
395
+ text = line.rstrip("\n")
396
+ next_line_number = committed_line_number + 1
397
+ if not text.strip():
398
+ committed_position = handle.tell()
399
+ committed_line_number = next_line_number
400
+ position = committed_position
401
+ current_line_number = committed_line_number
402
+ continue
403
+ current_marker = (str(current_file), next_line_number)
404
+ if current_marker in recent_session_transcript_lines:
405
+ committed_position = handle.tell()
406
+ committed_line_number = next_line_number
407
+ position = committed_position
408
+ current_line_number = committed_line_number
409
+ continue
410
+ transcript_buffer.append(text)
411
+ transcript_last_path = current_file
412
+ if transcript_start_line is None:
413
+ transcript_start_line = next_line_number
414
+ transcript_end_line = next_line_number
415
+ timestamp_value = None
416
+ if text and " " in text:
417
+ timestamp_value = text.split(" ", 1)[0]
418
+ transcript_last_timestamp = timestamp_value
419
+ role, turn_text = _parse_transcript_line(text)
420
+ if role and turn_text:
421
+ if stopper.is_set():
422
+ break
423
+ ok = _post_turn(
424
+ turn_endpoint,
425
+ {
426
+ "role": role,
427
+ "content": turn_text,
428
+ "source": source,
429
+ "transcript_path": str(current_file),
430
+ "transcript_offset": next_line_number,
431
+ "transcript_end_offset": next_line_number,
432
+ "timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
433
+ },
434
+ stop_event=stopper,
435
+ )
436
+ if not ok:
437
+ if transcript_buffer:
438
+ transcript_buffer.pop()
439
+ if transcript_start_line == next_line_number:
440
+ transcript_start_line = None
441
+ transcript_end_line = committed_line_number if transcript_start_line is not None else None
442
+ position = line_start
443
+ current_line_number = committed_line_number
444
+ break
445
+ if len(transcript_buffer) >= batch_max:
446
+ ok = _flush_buffer(
447
+ transcript_buffer,
448
+ source_label=source,
449
+ transcript_path=transcript_last_path,
450
+ timestamp=transcript_last_timestamp,
451
+ start_line=transcript_start_line,
452
+ end_line=transcript_end_line,
453
+ stop_event=stopper,
454
+ )
455
+ if not ok:
456
+ position = line_start
457
+ current_line_number = committed_line_number
458
+ break
459
+ transcript_start_line = None
460
+ transcript_end_line = None
461
+ last_transcript_flush = time.time()
343
462
  committed_position = handle.tell()
344
463
  committed_line_number = next_line_number
345
464
  position = committed_position
346
465
  current_line_number = committed_line_number
347
- continue
348
- transcript_buffer.append(text)
349
- transcript_last_path = current_file
350
- if transcript_start_line is None:
351
- transcript_start_line = next_line_number
352
- transcript_end_line = next_line_number
353
- timestamp_value = None
354
- if text and " " in text:
355
- timestamp_value = text.split(" ", 1)[0]
356
- transcript_last_timestamp = timestamp_value
357
- role, turn_text = _parse_transcript_line(text)
358
- if role and turn_text:
359
- ok = _post_turn(
360
- turn_endpoint,
361
- {
466
+ except Exception:
467
+ pass
468
+
469
+ # 2) Watch OpenClaw session jsonl (verbatim capture)
470
+ session_latest = _pick_latest(session_target, session_glob)
471
+ if session_latest is not None:
472
+ if session_file is None or session_latest != session_file:
473
+ session_file = session_latest
474
+ session_pos = 0
475
+ if start_at_end:
476
+ try:
477
+ session_pos = session_file.stat().st_size
478
+ except Exception:
479
+ session_pos = 0
480
+ try:
481
+ with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
482
+ handle.seek(session_pos)
483
+ committed_session_pos = session_pos
484
+ while True:
485
+ if stopper.is_set():
486
+ break
487
+ line_start = handle.tell()
488
+ line = handle.readline()
489
+ if not line:
490
+ session_pos = committed_session_pos
491
+ break
492
+ try:
493
+ entry = json.loads(line)
494
+ except Exception:
495
+ committed_session_pos = handle.tell()
496
+ session_pos = committed_session_pos
497
+ continue
498
+ if entry.get("type") != "message":
499
+ committed_session_pos = handle.tell()
500
+ session_pos = committed_session_pos
501
+ continue
502
+ msg = entry.get("message") or {}
503
+ role = msg.get("role")
504
+ if role not in {"user", "assistant"}:
505
+ committed_session_pos = handle.tell()
506
+ session_pos = committed_session_pos
507
+ continue
508
+ content = msg.get("content")
509
+ text = _extract_message_text(content).strip()
510
+ conversation_info = _extract_conversation_info(text)
511
+ if role == "user":
512
+ text = _extract_user_text(text)
513
+ text = text.replace("\n", " ").strip()
514
+ if not text:
515
+ committed_session_pos = handle.tell()
516
+ session_pos = committed_session_pos
517
+ continue
518
+ timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
519
+ if role == "user":
520
+ _maybe_reinforce(text, timestamp)
521
+ session_id = session_file.stem if session_file is not None else None
522
+ message_id = entry.get("id") or conversation_info.get("message_id")
523
+ conversation_id = conversation_info.get("conversation_id") or session_id
524
+ thread_id = conversation_info.get("thread_id") or session_id
525
+ transcript_line = f"{timestamp} [{role}] {text}"
526
+ retry_key = (str(session_file), line_start)
527
+ pending = pending_session_turns.get(retry_key)
528
+ if pending is None:
529
+ transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
530
+ turn_payload = {
362
531
  "role": role,
363
- "content": turn_text,
364
- "source": source,
365
- "transcript_path": str(current_file),
366
- "transcript_offset": next_line_number,
367
- "transcript_end_offset": next_line_number,
368
- "timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
369
- },
370
- )
371
- if not ok:
372
- if transcript_buffer:
373
- transcript_buffer.pop()
374
- if transcript_start_line == next_line_number:
375
- transcript_start_line = None
376
- transcript_end_line = committed_line_number if transcript_start_line is not None else None
377
- position = line_start
378
- current_line_number = committed_line_number
532
+ "content": text,
533
+ "conversation_id": conversation_id,
534
+ "session_id": session_id,
535
+ "thread_id": thread_id,
536
+ "message_id": message_id,
537
+ "source": "session",
538
+ "timestamp": timestamp.replace("T", " ")[:19],
539
+ "transcript_path": str(transcript_path),
540
+ "transcript_offset": transcript_line_no,
541
+ "transcript_end_offset": transcript_line_no,
542
+ "metadata": {
543
+ "parent_message_id": entry.get("parentId"),
544
+ },
545
+ }
546
+ pending_session_turns[retry_key] = {
547
+ "payload": dict(turn_payload),
548
+ "transcript_line": transcript_line,
549
+ "transcript_path": transcript_path,
550
+ "transcript_line_no": transcript_line_no,
551
+ }
552
+ else:
553
+ turn_payload = dict(pending["payload"])
554
+ transcript_line = str(pending["transcript_line"])
555
+ transcript_path = Path(str(pending["transcript_path"]))
556
+ transcript_line_no = int(pending["transcript_line_no"])
557
+ if stopper.is_set():
379
558
  break
380
- if len(transcript_buffer) >= batch_max:
381
- ok = _flush_buffer(
382
- transcript_buffer,
383
- source_label=source,
384
- transcript_path=transcript_last_path,
385
- timestamp=transcript_last_timestamp,
386
- start_line=transcript_start_line,
387
- end_line=transcript_end_line,
388
- )
389
- if not ok:
390
- position = line_start
391
- current_line_number = committed_line_number
559
+ if not _post_turn(turn_endpoint, turn_payload, stop_event=stopper):
560
+ session_pos = line_start
392
561
  break
393
- transcript_start_line = None
394
- transcript_end_line = None
395
- last_transcript_flush = time.time()
396
- committed_position = handle.tell()
397
- committed_line_number = next_line_number
398
- position = committed_position
399
- current_line_number = committed_line_number
400
- except Exception:
401
- pass
402
-
403
- # 2) Watch OpenClaw session jsonl (verbatim capture)
404
- session_latest = _pick_latest(session_target, session_glob)
405
- if session_latest is not None:
406
- if session_file is None or session_latest != session_file:
407
- session_file = session_latest
408
- session_pos = 0
409
- if start_at_end:
410
- try:
411
- session_pos = session_file.stat().st_size
412
- except Exception:
413
- session_pos = 0
414
- try:
415
- with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
416
- handle.seek(session_pos)
417
- committed_session_pos = session_pos
418
- while True:
419
- line_start = handle.tell()
420
- line = handle.readline()
421
- if not line:
422
- session_pos = committed_session_pos
423
- break
424
- try:
425
- entry = json.loads(line)
426
- except Exception:
427
- committed_session_pos = handle.tell()
428
- session_pos = committed_session_pos
429
- continue
430
- if entry.get("type") != "message":
431
- committed_session_pos = handle.tell()
432
- session_pos = committed_session_pos
433
- continue
434
- msg = entry.get("message") or {}
435
- role = msg.get("role")
436
- if role not in {"user", "assistant"}:
437
- committed_session_pos = handle.tell()
438
- session_pos = committed_session_pos
439
- continue
440
- content = msg.get("content")
441
- text = _extract_message_text(content).strip()
442
- conversation_info = _extract_conversation_info(text)
443
- if role == "user":
444
- text = _extract_user_text(text)
445
- text = text.replace("\n", " ").strip()
446
- if not text:
562
+ pending_session_turns.pop(retry_key, None)
563
+ recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
564
+ session_buffer.append(transcript_line)
565
+ session_last_path = transcript_path
566
+ session_last_timestamp = timestamp
567
+ if session_start_line is None:
568
+ session_start_line = transcript_line_no
569
+ session_end_line = transcript_line_no
570
+ if len(session_buffer) >= batch_max:
571
+ ok = _flush_buffer(
572
+ session_buffer,
573
+ source_label="session",
574
+ transcript_path=session_last_path,
575
+ timestamp=session_last_timestamp,
576
+ start_line=session_start_line,
577
+ end_line=session_end_line,
578
+ stop_event=stopper,
579
+ )
580
+ if not ok:
581
+ session_pos = line_start
582
+ break
583
+ session_start_line = None
584
+ session_end_line = None
585
+ last_session_flush = time.time()
447
586
  committed_session_pos = handle.tell()
448
587
  session_pos = committed_session_pos
449
- continue
450
- timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
451
- if role == "user":
452
- _maybe_reinforce(text, timestamp)
453
- session_id = session_file.stem if session_file is not None else None
454
- message_id = entry.get("id") or conversation_info.get("message_id")
455
- conversation_id = conversation_info.get("conversation_id") or session_id
456
- thread_id = conversation_info.get("thread_id") or session_id
457
- transcript_line = f"{timestamp} [{role}] {text}"
458
- retry_key = (str(session_file), line_start)
459
- pending = pending_session_turns.get(retry_key)
460
- if pending is None:
461
- transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
462
- turn_payload = {
463
- "role": role,
464
- "content": text,
465
- "conversation_id": conversation_id,
466
- "session_id": session_id,
467
- "thread_id": thread_id,
468
- "message_id": message_id,
469
- "source": "session",
470
- "timestamp": timestamp.replace("T", " ")[:19],
471
- "transcript_path": str(transcript_path),
472
- "transcript_offset": transcript_line_no,
473
- "transcript_end_offset": transcript_line_no,
474
- "metadata": {
475
- "parent_message_id": entry.get("parentId"),
476
- },
477
- }
478
- pending_session_turns[retry_key] = {
479
- "payload": dict(turn_payload),
480
- "transcript_line": transcript_line,
481
- "transcript_path": transcript_path,
482
- "transcript_line_no": transcript_line_no,
483
- }
484
- else:
485
- turn_payload = dict(pending["payload"])
486
- transcript_line = str(pending["transcript_line"])
487
- transcript_path = Path(str(pending["transcript_path"]))
488
- transcript_line_no = int(pending["transcript_line_no"])
489
- if not _post_turn(turn_endpoint, turn_payload):
490
- session_pos = line_start
491
- break
492
- pending_session_turns.pop(retry_key, None)
493
- recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
494
- session_buffer.append(transcript_line)
495
- session_last_path = transcript_path
496
- session_last_timestamp = timestamp
497
- if session_start_line is None:
498
- session_start_line = transcript_line_no
499
- session_end_line = transcript_line_no
500
- if len(session_buffer) >= batch_max:
501
- ok = _flush_buffer(
502
- session_buffer,
503
- source_label="session",
504
- transcript_path=session_last_path,
505
- timestamp=session_last_timestamp,
506
- start_line=session_start_line,
507
- end_line=session_end_line,
508
- )
509
- if not ok:
510
- session_pos = line_start
511
- break
512
- session_start_line = None
513
- session_end_line = None
514
- last_session_flush = time.time()
515
- committed_session_pos = handle.tell()
516
- session_pos = committed_session_pos
517
- except Exception:
518
- pass
588
+ except Exception:
589
+ pass
519
590
 
520
591
  now = time.time()
521
592
  if transcript_buffer and (now - last_transcript_flush) >= batch_seconds:
@@ -526,6 +597,7 @@ def watch_forever() -> None:
526
597
  timestamp=transcript_last_timestamp,
527
598
  start_line=transcript_start_line,
528
599
  end_line=transcript_end_line,
600
+ stop_event=stopper,
529
601
  )
530
602
  if ok:
531
603
  transcript_start_line = None
@@ -539,10 +611,23 @@ def watch_forever() -> None:
539
611
  timestamp=session_last_timestamp,
540
612
  start_line=session_start_line,
541
613
  end_line=session_end_line,
614
+ stop_event=stopper,
542
615
  )
543
616
  if ok:
544
617
  session_start_line = None
545
618
  session_end_line = None
546
619
  last_session_flush = now
547
620
 
548
- time.sleep(poll_seconds)
621
+ poll_started = time.perf_counter()
622
+ if stopper.wait(poll_seconds):
623
+ if _SHUTDOWN_TRACE:
624
+ print(
625
+ f"[ocmemog][watcher-poll] stop_wait timeout={poll_seconds:.3f}s elapsed={time.perf_counter()-poll_started:.3f}s",
626
+ file=sys.stderr,
627
+ )
628
+ return
629
+ finally:
630
+ _WATCHER_STOP_EVENT = None
631
+ if _SHUTDOWN_TRACE:
632
+ print("[ocmemog][watcher] shutdown loop exiting", file=sys.stderr)
633
+ # no return value