@simbimbo/memory-ocmemog 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +83 -18
- package/brain/runtime/__init__.py +2 -12
- package/brain/runtime/config.py +1 -24
- package/brain/runtime/inference.py +1 -151
- package/brain/runtime/instrumentation.py +1 -15
- package/brain/runtime/memory/__init__.py +3 -13
- package/brain/runtime/memory/api.py +1 -1219
- package/brain/runtime/memory/candidate.py +1 -185
- package/brain/runtime/memory/conversation_state.py +1 -1823
- package/brain/runtime/memory/distill.py +1 -344
- package/brain/runtime/memory/embedding_engine.py +1 -92
- package/brain/runtime/memory/freshness.py +1 -112
- package/brain/runtime/memory/health.py +1 -40
- package/brain/runtime/memory/integrity.py +1 -186
- package/brain/runtime/memory/memory_consolidation.py +1 -58
- package/brain/runtime/memory/memory_links.py +1 -107
- package/brain/runtime/memory/memory_salience.py +1 -233
- package/brain/runtime/memory/memory_synthesis.py +1 -31
- package/brain/runtime/memory/memory_taxonomy.py +1 -33
- package/brain/runtime/memory/pondering_engine.py +1 -654
- package/brain/runtime/memory/promote.py +1 -277
- package/brain/runtime/memory/provenance.py +1 -406
- package/brain/runtime/memory/reinforcement.py +1 -71
- package/brain/runtime/memory/retrieval.py +1 -210
- package/brain/runtime/memory/semantic_search.py +1 -64
- package/brain/runtime/memory/store.py +1 -429
- package/brain/runtime/memory/unresolved_state.py +1 -91
- package/brain/runtime/memory/vector_index.py +1 -323
- package/brain/runtime/model_roles.py +1 -9
- package/brain/runtime/model_router.py +1 -22
- package/brain/runtime/providers.py +1 -66
- package/brain/runtime/security/redaction.py +1 -12
- package/brain/runtime/state_store.py +1 -23
- package/brain/runtime/storage_paths.py +1 -39
- package/docs/architecture/memory.md +20 -24
- package/docs/release-checklist.md +19 -6
- package/docs/usage.md +33 -17
- package/index.ts +8 -1
- package/ocmemog/__init__.py +11 -0
- package/ocmemog/doctor.py +1255 -0
- package/ocmemog/runtime/__init__.py +18 -0
- package/ocmemog/runtime/_compat_bridge.py +28 -0
- package/ocmemog/runtime/config.py +35 -0
- package/ocmemog/runtime/identity.py +115 -0
- package/ocmemog/runtime/inference.py +164 -0
- package/ocmemog/runtime/instrumentation.py +20 -0
- package/ocmemog/runtime/memory/__init__.py +91 -0
- package/ocmemog/runtime/memory/api.py +1431 -0
- package/ocmemog/runtime/memory/candidate.py +192 -0
- package/ocmemog/runtime/memory/conversation_state.py +1831 -0
- package/ocmemog/runtime/memory/distill.py +282 -0
- package/ocmemog/runtime/memory/embedding_engine.py +151 -0
- package/ocmemog/runtime/memory/freshness.py +114 -0
- package/ocmemog/runtime/memory/health.py +57 -0
- package/ocmemog/runtime/memory/integrity.py +208 -0
- package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
- package/ocmemog/runtime/memory/memory_links.py +109 -0
- package/ocmemog/runtime/memory/memory_salience.py +235 -0
- package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
- package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
- package/ocmemog/runtime/memory/pondering_engine.py +681 -0
- package/ocmemog/runtime/memory/promote.py +279 -0
- package/ocmemog/runtime/memory/provenance.py +408 -0
- package/ocmemog/runtime/memory/reinforcement.py +73 -0
- package/ocmemog/runtime/memory/retrieval.py +224 -0
- package/ocmemog/runtime/memory/semantic_search.py +66 -0
- package/ocmemog/runtime/memory/store.py +433 -0
- package/ocmemog/runtime/memory/unresolved_state.py +93 -0
- package/ocmemog/runtime/memory/vector_index.py +411 -0
- package/ocmemog/runtime/model_roles.py +16 -0
- package/ocmemog/runtime/model_router.py +29 -0
- package/ocmemog/runtime/providers.py +79 -0
- package/ocmemog/runtime/roles.py +92 -0
- package/ocmemog/runtime/security/__init__.py +8 -0
- package/ocmemog/runtime/security/redaction.py +17 -0
- package/ocmemog/runtime/state_store.py +34 -0
- package/ocmemog/runtime/storage_paths.py +70 -0
- package/ocmemog/sidecar/app.py +310 -23
- package/ocmemog/sidecar/compat.py +50 -13
- package/ocmemog/sidecar/transcript_watcher.py +318 -240
- package/openclaw.plugin.json +4 -0
- package/package.json +1 -1
- package/scripts/ocmemog-backfill-vectors.py +5 -3
- package/scripts/ocmemog-continuity-benchmark.py +1 -1
- package/scripts/ocmemog-demo.py +1 -1
- package/scripts/ocmemog-doctor.py +15 -0
- package/scripts/ocmemog-install.sh +29 -7
- package/scripts/ocmemog-integrated-proof.py +373 -0
- package/scripts/ocmemog-reindex-vectors.py +5 -3
- package/scripts/ocmemog-release-check.sh +330 -0
- package/scripts/ocmemog-sidecar.sh +4 -2
- package/scripts/ocmemog-test-rig.py +5 -3
- package/brain/runtime/memory/artifacts.py +0 -33
- package/brain/runtime/memory/context_builder.py +0 -112
- package/brain/runtime/memory/interaction_memory.py +0 -57
- package/brain/runtime/memory/memory_gate.py +0 -38
- package/brain/runtime/memory/memory_graph.py +0 -54
- package/brain/runtime/memory/person_identity.py +0 -83
- package/brain/runtime/memory/person_memory.py +0 -138
- package/brain/runtime/memory/sentiment_memory.py +0 -67
- package/brain/runtime/memory/tool_catalog.py +0 -68
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
import sys
|
|
4
5
|
import os
|
|
5
6
|
import time
|
|
6
7
|
from collections import deque
|
|
8
|
+
import threading
|
|
7
9
|
from pathlib import Path
|
|
8
10
|
from typing import Optional
|
|
9
11
|
from urllib import request as urlrequest
|
|
10
12
|
|
|
11
|
-
from
|
|
13
|
+
from ocmemog.runtime import state_store
|
|
12
14
|
|
|
13
15
|
DEFAULT_ENDPOINT = "http://127.0.0.1:17891/memory/ingest_async"
|
|
14
16
|
DEFAULT_GLOB = "*.log"
|
|
@@ -34,6 +36,65 @@ DEFAULT_REINFORCE_NEGATIVE = [
|
|
|
34
36
|
"frustrated",
|
|
35
37
|
]
|
|
36
38
|
WATCHER_ERROR_LOG = state_store.reports_dir() / "ocmemog_transcript_watcher_errors.jsonl"
|
|
39
|
+
# Timing traces go to stderr unless OCMEMOG_SHUTDOWN_TIMING is set to something
# other than 1/true/yes/on (the default value is "true").
_SHUTDOWN_TRACE = os.environ.get("OCMEMOG_SHUTDOWN_TIMING", "true").lower() in {"1", "true", "yes", "on"}


def _timeout_from_env(name: str, default: float) -> float:
    """Read a timeout in seconds from environment variable ``name``.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, is not a valid
            float literal, or parses to a non-finite number.

    Returns:
        The parsed finite float, otherwise ``default``.
    """
    import math  # local import: keeps this block independent of the file's import list

    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        value = float(raw)
    except ValueError:
        # Unparseable text (including an empty string) keeps the default.
        return default
    # "inf" and "nan" parse successfully but are not usable socket timeouts.
    return value if math.isfinite(value) else default


# Timeout for watcher HTTP requests during normal operation.
_WATCHER_REQUEST_TIMEOUT_SECONDS = _timeout_from_env("OCMEMOG_INGEST_REQUEST_TIMEOUT_SECONDS", 10.0)
# Upper bound applied to the request timeout once a stop event is set.
_WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS = _timeout_from_env(
    "OCMEMOG_SHUTDOWN_INGEST_REQUEST_TIMEOUT_SECONDS", 1.0
)
# Stop event of the running watch_forever() call; None when no watcher runs.
_WATCHER_STOP_EVENT: threading.Event | None = None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _watcher_timeout(stop_event: threading.Event | None) -> float:
    """Return the timeout, in seconds, to use for the next watcher request.

    The regular request timeout applies unless ``stop_event`` is set, in which
    case it is capped by the shutdown timeout. The result is never below
    0.05 seconds.
    """
    stopping = stop_event is not None and stop_event.is_set()
    if stopping:
        limit = min(_WATCHER_REQUEST_TIMEOUT_SECONDS, _WATCHER_SHUTDOWN_REQUEST_TIMEOUT_SECONDS)
    else:
        limit = _WATCHER_REQUEST_TIMEOUT_SECONDS
    return max(0.05, limit)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _post_json_payload(endpoint: str, payload: dict, *, stop_event: threading.Event | None, kind: str) -> bool:
    """POST ``payload`` as JSON to ``endpoint`` and report whether it succeeded.

    Args:
        endpoint: URL that receives the POST request.
        payload: Mapping serialized with ``json.dumps`` as the request body.
        stop_event: Optional shutdown flag. It is passed to
            ``_watcher_timeout`` to choose the request timeout and consulted
            when deciding whether to print the timing trace.
        kind: Label written to the error log entry and to the trace output.

    Returns:
        ``True`` once the response body has been read; ``False`` if sending
        the request or reading the response raised an exception.

    Note:
        Serialization and request construction happen before the ``try``, so
        an exception raised there propagates to the caller.
    """
    data = json.dumps(payload).encode("utf-8")
    req = urlrequest.Request(endpoint, data=data, method="POST")
    req.add_header("Content-Type", "application/json")
    _apply_auth_headers(req)
    timeout = _watcher_timeout(stop_event)
    start = time.perf_counter()
    status = "ok"
    try:
        with urlrequest.urlopen(req, timeout=timeout) as resp:
            # The body is read and discarded; only completion matters here.
            resp.read()
        return True
    except Exception as exc:
        # Any failure is recorded and reported as False rather than raised.
        status = f"error={type(exc).__name__}"
        _log_watcher_error(kind, endpoint, payload, exc)
        if _SHUTDOWN_TRACE:
            print(
                f"[ocmemog][watcher-request] {kind} failed timeout={timeout:.3f}s elapsed={time.perf_counter()-start:.3f}s",
                file=sys.stderr,
            )
        return False
    finally:
        if _SHUTDOWN_TRACE:
            elapsed = time.perf_counter() - start
            # NOTE(review): the `else` below is assumed to pair with the
            # stop-event check (trace slow requests while running, trace every
            # request while stopping) - confirm against the packaged file.
            if stop_event is None or not stop_event.is_set():
                # Only requests that used at least 95% of their timeout are traced.
                if elapsed >= timeout * 0.95:
                    print(
                        f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
                        file=sys.stderr,
                    )
            else:
                print(
                    f"[ocmemog][watcher-request] {kind} timeout={timeout:.3f}s elapsed={elapsed:.3f}s status={status}",
                    file=sys.stderr,
                )
|
|
37
98
|
|
|
38
99
|
|
|
39
100
|
def _log_watcher_error(kind: str, endpoint: str, payload: dict, exc: Exception) -> None:
|
|
@@ -67,36 +128,16 @@ def _apply_auth_headers(req: urlrequest.Request) -> None:
|
|
|
67
128
|
req.add_header("x-ocmemog-token", token)
|
|
68
129
|
|
|
69
130
|
|
|
70
|
-
def _post_ingest(endpoint: str, payload: dict) -> bool:
|
|
71
|
-
|
|
72
|
-
req = urlrequest.Request(endpoint, data=data, method="POST")
|
|
73
|
-
req.add_header("Content-Type", "application/json")
|
|
74
|
-
_apply_auth_headers(req)
|
|
75
|
-
try:
|
|
76
|
-
with urlrequest.urlopen(req, timeout=10) as resp:
|
|
77
|
-
resp.read()
|
|
78
|
-
return True
|
|
79
|
-
except Exception as exc:
|
|
80
|
-
_log_watcher_error("ingest", endpoint, payload, exc)
|
|
81
|
-
return False
|
|
131
|
+
def _post_ingest(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
    """Send ``payload`` to ``endpoint`` under the "ingest" label; return the delivery result."""
    delivered = _post_json_payload(endpoint, payload, stop_event=stop_event, kind="ingest")
    return delivered
|
|
82
133
|
|
|
83
134
|
|
|
84
|
-
def _post_json(endpoint: str, payload: dict) -> bool:
|
|
85
|
-
|
|
86
|
-
req = urlrequest.Request(endpoint, data=data, method="POST")
|
|
87
|
-
req.add_header("Content-Type", "application/json")
|
|
88
|
-
_apply_auth_headers(req)
|
|
89
|
-
try:
|
|
90
|
-
with urlrequest.urlopen(req, timeout=10) as resp:
|
|
91
|
-
resp.read()
|
|
92
|
-
return True
|
|
93
|
-
except Exception as exc:
|
|
94
|
-
_log_watcher_error("json", endpoint, payload, exc)
|
|
95
|
-
return False
|
|
135
|
+
def _post_json(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
    """Send ``payload`` to ``endpoint`` under the "json" label; return the delivery result."""
    delivered = _post_json_payload(endpoint, payload, stop_event=stop_event, kind="json")
    return delivered
|
|
96
137
|
|
|
97
138
|
|
|
98
|
-
def _post_turn(endpoint: str, payload: dict) -> bool:
|
|
99
|
-
return _post_json(endpoint, payload)
|
|
139
|
+
def _post_turn(endpoint: str, payload: dict, *, stop_event: threading.Event | None = None) -> bool:
    """Send one conversation-turn ``payload`` by delegating to ``_post_json``."""
    delivered = _post_json(endpoint, payload, stop_event=stop_event)
    return delivered
|
|
100
141
|
|
|
101
142
|
|
|
102
143
|
def _extract_user_text(text: str) -> str:
|
|
@@ -183,7 +224,8 @@ def _append_transcript(transcript_target: Path, timestamp: str, role: str, text:
|
|
|
183
224
|
return path, line_no
|
|
184
225
|
|
|
185
226
|
|
|
186
|
-
def watch_forever() -> None:
|
|
227
|
+
def watch_forever(stop_event: Optional[threading.Event] = None) -> None:
|
|
228
|
+
global _WATCHER_STOP_EVENT
|
|
187
229
|
transcript_path = os.environ.get("OCMEMOG_TRANSCRIPT_PATH", "").strip()
|
|
188
230
|
transcript_dir = os.environ.get("OCMEMOG_TRANSCRIPT_DIR", "").strip()
|
|
189
231
|
glob_pattern = os.environ.get("OCMEMOG_TRANSCRIPT_GLOB", DEFAULT_GLOB)
|
|
@@ -240,6 +282,13 @@ def watch_forever() -> None:
|
|
|
240
282
|
pending_session_turns: dict[tuple[str, int], dict[str, object]] = {}
|
|
241
283
|
last_transcript_flush = time.time()
|
|
242
284
|
last_session_flush = time.time()
|
|
285
|
+
stopper: threading.Event
|
|
286
|
+
if isinstance(stop_event, threading.Event):
|
|
287
|
+
stopper = stop_event
|
|
288
|
+
else:
|
|
289
|
+
stopper = threading.Event()
|
|
290
|
+
stopper.clear()
|
|
291
|
+
_WATCHER_STOP_EVENT = stopper
|
|
243
292
|
|
|
244
293
|
def _flush_buffer(
|
|
245
294
|
buffer: list[str],
|
|
@@ -249,9 +298,12 @@ def watch_forever() -> None:
|
|
|
249
298
|
timestamp: Optional[str],
|
|
250
299
|
start_line: Optional[int],
|
|
251
300
|
end_line: Optional[int],
|
|
301
|
+
stop_event: threading.Event,
|
|
252
302
|
) -> bool:
|
|
253
303
|
if not buffer:
|
|
254
304
|
return True
|
|
305
|
+
if stop_event.is_set():
|
|
306
|
+
return False
|
|
255
307
|
payload = {
|
|
256
308
|
"content": "\n".join(buffer),
|
|
257
309
|
"kind": kind,
|
|
@@ -266,7 +318,7 @@ def watch_forever() -> None:
|
|
|
266
318
|
payload["transcript_end_offset"] = end_line
|
|
267
319
|
if timestamp:
|
|
268
320
|
payload["timestamp"] = timestamp.replace("T", " ")[:19]
|
|
269
|
-
ok = _post_ingest(endpoint, payload)
|
|
321
|
+
ok = _post_ingest(endpoint, payload, stop_event=stop_event)
|
|
270
322
|
if ok:
|
|
271
323
|
buffer.clear()
|
|
272
324
|
return ok
|
|
@@ -286,7 +338,7 @@ def watch_forever() -> None:
|
|
|
286
338
|
"source_module": "sentiment",
|
|
287
339
|
"note": text,
|
|
288
340
|
}
|
|
289
|
-
_post_json(reinforce_endpoint, payload)
|
|
341
|
+
_post_json(reinforce_endpoint, payload, stop_event=stopper)
|
|
290
342
|
elif any(term in lowered for term in negative_terms):
|
|
291
343
|
payload = {
|
|
292
344
|
"task_id": f"feedback:{timestamp}",
|
|
@@ -298,224 +350,236 @@ def watch_forever() -> None:
|
|
|
298
350
|
"source_module": "sentiment",
|
|
299
351
|
"note": text,
|
|
300
352
|
}
|
|
301
|
-
_post_json(reinforce_endpoint, payload)
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
if
|
|
308
|
-
current_file
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
353
|
+
_post_json(reinforce_endpoint, payload, stop_event=stopper)
|
|
354
|
+
|
|
355
|
+
try:
|
|
356
|
+
while not stopper.is_set():
|
|
357
|
+
# 1) Watch transcript logs (if any)
|
|
358
|
+
latest = _pick_latest(transcript_target, glob_pattern)
|
|
359
|
+
if latest is not None:
|
|
360
|
+
if current_file is None or latest != current_file:
|
|
361
|
+
current_file = latest
|
|
362
|
+
position = 0
|
|
363
|
+
current_line_number = 0
|
|
364
|
+
if start_at_end:
|
|
365
|
+
try:
|
|
366
|
+
position = current_file.stat().st_size
|
|
367
|
+
except Exception:
|
|
368
|
+
position = 0
|
|
369
|
+
try:
|
|
370
|
+
current_line_number = _count_lines(current_file)
|
|
371
|
+
except Exception:
|
|
372
|
+
current_line_number = 0
|
|
373
|
+
|
|
374
|
+
try:
|
|
375
|
+
with current_file.open("r", encoding="utf-8", errors="ignore") as handle:
|
|
376
|
+
handle.seek(position)
|
|
377
|
+
committed_position = position
|
|
378
|
+
committed_line_number = current_line_number
|
|
379
|
+
while True:
|
|
380
|
+
if stopper.is_set():
|
|
381
|
+
break
|
|
382
|
+
line_start = handle.tell()
|
|
383
|
+
line = handle.readline()
|
|
384
|
+
if not line:
|
|
385
|
+
position = committed_position
|
|
386
|
+
current_line_number = committed_line_number
|
|
387
|
+
break
|
|
388
|
+
text = line.rstrip("\n")
|
|
389
|
+
next_line_number = committed_line_number + 1
|
|
390
|
+
if not text.strip():
|
|
391
|
+
committed_position = handle.tell()
|
|
392
|
+
committed_line_number = next_line_number
|
|
393
|
+
position = committed_position
|
|
394
|
+
current_line_number = committed_line_number
|
|
395
|
+
continue
|
|
396
|
+
current_marker = (str(current_file), next_line_number)
|
|
397
|
+
if current_marker in recent_session_transcript_lines:
|
|
398
|
+
committed_position = handle.tell()
|
|
399
|
+
committed_line_number = next_line_number
|
|
400
|
+
position = committed_position
|
|
401
|
+
current_line_number = committed_line_number
|
|
402
|
+
continue
|
|
403
|
+
transcript_buffer.append(text)
|
|
404
|
+
transcript_last_path = current_file
|
|
405
|
+
if transcript_start_line is None:
|
|
406
|
+
transcript_start_line = next_line_number
|
|
407
|
+
transcript_end_line = next_line_number
|
|
408
|
+
timestamp_value = None
|
|
409
|
+
if text and " " in text:
|
|
410
|
+
timestamp_value = text.split(" ", 1)[0]
|
|
411
|
+
transcript_last_timestamp = timestamp_value
|
|
412
|
+
role, turn_text = _parse_transcript_line(text)
|
|
413
|
+
if role and turn_text:
|
|
414
|
+
if stopper.is_set():
|
|
415
|
+
break
|
|
416
|
+
ok = _post_turn(
|
|
417
|
+
turn_endpoint,
|
|
418
|
+
{
|
|
419
|
+
"role": role,
|
|
420
|
+
"content": turn_text,
|
|
421
|
+
"source": source,
|
|
422
|
+
"transcript_path": str(current_file),
|
|
423
|
+
"transcript_offset": next_line_number,
|
|
424
|
+
"transcript_end_offset": next_line_number,
|
|
425
|
+
"timestamp": timestamp_value.replace("T", " ")[:19] if timestamp_value else None,
|
|
426
|
+
},
|
|
427
|
+
stop_event=stopper,
|
|
428
|
+
)
|
|
429
|
+
if not ok:
|
|
430
|
+
if transcript_buffer:
|
|
431
|
+
transcript_buffer.pop()
|
|
432
|
+
if transcript_start_line == next_line_number:
|
|
433
|
+
transcript_start_line = None
|
|
434
|
+
transcript_end_line = committed_line_number if transcript_start_line is not None else None
|
|
435
|
+
position = line_start
|
|
436
|
+
current_line_number = committed_line_number
|
|
437
|
+
break
|
|
438
|
+
if len(transcript_buffer) >= batch_max:
|
|
439
|
+
ok = _flush_buffer(
|
|
440
|
+
transcript_buffer,
|
|
441
|
+
source_label=source,
|
|
442
|
+
transcript_path=transcript_last_path,
|
|
443
|
+
timestamp=transcript_last_timestamp,
|
|
444
|
+
start_line=transcript_start_line,
|
|
445
|
+
end_line=transcript_end_line,
|
|
446
|
+
stop_event=stopper,
|
|
447
|
+
)
|
|
448
|
+
if not ok:
|
|
449
|
+
position = line_start
|
|
450
|
+
current_line_number = committed_line_number
|
|
451
|
+
break
|
|
452
|
+
transcript_start_line = None
|
|
453
|
+
transcript_end_line = None
|
|
454
|
+
last_transcript_flush = time.time()
|
|
343
455
|
committed_position = handle.tell()
|
|
344
456
|
committed_line_number = next_line_number
|
|
345
457
|
position = committed_position
|
|
346
458
|
current_line_number = committed_line_number
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
459
|
+
except Exception:
|
|
460
|
+
pass
|
|
461
|
+
|
|
462
|
+
# 2) Watch OpenClaw session jsonl (verbatim capture)
|
|
463
|
+
session_latest = _pick_latest(session_target, session_glob)
|
|
464
|
+
if session_latest is not None:
|
|
465
|
+
if session_file is None or session_latest != session_file:
|
|
466
|
+
session_file = session_latest
|
|
467
|
+
session_pos = 0
|
|
468
|
+
if start_at_end:
|
|
469
|
+
try:
|
|
470
|
+
session_pos = session_file.stat().st_size
|
|
471
|
+
except Exception:
|
|
472
|
+
session_pos = 0
|
|
473
|
+
try:
|
|
474
|
+
with session_file.open("r", encoding="utf-8", errors="ignore") as handle:
|
|
475
|
+
handle.seek(session_pos)
|
|
476
|
+
committed_session_pos = session_pos
|
|
477
|
+
while True:
|
|
478
|
+
if stopper.is_set():
|
|
479
|
+
break
|
|
480
|
+
line_start = handle.tell()
|
|
481
|
+
line = handle.readline()
|
|
482
|
+
if not line:
|
|
483
|
+
session_pos = committed_session_pos
|
|
484
|
+
break
|
|
485
|
+
try:
|
|
486
|
+
entry = json.loads(line)
|
|
487
|
+
except Exception:
|
|
488
|
+
committed_session_pos = handle.tell()
|
|
489
|
+
session_pos = committed_session_pos
|
|
490
|
+
continue
|
|
491
|
+
if entry.get("type") != "message":
|
|
492
|
+
committed_session_pos = handle.tell()
|
|
493
|
+
session_pos = committed_session_pos
|
|
494
|
+
continue
|
|
495
|
+
msg = entry.get("message") or {}
|
|
496
|
+
role = msg.get("role")
|
|
497
|
+
if role not in {"user", "assistant"}:
|
|
498
|
+
committed_session_pos = handle.tell()
|
|
499
|
+
session_pos = committed_session_pos
|
|
500
|
+
continue
|
|
501
|
+
content = msg.get("content")
|
|
502
|
+
text = _extract_message_text(content).strip()
|
|
503
|
+
conversation_info = _extract_conversation_info(text)
|
|
504
|
+
if role == "user":
|
|
505
|
+
text = _extract_user_text(text)
|
|
506
|
+
text = text.replace("\n", " ").strip()
|
|
507
|
+
if not text:
|
|
508
|
+
committed_session_pos = handle.tell()
|
|
509
|
+
session_pos = committed_session_pos
|
|
510
|
+
continue
|
|
511
|
+
timestamp = entry.get("timestamp") or time.strftime("%Y-%m-%dT%H:%M:%S")
|
|
512
|
+
if role == "user":
|
|
513
|
+
_maybe_reinforce(text, timestamp)
|
|
514
|
+
session_id = session_file.stem if session_file is not None else None
|
|
515
|
+
message_id = entry.get("id") or conversation_info.get("message_id")
|
|
516
|
+
conversation_id = conversation_info.get("conversation_id") or session_id
|
|
517
|
+
thread_id = conversation_info.get("thread_id") or session_id
|
|
518
|
+
transcript_line = f"{timestamp} [{role}] {text}"
|
|
519
|
+
retry_key = (str(session_file), line_start)
|
|
520
|
+
pending = pending_session_turns.get(retry_key)
|
|
521
|
+
if pending is None:
|
|
522
|
+
transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
|
|
523
|
+
turn_payload = {
|
|
362
524
|
"role": role,
|
|
363
|
-
"content":
|
|
364
|
-
"
|
|
365
|
-
"
|
|
366
|
-
"
|
|
367
|
-
"
|
|
368
|
-
"
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
525
|
+
"content": text,
|
|
526
|
+
"conversation_id": conversation_id,
|
|
527
|
+
"session_id": session_id,
|
|
528
|
+
"thread_id": thread_id,
|
|
529
|
+
"message_id": message_id,
|
|
530
|
+
"source": "session",
|
|
531
|
+
"timestamp": timestamp.replace("T", " ")[:19],
|
|
532
|
+
"transcript_path": str(transcript_path),
|
|
533
|
+
"transcript_offset": transcript_line_no,
|
|
534
|
+
"transcript_end_offset": transcript_line_no,
|
|
535
|
+
"metadata": {
|
|
536
|
+
"parent_message_id": entry.get("parentId"),
|
|
537
|
+
},
|
|
538
|
+
}
|
|
539
|
+
pending_session_turns[retry_key] = {
|
|
540
|
+
"payload": dict(turn_payload),
|
|
541
|
+
"transcript_line": transcript_line,
|
|
542
|
+
"transcript_path": transcript_path,
|
|
543
|
+
"transcript_line_no": transcript_line_no,
|
|
544
|
+
}
|
|
545
|
+
else:
|
|
546
|
+
turn_payload = dict(pending["payload"])
|
|
547
|
+
transcript_line = str(pending["transcript_line"])
|
|
548
|
+
transcript_path = Path(str(pending["transcript_path"]))
|
|
549
|
+
transcript_line_no = int(pending["transcript_line_no"])
|
|
550
|
+
if stopper.is_set():
|
|
379
551
|
break
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
transcript_buffer,
|
|
383
|
-
source_label=source,
|
|
384
|
-
transcript_path=transcript_last_path,
|
|
385
|
-
timestamp=transcript_last_timestamp,
|
|
386
|
-
start_line=transcript_start_line,
|
|
387
|
-
end_line=transcript_end_line,
|
|
388
|
-
)
|
|
389
|
-
if not ok:
|
|
390
|
-
position = line_start
|
|
391
|
-
current_line_number = committed_line_number
|
|
552
|
+
if not _post_turn(turn_endpoint, turn_payload, stop_event=stopper):
|
|
553
|
+
session_pos = line_start
|
|
392
554
|
break
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
committed_session_pos = session_pos
|
|
418
|
-
while True:
|
|
419
|
-
line_start = handle.tell()
|
|
420
|
-
line = handle.readline()
|
|
421
|
-
if not line:
|
|
422
|
-
session_pos = committed_session_pos
|
|
423
|
-
break
|
|
424
|
-
try:
|
|
425
|
-
entry = json.loads(line)
|
|
426
|
-
except Exception:
|
|
427
|
-
committed_session_pos = handle.tell()
|
|
428
|
-
session_pos = committed_session_pos
|
|
429
|
-
continue
|
|
430
|
-
if entry.get("type") != "message":
|
|
431
|
-
committed_session_pos = handle.tell()
|
|
432
|
-
session_pos = committed_session_pos
|
|
433
|
-
continue
|
|
434
|
-
msg = entry.get("message") or {}
|
|
435
|
-
role = msg.get("role")
|
|
436
|
-
if role not in {"user", "assistant"}:
|
|
437
|
-
committed_session_pos = handle.tell()
|
|
438
|
-
session_pos = committed_session_pos
|
|
439
|
-
continue
|
|
440
|
-
content = msg.get("content")
|
|
441
|
-
text = _extract_message_text(content).strip()
|
|
442
|
-
conversation_info = _extract_conversation_info(text)
|
|
443
|
-
if role == "user":
|
|
444
|
-
text = _extract_user_text(text)
|
|
445
|
-
text = text.replace("\n", " ").strip()
|
|
446
|
-
if not text:
|
|
555
|
+
pending_session_turns.pop(retry_key, None)
|
|
556
|
+
recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
|
|
557
|
+
session_buffer.append(transcript_line)
|
|
558
|
+
session_last_path = transcript_path
|
|
559
|
+
session_last_timestamp = timestamp
|
|
560
|
+
if session_start_line is None:
|
|
561
|
+
session_start_line = transcript_line_no
|
|
562
|
+
session_end_line = transcript_line_no
|
|
563
|
+
if len(session_buffer) >= batch_max:
|
|
564
|
+
ok = _flush_buffer(
|
|
565
|
+
session_buffer,
|
|
566
|
+
source_label="session",
|
|
567
|
+
transcript_path=session_last_path,
|
|
568
|
+
timestamp=session_last_timestamp,
|
|
569
|
+
start_line=session_start_line,
|
|
570
|
+
end_line=session_end_line,
|
|
571
|
+
stop_event=stopper,
|
|
572
|
+
)
|
|
573
|
+
if not ok:
|
|
574
|
+
session_pos = line_start
|
|
575
|
+
break
|
|
576
|
+
session_start_line = None
|
|
577
|
+
session_end_line = None
|
|
578
|
+
last_session_flush = time.time()
|
|
447
579
|
committed_session_pos = handle.tell()
|
|
448
580
|
session_pos = committed_session_pos
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
if role == "user":
|
|
452
|
-
_maybe_reinforce(text, timestamp)
|
|
453
|
-
session_id = session_file.stem if session_file is not None else None
|
|
454
|
-
message_id = entry.get("id") or conversation_info.get("message_id")
|
|
455
|
-
conversation_id = conversation_info.get("conversation_id") or session_id
|
|
456
|
-
thread_id = conversation_info.get("thread_id") or session_id
|
|
457
|
-
transcript_line = f"{timestamp} [{role}] {text}"
|
|
458
|
-
retry_key = (str(session_file), line_start)
|
|
459
|
-
pending = pending_session_turns.get(retry_key)
|
|
460
|
-
if pending is None:
|
|
461
|
-
transcript_path, transcript_line_no = _append_transcript(transcript_target, timestamp, role, text)
|
|
462
|
-
turn_payload = {
|
|
463
|
-
"role": role,
|
|
464
|
-
"content": text,
|
|
465
|
-
"conversation_id": conversation_id,
|
|
466
|
-
"session_id": session_id,
|
|
467
|
-
"thread_id": thread_id,
|
|
468
|
-
"message_id": message_id,
|
|
469
|
-
"source": "session",
|
|
470
|
-
"timestamp": timestamp.replace("T", " ")[:19],
|
|
471
|
-
"transcript_path": str(transcript_path),
|
|
472
|
-
"transcript_offset": transcript_line_no,
|
|
473
|
-
"transcript_end_offset": transcript_line_no,
|
|
474
|
-
"metadata": {
|
|
475
|
-
"parent_message_id": entry.get("parentId"),
|
|
476
|
-
},
|
|
477
|
-
}
|
|
478
|
-
pending_session_turns[retry_key] = {
|
|
479
|
-
"payload": dict(turn_payload),
|
|
480
|
-
"transcript_line": transcript_line,
|
|
481
|
-
"transcript_path": transcript_path,
|
|
482
|
-
"transcript_line_no": transcript_line_no,
|
|
483
|
-
}
|
|
484
|
-
else:
|
|
485
|
-
turn_payload = dict(pending["payload"])
|
|
486
|
-
transcript_line = str(pending["transcript_line"])
|
|
487
|
-
transcript_path = Path(str(pending["transcript_path"]))
|
|
488
|
-
transcript_line_no = int(pending["transcript_line_no"])
|
|
489
|
-
if not _post_turn(turn_endpoint, turn_payload):
|
|
490
|
-
session_pos = line_start
|
|
491
|
-
break
|
|
492
|
-
pending_session_turns.pop(retry_key, None)
|
|
493
|
-
recent_session_transcript_lines.append((str(transcript_path), transcript_line_no))
|
|
494
|
-
session_buffer.append(transcript_line)
|
|
495
|
-
session_last_path = transcript_path
|
|
496
|
-
session_last_timestamp = timestamp
|
|
497
|
-
if session_start_line is None:
|
|
498
|
-
session_start_line = transcript_line_no
|
|
499
|
-
session_end_line = transcript_line_no
|
|
500
|
-
if len(session_buffer) >= batch_max:
|
|
501
|
-
ok = _flush_buffer(
|
|
502
|
-
session_buffer,
|
|
503
|
-
source_label="session",
|
|
504
|
-
transcript_path=session_last_path,
|
|
505
|
-
timestamp=session_last_timestamp,
|
|
506
|
-
start_line=session_start_line,
|
|
507
|
-
end_line=session_end_line,
|
|
508
|
-
)
|
|
509
|
-
if not ok:
|
|
510
|
-
session_pos = line_start
|
|
511
|
-
break
|
|
512
|
-
session_start_line = None
|
|
513
|
-
session_end_line = None
|
|
514
|
-
last_session_flush = time.time()
|
|
515
|
-
committed_session_pos = handle.tell()
|
|
516
|
-
session_pos = committed_session_pos
|
|
517
|
-
except Exception:
|
|
518
|
-
pass
|
|
581
|
+
except Exception:
|
|
582
|
+
pass
|
|
519
583
|
|
|
520
584
|
now = time.time()
|
|
521
585
|
if transcript_buffer and (now - last_transcript_flush) >= batch_seconds:
|
|
@@ -526,6 +590,7 @@ def watch_forever() -> None:
|
|
|
526
590
|
timestamp=transcript_last_timestamp,
|
|
527
591
|
start_line=transcript_start_line,
|
|
528
592
|
end_line=transcript_end_line,
|
|
593
|
+
stop_event=stopper,
|
|
529
594
|
)
|
|
530
595
|
if ok:
|
|
531
596
|
transcript_start_line = None
|
|
@@ -539,10 +604,23 @@ def watch_forever() -> None:
|
|
|
539
604
|
timestamp=session_last_timestamp,
|
|
540
605
|
start_line=session_start_line,
|
|
541
606
|
end_line=session_end_line,
|
|
607
|
+
stop_event=stopper,
|
|
542
608
|
)
|
|
543
609
|
if ok:
|
|
544
610
|
session_start_line = None
|
|
545
611
|
session_end_line = None
|
|
546
612
|
last_session_flush = now
|
|
547
613
|
|
|
548
|
-
time.
|
|
614
|
+
poll_started = time.perf_counter()
|
|
615
|
+
if stopper.wait(poll_seconds):
|
|
616
|
+
if _SHUTDOWN_TRACE:
|
|
617
|
+
print(
|
|
618
|
+
f"[ocmemog][watcher-poll] stop_wait timeout={poll_seconds:.3f}s elapsed={time.perf_counter()-poll_started:.3f}s",
|
|
619
|
+
file=sys.stderr,
|
|
620
|
+
)
|
|
621
|
+
return
|
|
622
|
+
finally:
|
|
623
|
+
_WATCHER_STOP_EVENT = None
|
|
624
|
+
if _SHUTDOWN_TRACE:
|
|
625
|
+
print("[ocmemog][watcher] shutdown loop exiting", file=sys.stderr)
|
|
626
|
+
# no return value
|