@simbimbo/memory-ocmemog 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,628 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import re
8
+ import signal
9
+ import subprocess
10
+ import sys
11
+ import tempfile
12
+ import textwrap
13
+ import threading
14
+ import time
15
+ import uuid
16
+ from itertools import cycle
17
+ from concurrent.futures import ThreadPoolExecutor, as_completed
18
+ from dataclasses import dataclass
19
+ from pathlib import Path
20
+ from typing import Any
21
+ from urllib import request as urlrequest
22
+
23
# Repository root: two levels up from this script (scripts/ -> repo root).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Default bind address/port for the temporary sidecar started by the harness.
DEFAULT_HOST = "127.0.0.1"
DEFAULT_PORT = 17921
26
+
27
+
28
def _post_json(base_url: str, path: str, payload: dict[str, Any], *, timeout: float = 30.0) -> dict[str, Any]:
    """POST *payload* as JSON to ``base_url + path`` and return the decoded reply.

    When the response body is not valid JSON, a fallback dict of the form
    ``{"ok": False, "raw": <body text>}`` is returned instead of raising.
    """
    url = f"{base_url.rstrip('/')}{path}"
    request = urlrequest.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={"Content-Type": "application/json"},
    )
    with urlrequest.urlopen(request, timeout=timeout) as response:
        raw = response.read().decode("utf-8")
    try:
        return json.loads(raw)
    except Exception:
        # Keep the raw body so callers can log what the server actually sent.
        return {"ok": False, "raw": raw}
41
+
42
+
43
def _get_json(base_url: str, path: str, *, timeout: float = 10.0) -> dict[str, Any]:
    """GET ``base_url + path`` and return the decoded JSON reply.

    Non-JSON responses yield ``{"ok": False, "raw": <body text>}`` rather
    than raising, mirroring :func:`_post_json`.
    """
    url = f"{base_url.rstrip('/')}{path}"
    request = urlrequest.Request(url, method="GET")
    with urlrequest.urlopen(request, timeout=timeout) as response:
        raw = response.read().decode("utf-8")
    try:
        return json.loads(raw)
    except Exception:
        return {"ok": False, "raw": raw}
51
+
52
+
53
+ def _sanitize_continuity_noise(text: str, max_len: int = 280) -> str:
54
+ markers = [
55
+ "Memory continuity (auto-hydrated by ocmemog):",
56
+ "Pre-compaction memory flush.",
57
+ "Current time:",
58
+ "Latest user ask:",
59
+ "Last assistant commitment:",
60
+ "Open loops:",
61
+ "Pending actions:",
62
+ "Recent turns:",
63
+ "Linked memories:",
64
+ "Sender (untrusted metadata):",
65
+ ]
66
+ cleaned = text or ""
67
+ for marker in markers:
68
+ cleaned = cleaned.replace(marker, " ")
69
+ cleaned = " ".join(cleaned.split()).strip()
70
+ if len(cleaned) > max_len:
71
+ cleaned = f"{cleaned[: max_len - 1].rstrip()}…"
72
+ return cleaned
73
+
74
+
75
+ def _first_str(*values: Any) -> str:
76
+ for value in values:
77
+ if isinstance(value, str) and value.strip():
78
+ return value.strip()
79
+ return ""
80
+
81
+
82
+ def _as_dict(value: Any) -> dict[str, Any]:
83
+ return value if isinstance(value, dict) else {}
84
+
85
+
86
+ def _summarize_list(items: Any, limit: int = 3) -> list[str]:
87
+ if not isinstance(items, list):
88
+ return []
89
+ output: list[str] = []
90
+ for item in items[:limit]:
91
+ record = _as_dict(item)
92
+ text = _first_str(record.get("summary"), record.get("content"), record.get("reference"))
93
+ if text:
94
+ output.append(text)
95
+ return output
96
+
97
+
98
def build_predictive_brief_context(payload: dict[str, Any]) -> str:
    """Format the sidecar's predictive brief into a plugin-style prepend string.

    Returns '' when the payload is not ok, carries no ``predictive_brief``,
    or yields no usable lines after sanitization.
    """
    if not payload.get("ok"):
        return ""
    brief = _as_dict(payload.get("predictive_brief"))
    if not brief:
        return ""
    parts: list[str] = []

    lane = _sanitize_continuity_noise(_first_str(brief.get("lane")), 48)
    if lane:
        parts.append(f"Lane: {lane}")

    checkpoint_summary = _sanitize_continuity_noise(
        _first_str(_as_dict(brief.get("checkpoint")).get("summary")), 140
    )
    if checkpoint_summary:
        parts.append(f"Checkpoint: {checkpoint_summary}")

    raw_memories = brief.get("memories")
    memory_snippets: list[str] = []
    for entry in (raw_memories if isinstance(raw_memories, list) else [])[:4]:
        record = _as_dict(entry)
        snippet = _sanitize_continuity_noise(
            _first_str(record.get("content"), record.get("reference")), 120
        )
        if snippet:
            memory_snippets.append(snippet)
    if memory_snippets:
        parts.append(f"Likely-needed facts: {' | '.join(memory_snippets)}")

    raw_loops = brief.get("open_loops")
    loop_snippets: list[str] = []
    for entry in (raw_loops if isinstance(raw_loops, list) else [])[:2]:
        record = _as_dict(entry)
        snippet = _sanitize_continuity_noise(
            _first_str(record.get("summary"), record.get("reference")), 100
        )
        if snippet:
            loop_snippets.append(snippet)
    if loop_snippets:
        parts.append(f"Open loops: {' | '.join(loop_snippets)}")

    if not parts:
        return ""
    return "Working memory brief (JIT by ocmemog):\n- " + "\n- ".join(parts)
134
+
135
+
136
def build_hydration_context(payload: dict[str, Any]) -> str:
    """Format a hydrate payload into a continuity prepend string.

    Pulls checkpoint, latest user ask, last assistant commitment, and open
    loops from the payload's ``summary``/``state`` sections. Returns '' when
    the payload is not ok or no continuity lines survive sanitization.
    """
    if not payload.get("ok"):
        return ""
    summary = _as_dict(payload.get("summary"))
    state = _as_dict(payload.get("state"))
    parts: list[str] = []

    checkpoint_text = _sanitize_continuity_noise(
        _first_str(_as_dict(summary.get("latest_checkpoint")).get("summary")), 140
    )
    if checkpoint_text:
        parts.append(f"Checkpoint: {checkpoint_text}")

    ask = _as_dict(summary.get("latest_user_ask"))
    ask_text = _sanitize_continuity_noise(
        _first_str(ask.get("effective_content"), ask.get("content"), state.get("latest_user_ask")),
        220,
    )
    if ask_text:
        parts.append(f"Latest user ask: {ask_text}")

    commitment = _as_dict(summary.get("last_assistant_commitment"))
    commitment_text = _sanitize_continuity_noise(
        _first_str(commitment.get("content"), state.get("last_assistant_commitment")),
        180,
    )
    if commitment_text:
        parts.append(f"Last assistant commitment: {commitment_text}")

    loops = [
        cleaned
        for cleaned in (
            _sanitize_continuity_noise(item, 120)
            for item in _summarize_list(summary.get("open_loops"), 2)
        )
        if cleaned
    ]
    if loops:
        parts.append(f"Open loops: {' | '.join(loops)}")

    if not parts:
        return ""
    return "Memory continuity (auto-hydrated by ocmemog):\n- " + "\n- ".join(parts)
168
+
169
+
170
@dataclass
class HarnessResult:
    """Outcome of a single stress-harness run."""

    # Mode that was executed (mirrors args.mode).
    name: str
    # True when no failure conditions were recorded.
    ok: bool
    # Collected measurements (scope, hydrate latency, process, log growth, ...).
    metrics: dict[str, Any]
    # Human-readable description of each threshold breach or error.
    failures: list[str]
176
+
177
+
178
class SidecarHarness:
    """Owns a temporary sidecar process plus the scratch state it runs against.

    Creates an isolated temp directory tree (state/sessions/transcripts/logs),
    launches ``ocmemog.sidecar.app`` under uvicorn configured purely through
    environment variables, and exposes helpers to seed conversations, append
    session-jsonl messages, and sample process health/log growth.
    """

    def __init__(self, args: argparse.Namespace):
        self.args = args
        self.base_url = f"http://{args.host}:{args.port}"
        # All scratch state lives under one TemporaryDirectory so stop() can
        # remove everything with a single cleanup() call.
        self.tempdir = tempfile.TemporaryDirectory(prefix="ocmemog-hydrate-stress-")
        self.root = Path(self.tempdir.name)
        self.state_dir = self.root / "state"
        self.session_dir = self.root / "sessions"
        self.transcript_dir = self.root / "transcripts"
        self.logs_dir = self.root / "logs"
        self.state_dir.mkdir(parents=True, exist_ok=True)
        self.session_dir.mkdir(parents=True, exist_ok=True)
        self.transcript_dir.mkdir(parents=True, exist_ok=True)
        self.logs_dir.mkdir(parents=True, exist_ok=True)
        self.out_log = self.logs_dir / "sidecar.out.log"
        self.err_log = self.logs_dir / "sidecar.err.log"
        self.process: subprocess.Popen[str] | None = None
        # One session jsonl file per harness instance; watcher workloads append here.
        self.session_file = self.session_dir / f"{uuid.uuid4()}.jsonl"
        # Set to tell the background sampler thread (see run_mode) to exit.
        self.stop_event = threading.Event()

    def start(self) -> None:
        """Launch the sidecar via uvicorn and block until /healthz reports ok.

        Raises:
            RuntimeError: if the process exits early or never becomes healthy
                within ``args.start_timeout`` seconds.
        """
        env = os.environ.copy()
        # Point every ocmemog knob at the isolated temp dirs; trace flags are
        # only enabled when --trace was requested.
        env.update(
            {
                "PYTHONPATH": str(REPO_ROOT),
                "OCMEMOG_STATE_DIR": str(self.state_dir),
                "OCMEMOG_SESSION_DIR": str(self.session_dir),
                "OCMEMOG_TRANSCRIPT_DIR": str(self.transcript_dir),
                "OCMEMOG_TRANSCRIPT_WATCHER": "true" if self.args.watcher else "false",
                "OCMEMOG_TRANSCRIPT_POLL_SECONDS": str(self.args.poll_seconds),
                "OCMEMOG_INGEST_BATCH_SECONDS": str(self.args.batch_seconds),
                "OCMEMOG_INGEST_BATCH_MAX": str(self.args.batch_max),
                "OCMEMOG_INGEST_ENDPOINT": f"{self.base_url}/memory/ingest_async",
                "OCMEMOG_TURN_INGEST_ENDPOINT": f"{self.base_url}/conversation/ingest_turn",
                "OCMEMOG_REINFORCE_SENTIMENT": "false",
                "OCMEMOG_SEARCH_SKIP_EMBEDDING_PROVIDER": "true",
                "OCMEMOG_TRACE_HYDRATE": "true" if self.args.trace else "false",
                "OCMEMOG_TRACE_HYDRATE_WARN_MS": str(self.args.trace_hydrate_warn_ms),
                "OCMEMOG_TRACE_REFRESH_STATE": "true" if self.args.trace else "false",
                "OCMEMOG_TRACE_REFRESH_STATE_WARN_MS": str(self.args.trace_refresh_warn_ms),
                "OCMEMOG_TRACE_WATCHER_TURN": "true" if self.args.trace else "false",
                "OCMEMOG_TRACE_WATCHER_TURN_WARN_MS": str(self.args.trace_watcher_turn_warn_ms),
            }
        )
        # Sidecar stdout/stderr go to files so read_trace_summary() can scan
        # them later. NOTE(review): handles are not closed explicitly here —
        # they live for the process lifetime; confirm acceptable.
        out = self.out_log.open("w", encoding="utf-8")
        err = self.err_log.open("w", encoding="utf-8")
        self.process = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "uvicorn",
                "ocmemog.sidecar.app:app",
                "--host",
                self.args.host,
                "--port",
                str(self.args.port),
            ],
            cwd=str(REPO_ROOT),
            env=env,
            stdout=out,
            stderr=err,
            text=True,
        )
        # Poll /healthz until ok, the deadline passes, or the process dies.
        deadline = time.time() + self.args.start_timeout
        last_error = None
        while time.time() < deadline:
            if self.process.poll() is not None:
                raise RuntimeError(f"sidecar exited early with code {self.process.returncode}")
            try:
                payload = _get_json(self.base_url, "/healthz", timeout=2.0)
                if payload.get("ok"):
                    return
            except Exception as exc:
                # Expected while the server is still binding; keep the last
                # error for the timeout message.
                last_error = exc
            time.sleep(0.25)
        raise RuntimeError(f"sidecar did not become healthy before timeout: {last_error}")

    def stop(self) -> None:
        """Signal shutdown, terminate the sidecar, and clean up temp state.

        Honors ``--keep-temp`` by skipping temp-dir cleanup for inspection.
        Safe to call even if start() was never called or already failed.
        """
        self.stop_event.set()
        if not self.process:
            if not self.args.keep_temp:
                self.tempdir.cleanup()
            return
        if self.process.poll() is None:
            # Prefer a graceful SIGTERM; escalate to kill after a 5s grace period.
            self.process.send_signal(signal.SIGTERM)
            try:
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.process.kill()
                self.process.wait(timeout=5)
        if not self.args.keep_temp:
            self.tempdir.cleanup()

    def sample_process(self) -> dict[str, float]:
        """Return a one-shot CPU%/RSS sample for the sidecar via ``ps``.

        Returns zeros when the process is gone or ``ps`` output cannot be
        parsed (POSIX-only; relies on the external ``ps`` binary).
        """
        if not self.process or self.process.poll() is not None:
            return {"cpu": 0.0, "rss_kb": 0.0}
        try:
            output = subprocess.check_output(
                ["ps", "-p", str(self.process.pid), "-o", "%cpu=,rss="], text=True
            ).strip()
            parts = output.split()
            cpu = float(parts[0]) if parts else 0.0
            rss = float(parts[1]) if len(parts) > 1 else 0.0
            return {"cpu": cpu, "rss_kb": rss}
        except Exception:
            # Best-effort sampling: a failed ps call is treated as zero load.
            return {"cpu": 0.0, "rss_kb": 0.0}

    def report_sizes(self) -> dict[str, int]:
        """Return current byte sizes of the sidecar's report/error logs (0 if absent)."""
        report = self.state_dir / "reports" / "brain_memory.log.jsonl"
        watcher_errors = self.state_dir / "reports" / "ocmemog_transcript_watcher_errors.jsonl"
        return {
            "report_log_bytes": report.stat().st_size if report.exists() else 0,
            "watcher_error_log_bytes": watcher_errors.stat().st_size if watcher_errors.exists() else 0,
        }

    def read_trace_summary(self) -> dict[str, Any]:
        """Aggregate ``[ocmemog][...] elapsed_ms=`` trace lines from stderr.

        Returns a mapping ``"{group}.{name}" -> {count, max_ms, avg_ms}``;
        empty dict when the log is missing or unreadable.
        """
        pattern = re.compile(r"^\[ocmemog\]\[(?P<group>route|state|watcher)\]\s+(?P<name>[a-zA-Z_]+)\s+elapsed_ms=(?P<elapsed>[0-9.]+).*$")
        summary: dict[str, dict[str, Any]] = {}
        if not self.err_log.exists():
            return {}
        try:
            lines = self.err_log.read_text(encoding="utf-8", errors="ignore").splitlines()
        except Exception:
            return {}
        for line in lines:
            match = pattern.match(line.strip())
            if not match:
                continue
            key = f"{match.group('group')}.{match.group('name')}"
            elapsed = float(match.group("elapsed"))
            bucket = summary.setdefault(key, {"count": 0, "max_ms": 0.0, "avg_ms": 0.0, "total_ms": 0.0})
            bucket["count"] += 1
            bucket["total_ms"] += elapsed
            bucket["max_ms"] = max(float(bucket["max_ms"]), elapsed)
        # Second pass: finalize averages and drop the running total.
        for bucket in summary.values():
            count = int(bucket["count"])
            bucket["avg_ms"] = round(float(bucket["total_ms"]) / count, 3) if count else 0.0
            bucket["max_ms"] = round(float(bucket["max_ms"]), 3)
            bucket.pop("total_ms", None)
        return summary

    def append_session_message(self, role: str, content: str, *, message_id: str, parent_id: str | None = None) -> None:
        """Append one message record to the session jsonl file.

        The record mimics the session transcript format the watcher consumes
        (``type``/``id``/``parentId``/``timestamp``/``message``); timestamps
        are current UTC time with a fixed ``.000Z`` millisecond suffix.
        """
        stamp = time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime())
        record = {
            "type": "message",
            "id": message_id,
            "parentId": parent_id,
            "timestamp": stamp,
            "message": {"role": role, "content": [{"type": "text", "text": content}]},
        }
        with self.session_file.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(record, ensure_ascii=False) + "\n")

    def seed_conversation(self, turns: int) -> dict[str, str]:
        """Seed *turns* alternating user/assistant turns into session file and sidecar.

        Each turn is both appended to the session jsonl and posted to
        ``/conversation/ingest_turn``. Returns the generated scope identifiers
        (session_id/thread_id/conversation_id) for subsequent hydrate calls.
        """
        session_id = f"stress-sess-{uuid.uuid4().hex[:8]}"
        thread_id = f"stress-thread-{uuid.uuid4().hex[:8]}"
        conversation_id = f"stress-conv-{uuid.uuid4().hex[:8]}"
        previous = None
        for idx in range(turns):
            # Even turns are user, odd turns assistant.
            role = "user" if idx % 2 == 0 else "assistant"
            content = (
                f"user turn {idx}: keep the continuity state compact and stable under load"
                if role == "user"
                else f"assistant turn {idx}: acknowledged, keeping track of the task and next step"
            )
            self.append_session_message(role, content, message_id=f"seed-{idx}", parent_id=previous)
            previous = f"seed-{idx}"
            _post_json(
                self.base_url,
                "/conversation/ingest_turn",
                {
                    "role": role,
                    "content": content,
                    "session_id": session_id,
                    "thread_id": thread_id,
                    "conversation_id": conversation_id,
                    "message_id": previous,
                    "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                },
                timeout=15.0,
            )
        return {"session_id": session_id, "thread_id": thread_id, "conversation_id": conversation_id}

    def seed_from_fixture(self, fixture_path: Path, scenario_name: str | None = None) -> dict[str, str]:
        """Seed turns from a fixture JSON file instead of synthetic content.

        The fixture must contain a ``scenarios`` list; *scenario_name* selects
        one by name, otherwise the first scenario is used. Each turn is
        appended to the session jsonl and posted to the sidecar.

        Raises:
            ValueError: on missing scenarios, unknown scenario name, or a
                scenario without a ``scope`` section.
        """
        payload = json.loads(fixture_path.read_text(encoding="utf-8"))
        scenarios = payload.get("scenarios") if isinstance(payload.get("scenarios"), list) else []
        if not scenarios:
            raise ValueError(f"fixture has no scenarios: {fixture_path}")
        scenario = None
        if scenario_name:
            for item in scenarios:
                if isinstance(item, dict) and item.get("name") == scenario_name:
                    scenario = item
                    break
            if scenario is None:
                raise ValueError(f"scenario not found: {scenario_name}")
        else:
            scenario = scenarios[0]
        scope = scenario.get("scope") if isinstance(scenario.get("scope"), dict) else {}
        if not scope:
            raise ValueError(f"scenario missing scope: {scenario_name or scenario.get('name')}")
        previous = None
        for idx, turn in enumerate(scenario.get("turns") or []):
            if not isinstance(turn, dict):
                continue
            role = str(turn.get("role") or "user")
            content = str(turn.get("content") or "").strip()
            if not content:
                continue
            message_id = str(turn.get("message_id") or f"fixture-{idx}")
            metadata = turn.get("metadata") if isinstance(turn.get("metadata"), dict) else {}
            # Explicit reply-to in metadata wins over simple chaining.
            parent_id = metadata.get("reply_to_message_id") or previous
            self.append_session_message(role, content, message_id=message_id, parent_id=parent_id)
            _post_json(
                self.base_url,
                "/conversation/ingest_turn",
                {
                    "role": role,
                    "content": content,
                    "conversation_id": scope.get("conversation_id"),
                    "session_id": scope.get("session_id"),
                    "thread_id": scope.get("thread_id"),
                    "message_id": message_id,
                    "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                    "metadata": metadata,
                },
                timeout=15.0,
            )
            previous = message_id
        return {
            "session_id": str(scope.get("session_id") or ""),
            "thread_id": str(scope.get("thread_id") or ""),
            "conversation_id": str(scope.get("conversation_id") or ""),
        }
412
+
413
+
414
def run_hydrate_calls(base_url: str, scope: dict[str, str], *, total_calls: int, concurrency: int, timeout: float, plugin_sim: bool) -> dict[str, Any]:
    """Fire *total_calls* POSTs to /conversation/hydrate with *concurrency* workers.

    Collects per-call latency (ms), failures, warning counts, and — when
    *plugin_sim* is true — the byte size of the plugin-style prepend string
    built from each successful payload. Returns a metrics dict with avg/p95/max
    latency and the raw prepend sizes.
    """
    latencies: list[float] = []
    failures: list[str] = []
    prepend_sizes: list[int] = []
    warning_count = 0

    def _one(_: int) -> None:
        # Worker body: one hydrate request, executed on the thread pool.
        # NOTE(review): list.append is safe under the GIL, but the
        # `warning_count +=` below is a read-modify-write and not atomic —
        # counts could theoretically undercount under contention; confirm
        # acceptable for a stress metric.
        nonlocal warning_count
        started = time.perf_counter()
        payload = _post_json(
            base_url,
            "/conversation/hydrate",
            {
                **scope,
                "turns_limit": 8,
                "memory_limit": 4,
            },
            timeout=timeout,
        )
        latencies.append((time.perf_counter() - started) * 1000.0)
        if not payload.get("ok"):
            failures.append(str(payload))
            return
        warnings = payload.get("warnings") if isinstance(payload.get("warnings"), list) else []
        warning_count += len(warnings)
        if plugin_sim:
            # Reproduce what the plugin would prepend so its size budget can
            # be checked downstream.
            prepend = "\n\n".join(
                part for part in [build_predictive_brief_context(payload), build_hydration_context(payload)] if part
            )
            prepend_sizes.append(len(prepend.encode("utf-8")))

    with ThreadPoolExecutor(max_workers=max(1, concurrency)) as pool:
        futures = [pool.submit(_one, idx) for idx in range(total_calls)]
        # future.result() re-raises any worker exception here.
        for future in as_completed(futures):
            future.result()

    ordered = sorted(latencies)
    # Nearest-rank-style p95 over the sorted latencies (index clamped to >= 0).
    p95 = ordered[max(0, int(len(ordered) * 0.95) - 1)] if ordered else 0.0
    return {
        "calls": total_calls,
        "failures": failures,
        "warning_count": warning_count,
        "avg_ms": round(sum(latencies) / len(latencies), 3) if latencies else 0.0,
        "p95_ms": round(p95, 3),
        "max_ms": round(max(latencies), 3) if latencies else 0.0,
        "prepend_sizes": prepend_sizes,
    }
461
+
462
+
463
def run_mode(args: argparse.Namespace, harness: SidecarHarness) -> HarnessResult:
    """Execute one stress mode against a started harness and score the result.

    Seeds conversation state (from fixture or synthetically), runs the
    mode-specific workload while a background thread samples CPU/RSS, then
    checks latency, CPU, and log-growth thresholds. Returns a HarnessResult
    whose ``ok`` is True only when no threshold was breached.
    """
    if args.fixture:
        scope = harness.seed_from_fixture(Path(args.fixture), args.scenario)
    else:
        scope = harness.seed_conversation(args.seed_turns)
    metrics: dict[str, Any] = {"mode": args.mode, "scope": scope}
    failures: list[str] = []

    # Snapshot log sizes before the workload so growth can be measured.
    report_before = harness.report_sizes()
    cpu_samples: list[float] = []
    rss_samples: list[float] = []

    def _sampler() -> None:
        # Background loop: sample sidecar CPU/RSS until stop_event is set.
        while not harness.stop_event.is_set():
            sample = harness.sample_process()
            cpu_samples.append(sample["cpu"])
            rss_samples.append(sample["rss_kb"])
            time.sleep(args.sample_interval)

    sampler_thread = threading.Thread(target=_sampler, daemon=True)
    sampler_thread.start()

    try:
        # Watcher workload: append synthetic alternating turns to the
        # session jsonl at a fixed interval so the transcript watcher has
        # continuous work.
        if args.mode in {"watcher-only", "combined"}:
            previous = "seed-final"
            role_cycle = cycle(["user", "assistant"])
            runtime_templates = [
                "verify hydrate and watcher remain stable under concurrent load",
                "preserve branch specificity and avoid unrelated continuity noise",
                "keep checkpoint expansion bounded and relevant",
                "watch for CPU spikes and queue churn under synthetic pressure",
            ]
            for idx in range(args.turn_count):
                role = next(role_cycle)
                text = f"runtime {role} turn {idx}: {runtime_templates[idx % len(runtime_templates)]}"
                harness.append_session_message(role, text, message_id=f"runtime-{idx}", parent_id=previous)
                previous = f"runtime-{idx}"
                time.sleep(max(0.0, args.turn_interval_ms / 1000.0))

        # Hydrate workload: concurrent /conversation/hydrate calls; also runs
        # in combined mode on top of the watcher workload above.
        hydrate_metrics = {}
        if args.mode in {"hydrate-only", "combined", "plugin-sim"}:
            hydrate_metrics = run_hydrate_calls(
                harness.base_url,
                scope,
                total_calls=args.hydrate_calls,
                concurrency=args.hydrate_concurrency,
                timeout=args.request_timeout,
                plugin_sim=args.mode == "plugin-sim" or args.mode == "combined",
            )
            if hydrate_metrics["failures"]:
                # Cap at five failure samples to keep the report readable.
                failures.extend(hydrate_metrics["failures"][:5])
            if hydrate_metrics.get("p95_ms", 0.0) > args.max_p95_ms:
                failures.append(f"hydrate p95 too high: {hydrate_metrics['p95_ms']}ms > {args.max_p95_ms}ms")
            if hydrate_metrics.get("prepend_sizes"):
                max_prepend = max(hydrate_metrics["prepend_sizes"])
                if max_prepend > args.max_prepend_bytes:
                    failures.append(f"prepend too large: {max_prepend} > {args.max_prepend_bytes} bytes")
            metrics["hydrate"] = hydrate_metrics

        # Let background queues drain before the final health check.
        time.sleep(args.settle_seconds)
        health = _get_json(harness.base_url, "/healthz", timeout=5.0)
        metrics["health"] = health
        if not health.get("ok"):
            failures.append(f"healthz not ok: {health}")
    finally:
        # Stop the sampler even if the workload raised.
        harness.stop_event.set()
        sampler_thread.join(timeout=2)

    report_after = harness.report_sizes()
    metrics["process"] = {
        "cpu_peak": round(max(cpu_samples), 3) if cpu_samples else 0.0,
        "cpu_avg": round(sum(cpu_samples) / len(cpu_samples), 3) if cpu_samples else 0.0,
        "rss_peak_kb": round(max(rss_samples), 3) if rss_samples else 0.0,
    }
    metrics["trace_summary"] = harness.read_trace_summary() if args.trace else {}
    # Byte growth per log file over the run (before/after delta).
    metrics["log_growth"] = {
        key: report_after.get(key, 0) - report_before.get(key, 0)
        for key in set(report_before) | set(report_after)
    }

    if metrics["process"]["cpu_peak"] > args.max_cpu_peak:
        failures.append(f"cpu peak too high: {metrics['process']['cpu_peak']} > {args.max_cpu_peak}")
    if metrics["log_growth"]["report_log_bytes"] > args.max_report_log_growth_bytes:
        failures.append(
            f"report log grew too fast: {metrics['log_growth']['report_log_bytes']} > {args.max_report_log_growth_bytes}"
        )
    if metrics["log_growth"]["watcher_error_log_bytes"] > args.max_watcher_error_growth_bytes:
        failures.append(
            f"watcher error log grew too fast: {metrics['log_growth']['watcher_error_log_bytes']} > {args.max_watcher_error_growth_bytes}"
        )

    return HarnessResult(name=args.mode, ok=not failures, metrics=metrics, failures=failures)
555
+
556
+
557
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the stress harness.

    Side effect: watcher-only/combined modes force ``--watcher`` on, since
    those workloads require the transcript watcher.
    """
    parser = argparse.ArgumentParser(
        description="Gateway-independent stress harness for ocmemog hydration/watcher interactions.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(
            """
            Modes:
              hydrate-only   Repeated /conversation/hydrate calls against seeded state
              watcher-only   Session-jsonl append workload with watcher enabled
              combined       Watcher append workload plus concurrent hydrate requests
              plugin-sim     Hydrate calls plus plugin-style prepend formatting budget checks
            """
        ),
    )
    # Workload shape and target.
    parser.add_argument("--mode", choices=["hydrate-only", "watcher-only", "combined", "plugin-sim"], default="combined")
    parser.add_argument("--host", default=DEFAULT_HOST)
    parser.add_argument("--port", type=int, default=DEFAULT_PORT)
    parser.add_argument("--seed-turns", type=int, default=12)
    parser.add_argument("--fixture", default="", help="Optional path to a fixture JSON file with scenarios")
    parser.add_argument("--scenario", default="", help="Optional scenario name inside the fixture file")
    parser.add_argument("--turn-count", type=int, default=120)
    parser.add_argument("--turn-interval-ms", type=float, default=25.0)
    parser.add_argument("--hydrate-calls", type=int, default=60)
    parser.add_argument("--hydrate-concurrency", type=int, default=2)
    # Sidecar watcher/ingest tuning (forwarded to the process via env vars).
    parser.add_argument("--watcher", action="store_true", default=False)
    parser.add_argument("--poll-seconds", type=float, default=0.25)
    parser.add_argument("--batch-seconds", type=float, default=0.5)
    parser.add_argument("--batch-max", type=int, default=8)
    # Sampling and timing knobs.
    parser.add_argument("--sample-interval", type=float, default=0.5)
    parser.add_argument("--settle-seconds", type=float, default=1.0)
    parser.add_argument("--request-timeout", type=float, default=30.0)
    parser.add_argument("--start-timeout", type=float, default=20.0)
    # Pass/fail thresholds checked by run_mode.
    parser.add_argument("--max-cpu-peak", type=float, default=85.0)
    parser.add_argument("--max-p95-ms", type=float, default=2500.0)
    parser.add_argument("--max-report-log-growth-bytes", type=int, default=8_000_000)
    parser.add_argument("--max-watcher-error-growth-bytes", type=int, default=200_000)
    parser.add_argument("--max-prepend-bytes", type=int, default=12_000)
    # Tracing and debugging.
    parser.add_argument("--trace", action="store_true", help="Enable sidecar timing traces for hydrate/refresh/watcher turn paths")
    parser.add_argument("--keep-temp", action="store_true", help="Preserve temp state/logs for inspection")
    parser.add_argument("--trace-hydrate-warn-ms", type=float, default=25.0)
    parser.add_argument("--trace-refresh-warn-ms", type=float, default=15.0)
    parser.add_argument("--trace-watcher-turn-warn-ms", type=float, default=20.0)
    parser.add_argument("--json", action="store_true", help="Emit only JSON summary")
    args = parser.parse_args()
    # Watcher-driven modes cannot run without the transcript watcher enabled.
    if args.mode in {"watcher-only", "combined"}:
        args.watcher = True
    return args
604
+
605
+
606
def main() -> int:
    """Run the configured stress mode against a freshly started sidecar.

    Starts the sidecar, runs the selected workload, always stops/cleans up
    the sidecar (even on error), prints the summary payload as JSON, and
    returns 0 on success or 1 when any failure condition was recorded.
    """
    args = parse_args()
    harness = SidecarHarness(args)
    try:
        harness.start()
        result = run_mode(args, harness)
    finally:
        # Always terminate the sidecar process and remove temp state,
        # regardless of how the run ended.
        harness.stop()
    payload = {
        "ok": result.ok,
        "mode": result.name,
        "metrics": result.metrics,
        "failures": result.failures,
    }
    # NOTE(review): the original printed the identical JSON in both the
    # --json and default branches, making the branch dead code; it is
    # collapsed here. Output is byte-identical for every invocation. If a
    # human-readable default report is wanted later, branch on args.json.
    print(json.dumps(payload, indent=2))
    return 0 if result.ok else 1
625
+
626
+
627
if __name__ == "__main__":
    # Propagate main()'s exit code (0 ok, 1 failures) to the shell.
    raise SystemExit(main())
@@ -32,8 +32,18 @@ PROOF_REPORT_FILE="${PROOF_REPORT_DIR}/release-gate-proof.json"
32
32
  PROOF_LEGACY_ENDPOINT="${OCMEMOG_RELEASE_LEGACY_ENDPOINT:-}"
33
33
  LIVE_STATE_DIR="$(mktemp -d -t ocmemog-release-live-XXXXXX)"
34
34
  DOCTOR_STATE_DIR="$(mktemp -d -t ocmemog-release-doctor-XXXXXX)"
35
+ SMOKE_STATE_DIR="$(mktemp -d -t ocmemog-release-smoke-XXXXXX)"
36
+ SMOKE_LOG_FILE="${SMOKE_STATE_DIR}/sidecar-smoke.log"
37
+ SMOKE_SIDECAR_PID=""
35
38
  mkdir -p "$PROOF_REPORT_DIR"
36
- trap 'rm -rf "$DOCTOR_STATE_DIR" "$LIVE_STATE_DIR"' EXIT
39
# Kill the temporary smoke sidecar (if one was started) and remove all
# scratch state directories. Installed as the EXIT trap below so cleanup
# runs on every exit path.
cleanup_release_check() {
  if [[ -n "${SMOKE_SIDECAR_PID:-}" ]]; then
    # Best-effort: the sidecar may already be gone, so ignore errors.
    kill "${SMOKE_SIDECAR_PID}" >/dev/null 2>&1 || true
    wait "${SMOKE_SIDECAR_PID}" >/dev/null 2>&1 || true
  fi
  rm -rf "$DOCTOR_STATE_DIR" "$LIVE_STATE_DIR" "$SMOKE_STATE_DIR"
}
trap cleanup_release_check EXIT
37
47
 
38
48
  STATUS=0
39
49
 
@@ -64,6 +74,24 @@ run_optional_step() {
64
74
  fi
65
75
  }
66
76
 
77
# Start a throwaway local sidecar for smoke checks when no live endpoint is
# configured. Exports LIVE_CHECK_URL pointing at the new instance and records
# its PID in SMOKE_SIDECAR_PID for the EXIT trap to clean up.
start_local_smoke_sidecar() {
  local smoke_port="${OCMEMOG_RELEASE_SMOKE_PORT:-17931}"
  local smoke_host="127.0.0.1"
  # Isolate state and disable background features so the smoke run is quiet.
  export OCMEMOG_STATE_DIR="$SMOKE_STATE_DIR"
  export OCMEMOG_TRANSCRIPT_WATCHER="false"
  export OCMEMOG_INGEST_ASYNC_WORKER="true"
  export OCMEMOG_AUTO_HYDRATION="false"
  export OCMEMOG_SEARCH_SKIP_EMBEDDING_PROVIDER="true"
  export OCMEMOG_HOST="$smoke_host"
  export OCMEMOG_PORT="$smoke_port"
  export PYTHONPATH="$ROOT_DIR${PYTHONPATH:+:$PYTHONPATH}"

  # Run uvicorn in the background; stdout/stderr go to the smoke log file.
  "$PYTHON_BIN" -m uvicorn ocmemog.sidecar.app:app --host "$smoke_host" --port "$smoke_port" >"$SMOKE_LOG_FILE" 2>&1 &
  SMOKE_SIDECAR_PID=$!
  LIVE_CHECK_URL="http://${smoke_host}:${smoke_port}"
  export LIVE_CHECK_URL
}
94
+
67
95
  run_step "Verifying shell script syntax" \
68
96
  bash -n scripts/install-ocmemog.sh \
69
97
  && bash -n scripts/ocmemog-install.sh
@@ -138,6 +166,12 @@ run_step "Running broad regression subset" \
138
166
  run_step "Running contract-facing sidecar route tests" \
139
167
  "$PYTHON_BIN" -m pytest -q tests/test_sidecar_routes.py
140
168
 
169
+ if [[ -z "${OCMEMOG_RELEASE_LIVE_ENDPOINT:-}" ]]; then
170
+ echo
171
+ echo "[ocmemog-release-check] No explicit live endpoint provided; starting temporary local sidecar for smoke checks"
172
+ start_local_smoke_sidecar
173
+ fi
174
+
141
175
  LIVE_CHECK_ENDPOINT="$LIVE_CHECK_URL"
142
176
  export LIVE_CHECK_ENDPOINT
143
177
  run_step "Running live /healthz, /memory/ingest and /memory/search smoke checks" \