@simbimbo/memory-ocmemog 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +83 -18
- package/brain/runtime/__init__.py +2 -12
- package/brain/runtime/config.py +1 -24
- package/brain/runtime/inference.py +1 -151
- package/brain/runtime/instrumentation.py +1 -15
- package/brain/runtime/memory/__init__.py +3 -13
- package/brain/runtime/memory/api.py +1 -1219
- package/brain/runtime/memory/candidate.py +1 -185
- package/brain/runtime/memory/conversation_state.py +1 -1823
- package/brain/runtime/memory/distill.py +1 -344
- package/brain/runtime/memory/embedding_engine.py +1 -92
- package/brain/runtime/memory/freshness.py +1 -112
- package/brain/runtime/memory/health.py +1 -40
- package/brain/runtime/memory/integrity.py +1 -186
- package/brain/runtime/memory/memory_consolidation.py +1 -58
- package/brain/runtime/memory/memory_links.py +1 -107
- package/brain/runtime/memory/memory_salience.py +1 -233
- package/brain/runtime/memory/memory_synthesis.py +1 -31
- package/brain/runtime/memory/memory_taxonomy.py +1 -33
- package/brain/runtime/memory/pondering_engine.py +1 -654
- package/brain/runtime/memory/promote.py +1 -277
- package/brain/runtime/memory/provenance.py +1 -406
- package/brain/runtime/memory/reinforcement.py +1 -71
- package/brain/runtime/memory/retrieval.py +1 -210
- package/brain/runtime/memory/semantic_search.py +1 -64
- package/brain/runtime/memory/store.py +1 -429
- package/brain/runtime/memory/unresolved_state.py +1 -91
- package/brain/runtime/memory/vector_index.py +1 -323
- package/brain/runtime/model_roles.py +1 -9
- package/brain/runtime/model_router.py +1 -22
- package/brain/runtime/providers.py +1 -66
- package/brain/runtime/security/redaction.py +1 -12
- package/brain/runtime/state_store.py +1 -23
- package/brain/runtime/storage_paths.py +1 -39
- package/docs/architecture/memory.md +20 -24
- package/docs/release-checklist.md +19 -6
- package/docs/usage.md +33 -17
- package/index.ts +8 -1
- package/ocmemog/__init__.py +11 -0
- package/ocmemog/doctor.py +1255 -0
- package/ocmemog/runtime/__init__.py +18 -0
- package/ocmemog/runtime/_compat_bridge.py +28 -0
- package/ocmemog/runtime/config.py +35 -0
- package/ocmemog/runtime/identity.py +115 -0
- package/ocmemog/runtime/inference.py +164 -0
- package/ocmemog/runtime/instrumentation.py +20 -0
- package/ocmemog/runtime/memory/__init__.py +91 -0
- package/ocmemog/runtime/memory/api.py +1431 -0
- package/ocmemog/runtime/memory/candidate.py +192 -0
- package/ocmemog/runtime/memory/conversation_state.py +1831 -0
- package/ocmemog/runtime/memory/distill.py +282 -0
- package/ocmemog/runtime/memory/embedding_engine.py +151 -0
- package/ocmemog/runtime/memory/freshness.py +114 -0
- package/ocmemog/runtime/memory/health.py +57 -0
- package/ocmemog/runtime/memory/integrity.py +208 -0
- package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
- package/ocmemog/runtime/memory/memory_links.py +109 -0
- package/ocmemog/runtime/memory/memory_salience.py +235 -0
- package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
- package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
- package/ocmemog/runtime/memory/pondering_engine.py +681 -0
- package/ocmemog/runtime/memory/promote.py +279 -0
- package/ocmemog/runtime/memory/provenance.py +408 -0
- package/ocmemog/runtime/memory/reinforcement.py +73 -0
- package/ocmemog/runtime/memory/retrieval.py +224 -0
- package/ocmemog/runtime/memory/semantic_search.py +66 -0
- package/ocmemog/runtime/memory/store.py +433 -0
- package/ocmemog/runtime/memory/unresolved_state.py +93 -0
- package/ocmemog/runtime/memory/vector_index.py +411 -0
- package/ocmemog/runtime/model_roles.py +16 -0
- package/ocmemog/runtime/model_router.py +29 -0
- package/ocmemog/runtime/providers.py +79 -0
- package/ocmemog/runtime/roles.py +92 -0
- package/ocmemog/runtime/security/__init__.py +8 -0
- package/ocmemog/runtime/security/redaction.py +17 -0
- package/ocmemog/runtime/state_store.py +34 -0
- package/ocmemog/runtime/storage_paths.py +70 -0
- package/ocmemog/sidecar/app.py +310 -23
- package/ocmemog/sidecar/compat.py +50 -13
- package/ocmemog/sidecar/transcript_watcher.py +318 -240
- package/openclaw.plugin.json +4 -0
- package/package.json +1 -1
- package/scripts/ocmemog-backfill-vectors.py +5 -3
- package/scripts/ocmemog-continuity-benchmark.py +1 -1
- package/scripts/ocmemog-demo.py +1 -1
- package/scripts/ocmemog-doctor.py +15 -0
- package/scripts/ocmemog-install.sh +29 -7
- package/scripts/ocmemog-integrated-proof.py +373 -0
- package/scripts/ocmemog-reindex-vectors.py +5 -3
- package/scripts/ocmemog-release-check.sh +330 -0
- package/scripts/ocmemog-sidecar.sh +4 -2
- package/scripts/ocmemog-test-rig.py +5 -3
- package/brain/runtime/memory/artifacts.py +0 -33
- package/brain/runtime/memory/context_builder.py +0 -112
- package/brain/runtime/memory/interaction_memory.py +0 -57
- package/brain/runtime/memory/memory_gate.py +0 -38
- package/brain/runtime/memory/memory_graph.py +0 -54
- package/brain/runtime/memory/person_identity.py +0 -83
- package/brain/runtime/memory/person_memory.py +0 -138
- package/brain/runtime/memory/sentiment_memory.py +0 -67
- package/brain/runtime/memory/tool_catalog.py +0 -68
|
@@ -0,0 +1,1255 @@
|
|
|
1
|
+
"""Operator-facing diagnostics command for ocmemog runtime and sidecar state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import importlib
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from collections.abc import Iterable
|
|
11
|
+
from dataclasses import dataclass, asdict
|
|
12
|
+
from typing import Any, Callable
|
|
13
|
+
from urllib.request import Request, urlopen
|
|
14
|
+
from urllib.error import HTTPError
|
|
15
|
+
import contextlib
|
|
16
|
+
|
|
17
|
+
from ocmemog.runtime import state_store
|
|
18
|
+
from ocmemog.runtime.memory import embedding_engine, health, store
|
|
19
|
+
from ocmemog.sidecar import compat as sidecar_compat
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class FixResult:
    """Immutable record describing the outcome of a single fix action."""

    # Name of the fix action this result belongs to.
    action: str
    # Key of the check the fix is associated with.
    check_key: str
    # Human-readable summary of the outcome.
    message: str
    # Integer change counter (presumably the number of items altered -- confirm with producers).
    changed: int
    # Success flag for the fix.
    ok: bool
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class CheckResult:
    """Immutable outcome of one diagnostic check.

    The checks in this module populate ``status`` with ``"ok"``, ``"warn"`` or
    ``"fail"`` and put check-specific diagnostic data in ``details``.
    """

    # Stable identifier of the check, e.g. "runtime/imports".
    key: str
    # Short human-readable name of the check.
    label: str
    # Outcome string: "ok", "warn" or "fail".
    status: str
    # One-line summary of the outcome.
    message: str
    # Free-form diagnostic payload.
    details: dict[str, Any]
    # Whether a fix action is offered for this check.
    fixable: bool = False
    # Whether a fix has been applied (defaults to False; not set by the checks shown here).
    fixed: bool = False
    # Name of the associated fix action, if any.
    fix_action: str | None = None
    # Extra information about an applied fix, if any.
    fix_details: dict[str, Any] | None = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class DoctorCheck:
    """Immutable registration entry pairing a check callable with an optional fix."""

    # Stable identifier of the check.
    key: str
    # Short human-readable name of the check.
    label: str
    # Callable that takes one argument (annotated as None) and returns a CheckResult.
    check: Callable[[None], CheckResult]
    # Name of the fix action tied to this check, if any.
    fix_key: str | None = None
    # Callable that performs the fix and returns a FixResult, if any.
    fix: Callable[[None], FixResult] | None = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Numeric rank of each check status; "fail" ranks highest and "ok" lowest.
_STATUS_PRECEDENCE = {
    "fail": 2,
    "warn": 1,
    "ok": 0,
}


# Names of OCMEMOG_* environment variables grouped as toggles.
# NOTE(review): how these are consumed is not visible in this part of the file.
_ENV_TOGGLE_KEYS = (
    "OCMEMOG_TRANSCRIPT_WATCHER",
    "OCMEMOG_AUTO_HYDRATION",
    "OCMEMOG_INGEST_ASYNC_WORKER",
    "OCMEMOG_SHUTDOWN_DRAIN_QUEUE",
    "OCMEMOG_SHUTDOWN_TIMING",
    "OCMEMOG_SHUTDOWN_DUMP_THREADS",
    "OCMEMOG_USE_OLLAMA",
    "OCMEMOG_REINFORCE_SENTIMENT",
)
# Tables that the schema check does not flag when they lack a schema_version column.
_SCHEMA_VERSION_NON_STANDARD_TABLES = {"artifacts", "vector_embeddings"}
# Timeout in seconds, presumably for the HTTP probes made via urlopen -- confirm at the call sites.
_HTTP_TIMEOUT_SECONDS = 2.0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _queue_backlog_severity(depth: int) -> str:
    """Map an ingest-queue depth to a severity label.

    Returns "none" for depth <= 0, "low" up to 25, "medium" up to 250,
    "high" up to 1000 and "critical" above that.
    """
    if depth <= 0:
        return "none"
    # Ordered (inclusive upper bound, label) pairs; the first bound that fits wins.
    for ceiling, label in ((25, "low"), (250, "medium"), (1000, "high")):
        if depth <= ceiling:
            return label
    return "critical"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _vector_backlog_severity(missing: int) -> str:
    """Map a count of missing vector embeddings to a severity label.

    Returns "none" for missing <= 0, "low" up to 200, "medium" up to 2000,
    "high" up to 10000 and "critical" above that.
    """
    tiers = ((0, "none"), (200, "low"), (2000, "medium"), (10000, "high"))
    # Pick the first tier whose inclusive upper bound covers the count.
    return next((name for limit, name in tiers if missing <= limit), "critical")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _parse_float_env(name: str, default: float, *, minimum: float | None = None) -> tuple[float, str | None]:
    """Read a float setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, empty or invalid.
        minimum: Optional inclusive lower bound.

    Returns:
        A ``(value, issue)`` pair. ``issue`` is ``None`` when the variable is
        unset, empty or valid; otherwise it is a message describing the problem
        and ``value`` is ``default``.
    """
    import math  # local import so the block does not depend on a file-level import

    raw = os.environ.get(name)
    if raw is None or raw == "":
        return default, None
    try:
        value = float(raw)
    except ValueError:
        return default, f"{name} must be numeric"
    if not math.isfinite(value):
        # float() accepts "nan"/"inf". NaN would also slip past the minimum
        # check below because every comparison with NaN is False.
        return default, f"{name} must be a finite number"
    if minimum is not None and value < minimum:
        return default, f"{name} must be >= {minimum}"
    return value, None
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _parse_int_env(name: str, default: int, *, minimum: int | None = None) -> tuple[int, str | None]:
    """Read an integer setting from the environment.

    Returns a ``(value, issue)`` pair. ``issue`` is ``None`` when the variable
    is unset, empty or valid; otherwise ``value`` is ``default`` and ``issue``
    describes why the raw value was rejected.
    """
    text = os.environ.get(name)
    if not text:
        # Unset and empty are both treated as "not configured".
        return default, None
    try:
        parsed = int(text)
    except Exception:
        return default, f"{name} must be integer"
    too_small = minimum is not None and parsed < minimum
    if too_small:
        return default, f"{name} must be >= {minimum}"
    return parsed, None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _parse_bool_env(name: str, default: bool = False) -> tuple[bool, str | None]:
    """Read a boolean setting from the environment.

    Accepts 1/true/yes/on/y/t and 0/false/no/off/n/f, case-insensitively and
    ignoring surrounding whitespace. Returns ``(value, issue)``; ``issue`` is
    ``None`` unless the raw value is set, non-empty and unrecognised, in which
    case ``value`` is ``default``.
    """
    text = os.environ.get(name)
    if not text:
        return default, None
    verdicts = {
        **dict.fromkeys(("1", "true", "yes", "on", "y", "t"), True),
        **dict.fromkeys(("0", "false", "no", "off", "n", "f"), False),
    }
    token = text.strip().lower()
    if token in verdicts:
        return verdicts[token], None
    return default, f"{name} must be a boolean value"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _queue_status_to_icon(status: str) -> str:
    """Translate a check status into its display tag.

    "fail" maps to "FAIL", "warn" to "WARN", and anything else to "PASS".
    """
    return "FAIL" if status == "fail" else "WARN" if status == "warn" else "PASS"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _normalize_fixes(raw: Iterable[str] | None) -> list[str]:
    """Flatten fix selections into a sorted list of unique action names.

    Each item may hold several comma-separated names. Falsy items and blank
    names are dropped, and surrounding whitespace is stripped.
    """
    if not raw:
        return []
    names = {piece.strip() for entry in raw if entry for piece in entry.split(",")}
    names.discard("")
    return sorted(names)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@contextlib.contextmanager
def _scoped_state_dir(state_dir: str | None):
    """Temporarily point OCMEMOG_STATE_DIR at ``state_dir``.

    With a falsy ``state_dir`` the environment is left untouched. Otherwise
    the variable is set for the duration of the ``with`` body, then restored
    to its previous value or removed if it was previously unset.
    """
    if not state_dir:
        yield
        return
    env_key = "OCMEMOG_STATE_DIR"
    saved = os.environ.get(env_key)
    os.environ[env_key] = state_dir
    try:
        yield
    finally:
        if saved is not None:
            os.environ[env_key] = saved
        else:
            os.environ.pop(env_key, None)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _run_imports(_: None) -> CheckResult:
    """Check that every required ocmemog runtime and sidecar module imports.

    Returns a "fail" result listing the import errors when any module cannot
    be imported, otherwise an "ok" result. The argument is ignored.
    """
    required_modules = (
        "ocmemog.runtime",
        "ocmemog.runtime.config",
        "ocmemog.runtime.memory",
        "ocmemog.runtime.memory.store",
        "ocmemog.runtime.memory.health",
        "ocmemog.runtime.memory.integrity",
        "ocmemog.runtime.memory.vector_index",
        "ocmemog.runtime.inference",
        "ocmemog.runtime.providers",
        "ocmemog.runtime.memory.embedding_engine",
        "ocmemog.sidecar.compat",
    )

    failures: list[str] = []
    for dotted_name in required_modules:
        try:
            importlib.import_module(dotted_name)
        except Exception as exc:
            # Keep going so one run reports every broken module.
            failures.append(f"{dotted_name}: {exc}")

    details: dict[str, Any] = {"tested": list(required_modules)}
    if not failures:
        return CheckResult(
            key="runtime/imports",
            label="runtime module imports",
            status="ok",
            message="All runtime modules imported.",
            details=details,
        )

    details["errors"] = failures
    return CheckResult(
        key="runtime/imports",
        label="runtime module imports",
        status="fail",
        message="Some required modules failed to import.",
        details=details,
    )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _run_state_paths(_: None) -> CheckResult:
    """Check that the state directories exist and are writable.

    For the root, data, memory and reports directories from ``state_store``,
    this creates the directory if needed, then writes and removes a small
    probe file. Any exception marks that directory as failed. The argument is
    ignored.
    """
    directories = (
        state_store.root_dir(),
        state_store.data_dir(),
        state_store.memory_dir(),
        state_store.reports_dir(),
    )
    tested: list[str] = []
    failed: list[str] = []
    for directory in directories:
        tested.append(str(directory))
        try:
            directory.mkdir(parents=True, exist_ok=True)
            marker = directory / ".ocmemog_doctor_probe"
            marker.write_text("ok", encoding="utf-8")
            marker.unlink()
        except Exception as exc:
            failed.append(f"{directory}: {exc}")

    details: dict[str, Any] = {"tested": tested}
    if failed:
        details["failed"] = failed
        status = "fail"
        message = "State directories are not fully writable."
    else:
        status = "ok"
        message = "State directories exist and are writable."

    # The fix action is advertised on both outcomes.
    return CheckResult(
        key="state/path-writable",
        label="state path writability",
        status=status,
        message=message,
        details=details,
        fixable=True,
        fix_action="create-missing-paths",
    )
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _run_sqlite_schema(_: None) -> CheckResult:
    """Check that the SQLite store opens, has its tables and uses the expected schema version.

    Initialises and opens the store, then for each required table records the
    row count and the distribution of ``schema_version`` values. The argument
    is ignored.

    Returns:
        "fail" when the database cannot be inspected, a required table is
        missing, or ``PRAGMA quick_check`` does not report "ok"; "warn" when
        per-table queries failed or schema versions look unexpected;
        otherwise "ok".
    """
    required = {
        "memory_events",
        "environment_cognition",
        "experiences",
        "directives",
        "candidates",
        "promotions",
        "demotions",
        "cold_storage",
        "memory_index",
        "vector_embeddings",
        "artifacts",
        "knowledge",
        "preferences",
        "identity",
        "runbooks",
        "lessons",
        "reflections",
        "tasks",
        "conversation_turns",
        "conversation_checkpoints",
        "conversation_state",
    } | set(store.MEMORY_TABLES)

    counts: dict[str, int] = {table: 0 for table in required}
    version_map: dict[str, dict[str, int]] = {}
    version_issues: list[str] = []
    try:
        store.init_db()
        conn = store.connect()
        try:
            tables = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()}
            missing = sorted(required - tables)
            quick = str(conn.execute("PRAGMA quick_check(1)").fetchone()[0] or "unknown")
            # Table names come from the fixed set above plus store.MEMORY_TABLES,
            # never from user input, so interpolating them into SQL is safe here.
            for table in sorted(required):
                if table in missing:
                    continue
                try:
                    counts[table] = int(conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] or 0)
                except Exception as exc:
                    version_issues.append(f"{table} row count query failed: {exc}")

                try:
                    columns = {row[1] for row in conn.execute(f"PRAGMA table_info({table})").fetchall()}
                    if "schema_version" not in columns:
                        if table not in _SCHEMA_VERSION_NON_STANDARD_TABLES:
                            version_issues.append(f"{table} missing schema_version column")
                        # Without the column there is nothing to group by.
                        continue
                    rows = conn.execute(
                        f"SELECT COALESCE(schema_version, '<null>') AS schema_version, COUNT(*) AS count "
                        f"FROM {table} GROUP BY COALESCE(schema_version, '<null>')"
                    ).fetchall()
                    version_map[table] = {str(item[0]): int(item[1]) for item in rows}
                except Exception as exc:
                    version_issues.append(f"{table} schema query failed: {exc}")
        finally:
            conn.close()
    except Exception as exc:
        return CheckResult(
            key="sqlite/schema-access",
            label="sqlite and schema",
            status="fail",
            message=f"SQLite schema check failed: {exc}",
            details={"error": str(exc)},
        )

    # Stored versions were stringified above, so compare against the string
    # form of the expected version; a non-str constant would otherwise make
    # every row look unexpected.
    expected_version = str(store.SCHEMA_VERSION)
    for table, versions in version_map.items():
        unexpected = [item for item in versions if item != expected_version]
        if unexpected and table not in ("memory_events", "environment_cognition"):
            version_issues.append(f"{table} has unexpected schema_version value(s): {', '.join(sorted(unexpected))}")

    details = {
        "required_tables": sorted(required),
        "missing_tables": missing,
        "sqlite_quick_check": quick,
        "row_counts": {key: counts[key] for key in sorted(counts)},
        "schema_version_expected": store.SCHEMA_VERSION,
        "schema_versions": version_map,
        "schema_version_issues": version_issues,
    }

    # Most severe condition first.
    if missing:
        status = "fail"
        message = "One or more expected schema tables are missing."
    elif quick.lower() != "ok":
        status = "fail"
        message = "SQLite quick check failed."
    elif version_issues:
        status = "warn"
        message = "Schema metadata includes unexpected versions or schema column issues."
    else:
        status = "ok"
        message = "SQLite schema and DB open state are healthy."

    return CheckResult(
        key="sqlite/schema-access",
        label="sqlite and schema",
        status=status,
        message=message,
        details=details,
    )
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _import_sidecar_app():
    """Import the ``ocmemog.sidecar.app`` module on demand and return it."""
    sidecar_app = importlib.import_module("ocmemog.sidecar.app")
    return sidecar_app
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _run_queue_health(_: None) -> CheckResult:
    """Check the sidecar ingest queue file, its backlog and its worker settings.

    Reads the queue file at the path reported by the sidecar app, counts
    non-blank lines that are not valid JSON, validates the async-worker
    environment settings and reports "warn" for any anomaly. Any exception
    while inspecting the queue produces a "fail" result. The argument is
    ignored.
    """
    try:
        app = _import_sidecar_app()
    except Exception as exc:
        return CheckResult(
            key="queue/health",
            label="queue health",
            status="fail",
            message=f"Failed to import sidecar app for queue checks: {exc}",
            details={"error": str(exc)},
            fixable=True,
            fix_action="repair-queue",
        )

    try:
        queue_path = app._queue_path()
        depth = app._queue_depth()
        stats = dict(app.QUEUE_STATS)
        queue_size = queue_path.stat().st_size
        worker_enabled = app._parse_bool_env("OCMEMOG_INGEST_ASYNC_WORKER", default=True)
        worker_poll_seconds = None
        worker_batch_max = None
        queue_config: list[str] = []
        try:
            worker_poll_seconds = float(os.environ.get("OCMEMOG_INGEST_ASYNC_POLL_SECONDS", "5"))
            if worker_poll_seconds < 0:
                queue_config.append("OCMEMOG_INGEST_ASYNC_POLL_SECONDS must be >= 0")
        except Exception:
            queue_config.append("OCMEMOG_INGEST_ASYNC_POLL_SECONDS")
        try:
            worker_batch_max = int(os.environ.get("OCMEMOG_INGEST_ASYNC_BATCH_MAX", "25"))
            if worker_batch_max < 1:
                queue_config.append("OCMEMOG_INGEST_ASYNC_BATCH_MAX must be >= 1")
        except Exception:
            queue_config.append("OCMEMOG_INGEST_ASYNC_BATCH_MAX")

        invalid = 0
        total = 0
        invalid_samples: list[dict[str, Any]] = []
        for raw_line in queue_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line:
                continue
            total += 1
            try:
                json.loads(line)
            except Exception:
                invalid += 1
                if len(invalid_samples) < 3:
                    # line_no counts non-blank lines only, so it can differ
                    # from the physical line number when the file has blanks.
                    invalid_samples.append({"line_no": total, "line": line[:160]})

        # The severity label is the single source of truth for "elevated";
        # medium and above corresponds to a depth greater than 25.
        backlog_severity = _queue_backlog_severity(depth)
        backlog_elevated = backlog_severity in {"medium", "high", "critical"}

        status = "ok"
        messages: list[str] = []
        if invalid:
            status = "warn"
            messages.append(f"Queue has {invalid} invalid line(s).")
        if backlog_elevated:
            status = "warn"
            messages.append(f"Queue backlog is elevated ({depth}).")
        if queue_config:
            status = "warn"
            messages.append("Queue config has invalid values: " + ", ".join(sorted(set(queue_config))))
        if depth > 0 and not worker_enabled and not queue_config:
            status = "warn"
            messages.append("Ingest worker is disabled but queue has pending entries.")
        if depth > 0 and worker_enabled and app._INGEST_WORKER_THREAD is not None and not app._INGEST_WORKER_THREAD.is_alive():
            status = "warn"
            messages.append("Ingest worker thread exists but is not currently alive.")

        hints: list[str] = []
        if invalid > 0:
            hints.append("Run --fix repair-queue to drop invalid queue entries.")
        if depth > 0 and not worker_enabled:
            hints.append("Enable OCMEMOG_INGEST_ASYNC_WORKER or flush with POST /memory/ingest_flush.")
        if depth > 1000:
            hints.append("Queue depth is very high; inspect upstream ingest failures and sidecar reachability.")
        if not queue_config and worker_batch_max and worker_batch_max > 40:
            hints.append("Ingest batch size is large; reduce OCMEMOG_INGEST_ASYNC_BATCH_MAX if queue consumers lag.")

        message = "; ".join(messages) if messages else "Queue state is healthy."
    except Exception as exc:
        return CheckResult(
            key="queue/health",
            label="queue health",
            status="fail",
            message=f"Queue health check failed: {exc}",
            details={"error": str(exc)},
            fixable=True,
            fix_action="repair-queue",
        )

    return CheckResult(
        key="queue/health",
        label="queue health",
        status=status,
        message=message,
        details={
            "queue_depth": depth,
            "queue_path": str(queue_path),
            "invalid_lines": invalid,
            "lines_seen": total,
            "stats": stats,
            "queue_bytes": queue_size,
            "queue_worker_enabled": worker_enabled,
            "queue_worker_poll_seconds": worker_poll_seconds,
            "queue_worker_batch_max": worker_batch_max,
            "queue_config_issues": queue_config,
            "invalid_payload_samples": invalid_samples,
            "ingest_worker_running": bool(app._INGEST_WORKER_THREAD and app._INGEST_WORKER_THREAD.is_alive()),
            "queue_backlog_severity": backlog_severity,
            "queue_hints": hints,
        },
        fixable=True,
        fix_action="repair-queue",
    )
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _run_transcript_watcher_sanity(_: None) -> CheckResult:
    """Validate the transcript watcher's environment configuration.

    Returns "fail" when the sidecar app cannot be imported, "warn" when the
    OCMEMOG_TRANSCRIPT_WATCHER toggle is not a valid boolean or any watcher
    setting is invalid, and "ok" when the watcher is disabled or its
    configuration is clean. The argument is ignored.
    """
    try:
        app = _import_sidecar_app()
    except Exception as exc:
        return CheckResult(
            key="sidecar/transcript-watcher",
            label="sidecar transcript watcher",
            status="fail",
            message=f"Failed to import sidecar app for transcript watcher checks: {exc}",
            details={"error": str(exc)},
        )

    enabled, valid_toggle = app._parse_bool_env_value(os.environ.get("OCMEMOG_TRANSCRIPT_WATCHER"), default=False)
    # Sample thread liveness once so every field in the result agrees.
    watcher_running = bool(app._WATCHER_THREAD and app._WATCHER_THREAD.is_alive())
    issues: list[str] = []
    hints: list[str] = []
    config: dict[str, Any] = {
        "enabled": enabled,
        "watcher_thread_running": watcher_running,
    }

    if not valid_toggle:
        config["watcher_toggle_parse_valid"] = False
        return CheckResult(
            key="sidecar/transcript-watcher",
            label="sidecar transcript watcher",
            status="warn",
            message="Transcript watcher env toggle is not valid boolean syntax.",
            details={"config": config, "issues": ["OCMEMOG_TRANSCRIPT_WATCHER must be a boolean value"], "hints": []},
        )

    if not enabled:
        return CheckResult(
            key="sidecar/transcript-watcher",
            label="sidecar transcript watcher",
            status="ok",
            message="Transcript watcher is disabled.",
            details={"enabled": False, "issues": [], "hints": [], "config": config},
        )

    # From here on the watcher is enabled with a valid toggle.
    transcript_path = os.environ.get("OCMEMOG_TRANSCRIPT_PATH", "").strip()
    transcript_dir = os.environ.get("OCMEMOG_TRANSCRIPT_DIR", "").strip()
    session_dir = os.environ.get("OCMEMOG_SESSION_DIR", "").strip()

    poll_seconds, issue = _parse_float_env("OCMEMOG_TRANSCRIPT_POLL_SECONDS", 30.0, minimum=1)
    if issue:
        issues.append(issue)
        hints.append("Set OCMEMOG_TRANSCRIPT_POLL_SECONDS to a positive number.")
    batch_seconds, issue = _parse_float_env("OCMEMOG_INGEST_BATCH_SECONDS", 30.0, minimum=1)
    if issue:
        issues.append(issue)
        hints.append("Set OCMEMOG_INGEST_BATCH_SECONDS to a positive number.")
    batch_max, issue = _parse_int_env("OCMEMOG_INGEST_BATCH_MAX", 25, minimum=1)
    if issue:
        issues.append(issue)
        hints.append("Set OCMEMOG_INGEST_BATCH_MAX to an integer >= 1.")
    reinforce_enabled, issue = _parse_bool_env("OCMEMOG_REINFORCE_SENTIMENT", True)
    if issue:
        issues.append(issue)
        hints.append("Set OCMEMOG_REINFORCE_SENTIMENT to true/false.")

    # Key order is kept stable so serialised reports do not change shape.
    config.update(
        {
            "transcript_path": transcript_path or None,
            "transcript_dir": transcript_dir or None,
            "session_dir": session_dir or None,
            "transcript_glob": os.environ.get("OCMEMOG_TRANSCRIPT_GLOB", "*.log"),
            "session_glob": os.environ.get("OCMEMOG_SESSION_GLOB", "*.jsonl"),
            "batch_seconds": batch_seconds,
            "batch_max": batch_max,
            "poll_seconds": poll_seconds,
            "start_at_end": os.environ.get("OCMEMOG_TRANSCRIPT_START_AT_END", "true"),
            "watcher_toggle_parse_valid": True,
            "reinforce_sentiment": reinforce_enabled,
        }
    )

    for raw_value in (transcript_path, transcript_dir, session_dir):
        if not raw_value:
            continue
        target = Path(raw_value).expanduser().resolve()
        if not target.exists():
            hints.append(f"Configured path '{target}' does not currently exist; watcher will create as needed.")
        elif target.is_file() and target.suffix == "":
            issues.append(f"Configured path '{target}' looks like a directory but is a file path.")

    ingest_endpoint = os.environ.get("OCMEMOG_INGEST_ENDPOINT", "http://127.0.0.1:17891/memory/ingest_async")
    turn_ingest_endpoint = os.environ.get("OCMEMOG_TURN_INGEST_ENDPOINT", "")
    config["ingest_endpoint"] = ingest_endpoint
    # Without an explicit override, the turn endpoint is derived from the ingest endpoint.
    config["turn_ingest_endpoint"] = turn_ingest_endpoint or ingest_endpoint.replace("/memory/ingest_async", "/conversation/ingest_turn")
    # Require a real scheme separator; a bare "http" prefix would also accept
    # values such as "httpfoo". URL schemes are case-insensitive.
    if not config["turn_ingest_endpoint"].lower().startswith(("http://", "https://")):
        issues.append("OCMEMOG_TURN_INGEST_ENDPOINT must be an absolute HTTP(S) URL when overridden.")

    status = "ok"
    message = "Transcript watcher config is healthy."
    if issues:
        status = "warn"
        message = "Transcript watcher config has issues."

    return CheckResult(
        key="sidecar/transcript-watcher",
        label="sidecar transcript watcher",
        status=status,
        message=message,
        details={
            "config": config,
            "issues": issues,
            "hints": hints,
            "enabled": enabled,
            "watcher_running": watcher_running,
            "watcher_toggle_parse_valid": valid_toggle,
        },
    )
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def _collect_vector_backlog() -> dict[str, Any]:
    """Estimate how many memory rows lack a vector embedding.

    For each table in ``store.MEMORY_TABLES`` the backlog is the row count
    minus the number of ``vector_embeddings`` rows whose ``source_type`` is
    that table, floored at zero. This is a count difference, not a per-row
    match.

    Returns:
        A dict with ``per_table`` counts, ``total_missing``, a ``severity``
        label and ``errors``, which lists any initialisation or query failures.

    Raises:
        Whatever ``store.connect()`` raises; connection failures are not caught.
    """
    query_errors: list[str] = []
    try:
        store.init_db()
    except Exception as exc:
        # Best effort: still try to read an existing database, but surface
        # the failure instead of hiding it.
        query_errors.append(f"init_db: {exc}")
    backlog: dict[str, int] = {}
    conn = store.connect()
    total_missing = 0
    try:
        for table in store.MEMORY_TABLES:
            try:
                total = int(conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] or 0)
                indexed = int(
                    conn.execute(
                        "SELECT COUNT(*) FROM vector_embeddings WHERE source_type = ?",
                        (table,),
                    ).fetchone()[0]
                    or 0
                )
                missing = max(total - indexed, 0)
                backlog[table] = missing
                total_missing += missing
            except Exception as exc:
                query_errors.append(f"{table}: {exc}")
                backlog[table] = 0
    finally:
        conn.close()
    return {
        "per_table": backlog,
        "total_missing": total_missing,
        "severity": _vector_backlog_severity(total_missing),
        "errors": query_errors,
    }
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _run_transcript_root_readability(_: None) -> CheckResult:
    """Check that every allowed transcript root exists and can be read.

    The roots come from the sidecar app's ``_allowed_transcript_roots()``.
    Each one is classified as missing, not a directory, inaccessible (no
    read+execute permission) or readable.

    Returns:
        A ``CheckResult`` keyed ``sidecar/transcript-roots``: ``fail`` when the
        sidecar app cannot be imported or the roots cannot be evaluated,
        ``warn`` when ``OCMEMOG_TRANSCRIPT_ROOTS`` is set but yields no roots
        or when any root is unusable, otherwise ``ok``.
    """
    key = "sidecar/transcript-roots"
    label = "sidecar transcript roots"

    def _failure(message: str, exc: Exception) -> CheckResult:
        return CheckResult(
            key=key,
            label=label,
            status="fail",
            message=message,
            details={"error": str(exc)},
        )

    try:
        app = _import_sidecar_app()
    except Exception as exc:
        return _failure(f"Failed to import sidecar app for transcript-root checks: {exc}", exc)

    env_value = os.environ.get("OCMEMOG_TRANSCRIPT_ROOTS")
    missing: list[str] = []
    non_directories: list[str] = []
    inaccessible: list[str] = []
    readable_roots: list[str] = []
    try:
        roots = app._allowed_transcript_roots()
        root_values = [str(root) for root in roots]
        for root in roots:
            if not root.exists():
                bucket = missing
            elif not root.is_dir():
                bucket = non_directories
            elif not os.access(str(root), os.R_OK | os.X_OK):
                bucket = inaccessible
            else:
                bucket = readable_roots
            bucket.append(str(root))
    except Exception as exc:
        return _failure(f"Could not evaluate transcript roots: {exc}", exc)

    has_unusable_root = bool(missing or non_directories or inaccessible)
    if env_value is not None and not roots:
        status = "warn"
        message = "OCMEMOG_TRANSCRIPT_ROOTS is set but contains no usable entries."
    elif has_unusable_root:
        status = "warn"
        message = "One or more transcript root paths are not usable."
    else:
        status = "ok"
        message = "Transcript root paths are readable."

    return CheckResult(
        key=key,
        label=label,
        status=status,
        message=message,
        details={
            "configured_via_env": env_value is not None,
            "roots": root_values,
            "readable_roots": readable_roots,
            "missing_roots": missing,
            "non_directories": non_directories,
            "inaccessible_roots": inaccessible,
        },
    )
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def _run_sidecar_toggle_sanity(_: None) -> CheckResult:
    """Validate the boolean environment toggles that are explicitly set.

    Every name in ``_ENV_TOGGLE_KEYS`` that is present in the environment is
    parsed with the sidecar app's ``_parse_bool_env_value``; unset names are
    skipped.

    Returns:
        A ``CheckResult`` keyed ``sidecar/env-toggles``: ``fail`` when the
        sidecar app cannot be imported, ``warn`` when at least one toggle did
        not parse as valid, otherwise ``ok``. ``details`` carries the parsed
        toggles and the list of invalid names.
    """
    key = "sidecar/env-toggles"
    label = "sidecar environment toggles"

    try:
        app = _import_sidecar_app()
    except Exception as exc:
        return CheckResult(
            key=key,
            label=label,
            status="fail",
            message=f"Failed to import sidecar app for env toggle checks: {exc}",
            details={"error": str(exc)},
        )

    toggles: dict[str, dict[str, Any]] = {}
    rejected: list[str] = []
    for name in _ENV_TOGGLE_KEYS:
        raw_value = os.environ.get(name)
        if raw_value is None:
            continue
        parsed, is_valid = app._parse_bool_env_value(raw_value, default=False)
        toggles[name] = {"raw": str(raw_value), "parsed": parsed, "valid": is_valid}
        if not is_valid:
            rejected.append(name)

    if not toggles:
        # Nothing was configured, so nothing can be invalid either.
        message = "No explicitly configured boolean toggles were found."
    elif rejected:
        message = "Invalid boolean env toggle value(s): " + ", ".join(sorted(rejected))
    else:
        message = "Boolean env toggles are valid."

    return CheckResult(
        key=key,
        label=label,
        status="warn" if rejected else "ok",
        message=message,
        details={"toggles": toggles, "invalid": rejected},
    )
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def _run_sidecar_http_auth(_: None) -> CheckResult:
    """Probe the sidecar health endpoint with and without the API token.

    The endpoint comes from ``OCMEMOG_ENDPOINT`` (default
    ``http://127.0.0.1:17891``) and the token from ``OCMEMOG_API_TOKEN``.

    * With a token: an unauthenticated probe is expected to answer 401, and at
      least one of the ``x-ocmemog-token`` / ``Authorization: Bearer`` probes
      is expected to answer 200.
    * Without a token: a single probe is expected to answer 200 with a dict
      payload whose ``ok`` entry is truthy.

    Returns:
        A ``CheckResult`` keyed ``sidecar/http-auth``. The status is ``warn``
        whenever any issue was recorded (the message is then the issues joined
        by ``"; "``), otherwise ``ok``. ``details`` lists every probe made.
    """
    endpoint = os.environ.get("OCMEMOG_ENDPOINT", "http://127.0.0.1:17891")
    token = os.environ.get("OCMEMOG_API_TOKEN")
    probes: list[dict[str, Any]] = []
    issues: list[str] = []
    hints: list[str] = []

    def _record(label: str, code: Any, body: Any, error: Any) -> None:
        # Keep a compact trace of each probe for the report details.
        probes.append(
            {
                "label": label,
                "status": code,
                "error": error,
                "ok": bool(body.get("ok")) if isinstance(body, dict) else None,
            }
        )

    if token:
        anon_code, anon_body, anon_error = _probe_health_json(endpoint)
        _record("unauthenticated", anon_code, anon_body, anon_error)
        if anon_error:
            issues.append(anon_error)
        elif anon_code == 200:
            issues.append("Token configured, but authenticated endpoints are accepting unauthenticated access.")
            hints.append("Verify OCMEMOG_API_TOKEN is exported in both sidecar and operator processes.")
        elif anon_code != 401:
            issues.append(f"Expected 401 for unauthenticated access, got {anon_code}.")

        accepted_headers: list[str] = []
        header_variants = (
            ("x-token", {"x-ocmemog-token": token}),
            ("bearer", {"authorization": f"Bearer {token}"}),
        )
        for label, headers in header_variants:
            auth_code, auth_body, auth_error = _probe_health_json(endpoint, headers=headers)
            _record(label, auth_code, auth_body, auth_error)
            if auth_code == 200 and not auth_error:
                accepted_headers.append(label)

        if not accepted_headers:
            issues.append("Token-based authenticated health check failed.")
            hints.append("Verify the token on operator and sidecar match and the expected header is supported.")

        details: dict[str, Any] = {
            "token_required": True,
            "token_probe_headers": accepted_headers,
            "probes": probes,
            "hints": hints,
        }
    else:
        code, body, error = _probe_health_json(endpoint)
        _record("unauthenticated", code, body, error)
        if error:
            issues.append(error)
        elif not (code == 200 and isinstance(body, dict) and body.get("ok", False)):
            issues.append("Sidecar health endpoint returned a non-OK response.")
        details = {
            "token_required": False,
            "probes": probes,
            "hints": hints,
        }

    details["endpoint"] = endpoint

    # Every warning path records an issue, so both the status and the message
    # follow directly from whether any issue was collected.
    if issues:
        status = "warn"
        message = "; ".join(issues)
    else:
        status = "ok"
        message = "Sidecar HTTP auth configuration is healthy."

    return CheckResult(
        key="sidecar/http-auth",
        label="sidecar HTTP auth",
        status=status,
        message=message,
        details=details,
    )
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def _probe_health_json(endpoint: str, headers: dict[str, str] | None = None, *, timeout: float = _HTTP_TIMEOUT_SECONDS) -> tuple[int | None, dict[str, Any] | None, str | None]:
    """GET ``<endpoint>/healthz`` and return ``(status, payload, error)``.

    Args:
        endpoint: Base URL of the sidecar; a trailing ``/`` is ignored.
        headers: Extra request headers, added on top of
            ``Accept: application/json``.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        * ``(status, payload, None)`` for a response whose body is empty
          (``payload`` is ``None``) or a JSON object.
        * ``(status, None, message)`` when the body is not valid JSON or is
          JSON of another type.
        * ``(code, payload_or_None, None)`` for an HTTP error status; the
          error body is parsed on a best-effort basis.
        * ``(None, None, message)`` when the request could not be made at all.
          ``message`` is never empty, so callers can rely on its truthiness.

    This function does not raise for request or response problems.
    """
    # Generous enough for a real health document while still bounding how
    # much is read from a misbehaving server. Parsing a body cut at a small
    # fixed size would turn a healthy but verbose response into a JSON error.
    max_body_bytes = 64 * 1024

    request_headers = {"Accept": "application/json"}
    if headers:
        request_headers.update(headers)

    try:
        # Request() itself raises ValueError for a malformed endpoint (for
        # example an empty string or a value without a URL scheme), so it has
        # to be built inside the guarded region.
        request = Request(f"{endpoint.rstrip('/')}/healthz", method="GET")
        for key, value in request_headers.items():
            request.add_header(key, value)
        with urlopen(request, timeout=timeout) as response:
            status = getattr(response, "status", 200)
            raw = response.read(max_body_bytes).decode("utf-8", errors="ignore")
    except HTTPError as exc:
        # An HTTP error status is a valid probe outcome, not a transport
        # failure: report the code and whatever JSON object the body holds.
        try:
            raw = exc.read(max_body_bytes).decode("utf-8", errors="ignore")
        except Exception:
            raw = ""
        error_payload: dict[str, Any] | None = None
        if raw:
            try:
                loaded = json.loads(raw)
            except Exception:
                loaded = None
            error_payload = loaded if isinstance(loaded, dict) else None
        return getattr(exc, "code", None), error_payload, None
    except Exception as exc:
        # Some exceptions stringify to "", which callers would read as
        # "no error"; fall back to the exception type name.
        return None, None, str(exc) or type(exc).__name__

    if not raw:
        return status, None, None
    try:
        payload = json.loads(raw)
    except ValueError as exc:
        # Keep the HTTP status so the caller can tell "reachable but not
        # JSON" apart from "unreachable".
        return status, None, f"invalid JSON payload: {exc}"
    if not isinstance(payload, dict):
        return status, None, "non-dict JSON payload"
    return status, payload, None
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def _fix_create_paths(_: None) -> FixResult:
    """Create the state directories and verify each one is writable.

    The root, data, memory and reports directories reported by
    ``state_store`` are created if needed. A small probe file is then written
    to and removed from each one.

    Returns:
        A ``FixResult`` for the ``create-missing-paths`` action. On success
        ``changed`` is the number of directories that did not exist before
        this call. On any error ``ok`` is false, ``changed`` is ``0`` and the
        message carries the error.
    """
    try:
        newly_created = 0
        targets = (
            state_store.root_dir(),
            state_store.data_dir(),
            state_store.memory_dir(),
            state_store.reports_dir(),
        )
        for target in targets:
            # Only directories this call actually had to create count as a
            # change; pre-existing ones are merely verified.
            already_present = target.is_dir()
            target.mkdir(parents=True, exist_ok=True)
            if not already_present:
                newly_created += 1
            probe = target / ".ocmemog_doctor_probe"
            try:
                probe.write_text("ok", encoding="utf-8")
            finally:
                # Do not leave the probe behind if the write fails part-way.
                probe.unlink(missing_ok=True)
        return FixResult(
            action="create-missing-paths",
            check_key="state/path-writable",
            message="Created required state directories and confirmed writable state.",
            changed=newly_created,
            ok=True,
        )
    except Exception as exc:
        return FixResult(
            action="create-missing-paths",
            check_key="state/path-writable",
            message=f"Could not create state paths: {exc}",
            changed=0,
            ok=False,
        )
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
def _fix_repair_queue(_: None) -> FixResult:
    """Rewrite the sidecar queue file without entries that are not valid JSON.

    Blank lines are skipped and every surviving entry is re-serialised with
    ``json.dumps(..., ensure_ascii=False)``. The file is read and rewritten
    while holding the sidecar app's ``QUEUE_LOCK``, so an entry appended by
    another holder of that lock cannot fall between the read and the write and
    be lost.

    Returns:
        A ``FixResult`` for the ``repair-queue`` action. On success
        ``changed`` is the number of dropped entries. On any error ``ok`` is
        false, ``changed`` is ``0`` and the message carries the error.
    """
    try:
        app = _import_sidecar_app()
        queue_path = app._queue_path()
        with app.QUEUE_LOCK:
            kept_lines: list[str] = []
            dropped = 0
            for raw_line in queue_path.read_text(encoding="utf-8").splitlines():
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except Exception:
                    dropped += 1
                    continue
                kept_lines.append(json.dumps(payload, ensure_ascii=False))
            app._write_queue_lines(kept_lines)
        return FixResult(
            action="repair-queue",
            check_key="queue/health",
            message=f"Removed {dropped} invalid queue entry(ies).",
            changed=dropped,
            ok=True,
        )
    except Exception as exc:
        return FixResult(
            action="repair-queue",
            check_key="queue/health",
            message=f"Queue repair failed: {exc}",
            changed=0,
            ok=False,
        )
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
def _run_sidecar_import(_: None) -> CheckResult:
    """Check that the sidecar app module imports and exposes an ``app`` object.

    Returns:
        A ``CheckResult`` keyed ``sidecar/app-import``: ``fail`` when the
        import raises or the module has no ``app`` attribute, otherwise ``ok``
        with the type name of ``app`` in ``details``.
    """

    def _result(status: str, message: str, details: dict[str, Any]) -> CheckResult:
        return CheckResult(
            key="sidecar/app-import",
            label="sidecar app import",
            status=status,
            message=message,
            details=details,
        )

    try:
        sidecar = _import_sidecar_app()
    except Exception as exc:
        return _result(
            "fail",
            f"Failed to import sidecar app module: {exc}",
            {"error": str(exc)},
        )

    if hasattr(sidecar, "app"):
        return _result(
            "ok",
            "sidecar app module imports and exposes FastAPI app.",
            {"module": "ocmemog.sidecar.app", "app_type": type(sidecar.app).__name__},
        )
    return _result(
        "fail",
        "ocmemog.sidecar.app did not expose FastAPI app object.",
        {"module": "ocmemog.sidecar.app"},
    )
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _check_http(endpoint: str) -> str | None:
    """Return a short problem description for the health endpoint, or ``None``.

    ``None`` means the probe returned no error, a status below 400 and a
    non-empty dict payload whose ``ok`` entry is truthy. Any exception raised
    while probing is reported as its string form.
    """
    try:
        code, body, problem = _probe_health_json(endpoint)
        if problem:
            verdict = problem
        elif not code or code >= 400:
            verdict = f"health endpoint status {code}"
        elif not body:
            verdict = "empty response"
        elif isinstance(body, dict) and body.get("ok"):
            verdict = None
        else:
            verdict = "health endpoint returned non-ok payload"
    except Exception as exc:
        return str(exc)
    return verdict
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def _run_runtime_probe(_: None) -> CheckResult:
    """Probe the runtime, memory health, vector backlog, embeddings and sidecar.

    The steps run in order and each contributes to one combined result:

    1. ``sidecar_compat.probe_runtime()``; a failure here ends the check
       immediately with ``fail``.
    2. ``health.get_memory_health()`` plus ``_collect_vector_backlog()``.
    3. A warning when the runtime mode is not ``"ready"``.
    4. A test call to ``embedding_engine.generate_embedding``.
    5. An HTTP health probe against ``OCMEMOG_ENDPOINT``.

    Returns:
        A ``CheckResult`` keyed ``vector/runtime-probe`` whose message joins
        every finding with ``"; "``, or a single "healthy" sentence when there
        were none.
    """
    key = "vector/runtime-probe"
    label = "vector/runtime probe"
    details: dict[str, Any] = {}
    findings: list[str] = []
    status = "ok"

    def _at_least_warn(current: str) -> str:
        # Picks whichever of `current` and "warn" ranks higher in
        # _STATUS_PRECEDENCE, keeping `current` on a tie.
        if not isinstance(current, str):
            return "warn"
        return max(current, "warn", key=lambda item: _STATUS_PRECEDENCE[item])

    try:
        runtime_status = sidecar_compat.probe_runtime()
        details["runtime_mode"] = runtime_status.mode
        details["missing_deps"] = runtime_status.missing_deps
        details["warnings"] = runtime_status.warnings
        details["todo"] = runtime_status.todo
    except Exception as exc:
        # Without a runtime status the remaining probes cannot be interpreted.
        details["runtime_error"] = str(exc)
        return CheckResult(
            key=key,
            label=label,
            status="fail",
            message="Runtime probe failed.",
            details=details,
        )

    try:
        memory_health = health.get_memory_health()
        details["memory_health"] = memory_health
        backlog = _collect_vector_backlog()
        details["vector_backlog"] = backlog

        integrity_ok = memory_health.get("integrity", {}).get(
            "ok", memory_health.get("vector_index_integrity_status")
        )
        if not integrity_ok:
            status = "fail"
            findings.append("memory health reported failed integrity.")

        if backlog.get("errors"):
            status = _at_least_warn(status)
            findings.append(
                "Vector backlog probe reported query warnings: " + "; ".join(backlog["errors"][:3])
            )

        if backlog["total_missing"] > 0:
            status = _at_least_warn(status)
            findings.append(
                f"Vector backlog is elevated ({backlog['total_missing']} rows, severity={backlog['severity']})."
            )
            details["vector_backlog_hint"] = "Run scripts/ocmemog-backfill-vectors.py to reduce missing vector debt."
    except Exception as exc:
        status = "fail"
        details["memory_health_error"] = str(exc)
        findings.append(f"memory health check failed: {exc}")

    if runtime_status.mode != "ready":
        status = _at_least_warn(status)
        findings.append(
            f"runtime mode is degraded ({len(runtime_status.missing_deps)} missing item(s): "
            f"{', '.join(runtime_status.missing_deps) or 'none'})."
        )

    try:
        vector = embedding_engine.generate_embedding("ocmemog doctor probe")
        if not vector:
            status = "fail"
            findings.append("embedding probe returned no vector.")
    except Exception as exc:
        status = "fail"
        details["embedding_error"] = str(exc)
        findings.append(f"embedding probe failed: {exc}")

    endpoint = os.environ.get("OCMEMOG_ENDPOINT", "http://127.0.0.1:17891")
    sidecar_error = _check_http(endpoint)
    if not sidecar_error:
        details["sidecar_http"] = "ok"
    else:
        status = _at_least_warn(status)
        details["sidecar_http_error"] = sidecar_error
        findings.append("sidecar HTTP probe not currently available.")

    if not findings:
        findings.append("runtime, vector, and sidecar probe checks look healthy.")

    return CheckResult(
        key=key,
        label=label,
        status=status,
        message="; ".join(findings),
        details=details,
    )
|
|
1082
|
+
|
|
1083
|
+
|
|
1084
|
+
def _status_rank(status: str) -> int:
    """Return the rank of ``status`` in ``_STATUS_PRECEDENCE`` (``0`` if unknown)."""
    if status in _STATUS_PRECEDENCE:
        return _STATUS_PRECEDENCE[status]
    return 0
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
def _overall_status(results: Iterable[CheckResult]) -> str:
    """Return the highest-ranked status among ``results``.

    The starting point is ``"ok"``; a result only replaces the current answer
    when ``_status_rank`` ranks it strictly higher, so the first of several
    equally ranked statuses wins and an empty input yields ``"ok"``.
    """
    worst = "ok"
    worst_rank = _status_rank(worst)
    for item in results:
        rank = _status_rank(item.status)
        if rank > worst_rank:
            worst, worst_rank = item.status, rank
    return worst
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
# Registry of every doctor check, in the order run_doctor_checks executes
# them. Entries that carry a ``fix_key``/``fix`` pair can be repaired on
# request through the matching ``--fix`` action.
DOCTOR_CHECKS: tuple[DoctorCheck, ...] = (
    DoctorCheck(
        key="runtime/imports",
        label="runtime module imports",
        check=_run_imports,
    ),
    DoctorCheck(
        key="state/path-writable",
        label="state path writability",
        check=_run_state_paths,
        fix_key="create-missing-paths",
        fix=_fix_create_paths,
    ),
    DoctorCheck(
        key="sqlite/schema-access",
        label="sqlite schema access",
        check=_run_sqlite_schema,
    ),
    DoctorCheck(
        key="queue/health",
        label="queue health",
        check=_run_queue_health,
        fix_key="repair-queue",
        fix=_fix_repair_queue,
    ),
    DoctorCheck(
        key="sidecar/http-auth",
        label="sidecar HTTP auth",
        check=_run_sidecar_http_auth,
    ),
    DoctorCheck(
        key="sidecar/transcript-roots",
        label="sidecar transcript roots",
        check=_run_transcript_root_readability,
    ),
    DoctorCheck(
        key="sidecar/transcript-watcher",
        label="sidecar transcript watcher",
        check=_run_transcript_watcher_sanity,
    ),
    DoctorCheck(
        key="sidecar/env-toggles",
        label="sidecar environment toggles",
        check=_run_sidecar_toggle_sanity,
    ),
    DoctorCheck(
        key="sidecar/app-import",
        label="sidecar app import",
        check=_run_sidecar_import,
    ),
    DoctorCheck(
        key="vector/runtime-probe",
        label="vector/runtime probe",
        check=_run_runtime_probe,
    ),
)
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
# Supported ``--fix`` action names, each mapped to the key of the check it
# repairs. run_doctor_checks rejects any action that is not listed here.
_KNOWN_FIXES: dict[str, str] = {"create-missing-paths": "state/path-writable", "repair-queue": "queue/health"}
|
|
1114
|
+
|
|
1115
|
+
|
|
1116
|
+
def run_doctor_checks(*, fix_actions: list[str] | None = None, include_checks: set[str] | None = None, state_dir: str | None = None, strict: bool = False):
    """Run the selected doctor checks and apply any requested fixes.

    Args:
        fix_actions: Fix action names to apply, normalised with
            ``_normalize_fixes``. Each must be a key of ``_KNOWN_FIXES``.
        include_checks: Check keys to run. Empty or ``None`` runs every entry
            of ``DOCTOR_CHECKS``.
        state_dir: Passed to ``_scoped_state_dir`` for the duration of the run.
        strict: When true, an overall ``"warn"`` is reported as ``"fail"``.

    Returns:
        A dict with ``status`` (overall status), ``checks`` (one dict per
        executed check), ``fixes`` (one dict per attempted fix, successful or
        not) and ``strict``.

    Raises:
        ValueError: If ``include_checks`` names an unknown check key or
            ``fix_actions`` names an unknown fix action.
    """
    include_checks = set(include_checks or [])
    known_check_keys = {check.key for check in DOCTOR_CHECKS}
    if include_checks and (unknown_checks := (set(include_checks) - known_check_keys)):
        unknown = ", ".join(sorted(unknown_checks))
        raise ValueError(f"unknown --check key(s): {unknown}")

    selected = [check for check in DOCTOR_CHECKS if not include_checks or check.key in include_checks]
    fix_actions = _normalize_fixes(fix_actions)
    if any(item not in _KNOWN_FIXES for item in fix_actions):
        unknown = sorted(set(fix_actions) - set(_KNOWN_FIXES))
        raise ValueError(f"unknown --fix action(s): {', '.join(unknown)}")

    with _scoped_state_dir(state_dir):
        results: list[CheckResult] = []
        applied_fixes: list[FixResult] = []

        for check in selected:
            result = check.check(None)
            if result.status != "ok" and check.fix and check.fix_key in fix_actions:
                fix = check.fix(None)
                if fix.ok:
                    # Re-run so the reported status reflects the repaired state.
                    result = check.check(None)
                result = CheckResult(
                    key=result.key,
                    label=result.label,
                    status=result.status,
                    message=result.message,
                    details=result.details,
                    fixable=result.fixable,
                    # Only claim the check was fixed when the fix itself
                    # succeeded; a failed attempt stays visible through
                    # fix_action and fix_details.
                    fixed=bool(fix.ok),
                    fix_action=check.fix_key,
                    fix_details=asdict(fix),
                )
                applied_fixes.append(fix)
            results.append(result)

        status = _overall_status(results)
        if strict and status == "warn":
            status = "fail"

        return {
            "status": status,
            "checks": [asdict(item) for item in results],
            "fixes": [asdict(item) for item in applied_fixes],
            "strict": strict,
        }
|
|
1163
|
+
|
|
1164
|
+
|
|
1165
|
+
def _render_text(report: dict[str, Any]) -> None:
    """Print a human-readable version of a doctor report to stdout.

    One line is printed per check, followed by its details as key-sorted JSON
    when it has any. If a fix was attempted for the check, the fix outcome is
    printed too: successful fixes are labelled ``fix`` and failed ones
    ``fix failed``. A summary of status counts and the overall status close
    the output.

    Args:
        report: A report dict with ``checks``, ``fixes`` and ``status``
            entries, as returned by ``run_doctor_checks``.
    """
    print("ocmemog doctor")
    for check in report["checks"]:
        status = check["status"]
        print(f"{_queue_status_to_icon(status):<4} {check['key']}: {check['message']}")
        details = check.get("details") or {}
        if details:
            details_text = json.dumps(details, sort_keys=True)
            print(f" details: {details_text}")
        fix_details = check.get("fix_details") or {}
        # A fix counts as attempted when the check is flagged as fixed or when
        # fix details were recorded for it, so failed attempts are shown too.
        if check.get("fix_action") and (check.get("fixed") or fix_details):
            changed = fix_details.get("changed", 0)
            fix_message = fix_details.get("message", "fix applied")
            if fix_details.get("ok", True):
                print(f" fix: {fix_message} (changed={changed})")
            else:
                print(f" fix failed: {fix_message} (changed={changed})")
    summary = {
        "ok": sum(1 for item in report["checks"] if item["status"] == "ok"),
        "warn": sum(1 for item in report["checks"] if item["status"] == "warn"),
        "fail": sum(1 for item in report["checks"] if item["status"] == "fail"),
        "applied_fixes": len(report["fixes"]),
    }
    status = report["status"]
    print(f"summary: {json.dumps(summary, sort_keys=True)}")
    print(f"overall: {status}")
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
def _render_json(report: dict[str, Any]) -> None:
    """Print the report to stdout as indented, key-sorted JSON.

    The document holds the report's ``status``, ``checks`` and ``fixes`` plus
    an ``ok`` flag that is true only when the status equals ``"ok"``.
    """
    overall = report["status"]
    document: dict[str, Any] = {"ok": overall == "ok", "status": overall}
    for section in ("checks", "fixes"):
        document[section] = report[section]
    print(json.dumps(document, sort_keys=True, indent=2))
|
|
1198
|
+
|
|
1199
|
+
|
|
1200
|
+
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
|
|
1201
|
+
parser = argparse.ArgumentParser(
|
|
1202
|
+
prog="ocmemog-doctor",
|
|
1203
|
+
description="Run operator-oriented health checks for ocmemog.",
|
|
1204
|
+
)
|
|
1205
|
+
parser.add_argument(
|
|
1206
|
+
"--json",
|
|
1207
|
+
action="store_true",
|
|
1208
|
+
help="Emit machine-readable JSON output.",
|
|
1209
|
+
)
|
|
1210
|
+
parser.add_argument(
|
|
1211
|
+
"--fix",
|
|
1212
|
+
action="append",
|
|
1213
|
+
default=[],
|
|
1214
|
+
help="Apply explicit low-risk fix action(s): create-missing-paths, repair-queue",
|
|
1215
|
+
)
|
|
1216
|
+
parser.add_argument(
|
|
1217
|
+
"--state-dir",
|
|
1218
|
+
help="Use an explicit state directory for all checks.",
|
|
1219
|
+
)
|
|
1220
|
+
parser.add_argument(
|
|
1221
|
+
"--check",
|
|
1222
|
+
action="append",
|
|
1223
|
+
default=[],
|
|
1224
|
+
help="Run only selected check key(s) (repeatable or comma-separated).",
|
|
1225
|
+
)
|
|
1226
|
+
parser.add_argument(
|
|
1227
|
+
"--strict",
|
|
1228
|
+
action="store_true",
|
|
1229
|
+
help="Treat warn results as failures to hard-gate release checks.",
|
|
1230
|
+
)
|
|
1231
|
+
return parser.parse_args(argv)
|
|
1232
|
+
|
|
1233
|
+
|
|
1234
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the doctor from the command line and return its exit code.

    The ``--check`` values are normalised with ``_normalize_fixes`` before
    being handed to ``run_doctor_checks``. The report is printed as JSON when
    ``--json`` is given and as text otherwise.

    Returns:
        ``2`` when the overall status is ``"fail"``, ``1`` when it is
        ``"warn"``, otherwise ``0``.
    """
    options = parse_args(argv)
    requested_checks = set(_normalize_fixes(options.check))
    report = run_doctor_checks(
        fix_actions=options.fix,
        include_checks=requested_checks,
        state_dir=options.state_dir,
        strict=options.strict,
    )

    render = _render_json if options.json else _render_text
    render(report)

    exit_codes = {"fail": 2, "warn": 1}
    return exit_codes.get(report["status"], 0)
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
|