code-data-ark 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
1
+ """
2
+ cda selfcheck — the system knows itself.
3
+
4
+ Checks:
5
+ version — VERSION file exists, valid semver, matches __version__
6
+ install_path — editable install of cda resolves to this project dir
7
+ db_present — local/data/cda.db exists on disk
8
+ db_accessible — DB opens and WAL mode is confirmed
9
+ db_integrity — PRAGMA integrity_check passes
10
+ db_tables — all expected tables are present
11
+ db_counts — core tables have rows (non-empty)
12
+ db_wal — no abandoned WAL/SHM files blocking writes
13
+ watcher_state — watcher.pid present and process is alive (or cleanly absent)
14
+ queue_depth — local/queue/ exists and reports pending file count
15
+ data_gitignored — local/ is gitignored in git
16
+ cli_path — this binary is on PATH and resolves correctly
17
+ python_runtime — running on expected Python (3.9, not Homebrew 3.14+)
18
+ dependencies — all required imports load without error
19
+ """
20
+
21
+ import importlib
22
+ import os
23
+ import shutil
24
+ import sqlite3
25
+ import subprocess
26
+ import sys
27
+ from pathlib import Path
28
+
29
+ # ── paths the system knows about itself ─────────────────────────────────────
30
# Directory containing this module, made absolute with symlinks resolved.
PACKAGE_DIR = Path(__file__).resolve().parent

# Two levels above the package: source/ — tracked repo root.
SOURCE_DIR = PACKAGE_DIR.parent.parent

# One level above source/: repo root — where layers live.
PROJECT_DIR = SOURCE_DIR.parent

# Everything the checks inspect at runtime lives under <project>/local.
LOCAL_DIR = PROJECT_DIR / "local"
DB_PATH = LOCAL_DIR.joinpath("data", "cda.db")
PID_FILE = LOCAL_DIR.joinpath("run", "watcher.pid")
QUEUE_DIR = LOCAL_DIR.joinpath("queue")

# Version file read by check_version().
VERSION_FILE = SOURCE_DIR.joinpath("version")
38
+
39
# Tables check_db_tables() requires to exist in cda.db.
REQUIRED_TABLES = [
    "sessions",
    "exchanges",
    "tool_calls",
    "vfs",
    "workspaces",
    "memory_files",
    "embeddings",
    "exchange_signals",
    "ingest_log",
    "transcript_events",
    "token_usage",
    "compactions",
    "session_analysis",
    "session_summaries",
    "recommendations",
    "anomaly_alerts",
    "symbols",
    "file_offsets",
    "state_items",
    "chat_messages",
]

# Tables check_db_counts() requires to contain at least one row.
CORE_COUNT_TABLES = [
    "sessions",
    "exchanges",
    "tool_calls",
    "vfs",
]

# Modules check_dependencies() tries to import.
REQUIRED_IMPORTS = [
    "click",
    "sqlite3",
    "watchfiles",
    "pathlib",
    "json",
    "gzip",
]
53
+
54
+
55
+ # ── result helpers ────────────────────────────────────────────────────────────
56
+
57
+ def _ok(name, message, details=None):
58
+ r = {"name": name, "passed": True, "message": message}
59
+ if details:
60
+ r["details"] = details
61
+ return r
62
+
63
+
64
+ def _fail(name, message, details=None):
65
+ r = {"name": name, "passed": False, "message": message}
66
+ if details:
67
+ r["details"] = details
68
+ return r
69
+
70
+
71
+ # ── individual checks ─────────────────────────────────────────────────────────
72
+
73
def check_version():
    """Validate the version file and compare it with ``cda.__version__``.

    Fails when VERSION_FILE is missing, when its stripped content is not
    three dot-separated digit groups, or when ``cda.__version__`` can be
    imported and differs from the file. If ``__version__`` cannot be
    imported, only the file is checked.
    """
    import re

    check = "version"
    if not VERSION_FILE.exists():
        return _fail(check, "VERSION file not found")

    declared = VERSION_FILE.read_text().strip()
    if re.fullmatch(r"\d+\.\d+\.\d+", declared) is None:
        return _fail(check, f"VERSION is not valid semver: {declared!r}")

    try:
        from cda import __version__ as installed
        if installed != declared:
            return _fail(
                check,
                f"VERSION file ({declared}) does not match __version__ ({installed})",
            )
    except (ImportError, AttributeError):
        # __version__ is not defined — the file check alone decides.
        pass

    return _ok(check, f"VERSION is valid semver: {declared}")
88
+
89
+
90
def check_install_path():
    """Check that ``import cda`` in a fresh interpreter resolves to SOURCE_DIR.

    A child process started with ``sys.executable`` imports ``cda`` and
    prints the resolved grandparent directory of ``cda.__file__``. The
    check passes only when that directory equals SOURCE_DIR. Any exception
    raised along the way is reported as a failed check.
    """
    check = "install_path"
    probe = (
        "import cda, pathlib; "
        "print(pathlib.Path(cda.__file__).parent.parent.resolve())"
    )
    try:
        proc = subprocess.run(
            [sys.executable, "-c", probe],
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            return _fail(check, "cda not importable — editable install broken")

        install_dir = Path(proc.stdout.strip()).resolve()
        if install_dir != SOURCE_DIR:
            return _fail(
                check,
                f"editable install points to wrong path: {install_dir} (expected {SOURCE_DIR})",
            )
        return _ok(check, f"editable install → {install_dir}")
    except Exception as exc:
        return _fail(check, f"install_path check error: {exc}")
107
+
108
+
109
def check_db_present():
    """Report whether DB_PATH exists, including its size in whole megabytes."""
    if DB_PATH.exists():
        megabytes = DB_PATH.stat().st_size / (1024 * 1024)
        return _ok("db_present", f"cda.db present ({megabytes:.0f} MB)")
    return _fail("db_present", f"cda.db not found at {DB_PATH}")
114
+
115
+
116
def check_db_accessible():
    """Open the database read-only and confirm it is in WAL journal mode.

    Returns a failed result when DB_PATH is missing, when SQLite raises a
    ``DatabaseError`` while opening or querying, or when
    ``PRAGMA journal_mode`` reports anything other than ``wal``.
    """
    if not DB_PATH.exists():
        return _fail("db_accessible", "cda.db not found — skipping")
    try:
        conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, timeout=5)
        try:
            row = conn.execute("PRAGMA journal_mode").fetchone()
        finally:
            # Close even when the PRAGMA raises (e.g. the file is not a
            # database), so a failed check does not leak the connection.
            conn.close()
    except sqlite3.DatabaseError as exc:
        return _fail("db_accessible", f"DB is corrupt or unreadable: {exc}")

    mode = row[0] if row else "unknown"
    if mode != "wal":
        return _fail("db_accessible", f"DB is accessible but journal_mode={mode} (expected wal)")
    return _ok("db_accessible", "DB accessible, journal_mode=wal")
129
+
130
+
131
def check_db_integrity():
    """Run ``PRAGMA integrity_check(1)`` against the database, read-only.

    Returns a failed result when DB_PATH is missing, when SQLite raises a
    ``DatabaseError``, or when the first row returned by the pragma is not
    the string ``ok``.
    """
    if not DB_PATH.exists():
        return _fail("db_integrity", "cda.db not found — skipping")
    try:
        conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, timeout=10)
        try:
            row = conn.execute("PRAGMA integrity_check(1)").fetchone()
        finally:
            # Close even when the pragma raises, so a corrupt database does
            # not leave the connection open.
            conn.close()
    except sqlite3.DatabaseError as exc:
        return _fail("db_integrity", f"integrity_check failed: {exc}")

    result = row[0] if row else "unknown"
    if result == "ok":
        return _ok("db_integrity", "PRAGMA integrity_check: ok")
    return _fail("db_integrity", f"PRAGMA integrity_check: {result}")
144
+
145
+
146
def check_db_tables():
    """Verify that every name in REQUIRED_TABLES exists as a table.

    Table names are read from ``sqlite_master`` over a read-only
    connection. Returns a failed result when DB_PATH is missing, when
    SQLite raises a ``DatabaseError``, or when any required table is
    absent (the missing names are listed in the message).
    """
    if not DB_PATH.exists():
        return _fail("db_tables", "cda.db not found — skipping")
    try:
        conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, timeout=5)
        try:
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
        finally:
            # Close even when the query raises, so the connection is not
            # leaked on the failure path.
            conn.close()
    except sqlite3.DatabaseError as exc:
        return _fail("db_tables", f"Table check failed: {exc}")

    present = {r[0] for r in rows}
    missing = [t for t in REQUIRED_TABLES if t not in present]
    if missing:
        return _fail("db_tables", f"Missing tables: {', '.join(missing)}")
    return _ok("db_tables", f"All {len(REQUIRED_TABLES)} expected tables present")
161
+
162
+
163
def check_db_counts():
    """Verify that every table in CORE_COUNT_TABLES holds at least one row.

    Returns a failed result when DB_PATH is missing, when SQLite raises a
    ``DatabaseError`` (for example because a core table does not exist),
    or when any core table is empty. On the empty-table failure the
    per-table counts are attached as ``details``.
    """
    if not DB_PATH.exists():
        return _fail("db_counts", "cda.db not found — skipping")
    try:
        conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, timeout=5)
        try:
            counts = {}
            for t in CORE_COUNT_TABLES:
                # Table names come from the module-level constant, not from
                # external input, so interpolating them is safe here.
                row = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()
                counts[t] = row[0] if row else 0
        finally:
            # Close even when a COUNT query raises part-way through the
            # loop, so the connection is not leaked.
            conn.close()
    except sqlite3.DatabaseError as exc:
        return _fail("db_counts", f"Count check failed: {exc}")

    empty = [t for t, c in counts.items() if c == 0]
    summary = ", ".join(f"{t}={c:,}" for t, c in counts.items())
    if empty:
        return _fail("db_counts", f"Empty core tables: {', '.join(empty)}", summary)
    return _ok("db_counts", f"Core table counts: {summary}")
180
+
181
+
182
def _wal_writer_alive():
    """Return True when watcher.pid names a process that currently exists."""
    try:
        pid = int(PID_FILE.read_text().strip())
    except (OSError, ValueError):
        # Missing, unreadable or non-numeric PID file: no known writer.
        return False
    if pid <= 0:
        # Non-positive values address process groups, not a single process.
        return False
    try:
        os.kill(pid, 0)  # signal 0 = existence check, nothing is delivered
    except PermissionError:
        # The process exists but belongs to another user — still a writer.
        return True
    except (OSError, OverflowError):
        return False
    return True


def check_db_wal():
    """Inspect the ``-wal`` / ``-shm`` side files next to the database.

    Two conditions are reported as failures:

    * the WAL file is larger than 100 MB while no watcher process is
      alive (an active writer legitimately grows the WAL);
    * an SHM file exists without a WAL file.
    """
    wal = DB_PATH.with_suffix(".db-wal")
    shm = DB_PATH.with_suffix(".db-shm")
    issues = []

    wal_exists = wal.exists()
    if wal_exists:
        size_kb = wal.stat().st_size // 1024
        # > 100MB WAL may indicate an abandoned writer, but only when the
        # watcher is NOT running.
        if size_kb > 100 * 1024 and not _wal_writer_alive():
            issues.append(
                f"WAL file is large ({size_kb // 1024} MB) — may indicate abandoned writer"
            )

    if shm.exists() and not wal_exists:
        issues.append("SHM file present without WAL — possible unclean shutdown")

    if issues:
        return _fail("db_wal", "; ".join(issues))
    return _ok("db_wal", "WAL/SHM state looks healthy")
204
+
205
+
206
def check_watcher_state():
    """Report whether the watcher recorded in PID_FILE is alive.

    Passes when there is no PID file (watcher cleanly absent) or when the
    recorded process exists. Fails when the PID file cannot be read, does
    not contain a positive integer, or names a process that no longer
    exists (stale PID file).
    """
    if not PID_FILE.exists():
        return _ok("watcher_state", "watcher not running (no PID file)")

    try:
        pid = int(PID_FILE.read_text().strip())
    except FileNotFoundError:
        # The file disappeared between the existence test and the read.
        return _ok("watcher_state", "watcher not running (no PID file)")
    except OSError as exc:
        return _fail("watcher_state", f"watcher.pid could not be read: {exc}")
    except ValueError:
        return _fail("watcher_state", "watcher.pid contains invalid PID")

    if pid <= 0:
        # kill(0, ...) / kill(-n, ...) address process groups, so a
        # non-positive value can never identify the watcher process.
        return _fail("watcher_state", "watcher.pid contains invalid PID")

    try:
        os.kill(pid, 0)  # signal 0 = existence check, no actual signal
    except ProcessLookupError:
        return _fail("watcher_state",
                     f"watcher.pid exists (PID {pid}) but process is dead — stale PID file")
    except PermissionError:
        # EPERM: the process exists but is owned by another user.
        return _ok("watcher_state", f"watcher running (PID {pid}, owned by another user)")
    except OverflowError:
        # The number does not fit the platform's pid type.
        return _fail("watcher_state", "watcher.pid contains invalid PID")
    except OSError as exc:
        return _fail("watcher_state", f"could not probe watcher PID {pid}: {exc}")
    return _ok("watcher_state", f"watcher running (PID {pid})")
218
+
219
+
220
def check_queue_depth():
    """Count pending entries in QUEUE_DIR and flag a large backlog.

    Every directory entry whose name does not end in ``.completed`` counts
    as pending. The check fails when QUEUE_DIR is missing or when more
    than 500 entries are pending.
    """
    if not QUEUE_DIR.exists():
        return _fail("queue_depth", f"watcher-queue/ not found at {QUEUE_DIR}")

    count = sum(
        1 for entry in QUEUE_DIR.iterdir()
        if not entry.name.endswith(".completed")
    )
    if count <= 500:
        return _ok("queue_depth", f"watcher-queue/ exists, {count} files pending")
    return _fail("queue_depth", f"queue backlog is high: {count} files pending")
228
+
229
+
230
def check_data_gitignored():
    """Ask git whether ``local`` is ignored in PROJECT_DIR.

    ``git check-ignore -q`` exits 0 when the path is ignored, 1 when it is
    not, and with another status (128) on a fatal error such as running
    outside a repository. Only exit status 1 is reported as "NOT
    gitignored"; any other non-zero status is reported as a failed git
    invocation, with git's stderr attached as ``details`` when present.
    """
    try:
        result = subprocess.run(
            ["git", "check-ignore", "-q", "local"],
            cwd=PROJECT_DIR,
            capture_output=True,
        )
    except FileNotFoundError:
        return _fail("data_gitignored", "git not available")

    if result.returncode == 0:
        return _ok("data_gitignored", "local/ is gitignored")
    if result.returncode == 1:
        return _fail("data_gitignored", "local/ is NOT gitignored — sensitive data at risk")  # noqa: E501

    # Any other status means git itself failed, so the ignore state is
    # unknown rather than "not ignored".
    stderr_text = result.stderr.decode(errors="replace").strip()
    return _fail(
        "data_gitignored",
        f"git check-ignore failed (exit {result.returncode})",
        stderr_text,
    )
242
+
243
+
244
def check_cli_path():
    """Report where the ``cda`` executable is found on PATH, if anywhere."""
    located = shutil.which("cda")
    if located:
        return _ok("cli_path", f"cda found at {Path(located).resolve()}")
    return _fail("cli_path", "cda not found on PATH")
250
+
251
+
252
def check_python_runtime():
    """Classify the running interpreter by its major.minor version.

    Python 3.9 passes with an explicit "correct" message, Python 3.14 and
    later 3.x versions fail, and every other version passes with a plain
    version message.
    """
    major, minor, micro = sys.version_info[:3]
    label = f"{major}.{minor}.{micro}"

    if major == 3:
        if minor == 9:
            return _ok("python_runtime", f"Python {label} (system 3.9 — correct)")
        if minor >= 14:
            return _fail(
                "python_runtime",
                f"Python {label} — running under Homebrew Python. Use system Python 3.9.",
            )
    return _ok("python_runtime", f"Python {label}")
261
+
262
+
263
def check_dependencies():
    """Try to import every module in REQUIRED_IMPORTS.

    Modules whose import raises ``ImportError`` are collected and listed
    in the failure message; the check passes when none are collected.
    """
    def _importable(module_name):
        try:
            importlib.import_module(module_name)
        except ImportError:
            return False
        return True

    unavailable = [mod for mod in REQUIRED_IMPORTS if not _importable(mod)]
    if not unavailable:
        return _ok("dependencies", f"All {len(REQUIRED_IMPORTS)} required imports available")
    return _fail("dependencies", f"Missing imports: {', '.join(unavailable)}")
273
+
274
+
275
+ # ── public interface ──────────────────────────────────────────────────────────
276
+
277
# Every check run_all() executes, in execution order.
CHECKS = [
    # package / install
    check_version, check_install_path,
    # database
    check_db_present, check_db_accessible, check_db_integrity,
    check_db_tables, check_db_counts, check_db_wal,
    # watcher and queue
    check_watcher_state, check_queue_depth,
    # environment
    check_data_gitignored, check_cli_path,
    check_python_runtime, check_dependencies,
]
293
+
294
+
295
def run_all():
    """Run every check in CHECKS, in order.

    Returns a ``(passed, results)`` tuple: ``results`` is the list of
    result dicts in CHECKS order, and ``passed`` is True only when every
    result has a truthy ``"passed"`` entry.
    """
    outcomes = []
    for check in CHECKS:
        outcomes.append(check())
    return all(outcome["passed"] for outcome in outcomes), outcomes
File without changes