code-data-ark 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cda/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions."""
2
+
3
+ __version__ = "2.0.2"
cda/kernel/__init__.py ADDED
File without changes
@@ -0,0 +1,151 @@
1
+ """
2
+ cda control_db — write-side interface for control/data/control.db.
3
+
4
+ Used by:
5
+ - selfcheck (cda check) → writes to health table
6
+ - sync pipeline → writes to runs table
7
+ - cli events → writes to events table
8
+
9
+ The control DB lives outside the source tree at:
10
+ <repo_root>/control/data/control.db
11
+
12
+ If the DB or its parent directory doesn't exist, all writes are silently
13
+ skipped — the control plane is optional and must not block normal operation.
14
+ """
15
+
16
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional
20
+
21
# Resolve the control DB location relative to the installed package:
# <repo_root>/control/data/control.db (three directory levels above this file).
# NOTE(review): assumes the package sits exactly three levels below the repo
# root — a site-packages install would point outside the project; confirm.
PACKAGE_DIR = Path(__file__).resolve().parent
PROJECT_DIR = PACKAGE_DIR.parent.parent.parent
CONTROL_DB = PROJECT_DIR / "control" / "data" / "control.db"
24
+
25
+
26
def _connect():
    """Open a connection to control.db, or return None if unavailable.

    The control plane is optional: a missing DB file or any connection
    failure yields None so callers can silently skip their writes.
    """
    if not CONTROL_DB.exists():
        return None
    try:
        connection = sqlite3.connect(CONTROL_DB, timeout=3)
        # WAL keeps concurrent readers/writers from blocking each other.
        connection.execute("PRAGMA journal_mode=WAL")
    except Exception:
        return None
    return connection
36
+
37
+
38
+ def _now() -> str:
39
+ return datetime.now(timezone.utc).isoformat()
40
+
41
+
42
+ # ── health ────────────────────────────────────────────────────────────────────
43
+
44
def write_health(results: List[dict], run_at: Optional[str] = None):
    """
    Write a list of selfcheck result dicts to the health table.

    Each dict must have: name, passed; message is optional (defaults to "").
    passed is stored as 1/0.

    Best-effort by design: if the control DB is unavailable the call is a
    no-op, and any write failure is swallowed so health reporting never
    blocks the caller. run_at defaults to the current UTC timestamp.
    """
    conn = _connect()
    if conn is None:
        return
    ts = run_at or _now()
    # Build rows up front so a bad result dict raises before we touch the DB.
    rows = [
        (ts, r["name"], 1 if r["passed"] else 0, r.get("message", ""))
        for r in results
    ]
    try:
        with conn:  # implicit transaction; commits on success
            conn.executemany(
                "INSERT INTO health (run_at, check_name, passed, message) VALUES (?, ?, ?, ?)",
                rows,
            )
    except Exception:
        pass  # control plane must never block normal operation
    finally:
        conn.close()
63
+
64
+
65
+ # ── runs ─────────────────────────────────────────────────────────────────────
66
+
67
def start_run(trigger: str = "manual") -> int:
    """
    Record the start of a sync pipeline run.
    Returns the run id (for passing to finish_run), or -1 on failure.
    """
    conn = _connect()
    if conn is None:
        return -1
    try:
        with conn:  # commit on success, rollback on error
            inserted = conn.execute(
                "INSERT INTO runs (started_at, trigger) VALUES (?, ?)",
                (_now(), trigger),
            )
            return inserted.lastrowid
    except Exception:
        # Any failure (schema mismatch, locked DB, ...) degrades to a
        # sentinel id that finish_run knows to ignore.
        return -1
    finally:
        conn.close()
86
+
87
+
88
def finish_run(run_id: int, stages: list[str], counts: dict, errors: int = 0,
               exit_code: int = 0, notes: Optional[str] = None):
    """
    Update a run record on completion.

    Args:
        run_id: id returned by start_run; -1 (its failure sentinel) or any
            negative value makes this call a no-op.
        stages: pipeline stage names, stored comma-joined.
        counts: expected keys sessions, exchanges, tool_calls, vfs_files;
            missing keys are stored as NULL.
        errors / exit_code / notes: run outcome metadata.

    Best-effort: skipped when the control DB is unavailable, and any write
    failure is swallowed so pipeline completion is never blocked.
    """
    if run_id < 0:
        return  # start_run failed; there is no row to update
    conn = _connect()
    if conn is None:
        return
    try:
        with conn:
            conn.execute(
                """UPDATE runs SET
                       finished_at = ?,
                       stages = ?,
                       sessions = ?,
                       exchanges = ?,
                       tool_calls = ?,
                       vfs_files = ?,
                       errors = ?,
                       exit_code = ?,
                       notes = ?
                   WHERE id = ?""",
                (
                    _now(),
                    ",".join(stages),
                    counts.get("sessions"),
                    counts.get("exchanges"),
                    counts.get("tool_calls"),
                    counts.get("vfs_files"),
                    errors,
                    exit_code,
                    notes,
                    run_id,
                ),
            )
    except Exception:
        pass  # control plane is optional; never propagate
    finally:
        conn.close()
130
+
131
+
132
+ # ── events ────────────────────────────────────────────────────────────────────
133
+
134
def log_event(kind: str, subject: Optional[str] = None, detail: Optional[str] = None,
              actor: str = "cda"):
    """
    Append a single event to the events table.

    Args:
        kind: event type, e.g. watcher.start, watcher.stop, sync.complete,
            version.bump.
        subject: optional entity the event refers to (stored as NULL if None).
        detail: optional free-form detail text.
        actor: origin of the event; defaults to "cda".

    Best-effort: skipped when the control DB is unavailable, and any write
    failure is swallowed so event logging never blocks the caller.
    """
    conn = _connect()
    if conn is None:
        return
    try:
        with conn:
            conn.execute(
                "INSERT INTO events (occurred_at, kind, actor, subject, detail) VALUES (?, ?, ?, ?, ?)",
                (_now(), kind, actor, subject, detail),
            )
    except Exception:
        pass  # control plane is optional; never propagate
    finally:
        conn.close()
@@ -0,0 +1,364 @@
1
+ import json
2
+ import os
3
+ import signal
4
+ import subprocess
5
+ import sys
6
+ import time
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Dict, List, Optional
10
+
11
# Repository layout anchors. This module resolves the repo root four directory
# levels above itself; all runtime artifacts live under <root>/local/.
# NOTE(review): like control_db, this assumes an in-repo install — confirm.
ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
LOCAL_DIR = ROOT_DIR / "local"
PACKAGE_DIR = Path(__file__).resolve().parent
RUNTIME_FILE = LOCAL_DIR / "pmf" / "runtime.json"  # persisted service state
LOG_DIR = LOCAL_DIR / "pmf" / "logs"  # log dir for one-shot tasks
WATCHER_PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
DEFAULT_HOST = "127.0.0.1"  # UI binds loopback only by default
DEFAULT_PORT = 10001

# Import-time side effects: ensure the runtime directories exist so later
# writes (pid files, task logs, runtime.json) never fail on a missing parent.
# NOTE(review): local/logs (used by the daemon log files below) is NOT created
# here — confirm some other component creates it before a daemon starts.
(LOCAL_DIR / "data").mkdir(parents=True, exist_ok=True)
(LOCAL_DIR / "run").mkdir(parents=True, exist_ok=True)
LOG_DIR.mkdir(parents=True, exist_ok=True)
24
+
25
+
26
+ def now_ts():
27
+ return int(time.time() * 1000)
28
+
29
+
30
+ def now_iso():
31
+ return time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
32
+
33
+
34
+ @dataclass
35
+ class ServiceSpec:
36
+ service_id: str
37
+ label: str
38
+ service_type: str
39
+ description: str
40
+ command: Optional[List[str]] = None
41
+ cwd: Optional[Path] = None
42
+ env: Optional[Dict[str, str]] = None
43
+ pid_file: Optional[Path] = None
44
+ log_file: Optional[Path] = None
45
+ allowed_actions: Optional[List[str]] = None
46
+
47
+ def build_command(self, options: Dict[str, str] = None) -> List[str]:
48
+ if self.service_id == "ui":
49
+ host = options.get("host", DEFAULT_HOST) if options else DEFAULT_HOST
50
+ port = options.get("port", DEFAULT_PORT) if options else DEFAULT_PORT
51
+ return [
52
+ sys.executable,
53
+ "-c",
54
+ (
55
+ "import cda.ui.web as w; "
56
+ f"w.start_server(host={json.dumps(host)}, port={port})"
57
+ ),
58
+ ]
59
+
60
+ if self.service_id == "watcher":
61
+ return [sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "watcher.py")]
62
+
63
+ if self.command is not None:
64
+ return list(self.command)
65
+
66
+ raise RuntimeError(f"No command configured for service: {self.service_id}")
67
+
68
+
69
# Registry of every service/task the kernel can manage, keyed by service_id.
# "daemon" entries are long-running and tracked via pid files; "task" entries
# are one-shot pipeline commands that run to completion.
# NOTE(review): daemons log under local/logs/ while tasks log under
# local/pmf/logs/ (LOG_DIR) — confirm this split is intentional.
SERVICE_SPECS: Dict[str, ServiceSpec] = {
    "watcher": ServiceSpec(
        service_id="watcher",
        label="Watcher Daemon",
        service_type="daemon",
        description="Live VS Code data watcher and incremental ingest process.",
        cwd=ROOT_DIR,
        pid_file=WATCHER_PID_FILE,
        log_file=LOCAL_DIR / "logs" / "watcher.log",
        allowed_actions=["start", "stop", "restart", "status"],
    ),
    "ui": ServiceSpec(
        service_id="ui",
        label="Web UI",
        service_type="daemon",
        description="Local web dashboard for Ark runtime and session analytics.",
        cwd=ROOT_DIR,
        pid_file=UI_PID_FILE,
        log_file=LOCAL_DIR / "logs" / "ui.log",
        allowed_actions=["start", "stop", "restart", "status"],
    ),
    "sync": ServiceSpec(
        service_id="sync",
        label="Full Sync",
        service_type="task",
        description="Full ingest and rebuild pipeline for Ark data.",
        command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "ingest.py")],
        cwd=ROOT_DIR,
        log_file=LOG_DIR / "sync.log",
        allowed_actions=["start", "status"],
    ),
    "reconstruct": ServiceSpec(
        service_id="reconstruct",
        label="Reconstruct",
        service_type="task",
        description="Reconstruct conversations and rebuild the full text search index.",
        command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "reconstruct.py")],
        cwd=ROOT_DIR,
        log_file=LOG_DIR / "reconstruct.log",
        allowed_actions=["start", "status"],
    ),
    "embed-build": ServiceSpec(
        service_id="embed-build",
        label="Embed Build",
        service_type="task",
        description="Build semantic embeddings and session intelligence.",
        command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "embed.py"), "build"],
        cwd=ROOT_DIR,
        log_file=LOG_DIR / "embed.log",
        allowed_actions=["start", "status"],
    ),
}
121
+
122
+
123
def default_state():
    """Return a fresh runtime state: every known service marked stopped."""
    services = {}
    for service_id in SERVICE_SPECS:
        services[service_id] = {
            "service_id": service_id,
            "status": "stopped",
            "pid": None,
            "exit_code": None,
            "started_at": None,
            "updated_at": None,
            "last_error": None,
        }
    return {"services": services}
133
+
134
+
135
class PMFKernelError(Exception):
    """Raised for unknown service ids and failed service-control actions."""
137
+
138
+
139
class PMFKernel:
    """
    Process-manager kernel for the services declared in SERVICE_SPECS.

    Starts, stops, and inspects daemons/tasks, tracking each service's
    status/pid in an in-memory dict that is persisted to RUNTIME_FILE
    (runtime.json) after every mutation. Daemons are additionally tracked
    via pid files so state survives kernel restarts.
    """

    def __init__(self):
        # Load eagerly; a missing or corrupt runtime.json is replaced by
        # default_state() and written back to disk.
        self.state_path = RUNTIME_FILE
        self.state = self._load_state()

    def _load_state(self):
        """Load runtime.json, falling back to (and persisting) defaults."""
        if self.state_path.exists():
            try:
                return json.loads(self.state_path.read_text())
            except Exception:
                pass  # unreadable/corrupt file: fall through to defaults
        state = default_state()
        self._save_state(state)
        return state

    def _save_state(self, state=None):
        """Persist *state* (default: self.state) to runtime.json."""
        if state is None:
            state = self.state
        self.state_path.write_text(json.dumps(state, indent=2))

    def _touch_state(self, service_id: str, **updates):
        """Apply *updates* to one service's state, stamp updated_at, save.

        Raises PMFKernelError for an unknown service_id.
        """
        svc_state = self.state["services"].get(service_id)
        if svc_state is None:
            raise PMFKernelError(f"Unknown service: {service_id}")
        svc_state.update(updates)
        svc_state["updated_at"] = now_iso()
        self._save_state()
        return svc_state

    def _is_process_alive(self, pid: int) -> bool:
        """Best-effort liveness check via signal 0 (no signal delivered)."""
        # NOTE(review): an EPERM OSError (process exists but belongs to
        # another user) is reported as "not alive" here — confirm acceptable.
        try:
            os.kill(pid, 0)
            return True
        except OSError:
            return False

    def _refresh_service(self, service_id: str):
        """Reconcile a service's recorded state with reality (pid + pid file).

        Order of checks: tracked pid alive → pid file's pid alive (adopt it)
        → tracked pid dead (mark stopped). Saves state only when it changed.
        """
        spec = SERVICE_SPECS[service_id]
        state = self.state["services"][service_id]

        pid = state.get("pid")
        if pid and self._is_process_alive(pid):
            # Promote stale statuses to "running" but leave "starting" alone.
            if state["status"] not in ["running", "starting"]:
                state["status"] = "running"
                state["updated_at"] = now_iso()
            return state

        if spec.pid_file and spec.pid_file.exists():
            # Tracked pid is gone but a pid file exists — e.g. the daemon
            # was started outside this kernel instance; adopt its pid.
            try:
                file_pid = int(spec.pid_file.read_text().strip())
                if self._is_process_alive(file_pid):
                    state["pid"] = file_pid
                    state["status"] = "running"
                    state["updated_at"] = now_iso()
                    self._save_state()
                    return state
            except Exception:
                pass  # malformed/racing pid file: ignore

        if pid and not self._is_process_alive(pid):
            state["status"] = "stopped"
            state["pid"] = None
            self._save_state()

        return state

    def service_spec(self, service_id: str) -> ServiceSpec:
        """Return the ServiceSpec for *service_id*, or raise PMFKernelError."""
        spec = SERVICE_SPECS.get(service_id)
        if not spec:
            raise PMFKernelError(f"Unknown service: {service_id}")
        return spec

    def services(self) -> List[Dict]:
        """Return a refreshed status summary dict for every known service."""
        results = []
        for service_id in SERVICE_SPECS:
            spec = SERVICE_SPECS[service_id]
            state = self._refresh_service(service_id)
            results.append({
                "service_id": service_id,
                "label": spec.label,
                "description": spec.description,
                "service_type": spec.service_type,
                "status": state["status"],
                "pid": state.get("pid"),
                "exit_code": state.get("exit_code"),
                "started_at": state.get("started_at"),
                "updated_at": state.get("updated_at"),
                "log_file": str(spec.log_file) if spec.log_file else None,
                "allowed_actions": spec.allowed_actions,
            })
        return results

    def service_status(self, service_id: str) -> Dict:
        """Return the refreshed status summary for a single service."""
        spec = self.service_spec(service_id)
        state = self._refresh_service(service_id)
        return {
            "service_id": service_id,
            "label": spec.label,
            "description": spec.description,
            "service_type": spec.service_type,
            "status": state["status"],
            "pid": state.get("pid"),
            "exit_code": state.get("exit_code"),
            "started_at": state.get("started_at"),
            "updated_at": state.get("updated_at"),
            "log_file": str(spec.log_file) if spec.log_file else None,
            "allowed_actions": spec.allowed_actions,
        }

    def start_service(self, service_id: str, options: Dict[str, str] = None) -> Dict:
        """Launch a service and return its refreshed status dict.

        Daemons get their own session (os.setsid) so they survive the CLI;
        stdout/stderr are appended to the service's log file.
        Raises PMFKernelError if a pid-file daemon is already running.
        """
        spec = self.service_spec(service_id)
        state = self.state["services"][service_id]

        # Refuse to double-start pid-file daemons; clean up stale pid files.
        if service_id in ["watcher", "ui"] and spec.pid_file and spec.pid_file.exists():
            try:
                existing_pid = int(spec.pid_file.read_text().strip())
                if self._is_process_alive(existing_pid):
                    raise PMFKernelError(f"{spec.label} is already running (pid={existing_pid})")
                spec.pid_file.unlink(missing_ok=True)
            except ValueError:
                spec.pid_file.unlink(missing_ok=True)

        command = spec.build_command(options or {})
        log_file = spec.log_file or LOG_DIR / f"{service_id}.log"
        # NOTE(review): daemon log files live under local/logs/, which the
        # import-time mkdirs do not create — open() would fail if it is
        # missing; confirm something else creates it.
        with open(log_file, "a") as fh:
            proc = subprocess.Popen(
                command,
                cwd=spec.cwd or ROOT_DIR,
                # Child env = our env + per-service overrides + PYTHONPATH
                # pinned to the repo root so the cda package imports.
                env={**os.environ, **(spec.env or {}), "PYTHONPATH": str(ROOT_DIR)},
                stdout=fh,
                stderr=fh,
                # Daemons detach into their own session; POSIX-only.
                preexec_fn=os.setsid if spec.service_type == "daemon" else None,
            )

        state["pid"] = proc.pid
        state["status"] = "starting"
        state["started_at"] = now_iso()
        state["exit_code"] = None
        state["last_error"] = None
        state["updated_at"] = now_iso()
        self._save_state()

        # For pid-file services, poll up to ~3s for the service to write its
        # own pid file (the daemon may re-fork, so prefer that pid to ours).
        if spec.pid_file:
            wait_seconds = 0.0
            while wait_seconds < 3.0:
                if spec.pid_file.exists():
                    try:
                        pid = int(spec.pid_file.read_text().strip())
                        if self._is_process_alive(pid):
                            state["pid"] = pid
                            state["status"] = "running"
                            state["updated_at"] = now_iso()
                            self._save_state()
                            return self.service_status(service_id)
                    except Exception:
                        pass  # pid file mid-write; retry
                time.sleep(0.25)
                wait_seconds += 0.25

        # NOTE(review): both branches are identical — daemons and tasks are
        # marked "running" alike; likely a leftover from a planned
        # distinction (e.g. tasks → "started"). Confirm intent.
        if spec.service_type == "daemon":
            state["status"] = "running"
        else:
            state["status"] = "running"
        self._save_state()
        return self.service_status(service_id)

    def stop_service(self, service_id: str) -> Dict:
        """SIGTERM a running service and return its refreshed status dict.

        The pid file (when present) is authoritative over the tracked pid.
        Raises PMFKernelError when no pid is known or the kill fails.
        """
        spec = self.service_spec(service_id)
        state = self.state["services"][service_id]

        pid = None
        if spec.pid_file and spec.pid_file.exists():
            try:
                pid = int(spec.pid_file.read_text().strip())
            except Exception:
                pid = None

        if pid is None:
            pid = state.get("pid")

        if pid is None:
            raise PMFKernelError(f"No running PID found for {spec.label}")

        if not self._is_process_alive(pid):
            # Already dead: just reconcile our bookkeeping.
            state["status"] = "stopped"
            state["pid"] = None
            self._save_state()
            return self.service_status(service_id)

        try:
            os.kill(pid, signal.SIGTERM)
            time.sleep(0.5)  # grace period for the process to exit
        except OSError as exc:
            raise PMFKernelError(f"Failed to stop {spec.label}: {exc}")

        if spec.pid_file and spec.pid_file.exists():
            spec.pid_file.unlink(missing_ok=True)

        state["status"] = "stopped"
        state["pid"] = None
        state["updated_at"] = now_iso()
        self._save_state()
        return self.service_status(service_id)

    def restart_service(self, service_id: str, options: Dict[str, str] = None) -> Dict:
        """Stop then start a service; raises if it was not running."""
        self.stop_service(service_id)
        time.sleep(0.5)  # let the old process release ports/pid files
        return self.start_service(service_id, options=options or {})

    def tail_log(self, service_id: str, lines: int = 200) -> str:
        """Return the last *lines* lines of the service's log file ("" if none).

        Reads backwards in 4 KiB chunks so huge logs are never fully loaded;
        capped at 8 KiB per requested line as a safety bound.
        """
        spec = self.service_spec(service_id)
        if not spec.log_file or not spec.log_file.exists():
            return ""
        with open(spec.log_file, "rb") as fh:
            fh.seek(0, os.SEEK_END)
            pos = fh.tell()
            chunk = bytearray()
            while pos > 0 and len(chunk) < 8192 * lines:
                step = min(4096, pos)
                pos -= step
                fh.seek(pos)
                chunk[:0] = fh.read(step)  # prepend: we read back-to-front
                if chunk.count(b"\n") > lines:
                    break
        text = chunk.decode("utf-8", errors="replace")
        return "\n".join(text.strip().splitlines()[-lines:])