code-data-ark 2.0.2__tar.gz → 2.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/PKG-INFO +15 -13
- code_data_ark-2.0.4/cda/kernel/paths.py +54 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/kernel/pmf_kernel.py +153 -26
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/kernel/selfcheck.py +4 -7
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/embed.py +2 -4
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/extract.py +1 -3
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/ingest.py +5 -3
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/parse_edits.py +1 -5
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/reconstruct.py +2 -3
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/watcher.py +3 -6
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/ui/cli.py +163 -41
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/ui/web.py +6 -10
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/changelog.md +24 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/pyproject.toml +1 -1
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/readme.md +14 -12
- code_data_ark-2.0.4/version +1 -0
- code_data_ark-2.0.2/version +0 -1
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/.flake8 +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/.github/workflows/ci.yml +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/.gitignore +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/bin/release.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/kernel/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/kernel/control_db.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/pipeline/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/cda/ui/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/contributing.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/docs/architecture.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/docs/examples/usage.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/docs/pmf_kernel.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/docs/roadmap.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/license +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/makefile +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/tests/test_basic.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.4}/tests/test_selfcheck.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-data-ark
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.4
|
|
4
4
|
Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
|
|
5
5
|
Project-URL: Homepage, https://github.com/goCosmix/cda
|
|
6
6
|
Project-URL: Repository, https://github.com/goCosmix/cda.git
|
|
@@ -121,39 +121,41 @@ make install-dev
|
|
|
121
121
|
|
|
122
122
|
## ⚡ Quick Start
|
|
123
123
|
|
|
124
|
-
1. **
|
|
124
|
+
1. **Install**
|
|
125
125
|
|
|
126
126
|
```bash
|
|
127
|
-
|
|
127
|
+
pip install code-data-ark
|
|
128
128
|
```
|
|
129
129
|
|
|
130
|
-
2. **
|
|
130
|
+
2. **Initialize — create `~/.cda/` and validate your VS Code data path**
|
|
131
131
|
|
|
132
132
|
```bash
|
|
133
|
-
cda
|
|
133
|
+
cda init
|
|
134
134
|
```
|
|
135
135
|
|
|
136
|
-
3. **
|
|
136
|
+
3. **Ingest all VS Code session data**
|
|
137
137
|
|
|
138
138
|
```bash
|
|
139
|
-
cda
|
|
139
|
+
cda sync
|
|
140
140
|
```
|
|
141
141
|
|
|
142
|
-
4. **
|
|
142
|
+
4. **Start the live watcher daemon**
|
|
143
143
|
|
|
144
144
|
```bash
|
|
145
|
-
cda
|
|
145
|
+
cda watch start
|
|
146
146
|
```
|
|
147
147
|
|
|
148
|
-
|
|
148
|
+
5. **Open the web dashboard**
|
|
149
149
|
|
|
150
150
|
```bash
|
|
151
|
-
cda
|
|
151
|
+
cda serve # → http://127.0.0.1:10001
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
-
|
|
154
|
+
6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
```bash
|
|
157
|
+
cda embed build
|
|
158
|
+
```
|
|
157
159
|
|
|
158
160
|
## 🌐 Web UI
|
|
159
161
|
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cda.kernel.paths — canonical path resolution for Code Data Ark.
|
|
3
|
+
|
|
4
|
+
CDA_HOME is the single root for all runtime state (DB, PID files, logs,
|
|
5
|
+
queue, PMF runtime). It is resolved exactly once at import time via:
|
|
6
|
+
|
|
7
|
+
1. CDA_HOME environment variable (absolute path)
|
|
8
|
+
2. ~/.cda/ (default — survives pip install, editable install, CI)
|
|
9
|
+
|
|
10
|
+
Pipeline stages and the CLI all import from here so every module agrees
|
|
11
|
+
on the same paths regardless of where the package is installed.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
# ── home resolution ──────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_cda_home() -> Path:
|
|
21
|
+
"""Return the CDA home directory, creating it if it doesn't exist."""
|
|
22
|
+
env = os.environ.get("CDA_HOME")
|
|
23
|
+
if env:
|
|
24
|
+
home = Path(env).expanduser().resolve()
|
|
25
|
+
else:
|
|
26
|
+
home = Path.home() / ".cda"
|
|
27
|
+
home.mkdir(parents=True, exist_ok=True)
|
|
28
|
+
return home
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ── canonical paths (module-level constants, computed once) ─────────────────
|
|
32
|
+
|
|
33
|
+
CDA_HOME = get_cda_home()
|
|
34
|
+
LOCAL_DIR = CDA_HOME # CDA_HOME *is* the local root
|
|
35
|
+
DATA_DIR = CDA_HOME / "data"
|
|
36
|
+
RUN_DIR = CDA_HOME / "run"
|
|
37
|
+
LOG_DIR = CDA_HOME / "logs"
|
|
38
|
+
QUEUE_DIR = CDA_HOME / "queue"
|
|
39
|
+
PMF_DIR = CDA_HOME / "pmf"
|
|
40
|
+
CONFIG_DIR = CDA_HOME / "config"
|
|
41
|
+
|
|
42
|
+
DB_PATH = DATA_DIR / "cda.db"
|
|
43
|
+
PID_FILE = RUN_DIR / "watcher.pid"
|
|
44
|
+
UI_PID_FILE = RUN_DIR / "ui.pid"
|
|
45
|
+
UI_LOG_FILE = LOG_DIR / "ui.log"
|
|
46
|
+
POLICY_FILE = CONFIG_DIR / "policy.txt"
|
|
47
|
+
PMF_LOG_DIR = PMF_DIR / "logs"
|
|
48
|
+
RUNTIME_FILE = PMF_DIR / "runtime.json"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def ensure_dirs() -> None:
|
|
52
|
+
"""Create all runtime directories. Safe to call multiple times."""
|
|
53
|
+
for d in (DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR):
|
|
54
|
+
d.mkdir(parents=True, exist_ok=True)
|
|
@@ -1,26 +1,153 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
+
import shutil
|
|
3
4
|
import signal
|
|
5
|
+
import socket
|
|
4
6
|
import subprocess
|
|
5
7
|
import sys
|
|
8
|
+
import threading
|
|
6
9
|
import time
|
|
10
|
+
import webbrowser
|
|
7
11
|
from dataclasses import dataclass
|
|
8
12
|
from pathlib import Path
|
|
9
13
|
from typing import Dict, List, Optional
|
|
10
14
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
|
|
15
|
+
from cda.kernel.paths import (
|
|
16
|
+
LOG_DIR, RUNTIME_FILE, PMF_LOG_DIR,
|
|
17
|
+
PID_FILE as WATCHER_PID_FILE, UI_PID_FILE, CDA_HOME,
|
|
18
|
+
ensure_dirs,
|
|
19
|
+
)
|
|
20
|
+
|
|
18
21
|
DEFAULT_HOST = "127.0.0.1"
|
|
19
22
|
DEFAULT_PORT = 10001
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
+
# ── launchd integration ──────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
PLIST_LABEL = "com.gocosmix.cda"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def plist_path() -> Path:
|
|
30
|
+
return Path.home() / "Library" / "LaunchAgents" / f"{PLIST_LABEL}.plist"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def generate_plist(cda_bin: str, cda_home: Path) -> str:
|
|
34
|
+
log = cda_home / "logs" / "launchd.log"
|
|
35
|
+
return f"""<?xml version="1.0" encoding="UTF-8"?>
|
|
36
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
37
|
+
<plist version="1.0">
|
|
38
|
+
<dict>
|
|
39
|
+
<key>Label</key>
|
|
40
|
+
<string>{PLIST_LABEL}</string>
|
|
41
|
+
<key>ProgramArguments</key>
|
|
42
|
+
<array>
|
|
43
|
+
<string>{cda_bin}</string>
|
|
44
|
+
<string>pmf</string>
|
|
45
|
+
<string>up</string>
|
|
46
|
+
</array>
|
|
47
|
+
<key>RunAtLoad</key>
|
|
48
|
+
<true/>
|
|
49
|
+
<key>KeepAlive</key>
|
|
50
|
+
<false/>
|
|
51
|
+
<key>StandardOutPath</key>
|
|
52
|
+
<string>{log}</string>
|
|
53
|
+
<key>StandardErrorPath</key>
|
|
54
|
+
<string>{log}</string>
|
|
55
|
+
<key>EnvironmentVariables</key>
|
|
56
|
+
<dict>
|
|
57
|
+
<key>CDA_HOME</key>
|
|
58
|
+
<string>{cda_home}</string>
|
|
59
|
+
<key>PATH</key>
|
|
60
|
+
<string>{os.path.dirname(cda_bin)}:/usr/local/bin:/usr/bin:/bin</string>
|
|
61
|
+
</dict>
|
|
62
|
+
</dict>
|
|
63
|
+
</plist>
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def install_launchd(cda_home: Path) -> Path:
|
|
68
|
+
"""Write the LaunchAgent plist and load it with launchctl."""
|
|
69
|
+
cda_bin = shutil.which("cda")
|
|
70
|
+
if not cda_bin:
|
|
71
|
+
raise PMFKernelError("cda binary not found on PATH — cannot generate plist")
|
|
72
|
+
|
|
73
|
+
target = plist_path()
|
|
74
|
+
target.parent.mkdir(parents=True, exist_ok=True)
|
|
75
|
+
target.write_text(generate_plist(cda_bin, cda_home))
|
|
76
|
+
|
|
77
|
+
# Unload any stale registration first
|
|
78
|
+
subprocess.run(["launchctl", "unload", str(target)], capture_output=True)
|
|
79
|
+
|
|
80
|
+
result = subprocess.run(
|
|
81
|
+
["launchctl", "load", str(target)],
|
|
82
|
+
capture_output=True,
|
|
83
|
+
text=True,
|
|
84
|
+
)
|
|
85
|
+
if result.returncode != 0:
|
|
86
|
+
raise PMFKernelError(f"launchctl load failed: {result.stderr.strip()}")
|
|
87
|
+
|
|
88
|
+
return target
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def uninstall_launchd() -> None:
|
|
92
|
+
"""Unload and remove the LaunchAgent plist."""
|
|
93
|
+
target = plist_path()
|
|
94
|
+
if target.exists():
|
|
95
|
+
subprocess.run(["launchctl", "unload", str(target)], capture_output=True)
|
|
96
|
+
target.unlink(missing_ok=True)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def open_browser_when_ready(
|
|
100
|
+
url: str,
|
|
101
|
+
host: str = DEFAULT_HOST,
|
|
102
|
+
port: int = DEFAULT_PORT,
|
|
103
|
+
timeout: float = 12.0,
|
|
104
|
+
) -> threading.Thread:
|
|
105
|
+
"""
|
|
106
|
+
Spawn a daemon thread that polls host:port and opens a browser when ready.
|
|
107
|
+
For foreground (serve) use: the thread outlives the caller because serve blocks.
|
|
108
|
+
For background (pmf up / ui start): call wait_for_port_and_open_browser() instead so we poll
|
|
109
|
+
synchronously before the process exits.
|
|
110
|
+
"""
|
|
111
|
+
def _wait_and_open():
|
|
112
|
+
elapsed = 0.0
|
|
113
|
+
while elapsed < timeout:
|
|
114
|
+
try:
|
|
115
|
+
with socket.create_connection((host, port), timeout=0.5):
|
|
116
|
+
webbrowser.open(url)
|
|
117
|
+
return
|
|
118
|
+
except OSError:
|
|
119
|
+
time.sleep(0.25)
|
|
120
|
+
elapsed += 0.25
|
|
121
|
+
|
|
122
|
+
t = threading.Thread(target=_wait_and_open, daemon=True)
|
|
123
|
+
t.start()
|
|
124
|
+
return t
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def wait_for_port_and_open_browser(
|
|
128
|
+
url: str,
|
|
129
|
+
host: str = DEFAULT_HOST,
|
|
130
|
+
port: int = DEFAULT_PORT,
|
|
131
|
+
timeout: float = 8.0,
|
|
132
|
+
) -> bool:
|
|
133
|
+
"""
|
|
134
|
+
Block until host:port accepts connections (or timeout), then open browser.
|
|
135
|
+
Use this when the caller process will exit after starting a background service.
|
|
136
|
+
Returns True if port came up, False on timeout.
|
|
137
|
+
"""
|
|
138
|
+
elapsed = 0.0
|
|
139
|
+
while elapsed < timeout:
|
|
140
|
+
try:
|
|
141
|
+
with socket.create_connection((host, port), timeout=0.5):
|
|
142
|
+
webbrowser.open(url)
|
|
143
|
+
return True
|
|
144
|
+
except OSError:
|
|
145
|
+
time.sleep(0.25)
|
|
146
|
+
elapsed += 0.25
|
|
147
|
+
return False
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
ensure_dirs()
|
|
24
151
|
|
|
25
152
|
|
|
26
153
|
def now_ts():
|
|
@@ -58,7 +185,7 @@ class ServiceSpec:
|
|
|
58
185
|
]
|
|
59
186
|
|
|
60
187
|
if self.service_id == "watcher":
|
|
61
|
-
return [sys.executable,
|
|
188
|
+
return [sys.executable, "-m", "cda.pipeline.watcher"]
|
|
62
189
|
|
|
63
190
|
if self.command is not None:
|
|
64
191
|
return list(self.command)
|
|
@@ -72,9 +199,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
72
199
|
label="Watcher Daemon",
|
|
73
200
|
service_type="daemon",
|
|
74
201
|
description="Live VS Code data watcher and incremental ingest process.",
|
|
75
|
-
cwd=
|
|
202
|
+
cwd=CDA_HOME,
|
|
76
203
|
pid_file=WATCHER_PID_FILE,
|
|
77
|
-
log_file=
|
|
204
|
+
log_file=LOG_DIR / "watcher.log",
|
|
78
205
|
allowed_actions=["start", "stop", "restart", "status"],
|
|
79
206
|
),
|
|
80
207
|
"ui": ServiceSpec(
|
|
@@ -82,9 +209,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
82
209
|
label="Web UI",
|
|
83
210
|
service_type="daemon",
|
|
84
211
|
description="Local web dashboard for Ark runtime and session analytics.",
|
|
85
|
-
cwd=
|
|
212
|
+
cwd=CDA_HOME,
|
|
86
213
|
pid_file=UI_PID_FILE,
|
|
87
|
-
log_file=
|
|
214
|
+
log_file=LOG_DIR / "ui.log",
|
|
88
215
|
allowed_actions=["start", "stop", "restart", "status"],
|
|
89
216
|
),
|
|
90
217
|
"sync": ServiceSpec(
|
|
@@ -92,9 +219,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
92
219
|
label="Full Sync",
|
|
93
220
|
service_type="task",
|
|
94
221
|
description="Full ingest and rebuild pipeline for Ark data.",
|
|
95
|
-
command=[sys.executable,
|
|
96
|
-
cwd=
|
|
97
|
-
log_file=
|
|
222
|
+
command=[sys.executable, "-m", "cda.pipeline.ingest"],
|
|
223
|
+
cwd=CDA_HOME,
|
|
224
|
+
log_file=PMF_LOG_DIR / "sync.log",
|
|
98
225
|
allowed_actions=["start", "status"],
|
|
99
226
|
),
|
|
100
227
|
"reconstruct": ServiceSpec(
|
|
@@ -102,9 +229,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
102
229
|
label="Reconstruct",
|
|
103
230
|
service_type="task",
|
|
104
231
|
description="Reconstruct conversations and rebuild the full text search index.",
|
|
105
|
-
command=[sys.executable,
|
|
106
|
-
cwd=
|
|
107
|
-
log_file=
|
|
232
|
+
command=[sys.executable, "-m", "cda.pipeline.reconstruct"],
|
|
233
|
+
cwd=CDA_HOME,
|
|
234
|
+
log_file=PMF_LOG_DIR / "reconstruct.log",
|
|
108
235
|
allowed_actions=["start", "status"],
|
|
109
236
|
),
|
|
110
237
|
"embed-build": ServiceSpec(
|
|
@@ -112,9 +239,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
112
239
|
label="Embed Build",
|
|
113
240
|
service_type="task",
|
|
114
241
|
description="Build semantic embeddings and session intelligence.",
|
|
115
|
-
command=[sys.executable,
|
|
116
|
-
cwd=
|
|
117
|
-
log_file=
|
|
242
|
+
command=[sys.executable, "-m", "cda.pipeline.embed", "build"],
|
|
243
|
+
cwd=CDA_HOME,
|
|
244
|
+
log_file=PMF_LOG_DIR / "embed.log",
|
|
118
245
|
allowed_actions=["start", "status"],
|
|
119
246
|
),
|
|
120
247
|
}
|
|
@@ -263,8 +390,8 @@ class PMFKernel:
|
|
|
263
390
|
with open(log_file, "a") as fh:
|
|
264
391
|
proc = subprocess.Popen(
|
|
265
392
|
command,
|
|
266
|
-
cwd=spec.cwd or
|
|
267
|
-
env={**os.environ, **(spec.env or {})
|
|
393
|
+
cwd=spec.cwd or CDA_HOME,
|
|
394
|
+
env={**os.environ, **(spec.env or {})},
|
|
268
395
|
stdout=fh,
|
|
269
396
|
stderr=fh,
|
|
270
397
|
preexec_fn=os.setsid if spec.service_type == "daemon" else None,
|
|
@@ -26,15 +26,12 @@ import subprocess
|
|
|
26
26
|
import sys
|
|
27
27
|
from pathlib import Path
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR
|
|
30
|
+
|
|
30
31
|
PACKAGE_DIR = Path(__file__).resolve().parent
|
|
31
32
|
SOURCE_DIR = PACKAGE_DIR.parent.parent # source/ — tracked repo root
|
|
32
|
-
PROJECT_DIR = PACKAGE_DIR.parent.parent.parent # repo root — where layers live
|
|
33
|
-
LOCAL_DIR = PROJECT_DIR / "local"
|
|
34
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
35
|
-
PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
|
|
36
|
-
QUEUE_DIR = LOCAL_DIR / "queue"
|
|
37
33
|
VERSION_FILE = SOURCE_DIR / "version"
|
|
34
|
+
GIT_ROOT = SOURCE_DIR.parent # repo root — used for git check-ignore
|
|
38
35
|
|
|
39
36
|
REQUIRED_TABLES = [
|
|
40
37
|
"sessions", "exchanges", "tool_calls", "vfs", "workspaces",
|
|
@@ -231,7 +228,7 @@ def check_data_gitignored():
|
|
|
231
228
|
try:
|
|
232
229
|
result = subprocess.run(
|
|
233
230
|
["git", "check-ignore", "-q", "local"],
|
|
234
|
-
cwd=
|
|
231
|
+
cwd=GIT_ROOT,
|
|
235
232
|
capture_output=True,
|
|
236
233
|
)
|
|
237
234
|
if result.returncode == 0:
|
|
@@ -10,12 +10,10 @@ This stage builds semantic embeddings and mini-intelligence artifacts:
|
|
|
10
10
|
|
|
11
11
|
import json
|
|
12
12
|
import sqlite3
|
|
13
|
-
from pathlib import Path
|
|
14
13
|
from typing import Dict, List, Optional, Tuple
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
15
|
+
from cda.kernel.paths import DB_PATH
|
|
16
|
+
|
|
19
17
|
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
|
20
18
|
MAX_EMBED_TEXT = 1400
|
|
21
19
|
|
|
@@ -26,9 +26,7 @@ from datetime import datetime
|
|
|
26
26
|
from typing import Dict, List, Tuple, DefaultDict
|
|
27
27
|
from collections import defaultdict
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
31
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
29
|
+
from cda.kernel.paths import DB_PATH
|
|
32
30
|
|
|
33
31
|
# ─────────────────────────────────────────────────────────
|
|
34
32
|
# Signal patterns
|
|
@@ -26,6 +26,11 @@ import time
|
|
|
26
26
|
import logging
|
|
27
27
|
from pathlib import Path
|
|
28
28
|
|
|
29
|
+
from cda.kernel.paths import DB_PATH, ensure_dirs
|
|
30
|
+
|
|
31
|
+
# Ensure local dirs are present before writing
|
|
32
|
+
ensure_dirs()
|
|
33
|
+
|
|
29
34
|
# Set up logging
|
|
30
35
|
logging.basicConfig(
|
|
31
36
|
level=logging.INFO,
|
|
@@ -39,9 +44,6 @@ HOME = Path.home()
|
|
|
39
44
|
VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", HOME / "Library/Application Support/Code/User"))
|
|
40
45
|
VS_STORAGE = VSCODE_DATA_DIR / "workspaceStorage"
|
|
41
46
|
GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
|
|
42
|
-
ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
|
|
43
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
44
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
45
47
|
|
|
46
48
|
# Large index DBs — too big to blob, record path only
|
|
47
49
|
SKIP_BLOB_PATTERNS = ["workspace-chunks.db", "local-index"]
|
|
@@ -33,11 +33,7 @@ Edit rounds: len(checkpoints) - 1 (first is always "Initial State")
|
|
|
33
33
|
import sqlite3
|
|
34
34
|
import gzip
|
|
35
35
|
import json
|
|
36
|
-
from
|
|
37
|
-
|
|
38
|
-
ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
|
|
39
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
40
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
36
|
+
from cda.kernel.paths import DB_PATH
|
|
41
37
|
|
|
42
38
|
SCHEMA = """
|
|
43
39
|
CREATE TABLE IF NOT EXISTS edit_sessions (
|
|
@@ -19,9 +19,8 @@ import time
|
|
|
19
19
|
from typing import Optional
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
22
|
+
from cda.kernel.paths import DB_PATH
|
|
23
|
+
|
|
25
24
|
NOW_MS = int(time.time() * 1000)
|
|
26
25
|
|
|
27
26
|
EXCHANGES_SCHEMA = """
|
|
@@ -38,17 +38,14 @@ except ImportError:
|
|
|
38
38
|
print("ERROR: watchfiles not installed. Run: pip install watchfiles")
|
|
39
39
|
sys.exit(1)
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
43
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
44
|
-
PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
|
|
45
|
-
QUEUE_DIR = LOCAL_DIR / "queue"
|
|
41
|
+
from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR, LOG_DIR, ensure_dirs
|
|
46
42
|
# Allow override via env var for portability
|
|
47
43
|
VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", Path.home() / "Library/Application Support/Code/User"))
|
|
48
44
|
VS_ROOT = VSCODE_DATA_DIR / "workspaceStorage"
|
|
49
45
|
GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
|
|
50
46
|
|
|
51
|
-
|
|
47
|
+
ensure_dirs()
|
|
48
|
+
log_file = LOG_DIR / "watcher.log"
|
|
52
49
|
logging.basicConfig(
|
|
53
50
|
level=logging.INFO,
|
|
54
51
|
format="%(asctime)s %(levelname)-7s %(message)s",
|
|
@@ -24,7 +24,11 @@ Commands:
|
|
|
24
24
|
cda pmf stop <service> Stop a service
|
|
25
25
|
cda pmf restart <service> Restart a service
|
|
26
26
|
cda pmf logs <service> Tail service logs
|
|
27
|
+
cda pmf up Start watcher + web UI (opens browser when ready)
|
|
28
|
+
cda pmf install Register as macOS LaunchAgent (auto-start on login)
|
|
29
|
+
cda pmf uninstall Remove the LaunchAgent registration
|
|
27
30
|
cda check Run a full self-diagnostic. The system checks itself.
|
|
31
|
+
cda init First-run setup — create ~/.cda/ and validate environment
|
|
28
32
|
cda serve Start the local web UI on port 10001
|
|
29
33
|
cda sync Full re-ingest from disk (rebuilds entire DB)
|
|
30
34
|
cda reconstruct Re-run reconstruction and FTS rebuild only
|
|
@@ -61,23 +65,20 @@ import textwrap
|
|
|
61
65
|
import datetime
|
|
62
66
|
from pathlib import Path
|
|
63
67
|
from cda.pipeline.reconstruct import decompress_vfs
|
|
64
|
-
from cda.kernel.pmf_kernel import
|
|
68
|
+
from cda.kernel.pmf_kernel import (
|
|
69
|
+
PMFKernel, PMFKernelError,
|
|
70
|
+
install_launchd, uninstall_launchd, plist_path,
|
|
71
|
+
open_browser_when_ready, wait_for_port_and_open_browser,
|
|
72
|
+
)
|
|
73
|
+
from cda.kernel.paths import (
|
|
74
|
+
DB_PATH, PID_FILE, UI_PID_FILE, UI_LOG_FILE,
|
|
75
|
+
QUEUE_DIR, POLICY_FILE, ensure_dirs,
|
|
76
|
+
)
|
|
65
77
|
|
|
66
78
|
import click
|
|
67
79
|
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
ARK_DIR = PACKAGE_DIR.parent.parent.parent
|
|
71
|
-
LOCAL_DIR = ARK_DIR / "local"
|
|
72
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
73
|
-
PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
|
|
74
|
-
UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
|
|
75
|
-
UI_LOG_FILE = LOCAL_DIR / "logs" / "ui.log"
|
|
76
|
-
WATCHER = PACKAGE_DIR.parent / "pipeline" / "watcher.py"
|
|
77
|
-
INGEST = PACKAGE_DIR.parent / "pipeline" / "ingest.py"
|
|
78
|
-
RECON = PACKAGE_DIR.parent / "pipeline" / "reconstruct.py"
|
|
79
|
-
EXTRACT = PACKAGE_DIR.parent / "pipeline" / "extract.py"
|
|
80
|
-
EMBED = PACKAGE_DIR.parent / "pipeline" / "embed.py"
|
|
80
|
+
# Ensure runtime dirs exist on every CLI invocation
|
|
81
|
+
ensure_dirs()
|
|
81
82
|
|
|
82
83
|
kernel = PMFKernel()
|
|
83
84
|
|
|
@@ -333,14 +334,13 @@ def status():
|
|
|
333
334
|
click.echo(f" Start with: {bold('cda watch start')}")
|
|
334
335
|
|
|
335
336
|
# Queue status
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
completed = len(list(queue_dir.glob("*.completed")))
|
|
337
|
+
if QUEUE_DIR.exists():
|
|
338
|
+
pending = len(list(QUEUE_DIR.glob("*.json")))
|
|
339
|
+
completed = len(list(QUEUE_DIR.glob("*.completed")))
|
|
340
340
|
click.echo(f" Queue: {pending} pending, {completed} completed")
|
|
341
341
|
if pending > 0:
|
|
342
342
|
# Show last pending operation
|
|
343
|
-
pending_files = sorted(
|
|
343
|
+
pending_files = sorted(QUEUE_DIR.glob("*.json"))
|
|
344
344
|
if pending_files:
|
|
345
345
|
try:
|
|
346
346
|
data = json.loads(pending_files[-1].read_text())
|
|
@@ -368,10 +368,11 @@ def status():
|
|
|
368
368
|
@cli.command("serve")
|
|
369
369
|
@click.option("--host", default="127.0.0.1", show_default=True, help="Local host to bind the web UI")
|
|
370
370
|
@click.option("--port", default=10001, show_default=True, help="Local port for the web UI")
|
|
371
|
-
|
|
371
|
+
@click.option("--no-browser", "no_browser", is_flag=True, default=False, help="Don't open browser automatically")
|
|
372
|
+
def serve(host, port, no_browser):
|
|
372
373
|
"""Start the local web UI for Code Data Ark in the foreground."""
|
|
373
|
-
|
|
374
|
-
click.echo(yellow("
|
|
374
|
+
url = f"http://{host}:{port}"
|
|
375
|
+
click.echo(yellow(f" Starting local web UI at {url}"))
|
|
375
376
|
try:
|
|
376
377
|
import importlib
|
|
377
378
|
import cda.ui.web as web
|
|
@@ -380,6 +381,8 @@ def serve(host, port):
|
|
|
380
381
|
click.echo(red(" Failed to start web UI. Ensure the package is installed and importable."))
|
|
381
382
|
click.echo(red(f" Details: {exc}"))
|
|
382
383
|
return
|
|
384
|
+
if not no_browser:
|
|
385
|
+
open_browser_when_ready(url, host, port)
|
|
383
386
|
web.start_server(host=host, port=port)
|
|
384
387
|
|
|
385
388
|
|
|
@@ -403,12 +406,17 @@ def _ui_is_running():
|
|
|
403
406
|
@ui.command("start")
|
|
404
407
|
@click.option("--host", default="127.0.0.1", show_default=True, help="Local host to bind the web UI")
|
|
405
408
|
@click.option("--port", default=10001, show_default=True, help="Local port for the web UI")
|
|
406
|
-
|
|
409
|
+
@click.option("--no-browser", "no_browser", is_flag=True, default=False, help="Don't open browser automatically")
|
|
410
|
+
def ui_start(host, port, no_browser):
|
|
407
411
|
"""Start the web UI as a background service."""
|
|
408
412
|
try:
|
|
409
413
|
result = kernel.start_service("ui", options={"host": host, "port": port})
|
|
410
|
-
|
|
414
|
+
url = f"http://{host}:{port}"
|
|
415
|
+
click.echo(green(f" Web UI started in background at {url} pid={result['pid']}"))
|
|
411
416
|
click.echo(yellow(f" Logs: {UI_LOG_FILE}"))
|
|
417
|
+
if not no_browser:
|
|
418
|
+
click.echo(dim(" Opening browser when server is ready..."))
|
|
419
|
+
wait_for_port_and_open_browser(url, host, port)
|
|
412
420
|
except PMFKernelError as exc:
|
|
413
421
|
click.echo(red(f" Failed to start UI: {exc}"))
|
|
414
422
|
|
|
@@ -492,12 +500,17 @@ def pmf_status(service_id):
|
|
|
492
500
|
@click.argument("service_id")
|
|
493
501
|
@click.option("--host", default="127.0.0.1", help="Host override for UI service")
|
|
494
502
|
@click.option("--port", default=10001, help="Port override for UI service")
|
|
495
|
-
|
|
503
|
+
@click.option("--no-browser", "no_browser", is_flag=True, default=False, help="Don't open browser (UI service only)")
|
|
504
|
+
def pmf_start(service_id, host, port, no_browser):
|
|
496
505
|
"""Start a PMF-managed Ark service."""
|
|
497
506
|
options = {"host": host, "port": port} if service_id == "ui" else None
|
|
498
507
|
try:
|
|
499
508
|
result = kernel.start_service(service_id, options=options)
|
|
500
509
|
click.echo(green(f" Started {result['label']} pid={result['pid']}"))
|
|
510
|
+
if service_id == "ui" and not no_browser:
|
|
511
|
+
url = f"http://{host}:{port}"
|
|
512
|
+
click.echo(dim(" Opening browser when server is ready..."))
|
|
513
|
+
wait_for_port_and_open_browser(url, host, port)
|
|
501
514
|
except PMFKernelError as exc:
|
|
502
515
|
click.echo(red(f" {exc}"))
|
|
503
516
|
|
|
@@ -536,6 +549,69 @@ def pmf_logs(service_id, tail):
|
|
|
536
549
|
click.echo(red(f" {exc}"))
|
|
537
550
|
|
|
538
551
|
|
|
552
|
+
@pmf.command("up")
|
|
553
|
+
@click.option("--host", default="127.0.0.1", show_default=True, help="Host for web UI")
|
|
554
|
+
@click.option("--port", default=10001, show_default=True, help="Port for web UI")
|
|
555
|
+
@click.option("--no-browser", "no_browser", is_flag=True, default=False, help="Don't open browser when UI is ready")
|
|
556
|
+
def pmf_up(host, port, no_browser):
|
|
557
|
+
"""Start all CDA services (watcher + web UI). Called automatically by launchd on login."""
|
|
558
|
+
url = f"http://{host}:{port}"
|
|
559
|
+
|
|
560
|
+
click.echo(bold(" Code Data Ark — starting services"))
|
|
561
|
+
click.echo(hr())
|
|
562
|
+
|
|
563
|
+
try:
|
|
564
|
+
result = kernel.start_service("watcher")
|
|
565
|
+
click.echo(green(f" Watcher started pid={result['pid']}"))
|
|
566
|
+
except PMFKernelError as exc:
|
|
567
|
+
click.echo(yellow(f" Watcher {exc}"))
|
|
568
|
+
|
|
569
|
+
try:
|
|
570
|
+
result = kernel.start_service("ui", options={"host": host, "port": port})
|
|
571
|
+
click.echo(green(f" Web UI started pid={result['pid']} → {url}"))
|
|
572
|
+
if not no_browser:
|
|
573
|
+
click.echo(dim(" Opening browser when server is ready..."))
|
|
574
|
+
wait_for_port_and_open_browser(url, host, port)
|
|
575
|
+
except PMFKernelError as exc:
|
|
576
|
+
click.echo(yellow(f" Web UI {exc}"))
|
|
577
|
+
|
|
578
|
+
click.echo()
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
@pmf.command("install")
|
|
582
|
+
def pmf_install():
|
|
583
|
+
"""Install CDA as a macOS launchd LaunchAgent (auto-start on login)."""
|
|
584
|
+
from cda.kernel.paths import CDA_HOME as _cda_home
|
|
585
|
+
click.echo()
|
|
586
|
+
click.echo(bold(" Installing CDA LaunchAgent"))
|
|
587
|
+
click.echo(hr())
|
|
588
|
+
try:
|
|
589
|
+
target = install_launchd(_cda_home)
|
|
590
|
+
click.echo(green(f" Plist: {target}"))
|
|
591
|
+
click.echo(green(" Label: com.gocosmix.cda"))
|
|
592
|
+
click.echo(green(" Loaded: yes — CDA will start automatically on next login"))
|
|
593
|
+
click.echo()
|
|
594
|
+
click.echo(dim(" To start services now without logging out:"))
|
|
595
|
+
click.echo(dim(" cda pmf up"))
|
|
596
|
+
click.echo()
|
|
597
|
+
except PMFKernelError as exc:
|
|
598
|
+
click.echo(red(f" {exc}"))
|
|
599
|
+
click.echo(yellow(" Make sure `cda` is on PATH: export PATH=\"$HOME/Library/Python/3.9/bin:$PATH\""))
|
|
600
|
+
click.echo()
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
@pmf.command("uninstall")
|
|
604
|
+
def pmf_uninstall():
|
|
605
|
+
"""Remove the CDA launchd LaunchAgent."""
|
|
606
|
+
target = plist_path()
|
|
607
|
+
if not target.exists():
|
|
608
|
+
click.echo(yellow(" No LaunchAgent plist found — nothing to uninstall."))
|
|
609
|
+
return
|
|
610
|
+
uninstall_launchd()
|
|
611
|
+
click.echo(green(f" Removed: {target}"))
|
|
612
|
+
click.echo(green(" CDA will no longer start automatically on login."))
|
|
613
|
+
|
|
614
|
+
|
|
539
615
|
@cli.group()
|
|
540
616
|
def embed():
|
|
541
617
|
"""Build and inspect semantic intelligence."""
|
|
@@ -546,7 +622,7 @@ def embed():
|
|
|
546
622
|
def embed_build():
|
|
547
623
|
"""Build semantic embeddings and session intelligence."""
|
|
548
624
|
click.echo(yellow(" Building semantic intelligence..."))
|
|
549
|
-
result = subprocess.run([sys.executable,
|
|
625
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
|
|
550
626
|
if result.returncode == 0:
|
|
551
627
|
click.echo(green(" Embed build complete"))
|
|
552
628
|
else:
|
|
@@ -754,7 +830,7 @@ def sync():
|
|
|
754
830
|
errors = 0
|
|
755
831
|
|
|
756
832
|
click.echo(yellow(" Running full ingest — this rewrites the DB..."))
|
|
757
|
-
result = subprocess.run([sys.executable,
|
|
833
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.ingest"], capture_output=False)
|
|
758
834
|
if result.returncode != 0:
|
|
759
835
|
click.echo(red(" Ingest failed"))
|
|
760
836
|
finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="ingest failed")
|
|
@@ -763,7 +839,7 @@ def sync():
|
|
|
763
839
|
|
|
764
840
|
click.echo(green(" Ingest complete"))
|
|
765
841
|
click.echo(yellow(" Running reconstruction..."))
|
|
766
|
-
result = subprocess.run([sys.executable,
|
|
842
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
|
|
767
843
|
if result.returncode != 0:
|
|
768
844
|
click.echo(red(" Reconstruction failed"))
|
|
769
845
|
finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="reconstruct failed")
|
|
@@ -772,7 +848,7 @@ def sync():
|
|
|
772
848
|
|
|
773
849
|
click.echo(green(" Reconstruction complete"))
|
|
774
850
|
click.echo(yellow(" Running analysis..."))
|
|
775
|
-
result = subprocess.run([sys.executable,
|
|
851
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.extract"], capture_output=False)
|
|
776
852
|
if result.returncode != 0:
|
|
777
853
|
click.echo(red(" Analysis failed"))
|
|
778
854
|
finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="extract failed")
|
|
@@ -781,7 +857,7 @@ def sync():
|
|
|
781
857
|
|
|
782
858
|
click.echo(green(" Analysis complete"))
|
|
783
859
|
click.echo(yellow(" Running semantic intelligence..."))
|
|
784
|
-
result = subprocess.run([sys.executable,
|
|
860
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
|
|
785
861
|
if result.returncode != 0:
|
|
786
862
|
click.echo(red(" Semantic intelligence failed"))
|
|
787
863
|
errors += 1
|
|
@@ -809,7 +885,7 @@ def sync():
|
|
|
809
885
|
def reconstruct():
|
|
810
886
|
"""Re-run session reconstruction and FTS rebuild only."""
|
|
811
887
|
click.echo(yellow(" Reconstructing exchanges..."))
|
|
812
|
-
subprocess.run([sys.executable,
|
|
888
|
+
subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
|
|
813
889
|
click.echo(green(" Done"))
|
|
814
890
|
|
|
815
891
|
|
|
@@ -1470,9 +1546,8 @@ def policy():
|
|
|
1470
1546
|
def policy_allow(pattern):
|
|
1471
1547
|
"""Add an allow pattern for search results."""
|
|
1472
1548
|
# For now, store in a simple text file
|
|
1473
|
-
policy_file = LOCAL_DIR / "config" / "policy.txt"
|
|
1474
1549
|
try:
|
|
1475
|
-
with open(
|
|
1550
|
+
with open(POLICY_FILE, "a") as f:
|
|
1476
1551
|
f.write(f"ALLOW {pattern}\n")
|
|
1477
1552
|
click.echo(green(f" Added allow pattern: {pattern}"))
|
|
1478
1553
|
except Exception as e:
|
|
@@ -1483,9 +1558,8 @@ def policy_allow(pattern):
|
|
|
1483
1558
|
@click.argument("pattern")
|
|
1484
1559
|
def policy_deny(pattern):
|
|
1485
1560
|
"""Add a deny pattern for search results."""
|
|
1486
|
-
policy_file = LOCAL_DIR / "config" / "policy.txt"
|
|
1487
1561
|
try:
|
|
1488
|
-
with open(
|
|
1562
|
+
with open(POLICY_FILE, "a") as f:
|
|
1489
1563
|
f.write(f"DENY {pattern}\n")
|
|
1490
1564
|
click.echo(green(f" Added deny pattern: {pattern}"))
|
|
1491
1565
|
except Exception as e:
|
|
@@ -1495,8 +1569,7 @@ def policy_deny(pattern):
|
|
|
1495
1569
|
@policy.command("list")
|
|
1496
1570
|
def policy_list():
|
|
1497
1571
|
"""List current policies."""
|
|
1498
|
-
|
|
1499
|
-
if not policy_file.exists():
|
|
1572
|
+
if not POLICY_FILE.exists():
|
|
1500
1573
|
click.echo(dim(" No policies configured"))
|
|
1501
1574
|
return
|
|
1502
1575
|
|
|
@@ -1504,7 +1577,7 @@ def policy_list():
|
|
|
1504
1577
|
click.echo(bold(" Data Access Policies"))
|
|
1505
1578
|
click.echo(hr())
|
|
1506
1579
|
try:
|
|
1507
|
-
with open(
|
|
1580
|
+
with open(POLICY_FILE, "r") as f:
|
|
1508
1581
|
for line in f:
|
|
1509
1582
|
line = line.strip()
|
|
1510
1583
|
if line.startswith("ALLOW "):
|
|
@@ -1518,14 +1591,13 @@ def policy_list():
|
|
|
1518
1591
|
|
|
1519
1592
|
def check_policy(text):
|
|
1520
1593
|
"""Check if text passes policy filters. Returns True if allowed."""
|
|
1521
|
-
|
|
1522
|
-
if not policy_file.exists():
|
|
1594
|
+
if not POLICY_FILE.exists():
|
|
1523
1595
|
return True # No policies = allow all
|
|
1524
1596
|
|
|
1525
1597
|
allow_patterns = []
|
|
1526
1598
|
deny_patterns = []
|
|
1527
1599
|
try:
|
|
1528
|
-
with open(
|
|
1600
|
+
with open(POLICY_FILE, "r") as f:
|
|
1529
1601
|
for line in f:
|
|
1530
1602
|
line = line.strip()
|
|
1531
1603
|
if line.startswith("ALLOW "):
|
|
@@ -2574,6 +2646,56 @@ def check(as_json, fail_fast):
|
|
|
2574
2646
|
sys.exit(0 if passed_all else 1)
|
|
2575
2647
|
|
|
2576
2648
|
|
|
2649
|
+
# ─────────────────────────────────────────────
|
|
2650
|
+
# INIT
|
|
2651
|
+
# ─────────────────────────────────────────────
|
|
2652
|
+
|
|
2653
|
+
@cli.command("init")
|
|
2654
|
+
def init():
|
|
2655
|
+
"""First-run setup — create ~/.cda/ directory structure and validate environment."""
|
|
2656
|
+
from cda.kernel.paths import (
|
|
2657
|
+
CDA_HOME, DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR,
|
|
2658
|
+
PMF_DIR, PMF_LOG_DIR, CONFIG_DIR, POLICY_FILE,
|
|
2659
|
+
)
|
|
2660
|
+
import os
|
|
2661
|
+
|
|
2662
|
+
click.echo()
|
|
2663
|
+
click.echo(bold(" Code Data Ark — init"))
|
|
2664
|
+
click.echo(hr())
|
|
2665
|
+
|
|
2666
|
+
# Create directory tree
|
|
2667
|
+
dirs = [DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR]
|
|
2668
|
+
for d in dirs:
|
|
2669
|
+
d.mkdir(parents=True, exist_ok=True)
|
|
2670
|
+
click.echo(green(f" {d}"))
|
|
2671
|
+
|
|
2672
|
+
# Write a starter policy file if none exists
|
|
2673
|
+
if not POLICY_FILE.exists():
|
|
2674
|
+
POLICY_FILE.write_text("# CDA access policy\n# ALLOW <pattern>\n# DENY <pattern>\n")
|
|
2675
|
+
click.echo(green(f" {POLICY_FILE} (created)"))
|
|
2676
|
+
|
|
2677
|
+
# Validate VS Code data dir
|
|
2678
|
+
vscode_data = Path(os.environ.get(
|
|
2679
|
+
"VSCODE_DATA_DIR",
|
|
2680
|
+
Path.home() / "Library/Application Support/Code/User",
|
|
2681
|
+
))
|
|
2682
|
+
if vscode_data.exists():
|
|
2683
|
+
click.echo(green(f" VS Code data dir: {vscode_data}"))
|
|
2684
|
+
else:
|
|
2685
|
+
click.echo(yellow(f" VS Code data dir not found: {vscode_data}"))
|
|
2686
|
+
click.echo(yellow(" Set VSCODE_DATA_DIR if your data is elsewhere."))
|
|
2687
|
+
|
|
2688
|
+
click.echo()
|
|
2689
|
+
click.echo(bold(" CDA_HOME: ") + str(CDA_HOME))
|
|
2690
|
+
click.echo()
|
|
2691
|
+
click.echo(dim(" Next steps:"))
|
|
2692
|
+
click.echo(dim(" cda pmf install — register as a macOS LaunchAgent (auto-start on login)"))
|
|
2693
|
+
click.echo(dim(" cda sync — ingest all VS Code session data"))
|
|
2694
|
+
click.echo(dim(" cda pmf up — start watcher + web UI now (opens browser)"))
|
|
2695
|
+
click.echo(dim(" cda serve — run web UI in foreground (opens browser)"))
|
|
2696
|
+
click.echo()
|
|
2697
|
+
|
|
2698
|
+
|
|
2577
2699
|
# ─────────────────────────────────────────────
|
|
2578
2700
|
# ENTRY
|
|
2579
2701
|
# ─────────────────────────────────────────────
|
|
@@ -11,18 +11,14 @@ import threading
|
|
|
11
11
|
import time
|
|
12
12
|
import traceback
|
|
13
13
|
import subprocess
|
|
14
|
+
import sys
|
|
14
15
|
import socket
|
|
15
16
|
from typing import Any, Dict
|
|
16
|
-
from pathlib import Path
|
|
17
17
|
from datetime import datetime
|
|
18
18
|
from wsgiref.simple_server import make_server, WSGIServer
|
|
19
19
|
from urllib.parse import parse_qs
|
|
20
20
|
from cda.kernel.pmf_kernel import PMFKernel
|
|
21
|
-
|
|
22
|
-
# Get DB path relative to this file
|
|
23
|
-
PACKAGE_DIR = Path(__file__).resolve().parent
|
|
24
|
-
LOCAL_DIR = PACKAGE_DIR.parent.parent.parent / "local"
|
|
25
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
21
|
+
from cda.kernel.paths import DB_PATH
|
|
26
22
|
kernel = PMFKernel()
|
|
27
23
|
|
|
28
24
|
# ─────────────────────────────────────────────
|
|
@@ -1396,28 +1392,28 @@ def run_action_background(action_id, action_name):
|
|
|
1396
1392
|
try:
|
|
1397
1393
|
if action_name == "sync":
|
|
1398
1394
|
result = subprocess.run(
|
|
1399
|
-
["
|
|
1395
|
+
[sys.executable, "-m", "cda.pipeline.ingest"],
|
|
1400
1396
|
capture_output=True,
|
|
1401
1397
|
text=True,
|
|
1402
1398
|
timeout=300
|
|
1403
1399
|
)
|
|
1404
1400
|
elif action_name == "reconstruct":
|
|
1405
1401
|
result = subprocess.run(
|
|
1406
|
-
["
|
|
1402
|
+
[sys.executable, "-m", "cda.pipeline.reconstruct"],
|
|
1407
1403
|
capture_output=True,
|
|
1408
1404
|
text=True,
|
|
1409
1405
|
timeout=300
|
|
1410
1406
|
)
|
|
1411
1407
|
elif action_name == "embed-build":
|
|
1412
1408
|
result = subprocess.run(
|
|
1413
|
-
["
|
|
1409
|
+
[sys.executable, "-m", "cda.pipeline.embed", "build"],
|
|
1414
1410
|
capture_output=True,
|
|
1415
1411
|
text=True,
|
|
1416
1412
|
timeout=600
|
|
1417
1413
|
)
|
|
1418
1414
|
elif action_name == "watch-start":
|
|
1419
1415
|
result = subprocess.run(
|
|
1420
|
-
["
|
|
1416
|
+
[sys.executable, "-m", "cda.pipeline.watcher", "start"],
|
|
1421
1417
|
capture_output=True,
|
|
1422
1418
|
text=True,
|
|
1423
1419
|
timeout=30
|
|
@@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [2.0.4] - 2026-05-11
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **`cda pmf install`** — generates and loads a macOS `~/Library/LaunchAgents/com.gocosmix.cda.plist`; CDA starts automatically on login
|
|
12
|
+
- **`cda pmf uninstall`** — unloads and removes the LaunchAgent plist
|
|
13
|
+
- **`cda pmf up`** — starts watcher + web UI in one command; opens browser when the server is ready; called by launchd on login
|
|
14
|
+
- **Browser auto-open**: `cda serve`, `cda ui start`, and `cda pmf start ui` now open a browser tab when the server is ready (`--no-browser` to disable)
|
|
15
|
+
- `cda.kernel.pmf_kernel`: `install_launchd()`, `uninstall_launchd()`, `generate_plist()`, `plist_path()`, `open_browser_when_ready()`, `wait_for_port_and_open_browser()`
|
|
16
|
+
|
|
17
|
+
## [2.0.3] - 2026-05-11
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- **Install path resolution**: `LOCAL_DIR`/`DB_PATH` no longer derived from `__file__` — now resolves to `~/.cda/` (or `$CDA_HOME`). Survives `pip install` into site-packages.
|
|
21
|
+
- All pipeline stages (`ingest`, `reconstruct`, `extract`, `embed`, `watcher`, `parse_edits`) import canonical paths from new `cda.kernel.paths` module.
|
|
22
|
+
- `PMFKernel` and `selfcheck` updated to use `cda.kernel.paths`.
|
|
23
|
+
- All subprocess pipeline invocations switched from script file paths to `python -m cda.pipeline.<stage>` module calls.
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- `cda.kernel.paths` — single source of truth for `CDA_HOME`, `DB_PATH`, `PID_FILE`, `QUEUE_DIR`, `POLICY_FILE`, `ensure_dirs()`.
|
|
27
|
+
- `cda init` command — first-run setup: creates `~/.cda/` directory tree, writes starter policy, validates VS Code data path.
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
- README quickstart now reflects correct install flow: `pip install` → `cda init` → `cda sync` → `cda watch start` → `cda serve`.
|
|
31
|
+
|
|
8
32
|
## [2.0.2] - 2026-05-11
|
|
9
33
|
|
|
10
34
|
### Fixed
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "code-data-ark"
|
|
7
|
-
version = "2.0.
|
|
7
|
+
version = "2.0.4"
|
|
8
8
|
description = "Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions"
|
|
9
9
|
readme = "readme.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -78,39 +78,41 @@ make install-dev
|
|
|
78
78
|
|
|
79
79
|
## ⚡ Quick Start
|
|
80
80
|
|
|
81
|
-
1. **
|
|
81
|
+
1. **Install**
|
|
82
82
|
|
|
83
83
|
```bash
|
|
84
|
-
|
|
84
|
+
pip install code-data-ark
|
|
85
85
|
```
|
|
86
86
|
|
|
87
|
-
2. **
|
|
87
|
+
2. **Initialize — create `~/.cda/` and validate your VS Code data path**
|
|
88
88
|
|
|
89
89
|
```bash
|
|
90
|
-
cda
|
|
90
|
+
cda init
|
|
91
91
|
```
|
|
92
92
|
|
|
93
|
-
3. **
|
|
93
|
+
3. **Ingest all VS Code session data**
|
|
94
94
|
|
|
95
95
|
```bash
|
|
96
|
-
cda
|
|
96
|
+
cda sync
|
|
97
97
|
```
|
|
98
98
|
|
|
99
|
-
4. **
|
|
99
|
+
4. **Start the live watcher daemon**
|
|
100
100
|
|
|
101
101
|
```bash
|
|
102
|
-
cda
|
|
102
|
+
cda watch start
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
-
|
|
105
|
+
5. **Open the web dashboard**
|
|
106
106
|
|
|
107
107
|
```bash
|
|
108
|
-
cda
|
|
108
|
+
cda serve # → http://127.0.0.1:10001
|
|
109
109
|
```
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
|
|
112
112
|
|
|
113
|
-
|
|
113
|
+
```bash
|
|
114
|
+
cda embed build
|
|
115
|
+
```
|
|
114
116
|
|
|
115
117
|
## 🌐 Web UI
|
|
116
118
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.0.4
|
code_data_ark-2.0.2/version
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
2.0.2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|