code-data-ark 2.0.2__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/PKG-INFO +15 -13
- code_data_ark-2.0.3/cda/kernel/paths.py +54 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/pmf_kernel.py +23 -26
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/selfcheck.py +4 -7
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/embed.py +2 -4
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/extract.py +1 -3
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/ingest.py +5 -3
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/parse_edits.py +1 -5
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/reconstruct.py +2 -3
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/watcher.py +3 -6
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/cli.py +72 -34
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/web.py +6 -10
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/changelog.md +15 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/pyproject.toml +1 -1
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/readme.md +14 -12
- code_data_ark-2.0.3/version +1 -0
- code_data_ark-2.0.2/version +0 -1
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/.flake8 +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/.github/workflows/ci.yml +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/.gitignore +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/bin/release.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/control_db.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/__init__.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/contributing.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/architecture.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/examples/usage.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/pmf_kernel.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/roadmap.md +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/license +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/makefile +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/tests/test_basic.py +0 -0
- {code_data_ark-2.0.2 → code_data_ark-2.0.3}/tests/test_selfcheck.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-data-ark
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
|
|
5
5
|
Project-URL: Homepage, https://github.com/goCosmix/cda
|
|
6
6
|
Project-URL: Repository, https://github.com/goCosmix/cda.git
|
|
@@ -121,39 +121,41 @@ make install-dev
|
|
|
121
121
|
|
|
122
122
|
## ⚡ Quick Start
|
|
123
123
|
|
|
124
|
-
1. **
|
|
124
|
+
1. **Install**
|
|
125
125
|
|
|
126
126
|
```bash
|
|
127
|
-
|
|
127
|
+
pip install code-data-ark
|
|
128
128
|
```
|
|
129
129
|
|
|
130
|
-
2. **
|
|
130
|
+
2. **Initialize — create `~/.cda/` and validate your VS Code data path**
|
|
131
131
|
|
|
132
132
|
```bash
|
|
133
|
-
cda
|
|
133
|
+
cda init
|
|
134
134
|
```
|
|
135
135
|
|
|
136
|
-
3. **
|
|
136
|
+
3. **Ingest all VS Code session data**
|
|
137
137
|
|
|
138
138
|
```bash
|
|
139
|
-
cda
|
|
139
|
+
cda sync
|
|
140
140
|
```
|
|
141
141
|
|
|
142
|
-
4. **
|
|
142
|
+
4. **Start the live watcher daemon**
|
|
143
143
|
|
|
144
144
|
```bash
|
|
145
|
-
cda
|
|
145
|
+
cda watch start
|
|
146
146
|
```
|
|
147
147
|
|
|
148
|
-
|
|
148
|
+
5. **Open the web dashboard**
|
|
149
149
|
|
|
150
150
|
```bash
|
|
151
|
-
cda
|
|
151
|
+
cda serve # → http://127.0.0.1:10001
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
-
|
|
154
|
+
6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
```bash
|
|
157
|
+
cda embed build
|
|
158
|
+
```
|
|
157
159
|
|
|
158
160
|
## 🌐 Web UI
|
|
159
161
|
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cda.kernel.paths — canonical path resolution for Code Data Ark.
|
|
3
|
+
|
|
4
|
+
CDA_HOME is the single root for all runtime state (DB, PID files, logs,
|
|
5
|
+
queue, PMF runtime). It is resolved exactly once at import time via:
|
|
6
|
+
|
|
7
|
+
1. CDA_HOME environment variable (absolute path)
|
|
8
|
+
2. ~/.cda/ (default — survives pip install, editable install, CI)
|
|
9
|
+
|
|
10
|
+
Pipeline stages and the CLI all import from here so every module agrees
|
|
11
|
+
on the same paths regardless of where the package is installed.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
# ── home resolution ──────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_cda_home() -> Path:
|
|
21
|
+
"""Return the CDA home directory, creating it if it doesn't exist."""
|
|
22
|
+
env = os.environ.get("CDA_HOME")
|
|
23
|
+
if env:
|
|
24
|
+
home = Path(env).expanduser().resolve()
|
|
25
|
+
else:
|
|
26
|
+
home = Path.home() / ".cda"
|
|
27
|
+
home.mkdir(parents=True, exist_ok=True)
|
|
28
|
+
return home
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ── canonical paths (module-level constants, computed once) ─────────────────
|
|
32
|
+
|
|
33
|
+
CDA_HOME = get_cda_home()
|
|
34
|
+
LOCAL_DIR = CDA_HOME # CDA_HOME *is* the local root
|
|
35
|
+
DATA_DIR = CDA_HOME / "data"
|
|
36
|
+
RUN_DIR = CDA_HOME / "run"
|
|
37
|
+
LOG_DIR = CDA_HOME / "logs"
|
|
38
|
+
QUEUE_DIR = CDA_HOME / "queue"
|
|
39
|
+
PMF_DIR = CDA_HOME / "pmf"
|
|
40
|
+
CONFIG_DIR = CDA_HOME / "config"
|
|
41
|
+
|
|
42
|
+
DB_PATH = DATA_DIR / "cda.db"
|
|
43
|
+
PID_FILE = RUN_DIR / "watcher.pid"
|
|
44
|
+
UI_PID_FILE = RUN_DIR / "ui.pid"
|
|
45
|
+
UI_LOG_FILE = LOG_DIR / "ui.log"
|
|
46
|
+
POLICY_FILE = CONFIG_DIR / "policy.txt"
|
|
47
|
+
PMF_LOG_DIR = PMF_DIR / "logs"
|
|
48
|
+
RUNTIME_FILE = PMF_DIR / "runtime.json"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def ensure_dirs() -> None:
|
|
52
|
+
"""Create all runtime directories. Safe to call multiple times."""
|
|
53
|
+
for d in (DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR):
|
|
54
|
+
d.mkdir(parents=True, exist_ok=True)
|
|
@@ -8,19 +8,16 @@ from dataclasses import dataclass
|
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import Dict, List, Optional
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
|
|
11
|
+
from cda.kernel.paths import (
|
|
12
|
+
LOG_DIR, RUNTIME_FILE, PMF_LOG_DIR,
|
|
13
|
+
PID_FILE as WATCHER_PID_FILE, UI_PID_FILE, CDA_HOME,
|
|
14
|
+
ensure_dirs,
|
|
15
|
+
)
|
|
16
|
+
|
|
18
17
|
DEFAULT_HOST = "127.0.0.1"
|
|
19
18
|
DEFAULT_PORT = 10001
|
|
20
19
|
|
|
21
|
-
(
|
|
22
|
-
(LOCAL_DIR / "run").mkdir(parents=True, exist_ok=True)
|
|
23
|
-
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
|
20
|
+
ensure_dirs()
|
|
24
21
|
|
|
25
22
|
|
|
26
23
|
def now_ts():
|
|
@@ -58,7 +55,7 @@ class ServiceSpec:
|
|
|
58
55
|
]
|
|
59
56
|
|
|
60
57
|
if self.service_id == "watcher":
|
|
61
|
-
return [sys.executable,
|
|
58
|
+
return [sys.executable, "-m", "cda.pipeline.watcher"]
|
|
62
59
|
|
|
63
60
|
if self.command is not None:
|
|
64
61
|
return list(self.command)
|
|
@@ -72,9 +69,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
72
69
|
label="Watcher Daemon",
|
|
73
70
|
service_type="daemon",
|
|
74
71
|
description="Live VS Code data watcher and incremental ingest process.",
|
|
75
|
-
cwd=
|
|
72
|
+
cwd=CDA_HOME,
|
|
76
73
|
pid_file=WATCHER_PID_FILE,
|
|
77
|
-
log_file=
|
|
74
|
+
log_file=LOG_DIR / "watcher.log",
|
|
78
75
|
allowed_actions=["start", "stop", "restart", "status"],
|
|
79
76
|
),
|
|
80
77
|
"ui": ServiceSpec(
|
|
@@ -82,9 +79,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
82
79
|
label="Web UI",
|
|
83
80
|
service_type="daemon",
|
|
84
81
|
description="Local web dashboard for Ark runtime and session analytics.",
|
|
85
|
-
cwd=
|
|
82
|
+
cwd=CDA_HOME,
|
|
86
83
|
pid_file=UI_PID_FILE,
|
|
87
|
-
log_file=
|
|
84
|
+
log_file=LOG_DIR / "ui.log",
|
|
88
85
|
allowed_actions=["start", "stop", "restart", "status"],
|
|
89
86
|
),
|
|
90
87
|
"sync": ServiceSpec(
|
|
@@ -92,9 +89,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
92
89
|
label="Full Sync",
|
|
93
90
|
service_type="task",
|
|
94
91
|
description="Full ingest and rebuild pipeline for Ark data.",
|
|
95
|
-
command=[sys.executable,
|
|
96
|
-
cwd=
|
|
97
|
-
log_file=
|
|
92
|
+
command=[sys.executable, "-m", "cda.pipeline.ingest"],
|
|
93
|
+
cwd=CDA_HOME,
|
|
94
|
+
log_file=PMF_LOG_DIR / "sync.log",
|
|
98
95
|
allowed_actions=["start", "status"],
|
|
99
96
|
),
|
|
100
97
|
"reconstruct": ServiceSpec(
|
|
@@ -102,9 +99,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
102
99
|
label="Reconstruct",
|
|
103
100
|
service_type="task",
|
|
104
101
|
description="Reconstruct conversations and rebuild the full text search index.",
|
|
105
|
-
command=[sys.executable,
|
|
106
|
-
cwd=
|
|
107
|
-
log_file=
|
|
102
|
+
command=[sys.executable, "-m", "cda.pipeline.reconstruct"],
|
|
103
|
+
cwd=CDA_HOME,
|
|
104
|
+
log_file=PMF_LOG_DIR / "reconstruct.log",
|
|
108
105
|
allowed_actions=["start", "status"],
|
|
109
106
|
),
|
|
110
107
|
"embed-build": ServiceSpec(
|
|
@@ -112,9 +109,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
|
|
|
112
109
|
label="Embed Build",
|
|
113
110
|
service_type="task",
|
|
114
111
|
description="Build semantic embeddings and session intelligence.",
|
|
115
|
-
command=[sys.executable,
|
|
116
|
-
cwd=
|
|
117
|
-
log_file=
|
|
112
|
+
command=[sys.executable, "-m", "cda.pipeline.embed", "build"],
|
|
113
|
+
cwd=CDA_HOME,
|
|
114
|
+
log_file=PMF_LOG_DIR / "embed.log",
|
|
118
115
|
allowed_actions=["start", "status"],
|
|
119
116
|
),
|
|
120
117
|
}
|
|
@@ -263,8 +260,8 @@ class PMFKernel:
|
|
|
263
260
|
with open(log_file, "a") as fh:
|
|
264
261
|
proc = subprocess.Popen(
|
|
265
262
|
command,
|
|
266
|
-
cwd=spec.cwd or
|
|
267
|
-
env={**os.environ, **(spec.env or {})
|
|
263
|
+
cwd=spec.cwd or CDA_HOME,
|
|
264
|
+
env={**os.environ, **(spec.env or {})},
|
|
268
265
|
stdout=fh,
|
|
269
266
|
stderr=fh,
|
|
270
267
|
preexec_fn=os.setsid if spec.service_type == "daemon" else None,
|
|
@@ -26,15 +26,12 @@ import subprocess
|
|
|
26
26
|
import sys
|
|
27
27
|
from pathlib import Path
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR
|
|
30
|
+
|
|
30
31
|
PACKAGE_DIR = Path(__file__).resolve().parent
|
|
31
32
|
SOURCE_DIR = PACKAGE_DIR.parent.parent # source/ — tracked repo root
|
|
32
|
-
PROJECT_DIR = PACKAGE_DIR.parent.parent.parent # repo root — where layers live
|
|
33
|
-
LOCAL_DIR = PROJECT_DIR / "local"
|
|
34
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
35
|
-
PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
|
|
36
|
-
QUEUE_DIR = LOCAL_DIR / "queue"
|
|
37
33
|
VERSION_FILE = SOURCE_DIR / "version"
|
|
34
|
+
GIT_ROOT = SOURCE_DIR.parent # repo root — used for git check-ignore
|
|
38
35
|
|
|
39
36
|
REQUIRED_TABLES = [
|
|
40
37
|
"sessions", "exchanges", "tool_calls", "vfs", "workspaces",
|
|
@@ -231,7 +228,7 @@ def check_data_gitignored():
|
|
|
231
228
|
try:
|
|
232
229
|
result = subprocess.run(
|
|
233
230
|
["git", "check-ignore", "-q", "local"],
|
|
234
|
-
cwd=
|
|
231
|
+
cwd=GIT_ROOT,
|
|
235
232
|
capture_output=True,
|
|
236
233
|
)
|
|
237
234
|
if result.returncode == 0:
|
|
@@ -10,12 +10,10 @@ This stage builds semantic embeddings and mini-intelligence artifacts:
|
|
|
10
10
|
|
|
11
11
|
import json
|
|
12
12
|
import sqlite3
|
|
13
|
-
from pathlib import Path
|
|
14
13
|
from typing import Dict, List, Optional, Tuple
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
15
|
+
from cda.kernel.paths import DB_PATH
|
|
16
|
+
|
|
19
17
|
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
|
20
18
|
MAX_EMBED_TEXT = 1400
|
|
21
19
|
|
|
@@ -26,9 +26,7 @@ from datetime import datetime
|
|
|
26
26
|
from typing import Dict, List, Tuple, DefaultDict
|
|
27
27
|
from collections import defaultdict
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
31
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
29
|
+
from cda.kernel.paths import DB_PATH
|
|
32
30
|
|
|
33
31
|
# ─────────────────────────────────────────────────────────
|
|
34
32
|
# Signal patterns
|
|
@@ -26,6 +26,11 @@ import time
|
|
|
26
26
|
import logging
|
|
27
27
|
from pathlib import Path
|
|
28
28
|
|
|
29
|
+
from cda.kernel.paths import DB_PATH, ensure_dirs
|
|
30
|
+
|
|
31
|
+
# Ensure local dirs are present before writing
|
|
32
|
+
ensure_dirs()
|
|
33
|
+
|
|
29
34
|
# Set up logging
|
|
30
35
|
logging.basicConfig(
|
|
31
36
|
level=logging.INFO,
|
|
@@ -39,9 +44,6 @@ HOME = Path.home()
|
|
|
39
44
|
VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", HOME / "Library/Application Support/Code/User"))
|
|
40
45
|
VS_STORAGE = VSCODE_DATA_DIR / "workspaceStorage"
|
|
41
46
|
GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
|
|
42
|
-
ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
|
|
43
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
44
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
45
47
|
|
|
46
48
|
# Large index DBs — too big to blob, record path only
|
|
47
49
|
SKIP_BLOB_PATTERNS = ["workspace-chunks.db", "local-index"]
|
|
@@ -33,11 +33,7 @@ Edit rounds: len(checkpoints) - 1 (first is always "Initial State")
|
|
|
33
33
|
import sqlite3
|
|
34
34
|
import gzip
|
|
35
35
|
import json
|
|
36
|
-
from
|
|
37
|
-
|
|
38
|
-
ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
|
|
39
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
40
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
36
|
+
from cda.kernel.paths import DB_PATH
|
|
41
37
|
|
|
42
38
|
SCHEMA = """
|
|
43
39
|
CREATE TABLE IF NOT EXISTS edit_sessions (
|
|
@@ -19,9 +19,8 @@ import time
|
|
|
19
19
|
from typing import Optional
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
22
|
+
from cda.kernel.paths import DB_PATH
|
|
23
|
+
|
|
25
24
|
NOW_MS = int(time.time() * 1000)
|
|
26
25
|
|
|
27
26
|
EXCHANGES_SCHEMA = """
|
|
@@ -38,17 +38,14 @@ except ImportError:
|
|
|
38
38
|
print("ERROR: watchfiles not installed. Run: pip install watchfiles")
|
|
39
39
|
sys.exit(1)
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
LOCAL_DIR = ROOT_DIR / "local"
|
|
43
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
44
|
-
PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
|
|
45
|
-
QUEUE_DIR = LOCAL_DIR / "queue"
|
|
41
|
+
from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR, LOG_DIR, ensure_dirs
|
|
46
42
|
# Allow override via env var for portability
|
|
47
43
|
VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", Path.home() / "Library/Application Support/Code/User"))
|
|
48
44
|
VS_ROOT = VSCODE_DATA_DIR / "workspaceStorage"
|
|
49
45
|
GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
|
|
50
46
|
|
|
51
|
-
|
|
47
|
+
ensure_dirs()
|
|
48
|
+
log_file = LOG_DIR / "watcher.log"
|
|
52
49
|
logging.basicConfig(
|
|
53
50
|
level=logging.INFO,
|
|
54
51
|
format="%(asctime)s %(levelname)-7s %(message)s",
|
|
@@ -25,6 +25,7 @@ Commands:
|
|
|
25
25
|
cda pmf restart <service> Restart a service
|
|
26
26
|
cda pmf logs <service> Tail service logs
|
|
27
27
|
cda check Run a full self-diagnostic. The system checks itself.
|
|
28
|
+
cda init First-run setup — create ~/.cda/ and validate environment
|
|
28
29
|
cda serve Start the local web UI on port 10001
|
|
29
30
|
cda sync Full re-ingest from disk (rebuilds entire DB)
|
|
30
31
|
cda reconstruct Re-run reconstruction and FTS rebuild only
|
|
@@ -62,22 +63,15 @@ import datetime
|
|
|
62
63
|
from pathlib import Path
|
|
63
64
|
from cda.pipeline.reconstruct import decompress_vfs
|
|
64
65
|
from cda.kernel.pmf_kernel import PMFKernel, PMFKernelError
|
|
66
|
+
from cda.kernel.paths import (
|
|
67
|
+
DB_PATH, PID_FILE, UI_PID_FILE, UI_LOG_FILE,
|
|
68
|
+
QUEUE_DIR, POLICY_FILE, ensure_dirs,
|
|
69
|
+
)
|
|
65
70
|
|
|
66
71
|
import click
|
|
67
72
|
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
ARK_DIR = PACKAGE_DIR.parent.parent.parent
|
|
71
|
-
LOCAL_DIR = ARK_DIR / "local"
|
|
72
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
73
|
-
PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
|
|
74
|
-
UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
|
|
75
|
-
UI_LOG_FILE = LOCAL_DIR / "logs" / "ui.log"
|
|
76
|
-
WATCHER = PACKAGE_DIR.parent / "pipeline" / "watcher.py"
|
|
77
|
-
INGEST = PACKAGE_DIR.parent / "pipeline" / "ingest.py"
|
|
78
|
-
RECON = PACKAGE_DIR.parent / "pipeline" / "reconstruct.py"
|
|
79
|
-
EXTRACT = PACKAGE_DIR.parent / "pipeline" / "extract.py"
|
|
80
|
-
EMBED = PACKAGE_DIR.parent / "pipeline" / "embed.py"
|
|
73
|
+
# Ensure runtime dirs exist on every CLI invocation
|
|
74
|
+
ensure_dirs()
|
|
81
75
|
|
|
82
76
|
kernel = PMFKernel()
|
|
83
77
|
|
|
@@ -333,14 +327,13 @@ def status():
|
|
|
333
327
|
click.echo(f" Start with: {bold('cda watch start')}")
|
|
334
328
|
|
|
335
329
|
# Queue status
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
completed = len(list(queue_dir.glob("*.completed")))
|
|
330
|
+
if QUEUE_DIR.exists():
|
|
331
|
+
pending = len(list(QUEUE_DIR.glob("*.json")))
|
|
332
|
+
completed = len(list(QUEUE_DIR.glob("*.completed")))
|
|
340
333
|
click.echo(f" Queue: {pending} pending, {completed} completed")
|
|
341
334
|
if pending > 0:
|
|
342
335
|
# Show last pending operation
|
|
343
|
-
pending_files = sorted(
|
|
336
|
+
pending_files = sorted(QUEUE_DIR.glob("*.json"))
|
|
344
337
|
if pending_files:
|
|
345
338
|
try:
|
|
346
339
|
data = json.loads(pending_files[-1].read_text())
|
|
@@ -546,7 +539,7 @@ def embed():
|
|
|
546
539
|
def embed_build():
|
|
547
540
|
"""Build semantic embeddings and session intelligence."""
|
|
548
541
|
click.echo(yellow(" Building semantic intelligence..."))
|
|
549
|
-
result = subprocess.run([sys.executable,
|
|
542
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
|
|
550
543
|
if result.returncode == 0:
|
|
551
544
|
click.echo(green(" Embed build complete"))
|
|
552
545
|
else:
|
|
@@ -754,7 +747,7 @@ def sync():
|
|
|
754
747
|
errors = 0
|
|
755
748
|
|
|
756
749
|
click.echo(yellow(" Running full ingest — this rewrites the DB..."))
|
|
757
|
-
result = subprocess.run([sys.executable,
|
|
750
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.ingest"], capture_output=False)
|
|
758
751
|
if result.returncode != 0:
|
|
759
752
|
click.echo(red(" Ingest failed"))
|
|
760
753
|
finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="ingest failed")
|
|
@@ -763,7 +756,7 @@ def sync():
|
|
|
763
756
|
|
|
764
757
|
click.echo(green(" Ingest complete"))
|
|
765
758
|
click.echo(yellow(" Running reconstruction..."))
|
|
766
|
-
result = subprocess.run([sys.executable,
|
|
759
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
|
|
767
760
|
if result.returncode != 0:
|
|
768
761
|
click.echo(red(" Reconstruction failed"))
|
|
769
762
|
finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="reconstruct failed")
|
|
@@ -772,7 +765,7 @@ def sync():
|
|
|
772
765
|
|
|
773
766
|
click.echo(green(" Reconstruction complete"))
|
|
774
767
|
click.echo(yellow(" Running analysis..."))
|
|
775
|
-
result = subprocess.run([sys.executable,
|
|
768
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.extract"], capture_output=False)
|
|
776
769
|
if result.returncode != 0:
|
|
777
770
|
click.echo(red(" Analysis failed"))
|
|
778
771
|
finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="extract failed")
|
|
@@ -781,7 +774,7 @@ def sync():
|
|
|
781
774
|
|
|
782
775
|
click.echo(green(" Analysis complete"))
|
|
783
776
|
click.echo(yellow(" Running semantic intelligence..."))
|
|
784
|
-
result = subprocess.run([sys.executable,
|
|
777
|
+
result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
|
|
785
778
|
if result.returncode != 0:
|
|
786
779
|
click.echo(red(" Semantic intelligence failed"))
|
|
787
780
|
errors += 1
|
|
@@ -809,7 +802,7 @@ def sync():
|
|
|
809
802
|
def reconstruct():
|
|
810
803
|
"""Re-run session reconstruction and FTS rebuild only."""
|
|
811
804
|
click.echo(yellow(" Reconstructing exchanges..."))
|
|
812
|
-
subprocess.run([sys.executable,
|
|
805
|
+
subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
|
|
813
806
|
click.echo(green(" Done"))
|
|
814
807
|
|
|
815
808
|
|
|
@@ -1470,9 +1463,8 @@ def policy():
|
|
|
1470
1463
|
def policy_allow(pattern):
|
|
1471
1464
|
"""Add an allow pattern for search results."""
|
|
1472
1465
|
# For now, store in a simple text file
|
|
1473
|
-
policy_file = LOCAL_DIR / "config" / "policy.txt"
|
|
1474
1466
|
try:
|
|
1475
|
-
with open(
|
|
1467
|
+
with open(POLICY_FILE, "a") as f:
|
|
1476
1468
|
f.write(f"ALLOW {pattern}\n")
|
|
1477
1469
|
click.echo(green(f" Added allow pattern: {pattern}"))
|
|
1478
1470
|
except Exception as e:
|
|
@@ -1483,9 +1475,8 @@ def policy_allow(pattern):
|
|
|
1483
1475
|
@click.argument("pattern")
|
|
1484
1476
|
def policy_deny(pattern):
|
|
1485
1477
|
"""Add a deny pattern for search results."""
|
|
1486
|
-
policy_file = LOCAL_DIR / "config" / "policy.txt"
|
|
1487
1478
|
try:
|
|
1488
|
-
with open(
|
|
1479
|
+
with open(POLICY_FILE, "a") as f:
|
|
1489
1480
|
f.write(f"DENY {pattern}\n")
|
|
1490
1481
|
click.echo(green(f" Added deny pattern: {pattern}"))
|
|
1491
1482
|
except Exception as e:
|
|
@@ -1495,8 +1486,7 @@ def policy_deny(pattern):
|
|
|
1495
1486
|
@policy.command("list")
|
|
1496
1487
|
def policy_list():
|
|
1497
1488
|
"""List current policies."""
|
|
1498
|
-
|
|
1499
|
-
if not policy_file.exists():
|
|
1489
|
+
if not POLICY_FILE.exists():
|
|
1500
1490
|
click.echo(dim(" No policies configured"))
|
|
1501
1491
|
return
|
|
1502
1492
|
|
|
@@ -1504,7 +1494,7 @@ def policy_list():
|
|
|
1504
1494
|
click.echo(bold(" Data Access Policies"))
|
|
1505
1495
|
click.echo(hr())
|
|
1506
1496
|
try:
|
|
1507
|
-
with open(
|
|
1497
|
+
with open(POLICY_FILE, "r") as f:
|
|
1508
1498
|
for line in f:
|
|
1509
1499
|
line = line.strip()
|
|
1510
1500
|
if line.startswith("ALLOW "):
|
|
@@ -1518,14 +1508,13 @@ def policy_list():
|
|
|
1518
1508
|
|
|
1519
1509
|
def check_policy(text):
|
|
1520
1510
|
"""Check if text passes policy filters. Returns True if allowed."""
|
|
1521
|
-
|
|
1522
|
-
if not policy_file.exists():
|
|
1511
|
+
if not POLICY_FILE.exists():
|
|
1523
1512
|
return True # No policies = allow all
|
|
1524
1513
|
|
|
1525
1514
|
allow_patterns = []
|
|
1526
1515
|
deny_patterns = []
|
|
1527
1516
|
try:
|
|
1528
|
-
with open(
|
|
1517
|
+
with open(POLICY_FILE, "r") as f:
|
|
1529
1518
|
for line in f:
|
|
1530
1519
|
line = line.strip()
|
|
1531
1520
|
if line.startswith("ALLOW "):
|
|
@@ -2574,6 +2563,55 @@ def check(as_json, fail_fast):
|
|
|
2574
2563
|
sys.exit(0 if passed_all else 1)
|
|
2575
2564
|
|
|
2576
2565
|
|
|
2566
|
+
# ─────────────────────────────────────────────
|
|
2567
|
+
# INIT
|
|
2568
|
+
# ─────────────────────────────────────────────
|
|
2569
|
+
|
|
2570
|
+
@cli.command("init")
|
|
2571
|
+
def init():
|
|
2572
|
+
"""First-run setup — create ~/.cda/ directory structure and validate environment."""
|
|
2573
|
+
from cda.kernel.paths import (
|
|
2574
|
+
CDA_HOME, DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR,
|
|
2575
|
+
PMF_DIR, PMF_LOG_DIR, CONFIG_DIR, POLICY_FILE,
|
|
2576
|
+
)
|
|
2577
|
+
import os
|
|
2578
|
+
|
|
2579
|
+
click.echo()
|
|
2580
|
+
click.echo(bold(" Code Data Ark — init"))
|
|
2581
|
+
click.echo(hr())
|
|
2582
|
+
|
|
2583
|
+
# Create directory tree
|
|
2584
|
+
dirs = [DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR]
|
|
2585
|
+
for d in dirs:
|
|
2586
|
+
d.mkdir(parents=True, exist_ok=True)
|
|
2587
|
+
click.echo(green(f" {d}"))
|
|
2588
|
+
|
|
2589
|
+
# Write a starter policy file if none exists
|
|
2590
|
+
if not POLICY_FILE.exists():
|
|
2591
|
+
POLICY_FILE.write_text("# CDA access policy\n# ALLOW <pattern>\n# DENY <pattern>\n")
|
|
2592
|
+
click.echo(green(f" {POLICY_FILE} (created)"))
|
|
2593
|
+
|
|
2594
|
+
# Validate VS Code data dir
|
|
2595
|
+
vscode_data = Path(os.environ.get(
|
|
2596
|
+
"VSCODE_DATA_DIR",
|
|
2597
|
+
Path.home() / "Library/Application Support/Code/User",
|
|
2598
|
+
))
|
|
2599
|
+
if vscode_data.exists():
|
|
2600
|
+
click.echo(green(f" VS Code data dir: {vscode_data}"))
|
|
2601
|
+
else:
|
|
2602
|
+
click.echo(yellow(f" VS Code data dir not found: {vscode_data}"))
|
|
2603
|
+
click.echo(yellow(" Set VSCODE_DATA_DIR if your data is elsewhere."))
|
|
2604
|
+
|
|
2605
|
+
click.echo()
|
|
2606
|
+
click.echo(bold(" CDA_HOME: ") + str(CDA_HOME))
|
|
2607
|
+
click.echo()
|
|
2608
|
+
click.echo(dim(" Next steps:"))
|
|
2609
|
+
click.echo(dim(" cda sync — ingest all VS Code session data"))
|
|
2610
|
+
click.echo(dim(" cda watch start — start the live watcher daemon"))
|
|
2611
|
+
click.echo(dim(" cda serve — open the web dashboard on :10001"))
|
|
2612
|
+
click.echo()
|
|
2613
|
+
|
|
2614
|
+
|
|
2577
2615
|
# ─────────────────────────────────────────────
|
|
2578
2616
|
# ENTRY
|
|
2579
2617
|
# ─────────────────────────────────────────────
|
|
@@ -11,18 +11,14 @@ import threading
|
|
|
11
11
|
import time
|
|
12
12
|
import traceback
|
|
13
13
|
import subprocess
|
|
14
|
+
import sys
|
|
14
15
|
import socket
|
|
15
16
|
from typing import Any, Dict
|
|
16
|
-
from pathlib import Path
|
|
17
17
|
from datetime import datetime
|
|
18
18
|
from wsgiref.simple_server import make_server, WSGIServer
|
|
19
19
|
from urllib.parse import parse_qs
|
|
20
20
|
from cda.kernel.pmf_kernel import PMFKernel
|
|
21
|
-
|
|
22
|
-
# Get DB path relative to this file
|
|
23
|
-
PACKAGE_DIR = Path(__file__).resolve().parent
|
|
24
|
-
LOCAL_DIR = PACKAGE_DIR.parent.parent.parent / "local"
|
|
25
|
-
DB_PATH = LOCAL_DIR / "data" / "cda.db"
|
|
21
|
+
from cda.kernel.paths import DB_PATH
|
|
26
22
|
kernel = PMFKernel()
|
|
27
23
|
|
|
28
24
|
# ─────────────────────────────────────────────
|
|
@@ -1396,28 +1392,28 @@ def run_action_background(action_id, action_name):
|
|
|
1396
1392
|
try:
|
|
1397
1393
|
if action_name == "sync":
|
|
1398
1394
|
result = subprocess.run(
|
|
1399
|
-
["
|
|
1395
|
+
[sys.executable, "-m", "cda.pipeline.ingest"],
|
|
1400
1396
|
capture_output=True,
|
|
1401
1397
|
text=True,
|
|
1402
1398
|
timeout=300
|
|
1403
1399
|
)
|
|
1404
1400
|
elif action_name == "reconstruct":
|
|
1405
1401
|
result = subprocess.run(
|
|
1406
|
-
["
|
|
1402
|
+
[sys.executable, "-m", "cda.pipeline.reconstruct"],
|
|
1407
1403
|
capture_output=True,
|
|
1408
1404
|
text=True,
|
|
1409
1405
|
timeout=300
|
|
1410
1406
|
)
|
|
1411
1407
|
elif action_name == "embed-build":
|
|
1412
1408
|
result = subprocess.run(
|
|
1413
|
-
["
|
|
1409
|
+
[sys.executable, "-m", "cda.pipeline.embed", "build"],
|
|
1414
1410
|
capture_output=True,
|
|
1415
1411
|
text=True,
|
|
1416
1412
|
timeout=600
|
|
1417
1413
|
)
|
|
1418
1414
|
elif action_name == "watch-start":
|
|
1419
1415
|
result = subprocess.run(
|
|
1420
|
-
["
|
|
1416
|
+
[sys.executable, "-m", "cda.pipeline.watcher", "start"],
|
|
1421
1417
|
capture_output=True,
|
|
1422
1418
|
text=True,
|
|
1423
1419
|
timeout=30
|
|
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [2.0.3] - 2026-05-11
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **Install path resolution**: `LOCAL_DIR`/`DB_PATH` no longer derived from `__file__` — now resolves to `~/.cda/` (or `$CDA_HOME`). Survives `pip install` into site-packages.
|
|
12
|
+
- All pipeline stages (`ingest`, `reconstruct`, `extract`, `embed`, `watcher`, `parse_edits`) import canonical paths from new `cda.kernel.paths` module.
|
|
13
|
+
- `PMFKernel` and `selfcheck` updated to use `cda.kernel.paths`.
|
|
14
|
+
- All subprocess pipeline invocations switched from script file paths to `python -m cda.pipeline.<stage>` module calls.
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
- `cda.kernel.paths` — single source of truth for `CDA_HOME`, `DB_PATH`, `PID_FILE`, `QUEUE_DIR`, `POLICY_FILE`, `ensure_dirs()`.
|
|
18
|
+
- `cda init` command — first-run setup: creates `~/.cda/` directory tree, writes starter policy, validates VS Code data path.
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
- README quickstart now reflects correct install flow: `pip install` → `cda init` → `cda sync` → `cda watch start` → `cda serve`.
|
|
22
|
+
|
|
8
23
|
## [2.0.2] - 2026-05-11
|
|
9
24
|
|
|
10
25
|
### Fixed
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "code-data-ark"
|
|
7
|
-
version = "2.0.
|
|
7
|
+
version = "2.0.3"
|
|
8
8
|
description = "Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions"
|
|
9
9
|
readme = "readme.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -78,39 +78,41 @@ make install-dev
|
|
|
78
78
|
|
|
79
79
|
## ⚡ Quick Start
|
|
80
80
|
|
|
81
|
-
1. **
|
|
81
|
+
1. **Install**
|
|
82
82
|
|
|
83
83
|
```bash
|
|
84
|
-
|
|
84
|
+
pip install code-data-ark
|
|
85
85
|
```
|
|
86
86
|
|
|
87
|
-
2. **
|
|
87
|
+
2. **Initialize — create `~/.cda/` and validate your VS Code data path**
|
|
88
88
|
|
|
89
89
|
```bash
|
|
90
|
-
cda
|
|
90
|
+
cda init
|
|
91
91
|
```
|
|
92
92
|
|
|
93
|
-
3. **
|
|
93
|
+
3. **Ingest all VS Code session data**
|
|
94
94
|
|
|
95
95
|
```bash
|
|
96
|
-
cda
|
|
96
|
+
cda sync
|
|
97
97
|
```
|
|
98
98
|
|
|
99
|
-
4. **
|
|
99
|
+
4. **Start the live watcher daemon**
|
|
100
100
|
|
|
101
101
|
```bash
|
|
102
|
-
cda
|
|
102
|
+
cda watch start
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
-
|
|
105
|
+
5. **Open the web dashboard**
|
|
106
106
|
|
|
107
107
|
```bash
|
|
108
|
-
cda
|
|
108
|
+
cda serve # → http://127.0.0.1:10001
|
|
109
109
|
```
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
|
|
112
112
|
|
|
113
|
-
|
|
113
|
+
```bash
|
|
114
|
+
cda embed build
|
|
115
|
+
```
|
|
114
116
|
|
|
115
117
|
## 🌐 Web UI
|
|
116
118
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.0.3
|
code_data_ark-2.0.2/version
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
2.0.2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|