code-data-ark 2.0.2__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/PKG-INFO +15 -13
  2. code_data_ark-2.0.3/cda/kernel/paths.py +54 -0
  3. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/pmf_kernel.py +23 -26
  4. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/selfcheck.py +4 -7
  5. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/embed.py +2 -4
  6. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/extract.py +1 -3
  7. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/ingest.py +5 -3
  8. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/parse_edits.py +1 -5
  9. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/reconstruct.py +2 -3
  10. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/watcher.py +3 -6
  11. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/cli.py +72 -34
  12. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/web.py +6 -10
  13. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/changelog.md +15 -0
  14. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/pyproject.toml +1 -1
  15. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/readme.md +14 -12
  16. code_data_ark-2.0.3/version +1 -0
  17. code_data_ark-2.0.2/version +0 -1
  18. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/.flake8 +0 -0
  19. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/.github/workflows/ci.yml +0 -0
  20. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/.gitignore +0 -0
  21. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/bin/release.py +0 -0
  22. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/__init__.py +0 -0
  23. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/__init__.py +0 -0
  24. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/control_db.py +0 -0
  25. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/__init__.py +0 -0
  26. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/__init__.py +0 -0
  27. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/contributing.md +0 -0
  28. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/architecture.md +0 -0
  29. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/examples/usage.md +0 -0
  30. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/pmf_kernel.md +0 -0
  31. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/roadmap.md +0 -0
  32. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/license +0 -0
  33. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/makefile +0 -0
  34. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/tests/test_basic.py +0 -0
  35. {code_data_ark-2.0.2 → code_data_ark-2.0.3}/tests/test_selfcheck.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-data-ark
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
5
5
  Project-URL: Homepage, https://github.com/goCosmix/cda
6
6
  Project-URL: Repository, https://github.com/goCosmix/cda.git
@@ -121,39 +121,41 @@ make install-dev
121
121
 
122
122
  ## ⚡ Quick Start
123
123
 
124
- 1. **Initialize the database**
124
+ 1. **Install**
125
125
 
126
126
  ```bash
127
- cda sync
127
+ pip install code-data-ark
128
128
  ```
129
129
 
130
- 2. **Start the watcher daemon**
130
+ 2. **Initialize — create `~/.cda/` and validate your VS Code data path**
131
131
 
132
132
  ```bash
133
- cda watch start
133
+ cda init
134
134
  ```
135
135
 
136
- 3. **Inspect the PMF runtime services**
136
+ 3. **Ingest all VS Code session data**
137
137
 
138
138
  ```bash
139
- cda pmf services
139
+ cda sync
140
140
  ```
141
141
 
142
- 4. **Build semantic intelligence**
142
+ 4. **Start the live watcher daemon**
143
143
 
144
144
  ```bash
145
- cda embed build
145
+ cda watch start
146
146
  ```
147
147
 
148
- 4. **Start the web UI**
148
+ 5. **Open the web dashboard**
149
149
 
150
150
  ```bash
151
- cda ui start
151
+ cda serve # → http://127.0.0.1:10001
152
152
  ```
153
153
 
154
- 5. **Open your browser**
154
+ 6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
155
155
 
156
- Visit `http://127.0.0.1:10001`
156
+ ```bash
157
+ cda embed build
158
+ ```
157
159
 
158
160
  ## 🌐 Web UI
159
161
 
@@ -0,0 +1,54 @@
1
+ """
2
+ cda.kernel.paths — canonical path resolution for Code Data Ark.
3
+
4
+ CDA_HOME is the single root for all runtime state (DB, PID files, logs,
5
+ queue, PMF runtime). It is resolved exactly once at import time via:
6
+
7
+ 1. CDA_HOME environment variable (absolute path)
8
+ 2. ~/.cda/ (default — survives pip install, editable install, CI)
9
+
10
+ Pipeline stages and the CLI all import from here so every module agrees
11
+ on the same paths regardless of where the package is installed.
12
+ """
13
+
14
+ import os
15
+ from pathlib import Path
16
+
17
+ # ── home resolution ──────────────────────────────────────────────────────────
18
+
19
+
20
+ def get_cda_home() -> Path:
21
+ """Return the CDA home directory, creating it if it doesn't exist."""
22
+ env = os.environ.get("CDA_HOME")
23
+ if env:
24
+ home = Path(env).expanduser().resolve()
25
+ else:
26
+ home = Path.home() / ".cda"
27
+ home.mkdir(parents=True, exist_ok=True)
28
+ return home
29
+
30
+
31
+ # ── canonical paths (module-level constants, computed once) ─────────────────
32
+
33
+ CDA_HOME = get_cda_home()
34
+ LOCAL_DIR = CDA_HOME # CDA_HOME *is* the local root
35
+ DATA_DIR = CDA_HOME / "data"
36
+ RUN_DIR = CDA_HOME / "run"
37
+ LOG_DIR = CDA_HOME / "logs"
38
+ QUEUE_DIR = CDA_HOME / "queue"
39
+ PMF_DIR = CDA_HOME / "pmf"
40
+ CONFIG_DIR = CDA_HOME / "config"
41
+
42
+ DB_PATH = DATA_DIR / "cda.db"
43
+ PID_FILE = RUN_DIR / "watcher.pid"
44
+ UI_PID_FILE = RUN_DIR / "ui.pid"
45
+ UI_LOG_FILE = LOG_DIR / "ui.log"
46
+ POLICY_FILE = CONFIG_DIR / "policy.txt"
47
+ PMF_LOG_DIR = PMF_DIR / "logs"
48
+ RUNTIME_FILE = PMF_DIR / "runtime.json"
49
+
50
+
51
+ def ensure_dirs() -> None:
52
+ """Create all runtime directories. Safe to call multiple times."""
53
+ for d in (DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR):
54
+ d.mkdir(parents=True, exist_ok=True)
@@ -8,19 +8,16 @@ from dataclasses import dataclass
8
8
  from pathlib import Path
9
9
  from typing import Dict, List, Optional
10
10
 
11
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
12
- LOCAL_DIR = ROOT_DIR / "local"
13
- PACKAGE_DIR = Path(__file__).resolve().parent
14
- RUNTIME_FILE = LOCAL_DIR / "pmf" / "runtime.json"
15
- LOG_DIR = LOCAL_DIR / "pmf" / "logs"
16
- WATCHER_PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
17
- UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
11
+ from cda.kernel.paths import (
12
+ LOG_DIR, RUNTIME_FILE, PMF_LOG_DIR,
13
+ PID_FILE as WATCHER_PID_FILE, UI_PID_FILE, CDA_HOME,
14
+ ensure_dirs,
15
+ )
16
+
18
17
  DEFAULT_HOST = "127.0.0.1"
19
18
  DEFAULT_PORT = 10001
20
19
 
21
- (LOCAL_DIR / "data").mkdir(parents=True, exist_ok=True)
22
- (LOCAL_DIR / "run").mkdir(parents=True, exist_ok=True)
23
- LOG_DIR.mkdir(parents=True, exist_ok=True)
20
+ ensure_dirs()
24
21
 
25
22
 
26
23
  def now_ts():
@@ -58,7 +55,7 @@ class ServiceSpec:
58
55
  ]
59
56
 
60
57
  if self.service_id == "watcher":
61
- return [sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "watcher.py")]
58
+ return [sys.executable, "-m", "cda.pipeline.watcher"]
62
59
 
63
60
  if self.command is not None:
64
61
  return list(self.command)
@@ -72,9 +69,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
72
69
  label="Watcher Daemon",
73
70
  service_type="daemon",
74
71
  description="Live VS Code data watcher and incremental ingest process.",
75
- cwd=ROOT_DIR,
72
+ cwd=CDA_HOME,
76
73
  pid_file=WATCHER_PID_FILE,
77
- log_file=LOCAL_DIR / "logs" / "watcher.log",
74
+ log_file=LOG_DIR / "watcher.log",
78
75
  allowed_actions=["start", "stop", "restart", "status"],
79
76
  ),
80
77
  "ui": ServiceSpec(
@@ -82,9 +79,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
82
79
  label="Web UI",
83
80
  service_type="daemon",
84
81
  description="Local web dashboard for Ark runtime and session analytics.",
85
- cwd=ROOT_DIR,
82
+ cwd=CDA_HOME,
86
83
  pid_file=UI_PID_FILE,
87
- log_file=LOCAL_DIR / "logs" / "ui.log",
84
+ log_file=LOG_DIR / "ui.log",
88
85
  allowed_actions=["start", "stop", "restart", "status"],
89
86
  ),
90
87
  "sync": ServiceSpec(
@@ -92,9 +89,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
92
89
  label="Full Sync",
93
90
  service_type="task",
94
91
  description="Full ingest and rebuild pipeline for Ark data.",
95
- command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "ingest.py")],
96
- cwd=ROOT_DIR,
97
- log_file=LOG_DIR / "sync.log",
92
+ command=[sys.executable, "-m", "cda.pipeline.ingest"],
93
+ cwd=CDA_HOME,
94
+ log_file=PMF_LOG_DIR / "sync.log",
98
95
  allowed_actions=["start", "status"],
99
96
  ),
100
97
  "reconstruct": ServiceSpec(
@@ -102,9 +99,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
102
99
  label="Reconstruct",
103
100
  service_type="task",
104
101
  description="Reconstruct conversations and rebuild the full text search index.",
105
- command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "reconstruct.py")],
106
- cwd=ROOT_DIR,
107
- log_file=LOG_DIR / "reconstruct.log",
102
+ command=[sys.executable, "-m", "cda.pipeline.reconstruct"],
103
+ cwd=CDA_HOME,
104
+ log_file=PMF_LOG_DIR / "reconstruct.log",
108
105
  allowed_actions=["start", "status"],
109
106
  ),
110
107
  "embed-build": ServiceSpec(
@@ -112,9 +109,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
112
109
  label="Embed Build",
113
110
  service_type="task",
114
111
  description="Build semantic embeddings and session intelligence.",
115
- command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "embed.py"), "build"],
116
- cwd=ROOT_DIR,
117
- log_file=LOG_DIR / "embed.log",
112
+ command=[sys.executable, "-m", "cda.pipeline.embed", "build"],
113
+ cwd=CDA_HOME,
114
+ log_file=PMF_LOG_DIR / "embed.log",
118
115
  allowed_actions=["start", "status"],
119
116
  ),
120
117
  }
@@ -263,8 +260,8 @@ class PMFKernel:
263
260
  with open(log_file, "a") as fh:
264
261
  proc = subprocess.Popen(
265
262
  command,
266
- cwd=spec.cwd or ROOT_DIR,
267
- env={**os.environ, **(spec.env or {}), "PYTHONPATH": str(ROOT_DIR)},
263
+ cwd=spec.cwd or CDA_HOME,
264
+ env={**os.environ, **(spec.env or {})},
268
265
  stdout=fh,
269
266
  stderr=fh,
270
267
  preexec_fn=os.setsid if spec.service_type == "daemon" else None,
@@ -26,15 +26,12 @@ import subprocess
26
26
  import sys
27
27
  from pathlib import Path
28
28
 
29
- # ── paths the system knows about itself ─────────────────────────────────────
29
+ from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR
30
+
30
31
  PACKAGE_DIR = Path(__file__).resolve().parent
31
32
  SOURCE_DIR = PACKAGE_DIR.parent.parent # source/ — tracked repo root
32
- PROJECT_DIR = PACKAGE_DIR.parent.parent.parent # repo root — where layers live
33
- LOCAL_DIR = PROJECT_DIR / "local"
34
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
35
- PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
36
- QUEUE_DIR = LOCAL_DIR / "queue"
37
33
  VERSION_FILE = SOURCE_DIR / "version"
34
+ GIT_ROOT = SOURCE_DIR.parent # repo root — used for git check-ignore
38
35
 
39
36
  REQUIRED_TABLES = [
40
37
  "sessions", "exchanges", "tool_calls", "vfs", "workspaces",
@@ -231,7 +228,7 @@ def check_data_gitignored():
231
228
  try:
232
229
  result = subprocess.run(
233
230
  ["git", "check-ignore", "-q", "local"],
234
- cwd=PROJECT_DIR,
231
+ cwd=GIT_ROOT,
235
232
  capture_output=True,
236
233
  )
237
234
  if result.returncode == 0:
@@ -10,12 +10,10 @@ This stage builds semantic embeddings and mini-intelligence artifacts:
10
10
 
11
11
  import json
12
12
  import sqlite3
13
- from pathlib import Path
14
13
  from typing import Dict, List, Optional, Tuple
15
14
 
16
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
17
- LOCAL_DIR = ROOT_DIR / "local"
18
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
15
+ from cda.kernel.paths import DB_PATH
16
+
19
17
  MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
20
18
  MAX_EMBED_TEXT = 1400
21
19
 
@@ -26,9 +26,7 @@ from datetime import datetime
26
26
  from typing import Dict, List, Tuple, DefaultDict
27
27
  from collections import defaultdict
28
28
 
29
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
30
- LOCAL_DIR = ROOT_DIR / "local"
31
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
29
+ from cda.kernel.paths import DB_PATH
32
30
 
33
31
  # ─────────────────────────────────────────────────────────
34
32
  # Signal patterns
@@ -26,6 +26,11 @@ import time
26
26
  import logging
27
27
  from pathlib import Path
28
28
 
29
+ from cda.kernel.paths import DB_PATH, ensure_dirs
30
+
31
+ # Ensure local dirs are present before writing
32
+ ensure_dirs()
33
+
29
34
  # Set up logging
30
35
  logging.basicConfig(
31
36
  level=logging.INFO,
@@ -39,9 +44,6 @@ HOME = Path.home()
39
44
  VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", HOME / "Library/Application Support/Code/User"))
40
45
  VS_STORAGE = VSCODE_DATA_DIR / "workspaceStorage"
41
46
  GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
42
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
43
- LOCAL_DIR = ROOT_DIR / "local"
44
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
45
47
 
46
48
  # Large index DBs — too big to blob, record path only
47
49
  SKIP_BLOB_PATTERNS = ["workspace-chunks.db", "local-index"]
@@ -33,11 +33,7 @@ Edit rounds: len(checkpoints) - 1 (first is always "Initial State")
33
33
  import sqlite3
34
34
  import gzip
35
35
  import json
36
- from pathlib import Path
37
-
38
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
39
- LOCAL_DIR = ROOT_DIR / "local"
40
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
36
+ from cda.kernel.paths import DB_PATH
41
37
 
42
38
  SCHEMA = """
43
39
  CREATE TABLE IF NOT EXISTS edit_sessions (
@@ -19,9 +19,8 @@ import time
19
19
  from typing import Optional
20
20
  from pathlib import Path
21
21
 
22
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
23
- LOCAL_DIR = ROOT_DIR / "local"
24
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
22
+ from cda.kernel.paths import DB_PATH
23
+
25
24
  NOW_MS = int(time.time() * 1000)
26
25
 
27
26
  EXCHANGES_SCHEMA = """
@@ -38,17 +38,14 @@ except ImportError:
38
38
  print("ERROR: watchfiles not installed. Run: pip install watchfiles")
39
39
  sys.exit(1)
40
40
 
41
- ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
42
- LOCAL_DIR = ROOT_DIR / "local"
43
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
44
- PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
45
- QUEUE_DIR = LOCAL_DIR / "queue"
41
+ from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR, LOG_DIR, ensure_dirs
46
42
  # Allow override via env var for portability
47
43
  VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", Path.home() / "Library/Application Support/Code/User"))
48
44
  VS_ROOT = VSCODE_DATA_DIR / "workspaceStorage"
49
45
  GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
50
46
 
51
- log_file = LOCAL_DIR / "logs" / "watcher.log"
47
+ ensure_dirs()
48
+ log_file = LOG_DIR / "watcher.log"
52
49
  logging.basicConfig(
53
50
  level=logging.INFO,
54
51
  format="%(asctime)s %(levelname)-7s %(message)s",
@@ -25,6 +25,7 @@ Commands:
25
25
  cda pmf restart <service> Restart a service
26
26
  cda pmf logs <service> Tail service logs
27
27
  cda check Run a full self-diagnostic. The system checks itself.
28
+ cda init First-run setup — create ~/.cda/ and validate environment
28
29
  cda serve Start the local web UI on port 10001
29
30
  cda sync Full re-ingest from disk (rebuilds entire DB)
30
31
  cda reconstruct Re-run reconstruction and FTS rebuild only
@@ -62,22 +63,15 @@ import datetime
62
63
  from pathlib import Path
63
64
  from cda.pipeline.reconstruct import decompress_vfs
64
65
  from cda.kernel.pmf_kernel import PMFKernel, PMFKernelError
66
+ from cda.kernel.paths import (
67
+ DB_PATH, PID_FILE, UI_PID_FILE, UI_LOG_FILE,
68
+ QUEUE_DIR, POLICY_FILE, ensure_dirs,
69
+ )
65
70
 
66
71
  import click
67
72
 
68
- # Package-relative paths
69
- PACKAGE_DIR = Path(__file__).resolve().parent
70
- ARK_DIR = PACKAGE_DIR.parent.parent.parent
71
- LOCAL_DIR = ARK_DIR / "local"
72
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
73
- PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
74
- UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
75
- UI_LOG_FILE = LOCAL_DIR / "logs" / "ui.log"
76
- WATCHER = PACKAGE_DIR.parent / "pipeline" / "watcher.py"
77
- INGEST = PACKAGE_DIR.parent / "pipeline" / "ingest.py"
78
- RECON = PACKAGE_DIR.parent / "pipeline" / "reconstruct.py"
79
- EXTRACT = PACKAGE_DIR.parent / "pipeline" / "extract.py"
80
- EMBED = PACKAGE_DIR.parent / "pipeline" / "embed.py"
73
+ # Ensure runtime dirs exist on every CLI invocation
74
+ ensure_dirs()
81
75
 
82
76
  kernel = PMFKernel()
83
77
 
@@ -333,14 +327,13 @@ def status():
333
327
  click.echo(f" Start with: {bold('cda watch start')}")
334
328
 
335
329
  # Queue status
336
- queue_dir = LOCAL_DIR / "queue"
337
- if queue_dir.exists():
338
- pending = len(list(queue_dir.glob("*.json")))
339
- completed = len(list(queue_dir.glob("*.completed")))
330
+ if QUEUE_DIR.exists():
331
+ pending = len(list(QUEUE_DIR.glob("*.json")))
332
+ completed = len(list(QUEUE_DIR.glob("*.completed")))
340
333
  click.echo(f" Queue: {pending} pending, {completed} completed")
341
334
  if pending > 0:
342
335
  # Show last pending operation
343
- pending_files = sorted(queue_dir.glob("*.json"))
336
+ pending_files = sorted(QUEUE_DIR.glob("*.json"))
344
337
  if pending_files:
345
338
  try:
346
339
  data = json.loads(pending_files[-1].read_text())
@@ -546,7 +539,7 @@ def embed():
546
539
  def embed_build():
547
540
  """Build semantic embeddings and session intelligence."""
548
541
  click.echo(yellow(" Building semantic intelligence..."))
549
- result = subprocess.run([sys.executable, str(EMBED)], capture_output=False)
542
+ result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
550
543
  if result.returncode == 0:
551
544
  click.echo(green(" Embed build complete"))
552
545
  else:
@@ -754,7 +747,7 @@ def sync():
754
747
  errors = 0
755
748
 
756
749
  click.echo(yellow(" Running full ingest — this rewrites the DB..."))
757
- result = subprocess.run([sys.executable, str(INGEST)], capture_output=False)
750
+ result = subprocess.run([sys.executable, "-m", "cda.pipeline.ingest"], capture_output=False)
758
751
  if result.returncode != 0:
759
752
  click.echo(red(" Ingest failed"))
760
753
  finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="ingest failed")
@@ -763,7 +756,7 @@ def sync():
763
756
 
764
757
  click.echo(green(" Ingest complete"))
765
758
  click.echo(yellow(" Running reconstruction..."))
766
- result = subprocess.run([sys.executable, str(RECON)], capture_output=False)
759
+ result = subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
767
760
  if result.returncode != 0:
768
761
  click.echo(red(" Reconstruction failed"))
769
762
  finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="reconstruct failed")
@@ -772,7 +765,7 @@ def sync():
772
765
 
773
766
  click.echo(green(" Reconstruction complete"))
774
767
  click.echo(yellow(" Running analysis..."))
775
- result = subprocess.run([sys.executable, str(EXTRACT)], capture_output=False)
768
+ result = subprocess.run([sys.executable, "-m", "cda.pipeline.extract"], capture_output=False)
776
769
  if result.returncode != 0:
777
770
  click.echo(red(" Analysis failed"))
778
771
  finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="extract failed")
@@ -781,7 +774,7 @@ def sync():
781
774
 
782
775
  click.echo(green(" Analysis complete"))
783
776
  click.echo(yellow(" Running semantic intelligence..."))
784
- result = subprocess.run([sys.executable, str(EMBED)], capture_output=False)
777
+ result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
785
778
  if result.returncode != 0:
786
779
  click.echo(red(" Semantic intelligence failed"))
787
780
  errors += 1
@@ -809,7 +802,7 @@ def sync():
809
802
  def reconstruct():
810
803
  """Re-run session reconstruction and FTS rebuild only."""
811
804
  click.echo(yellow(" Reconstructing exchanges..."))
812
- subprocess.run([sys.executable, str(RECON)], capture_output=False)
805
+ subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
813
806
  click.echo(green(" Done"))
814
807
 
815
808
 
@@ -1470,9 +1463,8 @@ def policy():
1470
1463
  def policy_allow(pattern):
1471
1464
  """Add an allow pattern for search results."""
1472
1465
  # For now, store in a simple text file
1473
- policy_file = LOCAL_DIR / "config" / "policy.txt"
1474
1466
  try:
1475
- with open(policy_file, "a") as f:
1467
+ with open(POLICY_FILE, "a") as f:
1476
1468
  f.write(f"ALLOW {pattern}\n")
1477
1469
  click.echo(green(f" Added allow pattern: {pattern}"))
1478
1470
  except Exception as e:
@@ -1483,9 +1475,8 @@ def policy_allow(pattern):
1483
1475
  @click.argument("pattern")
1484
1476
  def policy_deny(pattern):
1485
1477
  """Add a deny pattern for search results."""
1486
- policy_file = LOCAL_DIR / "config" / "policy.txt"
1487
1478
  try:
1488
- with open(policy_file, "a") as f:
1479
+ with open(POLICY_FILE, "a") as f:
1489
1480
  f.write(f"DENY {pattern}\n")
1490
1481
  click.echo(green(f" Added deny pattern: {pattern}"))
1491
1482
  except Exception as e:
@@ -1495,8 +1486,7 @@ def policy_deny(pattern):
1495
1486
  @policy.command("list")
1496
1487
  def policy_list():
1497
1488
  """List current policies."""
1498
- policy_file = LOCAL_DIR / "config" / "policy.txt"
1499
- if not policy_file.exists():
1489
+ if not POLICY_FILE.exists():
1500
1490
  click.echo(dim(" No policies configured"))
1501
1491
  return
1502
1492
 
@@ -1504,7 +1494,7 @@ def policy_list():
1504
1494
  click.echo(bold(" Data Access Policies"))
1505
1495
  click.echo(hr())
1506
1496
  try:
1507
- with open(policy_file, "r") as f:
1497
+ with open(POLICY_FILE, "r") as f:
1508
1498
  for line in f:
1509
1499
  line = line.strip()
1510
1500
  if line.startswith("ALLOW "):
@@ -1518,14 +1508,13 @@ def policy_list():
1518
1508
 
1519
1509
  def check_policy(text):
1520
1510
  """Check if text passes policy filters. Returns True if allowed."""
1521
- policy_file = LOCAL_DIR / "config" / "policy.txt"
1522
- if not policy_file.exists():
1511
+ if not POLICY_FILE.exists():
1523
1512
  return True # No policies = allow all
1524
1513
 
1525
1514
  allow_patterns = []
1526
1515
  deny_patterns = []
1527
1516
  try:
1528
- with open(policy_file, "r") as f:
1517
+ with open(POLICY_FILE, "r") as f:
1529
1518
  for line in f:
1530
1519
  line = line.strip()
1531
1520
  if line.startswith("ALLOW "):
@@ -2574,6 +2563,55 @@ def check(as_json, fail_fast):
2574
2563
  sys.exit(0 if passed_all else 1)
2575
2564
 
2576
2565
 
2566
+ # ─────────────────────────────────────────────
2567
+ # INIT
2568
+ # ─────────────────────────────────────────────
2569
+
2570
+ @cli.command("init")
2571
+ def init():
2572
+ """First-run setup — create ~/.cda/ directory structure and validate environment."""
2573
+ from cda.kernel.paths import (
2574
+ CDA_HOME, DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR,
2575
+ PMF_DIR, PMF_LOG_DIR, CONFIG_DIR, POLICY_FILE,
2576
+ )
2577
+ import os
2578
+
2579
+ click.echo()
2580
+ click.echo(bold(" Code Data Ark — init"))
2581
+ click.echo(hr())
2582
+
2583
+ # Create directory tree
2584
+ dirs = [DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR]
2585
+ for d in dirs:
2586
+ d.mkdir(parents=True, exist_ok=True)
2587
+ click.echo(green(f" {d}"))
2588
+
2589
+ # Write a starter policy file if none exists
2590
+ if not POLICY_FILE.exists():
2591
+ POLICY_FILE.write_text("# CDA access policy\n# ALLOW <pattern>\n# DENY <pattern>\n")
2592
+ click.echo(green(f" {POLICY_FILE} (created)"))
2593
+
2594
+ # Validate VS Code data dir
2595
+ vscode_data = Path(os.environ.get(
2596
+ "VSCODE_DATA_DIR",
2597
+ Path.home() / "Library/Application Support/Code/User",
2598
+ ))
2599
+ if vscode_data.exists():
2600
+ click.echo(green(f" VS Code data dir: {vscode_data}"))
2601
+ else:
2602
+ click.echo(yellow(f" VS Code data dir not found: {vscode_data}"))
2603
+ click.echo(yellow(" Set VSCODE_DATA_DIR if your data is elsewhere."))
2604
+
2605
+ click.echo()
2606
+ click.echo(bold(" CDA_HOME: ") + str(CDA_HOME))
2607
+ click.echo()
2608
+ click.echo(dim(" Next steps:"))
2609
+ click.echo(dim(" cda sync — ingest all VS Code session data"))
2610
+ click.echo(dim(" cda watch start — start the live watcher daemon"))
2611
+ click.echo(dim(" cda serve — open the web dashboard on :10001"))
2612
+ click.echo()
2613
+
2614
+
2577
2615
  # ─────────────────────────────────────────────
2578
2616
  # ENTRY
2579
2617
  # ─────────────────────────────────────────────
@@ -11,18 +11,14 @@ import threading
11
11
  import time
12
12
  import traceback
13
13
  import subprocess
14
+ import sys
14
15
  import socket
15
16
  from typing import Any, Dict
16
- from pathlib import Path
17
17
  from datetime import datetime
18
18
  from wsgiref.simple_server import make_server, WSGIServer
19
19
  from urllib.parse import parse_qs
20
20
  from cda.kernel.pmf_kernel import PMFKernel
21
-
22
- # Get DB path relative to this file
23
- PACKAGE_DIR = Path(__file__).resolve().parent
24
- LOCAL_DIR = PACKAGE_DIR.parent.parent.parent / "local"
25
- DB_PATH = LOCAL_DIR / "data" / "cda.db"
21
+ from cda.kernel.paths import DB_PATH
26
22
  kernel = PMFKernel()
27
23
 
28
24
  # ─────────────────────────────────────────────
@@ -1396,28 +1392,28 @@ def run_action_background(action_id, action_name):
1396
1392
  try:
1397
1393
  if action_name == "sync":
1398
1394
  result = subprocess.run(
1399
- ["python3", str(PACKAGE_DIR.parent / "pipeline" / "ingest.py")],
1395
+ [sys.executable, "-m", "cda.pipeline.ingest"],
1400
1396
  capture_output=True,
1401
1397
  text=True,
1402
1398
  timeout=300
1403
1399
  )
1404
1400
  elif action_name == "reconstruct":
1405
1401
  result = subprocess.run(
1406
- ["python3", str(PACKAGE_DIR.parent / "pipeline" / "reconstruct.py")],
1402
+ [sys.executable, "-m", "cda.pipeline.reconstruct"],
1407
1403
  capture_output=True,
1408
1404
  text=True,
1409
1405
  timeout=300
1410
1406
  )
1411
1407
  elif action_name == "embed-build":
1412
1408
  result = subprocess.run(
1413
- ["python3", str(PACKAGE_DIR.parent / "pipeline" / "embed.py"), "build"],
1409
+ [sys.executable, "-m", "cda.pipeline.embed", "build"],
1414
1410
  capture_output=True,
1415
1411
  text=True,
1416
1412
  timeout=600
1417
1413
  )
1418
1414
  elif action_name == "watch-start":
1419
1415
  result = subprocess.run(
1420
- ["python3", str(PACKAGE_DIR.parent / "pipeline" / "watcher.py"), "start"],
1416
+ [sys.executable, "-m", "cda.pipeline.watcher", "start"],
1421
1417
  capture_output=True,
1422
1418
  text=True,
1423
1419
  timeout=30
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.0.3] - 2026-05-11
9
+
10
+ ### Fixed
11
+ - **Install path resolution**: `LOCAL_DIR`/`DB_PATH` no longer derived from `__file__` — now resolves to `~/.cda/` (or `$CDA_HOME`). Survives `pip install` into site-packages.
12
+ - All pipeline stages (`ingest`, `reconstruct`, `extract`, `embed`, `watcher`, `parse_edits`) import canonical paths from new `cda.kernel.paths` module.
13
+ - `PMFKernel` and `selfcheck` updated to use `cda.kernel.paths`.
14
+ - All subprocess pipeline invocations switched from script file paths to `python -m cda.pipeline.<stage>` module calls.
15
+
16
+ ### Added
17
+ - `cda.kernel.paths` — single source of truth for `CDA_HOME`, `DB_PATH`, `PID_FILE`, `QUEUE_DIR`, `POLICY_FILE`, `ensure_dirs()`.
18
+ - `cda init` command — first-run setup: creates `~/.cda/` directory tree, writes starter policy, validates VS Code data path.
19
+
20
+ ### Changed
21
+ - README quickstart now reflects correct install flow: `pip install` → `cda init` → `cda sync` → `cda watch start` → `cda serve`.
22
+
8
23
  ## [2.0.2] - 2026-05-11
9
24
 
10
25
  ### Fixed
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "code-data-ark"
7
- version = "2.0.2"
7
+ version = "2.0.3"
8
8
  description = "Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions"
9
9
  readme = "readme.md"
10
10
  license = "MIT"
@@ -78,39 +78,41 @@ make install-dev
78
78
 
79
79
  ## ⚡ Quick Start
80
80
 
81
- 1. **Initialize the database**
81
+ 1. **Install**
82
82
 
83
83
  ```bash
84
- cda sync
84
+ pip install code-data-ark
85
85
  ```
86
86
 
87
- 2. **Start the watcher daemon**
87
+ 2. **Initialize — create `~/.cda/` and validate your VS Code data path**
88
88
 
89
89
  ```bash
90
- cda watch start
90
+ cda init
91
91
  ```
92
92
 
93
- 3. **Inspect the PMF runtime services**
93
+ 3. **Ingest all VS Code session data**
94
94
 
95
95
  ```bash
96
- cda pmf services
96
+ cda sync
97
97
  ```
98
98
 
99
- 4. **Build semantic intelligence**
99
+ 4. **Start the live watcher daemon**
100
100
 
101
101
  ```bash
102
- cda embed build
102
+ cda watch start
103
103
  ```
104
104
 
105
- 4. **Start the web UI**
105
+ 5. **Open the web dashboard**
106
106
 
107
107
  ```bash
108
- cda ui start
108
+ cda serve # → http://127.0.0.1:10001
109
109
  ```
110
110
 
111
- 5. **Open your browser**
111
+ 6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
112
112
 
113
- Visit `http://127.0.0.1:10001`
113
+ ```bash
114
+ cda embed build
115
+ ```
114
116
 
115
117
  ## 🌐 Web UI
116
118
 
@@ -0,0 +1 @@
1
+ 2.0.3
@@ -1 +0,0 @@
1
- 2.0.2
File without changes
File without changes
File without changes
File without changes