PyPI - code-data-ark - Versions diffs - 2.0.2__tar.gz → 2.0.3__tar.gz - Mend

code-data-ark 2.0.2__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-data-ark
-Version: 2.0.2
+Version: 2.0.3
 Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
 Project-URL: Homepage, https://github.com/goCosmix/cda
 Project-URL: Repository, https://github.com/goCosmix/cda.git
@@ -121,39 +121,41 @@ make install-dev
 ## ⚡ Quick Start
-1. **Initialize the database**
+1. **Install**
 ```bash
-cda sync
+pip install code-data-ark
 ```
-2. **Start the watcher daemon**
+2. **Initialize — create `~/.cda/` and validate your VS Code data path**
 ```bash
-cda watch start
+cda init
 ```
-3. **Inspect the PMF runtime services**
+3. **Ingest all VS Code session data**
 ```bash
-cda pmf services
+cda sync
 ```
-4. **Build semantic intelligence**
+4. **Start the live watcher daemon**
 ```bash
-cda embed build
+cda watch start
 ```
-4. **Start the web UI**
+5. **Open the web dashboard**
 ```bash
-cda ui start
+cda serve   # → http://127.0.0.1:10001
 ```
-5. **Open your browser**
+6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
-Visit `http://127.0.0.1:10001`
+```bash
+cda embed build
+```
 ## 🌐 Web UI

code_data_ark-2.0.3/cda/kernel/paths.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""
+cda.kernel.paths — canonical path resolution for Code Data Ark.
+CDA_HOME is the single root for all runtime state (DB, PID files, logs,
+queue, PMF runtime).  It is resolved exactly once at import time via:
+  1. CDA_HOME environment variable (absolute path)
+  2. ~/.cda/  (default — survives pip install, editable install, CI)
+Pipeline stages and the CLI all import from here so every module agrees
+on the same paths regardless of where the package is installed.
+"""
+import os
+from pathlib import Path
+# ── home resolution ──────────────────────────────────────────────────────────
+def get_cda_home() -> Path:
+    """Return the CDA home directory, creating it if it doesn't exist."""
+    env = os.environ.get("CDA_HOME")
+    if env:
+        home = Path(env).expanduser().resolve()
+    else:
+        home = Path.home() / ".cda"
+    home.mkdir(parents=True, exist_ok=True)
+    return home
+# ── canonical paths (module-level constants, computed once) ─────────────────
+CDA_HOME   = get_cda_home()
+LOCAL_DIR  = CDA_HOME                         # CDA_HOME *is* the local root
+DATA_DIR   = CDA_HOME / "data"
+RUN_DIR    = CDA_HOME / "run"
+LOG_DIR    = CDA_HOME / "logs"
+QUEUE_DIR  = CDA_HOME / "queue"
+PMF_DIR    = CDA_HOME / "pmf"
+CONFIG_DIR = CDA_HOME / "config"
+DB_PATH        = DATA_DIR / "cda.db"
+PID_FILE       = RUN_DIR / "watcher.pid"
+UI_PID_FILE    = RUN_DIR / "ui.pid"
+UI_LOG_FILE    = LOG_DIR / "ui.log"
+POLICY_FILE    = CONFIG_DIR / "policy.txt"
+PMF_LOG_DIR    = PMF_DIR / "logs"
+RUNTIME_FILE   = PMF_DIR / "runtime.json"
+def ensure_dirs() -> None:
+    """Create all runtime directories. Safe to call multiple times."""
+    for d in (DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR):
+        d.mkdir(parents=True, exist_ok=True)

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/pmf_kernel.py RENAMED Viewed

@@ -8,19 +8,16 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, List, Optional
-ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-PACKAGE_DIR = Path(__file__).resolve().parent
-RUNTIME_FILE = LOCAL_DIR / "pmf" / "runtime.json"
-LOG_DIR = LOCAL_DIR / "pmf" / "logs"
-WATCHER_PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
-UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
+from cda.kernel.paths import (
+    LOG_DIR, RUNTIME_FILE, PMF_LOG_DIR,
+    PID_FILE as WATCHER_PID_FILE, UI_PID_FILE, CDA_HOME,
+    ensure_dirs,
+)
 DEFAULT_HOST = "127.0.0.1"
 DEFAULT_PORT = 10001
-(LOCAL_DIR / "data").mkdir(parents=True, exist_ok=True)
-(LOCAL_DIR / "run").mkdir(parents=True, exist_ok=True)
-LOG_DIR.mkdir(parents=True, exist_ok=True)
+ensure_dirs()
 def now_ts():
@@ -58,7 +55,7 @@ class ServiceSpec:
             ]
         if self.service_id == "watcher":
-            return [sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "watcher.py")]
+            return [sys.executable, "-m", "cda.pipeline.watcher"]
         if self.command is not None:
             return list(self.command)
@@ -72,9 +69,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
         label="Watcher Daemon",
         service_type="daemon",
         description="Live VS Code data watcher and incremental ingest process.",
-        cwd=ROOT_DIR,
+        cwd=CDA_HOME,
         pid_file=WATCHER_PID_FILE,
-        log_file=LOCAL_DIR / "logs" / "watcher.log",
+        log_file=LOG_DIR / "watcher.log",
         allowed_actions=["start", "stop", "restart", "status"],
     ),
     "ui": ServiceSpec(
@@ -82,9 +79,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
         label="Web UI",
         service_type="daemon",
         description="Local web dashboard for Ark runtime and session analytics.",
-        cwd=ROOT_DIR,
+        cwd=CDA_HOME,
         pid_file=UI_PID_FILE,
-        log_file=LOCAL_DIR / "logs" / "ui.log",
+        log_file=LOG_DIR / "ui.log",
         allowed_actions=["start", "stop", "restart", "status"],
     ),
     "sync": ServiceSpec(
@@ -92,9 +89,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
         label="Full Sync",
         service_type="task",
         description="Full ingest and rebuild pipeline for Ark data.",
-        command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "ingest.py")],
-        cwd=ROOT_DIR,
-        log_file=LOG_DIR / "sync.log",
+        command=[sys.executable, "-m", "cda.pipeline.ingest"],
+        cwd=CDA_HOME,
+        log_file=PMF_LOG_DIR / "sync.log",
         allowed_actions=["start", "status"],
     ),
     "reconstruct": ServiceSpec(
@@ -102,9 +99,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
         label="Reconstruct",
         service_type="task",
         description="Reconstruct conversations and rebuild the full text search index.",
-        command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "reconstruct.py")],
-        cwd=ROOT_DIR,
-        log_file=LOG_DIR / "reconstruct.log",
+        command=[sys.executable, "-m", "cda.pipeline.reconstruct"],
+        cwd=CDA_HOME,
+        log_file=PMF_LOG_DIR / "reconstruct.log",
         allowed_actions=["start", "status"],
     ),
     "embed-build": ServiceSpec(
@@ -112,9 +109,9 @@ SERVICE_SPECS: Dict[str, ServiceSpec] = {
         label="Embed Build",
         service_type="task",
         description="Build semantic embeddings and session intelligence.",
-        command=[sys.executable, str(PACKAGE_DIR.parent / "pipeline" / "embed.py"), "build"],
-        cwd=ROOT_DIR,
-        log_file=LOG_DIR / "embed.log",
+        command=[sys.executable, "-m", "cda.pipeline.embed", "build"],
+        cwd=CDA_HOME,
+        log_file=PMF_LOG_DIR / "embed.log",
         allowed_actions=["start", "status"],
     ),
 }
@@ -263,8 +260,8 @@ class PMFKernel:
         with open(log_file, "a") as fh:
             proc = subprocess.Popen(
                 command,
-                cwd=spec.cwd or ROOT_DIR,
-                env={**os.environ, **(spec.env or {}), "PYTHONPATH": str(ROOT_DIR)},
+                cwd=spec.cwd or CDA_HOME,
+                env={**os.environ, **(spec.env or {})},
                 stdout=fh,
                 stderr=fh,
                 preexec_fn=os.setsid if spec.service_type == "daemon" else None,

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/selfcheck.py RENAMED Viewed

@@ -26,15 +26,12 @@ import subprocess
 import sys
 from pathlib import Path
-# ── paths the system knows about itself ─────────────────────────────────────
+from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR
 PACKAGE_DIR  = Path(__file__).resolve().parent
 SOURCE_DIR   = PACKAGE_DIR.parent.parent          # source/  — tracked repo root
-PROJECT_DIR  = PACKAGE_DIR.parent.parent.parent   # repo root — where layers live
-LOCAL_DIR    = PROJECT_DIR / "local"
-DB_PATH      = LOCAL_DIR / "data" / "cda.db"
-PID_FILE     = LOCAL_DIR / "run" / "watcher.pid"
-QUEUE_DIR    = LOCAL_DIR / "queue"
 VERSION_FILE = SOURCE_DIR / "version"
+GIT_ROOT     = SOURCE_DIR.parent                   # repo root — used for git check-ignore
 REQUIRED_TABLES = [
     "sessions", "exchanges", "tool_calls", "vfs", "workspaces",
@@ -231,7 +228,7 @@ def check_data_gitignored():
     try:
         result = subprocess.run(
             ["git", "check-ignore", "-q", "local"],
-            cwd=PROJECT_DIR,
+            cwd=GIT_ROOT,
             capture_output=True,
         )
         if result.returncode == 0:

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/embed.py RENAMED Viewed

@@ -10,12 +10,10 @@ This stage builds semantic embeddings and mini-intelligence artifacts:
 import json
 import sqlite3
-from pathlib import Path
 from typing import Dict, List, Optional, Tuple
-ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-DB_PATH = LOCAL_DIR / "data" / "cda.db"
+from cda.kernel.paths import DB_PATH
 MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 MAX_EMBED_TEXT = 1400

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/extract.py RENAMED Viewed

@@ -26,9 +26,7 @@ from datetime import datetime
 from typing import Dict, List, Tuple, DefaultDict
 from collections import defaultdict
-ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-DB_PATH = LOCAL_DIR / "data" / "cda.db"
+from cda.kernel.paths import DB_PATH
 # ─────────────────────────────────────────────────────────
 # Signal patterns

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/ingest.py RENAMED Viewed

@@ -26,6 +26,11 @@ import time
 import logging
 from pathlib import Path
+from cda.kernel.paths import DB_PATH, ensure_dirs
+# Ensure local dirs are present before writing
+ensure_dirs()
 # Set up logging
 logging.basicConfig(
     level=logging.INFO,
@@ -39,9 +44,6 @@ HOME        = Path.home()
 VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", HOME / "Library/Application Support/Code/User"))
 VS_STORAGE  = VSCODE_DATA_DIR / "workspaceStorage"
 GLOBAL_MEM  = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
-ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-DB_PATH  = LOCAL_DIR / "data" / "cda.db"
 # Large index DBs — too big to blob, record path only
 SKIP_BLOB_PATTERNS = ["workspace-chunks.db", "local-index"]

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/parse_edits.py RENAMED Viewed

@@ -33,11 +33,7 @@ Edit rounds: len(checkpoints) - 1  (first is always "Initial State")
 import sqlite3
 import gzip
 import json
-from pathlib import Path
-ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-DB_PATH = LOCAL_DIR / "data" / "cda.db"
+from cda.kernel.paths import DB_PATH
 SCHEMA = """
 CREATE TABLE IF NOT EXISTS edit_sessions (

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/reconstruct.py RENAMED Viewed

@@ -19,9 +19,8 @@ import time
 from typing import Optional
 from pathlib import Path
-ROOT_DIR = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-DB_PATH = LOCAL_DIR / "data" / "cda.db"
+from cda.kernel.paths import DB_PATH
 NOW_MS  = int(time.time() * 1000)
 EXCHANGES_SCHEMA = """

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/watcher.py RENAMED Viewed

@@ -38,17 +38,14 @@ except ImportError:
     print("ERROR: watchfiles not installed. Run: pip install watchfiles")
     sys.exit(1)
-ROOT_DIR  = Path(__file__).resolve().parent.parent.parent.parent
-LOCAL_DIR = ROOT_DIR / "local"
-DB_PATH   = LOCAL_DIR / "data" / "cda.db"
-PID_FILE  = LOCAL_DIR / "run" / "watcher.pid"
-QUEUE_DIR = LOCAL_DIR / "queue"
+from cda.kernel.paths import DB_PATH, PID_FILE, QUEUE_DIR, LOG_DIR, ensure_dirs
 # Allow override via env var for portability
 VSCODE_DATA_DIR = Path(os.environ.get("VSCODE_DATA_DIR", Path.home() / "Library/Application Support/Code/User"))
 VS_ROOT   = VSCODE_DATA_DIR / "workspaceStorage"
 GLOBAL_MEM = VSCODE_DATA_DIR / "globalStorage/github.copilot-chat/memory-tool/memories"
-log_file = LOCAL_DIR / "logs" / "watcher.log"
+ensure_dirs()
+log_file = LOG_DIR / "watcher.log"
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s  %(levelname)-7s  %(message)s",

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/cli.py RENAMED Viewed

@@ -25,6 +25,7 @@ Commands:
   cda pmf restart <service>  Restart a service
   cda pmf logs <service>     Tail service logs
   cda check                  Run a full self-diagnostic. The system checks itself.
+  cda init                   First-run setup — create ~/.cda/ and validate environment
   cda serve                  Start the local web UI on port 10001
   cda sync                   Full re-ingest from disk (rebuilds entire DB)
   cda reconstruct            Re-run reconstruction and FTS rebuild only
@@ -62,22 +63,15 @@ import datetime
 from pathlib import Path
 from cda.pipeline.reconstruct import decompress_vfs
 from cda.kernel.pmf_kernel import PMFKernel, PMFKernelError
+from cda.kernel.paths import (
+    DB_PATH, PID_FILE, UI_PID_FILE, UI_LOG_FILE,
+    QUEUE_DIR, POLICY_FILE, ensure_dirs,
+)
 import click
-# Package-relative paths
-PACKAGE_DIR = Path(__file__).resolve().parent
-ARK_DIR = PACKAGE_DIR.parent.parent.parent
-LOCAL_DIR = ARK_DIR / "local"
-DB_PATH = LOCAL_DIR / "data" / "cda.db"
-PID_FILE = LOCAL_DIR / "run" / "watcher.pid"
-UI_PID_FILE = LOCAL_DIR / "run" / "ui.pid"
-UI_LOG_FILE = LOCAL_DIR / "logs" / "ui.log"
-WATCHER = PACKAGE_DIR.parent / "pipeline" / "watcher.py"
-INGEST = PACKAGE_DIR.parent / "pipeline" / "ingest.py"
-RECON = PACKAGE_DIR.parent / "pipeline" / "reconstruct.py"
-EXTRACT = PACKAGE_DIR.parent / "pipeline" / "extract.py"
-EMBED = PACKAGE_DIR.parent / "pipeline" / "embed.py"
+# Ensure runtime dirs exist on every CLI invocation
+ensure_dirs()
 kernel = PMFKernel()
@@ -333,14 +327,13 @@ def status():
         click.echo(f"  Start with: {bold('cda watch start')}")
     # Queue status
-    queue_dir = LOCAL_DIR / "queue"
-    if queue_dir.exists():
-        pending = len(list(queue_dir.glob("*.json")))
-        completed = len(list(queue_dir.glob("*.completed")))
+    if QUEUE_DIR.exists():
+        pending = len(list(QUEUE_DIR.glob("*.json")))
+        completed = len(list(QUEUE_DIR.glob("*.completed")))
         click.echo(f"  Queue: {pending} pending, {completed} completed")
         if pending > 0:
             # Show last pending operation
-            pending_files = sorted(queue_dir.glob("*.json"))
+            pending_files = sorted(QUEUE_DIR.glob("*.json"))
             if pending_files:
                 try:
                     data = json.loads(pending_files[-1].read_text())
@@ -546,7 +539,7 @@ def embed():
 def embed_build():
     """Build semantic embeddings and session intelligence."""
     click.echo(yellow("  Building semantic intelligence..."))
-    result = subprocess.run([sys.executable, str(EMBED)], capture_output=False)
+    result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
     if result.returncode == 0:
         click.echo(green("  Embed build complete"))
     else:
@@ -754,7 +747,7 @@ def sync():
     errors = 0
     click.echo(yellow("  Running full ingest — this rewrites the DB..."))
-    result = subprocess.run([sys.executable, str(INGEST)], capture_output=False)
+    result = subprocess.run([sys.executable, "-m", "cda.pipeline.ingest"], capture_output=False)
     if result.returncode != 0:
         click.echo(red("  Ingest failed"))
         finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="ingest failed")
@@ -763,7 +756,7 @@ def sync():
     click.echo(green("  Ingest complete"))
     click.echo(yellow("  Running reconstruction..."))
-    result = subprocess.run([sys.executable, str(RECON)], capture_output=False)
+    result = subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
     if result.returncode != 0:
         click.echo(red("  Reconstruction failed"))
         finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="reconstruct failed")
@@ -772,7 +765,7 @@ def sync():
     click.echo(green("  Reconstruction complete"))
     click.echo(yellow("  Running analysis..."))
-    result = subprocess.run([sys.executable, str(EXTRACT)], capture_output=False)
+    result = subprocess.run([sys.executable, "-m", "cda.pipeline.extract"], capture_output=False)
     if result.returncode != 0:
         click.echo(red("  Analysis failed"))
         finish_run(run_id, stages_done, {}, errors=1, exit_code=1, notes="extract failed")
@@ -781,7 +774,7 @@ def sync():
     click.echo(green("  Analysis complete"))
     click.echo(yellow("  Running semantic intelligence..."))
-    result = subprocess.run([sys.executable, str(EMBED)], capture_output=False)
+    result = subprocess.run([sys.executable, "-m", "cda.pipeline.embed"], capture_output=False)
     if result.returncode != 0:
         click.echo(red("  Semantic intelligence failed"))
         errors += 1
@@ -809,7 +802,7 @@ def sync():
 def reconstruct():
     """Re-run session reconstruction and FTS rebuild only."""
     click.echo(yellow("  Reconstructing exchanges..."))
-    subprocess.run([sys.executable, str(RECON)], capture_output=False)
+    subprocess.run([sys.executable, "-m", "cda.pipeline.reconstruct"], capture_output=False)
     click.echo(green("  Done"))
@@ -1470,9 +1463,8 @@ def policy():
 def policy_allow(pattern):
     """Add an allow pattern for search results."""
     # For now, store in a simple text file
-    policy_file = LOCAL_DIR / "config" / "policy.txt"
     try:
-        with open(policy_file, "a") as f:
+        with open(POLICY_FILE, "a") as f:
             f.write(f"ALLOW {pattern}\n")
         click.echo(green(f"  Added allow pattern: {pattern}"))
     except Exception as e:
@@ -1483,9 +1475,8 @@ def policy_allow(pattern):
 @click.argument("pattern")
 def policy_deny(pattern):
     """Add a deny pattern for search results."""
-    policy_file = LOCAL_DIR / "config" / "policy.txt"
     try:
-        with open(policy_file, "a") as f:
+        with open(POLICY_FILE, "a") as f:
             f.write(f"DENY {pattern}\n")
         click.echo(green(f"  Added deny pattern: {pattern}"))
     except Exception as e:
@@ -1495,8 +1486,7 @@ def policy_deny(pattern):
 @policy.command("list")
 def policy_list():
     """List current policies."""
-    policy_file = LOCAL_DIR / "config" / "policy.txt"
-    if not policy_file.exists():
+    if not POLICY_FILE.exists():
         click.echo(dim("  No policies configured"))
         return
@@ -1504,7 +1494,7 @@ def policy_list():
     click.echo(bold("  Data Access Policies"))
     click.echo(hr())
     try:
-        with open(policy_file, "r") as f:
+        with open(POLICY_FILE, "r") as f:
             for line in f:
                 line = line.strip()
                 if line.startswith("ALLOW "):
@@ -1518,14 +1508,13 @@ def policy_list():
 def check_policy(text):
     """Check if text passes policy filters. Returns True if allowed."""
-    policy_file = LOCAL_DIR / "config" / "policy.txt"
-    if not policy_file.exists():
+    if not POLICY_FILE.exists():
         return True  # No policies = allow all
     allow_patterns = []
     deny_patterns = []
     try:
-        with open(policy_file, "r") as f:
+        with open(POLICY_FILE, "r") as f:
             for line in f:
                 line = line.strip()
                 if line.startswith("ALLOW "):
@@ -2574,6 +2563,55 @@ def check(as_json, fail_fast):
     sys.exit(0 if passed_all else 1)
+# ─────────────────────────────────────────────
+# INIT
+# ─────────────────────────────────────────────
+@cli.command("init")
+def init():
+    """First-run setup — create ~/.cda/ directory structure and validate environment."""
+    from cda.kernel.paths import (
+        CDA_HOME, DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR,
+        PMF_DIR, PMF_LOG_DIR, CONFIG_DIR, POLICY_FILE,
+    )
+    import os
+    click.echo()
+    click.echo(bold("  Code Data Ark — init"))
+    click.echo(hr())
+    # Create directory tree
+    dirs = [DATA_DIR, RUN_DIR, LOG_DIR, QUEUE_DIR, PMF_DIR, PMF_LOG_DIR, CONFIG_DIR]
+    for d in dirs:
+        d.mkdir(parents=True, exist_ok=True)
+        click.echo(green(f"  {d}"))
+    # Write a starter policy file if none exists
+    if not POLICY_FILE.exists():
+        POLICY_FILE.write_text("# CDA access policy\n# ALLOW <pattern>\n# DENY  <pattern>\n")
+        click.echo(green(f"  {POLICY_FILE}  (created)"))
+    # Validate VS Code data dir
+    vscode_data = Path(os.environ.get(
+        "VSCODE_DATA_DIR",
+        Path.home() / "Library/Application Support/Code/User",
+    ))
+    if vscode_data.exists():
+        click.echo(green(f"  VS Code data dir: {vscode_data}"))
+    else:
+        click.echo(yellow(f"  VS Code data dir not found: {vscode_data}"))
+        click.echo(yellow("  Set VSCODE_DATA_DIR if your data is elsewhere."))
+    click.echo()
+    click.echo(bold("  CDA_HOME: ") + str(CDA_HOME))
+    click.echo()
+    click.echo(dim("  Next steps:"))
+    click.echo(dim("    cda sync         — ingest all VS Code session data"))
+    click.echo(dim("    cda watch start  — start the live watcher daemon"))
+    click.echo(dim("    cda serve        — open the web dashboard on :10001"))
+    click.echo()
 # ─────────────────────────────────────────────
 # ENTRY
 # ─────────────────────────────────────────────

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/web.py RENAMED Viewed

@@ -11,18 +11,14 @@ import threading
 import time
 import traceback
 import subprocess
+import sys
 import socket
 from typing import Any, Dict
-from pathlib import Path
 from datetime import datetime
 from wsgiref.simple_server import make_server, WSGIServer
 from urllib.parse import parse_qs
 from cda.kernel.pmf_kernel import PMFKernel
-# Get DB path relative to this file
-PACKAGE_DIR = Path(__file__).resolve().parent
-LOCAL_DIR = PACKAGE_DIR.parent.parent.parent / "local"
-DB_PATH = LOCAL_DIR / "data" / "cda.db"
+from cda.kernel.paths import DB_PATH
 kernel = PMFKernel()
 # ─────────────────────────────────────────────
@@ -1396,28 +1392,28 @@ def run_action_background(action_id, action_name):
     try:
         if action_name == "sync":
             result = subprocess.run(
-                ["python3", str(PACKAGE_DIR.parent / "pipeline" / "ingest.py")],
+                [sys.executable, "-m", "cda.pipeline.ingest"],
                 capture_output=True,
                 text=True,
                 timeout=300
             )
         elif action_name == "reconstruct":
             result = subprocess.run(
-                ["python3", str(PACKAGE_DIR.parent / "pipeline" / "reconstruct.py")],
+                [sys.executable, "-m", "cda.pipeline.reconstruct"],
                 capture_output=True,
                 text=True,
                 timeout=300
             )
         elif action_name == "embed-build":
             result = subprocess.run(
-                ["python3", str(PACKAGE_DIR.parent / "pipeline" / "embed.py"), "build"],
+                [sys.executable, "-m", "cda.pipeline.embed", "build"],
                 capture_output=True,
                 text=True,
                 timeout=600
             )
         elif action_name == "watch-start":
             result = subprocess.run(
-                ["python3", str(PACKAGE_DIR.parent / "pipeline" / "watcher.py"), "start"],
+                [sys.executable, "-m", "cda.pipeline.watcher", "start"],
                 capture_output=True,
                 text=True,
                 timeout=30

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/changelog.md RENAMED Viewed

@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [2.0.3] - 2026-05-11
+### Fixed
+- **Install path resolution**: `LOCAL_DIR`/`DB_PATH` no longer derived from `__file__` — now resolves to `~/.cda/` (or `$CDA_HOME`). Survives `pip install` into site-packages.
+- All pipeline stages (`ingest`, `reconstruct`, `extract`, `embed`, `watcher`, `parse_edits`) import canonical paths from new `cda.kernel.paths` module.
+- `PMFKernel` and `selfcheck` updated to use `cda.kernel.paths`.
+- All subprocess pipeline invocations switched from script file paths to `python -m cda.pipeline.<stage>` module calls.
+### Added
+- `cda.kernel.paths` — single source of truth for `CDA_HOME`, `DB_PATH`, `PID_FILE`, `QUEUE_DIR`, `POLICY_FILE`, `ensure_dirs()`.
+- `cda init` command — first-run setup: creates `~/.cda/` directory tree, writes starter policy, validates VS Code data path.
+### Changed
+- README quickstart now reflects correct install flow: `pip install` → `cda init` → `cda sync` → `cda watch start` → `cda serve`.
 ## [2.0.2] - 2026-05-11
 ### Fixed

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "code-data-ark"
-version = "2.0.2"
+version = "2.0.3"
 description = "Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions"
 readme = "readme.md"
 license = "MIT"

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/readme.md RENAMED Viewed

@@ -78,39 +78,41 @@ make install-dev
 ## ⚡ Quick Start
-1. **Initialize the database**
+1. **Install**
 ```bash
-cda sync
+pip install code-data-ark
 ```
-2. **Start the watcher daemon**
+2. **Initialize — create `~/.cda/` and validate your VS Code data path**
 ```bash
-cda watch start
+cda init
 ```
-3. **Inspect the PMF runtime services**
+3. **Ingest all VS Code session data**
 ```bash
-cda pmf services
+cda sync
 ```
-4. **Build semantic intelligence**
+4. **Start the live watcher daemon**
 ```bash
-cda embed build
+cda watch start
 ```
-4. **Start the web UI**
+5. **Open the web dashboard**
 ```bash
-cda ui start
+cda serve   # → http://127.0.0.1:10001
 ```
-5. **Open your browser**
+6. **Build semantic intelligence** (optional, requires `sentence-transformers`)
-Visit `http://127.0.0.1:10001`
+```bash
+cda embed build
+```
 ## 🌐 Web UI

code_data_ark-2.0.3/version ADDED Viewed

@@ -0,0 +1 @@
+2.0.3

code_data_ark-2.0.2/version DELETED Viewed

@@ -1 +0,0 @@
-2.0.2

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/.flake8 RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/.github/workflows/ci.yml RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/.gitignore RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/bin/release.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/__init__.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/__init__.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/kernel/control_db.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/pipeline/__init__.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/cda/ui/__init__.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/contributing.md RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/architecture.md RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/examples/usage.md RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/pmf_kernel.md RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/docs/roadmap.md RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/license RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/makefile RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/tests/test_basic.py RENAMED Viewed

File without changes

{code_data_ark-2.0.2 → code_data_ark-2.0.3}/tests/test_selfcheck.py RENAMED Viewed

File without changes

code-data-ark 2.0.2__tar.gz → 2.0.3__tar.gz

code-data-ark 2.0.2__tar.gz → 2.0.3__tar.gz