code-data-ark 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_data_ark-2.0.2/.flake8 +19 -0
- code_data_ark-2.0.2/.github/workflows/ci.yml +63 -0
- code_data_ark-2.0.2/.gitignore +27 -0
- code_data_ark-2.0.2/PKG-INFO +495 -0
- code_data_ark-2.0.2/bin/release.py +87 -0
- code_data_ark-2.0.2/cda/__init__.py +3 -0
- code_data_ark-2.0.2/cda/kernel/__init__.py +0 -0
- code_data_ark-2.0.2/cda/kernel/control_db.py +151 -0
- code_data_ark-2.0.2/cda/kernel/pmf_kernel.py +364 -0
- code_data_ark-2.0.2/cda/kernel/selfcheck.py +299 -0
- code_data_ark-2.0.2/cda/pipeline/__init__.py +0 -0
- code_data_ark-2.0.2/cda/pipeline/embed.py +694 -0
- code_data_ark-2.0.2/cda/pipeline/extract.py +1064 -0
- code_data_ark-2.0.2/cda/pipeline/ingest.py +673 -0
- code_data_ark-2.0.2/cda/pipeline/parse_edits.py +250 -0
- code_data_ark-2.0.2/cda/pipeline/reconstruct.py +536 -0
- code_data_ark-2.0.2/cda/pipeline/watcher.py +783 -0
- code_data_ark-2.0.2/cda/ui/__init__.py +0 -0
- code_data_ark-2.0.2/cda/ui/cli.py +2587 -0
- code_data_ark-2.0.2/cda/ui/web.py +2848 -0
- code_data_ark-2.0.2/changelog.md +142 -0
- code_data_ark-2.0.2/contributing.md +297 -0
- code_data_ark-2.0.2/docs/architecture.md +503 -0
- code_data_ark-2.0.2/docs/examples/usage.md +418 -0
- code_data_ark-2.0.2/docs/pmf_kernel.md +98 -0
- code_data_ark-2.0.2/docs/roadmap.md +82 -0
- code_data_ark-2.0.2/license +21 -0
- code_data_ark-2.0.2/makefile +58 -0
- code_data_ark-2.0.2/pyproject.toml +104 -0
- code_data_ark-2.0.2/readme.md +452 -0
- code_data_ark-2.0.2/tests/test_basic.py +115 -0
- code_data_ark-2.0.2/tests/test_selfcheck.py +352 -0
- code_data_ark-2.0.2/version +1 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[flake8]
|
|
2
|
+
max-line-length = 150
|
|
3
|
+
extend-ignore =
|
|
4
|
+
# Intentional aligned assignments (project style)
|
|
5
|
+
E221,
|
|
6
|
+
E272,
|
|
7
|
+
# Continuation line indent (black manages this)
|
|
8
|
+
E128,
|
|
9
|
+
# Inline comment spacing (minor)
|
|
10
|
+
E261,
|
|
11
|
+
# Multiple statements on one line (minor)
|
|
12
|
+
E701,
|
|
13
|
+
# Missing whitespace around operator in annotations
|
|
14
|
+
E225,
|
|
15
|
+
# Line break before binary operator (black preference)
|
|
16
|
+
W503,
|
|
17
|
+
per-file-ignores =
|
|
18
|
+
# Conditional import at bottom of watcher.py main guard
|
|
19
|
+
cda/pipeline/watcher.py:E402
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main, master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main, master ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
19
|
+
uses: actions/setup-python@v4
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: |
|
|
24
|
+
python -m pip install --upgrade pip
|
|
25
|
+
pip install -e ".[dev]"
|
|
26
|
+
- name: Lint with flake8
|
|
27
|
+
run: |
|
|
28
|
+
flake8 cda tests
|
|
29
|
+
- name: Type check with mypy
|
|
30
|
+
run: |
|
|
31
|
+
mypy cda
|
|
32
|
+
- name: Test with pytest
|
|
33
|
+
run: |
|
|
34
|
+
pytest tests/ -v --cov=cda --cov-report=xml
|
|
35
|
+
- name: Upload coverage to Codecov
|
|
36
|
+
uses: codecov/codecov-action@v3
|
|
37
|
+
with:
|
|
38
|
+
file: coverage.xml
|
|
39
|
+
flags: unittests
|
|
40
|
+
name: codecov-umbrella
|
|
41
|
+
fail_ci_if_error: false
|
|
42
|
+
|
|
43
|
+
build:
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
needs: test
|
|
46
|
+
|
|
47
|
+
steps:
|
|
48
|
+
- uses: actions/checkout@v4
|
|
49
|
+
- name: Set up Python
|
|
50
|
+
uses: actions/setup-python@v4
|
|
51
|
+
with:
|
|
52
|
+
python-version: "3.11"
|
|
53
|
+
- name: Install build dependencies
|
|
54
|
+
run: |
|
|
55
|
+
python -m pip install --upgrade pip
|
|
56
|
+
pip install build
|
|
57
|
+
- name: Build package
|
|
58
|
+
run: python -m build
|
|
59
|
+
- name: Store build artifacts
|
|
60
|
+
uses: actions/upload-artifact@v4
|
|
61
|
+
with:
|
|
62
|
+
name: dist
|
|
63
|
+
path: dist/
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Build outputs
|
|
2
|
+
dist/
|
|
3
|
+
build/
|
|
4
|
+
*.egg-info/
|
|
5
|
+
|
|
6
|
+
# Python
|
|
7
|
+
__pycache__/
|
|
8
|
+
*.py[cod]
|
|
9
|
+
*.pyo
|
|
10
|
+
|
|
11
|
+
# Environment
|
|
12
|
+
.env
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
|
|
17
|
+
# Type checking
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
|
|
20
|
+
# Test / coverage
|
|
21
|
+
.pytest_cache/
|
|
22
|
+
.coverage
|
|
23
|
+
coverage.xml
|
|
24
|
+
htmlcov/
|
|
25
|
+
|
|
26
|
+
# Editors
|
|
27
|
+
.DS_Store
|
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-data-ark
|
|
3
|
+
Version: 2.0.2
|
|
4
|
+
Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
|
|
5
|
+
Project-URL: Homepage, https://github.com/goCosmix/cda
|
|
6
|
+
Project-URL: Repository, https://github.com/goCosmix/cda.git
|
|
7
|
+
Project-URL: Issues, https://github.com/goCosmix/cda/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/goCosmix/cda#readme
|
|
9
|
+
Project-URL: Changelog, https://github.com/goCosmix/cda/blob/main/changelog.md
|
|
10
|
+
Author-email: Ernie Butcher <ernie@fiosii.com>
|
|
11
|
+
Maintainer-email: Ernie Butcher <ernie@fiosii.com>
|
|
12
|
+
License-Expression: MIT
|
|
13
|
+
License-File: license
|
|
14
|
+
Keywords: ai,analysis,behavioral,chat,conversation,copilot,heat-score,signals,vscode
|
|
15
|
+
Classifier: Development Status :: 3 - Alpha
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Classifier: Topic :: System :: Logging
|
|
27
|
+
Requires-Python: >=3.9
|
|
28
|
+
Requires-Dist: click>=8.0
|
|
29
|
+
Requires-Dist: numpy>=1.26
|
|
30
|
+
Requires-Dist: sentence-transformers>=2.2.2
|
|
31
|
+
Requires-Dist: watchfiles>=0.20
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: black; extra == 'dev'
|
|
34
|
+
Requires-Dist: flake8; extra == 'dev'
|
|
35
|
+
Requires-Dist: isort; extra == 'dev'
|
|
36
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
39
|
+
Provides-Extra: test
|
|
40
|
+
Requires-Dist: pytest-cov; extra == 'test'
|
|
41
|
+
Requires-Dist: pytest>=7.0; extra == 'test'
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
|
|
44
|
+
# Code Data Ark
|
|
45
|
+
|
|
46
|
+
[Python](https://www.python.org/downloads/)
|
|
47
|
+
[PyPI](https://pypi.org/project/code-data-ark)
|
|
48
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
49
|
+
|
|
50
|
+
**Code Data Ark** (`cda`) is a local observability and intelligence platform for VS Code + GitHub Copilot Chat sessions. It ingests everything VS Code writes to disk — transcripts, tool calls, VFS blobs, workspace state — and runs a multi-stage pipeline to turn that raw activity into structured data you can actually reason about.
|
|
51
|
+
|
|
52
|
+
The core insight is that your chat history is not just logs. It carries behavioral signals: moments you corrected the agent, redirected it, expressed frustration, or confirmed that something finally worked. Ark extracts those signals, scores session quality with a heat model, and surfaces the patterns — so you can understand how you work with AI, not just what was said.
|
|
53
|
+
|
|
54
|
+
On top of that signal layer, Ark builds a semantic intelligence layer: embeddings over all your sessions, full-text and code-symbol search, anomaly alerts, session summaries, and related-session discovery. All of this lives in a local SQLite database, queryable via a 40+ command CLI or a background web dashboard.
|
|
55
|
+
|
|
56
|
+
The runtime is managed by an embedded process kernel (PMF) that supervises the watcher daemon, web UI, and pipeline tasks as background services — giving the whole system a lifecycle you can control without touching a process manager.
|
|
57
|
+
|
|
58
|
+
**In short**: point it at your VS Code data directory, run `cda sync`, and you have a searchable, annotated, semantically indexed record of every Copilot session you've ever had — with behavioral scores and anomaly detection included.
|
|
59
|
+
|
|
60
|
+
## ✨ Key Capabilities
|
|
61
|
+
|
|
62
|
+
- **Multi-stage pipeline**: ingest → reconstruct → extract → embed — each stage enriches the data further
|
|
63
|
+
- **Behavioral signal detection**: 200+ keyword patterns across 6 signal types; frustration, correction, recovery
|
|
64
|
+
- **Heat scoring**: weighted session quality score (0–100) that tracks arc from friction to resolution
|
|
65
|
+
- **Semantic search**: miniLM embeddings over all sessions for similarity, related-session discovery, and topic clustering
|
|
66
|
+
- **Full-text search**: FTS5 index over all exchanges, tool calls, and code symbols
|
|
67
|
+
- **Live watcher daemon**: monitors VS Code directories, queues changes, replays on crash
|
|
68
|
+
- **Background web UI**: session drilldown, signal summaries, alert views, tool-call detail, VFS inspection
|
|
69
|
+
- **PMF Embedded Kernel**: local service lifecycle management — start, stop, restart, status for all Ark daemons
|
|
70
|
+
- **Export workflows**: JSON, JSONL, and plain-text session export
|
|
71
|
+
|
|
72
|
+
## 📋 Table of Contents
|
|
73
|
+
|
|
74
|
+
- [Installation](#installation)
|
|
75
|
+
- [Quick Start](#quick-start)
|
|
76
|
+
- [Web UI](#web-ui)
|
|
77
|
+
- [CLI Reference](#cli-reference)
|
|
78
|
+
- [Architecture](#architecture)
|
|
79
|
+
- [Roadmap](#roadmap)
|
|
80
|
+
- [Configuration](#configuration)
|
|
81
|
+
- [Development](#development)
|
|
82
|
+
- [Contributing](#contributing)
|
|
83
|
+
- [License](#license)
|
|
84
|
+
|
|
85
|
+
## 🚀 Installation
|
|
86
|
+
|
|
87
|
+
### Prerequisites
|
|
88
|
+
|
|
89
|
+
- Python 3.9+
|
|
90
|
+
- VS Code with the Copilot Chat extension installed
|
|
91
|
+
|
|
92
|
+
### Install from PyPI
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pip install code-data-ark
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Install with pipx
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pipx install code-data-ark
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Install from source
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
git clone https://github.com/goCosmix/cda.git
|
|
108
|
+
cd cda/source
|
|
109
|
+
pip install -e .
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Install development dependencies
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install -e ".[dev]"
|
|
116
|
+
# or
|
|
117
|
+
make install-dev
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
> The `cda` console command is installed into your active Python environment's `bin` directory. Activate your virtual environment before running `cda`.
|
|
121
|
+
|
|
122
|
+
## ⚡ Quick Start
|
|
123
|
+
|
|
124
|
+
1. **Initialize the database**
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
cda sync
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
2. **Start the watcher daemon**
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
cda watch start
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
3. **Inspect the PMF runtime services**
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
cda pmf services
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
4. **Build semantic intelligence**
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
cda embed build
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
5. **Start the web UI**
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
cda ui start
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
6. **Open your browser**
|
|
155
|
+
|
|
156
|
+
Visit `http://127.0.0.1:10001`
|
|
157
|
+
|
|
158
|
+
## 🌐 Web UI
|
|
159
|
+
|
|
160
|
+
- **Background service**: `cda ui start`
|
|
161
|
+
- **Stop service**: `cda ui stop`
|
|
162
|
+
- **Service status**: `cda ui status`
|
|
163
|
+
- **Foreground mode**: `cda serve`
|
|
164
|
+
|
|
165
|
+
The web UI includes:
|
|
166
|
+
|
|
167
|
+
- Session drilldown panels and charts
|
|
168
|
+
- Behavioral signal summaries
|
|
169
|
+
- Alert and recommendation views
|
|
170
|
+
- Searchable transcript and tool-call detail
|
|
171
|
+
- File/VFS browsing and raw session inspection
|
|
172
|
+
|
|
173
|
+
## 🧠 Core Features
|
|
174
|
+
|
|
175
|
+
- Behavioral signals with 200+ keyword patterns across six categories
|
|
176
|
+
- Frustration heat scoring and recovery analytics
|
|
177
|
+
- Full-text search and semantic search with embeddings
|
|
178
|
+
- Code symbol indexing for Python/JS/TS
|
|
179
|
+
- Incremental ingestion with crash-resilient queue replay
|
|
180
|
+
- Export workflows for JSON, JSONL, and text
|
|
181
|
+
|
|
182
|
+
## 📦 Package and Release
|
|
183
|
+
|
|
184
|
+
- Published on PyPI as `code-data-ark`
|
|
185
|
+
- Current release version: `2.0.2`
|
|
186
|
+
- CLI entry point: `cda`
|
|
187
|
+
- License: MIT
|
|
188
|
+
|
|
189
|
+
## 🛣 Roadmap
|
|
190
|
+
|
|
191
|
+
See `docs/roadmap.md` for product direction, milestone planning, and release priorities.
|
|
192
|
+
|
|
193
|
+
## 🤝 Contributing
|
|
194
|
+
|
|
195
|
+
See `contributing.md` for development setup, test guidance, and PR workflow.
|
|
196
|
+
|
|
197
|
+
## 📜 License
|
|
198
|
+
|
|
199
|
+
This project is licensed under the MIT License.
|
|
200
|
+
|
|
201
|
+
## 🧠 SQLite limits and mitigation
|
|
202
|
+
|
|
203
|
+
- **Single writer in WAL mode**: the system uses one writer process for ingest/reconstruct/extract/embed and allows many concurrent readers via SQLite WAL.
|
|
204
|
+
- **Large VFS blob handling**: for very large raw artifacts, the clean approach is chunked storage or external file references instead of a single enormous BLOB.
|
|
205
|
+
- **Default 8KB page size / cache**: this code now sets `PRAGMA cache_size=-2000`, `PRAGMA mmap_size=268435456`, and `PRAGMA temp_store=MEMORY` to improve read/cache performance on larger databases.
|
|
206
|
+
- **Further tuning**: rebuild the DB with a larger page size (e.g. `PRAGMA page_size=32768`) if you need more efficient storage for very large session history.
|
|
207
|
+
|
|
208
|
+
## 🔧 Configuration
|
|
209
|
+
|
|
210
|
+
- **VS Code Data Directory**: By default, assumes macOS paths (`~/Library/Application Support/Code/User`). Override with `export VSCODE_DATA_DIR=/path/to/vscode/data` (e.g., on Linux: `~/.config/Code/User`).
|
|
211
|
+
- **No other config needed**: Everything is CLI-driven with local SQLite.
|
|
212
|
+
|
|
213
|
+
## 🏗️ Architecture
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
VS Code Storage → ingest.py → vfs + sessions + transcripts
|
|
217
|
+
↓
|
|
218
|
+
reconstruct.py → exchanges (structured conversations)
|
|
219
|
+
↓
|
|
220
|
+
extract.py → signals + tokens + heat scores + analysis
|
|
221
|
+
↓
|
|
222
|
+
embed.py → semantic embeddings + summaries + alerts
|
|
223
|
+
↓
|
|
224
|
+
watcher.py → live sync + FTS indexing + queue resilience
|
|
225
|
+
↓
|
|
226
|
+
cda → query interface + policy enforcement
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Core Components
|
|
230
|
+
|
|
231
|
+
| Component | Purpose | Key Features |
|
|
232
|
+
|-----------|---------|--------------|
|
|
233
|
+
| **pipeline/ingest.py** | Data ingestion | VFS storage, gzip compression, session metadata |
|
|
234
|
+
| **pipeline/reconstruct.py** | Conversation processing | Exchange threading, tool call linking, FTS indexing |
|
|
235
|
+
| **pipeline/extract.py** | Signal analysis | Behavioral pattern recognition, heat scoring, token accounting |
|
|
236
|
+
| **pipeline/watcher.py** | Live monitoring | File watching, incremental updates, crash recovery |
|
|
237
|
+
| **pipeline/embed.py** | Semantic intelligence | Embeddings, session summaries, anomaly alerts |
|
|
238
|
+
| **kernel/pmf_kernel.py** | Service management | Daemon lifecycle, PID/log tracking, runtime state |
|
|
239
|
+
| **kernel/selfcheck.py** | System diagnostics | Health checks, install validation, DB integrity |
|
|
240
|
+
| **ui/cli.py** | CLI entry point | 40+ commands, policy filtering, rich formatting |
|
|
241
|
+
| **ui/web.py** | Web dashboard | Browser UI for all CLI features, service control |
|
|
242
|
+
|
|
243
|
+
### Database Schema
|
|
244
|
+
|
|
245
|
+
- **workspaces** - VS Code workspace metadata
|
|
246
|
+
- **sessions** - Chat session information and metadata
|
|
247
|
+
- **vfs** - Gzip-compressed file storage with SHA256 hashes
|
|
248
|
+
- **exchanges** - Structured conversation turns with tool calls
|
|
249
|
+
- **exchange_signals** - Behavioral signal annotations
|
|
250
|
+
- **symbols** - Code symbol index (functions, classes, etc.)
|
|
251
|
+
- **token_usage** - Per-request token consumption tracking
|
|
252
|
+
- **compactions** - Context window summarization events
|
|
253
|
+
- **session_analysis** - Aggregated session metrics and heat scores
|
|
254
|
+
|
|
255
|
+
## 🖥️ CLI Reference
|
|
256
|
+
|
|
257
|
+
### Core Commands
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
# System Management
|
|
261
|
+
cda status # Show daemon status and queue information
|
|
262
|
+
cda stats # System-wide statistics and coverage
|
|
263
|
+
cda sync # Full data ingestion and rebuild
|
|
264
|
+
cda reconstruct # Rebuild conversations and search index
|
|
265
|
+
cda pmf services # List embedded PMF runtime services
|
|
266
|
+
cda pmf status [service] # Show runtime status for PMF services
|
|
267
|
+
cda pmf start <service> # Start a PMF-managed Ark service
|
|
268
|
+
cda pmf stop <service> # Stop a PMF-managed Ark service
|
|
269
|
+
cda pmf restart <service> # Restart a PMF-managed Ark service
|
|
270
|
+
cda pmf logs <service> # Tail runtime logs for a PMF service
|
|
271
|
+
|
|
272
|
+
# Session Analysis
|
|
273
|
+
cda sessions # List all sessions (newest first)
|
|
274
|
+
cda session <id> # Show detailed session information
|
|
275
|
+
cda workspace <id> # Show sessions for a workspace
|
|
276
|
+
cda workspaces # List all workspaces
|
|
277
|
+
|
|
278
|
+
# Search & Query
|
|
279
|
+
cda search <query> # Full-text search across conversations
|
|
280
|
+
cda code-search <pattern> [--symbol] [--regex] # Search code symbols or code content
|
|
281
|
+
cda semantic-search <query> # Semantic search using embeddings
|
|
282
|
+
cda similar <session> # Find sessions similar to a session
|
|
283
|
+
cda related <session> # Alias for semantic related sessions
|
|
284
|
+
cda summarize <session> # Show session summary, topics, and recommendations
|
|
285
|
+
cda topics # List semantic topic tags
|
|
286
|
+
cda alerts <session> # Show semantic anomaly alerts
|
|
287
|
+
cda recommend <session> # Show session recommendations
|
|
288
|
+
cda tools <query> # Search tool call arguments
|
|
289
|
+
cda memory # Show memory files and global state
|
|
290
|
+
|
|
291
|
+
# Behavioral Analysis
|
|
292
|
+
cda signals [session] # Show behavioral signals
|
|
293
|
+
cda heat [session] # Frustration and heat analysis
|
|
294
|
+
cda behavior # Aggregate behavioral intelligence
|
|
295
|
+
cda saved # Sessions that recovered from high heat
|
|
296
|
+
|
|
297
|
+
# Data Export
|
|
298
|
+
cda export <session> # Export session as JSON/JSONL/text
|
|
299
|
+
cda replay <session> # Print conversation as readable text
|
|
300
|
+
|
|
301
|
+
# Advanced
|
|
302
|
+
cda query <sql> # Execute raw SQL queries
|
|
303
|
+
cda tokens [session] # Token usage analysis
|
|
304
|
+
cda compactions [session] # Context compaction events
|
|
305
|
+
cda edits # Edit session analytics
|
|
306
|
+
|
|
307
|
+
# Policy Management
|
|
308
|
+
cda policy allow <pattern> # Add allow pattern
|
|
309
|
+
cda policy deny <pattern> # Add deny pattern
|
|
310
|
+
cda policy list # Show current policies
|
|
311
|
+
|
|
312
|
+
# Live Monitoring
|
|
313
|
+
cda watch start # Start watcher daemon
|
|
314
|
+
cda watch stop # Stop watcher daemon
|
|
315
|
+
cda watch restart # Restart watcher daemon
|
|
316
|
+
cda ui start # Start web UI background service
|
|
317
|
+
cda ui stop # Stop web UI background service
|
|
318
|
+
cda ui status # Show web UI background service status
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Command Examples
|
|
322
|
+
|
|
323
|
+
```bash
|
|
324
|
+
# Search for error handling discussions
|
|
325
|
+
cda search "error handling" --limit 20
|
|
326
|
+
|
|
327
|
+
# Find sessions with high frustration
|
|
328
|
+
cda heat --limit 10
|
|
329
|
+
|
|
330
|
+
# Search for specific functions in code
|
|
331
|
+
cda code-search "def process_data" --symbol
|
|
332
|
+
|
|
333
|
+
# Search code content with regex or plain text
|
|
334
|
+
cda code-search "timeout" --regex
|
|
335
|
+
|
|
336
|
+
# Find semantically related sessions
|
|
337
|
+
cda related abc123
|
|
338
|
+
|
|
339
|
+
# Summarize a session with semantic topics and recommendations
|
|
340
|
+
cda summarize abc123
|
|
341
|
+
|
|
342
|
+
# Export a session for external analysis
|
|
343
|
+
cda export abc123 --format jsonl --output session.jsonl
|
|
344
|
+
|
|
345
|
+
# Monitor live sessions
|
|
346
|
+
cda watch start
|
|
347
|
+
cda status # Check queue status
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## 📊 Data Analysis
|
|
351
|
+
|
|
352
|
+
### Behavioral Signals
|
|
353
|
+
|
|
354
|
+
The system recognizes 6 signal types with 200+ keyword patterns:
|
|
355
|
+
|
|
356
|
+
| Signal Type | Weight | Description | Example Keywords |
|
|
357
|
+
|-------------|--------|-------------|------------------|
|
|
358
|
+
| **correction** | 3 | User correcting agent behavior | "stop", "wrong", "nope", "wait" |
|
|
359
|
+
| **pre_correction** | 2 | Early frustration signs | "actually", "hold on", "slow down" |
|
|
360
|
+
| **redirect** | 1 | User changing direction | "pivot", "change direction", "instead" |
|
|
361
|
+
| **affirmation** | 0 | Positive feedback | "good", "right", "perfect", "thanks" |
|
|
362
|
+
| **approval** | 0 | Task completion approval | "that works", "looks good", "approved" |
|
|
363
|
+
| **frustration** | 5 | Strong negative signals | "this is broken", "not working", "terrible" |
|
|
364
|
+
|
|
365
|
+
### Heat Score Algorithm
|
|
366
|
+
|
|
367
|
+
```
|
|
368
|
+
Heat Score = min(100, Σ(signal_weights))
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
- **Peak Heat**: Maximum heat reached in session
|
|
372
|
+
- **Final Heat**: Heat at session end
|
|
373
|
+
- **Recovery**: Sessions that return to low heat after high peaks
|
|
374
|
+
- **Saved Sessions**: High-heat sessions that recover with affirmations
|
|
375
|
+
|
|
376
|
+
### Token Usage Tracking
|
|
377
|
+
|
|
378
|
+
- Per-request token consumption (prompt + completion)
|
|
379
|
+
- Model identification and version tracking
|
|
380
|
+
- Context compaction event logging
|
|
381
|
+
- Cost estimation capabilities
|
|
382
|
+
|
|
383
|
+
## ⚙️ Configuration
|
|
384
|
+
|
|
385
|
+
### Automatic Detection
|
|
386
|
+
|
|
387
|
+
Code Data Ark automatically detects paths using standard locations:
|
|
388
|
+
|
|
389
|
+
- **macOS**: `~/Library/Application Support/Code/User/`
|
|
390
|
+
- **Windows**: `%APPDATA%\Code\User\`
|
|
391
|
+
- **Linux**: `~/.config/Code/User/`
|
|
392
|
+
|
|
393
|
+
### Environment Variables
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
export CDA_DB=/path/to/custom.db # Custom database location
|
|
397
|
+
export CDA_CONFIG=/path/to/config # Custom config directory
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
### Policy Configuration
|
|
401
|
+
|
|
402
|
+
Data access policies are stored in `policy.txt`:
|
|
403
|
+
|
|
404
|
+
```
|
|
405
|
+
ALLOW important-project
|
|
406
|
+
DENY sensitive-data
|
|
407
|
+
ALLOW *.py
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
## 🔧 Development
|
|
411
|
+
|
|
412
|
+
### Setup Development Environment
|
|
413
|
+
|
|
414
|
+
```bash
|
|
415
|
+
pip install -e ".[dev]"
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
### Running Tests
|
|
419
|
+
|
|
420
|
+
```bash
|
|
421
|
+
pytest tests/ -q
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
### Code Quality
|
|
425
|
+
|
|
426
|
+
```bash
|
|
427
|
+
flake8 cda tests
|
|
428
|
+
mypy cda
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Building
|
|
432
|
+
|
|
433
|
+
```bash
|
|
434
|
+
python -m build
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
### Project Structure
|
|
438
|
+
|
|
439
|
+
```
|
|
440
|
+
cda/
|
|
441
|
+
├── .gitignore
|
|
442
|
+
├── source/ # all tracked code (pushed to git)
|
|
443
|
+
│ ├── cda/
|
|
444
|
+
│ │ ├── pipeline/ # ingest, reconstruct, extract, embed, watcher, parse_edits
|
|
445
|
+
│ │ ├── ui/ # cli, web
|
|
446
|
+
│ │ └── kernel/ # pmf_kernel, selfcheck
|
|
447
|
+
│ ├── bin/release.py
|
|
448
|
+
│ ├── tests/
|
|
449
|
+
│ ├── docs/
|
|
450
|
+
│ └── pyproject.toml
|
|
451
|
+
├── local/ # runtime state (gitignored, host-only)
|
|
452
|
+
│ ├── data/ # cda.db
|
|
453
|
+
│ ├── logs/
|
|
454
|
+
│ ├── queue/
|
|
455
|
+
│ ├── run/
|
|
456
|
+
│ ├── config/
|
|
457
|
+
│ └── pmf/
|
|
458
|
+
└── control/ # management artifacts (gitignored, host-only)
|
|
459
|
+
├── data/ # control.db
|
|
460
|
+
├── scripts/
|
|
461
|
+
├── audit/
|
|
462
|
+
└── scan/
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
## 🤝 Contributing
|
|
466
|
+
|
|
467
|
+
1. Fork the repository
|
|
468
|
+
2. Create a feature branch: `git checkout -b feature/amazing-feature`
|
|
469
|
+
3. Make your changes and add tests
|
|
470
|
+
4. Run the test suite: `make test`
|
|
471
|
+
5. Format code: `make format`
|
|
472
|
+
6. Commit your changes: `git commit -m 'Add amazing feature'`
|
|
473
|
+
7. Push to the branch: `git push origin feature/amazing-feature`
|
|
474
|
+
8. Open a Pull Request
|
|
475
|
+
|
|
476
|
+
### Development Guidelines
|
|
477
|
+
|
|
478
|
+
- **Tests**: Unit tests for all new functionality
|
|
479
|
+
- **Linting**: Code must pass `flake8` and `mypy` before pushing
|
|
480
|
+
- **Versioning**: Keep `version`, `pyproject.toml`, and `changelog.md` in sync
|
|
481
|
+
|
|
482
|
+
## 📝 License
|
|
483
|
+
|
|
484
|
+
This project is licensed under the MIT License - see the [license](license) file for details.
|
|
485
|
+
|
|
486
|
+
## 🙏 Acknowledgments
|
|
487
|
+
|
|
488
|
+
- Built for analyzing VS Code/Copilot Chat interaction patterns
|
|
489
|
+
- Inspired by the need for better human-AI interaction insights
|
|
490
|
+
- Uses SQLite FTS5 for high-performance full-text search
|
|
491
|
+
- Implements behavioral signal processing for conversation analysis
|
|
492
|
+
|
|
493
|
+
---
|
|
494
|
+
|
|
495
|
+
**Code Data Ark** (`cda`) - Understanding the human side of AI conversations.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
import re
import subprocess
import sys
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
# Directory two levels above this script; every path below is anchored here.
ROOT = Path(__file__).resolve().parents[1]

# Files that each carry a copy of the package version.
VERSION_FILE = ROOT.joinpath("version")
PYPROJECT_FILE = ROOT.joinpath("pyproject.toml")
INIT_FILE = ROOT.joinpath("cda", "__init__.py")

# Three dot-separated integers (MAJOR.MINOR.PATCH); no pre-release or build suffix.
VERSION_PATTERN = r"\d+\.\d+\.\d+"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def read_version():
    """Return the version stored in VERSION_FILE, stripped of surrounding whitespace.

    Raises:
        SystemExit: if the stripped contents do not fully match VERSION_PATTERN.
    """
    candidate = VERSION_FILE.read_text().strip()
    if re.fullmatch(VERSION_PATTERN, candidate):
        return candidate
    raise SystemExit(f"VERSION file must contain a semantic version, found: {candidate}")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def write_version(version: str):
    """Overwrite VERSION_FILE with ``version`` followed by a single newline."""
    contents = f"{version}\n"
    VERSION_FILE.write_text(contents)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def replace_in_file(path: Path, pattern: str, replacement: str, multiline: bool = False):
    """Apply a regex substitution to every match in a file and write the result back.

    Args:
        path: File to rewrite in place.
        pattern: Regular expression to search for.
        replacement: Replacement template (may use ``\\g<N>`` group references).
        multiline: When true, compile with ``re.MULTILINE`` so ``^``/``$`` match
            at each line boundary.

    Raises:
        SystemExit: if ``pattern`` does not match anywhere in the file; the file
            is left untouched in that case.
    """
    # Read and write explicitly as UTF-8 instead of the platform default encoding,
    # so non-ASCII text elsewhere in the file survives the round trip on any host.
    original = path.read_text(encoding="utf-8")
    flags = re.MULTILINE if multiline else 0
    updated, count = re.subn(pattern, replacement, original, flags=flags)
    if count == 0:
        raise SystemExit(f"Pattern not found in {path}: {pattern}")
    path.write_text(updated, encoding="utf-8")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def sync_version(version: str):
    """Rewrite the version strings in PYPROJECT_FILE and INIT_FILE to ``version``.

    PYPROJECT_FILE is updated on its ``version = "X.Y.Z"`` line and INIT_FILE on
    its ``__version__ = "X.Y.Z"`` line. Files are processed in that order, and
    replace_in_file() raises SystemExit if the expected line is missing.
    """
    # Keep the captured prefix and closing quote; swap only the version between them.
    substitution = rf'\g<1>{version}\g<2>'
    targets = (
        (PYPROJECT_FILE, r'^(version\s*=\s*")' + VERSION_PATTERN + r'(")'),
        (INIT_FILE, r'^(\s*__version__\s*=\s*")' + VERSION_PATTERN + r'(")'),
    )
    for target, pattern in targets:
        replace_in_file(target, pattern, substitution, multiline=True)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def git_command(args, check=True):
    """Run ``git`` with the given argument list from the ROOT directory.

    Args:
        args: List of arguments appended after ``git``.
        check: Forwarded to ``subprocess.run``; when true a non-zero exit
            status raises ``subprocess.CalledProcessError``.
    """
    command = ["git"] + args
    subprocess.run(command, check=check, cwd=ROOT)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_package():
    """Build the source and wheel distributions by running ``-m build`` in ROOT.

    Raises:
        subprocess.CalledProcessError: if the build exits with a non-zero status.
    """
    # Use the interpreter running this script rather than whatever "python"
    # resolves to on PATH (which may be absent or lack the build module).
    subprocess.run([sys.executable, "-m", "build", "--sdist", "--wheel"], cwd=ROOT, check=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def publish_package():
    """Upload the built distributions by running ``-m twine upload dist/*`` in ROOT.

    Raises:
        subprocess.CalledProcessError: if the upload exits with a non-zero status.
    """
    # Use the interpreter running this script rather than whatever "python"
    # resolves to on PATH (which may be absent or lack twine).
    # "dist/*" is passed as a literal argument because no shell is involved.
    # NOTE(review): this relies on twine expanding the glob itself - confirm.
    subprocess.run([sys.executable, "-m", "twine", "upload", "dist/*"], cwd=ROOT, check=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def main():
    """Parse the release flags and run the requested steps in a fixed order.

    Order of the optional steps: set/sync the version, create an annotated
    ``v<version>`` tag, build, publish, then push HEAD (and tags when ``--tag``
    was also given). The version comes from ``--set-version`` when supplied,
    otherwise from read_version().

    Raises:
        SystemExit: if ``--set-version`` is not a full match for VERSION_PATTERN,
            or if a called helper rejects the version file or a sync target.
    """
    parser = argparse.ArgumentParser(description="Release management for code-data-ark")
    parser.add_argument("--set-version", help="Set a new version and update all version sources")
    parser.add_argument("--sync", action="store_true", help="Sync version sources from VERSION file")
    parser.add_argument("--tag", action="store_true", help="Create a git tag for the current version")
    parser.add_argument("--push", action="store_true", help="Push current branch and tags to origin")
    parser.add_argument("--build", action="store_true", help="Build source and wheel distributions")
    parser.add_argument("--publish", action="store_true", help="Publish built distributions to PyPI")
    args = parser.parse_args()

    if args.set_version:
        # Apply the same format check read_version() uses, before any file is
        # modified, so a malformed value never reaches the version file.
        if not re.fullmatch(VERSION_PATTERN, args.set_version):
            raise SystemExit(f"--set-version must be a semantic version, found: {args.set_version}")
        version = args.set_version
        write_version(version)
    else:
        version = read_version()

    if args.sync or args.set_version:
        sync_version(version)

    if args.tag:
        git_command(["tag", "-a", f"v{version}", "-m", f"Release v{version}"])

    if args.build:
        build_package()

    if args.publish:
        publish_package()

    if args.push:
        git_command(["push", "origin", "HEAD"])
        # Tags are pushed only when this same invocation created one.
        if args.tag:
            git_command(["push", "origin", "--tags"])

    print(f"Release process completed for version {version}.")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Run the release CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
File without changes
|