code-data-ark 2.0.6__tar.gz → 2.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/PKG-INFO +1 -1
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/__init__.py +1 -1
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/pmf_kernel.py +8 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/ui/web.py +110 -46
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/pyproject.toml +1 -1
- code_data_ark-2.0.7/version +1 -0
- code_data_ark-2.0.6/version +0 -1
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/.flake8 +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/.github/workflows/ci.yml +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/.gitignore +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/bin/release.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/__main__.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/__init__.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/control_db.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/paths.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/selfcheck.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/__init__.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/embed.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/extract.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/ingest.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/parse_edits.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/reconstruct.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/watcher.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/ui/__init__.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/ui/cli.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/changelog.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/contributing.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/architecture.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/examples/usage.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/pmf_kernel.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/roadmap.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/license +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/makefile +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/readme.md +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/tests/test_basic.py +0 -0
- {code_data_ark-2.0.6 → code_data_ark-2.0.7}/tests/test_selfcheck.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-data-ark
|
|
3
|
-
Version: 2.0.6
|
|
3
|
+
Version: 2.0.7
|
|
4
4
|
Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
|
|
5
5
|
Project-URL: Homepage, https://github.com/goCosmix/cda
|
|
6
6
|
Project-URL: Repository, https://github.com/goCosmix/cda.git
|
|
@@ -421,6 +421,14 @@ class PMFKernel:
|
|
|
421
421
|
pass
|
|
422
422
|
time.sleep(0.25)
|
|
423
423
|
wait_seconds += 0.25
|
|
424
|
+
# Process didn't write its own pid file — write it now using the
|
|
425
|
+
# spawned process's PID so status checks work correctly.
|
|
426
|
+
if self._is_process_alive(proc.pid):
|
|
427
|
+
try:
|
|
428
|
+
spec.pid_file.write_text(str(proc.pid))
|
|
429
|
+
state["pid"] = proc.pid
|
|
430
|
+
except Exception:
|
|
431
|
+
pass
|
|
424
432
|
|
|
425
433
|
if spec.service_type == "daemon":
|
|
426
434
|
state["status"] = "running"
|
|
@@ -1008,7 +1008,7 @@ def get_overview():
|
|
|
1008
1008
|
{("(SELECT AVG(heat_score) FROM session_analysis WHERE heat_score IS NOT NULL)" if has_analysis else "0")} as avg_heat,
|
|
1009
1009
|
{("(SELECT COUNT(*) FROM session_analysis WHERE heat_score >= 50)" if has_analysis else "0")} as critical_sessions,
|
|
1010
1010
|
{("(SELECT COUNT(*) FROM anomaly_alerts)" if has_alerts else "0")} as alert_count,
|
|
1011
|
-
(SELECT COUNT(
|
|
1011
|
+
(SELECT COUNT(*) FROM workspaces) as workspace_count,
|
|
1012
1012
|
(SELECT MAX(created_at) FROM sessions) as last_session
|
|
1013
1013
|
""")
|
|
1014
1014
|
|
|
@@ -1041,10 +1041,11 @@ def get_overview():
|
|
|
1041
1041
|
LIMIT 15
|
|
1042
1042
|
""")) if has_signals else []
|
|
1043
1043
|
|
|
1044
|
+
exchange_count_expr = "(SELECT COUNT(*) FROM exchanges WHERE exchanges.session_id = s.session_id)" if has_exchanges else "0"
|
|
1044
1045
|
if has_analysis:
|
|
1045
|
-
recent = safe_rows(query_rows("""
|
|
1046
|
+
recent = safe_rows(query_rows(f"""
|
|
1046
1047
|
SELECT s.session_id as id, s.title, sa.heat_score,
|
|
1047
|
-
{
|
|
1048
|
+
{exchange_count_expr} as exchange_count,
|
|
1048
1049
|
s.created_at
|
|
1049
1050
|
FROM sessions s
|
|
1050
1051
|
LEFT JOIN session_analysis sa ON sa.session_id = s.session_id
|
|
@@ -1052,9 +1053,9 @@ def get_overview():
|
|
|
1052
1053
|
LIMIT 10
|
|
1053
1054
|
"""))
|
|
1054
1055
|
else:
|
|
1055
|
-
recent = safe_rows(query_rows("""
|
|
1056
|
+
recent = safe_rows(query_rows(f"""
|
|
1056
1057
|
SELECT s.session_id as id, s.title, NULL as heat_score,
|
|
1057
|
-
{
|
|
1058
|
+
{exchange_count_expr} as exchange_count,
|
|
1058
1059
|
s.created_at
|
|
1059
1060
|
FROM sessions s
|
|
1060
1061
|
ORDER BY s.created_at DESC
|
|
@@ -1161,7 +1162,7 @@ def get_session_detail(session_id):
|
|
|
1161
1162
|
signals = safe_rows(query_rows("""
|
|
1162
1163
|
SELECT * FROM exchange_signals
|
|
1163
1164
|
WHERE session_id = ?
|
|
1164
|
-
ORDER BY
|
|
1165
|
+
ORDER BY ts DESC
|
|
1165
1166
|
""", (session_id,))) if has_signals else []
|
|
1166
1167
|
|
|
1167
1168
|
signal_summary = safe_rows(query_rows("""
|
|
@@ -1195,18 +1196,20 @@ def get_search_results(query, limit=50):
|
|
|
1195
1196
|
"""Full-text search across exchanges."""
|
|
1196
1197
|
try:
|
|
1197
1198
|
results = query_rows("""
|
|
1198
|
-
SELECT
|
|
1199
|
-
|
|
1199
|
+
SELECT
|
|
1200
|
+
e.session_id,
|
|
1200
1201
|
s.title,
|
|
1201
|
-
|
|
1202
|
+
sa.heat_score,
|
|
1202
1203
|
e.id as exchange_id,
|
|
1203
|
-
e.
|
|
1204
|
-
e.
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
JOIN
|
|
1209
|
-
|
|
1204
|
+
e.exchange_index,
|
|
1205
|
+
e.user_message,
|
|
1206
|
+
e.response_text,
|
|
1207
|
+
e.user_ts
|
|
1208
|
+
FROM fts_exchanges fts
|
|
1209
|
+
JOIN exchanges e ON fts.rowid = e.id
|
|
1210
|
+
JOIN sessions s ON e.session_id = s.session_id
|
|
1211
|
+
LEFT JOIN session_analysis sa ON sa.session_id = e.session_id
|
|
1212
|
+
WHERE fts_exchanges MATCH ?
|
|
1210
1213
|
ORDER BY rank
|
|
1211
1214
|
LIMIT ?
|
|
1212
1215
|
""", (query, limit))
|
|
@@ -1219,13 +1222,11 @@ def get_workspaces():
|
|
|
1219
1222
|
"""List all workspaces with session counts."""
|
|
1220
1223
|
try:
|
|
1221
1224
|
workspaces = query_rows("""
|
|
1222
|
-
SELECT
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
FROM
|
|
1226
|
-
|
|
1227
|
-
GROUP BY workspace_id
|
|
1228
|
-
ORDER BY session_count DESC
|
|
1225
|
+
SELECT w.workspace_id, w.uri, w.name, w.type, w.session_count,
|
|
1226
|
+
(SELECT MAX(s.created_at) FROM sessions s
|
|
1227
|
+
WHERE s.workspace_id = w.workspace_id) as last_session
|
|
1228
|
+
FROM workspaces w
|
|
1229
|
+
ORDER BY w.session_count DESC
|
|
1229
1230
|
""")
|
|
1230
1231
|
return {"workspaces": workspaces}
|
|
1231
1232
|
except Exception as e:
|
|
@@ -1253,9 +1254,9 @@ def get_memory():
|
|
|
1253
1254
|
"""Get all memory files."""
|
|
1254
1255
|
try:
|
|
1255
1256
|
memory = query_rows("""
|
|
1256
|
-
SELECT id,
|
|
1257
|
+
SELECT id, scope, workspace_id, session_id, filename, size_bytes, ingested_at
|
|
1257
1258
|
FROM memory_files
|
|
1258
|
-
ORDER BY
|
|
1259
|
+
ORDER BY ingested_at DESC
|
|
1259
1260
|
""")
|
|
1260
1261
|
return {"memory": memory}
|
|
1261
1262
|
except Exception as e:
|
|
@@ -1267,21 +1268,25 @@ def get_tool_calls(query_str=None, limit=50):
|
|
|
1267
1268
|
try:
|
|
1268
1269
|
if query_str:
|
|
1269
1270
|
results = query_rows("""
|
|
1270
|
-
SELECT tc
|
|
1271
|
+
SELECT tc.id, tc.session_id, tc.exchange_index, tc.request_id,
|
|
1272
|
+
tc.tool_call_id, tc.tool_name, tc.file_path,
|
|
1273
|
+
tc.arguments_json, tc.has_output, tc.ingested_at,
|
|
1274
|
+
s.title as session_title
|
|
1271
1275
|
FROM tool_calls tc
|
|
1272
|
-
JOIN
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
ORDER BY tc.created_at DESC
|
|
1276
|
+
JOIN sessions s ON tc.session_id = s.session_id
|
|
1277
|
+
WHERE tc.tool_name LIKE ? OR tc.arguments_json LIKE ?
|
|
1278
|
+
ORDER BY tc.ingested_at DESC
|
|
1276
1279
|
LIMIT ?
|
|
1277
1280
|
""", (f"%{query_str}%", f"%{query_str}%", limit))
|
|
1278
1281
|
else:
|
|
1279
1282
|
results = query_rows("""
|
|
1280
|
-
SELECT tc
|
|
1283
|
+
SELECT tc.id, tc.session_id, tc.exchange_index, tc.request_id,
|
|
1284
|
+
tc.tool_call_id, tc.tool_name, tc.file_path,
|
|
1285
|
+
tc.arguments_json, tc.has_output, tc.ingested_at,
|
|
1286
|
+
s.title as session_title
|
|
1281
1287
|
FROM tool_calls tc
|
|
1282
|
-
JOIN
|
|
1283
|
-
|
|
1284
|
-
ORDER BY tc.created_at DESC
|
|
1288
|
+
JOIN sessions s ON tc.session_id = s.session_id
|
|
1289
|
+
ORDER BY tc.ingested_at DESC
|
|
1285
1290
|
LIMIT ?
|
|
1286
1291
|
""", (limit,))
|
|
1287
1292
|
return {"tool_calls": results, "query": query_str, "count": len(results)}
|
|
@@ -1293,10 +1298,11 @@ def get_vfs(session_id):
|
|
|
1293
1298
|
"""List VFS files for a session."""
|
|
1294
1299
|
try:
|
|
1295
1300
|
vfs = query_rows("""
|
|
1296
|
-
SELECT id, session_id,
|
|
1301
|
+
SELECT id, session_id, source_type, source_path, filename,
|
|
1302
|
+
content_type, size_bytes, sha256, ingested_at
|
|
1297
1303
|
FROM vfs
|
|
1298
1304
|
WHERE session_id = ?
|
|
1299
|
-
ORDER BY
|
|
1305
|
+
ORDER BY filename
|
|
1300
1306
|
""", (session_id,))
|
|
1301
1307
|
return {"vfs": vfs, "session_id": session_id}
|
|
1302
1308
|
except Exception as e:
|
|
@@ -1354,17 +1360,26 @@ def get_tokens(session_id=None):
|
|
|
1354
1360
|
if session_id:
|
|
1355
1361
|
tokens = query_rows("""
|
|
1356
1362
|
SELECT
|
|
1357
|
-
SUM(
|
|
1358
|
-
|
|
1359
|
-
|
|
1363
|
+
SUM(prompt_tokens) as total_prompt,
|
|
1364
|
+
SUM(completion_tokens) as total_completion,
|
|
1365
|
+
SUM(cached_tokens) as total_cached,
|
|
1366
|
+
SUM(prompt_tokens + completion_tokens) as total_tokens,
|
|
1367
|
+
COUNT(*) as turn_count,
|
|
1368
|
+
GROUP_CONCAT(DISTINCT model_id) as models
|
|
1369
|
+
FROM token_usage
|
|
1360
1370
|
WHERE session_id = ?
|
|
1361
1371
|
""", (session_id,))
|
|
1362
1372
|
else:
|
|
1363
1373
|
tokens = query_rows("""
|
|
1364
1374
|
SELECT
|
|
1365
|
-
SUM(
|
|
1366
|
-
|
|
1367
|
-
|
|
1375
|
+
SUM(prompt_tokens) as total_prompt,
|
|
1376
|
+
SUM(completion_tokens) as total_completion,
|
|
1377
|
+
SUM(cached_tokens) as total_cached,
|
|
1378
|
+
SUM(prompt_tokens + completion_tokens) as total_tokens,
|
|
1379
|
+
COUNT(*) as turn_count,
|
|
1380
|
+
COUNT(DISTINCT session_id) as session_count,
|
|
1381
|
+
GROUP_CONCAT(DISTINCT model_id) as models
|
|
1382
|
+
FROM token_usage
|
|
1368
1383
|
""")
|
|
1369
1384
|
return {"tokens": tokens}
|
|
1370
1385
|
except Exception as e:
|
|
@@ -1726,11 +1741,10 @@ def render_tokens():
|
|
|
1726
1741
|
return """
|
|
1727
1742
|
<div class="page-header">
|
|
1728
1743
|
<div class="page-title">Token Usage</div>
|
|
1729
|
-
<div class="page-subtitle">Token consumption
|
|
1730
|
-
</div>
|
|
1731
|
-
<div class="card">
|
|
1732
|
-
<p>Token usage analysis coming soon.</p>
|
|
1744
|
+
<div class="page-subtitle">Token consumption across all sessions.</div>
|
|
1733
1745
|
</div>
|
|
1746
|
+
<div id="tokens-summary" class="loading"><div class="spinner"></div>Loading...</div>
|
|
1747
|
+
<div id="tokens-table" style="margin-top:16px"></div>
|
|
1734
1748
|
"""
|
|
1735
1749
|
|
|
1736
1750
|
|
|
@@ -1910,6 +1924,9 @@ function initializePage(page) {
|
|
|
1910
1924
|
case 'alerts':
|
|
1911
1925
|
initAlerts();
|
|
1912
1926
|
break;
|
|
1927
|
+
case 'tokens':
|
|
1928
|
+
initTokens();
|
|
1929
|
+
break;
|
|
1913
1930
|
case 'pipeline':
|
|
1914
1931
|
initPipeline();
|
|
1915
1932
|
break;
|
|
@@ -2358,6 +2375,46 @@ function initKeywords() {
|
|
|
2358
2375
|
});
|
|
2359
2376
|
}
|
|
2360
2377
|
|
|
2378
|
+
function initTokens() {
|
|
2379
|
+
const summary = document.getElementById('tokens-summary');
|
|
2380
|
+
const table = document.getElementById('tokens-table');
|
|
2381
|
+
if (!summary) return;
|
|
2382
|
+
summary.innerHTML = '<div class="spinner"></div> Loading...';
|
|
2383
|
+
fetch('/api/tokens').then(r => r.json()).then(data => {
|
|
2384
|
+
const t = (data.tokens || [])[0] || {};
|
|
2385
|
+
const fmt = n => (n || 0).toLocaleString();
|
|
2386
|
+
summary.innerHTML = `
|
|
2387
|
+
<div class="grid-4">
|
|
2388
|
+
<div class="card"><div class="card-header">Total Tokens</div><div class="card-value">${fmt(t.total_tokens)}</div></div>
|
|
2389
|
+
<div class="card"><div class="card-header">Prompt</div><div class="card-value">${fmt(t.total_prompt)}</div></div>
|
|
2390
|
+
<div class="card"><div class="card-header">Completion</div><div class="card-value">${fmt(t.total_completion)}</div></div>
|
|
2391
|
+
<div class="card"><div class="card-header">Cached</div><div class="card-value">${fmt(t.total_cached)}</div></div>
|
|
2392
|
+
<div class="card"><div class="card-header">Sessions</div><div class="card-value">${fmt(t.session_count)}</div></div>
|
|
2393
|
+
<div class="card"><div class="card-header">Turns</div><div class="card-value">${fmt(t.turn_count)}</div></div>
|
|
2394
|
+
</div>
|
|
2395
|
+
<div class="card" style="margin-top:12px"><b>Models:</b> ${t.models || 'n/a'}</div>
|
|
2396
|
+
`;
|
|
2397
|
+
}).catch(() => {
|
|
2398
|
+
summary.innerHTML = '<div class="alert alert-danger">Failed to load token data.</div>';
|
|
2399
|
+
});
|
|
2400
|
+
if (table) {
|
|
2401
|
+
table.innerHTML = '<div class="spinner"></div> Loading sessions...';
|
|
2402
|
+
const sql = 'SELECT s.title, tu.session_id, SUM(tu.prompt_tokens) as prompt, SUM(tu.completion_tokens) as completion, SUM(tu.cached_tokens) as cached, SUM(tu.prompt_tokens + tu.completion_tokens) as total, COUNT(*) as turns, GROUP_CONCAT(DISTINCT tu.model_id) as models FROM token_usage tu JOIN sessions s ON tu.session_id = s.session_id GROUP BY tu.session_id ORDER BY total DESC LIMIT 50';
|
|
2403
|
+
fetch('/api/query', {method: 'POST', headers: {'Content-Type': 'application/json'}, body: JSON.stringify({sql: sql})})
|
|
2404
|
+
.then(r => r.json()).then(data => {
|
|
2405
|
+
const rows = data.rows || [];
|
|
2406
|
+
if (!rows.length) { table.innerHTML = '<p>No per-session data.</p>'; return; }
|
|
2407
|
+
const fmt = n => (n || 0).toLocaleString();
|
|
2408
|
+
let html = '<div class="card"><div class="card-header">Top Sessions by Token Usage</div><table class="table"><thead><tr><th>Session</th><th>Total</th><th>Prompt</th><th>Completion</th><th>Cached</th><th>Turns</th><th>Models</th></tr></thead><tbody>';
|
|
2409
|
+
rows.forEach(r => {
|
|
2410
|
+
html += '<tr><td class="truncate">' + (r.title || r.session_id) + '</td><td>' + fmt(r.total) + '</td><td>' + fmt(r.prompt) + '</td><td>' + fmt(r.completion) + '</td><td>' + fmt(r.cached) + '</td><td>' + r.turns + '</td><td class="truncate">' + (r.models || '') + '</td></tr>';
|
|
2411
|
+
});
|
|
2412
|
+
html += '</tbody></table></div>';
|
|
2413
|
+
table.innerHTML = html;
|
|
2414
|
+
}).catch(() => { table.innerHTML = '<p>Failed to load session breakdown.</p>'; });
|
|
2415
|
+
}
|
|
2416
|
+
}
|
|
2417
|
+
|
|
2361
2418
|
function initWorkspaces() {
|
|
2362
2419
|
const container = document.getElementById('workspaces-content');
|
|
2363
2420
|
if (!container) return;
|
|
@@ -2737,6 +2794,13 @@ def application(environ, start_response):
|
|
|
2737
2794
|
start_response('200 OK', [('Content-Type', 'application/json')])
|
|
2738
2795
|
return [response]
|
|
2739
2796
|
|
|
2797
|
+
elif path == '/api/tokens':
|
|
2798
|
+
session_id = query.get('session_id', [None])[0]
|
|
2799
|
+
data = get_tokens(session_id)
|
|
2800
|
+
response = json.dumps(data).encode('utf-8')
|
|
2801
|
+
start_response('200 OK', [('Content-Type', 'application/json')])
|
|
2802
|
+
return [response]
|
|
2803
|
+
|
|
2740
2804
|
elif path == '/api/alerts':
|
|
2741
2805
|
data = get_alerts()
|
|
2742
2806
|
response = json.dumps(data).encode('utf-8')
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "code-data-ark"
|
|
7
|
-
version = "2.0.6"
|
|
7
|
+
version = "2.0.7"
|
|
8
8
|
description = "Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions"
|
|
9
9
|
readme = "readme.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.0.7
|
code_data_ark-2.0.6/version
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
2.0.6
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|