code-data-ark 2.0.6__tar.gz → 2.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/PKG-INFO +1 -1
  2. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/__init__.py +1 -1
  3. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/pmf_kernel.py +8 -0
  4. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/ui/web.py +110 -46
  5. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/pyproject.toml +1 -1
  6. code_data_ark-2.0.7/version +1 -0
  7. code_data_ark-2.0.6/version +0 -1
  8. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/.flake8 +0 -0
  9. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/.github/workflows/ci.yml +0 -0
  10. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/.gitignore +0 -0
  11. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/bin/release.py +0 -0
  12. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/__main__.py +0 -0
  13. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/__init__.py +0 -0
  14. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/control_db.py +0 -0
  15. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/paths.py +0 -0
  16. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/kernel/selfcheck.py +0 -0
  17. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/__init__.py +0 -0
  18. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/embed.py +0 -0
  19. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/extract.py +0 -0
  20. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/ingest.py +0 -0
  21. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/parse_edits.py +0 -0
  22. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/reconstruct.py +0 -0
  23. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/pipeline/watcher.py +0 -0
  24. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/ui/__init__.py +0 -0
  25. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/cda/ui/cli.py +0 -0
  26. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/changelog.md +0 -0
  27. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/contributing.md +0 -0
  28. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/architecture.md +0 -0
  29. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/examples/usage.md +0 -0
  30. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/pmf_kernel.md +0 -0
  31. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/docs/roadmap.md +0 -0
  32. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/license +0 -0
  33. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/makefile +0 -0
  34. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/readme.md +0 -0
  35. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/tests/test_basic.py +0 -0
  36. {code_data_ark-2.0.6 → code_data_ark-2.0.7}/tests/test_selfcheck.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-data-ark
3
- Version: 2.0.6
3
+ Version: 2.0.7
4
4
  Summary: Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions
5
5
  Project-URL: Homepage, https://github.com/goCosmix/cda
6
6
  Project-URL: Repository, https://github.com/goCosmix/cda.git
@@ -1,3 +1,3 @@
1
1
  """Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions."""
2
2
 
3
- __version__ = "2.0.2"
3
+ __version__ = "2.0.7"
@@ -421,6 +421,14 @@ class PMFKernel:
421
421
  pass
422
422
  time.sleep(0.25)
423
423
  wait_seconds += 0.25
424
+ # Process didn't write its own pid file — write it now using the
425
+ # spawned process's PID so status checks work correctly.
426
+ if self._is_process_alive(proc.pid):
427
+ try:
428
+ spec.pid_file.write_text(str(proc.pid))
429
+ state["pid"] = proc.pid
430
+ except Exception:
431
+ pass
424
432
 
425
433
  if spec.service_type == "daemon":
426
434
  state["status"] = "running"
@@ -1008,7 +1008,7 @@ def get_overview():
1008
1008
  {("(SELECT AVG(heat_score) FROM session_analysis WHERE heat_score IS NOT NULL)" if has_analysis else "0")} as avg_heat,
1009
1009
  {("(SELECT COUNT(*) FROM session_analysis WHERE heat_score >= 50)" if has_analysis else "0")} as critical_sessions,
1010
1010
  {("(SELECT COUNT(*) FROM anomaly_alerts)" if has_alerts else "0")} as alert_count,
1011
- (SELECT COUNT(DISTINCT workspace_id) FROM sessions) as workspace_count,
1011
+ (SELECT COUNT(*) FROM workspaces) as workspace_count,
1012
1012
  (SELECT MAX(created_at) FROM sessions) as last_session
1013
1013
  """)
1014
1014
 
@@ -1041,10 +1041,11 @@ def get_overview():
1041
1041
  LIMIT 15
1042
1042
  """)) if has_signals else []
1043
1043
 
1044
+ exchange_count_expr = "(SELECT COUNT(*) FROM exchanges WHERE exchanges.session_id = s.session_id)" if has_exchanges else "0"
1044
1045
  if has_analysis:
1045
- recent = safe_rows(query_rows("""
1046
+ recent = safe_rows(query_rows(f"""
1046
1047
  SELECT s.session_id as id, s.title, sa.heat_score,
1047
- {("(SELECT COUNT(*) FROM exchanges WHERE exchanges.session_id = s.session_id)" if has_exchanges else "0")} as exchange_count,
1048
+ {exchange_count_expr} as exchange_count,
1048
1049
  s.created_at
1049
1050
  FROM sessions s
1050
1051
  LEFT JOIN session_analysis sa ON sa.session_id = s.session_id
@@ -1052,9 +1053,9 @@ def get_overview():
1052
1053
  LIMIT 10
1053
1054
  """))
1054
1055
  else:
1055
- recent = safe_rows(query_rows("""
1056
+ recent = safe_rows(query_rows(f"""
1056
1057
  SELECT s.session_id as id, s.title, NULL as heat_score,
1057
- {("(SELECT COUNT(*) FROM exchanges WHERE exchanges.session_id = s.session_id)" if has_exchanges else "0")} as exchange_count,
1058
+ {exchange_count_expr} as exchange_count,
1058
1059
  s.created_at
1059
1060
  FROM sessions s
1060
1061
  ORDER BY s.created_at DESC
@@ -1161,7 +1162,7 @@ def get_session_detail(session_id):
1161
1162
  signals = safe_rows(query_rows("""
1162
1163
  SELECT * FROM exchange_signals
1163
1164
  WHERE session_id = ?
1164
- ORDER BY created_at DESC
1165
+ ORDER BY ts DESC
1165
1166
  """, (session_id,))) if has_signals else []
1166
1167
 
1167
1168
  signal_summary = safe_rows(query_rows("""
@@ -1195,18 +1196,20 @@ def get_search_results(query, limit=50):
1195
1196
  """Full-text search across exchanges."""
1196
1197
  try:
1197
1198
  results = query_rows("""
1198
- SELECT DISTINCT
1199
- s.id as session_id,
1199
+ SELECT
1200
+ e.session_id,
1200
1201
  s.title,
1201
- s.heat_score,
1202
+ sa.heat_score,
1202
1203
  e.id as exchange_id,
1203
- e.user_input,
1204
- e.assistant_response,
1205
- RANK() OVER (ORDER BY rank) as relevance
1206
- FROM sessions s
1207
- JOIN exchanges e ON s.id = e.session_id
1208
- JOIN full_text_search fts ON e.id = fts.exchange_id
1209
- WHERE fts.full_text_search MATCH ?
1204
+ e.exchange_index,
1205
+ e.user_message,
1206
+ e.response_text,
1207
+ e.user_ts
1208
+ FROM fts_exchanges fts
1209
+ JOIN exchanges e ON fts.rowid = e.id
1210
+ JOIN sessions s ON e.session_id = s.session_id
1211
+ LEFT JOIN session_analysis sa ON sa.session_id = e.session_id
1212
+ WHERE fts_exchanges MATCH ?
1210
1213
  ORDER BY rank
1211
1214
  LIMIT ?
1212
1215
  """, (query, limit))
@@ -1219,13 +1222,11 @@ def get_workspaces():
1219
1222
  """List all workspaces with session counts."""
1220
1223
  try:
1221
1224
  workspaces = query_rows("""
1222
- SELECT DISTINCT workspace_id,
1223
- COUNT(*) as session_count,
1224
- MAX(created_at) as last_session
1225
- FROM sessions
1226
- WHERE workspace_id IS NOT NULL
1227
- GROUP BY workspace_id
1228
- ORDER BY session_count DESC
1225
+ SELECT w.workspace_id, w.uri, w.name, w.type, w.session_count,
1226
+ (SELECT MAX(s.created_at) FROM sessions s
1227
+ WHERE s.workspace_id = w.workspace_id) as last_session
1228
+ FROM workspaces w
1229
+ ORDER BY w.session_count DESC
1229
1230
  """)
1230
1231
  return {"workspaces": workspaces}
1231
1232
  except Exception as e:
@@ -1253,9 +1254,9 @@ def get_memory():
1253
1254
  """Get all memory files."""
1254
1255
  try:
1255
1256
  memory = query_rows("""
1256
- SELECT id, name, size, created_at, updated_at
1257
+ SELECT id, scope, workspace_id, session_id, filename, size_bytes, ingested_at
1257
1258
  FROM memory_files
1258
- ORDER BY updated_at DESC
1259
+ ORDER BY ingested_at DESC
1259
1260
  """)
1260
1261
  return {"memory": memory}
1261
1262
  except Exception as e:
@@ -1267,21 +1268,25 @@ def get_tool_calls(query_str=None, limit=50):
1267
1268
  try:
1268
1269
  if query_str:
1269
1270
  results = query_rows("""
1270
- SELECT tc.*, e.session_id, s.title as session_title
1271
+ SELECT tc.id, tc.session_id, tc.exchange_index, tc.request_id,
1272
+ tc.tool_call_id, tc.tool_name, tc.file_path,
1273
+ tc.arguments_json, tc.has_output, tc.ingested_at,
1274
+ s.title as session_title
1271
1275
  FROM tool_calls tc
1272
- JOIN exchanges e ON tc.exchange_id = e.id
1273
- JOIN sessions s ON e.session_id = s.id
1274
- WHERE tc.tool_name LIKE ? OR tc.arguments LIKE ?
1275
- ORDER BY tc.created_at DESC
1276
+ JOIN sessions s ON tc.session_id = s.session_id
1277
+ WHERE tc.tool_name LIKE ? OR tc.arguments_json LIKE ?
1278
+ ORDER BY tc.ingested_at DESC
1276
1279
  LIMIT ?
1277
1280
  """, (f"%{query_str}%", f"%{query_str}%", limit))
1278
1281
  else:
1279
1282
  results = query_rows("""
1280
- SELECT tc.*, e.session_id, s.title as session_title
1283
+ SELECT tc.id, tc.session_id, tc.exchange_index, tc.request_id,
1284
+ tc.tool_call_id, tc.tool_name, tc.file_path,
1285
+ tc.arguments_json, tc.has_output, tc.ingested_at,
1286
+ s.title as session_title
1281
1287
  FROM tool_calls tc
1282
- JOIN exchanges e ON tc.exchange_id = e.id
1283
- JOIN sessions s ON e.session_id = s.id
1284
- ORDER BY tc.created_at DESC
1288
+ JOIN sessions s ON tc.session_id = s.session_id
1289
+ ORDER BY tc.ingested_at DESC
1285
1290
  LIMIT ?
1286
1291
  """, (limit,))
1287
1292
  return {"tool_calls": results, "query": query_str, "count": len(results)}
@@ -1293,10 +1298,11 @@ def get_vfs(session_id):
1293
1298
  """List VFS files for a session."""
1294
1299
  try:
1295
1300
  vfs = query_rows("""
1296
- SELECT id, session_id, path, size, created_at
1301
+ SELECT id, session_id, source_type, source_path, filename,
1302
+ content_type, size_bytes, sha256, ingested_at
1297
1303
  FROM vfs
1298
1304
  WHERE session_id = ?
1299
- ORDER BY path
1305
+ ORDER BY filename
1300
1306
  """, (session_id,))
1301
1307
  return {"vfs": vfs, "session_id": session_id}
1302
1308
  except Exception as e:
@@ -1354,17 +1360,26 @@ def get_tokens(session_id=None):
1354
1360
  if session_id:
1355
1361
  tokens = query_rows("""
1356
1362
  SELECT
1357
- SUM(CAST(json_extract(metadata, '$.token_count') AS INTEGER)) as total_tokens,
1358
- COUNT(*) as exchange_count
1359
- FROM exchanges
1363
+ SUM(prompt_tokens) as total_prompt,
1364
+ SUM(completion_tokens) as total_completion,
1365
+ SUM(cached_tokens) as total_cached,
1366
+ SUM(prompt_tokens + completion_tokens) as total_tokens,
1367
+ COUNT(*) as turn_count,
1368
+ GROUP_CONCAT(DISTINCT model_id) as models
1369
+ FROM token_usage
1360
1370
  WHERE session_id = ?
1361
1371
  """, (session_id,))
1362
1372
  else:
1363
1373
  tokens = query_rows("""
1364
1374
  SELECT
1365
- SUM(CAST(json_extract(metadata, '$.token_count') AS INTEGER)) as total_tokens,
1366
- COUNT(*) as exchange_count
1367
- FROM exchanges
1375
+ SUM(prompt_tokens) as total_prompt,
1376
+ SUM(completion_tokens) as total_completion,
1377
+ SUM(cached_tokens) as total_cached,
1378
+ SUM(prompt_tokens + completion_tokens) as total_tokens,
1379
+ COUNT(*) as turn_count,
1380
+ COUNT(DISTINCT session_id) as session_count,
1381
+ GROUP_CONCAT(DISTINCT model_id) as models
1382
+ FROM token_usage
1368
1383
  """)
1369
1384
  return {"tokens": tokens}
1370
1385
  except Exception as e:
@@ -1726,11 +1741,10 @@ def render_tokens():
1726
1741
  return """
1727
1742
  <div class="page-header">
1728
1743
  <div class="page-title">Token Usage</div>
1729
- <div class="page-subtitle">Token consumption analysis by session.</div>
1730
- </div>
1731
- <div class="card">
1732
- <p>Token usage analysis coming soon.</p>
1744
+ <div class="page-subtitle">Token consumption across all sessions.</div>
1733
1745
  </div>
1746
+ <div id="tokens-summary" class="loading"><div class="spinner"></div>Loading...</div>
1747
+ <div id="tokens-table" style="margin-top:16px"></div>
1734
1748
  """
1735
1749
 
1736
1750
 
@@ -1910,6 +1924,9 @@ function initializePage(page) {
1910
1924
  case 'alerts':
1911
1925
  initAlerts();
1912
1926
  break;
1927
+ case 'tokens':
1928
+ initTokens();
1929
+ break;
1913
1930
  case 'pipeline':
1914
1931
  initPipeline();
1915
1932
  break;
@@ -2358,6 +2375,46 @@ function initKeywords() {
2358
2375
  });
2359
2376
  }
2360
2377
 
2378
+ function initTokens() {
2379
+ const summary = document.getElementById('tokens-summary');
2380
+ const table = document.getElementById('tokens-table');
2381
+ if (!summary) return;
2382
+ summary.innerHTML = '<div class="spinner"></div> Loading...';
2383
+ fetch('/api/tokens').then(r => r.json()).then(data => {
2384
+ const t = (data.tokens || [])[0] || {};
2385
+ const fmt = n => (n || 0).toLocaleString();
2386
+ summary.innerHTML = `
2387
+ <div class="grid-4">
2388
+ <div class="card"><div class="card-header">Total Tokens</div><div class="card-value">${fmt(t.total_tokens)}</div></div>
2389
+ <div class="card"><div class="card-header">Prompt</div><div class="card-value">${fmt(t.total_prompt)}</div></div>
2390
+ <div class="card"><div class="card-header">Completion</div><div class="card-value">${fmt(t.total_completion)}</div></div>
2391
+ <div class="card"><div class="card-header">Cached</div><div class="card-value">${fmt(t.total_cached)}</div></div>
2392
+ <div class="card"><div class="card-header">Sessions</div><div class="card-value">${fmt(t.session_count)}</div></div>
2393
+ <div class="card"><div class="card-header">Turns</div><div class="card-value">${fmt(t.turn_count)}</div></div>
2394
+ </div>
2395
+ <div class="card" style="margin-top:12px"><b>Models:</b> ${t.models || 'n/a'}</div>
2396
+ `;
2397
+ }).catch(() => {
2398
+ summary.innerHTML = '<div class="alert alert-danger">Failed to load token data.</div>';
2399
+ });
2400
+ if (table) {
2401
+ table.innerHTML = '<div class="spinner"></div> Loading sessions...';
2402
+ const sql = 'SELECT s.title, tu.session_id, SUM(tu.prompt_tokens) as prompt, SUM(tu.completion_tokens) as completion, SUM(tu.cached_tokens) as cached, SUM(tu.prompt_tokens + tu.completion_tokens) as total, COUNT(*) as turns, GROUP_CONCAT(DISTINCT tu.model_id) as models FROM token_usage tu JOIN sessions s ON tu.session_id = s.session_id GROUP BY tu.session_id ORDER BY total DESC LIMIT 50';
2403
+ fetch('/api/query', {method: 'POST', headers: {'Content-Type': 'application/json'}, body: JSON.stringify({sql: sql})})
2404
+ .then(r => r.json()).then(data => {
2405
+ const rows = data.rows || [];
2406
+ if (!rows.length) { table.innerHTML = '<p>No per-session data.</p>'; return; }
2407
+ const fmt = n => (n || 0).toLocaleString();
2408
+ let html = '<div class="card"><div class="card-header">Top Sessions by Token Usage</div><table class="table"><thead><tr><th>Session</th><th>Total</th><th>Prompt</th><th>Completion</th><th>Cached</th><th>Turns</th><th>Models</th></tr></thead><tbody>';
2409
+ rows.forEach(r => {
2410
+ html += '<tr><td class="truncate">' + (r.title || r.session_id) + '</td><td>' + fmt(r.total) + '</td><td>' + fmt(r.prompt) + '</td><td>' + fmt(r.completion) + '</td><td>' + fmt(r.cached) + '</td><td>' + r.turns + '</td><td class="truncate">' + (r.models || '') + '</td></tr>';
2411
+ });
2412
+ html += '</tbody></table></div>';
2413
+ table.innerHTML = html;
2414
+ }).catch(() => { table.innerHTML = '<p>Failed to load session breakdown.</p>'; });
2415
+ }
2416
+ }
2417
+
2361
2418
  function initWorkspaces() {
2362
2419
  const container = document.getElementById('workspaces-content');
2363
2420
  if (!container) return;
@@ -2737,6 +2794,13 @@ def application(environ, start_response):
2737
2794
  start_response('200 OK', [('Content-Type', 'application/json')])
2738
2795
  return [response]
2739
2796
 
2797
+ elif path == '/api/tokens':
2798
+ session_id = query.get('session_id', [None])[0]
2799
+ data = get_tokens(session_id)
2800
+ response = json.dumps(data).encode('utf-8')
2801
+ start_response('200 OK', [('Content-Type', 'application/json')])
2802
+ return [response]
2803
+
2740
2804
  elif path == '/api/alerts':
2741
2805
  data = get_alerts()
2742
2806
  response = json.dumps(data).encode('utf-8')
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "code-data-ark"
7
- version = "2.0.6"
7
+ version = "2.0.7"
8
8
  description = "Code Data Ark — local observability and intelligence platform for VS Code + Copilot Chat sessions"
9
9
  readme = "readme.md"
10
10
  license = "MIT"
@@ -0,0 +1 @@
1
+ 2.0.7
@@ -1 +0,0 @@
1
- 2.0.6
File without changes
File without changes
File without changes
File without changes
File without changes