@c3-oss/prosa 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -402,10 +402,291 @@ INSERT OR IGNORE INTO search_index_status (
402
402
  ('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
403
403
  `;
404
404
 
405
+ // src/core/schema/sql/003_analytics_views.ts
406
+ var SQL_003_ANALYTICS_VIEWS = String.raw`
407
+ CREATE VIEW IF NOT EXISTS session_facts AS
408
+ WITH turn_counts AS (
409
+ SELECT session_id, count(*) AS turn_count
410
+ FROM turns
411
+ GROUP BY session_id
412
+ ),
413
+ message_counts AS (
414
+ SELECT session_id,
415
+ count(*) AS message_count,
416
+ sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
417
+ sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
418
+ FROM messages
419
+ GROUP BY session_id
420
+ ),
421
+ tool_call_counts AS (
422
+ SELECT session_id,
423
+ count(*) AS tool_call_count,
424
+ sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
425
+ FROM tool_calls
426
+ GROUP BY session_id
427
+ ),
428
+ tool_result_counts AS (
429
+ SELECT session_id,
430
+ count(*) AS tool_result_count,
431
+ sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
432
+ THEN 1 ELSE 0 END) AS tool_result_error_count,
433
+ sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
434
+ FROM tool_results
435
+ GROUP BY session_id
436
+ ),
437
+ search_doc_counts AS (
438
+ SELECT session_id, count(*) AS search_doc_count
439
+ FROM search_docs
440
+ WHERE session_id IS NOT NULL
441
+ GROUP BY session_id
442
+ )
443
+ SELECT s.session_id,
444
+ s.source_tool,
445
+ s.source_session_id,
446
+ s.project_id,
447
+ p.display_name AS project_name,
448
+ p.canonical_path AS project_path,
449
+ s.parent_session_id,
450
+ s.is_subagent,
451
+ s.agent_role,
452
+ s.agent_nickname,
453
+ s.title,
454
+ s.start_ts,
455
+ s.end_ts,
456
+ CASE
457
+ WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
458
+ THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
459
+ ELSE NULL
460
+ END AS duration_seconds,
461
+ s.cwd_initial,
462
+ s.git_branch_initial,
463
+ s.model_first,
464
+ s.model_last,
465
+ s.status,
466
+ s.timeline_confidence,
467
+ sf.path AS source_file_path,
468
+ COALESCE(tc.turn_count, 0) AS turn_count,
469
+ COALESCE(mc.message_count, 0) AS message_count,
470
+ COALESCE(mc.user_message_count, 0) AS user_message_count,
471
+ COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
472
+ COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
473
+ COALESCE(trc.tool_result_count, 0) AS tool_result_count,
474
+ COALESCE(tcc.tool_call_error_count, 0)
475
+ + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
476
+ COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
477
+ COALESCE(sdc.search_doc_count, 0) AS search_doc_count
478
+ FROM sessions s
479
+ LEFT JOIN projects p ON p.project_id = s.project_id
480
+ LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
481
+ LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
482
+ LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
483
+ LEFT JOIN message_counts mc ON mc.session_id = s.session_id
484
+ LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
485
+ LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
486
+ LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;
487
+
488
+ CREATE VIEW IF NOT EXISTS tool_usage_facts AS
489
+ WITH result_rollup AS (
490
+ SELECT tool_call_id,
491
+ session_id,
492
+ count(*) AS tool_result_count,
493
+ max(status) AS result_status,
494
+ max(is_error) AS is_error,
495
+ min(exit_code) AS exit_code,
496
+ sum(COALESCE(duration_ms, 0)) AS duration_ms,
497
+ max(preview) AS preview
498
+ FROM tool_results
499
+ GROUP BY tool_call_id, session_id
500
+ )
501
+ SELECT tc.tool_call_id,
502
+ tc.session_id,
503
+ s.source_tool,
504
+ s.source_session_id,
505
+ s.project_id,
506
+ p.display_name AS project_name,
507
+ p.canonical_path AS project_path,
508
+ tc.turn_id,
509
+ tc.message_id,
510
+ tc.event_id,
511
+ tc.source_call_id,
512
+ tc.tool_name,
513
+ tc.canonical_tool_type,
514
+ tc.command,
515
+ tc.cwd,
516
+ tc.path,
517
+ tc.query,
518
+ tc.timestamp_start,
519
+ tc.timestamp_end,
520
+ CASE
521
+ WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
522
+ THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
523
+ ELSE NULL
524
+ END AS call_duration_seconds,
525
+ tc.status AS call_status,
526
+ rr.result_status,
527
+ COALESCE(rr.is_error, 0) AS is_error,
528
+ rr.exit_code,
529
+ rr.duration_ms AS result_duration_ms,
530
+ COALESCE(rr.tool_result_count, 0) AS tool_result_count,
531
+ rr.preview,
532
+ tc.raw_record_id
533
+ FROM tool_calls tc
534
+ LEFT JOIN sessions s ON s.session_id = tc.session_id
535
+ LEFT JOIN projects p ON p.project_id = s.project_id
536
+ LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;
537
+
538
+ CREATE VIEW IF NOT EXISTS error_facts AS
539
+ SELECT 'tool_result:' || tr.tool_result_id AS error_id,
540
+ 'tool_result' AS error_category,
541
+ s.source_tool,
542
+ s.project_id,
543
+ p.display_name AS project_name,
544
+ tr.session_id,
545
+ COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
546
+ tc.tool_name,
547
+ tc.canonical_tool_type,
548
+ COALESCE(tr.status, tc.status) AS status,
549
+ tr.exit_code,
550
+ NULL AS message,
551
+ tr.preview,
552
+ NULL AS entity_type,
553
+ NULL AS entity_id,
554
+ tr.raw_record_id
555
+ FROM tool_results tr
556
+ LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
557
+ LEFT JOIN sessions s ON s.session_id = tr.session_id
558
+ LEFT JOIN projects p ON p.project_id = s.project_id
559
+ WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
560
+ UNION ALL
561
+ SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
562
+ 'import_error' AS error_category,
563
+ COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
564
+ NULL AS project_id,
565
+ NULL AS project_name,
566
+ NULL AS session_id,
567
+ ie.occurred_at AS timestamp,
568
+ NULL AS tool_name,
569
+ NULL AS canonical_tool_type,
570
+ ie.kind AS status,
571
+ NULL AS exit_code,
572
+ ie.message,
573
+ NULL AS preview,
574
+ NULL AS entity_type,
575
+ NULL AS entity_id,
576
+ ie.raw_record_id
577
+ FROM import_errors ie
578
+ LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
579
+ LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
580
+ UNION ALL
581
+ SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
582
+ 'uncertainty' AS error_category,
583
+ NULL AS source_tool,
584
+ NULL AS project_id,
585
+ NULL AS project_name,
586
+ CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
587
+ NULL AS timestamp,
588
+ NULL AS tool_name,
589
+ NULL AS canonical_tool_type,
590
+ u.reason AS status,
591
+ NULL AS exit_code,
592
+ u.reason AS message,
593
+ NULL AS preview,
594
+ u.entity_type,
595
+ u.entity_id,
596
+ NULL AS raw_record_id
597
+ FROM uncertainties u;
598
+
599
+ CREATE VIEW IF NOT EXISTS model_usage AS
600
+ WITH model_events AS (
601
+ SELECT s.source_tool,
602
+ s.project_id,
603
+ p.display_name AS project_name,
604
+ p.canonical_path AS project_path,
605
+ s.session_id,
606
+ NULL AS turn_id,
607
+ s.model_first AS model,
608
+ s.start_ts AS timestamp,
609
+ 'session_first' AS observation_type
610
+ FROM sessions s
611
+ LEFT JOIN projects p ON p.project_id = s.project_id
612
+ WHERE s.model_first IS NOT NULL
613
+ UNION ALL
614
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
615
+ s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
616
+ 'session_last' AS observation_type
617
+ FROM sessions s
618
+ LEFT JOIN projects p ON p.project_id = s.project_id
619
+ WHERE s.model_last IS NOT NULL
620
+ UNION ALL
621
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
622
+ t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
623
+ FROM turns t
624
+ LEFT JOIN sessions s ON s.session_id = t.session_id
625
+ LEFT JOIN projects p ON p.project_id = s.project_id
626
+ WHERE t.model IS NOT NULL
627
+ UNION ALL
628
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
629
+ m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
630
+ FROM messages m
631
+ LEFT JOIN sessions s ON s.session_id = m.session_id
632
+ LEFT JOIN projects p ON p.project_id = s.project_id
633
+ WHERE m.model IS NOT NULL
634
+ )
635
+ SELECT source_tool,
636
+ project_id,
637
+ project_name,
638
+ project_path,
639
+ model,
640
+ count(DISTINCT session_id) AS session_count,
641
+ count(DISTINCT turn_id) AS turn_count,
642
+ count(*) AS observation_count,
643
+ sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
644
+ min(timestamp) AS first_seen_ts,
645
+ max(timestamp) AS last_seen_ts
646
+ FROM model_events
647
+ GROUP BY source_tool, project_id, project_name, project_path, model;
648
+
649
+ CREATE VIEW IF NOT EXISTS project_activity AS
650
+ SELECT s.source_tool,
651
+ s.project_id,
652
+ COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
653
+ p.canonical_path AS project_path,
654
+ min(s.start_ts) AS first_session_ts,
655
+ max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
656
+ count(DISTINCT s.session_id) AS session_count,
657
+ count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
658
+ AS low_confidence_session_count,
659
+ count(DISTINCT t.turn_id) AS turn_count,
660
+ count(DISTINCT m.message_id) AS message_count,
661
+ count(DISTINCT tc.tool_call_id) AS tool_call_count,
662
+ count(DISTINCT tr.tool_result_id) AS tool_result_count,
663
+ count(DISTINCT CASE
664
+ WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
665
+ THEN tr.tool_result_id
666
+ END) AS tool_error_count,
667
+ count(DISTINCT sd.doc_id) AS search_doc_count
668
+ FROM sessions s
669
+ LEFT JOIN projects p ON p.project_id = s.project_id
670
+ LEFT JOIN turns t ON t.session_id = s.session_id
671
+ LEFT JOIN messages m ON m.session_id = s.session_id
672
+ LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
673
+ LEFT JOIN tool_results tr ON tr.session_id = s.session_id
674
+ LEFT JOIN search_docs sd ON sd.session_id = s.session_id
675
+ GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
676
+ `;
677
+
678
+ // src/core/schema/sql/004_tantivy_checkpoint.ts
679
+ var SQL_004_TANTIVY_CHECKPOINT = String.raw`
680
+ ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
681
+ ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
682
+ `;
683
+
405
684
  // src/core/schema/migrate.ts
406
685
  var MIGRATIONS = [
407
686
  { version: 1, name: "init", sql: SQL_001_INIT },
408
- { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS }
687
+ { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
688
+ { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
689
+ { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
409
690
  ];
410
691
  function runMigrations(db) {
411
692
  db.exec(`
@@ -447,7 +728,7 @@ function currentSchemaVersion(db) {
447
728
 
448
729
  // src/core/version.ts
449
730
  var PROSA_PARSER_VERSION = "0.1.0";
450
- var PROSA_SCHEMA_VERSION = 2;
731
+ var PROSA_SCHEMA_VERSION = 4;
451
732
 
452
733
  // src/core/bundle.ts
453
734
  function defaultBundlePath() {
@@ -534,6 +815,18 @@ async function openBundle(rootPath) {
534
815
  }
535
816
  return { path: resolved, db, manifest, paths };
536
817
  }
818
+ async function openOrInitBundle(rootPath) {
819
+ const resolved = path.resolve(rootPath);
820
+ const paths = bundlePaths(resolved);
821
+ const dirStat = await stat(resolved).catch(() => null);
822
+ if (dirStat && !dirStat.isDirectory()) {
823
+ throw new Error(`bundle path not found or not a directory: ${resolved}`);
824
+ }
825
+ if (!dirStat || !await exists(paths.manifest)) {
826
+ return await initBundle(resolved);
827
+ }
828
+ return await openBundle(resolved);
829
+ }
537
830
  function closeBundle(bundle) {
538
831
  closeDb(bundle.db);
539
832
  }
@@ -752,8 +1045,8 @@ var FS_WRITE_CONCURRENCY = 16;
752
1045
  async function writeFilesParallel(tasks) {
753
1046
  let cursor = 0;
754
1047
  const workers = [];
755
- const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
756
- for (let w = 0; w < limit; w++) {
1048
+ const limit2 = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
1049
+ for (let w = 0; w < limit2; w++) {
757
1050
  workers.push(
758
1051
  (async () => {
759
1052
  while (true) {
@@ -1034,7 +1327,7 @@ function sessionFilterWhere(filters) {
1034
1327
  }
1035
1328
  function listSessions(bundle, filters = {}) {
1036
1329
  const { where, params } = sessionFilterWhere(filters);
1037
- const limit = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
1330
+ const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
1038
1331
  const sql = `
1039
1332
  SELECT s.session_id,
1040
1333
  s.source_tool,
@@ -1055,7 +1348,7 @@ function listSessions(bundle, filters = {}) {
1055
1348
  FROM sessions s
1056
1349
  ${where}
1057
1350
  ORDER BY s.start_ts DESC NULLS LAST
1058
- LIMIT ${limit}
1351
+ LIMIT ${limit2}
1059
1352
  `;
1060
1353
  return bundle.db.prepare(sql).all(...params);
1061
1354
  }
@@ -1107,15 +1400,21 @@ function getSession(bundle, sessionId2) {
1107
1400
  }
1108
1401
 
1109
1402
  // src/services/search.ts
1110
- import { existsSync } from "fs";
1403
+ import { existsSync as existsSync2 } from "fs";
1111
1404
  import { createRequire } from "module";
1112
1405
 
1113
1406
  // src/core/errors.ts
1114
1407
  var getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
1115
1408
 
1116
1409
  // src/services/indexing.ts
1410
+ import { createHash as createHash2 } from "crypto";
1411
+ import { existsSync } from "fs";
1117
1412
  import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
1118
1413
  import path4 from "path";
1414
+ var SEARCH_INDEX_STATUS_COLUMNS = `
1415
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
1416
+ error_message, last_indexed_rowid, schema_fingerprint
1417
+ `;
1119
1418
  var FTS5_TRIGGER_SQL = `
1120
1419
  CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
1121
1420
  INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
@@ -1147,7 +1446,7 @@ function disableFts5Triggers(bundle) {
1147
1446
  function getSearchIndexStatuses(bundle) {
1148
1447
  ensureSearchIndexStatusRows(bundle);
1149
1448
  return bundle.db.prepare(
1150
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
1449
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
1151
1450
  FROM search_index_status
1152
1451
  ORDER BY engine`
1153
1452
  ).all();
@@ -1155,28 +1454,13 @@ function getSearchIndexStatuses(bundle) {
1155
1454
  function getSearchIndexStatus(bundle, engine) {
1156
1455
  ensureSearchIndexStatusRows(bundle);
1157
1456
  return bundle.db.prepare(
1158
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
1457
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
1159
1458
  FROM search_index_status
1160
1459
  WHERE engine = ?`
1161
1460
  ).get(engine) ?? null;
1162
1461
  }
1163
1462
  function markIndexesAfterImport(bundle, options) {
1164
1463
  if (!options.changed) return;
1165
- if (options.fts5Deferred) {
1166
- updateSearchIndexStatus(bundle, "fts5", {
1167
- status: "stale",
1168
- sourceDocCount: countSearchDocs(bundle),
1169
- indexedDocCount: countFts5Docs(bundle),
1170
- errorMessage: null
1171
- });
1172
- } else {
1173
- updateSearchIndexStatus(bundle, "fts5", {
1174
- status: "ready",
1175
- sourceDocCount: countSearchDocs(bundle),
1176
- indexedDocCount: countFts5Docs(bundle),
1177
- errorMessage: null
1178
- });
1179
- }
1180
1464
  const tantivy = getSearchIndexStatus(bundle, "tantivy");
1181
1465
  if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
1182
1466
  updateSearchIndexStatus(bundle, "tantivy", {
@@ -1217,46 +1501,100 @@ function rebuildFts5Index(bundle) {
1217
1501
  }
1218
1502
  return getSearchIndexStatus(bundle, "fts5");
1219
1503
  }
1220
- async function rebuildTantivyIndex(bundle) {
1504
+ var TANTIVY_SCHEMA_FIELDS = [
1505
+ { name: "doc_id", tokenizer: "raw" },
1506
+ { name: "entity_type", tokenizer: "raw" },
1507
+ { name: "entity_id", tokenizer: "raw" },
1508
+ { name: "session_id", tokenizer: "raw" },
1509
+ { name: "project_id", tokenizer: "raw" },
1510
+ { name: "timestamp", tokenizer: "raw" },
1511
+ { name: "role", tokenizer: "raw" },
1512
+ { name: "tool_name", tokenizer: "raw" },
1513
+ { name: "canonical_tool_type", tokenizer: "raw" },
1514
+ { name: "field_kind", tokenizer: "raw" },
1515
+ // The text field uses tantivy's default tokenizer (en_stem in the binding).
1516
+ { name: "text", tokenizer: "default" }
1517
+ ];
1518
+ function buildTantivySchema(tantivy) {
1519
+ const builder = new tantivy.SchemaBuilder();
1520
+ for (const field of TANTIVY_SCHEMA_FIELDS) {
1521
+ if (field.tokenizer === "default") {
1522
+ builder.addTextField(field.name, { stored: true });
1523
+ } else {
1524
+ builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
1525
+ }
1526
+ }
1527
+ return builder.build();
1528
+ }
1529
+ function computeSchemaFingerprint() {
1530
+ const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
1531
+ return createHash2("sha256").update(canonical).digest("hex");
1532
+ }
1533
+ function tantivyIndexLooksValid(dir) {
1534
+ return existsSync(path4.join(dir, "meta.json"));
1535
+ }
1536
+ function makeTantivyDoc(tantivy, row) {
1537
+ const doc = new tantivy.Document();
1538
+ doc.addText("doc_id", row.doc_id);
1539
+ doc.addText("entity_type", row.entity_type);
1540
+ doc.addText("entity_id", row.entity_id);
1541
+ doc.addText("session_id", row.session_id ?? "");
1542
+ doc.addText("project_id", row.project_id ?? "");
1543
+ doc.addText("timestamp", row.timestamp ?? "");
1544
+ doc.addText("role", row.role ?? "");
1545
+ doc.addText("tool_name", row.tool_name ?? "");
1546
+ doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
1547
+ doc.addText("field_kind", row.field_kind);
1548
+ doc.addText("text", row.text);
1549
+ return doc;
1550
+ }
1551
+ var SEARCH_DOCS_SELECT = `
1552
+ SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
1553
+ role, tool_name, canonical_tool_type, field_kind, text
1554
+ FROM search_docs
1555
+ `;
1556
+ async function rebuildTantivyIndex(bundle, options = {}) {
1221
1557
  ensureSearchIndexStatusRows(bundle);
1558
+ const sourceDocCount = countSearchDocs(bundle);
1559
+ const prev = getSearchIndexStatus(bundle, "tantivy");
1560
+ const fingerprint = computeSchemaFingerprint();
1561
+ const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
1562
+ const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
1563
+ const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
1564
+ const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
1222
1565
  updateSearchIndexStatus(bundle, "tantivy", {
1223
1566
  status: "building",
1224
- sourceDocCount: countSearchDocs(bundle),
1567
+ sourceDocCount,
1225
1568
  indexedDocCount: 0,
1226
1569
  errorMessage: null
1227
1570
  });
1228
1571
  try {
1229
1572
  const tantivy = await import("@oxdev03/node-tantivy-binding");
1230
- const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
1231
- await rm(bundle.paths.tantivy, { recursive: true, force: true });
1232
- await mkdir3(bundle.paths.tantivy, { recursive: true });
1233
- const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
1234
- const writer = index.writer(5e7, 1);
1235
- let indexedDocCount = 0;
1236
- const rows = bundle.db.prepare(
1237
- `SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
1238
- role, tool_name, canonical_tool_type, field_kind, text
1239
- FROM search_docs
1240
- ORDER BY rowid`
1241
- ).iterate();
1242
- for (const row of rows) {
1243
- const doc = new tantivy.Document();
1244
- doc.addText("doc_id", row.doc_id);
1245
- doc.addText("entity_type", row.entity_type);
1246
- doc.addText("entity_id", row.entity_id);
1247
- doc.addText("session_id", row.session_id ?? "");
1248
- doc.addText("project_id", row.project_id ?? "");
1249
- doc.addText("timestamp", row.timestamp ?? "");
1250
- doc.addText("role", row.role ?? "");
1251
- doc.addText("tool_name", row.tool_name ?? "");
1252
- doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
1253
- doc.addText("field_kind", row.field_kind);
1254
- doc.addText("text", row.text);
1255
- writer.addDocument(doc);
1256
- indexedDocCount++;
1573
+ const schema = buildTantivySchema(tantivy);
1574
+ let index;
1575
+ if (wantFullRebuild) {
1576
+ await rm(bundle.paths.tantivy, { recursive: true, force: true });
1577
+ await mkdir3(bundle.paths.tantivy, { recursive: true });
1578
+ index = new tantivy.Index(schema, bundle.paths.tantivy, false);
1579
+ } else {
1580
+ index = tantivy.Index.open(bundle.paths.tantivy);
1581
+ }
1582
+ const writer = index.writer(3e8, 4);
1583
+ const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
1584
+ let addedDocCount = 0;
1585
+ let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
1586
+ for (const row of bundle.db.prepare(select).iterate()) {
1587
+ if (!wantFullRebuild) {
1588
+ writer.deleteDocumentsByTerm("doc_id", row.doc_id);
1589
+ }
1590
+ writer.addDocument(makeTantivyDoc(tantivy, row));
1591
+ addedDocCount++;
1592
+ if (row.rowid > maxRowid) maxRowid = row.rowid;
1257
1593
  }
1258
1594
  writer.commit();
1259
1595
  index.reload();
1596
+ writer.waitMergingThreads();
1597
+ const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
1260
1598
  await writeFile4(
1261
1599
  path4.join(bundle.paths.tantivy, "prosa-index.json"),
1262
1600
  `${JSON.stringify(
@@ -1264,8 +1602,11 @@ async function rebuildTantivyIndex(bundle) {
1264
1602
  engine: "tantivy",
1265
1603
  source: "search_docs",
1266
1604
  built_at: (/* @__PURE__ */ new Date()).toISOString(),
1267
- source_doc_count: countSearchDocs(bundle),
1268
- indexed_doc_count: indexedDocCount
1605
+ mode: wantFullRebuild ? "full" : "incremental",
1606
+ source_doc_count: sourceDocCount,
1607
+ indexed_doc_count: indexedDocCount,
1608
+ last_indexed_rowid: maxRowid,
1609
+ schema_fingerprint: fingerprint
1269
1610
  },
1270
1611
  null,
1271
1612
  2
@@ -1275,14 +1616,16 @@ async function rebuildTantivyIndex(bundle) {
1275
1616
  );
1276
1617
  updateSearchIndexStatus(bundle, "tantivy", {
1277
1618
  status: "ready",
1278
- sourceDocCount: countSearchDocs(bundle),
1619
+ sourceDocCount,
1279
1620
  indexedDocCount,
1280
- errorMessage: null
1621
+ errorMessage: null,
1622
+ lastIndexedRowid: maxRowid,
1623
+ schemaFingerprint: fingerprint
1281
1624
  });
1282
1625
  } catch (error) {
1283
1626
  updateSearchIndexStatus(bundle, "tantivy", {
1284
1627
  status: "failed",
1285
- sourceDocCount: countSearchDocs(bundle),
1628
+ sourceDocCount,
1286
1629
  indexedDocCount: 0,
1287
1630
  errorMessage: getErrorMessage(error)
1288
1631
  });
@@ -1290,36 +1633,53 @@ async function rebuildTantivyIndex(bundle) {
1290
1633
  }
1291
1634
  return getSearchIndexStatus(bundle, "tantivy");
1292
1635
  }
1636
+ function countTantivyDocsBest(prev, added) {
1637
+ if (prev && typeof prev.indexed_doc_count === "number") {
1638
+ return prev.indexed_doc_count + added;
1639
+ }
1640
+ return added;
1641
+ }
1293
1642
  function ensureSearchIndexStatusRows(bundle) {
1294
1643
  const now = (/* @__PURE__ */ new Date()).toISOString();
1295
1644
  const stmt = prepare(
1296
1645
  bundle.db,
1297
1646
  `INSERT OR IGNORE INTO search_index_status (
1298
- engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
1299
- ) VALUES (?, ?, 0, 0, ?, NULL)`
1647
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
1648
+ error_message, last_indexed_rowid, schema_fingerprint
1649
+ ) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
1300
1650
  );
1301
1651
  stmt.run("fts5", "ready", now);
1302
1652
  stmt.run("tantivy", "missing", now);
1303
1653
  }
1304
1654
  function updateSearchIndexStatus(bundle, engine, values) {
1305
1655
  ensureSearchIndexStatusRows(bundle);
1306
- prepare(
1307
- bundle.db,
1308
- `UPDATE search_index_status
1309
- SET status = ?,
1310
- source_doc_count = ?,
1311
- indexed_doc_count = ?,
1312
- updated_at = ?,
1313
- error_message = ?
1314
- WHERE engine = ?`
1315
- ).run(
1656
+ const setClauses = [
1657
+ "status = ?",
1658
+ "source_doc_count = ?",
1659
+ "indexed_doc_count = ?",
1660
+ "updated_at = ?",
1661
+ "error_message = ?"
1662
+ ];
1663
+ const params = [
1316
1664
  values.status,
1317
1665
  values.sourceDocCount,
1318
1666
  values.indexedDocCount,
1319
1667
  (/* @__PURE__ */ new Date()).toISOString(),
1320
- values.errorMessage,
1321
- engine
1322
- );
1668
+ values.errorMessage
1669
+ ];
1670
+ if (values.lastIndexedRowid !== void 0) {
1671
+ setClauses.push("last_indexed_rowid = ?");
1672
+ params.push(values.lastIndexedRowid);
1673
+ }
1674
+ if (values.schemaFingerprint !== void 0) {
1675
+ setClauses.push("schema_fingerprint = ?");
1676
+ params.push(values.schemaFingerprint);
1677
+ }
1678
+ params.push(engine);
1679
+ prepare(
1680
+ bundle.db,
1681
+ `UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
1682
+ ).run(...params);
1323
1683
  }
1324
1684
  function countSearchDocs(bundle) {
1325
1685
  return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
@@ -1337,7 +1697,7 @@ function searchFullText(bundle, options) {
1337
1697
  if (options.engine === "tantivy") {
1338
1698
  return searchTantivy(bundle, options);
1339
1699
  }
1340
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
1700
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
1341
1701
  const sql = `
1342
1702
  SELECT d.doc_id,
1343
1703
  d.entity_type,
@@ -1352,14 +1712,14 @@ function searchFullText(bundle, options) {
1352
1712
  JOIN search_docs d ON d.rowid = search_docs_fts.rowid
1353
1713
  WHERE search_docs_fts MATCH ?
1354
1714
  ORDER BY bm25(search_docs_fts), d.timestamp DESC
1355
- LIMIT ${limit}
1715
+ LIMIT ${limit2}
1356
1716
  `;
1357
1717
  const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
1358
1718
  if (!ftsQuery) return [];
1359
1719
  return bundle.db.prepare(sql).all(ftsQuery);
1360
1720
  }
1361
1721
  function searchTantivy(bundle, options) {
1362
- if (!existsSync(bundle.paths.tantivy)) {
1722
+ if (!existsSync2(bundle.paths.tantivy)) {
1363
1723
  throw new Error("tantivy index not found; run `prosa index tantivy` first");
1364
1724
  }
1365
1725
  const status = getSearchIndexStatus(bundle, "tantivy");
@@ -1368,7 +1728,7 @@ function searchTantivy(bundle, options) {
1368
1728
  `tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
1369
1729
  );
1370
1730
  }
1371
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
1731
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
1372
1732
  const queryText = options.query.trim();
1373
1733
  if (!queryText) return [];
1374
1734
  const tantivy = requireTantivy();
@@ -1377,7 +1737,7 @@ function searchTantivy(bundle, options) {
1377
1737
  const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
1378
1738
  text: [true, 2, true]
1379
1739
  });
1380
- const result = searcher.search(query, limit, true);
1740
+ const result = searcher.search(query, limit2, true);
1381
1741
  const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
1382
1742
  snippets.setMaxNumChars(180);
1383
1743
  return result.hits.map((hit) => {
@@ -5067,6 +5427,13 @@ var PARQUET_TABLES = [
5067
5427
  "edges",
5068
5428
  "search_docs"
5069
5429
  ];
5430
+ var ANALYTICS_VIEWS = [
5431
+ "session_facts",
5432
+ "tool_usage_facts",
5433
+ "error_facts",
5434
+ "model_usage",
5435
+ "project_activity"
5436
+ ];
5070
5437
  async function exportBundleParquet(options) {
5071
5438
  const snapshot = await openBundleSnapshot(options.bundlePath);
5072
5439
  const outDir = path13.resolve(options.outDir ?? snapshot.defaultOutDir);
@@ -5083,7 +5450,7 @@ async function exportBundleParquet(options) {
5083
5450
  await attachSqlite(connection, snapshot.dbPath);
5084
5451
  for (const table of PARQUET_TABLES) {
5085
5452
  await connection.run(
5086
- `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
5453
+ `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
5087
5454
  );
5088
5455
  }
5089
5456
  } finally {
@@ -5119,6 +5486,7 @@ async function queryDuckDbParquet(options) {
5119
5486
  )})`
5120
5487
  );
5121
5488
  }
5489
+ await createAnalyticsViews(connection);
5122
5490
  const reader = await connection.runAndReadAll(options.sql);
5123
5491
  return {
5124
5492
  columns: reader.deduplicatedColumnNames(),
@@ -5149,6 +5517,285 @@ async function attachSqlite(connection, dbPath) {
5149
5517
  );
5150
5518
  }
5151
5519
  }
5520
+ async function createAnalyticsViews(connection) {
5521
+ await connection.run(`
5522
+ CREATE OR REPLACE VIEW session_facts AS
5523
+ WITH turn_counts AS (
5524
+ SELECT session_id, count(*) AS turn_count
5525
+ FROM turns
5526
+ GROUP BY session_id
5527
+ ),
5528
+ message_counts AS (
5529
+ SELECT session_id,
5530
+ count(*) AS message_count,
5531
+ sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
5532
+ sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
5533
+ FROM messages
5534
+ GROUP BY session_id
5535
+ ),
5536
+ tool_call_counts AS (
5537
+ SELECT session_id,
5538
+ count(*) AS tool_call_count,
5539
+ sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
5540
+ FROM tool_calls
5541
+ GROUP BY session_id
5542
+ ),
5543
+ tool_result_counts AS (
5544
+ SELECT session_id,
5545
+ count(*) AS tool_result_count,
5546
+ sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
5547
+ THEN 1 ELSE 0 END) AS tool_result_error_count,
5548
+ sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
5549
+ FROM tool_results
5550
+ GROUP BY session_id
5551
+ ),
5552
+ search_doc_counts AS (
5553
+ SELECT session_id, count(*) AS search_doc_count
5554
+ FROM search_docs
5555
+ WHERE session_id IS NOT NULL
5556
+ GROUP BY session_id
5557
+ )
5558
+ SELECT s.session_id,
5559
+ s.source_tool,
5560
+ s.source_session_id,
5561
+ s.project_id,
5562
+ p.display_name AS project_name,
5563
+ p.canonical_path AS project_path,
5564
+ s.parent_session_id,
5565
+ s.is_subagent,
5566
+ s.agent_role,
5567
+ s.agent_nickname,
5568
+ s.title,
5569
+ s.start_ts,
5570
+ s.end_ts,
5571
+ CASE
5572
+ WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
5573
+ THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
5574
+ TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
5575
+ ELSE NULL
5576
+ END AS duration_seconds,
5577
+ s.cwd_initial,
5578
+ s.git_branch_initial,
5579
+ s.model_first,
5580
+ s.model_last,
5581
+ s.status,
5582
+ s.timeline_confidence,
5583
+ sf.path AS source_file_path,
5584
+ COALESCE(tc.turn_count, 0) AS turn_count,
5585
+ COALESCE(mc.message_count, 0) AS message_count,
5586
+ COALESCE(mc.user_message_count, 0) AS user_message_count,
5587
+ COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
5588
+ COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
5589
+ COALESCE(trc.tool_result_count, 0) AS tool_result_count,
5590
+ COALESCE(tcc.tool_call_error_count, 0)
5591
+ + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
5592
+ COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
5593
+ COALESCE(sdc.search_doc_count, 0) AS search_doc_count
5594
+ FROM sessions s
5595
+ LEFT JOIN projects p ON p.project_id = s.project_id
5596
+ LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
5597
+ LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
5598
+ LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
5599
+ LEFT JOIN message_counts mc ON mc.session_id = s.session_id
5600
+ LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
5601
+ LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
5602
+ LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
5603
+ `);
5604
+ await connection.run(`
5605
+ CREATE OR REPLACE VIEW tool_usage_facts AS
5606
+ WITH result_rollup AS (
5607
+ SELECT tool_call_id,
5608
+ session_id,
5609
+ count(*) AS tool_result_count,
5610
+ max(status) AS result_status,
5611
+ max(is_error) AS is_error,
5612
+ min(exit_code) AS exit_code,
5613
+ sum(COALESCE(duration_ms, 0)) AS duration_ms,
5614
+ max(preview) AS preview
5615
+ FROM tool_results
5616
+ GROUP BY tool_call_id, session_id
5617
+ )
5618
+ SELECT tc.tool_call_id,
5619
+ tc.session_id,
5620
+ s.source_tool,
5621
+ s.source_session_id,
5622
+ s.project_id,
5623
+ p.display_name AS project_name,
5624
+ p.canonical_path AS project_path,
5625
+ tc.turn_id,
5626
+ tc.message_id,
5627
+ tc.event_id,
5628
+ tc.source_call_id,
5629
+ tc.tool_name,
5630
+ tc.canonical_tool_type,
5631
+ tc.command,
5632
+ tc.cwd,
5633
+ tc.path,
5634
+ tc.query,
5635
+ tc.timestamp_start,
5636
+ tc.timestamp_end,
5637
+ CASE
5638
+ WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
5639
+ THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
5640
+ TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
5641
+ ELSE NULL
5642
+ END AS call_duration_seconds,
5643
+ tc.status AS call_status,
5644
+ rr.result_status,
5645
+ COALESCE(rr.is_error, 0) AS is_error,
5646
+ rr.exit_code,
5647
+ rr.duration_ms AS result_duration_ms,
5648
+ COALESCE(rr.tool_result_count, 0) AS tool_result_count,
5649
+ rr.preview,
5650
+ tc.raw_record_id
5651
+ FROM tool_calls tc
5652
+ LEFT JOIN sessions s ON s.session_id = tc.session_id
5653
+ LEFT JOIN projects p ON p.project_id = s.project_id
5654
+ LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
5655
+ `);
5656
+ await connection.run(`
5657
+ CREATE OR REPLACE VIEW error_facts AS
5658
+ SELECT 'tool_result:' || tr.tool_result_id AS error_id,
5659
+ 'tool_result' AS error_category,
5660
+ s.source_tool,
5661
+ s.project_id,
5662
+ p.display_name AS project_name,
5663
+ tr.session_id,
5664
+ COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
5665
+ tc.tool_name,
5666
+ tc.canonical_tool_type,
5667
+ COALESCE(tr.status, tc.status) AS status,
5668
+ tr.exit_code,
5669
+ NULL AS message,
5670
+ tr.preview,
5671
+ NULL AS entity_type,
5672
+ NULL AS entity_id,
5673
+ tr.raw_record_id
5674
+ FROM tool_results tr
5675
+ LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
5676
+ LEFT JOIN sessions s ON s.session_id = tr.session_id
5677
+ LEFT JOIN projects p ON p.project_id = s.project_id
5678
+ WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
5679
+ UNION ALL
5680
+ SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
5681
+ 'import_error' AS error_category,
5682
+ COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
5683
+ NULL AS project_id,
5684
+ NULL AS project_name,
5685
+ NULL AS session_id,
5686
+ ie.occurred_at AS timestamp,
5687
+ NULL AS tool_name,
5688
+ NULL AS canonical_tool_type,
5689
+ ie.kind AS status,
5690
+ NULL AS exit_code,
5691
+ ie.message,
5692
+ NULL AS preview,
5693
+ NULL AS entity_type,
5694
+ NULL AS entity_id,
5695
+ ie.raw_record_id
5696
+ FROM import_errors ie
5697
+ LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
5698
+ LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
5699
+ UNION ALL
5700
+ SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
5701
+ 'uncertainty' AS error_category,
5702
+ NULL AS source_tool,
5703
+ NULL AS project_id,
5704
+ NULL AS project_name,
5705
+ CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
5706
+ NULL AS timestamp,
5707
+ NULL AS tool_name,
5708
+ NULL AS canonical_tool_type,
5709
+ u.reason AS status,
5710
+ NULL AS exit_code,
5711
+ u.reason AS message,
5712
+ NULL AS preview,
5713
+ u.entity_type,
5714
+ u.entity_id,
5715
+ NULL AS raw_record_id
5716
+ FROM uncertainties u
5717
+ `);
5718
+ await connection.run(`
5719
+ CREATE OR REPLACE VIEW model_usage AS
5720
+ WITH model_events AS (
5721
+ SELECT s.source_tool,
5722
+ s.project_id,
5723
+ p.display_name AS project_name,
5724
+ p.canonical_path AS project_path,
5725
+ s.session_id,
5726
+ NULL AS turn_id,
5727
+ s.model_first AS model,
5728
+ s.start_ts AS timestamp,
5729
+ 'session_first' AS observation_type
5730
+ FROM sessions s
5731
+ LEFT JOIN projects p ON p.project_id = s.project_id
5732
+ WHERE s.model_first IS NOT NULL
5733
+ UNION ALL
5734
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
5735
+ s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
5736
+ 'session_last' AS observation_type
5737
+ FROM sessions s
5738
+ LEFT JOIN projects p ON p.project_id = s.project_id
5739
+ WHERE s.model_last IS NOT NULL
5740
+ UNION ALL
5741
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
5742
+ t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
5743
+ FROM turns t
5744
+ LEFT JOIN sessions s ON s.session_id = t.session_id
5745
+ LEFT JOIN projects p ON p.project_id = s.project_id
5746
+ WHERE t.model IS NOT NULL
5747
+ UNION ALL
5748
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
5749
+ m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
5750
+ FROM messages m
5751
+ LEFT JOIN sessions s ON s.session_id = m.session_id
5752
+ LEFT JOIN projects p ON p.project_id = s.project_id
5753
+ WHERE m.model IS NOT NULL
5754
+ )
5755
+ SELECT source_tool,
5756
+ project_id,
5757
+ project_name,
5758
+ project_path,
5759
+ model,
5760
+ count(DISTINCT session_id) AS session_count,
5761
+ count(DISTINCT turn_id) AS turn_count,
5762
+ count(*) AS observation_count,
5763
+ sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
5764
+ min(timestamp) AS first_seen_ts,
5765
+ max(timestamp) AS last_seen_ts
5766
+ FROM model_events
5767
+ GROUP BY source_tool, project_id, project_name, project_path, model
5768
+ `);
5769
+ await connection.run(`
5770
+ CREATE OR REPLACE VIEW project_activity AS
5771
+ SELECT s.source_tool,
5772
+ s.project_id,
5773
+ COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
5774
+ p.canonical_path AS project_path,
5775
+ min(s.start_ts) AS first_session_ts,
5776
+ max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
5777
+ count(DISTINCT s.session_id) AS session_count,
5778
+ count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
5779
+ AS low_confidence_session_count,
5780
+ count(DISTINCT t.turn_id) AS turn_count,
5781
+ count(DISTINCT m.message_id) AS message_count,
5782
+ count(DISTINCT tc.tool_call_id) AS tool_call_count,
5783
+ count(DISTINCT tr.tool_result_id) AS tool_result_count,
5784
+ count(DISTINCT CASE
5785
+ WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
5786
+ THEN tr.tool_result_id
5787
+ END) AS tool_error_count,
5788
+ count(DISTINCT sd.doc_id) AS search_doc_count
5789
+ FROM sessions s
5790
+ LEFT JOIN projects p ON p.project_id = s.project_id
5791
+ LEFT JOIN turns t ON t.session_id = s.session_id
5792
+ LEFT JOIN messages m ON m.session_id = s.session_id
5793
+ LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
5794
+ LEFT JOIN tool_results tr ON tr.session_id = s.session_id
5795
+ LEFT JOIN search_docs sd ON sd.session_id = s.session_id
5796
+ GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
5797
+ `);
5798
+ }
5152
5799
  async function openBundleSnapshot(bundlePath) {
5153
5800
  const bundle = await openBundle(bundlePath);
5154
5801
  try {
@@ -5224,16 +5871,16 @@ function resolveCompilePath(p) {
5224
5871
  return path14.resolve(p);
5225
5872
  }
5226
5873
  async function runCompileImports(options) {
5227
- const { bundle, providers, deferIndex, logger } = options;
5874
+ const { bundle, providers, logger } = options;
5875
+ const overwrite = options.overwrite === true;
5228
5876
  let importedAny = false;
5229
5877
  const summaries = [];
5230
5878
  let tantivy = null;
5231
5879
  let tantivyError = null;
5880
+ let fts5Error = null;
5232
5881
  try {
5233
- if (deferIndex) {
5234
- logger?.info("disabling FTS5 triggers for deferred indexing");
5235
- disableFts5Triggers(bundle);
5236
- }
5882
+ logger?.info("disabling FTS5 triggers for bulk rebuild");
5883
+ disableFts5Triggers(bundle);
5237
5884
  for (const provider of providers) {
5238
5885
  const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
5239
5886
  const providerLogger = logger?.child({
@@ -5260,15 +5907,23 @@ async function runCompileImports(options) {
5260
5907
  summaries.push(summary);
5261
5908
  options.onProviderComplete?.(summary);
5262
5909
  }
5263
- logger?.info({ changed: importedAny, fts5_deferred: deferIndex }, "marking indexes");
5264
- markIndexesAfterImport(bundle, {
5265
- changed: importedAny,
5266
- fts5Deferred: deferIndex
5267
- });
5268
- if (importedAny) {
5910
+ const shouldRebuildIndexes = importedAny || overwrite;
5911
+ if (shouldRebuildIndexes) {
5912
+ logger?.info(
5913
+ { changed: importedAny, overwrite },
5914
+ importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
5915
+ );
5916
+ markIndexesAfterImport(bundle, { changed: true });
5917
+ try {
5918
+ logger?.info("rebuilding fts5 index");
5919
+ rebuildFts5Index(bundle);
5920
+ } catch (error) {
5921
+ fts5Error = getErrorMessage(error);
5922
+ logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
5923
+ }
5269
5924
  try {
5270
- logger?.info("rebuilding tantivy index");
5271
- const status = await rebuildTantivyIndex(bundle);
5925
+ logger?.info({ overwrite }, "rebuilding tantivy index");
5926
+ const status = await rebuildTantivyIndex(bundle, { overwrite });
5272
5927
  tantivy = { indexedDocCount: status.indexed_doc_count };
5273
5928
  options.onTantivyComplete?.(tantivy);
5274
5929
  } catch (error) {
@@ -5277,16 +5932,14 @@ async function runCompileImports(options) {
5277
5932
  }
5278
5933
  }
5279
5934
  } finally {
5280
- if (deferIndex) {
5281
- logger?.info("re-enabling FTS5 triggers");
5282
- enableFts5Triggers(bundle);
5283
- }
5935
+ enableFts5Triggers(bundle);
5284
5936
  }
5285
5937
  return {
5286
5938
  providers: summaries,
5287
5939
  importedAny,
5288
5940
  tantivy,
5289
- tantivyError
5941
+ tantivyError,
5942
+ fts5Error
5290
5943
  };
5291
5944
  }
5292
5945
  async function exportCompileParquet(options) {
@@ -5302,6 +5955,251 @@ async function exportCompileParquet(options) {
5302
5955
  };
5303
5956
  }
5304
5957
 
5958
+ // src/services/analytics.ts
5959
+ var ANALYTICS_REPORTS = ["sessions", "tools", "errors", "models", "projects"];
5960
+ async function runAnalyticsReport(options) {
5961
+ return queryDuckDbParquet({
5962
+ parquetDir: options.parquetDir,
5963
+ sql: buildAnalyticsSql(options.report, options.filters ?? {}, "duckdb")
5964
+ });
5965
+ }
5966
+ function runAnalyticsReportFromBundle(options) {
5967
+ const sql = buildAnalyticsSql(options.report, options.filters ?? {}, "sqlite");
5968
+ const stmt = options.bundle.db.prepare(sql);
5969
+ const rows = stmt.all();
5970
+ const columns = stmt.columns().map((column) => column.name);
5971
+ return { columns, rows };
5972
+ }
5973
+ function buildAnalyticsSql(report, filters, dialect) {
5974
+ switch (report) {
5975
+ case "sessions":
5976
+ return buildSessionsSql(filters, dialect);
5977
+ case "tools":
5978
+ return buildToolsSql(filters, dialect);
5979
+ case "errors":
5980
+ return buildErrorsSql(filters, dialect);
5981
+ case "models":
5982
+ return buildModelsSql(filters, dialect);
5983
+ case "projects":
5984
+ return buildProjectsSql(filters, dialect);
5985
+ }
5986
+ }
5987
+ function buildSessionsSql(filters, dialect) {
5988
+ const where = buildWhere([
5989
+ sourceFilter(filters),
5990
+ timeFilter("start_ts", filters),
5991
+ projectFilter(filters, dialect),
5992
+ filters.sessionId ? `session_id = ${sqlString2(filters.sessionId)}` : null,
5993
+ filters.sourcePathSubstring ? `source_file_path LIKE ${sqlString2(`%${escapeLike(filters.sourcePathSubstring)}%`)} ESCAPE '\\'` : null
5994
+ ]);
5995
+ return `
5996
+ SELECT start_ts, source_tool, project_name, source_file_path, session_id,
5997
+ source_session_id, model_last, duration_seconds,
5998
+ message_count, tool_call_count, tool_result_count, tool_error_count,
5999
+ tool_duration_ms, timeline_confidence, title
6000
+ FROM session_facts
6001
+ ${where}
6002
+ ORDER BY start_ts DESC NULLS LAST
6003
+ LIMIT ${limit(filters)}
6004
+ `;
6005
+ }
6006
+ function buildToolsSql(filters, dialect) {
6007
+ const where = buildWhere([
6008
+ sourceFilter(filters),
6009
+ timeFilter("timestamp_start", filters),
6010
+ projectFilter(filters, dialect),
6011
+ filters.toolName ? `tool_name = ${sqlString2(filters.toolName)}` : null,
6012
+ filters.canonicalType ? `canonical_tool_type = ${sqlString2(filters.canonicalType)}` : null,
6013
+ filters.errorsOnly ? `(is_error = 1 OR call_status = 'error')` : null
6014
+ ]);
6015
+ return `
6016
+ SELECT tool_name, canonical_tool_type, source_tool, project_name,
6017
+ count(*) AS call_count,
6018
+ sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,
6019
+ round(avg(result_duration_ms), 3) AS avg_result_duration_ms,
6020
+ max(timestamp_start) AS latest_ts
6021
+ FROM tool_usage_facts
6022
+ ${where}
6023
+ GROUP BY tool_name, canonical_tool_type, source_tool, project_name
6024
+ ORDER BY call_count DESC, error_count DESC, tool_name ASC
6025
+ LIMIT ${limit(filters)}
6026
+ `;
6027
+ }
6028
+ function buildErrorsSql(filters, dialect) {
6029
+ const where = buildWhere([
6030
+ sourceFilter(filters),
6031
+ timeFilter("timestamp", filters),
6032
+ projectFilter(filters, dialect),
6033
+ filters.toolName ? `tool_name = ${sqlString2(filters.toolName)}` : null,
6034
+ filters.category ? `error_category = ${sqlString2(filters.category)}` : null
6035
+ ]);
6036
+ return `
6037
+ SELECT timestamp, error_category, source_tool, project_name, session_id,
6038
+ tool_name, status, exit_code, message, preview
6039
+ FROM error_facts
6040
+ ${where}
6041
+ ORDER BY timestamp DESC NULLS LAST, error_id DESC
6042
+ LIMIT ${limit(filters)}
6043
+ `;
6044
+ }
6045
+ function buildModelsSql(filters, dialect) {
6046
+ const where = buildWhere([
6047
+ sourceFilter(filters),
6048
+ rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
6049
+ projectFilter(filters, dialect),
6050
+ filters.model ? `model = ${sqlString2(filters.model)}` : null
6051
+ ]);
6052
+ return `
6053
+ SELECT model, source_tool, project_name, session_count, turn_count,
6054
+ message_count, observation_count, first_seen_ts, last_seen_ts
6055
+ FROM model_usage
6056
+ ${where}
6057
+ ORDER BY session_count DESC, observation_count DESC, model ASC
6058
+ LIMIT ${limit(filters)}
6059
+ `;
6060
+ }
6061
+ function buildProjectsSql(filters, dialect) {
6062
+ const where = buildWhere([
6063
+ sourceFilter(filters),
6064
+ rangeOverlapFilter("first_session_ts", "latest_session_ts", filters),
6065
+ projectFilter(filters, dialect)
6066
+ ]);
6067
+ return `
6068
+ SELECT latest_session_ts, source_tool, project_name, project_path,
6069
+ session_count, message_count, tool_call_count, tool_error_count,
6070
+ low_confidence_session_count
6071
+ FROM project_activity
6072
+ ${where}
6073
+ ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC
6074
+ LIMIT ${limit(filters)}
6075
+ `;
6076
+ }
6077
+ function sourceFilter(filters) {
6078
+ return filters.source ? `source_tool = ${sqlString2(filters.source)}` : null;
6079
+ }
6080
+ function timeFilter(column, filters) {
6081
+ const filtersSql = [];
6082
+ if (filters.since)
6083
+ filtersSql.push(`(${column} IS NULL OR ${column} >= ${sqlString2(filters.since)})`);
6084
+ if (filters.until)
6085
+ filtersSql.push(`(${column} IS NULL OR ${column} < ${sqlString2(filters.until)})`);
6086
+ return filtersSql.length ? filtersSql.join(" AND ") : null;
6087
+ }
6088
+ function rangeOverlapFilter(firstColumn, lastColumn, filters) {
6089
+ const filtersSql = [];
6090
+ if (filters.since) {
6091
+ filtersSql.push(`(${lastColumn} IS NULL OR ${lastColumn} >= ${sqlString2(filters.since)})`);
6092
+ }
6093
+ if (filters.until) {
6094
+ filtersSql.push(`(${firstColumn} IS NULL OR ${firstColumn} < ${sqlString2(filters.until)})`);
6095
+ }
6096
+ return filtersSql.length ? filtersSql.join(" AND ") : null;
6097
+ }
6098
+ function projectFilter(filters, dialect) {
6099
+ if (!filters.project) return null;
6100
+ const exact = sqlString2(filters.project);
6101
+ const like = sqlString2(`%${escapeLike(filters.project)}%`);
6102
+ const op = dialect === "duckdb" ? "ILIKE" : "LIKE";
6103
+ return `(project_id = ${exact} OR project_name ${op} ${like} ESCAPE '\\' OR project_path ${op} ${like} ESCAPE '\\')`;
6104
+ }
6105
+ function buildWhere(filters) {
6106
+ const active = filters.filter((filter) => Boolean(filter));
6107
+ return active.length ? `WHERE ${active.join(" AND ")}` : "";
6108
+ }
6109
+ function limit(filters) {
6110
+ const value = Number.isFinite(filters.limit) ? filters.limit : void 0;
6111
+ return clampLimit(value, { max: 500, fallback: 50 });
6112
+ }
6113
+ function sqlString2(value) {
6114
+ return `'${value.replace(/'/g, "''")}'`;
6115
+ }
6116
+ function escapeLike(value) {
6117
+ return value.replace(/[\\%_]/g, (match) => `\\${match}`);
6118
+ }
6119
+
6120
+ // src/services/tool_calls.ts
6121
+ function listToolCalls(bundle, filters = {}) {
6122
+ const conds = [];
6123
+ const params = [];
6124
+ if (filters.toolName) {
6125
+ conds.push("tc.tool_name = ?");
6126
+ params.push(filters.toolName);
6127
+ }
6128
+ if (filters.canonicalType) {
6129
+ conds.push("tc.canonical_tool_type = ?");
6130
+ params.push(filters.canonicalType);
6131
+ }
6132
+ if (filters.sessionId) {
6133
+ conds.push("tc.session_id = ?");
6134
+ params.push(filters.sessionId);
6135
+ }
6136
+ if (filters.errorsOnly) {
6137
+ conds.push("(tr.is_error = 1 OR tc.status = ?)");
6138
+ params.push("error");
6139
+ }
6140
+ if (filters.pathSubstring) {
6141
+ conds.push("tc.path IS NOT NULL AND tc.path LIKE ?");
6142
+ params.push(`%${filters.pathSubstring}%`);
6143
+ }
6144
+ if (filters.sinceIso) {
6145
+ conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
6146
+ params.push(filters.sinceIso);
6147
+ }
6148
+ if (filters.untilIso) {
6149
+ conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
6150
+ params.push(filters.untilIso);
6151
+ }
6152
+ const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
6153
+ const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
6154
+ const toolCallSql = `
6155
+ SELECT 'tool_call' AS entity_type,
6156
+ tc.session_id,
6157
+ tc.tool_call_id,
6158
+ NULL AS artifact_id,
6159
+ tc.tool_name,
6160
+ tc.canonical_tool_type,
6161
+ tc.command,
6162
+ tc.path,
6163
+ tc.status,
6164
+ tc.timestamp_start,
6165
+ tr.is_error,
6166
+ tr.exit_code,
6167
+ tr.preview
6168
+ FROM tool_calls tc
6169
+ LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
6170
+ ${where}
6171
+ `;
6172
+ if (!filters.pathSubstring) {
6173
+ const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
6174
+ return bundle.db.prepare(sql2).all(...params);
6175
+ }
6176
+ const artifactSql = `
6177
+ SELECT 'artifact' AS entity_type,
6178
+ a.session_id,
6179
+ NULL AS tool_call_id,
6180
+ a.artifact_id,
6181
+ NULL AS tool_name,
6182
+ NULL AS canonical_tool_type,
6183
+ NULL AS command,
6184
+ a.path,
6185
+ NULL AS status,
6186
+ a.created_ts AS timestamp_start,
6187
+ NULL AS is_error,
6188
+ NULL AS exit_code,
6189
+ NULL AS preview
6190
+ FROM artifacts a
6191
+ WHERE a.path IS NOT NULL AND a.path LIKE ?
6192
+ `;
6193
+ const sql = `
6194
+ ${toolCallSql}
6195
+ UNION ALL
6196
+ ${artifactSql}
6197
+ ORDER BY timestamp_start DESC
6198
+ LIMIT ${limit2}
6199
+ `;
6200
+ return bundle.db.prepare(sql).all(...params, `%${filters.pathSubstring}%`);
6201
+ }
6202
+
5305
6203
  // src/services/export/markdown.ts
5306
6204
  async function exportSessionMarkdown(bundle, sessionId2) {
5307
6205
  const session = bundle.db.prepare(
@@ -5412,6 +6310,8 @@ function renderToolCall(c) {
5412
6310
  return lines.join("\n");
5413
6311
  }
5414
6312
  export {
6313
+ ANALYTICS_REPORTS,
6314
+ ANALYTICS_VIEWS,
5415
6315
  COMPILE_PROVIDERS,
5416
6316
  PARQUET_TABLES,
5417
6317
  PROSA_PARSER_VERSION,
@@ -5441,8 +6341,10 @@ export {
5441
6341
  getText,
5442
6342
  initBundle,
5443
6343
  listSessions,
6344
+ listToolCalls,
5444
6345
  markIndexesAfterImport,
5445
6346
  openBundle,
6347
+ openOrInitBundle,
5446
6348
  putBytes,
5447
6349
  putJson,
5448
6350
  putText,
@@ -5452,6 +6354,8 @@ export {
5452
6354
  recordError,
5453
6355
  registerSourceFile,
5454
6356
  resolveCompilePath,
6357
+ runAnalyticsReport,
6358
+ runAnalyticsReportFromBundle,
5455
6359
  runCompileImports,
5456
6360
  runMigrations,
5457
6361
  searchFullText,