@c3-oss/prosa 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -402,10 +402,291 @@ INSERT OR IGNORE INTO search_index_status (
402
402
  ('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
403
403
  `;
404
404
 
405
+ // src/core/schema/sql/003_analytics_views.ts
406
+ var SQL_003_ANALYTICS_VIEWS = String.raw`
407
+ CREATE VIEW IF NOT EXISTS session_facts AS
408
+ WITH turn_counts AS (
409
+ SELECT session_id, count(*) AS turn_count
410
+ FROM turns
411
+ GROUP BY session_id
412
+ ),
413
+ message_counts AS (
414
+ SELECT session_id,
415
+ count(*) AS message_count,
416
+ sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
417
+ sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
418
+ FROM messages
419
+ GROUP BY session_id
420
+ ),
421
+ tool_call_counts AS (
422
+ SELECT session_id,
423
+ count(*) AS tool_call_count,
424
+ sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
425
+ FROM tool_calls
426
+ GROUP BY session_id
427
+ ),
428
+ tool_result_counts AS (
429
+ SELECT session_id,
430
+ count(*) AS tool_result_count,
431
+ sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
432
+ THEN 1 ELSE 0 END) AS tool_result_error_count,
433
+ sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
434
+ FROM tool_results
435
+ GROUP BY session_id
436
+ ),
437
+ search_doc_counts AS (
438
+ SELECT session_id, count(*) AS search_doc_count
439
+ FROM search_docs
440
+ WHERE session_id IS NOT NULL
441
+ GROUP BY session_id
442
+ )
443
+ SELECT s.session_id,
444
+ s.source_tool,
445
+ s.source_session_id,
446
+ s.project_id,
447
+ p.display_name AS project_name,
448
+ p.canonical_path AS project_path,
449
+ s.parent_session_id,
450
+ s.is_subagent,
451
+ s.agent_role,
452
+ s.agent_nickname,
453
+ s.title,
454
+ s.start_ts,
455
+ s.end_ts,
456
+ CASE
457
+ WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
458
+ THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
459
+ ELSE NULL
460
+ END AS duration_seconds,
461
+ s.cwd_initial,
462
+ s.git_branch_initial,
463
+ s.model_first,
464
+ s.model_last,
465
+ s.status,
466
+ s.timeline_confidence,
467
+ sf.path AS source_file_path,
468
+ COALESCE(tc.turn_count, 0) AS turn_count,
469
+ COALESCE(mc.message_count, 0) AS message_count,
470
+ COALESCE(mc.user_message_count, 0) AS user_message_count,
471
+ COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
472
+ COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
473
+ COALESCE(trc.tool_result_count, 0) AS tool_result_count,
474
+ COALESCE(tcc.tool_call_error_count, 0)
475
+ + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
476
+ COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
477
+ COALESCE(sdc.search_doc_count, 0) AS search_doc_count
478
+ FROM sessions s
479
+ LEFT JOIN projects p ON p.project_id = s.project_id
480
+ LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
481
+ LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
482
+ LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
483
+ LEFT JOIN message_counts mc ON mc.session_id = s.session_id
484
+ LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
485
+ LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
486
+ LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;
487
+
488
+ CREATE VIEW IF NOT EXISTS tool_usage_facts AS
489
+ WITH result_rollup AS (
490
+ SELECT tool_call_id,
491
+ session_id,
492
+ count(*) AS tool_result_count,
493
+ max(status) AS result_status,
494
+ max(is_error) AS is_error,
495
+ min(exit_code) AS exit_code,
496
+ sum(COALESCE(duration_ms, 0)) AS duration_ms,
497
+ max(preview) AS preview
498
+ FROM tool_results
499
+ GROUP BY tool_call_id, session_id
500
+ )
501
+ SELECT tc.tool_call_id,
502
+ tc.session_id,
503
+ s.source_tool,
504
+ s.source_session_id,
505
+ s.project_id,
506
+ p.display_name AS project_name,
507
+ p.canonical_path AS project_path,
508
+ tc.turn_id,
509
+ tc.message_id,
510
+ tc.event_id,
511
+ tc.source_call_id,
512
+ tc.tool_name,
513
+ tc.canonical_tool_type,
514
+ tc.command,
515
+ tc.cwd,
516
+ tc.path,
517
+ tc.query,
518
+ tc.timestamp_start,
519
+ tc.timestamp_end,
520
+ CASE
521
+ WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
522
+ THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
523
+ ELSE NULL
524
+ END AS call_duration_seconds,
525
+ tc.status AS call_status,
526
+ rr.result_status,
527
+ COALESCE(rr.is_error, 0) AS is_error,
528
+ rr.exit_code,
529
+ rr.duration_ms AS result_duration_ms,
530
+ COALESCE(rr.tool_result_count, 0) AS tool_result_count,
531
+ rr.preview,
532
+ tc.raw_record_id
533
+ FROM tool_calls tc
534
+ LEFT JOIN sessions s ON s.session_id = tc.session_id
535
+ LEFT JOIN projects p ON p.project_id = s.project_id
536
+ LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;
537
+
538
+ CREATE VIEW IF NOT EXISTS error_facts AS
539
+ SELECT 'tool_result:' || tr.tool_result_id AS error_id,
540
+ 'tool_result' AS error_category,
541
+ s.source_tool,
542
+ s.project_id,
543
+ p.display_name AS project_name,
544
+ tr.session_id,
545
+ COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
546
+ tc.tool_name,
547
+ tc.canonical_tool_type,
548
+ COALESCE(tr.status, tc.status) AS status,
549
+ tr.exit_code,
550
+ NULL AS message,
551
+ tr.preview,
552
+ NULL AS entity_type,
553
+ NULL AS entity_id,
554
+ tr.raw_record_id
555
+ FROM tool_results tr
556
+ LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
557
+ LEFT JOIN sessions s ON s.session_id = tr.session_id
558
+ LEFT JOIN projects p ON p.project_id = s.project_id
559
+ WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
560
+ UNION ALL
561
+ SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
562
+ 'import_error' AS error_category,
563
+ COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
564
+ NULL AS project_id,
565
+ NULL AS project_name,
566
+ NULL AS session_id,
567
+ ie.occurred_at AS timestamp,
568
+ NULL AS tool_name,
569
+ NULL AS canonical_tool_type,
570
+ ie.kind AS status,
571
+ NULL AS exit_code,
572
+ ie.message,
573
+ NULL AS preview,
574
+ NULL AS entity_type,
575
+ NULL AS entity_id,
576
+ ie.raw_record_id
577
+ FROM import_errors ie
578
+ LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
579
+ LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
580
+ UNION ALL
581
+ SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
582
+ 'uncertainty' AS error_category,
583
+ NULL AS source_tool,
584
+ NULL AS project_id,
585
+ NULL AS project_name,
586
+ CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
587
+ NULL AS timestamp,
588
+ NULL AS tool_name,
589
+ NULL AS canonical_tool_type,
590
+ u.reason AS status,
591
+ NULL AS exit_code,
592
+ u.reason AS message,
593
+ NULL AS preview,
594
+ u.entity_type,
595
+ u.entity_id,
596
+ NULL AS raw_record_id
597
+ FROM uncertainties u;
598
+
599
+ CREATE VIEW IF NOT EXISTS model_usage AS
600
+ WITH model_events AS (
601
+ SELECT s.source_tool,
602
+ s.project_id,
603
+ p.display_name AS project_name,
604
+ p.canonical_path AS project_path,
605
+ s.session_id,
606
+ NULL AS turn_id,
607
+ s.model_first AS model,
608
+ s.start_ts AS timestamp,
609
+ 'session_first' AS observation_type
610
+ FROM sessions s
611
+ LEFT JOIN projects p ON p.project_id = s.project_id
612
+ WHERE s.model_first IS NOT NULL
613
+ UNION ALL
614
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
615
+ s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
616
+ 'session_last' AS observation_type
617
+ FROM sessions s
618
+ LEFT JOIN projects p ON p.project_id = s.project_id
619
+ WHERE s.model_last IS NOT NULL
620
+ UNION ALL
621
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
622
+ t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
623
+ FROM turns t
624
+ LEFT JOIN sessions s ON s.session_id = t.session_id
625
+ LEFT JOIN projects p ON p.project_id = s.project_id
626
+ WHERE t.model IS NOT NULL
627
+ UNION ALL
628
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
629
+ m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
630
+ FROM messages m
631
+ LEFT JOIN sessions s ON s.session_id = m.session_id
632
+ LEFT JOIN projects p ON p.project_id = s.project_id
633
+ WHERE m.model IS NOT NULL
634
+ )
635
+ SELECT source_tool,
636
+ project_id,
637
+ project_name,
638
+ project_path,
639
+ model,
640
+ count(DISTINCT session_id) AS session_count,
641
+ count(DISTINCT turn_id) AS turn_count,
642
+ count(*) AS observation_count,
643
+ sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
644
+ min(timestamp) AS first_seen_ts,
645
+ max(timestamp) AS last_seen_ts
646
+ FROM model_events
647
+ GROUP BY source_tool, project_id, project_name, project_path, model;
648
+
649
+ CREATE VIEW IF NOT EXISTS project_activity AS
650
+ SELECT s.source_tool,
651
+ s.project_id,
652
+ COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
653
+ p.canonical_path AS project_path,
654
+ min(s.start_ts) AS first_session_ts,
655
+ max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
656
+ count(DISTINCT s.session_id) AS session_count,
657
+ count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
658
+ AS low_confidence_session_count,
659
+ count(DISTINCT t.turn_id) AS turn_count,
660
+ count(DISTINCT m.message_id) AS message_count,
661
+ count(DISTINCT tc.tool_call_id) AS tool_call_count,
662
+ count(DISTINCT tr.tool_result_id) AS tool_result_count,
663
+ count(DISTINCT CASE
664
+ WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
665
+ THEN tr.tool_result_id
666
+ END) AS tool_error_count,
667
+ count(DISTINCT sd.doc_id) AS search_doc_count
668
+ FROM sessions s
669
+ LEFT JOIN projects p ON p.project_id = s.project_id
670
+ LEFT JOIN turns t ON t.session_id = s.session_id
671
+ LEFT JOIN messages m ON m.session_id = s.session_id
672
+ LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
673
+ LEFT JOIN tool_results tr ON tr.session_id = s.session_id
674
+ LEFT JOIN search_docs sd ON sd.session_id = s.session_id
675
+ GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
676
+ `;
677
+
678
+ // src/core/schema/sql/004_tantivy_checkpoint.ts
679
+ var SQL_004_TANTIVY_CHECKPOINT = String.raw`
680
+ ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
681
+ ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
682
+ `;
683
+
405
684
  // src/core/schema/migrate.ts
406
685
  var MIGRATIONS = [
407
686
  { version: 1, name: "init", sql: SQL_001_INIT },
408
- { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS }
687
+ { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
688
+ { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
689
+ { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
409
690
  ];
410
691
  function runMigrations(db) {
411
692
  db.exec(`
@@ -447,7 +728,7 @@ function currentSchemaVersion(db) {
447
728
 
448
729
  // src/core/version.ts
449
730
  var PROSA_PARSER_VERSION = "0.1.0";
450
- var PROSA_SCHEMA_VERSION = 2;
731
+ var PROSA_SCHEMA_VERSION = 4;
451
732
 
452
733
  // src/core/bundle.ts
453
734
  function defaultBundlePath() {
@@ -764,8 +1045,8 @@ var FS_WRITE_CONCURRENCY = 16;
764
1045
  async function writeFilesParallel(tasks) {
765
1046
  let cursor = 0;
766
1047
  const workers = [];
767
- const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
768
- for (let w = 0; w < limit; w++) {
1048
+ const limit2 = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
1049
+ for (let w = 0; w < limit2; w++) {
769
1050
  workers.push(
770
1051
  (async () => {
771
1052
  while (true) {
@@ -1046,7 +1327,7 @@ function sessionFilterWhere(filters) {
1046
1327
  }
1047
1328
  function listSessions(bundle, filters = {}) {
1048
1329
  const { where, params } = sessionFilterWhere(filters);
1049
- const limit = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
1330
+ const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
1050
1331
  const sql = `
1051
1332
  SELECT s.session_id,
1052
1333
  s.source_tool,
@@ -1067,7 +1348,7 @@ function listSessions(bundle, filters = {}) {
1067
1348
  FROM sessions s
1068
1349
  ${where}
1069
1350
  ORDER BY s.start_ts DESC NULLS LAST
1070
- LIMIT ${limit}
1351
+ LIMIT ${limit2}
1071
1352
  `;
1072
1353
  return bundle.db.prepare(sql).all(...params);
1073
1354
  }
@@ -1119,15 +1400,21 @@ function getSession(bundle, sessionId2) {
1119
1400
  }
1120
1401
 
1121
1402
  // src/services/search.ts
1122
- import { existsSync } from "fs";
1403
+ import { existsSync as existsSync2 } from "fs";
1123
1404
  import { createRequire } from "module";
1124
1405
 
1125
1406
  // src/core/errors.ts
1126
1407
  var getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
1127
1408
 
1128
1409
  // src/services/indexing.ts
1410
+ import { createHash as createHash2 } from "crypto";
1411
+ import { existsSync } from "fs";
1129
1412
  import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
1130
1413
  import path4 from "path";
1414
+ var SEARCH_INDEX_STATUS_COLUMNS = `
1415
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
1416
+ error_message, last_indexed_rowid, schema_fingerprint
1417
+ `;
1131
1418
  var FTS5_TRIGGER_SQL = `
1132
1419
  CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
1133
1420
  INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
@@ -1159,7 +1446,7 @@ function disableFts5Triggers(bundle) {
1159
1446
  function getSearchIndexStatuses(bundle) {
1160
1447
  ensureSearchIndexStatusRows(bundle);
1161
1448
  return bundle.db.prepare(
1162
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
1449
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
1163
1450
  FROM search_index_status
1164
1451
  ORDER BY engine`
1165
1452
  ).all();
@@ -1167,28 +1454,13 @@ function getSearchIndexStatuses(bundle) {
1167
1454
  function getSearchIndexStatus(bundle, engine) {
1168
1455
  ensureSearchIndexStatusRows(bundle);
1169
1456
  return bundle.db.prepare(
1170
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
1457
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
1171
1458
  FROM search_index_status
1172
1459
  WHERE engine = ?`
1173
1460
  ).get(engine) ?? null;
1174
1461
  }
1175
1462
  function markIndexesAfterImport(bundle, options) {
1176
1463
  if (!options.changed) return;
1177
- if (options.fts5Deferred) {
1178
- updateSearchIndexStatus(bundle, "fts5", {
1179
- status: "stale",
1180
- sourceDocCount: countSearchDocs(bundle),
1181
- indexedDocCount: countFts5Docs(bundle),
1182
- errorMessage: null
1183
- });
1184
- } else {
1185
- updateSearchIndexStatus(bundle, "fts5", {
1186
- status: "ready",
1187
- sourceDocCount: countSearchDocs(bundle),
1188
- indexedDocCount: countFts5Docs(bundle),
1189
- errorMessage: null
1190
- });
1191
- }
1192
1464
  const tantivy = getSearchIndexStatus(bundle, "tantivy");
1193
1465
  if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
1194
1466
  updateSearchIndexStatus(bundle, "tantivy", {
@@ -1229,46 +1501,100 @@ function rebuildFts5Index(bundle) {
1229
1501
  }
1230
1502
  return getSearchIndexStatus(bundle, "fts5");
1231
1503
  }
1232
- async function rebuildTantivyIndex(bundle) {
1504
+ var TANTIVY_SCHEMA_FIELDS = [
1505
+ { name: "doc_id", tokenizer: "raw" },
1506
+ { name: "entity_type", tokenizer: "raw" },
1507
+ { name: "entity_id", tokenizer: "raw" },
1508
+ { name: "session_id", tokenizer: "raw" },
1509
+ { name: "project_id", tokenizer: "raw" },
1510
+ { name: "timestamp", tokenizer: "raw" },
1511
+ { name: "role", tokenizer: "raw" },
1512
+ { name: "tool_name", tokenizer: "raw" },
1513
+ { name: "canonical_tool_type", tokenizer: "raw" },
1514
+ { name: "field_kind", tokenizer: "raw" },
1515
+ // The text field uses tantivy's default tokenizer (en_stem in the binding).
1516
+ { name: "text", tokenizer: "default" }
1517
+ ];
1518
+ function buildTantivySchema(tantivy) {
1519
+ const builder = new tantivy.SchemaBuilder();
1520
+ for (const field of TANTIVY_SCHEMA_FIELDS) {
1521
+ if (field.tokenizer === "default") {
1522
+ builder.addTextField(field.name, { stored: true });
1523
+ } else {
1524
+ builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
1525
+ }
1526
+ }
1527
+ return builder.build();
1528
+ }
1529
+ function computeSchemaFingerprint() {
1530
+ const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
1531
+ return createHash2("sha256").update(canonical).digest("hex");
1532
+ }
1533
+ function tantivyIndexLooksValid(dir) {
1534
+ return existsSync(path4.join(dir, "meta.json"));
1535
+ }
1536
+ function makeTantivyDoc(tantivy, row) {
1537
+ const doc = new tantivy.Document();
1538
+ doc.addText("doc_id", row.doc_id);
1539
+ doc.addText("entity_type", row.entity_type);
1540
+ doc.addText("entity_id", row.entity_id);
1541
+ doc.addText("session_id", row.session_id ?? "");
1542
+ doc.addText("project_id", row.project_id ?? "");
1543
+ doc.addText("timestamp", row.timestamp ?? "");
1544
+ doc.addText("role", row.role ?? "");
1545
+ doc.addText("tool_name", row.tool_name ?? "");
1546
+ doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
1547
+ doc.addText("field_kind", row.field_kind);
1548
+ doc.addText("text", row.text);
1549
+ return doc;
1550
+ }
1551
+ var SEARCH_DOCS_SELECT = `
1552
+ SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
1553
+ role, tool_name, canonical_tool_type, field_kind, text
1554
+ FROM search_docs
1555
+ `;
1556
+ async function rebuildTantivyIndex(bundle, options = {}) {
1233
1557
  ensureSearchIndexStatusRows(bundle);
1558
+ const sourceDocCount = countSearchDocs(bundle);
1559
+ const prev = getSearchIndexStatus(bundle, "tantivy");
1560
+ const fingerprint = computeSchemaFingerprint();
1561
+ const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
1562
+ const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
1563
+ const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
1564
+ const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
1234
1565
  updateSearchIndexStatus(bundle, "tantivy", {
1235
1566
  status: "building",
1236
- sourceDocCount: countSearchDocs(bundle),
1567
+ sourceDocCount,
1237
1568
  indexedDocCount: 0,
1238
1569
  errorMessage: null
1239
1570
  });
1240
1571
  try {
1241
1572
  const tantivy = await import("@oxdev03/node-tantivy-binding");
1242
- const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
1243
- await rm(bundle.paths.tantivy, { recursive: true, force: true });
1244
- await mkdir3(bundle.paths.tantivy, { recursive: true });
1245
- const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
1246
- const writer = index.writer(5e7, 1);
1247
- let indexedDocCount = 0;
1248
- const rows = bundle.db.prepare(
1249
- `SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
1250
- role, tool_name, canonical_tool_type, field_kind, text
1251
- FROM search_docs
1252
- ORDER BY rowid`
1253
- ).iterate();
1254
- for (const row of rows) {
1255
- const doc = new tantivy.Document();
1256
- doc.addText("doc_id", row.doc_id);
1257
- doc.addText("entity_type", row.entity_type);
1258
- doc.addText("entity_id", row.entity_id);
1259
- doc.addText("session_id", row.session_id ?? "");
1260
- doc.addText("project_id", row.project_id ?? "");
1261
- doc.addText("timestamp", row.timestamp ?? "");
1262
- doc.addText("role", row.role ?? "");
1263
- doc.addText("tool_name", row.tool_name ?? "");
1264
- doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
1265
- doc.addText("field_kind", row.field_kind);
1266
- doc.addText("text", row.text);
1267
- writer.addDocument(doc);
1268
- indexedDocCount++;
1573
+ const schema = buildTantivySchema(tantivy);
1574
+ let index;
1575
+ if (wantFullRebuild) {
1576
+ await rm(bundle.paths.tantivy, { recursive: true, force: true });
1577
+ await mkdir3(bundle.paths.tantivy, { recursive: true });
1578
+ index = new tantivy.Index(schema, bundle.paths.tantivy, false);
1579
+ } else {
1580
+ index = tantivy.Index.open(bundle.paths.tantivy);
1581
+ }
1582
+ const writer = index.writer(3e8, 4);
1583
+ const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
1584
+ let addedDocCount = 0;
1585
+ let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
1586
+ for (const row of bundle.db.prepare(select).iterate()) {
1587
+ if (!wantFullRebuild) {
1588
+ writer.deleteDocumentsByTerm("doc_id", row.doc_id);
1589
+ }
1590
+ writer.addDocument(makeTantivyDoc(tantivy, row));
1591
+ addedDocCount++;
1592
+ if (row.rowid > maxRowid) maxRowid = row.rowid;
1269
1593
  }
1270
1594
  writer.commit();
1271
1595
  index.reload();
1596
+ writer.waitMergingThreads();
1597
+ const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
1272
1598
  await writeFile4(
1273
1599
  path4.join(bundle.paths.tantivy, "prosa-index.json"),
1274
1600
  `${JSON.stringify(
@@ -1276,8 +1602,11 @@ async function rebuildTantivyIndex(bundle) {
1276
1602
  engine: "tantivy",
1277
1603
  source: "search_docs",
1278
1604
  built_at: (/* @__PURE__ */ new Date()).toISOString(),
1279
- source_doc_count: countSearchDocs(bundle),
1280
- indexed_doc_count: indexedDocCount
1605
+ mode: wantFullRebuild ? "full" : "incremental",
1606
+ source_doc_count: sourceDocCount,
1607
+ indexed_doc_count: indexedDocCount,
1608
+ last_indexed_rowid: maxRowid,
1609
+ schema_fingerprint: fingerprint
1281
1610
  },
1282
1611
  null,
1283
1612
  2
@@ -1287,14 +1616,16 @@ async function rebuildTantivyIndex(bundle) {
1287
1616
  );
1288
1617
  updateSearchIndexStatus(bundle, "tantivy", {
1289
1618
  status: "ready",
1290
- sourceDocCount: countSearchDocs(bundle),
1619
+ sourceDocCount,
1291
1620
  indexedDocCount,
1292
- errorMessage: null
1621
+ errorMessage: null,
1622
+ lastIndexedRowid: maxRowid,
1623
+ schemaFingerprint: fingerprint
1293
1624
  });
1294
1625
  } catch (error) {
1295
1626
  updateSearchIndexStatus(bundle, "tantivy", {
1296
1627
  status: "failed",
1297
- sourceDocCount: countSearchDocs(bundle),
1628
+ sourceDocCount,
1298
1629
  indexedDocCount: 0,
1299
1630
  errorMessage: getErrorMessage(error)
1300
1631
  });
@@ -1302,36 +1633,53 @@ async function rebuildTantivyIndex(bundle) {
1302
1633
  }
1303
1634
  return getSearchIndexStatus(bundle, "tantivy");
1304
1635
  }
1636
+ function countTantivyDocsBest(prev, added) {
1637
+ if (prev && typeof prev.indexed_doc_count === "number") {
1638
+ return prev.indexed_doc_count + added;
1639
+ }
1640
+ return added;
1641
+ }
1305
1642
  function ensureSearchIndexStatusRows(bundle) {
1306
1643
  const now = (/* @__PURE__ */ new Date()).toISOString();
1307
1644
  const stmt = prepare(
1308
1645
  bundle.db,
1309
1646
  `INSERT OR IGNORE INTO search_index_status (
1310
- engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
1311
- ) VALUES (?, ?, 0, 0, ?, NULL)`
1647
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
1648
+ error_message, last_indexed_rowid, schema_fingerprint
1649
+ ) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
1312
1650
  );
1313
1651
  stmt.run("fts5", "ready", now);
1314
1652
  stmt.run("tantivy", "missing", now);
1315
1653
  }
1316
1654
  function updateSearchIndexStatus(bundle, engine, values) {
1317
1655
  ensureSearchIndexStatusRows(bundle);
1318
- prepare(
1319
- bundle.db,
1320
- `UPDATE search_index_status
1321
- SET status = ?,
1322
- source_doc_count = ?,
1323
- indexed_doc_count = ?,
1324
- updated_at = ?,
1325
- error_message = ?
1326
- WHERE engine = ?`
1327
- ).run(
1656
+ const setClauses = [
1657
+ "status = ?",
1658
+ "source_doc_count = ?",
1659
+ "indexed_doc_count = ?",
1660
+ "updated_at = ?",
1661
+ "error_message = ?"
1662
+ ];
1663
+ const params = [
1328
1664
  values.status,
1329
1665
  values.sourceDocCount,
1330
1666
  values.indexedDocCount,
1331
1667
  (/* @__PURE__ */ new Date()).toISOString(),
1332
- values.errorMessage,
1333
- engine
1334
- );
1668
+ values.errorMessage
1669
+ ];
1670
+ if (values.lastIndexedRowid !== void 0) {
1671
+ setClauses.push("last_indexed_rowid = ?");
1672
+ params.push(values.lastIndexedRowid);
1673
+ }
1674
+ if (values.schemaFingerprint !== void 0) {
1675
+ setClauses.push("schema_fingerprint = ?");
1676
+ params.push(values.schemaFingerprint);
1677
+ }
1678
+ params.push(engine);
1679
+ prepare(
1680
+ bundle.db,
1681
+ `UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
1682
+ ).run(...params);
1335
1683
  }
1336
1684
  function countSearchDocs(bundle) {
1337
1685
  return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
@@ -1349,7 +1697,7 @@ function searchFullText(bundle, options) {
1349
1697
  if (options.engine === "tantivy") {
1350
1698
  return searchTantivy(bundle, options);
1351
1699
  }
1352
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
1700
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
1353
1701
  const sql = `
1354
1702
  SELECT d.doc_id,
1355
1703
  d.entity_type,
@@ -1364,14 +1712,14 @@ function searchFullText(bundle, options) {
1364
1712
  JOIN search_docs d ON d.rowid = search_docs_fts.rowid
1365
1713
  WHERE search_docs_fts MATCH ?
1366
1714
  ORDER BY bm25(search_docs_fts), d.timestamp DESC
1367
- LIMIT ${limit}
1715
+ LIMIT ${limit2}
1368
1716
  `;
1369
1717
  const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
1370
1718
  if (!ftsQuery) return [];
1371
1719
  return bundle.db.prepare(sql).all(ftsQuery);
1372
1720
  }
1373
1721
  function searchTantivy(bundle, options) {
1374
- if (!existsSync(bundle.paths.tantivy)) {
1722
+ if (!existsSync2(bundle.paths.tantivy)) {
1375
1723
  throw new Error("tantivy index not found; run `prosa index tantivy` first");
1376
1724
  }
1377
1725
  const status = getSearchIndexStatus(bundle, "tantivy");
@@ -1380,7 +1728,7 @@ function searchTantivy(bundle, options) {
1380
1728
  `tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
1381
1729
  );
1382
1730
  }
1383
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
1731
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
1384
1732
  const queryText = options.query.trim();
1385
1733
  if (!queryText) return [];
1386
1734
  const tantivy = requireTantivy();
@@ -1389,7 +1737,7 @@ function searchTantivy(bundle, options) {
1389
1737
  const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
1390
1738
  text: [true, 2, true]
1391
1739
  });
1392
- const result = searcher.search(query, limit, true);
1740
+ const result = searcher.search(query, limit2, true);
1393
1741
  const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
1394
1742
  snippets.setMaxNumChars(180);
1395
1743
  return result.hits.map((hit) => {
@@ -5079,6 +5427,13 @@ var PARQUET_TABLES = [
5079
5427
  "edges",
5080
5428
  "search_docs"
5081
5429
  ];
5430
+ var ANALYTICS_VIEWS = [
5431
+ "session_facts",
5432
+ "tool_usage_facts",
5433
+ "error_facts",
5434
+ "model_usage",
5435
+ "project_activity"
5436
+ ];
5082
5437
  async function exportBundleParquet(options) {
5083
5438
  const snapshot = await openBundleSnapshot(options.bundlePath);
5084
5439
  const outDir = path13.resolve(options.outDir ?? snapshot.defaultOutDir);
@@ -5095,7 +5450,7 @@ async function exportBundleParquet(options) {
5095
5450
  await attachSqlite(connection, snapshot.dbPath);
5096
5451
  for (const table of PARQUET_TABLES) {
5097
5452
  await connection.run(
5098
- `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
5453
+ `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
5099
5454
  );
5100
5455
  }
5101
5456
  } finally {
@@ -5131,6 +5486,7 @@ async function queryDuckDbParquet(options) {
5131
5486
  )})`
5132
5487
  );
5133
5488
  }
5489
+ await createAnalyticsViews(connection);
5134
5490
  const reader = await connection.runAndReadAll(options.sql);
5135
5491
  return {
5136
5492
  columns: reader.deduplicatedColumnNames(),
@@ -5161,6 +5517,285 @@ async function attachSqlite(connection, dbPath) {
5161
5517
  );
5162
5518
  }
5163
5519
  }
5520
/**
 * Define (or replace) the analytics views on the given connection:
 * session_facts, tool_usage_facts, error_facts, model_usage and
 * project_activity, in that order.
 *
 * The statements run one after another; if one rejects, the remaining views
 * are not created and the rejection propagates to the caller.
 *
 * @param connection Object exposing an async `run(sql)` method. The SQL uses
 *                   `date_diff` and `TRY_CAST`, and refers to the base tables
 *                   (sessions, turns, messages, ...) by unqualified name, so
 *                   they must already be visible on the connection.
 * @returns A promise that resolves once all five views have been defined.
 */
async function createAnalyticsViews(connection) {
  const statements = [
    // One row per session with per-session counters. Each child table is
    // aggregated in its own CTE so the joins never multiply rows.
    `
    CREATE OR REPLACE VIEW session_facts AS
    WITH turn_counts AS (
      SELECT session_id, count(*) AS turn_count
      FROM turns
      GROUP BY session_id
    ),
    message_counts AS (
      SELECT session_id,
             count(*) AS message_count,
             sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
             sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
      FROM messages
      GROUP BY session_id
    ),
    tool_call_counts AS (
      SELECT session_id,
             count(*) AS tool_call_count,
             sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
      FROM tool_calls
      GROUP BY session_id
    ),
    tool_result_counts AS (
      SELECT session_id,
             count(*) AS tool_result_count,
             sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
                      THEN 1 ELSE 0 END) AS tool_result_error_count,
             sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
      FROM tool_results
      GROUP BY session_id
    ),
    search_doc_counts AS (
      SELECT session_id, count(*) AS search_doc_count
      FROM search_docs
      WHERE session_id IS NOT NULL
      GROUP BY session_id
    )
    SELECT s.session_id,
           s.source_tool,
           s.source_session_id,
           s.project_id,
           p.display_name AS project_name,
           p.canonical_path AS project_path,
           s.parent_session_id,
           s.is_subagent,
           s.agent_role,
           s.agent_nickname,
           s.title,
           s.start_ts,
           s.end_ts,
           CASE
             WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
               THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
                              TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
             ELSE NULL
           END AS duration_seconds,
           s.cwd_initial,
           s.git_branch_initial,
           s.model_first,
           s.model_last,
           s.status,
           s.timeline_confidence,
           sf.path AS source_file_path,
           COALESCE(tc.turn_count, 0) AS turn_count,
           COALESCE(mc.message_count, 0) AS message_count,
           COALESCE(mc.user_message_count, 0) AS user_message_count,
           COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
           COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
           COALESCE(trc.tool_result_count, 0) AS tool_result_count,
           COALESCE(tcc.tool_call_error_count, 0)
             + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
           COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
           COALESCE(sdc.search_doc_count, 0) AS search_doc_count
    FROM sessions s
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
    LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
    LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
    LEFT JOIN message_counts mc ON mc.session_id = s.session_id
    LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
    LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
    LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
    `,
    // One row per tool call, enriched with a roll-up of its results.
    `
    CREATE OR REPLACE VIEW tool_usage_facts AS
    WITH result_rollup AS (
      SELECT tool_call_id,
             session_id,
             count(*) AS tool_result_count,
             max(status) AS result_status,
             max(is_error) AS is_error,
             min(exit_code) AS exit_code,
             sum(COALESCE(duration_ms, 0)) AS duration_ms,
             max(preview) AS preview
      FROM tool_results
      GROUP BY tool_call_id, session_id
    )
    SELECT tc.tool_call_id,
           tc.session_id,
           s.source_tool,
           s.source_session_id,
           s.project_id,
           p.display_name AS project_name,
           p.canonical_path AS project_path,
           tc.turn_id,
           tc.message_id,
           tc.event_id,
           tc.source_call_id,
           tc.tool_name,
           tc.canonical_tool_type,
           tc.command,
           tc.cwd,
           tc.path,
           tc.query,
           tc.timestamp_start,
           tc.timestamp_end,
           CASE
             WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
               THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
                              TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
             ELSE NULL
           END AS call_duration_seconds,
           tc.status AS call_status,
           rr.result_status,
           COALESCE(rr.is_error, 0) AS is_error,
           rr.exit_code,
           rr.duration_ms AS result_duration_ms,
           COALESCE(rr.tool_result_count, 0) AS tool_result_count,
           rr.preview,
           tc.raw_record_id
    FROM tool_calls tc
    LEFT JOIN sessions s ON s.session_id = tc.session_id
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
    `,
    // Failing tool results, import errors and uncertainties under one schema.
    `
    CREATE OR REPLACE VIEW error_facts AS
    SELECT 'tool_result:' || tr.tool_result_id AS error_id,
           'tool_result' AS error_category,
           s.source_tool,
           s.project_id,
           p.display_name AS project_name,
           tr.session_id,
           COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
           tc.tool_name,
           tc.canonical_tool_type,
           COALESCE(tr.status, tc.status) AS status,
           tr.exit_code,
           NULL AS message,
           tr.preview,
           NULL AS entity_type,
           NULL AS entity_id,
           tr.raw_record_id
    FROM tool_results tr
    LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
    LEFT JOIN sessions s ON s.session_id = tr.session_id
    LEFT JOIN projects p ON p.project_id = s.project_id
    WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
    UNION ALL
    SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
           'import_error' AS error_category,
           COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
           NULL AS project_id,
           NULL AS project_name,
           NULL AS session_id,
           ie.occurred_at AS timestamp,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           ie.kind AS status,
           NULL AS exit_code,
           ie.message,
           NULL AS preview,
           NULL AS entity_type,
           NULL AS entity_id,
           ie.raw_record_id
    FROM import_errors ie
    LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
    LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
    UNION ALL
    SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
           'uncertainty' AS error_category,
           NULL AS source_tool,
           NULL AS project_id,
           NULL AS project_name,
           CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
           NULL AS timestamp,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           u.reason AS status,
           NULL AS exit_code,
           u.reason AS message,
           NULL AS preview,
           u.entity_type,
           u.entity_id,
           NULL AS raw_record_id
    FROM uncertainties u
    `,
    // Model observations from sessions, turns and messages, grouped per
    // (source_tool, project, model).
    `
    CREATE OR REPLACE VIEW model_usage AS
    WITH model_events AS (
      SELECT s.source_tool,
             s.project_id,
             p.display_name AS project_name,
             p.canonical_path AS project_path,
             s.session_id,
             NULL AS turn_id,
             s.model_first AS model,
             s.start_ts AS timestamp,
             'session_first' AS observation_type
      FROM sessions s
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE s.model_first IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
             'session_last' AS observation_type
      FROM sessions s
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE s.model_last IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
      FROM turns t
      LEFT JOIN sessions s ON s.session_id = t.session_id
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE t.model IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
      FROM messages m
      LEFT JOIN sessions s ON s.session_id = m.session_id
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE m.model IS NOT NULL
    )
    SELECT source_tool,
           project_id,
           project_name,
           project_path,
           model,
           count(DISTINCT session_id) AS session_count,
           count(DISTINCT turn_id) AS turn_count,
           count(*) AS observation_count,
           sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
           min(timestamp) AS first_seen_ts,
           max(timestamp) AS last_seen_ts
    FROM model_events
    GROUP BY source_tool, project_id, project_name, project_path, model
    `,
    // Per-project activity. Child tables are aggregated per session BEFORE
    // joining: joining turns, messages, tool calls, tool results and search
    // docs directly onto sessions builds a per-session cross product whose
    // size is the product of all five row counts, which count(DISTINCT ...)
    // hides but still has to scan.
    // NOTE(review): summing per-session distinct counts equals the previous
    // global count(DISTINCT id) as long as each id belongs to exactly one
    // session (ids presumed to be primary keys; confirm against the schema).
    `
    CREATE OR REPLACE VIEW project_activity AS
    WITH turn_counts AS (
      SELECT session_id, count(DISTINCT turn_id) AS turn_count
      FROM turns
      GROUP BY session_id
    ),
    message_counts AS (
      SELECT session_id, count(DISTINCT message_id) AS message_count
      FROM messages
      GROUP BY session_id
    ),
    tool_call_counts AS (
      SELECT session_id, count(DISTINCT tool_call_id) AS tool_call_count
      FROM tool_calls
      GROUP BY session_id
    ),
    tool_result_counts AS (
      SELECT session_id,
             count(DISTINCT tool_result_id) AS tool_result_count,
             count(DISTINCT CASE
               WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
                 THEN tool_result_id
             END) AS tool_error_count
      FROM tool_results
      GROUP BY session_id
    ),
    search_doc_counts AS (
      SELECT session_id, count(DISTINCT doc_id) AS search_doc_count
      FROM search_docs
      WHERE session_id IS NOT NULL
      GROUP BY session_id
    )
    SELECT s.source_tool,
           s.project_id,
           COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
           p.canonical_path AS project_path,
           min(s.start_ts) AS first_session_ts,
           max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
           count(DISTINCT s.session_id) AS session_count,
           count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
             AS low_confidence_session_count,
           CAST(COALESCE(sum(tc.turn_count), 0) AS BIGINT) AS turn_count,
           CAST(COALESCE(sum(mc.message_count), 0) AS BIGINT) AS message_count,
           CAST(COALESCE(sum(tcc.tool_call_count), 0) AS BIGINT) AS tool_call_count,
           CAST(COALESCE(sum(trc.tool_result_count), 0) AS BIGINT) AS tool_result_count,
           CAST(COALESCE(sum(trc.tool_error_count), 0) AS BIGINT) AS tool_error_count,
           CAST(COALESCE(sum(sdc.search_doc_count), 0) AS BIGINT) AS search_doc_count
    FROM sessions s
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
    LEFT JOIN message_counts mc ON mc.session_id = s.session_id
    LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
    LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
    LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
    GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
    `
  ];
  // Run strictly in sequence so a failure stops before later views are defined.
  for (const statement of statements) {
    await connection.run(statement);
  }
}
5164
5799
  async function openBundleSnapshot(bundlePath) {
5165
5800
  const bundle = await openBundle(bundlePath);
5166
5801
  try {
@@ -5236,16 +5871,16 @@ function resolveCompilePath(p) {
5236
5871
  return path14.resolve(p);
5237
5872
  }
5238
5873
  async function runCompileImports(options) {
5239
- const { bundle, providers, deferIndex, logger } = options;
5874
+ const { bundle, providers, logger } = options;
5875
+ const overwrite = options.overwrite === true;
5240
5876
  let importedAny = false;
5241
5877
  const summaries = [];
5242
5878
  let tantivy = null;
5243
5879
  let tantivyError = null;
5880
+ let fts5Error = null;
5244
5881
  try {
5245
- if (deferIndex) {
5246
- logger?.info("disabling FTS5 triggers for deferred indexing");
5247
- disableFts5Triggers(bundle);
5248
- }
5882
+ logger?.info("disabling FTS5 triggers for bulk rebuild");
5883
+ disableFts5Triggers(bundle);
5249
5884
  for (const provider of providers) {
5250
5885
  const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
5251
5886
  const providerLogger = logger?.child({
@@ -5272,15 +5907,23 @@ async function runCompileImports(options) {
5272
5907
  summaries.push(summary);
5273
5908
  options.onProviderComplete?.(summary);
5274
5909
  }
5275
- logger?.info({ changed: importedAny, fts5_deferred: deferIndex }, "marking indexes");
5276
- markIndexesAfterImport(bundle, {
5277
- changed: importedAny,
5278
- fts5Deferred: deferIndex
5279
- });
5280
- if (importedAny) {
5910
+ const shouldRebuildIndexes = importedAny || overwrite;
5911
+ if (shouldRebuildIndexes) {
5912
+ logger?.info(
5913
+ { changed: importedAny, overwrite },
5914
+ importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
5915
+ );
5916
+ markIndexesAfterImport(bundle, { changed: true });
5917
+ try {
5918
+ logger?.info("rebuilding fts5 index");
5919
+ rebuildFts5Index(bundle);
5920
+ } catch (error) {
5921
+ fts5Error = getErrorMessage(error);
5922
+ logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
5923
+ }
5281
5924
  try {
5282
- logger?.info("rebuilding tantivy index");
5283
- const status = await rebuildTantivyIndex(bundle);
5925
+ logger?.info({ overwrite }, "rebuilding tantivy index");
5926
+ const status = await rebuildTantivyIndex(bundle, { overwrite });
5284
5927
  tantivy = { indexedDocCount: status.indexed_doc_count };
5285
5928
  options.onTantivyComplete?.(tantivy);
5286
5929
  } catch (error) {
@@ -5289,16 +5932,14 @@ async function runCompileImports(options) {
5289
5932
  }
5290
5933
  }
5291
5934
  } finally {
5292
- if (deferIndex) {
5293
- logger?.info("re-enabling FTS5 triggers");
5294
- enableFts5Triggers(bundle);
5295
- }
5935
+ enableFts5Triggers(bundle);
5296
5936
  }
5297
5937
  return {
5298
5938
  providers: summaries,
5299
5939
  importedAny,
5300
5940
  tantivy,
5301
- tantivyError
5941
+ tantivyError,
5942
+ fts5Error
5302
5943
  };
5303
5944
  }
5304
5945
  async function exportCompileParquet(options) {
@@ -5314,6 +5955,251 @@ async function exportCompileParquet(options) {
5314
5955
  };
5315
5956
  }
5316
5957
 
5958
+ // src/services/analytics.ts
5959
// Report names accepted by buildAnalyticsSql.
var ANALYTICS_REPORTS = [
  "sessions",
  "tools",
  "errors",
  "models",
  "projects"
];
5960
/**
 * Run an analytics report through queryDuckDbParquet.
 *
 * @param options `parquetDir` is forwarded unchanged; `report` selects the
 *                query; `filters` is optional and defaults to `{}` when
 *                null or undefined.
 * @returns Whatever queryDuckDbParquet resolves to for the generated SQL.
 */
async function runAnalyticsReport(options) {
  const activeFilters = options.filters ?? {};
  const sql = buildAnalyticsSql(options.report, activeFilters, "duckdb");
  return queryDuckDbParquet({ parquetDir: options.parquetDir, sql });
}
5966
/**
 * Run an analytics report directly against the bundle's database.
 *
 * @param options `bundle.db` must provide `prepare(sql)` returning a
 *                statement with `all()` and `columns()`; `report` selects the
 *                query; `filters` is optional and defaults to `{}`.
 * @returns `{ columns, rows }` where `columns` holds the statement's column
 *          names and `rows` is the result of `all()`.
 */
function runAnalyticsReportFromBundle(options) {
  const statement = options.bundle.db.prepare(
    buildAnalyticsSql(options.report, options.filters ?? {}, "sqlite")
  );
  const rows = statement.all();
  const columns = statement.columns().map(({ name }) => name);
  return { columns, rows };
}
5973
/**
 * Build the SQL text for one analytics report.
 *
 * @param report  One of "sessions", "tools", "errors", "models", "projects".
 * @param filters Filter options forwarded to the per-report builder.
 * @param dialect Dialect tag forwarded to the per-report builder
 *                ("duckdb" or "sqlite" at the call sites in this file).
 * @returns The SQL string produced by the matching builder.
 * @throws Error when `report` is not a known report name. Without this guard
 *         the function would return `undefined`, which callers would then
 *         hand to the database as the statement text.
 */
function buildAnalyticsSql(report, filters, dialect) {
  switch (report) {
    case "sessions":
      return buildSessionsSql(filters, dialect);
    case "tools":
      return buildToolsSql(filters, dialect);
    case "errors":
      return buildErrorsSql(filters, dialect);
    case "models":
      return buildModelsSql(filters, dialect);
    case "projects":
      return buildProjectsSql(filters, dialect);
    default:
      throw new Error(`Unknown analytics report: ${String(report)}`);
  }
}
5987
/**
 * Build the "sessions" report query over the session_facts view.
 *
 * Optional filters: source, since/until (on start_ts), project, sessionId and
 * sourcePathSubstring (literal substring match on source_file_path).
 * Rows are ordered newest first with NULL start_ts last.
 */
function buildSessionsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("start_ts", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.sessionId) {
    conditions.push(`session_id = ${sqlString2(filters.sessionId)}`);
  }
  if (filters.sourcePathSubstring) {
    // Wildcards in the user text are escaped so it matches literally.
    const pattern = sqlString2(`%${escapeLike(filters.sourcePathSubstring)}%`);
    conditions.push(`source_file_path LIKE ${pattern} ESCAPE '\\'`);
  }
  const whereClause = buildWhere(conditions);
  return `
    SELECT start_ts, source_tool, project_name, source_file_path, session_id,
           source_session_id, model_last, duration_seconds,
           message_count, tool_call_count, tool_result_count, tool_error_count,
           tool_duration_ms, timeline_confidence, title
    FROM session_facts
    ${whereClause}
    ORDER BY start_ts DESC NULLS LAST
    LIMIT ${limit(filters)}
  `;
}
6006
/**
 * Build the "tools" report query: tool_usage_facts grouped by tool name,
 * canonical type, source tool and project, with call and error counts.
 *
 * Optional filters: source, since/until (on timestamp_start), project,
 * toolName, canonicalType and errorsOnly.
 */
function buildToolsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp_start", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.toolName) {
    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
  }
  if (filters.canonicalType) {
    conditions.push(`canonical_tool_type = ${sqlString2(filters.canonicalType)}`);
  }
  if (filters.errorsOnly) {
    conditions.push(`(is_error = 1 OR call_status = 'error')`);
  }
  const whereClause = buildWhere(conditions);
  return `
    SELECT tool_name, canonical_tool_type, source_tool, project_name,
           count(*) AS call_count,
           sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,
           round(avg(result_duration_ms), 3) AS avg_result_duration_ms,
           max(timestamp_start) AS latest_ts
    FROM tool_usage_facts
    ${whereClause}
    GROUP BY tool_name, canonical_tool_type, source_tool, project_name
    ORDER BY call_count DESC, error_count DESC, tool_name ASC
    LIMIT ${limit(filters)}
  `;
}
6028
/**
 * Build the "errors" report query over the error_facts view.
 *
 * Optional filters: source, since/until (on timestamp), project, toolName and
 * category. Rows are ordered newest first (NULL timestamps last), then by
 * error_id descending.
 */
function buildErrorsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.toolName) {
    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
  }
  if (filters.category) {
    conditions.push(`error_category = ${sqlString2(filters.category)}`);
  }
  const whereClause = buildWhere(conditions);
  return `
    SELECT timestamp, error_category, source_tool, project_name, session_id,
           tool_name, status, exit_code, message, preview
    FROM error_facts
    ${whereClause}
    ORDER BY timestamp DESC NULLS LAST, error_id DESC
    LIMIT ${limit(filters)}
  `;
}
6045
/**
 * Build the "models" report query over the model_usage view.
 *
 * Optional filters: source, since/until (matched against the
 * first_seen_ts..last_seen_ts range), project and model.
 */
function buildModelsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.model) {
    conditions.push(`model = ${sqlString2(filters.model)}`);
  }
  const whereClause = buildWhere(conditions);
  return `
    SELECT model, source_tool, project_name, session_count, turn_count,
           message_count, observation_count, first_seen_ts, last_seen_ts
    FROM model_usage
    ${whereClause}
    ORDER BY session_count DESC, observation_count DESC, model ASC
    LIMIT ${limit(filters)}
  `;
}
6061
/**
 * Build the "projects" report query over the project_activity view.
 *
 * Optional filters: source, since/until (matched against the
 * first_session_ts..latest_session_ts range) and project.
 */
function buildProjectsSql(filters, dialect) {
  const conditions = [];
  conditions.push(sourceFilter(filters));
  conditions.push(rangeOverlapFilter("first_session_ts", "latest_session_ts", filters));
  conditions.push(projectFilter(filters, dialect));
  const whereClause = buildWhere(conditions);
  return `
    SELECT latest_session_ts, source_tool, project_name, project_path,
           session_count, message_count, tool_call_count, tool_error_count,
           low_confidence_session_count
    FROM project_activity
    ${whereClause}
    ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC
    LIMIT ${limit(filters)}
  `;
}
6077
/**
 * Equality condition on `source_tool`, or null when `filters.source` is
 * missing or empty.
 */
function sourceFilter(filters) {
  if (!filters.source) {
    return null;
  }
  return `source_tool = ${sqlString2(filters.source)}`;
}
6080
/**
 * Half-open time window condition on `column`: `>= since` and `< until`.
 * Rows where the column is NULL always pass. Returns null when neither
 * bound is set.
 */
function timeFilter(column, filters) {
  const { since, until } = filters;
  const bounds = [
    since ? `(${column} IS NULL OR ${column} >= ${sqlString2(since)})` : null,
    until ? `(${column} IS NULL OR ${column} < ${sqlString2(until)})` : null
  ].filter((clause) => clause !== null);
  return bounds.length > 0 ? bounds.join(" AND ") : null;
}
6088
/**
 * Condition selecting rows whose [firstColumn, lastColumn] range overlaps the
 * requested window: the range must end at or after `since` and start before
 * `until`. NULL columns always pass. Returns null when neither bound is set.
 */
function rangeOverlapFilter(firstColumn, lastColumn, filters) {
  const { since, until } = filters;
  const bounds = [
    since ? `(${lastColumn} IS NULL OR ${lastColumn} >= ${sqlString2(since)})` : null,
    until ? `(${firstColumn} IS NULL OR ${firstColumn} < ${sqlString2(until)})` : null
  ].filter((clause) => clause !== null);
  return bounds.length > 0 ? bounds.join(" AND ") : null;
}
6098
/**
 * Condition matching a project by exact `project_id`, or by literal substring
 * of `project_name` / `project_path`. Uses ILIKE when `dialect` is "duckdb"
 * and LIKE otherwise. Returns null when `filters.project` is missing or empty.
 */
function projectFilter(filters, dialect) {
  const { project } = filters;
  if (!project) return null;
  const likeOperator = dialect === "duckdb" ? "ILIKE" : "LIKE";
  const idLiteral = sqlString2(project);
  const patternLiteral = sqlString2(`%${escapeLike(project)}%`);
  const alternatives = [
    `project_id = ${idLiteral}`,
    `project_name ${likeOperator} ${patternLiteral} ESCAPE '\\'`,
    `project_path ${likeOperator} ${patternLiteral} ESCAPE '\\'`
  ];
  return `(${alternatives.join(" OR ")})`;
}
6105
/**
 * Join the truthy entries of `filters` with AND into a WHERE clause.
 * Returns an empty string when no entry is truthy.
 */
function buildWhere(filters) {
  const clauses = [];
  for (const candidate of filters) {
    if (candidate) clauses.push(candidate);
  }
  if (clauses.length === 0) {
    return "";
  }
  return `WHERE ${clauses.join(" AND ")}`;
}
6109
/**
 * Resolve the row limit for an analytics query. A `filters.limit` that is not
 * a finite number is passed to clampLimit as undefined; clampLimit is called
 * with max 500 and fallback 50.
 */
function limit(filters) {
  const requested = filters.limit;
  return clampLimit(Number.isFinite(requested) ? requested : void 0, { max: 500, fallback: 50 });
}
6113
/**
 * Render `value` as a single-quoted SQL string literal, doubling every
 * embedded single quote.
 */
function sqlString2(value) {
  const escaped = value.replace(/'/g, "''");
  return "'" + escaped + "'";
}
6116
/**
 * Prefix every backslash, percent sign and underscore in `value` with a
 * backslash, so the text can be embedded in a LIKE pattern that declares
 * backslash as its ESCAPE character.
 */
function escapeLike(value) {
  return value.replace(/([\\%_])/g, "\\$1");
}
6119
+
6120
+ // src/services/tool_calls.ts
6121
/**
 * List tool calls (joined with their results), newest first.
 *
 * When `filters.pathSubstring` is set, the result additionally contains rows
 * from `artifacts` whose path matches, marked with `entity_type = 'artifact'`.
 *
 * @param bundle  Object whose `db.prepare(sql).all(...params)` runs the query.
 * @param filters Optional filters:
 *   - toolName, canonicalType: exact match on the tool call.
 *   - sessionId: exact match; also applied to artifact rows.
 *   - errorsOnly: keep calls whose result has is_error = 1 or whose status
 *     is 'error'.
 *   - pathSubstring: literal substring of the path (LIKE wildcards in the
 *     input are escaped).
 *   - sinceIso / untilIso: half-open window on timestamp_start (and on
 *     created_ts for artifact rows); rows with a NULL timestamp are kept.
 *   - limit: passed through clampLimit with max 500 and fallback 100.
 * @returns Rows with entity_type, session_id, tool_call_id, artifact_id,
 *          tool_name, canonical_tool_type, command, path, status,
 *          timestamp_start, is_error, exit_code and preview.
 */
function listToolCalls(bundle, filters = {}) {
  const conds = [];
  const params = [];
  // Escape %, _ and \ so the user-supplied substring is matched literally
  // rather than being interpreted as a LIKE pattern.
  const pathPattern = filters.pathSubstring ? `%${escapeLike(filters.pathSubstring)}%` : null;
  if (filters.toolName) {
    conds.push("tc.tool_name = ?");
    params.push(filters.toolName);
  }
  if (filters.canonicalType) {
    conds.push("tc.canonical_tool_type = ?");
    params.push(filters.canonicalType);
  }
  if (filters.sessionId) {
    conds.push("tc.session_id = ?");
    params.push(filters.sessionId);
  }
  if (filters.errorsOnly) {
    conds.push("(tr.is_error = 1 OR tc.status = ?)");
    params.push("error");
  }
  if (pathPattern !== null) {
    conds.push("tc.path IS NOT NULL AND tc.path LIKE ? ESCAPE '\\'");
    params.push(pathPattern);
  }
  if (filters.sinceIso) {
    conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
    params.push(filters.sinceIso);
  }
  if (filters.untilIso) {
    conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
    params.push(filters.untilIso);
  }
  const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
  const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
  const toolCallSql = `
    SELECT 'tool_call' AS entity_type,
           tc.session_id,
           tc.tool_call_id,
           NULL AS artifact_id,
           tc.tool_name,
           tc.canonical_tool_type,
           tc.command,
           tc.path,
           tc.status,
           tc.timestamp_start,
           tr.is_error,
           tr.exit_code,
           tr.preview
    FROM tool_calls tc
    LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
    ${where}
  `;
  if (pathPattern === null) {
    const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
    return bundle.db.prepare(sql2).all(...params);
  }
  // Artifact rows honour the session and time-window filters too; otherwise
  // artifacts from unrelated sessions would leak into a session-scoped query.
  const artifactConds = ["a.path IS NOT NULL AND a.path LIKE ? ESCAPE '\\'"];
  const artifactParams = [pathPattern];
  if (filters.sessionId) {
    artifactConds.push("a.session_id = ?");
    artifactParams.push(filters.sessionId);
  }
  if (filters.sinceIso) {
    artifactConds.push("(a.created_ts IS NULL OR a.created_ts >= ?)");
    artifactParams.push(filters.sinceIso);
  }
  if (filters.untilIso) {
    artifactConds.push("(a.created_ts IS NULL OR a.created_ts < ?)");
    artifactParams.push(filters.untilIso);
  }
  const artifactSql = `
    SELECT 'artifact' AS entity_type,
           a.session_id,
           NULL AS tool_call_id,
           a.artifact_id,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           NULL AS command,
           a.path,
           NULL AS status,
           a.created_ts AS timestamp_start,
           NULL AS is_error,
           NULL AS exit_code,
           NULL AS preview
    FROM artifacts a
    WHERE ${artifactConds.join(" AND ")}
  `;
  const sql = `
    ${toolCallSql}
    UNION ALL
    ${artifactSql}
    ORDER BY timestamp_start DESC
    LIMIT ${limit2}
  `;
  return bundle.db.prepare(sql).all(...params, ...artifactParams);
}
6202
+
5317
6203
  // src/services/export/markdown.ts
5318
6204
  async function exportSessionMarkdown(bundle, sessionId2) {
5319
6205
  const session = bundle.db.prepare(
@@ -5424,6 +6310,8 @@ function renderToolCall(c) {
5424
6310
  return lines.join("\n");
5425
6311
  }
5426
6312
  export {
6313
+ ANALYTICS_REPORTS,
6314
+ ANALYTICS_VIEWS,
5427
6315
  COMPILE_PROVIDERS,
5428
6316
  PARQUET_TABLES,
5429
6317
  PROSA_PARSER_VERSION,
@@ -5453,6 +6341,7 @@ export {
5453
6341
  getText,
5454
6342
  initBundle,
5455
6343
  listSessions,
6344
+ listToolCalls,
5456
6345
  markIndexesAfterImport,
5457
6346
  openBundle,
5458
6347
  openOrInitBundle,
@@ -5465,6 +6354,8 @@ export {
5465
6354
  recordError,
5466
6355
  registerSourceFile,
5467
6356
  resolveCompilePath,
6357
+ runAnalyticsReport,
6358
+ runAnalyticsReportFromBundle,
5468
6359
  runCompileImports,
5469
6360
  runMigrations,
5470
6361
  searchFullText,