@c3-oss/prosa 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -12,8 +12,8 @@ var __export = (target, all) => {
12
12
 
13
13
  // src/core/db.ts
14
14
  import Database from "better-sqlite3";
15
- function openDb(path20) {
16
- const db = new Database(path20);
15
+ function openDb(path21) {
16
+ const db = new Database(path21);
17
17
  db.pragma("journal_mode = WAL");
18
18
  db.pragma("foreign_keys = ON");
19
19
  db.pragma("synchronous = NORMAL");
@@ -48,283 +48,30 @@ var init_db = __esm({
48
48
  }
49
49
  });
50
50
 
51
- // src/core/errors.ts
52
- var getErrorMessage;
53
- var init_errors = __esm({
54
- "src/core/errors.ts"() {
55
- "use strict";
56
- getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
57
- }
58
- });
59
-
60
- // src/core/cas/compress.ts
61
- import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
62
- function compressBytes(input) {
63
- if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
64
- return { bytes: Buffer.from(input), compression: "none" };
65
- }
66
- const out = zstdCompress(Buffer.from(input), { compressionLevel: ZSTD_LEVEL });
67
- return { bytes: out, compression: "zstd" };
68
- }
69
- function decompressBytes(input, compression) {
70
- if (compression === "none") return input;
71
- return zstdDecompress(input);
72
- }
73
- var COMPRESS_THRESHOLD_BYTES, ZSTD_LEVEL;
74
- var init_compress = __esm({
75
- "src/core/cas/compress.ts"() {
76
- "use strict";
77
- COMPRESS_THRESHOLD_BYTES = 256;
78
- ZSTD_LEVEL = 3;
79
- }
80
- });
81
-
82
- // src/core/cas/hash.ts
83
- import { createHash } from "crypto";
84
- import { blake3 } from "@noble/hashes/blake3";
85
- import { bytesToHex } from "@noble/hashes/utils";
86
- function blake3Hex(bytes) {
87
- return bytesToHex(blake3(bytes));
88
- }
89
- function sha256Hex(bytes) {
90
- return createHash("sha256").update(bytes).digest("hex");
91
- }
92
- function objectIdFromHash(hashHex) {
93
- return `blake3:${hashHex}`;
94
- }
95
- function objectStoragePath(hashHex, compression) {
96
- const ext = compression === "zstd" ? ".zst" : ".bin";
97
- const a = hashHex.slice(0, 2);
98
- const b = hashHex.slice(2, 4);
99
- return `objects/blake3/${a}/${b}/${hashHex}${ext}`;
51
+ // src/core/limits.ts
52
+ function clampLimit(value, opts) {
53
+ return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
100
54
  }
101
- var init_hash = __esm({
102
- "src/core/cas/hash.ts"() {
55
+ var init_limits = __esm({
56
+ "src/core/limits.ts"() {
103
57
  "use strict";
104
58
  }
105
59
  });
106
60
 
107
- // src/core/cas/index.ts
108
- var cas_exports = {};
109
- __export(cas_exports, {
110
- createPendingObjects: () => createPendingObjects,
111
- ensureDir: () => ensureDir,
112
- flushPendingObjects: () => flushPendingObjects,
113
- getBytes: () => getBytes,
114
- getJson: () => getJson,
115
- getObjectMeta: () => getObjectMeta,
116
- getText: () => getText,
117
- putBytes: () => putBytes,
118
- putJson: () => putJson,
119
- putText: () => putText,
120
- stageBytes: () => stageBytes,
121
- stageJson: () => stageJson,
122
- stageText: () => stageText
123
- });
124
- import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
125
- import path2 from "path";
126
- async function ensureDir(absoluteDir) {
127
- if (ensuredDirs.has(absoluteDir)) return;
128
- await mkdir2(absoluteDir, { recursive: true });
129
- ensuredDirs.add(absoluteDir);
130
- }
131
- async function putBytes(bundle, bytes, options = {}) {
132
- const hash = blake3Hex(bytes);
133
- const objectId = objectIdFromHash(hash);
134
- const existing = prepare(
135
- bundle.db,
136
- `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
137
- compression, mime_type, encoding, storage_path, created_at
138
- FROM objects WHERE object_id = ?`
139
- ).get(objectId);
140
- if (existing) return objectId;
141
- const { bytes: stored, compression } = compressBytes(bytes);
142
- const storagePath = objectStoragePath(hash, compression);
143
- const absolutePath = path2.join(bundle.path, storagePath);
144
- await ensureDir(path2.dirname(absolutePath));
145
- await writeFile2(absolutePath, stored);
146
- prepare(
147
- bundle.db,
148
- `INSERT INTO objects (
149
- object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
150
- compression, mime_type, encoding, storage_path, created_at
151
- ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
152
- ).run(
153
- objectId,
154
- hash,
155
- bytes.byteLength,
156
- compression === "zstd" ? stored.byteLength : null,
157
- compression,
158
- options.mimeType ?? null,
159
- options.encoding ?? null,
160
- storagePath,
161
- (/* @__PURE__ */ new Date()).toISOString()
162
- );
163
- return objectId;
164
- }
165
- async function putText(bundle, text, options = {}) {
166
- const buf = Buffer.from(text, "utf8");
167
- return putBytes(bundle, buf, {
168
- mimeType: options.mimeType ?? "text/plain; charset=utf-8",
169
- encoding: "utf-8"
170
- });
171
- }
172
- async function putJson(bundle, value) {
173
- const text = JSON.stringify(value);
174
- return putBytes(bundle, Buffer.from(text, "utf8"), {
175
- mimeType: "application/json",
176
- encoding: "utf-8"
177
- });
178
- }
179
- async function getBytes(bundle, objectId) {
180
- const meta = prepare(
181
- bundle.db,
182
- `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
183
- compression, mime_type, encoding, storage_path, created_at
184
- FROM objects WHERE object_id = ?`
185
- ).get(objectId);
186
- if (!meta) {
187
- throw new Error(`object not found: ${objectId}`);
188
- }
189
- const buf = await readFile2(path2.join(bundle.path, meta.storage_path));
190
- return decompressBytes(buf, meta.compression);
191
- }
192
- async function getText(bundle, objectId) {
193
- const buf = await getBytes(bundle, objectId);
194
- return buf.toString("utf8");
195
- }
196
- async function getJson(bundle, objectId) {
197
- const text = await getText(bundle, objectId);
198
- return JSON.parse(text);
199
- }
200
- function getObjectMeta(bundle, objectId) {
201
- return prepare(
202
- bundle.db,
203
- `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
204
- compression, mime_type, encoding, storage_path, created_at
205
- FROM objects WHERE object_id = ?`
206
- ).get(objectId) ?? null;
207
- }
208
- function createPendingObjects() {
209
- return { byId: /* @__PURE__ */ new Map() };
210
- }
211
- function stageBytes(pending, bytes, options = {}) {
212
- const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
213
- const hash = blake3Hex(buf);
214
- const objectId = objectIdFromHash(hash);
215
- if (!pending.byId.has(objectId)) {
216
- pending.byId.set(objectId, {
217
- objectId,
218
- hash,
219
- bytes: buf,
220
- mimeType: options.mimeType ?? null,
221
- encoding: options.encoding ?? null
222
- });
223
- }
224
- return objectId;
225
- }
226
- function stageText(pending, text, options = {}) {
227
- return stageBytes(pending, Buffer.from(text, "utf8"), {
228
- mimeType: options.mimeType ?? "text/plain; charset=utf-8",
229
- encoding: "utf-8"
230
- });
231
- }
232
- function stageJson(pending, value) {
233
- return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
234
- mimeType: "application/json",
235
- encoding: "utf-8"
236
- });
237
- }
238
- async function flushPendingObjects(bundle, pending) {
239
- if (pending.byId.size === 0) return;
240
- const ids = [...pending.byId.keys()];
241
- const existingIds = queryExistingObjectIds(bundle, ids);
242
- const toWrite = [];
243
- for (const obj of pending.byId.values()) {
244
- if (existingIds.has(obj.objectId)) continue;
245
- const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
246
- const storagePath = objectStoragePath(obj.hash, compression);
247
- toWrite.push({
248
- staged: obj,
249
- compression,
250
- compressedBytes,
251
- storagePath,
252
- absolutePath: path2.join(bundle.path, storagePath)
253
- });
254
- }
255
- if (toWrite.length > 0) {
256
- await writeFilesParallel(toWrite);
257
- }
258
- const insertObject = prepare(
259
- bundle.db,
260
- `INSERT OR IGNORE INTO objects (
261
- object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
262
- compression, mime_type, encoding, storage_path, created_at
263
- ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
264
- );
265
- const now = (/* @__PURE__ */ new Date()).toISOString();
266
- for (const p of toWrite) {
267
- insertObject.run(
268
- p.staged.objectId,
269
- p.staged.hash,
270
- p.staged.bytes.byteLength,
271
- p.compression === "zstd" ? p.compressedBytes.byteLength : null,
272
- p.compression,
273
- p.staged.mimeType,
274
- p.staged.encoding,
275
- p.storagePath,
276
- now
277
- );
278
- }
279
- }
280
- function queryExistingObjectIds(bundle, ids) {
281
- const found = /* @__PURE__ */ new Set();
282
- if (ids.length === 0) return found;
283
- const CHUNK = 500;
284
- for (let start = 0; start < ids.length; start += CHUNK) {
285
- const slice = ids.slice(start, start + CHUNK);
286
- const placeholders = slice.map(() => "?").join(",");
287
- const rows = bundle.db.prepare(
288
- `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
289
- ).all(...slice);
290
- for (const row of rows) found.add(row.object_id);
291
- }
292
- return found;
293
- }
294
- async function writeFilesParallel(tasks) {
295
- let cursor = 0;
296
- const workers = [];
297
- const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
298
- for (let w = 0; w < limit; w++) {
299
- workers.push(
300
- (async () => {
301
- while (true) {
302
- const i = cursor++;
303
- if (i >= tasks.length) return;
304
- const task = tasks[i];
305
- await ensureDir(path2.dirname(task.absolutePath));
306
- await writeFile2(task.absolutePath, task.compressedBytes);
307
- }
308
- })()
309
- );
310
- }
311
- await Promise.all(workers);
312
- }
313
- var ensuredDirs, FS_WRITE_CONCURRENCY;
314
- var init_cas = __esm({
315
- "src/core/cas/index.ts"() {
61
+ // src/core/errors.ts
62
+ var getErrorMessage;
63
+ var init_errors = __esm({
64
+ "src/core/errors.ts"() {
316
65
  "use strict";
317
- init_db();
318
- init_compress();
319
- init_hash();
320
- ensuredDirs = /* @__PURE__ */ new Set();
321
- FS_WRITE_CONCURRENCY = 16;
66
+ getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
322
67
  }
323
68
  });
324
69
 
325
70
  // src/services/indexing.ts
71
+ import { createHash as createHash2 } from "crypto";
72
+ import { existsSync } from "fs";
326
73
  import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
327
- import path13 from "path";
74
+ import path15 from "path";
328
75
  function enableFts5Triggers(bundle) {
329
76
  bundle.db.exec(FTS5_TRIGGER_SQL);
330
77
  }
@@ -338,7 +85,7 @@ function disableFts5Triggers(bundle) {
338
85
  function getSearchIndexStatuses(bundle) {
339
86
  ensureSearchIndexStatusRows(bundle);
340
87
  return bundle.db.prepare(
341
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
88
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
342
89
  FROM search_index_status
343
90
  ORDER BY engine`
344
91
  ).all();
@@ -346,28 +93,13 @@ function getSearchIndexStatuses(bundle) {
346
93
  function getSearchIndexStatus(bundle, engine) {
347
94
  ensureSearchIndexStatusRows(bundle);
348
95
  return bundle.db.prepare(
349
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
96
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
350
97
  FROM search_index_status
351
98
  WHERE engine = ?`
352
99
  ).get(engine) ?? null;
353
100
  }
354
101
  function markIndexesAfterImport(bundle, options) {
355
102
  if (!options.changed) return;
356
- if (options.fts5Deferred) {
357
- updateSearchIndexStatus(bundle, "fts5", {
358
- status: "stale",
359
- sourceDocCount: countSearchDocs(bundle),
360
- indexedDocCount: countFts5Docs(bundle),
361
- errorMessage: null
362
- });
363
- } else {
364
- updateSearchIndexStatus(bundle, "fts5", {
365
- status: "ready",
366
- sourceDocCount: countSearchDocs(bundle),
367
- indexedDocCount: countFts5Docs(bundle),
368
- errorMessage: null
369
- });
370
- }
371
103
  const tantivy = getSearchIndexStatus(bundle, "tantivy");
372
104
  if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
373
105
  updateSearchIndexStatus(bundle, "tantivy", {
@@ -408,55 +140,93 @@ function rebuildFts5Index(bundle) {
408
140
  }
409
141
  return getSearchIndexStatus(bundle, "fts5");
410
142
  }
411
- async function rebuildTantivyIndex(bundle) {
143
+ function buildTantivySchema(tantivy) {
144
+ const builder = new tantivy.SchemaBuilder();
145
+ for (const field of TANTIVY_SCHEMA_FIELDS) {
146
+ if (field.tokenizer === "default") {
147
+ builder.addTextField(field.name, { stored: true });
148
+ } else {
149
+ builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
150
+ }
151
+ }
152
+ return builder.build();
153
+ }
154
+ function computeSchemaFingerprint() {
155
+ const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
156
+ return createHash2("sha256").update(canonical).digest("hex");
157
+ }
158
+ function tantivyIndexLooksValid(dir) {
159
+ return existsSync(path15.join(dir, "meta.json"));
160
+ }
161
+ function makeTantivyDoc(tantivy, row) {
162
+ const doc = new tantivy.Document();
163
+ doc.addText("doc_id", row.doc_id);
164
+ doc.addText("entity_type", row.entity_type);
165
+ doc.addText("entity_id", row.entity_id);
166
+ doc.addText("session_id", row.session_id ?? "");
167
+ doc.addText("project_id", row.project_id ?? "");
168
+ doc.addText("timestamp", row.timestamp ?? "");
169
+ doc.addText("role", row.role ?? "");
170
+ doc.addText("tool_name", row.tool_name ?? "");
171
+ doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
172
+ doc.addText("field_kind", row.field_kind);
173
+ doc.addText("text", row.text);
174
+ return doc;
175
+ }
176
+ async function rebuildTantivyIndex(bundle, options = {}) {
412
177
  ensureSearchIndexStatusRows(bundle);
178
+ const sourceDocCount = countSearchDocs(bundle);
179
+ const prev = getSearchIndexStatus(bundle, "tantivy");
180
+ const fingerprint = computeSchemaFingerprint();
181
+ const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
182
+ const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
183
+ const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
184
+ const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
413
185
  updateSearchIndexStatus(bundle, "tantivy", {
414
186
  status: "building",
415
- sourceDocCount: countSearchDocs(bundle),
187
+ sourceDocCount,
416
188
  indexedDocCount: 0,
417
189
  errorMessage: null
418
190
  });
419
191
  try {
420
192
  const tantivy = await import("@oxdev03/node-tantivy-binding");
421
- const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
422
- await rm2(bundle.paths.tantivy, { recursive: true, force: true });
423
- await mkdir4(bundle.paths.tantivy, { recursive: true });
424
- const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
425
- const writer = index.writer(5e7, 1);
426
- let indexedDocCount = 0;
427
- const rows = bundle.db.prepare(
428
- `SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
429
- role, tool_name, canonical_tool_type, field_kind, text
430
- FROM search_docs
431
- ORDER BY rowid`
432
- ).iterate();
433
- for (const row of rows) {
434
- const doc = new tantivy.Document();
435
- doc.addText("doc_id", row.doc_id);
436
- doc.addText("entity_type", row.entity_type);
437
- doc.addText("entity_id", row.entity_id);
438
- doc.addText("session_id", row.session_id ?? "");
439
- doc.addText("project_id", row.project_id ?? "");
440
- doc.addText("timestamp", row.timestamp ?? "");
441
- doc.addText("role", row.role ?? "");
442
- doc.addText("tool_name", row.tool_name ?? "");
443
- doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
444
- doc.addText("field_kind", row.field_kind);
445
- doc.addText("text", row.text);
446
- writer.addDocument(doc);
447
- indexedDocCount++;
193
+ const schema = buildTantivySchema(tantivy);
194
+ let index;
195
+ if (wantFullRebuild) {
196
+ await rm2(bundle.paths.tantivy, { recursive: true, force: true });
197
+ await mkdir4(bundle.paths.tantivy, { recursive: true });
198
+ index = new tantivy.Index(schema, bundle.paths.tantivy, false);
199
+ } else {
200
+ index = tantivy.Index.open(bundle.paths.tantivy);
201
+ }
202
+ const writer = index.writer(3e8, 4);
203
+ const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
204
+ let addedDocCount = 0;
205
+ let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
206
+ for (const row of bundle.db.prepare(select).iterate()) {
207
+ if (!wantFullRebuild) {
208
+ writer.deleteDocumentsByTerm("doc_id", row.doc_id);
209
+ }
210
+ writer.addDocument(makeTantivyDoc(tantivy, row));
211
+ addedDocCount++;
212
+ if (row.rowid > maxRowid) maxRowid = row.rowid;
448
213
  }
449
214
  writer.commit();
450
215
  index.reload();
216
+ writer.waitMergingThreads();
217
+ const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
451
218
  await writeFile5(
452
- path13.join(bundle.paths.tantivy, "prosa-index.json"),
219
+ path15.join(bundle.paths.tantivy, "prosa-index.json"),
453
220
  `${JSON.stringify(
454
221
  {
455
222
  engine: "tantivy",
456
223
  source: "search_docs",
457
224
  built_at: (/* @__PURE__ */ new Date()).toISOString(),
458
- source_doc_count: countSearchDocs(bundle),
459
- indexed_doc_count: indexedDocCount
225
+ mode: wantFullRebuild ? "full" : "incremental",
226
+ source_doc_count: sourceDocCount,
227
+ indexed_doc_count: indexedDocCount,
228
+ last_indexed_rowid: maxRowid,
229
+ schema_fingerprint: fingerprint
460
230
  },
461
231
  null,
462
232
  2
@@ -466,14 +236,16 @@ async function rebuildTantivyIndex(bundle) {
466
236
  );
467
237
  updateSearchIndexStatus(bundle, "tantivy", {
468
238
  status: "ready",
469
- sourceDocCount: countSearchDocs(bundle),
239
+ sourceDocCount,
470
240
  indexedDocCount,
471
- errorMessage: null
241
+ errorMessage: null,
242
+ lastIndexedRowid: maxRowid,
243
+ schemaFingerprint: fingerprint
472
244
  });
473
245
  } catch (error) {
474
246
  updateSearchIndexStatus(bundle, "tantivy", {
475
247
  status: "failed",
476
- sourceDocCount: countSearchDocs(bundle),
248
+ sourceDocCount,
477
249
  indexedDocCount: 0,
478
250
  errorMessage: getErrorMessage(error)
479
251
  });
@@ -481,36 +253,53 @@ async function rebuildTantivyIndex(bundle) {
481
253
  }
482
254
  return getSearchIndexStatus(bundle, "tantivy");
483
255
  }
256
+ function countTantivyDocsBest(prev, added) {
257
+ if (prev && typeof prev.indexed_doc_count === "number") {
258
+ return prev.indexed_doc_count + added;
259
+ }
260
+ return added;
261
+ }
484
262
  function ensureSearchIndexStatusRows(bundle) {
485
263
  const now = (/* @__PURE__ */ new Date()).toISOString();
486
264
  const stmt = prepare(
487
265
  bundle.db,
488
266
  `INSERT OR IGNORE INTO search_index_status (
489
- engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
490
- ) VALUES (?, ?, 0, 0, ?, NULL)`
267
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
268
+ error_message, last_indexed_rowid, schema_fingerprint
269
+ ) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
491
270
  );
492
271
  stmt.run("fts5", "ready", now);
493
272
  stmt.run("tantivy", "missing", now);
494
273
  }
495
274
  function updateSearchIndexStatus(bundle, engine, values) {
496
275
  ensureSearchIndexStatusRows(bundle);
497
- prepare(
498
- bundle.db,
499
- `UPDATE search_index_status
500
- SET status = ?,
501
- source_doc_count = ?,
502
- indexed_doc_count = ?,
503
- updated_at = ?,
504
- error_message = ?
505
- WHERE engine = ?`
506
- ).run(
276
+ const setClauses = [
277
+ "status = ?",
278
+ "source_doc_count = ?",
279
+ "indexed_doc_count = ?",
280
+ "updated_at = ?",
281
+ "error_message = ?"
282
+ ];
283
+ const params = [
507
284
  values.status,
508
285
  values.sourceDocCount,
509
286
  values.indexedDocCount,
510
287
  (/* @__PURE__ */ new Date()).toISOString(),
511
- values.errorMessage,
512
- engine
513
- );
288
+ values.errorMessage
289
+ ];
290
+ if (values.lastIndexedRowid !== void 0) {
291
+ setClauses.push("last_indexed_rowid = ?");
292
+ params.push(values.lastIndexedRowid);
293
+ }
294
+ if (values.schemaFingerprint !== void 0) {
295
+ setClauses.push("schema_fingerprint = ?");
296
+ params.push(values.schemaFingerprint);
297
+ }
298
+ params.push(engine);
299
+ prepare(
300
+ bundle.db,
301
+ `UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
302
+ ).run(...params);
514
303
  }
515
304
  function countSearchDocs(bundle) {
516
305
  return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
@@ -518,12 +307,16 @@ function countSearchDocs(bundle) {
518
307
  function countFts5Docs(bundle) {
519
308
  return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs_fts`).get()?.n ?? 0;
520
309
  }
521
- var FTS5_TRIGGER_SQL;
310
+ var SEARCH_INDEX_STATUS_COLUMNS, FTS5_TRIGGER_SQL, TANTIVY_SCHEMA_FIELDS, SEARCH_DOCS_SELECT;
522
311
  var init_indexing = __esm({
523
312
  "src/services/indexing.ts"() {
524
313
  "use strict";
525
314
  init_db();
526
315
  init_errors();
316
+ SEARCH_INDEX_STATUS_COLUMNS = `
317
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
318
+ error_message, last_indexed_rowid, schema_fingerprint
319
+ `;
527
320
  FTS5_TRIGGER_SQL = `
528
321
  CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
529
322
  INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
@@ -542,21 +335,30 @@ CREATE TRIGGER IF NOT EXISTS search_docs_au AFTER UPDATE ON search_docs BEGIN
542
335
  VALUES (new.rowid, new.text, new.role, new.tool_name, new.field_kind);
543
336
  END;
544
337
  `;
545
- }
546
- });
547
-
548
- // src/core/limits.ts
549
- function clampLimit(value, opts) {
550
- return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
551
- }
552
- var init_limits = __esm({
553
- "src/core/limits.ts"() {
554
- "use strict";
338
+ TANTIVY_SCHEMA_FIELDS = [
339
+ { name: "doc_id", tokenizer: "raw" },
340
+ { name: "entity_type", tokenizer: "raw" },
341
+ { name: "entity_id", tokenizer: "raw" },
342
+ { name: "session_id", tokenizer: "raw" },
343
+ { name: "project_id", tokenizer: "raw" },
344
+ { name: "timestamp", tokenizer: "raw" },
345
+ { name: "role", tokenizer: "raw" },
346
+ { name: "tool_name", tokenizer: "raw" },
347
+ { name: "canonical_tool_type", tokenizer: "raw" },
348
+ { name: "field_kind", tokenizer: "raw" },
349
+ // The text field uses tantivy's default tokenizer (en_stem in the binding).
350
+ { name: "text", tokenizer: "default" }
351
+ ];
352
+ SEARCH_DOCS_SELECT = `
353
+ SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
354
+ role, tool_name, canonical_tool_type, field_kind, text
355
+ FROM search_docs
356
+ `;
555
357
  }
556
358
  });
557
359
 
558
360
  // src/services/search.ts
559
- import { existsSync } from "fs";
361
+ import { existsSync as existsSync2 } from "fs";
560
362
  import { createRequire } from "module";
561
363
  function escapeFtsQuery(q) {
562
364
  return q.split(/\s+/).filter((t) => t.length > 0).map((t) => `"${t.replace(/"/g, '""')}"`).join(" ");
@@ -565,7 +367,7 @@ function searchFullText(bundle, options) {
565
367
  if (options.engine === "tantivy") {
566
368
  return searchTantivy(bundle, options);
567
369
  }
568
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
370
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
569
371
  const sql = `
570
372
  SELECT d.doc_id,
571
373
  d.entity_type,
@@ -580,14 +382,14 @@ function searchFullText(bundle, options) {
580
382
  JOIN search_docs d ON d.rowid = search_docs_fts.rowid
581
383
  WHERE search_docs_fts MATCH ?
582
384
  ORDER BY bm25(search_docs_fts), d.timestamp DESC
583
- LIMIT ${limit}
385
+ LIMIT ${limit2}
584
386
  `;
585
387
  const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
586
388
  if (!ftsQuery) return [];
587
389
  return bundle.db.prepare(sql).all(ftsQuery);
588
390
  }
589
391
  function searchTantivy(bundle, options) {
590
- if (!existsSync(bundle.paths.tantivy)) {
392
+ if (!existsSync2(bundle.paths.tantivy)) {
591
393
  throw new Error("tantivy index not found; run `prosa index tantivy` first");
592
394
  }
593
395
  const status = getSearchIndexStatus(bundle, "tantivy");
@@ -596,7 +398,7 @@ function searchTantivy(bundle, options) {
596
398
  `tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
597
399
  );
598
400
  }
599
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
401
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
600
402
  const queryText = options.query.trim();
601
403
  if (!queryText) return [];
602
404
  const tantivy = requireTantivy();
@@ -605,7 +407,7 @@ function searchTantivy(bundle, options) {
605
407
  const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
606
408
  text: [true, 2, true]
607
409
  });
608
- const result = searcher.search(query, limit, true);
410
+ const result = searcher.search(query, limit2, true);
609
411
  const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
610
412
  snippets.setMaxNumChars(180);
611
413
  return result.hits.map((hit) => {
@@ -689,7 +491,7 @@ function sessionFilterWhere(filters) {
689
491
  }
690
492
  function listSessions(bundle, filters = {}) {
691
493
  const { where, params } = sessionFilterWhere(filters);
692
- const limit = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
494
+ const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
693
495
  const sql = `
694
496
  SELECT s.session_id,
695
497
  s.source_tool,
@@ -710,7 +512,7 @@ function listSessions(bundle, filters = {}) {
710
512
  FROM sessions s
711
513
  ${where}
712
514
  ORDER BY s.start_ts DESC NULLS LAST
713
- LIMIT ${limit}
515
+ LIMIT ${limit2}
714
516
  `;
715
517
  return bundle.db.prepare(sql).all(...params);
716
518
  }
@@ -1134,13 +936,14 @@ var init_App = __esm({
1134
936
  });
1135
937
 
1136
938
  // src/cli/main.ts
1137
- import { Command as Command10 } from "commander";
939
+ import { Command as Command11 } from "commander";
1138
940
 
1139
941
  // src/core/version.ts
1140
942
  var PROSA_PARSER_VERSION = "0.1.0";
1141
- var PROSA_SCHEMA_VERSION = 2;
943
+ var PROSA_SCHEMA_VERSION = 4;
1142
944
 
1143
- // src/cli/commands/compile.ts
945
+ // src/cli/commands/analytics.ts
946
+ import path4 from "path";
1144
947
  import { Command } from "commander";
1145
948
 
1146
949
  // src/core/bundle.ts
@@ -1514,10 +1317,291 @@ INSERT OR IGNORE INTO search_index_status (
1514
1317
  ('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
1515
1318
  `;
1516
1319
 
1320
+ // src/core/schema/sql/003_analytics_views.ts
1321
// Migration 3 ("analytics_views"): SQL text that creates five read-only views
// with CREATE VIEW IF NOT EXISTS:
//   - session_facts:    one row per session, with per-session counts that are
//                       pre-aggregated in CTEs before being joined.
//   - tool_usage_facts: one row per tool call joined to a roll-up of its results.
//   - error_facts:      UNION ALL of failing tool results, import_errors rows
//                       and uncertainties rows, projected to a common column list.
//   - model_usage:      model observations from sessions, turns and messages,
//                       grouped by source tool, project and model.
//   - project_activity: per (source_tool, project) totals.
// createAnalyticsViews defines views with the same names for DuckDB; the
// differences visible in this file are julianday() vs date_diff() for
// durations and CAST(... AS TEXT) vs CAST(... AS VARCHAR).
// NOTE(review): project_activity LEFT JOINs turns, messages, tool_calls,
// tool_results and search_docs on session_id alone, so the intermediate row
// count per session is the product of those child counts before
// count(DISTINCT ...) collapses it. This may be slow on large sessions - confirm.
// NOTE(review): tool_usage_facts groups result_rollup by (tool_call_id,
// session_id) but joins it on tool_call_id only; presumably a tool_call_id
// never spans sessions - confirm.
+ var SQL_003_ANALYTICS_VIEWS = String.raw`
1322
+ CREATE VIEW IF NOT EXISTS session_facts AS
1323
+ WITH turn_counts AS (
1324
+ SELECT session_id, count(*) AS turn_count
1325
+ FROM turns
1326
+ GROUP BY session_id
1327
+ ),
1328
+ message_counts AS (
1329
+ SELECT session_id,
1330
+ count(*) AS message_count,
1331
+ sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
1332
+ sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
1333
+ FROM messages
1334
+ GROUP BY session_id
1335
+ ),
1336
+ tool_call_counts AS (
1337
+ SELECT session_id,
1338
+ count(*) AS tool_call_count,
1339
+ sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
1340
+ FROM tool_calls
1341
+ GROUP BY session_id
1342
+ ),
1343
+ tool_result_counts AS (
1344
+ SELECT session_id,
1345
+ count(*) AS tool_result_count,
1346
+ sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
1347
+ THEN 1 ELSE 0 END) AS tool_result_error_count,
1348
+ sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
1349
+ FROM tool_results
1350
+ GROUP BY session_id
1351
+ ),
1352
+ search_doc_counts AS (
1353
+ SELECT session_id, count(*) AS search_doc_count
1354
+ FROM search_docs
1355
+ WHERE session_id IS NOT NULL
1356
+ GROUP BY session_id
1357
+ )
1358
+ SELECT s.session_id,
1359
+ s.source_tool,
1360
+ s.source_session_id,
1361
+ s.project_id,
1362
+ p.display_name AS project_name,
1363
+ p.canonical_path AS project_path,
1364
+ s.parent_session_id,
1365
+ s.is_subagent,
1366
+ s.agent_role,
1367
+ s.agent_nickname,
1368
+ s.title,
1369
+ s.start_ts,
1370
+ s.end_ts,
1371
+ CASE
1372
+ WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
1373
+ THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
1374
+ ELSE NULL
1375
+ END AS duration_seconds,
1376
+ s.cwd_initial,
1377
+ s.git_branch_initial,
1378
+ s.model_first,
1379
+ s.model_last,
1380
+ s.status,
1381
+ s.timeline_confidence,
1382
+ sf.path AS source_file_path,
1383
+ COALESCE(tc.turn_count, 0) AS turn_count,
1384
+ COALESCE(mc.message_count, 0) AS message_count,
1385
+ COALESCE(mc.user_message_count, 0) AS user_message_count,
1386
+ COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
1387
+ COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
1388
+ COALESCE(trc.tool_result_count, 0) AS tool_result_count,
1389
+ COALESCE(tcc.tool_call_error_count, 0)
1390
+ + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
1391
+ COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
1392
+ COALESCE(sdc.search_doc_count, 0) AS search_doc_count
1393
+ FROM sessions s
1394
+ LEFT JOIN projects p ON p.project_id = s.project_id
1395
+ LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
1396
+ LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
1397
+ LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
1398
+ LEFT JOIN message_counts mc ON mc.session_id = s.session_id
1399
+ LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
1400
+ LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
1401
+ LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;
1402
+
1403
+ CREATE VIEW IF NOT EXISTS tool_usage_facts AS
1404
+ WITH result_rollup AS (
1405
+ SELECT tool_call_id,
1406
+ session_id,
1407
+ count(*) AS tool_result_count,
1408
+ max(status) AS result_status,
1409
+ max(is_error) AS is_error,
1410
+ min(exit_code) AS exit_code,
1411
+ sum(COALESCE(duration_ms, 0)) AS duration_ms,
1412
+ max(preview) AS preview
1413
+ FROM tool_results
1414
+ GROUP BY tool_call_id, session_id
1415
+ )
1416
+ SELECT tc.tool_call_id,
1417
+ tc.session_id,
1418
+ s.source_tool,
1419
+ s.source_session_id,
1420
+ s.project_id,
1421
+ p.display_name AS project_name,
1422
+ p.canonical_path AS project_path,
1423
+ tc.turn_id,
1424
+ tc.message_id,
1425
+ tc.event_id,
1426
+ tc.source_call_id,
1427
+ tc.tool_name,
1428
+ tc.canonical_tool_type,
1429
+ tc.command,
1430
+ tc.cwd,
1431
+ tc.path,
1432
+ tc.query,
1433
+ tc.timestamp_start,
1434
+ tc.timestamp_end,
1435
+ CASE
1436
+ WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
1437
+ THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
1438
+ ELSE NULL
1439
+ END AS call_duration_seconds,
1440
+ tc.status AS call_status,
1441
+ rr.result_status,
1442
+ COALESCE(rr.is_error, 0) AS is_error,
1443
+ rr.exit_code,
1444
+ rr.duration_ms AS result_duration_ms,
1445
+ COALESCE(rr.tool_result_count, 0) AS tool_result_count,
1446
+ rr.preview,
1447
+ tc.raw_record_id
1448
+ FROM tool_calls tc
1449
+ LEFT JOIN sessions s ON s.session_id = tc.session_id
1450
+ LEFT JOIN projects p ON p.project_id = s.project_id
1451
+ LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;
1452
+
1453
+ CREATE VIEW IF NOT EXISTS error_facts AS
1454
+ SELECT 'tool_result:' || tr.tool_result_id AS error_id,
1455
+ 'tool_result' AS error_category,
1456
+ s.source_tool,
1457
+ s.project_id,
1458
+ p.display_name AS project_name,
1459
+ tr.session_id,
1460
+ COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
1461
+ tc.tool_name,
1462
+ tc.canonical_tool_type,
1463
+ COALESCE(tr.status, tc.status) AS status,
1464
+ tr.exit_code,
1465
+ NULL AS message,
1466
+ tr.preview,
1467
+ NULL AS entity_type,
1468
+ NULL AS entity_id,
1469
+ tr.raw_record_id
1470
+ FROM tool_results tr
1471
+ LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
1472
+ LEFT JOIN sessions s ON s.session_id = tr.session_id
1473
+ LEFT JOIN projects p ON p.project_id = s.project_id
1474
+ WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
1475
+ UNION ALL
1476
+ SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
1477
+ 'import_error' AS error_category,
1478
+ COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
1479
+ NULL AS project_id,
1480
+ NULL AS project_name,
1481
+ NULL AS session_id,
1482
+ ie.occurred_at AS timestamp,
1483
+ NULL AS tool_name,
1484
+ NULL AS canonical_tool_type,
1485
+ ie.kind AS status,
1486
+ NULL AS exit_code,
1487
+ ie.message,
1488
+ NULL AS preview,
1489
+ NULL AS entity_type,
1490
+ NULL AS entity_id,
1491
+ ie.raw_record_id
1492
+ FROM import_errors ie
1493
+ LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
1494
+ LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
1495
+ UNION ALL
1496
+ SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
1497
+ 'uncertainty' AS error_category,
1498
+ NULL AS source_tool,
1499
+ NULL AS project_id,
1500
+ NULL AS project_name,
1501
+ CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
1502
+ NULL AS timestamp,
1503
+ NULL AS tool_name,
1504
+ NULL AS canonical_tool_type,
1505
+ u.reason AS status,
1506
+ NULL AS exit_code,
1507
+ u.reason AS message,
1508
+ NULL AS preview,
1509
+ u.entity_type,
1510
+ u.entity_id,
1511
+ NULL AS raw_record_id
1512
+ FROM uncertainties u;
1513
+
1514
+ CREATE VIEW IF NOT EXISTS model_usage AS
1515
+ WITH model_events AS (
1516
+ SELECT s.source_tool,
1517
+ s.project_id,
1518
+ p.display_name AS project_name,
1519
+ p.canonical_path AS project_path,
1520
+ s.session_id,
1521
+ NULL AS turn_id,
1522
+ s.model_first AS model,
1523
+ s.start_ts AS timestamp,
1524
+ 'session_first' AS observation_type
1525
+ FROM sessions s
1526
+ LEFT JOIN projects p ON p.project_id = s.project_id
1527
+ WHERE s.model_first IS NOT NULL
1528
+ UNION ALL
1529
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
1530
+ s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
1531
+ 'session_last' AS observation_type
1532
+ FROM sessions s
1533
+ LEFT JOIN projects p ON p.project_id = s.project_id
1534
+ WHERE s.model_last IS NOT NULL
1535
+ UNION ALL
1536
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
1537
+ t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
1538
+ FROM turns t
1539
+ LEFT JOIN sessions s ON s.session_id = t.session_id
1540
+ LEFT JOIN projects p ON p.project_id = s.project_id
1541
+ WHERE t.model IS NOT NULL
1542
+ UNION ALL
1543
+ SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
1544
+ m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
1545
+ FROM messages m
1546
+ LEFT JOIN sessions s ON s.session_id = m.session_id
1547
+ LEFT JOIN projects p ON p.project_id = s.project_id
1548
+ WHERE m.model IS NOT NULL
1549
+ )
1550
+ SELECT source_tool,
1551
+ project_id,
1552
+ project_name,
1553
+ project_path,
1554
+ model,
1555
+ count(DISTINCT session_id) AS session_count,
1556
+ count(DISTINCT turn_id) AS turn_count,
1557
+ count(*) AS observation_count,
1558
+ sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
1559
+ min(timestamp) AS first_seen_ts,
1560
+ max(timestamp) AS last_seen_ts
1561
+ FROM model_events
1562
+ GROUP BY source_tool, project_id, project_name, project_path, model;
1563
+
1564
+ CREATE VIEW IF NOT EXISTS project_activity AS
1565
+ SELECT s.source_tool,
1566
+ s.project_id,
1567
+ COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
1568
+ p.canonical_path AS project_path,
1569
+ min(s.start_ts) AS first_session_ts,
1570
+ max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
1571
+ count(DISTINCT s.session_id) AS session_count,
1572
+ count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
1573
+ AS low_confidence_session_count,
1574
+ count(DISTINCT t.turn_id) AS turn_count,
1575
+ count(DISTINCT m.message_id) AS message_count,
1576
+ count(DISTINCT tc.tool_call_id) AS tool_call_count,
1577
+ count(DISTINCT tr.tool_result_id) AS tool_result_count,
1578
+ count(DISTINCT CASE
1579
+ WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
1580
+ THEN tr.tool_result_id
1581
+ END) AS tool_error_count,
1582
+ count(DISTINCT sd.doc_id) AS search_doc_count
1583
+ FROM sessions s
1584
+ LEFT JOIN projects p ON p.project_id = s.project_id
1585
+ LEFT JOIN turns t ON t.session_id = s.session_id
1586
+ LEFT JOIN messages m ON m.session_id = s.session_id
1587
+ LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
1588
+ LEFT JOIN tool_results tr ON tr.session_id = s.session_id
1589
+ LEFT JOIN search_docs sd ON sd.session_id = s.session_id
1590
+ GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
1591
+ `;
1592
+
1593
+ // src/core/schema/sql/004_tantivy_checkpoint.ts
1594
// Migration 4 ("tantivy_checkpoint"): adds two columns to search_index_status,
// last_indexed_rowid (INTEGER) and schema_fingerprint (TEXT). Neither column
// declares a default or a NOT NULL constraint.
// NOTE(review): ALTER TABLE ... ADD COLUMN fails if the column already exists;
// presumably runMigrations applies each version at most once - confirm.
+ var SQL_004_TANTIVY_CHECKPOINT = String.raw`
1595
+ ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
1596
+ ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
1597
+ `;
1598
+
1517
1599
  // src/core/schema/migrate.ts
1518
1600
  // Schema migrations listed in ascending `version` order; each entry pairs a
  // version number and a short name with the SQL text for that step. The last
  // version listed here (4) matches PROSA_SCHEMA_VERSION, which openBundle
  // compares against the database after running migrations.
  var MIGRATIONS = [
1519
1601
  { version: 1, name: "init", sql: SQL_001_INIT },
1520
- { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS }
1602
+ { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
1603
+ { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
1604
+ { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
1521
1605
  ];
1522
1606
  function runMigrations(db) {
1523
1607
  db.exec(`
@@ -1625,52 +1709,1026 @@ async function openBundle(rootPath) {
1625
1709
  `no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
1626
1710
  );
1627
1711
  }
1628
- const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
1629
- await mkdir(paths.search, { recursive: true });
1630
- await mkdir(paths.tantivy, { recursive: true });
1631
- const db = openDb(paths.db);
1632
- runMigrations(db);
1633
- const currentVersion = currentSchemaVersion(db);
1634
- if (currentVersion !== PROSA_SCHEMA_VERSION) {
1635
- closeDb(db);
1636
- throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
1637
- }
1638
- if (manifest.parser_version !== PROSA_PARSER_VERSION) {
1639
- manifest.parser_version = PROSA_PARSER_VERSION;
1640
- await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
1641
- `, "utf8");
1642
- }
1643
- return { path: resolved, db, manifest, paths };
1712
+ const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
1713
+ await mkdir(paths.search, { recursive: true });
1714
+ await mkdir(paths.tantivy, { recursive: true });
1715
+ const db = openDb(paths.db);
1716
+ runMigrations(db);
1717
+ const currentVersion = currentSchemaVersion(db);
1718
+ if (currentVersion !== PROSA_SCHEMA_VERSION) {
1719
+ closeDb(db);
1720
+ throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
1721
+ }
1722
+ if (manifest.parser_version !== PROSA_PARSER_VERSION) {
1723
+ manifest.parser_version = PROSA_PARSER_VERSION;
1724
+ await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
1725
+ `, "utf8");
1726
+ }
1727
+ return { path: resolved, db, manifest, paths };
1728
+ }
1729
/**
 * Open the bundle rooted at `rootPath`, initializing it first when needed.
 *
 * - Path does not exist (or cannot be stat'ed): delegate to `initBundle`.
 * - Path exists but is not a directory: throw.
 * - Directory exists without a manifest file: delegate to `initBundle`.
 * - Otherwise: delegate to `openBundle`.
 *
 * @param {string} rootPath - Bundle root; resolved to an absolute path.
 * @returns {Promise<object>} Whatever `initBundle` / `openBundle` resolve to.
 * @throws {Error} When the path exists and is not a directory.
 */
async function openOrInitBundle(rootPath) {
  const bundleRoot = path.resolve(rootPath);
  const layout = bundlePaths(bundleRoot);

  // Any stat failure is treated the same as "nothing there yet".
  let rootStat;
  try {
    rootStat = await stat(bundleRoot);
  } catch {
    rootStat = null;
  }

  if (!rootStat) {
    return await initBundle(bundleRoot);
  }
  if (!rootStat.isDirectory()) {
    throw new Error(`bundle path not found or not a directory: ${bundleRoot}`);
  }

  const hasManifest = await exists(layout.manifest);
  return hasManifest ? await openBundle(bundleRoot) : await initBundle(bundleRoot);
}
1741
/**
 * Close an opened bundle by handing its database handle to `closeDb`.
 *
 * @param {{ db: object }} bundle - Bundle object as returned by `openBundle`.
 */
function closeBundle(bundle) {
  const { db } = bundle;
  closeDb(db);
}
1744
+
1745
+ // src/services/analytics.ts
1746
+ init_limits();
1747
+
1748
+ // src/services/export/parquet.ts
1749
+ import { mkdir as mkdir2, rm, writeFile as writeFile2 } from "fs/promises";
1750
+ import path2 from "path";
1751
+ import { DuckDBConnection } from "@duckdb/node-api";
1752
+ init_errors();
1753
// Tables handled by the Parquet export. exportBundleParquet writes each one
// to `<name>.parquet`, and queryDuckDbParquet registers each file again as a
// DuckDB view with the same name. Order is preserved in the export manifest.
var PARQUET_TABLES = [
  "objects", "source_files", "import_batches", "raw_records",
  "import_errors", "uncertainties",
  "projects", "sessions", "turns", "events",
  "messages", "content_blocks",
  "tool_calls", "tool_results",
  "artifacts", "edges", "search_docs"
];
1772
/**
 * Export every table in PARQUET_TABLES from the bundle's SQLite database to
 * one Parquet file per table, then write a `manifest.json` describing the
 * export.
 *
 * Existing output files (and a previous manifest) are removed before the
 * export starts. Row counts come from the snapshot taken by
 * `openBundleSnapshot`, i.e. before DuckDB attaches the database.
 *
 * @param {{ bundlePath: string, outDir?: string }} options - `outDir`
 *   falls back to the snapshot's default output directory.
 * @returns {Promise<{ outDir: string, manifestPath: string, files: object, counts: object }>}
 */
async function exportBundleParquet(options) {
  const snapshot = await openBundleSnapshot(options.bundlePath);
  const outDir = path2.resolve(options.outDir ?? snapshot.defaultOutDir);
  await mkdir2(outDir, { recursive: true });

  // table name -> absolute path of its Parquet file
  const files = {};
  for (const table of PARQUET_TABLES) {
    files[table] = path2.join(outDir, `${table}.parquet`);
  }
  const manifestPath = path2.join(outDir, "manifest.json");

  // Clear leftovers from an earlier export; `force` ignores missing files.
  const staleTargets = Object.values(files).concat(manifestPath);
  for (const target of staleTargets) {
    await rm(target, { force: true });
  }

  const connection = await createDuckDbConnection();
  try {
    await attachSqlite(connection, snapshot.dbPath);
    for (const table of PARQUET_TABLES) {
      const source = `prosa.${quoteIdentifier(table)}`;
      const destination = sqlString(files[table]);
      await connection.run(
        `COPY (SELECT * FROM ${source}) TO ${destination} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
      );
    }
  } finally {
    connection.closeSync();
  }

  const exportedAt = new Date().toISOString();
  const tables = {};
  for (const table of PARQUET_TABLES) {
    tables[table] = {
      file: path2.basename(files[table]),
      rows: snapshot.counts[table]
    };
  }
  const manifest = {
    exported_at: exportedAt,
    source_db: snapshot.dbPath,
    schema_version: snapshot.schemaVersion,
    parser_version: snapshot.parserVersion,
    tables
  };
  await writeFile2(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  return { outDir, manifestPath, files, counts: snapshot.counts };
}
1813
/**
 * Run `options.sql` in a fresh DuckDB connection in which every table from
 * PARQUET_TABLES is a view over `<parquetDir>/<table>.parquet` and the
 * analytics views have been created on top of them.
 *
 * @param {{ parquetDir: string, sql: string }} options
 * @returns {Promise<{ columns: string[], rows: object[] }>} De-duplicated
 *   column names and JSON-friendly row objects from the DuckDB reader.
 * @throws {Error} A "Parquet export not found" error when the failure looks
 *   like a missing Parquet file; any other error is rethrown unchanged.
 */
async function queryDuckDbParquet(options) {
  const parquetDir = path2.resolve(options.parquetDir);
  const connection = await createDuckDbConnection();
  try {
    for (const table of PARQUET_TABLES) {
      const viewName = quoteIdentifier(table);
      const parquetFile = sqlString(path2.join(parquetDir, `${table}.parquet`));
      await connection.run(
        `CREATE OR REPLACE VIEW ${viewName} AS SELECT * FROM read_parquet(${parquetFile})`
      );
    }
    await createAnalyticsViews(connection);

    const reader = await connection.runAndReadAll(options.sql);
    const columns = reader.deduplicatedColumnNames();
    const rows = reader.getRowObjectsJson();
    return { columns, rows };
  } catch (error) {
    if (!isMissingParquetError(error)) {
      throw error;
    }
    // Replace DuckDB's file error with an actionable hint.
    throw new Error(
      `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
    );
  } finally {
    connection.closeSync();
  }
}
1841
/**
 * Create a DuckDB connection via `DuckDBConnection.create()` with no
 * arguments. Callers in this file close it with `closeSync()`.
 *
 * @returns {Promise<object>} The created connection.
 */
async function createDuckDbConnection() {
  const connection = await DuckDBConnection.create();
  return connection;
}
1844
/**
 * Install and load DuckDB's sqlite extension, then attach the SQLite file at
 * `dbPath` under the schema name `prosa`.
 *
 * @param {object} connection - DuckDB connection exposing `run(sql)`.
 * @param {string} dbPath - Path of the SQLite database to attach.
 * @returns {Promise<void>}
 * @throws {Error} A wrapped error naming the sqlite extension; the original
 *   failure is preserved on `error.cause` so its stack is not lost.
 */
async function attachSqlite(connection, dbPath) {
  try {
    await connection.run("INSTALL sqlite");
    await connection.run("LOAD sqlite");
    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
  } catch (error) {
    throw new Error(
      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`,
      { cause: error }
    );
  }
}
1855
/**
 * Create (or replace) the five analytics views on a DuckDB connection:
 * session_facts, tool_usage_facts, error_facts, model_usage and
 * project_activity, in that order. The views read from relations named after
 * the base tables (sessions, turns, messages, ...), which the caller must
 * have registered beforehand.
 *
 * project_activity pre-aggregates each child table per session in a CTE and
 * sums those per-session counts. Joining turns, messages, tool_calls,
 * tool_results and search_docs directly on session_id would multiply their
 * row counts together for every session before count(DISTINCT ...) could
 * collapse them. The sums equal the distinct counts as long as session_id is
 * unique in sessions, project_id is unique in projects, and each child id
 * belongs to a single session - presumably true because they are the ids
 * joined on throughout this file; confirm against the schema.
 *
 * @param {{ run: (sql: string) => Promise<unknown> }} connection
 * @returns {Promise<void>}
 */
async function createAnalyticsViews(connection) {
  const viewStatements = [
    `
    CREATE OR REPLACE VIEW session_facts AS
    WITH turn_counts AS (
      SELECT session_id, count(*) AS turn_count
      FROM turns
      GROUP BY session_id
    ),
    message_counts AS (
      SELECT session_id,
             count(*) AS message_count,
             sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
             sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
      FROM messages
      GROUP BY session_id
    ),
    tool_call_counts AS (
      SELECT session_id,
             count(*) AS tool_call_count,
             sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
      FROM tool_calls
      GROUP BY session_id
    ),
    tool_result_counts AS (
      SELECT session_id,
             count(*) AS tool_result_count,
             sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
                      THEN 1 ELSE 0 END) AS tool_result_error_count,
             sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
      FROM tool_results
      GROUP BY session_id
    ),
    search_doc_counts AS (
      SELECT session_id, count(*) AS search_doc_count
      FROM search_docs
      WHERE session_id IS NOT NULL
      GROUP BY session_id
    )
    SELECT s.session_id,
           s.source_tool,
           s.source_session_id,
           s.project_id,
           p.display_name AS project_name,
           p.canonical_path AS project_path,
           s.parent_session_id,
           s.is_subagent,
           s.agent_role,
           s.agent_nickname,
           s.title,
           s.start_ts,
           s.end_ts,
           CASE
             WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
             THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
                            TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
             ELSE NULL
           END AS duration_seconds,
           s.cwd_initial,
           s.git_branch_initial,
           s.model_first,
           s.model_last,
           s.status,
           s.timeline_confidence,
           sf.path AS source_file_path,
           COALESCE(tc.turn_count, 0) AS turn_count,
           COALESCE(mc.message_count, 0) AS message_count,
           COALESCE(mc.user_message_count, 0) AS user_message_count,
           COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
           COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
           COALESCE(trc.tool_result_count, 0) AS tool_result_count,
           COALESCE(tcc.tool_call_error_count, 0)
             + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
           COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
           COALESCE(sdc.search_doc_count, 0) AS search_doc_count
    FROM sessions s
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
    LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
    LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
    LEFT JOIN message_counts mc ON mc.session_id = s.session_id
    LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
    LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
    LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
  `,
    `
    CREATE OR REPLACE VIEW tool_usage_facts AS
    WITH result_rollup AS (
      SELECT tool_call_id,
             session_id,
             count(*) AS tool_result_count,
             max(status) AS result_status,
             max(is_error) AS is_error,
             min(exit_code) AS exit_code,
             sum(COALESCE(duration_ms, 0)) AS duration_ms,
             max(preview) AS preview
      FROM tool_results
      GROUP BY tool_call_id, session_id
    )
    SELECT tc.tool_call_id,
           tc.session_id,
           s.source_tool,
           s.source_session_id,
           s.project_id,
           p.display_name AS project_name,
           p.canonical_path AS project_path,
           tc.turn_id,
           tc.message_id,
           tc.event_id,
           tc.source_call_id,
           tc.tool_name,
           tc.canonical_tool_type,
           tc.command,
           tc.cwd,
           tc.path,
           tc.query,
           tc.timestamp_start,
           tc.timestamp_end,
           CASE
             WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
             THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
                            TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
             ELSE NULL
           END AS call_duration_seconds,
           tc.status AS call_status,
           rr.result_status,
           COALESCE(rr.is_error, 0) AS is_error,
           rr.exit_code,
           rr.duration_ms AS result_duration_ms,
           COALESCE(rr.tool_result_count, 0) AS tool_result_count,
           rr.preview,
           tc.raw_record_id
    FROM tool_calls tc
    LEFT JOIN sessions s ON s.session_id = tc.session_id
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
  `,
    `
    CREATE OR REPLACE VIEW error_facts AS
    SELECT 'tool_result:' || tr.tool_result_id AS error_id,
           'tool_result' AS error_category,
           s.source_tool,
           s.project_id,
           p.display_name AS project_name,
           tr.session_id,
           COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
           tc.tool_name,
           tc.canonical_tool_type,
           COALESCE(tr.status, tc.status) AS status,
           tr.exit_code,
           NULL AS message,
           tr.preview,
           NULL AS entity_type,
           NULL AS entity_id,
           tr.raw_record_id
    FROM tool_results tr
    LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
    LEFT JOIN sessions s ON s.session_id = tr.session_id
    LEFT JOIN projects p ON p.project_id = s.project_id
    WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
    UNION ALL
    SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
           'import_error' AS error_category,
           COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
           NULL AS project_id,
           NULL AS project_name,
           NULL AS session_id,
           ie.occurred_at AS timestamp,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           ie.kind AS status,
           NULL AS exit_code,
           ie.message,
           NULL AS preview,
           NULL AS entity_type,
           NULL AS entity_id,
           ie.raw_record_id
    FROM import_errors ie
    LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
    LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
    UNION ALL
    SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
           'uncertainty' AS error_category,
           NULL AS source_tool,
           NULL AS project_id,
           NULL AS project_name,
           CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
           NULL AS timestamp,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           u.reason AS status,
           NULL AS exit_code,
           u.reason AS message,
           NULL AS preview,
           u.entity_type,
           u.entity_id,
           NULL AS raw_record_id
    FROM uncertainties u
  `,
    `
    CREATE OR REPLACE VIEW model_usage AS
    WITH model_events AS (
      SELECT s.source_tool,
             s.project_id,
             p.display_name AS project_name,
             p.canonical_path AS project_path,
             s.session_id,
             NULL AS turn_id,
             s.model_first AS model,
             s.start_ts AS timestamp,
             'session_first' AS observation_type
      FROM sessions s
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE s.model_first IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
             'session_last' AS observation_type
      FROM sessions s
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE s.model_last IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
      FROM turns t
      LEFT JOIN sessions s ON s.session_id = t.session_id
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE t.model IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
      FROM messages m
      LEFT JOIN sessions s ON s.session_id = m.session_id
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE m.model IS NOT NULL
    )
    SELECT source_tool,
           project_id,
           project_name,
           project_path,
           model,
           count(DISTINCT session_id) AS session_count,
           count(DISTINCT turn_id) AS turn_count,
           count(*) AS observation_count,
           sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
           min(timestamp) AS first_seen_ts,
           max(timestamp) AS last_seen_ts
    FROM model_events
    GROUP BY source_tool, project_id, project_name, project_path, model
  `,
    // Child tables are reduced to one row per session first, so the joins
    // below stay 1:1 with sessions. The CAST keeps each total a BIGINT, the
    // type count() returns, instead of the wider type sum() would produce.
    `
    CREATE OR REPLACE VIEW project_activity AS
    WITH turn_totals AS (
      SELECT session_id, count(DISTINCT turn_id) AS turn_count
      FROM turns
      GROUP BY session_id
    ),
    message_totals AS (
      SELECT session_id, count(DISTINCT message_id) AS message_count
      FROM messages
      GROUP BY session_id
    ),
    tool_call_totals AS (
      SELECT session_id, count(DISTINCT tool_call_id) AS tool_call_count
      FROM tool_calls
      GROUP BY session_id
    ),
    tool_result_totals AS (
      SELECT session_id,
             count(DISTINCT tool_result_id) AS tool_result_count,
             count(DISTINCT CASE
               WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
               THEN tool_result_id
             END) AS tool_error_count
      FROM tool_results
      GROUP BY session_id
    ),
    search_doc_totals AS (
      SELECT session_id, count(DISTINCT doc_id) AS search_doc_count
      FROM search_docs
      GROUP BY session_id
    )
    SELECT s.source_tool,
           s.project_id,
           COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
           p.canonical_path AS project_path,
           min(s.start_ts) AS first_session_ts,
           max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
           count(DISTINCT s.session_id) AS session_count,
           count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
             AS low_confidence_session_count,
           CAST(COALESCE(sum(tt.turn_count), 0) AS BIGINT) AS turn_count,
           CAST(COALESCE(sum(mt.message_count), 0) AS BIGINT) AS message_count,
           CAST(COALESCE(sum(ct.tool_call_count), 0) AS BIGINT) AS tool_call_count,
           CAST(COALESCE(sum(rt.tool_result_count), 0) AS BIGINT) AS tool_result_count,
           CAST(COALESCE(sum(rt.tool_error_count), 0) AS BIGINT) AS tool_error_count,
           CAST(COALESCE(sum(dt.search_doc_count), 0) AS BIGINT) AS search_doc_count
    FROM sessions s
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN turn_totals tt ON tt.session_id = s.session_id
    LEFT JOIN message_totals mt ON mt.session_id = s.session_id
    LEFT JOIN tool_call_totals ct ON ct.session_id = s.session_id
    LEFT JOIN tool_result_totals rt ON rt.session_id = s.session_id
    LEFT JOIN search_doc_totals dt ON dt.session_id = s.session_id
    GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
  `
  ];

  // Views are created one at a time, in the order listed above.
  for (const statement of viewStatements) {
    await connection.run(statement);
  }
}
2134
/**
 * Open the bundle briefly to capture what the Parquet export needs: the
 * SQLite path, the versions recorded in the manifest, the default output
 * directory and a row count for every table in PARQUET_TABLES. The bundle is
 * always closed again before returning, even when a query throws.
 *
 * @param {string} bundlePath - Bundle root passed to `openBundle`.
 * @returns {Promise<{ dbPath: string, schemaVersion: *, parserVersion: *, defaultOutDir: string, counts: object }>}
 */
async function openBundleSnapshot(bundlePath) {
  const bundle = await openBundle(bundlePath);
  try {
    const counts = {};
    for (const table of PARQUET_TABLES) {
      const countSql = `SELECT count(*) AS n FROM ${quoteIdentifier(table)}`;
      const row = bundle.db.prepare(countSql).get();
      // A missing row or missing `n` is reported as zero.
      counts[table] = row?.n ?? 0;
    }

    const { paths, manifest } = bundle;
    return {
      dbPath: paths.db,
      schemaVersion: manifest.schema_version,
      parserVersion: manifest.parser_version,
      defaultOutDir: paths.parquet,
      counts
    };
  } finally {
    closeBundle(bundle);
  }
}
2154
/**
 * Wrap `value` in double quotes for use as an SQL identifier, doubling any
 * double quote it already contains.
 *
 * @param {string} value - Raw identifier.
 * @returns {string} Quoted identifier.
 */
function quoteIdentifier(value) {
  const escaped = value.split('"').join('""');
  return `"${escaped}"`;
}
2157
/**
 * Render `value` as a single-quoted SQL string literal, doubling any single
 * quote it contains.
 *
 * @param {string} value - Raw text.
 * @returns {string} SQL string literal.
 */
function sqlString(value) {
  const escaped = value.split("'").join("''");
  return `'${escaped}'`;
}
2160
/**
 * Decide whether `error` looks like DuckDB failing to find a Parquet file:
 * its message must mention ".parquet" and one of "No files found",
 * "does not exist" or "not found" (all matched case-insensitively).
 *
 * @param {unknown} error - Value caught from a DuckDB call.
 * @returns {boolean}
 */
function isMissingParquetError(error) {
  const text = getErrorMessage(error);
  if (!/\.parquet/i.test(text)) {
    return false;
  }
  return /No files found|does not exist|not found/i.test(text);
}
2164
+
2165
+ // src/services/analytics.ts
2166
// Names of the available analytics reports. Each entry has a matching `case`
// in buildAnalyticsSql, which dispatches to the SQL builder for that report.
+ var ANALYTICS_REPORTS = ["sessions", "tools", "errors", "models", "projects"];
2167
/**
 * Run an analytics report against a Parquet export through DuckDB.
 *
 * @param {{ parquetDir: string, report: string, filters?: object }} options -
 *   `filters` defaults to an empty object when null or undefined.
 * @returns {Promise<{ columns: string[], rows: object[] }>} Result of
 *   `queryDuckDbParquet` for the generated SQL.
 */
async function runAnalyticsReport(options) {
  const { parquetDir } = options;
  const sql = buildAnalyticsSql(options.report, options.filters ?? {}, "duckdb");
  return queryDuckDbParquet({ parquetDir, sql });
}
2173
/**
 * Run an analytics report directly on the bundle's SQLite database.
 *
 * @param {{ bundle: { db: object }, report: string, filters?: object }} options -
 *   `filters` defaults to an empty object when null or undefined.
 * @returns {{ columns: string[], rows: object[] }} Column names taken from
 *   the prepared statement, plus all result rows.
 */
function runAnalyticsReportFromBundle(options) {
  const sql = buildAnalyticsSql(options.report, options.filters ?? {}, "sqlite");
  const statement = options.bundle.db.prepare(sql);
  const rows = statement.all();

  const columns = [];
  for (const { name } of statement.columns()) {
    columns.push(name);
  }
  return { columns, rows };
}
2180
/**
 * Build the SQL text for one analytics report.
 *
 * @param {string} report - One of "sessions", "tools", "errors", "models",
 *   "projects".
 * @param {object} filters - Report filters, forwarded to the report builder.
 * @param {string} dialect - Target SQL dialect, forwarded to the report
 *   builder; callers in this file pass "duckdb" or "sqlite".
 * @returns {string} SQL statement for the requested report.
 * @throws {Error} When `report` is not a known report name. Without this the
 *   function would return `undefined` and the failure would surface later,
 *   inside the database driver, with a much less helpful message.
 */
function buildAnalyticsSql(report, filters, dialect) {
  switch (report) {
    case "sessions":
      return buildSessionsSql(filters, dialect);
    case "tools":
      return buildToolsSql(filters, dialect);
    case "errors":
      return buildErrorsSql(filters, dialect);
    case "models":
      return buildModelsSql(filters, dialect);
    case "projects":
      return buildProjectsSql(filters, dialect);
    default:
      throw new Error(`unknown analytics report: ${String(report)}`);
  }
}
2194
/**
 * Build the "sessions" report: rows from session_facts, newest start first.
 *
 * Optional filters: source, time window on start_ts, project, exact
 * session id, and a substring match on the source file path (LIKE with `\`
 * as the escape character).
 *
 * @param {object} filters - Report filters.
 * @param {string} dialect - SQL dialect, forwarded to `projectFilter`.
 * @returns {string} SQL statement.
 */
function buildSessionsSql(filters, dialect) {
  const bySource = sourceFilter(filters);
  const byTime = timeFilter("start_ts", filters);
  const byProject = projectFilter(filters, dialect);

  let bySession = null;
  if (filters.sessionId) {
    bySession = `session_id = ${sqlString2(filters.sessionId)}`;
  }

  let bySourcePath = null;
  if (filters.sourcePathSubstring) {
    const pattern = `%${escapeLike(filters.sourcePathSubstring)}%`;
    bySourcePath = `source_file_path LIKE ${sqlString2(pattern)} ESCAPE '\\'`;
  }

  const where = buildWhere([bySource, byTime, byProject, bySession, bySourcePath]);
  const rowLimit = limit(filters);
  return `
    SELECT start_ts, source_tool, project_name, source_file_path, session_id,
    source_session_id, model_last, duration_seconds,
    message_count, tool_call_count, tool_result_count, tool_error_count,
    tool_duration_ms, timeline_confidence, title
    FROM session_facts
    ${where}
    ORDER BY start_ts DESC NULLS LAST
    LIMIT ${rowLimit}
  `;
}
2213
/**
 * Build the "tools" report: tool_usage_facts aggregated per
 * (tool_name, canonical_tool_type, source_tool, project_name), with call and
 * error counts, average result duration and the latest start timestamp.
 *
 * Optional filters: source, time window on timestamp_start, project, tool
 * name, canonical tool type, and errors only.
 *
 * @param {object} filters - Report filters.
 * @param {string} dialect - SQL dialect, forwarded to `projectFilter`.
 * @returns {string} SQL statement.
 */
function buildToolsSql(filters, dialect) {
  const bySource = sourceFilter(filters);
  const byTime = timeFilter("timestamp_start", filters);
  const byProject = projectFilter(filters, dialect);

  let byToolName = null;
  if (filters.toolName) {
    byToolName = `tool_name = ${sqlString2(filters.toolName)}`;
  }

  let byCanonicalType = null;
  if (filters.canonicalType) {
    byCanonicalType = `canonical_tool_type = ${sqlString2(filters.canonicalType)}`;
  }

  let onlyErrors = null;
  if (filters.errorsOnly) {
    onlyErrors = `(is_error = 1 OR call_status = 'error')`;
  }

  const where = buildWhere([
    bySource,
    byTime,
    byProject,
    byToolName,
    byCanonicalType,
    onlyErrors
  ]);
  const rowLimit = limit(filters);
  return `
    SELECT tool_name, canonical_tool_type, source_tool, project_name,
    count(*) AS call_count,
    sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,
    round(avg(result_duration_ms), 3) AS avg_result_duration_ms,
    max(timestamp_start) AS latest_ts
    FROM tool_usage_facts
    ${where}
    GROUP BY tool_name, canonical_tool_type, source_tool, project_name
    ORDER BY call_count DESC, error_count DESC, tool_name ASC
    LIMIT ${rowLimit}
  `;
}
2235
/**
 * Build the "errors" report query over `error_facts`, newest first.
 *
 * Optional filters: exact `toolName` and exact error `category`, on top of
 * the shared source, time (`timestamp`) and project filters.
 */
function buildErrorsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp", filters),
    projectFilter(filters, dialect)
  ];
  conditions.push(filters.toolName ? `tool_name = ${sqlString2(filters.toolName)}` : null);
  conditions.push(filters.category ? `error_category = ${sqlString2(filters.category)}` : null);
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
    SELECT timestamp, error_category, source_tool, project_name, session_id,
      tool_name, status, exit_code, message, preview
    FROM error_facts
    ${where}
    ORDER BY timestamp DESC NULLS LAST, error_id DESC
    LIMIT ${rowLimit}
  `;
}
2252
/**
 * Build the "models" report query over `model_usage`.
 *
 * The time window is matched against the row's
 * `[first_seen_ts, last_seen_ts]` range; an optional exact `model` filter
 * is supported.
 */
function buildModelsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
    projectFilter(filters, dialect)
  ];
  conditions.push(filters.model ? `model = ${sqlString2(filters.model)}` : null);
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
    SELECT model, source_tool, project_name, session_count, turn_count,
      message_count, observation_count, first_seen_ts, last_seen_ts
    FROM model_usage
    ${where}
    ORDER BY session_count DESC, observation_count DESC, model ASC
    LIMIT ${rowLimit}
  `;
}
2268
/**
 * Build the "projects" report query over `project_activity`, most recently
 * active projects first.
 *
 * The time window is matched against the row's
 * `[first_session_ts, latest_session_ts]` range.
 */
function buildProjectsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_session_ts", "latest_session_ts", filters),
    projectFilter(filters, dialect)
  ];
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
    SELECT latest_session_ts, source_tool, project_name, project_path,
      session_count, message_count, tool_call_count, tool_error_count,
      low_confidence_session_count
    FROM project_activity
    ${where}
    ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC
    LIMIT ${rowLimit}
  `;
}
2284
/**
 * Build the `source_tool = '<source>'` predicate.
 *
 * @returns {string|null} the predicate, or null when `filters.source` is unset/empty.
 */
function sourceFilter(filters) {
  if (!filters.source) {
    return null;
  }
  return `source_tool = ${sqlString2(filters.source)}`;
}
2287
/**
 * Build the time-window predicate for a single timestamp column.
 *
 * `since` is an inclusive lower bound and `until` an exclusive upper bound.
 * Rows whose column is NULL are kept by both bounds.
 *
 * @returns {string|null} the predicate(s) joined with AND, or null when neither bound is set.
 */
function timeFilter(column, filters) {
  const bounds = [];
  if (filters.since) {
    bounds.push(`(${column} IS NULL OR ${column} >= ${sqlString2(filters.since)})`);
  }
  if (filters.until) {
    bounds.push(`(${column} IS NULL OR ${column} < ${sqlString2(filters.until)})`);
  }
  if (bounds.length === 0) {
    return null;
  }
  return bounds.join(" AND ");
}
2295
/**
 * Build the predicate selecting rows whose `[firstColumn, lastColumn]`
 * range overlaps the requested `[since, until)` window.
 *
 * `since` is compared against the range end and `until` against the range
 * start; NULL columns are kept by both comparisons.
 *
 * @returns {string|null} the predicate(s) joined with AND, or null when neither bound is set.
 */
function rangeOverlapFilter(firstColumn, lastColumn, filters) {
  const { since, until } = filters;
  const bounds = [];
  if (since) {
    bounds.push(`(${lastColumn} IS NULL OR ${lastColumn} >= ${sqlString2(since)})`);
  }
  if (until) {
    bounds.push(`(${firstColumn} IS NULL OR ${firstColumn} < ${sqlString2(until)})`);
  }
  return bounds.length === 0 ? null : bounds.join(" AND ");
}
2305
/**
 * Build the project predicate: exact match on `project_id`, or substring
 * match on `project_name` / `project_path`.
 *
 * The substring operator is `ILIKE` for the "duckdb" dialect and `LIKE`
 * for every other dialect. LIKE wildcards in the user text are escaped
 * with a backslash.
 *
 * @returns {string|null} the parenthesized predicate, or null when `filters.project` is unset/empty.
 */
function projectFilter(filters, dialect) {
  const { project } = filters;
  if (!project) return null;
  const exactLiteral = sqlString2(project);
  const patternLiteral = sqlString2(`%${escapeLike(project)}%`);
  const likeOperator = dialect === "duckdb" ? "ILIKE" : "LIKE";
  const alternatives = [
    `project_id = ${exactLiteral}`,
    `project_name ${likeOperator} ${patternLiteral} ESCAPE '\\'`,
    `project_path ${likeOperator} ${patternLiteral} ESCAPE '\\'`
  ];
  return `(${alternatives.join(" OR ")})`;
}
2312
/**
 * Combine optional predicates into a WHERE clause.
 *
 * @param {Array<string|null|undefined>} filters - predicates; falsy entries are dropped.
 * @returns {string} `WHERE a AND b ...`, or an empty string when nothing is active.
 */
function buildWhere(filters) {
  const predicates = [];
  for (const predicate of filters) {
    if (predicate) predicates.push(predicate);
  }
  if (predicates.length === 0) {
    return "";
  }
  return `WHERE ${predicates.join(" AND ")}`;
}
2316
/**
 * Resolve the row limit for a report.
 *
 * A non-finite `filters.limit` (NaN, undefined, non-number) is passed to
 * `clampLimit` as undefined; `clampLimit` is called with max 500 and
 * fallback 50.
 */
function limit(filters) {
  const requested = filters.limit;
  const candidate = Number.isFinite(requested) ? requested : void 0;
  return clampLimit(candidate, { max: 500, fallback: 50 });
}
2320
/**
 * Render a string as a single-quoted SQL literal, doubling embedded quotes.
 *
 * @param {string} value - text to quote.
 * @returns {string} the quoted literal.
 */
function sqlString2(value) {
  const escaped = value.replace(/'/g, () => "''");
  return "'" + escaped + "'";
}
2323
/**
 * Backslash-escape the LIKE metacharacters (`\`, `%`, `_`) in `value` so
 * it can be embedded in a pattern used with `ESCAPE '\'`.
 */
function escapeLike(value) {
  // "$&" in a replacement string stands for the matched character.
  return value.replace(/[\\%_]/g, "\\$&");
}
2326
+
2327
+ // src/cli/bundle.ts
2328
+ import path3 from "path";
2329
/**
 * Open the bundle at `storePath`, run `fn` with it, and always close the
 * bundle afterwards, whether `fn` resolves or throws.
 *
 * @param {string} storePath - bundle directory; resolved to an absolute path.
 * @param {Function} fn - callback receiving the open bundle; may be async.
 * @returns {Promise<*>} whatever `fn` resolves to.
 */
async function withBundle(storePath, fn) {
  const absoluteStorePath = path3.resolve(storePath);
  const bundle = await openBundle(absoluteStorePath);
  try {
    const outcome = await fn(bundle);
    return outcome;
  } finally {
    closeBundle(bundle);
  }
}
2337
+
2338
+ // src/cli/output.ts
2339
// Output formats accepted by --output-format.
var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
// Text placed between table columns.
var COL_SEPARATOR = " ";
// Character repeated to draw the rule under the table header.
var RULE_CHAR = "-";
/**
 * Validate an --output-format value.
 *
 * @param {string|undefined} value - raw option value.
 * @param {string} fallback - returned when `value` is undefined.
 * @returns {string} the validated format.
 * @throws {Error} when `value` is not one of OUTPUT_FORMATS.
 */
function parseOutputFormat(value, fallback) {
  if (value === void 0) {
    return fallback;
  }
  if (!OUTPUT_FORMATS.includes(value)) {
    throw new Error(
      `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
    );
  }
  return value;
}
2349
/**
 * Print `rows` to stdout in the format named by `opts.format`.
 *
 * "json" and "csv" have dedicated printers; "table" and "interactive" both
 * use the table printer. Any other format prints nothing.
 */
function printRows(rows, opts) {
  const { format } = opts;
  if (format === "json") {
    printJson(rows, opts);
    return;
  }
  if (format === "csv") {
    printCsv(rows, opts);
    return;
  }
  if (format === "table" || format === "interactive") {
    printTable(rows, opts);
  }
}
2363
/**
 * Write `rows` to stdout as pretty-printed JSON followed by a newline.
 *
 * When `opts.meta` is set, the output is an object holding the meta fields
 * plus a `rows` property; otherwise it is the bare rows value.
 */
function printJson(rows, opts) {
  let payload = rows;
  if (opts.meta) {
    payload = { ...opts.meta, rows };
  }
  const text = JSON.stringify(payload, null, 2);
  process.stdout.write(`${text}\n`);
}
2368
+ function printCsv(rows, opts) {
2369
+ const columns = opts.columns;
2370
+ process.stdout.write(`${columns.map(csvField).join(",")}
2371
+ `);
2372
+ for (const row of rows) {
2373
+ const record = row;
2374
+ const line = columns.map((column) => csvField(formatCell(record[column]))).join(",");
2375
+ process.stdout.write(`${line}
2376
+ `);
2377
+ }
2378
+ }
2379
/**
 * Quote a CSV field when needed.
 *
 * A field containing a double quote, comma, carriage return or line feed
 * is wrapped in double quotes, with embedded quotes doubled. A bare `\r`
 * must be quoted too, otherwise many CSV readers treat it as a record
 * break. Any other field is returned unchanged.
 *
 * @param {string} value - already-formatted cell text.
 * @returns {string} the field, ready to be joined with commas.
 */
function csvField(value) {
  if (/[",\r\n]/.test(value)) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
}
2383
/**
 * Write `rows` to stdout as a fixed-width text table.
 *
 * Each column is as wide as the longest of its header and its cells. The
 * header and a rule line are written first, then one padded line per row.
 */
function printTable(rows, opts) {
  const { columns } = opts;
  const widths = columns.map((column) => column.length);
  const cells = [];
  for (const record of rows) {
    const rendered = columns.map((column, index) => {
      const text = formatCell(record[column]);
      // Grow the column if this cell is wider than anything seen so far.
      widths[index] = Math.max(widths[index] ?? 0, text.length);
      return text;
    });
    cells.push(rendered);
  }
  const renderLine = (parts) =>
    parts.map((part, index) => part.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
  const header = renderLine(columns);
  const rule = columns.map((_, index) => RULE_CHAR.repeat(widths[index] ?? 0)).join(COL_SEPARATOR);
  process.stdout.write(`${header}\n${rule}\n`);
  for (const cellRow of cells) {
    process.stdout.write(`${renderLine(cellRow)}\n`);
  }
}
2406
/**
 * Convert a result cell into display text for the table/CSV printers.
 *
 * - null / undefined become "".
 * - strings are returned as-is.
 * - numbers, booleans and bigints are converted with String().
 * - everything else is JSON-encoded. BigInt values nested inside are
 *   written as decimal strings, because JSON.stringify throws on BigInt.
 *   Values with no JSON form (functions, symbols) become "", because
 *   JSON.stringify returns undefined for them and callers need a string.
 *
 * @param {*} value - raw cell value.
 * @returns {string} text for the cell; never undefined.
 */
function formatCell(value) {
  if (value == null) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
    case "bigint":
      return String(value);
    default: {
      const json = JSON.stringify(value, (_key, inner) =>
        typeof inner === "bigint" ? inner.toString() : inner
      );
      return json ?? "";
    }
  }
}
2412
+
2413
+ // src/core/domain/types.ts
2414
+ var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
2415
+
2416
+ // src/cli/parsers.ts
2417
/**
 * Validate a search engine name.
 *
 * @returns {string} "fts5" or "tantivy".
 * @throws {Error} for any other value.
 */
function parseSearchEngine(value) {
  switch (value) {
    case "fts5":
    case "tantivy":
      return value;
    default:
      throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
  }
}
2421
/**
 * Validate an MCP transport name.
 *
 * @returns {string} "stdio" or "http".
 * @throws {Error} for any other value.
 */
function parseMcpTransport(value) {
  switch (value) {
    case "stdio":
    case "http":
      return value;
    default:
      throw new Error(`invalid transport: ${value} (expected stdio or http)`);
  }
}
2425
/**
 * Validate an optional source-tool name against SOURCE_TOOLS.
 *
 * @param {string|undefined} value - raw option value.
 * @returns {string|undefined} undefined when no value was given, otherwise the validated name.
 * @throws {Error} when the value is not listed in SOURCE_TOOLS.
 */
function parseSourceTool(value) {
  if (value === void 0) {
    return void 0;
  }
  if (!SOURCE_TOOLS.includes(value)) {
    throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
  }
  return value;
}
2430
+
2431
+ // src/cli/commands/analytics.ts
2432
/**
 * Create the `analytics` command and attach one subcommand per report
 * (sessions, tools, errors, models, projects), in that order.
 */
function analyticsCommand() {
  const command = new Command("analytics").description(
    "Run high-level analytics reports over exported Parquet files."
  );
  const reports = [
    ["sessions", "Summarize sessions by source, project and model."],
    ["tools", "Summarize tool usage, status, duration and errors."],
    ["errors", "List import errors, failed tool results and uncertainties."],
    ["models", "Summarize model usage by source, project and time."],
    ["projects", "Summarize project activity and operational counts."]
  ];
  for (const [report, description] of reports) {
    command.addCommand(reportCommand(report, description));
  }
  return command;
}
2447
/**
 * Create the subcommand for a single analytics report.
 *
 * The command gets the shared options plus the report-specific ones. Its
 * action resolves the Parquet directory, builds filters from the parsed
 * options, runs the report and prints the rows in the requested format.
 *
 * @param {string} report - report name; also used as the subcommand name.
 * @param {string} description - help text for the subcommand.
 */
function reportCommand(report, description) {
  const command = addCommonOptions(new Command(report).description(description));
  switch (report) {
    case "tools":
      command
        .option("--tool-name <name>", "filter by exact tool name")
        .option("--canonical-type <type>", "filter by canonical tool type")
        .option("--errors-only", "only include tool calls with errors");
      break;
    case "errors":
      command
        .option("--tool-name <name>", "filter by exact tool name")
        .option("--category <category>", "filter by error category");
      break;
    case "models":
      command.option("--model <model>", "filter by exact model name");
      break;
    case "projects":
    case "sessions":
      command.option("--project <text>", "filter by project id, name, or path substring");
      break;
    default:
      break;
  }
  const runReport = async (options) => {
    const format = parseOutputFormat(options.outputFormat, "table");
    const parquetDir = await resolveParquetDir(options);
    const filters = buildFilters(options);
    const { rows, columns } = await runAnalyticsReport({ parquetDir, report, filters });
    printRows(rows, {
      format,
      columns,
      meta: { report, count: rows.length }
    });
  };
  return command.action(runReport);
}
2476
/**
 * Register the options shared by every analytics report on `command`.
 *
 * @returns whatever the last `.option(...)` call in the chain returns.
 */
function addCommonOptions(command) {
  const optionSpecs = [
    ["--store <path>", "bundle directory", defaultBundlePath()],
    ["--parquet-dir <path>", "Parquet directory (default: <store>/parquet)"],
    ["--refresh", "export Parquet before running the report"],
    ["--source <tool>", "filter by source tool: cursor|codex|claude|gemini"],
    ["--since <iso>", "lower timestamp bound (inclusive)"],
    ["--until <iso>", "upper timestamp bound (exclusive)"],
    ["--limit <n>", "maximum rows", "50"],
    ["--output-format <fmt>", "interactive|table|json|csv", "table"]
  ];
  // Folding over the specs reproduces a single fluent `.option()` chain.
  return optionSpecs.reduce((chained, spec) => chained.option(...spec), command);
}
2479
/**
 * Decide which Parquet directory a report should read.
 *
 * - With `--refresh`: export the bundle to Parquet (into `--parquet-dir`
 *   when given) and use the directory the export reports.
 * - Otherwise: use `--parquet-dir` when given, else the bundle's own
 *   Parquet path.
 *
 * @param {object} options - parsed CLI options (`store`, `parquetDir`, `refresh`).
 * @returns {Promise<string>} the Parquet directory.
 */
async function resolveParquetDir(options) {
  const storePath = path4.resolve(options.store);
  let outDir;
  if (options.parquetDir) {
    outDir = path4.resolve(options.parquetDir);
  }
  if (options.refresh) {
    const exported = await exportBundleParquet({ bundlePath: storePath, outDir });
    return exported.outDir;
  }
  if (outDir != null) {
    return outDir;
  }
  return await withBundle(storePath, (bundle) => bundle.paths.parquet);
}
2488
/**
 * Translate parsed CLI options into the filter object used by the SQL
 * builders.
 *
 * `source` is validated with `parseSourceTool` and `limit` is parsed as a
 * base-10 integer (it may be NaN); all other fields are copied through.
 */
function buildFilters(options) {
  const {
    since,
    until,
    toolName,
    canonicalType,
    errorsOnly,
    category,
    model,
    project
  } = options;
  const source = parseSourceTool(options.source);
  const limit = Number.parseInt(options.limit, 10);
  return {
    source,
    since,
    until,
    limit,
    toolName,
    canonicalType,
    errorsOnly,
    category,
    model,
    project
  };
}
2502
+
2503
+ // src/cli/commands/compile.ts
2504
+ import { Command as Command2 } from "commander";
2505
+
2506
+ // src/services/compile.ts
2507
+ init_errors();
2508
+ import os2 from "os";
2509
+ import path16 from "path";
2510
+
2511
+ // src/importers/claude/index.ts
2512
+ import { readFile as readFile4 } from "fs/promises";
2513
+ import path8 from "path";
2514
+
2515
+ // src/core/cas/index.ts
2516
+ init_db();
2517
+ import { mkdir as mkdir3, readFile as readFile2, writeFile as writeFile3 } from "fs/promises";
2518
+ import path5 from "path";
2519
+
2520
+ // src/core/cas/compress.ts
2521
+ import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
2522
// Inputs shorter than this many bytes are stored uncompressed.
var COMPRESS_THRESHOLD_BYTES = 256;
// Compression level passed to zstd.
var ZSTD_LEVEL = 3;
/**
 * Compress `input` with zstd unless it is below the size threshold.
 *
 * @param {Uint8Array|Buffer} input - raw bytes.
 * @returns {{bytes: Buffer, compression: "none"|"zstd"}} the stored bytes and the compression tag.
 */
function compressBytes(input) {
  const raw = Buffer.from(input);
  if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
    return { bytes: raw, compression: "none" };
  }
  const compressed = zstdCompress(raw, { compressionLevel: ZSTD_LEVEL });
  return { bytes: compressed, compression: "zstd" };
}
2531
/**
 * Undo `compressBytes`.
 *
 * @param {Buffer} input - stored bytes.
 * @param {string} compression - compression tag; "none" returns `input` as-is, any other value is decoded as zstd.
 * @returns {Buffer} the original bytes.
 */
function decompressBytes(input, compression) {
  const isPlain = compression === "none";
  return isPlain ? input : zstdDecompress(input);
}
2535
+
2536
+ // src/core/cas/hash.ts
2537
+ import { createHash } from "crypto";
2538
+ import { blake3 } from "@noble/hashes/blake3";
2539
+ import { bytesToHex } from "@noble/hashes/utils";
2540
/** Hash `bytes` with BLAKE3 and return the digest as a hex string. */
function blake3Hex(bytes) {
  const digest = blake3(bytes);
  return bytesToHex(digest);
}
2543
/** Hash `bytes` (string or buffer) with SHA-256 and return the digest as a hex string. */
function sha256Hex(bytes) {
  const hasher = createHash("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
2546
/** Build an object id by prefixing the hex hash with "blake3:". */
function objectIdFromHash(hashHex) {
  return "blake3:".concat(hashHex);
}
2549
/**
 * Compute the bundle-relative storage path for an object.
 *
 * Layout: `objects/blake3/<hash[0..2]>/<hash[2..4]>/<hash><ext>`, where
 * the extension is ".zst" for zstd-compressed objects and ".bin" otherwise.
 */
function objectStoragePath(hashHex, compression) {
  const firstShard = hashHex.slice(0, 2);
  const secondShard = hashHex.slice(2, 4);
  let extension = ".bin";
  if (compression === "zstd") {
    extension = ".zst";
  }
  return `objects/blake3/${firstShard}/${secondShard}/${hashHex}${extension}`;
}
2555
+
2556
+ // src/core/cas/index.ts
2557
// Directories already created through ensureDir during this process.
var ensuredDirs = /* @__PURE__ */ new Set();
/**
 * Create `absoluteDir` (and any missing parents) once per process.
 *
 * Later calls for the same path return without touching the filesystem.
 */
async function ensureDir(absoluteDir) {
  if (!ensuredDirs.has(absoluteDir)) {
    await mkdir3(absoluteDir, { recursive: true });
    ensuredDirs.add(absoluteDir);
  }
}
2563
/**
 * Store `bytes` in the bundle's content-addressed object store.
 *
 * The object id comes from the BLAKE3 hash of the bytes. If a row with
 * that id already exists in `objects`, nothing is written. Otherwise the
 * (possibly compressed) bytes are written to disk and a row is inserted.
 *
 * @param {object} bundle - open bundle; its `db` and `path` are used.
 * @param {Buffer|Uint8Array} bytes - content to store.
 * @param {{mimeType?: string, encoding?: string}} [options] - optional metadata recorded with the object.
 * @returns {Promise<string>} the object id.
 */
async function putBytes(bundle, bytes, options = {}) {
  const hash = blake3Hex(bytes);
  const objectId = objectIdFromHash(hash);
  const lookup = prepare(
    bundle.db,
    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
      compression, mime_type, encoding, storage_path, created_at
    FROM objects WHERE object_id = ?`
  );
  if (lookup.get(objectId)) {
    return objectId;
  }
  const packed = compressBytes(bytes);
  const storagePath = objectStoragePath(hash, packed.compression);
  const absolutePath = path5.join(bundle.path, storagePath);
  await ensureDir(path5.dirname(absolutePath));
  await writeFile3(absolutePath, packed.bytes);
  const insert = prepare(
    bundle.db,
    `INSERT INTO objects (
      object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
      compression, mime_type, encoding, storage_path, created_at
    ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  // The compressed size is recorded only when compression was applied.
  const compressedSize = packed.compression === "zstd" ? packed.bytes.byteLength : null;
  insert.run(
    objectId,
    hash,
    bytes.byteLength,
    compressedSize,
    packed.compression,
    options.mimeType ?? null,
    options.encoding ?? null,
    storagePath,
    (/* @__PURE__ */ new Date()).toISOString()
  );
  return objectId;
}
2597
/**
 * Serialize `value` as JSON and store it via `putBytes`, tagged as
 * `application/json` with `utf-8` encoding.
 *
 * @returns {Promise<string>} the object id.
 */
async function putJson(bundle, value) {
  const payload = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return putBytes(bundle, payload, metadata);
}
2604
/**
 * Load an object's original bytes from the bundle.
 *
 * Looks up the object's row in `objects`, reads its file under the bundle
 * path, and decompresses it according to the recorded compression.
 *
 * @returns {Promise<Buffer>} the decompressed content.
 * @throws {Error} `object not found: <id>` when no row exists.
 */
async function getBytes(bundle, objectId) {
  const lookup = prepare(
    bundle.db,
    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
      compression, mime_type, encoding, storage_path, created_at
    FROM objects WHERE object_id = ?`
  );
  const meta = lookup.get(objectId);
  if (!meta) {
    throw new Error(`object not found: ${objectId}`);
  }
  const absolutePath = path5.join(bundle.path, meta.storage_path);
  const stored = await readFile2(absolutePath);
  return decompressBytes(stored, meta.compression);
}
2617
/** Load an object via `getBytes` and decode it as UTF-8 text. */
async function getText(bundle, objectId) {
  const content = await getBytes(bundle, objectId);
  return content.toString("utf8");
}
2621
/** Create an empty staging area: `{ byId: Map }`, keyed by object id. */
function createPendingObjects() {
  const byId = /* @__PURE__ */ new Map();
  return { byId };
}
2624
/**
 * Stage bytes for a later `flushPendingObjects` call.
 *
 * The object id comes from the BLAKE3 hash of the content. The first
 * staging of an id wins; staging the same content again leaves the
 * existing entry (and its metadata) untouched.
 *
 * @param {{byId: Map}} pending - staging area.
 * @param {Buffer|Uint8Array} bytes - content; non-Buffer input is copied into a Buffer.
 * @param {{mimeType?: string, encoding?: string}} [options] - optional metadata.
 * @returns {string} the object id.
 */
function stageBytes(pending, bytes, options = {}) {
  const content = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  const hash = blake3Hex(content);
  const objectId = objectIdFromHash(hash);
  if (pending.byId.has(objectId)) {
    return objectId;
  }
  const { mimeType = null, encoding = null } = {
    mimeType: options.mimeType ?? null,
    encoding: options.encoding ?? null
  };
  pending.byId.set(objectId, { objectId, hash, bytes: content, mimeType, encoding });
  return objectId;
}
2639
/**
 * Stage UTF-8 text. The MIME type defaults to
 * "text/plain; charset=utf-8" and the encoding is always "utf-8".
 *
 * @returns {string} the object id.
 */
function stageText(pending, text, options = {}) {
  const content = Buffer.from(text, "utf8");
  const mimeType = options.mimeType ?? "text/plain; charset=utf-8";
  return stageBytes(pending, content, { mimeType, encoding: "utf-8" });
}
2645
/**
 * Stage a value as JSON, tagged `application/json` with `utf-8` encoding.
 *
 * @returns {string} the object id.
 */
function stageJson(pending, value) {
  const serialized = JSON.stringify(value);
  const content = Buffer.from(serialized, "utf8");
  return stageBytes(pending, content, { mimeType: "application/json", encoding: "utf-8" });
}
2651
/**
 * Persist every staged object that the bundle does not already have.
 *
 * Steps: look up which staged ids already exist in `objects`, compress the
 * rest, write their files via `writeFilesParallel`, then insert one row
 * per written object with `INSERT OR IGNORE`. The staging area itself is
 * not modified.
 *
 * @param {object} bundle - open bundle; its `db` and `path` are used.
 * @param {{byId: Map}} pending - staging area filled by the stage* helpers.
 */
async function flushPendingObjects(bundle, pending) {
  const { byId } = pending;
  if (byId.size === 0) return;
  const existingIds = queryExistingObjectIds(bundle, [...byId.keys()]);
  const toWrite = [...byId.values()]
    .filter((staged) => !existingIds.has(staged.objectId))
    .map((staged) => {
      const { bytes: compressedBytes, compression } = compressBytes(staged.bytes);
      const storagePath = objectStoragePath(staged.hash, compression);
      return {
        staged,
        compression,
        compressedBytes,
        storagePath,
        absolutePath: path5.join(bundle.path, storagePath)
      };
    });
  if (toWrite.length > 0) {
    await writeFilesParallel(toWrite);
  }
  const insertObject = prepare(
    bundle.db,
    `INSERT OR IGNORE INTO objects (
      object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
      compression, mime_type, encoding, storage_path, created_at
    ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  // One timestamp is shared by every row inserted in this flush.
  const now = (/* @__PURE__ */ new Date()).toISOString();
  for (const { staged, compression, compressedBytes, storagePath } of toWrite) {
    insertObject.run(
      staged.objectId,
      staged.hash,
      staged.bytes.byteLength,
      compression === "zstd" ? compressedBytes.byteLength : null,
      compression,
      staged.mimeType,
      staged.encoding,
      storagePath,
      now
    );
  }
}
1645
- async function openOrInitBundle(rootPath) {
1646
- const resolved = path.resolve(rootPath);
1647
- const paths = bundlePaths(resolved);
1648
- const dirStat = await stat(resolved).catch(() => null);
1649
- if (dirStat && !dirStat.isDirectory()) {
1650
- throw new Error(`bundle path not found or not a directory: ${resolved}`);
1651
- }
1652
- if (!dirStat || !await exists(paths.manifest)) {
1653
- return await initBundle(resolved);
2693
+ function queryExistingObjectIds(bundle, ids) {
2694
+ const found = /* @__PURE__ */ new Set();
2695
+ if (ids.length === 0) return found;
2696
+ const CHUNK = 500;
2697
+ for (let start = 0; start < ids.length; start += CHUNK) {
2698
+ const slice = ids.slice(start, start + CHUNK);
2699
+ const placeholders = slice.map(() => "?").join(",");
2700
+ const rows = bundle.db.prepare(
2701
+ `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
2702
+ ).all(...slice);
2703
+ for (const row of rows) found.add(row.object_id);
1654
2704
  }
1655
- return await openBundle(resolved);
2705
+ return found;
1656
2706
  }
1657
- function closeBundle(bundle) {
1658
- closeDb(bundle.db);
2707
+ var FS_WRITE_CONCURRENCY = 16;
2708
+ async function writeFilesParallel(tasks) {
2709
+ let cursor = 0;
2710
+ const workers = [];
2711
+ const limit2 = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
2712
+ for (let w = 0; w < limit2; w++) {
2713
+ workers.push(
2714
+ (async () => {
2715
+ while (true) {
2716
+ const i = cursor++;
2717
+ if (i >= tasks.length) return;
2718
+ const task = tasks[i];
2719
+ await ensureDir(path5.dirname(task.absolutePath));
2720
+ await writeFile3(task.absolutePath, task.compressedBytes);
2721
+ }
2722
+ })()
2723
+ );
2724
+ }
2725
+ await Promise.all(workers);
1659
2726
  }
1660
2727
 
1661
- // src/services/compile.ts
1662
- init_errors();
1663
- import os2 from "os";
1664
- import path14 from "path";
1665
-
1666
2728
  // src/importers/claude/index.ts
1667
- init_cas();
1668
2729
  init_db();
1669
- import { readFile as readFile4 } from "fs/promises";
1670
- import path5 from "path";
1671
2730
 
1672
2731
  // src/core/domain/ids.ts
1673
- init_hash();
1674
2732
  var ID_PREFIX_BYTES = 16;
1675
2733
  function tupleId(parts) {
1676
2734
  return sha256Hex(parts.join("\0")).slice(0, ID_PREFIX_BYTES * 2);
@@ -1716,7 +2774,6 @@ function importBatchId(sourceTool, startedAtIso) {
1716
2774
  init_errors();
1717
2775
 
1718
2776
  // src/core/ingest/batch.ts
1719
- init_cas();
1720
2777
  init_db();
1721
2778
  function emptyCounts() {
1722
2779
  return {
@@ -1784,12 +2841,9 @@ async function recordError(bundle, batchId, args) {
1784
2841
  }
1785
2842
 
1786
2843
  // src/core/ingest/idempotency.ts
1787
- init_compress();
1788
- init_hash();
1789
- init_cas();
2844
+ import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile4 } from "fs/promises";
2845
+ import path6 from "path";
1790
2846
  init_db();
1791
- import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
1792
- import path3 from "path";
1793
2847
  async function registerSourceFile(bundle, args) {
1794
2848
  const st = await stat2(args.absolutePath);
1795
2849
  const size = st.size;
@@ -1873,10 +2927,10 @@ async function preserveRawSourceBytes(bundle, bytes) {
1873
2927
  const objectId = objectIdFromHash(hash);
1874
2928
  const { bytes: stored, compression } = compressBytes(bytes);
1875
2929
  const storagePath = rawSourceStoragePath(hash, compression);
1876
- const absolutePath = path3.join(bundle.path, storagePath);
1877
- await ensureDir(path3.dirname(absolutePath));
2930
+ const absolutePath = path6.join(bundle.path, storagePath);
2931
+ await ensureDir(path6.dirname(absolutePath));
1878
2932
  if (!await fileExists(absolutePath)) {
1879
- await writeFile3(absolutePath, stored);
2933
+ await writeFile4(absolutePath, stored);
1880
2934
  }
1881
2935
  const existing = prepare(
1882
2936
  bundle.db,
@@ -1918,12 +2972,12 @@ async function fileExists(filePath) {
1918
2972
 
1919
2973
  // src/importers/claude/discover.ts
1920
2974
  import { readdir } from "fs/promises";
1921
- import path4 from "path";
2975
+ import path7 from "path";
1922
2976
  async function* discoverClaudeFiles(root) {
1923
2977
  const projectDirs = await readdirSafe(root);
1924
2978
  for (const project of projectDirs) {
1925
2979
  if (!project.isDirectory()) continue;
1926
- const projectRoot = path4.join(root, project.name);
2980
+ const projectRoot = path7.join(root, project.name);
1927
2981
  yield* walkProject(projectRoot, project.name);
1928
2982
  }
1929
2983
  }
@@ -1932,7 +2986,7 @@ async function* walkProject(projectRoot, projectSlug) {
1932
2986
  for (const entry of entries) {
1933
2987
  if (entry.isFile() && entry.name.endsWith(".jsonl")) {
1934
2988
  yield {
1935
- filePath: path4.join(projectRoot, entry.name),
2989
+ filePath: path7.join(projectRoot, entry.name),
1936
2990
  projectSlug,
1937
2991
  isSubagent: false,
1938
2992
  parentSessionId: null,
@@ -1942,18 +2996,18 @@ async function* walkProject(projectRoot, projectSlug) {
1942
2996
  continue;
1943
2997
  }
1944
2998
  if (entry.isDirectory()) {
1945
- const subagentsDir = path4.join(projectRoot, entry.name, "subagents");
2999
+ const subagentsDir = path7.join(projectRoot, entry.name, "subagents");
1946
3000
  const subagentEntries = await readdirSafe(subagentsDir);
1947
3001
  for (const sub of subagentEntries) {
1948
3002
  if (!sub.isFile() || !sub.name.endsWith(".jsonl")) continue;
1949
3003
  if (!sub.name.startsWith("agent-")) continue;
1950
3004
  const agentId = sub.name.slice("agent-".length, -".jsonl".length);
1951
- const metaCandidate = path4.join(subagentsDir, `agent-${agentId}.meta.json`);
3005
+ const metaCandidate = path7.join(subagentsDir, `agent-${agentId}.meta.json`);
1952
3006
  const metaExists = subagentEntries.some(
1953
3007
  (e) => e.isFile() && e.name === `agent-${agentId}.meta.json`
1954
3008
  );
1955
3009
  yield {
1956
- filePath: path4.join(subagentsDir, sub.name),
3010
+ filePath: path7.join(subagentsDir, sub.name),
1957
3011
  projectSlug,
1958
3012
  isSubagent: true,
1959
3013
  parentSessionId: entry.name,
@@ -2073,7 +3127,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
2073
3127
  const counts = emptyFileCounts();
2074
3128
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
2075
3129
  sourceTool: "claude",
2076
- absolutePath: path5.resolve(file.filePath),
3130
+ absolutePath: path8.resolve(file.filePath),
2077
3131
  fileKind: "jsonl",
2078
3132
  workspaceHint: file.projectSlug
2079
3133
  });
@@ -2173,7 +3227,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
2173
3227
  pending.session.parent_session_id_pending = parentSid;
2174
3228
  }
2175
3229
  }
2176
- const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path5.basename(file.filePath)}`);
3230
+ const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path8.basename(file.filePath)}`);
2177
3231
  const type = typeof parsed.type === "string" ? parsed.type : null;
2178
3232
  if (type === "user" || type === "assistant") {
2179
3233
  const msgRole = type === "user" ? "user" : "assistant";
@@ -2915,15 +3969,14 @@ function flushPending(bundle, pending, meta) {
2915
3969
  }
2916
3970
 
2917
3971
  // src/importers/codex/index.ts
2918
- init_cas();
2919
- init_db();
2920
3972
  import { readFile as readFile5 } from "fs/promises";
2921
- import path7 from "path";
3973
+ import path10 from "path";
3974
+ init_db();
2922
3975
  init_errors();
2923
3976
 
2924
3977
  // src/importers/codex/discover.ts
2925
3978
  import { readdir as readdir2 } from "fs/promises";
2926
- import path6 from "path";
3979
+ import path9 from "path";
2927
3980
  async function* discoverCodexSessions(root) {
2928
3981
  yield* walk(root);
2929
3982
  }
@@ -2935,7 +3988,7 @@ async function* walk(dir) {
2935
3988
  return;
2936
3989
  }
2937
3990
  for (const entry of entries) {
2938
- const full = path6.join(dir, entry.name);
3991
+ const full = path9.join(dir, entry.name);
2939
3992
  if (entry.isDirectory()) {
2940
3993
  yield* walk(full);
2941
3994
  } else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
@@ -3037,7 +4090,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
3037
4090
  const counts = emptyFileCounts2();
3038
4091
  const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
3039
4092
  sourceTool: "codex",
3040
- absolutePath: path7.resolve(filePath),
4093
+ absolutePath: path10.resolve(filePath),
3041
4094
  fileKind: "jsonl"
3042
4095
  });
3043
4096
  if (alreadyKnown) {
@@ -3123,7 +4176,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
3123
4176
  const payload = parsed.payload ?? {};
3124
4177
  if (type === "session_meta") {
3125
4178
  const meta = payload;
3126
- const sourceSessionId = meta.id ?? path7.basename(filePath, ".jsonl");
4179
+ const sourceSessionId = meta.id ?? path10.basename(filePath, ".jsonl");
3127
4180
  const sessionId3 = sessionId("codex", sourceSessionId);
3128
4181
  if (!pending.session) {
3129
4182
  const sub = parseSubagent(meta.source);
@@ -3155,11 +4208,11 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
3155
4208
  }
3156
4209
  continue;
3157
4210
  }
3158
- const sessionId2 = pending.session?.session_id ?? sessionId("codex", path7.basename(filePath, ".jsonl"));
4211
+ const sessionId2 = pending.session?.session_id ?? sessionId("codex", path10.basename(filePath, ".jsonl"));
3159
4212
  if (!pending.session) {
3160
4213
  pending.session = {
3161
4214
  session_id: sessionId2,
3162
- source_session_id: path7.basename(filePath, ".jsonl"),
4215
+ source_session_id: path10.basename(filePath, ".jsonl"),
3163
4216
  parent_session_id: null,
3164
4217
  is_subagent: 0,
3165
4218
  agent_role: null,
@@ -4056,25 +5109,24 @@ function flushPending2(bundle, pending, meta) {
4056
5109
  }
4057
5110
 
4058
5111
  // src/importers/cursor/index.ts
4059
- init_cas();
4060
- init_db();
4061
- import path9 from "path";
5112
+ import path12 from "path";
4062
5113
  import Database2 from "better-sqlite3";
5114
+ init_db();
4063
5115
  init_errors();
4064
5116
 
4065
5117
  // src/importers/cursor/discover.ts
4066
5118
  import { readdir as readdir3 } from "fs/promises";
4067
- import path8 from "path";
5119
+ import path11 from "path";
4068
5120
  async function* discoverCursorStores(root) {
4069
5121
  const workspaces = await readdirSafe2(root);
4070
5122
  for (const ws of workspaces) {
4071
5123
  if (!ws.isDirectory()) continue;
4072
- const wsPath = path8.join(root, ws.name);
5124
+ const wsPath = path11.join(root, ws.name);
4073
5125
  const agents = await readdirSafe2(wsPath);
4074
5126
  for (const ag of agents) {
4075
5127
  if (!ag.isDirectory()) continue;
4076
- const dbPath = path8.join(wsPath, ag.name, "store.db");
4077
- const dbEntries = await readdirSafe2(path8.join(wsPath, ag.name));
5128
+ const dbPath = path11.join(wsPath, ag.name, "store.db");
5129
+ const dbEntries = await readdirSafe2(path11.join(wsPath, ag.name));
4078
5130
  const hasStoreDb = dbEntries.some((e) => e.isFile() && e.name === "store.db");
4079
5131
  if (!hasStoreDb) continue;
4080
5132
  yield {
@@ -4173,7 +5225,7 @@ async function compileCursorStore(bundle, batch, store, logger) {
4173
5225
  const counts = emptyFileCounts3();
4174
5226
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
4175
5227
  sourceTool: "cursor",
4176
- absolutePath: path9.resolve(store.filePath),
5228
+ absolutePath: path12.resolve(store.filePath),
4177
5229
  fileKind: "sqlite",
4178
5230
  workspaceHint: store.workspaceId
4179
5231
  });
@@ -4775,29 +5827,27 @@ function flushPending3(bundle, pending) {
4775
5827
  }
4776
5828
 
4777
5829
  // src/importers/gemini/index.ts
4778
- init_hash();
4779
- init_cas();
4780
- init_db();
4781
5830
  import { readFile as readFile7 } from "fs/promises";
4782
- import path11 from "path";
5831
+ import path14 from "path";
5832
+ init_db();
4783
5833
  init_errors();
4784
5834
 
4785
5835
  // src/importers/gemini/discover.ts
4786
5836
  import { readFile as readFile6, readdir as readdir4 } from "fs/promises";
4787
- import path10 from "path";
5837
+ import path13 from "path";
4788
5838
  async function* discoverGeminiChats(root) {
4789
5839
  const entries = await readdirSafe3(root);
4790
5840
  for (const entry of entries) {
4791
5841
  if (!entry.isDirectory()) continue;
4792
5842
  if (entry.name === "bin") continue;
4793
- const projectRoot = await readProjectRoot(path10.join(root, entry.name));
4794
- const chatsDir = path10.join(root, entry.name, "chats");
5843
+ const projectRoot = await readProjectRoot(path13.join(root, entry.name));
5844
+ const chatsDir = path13.join(root, entry.name, "chats");
4795
5845
  const chatEntries = await readdirSafe3(chatsDir);
4796
5846
  for (const c of chatEntries) {
4797
5847
  if (!c.isFile()) continue;
4798
5848
  if (!c.name.startsWith("session-") || !c.name.endsWith(".json")) continue;
4799
5849
  yield {
4800
- filePath: path10.join(chatsDir, c.name),
5850
+ filePath: path13.join(chatsDir, c.name),
4801
5851
  projectDir: entry.name,
4802
5852
  projectRoot
4803
5853
  };
@@ -4806,7 +5856,7 @@ async function* discoverGeminiChats(root) {
4806
5856
  }
4807
5857
  async function readProjectRoot(dir) {
4808
5858
  try {
4809
- const text = await readFile6(path10.join(dir, ".project_root"), "utf8");
5859
+ const text = await readFile6(path13.join(dir, ".project_root"), "utf8");
4810
5860
  return text.replace(/\n+$/, "").trim() || null;
4811
5861
  } catch {
4812
5862
  return null;
@@ -4900,7 +5950,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
4900
5950
  const counts = emptyFileCounts4();
4901
5951
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
4902
5952
  sourceTool: "gemini",
4903
- absolutePath: path11.resolve(file.filePath),
5953
+ absolutePath: path14.resolve(file.filePath),
4904
5954
  fileKind: "json",
4905
5955
  workspaceHint: file.projectDir
4906
5956
  });
@@ -4953,7 +6003,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
4953
6003
  project: null,
4954
6004
  objects
4955
6005
  };
4956
- const sourceSid = parsed.sessionId ?? path11.basename(file.filePath, ".json");
6006
+ const sourceSid = parsed.sessionId ?? path14.basename(file.filePath, ".json");
4957
6007
  const sessionPk = sessionId("gemini", sourceSid);
4958
6008
  if (file.projectRoot) {
4959
6009
  pending.project = {
@@ -5448,227 +6498,90 @@ function flushPending4(bundle, pending) {
5448
6498
  insertCall.run(
5449
6499
  c.tool_call_id,
5450
6500
  pending.session.session_id,
5451
- c.message_id,
5452
- c.event_id,
5453
- c.source_call_id,
5454
- c.tool_name,
5455
- c.canonical_tool_type,
5456
- c.args_object_id,
5457
- c.command,
5458
- c.cwd,
5459
- c.path,
5460
- c.query,
5461
- c.timestamp_start,
5462
- c.status,
5463
- c.raw_record_id
5464
- );
5465
- }
5466
- const insertResult = prepare(
5467
- bundle.db,
5468
- `INSERT OR REPLACE INTO tool_results (
5469
- tool_result_id, tool_call_id, session_id, message_id, event_id,
5470
- source_call_id, status, is_error, exit_code, duration_ms,
5471
- stdout_object_id, stderr_object_id, output_object_id, preview, raw_record_id
5472
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, ?, ?, ?)`
5473
- );
5474
- for (const r of pending.toolResults) {
5475
- insertResult.run(
5476
- r.tool_result_id,
5477
- r.tool_call_id,
5478
- pending.session.session_id,
5479
- r.message_id,
5480
- r.event_id,
5481
- r.source_call_id,
5482
- r.status,
5483
- r.is_error,
5484
- r.output_object_id,
5485
- r.preview,
5486
- r.raw_record_id
5487
- );
5488
- }
5489
- const insertArtifact = prepare(
5490
- bundle.db,
5491
- `INSERT OR REPLACE INTO artifacts (
5492
- artifact_id, session_id, project_id, source_tool, kind, path,
5493
- logical_path, object_id, text_object_id, mime_type, size_bytes,
5494
- created_ts, raw_record_id
5495
- ) VALUES (?, ?, ?, 'gemini', ?, ?, ?, ?, ?, ?, ?, ?, ?)`
5496
- );
5497
- for (const a of pending.artifacts) {
5498
- insertArtifact.run(
5499
- a.artifact_id,
5500
- pending.session.session_id,
5501
- pending.project?.project_id ?? null,
5502
- a.kind,
5503
- a.path,
5504
- a.logical_path,
5505
- a.object_id,
5506
- a.text_object_id,
5507
- a.mime_type,
5508
- a.size_bytes,
5509
- a.created_ts,
5510
- a.raw_record_id
5511
- );
5512
- }
5513
- const insertSearch = prepare(
5514
- bundle.db,
5515
- `INSERT OR REPLACE INTO search_docs (
5516
- doc_id, entity_type, entity_id, session_id, project_id, timestamp,
5517
- role, tool_name, canonical_tool_type, field_kind, text
5518
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
5519
- );
5520
- for (const d of pending.searchDocs) {
5521
- insertSearch.run(
5522
- d.doc_id,
5523
- d.entity_type,
5524
- d.entity_id,
5525
- pending.session.session_id,
5526
- pending.project?.project_id ?? null,
5527
- d.timestamp,
5528
- d.role,
5529
- d.tool_name,
5530
- d.canonical_tool_type,
5531
- d.field_kind,
5532
- d.text
5533
- );
5534
- }
5535
- }
5536
-
5537
- // src/services/export/parquet.ts
5538
- import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
5539
- import path12 from "path";
5540
- import { DuckDBConnection } from "@duckdb/node-api";
5541
- init_errors();
5542
- var PARQUET_TABLES = [
5543
- "objects",
5544
- "source_files",
5545
- "import_batches",
5546
- "raw_records",
5547
- "import_errors",
5548
- "uncertainties",
5549
- "projects",
5550
- "sessions",
5551
- "turns",
5552
- "events",
5553
- "messages",
5554
- "content_blocks",
5555
- "tool_calls",
5556
- "tool_results",
5557
- "artifacts",
5558
- "edges",
5559
- "search_docs"
5560
- ];
5561
- async function exportBundleParquet(options) {
5562
- const snapshot = await openBundleSnapshot(options.bundlePath);
5563
- const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
5564
- await mkdir3(outDir, { recursive: true });
5565
- const files = Object.fromEntries(
5566
- PARQUET_TABLES.map((table) => [table, path12.join(outDir, `${table}.parquet`)])
5567
- );
5568
- const manifestPath = path12.join(outDir, "manifest.json");
5569
- for (const file of [...Object.values(files), manifestPath]) {
5570
- await rm(file, { force: true });
5571
- }
5572
- const connection = await createDuckDbConnection();
5573
- try {
5574
- await attachSqlite(connection, snapshot.dbPath);
5575
- for (const table of PARQUET_TABLES) {
5576
- await connection.run(
5577
- `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
5578
- );
5579
- }
5580
- } finally {
5581
- connection.closeSync();
5582
- }
5583
- const manifest = {
5584
- exported_at: (/* @__PURE__ */ new Date()).toISOString(),
5585
- source_db: snapshot.dbPath,
5586
- schema_version: snapshot.schemaVersion,
5587
- parser_version: snapshot.parserVersion,
5588
- tables: Object.fromEntries(
5589
- PARQUET_TABLES.map((table) => [
5590
- table,
5591
- {
5592
- file: path12.basename(files[table]),
5593
- rows: snapshot.counts[table]
5594
- }
5595
- ])
5596
- )
5597
- };
5598
- await writeFile4(manifestPath, `${JSON.stringify(manifest, null, 2)}
5599
- `, "utf8");
5600
- return { outDir, manifestPath, files, counts: snapshot.counts };
5601
- }
5602
- async function queryDuckDbParquet(options) {
5603
- const parquetDir = path12.resolve(options.parquetDir);
5604
- const connection = await createDuckDbConnection();
5605
- try {
5606
- for (const table of PARQUET_TABLES) {
5607
- await connection.run(
5608
- `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
5609
- path12.join(parquetDir, `${table}.parquet`)
5610
- )})`
5611
- );
5612
- }
5613
- const reader = await connection.runAndReadAll(options.sql);
5614
- return {
5615
- columns: reader.deduplicatedColumnNames(),
5616
- rows: reader.getRowObjectsJson()
5617
- };
5618
- } catch (error) {
5619
- if (isMissingParquetError(error)) {
5620
- throw new Error(
5621
- `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
5622
- );
5623
- }
5624
- throw error;
5625
- } finally {
5626
- connection.closeSync();
6501
+ c.message_id,
6502
+ c.event_id,
6503
+ c.source_call_id,
6504
+ c.tool_name,
6505
+ c.canonical_tool_type,
6506
+ c.args_object_id,
6507
+ c.command,
6508
+ c.cwd,
6509
+ c.path,
6510
+ c.query,
6511
+ c.timestamp_start,
6512
+ c.status,
6513
+ c.raw_record_id
6514
+ );
5627
6515
  }
5628
- }
5629
- async function createDuckDbConnection() {
5630
- return DuckDBConnection.create();
5631
- }
5632
- async function attachSqlite(connection, dbPath) {
5633
- try {
5634
- await connection.run("INSTALL sqlite");
5635
- await connection.run("LOAD sqlite");
5636
- await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
5637
- } catch (error) {
5638
- throw new Error(
5639
- `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`
6516
+ const insertResult = prepare(
6517
+ bundle.db,
6518
+ `INSERT OR REPLACE INTO tool_results (
6519
+ tool_result_id, tool_call_id, session_id, message_id, event_id,
6520
+ source_call_id, status, is_error, exit_code, duration_ms,
6521
+ stdout_object_id, stderr_object_id, output_object_id, preview, raw_record_id
6522
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, ?, ?, ?)`
6523
+ );
6524
+ for (const r of pending.toolResults) {
6525
+ insertResult.run(
6526
+ r.tool_result_id,
6527
+ r.tool_call_id,
6528
+ pending.session.session_id,
6529
+ r.message_id,
6530
+ r.event_id,
6531
+ r.source_call_id,
6532
+ r.status,
6533
+ r.is_error,
6534
+ r.output_object_id,
6535
+ r.preview,
6536
+ r.raw_record_id
5640
6537
  );
5641
6538
  }
5642
- }
5643
- async function openBundleSnapshot(bundlePath) {
5644
- const bundle = await openBundle(bundlePath);
5645
- try {
5646
- const counts = Object.fromEntries(
5647
- PARQUET_TABLES.map((table) => {
5648
- const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
5649
- return [table, row?.n ?? 0];
5650
- })
6539
+ const insertArtifact = prepare(
6540
+ bundle.db,
6541
+ `INSERT OR REPLACE INTO artifacts (
6542
+ artifact_id, session_id, project_id, source_tool, kind, path,
6543
+ logical_path, object_id, text_object_id, mime_type, size_bytes,
6544
+ created_ts, raw_record_id
6545
+ ) VALUES (?, ?, ?, 'gemini', ?, ?, ?, ?, ?, ?, ?, ?, ?)`
6546
+ );
6547
+ for (const a of pending.artifacts) {
6548
+ insertArtifact.run(
6549
+ a.artifact_id,
6550
+ pending.session.session_id,
6551
+ pending.project?.project_id ?? null,
6552
+ a.kind,
6553
+ a.path,
6554
+ a.logical_path,
6555
+ a.object_id,
6556
+ a.text_object_id,
6557
+ a.mime_type,
6558
+ a.size_bytes,
6559
+ a.created_ts,
6560
+ a.raw_record_id
6561
+ );
6562
+ }
6563
+ const insertSearch = prepare(
6564
+ bundle.db,
6565
+ `INSERT OR REPLACE INTO search_docs (
6566
+ doc_id, entity_type, entity_id, session_id, project_id, timestamp,
6567
+ role, tool_name, canonical_tool_type, field_kind, text
6568
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
6569
+ );
6570
+ for (const d of pending.searchDocs) {
6571
+ insertSearch.run(
6572
+ d.doc_id,
6573
+ d.entity_type,
6574
+ d.entity_id,
6575
+ pending.session.session_id,
6576
+ pending.project?.project_id ?? null,
6577
+ d.timestamp,
6578
+ d.role,
6579
+ d.tool_name,
6580
+ d.canonical_tool_type,
6581
+ d.field_kind,
6582
+ d.text
5651
6583
  );
5652
- return {
5653
- dbPath: bundle.paths.db,
5654
- schemaVersion: bundle.manifest.schema_version,
5655
- parserVersion: bundle.manifest.parser_version,
5656
- defaultOutDir: bundle.paths.parquet,
5657
- counts
5658
- };
5659
- } finally {
5660
- closeBundle(bundle);
5661
6584
  }
5662
- }
5663
- function quoteIdentifier(value) {
5664
- return `"${value.replace(/"/g, '""')}"`;
5665
- }
5666
- function sqlString(value) {
5667
- return `'${value.replace(/'/g, "''")}'`;
5668
- }
5669
- function isMissingParquetError(error) {
5670
- const message = getErrorMessage(error);
5671
- return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
5672
6585
  }
5673
6586
 
5674
6587
  // src/services/compile.ts
@@ -5678,28 +6591,28 @@ var COMPILE_PROVIDERS = [
5678
6591
  name: "codex",
5679
6592
  description: "Import Codex CLI session histories into the bundle.",
5680
6593
  pathHelp: "root of Codex CLI sessions",
5681
- defaultSessionsPath: () => path14.join(os2.homedir(), ".codex", "sessions"),
6594
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".codex", "sessions"),
5682
6595
  compile: compileCodex
5683
6596
  },
5684
6597
  {
5685
6598
  name: "claude",
5686
6599
  description: "Import Claude Code project histories into the bundle.",
5687
6600
  pathHelp: "root of Claude Code projects",
5688
- defaultSessionsPath: () => path14.join(os2.homedir(), ".claude", "projects"),
6601
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".claude", "projects"),
5689
6602
  compile: compileClaude
5690
6603
  },
5691
6604
  {
5692
6605
  name: "gemini",
5693
6606
  description: "Import Gemini CLI session histories into the bundle.",
5694
6607
  pathHelp: "root of Gemini CLI tmp dir",
5695
- defaultSessionsPath: () => path14.join(os2.homedir(), ".gemini", "tmp"),
6608
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".gemini", "tmp"),
5696
6609
  compile: compileGemini
5697
6610
  },
5698
6611
  {
5699
6612
  name: "cursor",
5700
6613
  description: "Import Cursor agent stores into the bundle.",
5701
6614
  pathHelp: "root of Cursor agent stores",
5702
- defaultSessionsPath: () => path14.join(os2.homedir(), ".cursor", "chats"),
6615
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".cursor", "chats"),
5703
6616
  compile: compileCursor
5704
6617
  }
5705
6618
  ];
@@ -5712,20 +6625,20 @@ function getCompileProvider(source) {
5712
6625
  }
5713
6626
  function resolveCompilePath(p) {
5714
6627
  if (p === "~") return os2.homedir();
5715
- if (p.startsWith("~/")) return path14.join(os2.homedir(), p.slice(2));
5716
- return path14.resolve(p);
6628
+ if (p.startsWith("~/")) return path16.join(os2.homedir(), p.slice(2));
6629
+ return path16.resolve(p);
5717
6630
  }
5718
6631
  async function runCompileImports(options) {
5719
- const { bundle, providers, deferIndex, logger } = options;
6632
+ const { bundle, providers, logger } = options;
6633
+ const overwrite = options.overwrite === true;
5720
6634
  let importedAny = false;
5721
6635
  const summaries = [];
5722
6636
  let tantivy = null;
5723
6637
  let tantivyError = null;
6638
+ let fts5Error = null;
5724
6639
  try {
5725
- if (deferIndex) {
5726
- logger?.info("disabling FTS5 triggers for deferred indexing");
5727
- disableFts5Triggers(bundle);
5728
- }
6640
+ logger?.info("disabling FTS5 triggers for bulk rebuild");
6641
+ disableFts5Triggers(bundle);
5729
6642
  for (const provider of providers) {
5730
6643
  const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
5731
6644
  const providerLogger = logger?.child({
@@ -5752,15 +6665,23 @@ async function runCompileImports(options) {
5752
6665
  summaries.push(summary);
5753
6666
  options.onProviderComplete?.(summary);
5754
6667
  }
5755
- logger?.info({ changed: importedAny, fts5_deferred: deferIndex }, "marking indexes");
5756
- markIndexesAfterImport(bundle, {
5757
- changed: importedAny,
5758
- fts5Deferred: deferIndex
5759
- });
5760
- if (importedAny) {
6668
+ const shouldRebuildIndexes = importedAny || overwrite;
6669
+ if (shouldRebuildIndexes) {
6670
+ logger?.info(
6671
+ { changed: importedAny, overwrite },
6672
+ importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
6673
+ );
6674
+ markIndexesAfterImport(bundle, { changed: true });
6675
+ try {
6676
+ logger?.info("rebuilding fts5 index");
6677
+ rebuildFts5Index(bundle);
6678
+ } catch (error) {
6679
+ fts5Error = getErrorMessage(error);
6680
+ logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
6681
+ }
5761
6682
  try {
5762
- logger?.info("rebuilding tantivy index");
5763
- const status = await rebuildTantivyIndex(bundle);
6683
+ logger?.info({ overwrite }, "rebuilding tantivy index");
6684
+ const status = await rebuildTantivyIndex(bundle, { overwrite });
5764
6685
  tantivy = { indexedDocCount: status.indexed_doc_count };
5765
6686
  options.onTantivyComplete?.(tantivy);
5766
6687
  } catch (error) {
@@ -5769,16 +6690,14 @@ async function runCompileImports(options) {
5769
6690
  }
5770
6691
  }
5771
6692
  } finally {
5772
- if (deferIndex) {
5773
- logger?.info("re-enabling FTS5 triggers");
5774
- enableFts5Triggers(bundle);
5775
- }
6693
+ enableFts5Triggers(bundle);
5776
6694
  }
5777
6695
  return {
5778
6696
  providers: summaries,
5779
6697
  importedAny,
5780
6698
  tantivy,
5781
- tantivyError
6699
+ tantivyError,
6700
+ fts5Error
5782
6701
  };
5783
6702
  }
5784
6703
  async function exportCompileParquet(options) {
@@ -5821,7 +6740,7 @@ function createCliLogger(options) {
5821
6740
  // src/cli/commands/compile.ts
5822
6741
  function compileCommand() {
5823
6742
  const command = addCompileLogOptions(
5824
- new Command("compile").description(
6743
+ new Command2("compile").description(
5825
6744
  "Import session histories from one agent CLI into the bundle."
5826
6745
  )
5827
6746
  );
@@ -5834,27 +6753,35 @@ function compileCommand() {
5834
6753
  return command;
5835
6754
  }
5836
6755
  function compileAllCommand() {
5837
- return addCompileLogOptions(new Command("compile-all")).description("Import all agent CLI session histories using default source paths.").option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(async (options) => {
6756
+ return addCompileLogOptions(new Command2("compile-all")).description("Import all agent CLI session histories using default source paths.").option(
6757
+ "--overwrite",
6758
+ "force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
6759
+ false
6760
+ ).action(async (options) => {
5838
6761
  await runCompiles({
5839
6762
  providers: COMPILE_PROVIDERS,
5840
6763
  storePath: defaultBundlePath(),
5841
- deferIndex: options.deferIndex ?? false,
6764
+ overwrite: options.overwrite,
5842
6765
  logOptions: options
5843
6766
  });
5844
6767
  });
5845
6768
  }
5846
6769
  function providerCompileCommand(provider) {
5847
- return addCompileLogOptions(new Command(provider.name)).description(provider.description).option(
6770
+ return addCompileLogOptions(new Command2(provider.name)).description(provider.description).option(
5848
6771
  "--sessions-path <path>",
5849
6772
  `${provider.pathHelp} (default: ${provider.defaultSessionsPath()})`,
5850
6773
  provider.defaultSessionsPath()
5851
- ).option("--store <path>", "bundle directory", defaultBundlePath()).option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(
6774
+ ).option("--store <path>", "bundle directory", defaultBundlePath()).option(
6775
+ "--overwrite",
6776
+ "force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
6777
+ false
6778
+ ).action(
5852
6779
  async (options, command) => {
5853
6780
  await runCompiles({
5854
6781
  providers: [provider],
5855
6782
  storePath: options.store,
5856
- deferIndex: options.deferIndex ?? false,
5857
6783
  sessionsPath: options.sessionsPath,
6784
+ overwrite: options.overwrite,
5858
6785
  logOptions: command.optsWithGlobals()
5859
6786
  });
5860
6787
  }
@@ -5873,8 +6800,8 @@ async function runCompiles(options) {
5873
6800
  const result = await runCompileImports({
5874
6801
  bundle,
5875
6802
  providers: options.providers,
5876
- deferIndex: options.deferIndex,
5877
6803
  sessionsPath: options.sessionsPath,
6804
+ overwrite: options.overwrite,
5878
6805
  logger,
5879
6806
  onProviderComplete: printCounts,
5880
6807
  onTantivyComplete: (status) => {
@@ -5887,7 +6814,8 @@ async function runCompiles(options) {
5887
6814
  closeBundle(bundle);
5888
6815
  logger.info({ store_path: storePath }, "bundle closed");
5889
6816
  }
5890
- if (importedAny) {
6817
+ const shouldExportParquet = importedAny || options.overwrite === true;
6818
+ if (shouldExportParquet) {
5891
6819
  try {
5892
6820
  const result = await exportCompileParquet({ storePath, logger });
5893
6821
  process.stdout.write(`parquet: wrote ${result.tableCount} tables to ${result.outDir}
@@ -5911,11 +6839,10 @@ function printCounts(summary) {
5911
6839
 
5912
6840
  // src/cli/commands/export.ts
5913
6841
  import { writeFile as writeFile6 } from "fs/promises";
5914
- import path16 from "path";
5915
- import { Command as Command2 } from "commander";
6842
+ import path17 from "path";
6843
+ import { Command as Command3 } from "commander";
5916
6844
 
5917
6845
  // src/services/export/markdown.ts
5918
- init_cas();
5919
6846
  async function exportSessionMarkdown(bundle, sessionId2) {
5920
6847
  const session = bundle.db.prepare(
5921
6848
  `SELECT session_id, source_tool, source_session_id, title, start_ts, end_ts,
@@ -6025,139 +6952,60 @@ function renderToolCall(c) {
6025
6952
  return lines.join("\n");
6026
6953
  }
6027
6954
 
6028
- // src/cli/bundle.ts
6029
- import path15 from "path";
6030
- async function withBundle(storePath, fn) {
6031
- const bundle = await openBundle(path15.resolve(storePath));
6032
- try {
6033
- return await fn(bundle);
6034
- } finally {
6035
- closeBundle(bundle);
6036
- }
6037
- }
6038
-
6039
6955
  // src/cli/commands/export.ts
6040
6956
  function exportCommand() {
6041
- const session = new Command2("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
6957
+ const session = new Command3("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
6042
6958
  if (options.format !== "markdown") {
6043
6959
  throw new Error(`unsupported format: ${options.format} (try --format markdown)`);
6044
6960
  }
6045
6961
  await withBundle(options.store, async (bundle) => {
6046
6962
  const markdown = await exportSessionMarkdown(bundle, sessionId2);
6047
6963
  if (options.out) {
6048
- await writeFile6(path16.resolve(options.out), markdown, "utf8");
6049
- process.stdout.write(`wrote ${path16.resolve(options.out)}
6964
+ await writeFile6(path17.resolve(options.out), markdown, "utf8");
6965
+ process.stdout.write(`wrote ${path17.resolve(options.out)}
6050
6966
  `);
6051
6967
  } else {
6052
6968
  process.stdout.write(markdown);
6053
6969
  }
6054
6970
  });
6055
6971
  });
6056
- const parquet = new Command2("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
6972
+ const parquet = new Command3("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
6057
6973
  const result = await exportBundleParquet({
6058
- bundlePath: path16.resolve(options.store),
6059
- outDir: options.out ? path16.resolve(options.out) : void 0
6974
+ bundlePath: path17.resolve(options.store),
6975
+ outDir: options.out ? path17.resolve(options.out) : void 0
6060
6976
  });
6061
6977
  process.stdout.write(`wrote parquet export to ${result.outDir}
6062
6978
  `);
6063
6979
  process.stdout.write(`manifest=${result.manifestPath}
6064
6980
  `);
6065
6981
  });
6066
- return new Command2("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
6982
+ return new Command3("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
6067
6983
  }
6068
6984
 
6069
6985
  // src/cli/commands/index.ts
6070
- import { Command as Command3 } from "commander";
6986
+ import { Command as Command4 } from "commander";
6071
6987
  init_indexing();
6072
-
6073
- // src/cli/output.ts
6074
- var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
6075
- var COL_SEPARATOR = " ";
6076
- var RULE_CHAR = "-";
6077
- function parseOutputFormat(value, fallback) {
6078
- if (value === void 0) return fallback;
6079
- if (OUTPUT_FORMATS.includes(value)) return value;
6080
- throw new Error(
6081
- `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
6082
- );
6083
- }
6084
- function printRows(rows, opts) {
6085
- switch (opts.format) {
6086
- case "json":
6087
- printJson(rows, opts);
6088
- return;
6089
- case "csv":
6090
- printCsv(rows, opts);
6091
- return;
6092
- case "table":
6093
- case "interactive":
6094
- printTable(rows, opts);
6095
- return;
6096
- }
6097
- }
6098
- function printJson(rows, opts) {
6099
- const out = opts.meta ? { ...opts.meta, rows } : rows;
6100
- process.stdout.write(`${JSON.stringify(out, null, 2)}
6101
- `);
6102
- }
6103
- function printCsv(rows, opts) {
6104
- const columns = opts.columns;
6105
- process.stdout.write(`${columns.map(csvField).join(",")}
6106
- `);
6107
- for (const row of rows) {
6108
- const record = row;
6109
- const line = columns.map((column) => csvField(formatCell(record[column]))).join(",");
6110
- process.stdout.write(`${line}
6111
- `);
6112
- }
6113
- }
6114
- function csvField(value) {
6115
- if (/[",\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
6116
- return value;
6117
- }
6118
- function printTable(rows, opts) {
6119
- const columns = opts.columns;
6120
- const widths = columns.map((column) => column.length);
6121
- const cells = rows.map((row) => {
6122
- const record = row;
6123
- return columns.map((column, index) => {
6124
- const text = formatCell(record[column]);
6125
- const width = widths[index] ?? 0;
6126
- if (text.length > width) widths[index] = text.length;
6127
- return text;
6128
- });
6129
- });
6130
- const header = columns.map((column, index) => column.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
6131
- const rule = columns.map((_, index) => RULE_CHAR.repeat(widths[index] ?? 0)).join(COL_SEPARATOR);
6132
- process.stdout.write(`${header}
6133
- ${rule}
6134
- `);
6135
- for (const cellRow of cells) {
6136
- const line = cellRow.map((cell, index) => cell.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
6137
- process.stdout.write(`${line}
6138
- `);
6139
- }
6140
- }
6141
- function formatCell(value) {
6142
- if (value == null) return "";
6143
- if (typeof value === "string") return value;
6144
- if (typeof value === "number" || typeof value === "boolean") return String(value);
6145
- return JSON.stringify(value);
6146
- }
6147
-
6148
- // src/cli/commands/index.ts
6149
6988
  function indexCommand() {
6150
- const fts5 = new Command3("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
6989
+ const fts5 = new Command4("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
6990
+ "--overwrite",
6991
+ "rebuild from scratch (FTS5 always overwrites; flag accepted for parity with other index commands)",
6992
+ false
6993
+ ).action(async (options) => {
6151
6994
  await withBundle(options.store, (bundle) => {
6995
+ void options.overwrite;
6152
6996
  printIndexStatus(rebuildFts5Index(bundle));
6153
6997
  });
6154
6998
  });
6155
- const tantivy = new Command3("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
6999
+ const tantivy = new Command4("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
7000
+ "--overwrite",
7001
+ "force a full re-index instead of the default incremental rebuild",
7002
+ false
7003
+ ).action(async (options) => {
6156
7004
  await withBundle(options.store, async (bundle) => {
6157
- printIndexStatus(await rebuildTantivyIndex(bundle));
7005
+ printIndexStatus(await rebuildTantivyIndex(bundle, { overwrite: options.overwrite }));
6158
7006
  });
6159
7007
  });
6160
- const status = new Command3("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
7008
+ const status = new Command4("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
6161
7009
  const format = parseOutputFormat(options.outputFormat, "table");
6162
7010
  await withBundle(options.store, (bundle) => {
6163
7011
  const rows = getSearchIndexStatuses(bundle);
@@ -6174,7 +7022,7 @@ function indexCommand() {
6174
7022
  });
6175
7023
  });
6176
7024
  });
6177
- return new Command3("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
7025
+ return new Command4("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
6178
7026
  }
6179
7027
  function printIndexStatus(status) {
6180
7028
  process.stdout.write(
@@ -6186,11 +7034,11 @@ function printIndexStatus(status) {
6186
7034
 
6187
7035
  // src/cli/commands/init.ts
6188
7036
  import { stat as stat3 } from "fs/promises";
6189
- import path17 from "path";
6190
- import { Command as Command4 } from "commander";
7037
+ import path18 from "path";
7038
+ import { Command as Command5 } from "commander";
6191
7039
  function initCommand() {
6192
- return new Command4("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
6193
- const resolved = path17.resolve(options.store);
7040
+ return new Command5("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
7041
+ const resolved = path18.resolve(options.store);
6194
7042
  const exists2 = await stat3(`${resolved}/manifest.json`).then(() => true).catch(() => false);
6195
7043
  if (exists2) {
6196
7044
  if (!options.forceExisting) {
@@ -6215,8 +7063,8 @@ use --force-existing to skip without erroring
6215
7063
  }
6216
7064
 
6217
7065
  // src/cli/commands/mcp.ts
6218
- import path18 from "path";
6219
- import { Command as Command5 } from "commander";
7066
+ import path19 from "path";
7067
+ import { Command as Command6 } from "commander";
6220
7068
 
6221
7069
  // src/mcp/server.ts
6222
7070
  init_errors();
@@ -6228,213 +7076,222 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
6228
7076
 
6229
7077
  // src/mcp/guidance.ts
6230
7078
  var PROSA_MCP_INSTRUCTIONS = `
6231
- prosa is a local memory over local agent session histories. Use it to import recent sessions,
6232
- find prior work, commands, decisions, file touches, and full transcripts before answering from
6233
- memory.
7079
+ prosa is a local memory over local agent session histories. Use it to find prior work, commands,
7080
+ decisions, file touches, transcripts, and analytical rollups before answering from memory.
6234
7081
 
6235
- Recommended workflow:
6236
- - Use compile to refresh the bundle when recent local sessions may not be indexed yet. With no
6237
- input it imports all supported providers from default paths.
6238
- - For open-ended questions, start with search_sessions using 2-5 concrete terms.
6239
- - For questions about a file or path, start with find_touched_files, then inspect the returned sessions.
6240
- - After search results, call get_session for the most relevant session_ids before drawing conclusions.
6241
- - Use export_session_markdown only after selecting a likely session; it can return a large transcript.
6242
- - Use list_tool_calls for command history, failed tools, patches, and operational audit trails.
6243
- - Use get_artifact only when a returned artifact_id is needed for full output or diff content.
6244
- - Use index_status if search results look stale or unexpectedly empty.
7082
+ There are six tools:
7083
+ - search: full-text over messages, commands, paths, diffs, and previews. Start here for open-ended
7084
+ questions with 2-5 concrete terms. Optional engine, field_kind, raw, since/until filters.
7085
+ - sessions: without session_id, lists candidates filtered by source/time/limit. With session_id,
7086
+ opens the session: format=detail (default) returns metadata + timeline, format=summary returns
7087
+ only the session row, format=markdown renders the full transcript.
7088
+ - tool_calls: audit commands and tool usage. Filters by tool_name, canonical_type, session_id,
7089
+ errors_only. When path_substring is set, also returns artifacts touching that path \u2014 use this for
7090
+ file-history questions.
7091
+ - analytics: built-in aggregate reports backed by SQLite views. Pick report=sessions|tools|errors|
7092
+ models|projects with the matching filters. Use report=sessions with session_id or
7093
+ source_path_substring for per-session metrics.
7094
+ - artifact: fetch full text for an artifact_id when previews are not enough. Binary artifacts return
7095
+ a placeholder.
7096
+ - compile: with no input, returns a status snapshot (search index health). With source (and
7097
+ optionally sessions_path), imports that provider into the bundle. Use status mode when search
7098
+ results look stale; use import mode when local sessions may not be indexed yet.
6245
7099
 
6246
- When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant snippet
6247
- or event. Do not treat search snippets as the whole truth; open the session when accuracy matters.
7100
+ When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant
7101
+ snippet or event. Do not treat search snippets as the whole truth; open the session with
7102
+ \`sessions session_id=\u2026 format=detail\` when accuracy matters.
6248
7103
  `.trim();
6249
7104
  var INVESTIGATE_PRIOR_WORK_PROMPT = `
6250
7105
  Investigate prior work in prosa for the topic: {{topic}}
6251
7106
 
6252
7107
  Use this workflow:
6253
- 1. Call search_sessions with a short query built from the topic.
7108
+ 1. Call \`search\` with a short query built from the topic.
6254
7109
  2. If results are broad, search again with narrower terms from the best snippets.
6255
- 3. Open the most relevant session_ids with get_session.
6256
- 4. Use export_session_markdown only for sessions that appear directly relevant.
7110
+ 3. Open the most relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
7111
+ 4. Use \`sessions session_id=\u2026 format=markdown\` only for sessions that appear directly relevant.
6257
7112
  5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
6258
7113
  `.trim();
6259
7114
  var FIND_FILE_HISTORY_PROMPT = `
6260
7115
  Investigate history for file/path: {{path}}
6261
7116
 
6262
7117
  Use this workflow:
6263
- 1. Call find_touched_files with the path or the most distinctive path suffix.
6264
- 2. Open returned session_ids with get_session.
6265
- 3. Use list_tool_calls with session_id when you need command-level detail.
6266
- 4. Use export_session_markdown only for the most relevant session.
7118
+ 1. Call \`tool_calls\` with path_substring set to the path or its most distinctive suffix.
7119
+ 2. Open returned session_ids with \`sessions session_id=\u2026 format=detail\`.
7120
+ 3. Call \`tool_calls\` with session_id when you need command-level detail inside one session.
7121
+ 4. Use \`sessions session_id=\u2026 format=markdown\` only for the most relevant session.
6267
7122
  5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
6268
7123
  `.trim();
6269
7124
  var AUDIT_TOOL_FAILURES_PROMPT = `
6270
7125
  Audit tool failures in prosa{{query_clause}}.
6271
7126
 
6272
7127
  Use this workflow:
6273
- 1. Call list_tool_calls with errors_only=true.
6274
- 2. If a query is provided, also call search_sessions for that query to find related context.
6275
- 3. Open relevant session_ids with get_session.
6276
- 4. Group failures by tool_name, command/path, and likely cause.
6277
- 5. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
7128
+ 1. For an aggregate report, call \`analytics report=errors\` (filter by source/since/until/tool_name
7129
+ as needed).
7130
+ 2. For per-call evidence, call \`tool_calls\` with errors_only=true.
7131
+ 3. If a query is provided, also call \`search\` for that query to find related context.
7132
+ 4. Open relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
7133
+ 5. Group failures by tool_name, command/path, and likely cause.
7134
+ 6. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
6278
7135
  `.trim();
6279
7136
 
6280
7137
  // src/mcp/tools.ts
6281
7138
  import { z } from "zod";
6282
-
6283
- // src/core/domain/types.ts
6284
- var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
6285
-
6286
- // src/mcp/tools.ts
6287
7139
  init_errors();
6288
- init_limits();
6289
7140
  init_indexing();
6290
7141
  init_search();
6291
7142
  init_sessions();
7143
+
7144
+ // src/services/tool_calls.ts
7145
+ init_limits();
7146
+ function listToolCalls(bundle, filters = {}) {
7147
+ const conds = [];
7148
+ const params = [];
7149
+ if (filters.toolName) {
7150
+ conds.push("tc.tool_name = ?");
7151
+ params.push(filters.toolName);
7152
+ }
7153
+ if (filters.canonicalType) {
7154
+ conds.push("tc.canonical_tool_type = ?");
7155
+ params.push(filters.canonicalType);
7156
+ }
7157
+ if (filters.sessionId) {
7158
+ conds.push("tc.session_id = ?");
7159
+ params.push(filters.sessionId);
7160
+ }
7161
+ if (filters.errorsOnly) {
7162
+ conds.push("(tr.is_error = 1 OR tc.status = ?)");
7163
+ params.push("error");
7164
+ }
7165
+ if (filters.pathSubstring) {
7166
+ conds.push("tc.path IS NOT NULL AND tc.path LIKE ?");
7167
+ params.push(`%${filters.pathSubstring}%`);
7168
+ }
7169
+ if (filters.sinceIso) {
7170
+ conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
7171
+ params.push(filters.sinceIso);
7172
+ }
7173
+ if (filters.untilIso) {
7174
+ conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
7175
+ params.push(filters.untilIso);
7176
+ }
7177
+ const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
7178
+ const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
7179
+ const toolCallSql = `
7180
+ SELECT 'tool_call' AS entity_type,
7181
+ tc.session_id,
7182
+ tc.tool_call_id,
7183
+ NULL AS artifact_id,
7184
+ tc.tool_name,
7185
+ tc.canonical_tool_type,
7186
+ tc.command,
7187
+ tc.path,
7188
+ tc.status,
7189
+ tc.timestamp_start,
7190
+ tr.is_error,
7191
+ tr.exit_code,
7192
+ tr.preview
7193
+ FROM tool_calls tc
7194
+ LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
7195
+ ${where}
7196
+ `;
7197
+ if (!filters.pathSubstring) {
7198
+ const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
7199
+ return bundle.db.prepare(sql2).all(...params);
7200
+ }
7201
+ const artifactSql = `
7202
+ SELECT 'artifact' AS entity_type,
7203
+ a.session_id,
7204
+ NULL AS tool_call_id,
7205
+ a.artifact_id,
7206
+ NULL AS tool_name,
7207
+ NULL AS canonical_tool_type,
7208
+ NULL AS command,
7209
+ a.path,
7210
+ NULL AS status,
7211
+ a.created_ts AS timestamp_start,
7212
+ NULL AS is_error,
7213
+ NULL AS exit_code,
7214
+ NULL AS preview
7215
+ FROM artifacts a
7216
+ WHERE a.path IS NOT NULL AND a.path LIKE ?
7217
+ `;
7218
+ const sql = `
7219
+ ${toolCallSql}
7220
+ UNION ALL
7221
+ ${artifactSql}
7222
+ ORDER BY timestamp_start DESC
7223
+ LIMIT ${limit2}
7224
+ `;
7225
+ return bundle.db.prepare(sql).all(...params, `%${filters.pathSubstring}%`);
7226
+ }
7227
+
7228
+ // src/mcp/tools.ts
7229
+ var CANONICAL_TOOL_TYPES = [
7230
+ "shell",
7231
+ "read_file",
7232
+ "write_file",
7233
+ "edit_file",
7234
+ "search_file",
7235
+ "web_search",
7236
+ "mcp",
7237
+ "subagent",
7238
+ "patch",
7239
+ "other"
7240
+ ];
7241
+ var FIELD_KINDS = [
7242
+ "message_text",
7243
+ "user_prompt",
7244
+ "assistant_text",
7245
+ "command",
7246
+ "command_output_preview",
7247
+ "error",
7248
+ "file_path",
7249
+ "diff",
7250
+ "summary",
7251
+ "artifact_text",
7252
+ "tool_args",
7253
+ "tool_result"
7254
+ ];
6292
7255
  function registerProsaTools(server, bundle, options = {}) {
6293
7256
  const searchEngine = options.searchEngine ?? "fts5";
6294
7257
  const storePath = options.storePath ?? bundle.path;
6295
7258
  const ensureStore = options.ensureStore ?? false;
6296
7259
  registerProsaPrompts(server);
6297
7260
  server.registerTool(
6298
- "compile",
6299
- {
6300
- title: "Compile sessions",
6301
- description: "Import local agent session histories into the active prosa bundle. With no input, compiles all providers from default paths. With source, compiles that provider; sessions_path may override that provider path.",
6302
- inputSchema: {
6303
- source: z.enum(SOURCE_TOOLS).optional(),
6304
- sessions_path: z.string().min(1).optional()
6305
- },
6306
- annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
6307
- },
6308
- async ({ source, sessions_path }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
6309
- if (sessions_path && !source) {
6310
- return {
6311
- content: [
6312
- {
6313
- type: "text",
6314
- text: "sessions_path requires source because providers use incompatible source layouts"
6315
- }
6316
- ],
6317
- isError: true
6318
- };
6319
- }
6320
- try {
6321
- const result = await runCompileImports({
6322
- bundle: activeBundle,
6323
- providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
6324
- deferIndex: false,
6325
- sessionsPath: sessions_path
6326
- });
6327
- const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
6328
- return {
6329
- content: [
6330
- {
6331
- type: "text",
6332
- text: JSON.stringify(
6333
- {
6334
- providers: result.providers.map((provider) => ({
6335
- source: provider.source,
6336
- source_path: provider.sourcePath,
6337
- batch_id: provider.batchId,
6338
- counts: provider.counts
6339
- })),
6340
- imported_any: result.importedAny,
6341
- tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
6342
- tantivy_error: result.tantivyError,
6343
- parquet: parquet ? {
6344
- out_dir: parquet.outDir,
6345
- manifest_path: parquet.manifestPath,
6346
- table_count: parquet.tableCount,
6347
- files: parquet.files,
6348
- counts: parquet.counts
6349
- } : null
6350
- },
6351
- null,
6352
- 2
6353
- )
6354
- }
6355
- ]
6356
- };
6357
- } catch (error) {
6358
- return {
6359
- content: [{ type: "text", text: getErrorMessage(error) }],
6360
- isError: true
6361
- };
6362
- }
6363
- })
6364
- );
6365
- server.registerTool(
6366
- "list_sessions",
6367
- {
6368
- title: "List sessions",
6369
- description: "List recent sessions when you need candidates by source/date before deeper inspection. Next step: call get_session for relevant session_id values.",
6370
- inputSchema: {
6371
- source: z.enum(SOURCE_TOOLS).optional(),
6372
- since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
6373
- until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
6374
- limit: z.number().int().min(1).max(500).optional().default(50)
6375
- },
6376
- annotations: { readOnlyHint: true, idempotentHint: true }
6377
- },
6378
- async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
6379
- const rows = listSessions(activeBundle, {
6380
- sourceTool: input.source,
6381
- sinceIso: input.since,
6382
- untilIso: input.until,
6383
- limit: input.limit ?? 50
6384
- });
6385
- return {
6386
- content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
6387
- };
6388
- })
6389
- );
6390
- server.registerTool(
6391
- "get_session",
6392
- {
6393
- title: "Get session detail",
6394
- description: "Open one session and return metadata plus timeline events. Use this after search_sessions, list_sessions, find_touched_files, or list_tool_calls before making evidence-backed claims.",
6395
- inputSchema: {
6396
- session_id: z.string().min(1)
6397
- },
6398
- annotations: { readOnlyHint: true, idempotentHint: true }
6399
- },
6400
- async ({ session_id }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
6401
- const detail = getSession(activeBundle, session_id);
6402
- if (!detail) {
6403
- return {
6404
- content: [{ type: "text", text: `session not found: ${session_id}` }],
6405
- isError: true
6406
- };
6407
- }
6408
- return {
6409
- content: [{ type: "text", text: JSON.stringify(detail, null, 2) }]
6410
- };
6411
- })
6412
- );
6413
- server.registerTool(
6414
- "search_sessions",
7261
+ "search",
6415
7262
  {
6416
7263
  title: "Full-text search",
6417
- description: `Search messages, commands, paths, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms, then call get_session for relevant hits.`,
7264
+ description: `Search messages, commands, paths, diffs, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms; then call \`sessions\` for relevant hits.`,
6418
7265
  inputSchema: {
6419
7266
  query: z.string().min(1),
7267
+ engine: z.enum(["fts5", "tantivy"]).optional(),
7268
+ field_kind: z.enum(FIELD_KINDS).optional(),
6420
7269
  limit: z.number().int().min(1).max(500).optional().default(50),
6421
- raw: z.boolean().optional().default(false)
7270
+ raw: z.boolean().optional().default(false).describe("Pass query straight to FTS5 MATCH (allows OR/NEAR/prefixes).")
6422
7271
  },
6423
7272
  annotations: { readOnlyHint: true, idempotentHint: true }
6424
7273
  },
6425
- async ({ query, limit, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7274
+ async ({ query, engine, field_kind, limit: limit2, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7275
+ const selectedEngine = engine ?? searchEngine;
6426
7276
  const hits = searchFullText(activeBundle, {
6427
7277
  query,
6428
- limit: limit ?? 50,
7278
+ limit: limit2 ?? 50,
6429
7279
  raw,
6430
- engine: searchEngine
7280
+ engine: selectedEngine
6431
7281
  });
7282
+ const filtered = field_kind ? hits.filter((hit) => hit.field_kind === field_kind) : hits;
6432
7283
  return {
6433
7284
  content: [
6434
7285
  {
6435
7286
  type: "text",
6436
7287
  text: JSON.stringify(
6437
- { query, engine: searchEngine, count: hits.length, hits },
7288
+ {
7289
+ query,
7290
+ engine: selectedEngine,
7291
+ field_kind: field_kind ?? null,
7292
+ count: filtered.length,
7293
+ hits: filtered
7294
+ },
6438
7295
  null,
6439
7296
  2
6440
7297
  )
@@ -6444,125 +7301,157 @@ function registerProsaTools(server, bundle, options = {}) {
6444
7301
  })
6445
7302
  );
6446
7303
  server.registerTool(
6447
- "export_session_markdown",
7304
+ "sessions",
6448
7305
  {
6449
- title: "Export session as Markdown",
6450
- description: "Render a selected session into a readable transcript. Use only after get_session confirms relevance; this can return much more context than snippets.",
7306
+ title: "List or open sessions",
7307
+ description: "Without `session_id`, lists sessions filtered by source/time/limit. With `session_id`, opens that session: `format=detail` (default) returns metadata plus timeline events; `format=summary` returns only the session row; `format=markdown` renders the readable transcript. Call after `search` to get evidence behind a hit.",
6451
7308
  inputSchema: {
6452
- session_id: z.string().min(1)
7309
+ session_id: z.string().min(1).optional(),
7310
+ format: z.enum(["summary", "detail", "markdown"]).optional().default("detail"),
7311
+ source: z.enum(SOURCE_TOOLS).optional(),
7312
+ since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
7313
+ until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
7314
+ limit: z.number().int().min(1).max(500).optional().default(50)
6453
7315
  },
6454
7316
  annotations: { readOnlyHint: true, idempotentHint: true }
6455
7317
  },
6456
- async ({ session_id }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
6457
- try {
6458
- const md = await exportSessionMarkdown(activeBundle, session_id);
6459
- return { content: [{ type: "text", text: md }] };
6460
- } catch (error) {
7318
+ async ({ session_id, format, source, since, until, limit: limit2 }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
7319
+ if (!session_id) {
7320
+ const rows = listSessions(activeBundle, {
7321
+ sourceTool: source,
7322
+ sinceIso: since,
7323
+ untilIso: until,
7324
+ limit: limit2 ?? 50
7325
+ });
6461
7326
  return {
6462
- content: [{ type: "text", text: getErrorMessage(error) }],
7327
+ content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
7328
+ };
7329
+ }
7330
+ if (format === "markdown") {
7331
+ try {
7332
+ const md = await exportSessionMarkdown(activeBundle, session_id);
7333
+ return { content: [{ type: "text", text: md }] };
7334
+ } catch (error) {
7335
+ return {
7336
+ content: [{ type: "text", text: getErrorMessage(error) }],
7337
+ isError: true
7338
+ };
7339
+ }
7340
+ }
7341
+ const detail = getSession(activeBundle, session_id);
7342
+ if (!detail) {
7343
+ return {
7344
+ content: [{ type: "text", text: `session not found: ${session_id}` }],
6463
7345
  isError: true
6464
7346
  };
6465
7347
  }
7348
+ const payload = format === "summary" ? { session: detail.session } : detail;
7349
+ return {
7350
+ content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
7351
+ };
6466
7352
  })
6467
7353
  );
6468
7354
  server.registerTool(
6469
- "list_tool_calls",
7355
+ "tool_calls",
6470
7356
  {
6471
- title: "List tool calls",
6472
- description: "Audit commands and tool usage by tool name, canonical type, error status, or session. Use this for failed commands, shell history, patches, and operational evidence; then open relevant sessions with get_session.",
7357
+ title: "Audit tool calls and file touches",
7358
+ description: "Audit commands and tool usage. Filter by tool_name, canonical_type, session_id, errors_only, or path_substring. When `path_substring` is set, also surfaces matching artifacts so file-history questions return both invocations and produced files.",
6473
7359
  inputSchema: {
7360
+ session_id: z.string().min(1).optional(),
6474
7361
  tool_name: z.string().optional(),
6475
- canonical_type: z.enum([
6476
- "shell",
6477
- "read_file",
6478
- "write_file",
6479
- "edit_file",
6480
- "search_file",
6481
- "web_search",
6482
- "mcp",
6483
- "subagent",
6484
- "patch",
6485
- "other"
6486
- ]).optional(),
6487
- session_id: z.string().optional(),
7362
+ canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional(),
7363
+ path_substring: z.string().min(1).optional().describe("Filter rows where tool_calls.path or artifacts.path contains this substring."),
6488
7364
  errors_only: z.boolean().optional().default(false),
7365
+ since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
7366
+ until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
6489
7367
  limit: z.number().int().min(1).max(500).optional().default(100)
6490
7368
  },
6491
7369
  annotations: { readOnlyHint: true, idempotentHint: true }
6492
7370
  },
6493
- async ({ tool_name, canonical_type, session_id, errors_only, limit }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
6494
- const conds = [];
6495
- const params = [];
6496
- if (tool_name) {
6497
- conds.push("tc.tool_name = ?");
6498
- params.push(tool_name);
6499
- }
6500
- if (canonical_type) {
6501
- conds.push("tc.canonical_tool_type = ?");
6502
- params.push(canonical_type);
6503
- }
6504
- if (session_id) {
6505
- conds.push("tc.session_id = ?");
6506
- params.push(session_id);
6507
- }
6508
- if (errors_only) {
6509
- conds.push("(tr.is_error = 1 OR tc.status = ?)");
6510
- params.push("error");
6511
- }
6512
- const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
6513
- const sql = `
6514
- SELECT tc.tool_call_id, tc.session_id, tc.tool_name, tc.canonical_tool_type,
6515
- tc.command, tc.path, tc.status, tc.timestamp_start,
6516
- tr.is_error, tr.exit_code, tr.preview
6517
- FROM tool_calls tc
6518
- LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
6519
- ${where}
6520
- ORDER BY tc.timestamp_start DESC
6521
- LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
6522
- `;
6523
- const rows = activeBundle.db.prepare(sql).all(...params);
7371
+ async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7372
+ const rows = listToolCalls(activeBundle, {
7373
+ sessionId: input.session_id,
7374
+ toolName: input.tool_name,
7375
+ canonicalType: input.canonical_type,
7376
+ pathSubstring: input.path_substring,
7377
+ errorsOnly: input.errors_only,
7378
+ sinceIso: input.since,
7379
+ untilIso: input.until,
7380
+ limit: input.limit ?? 100
7381
+ });
6524
7382
  return {
6525
7383
  content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
6526
7384
  };
6527
7385
  })
6528
7386
  );
6529
7387
  server.registerTool(
6530
- "find_touched_files",
7388
+ "analytics",
6531
7389
  {
6532
- title: "Find sessions that touched a file",
6533
- description: "Find sessions with tool calls or artifacts whose path contains `path_substring`. Start here for file-history questions, then open returned sessions with get_session.",
7390
+ title: "Aggregate analytics reports",
7391
+ description: "Run a built-in aggregation over the bundle: per-session metrics (`sessions`), tool usage rollup (`tools`), error timeline (`errors`), model usage (`models`), or project activity (`projects`). Backed by SQLite views; mirrors the `prosa analytics` CLI.",
6534
7392
  inputSchema: {
6535
- path_substring: z.string().min(1),
6536
- limit: z.number().int().min(1).max(500).optional().default(100)
7393
+ report: z.enum(ANALYTICS_REPORTS),
7394
+ source: z.enum(SOURCE_TOOLS).optional(),
7395
+ since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
7396
+ until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
7397
+ limit: z.number().int().min(1).max(500).optional().default(50),
7398
+ session_id: z.string().min(1).optional().describe("Drill-down filter (applies to `sessions` report)."),
7399
+ source_path_substring: z.string().min(1).optional().describe("Filter `sessions` rows by imported source file path substring."),
7400
+ project: z.string().min(1).optional().describe("Filter by project id, name, or path substring."),
7401
+ tool_name: z.string().min(1).optional().describe("Filter `tools`/`errors` rows by exact tool name."),
7402
+ canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional().describe("Filter `tools` rows by canonical tool type."),
7403
+ errors_only: z.boolean().optional().describe("`tools` report: only error rows."),
7404
+ category: z.string().min(1).optional().describe("Filter `errors` by category: tool_result|import_error|uncertainty."),
7405
+ model: z.string().min(1).optional().describe("Filter `models` rows by exact model name.")
6537
7406
  },
6538
7407
  annotations: { readOnlyHint: true, idempotentHint: true }
6539
7408
  },
6540
- async ({ path_substring, limit }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
6541
- const sql = `
6542
- SELECT tc.session_id, tc.tool_name, tc.canonical_tool_type, tc.path,
6543
- tc.timestamp_start, tc.command
6544
- FROM tool_calls tc
6545
- WHERE tc.path IS NOT NULL AND tc.path LIKE ?
6546
- UNION ALL
6547
- SELECT a.session_id AS session_id, NULL AS tool_name, NULL AS canonical_tool_type,
6548
- a.path, a.created_ts AS timestamp_start, NULL AS command
6549
- FROM artifacts a
6550
- WHERE a.path IS NOT NULL AND a.path LIKE ?
6551
- ORDER BY timestamp_start DESC
6552
- LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
6553
- `;
6554
- const like = `%${path_substring}%`;
6555
- const rows = activeBundle.db.prepare(sql).all(like, like);
6556
- return {
6557
- content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
7409
+ async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7410
+ const filters = {
7411
+ source: input.source,
7412
+ since: input.since,
7413
+ until: input.until,
7414
+ limit: input.limit,
7415
+ sessionId: input.session_id,
7416
+ sourcePathSubstring: input.source_path_substring,
7417
+ project: input.project,
7418
+ toolName: input.tool_name,
7419
+ canonicalType: input.canonical_type,
7420
+ errorsOnly: input.errors_only,
7421
+ category: input.category,
7422
+ model: input.model
6558
7423
  };
7424
+ try {
7425
+ const result = runAnalyticsReportFromBundle({
7426
+ bundle: activeBundle,
7427
+ report: input.report,
7428
+ filters
7429
+ });
7430
+ return {
7431
+ content: [
7432
+ {
7433
+ type: "text",
7434
+ text: JSON.stringify(
7435
+ { report: input.report, count: result.rows.length, rows: result.rows },
7436
+ null,
7437
+ 2
7438
+ )
7439
+ }
7440
+ ]
7441
+ };
7442
+ } catch (error) {
7443
+ return {
7444
+ content: [{ type: "text", text: getErrorMessage(error) }],
7445
+ isError: true
7446
+ };
7447
+ }
6559
7448
  })
6560
7449
  );
6561
7450
  server.registerTool(
6562
- "get_artifact",
7451
+ "artifact",
6563
7452
  {
6564
7453
  title: "Get artifact bytes/text",
6565
- description: "Retrieve full text for an artifact_id found in a session or export. Use this for detailed diffs or large tool outputs after identifying the artifact; binary artifacts return a placeholder.",
7454
+ description: "Retrieve full text for an `artifact_id` referenced in a session, search hit, or tool_calls row. Use this when previews are not enough; binary artifacts return a placeholder.",
6566
7455
  inputSchema: {
6567
7456
  artifact_id: z.string().min(1)
6568
7457
  },
@@ -6581,8 +7470,7 @@ function registerProsaTools(server, bundle, options = {}) {
6581
7470
  return { content: [{ type: "text", text: "[no content stored]" }] };
6582
7471
  }
6583
7472
  try {
6584
- const { getText: getText2 } = await Promise.resolve().then(() => (init_cas(), cas_exports));
6585
- const text = await getText2(activeBundle, objectId);
7473
+ const text = await getText(activeBundle, objectId);
6586
7474
  return { content: [{ type: "text", text }] };
6587
7475
  } catch {
6588
7476
  return { content: [{ type: "text", text: `[binary artifact: ${objectId}]` }] };
@@ -6590,18 +7478,89 @@ function registerProsaTools(server, bundle, options = {}) {
6590
7478
  })
6591
7479
  );
6592
7480
  server.registerTool(
6593
- "index_status",
7481
+ "compile",
6594
7482
  {
6595
- title: "Search index status",
6596
- description: "Show whether derived search indexes are ready, stale, missing, building, or failed. Use when search results are unexpectedly empty or when choosing between FTS5 and Tantivy.",
6597
- inputSchema: {},
6598
- annotations: { readOnlyHint: true, idempotentHint: true }
7483
+ title: "Compile sessions or report bundle status",
7484
+ description: "Without input, returns a status snapshot (search index health, last batch, schema version) without mutating anything. With `source`, imports that provider; `sessions_path` may override its default. Pass `overwrite: true` to force a full rebuild of derived indexes (Tantivy from scratch). With neither `source` nor `sessions_path`, only status is returned.",
7485
+ inputSchema: {
7486
+ source: z.enum(SOURCE_TOOLS).optional(),
7487
+ sessions_path: z.string().min(1).optional(),
7488
+ overwrite: z.boolean().optional()
7489
+ },
7490
+ annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
6599
7491
  },
6600
- async () => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
6601
- const rows = getSearchIndexStatuses(activeBundle);
6602
- return {
6603
- content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
6604
- };
7492
+ async ({ source, sessions_path, overwrite }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
7493
+ if (sessions_path && !source) {
7494
+ return {
7495
+ content: [
7496
+ {
7497
+ type: "text",
7498
+ text: "sessions_path requires source because providers use incompatible source layouts"
7499
+ }
7500
+ ],
7501
+ isError: true
7502
+ };
7503
+ }
7504
+ if (!source && !sessions_path) {
7505
+ return {
7506
+ content: [
7507
+ {
7508
+ type: "text",
7509
+ text: JSON.stringify(
7510
+ { mode: "status", search_index: getSearchIndexStatuses(activeBundle) },
7511
+ null,
7512
+ 2
7513
+ )
7514
+ }
7515
+ ]
7516
+ };
7517
+ }
7518
+ try {
7519
+ const result = await runCompileImports({
7520
+ bundle: activeBundle,
7521
+ providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
7522
+ sessionsPath: sessions_path,
7523
+ overwrite
7524
+ });
7525
+ const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
7526
+ return {
7527
+ content: [
7528
+ {
7529
+ type: "text",
7530
+ text: JSON.stringify(
7531
+ {
7532
+ mode: "import",
7533
+ providers: result.providers.map((provider) => ({
7534
+ source: provider.source,
7535
+ source_path: provider.sourcePath,
7536
+ batch_id: provider.batchId,
7537
+ counts: provider.counts
7538
+ })),
7539
+ imported_any: result.importedAny,
7540
+ tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
7541
+ tantivy_error: result.tantivyError,
7542
+ fts5_error: result.fts5Error,
7543
+ parquet: parquet ? {
7544
+ out_dir: parquet.outDir,
7545
+ manifest_path: parquet.manifestPath,
7546
+ table_count: parquet.tableCount,
7547
+ files: parquet.files,
7548
+ counts: parquet.counts
7549
+ } : null,
7550
+ search_index: getSearchIndexStatuses(activeBundle)
7551
+ },
7552
+ null,
7553
+ 2
7554
+ )
7555
+ }
7556
+ ]
7557
+ };
7558
+ } catch (error) {
7559
+ return {
7560
+ content: [{ type: "text", text: getErrorMessage(error) }],
7561
+ isError: true
7562
+ };
7563
+ }
6605
7564
  })
6606
7565
  );
6607
7566
  }
@@ -6648,14 +7607,14 @@ function registerProsaPrompts(server) {
6648
7607
  path: z.string().min(1).describe("File path, directory, or distinctive path suffix")
6649
7608
  }
6650
7609
  },
6651
- ({ path: path20 }) => ({
7610
+ ({ path: path21 }) => ({
6652
7611
  description: "Find sessions that touched a path and summarize the evidence.",
6653
7612
  messages: [
6654
7613
  {
6655
7614
  role: "user",
6656
7615
  content: {
6657
7616
  type: "text",
6658
- text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path20)
7617
+ text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path21)
6659
7618
  }
6660
7619
  }
6661
7620
  ]
@@ -6825,26 +7784,11 @@ function writeError(res, error) {
6825
7784
  );
6826
7785
  }
6827
7786
 
6828
- // src/cli/parsers.ts
6829
/**
 * Validates a search engine name supplied on the command line.
 *
 * @param {string} value - Raw option value.
 * @returns {string} The same value when it is "fts5" or "tantivy".
 * @throws {Error} When the value is neither of the two accepted names.
 */
function parseSearchEngine(value) {
  switch (value) {
    case "fts5":
    case "tantivy":
      return value;
    default:
      throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
  }
}
6833
/**
 * Validates an MCP transport name supplied on the command line.
 *
 * @param {string} value - Raw option value.
 * @returns {string} The same value when it is "stdio" or "http".
 * @throws {Error} When the value is neither of the two accepted names.
 */
function parseMcpTransport(value) {
  const isKnownTransport = value === "stdio" || value === "http";
  if (!isKnownTransport) {
    throw new Error(`invalid transport: ${value} (expected stdio or http)`);
  }
  return value;
}
6837
/**
 * Validates an optional source-tool filter supplied on the command line.
 *
 * @param {string|undefined} value - Raw option value; undefined means "no filter".
 * @returns {string|undefined} undefined when no value was given, otherwise the
 *   same value when it is listed in SOURCE_TOOLS.
 * @throws {Error} When a value is given that SOURCE_TOOLS does not include.
 */
function parseSourceTool(value) {
  if (value === undefined) {
    return undefined;
  }
  if (!SOURCE_TOOLS.includes(value)) {
    throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
  }
  return value;
}
6842
-
6843
7787
  // src/cli/commands/mcp.ts
6844
7788
  function mcpCommand() {
6845
- const serve = new Command5("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
7789
+ const serve = new Command6("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
6846
7790
  async (options) => {
6847
- const storePath = path18.resolve(options.store);
7791
+ const storePath = path19.resolve(options.store);
6848
7792
  const bundle = await openOrInitBundle(storePath);
6849
7793
  try {
6850
7794
  const transport = parseMcpTransport(options.transport);
@@ -6875,7 +7819,7 @@ function mcpCommand() {
6875
7819
  }
6876
7820
  }
6877
7821
  );
6878
- return new Command5("mcp").description("MCP server commands.").addCommand(serve);
7822
+ return new Command6("mcp").description("MCP server commands.").addCommand(serve);
6879
7823
  }
6880
7824
  function registerShutdown(closeServer, bundle) {
6881
7825
  const shutdown = async () => {
@@ -6892,13 +7836,13 @@ function registerShutdown(closeServer, bundle) {
6892
7836
  }
6893
7837
 
6894
7838
  // src/cli/commands/query.ts
6895
- import path19 from "path";
6896
- import { Command as Command6 } from "commander";
7839
+ import path20 from "path";
7840
+ import { Command as Command7 } from "commander";
6897
7841
  function queryCommand() {
6898
- const duckdb = new Command6("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
7842
+ const duckdb = new Command7("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
6899
7843
  async (sql, options) => {
6900
7844
  const format = parseOutputFormat(options.outputFormat, "table");
6901
- const parquetDir = options.parquetDir ? path19.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
7845
+ const parquetDir = options.parquetDir ? path20.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
6902
7846
  const result = await queryDuckDbParquet({ parquetDir, sql });
6903
7847
  printRows(result.rows, {
6904
7848
  format,
@@ -6907,14 +7851,14 @@ function queryCommand() {
6907
7851
  });
6908
7852
  }
6909
7853
  );
6910
- return new Command6("query").description("Run derived analytical queries.").addCommand(duckdb);
7854
+ return new Command7("query").description("Run derived analytical queries.").addCommand(duckdb);
6911
7855
  }
6912
7856
 
6913
7857
  // src/cli/commands/search.ts
6914
- import { Command as Command7 } from "commander";
7858
+ import { Command as Command8 } from "commander";
6915
7859
  init_search();
6916
7860
  function searchCommand() {
6917
- return new Command7("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
7861
+ return new Command8("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
6918
7862
  async (query, options) => {
6919
7863
  const engine = parseSearchEngine(options.engine);
6920
7864
  const format = parseOutputFormat(options.outputFormat, "table");
@@ -6935,10 +7879,10 @@ function searchCommand() {
6935
7879
  }
6936
7880
 
6937
7881
  // src/cli/commands/sessions.ts
6938
- import { Command as Command8 } from "commander";
7882
+ import { Command as Command9 } from "commander";
6939
7883
  init_sessions();
6940
7884
  function sessionsCommand() {
6941
- const command = new Command8("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
7885
+ const command = new Command9("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
6942
7886
  async (options) => {
6943
7887
  const format = parseOutputFormat(options.outputFormat, "table");
6944
7888
  await withBundle(options.store, (bundle) => {
@@ -6965,7 +7909,7 @@ function sessionsCommand() {
6965
7909
  }
6966
7910
  );
6967
7911
  command.addCommand(
6968
- new Command8("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
7912
+ new Command9("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
6969
7913
  async (options) => {
6970
7914
  await withBundle(options.store, (bundle) => {
6971
7915
  const count = countSessions(bundle, {
@@ -6983,9 +7927,9 @@ function sessionsCommand() {
6983
7927
  }
6984
7928
 
6985
7929
  // src/cli/commands/tui.ts
6986
- import { Command as Command9 } from "commander";
7930
+ import { Command as Command10 } from "commander";
6987
7931
  function tuiCommand() {
6988
- return new Command9("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
7932
+ return new Command10("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
6989
7933
  const [{ render }, React, { App: App2 }] = await Promise.all([
6990
7934
  import("ink"),
6991
7935
  import("react"),
@@ -7000,8 +7944,14 @@ function tuiCommand() {
7000
7944
  }
7001
7945
 
7002
7946
  // src/cli/main.ts
7947
/**
 * Returns a copy of argv with a "--" separator removed when it sits at
 * index 2, i.e. immediately after the first two entries.
 *
 * Only that single position is inspected; a "--" anywhere else is kept.
 * The input array is never mutated.
 *
 * @param {string[]} argv - Full argument vector.
 * @returns {string[]} A new array, with the "--" at index 2 dropped if present.
 */
function stripLeadingDoubleDash(argv) {
  const hasLeadingSeparator = argv.length >= 3 && argv[2] === "--";
  return hasLeadingSeparator
    ? [argv[0], argv[1], ...argv.slice(3)]
    : [...argv];
}
7003
7953
  async function runCli(argv) {
7004
- const program = new Command10().name("prosa").enablePositionalOptions().description(
7954
+ const program = new Command11().name("prosa").enablePositionalOptions().description(
7005
7955
  "Compile, search and export local agent session histories\n(Cursor, Codex CLI, Claude Code, Gemini CLI) into one canonical store."
7006
7956
  ).version(PROSA_PARSER_VERSION, "-v, --version");
7007
7957
  program.addCommand(initCommand());
@@ -7012,9 +7962,10 @@ async function runCli(argv) {
7012
7962
  program.addCommand(searchCommand());
7013
7963
  program.addCommand(exportCommand());
7014
7964
  program.addCommand(queryCommand());
7965
+ program.addCommand(analyticsCommand());
7015
7966
  program.addCommand(mcpCommand());
7016
7967
  program.addCommand(tuiCommand());
7017
- await program.parseAsync([...argv]);
7968
+ await program.parseAsync(stripLeadingDoubleDash(argv));
7018
7969
  }
7019
7970
  var isEntry = import.meta.url === `file://${process.argv[1]}`;
7020
7971
  if (isEntry) {