@c3-oss/prosa 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/prosa.js CHANGED
@@ -12,8 +12,8 @@ var __export = (target, all) => {
12
12
 
13
13
  // src/core/db.ts
14
14
  import Database from "better-sqlite3";
15
- function openDb(path20) {
16
- const db = new Database(path20);
15
+ function openDb(path21) {
16
+ const db = new Database(path21);
17
17
  db.pragma("journal_mode = WAL");
18
18
  db.pragma("foreign_keys = ON");
19
19
  db.pragma("synchronous = NORMAL");
@@ -48,283 +48,30 @@ var init_db = __esm({
48
48
  }
49
49
  });
50
50
 
51
- // src/core/errors.ts
52
- var getErrorMessage;
53
- var init_errors = __esm({
54
- "src/core/errors.ts"() {
55
- "use strict";
56
- getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
57
- }
58
- });
59
-
60
- // src/core/cas/compress.ts
61
- import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
62
- function compressBytes(input) {
63
- if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
64
- return { bytes: Buffer.from(input), compression: "none" };
65
- }
66
- const out = zstdCompress(Buffer.from(input), { compressionLevel: ZSTD_LEVEL });
67
- return { bytes: out, compression: "zstd" };
68
- }
69
- function decompressBytes(input, compression) {
70
- if (compression === "none") return input;
71
- return zstdDecompress(input);
72
- }
73
- var COMPRESS_THRESHOLD_BYTES, ZSTD_LEVEL;
74
- var init_compress = __esm({
75
- "src/core/cas/compress.ts"() {
76
- "use strict";
77
- COMPRESS_THRESHOLD_BYTES = 256;
78
- ZSTD_LEVEL = 3;
79
- }
80
- });
81
-
82
- // src/core/cas/hash.ts
83
- import { createHash } from "crypto";
84
- import { blake3 } from "@noble/hashes/blake3";
85
- import { bytesToHex } from "@noble/hashes/utils";
86
- function blake3Hex(bytes) {
87
- return bytesToHex(blake3(bytes));
88
- }
89
- function sha256Hex(bytes) {
90
- return createHash("sha256").update(bytes).digest("hex");
91
- }
92
- function objectIdFromHash(hashHex) {
93
- return `blake3:${hashHex}`;
94
- }
95
- function objectStoragePath(hashHex, compression) {
96
- const ext = compression === "zstd" ? ".zst" : ".bin";
97
- const a = hashHex.slice(0, 2);
98
- const b = hashHex.slice(2, 4);
99
- return `objects/blake3/${a}/${b}/${hashHex}${ext}`;
51
+ // src/core/limits.ts
52
+ function clampLimit(value, opts) {
53
+ return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
100
54
  }
101
- var init_hash = __esm({
102
- "src/core/cas/hash.ts"() {
55
+ var init_limits = __esm({
56
+ "src/core/limits.ts"() {
103
57
  "use strict";
104
58
  }
105
59
  });
106
60
 
107
- // src/core/cas/index.ts
108
- var cas_exports = {};
109
- __export(cas_exports, {
110
- createPendingObjects: () => createPendingObjects,
111
- ensureDir: () => ensureDir,
112
- flushPendingObjects: () => flushPendingObjects,
113
- getBytes: () => getBytes,
114
- getJson: () => getJson,
115
- getObjectMeta: () => getObjectMeta,
116
- getText: () => getText,
117
- putBytes: () => putBytes,
118
- putJson: () => putJson,
119
- putText: () => putText,
120
- stageBytes: () => stageBytes,
121
- stageJson: () => stageJson,
122
- stageText: () => stageText
123
- });
124
- import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
125
- import path2 from "path";
126
- async function ensureDir(absoluteDir) {
127
- if (ensuredDirs.has(absoluteDir)) return;
128
- await mkdir2(absoluteDir, { recursive: true });
129
- ensuredDirs.add(absoluteDir);
130
- }
131
- async function putBytes(bundle, bytes, options = {}) {
132
- const hash = blake3Hex(bytes);
133
- const objectId = objectIdFromHash(hash);
134
- const existing = prepare(
135
- bundle.db,
136
- `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
137
- compression, mime_type, encoding, storage_path, created_at
138
- FROM objects WHERE object_id = ?`
139
- ).get(objectId);
140
- if (existing) return objectId;
141
- const { bytes: stored, compression } = compressBytes(bytes);
142
- const storagePath = objectStoragePath(hash, compression);
143
- const absolutePath = path2.join(bundle.path, storagePath);
144
- await ensureDir(path2.dirname(absolutePath));
145
- await writeFile2(absolutePath, stored);
146
- prepare(
147
- bundle.db,
148
- `INSERT INTO objects (
149
- object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
150
- compression, mime_type, encoding, storage_path, created_at
151
- ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
152
- ).run(
153
- objectId,
154
- hash,
155
- bytes.byteLength,
156
- compression === "zstd" ? stored.byteLength : null,
157
- compression,
158
- options.mimeType ?? null,
159
- options.encoding ?? null,
160
- storagePath,
161
- (/* @__PURE__ */ new Date()).toISOString()
162
- );
163
- return objectId;
164
- }
165
- async function putText(bundle, text, options = {}) {
166
- const buf = Buffer.from(text, "utf8");
167
- return putBytes(bundle, buf, {
168
- mimeType: options.mimeType ?? "text/plain; charset=utf-8",
169
- encoding: "utf-8"
170
- });
171
- }
172
- async function putJson(bundle, value) {
173
- const text = JSON.stringify(value);
174
- return putBytes(bundle, Buffer.from(text, "utf8"), {
175
- mimeType: "application/json",
176
- encoding: "utf-8"
177
- });
178
- }
179
- async function getBytes(bundle, objectId) {
180
- const meta = prepare(
181
- bundle.db,
182
- `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
183
- compression, mime_type, encoding, storage_path, created_at
184
- FROM objects WHERE object_id = ?`
185
- ).get(objectId);
186
- if (!meta) {
187
- throw new Error(`object not found: ${objectId}`);
188
- }
189
- const buf = await readFile2(path2.join(bundle.path, meta.storage_path));
190
- return decompressBytes(buf, meta.compression);
191
- }
192
- async function getText(bundle, objectId) {
193
- const buf = await getBytes(bundle, objectId);
194
- return buf.toString("utf8");
195
- }
196
- async function getJson(bundle, objectId) {
197
- const text = await getText(bundle, objectId);
198
- return JSON.parse(text);
199
- }
200
- function getObjectMeta(bundle, objectId) {
201
- return prepare(
202
- bundle.db,
203
- `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
204
- compression, mime_type, encoding, storage_path, created_at
205
- FROM objects WHERE object_id = ?`
206
- ).get(objectId) ?? null;
207
- }
208
- function createPendingObjects() {
209
- return { byId: /* @__PURE__ */ new Map() };
210
- }
211
- function stageBytes(pending, bytes, options = {}) {
212
- const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
213
- const hash = blake3Hex(buf);
214
- const objectId = objectIdFromHash(hash);
215
- if (!pending.byId.has(objectId)) {
216
- pending.byId.set(objectId, {
217
- objectId,
218
- hash,
219
- bytes: buf,
220
- mimeType: options.mimeType ?? null,
221
- encoding: options.encoding ?? null
222
- });
223
- }
224
- return objectId;
225
- }
226
- function stageText(pending, text, options = {}) {
227
- return stageBytes(pending, Buffer.from(text, "utf8"), {
228
- mimeType: options.mimeType ?? "text/plain; charset=utf-8",
229
- encoding: "utf-8"
230
- });
231
- }
232
- function stageJson(pending, value) {
233
- return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
234
- mimeType: "application/json",
235
- encoding: "utf-8"
236
- });
237
- }
238
- async function flushPendingObjects(bundle, pending) {
239
- if (pending.byId.size === 0) return;
240
- const ids = [...pending.byId.keys()];
241
- const existingIds = queryExistingObjectIds(bundle, ids);
242
- const toWrite = [];
243
- for (const obj of pending.byId.values()) {
244
- if (existingIds.has(obj.objectId)) continue;
245
- const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
246
- const storagePath = objectStoragePath(obj.hash, compression);
247
- toWrite.push({
248
- staged: obj,
249
- compression,
250
- compressedBytes,
251
- storagePath,
252
- absolutePath: path2.join(bundle.path, storagePath)
253
- });
254
- }
255
- if (toWrite.length > 0) {
256
- await writeFilesParallel(toWrite);
257
- }
258
- const insertObject = prepare(
259
- bundle.db,
260
- `INSERT OR IGNORE INTO objects (
261
- object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
262
- compression, mime_type, encoding, storage_path, created_at
263
- ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
264
- );
265
- const now = (/* @__PURE__ */ new Date()).toISOString();
266
- for (const p of toWrite) {
267
- insertObject.run(
268
- p.staged.objectId,
269
- p.staged.hash,
270
- p.staged.bytes.byteLength,
271
- p.compression === "zstd" ? p.compressedBytes.byteLength : null,
272
- p.compression,
273
- p.staged.mimeType,
274
- p.staged.encoding,
275
- p.storagePath,
276
- now
277
- );
278
- }
279
- }
280
- function queryExistingObjectIds(bundle, ids) {
281
- const found = /* @__PURE__ */ new Set();
282
- if (ids.length === 0) return found;
283
- const CHUNK = 500;
284
- for (let start = 0; start < ids.length; start += CHUNK) {
285
- const slice = ids.slice(start, start + CHUNK);
286
- const placeholders = slice.map(() => "?").join(",");
287
- const rows = bundle.db.prepare(
288
- `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
289
- ).all(...slice);
290
- for (const row of rows) found.add(row.object_id);
291
- }
292
- return found;
293
- }
294
- async function writeFilesParallel(tasks) {
295
- let cursor = 0;
296
- const workers = [];
297
- const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
298
- for (let w = 0; w < limit; w++) {
299
- workers.push(
300
- (async () => {
301
- while (true) {
302
- const i = cursor++;
303
- if (i >= tasks.length) return;
304
- const task = tasks[i];
305
- await ensureDir(path2.dirname(task.absolutePath));
306
- await writeFile2(task.absolutePath, task.compressedBytes);
307
- }
308
- })()
309
- );
310
- }
311
- await Promise.all(workers);
312
- }
313
- var ensuredDirs, FS_WRITE_CONCURRENCY;
314
- var init_cas = __esm({
315
- "src/core/cas/index.ts"() {
61
+ // src/core/errors.ts
62
+ var getErrorMessage;
63
+ var init_errors = __esm({
64
+ "src/core/errors.ts"() {
316
65
  "use strict";
317
- init_db();
318
- init_compress();
319
- init_hash();
320
- ensuredDirs = /* @__PURE__ */ new Set();
321
- FS_WRITE_CONCURRENCY = 16;
66
+ getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
322
67
  }
323
68
  });
324
69
 
325
70
  // src/services/indexing.ts
71
+ import { createHash as createHash2 } from "crypto";
72
+ import { existsSync } from "fs";
326
73
  import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
327
- import path13 from "path";
74
+ import path15 from "path";
328
75
  function enableFts5Triggers(bundle) {
329
76
  bundle.db.exec(FTS5_TRIGGER_SQL);
330
77
  }
@@ -338,7 +85,7 @@ function disableFts5Triggers(bundle) {
338
85
  function getSearchIndexStatuses(bundle) {
339
86
  ensureSearchIndexStatusRows(bundle);
340
87
  return bundle.db.prepare(
341
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
88
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
342
89
  FROM search_index_status
343
90
  ORDER BY engine`
344
91
  ).all();
@@ -346,28 +93,13 @@ function getSearchIndexStatuses(bundle) {
346
93
  function getSearchIndexStatus(bundle, engine) {
347
94
  ensureSearchIndexStatusRows(bundle);
348
95
  return bundle.db.prepare(
349
- `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
96
+ `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
350
97
  FROM search_index_status
351
98
  WHERE engine = ?`
352
99
  ).get(engine) ?? null;
353
100
  }
354
101
  function markIndexesAfterImport(bundle, options) {
355
102
  if (!options.changed) return;
356
- if (options.fts5Deferred) {
357
- updateSearchIndexStatus(bundle, "fts5", {
358
- status: "stale",
359
- sourceDocCount: countSearchDocs(bundle),
360
- indexedDocCount: countFts5Docs(bundle),
361
- errorMessage: null
362
- });
363
- } else {
364
- updateSearchIndexStatus(bundle, "fts5", {
365
- status: "ready",
366
- sourceDocCount: countSearchDocs(bundle),
367
- indexedDocCount: countFts5Docs(bundle),
368
- errorMessage: null
369
- });
370
- }
371
103
  const tantivy = getSearchIndexStatus(bundle, "tantivy");
372
104
  if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
373
105
  updateSearchIndexStatus(bundle, "tantivy", {
@@ -408,55 +140,93 @@ function rebuildFts5Index(bundle) {
408
140
  }
409
141
  return getSearchIndexStatus(bundle, "fts5");
410
142
  }
411
- async function rebuildTantivyIndex(bundle) {
143
+ function buildTantivySchema(tantivy) {
144
+ const builder = new tantivy.SchemaBuilder();
145
+ for (const field of TANTIVY_SCHEMA_FIELDS) {
146
+ if (field.tokenizer === "default") {
147
+ builder.addTextField(field.name, { stored: true });
148
+ } else {
149
+ builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
150
+ }
151
+ }
152
+ return builder.build();
153
+ }
154
+ function computeSchemaFingerprint() {
155
+ const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
156
+ return createHash2("sha256").update(canonical).digest("hex");
157
+ }
158
+ function tantivyIndexLooksValid(dir) {
159
+ return existsSync(path15.join(dir, "meta.json"));
160
+ }
161
+ function makeTantivyDoc(tantivy, row) {
162
+ const doc = new tantivy.Document();
163
+ doc.addText("doc_id", row.doc_id);
164
+ doc.addText("entity_type", row.entity_type);
165
+ doc.addText("entity_id", row.entity_id);
166
+ doc.addText("session_id", row.session_id ?? "");
167
+ doc.addText("project_id", row.project_id ?? "");
168
+ doc.addText("timestamp", row.timestamp ?? "");
169
+ doc.addText("role", row.role ?? "");
170
+ doc.addText("tool_name", row.tool_name ?? "");
171
+ doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
172
+ doc.addText("field_kind", row.field_kind);
173
+ doc.addText("text", row.text);
174
+ return doc;
175
+ }
176
+ async function rebuildTantivyIndex(bundle, options = {}) {
412
177
  ensureSearchIndexStatusRows(bundle);
178
+ const sourceDocCount = countSearchDocs(bundle);
179
+ const prev = getSearchIndexStatus(bundle, "tantivy");
180
+ const fingerprint = computeSchemaFingerprint();
181
+ const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
182
+ const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
183
+ const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
184
+ const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
413
185
  updateSearchIndexStatus(bundle, "tantivy", {
414
186
  status: "building",
415
- sourceDocCount: countSearchDocs(bundle),
187
+ sourceDocCount,
416
188
  indexedDocCount: 0,
417
189
  errorMessage: null
418
190
  });
419
191
  try {
420
192
  const tantivy = await import("@oxdev03/node-tantivy-binding");
421
- const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
422
- await rm2(bundle.paths.tantivy, { recursive: true, force: true });
423
- await mkdir4(bundle.paths.tantivy, { recursive: true });
424
- const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
425
- const writer = index.writer(5e7, 1);
426
- let indexedDocCount = 0;
427
- const rows = bundle.db.prepare(
428
- `SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
429
- role, tool_name, canonical_tool_type, field_kind, text
430
- FROM search_docs
431
- ORDER BY rowid`
432
- ).iterate();
433
- for (const row of rows) {
434
- const doc = new tantivy.Document();
435
- doc.addText("doc_id", row.doc_id);
436
- doc.addText("entity_type", row.entity_type);
437
- doc.addText("entity_id", row.entity_id);
438
- doc.addText("session_id", row.session_id ?? "");
439
- doc.addText("project_id", row.project_id ?? "");
440
- doc.addText("timestamp", row.timestamp ?? "");
441
- doc.addText("role", row.role ?? "");
442
- doc.addText("tool_name", row.tool_name ?? "");
443
- doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
444
- doc.addText("field_kind", row.field_kind);
445
- doc.addText("text", row.text);
446
- writer.addDocument(doc);
447
- indexedDocCount++;
193
+ const schema = buildTantivySchema(tantivy);
194
+ let index;
195
+ if (wantFullRebuild) {
196
+ await rm2(bundle.paths.tantivy, { recursive: true, force: true });
197
+ await mkdir4(bundle.paths.tantivy, { recursive: true });
198
+ index = new tantivy.Index(schema, bundle.paths.tantivy, false);
199
+ } else {
200
+ index = tantivy.Index.open(bundle.paths.tantivy);
201
+ }
202
+ const writer = index.writer(3e8, 4);
203
+ const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
204
+ let addedDocCount = 0;
205
+ let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
206
+ for (const row of bundle.db.prepare(select).iterate()) {
207
+ if (!wantFullRebuild) {
208
+ writer.deleteDocumentsByTerm("doc_id", row.doc_id);
209
+ }
210
+ writer.addDocument(makeTantivyDoc(tantivy, row));
211
+ addedDocCount++;
212
+ if (row.rowid > maxRowid) maxRowid = row.rowid;
448
213
  }
449
214
  writer.commit();
450
215
  index.reload();
216
+ writer.waitMergingThreads();
217
+ const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
451
218
  await writeFile5(
452
- path13.join(bundle.paths.tantivy, "prosa-index.json"),
219
+ path15.join(bundle.paths.tantivy, "prosa-index.json"),
453
220
  `${JSON.stringify(
454
221
  {
455
222
  engine: "tantivy",
456
223
  source: "search_docs",
457
224
  built_at: (/* @__PURE__ */ new Date()).toISOString(),
458
- source_doc_count: countSearchDocs(bundle),
459
- indexed_doc_count: indexedDocCount
225
+ mode: wantFullRebuild ? "full" : "incremental",
226
+ source_doc_count: sourceDocCount,
227
+ indexed_doc_count: indexedDocCount,
228
+ last_indexed_rowid: maxRowid,
229
+ schema_fingerprint: fingerprint
460
230
  },
461
231
  null,
462
232
  2
@@ -466,14 +236,16 @@ async function rebuildTantivyIndex(bundle) {
466
236
  );
467
237
  updateSearchIndexStatus(bundle, "tantivy", {
468
238
  status: "ready",
469
- sourceDocCount: countSearchDocs(bundle),
239
+ sourceDocCount,
470
240
  indexedDocCount,
471
- errorMessage: null
241
+ errorMessage: null,
242
+ lastIndexedRowid: maxRowid,
243
+ schemaFingerprint: fingerprint
472
244
  });
473
245
  } catch (error) {
474
246
  updateSearchIndexStatus(bundle, "tantivy", {
475
247
  status: "failed",
476
- sourceDocCount: countSearchDocs(bundle),
248
+ sourceDocCount,
477
249
  indexedDocCount: 0,
478
250
  errorMessage: getErrorMessage(error)
479
251
  });
@@ -481,36 +253,53 @@ async function rebuildTantivyIndex(bundle) {
481
253
  }
482
254
  return getSearchIndexStatus(bundle, "tantivy");
483
255
  }
256
+ function countTantivyDocsBest(prev, added) {
257
+ if (prev && typeof prev.indexed_doc_count === "number") {
258
+ return prev.indexed_doc_count + added;
259
+ }
260
+ return added;
261
+ }
484
262
  function ensureSearchIndexStatusRows(bundle) {
485
263
  const now = (/* @__PURE__ */ new Date()).toISOString();
486
264
  const stmt = prepare(
487
265
  bundle.db,
488
266
  `INSERT OR IGNORE INTO search_index_status (
489
- engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
490
- ) VALUES (?, ?, 0, 0, ?, NULL)`
267
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
268
+ error_message, last_indexed_rowid, schema_fingerprint
269
+ ) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
491
270
  );
492
271
  stmt.run("fts5", "ready", now);
493
272
  stmt.run("tantivy", "missing", now);
494
273
  }
495
274
  function updateSearchIndexStatus(bundle, engine, values) {
496
275
  ensureSearchIndexStatusRows(bundle);
497
- prepare(
498
- bundle.db,
499
- `UPDATE search_index_status
500
- SET status = ?,
501
- source_doc_count = ?,
502
- indexed_doc_count = ?,
503
- updated_at = ?,
504
- error_message = ?
505
- WHERE engine = ?`
506
- ).run(
276
+ const setClauses = [
277
+ "status = ?",
278
+ "source_doc_count = ?",
279
+ "indexed_doc_count = ?",
280
+ "updated_at = ?",
281
+ "error_message = ?"
282
+ ];
283
+ const params = [
507
284
  values.status,
508
285
  values.sourceDocCount,
509
286
  values.indexedDocCount,
510
287
  (/* @__PURE__ */ new Date()).toISOString(),
511
- values.errorMessage,
512
- engine
513
- );
288
+ values.errorMessage
289
+ ];
290
+ if (values.lastIndexedRowid !== void 0) {
291
+ setClauses.push("last_indexed_rowid = ?");
292
+ params.push(values.lastIndexedRowid);
293
+ }
294
+ if (values.schemaFingerprint !== void 0) {
295
+ setClauses.push("schema_fingerprint = ?");
296
+ params.push(values.schemaFingerprint);
297
+ }
298
+ params.push(engine);
299
+ prepare(
300
+ bundle.db,
301
+ `UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
302
+ ).run(...params);
514
303
  }
515
304
  function countSearchDocs(bundle) {
516
305
  return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
@@ -518,12 +307,16 @@ function countSearchDocs(bundle) {
518
307
  function countFts5Docs(bundle) {
519
308
  return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs_fts`).get()?.n ?? 0;
520
309
  }
521
- var FTS5_TRIGGER_SQL;
310
+ var SEARCH_INDEX_STATUS_COLUMNS, FTS5_TRIGGER_SQL, TANTIVY_SCHEMA_FIELDS, SEARCH_DOCS_SELECT;
522
311
  var init_indexing = __esm({
523
312
  "src/services/indexing.ts"() {
524
313
  "use strict";
525
314
  init_db();
526
315
  init_errors();
316
+ SEARCH_INDEX_STATUS_COLUMNS = `
317
+ engine, status, source_doc_count, indexed_doc_count, updated_at,
318
+ error_message, last_indexed_rowid, schema_fingerprint
319
+ `;
527
320
  FTS5_TRIGGER_SQL = `
528
321
  CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
529
322
  INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
@@ -542,21 +335,30 @@ CREATE TRIGGER IF NOT EXISTS search_docs_au AFTER UPDATE ON search_docs BEGIN
542
335
  VALUES (new.rowid, new.text, new.role, new.tool_name, new.field_kind);
543
336
  END;
544
337
  `;
545
- }
546
- });
547
-
548
- // src/core/limits.ts
549
- function clampLimit(value, opts) {
550
- return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
551
- }
552
- var init_limits = __esm({
553
- "src/core/limits.ts"() {
554
- "use strict";
338
+ TANTIVY_SCHEMA_FIELDS = [
339
+ { name: "doc_id", tokenizer: "raw" },
340
+ { name: "entity_type", tokenizer: "raw" },
341
+ { name: "entity_id", tokenizer: "raw" },
342
+ { name: "session_id", tokenizer: "raw" },
343
+ { name: "project_id", tokenizer: "raw" },
344
+ { name: "timestamp", tokenizer: "raw" },
345
+ { name: "role", tokenizer: "raw" },
346
+ { name: "tool_name", tokenizer: "raw" },
347
+ { name: "canonical_tool_type", tokenizer: "raw" },
348
+ { name: "field_kind", tokenizer: "raw" },
349
+ // The text field uses tantivy's default tokenizer (en_stem in the binding).
350
+ { name: "text", tokenizer: "default" }
351
+ ];
352
+ SEARCH_DOCS_SELECT = `
353
+ SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
354
+ role, tool_name, canonical_tool_type, field_kind, text
355
+ FROM search_docs
356
+ `;
555
357
  }
556
358
  });
557
359
 
558
360
  // src/services/search.ts
559
- import { existsSync } from "fs";
361
+ import { existsSync as existsSync2 } from "fs";
560
362
  import { createRequire } from "module";
561
363
  function escapeFtsQuery(q) {
562
364
  return q.split(/\s+/).filter((t) => t.length > 0).map((t) => `"${t.replace(/"/g, '""')}"`).join(" ");
@@ -565,7 +367,7 @@ function searchFullText(bundle, options) {
565
367
  if (options.engine === "tantivy") {
566
368
  return searchTantivy(bundle, options);
567
369
  }
568
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
370
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
569
371
  const sql = `
570
372
  SELECT d.doc_id,
571
373
  d.entity_type,
@@ -580,14 +382,14 @@ function searchFullText(bundle, options) {
580
382
  JOIN search_docs d ON d.rowid = search_docs_fts.rowid
581
383
  WHERE search_docs_fts MATCH ?
582
384
  ORDER BY bm25(search_docs_fts), d.timestamp DESC
583
- LIMIT ${limit}
385
+ LIMIT ${limit2}
584
386
  `;
585
387
  const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
586
388
  if (!ftsQuery) return [];
587
389
  return bundle.db.prepare(sql).all(ftsQuery);
588
390
  }
589
391
  function searchTantivy(bundle, options) {
590
- if (!existsSync(bundle.paths.tantivy)) {
392
+ if (!existsSync2(bundle.paths.tantivy)) {
591
393
  throw new Error("tantivy index not found; run `prosa index tantivy` first");
592
394
  }
593
395
  const status = getSearchIndexStatus(bundle, "tantivy");
@@ -596,7 +398,7 @@ function searchTantivy(bundle, options) {
596
398
  `tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
597
399
  );
598
400
  }
599
- const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
401
+ const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
600
402
  const queryText = options.query.trim();
601
403
  if (!queryText) return [];
602
404
  const tantivy = requireTantivy();
@@ -605,7 +407,7 @@ function searchTantivy(bundle, options) {
605
407
  const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
606
408
  text: [true, 2, true]
607
409
  });
608
- const result = searcher.search(query, limit, true);
410
+ const result = searcher.search(query, limit2, true);
609
411
  const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
610
412
  snippets.setMaxNumChars(180);
611
413
  return result.hits.map((hit) => {
@@ -689,7 +491,7 @@ function sessionFilterWhere(filters) {
689
491
  }
690
492
  function listSessions(bundle, filters = {}) {
691
493
  const { where, params } = sessionFilterWhere(filters);
692
- const limit = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
494
+ const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
693
495
  const sql = `
694
496
  SELECT s.session_id,
695
497
  s.source_tool,
@@ -710,7 +512,7 @@ function listSessions(bundle, filters = {}) {
710
512
  FROM sessions s
711
513
  ${where}
712
514
  ORDER BY s.start_ts DESC NULLS LAST
713
- LIMIT ${limit}
515
+ LIMIT ${limit2}
714
516
  `;
715
517
  return bundle.db.prepare(sql).all(...params);
716
518
  }
@@ -1134,13 +936,14 @@ var init_App = __esm({
1134
936
  });
1135
937
 
1136
938
  // src/cli/main.ts
1137
- import { Command as Command10 } from "commander";
939
+ import { Command as Command11 } from "commander";
1138
940
 
1139
941
  // src/core/version.ts
1140
942
  var PROSA_PARSER_VERSION = "0.1.0";
1141
- var PROSA_SCHEMA_VERSION = 2;
943
+ var PROSA_SCHEMA_VERSION = 4;
1142
944
 
1143
- // src/cli/commands/compile.ts
945
+ // src/cli/commands/analytics.ts
946
+ import path4 from "path";
1144
947
  import { Command } from "commander";
1145
948
 
1146
949
  // src/core/bundle.ts
@@ -1514,10 +1317,291 @@ INSERT OR IGNORE INTO search_index_status (
1514
1317
  ('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
1515
1318
  `;
1516
1319
 
1320
+ // src/core/schema/sql/003_analytics_views.ts
1321
/**
 * Migration 3: read-only analytics views over the normalized tables.
 *
 * Creates five views (all `IF NOT EXISTS`):
 *   - session_facts     one row per session with per-session counters
 *   - tool_usage_facts  one row per tool call joined to its rolled-up results
 *   - error_facts       union of failing tool results, import errors and uncertainties
 *   - model_usage       model observations grouped by source tool / project / model
 *   - project_activity  per-project roll-up of sessions and their child rows
 *
 * project_activity pre-aggregates every child table per session before joining.
 * Joining turns, messages, tool_calls, tool_results and search_docs directly on
 * session_id multiplies the row counts of all five tables for each session,
 * which makes the view unusably slow on real bundles even though
 * count(DISTINCT ...) hides the duplicates.
 * NOTE(review): summing per-session distinct counts equals the previous
 * group-wide count(DISTINCT ...) as long as turn_id / message_id /
 * tool_call_id / tool_result_id / doc_id are unique keys and session_id /
 * project_id are unique in their own tables - confirm against 001_init.
 */
var SQL_003_ANALYTICS_VIEWS = String.raw`
CREATE VIEW IF NOT EXISTS session_facts AS
WITH turn_counts AS (
  SELECT session_id, count(*) AS turn_count
  FROM turns
  GROUP BY session_id
),
message_counts AS (
  SELECT session_id,
         count(*) AS message_count,
         sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
         sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
  FROM messages
  GROUP BY session_id
),
tool_call_counts AS (
  SELECT session_id,
         count(*) AS tool_call_count,
         sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
  FROM tool_calls
  GROUP BY session_id
),
tool_result_counts AS (
  SELECT session_id,
         count(*) AS tool_result_count,
         sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
                  THEN 1 ELSE 0 END) AS tool_result_error_count,
         sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
  FROM tool_results
  GROUP BY session_id
),
search_doc_counts AS (
  SELECT session_id, count(*) AS search_doc_count
  FROM search_docs
  WHERE session_id IS NOT NULL
  GROUP BY session_id
)
SELECT s.session_id,
       s.source_tool,
       s.source_session_id,
       s.project_id,
       p.display_name AS project_name,
       p.canonical_path AS project_path,
       s.parent_session_id,
       s.is_subagent,
       s.agent_role,
       s.agent_nickname,
       s.title,
       s.start_ts,
       s.end_ts,
       CASE
         WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
         THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
         ELSE NULL
       END AS duration_seconds,
       s.cwd_initial,
       s.git_branch_initial,
       s.model_first,
       s.model_last,
       s.status,
       s.timeline_confidence,
       sf.path AS source_file_path,
       COALESCE(tc.turn_count, 0) AS turn_count,
       COALESCE(mc.message_count, 0) AS message_count,
       COALESCE(mc.user_message_count, 0) AS user_message_count,
       COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
       COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
       COALESCE(trc.tool_result_count, 0) AS tool_result_count,
       COALESCE(tcc.tool_call_error_count, 0)
         + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
       COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
       COALESCE(sdc.search_doc_count, 0) AS search_doc_count
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;

CREATE VIEW IF NOT EXISTS tool_usage_facts AS
WITH result_rollup AS (
  SELECT tool_call_id,
         session_id,
         count(*) AS tool_result_count,
         max(status) AS result_status,
         max(is_error) AS is_error,
         min(exit_code) AS exit_code,
         sum(COALESCE(duration_ms, 0)) AS duration_ms,
         max(preview) AS preview
  FROM tool_results
  GROUP BY tool_call_id, session_id
)
SELECT tc.tool_call_id,
       tc.session_id,
       s.source_tool,
       s.source_session_id,
       s.project_id,
       p.display_name AS project_name,
       p.canonical_path AS project_path,
       tc.turn_id,
       tc.message_id,
       tc.event_id,
       tc.source_call_id,
       tc.tool_name,
       tc.canonical_tool_type,
       tc.command,
       tc.cwd,
       tc.path,
       tc.query,
       tc.timestamp_start,
       tc.timestamp_end,
       CASE
         WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
         THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
         ELSE NULL
       END AS call_duration_seconds,
       tc.status AS call_status,
       rr.result_status,
       COALESCE(rr.is_error, 0) AS is_error,
       rr.exit_code,
       rr.duration_ms AS result_duration_ms,
       COALESCE(rr.tool_result_count, 0) AS tool_result_count,
       rr.preview,
       tc.raw_record_id
FROM tool_calls tc
LEFT JOIN sessions s ON s.session_id = tc.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;

CREATE VIEW IF NOT EXISTS error_facts AS
SELECT 'tool_result:' || tr.tool_result_id AS error_id,
       'tool_result' AS error_category,
       s.source_tool,
       s.project_id,
       p.display_name AS project_name,
       tr.session_id,
       COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
       tc.tool_name,
       tc.canonical_tool_type,
       COALESCE(tr.status, tc.status) AS status,
       tr.exit_code,
       NULL AS message,
       tr.preview,
       NULL AS entity_type,
       NULL AS entity_id,
       tr.raw_record_id
FROM tool_results tr
LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
LEFT JOIN sessions s ON s.session_id = tr.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
UNION ALL
SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
       'import_error' AS error_category,
       COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
       NULL AS project_id,
       NULL AS project_name,
       NULL AS session_id,
       ie.occurred_at AS timestamp,
       NULL AS tool_name,
       NULL AS canonical_tool_type,
       ie.kind AS status,
       NULL AS exit_code,
       ie.message,
       NULL AS preview,
       NULL AS entity_type,
       NULL AS entity_id,
       ie.raw_record_id
FROM import_errors ie
LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
UNION ALL
SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
       'uncertainty' AS error_category,
       NULL AS source_tool,
       NULL AS project_id,
       NULL AS project_name,
       CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
       NULL AS timestamp,
       NULL AS tool_name,
       NULL AS canonical_tool_type,
       u.reason AS status,
       NULL AS exit_code,
       u.reason AS message,
       NULL AS preview,
       u.entity_type,
       u.entity_id,
       NULL AS raw_record_id
FROM uncertainties u;

CREATE VIEW IF NOT EXISTS model_usage AS
WITH model_events AS (
  SELECT s.source_tool,
         s.project_id,
         p.display_name AS project_name,
         p.canonical_path AS project_path,
         s.session_id,
         NULL AS turn_id,
         s.model_first AS model,
         s.start_ts AS timestamp,
         'session_first' AS observation_type
  FROM sessions s
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE s.model_first IS NOT NULL
  UNION ALL
  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
         s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
         'session_last' AS observation_type
  FROM sessions s
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE s.model_last IS NOT NULL
  UNION ALL
  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
         t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
  FROM turns t
  LEFT JOIN sessions s ON s.session_id = t.session_id
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE t.model IS NOT NULL
  UNION ALL
  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
         m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
  FROM messages m
  LEFT JOIN sessions s ON s.session_id = m.session_id
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE m.model IS NOT NULL
)
SELECT source_tool,
       project_id,
       project_name,
       project_path,
       model,
       count(DISTINCT session_id) AS session_count,
       count(DISTINCT turn_id) AS turn_count,
       count(*) AS observation_count,
       sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
       min(timestamp) AS first_seen_ts,
       max(timestamp) AS last_seen_ts
FROM model_events
GROUP BY source_tool, project_id, project_name, project_path, model;

CREATE VIEW IF NOT EXISTS project_activity AS
WITH turn_counts AS (
  SELECT session_id, count(DISTINCT turn_id) AS turn_count
  FROM turns
  GROUP BY session_id
),
message_counts AS (
  SELECT session_id, count(DISTINCT message_id) AS message_count
  FROM messages
  GROUP BY session_id
),
tool_call_counts AS (
  SELECT session_id, count(DISTINCT tool_call_id) AS tool_call_count
  FROM tool_calls
  GROUP BY session_id
),
tool_result_counts AS (
  SELECT session_id,
         count(DISTINCT tool_result_id) AS tool_result_count,
         count(DISTINCT CASE
           WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
           THEN tool_result_id
         END) AS tool_error_count
  FROM tool_results
  GROUP BY session_id
),
search_doc_counts AS (
  SELECT session_id, count(DISTINCT doc_id) AS search_doc_count
  FROM search_docs
  GROUP BY session_id
)
SELECT s.source_tool,
       s.project_id,
       COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
       p.canonical_path AS project_path,
       min(s.start_ts) AS first_session_ts,
       max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
       count(DISTINCT s.session_id) AS session_count,
       count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
         AS low_confidence_session_count,
       COALESCE(sum(tnc.turn_count), 0) AS turn_count,
       COALESCE(sum(mc.message_count), 0) AS message_count,
       COALESCE(sum(tcc.tool_call_count), 0) AS tool_call_count,
       COALESCE(sum(trc.tool_result_count), 0) AS tool_result_count,
       COALESCE(sum(trc.tool_error_count), 0) AS tool_error_count,
       COALESCE(sum(sdc.search_doc_count), 0) AS search_doc_count
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN turn_counts tnc ON tnc.session_id = s.session_id
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
`;
1592
+
1593
+ // src/core/schema/sql/004_tantivy_checkpoint.ts
1594
// Migration SQL registered in MIGRATIONS as version 4 ("tantivy_checkpoint").
// Adds two columns to search_index_status: last_indexed_rowid (INTEGER) and
// schema_fingerprint (TEXT). Neither column has a default, so existing rows get NULL.
// NOTE(review): ALTER TABLE ... ADD COLUMN is not idempotent; this presumably relies on
// runMigrations applying each version only once - confirm against the migration runner.
+ var SQL_004_TANTIVY_CHECKPOINT = String.raw`
1595
+ ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
1596
+ ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
1597
+ `;
1598
+
1517
1599
  // src/core/schema/migrate.ts
1518
1600
  // Schema migrations, listed in ascending version order. Each entry pairs a version
  // number and a short name with the SQL text for that step. The highest version here (4)
  // matches PROSA_SCHEMA_VERSION, which openBundle compares against the database version.
  // NOTE(review): how runMigrations selects and records entries is not visible here.
  var MIGRATIONS = [
1519
1601
  { version: 1, name: "init", sql: SQL_001_INIT },
1520
- { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS }
1602
+ { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
1603
+ { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
1604
+ { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
1521
1605
  ];
1522
1606
  function runMigrations(db) {
1523
1607
  db.exec(`
@@ -1613,52 +1697,1038 @@ async function initBundle(rootPath) {
1613
1697
  runMigrations(db);
1614
1698
  return { path: resolved, db, manifest, paths };
1615
1699
  }
1616
- async function openBundle(rootPath) {
1617
- const resolved = path.resolve(rootPath);
1618
- const paths = bundlePaths(resolved);
1619
- const dirStat = await stat(resolved).catch(() => null);
1620
- if (!dirStat?.isDirectory()) {
1621
- throw new Error(`bundle path not found or not a directory: ${resolved}`);
1700
/**
 * Open an existing bundle directory and return a handle to it.
 *
 * Verifies that the path is a directory containing manifest.json, ensures the
 * search and tantivy directories exist, opens the SQLite database, runs
 * migrations, and checks that the resulting schema version equals
 * PROSA_SCHEMA_VERSION. If the manifest records a different parser version it
 * is rewritten with the current PROSA_PARSER_VERSION.
 *
 * @param {string} rootPath - Bundle directory (resolved to an absolute path).
 * @returns {Promise<{path: string, db: object, manifest: object, paths: object}>}
 *   The caller owns `db` and must release it (see closeBundle).
 * @throws {Error} If the directory or manifest is missing, the manifest is not
 *   valid JSON, migrations fail, or the schema version does not match.
 */
async function openBundle(rootPath) {
  const resolved = path.resolve(rootPath);
  const paths = bundlePaths(resolved);
  const dirStat = await stat(resolved).catch(() => null);
  if (!dirStat?.isDirectory()) {
    throw new Error(`bundle path not found or not a directory: ${resolved}`);
  }
  if (!await exists(paths.manifest)) {
    throw new Error(
      `no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
    );
  }
  const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
  await mkdir(paths.search, { recursive: true });
  await mkdir(paths.tantivy, { recursive: true });
  const db = openDb(paths.db);
  try {
    runMigrations(db);
    const currentVersion = currentSchemaVersion(db);
    if (currentVersion !== PROSA_SCHEMA_VERSION) {
      throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
    }
    if (manifest.parser_version !== PROSA_PARSER_VERSION) {
      manifest.parser_version = PROSA_PARSER_VERSION;
      await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
`, "utf8");
    }
  } catch (error) {
    // Once the database is open, every failure path must release the handle;
    // otherwise a failed migration or manifest write leaks the connection.
    closeDb(db);
    throw error;
  }
  return { path: resolved, db, manifest, paths };
}
1729
/**
 * Open the bundle at `rootPath`, initializing it first when it does not exist yet.
 *
 * A path that exists but is not a directory is rejected. A missing directory,
 * or a directory without manifest.json, is handed to initBundle; anything else
 * is handed to openBundle.
 *
 * @param {string} rootPath - Bundle directory (resolved to an absolute path).
 * @returns {Promise<object>} Whatever initBundle / openBundle resolve to.
 * @throws {Error} If the path exists and is not a directory.
 */
async function openOrInitBundle(rootPath) {
  const bundleRoot = path.resolve(rootPath);
  const { manifest: manifestFile } = bundlePaths(bundleRoot);
  const info = await stat(bundleRoot).catch(() => null);

  if (info && !info.isDirectory()) {
    throw new Error(`bundle path not found or not a directory: ${bundleRoot}`);
  }

  // The manifest is only probed when the directory itself exists.
  const alreadyInitialized = Boolean(info) && await exists(manifestFile);
  if (alreadyInitialized) {
    return await openBundle(bundleRoot);
  }
  return await initBundle(bundleRoot);
}
1741
/**
 * Release the database handle held by a bundle.
 *
 * @param {{db: object}} bundle - Handle returned by openBundle / initBundle.
 * @returns {void}
 */
function closeBundle(bundle) {
  const { db } = bundle;
  closeDb(db);
}
1744
+
1745
+ // src/services/analytics.ts
1746
+ init_limits();
1747
+
1748
+ // src/services/export/parquet.ts
1749
+ import { mkdir as mkdir2, rm, writeFile as writeFile2 } from "fs/promises";
1750
+ import path2 from "path";
1751
+ import { DuckDBConnection } from "@duckdb/node-api";
1752
+ init_errors();
1753
// Tables that are exported to Parquet (one `<table>.parquet` file each) and
// re-exposed as DuckDB views when querying an export. Order is preserved in
// the export loop and in the export manifest.
var PARQUET_TABLES = [
  // import bookkeeping
  "objects", "source_files", "import_batches", "raw_records", "import_errors", "uncertainties",
  // conversation model
  "projects", "sessions", "turns", "events", "messages", "content_blocks",
  // tools, artifacts, graph and search
  "tool_calls", "tool_results", "artifacts", "edges", "search_docs"
];
1772
/**
 * Export every table in PARQUET_TABLES from a bundle's SQLite database to
 * Parquet files, plus a manifest.json describing the export.
 *
 * Existing output files for these tables (and the manifest) are removed before
 * the export starts. Row counts in the manifest come from the snapshot taken
 * before the export runs.
 *
 * @param {{bundlePath: string, outDir?: string}} options - Bundle to export and
 *   optional output directory (defaults to the bundle's parquet directory).
 * @returns {Promise<{outDir: string, manifestPath: string, files: Object<string, string>, counts: Object<string, number>}>}
 */
async function exportBundleParquet(options) {
  const snapshot = await openBundleSnapshot(options.bundlePath);
  const outDir = path2.resolve(options.outDir ?? snapshot.defaultOutDir);
  await mkdir2(outDir, { recursive: true });

  const files = {};
  for (const table of PARQUET_TABLES) {
    files[table] = path2.join(outDir, `${table}.parquet`);
  }
  const manifestPath = path2.join(outDir, "manifest.json");

  // Clear leftovers from a previous export so a partial run never mixes old and new files.
  const staleTargets = [...Object.values(files), manifestPath];
  for (const target of staleTargets) {
    await rm(target, { force: true });
  }

  const connection = await createDuckDbConnection();
  try {
    await attachSqlite(connection, snapshot.dbPath);
    for (const table of PARQUET_TABLES) {
      const source = quoteIdentifier(table);
      const destination = sqlString(files[table]);
      await connection.run(
        `COPY (SELECT * FROM prosa.${source}) TO ${destination} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
      );
    }
  } finally {
    connection.closeSync();
  }

  const tables = {};
  for (const table of PARQUET_TABLES) {
    tables[table] = {
      file: path2.basename(files[table]),
      rows: snapshot.counts[table]
    };
  }
  const manifest = {
    exported_at: new Date().toISOString(),
    source_db: snapshot.dbPath,
    schema_version: snapshot.schemaVersion,
    parser_version: snapshot.parserVersion,
    tables
  };
  await writeFile2(manifestPath, `${JSON.stringify(manifest, null, 2)}
`, "utf8");

  return { outDir, manifestPath, files, counts: snapshot.counts };
}
1813
/**
 * Run a SQL query against a Parquet export using an in-process DuckDB connection.
 *
 * Each table in PARQUET_TABLES is exposed as a view over `<table>.parquet`, the
 * analytics views are created on top, and then `options.sql` is executed.
 *
 * @param {{parquetDir: string, sql: string}} options - Export directory and SQL text.
 * @returns {Promise<{columns: string[], rows: object[]}>} De-duplicated column
 *   names and the rows as JSON-compatible objects.
 * @throws {Error} A "Parquet export not found" error (with the DuckDB error as
 *   `cause`) when a Parquet file is missing; any other error is rethrown as is.
 */
async function queryDuckDbParquet(options) {
  const parquetDir = path2.resolve(options.parquetDir);
  const connection = await createDuckDbConnection();
  try {
    for (const table of PARQUET_TABLES) {
      await connection.run(
        `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
          path2.join(parquetDir, `${table}.parquet`)
        )})`
      );
    }
    await createAnalyticsViews(connection);
    const reader = await connection.runAndReadAll(options.sql);
    return {
      columns: reader.deduplicatedColumnNames(),
      rows: reader.getRowObjectsJson()
    };
  } catch (error) {
    if (isMissingParquetError(error)) {
      // Keep the underlying DuckDB error reachable: the friendly message alone
      // does not say which file was missing.
      throw new Error(
        `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`,
        { cause: error }
      );
    }
    throw error;
  } finally {
    connection.closeSync();
  }
}
1841
/**
 * Create a new DuckDB connection via `DuckDBConnection.create()`.
 * The caller is responsible for closing it.
 *
 * @returns {Promise<object>} The connection.
 */
async function createDuckDbConnection() {
  const connection = await DuckDBConnection.create();
  return connection;
}
1844
/**
 * Install and load DuckDB's sqlite extension, then attach the bundle database
 * under the schema name `prosa`.
 *
 * @param {object} connection - DuckDB connection exposing an async `run(sql)`.
 * @param {string} dbPath - Path to the SQLite database file.
 * @returns {Promise<void>}
 * @throws {Error} If any of the three statements fails; the original error is
 *   preserved as `cause`.
 */
async function attachSqlite(connection, dbPath) {
  try {
    await connection.run("INSTALL sqlite");
    await connection.run("LOAD sqlite");
    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
  } catch (error) {
    // Wrap for a readable message, but keep the original error (and its stack)
    // attached so extension and attach failures stay debuggable.
    throw new Error(
      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`,
      { cause: error }
    );
  }
}
1855
/**
 * Create (or replace) the five analytics views on a DuckDB connection:
 * session_facts, tool_usage_facts, error_facts, model_usage, project_activity.
 *
 * The views read from relations named after the base tables (sessions, turns,
 * messages, ...), which must already exist on the connection.
 *
 * project_activity pre-aggregates each child table per session before joining.
 * Joining turns, messages, tool_calls, tool_results and search_docs directly on
 * session_id multiplies their row counts for every session, which
 * count(DISTINCT ...) hides in the result but not in the work done. Sums are
 * cast back to BIGINT so the column types match what count(DISTINCT ...) produced.
 * NOTE(review): the per-session sums equal the previous group-wide distinct
 * counts as long as turn_id / message_id / tool_call_id / tool_result_id /
 * doc_id are unique keys and session_id / project_id are unique in their own
 * tables - confirm against the base schema.
 *
 * @param {object} connection - DuckDB connection exposing an async `run(sql)`.
 * @returns {Promise<void>}
 */
async function createAnalyticsViews(connection) {
  await connection.run(`
CREATE OR REPLACE VIEW session_facts AS
WITH turn_counts AS (
  SELECT session_id, count(*) AS turn_count
  FROM turns
  GROUP BY session_id
),
message_counts AS (
  SELECT session_id,
         count(*) AS message_count,
         sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
         sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
  FROM messages
  GROUP BY session_id
),
tool_call_counts AS (
  SELECT session_id,
         count(*) AS tool_call_count,
         sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
  FROM tool_calls
  GROUP BY session_id
),
tool_result_counts AS (
  SELECT session_id,
         count(*) AS tool_result_count,
         sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
                  THEN 1 ELSE 0 END) AS tool_result_error_count,
         sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
  FROM tool_results
  GROUP BY session_id
),
search_doc_counts AS (
  SELECT session_id, count(*) AS search_doc_count
  FROM search_docs
  WHERE session_id IS NOT NULL
  GROUP BY session_id
)
SELECT s.session_id,
       s.source_tool,
       s.source_session_id,
       s.project_id,
       p.display_name AS project_name,
       p.canonical_path AS project_path,
       s.parent_session_id,
       s.is_subagent,
       s.agent_role,
       s.agent_nickname,
       s.title,
       s.start_ts,
       s.end_ts,
       CASE
         WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
         THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
                        TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
         ELSE NULL
       END AS duration_seconds,
       s.cwd_initial,
       s.git_branch_initial,
       s.model_first,
       s.model_last,
       s.status,
       s.timeline_confidence,
       sf.path AS source_file_path,
       COALESCE(tc.turn_count, 0) AS turn_count,
       COALESCE(mc.message_count, 0) AS message_count,
       COALESCE(mc.user_message_count, 0) AS user_message_count,
       COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
       COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
       COALESCE(trc.tool_result_count, 0) AS tool_result_count,
       COALESCE(tcc.tool_call_error_count, 0)
         + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
       COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
       COALESCE(sdc.search_doc_count, 0) AS search_doc_count
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
`);
  await connection.run(`
CREATE OR REPLACE VIEW tool_usage_facts AS
WITH result_rollup AS (
  SELECT tool_call_id,
         session_id,
         count(*) AS tool_result_count,
         max(status) AS result_status,
         max(is_error) AS is_error,
         min(exit_code) AS exit_code,
         sum(COALESCE(duration_ms, 0)) AS duration_ms,
         max(preview) AS preview
  FROM tool_results
  GROUP BY tool_call_id, session_id
)
SELECT tc.tool_call_id,
       tc.session_id,
       s.source_tool,
       s.source_session_id,
       s.project_id,
       p.display_name AS project_name,
       p.canonical_path AS project_path,
       tc.turn_id,
       tc.message_id,
       tc.event_id,
       tc.source_call_id,
       tc.tool_name,
       tc.canonical_tool_type,
       tc.command,
       tc.cwd,
       tc.path,
       tc.query,
       tc.timestamp_start,
       tc.timestamp_end,
       CASE
         WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
         THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
                        TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
         ELSE NULL
       END AS call_duration_seconds,
       tc.status AS call_status,
       rr.result_status,
       COALESCE(rr.is_error, 0) AS is_error,
       rr.exit_code,
       rr.duration_ms AS result_duration_ms,
       COALESCE(rr.tool_result_count, 0) AS tool_result_count,
       rr.preview,
       tc.raw_record_id
FROM tool_calls tc
LEFT JOIN sessions s ON s.session_id = tc.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
`);
  await connection.run(`
CREATE OR REPLACE VIEW error_facts AS
SELECT 'tool_result:' || tr.tool_result_id AS error_id,
       'tool_result' AS error_category,
       s.source_tool,
       s.project_id,
       p.display_name AS project_name,
       tr.session_id,
       COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
       tc.tool_name,
       tc.canonical_tool_type,
       COALESCE(tr.status, tc.status) AS status,
       tr.exit_code,
       NULL AS message,
       tr.preview,
       NULL AS entity_type,
       NULL AS entity_id,
       tr.raw_record_id
FROM tool_results tr
LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
LEFT JOIN sessions s ON s.session_id = tr.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
UNION ALL
SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
       'import_error' AS error_category,
       COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
       NULL AS project_id,
       NULL AS project_name,
       NULL AS session_id,
       ie.occurred_at AS timestamp,
       NULL AS tool_name,
       NULL AS canonical_tool_type,
       ie.kind AS status,
       NULL AS exit_code,
       ie.message,
       NULL AS preview,
       NULL AS entity_type,
       NULL AS entity_id,
       ie.raw_record_id
FROM import_errors ie
LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
UNION ALL
SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
       'uncertainty' AS error_category,
       NULL AS source_tool,
       NULL AS project_id,
       NULL AS project_name,
       CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
       NULL AS timestamp,
       NULL AS tool_name,
       NULL AS canonical_tool_type,
       u.reason AS status,
       NULL AS exit_code,
       u.reason AS message,
       NULL AS preview,
       u.entity_type,
       u.entity_id,
       NULL AS raw_record_id
FROM uncertainties u
`);
  await connection.run(`
CREATE OR REPLACE VIEW model_usage AS
WITH model_events AS (
  SELECT s.source_tool,
         s.project_id,
         p.display_name AS project_name,
         p.canonical_path AS project_path,
         s.session_id,
         NULL AS turn_id,
         s.model_first AS model,
         s.start_ts AS timestamp,
         'session_first' AS observation_type
  FROM sessions s
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE s.model_first IS NOT NULL
  UNION ALL
  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
         s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
         'session_last' AS observation_type
  FROM sessions s
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE s.model_last IS NOT NULL
  UNION ALL
  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
         t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
  FROM turns t
  LEFT JOIN sessions s ON s.session_id = t.session_id
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE t.model IS NOT NULL
  UNION ALL
  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
         m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
  FROM messages m
  LEFT JOIN sessions s ON s.session_id = m.session_id
  LEFT JOIN projects p ON p.project_id = s.project_id
  WHERE m.model IS NOT NULL
)
SELECT source_tool,
       project_id,
       project_name,
       project_path,
       model,
       count(DISTINCT session_id) AS session_count,
       count(DISTINCT turn_id) AS turn_count,
       count(*) AS observation_count,
       sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
       min(timestamp) AS first_seen_ts,
       max(timestamp) AS last_seen_ts
FROM model_events
GROUP BY source_tool, project_id, project_name, project_path, model
`);
  await connection.run(`
CREATE OR REPLACE VIEW project_activity AS
WITH turn_counts AS (
  SELECT session_id, count(DISTINCT turn_id) AS turn_count
  FROM turns
  GROUP BY session_id
),
message_counts AS (
  SELECT session_id, count(DISTINCT message_id) AS message_count
  FROM messages
  GROUP BY session_id
),
tool_call_counts AS (
  SELECT session_id, count(DISTINCT tool_call_id) AS tool_call_count
  FROM tool_calls
  GROUP BY session_id
),
tool_result_counts AS (
  SELECT session_id,
         count(DISTINCT tool_result_id) AS tool_result_count,
         count(DISTINCT CASE
           WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
           THEN tool_result_id
         END) AS tool_error_count
  FROM tool_results
  GROUP BY session_id
),
search_doc_counts AS (
  SELECT session_id, count(DISTINCT doc_id) AS search_doc_count
  FROM search_docs
  GROUP BY session_id
)
SELECT s.source_tool,
       s.project_id,
       COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
       p.canonical_path AS project_path,
       min(s.start_ts) AS first_session_ts,
       max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
       count(DISTINCT s.session_id) AS session_count,
       count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
         AS low_confidence_session_count,
       CAST(COALESCE(sum(tnc.turn_count), 0) AS BIGINT) AS turn_count,
       CAST(COALESCE(sum(mc.message_count), 0) AS BIGINT) AS message_count,
       CAST(COALESCE(sum(tcc.tool_call_count), 0) AS BIGINT) AS tool_call_count,
       CAST(COALESCE(sum(trc.tool_result_count), 0) AS BIGINT) AS tool_result_count,
       CAST(COALESCE(sum(trc.tool_error_count), 0) AS BIGINT) AS tool_error_count,
       CAST(COALESCE(sum(sdc.search_doc_count), 0) AS BIGINT) AS search_doc_count
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN turn_counts tnc ON tnc.session_id = s.session_id
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
`);
}
2134
/**
 * Open a bundle just long enough to capture what an export needs: the database
 * path, manifest versions, the default Parquet output directory and a row count
 * for every table in PARQUET_TABLES. The bundle is always closed before returning.
 *
 * NOTE(review): schemaVersion is read from the manifest, not from the database -
 * confirm the manifest is kept in step with migrations.
 *
 * @param {string} bundlePath - Bundle directory.
 * @returns {Promise<{dbPath: string, schemaVersion: *, parserVersion: *, defaultOutDir: string, counts: Object<string, number>}>}
 */
async function openBundleSnapshot(bundlePath) {
  const bundle = await openBundle(bundlePath);
  try {
    const counts = {};
    for (const table of PARQUET_TABLES) {
      const countSql = `SELECT count(*) AS n FROM ${quoteIdentifier(table)}`;
      const row = bundle.db.prepare(countSql).get();
      counts[table] = row?.n ?? 0;
    }
    const { manifest, paths } = bundle;
    return {
      dbPath: paths.db,
      schemaVersion: manifest.schema_version,
      parserVersion: manifest.parser_version,
      defaultOutDir: paths.parquet,
      counts
    };
  } finally {
    closeBundle(bundle);
  }
}
2154
/**
 * Quote a SQL identifier with double quotes, doubling any embedded double quote.
 *
 * @param {string} value - Raw identifier.
 * @returns {string} The quoted identifier.
 */
function quoteIdentifier(value) {
  const escaped = value.split('"').join('""');
  return `"${escaped}"`;
}
2157
/**
 * Render a value as a single-quoted SQL string literal, doubling any embedded
 * single quote.
 *
 * @param {string} value - Raw text.
 * @returns {string} The quoted literal.
 */
function sqlString(value) {
  const escaped = value.split("'").join("''");
  return `'${escaped}'`;
}
2160
/**
 * Decide whether an error looks like "a Parquet file could not be found".
 * The message must both mention a missing file ("No files found", "does not
 * exist" or "not found", case-insensitive) and contain ".parquet".
 *
 * @param {unknown} error - Error (or other thrown value) to inspect.
 * @returns {boolean}
 */
function isMissingParquetError(error) {
  const text = getErrorMessage(error);
  const reportsMissingFile = /No files found|does not exist|not found/i.test(text);
  if (!reportsMissingFile) {
    return false;
  }
  return /\.parquet/i.test(text);
}
2164
+
2165
+ // src/services/analytics.ts
2166
// Report names accepted by buildAnalyticsSql, one per analytics view.
var ANALYTICS_REPORTS = [
  "sessions",
  "tools",
  "errors",
  "models",
  "projects"
];
2167
/**
 * Run an analytics report against a Parquet export through DuckDB.
 *
 * @param {{parquetDir: string, report: string, filters?: object}} options
 * @returns {Promise<{columns: string[], rows: object[]}>}
 */
async function runAnalyticsReport(options) {
  const { parquetDir, report, filters } = options;
  const sql = buildAnalyticsSql(report, filters ?? {}, "duckdb");
  return queryDuckDbParquet({ parquetDir, sql });
}
2173
/**
 * Run an analytics report directly against an open bundle's SQLite database.
 *
 * @param {{bundle: {db: object}, report: string, filters?: object}} options
 * @returns {{columns: string[], rows: object[]}} Column names as reported by
 *   the prepared statement, and all result rows.
 */
function runAnalyticsReportFromBundle(options) {
  const { bundle, report, filters } = options;
  const statement = bundle.db.prepare(buildAnalyticsSql(report, filters ?? {}, "sqlite"));
  const rows = statement.all();
  const columns = [];
  for (const column of statement.columns()) {
    columns.push(column.name);
  }
  return { columns, rows };
}
2180
/**
 * Build the SQL text for one analytics report.
 *
 * @param {string} report - One of "sessions", "tools", "errors", "models", "projects".
 * @param {object} filters - Report filters, forwarded to the per-report builder.
 * @param {string} dialect - SQL dialect, forwarded to the per-report builder
 *   (callers in this file pass "sqlite" or "duckdb").
 * @returns {string} The SQL query.
 * @throws {Error} If `report` is not a known report name. Without this the
 *   function returned undefined and the failure surfaced later as an unrelated
 *   error from the database driver.
 */
function buildAnalyticsSql(report, filters, dialect) {
  switch (report) {
    case "sessions":
      return buildSessionsSql(filters, dialect);
    case "tools":
      return buildToolsSql(filters, dialect);
    case "errors":
      return buildErrorsSql(filters, dialect);
    case "models":
      return buildModelsSql(filters, dialect);
    case "projects":
      return buildProjectsSql(filters, dialect);
    default:
      throw new Error(`unknown analytics report: ${String(report)}`);
  }
}
2194
/**
 * Build the "sessions" report query over session_facts: newest sessions first,
 * optionally narrowed by source, start time, project, session id and a
 * substring of the source file path.
 *
 * @param {object} filters - Report filters (sessionId and sourcePathSubstring
 *   are read here; the rest are consumed by the shared filter helpers).
 * @param {string} dialect - SQL dialect passed to projectFilter.
 * @returns {string} The SQL query.
 */
function buildSessionsSql(filters, dialect) {
  const { sessionId, sourcePathSubstring } = filters;
  const conditions = [
    sourceFilter(filters),
    timeFilter("start_ts", filters),
    projectFilter(filters, dialect)
  ];
  conditions.push(sessionId ? `session_id = ${sqlString2(sessionId)}` : null);
  if (sourcePathSubstring) {
    // LIKE wildcards in the user's text are escaped so they match literally.
    const pattern = `%${escapeLike(sourcePathSubstring)}%`;
    conditions.push(`source_file_path LIKE ${sqlString2(pattern)} ESCAPE '\\'`);
  } else {
    conditions.push(null);
  }
  const where = buildWhere(conditions);
  return `
SELECT start_ts, source_tool, project_name, source_file_path, session_id,
source_session_id, model_last, duration_seconds,
message_count, tool_call_count, tool_result_count, tool_error_count,
tool_duration_ms, timeline_confidence, title
FROM session_facts
${where}
ORDER BY start_ts DESC NULLS LAST
LIMIT ${limit(filters)}
`;
}
2213
/**
 * Build the "tools" report query over tool_usage_facts: call and error counts,
 * average result duration and latest timestamp per tool / type / source / project.
 *
 * @param {object} filters - Report filters (toolName, canonicalType and
 *   errorsOnly are read here; the rest are consumed by the shared filter helpers).
 * @param {string} dialect - SQL dialect passed to projectFilter.
 * @returns {string} The SQL query.
 */
function buildToolsSql(filters, dialect) {
  const { toolName, canonicalType, errorsOnly } = filters;
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp_start", filters),
    projectFilter(filters, dialect)
  ];
  conditions.push(toolName ? `tool_name = ${sqlString2(toolName)}` : null);
  conditions.push(canonicalType ? `canonical_tool_type = ${sqlString2(canonicalType)}` : null);
  conditions.push(errorsOnly ? `(is_error = 1 OR call_status = 'error')` : null);
  const where = buildWhere(conditions);
  return `
SELECT tool_name, canonical_tool_type, source_tool, project_name,
count(*) AS call_count,
sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,
round(avg(result_duration_ms), 3) AS avg_result_duration_ms,
max(timestamp_start) AS latest_ts
FROM tool_usage_facts
${where}
GROUP BY tool_name, canonical_tool_type, source_tool, project_name
ORDER BY call_count DESC, error_count DESC, tool_name ASC
LIMIT ${limit(filters)}
`;
}
2235
/**
 * Build the SELECT statement for the "errors" report.
 *
 * @param {object} filters - Report filters (optionally `toolName`, `category`).
 * @param {string} dialect - SQL dialect, forwarded to the project filter.
 * @returns {string} SQL text selecting from `error_facts`, newest first.
 */
function buildErrorsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.toolName) {
    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
  }
  if (filters.category) {
    conditions.push(`error_category = ${sqlString2(filters.category)}`);
  }
  const whereClause = buildWhere(conditions);
  const rowLimit = limit(filters);
  return [
    "",
    "SELECT timestamp, error_category, source_tool, project_name, session_id,",
    "tool_name, status, exit_code, message, preview",
    "FROM error_facts",
    whereClause,
    "ORDER BY timestamp DESC NULLS LAST, error_id DESC",
    `LIMIT ${rowLimit}`,
    ""
  ].join("\n");
}
2252
/**
 * Build the SELECT statement for the "models" report.
 *
 * The time window is matched against the `first_seen_ts`/`last_seen_ts` range
 * rather than a single timestamp column.
 *
 * @param {object} filters - Report filters (optionally `model`).
 * @param {string} dialect - SQL dialect, forwarded to the project filter.
 * @returns {string} SQL text selecting from `model_usage`.
 */
function buildModelsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.model) {
    conditions.push(`model = ${sqlString2(filters.model)}`);
  }
  const whereClause = buildWhere(conditions);
  const rowLimit = limit(filters);
  return [
    "",
    "SELECT model, source_tool, project_name, session_count, turn_count,",
    "message_count, observation_count, first_seen_ts, last_seen_ts",
    "FROM model_usage",
    whereClause,
    "ORDER BY session_count DESC, observation_count DESC, model ASC",
    `LIMIT ${rowLimit}`,
    ""
  ].join("\n");
}
2268
/**
 * Build the SELECT statement for the "projects" report.
 *
 * The time window is matched against the
 * `first_session_ts`/`latest_session_ts` range.
 *
 * @param {object} filters - Report filters.
 * @param {string} dialect - SQL dialect, forwarded to the project filter.
 * @returns {string} SQL text selecting from `project_activity`.
 */
function buildProjectsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_session_ts", "latest_session_ts", filters),
    projectFilter(filters, dialect)
  ];
  const whereClause = buildWhere(conditions);
  const rowLimit = limit(filters);
  return [
    "",
    "SELECT latest_session_ts, source_tool, project_name, project_path,",
    "session_count, message_count, tool_call_count, tool_error_count,",
    "low_confidence_session_count",
    "FROM project_activity",
    whereClause,
    "ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC",
    `LIMIT ${rowLimit}`,
    ""
  ].join("\n");
}
2284
/**
 * Condition restricting rows to a single source tool.
 *
 * @param {object} filters - Report filters; only `source` is read.
 * @returns {string|null} A `source_tool = '...'` condition, or null when no source is set.
 */
function sourceFilter(filters) {
  if (!filters.source) {
    return null;
  }
  return `source_tool = ${sqlString2(filters.source)}`;
}
2287
/**
 * Condition bounding a single timestamp column by `since` (inclusive) and
 * `until` (exclusive). Rows whose column is NULL always pass each bound.
 *
 * @param {string} column - Column name, interpolated verbatim into the SQL.
 * @param {object} filters - Report filters; `since` and `until` are read.
 * @returns {string|null} The combined condition, or null when neither bound is set.
 */
function timeFilter(column, filters) {
  const { since, until } = filters;
  const bounds = [];
  if (since) {
    bounds.push(`(${column} IS NULL OR ${column} >= ${sqlString2(since)})`);
  }
  if (until) {
    bounds.push(`(${column} IS NULL OR ${column} < ${sqlString2(until)})`);
  }
  if (bounds.length === 0) {
    return null;
  }
  return bounds.join(" AND ");
}
2295
/**
 * Condition keeping rows whose [firstColumn, lastColumn] range overlaps the
 * requested window: the range must end at or after `since` and start before
 * `until`. A NULL column passes its bound.
 *
 * @param {string} firstColumn - Column holding the start of the row's range.
 * @param {string} lastColumn - Column holding the end of the row's range.
 * @param {object} filters - Report filters; `since` and `until` are read.
 * @returns {string|null} The combined condition, or null when neither bound is set.
 */
function rangeOverlapFilter(firstColumn, lastColumn, filters) {
  const { since, until } = filters;
  const bounds = [];
  if (since) {
    bounds.push(`(${lastColumn} IS NULL OR ${lastColumn} >= ${sqlString2(since)})`);
  }
  if (until) {
    bounds.push(`(${firstColumn} IS NULL OR ${firstColumn} < ${sqlString2(until)})`);
  }
  if (bounds.length === 0) {
    return null;
  }
  return bounds.join(" AND ");
}
2305
/**
 * Condition matching a project by exact id, or by substring of its name or path.
 *
 * The substring comparison uses ILIKE for the "duckdb" dialect and LIKE for
 * any other dialect; LIKE wildcards in the user text are escaped with `\`.
 *
 * @param {object} filters - Report filters; only `project` is read.
 * @param {string} dialect - SQL dialect name.
 * @returns {string|null} A parenthesized OR condition, or null when no project is set.
 */
function projectFilter(filters, dialect) {
  const { project } = filters;
  if (!project) {
    return null;
  }
  const exactLiteral = sqlString2(project);
  const patternLiteral = sqlString2(`%${escapeLike(project)}%`);
  const likeOperator = dialect === "duckdb" ? "ILIKE" : "LIKE";
  const alternatives = [
    `project_id = ${exactLiteral}`,
    `project_name ${likeOperator} ${patternLiteral} ESCAPE '\\'`,
    `project_path ${likeOperator} ${patternLiteral} ESCAPE '\\'`
  ];
  return `(${alternatives.join(" OR ")})`;
}
2312
/**
 * Join the non-empty conditions into a WHERE clause.
 *
 * @param {Array<string|null>} filters - Conditions; falsy entries are skipped.
 * @returns {string} `WHERE a AND b ...`, or an empty string when nothing is active.
 */
function buildWhere(filters) {
  const active = [];
  for (const condition of filters) {
    if (condition) {
      active.push(condition);
    }
  }
  if (active.length === 0) {
    return "";
  }
  return `WHERE ${active.join(" AND ")}`;
}
2316
/**
 * Resolve the row limit for a report.
 *
 * A non-finite `filters.limit` (NaN, missing, non-number) is passed on as
 * undefined; `clampLimit` is then called with a maximum of 500 and a
 * fallback of 50.
 *
 * @param {object} filters - Report filters; only `limit` is read.
 * @returns {*} Whatever `clampLimit` returns.
 */
function limit(filters) {
  let requested;
  if (Number.isFinite(filters.limit)) {
    requested = filters.limit;
  }
  return clampLimit(requested, { max: 500, fallback: 50 });
}
2320
/**
 * Render a string as a single-quoted SQL literal, doubling embedded quotes.
 *
 * @param {string} value - Text to quote.
 * @returns {string} The quoted literal.
 */
function sqlString2(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
2323
/**
 * Escape LIKE metacharacters (`\`, `%`, `_`) by prefixing each with a backslash.
 *
 * @param {string} value - Raw user text.
 * @returns {string} Text safe to embed in a LIKE pattern that uses `\` as ESCAPE.
 */
function escapeLike(value) {
  // "\\$1" is a literal backslash followed by the captured metacharacter.
  return value.replace(/([\\%_])/g, "\\$1");
}
2326
+
2327
+ // src/cli/bundle.ts
2328
+ import path3 from "path";
2329
/**
 * Open the bundle at `storePath`, run `fn` with it, and close it afterwards.
 *
 * @param {string} storePath - Bundle directory; resolved to an absolute path first.
 * @param {Function} fn - Callback receiving the opened bundle; may be async.
 * @returns {Promise<*>} The value `fn` resolves to.
 */
async function withBundle(storePath, fn) {
  const absoluteStorePath = path3.resolve(storePath);
  const bundle = await openBundle(absoluteStorePath);
  try {
    const outcome = await fn(bundle);
    return outcome;
  } finally {
    // Runs whether fn resolved or threw.
    closeBundle(bundle);
  }
}
2337
+
2338
+ // src/cli/output.ts
2339
// Accepted values for --output-format.
var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
// Text placed between table columns.
var COL_SEPARATOR = " ";
// Character repeated to draw the rule under the table header.
var RULE_CHAR = "-";

/**
 * Validate an --output-format value.
 *
 * @param {string|undefined} value - Raw option value.
 * @param {string} fallback - Returned when `value` is undefined.
 * @returns {string} `value` when it is a known format, else `fallback` for undefined.
 * @throws {Error} When `value` is defined but not one of OUTPUT_FORMATS.
 */
function parseOutputFormat(value, fallback) {
  if (value === undefined) {
    return fallback;
  }
  const isKnown = OUTPUT_FORMATS.includes(value);
  if (!isKnown) {
    const expected = OUTPUT_FORMATS.join(", ");
    throw new Error(`invalid --output-format: ${value} (expected one of ${expected})`);
  }
  return value;
}
2349
/**
 * Print rows in the requested format.
 *
 * "json" and "csv" go to their own printers; "table" and "interactive" both
 * use the table printer. Any other format prints nothing.
 *
 * @param {Array<object>} rows - Result rows.
 * @param {object} opts - Options with `format`, plus whatever the chosen printer reads.
 * @returns {void}
 */
function printRows(rows, opts) {
  const { format } = opts;
  if (format === "json") {
    printJson(rows, opts);
  } else if (format === "csv") {
    printCsv(rows, opts);
  } else if (format === "table" || format === "interactive") {
    printTable(rows, opts);
  }
}
2363
/**
 * Write rows to stdout as 2-space-indented JSON followed by a newline.
 *
 * When `opts.meta` is set, the output is an object made of the meta fields
 * plus a `rows` key; otherwise the bare rows array is written.
 *
 * @param {Array<object>} rows - Result rows.
 * @param {object} opts - Options; only `meta` is read.
 * @returns {void}
 */
function printJson(rows, opts) {
  let payload = rows;
  if (opts.meta) {
    payload = { ...opts.meta, rows };
  }
  const serialized = JSON.stringify(payload, null, 2);
  process.stdout.write(`${serialized}\n`);
}
2368
/**
 * Write rows to stdout as CSV: one header line, then one line per row.
 *
 * @param {Array<object>} rows - Result rows, keyed by column name.
 * @param {object} opts - Options; `columns` gives the column names and their order.
 * @returns {void}
 */
function printCsv(rows, opts) {
  const { columns } = opts;
  const headerLine = columns.map(csvField).join(",");
  process.stdout.write(`${headerLine}\n`);
  rows.forEach((record) => {
    const fields = columns.map((column) => csvField(formatCell(record[column])));
    process.stdout.write(`${fields.join(",")}\n`);
  });
}
2379
/**
 * Quote a CSV field when required.
 *
 * A field is wrapped in double quotes (with embedded quotes doubled) when it
 * contains a double quote, a comma, or a line break. Carriage returns count
 * as line breaks too: previously a bare `\r` was emitted unquoted, which many
 * CSV readers treat as a record separator.
 *
 * @param {string} value - Already-formatted cell text.
 * @returns {string} The field, quoted only when necessary.
 */
function csvField(value) {
  if (/[",\r\n]/.test(value)) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
}
2383
/**
 * Write rows to stdout as a fixed-width text table.
 *
 * Each column is as wide as its longest cell or its header, whichever is
 * longer. The header and the rule beneath it are written in one call, then
 * each row in its own call.
 *
 * @param {Array<object>} rows - Result rows, keyed by column name.
 * @param {object} opts - Options; `columns` gives the column names and their order.
 * @returns {void}
 */
function printTable(rows, opts) {
  const { columns } = opts;
  const cells = rows.map((record) => columns.map((column) => formatCell(record[column])));
  const widths = columns.map((column, index) =>
    cells.reduce((widest, cellRow) => Math.max(widest, cellRow[index].length), column.length)
  );
  const renderLine = (texts) =>
    texts.map((text, index) => text.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
  const header = renderLine(columns);
  const rule = widths.map((width) => RULE_CHAR.repeat(width)).join(COL_SEPARATOR);
  process.stdout.write(`${header}\n${rule}\n`);
  for (const cellRow of cells) {
    process.stdout.write(`${renderLine(cellRow)}\n`);
  }
}
2406
/**
 * Convert a cell value to display text.
 *
 * null/undefined become an empty string; strings pass through; numbers,
 * booleans and bigints use `String()`; everything else is JSON-encoded.
 *
 * BigInt values are handled explicitly because `JSON.stringify` throws a
 * TypeError on them, both at the top level and when nested. Values that
 * `JSON.stringify` cannot represent (functions, symbols) yield an empty
 * string rather than `undefined`, so callers can rely on a string result.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} Display text; always a string.
 */
function formatCell(value) {
  if (value == null) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
    case "bigint":
      return String(value);
    default: {
      const encoded = JSON.stringify(value, (_key, nested) =>
        typeof nested === "bigint" ? nested.toString() : nested
      );
      return encoded ?? "";
    }
  }
}
2412
+
2413
+ // src/core/domain/types.ts
2414
+ var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
2415
+
2416
+ // src/cli/parsers.ts
2417
/**
 * Validate a search engine name.
 *
 * @param {string} value - Raw option value.
 * @returns {string} `value`, when it is "fts5" or "tantivy".
 * @throws {Error} For any other value.
 */
function parseSearchEngine(value) {
  switch (value) {
    case "fts5":
    case "tantivy":
      return value;
    default:
      throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
  }
}
2421
/**
 * Validate an MCP transport name.
 *
 * @param {string} value - Raw option value.
 * @returns {string} `value`, when it is "stdio" or "http".
 * @throws {Error} For any other value.
 */
function parseMcpTransport(value) {
  switch (value) {
    case "stdio":
    case "http":
      return value;
    default:
      throw new Error(`invalid transport: ${value} (expected stdio or http)`);
  }
}
2425
/**
 * Validate an optional source tool name against SOURCE_TOOLS.
 *
 * @param {string|undefined} value - Raw option value.
 * @returns {string|undefined} `value` when known, or undefined when not provided.
 * @throws {Error} When `value` is defined but not listed in SOURCE_TOOLS.
 */
function parseSourceTool(value) {
  if (value === undefined) {
    return undefined;
  }
  if (!SOURCE_TOOLS.includes(value)) {
    const expected = SOURCE_TOOLS.join(", ");
    throw new Error(`invalid source tool: ${value} (expected one of ${expected})`);
  }
  return value;
}
2430
+
2431
+ // src/cli/commands/analytics.ts
2432
/**
 * Build the `analytics` command with one subcommand per report.
 *
 * Subcommands are added in this order: sessions, tools, errors, models, projects.
 *
 * @returns {Command} The configured command.
 */
function analyticsCommand() {
  const command = new Command("analytics").description(
    "Run high-level analytics reports over exported Parquet files."
  );
  const reports = [
    ["sessions", "Summarize sessions by source, project and model."],
    ["tools", "Summarize tool usage, status, duration and errors."],
    ["errors", "List import errors, failed tool results and uncertainties."],
    ["models", "Summarize model usage by source, project and time."],
    ["projects", "Summarize project activity and operational counts."]
  ];
  for (const [report, description] of reports) {
    command.addCommand(reportCommand(report, description));
  }
  return command;
}
2447
/**
 * Build the subcommand for one analytics report.
 *
 * Starts from the common options, adds the report-specific ones, and installs
 * an action that resolves the Parquet directory, runs the report and prints
 * the rows in the requested output format.
 *
 * @param {string} report - Report name; also the subcommand name.
 * @param {string} description - Help text for the subcommand.
 * @returns {*} The value returned by `command.action(...)`.
 */
function reportCommand(report, description) {
  const command = addCommonOptions(new Command(report).description(description));
  const toolNameOption = ["--tool-name <name>", "filter by exact tool name"];
  const projectOption = ["--project <text>", "filter by project id, name, or path substring"];
  const reportOptions = new Map([
    [
      "tools",
      [
        toolNameOption,
        ["--canonical-type <type>", "filter by canonical tool type"],
        ["--errors-only", "only include tool calls with errors"]
      ]
    ],
    ["errors", [toolNameOption, ["--category <category>", "filter by error category"]]],
    ["models", [["--model <model>", "filter by exact model name"]]],
    ["projects", [projectOption]],
    ["sessions", [projectOption]]
  ]);
  // Chain each call on the previous call's return value, as a fluent chain would.
  let chained = command;
  for (const [flags, help] of reportOptions.get(report) ?? []) {
    chained = chained.option(flags, help);
  }
  return command.action(async (options) => {
    const format = parseOutputFormat(options.outputFormat, "table");
    const parquetDir = await resolveParquetDir(options);
    const filters = buildFilters(options);
    const { rows, columns } = await runAnalyticsReport({ parquetDir, report, filters });
    printRows(rows, {
      format,
      columns,
      meta: { report, count: rows.length }
    });
  });
}
2476
/**
 * Register the options shared by every analytics report on `command`.
 *
 * Each `.option(...)` call is made on the return value of the previous one,
 * and the result of the last call is returned.
 *
 * @param {object} command - Object exposing a chainable `option` method.
 * @returns {*} The result of the final `option` call.
 */
function addCommonOptions(command) {
  const optionSpecs = [
    ["--store <path>", "bundle directory", defaultBundlePath()],
    ["--parquet-dir <path>", "Parquet directory (default: <store>/parquet)"],
    ["--refresh", "export Parquet before running the report"],
    ["--source <tool>", "filter by source tool: cursor|codex|claude|gemini"],
    ["--since <iso>", "lower timestamp bound (inclusive)"],
    ["--until <iso>", "upper timestamp bound (exclusive)"],
    ["--limit <n>", "maximum rows", "50"],
    ["--output-format <fmt>", "interactive|table|json|csv", "table"]
  ];
  return optionSpecs.reduce((chained, spec) => chained.option(...spec), command);
}
2479
/**
 * Decide which Parquet directory a report should read.
 *
 * With `--refresh`, Parquet is exported first and the export's output
 * directory is returned. Otherwise an explicit `--parquet-dir` wins; failing
 * that, the bundle is opened to read its `paths.parquet`.
 *
 * @param {object} options - Parsed CLI options (`store`, `parquetDir`, `refresh`).
 * @returns {Promise<string>} The directory to read from.
 */
async function resolveParquetDir(options) {
  const storePath = path4.resolve(options.store);
  let outDir;
  if (options.parquetDir) {
    outDir = path4.resolve(options.parquetDir);
  }
  if (options.refresh) {
    const exported = await exportBundleParquet({ bundlePath: storePath, outDir });
    return exported.outDir;
  }
  if (outDir != null) {
    return outDir;
  }
  return await withBundle(storePath, (bundle) => bundle.paths.parquet);
}
2488
/**
 * Translate parsed CLI options into the report filter object.
 *
 * `source` is validated with `parseSourceTool` and `limit` is parsed as a
 * base-10 integer (possibly NaN); all other fields are copied as-is.
 *
 * @param {object} options - Parsed CLI options.
 * @returns {object} Filters for the SQL builders.
 */
function buildFilters(options) {
  const source = parseSourceTool(options.source);
  const { since, until } = options;
  const rowLimit = Number.parseInt(options.limit, 10);
  const { toolName, canonicalType, errorsOnly, category, model, project } = options;
  return {
    source,
    since,
    until,
    limit: rowLimit,
    toolName,
    canonicalType,
    errorsOnly,
    category,
    model,
    project
  };
}
2502
+
2503
+ // src/cli/commands/compile.ts
2504
+ import { Command as Command2 } from "commander";
2505
+
2506
+ // src/services/compile.ts
2507
+ init_errors();
2508
+ import os2 from "os";
2509
+ import path16 from "path";
2510
+
2511
+ // src/importers/claude/index.ts
2512
+ import { readFile as readFile4 } from "fs/promises";
2513
+ import path8 from "path";
2514
+
2515
+ // src/core/cas/index.ts
2516
+ init_db();
2517
+ import { mkdir as mkdir3, readFile as readFile2, writeFile as writeFile3 } from "fs/promises";
2518
+ import path5 from "path";
2519
+
2520
+ // src/core/cas/compress.ts
2521
+ import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
2522
// Inputs shorter than this many bytes are stored uncompressed.
var COMPRESS_THRESHOLD_BYTES = 256;
// Compression level passed to zstd.
var ZSTD_LEVEL = 3;

/**
 * Compress a payload with zstd unless it is below the size threshold.
 *
 * @param {Buffer|Uint8Array} input - Raw bytes.
 * @returns {{bytes: Buffer, compression: string}} The bytes to store and the
 *   compression tag: "none" for small inputs (a Buffer copy of the input),
 *   "zstd" otherwise.
 */
function compressBytes(input) {
  const belowThreshold = input.byteLength < COMPRESS_THRESHOLD_BYTES;
  const raw = Buffer.from(input);
  if (belowThreshold) {
    return { bytes: raw, compression: "none" };
  }
  const compressed = zstdCompress(raw, { compressionLevel: ZSTD_LEVEL });
  return { bytes: compressed, compression: "zstd" };
}
2531
/**
 * Reverse `compressBytes`.
 *
 * @param {Buffer} input - Stored bytes.
 * @param {string} compression - Compression tag recorded for the object.
 * @returns {Buffer} `input` itself when the tag is "none", else the zstd-decompressed bytes.
 */
function decompressBytes(input, compression) {
  return compression === "none" ? input : zstdDecompress(input);
}
2535
+
2536
+ // src/core/cas/hash.ts
2537
+ import { createHash } from "crypto";
2538
+ import { blake3 } from "@noble/hashes/blake3";
2539
+ import { bytesToHex } from "@noble/hashes/utils";
2540
/**
 * Hash bytes with BLAKE3 and return the digest as hex text.
 *
 * @param {Uint8Array} bytes - Data to hash.
 * @returns {string} Hex-encoded digest.
 */
function blake3Hex(bytes) {
  const digest = blake3(bytes);
  return bytesToHex(digest);
}
2543
/**
 * Hash data with SHA-256 and return the digest as hex text.
 *
 * @param {string|Buffer|Uint8Array} bytes - Data to hash.
 * @returns {string} Hex-encoded digest.
 */
function sha256Hex(bytes) {
  const hasher = createHash("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
2546
/**
 * Derive an object id from a hex hash by prefixing the algorithm tag.
 *
 * @param {string} hashHex - Hex-encoded hash.
 * @returns {string} `blake3:<hashHex>`.
 */
function objectIdFromHash(hashHex) {
  return ["blake3", hashHex].join(":");
}
2549
/**
 * Compute the bundle-relative storage path for an object.
 *
 * The first two pairs of hex characters become nested directory names; the
 * extension is `.zst` for zstd-compressed objects and `.bin` otherwise.
 *
 * @param {string} hashHex - Hex-encoded hash.
 * @param {string} compression - Compression tag.
 * @returns {string} Path of the form `objects/blake3/aa/bb/<hash><ext>`.
 */
function objectStoragePath(hashHex, compression) {
  const firstLevel = hashHex.slice(0, 2);
  const secondLevel = hashHex.slice(2, 4);
  let extension = ".bin";
  if (compression === "zstd") {
    extension = ".zst";
  }
  return `objects/blake3/${firstLevel}/${secondLevel}/${hashHex}${extension}`;
}
2555
+
2556
+ // src/core/cas/index.ts
2557
// Directories already created by ensureDir; lets repeat calls skip mkdir.
var ensuredDirs = /* @__PURE__ */ new Set();

/**
 * Create a directory (and its parents) unless this function already created it.
 *
 * @param {string} absoluteDir - Directory path.
 * @returns {Promise<void>}
 */
async function ensureDir(absoluteDir) {
  if (!ensuredDirs.has(absoluteDir)) {
    await mkdir3(absoluteDir, { recursive: true });
    // Recorded only after mkdir succeeded.
    ensuredDirs.add(absoluteDir);
  }
}
2563
/**
 * Store bytes in the content-addressed object store and return their object id.
 *
 * The id is derived from the BLAKE3 hash of the uncompressed bytes. If a row
 * with that id already exists, nothing is written. Otherwise the (possibly
 * compressed) bytes are written under the bundle directory and a row is
 * inserted into `objects`.
 *
 * The insert uses `INSERT OR IGNORE`, matching the batch flush path. The
 * existence check and the insert are separated by awaited file I/O, so two
 * overlapping calls with identical content can both pass the check; with a
 * plain INSERT the second would fail on the duplicate object id even though
 * the object is stored correctly.
 *
 * @param {object} bundle - Open bundle; `bundle.db` and `bundle.path` are used.
 * @param {Buffer|Uint8Array} bytes - Content to store.
 * @param {{mimeType?: string, encoding?: string}} [options] - Optional metadata
 *   recorded on the row (null when absent).
 * @returns {Promise<string>} The object id.
 */
async function putBytes(bundle, bytes, options = {}) {
  const hash = blake3Hex(bytes);
  const objectId = objectIdFromHash(hash);
  const existing = prepare(
    bundle.db,
    [
      "SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
      "compression, mime_type, encoding, storage_path, created_at",
      "FROM objects WHERE object_id = ?"
    ].join("\n")
  ).get(objectId);
  if (existing) return objectId;
  const { bytes: stored, compression } = compressBytes(bytes);
  const storagePath = objectStoragePath(hash, compression);
  const absolutePath = path5.join(bundle.path, storagePath);
  await ensureDir(path5.dirname(absolutePath));
  await writeFile3(absolutePath, stored);
  prepare(
    bundle.db,
    [
      "INSERT OR IGNORE INTO objects (",
      "object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
      "compression, mime_type, encoding, storage_path, created_at",
      ") VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)"
    ].join("\n")
  ).run(
    objectId,
    hash,
    bytes.byteLength,
    // The compressed size is only recorded when compression was applied.
    compression === "zstd" ? stored.byteLength : null,
    compression,
    options.mimeType ?? null,
    options.encoding ?? null,
    storagePath,
    (/* @__PURE__ */ new Date()).toISOString()
  );
  return objectId;
}
2597
/**
 * Serialize a value as JSON and store it via `putBytes`.
 *
 * @param {object} bundle - Open bundle.
 * @param {*} value - Value to serialize with `JSON.stringify`.
 * @returns {Promise<string>} The object id returned by `putBytes`.
 */
async function putJson(bundle, value) {
  const payload = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return putBytes(bundle, payload, metadata);
}
2604
/**
 * Load an object's original bytes from the store.
 *
 * Looks up the object's row, reads the file at its `storage_path` under the
 * bundle directory, and decompresses according to the recorded compression tag.
 *
 * @param {object} bundle - Open bundle; `bundle.db` and `bundle.path` are used.
 * @param {string} objectId - Id of the object to load.
 * @returns {Promise<Buffer>} The decompressed content.
 * @throws {Error} When no row exists for `objectId`.
 */
async function getBytes(bundle, objectId) {
  const lookupSql = [
    "SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
    "compression, mime_type, encoding, storage_path, created_at",
    "FROM objects WHERE object_id = ?"
  ].join("\n");
  const row = prepare(bundle.db, lookupSql).get(objectId);
  if (row) {
    const absolutePath = path5.join(bundle.path, row.storage_path);
    const stored = await readFile2(absolutePath);
    return decompressBytes(stored, row.compression);
  }
  throw new Error(`object not found: ${objectId}`);
}
2617
/**
 * Load an object and decode it as UTF-8 text.
 *
 * @param {object} bundle - Open bundle.
 * @param {string} objectId - Id of the object to load.
 * @returns {Promise<string>} The decoded text.
 */
async function getText(bundle, objectId) {
  const content = await getBytes(bundle, objectId);
  return content.toString("utf8");
}
2621
/**
 * Create an empty staging area for objects awaiting a flush.
 *
 * @returns {{byId: Map}} A container whose `byId` map starts empty.
 */
function createPendingObjects() {
  const byId = /* @__PURE__ */ new Map();
  return { byId };
}
2624
/**
 * Stage bytes for a later flush and return their object id.
 *
 * The id comes from the BLAKE3 hash of the content. Staging the same content
 * again keeps the first entry, including its metadata.
 *
 * @param {{byId: Map}} pending - Staging area.
 * @param {Buffer|Uint8Array} bytes - Content; non-Buffer input is copied into a Buffer.
 * @param {{mimeType?: string, encoding?: string}} [options] - Optional metadata
 *   (null when absent).
 * @returns {string} The object id.
 */
function stageBytes(pending, bytes, options = {}) {
  const content = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  const hash = blake3Hex(content);
  const objectId = objectIdFromHash(hash);
  if (pending.byId.has(objectId)) {
    return objectId;
  }
  const staged = {
    objectId,
    hash,
    bytes: content,
    mimeType: options.mimeType ?? null,
    encoding: options.encoding ?? null
  };
  pending.byId.set(objectId, staged);
  return objectId;
}
2639
/**
 * Stage UTF-8 text for a later flush.
 *
 * @param {{byId: Map}} pending - Staging area.
 * @param {string} text - Text to encode as UTF-8.
 * @param {{mimeType?: string}} [options] - Optional MIME type; defaults to
 *   "text/plain; charset=utf-8". The encoding is always recorded as "utf-8".
 * @returns {string} The object id returned by `stageBytes`.
 */
function stageText(pending, text, options = {}) {
  const encoded = Buffer.from(text, "utf8");
  const mimeType = options.mimeType ?? "text/plain; charset=utf-8";
  return stageBytes(pending, encoded, { mimeType, encoding: "utf-8" });
}
2645
/**
 * Stage a value serialized as JSON for a later flush.
 *
 * @param {{byId: Map}} pending - Staging area.
 * @param {*} value - Value to serialize with `JSON.stringify`.
 * @returns {string} The object id returned by `stageBytes`.
 */
function stageJson(pending, value) {
  const encoded = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return stageBytes(pending, encoded, metadata);
}
2651
+ async function flushPendingObjects(bundle, pending) {
2652
+ if (pending.byId.size === 0) return;
2653
+ const ids = [...pending.byId.keys()];
2654
+ const existingIds = queryExistingObjectIds(bundle, ids);
2655
+ const toWrite = [];
2656
+ for (const obj of pending.byId.values()) {
2657
+ if (existingIds.has(obj.objectId)) continue;
2658
+ const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
2659
+ const storagePath = objectStoragePath(obj.hash, compression);
2660
+ toWrite.push({
2661
+ staged: obj,
2662
+ compression,
2663
+ compressedBytes,
2664
+ storagePath,
2665
+ absolutePath: path5.join(bundle.path, storagePath)
2666
+ });
1622
2667
  }
1623
- if (!await exists(paths.manifest)) {
1624
- throw new Error(
1625
- `no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
1626
- );
2668
+ if (toWrite.length > 0) {
2669
+ await writeFilesParallel(toWrite);
1627
2670
  }
1628
- const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
1629
- await mkdir(paths.search, { recursive: true });
1630
- await mkdir(paths.tantivy, { recursive: true });
1631
- const db = openDb(paths.db);
1632
- runMigrations(db);
1633
- const currentVersion = currentSchemaVersion(db);
1634
- if (currentVersion !== PROSA_SCHEMA_VERSION) {
1635
- closeDb(db);
1636
- throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
2671
+ const insertObject = prepare(
2672
+ bundle.db,
2673
+ `INSERT OR IGNORE INTO objects (
2674
+ object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
2675
+ compression, mime_type, encoding, storage_path, created_at
2676
+ ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
2677
+ );
2678
+ const now = (/* @__PURE__ */ new Date()).toISOString();
2679
+ for (const p of toWrite) {
2680
+ insertObject.run(
2681
+ p.staged.objectId,
2682
+ p.staged.hash,
2683
+ p.staged.bytes.byteLength,
2684
+ p.compression === "zstd" ? p.compressedBytes.byteLength : null,
2685
+ p.compression,
2686
+ p.staged.mimeType,
2687
+ p.staged.encoding,
2688
+ p.storagePath,
2689
+ now
2690
+ );
1637
2691
  }
1638
- if (manifest.parser_version !== PROSA_PARSER_VERSION) {
1639
- manifest.parser_version = PROSA_PARSER_VERSION;
1640
- await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
1641
- `, "utf8");
2692
+ }
2693
+ function queryExistingObjectIds(bundle, ids) {
2694
+ const found = /* @__PURE__ */ new Set();
2695
+ if (ids.length === 0) return found;
2696
+ const CHUNK = 500;
2697
+ for (let start = 0; start < ids.length; start += CHUNK) {
2698
+ const slice = ids.slice(start, start + CHUNK);
2699
+ const placeholders = slice.map(() => "?").join(",");
2700
+ const rows = bundle.db.prepare(
2701
+ `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
2702
+ ).all(...slice);
2703
+ for (const row of rows) found.add(row.object_id);
1642
2704
  }
1643
- return { path: resolved, db, manifest, paths };
2705
+ return found;
1644
2706
  }
1645
- function closeBundle(bundle) {
1646
- closeDb(bundle.db);
2707
// Upper bound on the number of concurrent file-writing workers.
var FS_WRITE_CONCURRENCY = 16;

/**
 * Write all tasks to disk using a bounded pool of workers.
 *
 * Each worker repeatedly claims the next unclaimed task index, ensures the
 * task's parent directory exists, and writes its bytes. The pool has at most
 * FS_WRITE_CONCURRENCY workers and never more than there are tasks.
 *
 * @param {Array<{absolutePath: string, compressedBytes: Buffer}>} tasks - Files to write.
 * @returns {Promise<void>} Resolves once every worker has finished.
 */
async function writeFilesParallel(tasks) {
  let nextIndex = 0;
  const runWorker = async () => {
    // The index is claimed synchronously, so no two workers take the same task.
    for (let i = nextIndex++; i < tasks.length; i = nextIndex++) {
      const task = tasks[i];
      await ensureDir(path5.dirname(task.absolutePath));
      await writeFile3(task.absolutePath, task.compressedBytes);
    }
  };
  const workerCount = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
  const workers = Array.from({ length: workerCount }, () => runWorker());
  await Promise.all(workers);
}
1648
2727
 
1649
- // src/services/compile.ts
1650
- init_errors();
1651
- import os2 from "os";
1652
- import path14 from "path";
1653
-
1654
2728
  // src/importers/claude/index.ts
1655
- init_cas();
1656
2729
  init_db();
1657
- import { readFile as readFile4 } from "fs/promises";
1658
- import path5 from "path";
1659
2730
 
1660
2731
  // src/core/domain/ids.ts
1661
- init_hash();
1662
2732
  var ID_PREFIX_BYTES = 16;
1663
2733
  function tupleId(parts) {
1664
2734
  return sha256Hex(parts.join("\0")).slice(0, ID_PREFIX_BYTES * 2);
@@ -1704,7 +2774,6 @@ function importBatchId(sourceTool, startedAtIso) {
1704
2774
  init_errors();
1705
2775
 
1706
2776
  // src/core/ingest/batch.ts
1707
- init_cas();
1708
2777
  init_db();
1709
2778
  function emptyCounts() {
1710
2779
  return {
@@ -1772,12 +2841,9 @@ async function recordError(bundle, batchId, args) {
1772
2841
  }
1773
2842
 
1774
2843
  // src/core/ingest/idempotency.ts
1775
- init_compress();
1776
- init_hash();
1777
- init_cas();
2844
+ import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile4 } from "fs/promises";
2845
+ import path6 from "path";
1778
2846
  init_db();
1779
- import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
1780
- import path3 from "path";
1781
2847
  async function registerSourceFile(bundle, args) {
1782
2848
  const st = await stat2(args.absolutePath);
1783
2849
  const size = st.size;
@@ -1861,10 +2927,10 @@ async function preserveRawSourceBytes(bundle, bytes) {
1861
2927
  const objectId = objectIdFromHash(hash);
1862
2928
  const { bytes: stored, compression } = compressBytes(bytes);
1863
2929
  const storagePath = rawSourceStoragePath(hash, compression);
1864
- const absolutePath = path3.join(bundle.path, storagePath);
1865
- await ensureDir(path3.dirname(absolutePath));
2930
+ const absolutePath = path6.join(bundle.path, storagePath);
2931
+ await ensureDir(path6.dirname(absolutePath));
1866
2932
  if (!await fileExists(absolutePath)) {
1867
- await writeFile3(absolutePath, stored);
2933
+ await writeFile4(absolutePath, stored);
1868
2934
  }
1869
2935
  const existing = prepare(
1870
2936
  bundle.db,
@@ -1906,12 +2972,12 @@ async function fileExists(filePath) {
1906
2972
 
1907
2973
  // src/importers/claude/discover.ts
1908
2974
  import { readdir } from "fs/promises";
1909
- import path4 from "path";
2975
+ import path7 from "path";
1910
2976
  async function* discoverClaudeFiles(root) {
1911
2977
  const projectDirs = await readdirSafe(root);
1912
2978
  for (const project of projectDirs) {
1913
2979
  if (!project.isDirectory()) continue;
1914
- const projectRoot = path4.join(root, project.name);
2980
+ const projectRoot = path7.join(root, project.name);
1915
2981
  yield* walkProject(projectRoot, project.name);
1916
2982
  }
1917
2983
  }
@@ -1920,7 +2986,7 @@ async function* walkProject(projectRoot, projectSlug) {
1920
2986
  for (const entry of entries) {
1921
2987
  if (entry.isFile() && entry.name.endsWith(".jsonl")) {
1922
2988
  yield {
1923
- filePath: path4.join(projectRoot, entry.name),
2989
+ filePath: path7.join(projectRoot, entry.name),
1924
2990
  projectSlug,
1925
2991
  isSubagent: false,
1926
2992
  parentSessionId: null,
@@ -1930,18 +2996,18 @@ async function* walkProject(projectRoot, projectSlug) {
1930
2996
  continue;
1931
2997
  }
1932
2998
  if (entry.isDirectory()) {
1933
- const subagentsDir = path4.join(projectRoot, entry.name, "subagents");
2999
+ const subagentsDir = path7.join(projectRoot, entry.name, "subagents");
1934
3000
  const subagentEntries = await readdirSafe(subagentsDir);
1935
3001
  for (const sub of subagentEntries) {
1936
3002
  if (!sub.isFile() || !sub.name.endsWith(".jsonl")) continue;
1937
3003
  if (!sub.name.startsWith("agent-")) continue;
1938
3004
  const agentId = sub.name.slice("agent-".length, -".jsonl".length);
1939
- const metaCandidate = path4.join(subagentsDir, `agent-${agentId}.meta.json`);
3005
+ const metaCandidate = path7.join(subagentsDir, `agent-${agentId}.meta.json`);
1940
3006
  const metaExists = subagentEntries.some(
1941
3007
  (e) => e.isFile() && e.name === `agent-${agentId}.meta.json`
1942
3008
  );
1943
3009
  yield {
1944
- filePath: path4.join(subagentsDir, sub.name),
3010
+ filePath: path7.join(subagentsDir, sub.name),
1945
3011
  projectSlug,
1946
3012
  isSubagent: true,
1947
3013
  parentSessionId: entry.name,
@@ -2061,7 +3127,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
2061
3127
  const counts = emptyFileCounts();
2062
3128
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
2063
3129
  sourceTool: "claude",
2064
- absolutePath: path5.resolve(file.filePath),
3130
+ absolutePath: path8.resolve(file.filePath),
2065
3131
  fileKind: "jsonl",
2066
3132
  workspaceHint: file.projectSlug
2067
3133
  });
@@ -2161,7 +3227,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
2161
3227
  pending.session.parent_session_id_pending = parentSid;
2162
3228
  }
2163
3229
  }
2164
- const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path5.basename(file.filePath)}`);
3230
+ const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path8.basename(file.filePath)}`);
2165
3231
  const type = typeof parsed.type === "string" ? parsed.type : null;
2166
3232
  if (type === "user" || type === "assistant") {
2167
3233
  const msgRole = type === "user" ? "user" : "assistant";
@@ -2903,15 +3969,14 @@ function flushPending(bundle, pending, meta) {
2903
3969
  }
2904
3970
 
2905
3971
  // src/importers/codex/index.ts
2906
- init_cas();
2907
- init_db();
2908
3972
  import { readFile as readFile5 } from "fs/promises";
2909
- import path7 from "path";
3973
+ import path10 from "path";
3974
+ init_db();
2910
3975
  init_errors();
2911
3976
 
2912
3977
  // src/importers/codex/discover.ts
2913
3978
  import { readdir as readdir2 } from "fs/promises";
2914
- import path6 from "path";
3979
+ import path9 from "path";
2915
3980
  async function* discoverCodexSessions(root) {
2916
3981
  yield* walk(root);
2917
3982
  }
@@ -2923,7 +3988,7 @@ async function* walk(dir) {
2923
3988
  return;
2924
3989
  }
2925
3990
  for (const entry of entries) {
2926
- const full = path6.join(dir, entry.name);
3991
+ const full = path9.join(dir, entry.name);
2927
3992
  if (entry.isDirectory()) {
2928
3993
  yield* walk(full);
2929
3994
  } else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
@@ -3025,7 +4090,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
3025
4090
  const counts = emptyFileCounts2();
3026
4091
  const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
3027
4092
  sourceTool: "codex",
3028
- absolutePath: path7.resolve(filePath),
4093
+ absolutePath: path10.resolve(filePath),
3029
4094
  fileKind: "jsonl"
3030
4095
  });
3031
4096
  if (alreadyKnown) {
@@ -3111,7 +4176,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
3111
4176
  const payload = parsed.payload ?? {};
3112
4177
  if (type === "session_meta") {
3113
4178
  const meta = payload;
3114
- const sourceSessionId = meta.id ?? path7.basename(filePath, ".jsonl");
4179
+ const sourceSessionId = meta.id ?? path10.basename(filePath, ".jsonl");
3115
4180
  const sessionId3 = sessionId("codex", sourceSessionId);
3116
4181
  if (!pending.session) {
3117
4182
  const sub = parseSubagent(meta.source);
@@ -3143,11 +4208,11 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
3143
4208
  }
3144
4209
  continue;
3145
4210
  }
3146
- const sessionId2 = pending.session?.session_id ?? sessionId("codex", path7.basename(filePath, ".jsonl"));
4211
+ const sessionId2 = pending.session?.session_id ?? sessionId("codex", path10.basename(filePath, ".jsonl"));
3147
4212
  if (!pending.session) {
3148
4213
  pending.session = {
3149
4214
  session_id: sessionId2,
3150
- source_session_id: path7.basename(filePath, ".jsonl"),
4215
+ source_session_id: path10.basename(filePath, ".jsonl"),
3151
4216
  parent_session_id: null,
3152
4217
  is_subagent: 0,
3153
4218
  agent_role: null,
@@ -4044,25 +5109,24 @@ function flushPending2(bundle, pending, meta) {
4044
5109
  }
4045
5110
 
4046
5111
  // src/importers/cursor/index.ts
4047
- init_cas();
4048
- init_db();
4049
- import path9 from "path";
5112
+ import path12 from "path";
4050
5113
  import Database2 from "better-sqlite3";
5114
+ init_db();
4051
5115
  init_errors();
4052
5116
 
4053
5117
  // src/importers/cursor/discover.ts
4054
5118
  import { readdir as readdir3 } from "fs/promises";
4055
- import path8 from "path";
5119
+ import path11 from "path";
4056
5120
  async function* discoverCursorStores(root) {
4057
5121
  const workspaces = await readdirSafe2(root);
4058
5122
  for (const ws of workspaces) {
4059
5123
  if (!ws.isDirectory()) continue;
4060
- const wsPath = path8.join(root, ws.name);
5124
+ const wsPath = path11.join(root, ws.name);
4061
5125
  const agents = await readdirSafe2(wsPath);
4062
5126
  for (const ag of agents) {
4063
5127
  if (!ag.isDirectory()) continue;
4064
- const dbPath = path8.join(wsPath, ag.name, "store.db");
4065
- const dbEntries = await readdirSafe2(path8.join(wsPath, ag.name));
5128
+ const dbPath = path11.join(wsPath, ag.name, "store.db");
5129
+ const dbEntries = await readdirSafe2(path11.join(wsPath, ag.name));
4066
5130
  const hasStoreDb = dbEntries.some((e) => e.isFile() && e.name === "store.db");
4067
5131
  if (!hasStoreDb) continue;
4068
5132
  yield {
@@ -4161,7 +5225,7 @@ async function compileCursorStore(bundle, batch, store, logger) {
4161
5225
  const counts = emptyFileCounts3();
4162
5226
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
4163
5227
  sourceTool: "cursor",
4164
- absolutePath: path9.resolve(store.filePath),
5228
+ absolutePath: path12.resolve(store.filePath),
4165
5229
  fileKind: "sqlite",
4166
5230
  workspaceHint: store.workspaceId
4167
5231
  });
@@ -4763,29 +5827,27 @@ function flushPending3(bundle, pending) {
4763
5827
  }
4764
5828
 
4765
5829
  // src/importers/gemini/index.ts
4766
- init_hash();
4767
- init_cas();
4768
- init_db();
4769
5830
  import { readFile as readFile7 } from "fs/promises";
4770
- import path11 from "path";
5831
+ import path14 from "path";
5832
+ init_db();
4771
5833
  init_errors();
4772
5834
 
4773
5835
  // src/importers/gemini/discover.ts
4774
5836
  import { readFile as readFile6, readdir as readdir4 } from "fs/promises";
4775
- import path10 from "path";
5837
+ import path13 from "path";
4776
5838
  async function* discoverGeminiChats(root) {
4777
5839
  const entries = await readdirSafe3(root);
4778
5840
  for (const entry of entries) {
4779
5841
  if (!entry.isDirectory()) continue;
4780
5842
  if (entry.name === "bin") continue;
4781
- const projectRoot = await readProjectRoot(path10.join(root, entry.name));
4782
- const chatsDir = path10.join(root, entry.name, "chats");
5843
+ const projectRoot = await readProjectRoot(path13.join(root, entry.name));
5844
+ const chatsDir = path13.join(root, entry.name, "chats");
4783
5845
  const chatEntries = await readdirSafe3(chatsDir);
4784
5846
  for (const c of chatEntries) {
4785
5847
  if (!c.isFile()) continue;
4786
5848
  if (!c.name.startsWith("session-") || !c.name.endsWith(".json")) continue;
4787
5849
  yield {
4788
- filePath: path10.join(chatsDir, c.name),
5850
+ filePath: path13.join(chatsDir, c.name),
4789
5851
  projectDir: entry.name,
4790
5852
  projectRoot
4791
5853
  };
@@ -4794,7 +5856,7 @@ async function* discoverGeminiChats(root) {
4794
5856
  }
4795
5857
  async function readProjectRoot(dir) {
4796
5858
  try {
4797
- const text = await readFile6(path10.join(dir, ".project_root"), "utf8");
5859
+ const text = await readFile6(path13.join(dir, ".project_root"), "utf8");
4798
5860
  return text.replace(/\n+$/, "").trim() || null;
4799
5861
  } catch {
4800
5862
  return null;
@@ -4888,7 +5950,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
4888
5950
  const counts = emptyFileCounts4();
4889
5951
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
4890
5952
  sourceTool: "gemini",
4891
- absolutePath: path11.resolve(file.filePath),
5953
+ absolutePath: path14.resolve(file.filePath),
4892
5954
  fileKind: "json",
4893
5955
  workspaceHint: file.projectDir
4894
5956
  });
@@ -4941,7 +6003,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
4941
6003
  project: null,
4942
6004
  objects
4943
6005
  };
4944
- const sourceSid = parsed.sessionId ?? path11.basename(file.filePath, ".json");
6006
+ const sourceSid = parsed.sessionId ?? path14.basename(file.filePath, ".json");
4945
6007
  const sessionPk = sessionId("gemini", sourceSid);
4946
6008
  if (file.projectRoot) {
4947
6009
  pending.project = {
@@ -5522,143 +6584,6 @@ function flushPending4(bundle, pending) {
5522
6584
  }
5523
6585
  }
5524
6586
 
5525
- // src/services/export/parquet.ts
5526
- import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
5527
- import path12 from "path";
5528
- import { DuckDBConnection } from "@duckdb/node-api";
5529
- init_errors();
5530
- var PARQUET_TABLES = [
5531
- "objects",
5532
- "source_files",
5533
- "import_batches",
5534
- "raw_records",
5535
- "import_errors",
5536
- "uncertainties",
5537
- "projects",
5538
- "sessions",
5539
- "turns",
5540
- "events",
5541
- "messages",
5542
- "content_blocks",
5543
- "tool_calls",
5544
- "tool_results",
5545
- "artifacts",
5546
- "edges",
5547
- "search_docs"
5548
- ];
5549
- async function exportBundleParquet(options) {
5550
- const snapshot = await openBundleSnapshot(options.bundlePath);
5551
- const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
5552
- await mkdir3(outDir, { recursive: true });
5553
- const files = Object.fromEntries(
5554
- PARQUET_TABLES.map((table) => [table, path12.join(outDir, `${table}.parquet`)])
5555
- );
5556
- const manifestPath = path12.join(outDir, "manifest.json");
5557
- for (const file of [...Object.values(files), manifestPath]) {
5558
- await rm(file, { force: true });
5559
- }
5560
- const connection = await createDuckDbConnection();
5561
- try {
5562
- await attachSqlite(connection, snapshot.dbPath);
5563
- for (const table of PARQUET_TABLES) {
5564
- await connection.run(
5565
- `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
5566
- );
5567
- }
5568
- } finally {
5569
- connection.closeSync();
5570
- }
5571
- const manifest = {
5572
- exported_at: (/* @__PURE__ */ new Date()).toISOString(),
5573
- source_db: snapshot.dbPath,
5574
- schema_version: snapshot.schemaVersion,
5575
- parser_version: snapshot.parserVersion,
5576
- tables: Object.fromEntries(
5577
- PARQUET_TABLES.map((table) => [
5578
- table,
5579
- {
5580
- file: path12.basename(files[table]),
5581
- rows: snapshot.counts[table]
5582
- }
5583
- ])
5584
- )
5585
- };
5586
- await writeFile4(manifestPath, `${JSON.stringify(manifest, null, 2)}
5587
- `, "utf8");
5588
- return { outDir, manifestPath, files, counts: snapshot.counts };
5589
- }
5590
- async function queryDuckDbParquet(options) {
5591
- const parquetDir = path12.resolve(options.parquetDir);
5592
- const connection = await createDuckDbConnection();
5593
- try {
5594
- for (const table of PARQUET_TABLES) {
5595
- await connection.run(
5596
- `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
5597
- path12.join(parquetDir, `${table}.parquet`)
5598
- )})`
5599
- );
5600
- }
5601
- const reader = await connection.runAndReadAll(options.sql);
5602
- return {
5603
- columns: reader.deduplicatedColumnNames(),
5604
- rows: reader.getRowObjectsJson()
5605
- };
5606
- } catch (error) {
5607
- if (isMissingParquetError(error)) {
5608
- throw new Error(
5609
- `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
5610
- );
5611
- }
5612
- throw error;
5613
- } finally {
5614
- connection.closeSync();
5615
- }
5616
- }
5617
- async function createDuckDbConnection() {
5618
- return DuckDBConnection.create();
5619
- }
5620
- async function attachSqlite(connection, dbPath) {
5621
- try {
5622
- await connection.run("INSTALL sqlite");
5623
- await connection.run("LOAD sqlite");
5624
- await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
5625
- } catch (error) {
5626
- throw new Error(
5627
- `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`
5628
- );
5629
- }
5630
- }
5631
- async function openBundleSnapshot(bundlePath) {
5632
- const bundle = await openBundle(bundlePath);
5633
- try {
5634
- const counts = Object.fromEntries(
5635
- PARQUET_TABLES.map((table) => {
5636
- const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
5637
- return [table, row?.n ?? 0];
5638
- })
5639
- );
5640
- return {
5641
- dbPath: bundle.paths.db,
5642
- schemaVersion: bundle.manifest.schema_version,
5643
- parserVersion: bundle.manifest.parser_version,
5644
- defaultOutDir: bundle.paths.parquet,
5645
- counts
5646
- };
5647
- } finally {
5648
- closeBundle(bundle);
5649
- }
5650
- }
5651
- function quoteIdentifier(value) {
5652
- return `"${value.replace(/"/g, '""')}"`;
5653
- }
5654
- function sqlString(value) {
5655
- return `'${value.replace(/'/g, "''")}'`;
5656
- }
5657
- function isMissingParquetError(error) {
5658
- const message = getErrorMessage(error);
5659
- return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
5660
- }
5661
-
5662
6587
  // src/services/compile.ts
5663
6588
  init_indexing();
5664
6589
  var COMPILE_PROVIDERS = [
@@ -5666,28 +6591,28 @@ var COMPILE_PROVIDERS = [
5666
6591
  name: "codex",
5667
6592
  description: "Import Codex CLI session histories into the bundle.",
5668
6593
  pathHelp: "root of Codex CLI sessions",
5669
- defaultSessionsPath: () => path14.join(os2.homedir(), ".codex", "sessions"),
6594
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".codex", "sessions"),
5670
6595
  compile: compileCodex
5671
6596
  },
5672
6597
  {
5673
6598
  name: "claude",
5674
6599
  description: "Import Claude Code project histories into the bundle.",
5675
6600
  pathHelp: "root of Claude Code projects",
5676
- defaultSessionsPath: () => path14.join(os2.homedir(), ".claude", "projects"),
6601
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".claude", "projects"),
5677
6602
  compile: compileClaude
5678
6603
  },
5679
6604
  {
5680
6605
  name: "gemini",
5681
6606
  description: "Import Gemini CLI session histories into the bundle.",
5682
6607
  pathHelp: "root of Gemini CLI tmp dir",
5683
- defaultSessionsPath: () => path14.join(os2.homedir(), ".gemini", "tmp"),
6608
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".gemini", "tmp"),
5684
6609
  compile: compileGemini
5685
6610
  },
5686
6611
  {
5687
6612
  name: "cursor",
5688
6613
  description: "Import Cursor agent stores into the bundle.",
5689
6614
  pathHelp: "root of Cursor agent stores",
5690
- defaultSessionsPath: () => path14.join(os2.homedir(), ".cursor", "chats"),
6615
+ defaultSessionsPath: () => path16.join(os2.homedir(), ".cursor", "chats"),
5691
6616
  compile: compileCursor
5692
6617
  }
5693
6618
  ];
@@ -5700,20 +6625,20 @@ function getCompileProvider(source) {
5700
6625
  }
5701
6626
  function resolveCompilePath(p) {
5702
6627
  if (p === "~") return os2.homedir();
5703
- if (p.startsWith("~/")) return path14.join(os2.homedir(), p.slice(2));
5704
- return path14.resolve(p);
6628
+ if (p.startsWith("~/")) return path16.join(os2.homedir(), p.slice(2));
6629
+ return path16.resolve(p);
5705
6630
  }
5706
6631
  async function runCompileImports(options) {
5707
- const { bundle, providers, deferIndex, logger } = options;
6632
+ const { bundle, providers, logger } = options;
6633
+ const overwrite = options.overwrite === true;
5708
6634
  let importedAny = false;
5709
6635
  const summaries = [];
5710
6636
  let tantivy = null;
5711
6637
  let tantivyError = null;
6638
+ let fts5Error = null;
5712
6639
  try {
5713
- if (deferIndex) {
5714
- logger?.info("disabling FTS5 triggers for deferred indexing");
5715
- disableFts5Triggers(bundle);
5716
- }
6640
+ logger?.info("disabling FTS5 triggers for bulk rebuild");
6641
+ disableFts5Triggers(bundle);
5717
6642
  for (const provider of providers) {
5718
6643
  const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
5719
6644
  const providerLogger = logger?.child({
@@ -5740,15 +6665,23 @@ async function runCompileImports(options) {
5740
6665
  summaries.push(summary);
5741
6666
  options.onProviderComplete?.(summary);
5742
6667
  }
5743
- logger?.info({ changed: importedAny, fts5_deferred: deferIndex }, "marking indexes");
5744
- markIndexesAfterImport(bundle, {
5745
- changed: importedAny,
5746
- fts5Deferred: deferIndex
5747
- });
5748
- if (importedAny) {
6668
+ const shouldRebuildIndexes = importedAny || overwrite;
6669
+ if (shouldRebuildIndexes) {
6670
+ logger?.info(
6671
+ { changed: importedAny, overwrite },
6672
+ importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
6673
+ );
6674
+ markIndexesAfterImport(bundle, { changed: true });
6675
+ try {
6676
+ logger?.info("rebuilding fts5 index");
6677
+ rebuildFts5Index(bundle);
6678
+ } catch (error) {
6679
+ fts5Error = getErrorMessage(error);
6680
+ logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
6681
+ }
5749
6682
  try {
5750
- logger?.info("rebuilding tantivy index");
5751
- const status = await rebuildTantivyIndex(bundle);
6683
+ logger?.info({ overwrite }, "rebuilding tantivy index");
6684
+ const status = await rebuildTantivyIndex(bundle, { overwrite });
5752
6685
  tantivy = { indexedDocCount: status.indexed_doc_count };
5753
6686
  options.onTantivyComplete?.(tantivy);
5754
6687
  } catch (error) {
@@ -5757,16 +6690,14 @@ async function runCompileImports(options) {
5757
6690
  }
5758
6691
  }
5759
6692
  } finally {
5760
- if (deferIndex) {
5761
- logger?.info("re-enabling FTS5 triggers");
5762
- enableFts5Triggers(bundle);
5763
- }
6693
+ enableFts5Triggers(bundle);
5764
6694
  }
5765
6695
  return {
5766
6696
  providers: summaries,
5767
6697
  importedAny,
5768
6698
  tantivy,
5769
- tantivyError
6699
+ tantivyError,
6700
+ fts5Error
5770
6701
  };
5771
6702
  }
5772
6703
  async function exportCompileParquet(options) {
@@ -5809,7 +6740,7 @@ function createCliLogger(options) {
5809
6740
  // src/cli/commands/compile.ts
5810
6741
  function compileCommand() {
5811
6742
  const command = addCompileLogOptions(
5812
- new Command("compile").description(
6743
+ new Command2("compile").description(
5813
6744
  "Import session histories from one agent CLI into the bundle."
5814
6745
  )
5815
6746
  );
@@ -5822,27 +6753,35 @@ function compileCommand() {
5822
6753
  return command;
5823
6754
  }
5824
6755
  function compileAllCommand() {
5825
- return addCompileLogOptions(new Command("compile-all")).description("Import all agent CLI session histories using default source paths.").option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(async (options) => {
6756
+ return addCompileLogOptions(new Command2("compile-all")).description("Import all agent CLI session histories using default source paths.").option(
6757
+ "--overwrite",
6758
+ "force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
6759
+ false
6760
+ ).action(async (options) => {
5826
6761
  await runCompiles({
5827
6762
  providers: COMPILE_PROVIDERS,
5828
6763
  storePath: defaultBundlePath(),
5829
- deferIndex: options.deferIndex ?? false,
6764
+ overwrite: options.overwrite,
5830
6765
  logOptions: options
5831
6766
  });
5832
6767
  });
5833
6768
  }
5834
6769
  function providerCompileCommand(provider) {
5835
- return addCompileLogOptions(new Command(provider.name)).description(provider.description).option(
6770
+ return addCompileLogOptions(new Command2(provider.name)).description(provider.description).option(
5836
6771
  "--sessions-path <path>",
5837
6772
  `${provider.pathHelp} (default: ${provider.defaultSessionsPath()})`,
5838
6773
  provider.defaultSessionsPath()
5839
- ).option("--store <path>", "bundle directory", defaultBundlePath()).option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(
6774
+ ).option("--store <path>", "bundle directory", defaultBundlePath()).option(
6775
+ "--overwrite",
6776
+ "force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
6777
+ false
6778
+ ).action(
5840
6779
  async (options, command) => {
5841
6780
  await runCompiles({
5842
6781
  providers: [provider],
5843
6782
  storePath: options.store,
5844
- deferIndex: options.deferIndex ?? false,
5845
6783
  sessionsPath: options.sessionsPath,
6784
+ overwrite: options.overwrite,
5846
6785
  logOptions: command.optsWithGlobals()
5847
6786
  });
5848
6787
  }
@@ -5861,8 +6800,8 @@ async function runCompiles(options) {
5861
6800
  const result = await runCompileImports({
5862
6801
  bundle,
5863
6802
  providers: options.providers,
5864
- deferIndex: options.deferIndex,
5865
6803
  sessionsPath: options.sessionsPath,
6804
+ overwrite: options.overwrite,
5866
6805
  logger,
5867
6806
  onProviderComplete: printCounts,
5868
6807
  onTantivyComplete: (status) => {
@@ -5875,7 +6814,8 @@ async function runCompiles(options) {
5875
6814
  closeBundle(bundle);
5876
6815
  logger.info({ store_path: storePath }, "bundle closed");
5877
6816
  }
5878
- if (importedAny) {
6817
+ const shouldExportParquet = importedAny || options.overwrite === true;
6818
+ if (shouldExportParquet) {
5879
6819
  try {
5880
6820
  const result = await exportCompileParquet({ storePath, logger });
5881
6821
  process.stdout.write(`parquet: wrote ${result.tableCount} tables to ${result.outDir}
@@ -5899,11 +6839,10 @@ function printCounts(summary) {
5899
6839
 
5900
6840
  // src/cli/commands/export.ts
5901
6841
  import { writeFile as writeFile6 } from "fs/promises";
5902
- import path16 from "path";
5903
- import { Command as Command2 } from "commander";
6842
+ import path17 from "path";
6843
+ import { Command as Command3 } from "commander";
5904
6844
 
5905
6845
  // src/services/export/markdown.ts
5906
- init_cas();
5907
6846
  async function exportSessionMarkdown(bundle, sessionId2) {
5908
6847
  const session = bundle.db.prepare(
5909
6848
  `SELECT session_id, source_tool, source_session_id, title, start_ts, end_ts,
@@ -6013,139 +6952,60 @@ function renderToolCall(c) {
6013
6952
  return lines.join("\n");
6014
6953
  }
6015
6954
 
6016
- // src/cli/bundle.ts
6017
- import path15 from "path";
6018
- async function withBundle(storePath, fn) {
6019
- const bundle = await openBundle(path15.resolve(storePath));
6020
- try {
6021
- return await fn(bundle);
6022
- } finally {
6023
- closeBundle(bundle);
6024
- }
6025
- }
6026
-
6027
6955
  // src/cli/commands/export.ts
6028
6956
  function exportCommand() {
6029
- const session = new Command2("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
6957
+ const session = new Command3("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
6030
6958
  if (options.format !== "markdown") {
6031
6959
  throw new Error(`unsupported format: ${options.format} (try --format markdown)`);
6032
6960
  }
6033
6961
  await withBundle(options.store, async (bundle) => {
6034
6962
  const markdown = await exportSessionMarkdown(bundle, sessionId2);
6035
6963
  if (options.out) {
6036
- await writeFile6(path16.resolve(options.out), markdown, "utf8");
6037
- process.stdout.write(`wrote ${path16.resolve(options.out)}
6964
+ await writeFile6(path17.resolve(options.out), markdown, "utf8");
6965
+ process.stdout.write(`wrote ${path17.resolve(options.out)}
6038
6966
  `);
6039
6967
  } else {
6040
6968
  process.stdout.write(markdown);
6041
6969
  }
6042
6970
  });
6043
6971
  });
6044
- const parquet = new Command2("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
6972
+ const parquet = new Command3("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
6045
6973
  const result = await exportBundleParquet({
6046
- bundlePath: path16.resolve(options.store),
6047
- outDir: options.out ? path16.resolve(options.out) : void 0
6974
+ bundlePath: path17.resolve(options.store),
6975
+ outDir: options.out ? path17.resolve(options.out) : void 0
6048
6976
  });
6049
6977
  process.stdout.write(`wrote parquet export to ${result.outDir}
6050
6978
  `);
6051
6979
  process.stdout.write(`manifest=${result.manifestPath}
6052
6980
  `);
6053
6981
  });
6054
- return new Command2("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
6982
+ return new Command3("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
6055
6983
  }
6056
6984
 
6057
6985
  // src/cli/commands/index.ts
6058
- import { Command as Command3 } from "commander";
6986
+ import { Command as Command4 } from "commander";
6059
6987
  init_indexing();
6060
-
6061
- // src/cli/output.ts
6062
- var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
6063
- var COL_SEPARATOR = " ";
6064
- var RULE_CHAR = "-";
6065
- function parseOutputFormat(value, fallback) {
6066
- if (value === void 0) return fallback;
6067
- if (OUTPUT_FORMATS.includes(value)) return value;
6068
- throw new Error(
6069
- `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
6070
- );
6071
- }
6072
- function printRows(rows, opts) {
6073
- switch (opts.format) {
6074
- case "json":
6075
- printJson(rows, opts);
6076
- return;
6077
- case "csv":
6078
- printCsv(rows, opts);
6079
- return;
6080
- case "table":
6081
- case "interactive":
6082
- printTable(rows, opts);
6083
- return;
6084
- }
6085
- }
6086
- function printJson(rows, opts) {
6087
- const out = opts.meta ? { ...opts.meta, rows } : rows;
6088
- process.stdout.write(`${JSON.stringify(out, null, 2)}
6089
- `);
6090
- }
6091
- function printCsv(rows, opts) {
6092
- const columns = opts.columns;
6093
- process.stdout.write(`${columns.map(csvField).join(",")}
6094
- `);
6095
- for (const row of rows) {
6096
- const record = row;
6097
- const line = columns.map((column) => csvField(formatCell(record[column]))).join(",");
6098
- process.stdout.write(`${line}
6099
- `);
6100
- }
6101
- }
6102
- function csvField(value) {
6103
- if (/[",\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
6104
- return value;
6105
- }
6106
- function printTable(rows, opts) {
6107
- const columns = opts.columns;
6108
- const widths = columns.map((column) => column.length);
6109
- const cells = rows.map((row) => {
6110
- const record = row;
6111
- return columns.map((column, index) => {
6112
- const text = formatCell(record[column]);
6113
- const width = widths[index] ?? 0;
6114
- if (text.length > width) widths[index] = text.length;
6115
- return text;
6116
- });
6117
- });
6118
- const header = columns.map((column, index) => column.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
6119
- const rule = columns.map((_, index) => RULE_CHAR.repeat(widths[index] ?? 0)).join(COL_SEPARATOR);
6120
- process.stdout.write(`${header}
6121
- ${rule}
6122
- `);
6123
- for (const cellRow of cells) {
6124
- const line = cellRow.map((cell, index) => cell.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
6125
- process.stdout.write(`${line}
6126
- `);
6127
- }
6128
- }
6129
- function formatCell(value) {
6130
- if (value == null) return "";
6131
- if (typeof value === "string") return value;
6132
- if (typeof value === "number" || typeof value === "boolean") return String(value);
6133
- return JSON.stringify(value);
6134
- }
6135
-
6136
- // src/cli/commands/index.ts
6137
6988
  function indexCommand() {
6138
- const fts5 = new Command3("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
6989
+ const fts5 = new Command4("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
6990
+ "--overwrite",
6991
+ "rebuild from scratch (FTS5 always overwrites; flag accepted for parity with other index commands)",
6992
+ false
6993
+ ).action(async (options) => {
6139
6994
  await withBundle(options.store, (bundle) => {
6995
+ void options.overwrite;
6140
6996
  printIndexStatus(rebuildFts5Index(bundle));
6141
6997
  });
6142
6998
  });
6143
- const tantivy = new Command3("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
6999
+ const tantivy = new Command4("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
7000
+ "--overwrite",
7001
+ "force a full re-index instead of the default incremental rebuild",
7002
+ false
7003
+ ).action(async (options) => {
6144
7004
  await withBundle(options.store, async (bundle) => {
6145
- printIndexStatus(await rebuildTantivyIndex(bundle));
7005
+ printIndexStatus(await rebuildTantivyIndex(bundle, { overwrite: options.overwrite }));
6146
7006
  });
6147
7007
  });
6148
- const status = new Command3("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
7008
+ const status = new Command4("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
6149
7009
  const format = parseOutputFormat(options.outputFormat, "table");
6150
7010
  await withBundle(options.store, (bundle) => {
6151
7011
  const rows = getSearchIndexStatuses(bundle);
@@ -6162,7 +7022,7 @@ function indexCommand() {
6162
7022
  });
6163
7023
  });
6164
7024
  });
6165
- return new Command3("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
7025
+ return new Command4("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
6166
7026
  }
6167
7027
  function printIndexStatus(status) {
6168
7028
  process.stdout.write(
@@ -6174,11 +7034,11 @@ function printIndexStatus(status) {
6174
7034
 
6175
7035
  // src/cli/commands/init.ts
6176
7036
  import { stat as stat3 } from "fs/promises";
6177
- import path17 from "path";
6178
- import { Command as Command4 } from "commander";
7037
+ import path18 from "path";
7038
+ import { Command as Command5 } from "commander";
6179
7039
  function initCommand() {
6180
- return new Command4("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
6181
- const resolved = path17.resolve(options.store);
7040
+ return new Command5("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
7041
+ const resolved = path18.resolve(options.store);
6182
7042
  const exists2 = await stat3(`${resolved}/manifest.json`).then(() => true).catch(() => false);
6183
7043
  if (exists2) {
6184
7044
  if (!options.forceExisting) {
@@ -6189,369 +7049,416 @@ use --force-existing to skip without erroring
6189
7049
  );
6190
7050
  process.exit(2);
6191
7051
  }
6192
- const bundle2 = await openBundle(resolved);
6193
- closeBundle(bundle2);
6194
- process.stdout.write(`bundle already exists at ${resolved}
6195
- `);
6196
- return;
6197
- }
6198
- const bundle = await initBundle(resolved);
6199
- closeBundle(bundle);
6200
- process.stdout.write(`initialized prosa bundle at ${resolved}
6201
- `);
6202
- });
6203
- }
6204
-
6205
- // src/cli/commands/mcp.ts
6206
- import path18 from "path";
6207
- import { Command as Command5 } from "commander";
6208
-
6209
- // src/mcp/server.ts
6210
- init_errors();
6211
- import { randomUUID } from "crypto";
6212
- import http from "http";
6213
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
6214
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6215
- import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
6216
-
6217
- // src/mcp/guidance.ts
6218
- var PROSA_MCP_INSTRUCTIONS = `
6219
- prosa is a local memory over local agent session histories. Use it to import recent sessions,
6220
- find prior work, commands, decisions, file touches, and full transcripts before answering from
6221
- memory.
6222
-
6223
- Recommended workflow:
6224
- - Use compile to refresh the bundle when recent local sessions may not be indexed yet. With no
6225
- input it imports all supported providers from default paths.
6226
- - For open-ended questions, start with search_sessions using 2-5 concrete terms.
6227
- - For questions about a file or path, start with find_touched_files, then inspect the returned sessions.
6228
- - After search results, call get_session for the most relevant session_ids before drawing conclusions.
6229
- - Use export_session_markdown only after selecting a likely session; it can return a large transcript.
6230
- - Use list_tool_calls for command history, failed tools, patches, and operational audit trails.
6231
- - Use get_artifact only when a returned artifact_id is needed for full output or diff content.
6232
- - Use index_status if search results look stale or unexpectedly empty.
6233
-
6234
- When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant snippet
6235
- or event. Do not treat search snippets as the whole truth; open the session when accuracy matters.
6236
- `.trim();
6237
- var INVESTIGATE_PRIOR_WORK_PROMPT = `
6238
- Investigate prior work in prosa for the topic: {{topic}}
6239
-
6240
- Use this workflow:
6241
- 1. Call search_sessions with a short query built from the topic.
6242
- 2. If results are broad, search again with narrower terms from the best snippets.
6243
- 3. Open the most relevant session_ids with get_session.
6244
- 4. Use export_session_markdown only for sessions that appear directly relevant.
6245
- 5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
6246
- `.trim();
6247
- var FIND_FILE_HISTORY_PROMPT = `
6248
- Investigate history for file/path: {{path}}
6249
-
6250
- Use this workflow:
6251
- 1. Call find_touched_files with the path or the most distinctive path suffix.
6252
- 2. Open returned session_ids with get_session.
6253
- 3. Use list_tool_calls with session_id when you need command-level detail.
6254
- 4. Use export_session_markdown only for the most relevant session.
6255
- 5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
6256
- `.trim();
6257
- var AUDIT_TOOL_FAILURES_PROMPT = `
6258
- Audit tool failures in prosa{{query_clause}}.
6259
-
6260
- Use this workflow:
6261
- 1. Call list_tool_calls with errors_only=true.
6262
- 2. If a query is provided, also call search_sessions for that query to find related context.
6263
- 3. Open relevant session_ids with get_session.
6264
- 4. Group failures by tool_name, command/path, and likely cause.
6265
- 5. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
6266
- `.trim();
6267
-
6268
- // src/mcp/tools.ts
6269
- import { z } from "zod";
6270
-
6271
- // src/core/domain/types.ts
6272
- var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
6273
-
6274
- // src/mcp/tools.ts
6275
- init_errors();
6276
- init_limits();
6277
- init_indexing();
6278
- init_search();
6279
- init_sessions();
6280
- function registerProsaTools(server, bundle, options = {}) {
6281
- const searchEngine = options.searchEngine ?? "fts5";
6282
- const storePath = options.storePath ?? bundle.path;
6283
- registerProsaPrompts(server);
6284
- server.registerTool(
6285
- "compile",
6286
- {
6287
- title: "Compile sessions",
6288
- description: "Import local agent session histories into the active prosa bundle. With no input, compiles all providers from default paths. With source, compiles that provider; sessions_path may override that provider path.",
6289
- inputSchema: {
6290
- source: z.enum(SOURCE_TOOLS).optional(),
6291
- sessions_path: z.string().min(1).optional()
6292
- },
6293
- annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
6294
- },
6295
- async ({ source, sessions_path }) => {
6296
- if (sessions_path && !source) {
6297
- return {
6298
- content: [
6299
- {
6300
- type: "text",
6301
- text: "sessions_path requires source because providers use incompatible source layouts"
6302
- }
6303
- ],
6304
- isError: true
6305
- };
6306
- }
6307
- try {
6308
- const result = await runCompileImports({
6309
- bundle,
6310
- providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
6311
- deferIndex: false,
6312
- sessionsPath: sessions_path
6313
- });
6314
- const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
6315
- return {
6316
- content: [
6317
- {
6318
- type: "text",
6319
- text: JSON.stringify(
6320
- {
6321
- providers: result.providers.map((provider) => ({
6322
- source: provider.source,
6323
- source_path: provider.sourcePath,
6324
- batch_id: provider.batchId,
6325
- counts: provider.counts
6326
- })),
6327
- imported_any: result.importedAny,
6328
- tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
6329
- tantivy_error: result.tantivyError,
6330
- parquet: parquet ? {
6331
- out_dir: parquet.outDir,
6332
- manifest_path: parquet.manifestPath,
6333
- table_count: parquet.tableCount,
6334
- files: parquet.files,
6335
- counts: parquet.counts
6336
- } : null
6337
- },
6338
- null,
6339
- 2
6340
- )
6341
- }
6342
- ]
6343
- };
6344
- } catch (error) {
6345
- return {
6346
- content: [{ type: "text", text: getErrorMessage(error) }],
6347
- isError: true
6348
- };
6349
- }
6350
- }
6351
- );
6352
- server.registerTool(
6353
- "list_sessions",
6354
- {
6355
- title: "List sessions",
6356
- description: "List recent sessions when you need candidates by source/date before deeper inspection. Next step: call get_session for relevant session_id values.",
6357
- inputSchema: {
6358
- source: z.enum(SOURCE_TOOLS).optional(),
6359
- since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
6360
- until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
6361
- limit: z.number().int().min(1).max(500).optional().default(50)
6362
- },
6363
- annotations: { readOnlyHint: true, idempotentHint: true }
6364
- },
6365
- async (input) => {
6366
- const rows = listSessions(bundle, {
6367
- sourceTool: input.source,
6368
- sinceIso: input.since,
6369
- untilIso: input.until,
6370
- limit: input.limit ?? 50
6371
- });
6372
- return {
6373
- content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
6374
- };
6375
- }
6376
- );
6377
- server.registerTool(
6378
- "get_session",
6379
- {
6380
- title: "Get session detail",
6381
- description: "Open one session and return metadata plus timeline events. Use this after search_sessions, list_sessions, find_touched_files, or list_tool_calls before making evidence-backed claims.",
6382
- inputSchema: {
6383
- session_id: z.string().min(1)
6384
- },
6385
- annotations: { readOnlyHint: true, idempotentHint: true }
6386
- },
6387
- async ({ session_id }) => {
6388
- const detail = getSession(bundle, session_id);
6389
- if (!detail) {
6390
- return {
6391
- content: [{ type: "text", text: `session not found: ${session_id}` }],
6392
- isError: true
6393
- };
6394
- }
6395
- return {
6396
- content: [{ type: "text", text: JSON.stringify(detail, null, 2) }]
6397
- };
7052
+ const bundle2 = await openBundle(resolved);
7053
+ closeBundle(bundle2);
7054
+ process.stdout.write(`bundle already exists at ${resolved}
7055
+ `);
7056
+ return;
6398
7057
  }
6399
- );
7058
+ const bundle = await initBundle(resolved);
7059
+ closeBundle(bundle);
7060
+ process.stdout.write(`initialized prosa bundle at ${resolved}
7061
+ `);
7062
+ });
7063
+ }
7064
+
7065
+ // src/cli/commands/mcp.ts
7066
+ import path19 from "path";
7067
+ import { Command as Command6 } from "commander";
7068
+
7069
+ // src/mcp/server.ts
7070
+ init_errors();
7071
+ import { randomUUID } from "crypto";
7072
+ import http from "http";
7073
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
7074
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
7075
+ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
7076
+
7077
+ // src/mcp/guidance.ts
7078
+ var PROSA_MCP_INSTRUCTIONS = `
7079
+ prosa is a local memory over local agent session histories. Use it to find prior work, commands,
7080
+ decisions, file touches, transcripts, and analytical rollups before answering from memory.
7081
+
7082
+ There are six tools:
7083
+ - search: full-text over messages, commands, paths, diffs, and previews. Start here for open-ended
7084
+ questions with 2-5 concrete terms. Optional engine, field_kind, raw, since/until filters.
7085
+ - sessions: without session_id, lists candidates filtered by source/time/limit. With session_id,
7086
+ opens the session: format=detail (default) returns metadata + timeline, format=summary returns
7087
+ only the session row, format=markdown renders the full transcript.
7088
+ - tool_calls: audit commands and tool usage. Filters by tool_name, canonical_type, session_id,
7089
+ errors_only. When path_substring is set, also returns artifacts touching that path \u2014 use this for
7090
+ file-history questions.
7091
+ - analytics: built-in aggregate reports backed by SQLite views. Pick report=sessions|tools|errors|
7092
+ models|projects with the matching filters. Use report=sessions with session_id or
7093
+ source_path_substring for per-session metrics.
7094
+ - artifact: fetch full text for an artifact_id when previews are not enough. Binary artifacts return
7095
+ a placeholder.
7096
+ - compile: with no input, returns a status snapshot (search index health). With source (and
7097
+ optionally sessions_path), imports that provider into the bundle. Use status mode when search
7098
+ results look stale; use import mode when local sessions may not be indexed yet.
7099
+
7100
+ When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant
7101
+ snippet or event. Do not treat search snippets as the whole truth; open the session with
7102
+ \`sessions session_id=\u2026 format=detail\` when accuracy matters.
7103
+ `.trim();
7104
+ var INVESTIGATE_PRIOR_WORK_PROMPT = `
7105
+ Investigate prior work in prosa for the topic: {{topic}}
7106
+
7107
+ Use this workflow:
7108
+ 1. Call \`search\` with a short query built from the topic.
7109
+ 2. If results are broad, search again with narrower terms from the best snippets.
7110
+ 3. Open the most relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
7111
+ 4. Use \`sessions session_id=\u2026 format=markdown\` only for sessions that appear directly relevant.
7112
+ 5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
7113
+ `.trim();
7114
+ var FIND_FILE_HISTORY_PROMPT = `
7115
+ Investigate history for file/path: {{path}}
7116
+
7117
+ Use this workflow:
7118
+ 1. Call \`tool_calls\` with path_substring set to the path or its most distinctive suffix.
7119
+ 2. Open returned session_ids with \`sessions session_id=\u2026 format=detail\`.
7120
+ 3. Call \`tool_calls\` with session_id when you need command-level detail inside one session.
7121
+ 4. Use \`sessions session_id=\u2026 format=markdown\` only for the most relevant session.
7122
+ 5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
7123
+ `.trim();
7124
+ var AUDIT_TOOL_FAILURES_PROMPT = `
7125
+ Audit tool failures in prosa{{query_clause}}.
7126
+
7127
+ Use this workflow:
7128
+ 1. For an aggregate report, call \`analytics report=errors\` (filter by source/since/until/tool_name
7129
+ as needed).
7130
+ 2. For per-call evidence, call \`tool_calls\` with errors_only=true.
7131
+ 3. If a query is provided, also call \`search\` for that query to find related context.
7132
+ 4. Open relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
7133
+ 5. Group failures by tool_name, command/path, and likely cause.
7134
+ 6. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
7135
+ `.trim();
7136
+
7137
+ // src/mcp/tools.ts
7138
+ import { z } from "zod";
7139
+ init_errors();
7140
+ init_indexing();
7141
+ init_search();
7142
+ init_sessions();
7143
+
7144
+ // src/services/tool_calls.ts
7145
+ init_limits();
7146
+ function listToolCalls(bundle, filters = {}) {
7147
+ const conds = [];
7148
+ const params = [];
7149
+ if (filters.toolName) {
7150
+ conds.push("tc.tool_name = ?");
7151
+ params.push(filters.toolName);
7152
+ }
7153
+ if (filters.canonicalType) {
7154
+ conds.push("tc.canonical_tool_type = ?");
7155
+ params.push(filters.canonicalType);
7156
+ }
7157
+ if (filters.sessionId) {
7158
+ conds.push("tc.session_id = ?");
7159
+ params.push(filters.sessionId);
7160
+ }
7161
+ if (filters.errorsOnly) {
7162
+ conds.push("(tr.is_error = 1 OR tc.status = ?)");
7163
+ params.push("error");
7164
+ }
7165
+ if (filters.pathSubstring) {
7166
+ conds.push("tc.path IS NOT NULL AND tc.path LIKE ?");
7167
+ params.push(`%${filters.pathSubstring}%`);
7168
+ }
7169
+ if (filters.sinceIso) {
7170
+ conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
7171
+ params.push(filters.sinceIso);
7172
+ }
7173
+ if (filters.untilIso) {
7174
+ conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
7175
+ params.push(filters.untilIso);
7176
+ }
7177
+ const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
7178
+ const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
7179
+ const toolCallSql = `
7180
+ SELECT 'tool_call' AS entity_type,
7181
+ tc.session_id,
7182
+ tc.tool_call_id,
7183
+ NULL AS artifact_id,
7184
+ tc.tool_name,
7185
+ tc.canonical_tool_type,
7186
+ tc.command,
7187
+ tc.path,
7188
+ tc.status,
7189
+ tc.timestamp_start,
7190
+ tr.is_error,
7191
+ tr.exit_code,
7192
+ tr.preview
7193
+ FROM tool_calls tc
7194
+ LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
7195
+ ${where}
7196
+ `;
7197
+ if (!filters.pathSubstring) {
7198
+ const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
7199
+ return bundle.db.prepare(sql2).all(...params);
7200
+ }
7201
+ const artifactSql = `
7202
+ SELECT 'artifact' AS entity_type,
7203
+ a.session_id,
7204
+ NULL AS tool_call_id,
7205
+ a.artifact_id,
7206
+ NULL AS tool_name,
7207
+ NULL AS canonical_tool_type,
7208
+ NULL AS command,
7209
+ a.path,
7210
+ NULL AS status,
7211
+ a.created_ts AS timestamp_start,
7212
+ NULL AS is_error,
7213
+ NULL AS exit_code,
7214
+ NULL AS preview
7215
+ FROM artifacts a
7216
+ WHERE a.path IS NOT NULL AND a.path LIKE ?
7217
+ `;
7218
+ const sql = `
7219
+ ${toolCallSql}
7220
+ UNION ALL
7221
+ ${artifactSql}
7222
+ ORDER BY timestamp_start DESC
7223
+ LIMIT ${limit2}
7224
+ `;
7225
+ return bundle.db.prepare(sql).all(...params, `%${filters.pathSubstring}%`);
7226
+ }
7227
+
7228
+ // src/mcp/tools.ts
7229
+ var CANONICAL_TOOL_TYPES = [
7230
+ "shell",
7231
+ "read_file",
7232
+ "write_file",
7233
+ "edit_file",
7234
+ "search_file",
7235
+ "web_search",
7236
+ "mcp",
7237
+ "subagent",
7238
+ "patch",
7239
+ "other"
7240
+ ];
7241
+ var FIELD_KINDS = [
7242
+ "message_text",
7243
+ "user_prompt",
7244
+ "assistant_text",
7245
+ "command",
7246
+ "command_output_preview",
7247
+ "error",
7248
+ "file_path",
7249
+ "diff",
7250
+ "summary",
7251
+ "artifact_text",
7252
+ "tool_args",
7253
+ "tool_result"
7254
+ ];
7255
+ function registerProsaTools(server, bundle, options = {}) {
7256
+ const searchEngine = options.searchEngine ?? "fts5";
7257
+ const storePath = options.storePath ?? bundle.path;
7258
+ const ensureStore = options.ensureStore ?? false;
7259
+ registerProsaPrompts(server);
6400
7260
  server.registerTool(
6401
- "search_sessions",
7261
+ "search",
6402
7262
  {
6403
7263
  title: "Full-text search",
6404
- description: `Search messages, commands, paths, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms, then call get_session for relevant hits.`,
7264
+ description: `Search messages, commands, paths, diffs, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms; then call \`sessions\` for relevant hits.`,
6405
7265
  inputSchema: {
6406
7266
  query: z.string().min(1),
7267
+ engine: z.enum(["fts5", "tantivy"]).optional(),
7268
+ field_kind: z.enum(FIELD_KINDS).optional(),
6407
7269
  limit: z.number().int().min(1).max(500).optional().default(50),
6408
- raw: z.boolean().optional().default(false)
7270
+ raw: z.boolean().optional().default(false).describe("Pass query straight to FTS5 MATCH (allows OR/NEAR/prefixes).")
6409
7271
  },
6410
7272
  annotations: { readOnlyHint: true, idempotentHint: true }
6411
7273
  },
6412
- async ({ query, limit, raw }) => {
6413
- const hits = searchFullText(bundle, { query, limit: limit ?? 50, raw, engine: searchEngine });
7274
+ async ({ query, engine, field_kind, limit: limit2, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7275
+ const selectedEngine = engine ?? searchEngine;
7276
+ const hits = searchFullText(activeBundle, {
7277
+ query,
7278
+ limit: limit2 ?? 50,
7279
+ raw,
7280
+ engine: selectedEngine
7281
+ });
7282
+ const filtered = field_kind ? hits.filter((hit) => hit.field_kind === field_kind) : hits;
6414
7283
  return {
6415
7284
  content: [
6416
7285
  {
6417
7286
  type: "text",
6418
7287
  text: JSON.stringify(
6419
- { query, engine: searchEngine, count: hits.length, hits },
7288
+ {
7289
+ query,
7290
+ engine: selectedEngine,
7291
+ field_kind: field_kind ?? null,
7292
+ count: filtered.length,
7293
+ hits: filtered
7294
+ },
6420
7295
  null,
6421
7296
  2
6422
7297
  )
6423
7298
  }
6424
7299
  ]
6425
7300
  };
6426
- }
7301
+ })
6427
7302
  );
6428
7303
  server.registerTool(
6429
- "export_session_markdown",
7304
+ "sessions",
6430
7305
  {
6431
- title: "Export session as Markdown",
6432
- description: "Render a selected session into a readable transcript. Use only after get_session confirms relevance; this can return much more context than snippets.",
7306
+ title: "List or open sessions",
7307
+ description: "Without `session_id`, lists sessions filtered by source/time/limit. With `session_id`, opens that session: `format=detail` (default) returns metadata plus timeline events; `format=summary` returns only the session row; `format=markdown` renders the readable transcript. Call after `search` to get evidence behind a hit.",
6433
7308
  inputSchema: {
6434
- session_id: z.string().min(1)
7309
+ session_id: z.string().min(1).optional(),
7310
+ format: z.enum(["summary", "detail", "markdown"]).optional().default("detail"),
7311
+ source: z.enum(SOURCE_TOOLS).optional(),
7312
+ since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
7313
+ until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
7314
+ limit: z.number().int().min(1).max(500).optional().default(50)
6435
7315
  },
6436
7316
  annotations: { readOnlyHint: true, idempotentHint: true }
6437
7317
  },
6438
- async ({ session_id }) => {
6439
- try {
6440
- const md = await exportSessionMarkdown(bundle, session_id);
6441
- return { content: [{ type: "text", text: md }] };
6442
- } catch (error) {
7318
+ async ({ session_id, format, source, since, until, limit: limit2 }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
7319
+ if (!session_id) {
7320
+ const rows = listSessions(activeBundle, {
7321
+ sourceTool: source,
7322
+ sinceIso: since,
7323
+ untilIso: until,
7324
+ limit: limit2 ?? 50
7325
+ });
6443
7326
  return {
6444
- content: [{ type: "text", text: getErrorMessage(error) }],
7327
+ content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
7328
+ };
7329
+ }
7330
+ if (format === "markdown") {
7331
+ try {
7332
+ const md = await exportSessionMarkdown(activeBundle, session_id);
7333
+ return { content: [{ type: "text", text: md }] };
7334
+ } catch (error) {
7335
+ return {
7336
+ content: [{ type: "text", text: getErrorMessage(error) }],
7337
+ isError: true
7338
+ };
7339
+ }
7340
+ }
7341
+ const detail = getSession(activeBundle, session_id);
7342
+ if (!detail) {
7343
+ return {
7344
+ content: [{ type: "text", text: `session not found: ${session_id}` }],
6445
7345
  isError: true
6446
7346
  };
6447
7347
  }
6448
- }
7348
+ const payload = format === "summary" ? { session: detail.session } : detail;
7349
+ return {
7350
+ content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
7351
+ };
7352
+ })
6449
7353
  );
6450
7354
  server.registerTool(
6451
- "list_tool_calls",
7355
+ "tool_calls",
6452
7356
  {
6453
- title: "List tool calls",
6454
- description: "Audit commands and tool usage by tool name, canonical type, error status, or session. Use this for failed commands, shell history, patches, and operational evidence; then open relevant sessions with get_session.",
7357
+ title: "Audit tool calls and file touches",
7358
+ description: "Audit commands and tool usage. Filter by tool_name, canonical_type, session_id, errors_only, or path_substring. When `path_substring` is set, also surfaces matching artifacts so file-history questions return both invocations and produced files.",
6455
7359
  inputSchema: {
7360
+ session_id: z.string().min(1).optional(),
6456
7361
  tool_name: z.string().optional(),
6457
- canonical_type: z.enum([
6458
- "shell",
6459
- "read_file",
6460
- "write_file",
6461
- "edit_file",
6462
- "search_file",
6463
- "web_search",
6464
- "mcp",
6465
- "subagent",
6466
- "patch",
6467
- "other"
6468
- ]).optional(),
6469
- session_id: z.string().optional(),
7362
+ canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional(),
7363
+ path_substring: z.string().min(1).optional().describe("Filter rows where tool_calls.path or artifacts.path contains this substring."),
6470
7364
  errors_only: z.boolean().optional().default(false),
7365
+ since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
7366
+ until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
6471
7367
  limit: z.number().int().min(1).max(500).optional().default(100)
6472
7368
  },
6473
7369
  annotations: { readOnlyHint: true, idempotentHint: true }
6474
7370
  },
6475
- async ({ tool_name, canonical_type, session_id, errors_only, limit }) => {
6476
- const conds = [];
6477
- const params = [];
6478
- if (tool_name) {
6479
- conds.push("tc.tool_name = ?");
6480
- params.push(tool_name);
6481
- }
6482
- if (canonical_type) {
6483
- conds.push("tc.canonical_tool_type = ?");
6484
- params.push(canonical_type);
6485
- }
6486
- if (session_id) {
6487
- conds.push("tc.session_id = ?");
6488
- params.push(session_id);
6489
- }
6490
- if (errors_only) {
6491
- conds.push("(tr.is_error = 1 OR tc.status = ?)");
6492
- params.push("error");
6493
- }
6494
- const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
6495
- const sql = `
6496
- SELECT tc.tool_call_id, tc.session_id, tc.tool_name, tc.canonical_tool_type,
6497
- tc.command, tc.path, tc.status, tc.timestamp_start,
6498
- tr.is_error, tr.exit_code, tr.preview
6499
- FROM tool_calls tc
6500
- LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
6501
- ${where}
6502
- ORDER BY tc.timestamp_start DESC
6503
- LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
6504
- `;
6505
- const rows = bundle.db.prepare(sql).all(...params);
7371
+ async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7372
+ const rows = listToolCalls(activeBundle, {
7373
+ sessionId: input.session_id,
7374
+ toolName: input.tool_name,
7375
+ canonicalType: input.canonical_type,
7376
+ pathSubstring: input.path_substring,
7377
+ errorsOnly: input.errors_only,
7378
+ sinceIso: input.since,
7379
+ untilIso: input.until,
7380
+ limit: input.limit ?? 100
7381
+ });
6506
7382
  return {
6507
7383
  content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
6508
7384
  };
6509
- }
7385
+ })
6510
7386
  );
6511
7387
  server.registerTool(
6512
- "find_touched_files",
7388
+ "analytics",
6513
7389
  {
6514
- title: "Find sessions that touched a file",
6515
- description: "Find sessions with tool calls or artifacts whose path contains `path_substring`. Start here for file-history questions, then open returned sessions with get_session.",
7390
+ title: "Aggregate analytics reports",
7391
+ description: "Run a built-in aggregation over the bundle: per-session metrics (`sessions`), tool usage rollup (`tools`), error timeline (`errors`), model usage (`models`), or project activity (`projects`). Backed by SQLite views; mirrors the `prosa analytics` CLI.",
6516
7392
  inputSchema: {
6517
- path_substring: z.string().min(1),
6518
- limit: z.number().int().min(1).max(500).optional().default(100)
7393
+ report: z.enum(ANALYTICS_REPORTS),
7394
+ source: z.enum(SOURCE_TOOLS).optional(),
7395
+ since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
7396
+ until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
7397
+ limit: z.number().int().min(1).max(500).optional().default(50),
7398
+ session_id: z.string().min(1).optional().describe("Drill-down filter (applies to `sessions` report)."),
7399
+ source_path_substring: z.string().min(1).optional().describe("Filter `sessions` rows by imported source file path substring."),
7400
+ project: z.string().min(1).optional().describe("Filter by project id, name, or path substring."),
7401
+ tool_name: z.string().min(1).optional().describe("Filter `tools`/`errors` rows by exact tool name."),
7402
+ canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional().describe("Filter `tools` rows by canonical tool type."),
7403
+ errors_only: z.boolean().optional().describe("`tools` report: only error rows."),
7404
+ category: z.string().min(1).optional().describe("Filter `errors` by category: tool_result|import_error|uncertainty."),
7405
+ model: z.string().min(1).optional().describe("Filter `models` rows by exact model name.")
6519
7406
  },
6520
7407
  annotations: { readOnlyHint: true, idempotentHint: true }
6521
7408
  },
6522
- async ({ path_substring, limit }) => {
6523
- const sql = `
6524
- SELECT tc.session_id, tc.tool_name, tc.canonical_tool_type, tc.path,
6525
- tc.timestamp_start, tc.command
6526
- FROM tool_calls tc
6527
- WHERE tc.path IS NOT NULL AND tc.path LIKE ?
6528
- UNION ALL
6529
- SELECT a.session_id AS session_id, NULL AS tool_name, NULL AS canonical_tool_type,
6530
- a.path, a.created_ts AS timestamp_start, NULL AS command
6531
- FROM artifacts a
6532
- WHERE a.path IS NOT NULL AND a.path LIKE ?
6533
- ORDER BY timestamp_start DESC
6534
- LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
6535
- `;
6536
- const like = `%${path_substring}%`;
6537
- const rows = bundle.db.prepare(sql).all(like, like);
6538
- return {
6539
- content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
7409
+ async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
7410
+ const filters = {
7411
+ source: input.source,
7412
+ since: input.since,
7413
+ until: input.until,
7414
+ limit: input.limit,
7415
+ sessionId: input.session_id,
7416
+ sourcePathSubstring: input.source_path_substring,
7417
+ project: input.project,
7418
+ toolName: input.tool_name,
7419
+ canonicalType: input.canonical_type,
7420
+ errorsOnly: input.errors_only,
7421
+ category: input.category,
7422
+ model: input.model
6540
7423
  };
6541
- }
7424
+ try {
7425
+ const result = runAnalyticsReportFromBundle({
7426
+ bundle: activeBundle,
7427
+ report: input.report,
7428
+ filters
7429
+ });
7430
+ return {
7431
+ content: [
7432
+ {
7433
+ type: "text",
7434
+ text: JSON.stringify(
7435
+ { report: input.report, count: result.rows.length, rows: result.rows },
7436
+ null,
7437
+ 2
7438
+ )
7439
+ }
7440
+ ]
7441
+ };
7442
+ } catch (error) {
7443
+ return {
7444
+ content: [{ type: "text", text: getErrorMessage(error) }],
7445
+ isError: true
7446
+ };
7447
+ }
7448
+ })
6542
7449
  );
6543
7450
  server.registerTool(
6544
- "get_artifact",
7451
+ "artifact",
6545
7452
  {
6546
7453
  title: "Get artifact bytes/text",
6547
- description: "Retrieve full text for an artifact_id found in a session or export. Use this for detailed diffs or large tool outputs after identifying the artifact; binary artifacts return a placeholder.",
7454
+ description: "Retrieve full text for an `artifact_id` referenced in a session, search hit, or tool_calls row. Use this when previews are not enough; binary artifacts return a placeholder.",
6548
7455
  inputSchema: {
6549
7456
  artifact_id: z.string().min(1)
6550
7457
  },
6551
7458
  annotations: { readOnlyHint: true, idempotentHint: true }
6552
7459
  },
6553
- async ({ artifact_id }) => {
6554
- const row = bundle.db.prepare(`SELECT text_object_id, object_id, mime_type FROM artifacts WHERE artifact_id = ?`).get(artifact_id);
7460
+ async ({ artifact_id }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
7461
+ const row = activeBundle.db.prepare(`SELECT text_object_id, object_id, mime_type FROM artifacts WHERE artifact_id = ?`).get(artifact_id);
6555
7462
  if (!row) {
6556
7463
  return {
6557
7464
  content: [{ type: "text", text: `artifact not found: ${artifact_id}` }],
@@ -6563,30 +7470,111 @@ function registerProsaTools(server, bundle, options = {}) {
6563
7470
  return { content: [{ type: "text", text: "[no content stored]" }] };
6564
7471
  }
6565
7472
  try {
6566
- const { getText: getText2 } = await Promise.resolve().then(() => (init_cas(), cas_exports));
6567
- const text = await getText2(bundle, objectId);
7473
+ const text = await getText(activeBundle, objectId);
6568
7474
  return { content: [{ type: "text", text }] };
6569
7475
  } catch {
6570
7476
  return { content: [{ type: "text", text: `[binary artifact: ${objectId}]` }] };
6571
7477
  }
6572
- }
7478
+ })
6573
7479
  );
6574
7480
  server.registerTool(
6575
- "index_status",
7481
+ "compile",
6576
7482
  {
6577
- title: "Search index status",
6578
- description: "Show whether derived search indexes are ready, stale, missing, building, or failed. Use when search results are unexpectedly empty or when choosing between FTS5 and Tantivy.",
6579
- inputSchema: {},
6580
- annotations: { readOnlyHint: true, idempotentHint: true }
7483
+ title: "Compile sessions or report bundle status",
7484
+ description: "Without input, returns a status snapshot (search index health, last batch, schema version) without mutating anything. With `source`, imports that provider; `sessions_path` may override its default. Pass `overwrite: true` to force a full rebuild of derived indexes (Tantivy from scratch). With neither `source` nor `sessions_path`, only status is returned.",
7485
+ inputSchema: {
7486
+ source: z.enum(SOURCE_TOOLS).optional(),
7487
+ sessions_path: z.string().min(1).optional(),
7488
+ overwrite: z.boolean().optional()
7489
+ },
7490
+ annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
6581
7491
  },
6582
- async () => {
6583
- const rows = getSearchIndexStatuses(bundle);
6584
- return {
6585
- content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
6586
- };
6587
- }
7492
+ async ({ source, sessions_path, overwrite }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
7493
+ if (sessions_path && !source) {
7494
+ return {
7495
+ content: [
7496
+ {
7497
+ type: "text",
7498
+ text: "sessions_path requires source because providers use incompatible source layouts"
7499
+ }
7500
+ ],
7501
+ isError: true
7502
+ };
7503
+ }
7504
+ if (!source && !sessions_path) {
7505
+ return {
7506
+ content: [
7507
+ {
7508
+ type: "text",
7509
+ text: JSON.stringify(
7510
+ { mode: "status", search_index: getSearchIndexStatuses(activeBundle) },
7511
+ null,
7512
+ 2
7513
+ )
7514
+ }
7515
+ ]
7516
+ };
7517
+ }
7518
+ try {
7519
+ const result = await runCompileImports({
7520
+ bundle: activeBundle,
7521
+ providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
7522
+ sessionsPath: sessions_path,
7523
+ overwrite
7524
+ });
7525
+ const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
7526
+ return {
7527
+ content: [
7528
+ {
7529
+ type: "text",
7530
+ text: JSON.stringify(
7531
+ {
7532
+ mode: "import",
7533
+ providers: result.providers.map((provider) => ({
7534
+ source: provider.source,
7535
+ source_path: provider.sourcePath,
7536
+ batch_id: provider.batchId,
7537
+ counts: provider.counts
7538
+ })),
7539
+ imported_any: result.importedAny,
7540
+ tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
7541
+ tantivy_error: result.tantivyError,
7542
+ fts5_error: result.fts5Error,
7543
+ parquet: parquet ? {
7544
+ out_dir: parquet.outDir,
7545
+ manifest_path: parquet.manifestPath,
7546
+ table_count: parquet.tableCount,
7547
+ files: parquet.files,
7548
+ counts: parquet.counts
7549
+ } : null,
7550
+ search_index: getSearchIndexStatuses(activeBundle)
7551
+ },
7552
+ null,
7553
+ 2
7554
+ )
7555
+ }
7556
+ ]
7557
+ };
7558
+ } catch (error) {
7559
+ return {
7560
+ content: [{ type: "text", text: getErrorMessage(error) }],
7561
+ isError: true
7562
+ };
7563
+ }
7564
+ })
6588
7565
  );
6589
7566
  }
7567
+ async function withToolBundle(fallbackBundle, storePath, ensureStore, fn) {
7568
+ if (!ensureStore) {
7569
+ return await fn(fallbackBundle);
7570
+ }
7571
+ const bundle = await openOrInitBundle(storePath);
7572
+ try {
7573
+ return await fn(bundle);
7574
+ } finally {
7575
+ closeBundle(bundle);
7576
+ }
7577
+ }
6590
7578
  function registerProsaPrompts(server) {
6591
7579
  server.registerPrompt(
6592
7580
  "investigate_prior_work",
@@ -6619,14 +7607,14 @@ function registerProsaPrompts(server) {
6619
7607
  path: z.string().min(1).describe("File path, directory, or distinctive path suffix")
6620
7608
  }
6621
7609
  },
6622
- ({ path: path20 }) => ({
7610
+ ({ path: path21 }) => ({
6623
7611
  description: "Find sessions that touched a path and summarize the evidence.",
6624
7612
  messages: [
6625
7613
  {
6626
7614
  role: "user",
6627
7615
  content: {
6628
7616
  type: "text",
6629
- text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path20)
7617
+ text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path21)
6630
7618
  }
6631
7619
  }
6632
7620
  ]
@@ -6759,7 +7747,7 @@ function createMcpServer(bundle, searchEngine, storePath) {
6759
7747
  },
6760
7748
  { instructions: PROSA_MCP_INSTRUCTIONS }
6761
7749
  );
6762
- registerProsaTools(server, bundle, { searchEngine, storePath });
7750
+ registerProsaTools(server, bundle, { ensureStore: true, searchEngine, storePath });
6763
7751
  return server;
6764
7752
  }
6765
7753
  async function readBody(req) {
@@ -6796,27 +7784,12 @@ function writeError(res, error) {
6796
7784
  );
6797
7785
  }
6798
7786
 
6799
- // src/cli/parsers.ts
6800
- function parseSearchEngine(value) {
6801
- if (value === "fts5" || value === "tantivy") return value;
6802
- throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
6803
- }
6804
- function parseMcpTransport(value) {
6805
- if (value === "stdio" || value === "http") return value;
6806
- throw new Error(`invalid transport: ${value} (expected stdio or http)`);
6807
- }
6808
- function parseSourceTool(value) {
6809
- if (value === void 0) return void 0;
6810
- if (SOURCE_TOOLS.includes(value)) return value;
6811
- throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
6812
- }
6813
-
6814
7787
  // src/cli/commands/mcp.ts
6815
7788
  function mcpCommand() {
6816
- const serve = new Command5("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
7789
+ const serve = new Command6("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
6817
7790
  async (options) => {
6818
- const storePath = path18.resolve(options.store);
6819
- const bundle = await openBundle(storePath);
7791
+ const storePath = path19.resolve(options.store);
7792
+ const bundle = await openOrInitBundle(storePath);
6820
7793
  try {
6821
7794
  const transport = parseMcpTransport(options.transport);
6822
7795
  const searchEngine = parseSearchEngine(options.searchEngine);
@@ -6846,7 +7819,7 @@ function mcpCommand() {
6846
7819
  }
6847
7820
  }
6848
7821
  );
6849
- return new Command5("mcp").description("MCP server commands.").addCommand(serve);
7822
+ return new Command6("mcp").description("MCP server commands.").addCommand(serve);
6850
7823
  }
6851
7824
  function registerShutdown(closeServer, bundle) {
6852
7825
  const shutdown = async () => {
@@ -6863,13 +7836,13 @@ function registerShutdown(closeServer, bundle) {
6863
7836
  }
6864
7837
 
6865
7838
  // src/cli/commands/query.ts
6866
- import path19 from "path";
6867
- import { Command as Command6 } from "commander";
7839
+ import path20 from "path";
7840
+ import { Command as Command7 } from "commander";
6868
7841
  function queryCommand() {
6869
- const duckdb = new Command6("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
7842
+ const duckdb = new Command7("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
6870
7843
  async (sql, options) => {
6871
7844
  const format = parseOutputFormat(options.outputFormat, "table");
6872
- const parquetDir = options.parquetDir ? path19.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
7845
+ const parquetDir = options.parquetDir ? path20.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
6873
7846
  const result = await queryDuckDbParquet({ parquetDir, sql });
6874
7847
  printRows(result.rows, {
6875
7848
  format,
@@ -6878,14 +7851,14 @@ function queryCommand() {
6878
7851
  });
6879
7852
  }
6880
7853
  );
6881
- return new Command6("query").description("Run derived analytical queries.").addCommand(duckdb);
7854
+ return new Command7("query").description("Run derived analytical queries.").addCommand(duckdb);
6882
7855
  }
6883
7856
 
6884
7857
  // src/cli/commands/search.ts
6885
- import { Command as Command7 } from "commander";
7858
+ import { Command as Command8 } from "commander";
6886
7859
  init_search();
6887
7860
  function searchCommand() {
6888
- return new Command7("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
7861
+ return new Command8("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
6889
7862
  async (query, options) => {
6890
7863
  const engine = parseSearchEngine(options.engine);
6891
7864
  const format = parseOutputFormat(options.outputFormat, "table");
@@ -6906,10 +7879,10 @@ function searchCommand() {
6906
7879
  }
6907
7880
 
6908
7881
  // src/cli/commands/sessions.ts
6909
- import { Command as Command8 } from "commander";
7882
+ import { Command as Command9 } from "commander";
6910
7883
  init_sessions();
6911
7884
  function sessionsCommand() {
6912
- const command = new Command8("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
7885
+ const command = new Command9("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
6913
7886
  async (options) => {
6914
7887
  const format = parseOutputFormat(options.outputFormat, "table");
6915
7888
  await withBundle(options.store, (bundle) => {
@@ -6936,7 +7909,7 @@ function sessionsCommand() {
6936
7909
  }
6937
7910
  );
6938
7911
  command.addCommand(
6939
- new Command8("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
7912
+ new Command9("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
6940
7913
  async (options) => {
6941
7914
  await withBundle(options.store, (bundle) => {
6942
7915
  const count = countSessions(bundle, {
@@ -6954,9 +7927,9 @@ function sessionsCommand() {
6954
7927
  }
6955
7928
 
6956
7929
  // src/cli/commands/tui.ts
6957
- import { Command as Command9 } from "commander";
7930
+ import { Command as Command10 } from "commander";
6958
7931
  function tuiCommand() {
6959
- return new Command9("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
7932
+ return new Command10("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
6960
7933
  const [{ render }, React, { App: App2 }] = await Promise.all([
6961
7934
  import("ink"),
6962
7935
  import("react"),
@@ -6971,8 +7944,14 @@ function tuiCommand() {
6971
7944
  }
6972
7945
 
6973
7946
  // src/cli/main.ts
7947
+ function stripLeadingDoubleDash(argv) {
7948
+ if (argv.length >= 3 && argv[2] === "--") {
7949
+ return [argv[0], argv[1], ...argv.slice(3)];
7950
+ }
7951
+ return [...argv];
7952
+ }
6974
7953
  async function runCli(argv) {
6975
- const program = new Command10().name("prosa").enablePositionalOptions().description(
7954
+ const program = new Command11().name("prosa").enablePositionalOptions().description(
6976
7955
  "Compile, search and export local agent session histories\n(Cursor, Codex CLI, Claude Code, Gemini CLI) into one canonical store."
6977
7956
  ).version(PROSA_PARSER_VERSION, "-v, --version");
6978
7957
  program.addCommand(initCommand());
@@ -6983,9 +7962,10 @@ async function runCli(argv) {
6983
7962
  program.addCommand(searchCommand());
6984
7963
  program.addCommand(exportCommand());
6985
7964
  program.addCommand(queryCommand());
7965
+ program.addCommand(analyticsCommand());
6986
7966
  program.addCommand(mcpCommand());
6987
7967
  program.addCommand(tuiCommand());
6988
- await program.parseAsync([...argv]);
7968
+ await program.parseAsync(stripLeadingDoubleDash(argv));
6989
7969
  }
6990
7970
  var isEntry = import.meta.url === `file://${process.argv[1]}`;
6991
7971
  if (isEntry) {