npm - @c3-oss/prosa - Versions diffs - 0.3.1 → 0.5.0 - Mend

@c3-oss/prosa 0.3.1 → 0.5.0

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/bin/prosa.js CHANGED Viewed

@@ -12,8 +12,8 @@ var __export = (target, all) => {
 // src/core/db.ts
 import Database from "better-sqlite3";
-function openDb(path20) {
-  const db = new Database(path20);
+function openDb(path21) {
+  const db = new Database(path21);
   db.pragma("journal_mode = WAL");
   db.pragma("foreign_keys = ON");
   db.pragma("synchronous = NORMAL");
@@ -48,283 +48,30 @@ var init_db = __esm({
   }
 });
-// src/core/errors.ts
-var getErrorMessage;
-var init_errors = __esm({
-  "src/core/errors.ts"() {
-    "use strict";
-    getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
-  }
-});
-// src/core/cas/compress.ts
-import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
-function compressBytes(input) {
-  if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
-    return { bytes: Buffer.from(input), compression: "none" };
-  }
-  const out = zstdCompress(Buffer.from(input), { compressionLevel: ZSTD_LEVEL });
-  return { bytes: out, compression: "zstd" };
-}
-function decompressBytes(input, compression) {
-  if (compression === "none") return input;
-  return zstdDecompress(input);
-}
-var COMPRESS_THRESHOLD_BYTES, ZSTD_LEVEL;
-var init_compress = __esm({
-  "src/core/cas/compress.ts"() {
-    "use strict";
-    COMPRESS_THRESHOLD_BYTES = 256;
-    ZSTD_LEVEL = 3;
-  }
-});
-// src/core/cas/hash.ts
-import { createHash } from "crypto";
-import { blake3 } from "@noble/hashes/blake3";
-import { bytesToHex } from "@noble/hashes/utils";
-function blake3Hex(bytes) {
-  return bytesToHex(blake3(bytes));
-}
-function sha256Hex(bytes) {
-  return createHash("sha256").update(bytes).digest("hex");
-}
-function objectIdFromHash(hashHex) {
-  return `blake3:${hashHex}`;
-}
-function objectStoragePath(hashHex, compression) {
-  const ext = compression === "zstd" ? ".zst" : ".bin";
-  const a = hashHex.slice(0, 2);
-  const b = hashHex.slice(2, 4);
-  return `objects/blake3/${a}/${b}/${hashHex}${ext}`;
+// src/core/limits.ts
+function clampLimit(value, opts) {
+  return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
 }
-var init_hash = __esm({
-  "src/core/cas/hash.ts"() {
+var init_limits = __esm({
+  "src/core/limits.ts"() {
     "use strict";
   }
 });
-// src/core/cas/index.ts
-var cas_exports = {};
-__export(cas_exports, {
-  createPendingObjects: () => createPendingObjects,
-  ensureDir: () => ensureDir,
-  flushPendingObjects: () => flushPendingObjects,
-  getBytes: () => getBytes,
-  getJson: () => getJson,
-  getObjectMeta: () => getObjectMeta,
-  getText: () => getText,
-  putBytes: () => putBytes,
-  putJson: () => putJson,
-  putText: () => putText,
-  stageBytes: () => stageBytes,
-  stageJson: () => stageJson,
-  stageText: () => stageText
-});
-import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
-import path2 from "path";
-async function ensureDir(absoluteDir) {
-  if (ensuredDirs.has(absoluteDir)) return;
-  await mkdir2(absoluteDir, { recursive: true });
-  ensuredDirs.add(absoluteDir);
-}
-async function putBytes(bundle, bytes, options = {}) {
-  const hash = blake3Hex(bytes);
-  const objectId = objectIdFromHash(hash);
-  const existing = prepare(
-    bundle.db,
-    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
-            compression, mime_type, encoding, storage_path, created_at
-       FROM objects WHERE object_id = ?`
-  ).get(objectId);
-  if (existing) return objectId;
-  const { bytes: stored, compression } = compressBytes(bytes);
-  const storagePath = objectStoragePath(hash, compression);
-  const absolutePath = path2.join(bundle.path, storagePath);
-  await ensureDir(path2.dirname(absolutePath));
-  await writeFile2(absolutePath, stored);
-  prepare(
-    bundle.db,
-    `INSERT INTO objects (
-       object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
-       compression, mime_type, encoding, storage_path, created_at
-     ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
-  ).run(
-    objectId,
-    hash,
-    bytes.byteLength,
-    compression === "zstd" ? stored.byteLength : null,
-    compression,
-    options.mimeType ?? null,
-    options.encoding ?? null,
-    storagePath,
-    (/* @__PURE__ */ new Date()).toISOString()
-  );
-  return objectId;
-}
-async function putText(bundle, text, options = {}) {
-  const buf = Buffer.from(text, "utf8");
-  return putBytes(bundle, buf, {
-    mimeType: options.mimeType ?? "text/plain; charset=utf-8",
-    encoding: "utf-8"
-  });
-}
-async function putJson(bundle, value) {
-  const text = JSON.stringify(value);
-  return putBytes(bundle, Buffer.from(text, "utf8"), {
-    mimeType: "application/json",
-    encoding: "utf-8"
-  });
-}
-async function getBytes(bundle, objectId) {
-  const meta = prepare(
-    bundle.db,
-    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
-            compression, mime_type, encoding, storage_path, created_at
-       FROM objects WHERE object_id = ?`
-  ).get(objectId);
-  if (!meta) {
-    throw new Error(`object not found: ${objectId}`);
-  }
-  const buf = await readFile2(path2.join(bundle.path, meta.storage_path));
-  return decompressBytes(buf, meta.compression);
-}
-async function getText(bundle, objectId) {
-  const buf = await getBytes(bundle, objectId);
-  return buf.toString("utf8");
-}
-async function getJson(bundle, objectId) {
-  const text = await getText(bundle, objectId);
-  return JSON.parse(text);
-}
-function getObjectMeta(bundle, objectId) {
-  return prepare(
-    bundle.db,
-    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
-              compression, mime_type, encoding, storage_path, created_at
-         FROM objects WHERE object_id = ?`
-  ).get(objectId) ?? null;
-}
-function createPendingObjects() {
-  return { byId: /* @__PURE__ */ new Map() };
-}
-function stageBytes(pending, bytes, options = {}) {
-  const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
-  const hash = blake3Hex(buf);
-  const objectId = objectIdFromHash(hash);
-  if (!pending.byId.has(objectId)) {
-    pending.byId.set(objectId, {
-      objectId,
-      hash,
-      bytes: buf,
-      mimeType: options.mimeType ?? null,
-      encoding: options.encoding ?? null
-    });
-  }
-  return objectId;
-}
-function stageText(pending, text, options = {}) {
-  return stageBytes(pending, Buffer.from(text, "utf8"), {
-    mimeType: options.mimeType ?? "text/plain; charset=utf-8",
-    encoding: "utf-8"
-  });
-}
-function stageJson(pending, value) {
-  return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
-    mimeType: "application/json",
-    encoding: "utf-8"
-  });
-}
-async function flushPendingObjects(bundle, pending) {
-  if (pending.byId.size === 0) return;
-  const ids = [...pending.byId.keys()];
-  const existingIds = queryExistingObjectIds(bundle, ids);
-  const toWrite = [];
-  for (const obj of pending.byId.values()) {
-    if (existingIds.has(obj.objectId)) continue;
-    const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
-    const storagePath = objectStoragePath(obj.hash, compression);
-    toWrite.push({
-      staged: obj,
-      compression,
-      compressedBytes,
-      storagePath,
-      absolutePath: path2.join(bundle.path, storagePath)
-    });
-  }
-  if (toWrite.length > 0) {
-    await writeFilesParallel(toWrite);
-  }
-  const insertObject = prepare(
-    bundle.db,
-    `INSERT OR IGNORE INTO objects (
-       object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
-       compression, mime_type, encoding, storage_path, created_at
-     ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
-  );
-  const now = (/* @__PURE__ */ new Date()).toISOString();
-  for (const p of toWrite) {
-    insertObject.run(
-      p.staged.objectId,
-      p.staged.hash,
-      p.staged.bytes.byteLength,
-      p.compression === "zstd" ? p.compressedBytes.byteLength : null,
-      p.compression,
-      p.staged.mimeType,
-      p.staged.encoding,
-      p.storagePath,
-      now
-    );
-  }
-}
-function queryExistingObjectIds(bundle, ids) {
-  const found = /* @__PURE__ */ new Set();
-  if (ids.length === 0) return found;
-  const CHUNK = 500;
-  for (let start = 0; start < ids.length; start += CHUNK) {
-    const slice = ids.slice(start, start + CHUNK);
-    const placeholders = slice.map(() => "?").join(",");
-    const rows = bundle.db.prepare(
-      `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
-    ).all(...slice);
-    for (const row of rows) found.add(row.object_id);
-  }
-  return found;
-}
-async function writeFilesParallel(tasks) {
-  let cursor = 0;
-  const workers = [];
-  const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
-  for (let w = 0; w < limit; w++) {
-    workers.push(
-      (async () => {
-        while (true) {
-          const i = cursor++;
-          if (i >= tasks.length) return;
-          const task = tasks[i];
-          await ensureDir(path2.dirname(task.absolutePath));
-          await writeFile2(task.absolutePath, task.compressedBytes);
-        }
-      })()
-    );
-  }
-  await Promise.all(workers);
-}
-var ensuredDirs, FS_WRITE_CONCURRENCY;
-var init_cas = __esm({
-  "src/core/cas/index.ts"() {
+// src/core/errors.ts
+var getErrorMessage;
+var init_errors = __esm({
+  "src/core/errors.ts"() {
     "use strict";
-    init_db();
-    init_compress();
-    init_hash();
-    ensuredDirs = /* @__PURE__ */ new Set();
-    FS_WRITE_CONCURRENCY = 16;
+    getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
   }
 });
 // src/services/indexing.ts
+import { createHash as createHash2 } from "crypto";
+import { existsSync } from "fs";
 import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
-import path13 from "path";
+import path15 from "path";
 function enableFts5Triggers(bundle) {
   bundle.db.exec(FTS5_TRIGGER_SQL);
 }
@@ -338,7 +85,7 @@ function disableFts5Triggers(bundle) {
 function getSearchIndexStatuses(bundle) {
   ensureSearchIndexStatusRows(bundle);
   return bundle.db.prepare(
-    `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
+    `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
          FROM search_index_status
         ORDER BY engine`
   ).all();
@@ -346,28 +93,13 @@ function getSearchIndexStatuses(bundle) {
 function getSearchIndexStatus(bundle, engine) {
   ensureSearchIndexStatusRows(bundle);
   return bundle.db.prepare(
-    `SELECT engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
+    `SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
            FROM search_index_status
           WHERE engine = ?`
   ).get(engine) ?? null;
 }
 function markIndexesAfterImport(bundle, options) {
   if (!options.changed) return;
-  if (options.fts5Deferred) {
-    updateSearchIndexStatus(bundle, "fts5", {
-      status: "stale",
-      sourceDocCount: countSearchDocs(bundle),
-      indexedDocCount: countFts5Docs(bundle),
-      errorMessage: null
-    });
-  } else {
-    updateSearchIndexStatus(bundle, "fts5", {
-      status: "ready",
-      sourceDocCount: countSearchDocs(bundle),
-      indexedDocCount: countFts5Docs(bundle),
-      errorMessage: null
-    });
-  }
   const tantivy = getSearchIndexStatus(bundle, "tantivy");
   if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
     updateSearchIndexStatus(bundle, "tantivy", {
@@ -408,55 +140,93 @@ function rebuildFts5Index(bundle) {
   }
   return getSearchIndexStatus(bundle, "fts5");
 }
-async function rebuildTantivyIndex(bundle) {
+function buildTantivySchema(tantivy) {
+  const builder = new tantivy.SchemaBuilder();
+  for (const field of TANTIVY_SCHEMA_FIELDS) {
+    if (field.tokenizer === "default") {
+      builder.addTextField(field.name, { stored: true });
+    } else {
+      builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
+    }
+  }
+  return builder.build();
+}
+function computeSchemaFingerprint() {
+  const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
+  return createHash2("sha256").update(canonical).digest("hex");
+}
+function tantivyIndexLooksValid(dir) {
+  return existsSync(path15.join(dir, "meta.json"));
+}
+function makeTantivyDoc(tantivy, row) {
+  const doc = new tantivy.Document();
+  doc.addText("doc_id", row.doc_id);
+  doc.addText("entity_type", row.entity_type);
+  doc.addText("entity_id", row.entity_id);
+  doc.addText("session_id", row.session_id ?? "");
+  doc.addText("project_id", row.project_id ?? "");
+  doc.addText("timestamp", row.timestamp ?? "");
+  doc.addText("role", row.role ?? "");
+  doc.addText("tool_name", row.tool_name ?? "");
+  doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
+  doc.addText("field_kind", row.field_kind);
+  doc.addText("text", row.text);
+  return doc;
+}
+async function rebuildTantivyIndex(bundle, options = {}) {
   ensureSearchIndexStatusRows(bundle);
+  const sourceDocCount = countSearchDocs(bundle);
+  const prev = getSearchIndexStatus(bundle, "tantivy");
+  const fingerprint = computeSchemaFingerprint();
+  const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
+  const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
+  const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
+  const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
   updateSearchIndexStatus(bundle, "tantivy", {
     status: "building",
-    sourceDocCount: countSearchDocs(bundle),
+    sourceDocCount,
     indexedDocCount: 0,
     errorMessage: null
   });
   try {
     const tantivy = await import("@oxdev03/node-tantivy-binding");
-    const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
-    await rm2(bundle.paths.tantivy, { recursive: true, force: true });
-    await mkdir4(bundle.paths.tantivy, { recursive: true });
-    const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
-    const writer = index.writer(5e7, 1);
-    let indexedDocCount = 0;
-    const rows = bundle.db.prepare(
-      `SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
-                role, tool_name, canonical_tool_type, field_kind, text
-           FROM search_docs
-          ORDER BY rowid`
-    ).iterate();
-    for (const row of rows) {
-      const doc = new tantivy.Document();
-      doc.addText("doc_id", row.doc_id);
-      doc.addText("entity_type", row.entity_type);
-      doc.addText("entity_id", row.entity_id);
-      doc.addText("session_id", row.session_id ?? "");
-      doc.addText("project_id", row.project_id ?? "");
-      doc.addText("timestamp", row.timestamp ?? "");
-      doc.addText("role", row.role ?? "");
-      doc.addText("tool_name", row.tool_name ?? "");
-      doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
-      doc.addText("field_kind", row.field_kind);
-      doc.addText("text", row.text);
-      writer.addDocument(doc);
-      indexedDocCount++;
+    const schema = buildTantivySchema(tantivy);
+    let index;
+    if (wantFullRebuild) {
+      await rm2(bundle.paths.tantivy, { recursive: true, force: true });
+      await mkdir4(bundle.paths.tantivy, { recursive: true });
+      index = new tantivy.Index(schema, bundle.paths.tantivy, false);
+    } else {
+      index = tantivy.Index.open(bundle.paths.tantivy);
+    }
+    const writer = index.writer(3e8, 4);
+    const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
+    let addedDocCount = 0;
+    let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
+    for (const row of bundle.db.prepare(select).iterate()) {
+      if (!wantFullRebuild) {
+        writer.deleteDocumentsByTerm("doc_id", row.doc_id);
+      }
+      writer.addDocument(makeTantivyDoc(tantivy, row));
+      addedDocCount++;
+      if (row.rowid > maxRowid) maxRowid = row.rowid;
     }
     writer.commit();
     index.reload();
+    writer.waitMergingThreads();
+    const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
     await writeFile5(
-      path13.join(bundle.paths.tantivy, "prosa-index.json"),
+      path15.join(bundle.paths.tantivy, "prosa-index.json"),
       `${JSON.stringify(
         {
           engine: "tantivy",
           source: "search_docs",
           built_at: (/* @__PURE__ */ new Date()).toISOString(),
-          source_doc_count: countSearchDocs(bundle),
-          indexed_doc_count: indexedDocCount
+          mode: wantFullRebuild ? "full" : "incremental",
+          source_doc_count: sourceDocCount,
+          indexed_doc_count: indexedDocCount,
+          last_indexed_rowid: maxRowid,
+          schema_fingerprint: fingerprint
         },
         null,
         2
@@ -466,14 +236,16 @@ async function rebuildTantivyIndex(bundle) {
     );
     updateSearchIndexStatus(bundle, "tantivy", {
       status: "ready",
-      sourceDocCount: countSearchDocs(bundle),
+      sourceDocCount,
       indexedDocCount,
-      errorMessage: null
+      errorMessage: null,
+      lastIndexedRowid: maxRowid,
+      schemaFingerprint: fingerprint
     });
   } catch (error) {
     updateSearchIndexStatus(bundle, "tantivy", {
       status: "failed",
-      sourceDocCount: countSearchDocs(bundle),
+      sourceDocCount,
       indexedDocCount: 0,
       errorMessage: getErrorMessage(error)
     });
@@ -481,36 +253,53 @@ async function rebuildTantivyIndex(bundle) {
   }
   return getSearchIndexStatus(bundle, "tantivy");
 }
+function countTantivyDocsBest(prev, added) {
+  if (prev && typeof prev.indexed_doc_count === "number") {
+    return prev.indexed_doc_count + added;
+  }
+  return added;
+}
 function ensureSearchIndexStatusRows(bundle) {
   const now = (/* @__PURE__ */ new Date()).toISOString();
   const stmt = prepare(
     bundle.db,
     `INSERT OR IGNORE INTO search_index_status (
-       engine, status, source_doc_count, indexed_doc_count, updated_at, error_message
-     ) VALUES (?, ?, 0, 0, ?, NULL)`
+       engine, status, source_doc_count, indexed_doc_count, updated_at,
+       error_message, last_indexed_rowid, schema_fingerprint
+     ) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
   );
   stmt.run("fts5", "ready", now);
   stmt.run("tantivy", "missing", now);
 }
 function updateSearchIndexStatus(bundle, engine, values) {
   ensureSearchIndexStatusRows(bundle);
-  prepare(
-    bundle.db,
-    `UPDATE search_index_status
-        SET status = ?,
-            source_doc_count = ?,
-            indexed_doc_count = ?,
-            updated_at = ?,
-            error_message = ?
-      WHERE engine = ?`
-  ).run(
+  const setClauses = [
+    "status = ?",
+    "source_doc_count = ?",
+    "indexed_doc_count = ?",
+    "updated_at = ?",
+    "error_message = ?"
+  ];
+  const params = [
     values.status,
     values.sourceDocCount,
     values.indexedDocCount,
     (/* @__PURE__ */ new Date()).toISOString(),
-    values.errorMessage,
-    engine
-  );
+    values.errorMessage
+  ];
+  if (values.lastIndexedRowid !== void 0) {
+    setClauses.push("last_indexed_rowid = ?");
+    params.push(values.lastIndexedRowid);
+  }
+  if (values.schemaFingerprint !== void 0) {
+    setClauses.push("schema_fingerprint = ?");
+    params.push(values.schemaFingerprint);
+  }
+  params.push(engine);
+  prepare(
+    bundle.db,
+    `UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
+  ).run(...params);
 }
 function countSearchDocs(bundle) {
   return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
@@ -518,12 +307,16 @@ function countSearchDocs(bundle) {
 function countFts5Docs(bundle) {
   return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs_fts`).get()?.n ?? 0;
 }
-var FTS5_TRIGGER_SQL;
+var SEARCH_INDEX_STATUS_COLUMNS, FTS5_TRIGGER_SQL, TANTIVY_SCHEMA_FIELDS, SEARCH_DOCS_SELECT;
 var init_indexing = __esm({
   "src/services/indexing.ts"() {
     "use strict";
     init_db();
     init_errors();
+    SEARCH_INDEX_STATUS_COLUMNS = `
+  engine, status, source_doc_count, indexed_doc_count, updated_at,
+  error_message, last_indexed_rowid, schema_fingerprint
+`;
     FTS5_TRIGGER_SQL = `
 CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
   INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
@@ -542,21 +335,30 @@ CREATE TRIGGER IF NOT EXISTS search_docs_au AFTER UPDATE ON search_docs BEGIN
   VALUES (new.rowid, new.text, new.role, new.tool_name, new.field_kind);
 END;
 `;
-  }
-});
-// src/core/limits.ts
-function clampLimit(value, opts) {
-  return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
-}
-var init_limits = __esm({
-  "src/core/limits.ts"() {
-    "use strict";
+    TANTIVY_SCHEMA_FIELDS = [
+      { name: "doc_id", tokenizer: "raw" },
+      { name: "entity_type", tokenizer: "raw" },
+      { name: "entity_id", tokenizer: "raw" },
+      { name: "session_id", tokenizer: "raw" },
+      { name: "project_id", tokenizer: "raw" },
+      { name: "timestamp", tokenizer: "raw" },
+      { name: "role", tokenizer: "raw" },
+      { name: "tool_name", tokenizer: "raw" },
+      { name: "canonical_tool_type", tokenizer: "raw" },
+      { name: "field_kind", tokenizer: "raw" },
+      // The text field uses tantivy's default tokenizer (en_stem in the binding).
+      { name: "text", tokenizer: "default" }
+    ];
+    SEARCH_DOCS_SELECT = `
+  SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
+         role, tool_name, canonical_tool_type, field_kind, text
+    FROM search_docs
+`;
   }
 });
 // src/services/search.ts
-import { existsSync } from "fs";
+import { existsSync as existsSync2 } from "fs";
 import { createRequire } from "module";
 function escapeFtsQuery(q) {
   return q.split(/\s+/).filter((t) => t.length > 0).map((t) => `"${t.replace(/"/g, '""')}"`).join(" ");
@@ -565,7 +367,7 @@ function searchFullText(bundle, options) {
   if (options.engine === "tantivy") {
     return searchTantivy(bundle, options);
   }
-  const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
+  const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
   const sql = `
     SELECT d.doc_id,
            d.entity_type,
@@ -580,14 +382,14 @@ function searchFullText(bundle, options) {
       JOIN search_docs d ON d.rowid = search_docs_fts.rowid
      WHERE search_docs_fts MATCH ?
      ORDER BY bm25(search_docs_fts), d.timestamp DESC
-     LIMIT ${limit}
+     LIMIT ${limit2}
   `;
   const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
   if (!ftsQuery) return [];
   return bundle.db.prepare(sql).all(ftsQuery);
 }
 function searchTantivy(bundle, options) {
-  if (!existsSync(bundle.paths.tantivy)) {
+  if (!existsSync2(bundle.paths.tantivy)) {
     throw new Error("tantivy index not found; run `prosa index tantivy` first");
   }
   const status = getSearchIndexStatus(bundle, "tantivy");
@@ -596,7 +398,7 @@ function searchTantivy(bundle, options) {
       `tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
     );
   }
-  const limit = clampLimit(options.limit, { max: 500, fallback: 50 });
+  const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
   const queryText = options.query.trim();
   if (!queryText) return [];
   const tantivy = requireTantivy();
@@ -605,7 +407,7 @@ function searchTantivy(bundle, options) {
   const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
     text: [true, 2, true]
   });
-  const result = searcher.search(query, limit, true);
+  const result = searcher.search(query, limit2, true);
   const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
   snippets.setMaxNumChars(180);
   return result.hits.map((hit) => {
@@ -689,7 +491,7 @@ function sessionFilterWhere(filters) {
 }
 function listSessions(bundle, filters = {}) {
   const { where, params } = sessionFilterWhere(filters);
-  const limit = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
+  const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
   const sql = `
     SELECT s.session_id,
            s.source_tool,
@@ -710,7 +512,7 @@ function listSessions(bundle, filters = {}) {
       FROM sessions s
       ${where}
      ORDER BY s.start_ts DESC NULLS LAST
-     LIMIT ${limit}
+     LIMIT ${limit2}
   `;
   return bundle.db.prepare(sql).all(...params);
 }
@@ -1134,13 +936,14 @@ var init_App = __esm({
 });
 // src/cli/main.ts
-import { Command as Command10 } from "commander";
+import { Command as Command11 } from "commander";
 // src/core/version.ts
 var PROSA_PARSER_VERSION = "0.1.0";
-var PROSA_SCHEMA_VERSION = 2;
+var PROSA_SCHEMA_VERSION = 4;
-// src/cli/commands/compile.ts
+// src/cli/commands/analytics.ts
+import path4 from "path";
 import { Command } from "commander";
 // src/core/bundle.ts
@@ -1514,10 +1317,291 @@ INSERT OR IGNORE INTO search_index_status (
   ('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
 `;
+// src/core/schema/sql/003_analytics_views.ts
+var SQL_003_ANALYTICS_VIEWS = String.raw`
+CREATE VIEW IF NOT EXISTS session_facts AS
+WITH turn_counts AS (
+  SELECT session_id, count(*) AS turn_count
+    FROM turns
+   GROUP BY session_id
+),
+message_counts AS (
+  SELECT session_id,
+         count(*) AS message_count,
+         sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
+         sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
+    FROM messages
+   GROUP BY session_id
+),
+tool_call_counts AS (
+  SELECT session_id,
+         count(*) AS tool_call_count,
+         sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
+    FROM tool_calls
+   GROUP BY session_id
+),
+tool_result_counts AS (
+  SELECT session_id,
+         count(*) AS tool_result_count,
+         sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
+                  THEN 1 ELSE 0 END) AS tool_result_error_count,
+         sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
+    FROM tool_results
+   GROUP BY session_id
+),
+search_doc_counts AS (
+  SELECT session_id, count(*) AS search_doc_count
+    FROM search_docs
+   WHERE session_id IS NOT NULL
+   GROUP BY session_id
+)
+SELECT s.session_id,
+       s.source_tool,
+       s.source_session_id,
+       s.project_id,
+       p.display_name AS project_name,
+       p.canonical_path AS project_path,
+       s.parent_session_id,
+       s.is_subagent,
+       s.agent_role,
+       s.agent_nickname,
+       s.title,
+       s.start_ts,
+       s.end_ts,
+       CASE
+         WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
+         THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
+         ELSE NULL
+       END AS duration_seconds,
+       s.cwd_initial,
+       s.git_branch_initial,
+       s.model_first,
+       s.model_last,
+       s.status,
+       s.timeline_confidence,
+       sf.path AS source_file_path,
+       COALESCE(tc.turn_count, 0) AS turn_count,
+       COALESCE(mc.message_count, 0) AS message_count,
+       COALESCE(mc.user_message_count, 0) AS user_message_count,
+       COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
+       COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
+       COALESCE(trc.tool_result_count, 0) AS tool_result_count,
+       COALESCE(tcc.tool_call_error_count, 0)
+         + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
+       COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
+       COALESCE(sdc.search_doc_count, 0) AS search_doc_count
+  FROM sessions s
+  LEFT JOIN projects p ON p.project_id = s.project_id
+  LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
+  LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
+  LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
+  LEFT JOIN message_counts mc ON mc.session_id = s.session_id
+  LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
+  LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
+  LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;
+CREATE VIEW IF NOT EXISTS tool_usage_facts AS
+WITH result_rollup AS (
+  SELECT tool_call_id,
+         session_id,
+         count(*) AS tool_result_count,
+         max(status) AS result_status,
+         max(is_error) AS is_error,
+         min(exit_code) AS exit_code,
+         sum(COALESCE(duration_ms, 0)) AS duration_ms,
+         max(preview) AS preview
+    FROM tool_results
+   GROUP BY tool_call_id, session_id
+)
+SELECT tc.tool_call_id,
+       tc.session_id,
+       s.source_tool,
+       s.source_session_id,
+       s.project_id,
+       p.display_name AS project_name,
+       p.canonical_path AS project_path,
+       tc.turn_id,
+       tc.message_id,
+       tc.event_id,
+       tc.source_call_id,
+       tc.tool_name,
+       tc.canonical_tool_type,
+       tc.command,
+       tc.cwd,
+       tc.path,
+       tc.query,
+       tc.timestamp_start,
+       tc.timestamp_end,
+       CASE
+         WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
+         THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
+         ELSE NULL
+       END AS call_duration_seconds,
+       tc.status AS call_status,
+       rr.result_status,
+       COALESCE(rr.is_error, 0) AS is_error,
+       rr.exit_code,
+       rr.duration_ms AS result_duration_ms,
+       COALESCE(rr.tool_result_count, 0) AS tool_result_count,
+       rr.preview,
+       tc.raw_record_id
+  FROM tool_calls tc
+  LEFT JOIN sessions s ON s.session_id = tc.session_id
+  LEFT JOIN projects p ON p.project_id = s.project_id
+  LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;
+CREATE VIEW IF NOT EXISTS error_facts AS
+SELECT 'tool_result:' || tr.tool_result_id AS error_id,
+       'tool_result' AS error_category,
+       s.source_tool,
+       s.project_id,
+       p.display_name AS project_name,
+       tr.session_id,
+       COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
+       tc.tool_name,
+       tc.canonical_tool_type,
+       COALESCE(tr.status, tc.status) AS status,
+       tr.exit_code,
+       NULL AS message,
+       tr.preview,
+       NULL AS entity_type,
+       NULL AS entity_id,
+       tr.raw_record_id
+  FROM tool_results tr
+  LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
+  LEFT JOIN sessions s ON s.session_id = tr.session_id
+  LEFT JOIN projects p ON p.project_id = s.project_id
+ WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
+UNION ALL
+SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
+       'import_error' AS error_category,
+       COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
+       NULL AS project_id,
+       NULL AS project_name,
+       NULL AS session_id,
+       ie.occurred_at AS timestamp,
+       NULL AS tool_name,
+       NULL AS canonical_tool_type,
+       ie.kind AS status,
+       NULL AS exit_code,
+       ie.message,
+       NULL AS preview,
+       NULL AS entity_type,
+       NULL AS entity_id,
+       ie.raw_record_id
+  FROM import_errors ie
+  LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
+  LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
+UNION ALL
+SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
+       'uncertainty' AS error_category,
+       NULL AS source_tool,
+       NULL AS project_id,
+       NULL AS project_name,
+       CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
+       NULL AS timestamp,
+       NULL AS tool_name,
+       NULL AS canonical_tool_type,
+       u.reason AS status,
+       NULL AS exit_code,
+       u.reason AS message,
+       NULL AS preview,
+       u.entity_type,
+       u.entity_id,
+       NULL AS raw_record_id
+  FROM uncertainties u;
+CREATE VIEW IF NOT EXISTS model_usage AS
+WITH model_events AS (
+  SELECT s.source_tool,
+         s.project_id,
+         p.display_name AS project_name,
+         p.canonical_path AS project_path,
+         s.session_id,
+         NULL AS turn_id,
+         s.model_first AS model,
+         s.start_ts AS timestamp,
+         'session_first' AS observation_type
+    FROM sessions s
+    LEFT JOIN projects p ON p.project_id = s.project_id
+   WHERE s.model_first IS NOT NULL
+  UNION ALL
+  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
+         s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
+         'session_last' AS observation_type
+    FROM sessions s
+    LEFT JOIN projects p ON p.project_id = s.project_id
+   WHERE s.model_last IS NOT NULL
+  UNION ALL
+  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
+         t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
+    FROM turns t
+    LEFT JOIN sessions s ON s.session_id = t.session_id
+    LEFT JOIN projects p ON p.project_id = s.project_id
+   WHERE t.model IS NOT NULL
+  UNION ALL
+  SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
+         m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
+    FROM messages m
+    LEFT JOIN sessions s ON s.session_id = m.session_id
+    LEFT JOIN projects p ON p.project_id = s.project_id
+   WHERE m.model IS NOT NULL
+)
+SELECT source_tool,
+       project_id,
+       project_name,
+       project_path,
+       model,
+       count(DISTINCT session_id) AS session_count,
+       count(DISTINCT turn_id) AS turn_count,
+       count(*) AS observation_count,
+       sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
+       min(timestamp) AS first_seen_ts,
+       max(timestamp) AS last_seen_ts
+  FROM model_events
+ GROUP BY source_tool, project_id, project_name, project_path, model;
+CREATE VIEW IF NOT EXISTS project_activity AS
+SELECT s.source_tool,
+       s.project_id,
+       COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
+       p.canonical_path AS project_path,
+       min(s.start_ts) AS first_session_ts,
+       max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
+       count(DISTINCT s.session_id) AS session_count,
+       count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
+         AS low_confidence_session_count,
+       count(DISTINCT t.turn_id) AS turn_count,
+       count(DISTINCT m.message_id) AS message_count,
+       count(DISTINCT tc.tool_call_id) AS tool_call_count,
+       count(DISTINCT tr.tool_result_id) AS tool_result_count,
+       count(DISTINCT CASE
+         WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
+         THEN tr.tool_result_id
+       END) AS tool_error_count,
+       count(DISTINCT sd.doc_id) AS search_doc_count
+  FROM sessions s
+  LEFT JOIN projects p ON p.project_id = s.project_id
+  LEFT JOIN turns t ON t.session_id = s.session_id
+  LEFT JOIN messages m ON m.session_id = s.session_id
+  LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
+  LEFT JOIN tool_results tr ON tr.session_id = s.session_id
+  LEFT JOIN search_docs sd ON sd.session_id = s.session_id
+ GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
+`;
+// src/core/schema/sql/004_tantivy_checkpoint.ts
+// SQL for migration version 4 ("tantivy_checkpoint" in MIGRATIONS): adds the
+// last_indexed_rowid and schema_fingerprint columns to search_index_status.
+var SQL_004_TANTIVY_CHECKPOINT = String.raw`
+ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
+ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
+`;
 // src/core/schema/migrate.ts
 var MIGRATIONS = [
   { version: 1, name: "init", sql: SQL_001_INIT },
-  { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS }
+  { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
+  { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
+  { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
 ];
 function runMigrations(db) {
   db.exec(`
@@ -1613,52 +1697,1038 @@ async function initBundle(rootPath) {
   runMigrations(db);
   return { path: resolved, db, manifest, paths };
 }
-async function openBundle(rootPath) {
-  const resolved = path.resolve(rootPath);
-  const paths = bundlePaths(resolved);
-  const dirStat = await stat(resolved).catch(() => null);
-  if (!dirStat?.isDirectory()) {
-    throw new Error(`bundle path not found or not a directory: ${resolved}`);
+/**
+ * Opens an existing bundle directory and returns its database handle, manifest and paths.
+ *
+ * Ensures the search and tantivy directories exist, runs migrations, checks that the
+ * resulting schema version equals PROSA_SCHEMA_VERSION, and rewrites manifest.json when
+ * its parser_version differs from PROSA_PARSER_VERSION.
+ *
+ * @param rootPath - Bundle directory; resolved to an absolute path before use.
+ * @returns `{ path, db, manifest, paths }` for the opened bundle. The caller owns `db`.
+ * @throws Error when the path is not a directory, manifest.json is missing, or the
+ *   schema version does not match. The database handle is closed before any error
+ *   raised after it was opened is propagated.
+ */
+async function openBundle(rootPath) {
+  const resolved = path.resolve(rootPath);
+  const paths = bundlePaths(resolved);
+  const dirStat = await stat(resolved).catch(() => null);
+  if (!dirStat?.isDirectory()) {
+    throw new Error(`bundle path not found or not a directory: ${resolved}`);
+  }
+  if (!await exists(paths.manifest)) {
+    throw new Error(
+      `no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
+    );
+  }
+  const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
+  await mkdir(paths.search, { recursive: true });
+  await mkdir(paths.tantivy, { recursive: true });
+  const db = openDb(paths.db);
+  try {
+    runMigrations(db);
+    const currentVersion = currentSchemaVersion(db);
+    if (currentVersion !== PROSA_SCHEMA_VERSION) {
+      throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
+    }
+    if (manifest.parser_version !== PROSA_PARSER_VERSION) {
+      manifest.parser_version = PROSA_PARSER_VERSION;
+      await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
+    }
+  } catch (error) {
+    // Nothing is returned on failure, so no caller could close the handle: release it here.
+    closeDb(db);
+    throw error;
+  }
+  return { path: resolved, db, manifest, paths };
+}
+/**
+ * Opens the bundle at `rootPath`, initializing it first when the directory or its
+ * manifest does not exist yet.
+ *
+ * @param rootPath - Bundle directory; resolved to an absolute path before use.
+ * @returns The bundle produced by initBundle or openBundle.
+ * @throws Error when the path exists but is not a directory.
+ */
+async function openOrInitBundle(rootPath) {
+  const bundleDir = path.resolve(rootPath);
+  const paths = bundlePaths(bundleDir);
+  const info = await stat(bundleDir).catch(() => null);
+  if (!info) {
+    // Nothing on disk yet: create the bundle from scratch.
+    return await initBundle(bundleDir);
+  }
+  if (!info.isDirectory()) {
+    throw new Error(`bundle path not found or not a directory: ${bundleDir}`);
+  }
+  const hasManifest = await exists(paths.manifest);
+  if (hasManifest) {
+    return await openBundle(bundleDir);
+  }
+  return await initBundle(bundleDir);
+}
+/**
+ * Closes the database handle held by an opened bundle.
+ *
+ * @param bundle - Bundle object whose `db` property is passed to closeDb.
+ */
+function closeBundle(bundle) {
+  const { db } = bundle;
+  closeDb(db);
+}
+// src/services/analytics.ts
+init_limits();
+// src/services/export/parquet.ts
+import { mkdir as mkdir2, rm, writeFile as writeFile2 } from "fs/promises";
+import path2 from "path";
+import { DuckDBConnection } from "@duckdb/node-api";
+init_errors();
+// Tables handled by the Parquet pipeline: exportBundleParquet writes one
+// `<table>.parquet` file per entry, and queryDuckDbParquet exposes each file as a
+// DuckDB view under the same name.
+var PARQUET_TABLES = [
+  "objects",
+  "source_files",
+  "import_batches",
+  "raw_records",
+  "import_errors",
+  "uncertainties",
+  "projects",
+  "sessions",
+  "turns",
+  "events",
+  "messages",
+  "content_blocks",
+  "tool_calls",
+  "tool_results",
+  "artifacts",
+  "edges",
+  "search_docs"
+];
+/**
+ * Exports every table listed in PARQUET_TABLES from the bundle database into
+ * zstd-compressed Parquet files and writes a manifest.json describing the export.
+ *
+ * @param options - `bundlePath` is the bundle to export; `outDir` optionally overrides
+ *   the snapshot's default output directory.
+ * @returns `{ outDir, manifestPath, files, counts }` where `files` maps table name to
+ *   Parquet path and `counts` holds the row counts captured by openBundleSnapshot
+ *   (i.e. before the COPY statements run).
+ */
+async function exportBundleParquet(options) {
+  const snapshot = await openBundleSnapshot(options.bundlePath);
+  const outDir = path2.resolve(options.outDir ?? snapshot.defaultOutDir);
+  await mkdir2(outDir, { recursive: true });
+  const files = {};
+  for (const table of PARQUET_TABLES) {
+    files[table] = path2.join(outDir, `${table}.parquet`);
+  }
+  const manifestPath = path2.join(outDir, "manifest.json");
+  // Remove outputs of a previous export up front; the manifest is rewritten last.
+  const staleTargets = Object.values(files).concat(manifestPath);
+  for (const target of staleTargets) {
+    await rm(target, { force: true });
+  }
+  const connection = await createDuckDbConnection();
+  try {
+    await attachSqlite(connection, snapshot.dbPath);
+    for (const table of PARQUET_TABLES) {
+      const sourceRelation = `prosa.${quoteIdentifier(table)}`;
+      const targetLiteral = sqlString(files[table]);
+      await connection.run(
+        `COPY (SELECT * FROM ${sourceRelation}) TO ${targetLiteral} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
+      );
+    }
+  } finally {
+    connection.closeSync();
+  }
+  const exportedAt = new Date().toISOString();
+  const tables = {};
+  for (const table of PARQUET_TABLES) {
+    tables[table] = {
+      file: path2.basename(files[table]),
+      rows: snapshot.counts[table]
+    };
+  }
+  const manifest = {
+    exported_at: exportedAt,
+    source_db: snapshot.dbPath,
+    schema_version: snapshot.schemaVersion,
+    parser_version: snapshot.parserVersion,
+    tables
+  };
+  await writeFile2(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
+  return { outDir, manifestPath, files, counts: snapshot.counts };
+}
+/**
+ * Runs `options.sql` on a fresh DuckDB connection in which every PARQUET_TABLES entry
+ * is a view over `<parquetDir>/<table>.parquet` and the analytics views are defined.
+ *
+ * @param options - `parquetDir` is the export directory; `sql` is executed as given.
+ * @returns `{ columns, rows }` taken from the DuckDB result reader.
+ * @throws Error with a hint to run the Parquet export when the failure looks like a
+ *   missing Parquet file (the underlying DuckDB error is attached as `cause`); any
+ *   other error is rethrown unchanged.
+ */
+async function queryDuckDbParquet(options) {
+  const parquetDir = path2.resolve(options.parquetDir);
+  const connection = await createDuckDbConnection();
+  try {
+    for (const table of PARQUET_TABLES) {
+      await connection.run(
+        `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
+          path2.join(parquetDir, `${table}.parquet`)
+        )})`
+      );
+    }
+    await createAnalyticsViews(connection);
+    const reader = await connection.runAndReadAll(options.sql);
+    return {
+      columns: reader.deduplicatedColumnNames(),
+      rows: reader.getRowObjectsJson()
+    };
+  } catch (error) {
+    if (isMissingParquetError(error)) {
+      // Keep the DuckDB error reachable so the missing file can still be identified.
+      throw new Error(
+        `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`,
+        { cause: error }
+      );
+    }
+    throw error;
+  } finally {
+    connection.closeSync();
+  }
+}
+/**
+ * Creates a new DuckDB connection through DuckDBConnection.create().
+ *
+ * @returns Promise resolving to the created connection.
+ */
+async function createDuckDbConnection() {
+  const connection = await DuckDBConnection.create();
+  return connection;
+}
+/**
+ * Installs and loads DuckDB's sqlite extension, then attaches the database at `dbPath`
+ * under the alias `prosa`.
+ *
+ * @param connection - DuckDB connection exposing an async `run(sql)` method.
+ * @param dbPath - Path of the SQLite file to attach.
+ * @throws Error describing the attach failure; the underlying error is attached as
+ *   `cause` so its stack and details are not lost.
+ */
+async function attachSqlite(connection, dbPath) {
+  try {
+    await connection.run("INSTALL sqlite");
+    await connection.run("LOAD sqlite");
+    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
+  } catch (error) {
+    throw new Error(
+      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`,
+      { cause: error }
+    );
+  }
+}
+/**
+ * Creates (or replaces) the analytics views on a DuckDB connection: session_facts,
+ * tool_usage_facts, error_facts, model_usage and project_activity.
+ *
+ * The view bodies read from relations named after the base tables (sessions, turns,
+ * messages, tool_calls, tool_results, ...), so those must already exist on `connection`.
+ *
+ * @param connection - Object exposing an async `run(sql)` method.
+ * @returns Promise that resolves once all five statements have run.
+ */
+async function createAnalyticsViews(connection) {
+  // session_facts: sessions joined with per-session counts that are pre-aggregated in CTEs.
+  await connection.run(`
+    CREATE OR REPLACE VIEW session_facts AS
+    WITH turn_counts AS (
+      SELECT session_id, count(*) AS turn_count
+        FROM turns
+       GROUP BY session_id
+    ),
+    message_counts AS (
+      SELECT session_id,
+             count(*) AS message_count,
+             sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
+             sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
+        FROM messages
+       GROUP BY session_id
+    ),
+    tool_call_counts AS (
+      SELECT session_id,
+             count(*) AS tool_call_count,
+             sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
+        FROM tool_calls
+       GROUP BY session_id
+    ),
+    tool_result_counts AS (
+      SELECT session_id,
+             count(*) AS tool_result_count,
+             sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
+                      THEN 1 ELSE 0 END) AS tool_result_error_count,
+             sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
+        FROM tool_results
+       GROUP BY session_id
+    ),
+    search_doc_counts AS (
+      SELECT session_id, count(*) AS search_doc_count
+        FROM search_docs
+       WHERE session_id IS NOT NULL
+       GROUP BY session_id
+    )
+    SELECT s.session_id,
+           s.source_tool,
+           s.source_session_id,
+           s.project_id,
+           p.display_name AS project_name,
+           p.canonical_path AS project_path,
+           s.parent_session_id,
+           s.is_subagent,
+           s.agent_role,
+           s.agent_nickname,
+           s.title,
+           s.start_ts,
+           s.end_ts,
+           CASE
+             WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
+             THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
+                    TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
+             ELSE NULL
+           END AS duration_seconds,
+           s.cwd_initial,
+           s.git_branch_initial,
+           s.model_first,
+           s.model_last,
+           s.status,
+           s.timeline_confidence,
+           sf.path AS source_file_path,
+           COALESCE(tc.turn_count, 0) AS turn_count,
+           COALESCE(mc.message_count, 0) AS message_count,
+           COALESCE(mc.user_message_count, 0) AS user_message_count,
+           COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
+           COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
+           COALESCE(trc.tool_result_count, 0) AS tool_result_count,
+           COALESCE(tcc.tool_call_error_count, 0)
+             + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
+           COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
+           COALESCE(sdc.search_doc_count, 0) AS search_doc_count
+      FROM sessions s
+      LEFT JOIN projects p ON p.project_id = s.project_id
+      LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
+      LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
+      LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
+      LEFT JOIN message_counts mc ON mc.session_id = s.session_id
+      LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
+      LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
+      LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
+  `);
+  // tool_usage_facts: tool calls joined with session/project info and a roll-up of
+  // their tool_results rows (grouped by tool_call_id and session_id).
+  await connection.run(`
+    CREATE OR REPLACE VIEW tool_usage_facts AS
+    WITH result_rollup AS (
+      SELECT tool_call_id,
+             session_id,
+             count(*) AS tool_result_count,
+             max(status) AS result_status,
+             max(is_error) AS is_error,
+             min(exit_code) AS exit_code,
+             sum(COALESCE(duration_ms, 0)) AS duration_ms,
+             max(preview) AS preview
+        FROM tool_results
+       GROUP BY tool_call_id, session_id
+    )
+    SELECT tc.tool_call_id,
+           tc.session_id,
+           s.source_tool,
+           s.source_session_id,
+           s.project_id,
+           p.display_name AS project_name,
+           p.canonical_path AS project_path,
+           tc.turn_id,
+           tc.message_id,
+           tc.event_id,
+           tc.source_call_id,
+           tc.tool_name,
+           tc.canonical_tool_type,
+           tc.command,
+           tc.cwd,
+           tc.path,
+           tc.query,
+           tc.timestamp_start,
+           tc.timestamp_end,
+           CASE
+             WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
+             THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
+                    TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
+             ELSE NULL
+           END AS call_duration_seconds,
+           tc.status AS call_status,
+           rr.result_status,
+           COALESCE(rr.is_error, 0) AS is_error,
+           rr.exit_code,
+           rr.duration_ms AS result_duration_ms,
+           COALESCE(rr.tool_result_count, 0) AS tool_result_count,
+           rr.preview,
+           tc.raw_record_id
+      FROM tool_calls tc
+      LEFT JOIN sessions s ON s.session_id = tc.session_id
+      LEFT JOIN projects p ON p.project_id = s.project_id
+      LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
+  `);
+  // error_facts: failed tool results, import errors and uncertainties, unioned into
+  // one common column layout (columns that do not apply to a category are NULL).
+  await connection.run(`
+    CREATE OR REPLACE VIEW error_facts AS
+    SELECT 'tool_result:' || tr.tool_result_id AS error_id,
+           'tool_result' AS error_category,
+           s.source_tool,
+           s.project_id,
+           p.display_name AS project_name,
+           tr.session_id,
+           COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
+           tc.tool_name,
+           tc.canonical_tool_type,
+           COALESCE(tr.status, tc.status) AS status,
+           tr.exit_code,
+           NULL AS message,
+           tr.preview,
+           NULL AS entity_type,
+           NULL AS entity_id,
+           tr.raw_record_id
+      FROM tool_results tr
+      LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
+      LEFT JOIN sessions s ON s.session_id = tr.session_id
+      LEFT JOIN projects p ON p.project_id = s.project_id
+     WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
+    UNION ALL
+    SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
+           'import_error' AS error_category,
+           COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
+           NULL AS project_id,
+           NULL AS project_name,
+           NULL AS session_id,
+           ie.occurred_at AS timestamp,
+           NULL AS tool_name,
+           NULL AS canonical_tool_type,
+           ie.kind AS status,
+           NULL AS exit_code,
+           ie.message,
+           NULL AS preview,
+           NULL AS entity_type,
+           NULL AS entity_id,
+           ie.raw_record_id
+      FROM import_errors ie
+      LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
+      LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
+    UNION ALL
+    SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
+           'uncertainty' AS error_category,
+           NULL AS source_tool,
+           NULL AS project_id,
+           NULL AS project_name,
+           CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
+           NULL AS timestamp,
+           NULL AS tool_name,
+           NULL AS canonical_tool_type,
+           u.reason AS status,
+           NULL AS exit_code,
+           u.reason AS message,
+           NULL AS preview,
+           u.entity_type,
+           u.entity_id,
+           NULL AS raw_record_id
+      FROM uncertainties u
+  `);
+  // model_usage: model observations gathered from sessions (first/last model), turns
+  // and messages, aggregated per source tool, project and model.
+  await connection.run(`
+    CREATE OR REPLACE VIEW model_usage AS
+    WITH model_events AS (
+      SELECT s.source_tool,
+             s.project_id,
+             p.display_name AS project_name,
+             p.canonical_path AS project_path,
+             s.session_id,
+             NULL AS turn_id,
+             s.model_first AS model,
+             s.start_ts AS timestamp,
+             'session_first' AS observation_type
+        FROM sessions s
+        LEFT JOIN projects p ON p.project_id = s.project_id
+       WHERE s.model_first IS NOT NULL
+      UNION ALL
+      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
+             s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
+             'session_last' AS observation_type
+        FROM sessions s
+        LEFT JOIN projects p ON p.project_id = s.project_id
+       WHERE s.model_last IS NOT NULL
+      UNION ALL
+      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
+             t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
+        FROM turns t
+        LEFT JOIN sessions s ON s.session_id = t.session_id
+        LEFT JOIN projects p ON p.project_id = s.project_id
+       WHERE t.model IS NOT NULL
+      UNION ALL
+      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
+             m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
+        FROM messages m
+        LEFT JOIN sessions s ON s.session_id = m.session_id
+        LEFT JOIN projects p ON p.project_id = s.project_id
+       WHERE m.model IS NOT NULL
+    )
+    SELECT source_tool,
+           project_id,
+           project_name,
+           project_path,
+           model,
+           count(DISTINCT session_id) AS session_count,
+           count(DISTINCT turn_id) AS turn_count,
+           count(*) AS observation_count,
+           sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
+           min(timestamp) AS first_seen_ts,
+           max(timestamp) AS last_seen_ts
+      FROM model_events
+     GROUP BY source_tool, project_id, project_name, project_path, model
+  `);
+  // project_activity: per-project totals computed with count(DISTINCT ...).
+  // NOTE(review): turns, messages, tool_calls, tool_results and search_docs are all
+  // joined directly to sessions, so the intermediate row count per session is the
+  // product of those tables' row counts before DISTINCT collapses it. This may be
+  // slow on large sessions; consider pre-aggregating per session as session_facts does.
+  await connection.run(`
+    CREATE OR REPLACE VIEW project_activity AS
+    SELECT s.source_tool,
+           s.project_id,
+           COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
+           p.canonical_path AS project_path,
+           min(s.start_ts) AS first_session_ts,
+           max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
+           count(DISTINCT s.session_id) AS session_count,
+           count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
+             AS low_confidence_session_count,
+           count(DISTINCT t.turn_id) AS turn_count,
+           count(DISTINCT m.message_id) AS message_count,
+           count(DISTINCT tc.tool_call_id) AS tool_call_count,
+           count(DISTINCT tr.tool_result_id) AS tool_result_count,
+           count(DISTINCT CASE
+             WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
+             THEN tr.tool_result_id
+           END) AS tool_error_count,
+           count(DISTINCT sd.doc_id) AS search_doc_count
+      FROM sessions s
+      LEFT JOIN projects p ON p.project_id = s.project_id
+      LEFT JOIN turns t ON t.session_id = s.session_id
+      LEFT JOIN messages m ON m.session_id = s.session_id
+      LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
+      LEFT JOIN tool_results tr ON tr.session_id = s.session_id
+      LEFT JOIN search_docs sd ON sd.session_id = s.session_id
+     GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
+  `);
+}
+/**
+ * Opens the bundle, records the row count of every PARQUET_TABLES entry together with
+ * the manifest versions and relevant paths, and closes the bundle again.
+ *
+ * @param bundlePath - Bundle directory passed to openBundle.
+ * @returns `{ dbPath, schemaVersion, parserVersion, defaultOutDir, counts }`.
+ */
+async function openBundleSnapshot(bundlePath) {
+  const bundle = await openBundle(bundlePath);
+  try {
+    const counts = {};
+    for (const table of PARQUET_TABLES) {
+      const countSql = `SELECT count(*) AS n FROM ${quoteIdentifier(table)}`;
+      const row = bundle.db.prepare(countSql).get();
+      counts[table] = row?.n ?? 0;
+    }
+    const { paths, manifest } = bundle;
+    return {
+      dbPath: paths.db,
+      schemaVersion: manifest.schema_version,
+      parserVersion: manifest.parser_version,
+      defaultOutDir: paths.parquet,
+      counts
+    };
+  } finally {
+    // The snapshot only carries plain values, so the handle is never handed out.
+    closeBundle(bundle);
+  }
+}
+/**
+ * Wraps `value` in double quotes for use as a SQL identifier, doubling any embedded
+ * double quote.
+ *
+ * @param value - Identifier text (string).
+ * @returns The quoted identifier.
+ */
+function quoteIdentifier(value) {
+  const escaped = value.replace(/"/g, '""');
+  return '"' + escaped + '"';
+}
+/**
+ * Wraps `value` in single quotes as a SQL string literal, doubling any embedded
+ * single quote.
+ *
+ * @param value - Literal text (string).
+ * @returns The quoted literal.
+ */
+function sqlString(value) {
+  const escaped = value.replace(/'/g, "''");
+  return "'" + escaped + "'";
+}
+/**
+ * Heuristically decides whether `error` reports a missing Parquet file: its message
+ * must mention ".parquet" and contain one of "No files found", "does not exist" or
+ * "not found" (case-insensitive).
+ *
+ * @param error - Caught value; its message is obtained through getErrorMessage.
+ * @returns true when both patterns match.
+ */
+function isMissingParquetError(error) {
+  const text = getErrorMessage(error);
+  if (!/\.parquet/i.test(text)) {
+    return false;
+  }
+  return /No files found|does not exist|not found/i.test(text);
+}
+// src/services/analytics.ts
+// Report names handled by buildAnalyticsSql (one SQL builder per entry).
+var ANALYTICS_REPORTS = ["sessions", "tools", "errors", "models", "projects"];
+/**
+ * Runs an analytics report against an exported Parquet directory using DuckDB.
+ *
+ * @param options - `parquetDir`, `report` and optional `filters`.
+ * @returns The `{ columns, rows }` result of queryDuckDbParquet.
+ */
+async function runAnalyticsReport(options) {
+  const { parquetDir } = options;
+  const sql = buildAnalyticsSql(options.report, options.filters ?? {}, "duckdb");
+  return queryDuckDbParquet({ parquetDir, sql });
+}
+/**
+ * Runs an analytics report directly against an opened bundle's database, using the
+ * "sqlite" SQL dialect.
+ *
+ * @param options - `bundle`, `report` and optional `filters`.
+ * @returns `{ columns, rows }` where `columns` are the statement's column names.
+ */
+function runAnalyticsReportFromBundle(options) {
+  const reportSql = buildAnalyticsSql(options.report, options.filters ?? {}, "sqlite");
+  const statement = options.bundle.db.prepare(reportSql);
+  const rows = statement.all();
+  const columns = [];
+  for (const column of statement.columns()) {
+    columns.push(column.name);
+  }
+  return { columns, rows };
+}
+/**
+ * Builds the SQL text for one analytics report.
+ *
+ * @param report - One of the names in ANALYTICS_REPORTS.
+ * @param filters - Filter object forwarded to the per-report builder.
+ * @param dialect - "duckdb" or "sqlite"; forwarded to the per-report builder.
+ * @returns The SQL string for the requested report.
+ * @throws Error when `report` is not a known report name. Previously this fell
+ *   through and returned undefined, which only failed later inside the SQL driver.
+ */
+function buildAnalyticsSql(report, filters, dialect) {
+  switch (report) {
+    case "sessions":
+      return buildSessionsSql(filters, dialect);
+    case "tools":
+      return buildToolsSql(filters, dialect);
+    case "errors":
+      return buildErrorsSql(filters, dialect);
+    case "models":
+      return buildModelsSql(filters, dialect);
+    case "projects":
+      return buildProjectsSql(filters, dialect);
+    default:
+      throw new Error(
+        `unknown analytics report: ${report} (expected one of ${ANALYTICS_REPORTS.join(", ")})`
+      );
+  }
+}
+/**
+ * Builds the "sessions" report query over the session_facts view.
+ *
+ * @param filters - Optional source, since/until, project, sessionId,
+ *   sourcePathSubstring and limit filters.
+ * @param dialect - Forwarded to projectFilter.
+ * @returns SQL text ordered by start_ts descending and capped by limit(filters).
+ */
+function buildSessionsSql(filters, dialect) {
+  const conditions = [
+    sourceFilter(filters),
+    timeFilter("start_ts", filters),
+    projectFilter(filters, dialect)
+  ];
+  if (filters.sessionId) {
+    conditions.push(`session_id = ${sqlString2(filters.sessionId)}`);
+  }
+  if (filters.sourcePathSubstring) {
+    // Escape LIKE wildcards so the substring is matched literally.
+    const pattern = sqlString2(`%${escapeLike(filters.sourcePathSubstring)}%`);
+    conditions.push(`source_file_path LIKE ${pattern} ESCAPE '\\'`);
+  }
+  const whereClause = buildWhere(conditions);
+  const rowLimit = limit(filters);
+  return `
+    SELECT start_ts, source_tool, project_name, source_file_path, session_id,
+           source_session_id, model_last, duration_seconds,
+           message_count, tool_call_count, tool_result_count, tool_error_count,
+           tool_duration_ms, timeline_confidence, title
+      FROM session_facts
+      ${whereClause}
+     ORDER BY start_ts DESC NULLS LAST
+     LIMIT ${rowLimit}
+  `;
+}
+/**
+ * Builds the "tools" report query: tool_usage_facts grouped by tool, canonical type,
+ * source tool and project.
+ *
+ * @param filters - Optional source, since/until, project, toolName, canonicalType,
+ *   errorsOnly and limit filters.
+ * @param dialect - Forwarded to projectFilter.
+ * @returns SQL text ordered by call count and capped by limit(filters).
+ */
+function buildToolsSql(filters, dialect) {
+  const conditions = [
+    sourceFilter(filters),
+    timeFilter("timestamp_start", filters),
+    projectFilter(filters, dialect)
+  ];
+  if (filters.toolName) {
+    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
+  }
+  if (filters.canonicalType) {
+    conditions.push(`canonical_tool_type = ${sqlString2(filters.canonicalType)}`);
+  }
+  if (filters.errorsOnly) {
+    conditions.push(`(is_error = 1 OR call_status = 'error')`);
+  }
+  const whereClause = buildWhere(conditions);
+  const rowLimit = limit(filters);
+  return `
+    SELECT tool_name, canonical_tool_type, source_tool, project_name,
+           count(*) AS call_count,
+           sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,
+           round(avg(result_duration_ms), 3) AS avg_result_duration_ms,
+           max(timestamp_start) AS latest_ts
+      FROM tool_usage_facts
+      ${whereClause}
+     GROUP BY tool_name, canonical_tool_type, source_tool, project_name
+     ORDER BY call_count DESC, error_count DESC, tool_name ASC
+     LIMIT ${rowLimit}
+  `;
+}
+/**
+ * Builds the "errors" report query over the error_facts view.
+ *
+ * @param filters - Optional source, since/until, project, toolName, category and
+ *   limit filters.
+ * @param dialect - Forwarded to projectFilter.
+ * @returns SQL text ordered by timestamp descending and capped by limit(filters).
+ */
+function buildErrorsSql(filters, dialect) {
+  const conditions = [
+    sourceFilter(filters),
+    timeFilter("timestamp", filters),
+    projectFilter(filters, dialect)
+  ];
+  if (filters.toolName) {
+    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
+  }
+  if (filters.category) {
+    conditions.push(`error_category = ${sqlString2(filters.category)}`);
+  }
+  const whereClause = buildWhere(conditions);
+  const rowLimit = limit(filters);
+  return `
+    SELECT timestamp, error_category, source_tool, project_name, session_id,
+           tool_name, status, exit_code, message, preview
+      FROM error_facts
+      ${whereClause}
+     ORDER BY timestamp DESC NULLS LAST, error_id DESC
+     LIMIT ${rowLimit}
+  `;
+}
+/**
+ * Builds the "models" report query over the model_usage view.
+ *
+ * @param filters - Optional source, since/until (matched against the
+ *   first_seen_ts..last_seen_ts range), project, model and limit filters.
+ * @param dialect - Forwarded to projectFilter.
+ * @returns SQL text ordered by session count and capped by limit(filters).
+ */
+function buildModelsSql(filters, dialect) {
+  const conditions = [
+    sourceFilter(filters),
+    rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
+    projectFilter(filters, dialect)
+  ];
+  if (filters.model) {
+    conditions.push(`model = ${sqlString2(filters.model)}`);
+  }
+  const whereClause = buildWhere(conditions);
+  const rowLimit = limit(filters);
+  return `
+    SELECT model, source_tool, project_name, session_count, turn_count,
+           message_count, observation_count, first_seen_ts, last_seen_ts
+      FROM model_usage
+      ${whereClause}
+     ORDER BY session_count DESC, observation_count DESC, model ASC
+     LIMIT ${rowLimit}
+  `;
+}
+/**
+ * Builds the "projects" report query over the project_activity view.
+ *
+ * @param filters - Optional source, since/until (matched against the
+ *   first_session_ts..latest_session_ts range), project and limit filters.
+ * @param dialect - Forwarded to projectFilter.
+ * @returns SQL text ordered by latest session and capped by limit(filters).
+ */
+function buildProjectsSql(filters, dialect) {
+  const conditions = [];
+  conditions.push(sourceFilter(filters));
+  conditions.push(rangeOverlapFilter("first_session_ts", "latest_session_ts", filters));
+  conditions.push(projectFilter(filters, dialect));
+  const whereClause = buildWhere(conditions);
+  const rowLimit = limit(filters);
+  return `
+    SELECT latest_session_ts, source_tool, project_name, project_path,
+           session_count, message_count, tool_call_count, tool_error_count,
+           low_confidence_session_count
+      FROM project_activity
+      ${whereClause}
+     ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC
+     LIMIT ${rowLimit}
+  `;
+}
+/**
+ * Builds the `source_tool = '...'` condition.
+ *
+ * @param filters - Filter object; only `source` is read.
+ * @returns The condition, or null when `filters.source` is falsy.
+ */
+function sourceFilter(filters) {
+  if (!filters.source) {
+    return null;
+  }
+  return `source_tool = ${sqlString2(filters.source)}`;
+}
+/**
+ * Builds a half-open time window condition (`>= since`, `< until`) on `column`.
+ * Rows whose column is NULL always pass the condition.
+ *
+ * @param column - Column name, inserted into the SQL as given.
+ * @param filters - Filter object; `since` and `until` are read.
+ * @returns The condition(s) joined with AND, or null when neither bound is set.
+ */
+function timeFilter(column, filters) {
+  const { since, until } = filters;
+  const bounds = [];
+  if (since) {
+    bounds.push(`(${column} IS NULL OR ${column} >= ${sqlString2(since)})`);
+  }
+  if (until) {
+    bounds.push(`(${column} IS NULL OR ${column} < ${sqlString2(until)})`);
+  }
+  return bounds.length === 0 ? null : bounds.join(" AND ");
+}
+/**
+ * Builds a condition selecting rows whose [firstColumn, lastColumn] range overlaps the
+ * since/until window: the range must end at or after `since` and start before `until`.
+ * NULL column values always pass.
+ *
+ * @param firstColumn - Column holding the range start, inserted as given.
+ * @param lastColumn - Column holding the range end, inserted as given.
+ * @param filters - Filter object; `since` and `until` are read.
+ * @returns The condition(s) joined with AND, or null when neither bound is set.
+ */
+function rangeOverlapFilter(firstColumn, lastColumn, filters) {
+  const { since, until } = filters;
+  const bounds = [];
+  if (since) {
+    bounds.push(`(${lastColumn} IS NULL OR ${lastColumn} >= ${sqlString2(since)})`);
+  }
+  if (until) {
+    bounds.push(`(${firstColumn} IS NULL OR ${firstColumn} < ${sqlString2(until)})`);
+  }
+  return bounds.length === 0 ? null : bounds.join(" AND ");
+}
+/**
+ * Builds the project condition: exact match on project_id, or substring match on
+ * project_name / project_path with LIKE wildcards in the input escaped.
+ *
+ * @param filters - Filter object; only `project` is read.
+ * @param dialect - "duckdb" selects ILIKE; anything else selects LIKE.
+ * @returns The parenthesized condition, or null when `filters.project` is falsy.
+ */
+function projectFilter(filters, dialect) {
+  const { project } = filters;
+  if (!project) {
+    return null;
+  }
+  const likeOp = dialect === "duckdb" ? "ILIKE" : "LIKE";
+  const exactLiteral = sqlString2(project);
+  const patternLiteral = sqlString2(`%${escapeLike(project)}%`);
+  const alternatives = [
+    `project_id = ${exactLiteral}`,
+    `project_name ${likeOp} ${patternLiteral} ESCAPE '\\'`,
+    `project_path ${likeOp} ${patternLiteral} ESCAPE '\\'`
+  ];
+  return `(${alternatives.join(" OR ")})`;
+}
+/**
+ * Joins the truthy entries of `filters` into a WHERE clause.
+ *
+ * @param filters - Array of SQL conditions; null/empty entries are ignored.
+ * @returns `WHERE a AND b ...`, or an empty string when no condition is active.
+ */
+function buildWhere(filters) {
+  const clauses = filters.filter(Boolean);
+  if (clauses.length === 0) {
+    return "";
+  }
+  return `WHERE ${clauses.join(" AND ")}`;
+}
+/**
+ * Resolves the row limit for a report through clampLimit, with a maximum of 500 and a
+ * fallback of 50. A non-finite `filters.limit` is passed on as undefined.
+ *
+ * @param filters - Filter object; only `limit` is read.
+ * @returns Whatever clampLimit returns for the requested value.
+ */
+function limit(filters) {
+  const requested = Number.isFinite(filters.limit) ? filters.limit : undefined;
+  const bounds = { max: 500, fallback: 50 };
+  return clampLimit(requested, bounds);
+}
+/**
+ * Wraps `value` in single quotes as a SQL string literal, doubling any embedded
+ * single quote.
+ *
+ * @param value - Literal text (string).
+ * @returns The quoted literal.
+ */
+function sqlString2(value) {
+  const escaped = value.replace(/'/g, "''");
+  return "'" + escaped + "'";
+}
+/**
+ * Escapes the LIKE metacharacters `%` and `_`, and the escape character `\` itself,
+ * by prefixing each with a backslash.
+ *
+ * @param value - Raw text to embed in a LIKE pattern.
+ * @returns The escaped text.
+ */
+function escapeLike(value) {
+  // "$&" re-inserts the matched character after the added backslash.
+  return value.replace(/[\\%_]/g, "\\$&");
+}
+// src/cli/bundle.ts
+import path3 from "path";
+/**
+ * Opens the bundle at `storePath`, passes it to `fn`, and closes the bundle afterwards
+ * whether `fn` succeeds or throws.
+ *
+ * @param storePath - Bundle directory; resolved to an absolute path before opening.
+ * @param fn - Callback receiving the opened bundle; may be async.
+ * @returns The awaited result of `fn`.
+ */
+async function withBundle(storePath, fn) {
+  const absolutePath = path3.resolve(storePath);
+  const bundle = await openBundle(absolutePath);
+  try {
+    const result = await fn(bundle);
+    return result;
+  } finally {
+    closeBundle(bundle);
+  }
+}
+// src/cli/output.ts
+// Values accepted by parseOutputFormat for --output-format.
+var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
+// Text placed between adjacent columns by printTable.
+var COL_SEPARATOR = "  ";
+// Character repeated to draw the rule under the printTable header.
+var RULE_CHAR = "-";
+/**
+ * Validates an --output-format value.
+ *
+ * @param value - Raw option value, or undefined when the option was not given.
+ * @param fallback - Returned when `value` is undefined.
+ * @returns `value` when it is listed in OUTPUT_FORMATS, otherwise `fallback`.
+ * @throws Error when `value` is defined but not a known format.
+ */
+function parseOutputFormat(value, fallback) {
+  if (value === undefined) {
+    return fallback;
+  }
+  if (!OUTPUT_FORMATS.includes(value)) {
+    throw new Error(
+      `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
+    );
+  }
+  return value;
+}
+/**
+ * Prints `rows` using the printer selected by `opts.format`: "json", "csv", or
+ * "table"/"interactive" (both rendered as a table). Any other format prints nothing.
+ *
+ * @param rows - Row objects to print.
+ * @param opts - Output options; forwarded unchanged to the selected printer.
+ */
+function printRows(rows, opts) {
+  const { format } = opts;
+  if (format === "json") {
+    printJson(rows, opts);
+  } else if (format === "csv") {
+    printCsv(rows, opts);
+  } else if (format === "table" || format === "interactive") {
+    printTable(rows, opts);
+  }
+}
+/**
+ * Writes `rows` to stdout as pretty-printed JSON followed by a newline. When
+ * `opts.meta` is truthy the output is an object holding the meta fields plus `rows`.
+ *
+ * @param rows - Row objects to print.
+ * @param opts - Output options; only `meta` is read.
+ */
+function printJson(rows, opts) {
+  let payload = rows;
+  if (opts.meta) {
+    payload = { ...opts.meta, rows };
+  }
+  process.stdout.write(JSON.stringify(payload, null, 2) + "\n");
+}
+/**
+ * Writes `rows` to stdout as CSV: a header line built from `opts.columns`, then one
+ * line per row with the cells in the same column order.
+ *
+ * @param rows - Row objects keyed by column name.
+ * @param opts - Output options; only `columns` is read.
+ */
+function printCsv(rows, opts) {
+  const { columns } = opts;
+  const headerLine = columns.map((name) => csvField(name)).join(",");
+  process.stdout.write(headerLine + "\n");
+  for (const record of rows) {
+    const fields = [];
+    for (const column of columns) {
+      fields.push(csvField(formatCell(record[column])));
+    }
+    process.stdout.write(fields.join(",") + "\n");
+  }
+}
+/**
+ * Encodes one CSV field. Values containing a double quote, comma, line feed or
+ * carriage return are wrapped in double quotes with embedded quotes doubled; all
+ * other values are returned unchanged.
+ *
+ * @param value - Field text (string).
+ * @returns The encoded field.
+ */
+function csvField(value) {
+  // "\r" must be quoted as well: a bare carriage return is read as a line break by
+  // CSV consumers and would split the record.
+  if (/[",\r\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
+  return value;
+}
+/**
+ * Writes `rows` to stdout as a plain-text table: a header line, a rule line, then one
+ * line per row. Each column is padded to the longest of its header and cell texts.
+ *
+ * @param rows - Row objects keyed by column name.
+ * @param opts - Output options; only `columns` is read.
+ */
+function printTable(rows, opts) {
+  const { columns } = opts;
+  // Format every cell once, row by row, so widths and output use the same text.
+  const grid = rows.map((record) => columns.map((column) => formatCell(record[column])));
+  const widths = columns.map((column, index) =>
+    grid.reduce((widest, cells) => Math.max(widest, cells[index].length), column.length)
+  );
+  const renderLine = (texts) =>
+    texts.map((text, index) => text.padEnd(widths[index])).join(COL_SEPARATOR);
+  const ruleLine = widths.map((width) => RULE_CHAR.repeat(width)).join(COL_SEPARATOR);
+  process.stdout.write(renderLine(columns) + "\n" + ruleLine + "\n");
+  for (const cells of grid) {
+    process.stdout.write(renderLine(cells) + "\n");
+  }
+}
+function formatCell(value) {
+  if (value == null) return "";
+  if (typeof value === "string") return value;
+  if (typeof value === "number" || typeof value === "boolean") return String(value);
+  return JSON.stringify(value);
+}
+// src/core/domain/types.ts
+var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"]; // source tool names accepted by parseSourceTool
+// src/cli/parsers.ts
+function parseSearchEngine(value) {
+  if (value === "fts5" || value === "tantivy") return value;
+  throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
+}
+function parseMcpTransport(value) {
+  if (value === "stdio" || value === "http") return value;
+  throw new Error(`invalid transport: ${value} (expected stdio or http)`);
+}
+function parseSourceTool(value) {
+  if (value === void 0) return void 0;
+  if (SOURCE_TOOLS.includes(value)) return value;
+  throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
+}
+// src/cli/commands/analytics.ts
+function analyticsCommand() {
+  const command = new Command("analytics").description(
+    "Run high-level analytics reports over exported Parquet files."
+  );
+  command.addCommand(reportCommand("sessions", "Summarize sessions by source, project and model."));
+  command.addCommand(reportCommand("tools", "Summarize tool usage, status, duration and errors."));
+  command.addCommand(
+    reportCommand("errors", "List import errors, failed tool results and uncertainties.")
+  );
+  command.addCommand(reportCommand("models", "Summarize model usage by source, project and time."));
+  command.addCommand(
+    reportCommand("projects", "Summarize project activity and operational counts.")
+  );
+  return command;
+}
+function reportCommand(report, description) {
+  const command = addCommonOptions(new Command(report).description(description));
+  if (report === "tools") {
+    command.option("--tool-name <name>", "filter by exact tool name").option("--canonical-type <type>", "filter by canonical tool type").option("--errors-only", "only include tool calls with errors");
+  }
+  if (report === "errors") {
+    command.option("--tool-name <name>", "filter by exact tool name").option("--category <category>", "filter by error category");
+  }
+  if (report === "models") {
+    command.option("--model <model>", "filter by exact model name");
+  }
+  if (report === "projects") {
+    command.option("--project <text>", "filter by project id, name, or path substring");
+  }
+  if (report === "sessions") {
+    command.option("--project <text>", "filter by project id, name, or path substring");
+  }
+  return command.action(async (options) => {
+    const format = parseOutputFormat(options.outputFormat, "table");
+    const parquetDir = await resolveParquetDir(options);
+    const filters = buildFilters(options);
+    const result = await runAnalyticsReport({ parquetDir, report, filters });
+    printRows(result.rows, {
+      format,
+      columns: result.columns,
+      meta: { report, count: result.rows.length }
+    });
+  });
+}
+function addCommonOptions(command) {
+  return command.option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--refresh", "export Parquet before running the report").option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "lower timestamp bound (inclusive)").option("--until <iso>", "upper timestamp bound (exclusive)").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table");
+}
+async function resolveParquetDir(options) {
+  const storePath = path4.resolve(options.store);
+  const outDir = options.parquetDir ? path4.resolve(options.parquetDir) : void 0;
+  if (options.refresh) {
+    const result = await exportBundleParquet({ bundlePath: storePath, outDir });
+    return result.outDir;
+  }
+  return outDir ?? await withBundle(storePath, (bundle) => bundle.paths.parquet);
+}
+function buildFilters(options) {
+  return {
+    source: parseSourceTool(options.source),
+    since: options.since,
+    until: options.until,
+    limit: Number.parseInt(options.limit, 10),
+    toolName: options.toolName,
+    canonicalType: options.canonicalType,
+    errorsOnly: options.errorsOnly,
+    category: options.category,
+    model: options.model,
+    project: options.project
+  };
+}
+// src/cli/commands/compile.ts
+import { Command as Command2 } from "commander";
+// src/services/compile.ts
+init_errors();
+import os2 from "os";
+import path16 from "path";
+// src/importers/claude/index.ts
+import { readFile as readFile4 } from "fs/promises";
+import path8 from "path";
+// src/core/cas/index.ts
+init_db();
+import { mkdir as mkdir3, readFile as readFile2, writeFile as writeFile3 } from "fs/promises";
+import path5 from "path";
+// src/core/cas/compress.ts
+import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
+var COMPRESS_THRESHOLD_BYTES = 256; // compressBytes stores inputs shorter than this uncompressed
+var ZSTD_LEVEL = 3; // compressionLevel passed to zstdCompress by compressBytes
+function compressBytes(input) {
+  if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
+    return { bytes: Buffer.from(input), compression: "none" };
+  }
+  const out = zstdCompress(Buffer.from(input), { compressionLevel: ZSTD_LEVEL });
+  return { bytes: out, compression: "zstd" };
+}
+function decompressBytes(input, compression) {
+  if (compression === "none") return input;
+  return zstdDecompress(input);
+}
+// src/core/cas/hash.ts
+import { createHash } from "crypto";
+import { blake3 } from "@noble/hashes/blake3";
+import { bytesToHex } from "@noble/hashes/utils";
+function blake3Hex(bytes) {
+  return bytesToHex(blake3(bytes));
+}
+function sha256Hex(bytes) {
+  return createHash("sha256").update(bytes).digest("hex");
+}
+function objectIdFromHash(hashHex) {
+  return `blake3:${hashHex}`;
+}
+function objectStoragePath(hashHex, compression) {
+  const ext = compression === "zstd" ? ".zst" : ".bin";
+  const a = hashHex.slice(0, 2);
+  const b = hashHex.slice(2, 4);
+  return `objects/blake3/${a}/${b}/${hashHex}${ext}`;
+}
+// src/core/cas/index.ts
+var ensuredDirs = /* @__PURE__ */ new Set(); // directories ensureDir has already created; lets it skip repeat mkdir calls
+async function ensureDir(absoluteDir) {
+  if (ensuredDirs.has(absoluteDir)) return;
+  await mkdir3(absoluteDir, { recursive: true });
+  ensuredDirs.add(absoluteDir);
+}
+async function putBytes(bundle, bytes, options = {}) {
+  const hash = blake3Hex(bytes);
+  const objectId = objectIdFromHash(hash);
+  const existing = prepare(
+    bundle.db,
+    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
+            compression, mime_type, encoding, storage_path, created_at
+       FROM objects WHERE object_id = ?`
+  ).get(objectId);
+  if (existing) return objectId;
+  const { bytes: stored, compression } = compressBytes(bytes);
+  const storagePath = objectStoragePath(hash, compression);
+  const absolutePath = path5.join(bundle.path, storagePath);
+  await ensureDir(path5.dirname(absolutePath));
+  await writeFile3(absolutePath, stored);
+  prepare(
+    bundle.db,
+    `INSERT INTO objects (
+       object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
+       compression, mime_type, encoding, storage_path, created_at
+     ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
+  ).run(
+    objectId,
+    hash,
+    bytes.byteLength,
+    compression === "zstd" ? stored.byteLength : null,
+    compression,
+    options.mimeType ?? null,
+    options.encoding ?? null,
+    storagePath,
+    (/* @__PURE__ */ new Date()).toISOString()
+  );
+  return objectId;
+}
+async function putJson(bundle, value) {
+  const text = JSON.stringify(value);
+  return putBytes(bundle, Buffer.from(text, "utf8"), {
+    mimeType: "application/json",
+    encoding: "utf-8"
+  });
+}
+async function getBytes(bundle, objectId) {
+  const meta = prepare(
+    bundle.db,
+    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
+            compression, mime_type, encoding, storage_path, created_at
+       FROM objects WHERE object_id = ?`
+  ).get(objectId);
+  if (!meta) {
+    throw new Error(`object not found: ${objectId}`);
+  }
+  const buf = await readFile2(path5.join(bundle.path, meta.storage_path));
+  return decompressBytes(buf, meta.compression);
+}
+async function getText(bundle, objectId) {
+  const buf = await getBytes(bundle, objectId);
+  return buf.toString("utf8");
+}
+function createPendingObjects() {
+  return { byId: /* @__PURE__ */ new Map() };
+}
+function stageBytes(pending, bytes, options = {}) {
+  const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
+  const hash = blake3Hex(buf);
+  const objectId = objectIdFromHash(hash);
+  if (!pending.byId.has(objectId)) {
+    pending.byId.set(objectId, {
+      objectId,
+      hash,
+      bytes: buf,
+      mimeType: options.mimeType ?? null,
+      encoding: options.encoding ?? null
+    });
+  }
+  return objectId;
+}
+function stageText(pending, text, options = {}) {
+  return stageBytes(pending, Buffer.from(text, "utf8"), {
+    mimeType: options.mimeType ?? "text/plain; charset=utf-8",
+    encoding: "utf-8"
+  });
+}
+function stageJson(pending, value) {
+  return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
+    mimeType: "application/json",
+    encoding: "utf-8"
+  });
+}
+async function flushPendingObjects(bundle, pending) {
+  if (pending.byId.size === 0) return;
+  const ids = [...pending.byId.keys()];
+  const existingIds = queryExistingObjectIds(bundle, ids);
+  const toWrite = [];
+  for (const obj of pending.byId.values()) {
+    if (existingIds.has(obj.objectId)) continue;
+    const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
+    const storagePath = objectStoragePath(obj.hash, compression);
+    toWrite.push({
+      staged: obj,
+      compression,
+      compressedBytes,
+      storagePath,
+      absolutePath: path5.join(bundle.path, storagePath)
+    });
   }
-  if (!await exists(paths.manifest)) {
-    throw new Error(
-      `no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
-    );
+  if (toWrite.length > 0) {
+    await writeFilesParallel(toWrite);
   }
-  const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
-  await mkdir(paths.search, { recursive: true });
-  await mkdir(paths.tantivy, { recursive: true });
-  const db = openDb(paths.db);
-  runMigrations(db);
-  const currentVersion = currentSchemaVersion(db);
-  if (currentVersion !== PROSA_SCHEMA_VERSION) {
-    closeDb(db);
-    throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
+  const insertObject = prepare(
+    bundle.db,
+    `INSERT OR IGNORE INTO objects (
+       object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
+       compression, mime_type, encoding, storage_path, created_at
+     ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
+  );
+  const now = (/* @__PURE__ */ new Date()).toISOString();
+  for (const p of toWrite) {
+    insertObject.run(
+      p.staged.objectId,
+      p.staged.hash,
+      p.staged.bytes.byteLength,
+      p.compression === "zstd" ? p.compressedBytes.byteLength : null,
+      p.compression,
+      p.staged.mimeType,
+      p.staged.encoding,
+      p.storagePath,
+      now
+    );
   }
-  if (manifest.parser_version !== PROSA_PARSER_VERSION) {
-    manifest.parser_version = PROSA_PARSER_VERSION;
-    await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
-`, "utf8");
+}
+function queryExistingObjectIds(bundle, ids) {
+  const found = /* @__PURE__ */ new Set();
+  if (ids.length === 0) return found;
+  const CHUNK = 500;
+  for (let start = 0; start < ids.length; start += CHUNK) {
+    const slice = ids.slice(start, start + CHUNK);
+    const placeholders = slice.map(() => "?").join(",");
+    const rows = bundle.db.prepare(
+      `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
+    ).all(...slice);
+    for (const row of rows) found.add(row.object_id);
   }
-  return { path: resolved, db, manifest, paths };
+  return found;
 }
-function closeBundle(bundle) {
-  closeDb(bundle.db);
+var FS_WRITE_CONCURRENCY = 16;
+async function writeFilesParallel(tasks) {
+  let cursor = 0;
+  const workers = [];
+  const limit2 = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
+  for (let w = 0; w < limit2; w++) {
+    workers.push(
+      (async () => {
+        while (true) {
+          const i = cursor++;
+          if (i >= tasks.length) return;
+          const task = tasks[i];
+          await ensureDir(path5.dirname(task.absolutePath));
+          await writeFile3(task.absolutePath, task.compressedBytes);
+        }
+      })()
+    );
+  }
+  await Promise.all(workers);
 }
-// src/services/compile.ts
-init_errors();
-import os2 from "os";
-import path14 from "path";
 // src/importers/claude/index.ts
-init_cas();
 init_db();
-import { readFile as readFile4 } from "fs/promises";
-import path5 from "path";
 // src/core/domain/ids.ts
-init_hash();
 var ID_PREFIX_BYTES = 16;
 function tupleId(parts) {
   return sha256Hex(parts.join("\0")).slice(0, ID_PREFIX_BYTES * 2);
@@ -1704,7 +2774,6 @@ function importBatchId(sourceTool, startedAtIso) {
 init_errors();
 // src/core/ingest/batch.ts
-init_cas();
 init_db();
 function emptyCounts() {
   return {
@@ -1772,12 +2841,9 @@ async function recordError(bundle, batchId, args) {
 }
 // src/core/ingest/idempotency.ts
-init_compress();
-init_hash();
-init_cas();
+import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile4 } from "fs/promises";
+import path6 from "path";
 init_db();
-import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
-import path3 from "path";
 async function registerSourceFile(bundle, args) {
   const st = await stat2(args.absolutePath);
   const size = st.size;
@@ -1861,10 +2927,10 @@ async function preserveRawSourceBytes(bundle, bytes) {
   const objectId = objectIdFromHash(hash);
   const { bytes: stored, compression } = compressBytes(bytes);
   const storagePath = rawSourceStoragePath(hash, compression);
-  const absolutePath = path3.join(bundle.path, storagePath);
-  await ensureDir(path3.dirname(absolutePath));
+  const absolutePath = path6.join(bundle.path, storagePath);
+  await ensureDir(path6.dirname(absolutePath));
   if (!await fileExists(absolutePath)) {
-    await writeFile3(absolutePath, stored);
+    await writeFile4(absolutePath, stored);
   }
   const existing = prepare(
     bundle.db,
@@ -1906,12 +2972,12 @@ async function fileExists(filePath) {
 // src/importers/claude/discover.ts
 import { readdir } from "fs/promises";
-import path4 from "path";
+import path7 from "path";
 async function* discoverClaudeFiles(root) {
   const projectDirs = await readdirSafe(root);
   for (const project of projectDirs) {
     if (!project.isDirectory()) continue;
-    const projectRoot = path4.join(root, project.name);
+    const projectRoot = path7.join(root, project.name);
     yield* walkProject(projectRoot, project.name);
   }
 }
@@ -1920,7 +2986,7 @@ async function* walkProject(projectRoot, projectSlug) {
   for (const entry of entries) {
     if (entry.isFile() && entry.name.endsWith(".jsonl")) {
       yield {
-        filePath: path4.join(projectRoot, entry.name),
+        filePath: path7.join(projectRoot, entry.name),
         projectSlug,
         isSubagent: false,
         parentSessionId: null,
@@ -1930,18 +2996,18 @@ async function* walkProject(projectRoot, projectSlug) {
       continue;
     }
     if (entry.isDirectory()) {
-      const subagentsDir = path4.join(projectRoot, entry.name, "subagents");
+      const subagentsDir = path7.join(projectRoot, entry.name, "subagents");
       const subagentEntries = await readdirSafe(subagentsDir);
       for (const sub of subagentEntries) {
         if (!sub.isFile() || !sub.name.endsWith(".jsonl")) continue;
         if (!sub.name.startsWith("agent-")) continue;
         const agentId = sub.name.slice("agent-".length, -".jsonl".length);
-        const metaCandidate = path4.join(subagentsDir, `agent-${agentId}.meta.json`);
+        const metaCandidate = path7.join(subagentsDir, `agent-${agentId}.meta.json`);
         const metaExists = subagentEntries.some(
           (e) => e.isFile() && e.name === `agent-${agentId}.meta.json`
         );
         yield {
-          filePath: path4.join(subagentsDir, sub.name),
+          filePath: path7.join(subagentsDir, sub.name),
           projectSlug,
           isSubagent: true,
           parentSessionId: entry.name,
@@ -2061,7 +3127,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
   const counts = emptyFileCounts();
   const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
     sourceTool: "claude",
-    absolutePath: path5.resolve(file.filePath),
+    absolutePath: path8.resolve(file.filePath),
     fileKind: "jsonl",
     workspaceHint: file.projectSlug
   });
@@ -2161,7 +3227,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
         pending.session.parent_session_id_pending = parentSid;
       }
     }
-    const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path5.basename(file.filePath)}`);
+    const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path8.basename(file.filePath)}`);
     const type = typeof parsed.type === "string" ? parsed.type : null;
     if (type === "user" || type === "assistant") {
       const msgRole = type === "user" ? "user" : "assistant";
@@ -2903,15 +3969,14 @@ function flushPending(bundle, pending, meta) {
 }
 // src/importers/codex/index.ts
-init_cas();
-init_db();
 import { readFile as readFile5 } from "fs/promises";
-import path7 from "path";
+import path10 from "path";
+init_db();
 init_errors();
 // src/importers/codex/discover.ts
 import { readdir as readdir2 } from "fs/promises";
-import path6 from "path";
+import path9 from "path";
 async function* discoverCodexSessions(root) {
   yield* walk(root);
 }
@@ -2923,7 +3988,7 @@ async function* walk(dir) {
     return;
   }
   for (const entry of entries) {
-    const full = path6.join(dir, entry.name);
+    const full = path9.join(dir, entry.name);
     if (entry.isDirectory()) {
       yield* walk(full);
     } else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
@@ -3025,7 +4090,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
   const counts = emptyFileCounts2();
   const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
     sourceTool: "codex",
-    absolutePath: path7.resolve(filePath),
+    absolutePath: path10.resolve(filePath),
     fileKind: "jsonl"
   });
   if (alreadyKnown) {
@@ -3111,7 +4176,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
     const payload = parsed.payload ?? {};
     if (type === "session_meta") {
       const meta = payload;
-      const sourceSessionId = meta.id ?? path7.basename(filePath, ".jsonl");
+      const sourceSessionId = meta.id ?? path10.basename(filePath, ".jsonl");
       const sessionId3 = sessionId("codex", sourceSessionId);
       if (!pending.session) {
         const sub = parseSubagent(meta.source);
@@ -3143,11 +4208,11 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
       }
       continue;
     }
-    const sessionId2 = pending.session?.session_id ?? sessionId("codex", path7.basename(filePath, ".jsonl"));
+    const sessionId2 = pending.session?.session_id ?? sessionId("codex", path10.basename(filePath, ".jsonl"));
     if (!pending.session) {
       pending.session = {
         session_id: sessionId2,
-        source_session_id: path7.basename(filePath, ".jsonl"),
+        source_session_id: path10.basename(filePath, ".jsonl"),
         parent_session_id: null,
         is_subagent: 0,
         agent_role: null,
@@ -4044,25 +5109,24 @@ function flushPending2(bundle, pending, meta) {
 }
 // src/importers/cursor/index.ts
-init_cas();
-init_db();
-import path9 from "path";
+import path12 from "path";
 import Database2 from "better-sqlite3";
+init_db();
 init_errors();
 // src/importers/cursor/discover.ts
 import { readdir as readdir3 } from "fs/promises";
-import path8 from "path";
+import path11 from "path";
 async function* discoverCursorStores(root) {
   const workspaces = await readdirSafe2(root);
   for (const ws of workspaces) {
     if (!ws.isDirectory()) continue;
-    const wsPath = path8.join(root, ws.name);
+    const wsPath = path11.join(root, ws.name);
     const agents = await readdirSafe2(wsPath);
     for (const ag of agents) {
       if (!ag.isDirectory()) continue;
-      const dbPath = path8.join(wsPath, ag.name, "store.db");
-      const dbEntries = await readdirSafe2(path8.join(wsPath, ag.name));
+      const dbPath = path11.join(wsPath, ag.name, "store.db");
+      const dbEntries = await readdirSafe2(path11.join(wsPath, ag.name));
       const hasStoreDb = dbEntries.some((e) => e.isFile() && e.name === "store.db");
       if (!hasStoreDb) continue;
       yield {
@@ -4161,7 +5225,7 @@ async function compileCursorStore(bundle, batch, store, logger) {
   const counts = emptyFileCounts3();
   const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
     sourceTool: "cursor",
-    absolutePath: path9.resolve(store.filePath),
+    absolutePath: path12.resolve(store.filePath),
     fileKind: "sqlite",
     workspaceHint: store.workspaceId
   });
@@ -4763,29 +5827,27 @@ function flushPending3(bundle, pending) {
 }
 // src/importers/gemini/index.ts
-init_hash();
-init_cas();
-init_db();
 import { readFile as readFile7 } from "fs/promises";
-import path11 from "path";
+import path14 from "path";
+init_db();
 init_errors();
 // src/importers/gemini/discover.ts
 import { readFile as readFile6, readdir as readdir4 } from "fs/promises";
-import path10 from "path";
+import path13 from "path";
 async function* discoverGeminiChats(root) {
   const entries = await readdirSafe3(root);
   for (const entry of entries) {
     if (!entry.isDirectory()) continue;
     if (entry.name === "bin") continue;
-    const projectRoot = await readProjectRoot(path10.join(root, entry.name));
-    const chatsDir = path10.join(root, entry.name, "chats");
+    const projectRoot = await readProjectRoot(path13.join(root, entry.name));
+    const chatsDir = path13.join(root, entry.name, "chats");
     const chatEntries = await readdirSafe3(chatsDir);
     for (const c of chatEntries) {
       if (!c.isFile()) continue;
       if (!c.name.startsWith("session-") || !c.name.endsWith(".json")) continue;
       yield {
-        filePath: path10.join(chatsDir, c.name),
+        filePath: path13.join(chatsDir, c.name),
         projectDir: entry.name,
         projectRoot
       };
@@ -4794,7 +5856,7 @@ async function* discoverGeminiChats(root) {
 }
 async function readProjectRoot(dir) {
   try {
-    const text = await readFile6(path10.join(dir, ".project_root"), "utf8");
+    const text = await readFile6(path13.join(dir, ".project_root"), "utf8");
     return text.replace(/\n+$/, "").trim() || null;
   } catch {
     return null;
@@ -4888,7 +5950,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
   const counts = emptyFileCounts4();
   const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
     sourceTool: "gemini",
-    absolutePath: path11.resolve(file.filePath),
+    absolutePath: path14.resolve(file.filePath),
     fileKind: "json",
     workspaceHint: file.projectDir
   });
@@ -4941,7 +6003,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
     project: null,
     objects
   };
-  const sourceSid = parsed.sessionId ?? path11.basename(file.filePath, ".json");
+  const sourceSid = parsed.sessionId ?? path14.basename(file.filePath, ".json");
   const sessionPk = sessionId("gemini", sourceSid);
   if (file.projectRoot) {
     pending.project = {
@@ -5522,143 +6584,6 @@ function flushPending4(bundle, pending) {
   }
 }
-// src/services/export/parquet.ts
-import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
-import path12 from "path";
-import { DuckDBConnection } from "@duckdb/node-api";
-init_errors();
-var PARQUET_TABLES = [
-  "objects",
-  "source_files",
-  "import_batches",
-  "raw_records",
-  "import_errors",
-  "uncertainties",
-  "projects",
-  "sessions",
-  "turns",
-  "events",
-  "messages",
-  "content_blocks",
-  "tool_calls",
-  "tool_results",
-  "artifacts",
-  "edges",
-  "search_docs"
-];
-async function exportBundleParquet(options) {
-  const snapshot = await openBundleSnapshot(options.bundlePath);
-  const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
-  await mkdir3(outDir, { recursive: true });
-  const files = Object.fromEntries(
-    PARQUET_TABLES.map((table) => [table, path12.join(outDir, `${table}.parquet`)])
-  );
-  const manifestPath = path12.join(outDir, "manifest.json");
-  for (const file of [...Object.values(files), manifestPath]) {
-    await rm(file, { force: true });
-  }
-  const connection = await createDuckDbConnection();
-  try {
-    await attachSqlite(connection, snapshot.dbPath);
-    for (const table of PARQUET_TABLES) {
-      await connection.run(
-        `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
-      );
-    }
-  } finally {
-    connection.closeSync();
-  }
-  const manifest = {
-    exported_at: (/* @__PURE__ */ new Date()).toISOString(),
-    source_db: snapshot.dbPath,
-    schema_version: snapshot.schemaVersion,
-    parser_version: snapshot.parserVersion,
-    tables: Object.fromEntries(
-      PARQUET_TABLES.map((table) => [
-        table,
-        {
-          file: path12.basename(files[table]),
-          rows: snapshot.counts[table]
-        }
-      ])
-    )
-  };
-  await writeFile4(manifestPath, `${JSON.stringify(manifest, null, 2)}
-`, "utf8");
-  return { outDir, manifestPath, files, counts: snapshot.counts };
-}
-async function queryDuckDbParquet(options) {
-  const parquetDir = path12.resolve(options.parquetDir);
-  const connection = await createDuckDbConnection();
-  try {
-    for (const table of PARQUET_TABLES) {
-      await connection.run(
-        `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
-          path12.join(parquetDir, `${table}.parquet`)
-        )})`
-      );
-    }
-    const reader = await connection.runAndReadAll(options.sql);
-    return {
-      columns: reader.deduplicatedColumnNames(),
-      rows: reader.getRowObjectsJson()
-    };
-  } catch (error) {
-    if (isMissingParquetError(error)) {
-      throw new Error(
-        `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
-      );
-    }
-    throw error;
-  } finally {
-    connection.closeSync();
-  }
-}
-async function createDuckDbConnection() {
-  return DuckDBConnection.create();
-}
-async function attachSqlite(connection, dbPath) {
-  try {
-    await connection.run("INSTALL sqlite");
-    await connection.run("LOAD sqlite");
-    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
-  } catch (error) {
-    throw new Error(
-      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`
-    );
-  }
-}
-async function openBundleSnapshot(bundlePath) {
-  const bundle = await openBundle(bundlePath);
-  try {
-    const counts = Object.fromEntries(
-      PARQUET_TABLES.map((table) => {
-        const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
-        return [table, row?.n ?? 0];
-      })
-    );
-    return {
-      dbPath: bundle.paths.db,
-      schemaVersion: bundle.manifest.schema_version,
-      parserVersion: bundle.manifest.parser_version,
-      defaultOutDir: bundle.paths.parquet,
-      counts
-    };
-  } finally {
-    closeBundle(bundle);
-  }
-}
-function quoteIdentifier(value) {
-  return `"${value.replace(/"/g, '""')}"`;
-}
-function sqlString(value) {
-  return `'${value.replace(/'/g, "''")}'`;
-}
-function isMissingParquetError(error) {
-  const message = getErrorMessage(error);
-  return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
-}
 // src/services/compile.ts
 init_indexing();
 var COMPILE_PROVIDERS = [
@@ -5666,28 +6591,28 @@ var COMPILE_PROVIDERS = [
     name: "codex",
     description: "Import Codex CLI session histories into the bundle.",
     pathHelp: "root of Codex CLI sessions",
-    defaultSessionsPath: () => path14.join(os2.homedir(), ".codex", "sessions"),
+    defaultSessionsPath: () => path16.join(os2.homedir(), ".codex", "sessions"),
     compile: compileCodex
   },
   {
     name: "claude",
     description: "Import Claude Code project histories into the bundle.",
     pathHelp: "root of Claude Code projects",
-    defaultSessionsPath: () => path14.join(os2.homedir(), ".claude", "projects"),
+    defaultSessionsPath: () => path16.join(os2.homedir(), ".claude", "projects"),
     compile: compileClaude
   },
   {
     name: "gemini",
     description: "Import Gemini CLI session histories into the bundle.",
     pathHelp: "root of Gemini CLI tmp dir",
-    defaultSessionsPath: () => path14.join(os2.homedir(), ".gemini", "tmp"),
+    defaultSessionsPath: () => path16.join(os2.homedir(), ".gemini", "tmp"),
     compile: compileGemini
   },
   {
     name: "cursor",
     description: "Import Cursor agent stores into the bundle.",
     pathHelp: "root of Cursor agent stores",
-    defaultSessionsPath: () => path14.join(os2.homedir(), ".cursor", "chats"),
+    defaultSessionsPath: () => path16.join(os2.homedir(), ".cursor", "chats"),
     compile: compileCursor
   }
 ];
@@ -5700,20 +6625,20 @@ function getCompileProvider(source) {
 }
 function resolveCompilePath(p) {
   if (p === "~") return os2.homedir();
-  if (p.startsWith("~/")) return path14.join(os2.homedir(), p.slice(2));
-  return path14.resolve(p);
+  if (p.startsWith("~/")) return path16.join(os2.homedir(), p.slice(2));
+  return path16.resolve(p);
 }
 async function runCompileImports(options) {
-  const { bundle, providers, deferIndex, logger } = options;
+  const { bundle, providers, logger } = options;
+  const overwrite = options.overwrite === true;
   let importedAny = false;
   const summaries = [];
   let tantivy = null;
   let tantivyError = null;
+  let fts5Error = null;
   try {
-    if (deferIndex) {
-      logger?.info("disabling FTS5 triggers for deferred indexing");
-      disableFts5Triggers(bundle);
-    }
+    logger?.info("disabling FTS5 triggers for bulk rebuild");
+    disableFts5Triggers(bundle);
     for (const provider of providers) {
       const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
       const providerLogger = logger?.child({
@@ -5740,15 +6665,23 @@ async function runCompileImports(options) {
       summaries.push(summary);
       options.onProviderComplete?.(summary);
     }
-    logger?.info({ changed: importedAny, fts5_deferred: deferIndex }, "marking indexes");
-    markIndexesAfterImport(bundle, {
-      changed: importedAny,
-      fts5Deferred: deferIndex
-    });
-    if (importedAny) {
+    const shouldRebuildIndexes = importedAny || overwrite;
+    if (shouldRebuildIndexes) {
+      logger?.info(
+        { changed: importedAny, overwrite },
+        importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
+      );
+      markIndexesAfterImport(bundle, { changed: true });
+      try {
+        logger?.info("rebuilding fts5 index");
+        rebuildFts5Index(bundle);
+      } catch (error) {
+        fts5Error = getErrorMessage(error);
+        logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
+      }
       try {
-        logger?.info("rebuilding tantivy index");
-        const status = await rebuildTantivyIndex(bundle);
+        logger?.info({ overwrite }, "rebuilding tantivy index");
+        const status = await rebuildTantivyIndex(bundle, { overwrite });
         tantivy = { indexedDocCount: status.indexed_doc_count };
         options.onTantivyComplete?.(tantivy);
       } catch (error) {
@@ -5757,16 +6690,14 @@ async function runCompileImports(options) {
       }
     }
   } finally {
-    if (deferIndex) {
-      logger?.info("re-enabling FTS5 triggers");
-      enableFts5Triggers(bundle);
-    }
+    enableFts5Triggers(bundle);
   }
   return {
     providers: summaries,
     importedAny,
     tantivy,
-    tantivyError
+    tantivyError,
+    fts5Error
   };
 }
 async function exportCompileParquet(options) {
@@ -5809,7 +6740,7 @@ function createCliLogger(options) {
 // src/cli/commands/compile.ts
 function compileCommand() {
   const command = addCompileLogOptions(
-    new Command("compile").description(
+    new Command2("compile").description(
       "Import session histories from one agent CLI into the bundle."
     )
   );
@@ -5822,27 +6753,35 @@ function compileCommand() {
   return command;
 }
 function compileAllCommand() {
-  return addCompileLogOptions(new Command("compile-all")).description("Import all agent CLI session histories using default source paths.").option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(async (options) => {
+  return addCompileLogOptions(new Command2("compile-all")).description("Import all agent CLI session histories using default source paths.").option(
+    "--overwrite",
+    "force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
+    false
+  ).action(async (options) => {
     await runCompiles({
       providers: COMPILE_PROVIDERS,
       storePath: defaultBundlePath(),
-      deferIndex: options.deferIndex ?? false,
+      overwrite: options.overwrite,
       logOptions: options
     });
   });
 }
 function providerCompileCommand(provider) {
-  return addCompileLogOptions(new Command(provider.name)).description(provider.description).option(
+  return addCompileLogOptions(new Command2(provider.name)).description(provider.description).option(
     "--sessions-path <path>",
     `${provider.pathHelp} (default: ${provider.defaultSessionsPath()})`,
     provider.defaultSessionsPath()
-  ).option("--store <path>", "bundle directory", defaultBundlePath()).option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(
+  ).option("--store <path>", "bundle directory", defaultBundlePath()).option(
+    "--overwrite",
+    "force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
+    false
+  ).action(
     async (options, command) => {
       await runCompiles({
         providers: [provider],
         storePath: options.store,
-        deferIndex: options.deferIndex ?? false,
         sessionsPath: options.sessionsPath,
+        overwrite: options.overwrite,
         logOptions: command.optsWithGlobals()
       });
     }
@@ -5861,8 +6800,8 @@ async function runCompiles(options) {
     const result = await runCompileImports({
       bundle,
       providers: options.providers,
-      deferIndex: options.deferIndex,
       sessionsPath: options.sessionsPath,
+      overwrite: options.overwrite,
       logger,
       onProviderComplete: printCounts,
       onTantivyComplete: (status) => {
@@ -5875,7 +6814,8 @@ async function runCompiles(options) {
     closeBundle(bundle);
     logger.info({ store_path: storePath }, "bundle closed");
   }
-  if (importedAny) {
+  const shouldExportParquet = importedAny || options.overwrite === true;
+  if (shouldExportParquet) {
     try {
       const result = await exportCompileParquet({ storePath, logger });
       process.stdout.write(`parquet: wrote ${result.tableCount} tables to ${result.outDir}
@@ -5899,11 +6839,10 @@ function printCounts(summary) {
 // src/cli/commands/export.ts
 import { writeFile as writeFile6 } from "fs/promises";
-import path16 from "path";
-import { Command as Command2 } from "commander";
+import path17 from "path";
+import { Command as Command3 } from "commander";
 // src/services/export/markdown.ts
-init_cas();
 async function exportSessionMarkdown(bundle, sessionId2) {
   const session = bundle.db.prepare(
     `SELECT session_id, source_tool, source_session_id, title, start_ts, end_ts,
@@ -6013,139 +6952,60 @@ function renderToolCall(c) {
   return lines.join("\n");
 }
-// src/cli/bundle.ts
-import path15 from "path";
-async function withBundle(storePath, fn) {
-  const bundle = await openBundle(path15.resolve(storePath));
-  try {
-    return await fn(bundle);
-  } finally {
-    closeBundle(bundle);
-  }
-}
 // src/cli/commands/export.ts
 function exportCommand() {
-  const session = new Command2("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
+  const session = new Command3("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
     if (options.format !== "markdown") {
       throw new Error(`unsupported format: ${options.format} (try --format markdown)`);
     }
     await withBundle(options.store, async (bundle) => {
       const markdown = await exportSessionMarkdown(bundle, sessionId2);
       if (options.out) {
-        await writeFile6(path16.resolve(options.out), markdown, "utf8");
-        process.stdout.write(`wrote ${path16.resolve(options.out)}
+        await writeFile6(path17.resolve(options.out), markdown, "utf8");
+        process.stdout.write(`wrote ${path17.resolve(options.out)}
 `);
       } else {
         process.stdout.write(markdown);
       }
     });
   });
-  const parquet = new Command2("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
+  const parquet = new Command3("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
     const result = await exportBundleParquet({
-      bundlePath: path16.resolve(options.store),
-      outDir: options.out ? path16.resolve(options.out) : void 0
+      bundlePath: path17.resolve(options.store),
+      outDir: options.out ? path17.resolve(options.out) : void 0
     });
     process.stdout.write(`wrote parquet export to ${result.outDir}
 `);
     process.stdout.write(`manifest=${result.manifestPath}
 `);
   });
-  return new Command2("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
+  return new Command3("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
 }
 // src/cli/commands/index.ts
-import { Command as Command3 } from "commander";
+import { Command as Command4 } from "commander";
 init_indexing();
-// src/cli/output.ts
-var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
-var COL_SEPARATOR = "  ";
-var RULE_CHAR = "-";
-function parseOutputFormat(value, fallback) {
-  if (value === void 0) return fallback;
-  if (OUTPUT_FORMATS.includes(value)) return value;
-  throw new Error(
-    `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
-  );
-}
-function printRows(rows, opts) {
-  switch (opts.format) {
-    case "json":
-      printJson(rows, opts);
-      return;
-    case "csv":
-      printCsv(rows, opts);
-      return;
-    case "table":
-    case "interactive":
-      printTable(rows, opts);
-      return;
-  }
-}
-function printJson(rows, opts) {
-  const out = opts.meta ? { ...opts.meta, rows } : rows;
-  process.stdout.write(`${JSON.stringify(out, null, 2)}
-`);
-}
-function printCsv(rows, opts) {
-  const columns = opts.columns;
-  process.stdout.write(`${columns.map(csvField).join(",")}
-`);
-  for (const row of rows) {
-    const record = row;
-    const line = columns.map((column) => csvField(formatCell(record[column]))).join(",");
-    process.stdout.write(`${line}
-`);
-  }
-}
-function csvField(value) {
-  if (/[",\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
-  return value;
-}
-function printTable(rows, opts) {
-  const columns = opts.columns;
-  const widths = columns.map((column) => column.length);
-  const cells = rows.map((row) => {
-    const record = row;
-    return columns.map((column, index) => {
-      const text = formatCell(record[column]);
-      const width = widths[index] ?? 0;
-      if (text.length > width) widths[index] = text.length;
-      return text;
-    });
-  });
-  const header = columns.map((column, index) => column.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
-  const rule = columns.map((_, index) => RULE_CHAR.repeat(widths[index] ?? 0)).join(COL_SEPARATOR);
-  process.stdout.write(`${header}
-${rule}
-`);
-  for (const cellRow of cells) {
-    const line = cellRow.map((cell, index) => cell.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
-    process.stdout.write(`${line}
-`);
-  }
-}
-function formatCell(value) {
-  if (value == null) return "";
-  if (typeof value === "string") return value;
-  if (typeof value === "number" || typeof value === "boolean") return String(value);
-  return JSON.stringify(value);
-}
-// src/cli/commands/index.ts
 function indexCommand() {
-  const fts5 = new Command3("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
+  const fts5 = new Command4("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
+    "--overwrite",
+    "rebuild from scratch (FTS5 always overwrites; flag accepted for parity with other index commands)",
+    false
+  ).action(async (options) => {
     await withBundle(options.store, (bundle) => {
+      void options.overwrite;
       printIndexStatus(rebuildFts5Index(bundle));
     });
   });
-  const tantivy = new Command3("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
+  const tantivy = new Command4("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
+    "--overwrite",
+    "force a full re-index instead of the default incremental rebuild",
+    false
+  ).action(async (options) => {
     await withBundle(options.store, async (bundle) => {
-      printIndexStatus(await rebuildTantivyIndex(bundle));
+      printIndexStatus(await rebuildTantivyIndex(bundle, { overwrite: options.overwrite }));
     });
   });
-  const status = new Command3("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
+  const status = new Command4("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
     const format = parseOutputFormat(options.outputFormat, "table");
     await withBundle(options.store, (bundle) => {
       const rows = getSearchIndexStatuses(bundle);
@@ -6162,7 +7022,7 @@ function indexCommand() {
       });
     });
   });
-  return new Command3("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
+  return new Command4("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
 }
 function printIndexStatus(status) {
   process.stdout.write(
@@ -6174,11 +7034,11 @@ function printIndexStatus(status) {
 // src/cli/commands/init.ts
 import { stat as stat3 } from "fs/promises";
-import path17 from "path";
-import { Command as Command4 } from "commander";
+import path18 from "path";
+import { Command as Command5 } from "commander";
 function initCommand() {
-  return new Command4("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
-    const resolved = path17.resolve(options.store);
+  return new Command5("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
+    const resolved = path18.resolve(options.store);
     const exists2 = await stat3(`${resolved}/manifest.json`).then(() => true).catch(() => false);
     if (exists2) {
       if (!options.forceExisting) {
@@ -6189,369 +7049,416 @@ use --force-existing to skip without erroring
         );
         process.exit(2);
       }
-      const bundle2 = await openBundle(resolved);
-      closeBundle(bundle2);
-      process.stdout.write(`bundle already exists at ${resolved}
-`);
-      return;
-    }
-    const bundle = await initBundle(resolved);
-    closeBundle(bundle);
-    process.stdout.write(`initialized prosa bundle at ${resolved}
-`);
-  });
-}
-// src/cli/commands/mcp.ts
-import path18 from "path";
-import { Command as Command5 } from "commander";
-// src/mcp/server.ts
-init_errors();
-import { randomUUID } from "crypto";
-import http from "http";
-import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
-import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
-import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
-// src/mcp/guidance.ts
-var PROSA_MCP_INSTRUCTIONS = `
-prosa is a local memory over local agent session histories. Use it to import recent sessions,
-find prior work, commands, decisions, file touches, and full transcripts before answering from
-memory.
-Recommended workflow:
-- Use compile to refresh the bundle when recent local sessions may not be indexed yet. With no
-  input it imports all supported providers from default paths.
-- For open-ended questions, start with search_sessions using 2-5 concrete terms.
-- For questions about a file or path, start with find_touched_files, then inspect the returned sessions.
-- After search results, call get_session for the most relevant session_ids before drawing conclusions.
-- Use export_session_markdown only after selecting a likely session; it can return a large transcript.
-- Use list_tool_calls for command history, failed tools, patches, and operational audit trails.
-- Use get_artifact only when a returned artifact_id is needed for full output or diff content.
-- Use index_status if search results look stale or unexpectedly empty.
-When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant snippet
-or event. Do not treat search snippets as the whole truth; open the session when accuracy matters.
-`.trim();
-var INVESTIGATE_PRIOR_WORK_PROMPT = `
-Investigate prior work in prosa for the topic: {{topic}}
-Use this workflow:
-1. Call search_sessions with a short query built from the topic.
-2. If results are broad, search again with narrower terms from the best snippets.
-3. Open the most relevant session_ids with get_session.
-4. Use export_session_markdown only for sessions that appear directly relevant.
-5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
-`.trim();
-var FIND_FILE_HISTORY_PROMPT = `
-Investigate history for file/path: {{path}}
-Use this workflow:
-1. Call find_touched_files with the path or the most distinctive path suffix.
-2. Open returned session_ids with get_session.
-3. Use list_tool_calls with session_id when you need command-level detail.
-4. Use export_session_markdown only for the most relevant session.
-5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
-`.trim();
-var AUDIT_TOOL_FAILURES_PROMPT = `
-Audit tool failures in prosa{{query_clause}}.
-Use this workflow:
-1. Call list_tool_calls with errors_only=true.
-2. If a query is provided, also call search_sessions for that query to find related context.
-3. Open relevant session_ids with get_session.
-4. Group failures by tool_name, command/path, and likely cause.
-5. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
-`.trim();
-// src/mcp/tools.ts
-import { z } from "zod";
-// src/core/domain/types.ts
-var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
-// src/mcp/tools.ts
-init_errors();
-init_limits();
-init_indexing();
-init_search();
-init_sessions();
-function registerProsaTools(server, bundle, options = {}) {
-  const searchEngine = options.searchEngine ?? "fts5";
-  const storePath = options.storePath ?? bundle.path;
-  registerProsaPrompts(server);
-  server.registerTool(
-    "compile",
-    {
-      title: "Compile sessions",
-      description: "Import local agent session histories into the active prosa bundle. With no input, compiles all providers from default paths. With source, compiles that provider; sessions_path may override that provider path.",
-      inputSchema: {
-        source: z.enum(SOURCE_TOOLS).optional(),
-        sessions_path: z.string().min(1).optional()
-      },
-      annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
-    },
-    async ({ source, sessions_path }) => {
-      if (sessions_path && !source) {
-        return {
-          content: [
-            {
-              type: "text",
-              text: "sessions_path requires source because providers use incompatible source layouts"
-            }
-          ],
-          isError: true
-        };
-      }
-      try {
-        const result = await runCompileImports({
-          bundle,
-          providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
-          deferIndex: false,
-          sessionsPath: sessions_path
-        });
-        const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
-        return {
-          content: [
-            {
-              type: "text",
-              text: JSON.stringify(
-                {
-                  providers: result.providers.map((provider) => ({
-                    source: provider.source,
-                    source_path: provider.sourcePath,
-                    batch_id: provider.batchId,
-                    counts: provider.counts
-                  })),
-                  imported_any: result.importedAny,
-                  tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
-                  tantivy_error: result.tantivyError,
-                  parquet: parquet ? {
-                    out_dir: parquet.outDir,
-                    manifest_path: parquet.manifestPath,
-                    table_count: parquet.tableCount,
-                    files: parquet.files,
-                    counts: parquet.counts
-                  } : null
-                },
-                null,
-                2
-              )
-            }
-          ]
-        };
-      } catch (error) {
-        return {
-          content: [{ type: "text", text: getErrorMessage(error) }],
-          isError: true
-        };
-      }
-    }
-  );
-  server.registerTool(
-    "list_sessions",
-    {
-      title: "List sessions",
-      description: "List recent sessions when you need candidates by source/date before deeper inspection. Next step: call get_session for relevant session_id values.",
-      inputSchema: {
-        source: z.enum(SOURCE_TOOLS).optional(),
-        since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
-        until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
-        limit: z.number().int().min(1).max(500).optional().default(50)
-      },
-      annotations: { readOnlyHint: true, idempotentHint: true }
-    },
-    async (input) => {
-      const rows = listSessions(bundle, {
-        sourceTool: input.source,
-        sinceIso: input.since,
-        untilIso: input.until,
-        limit: input.limit ?? 50
-      });
-      return {
-        content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
-      };
-    }
-  );
-  server.registerTool(
-    "get_session",
-    {
-      title: "Get session detail",
-      description: "Open one session and return metadata plus timeline events. Use this after search_sessions, list_sessions, find_touched_files, or list_tool_calls before making evidence-backed claims.",
-      inputSchema: {
-        session_id: z.string().min(1)
-      },
-      annotations: { readOnlyHint: true, idempotentHint: true }
-    },
-    async ({ session_id }) => {
-      const detail = getSession(bundle, session_id);
-      if (!detail) {
-        return {
-          content: [{ type: "text", text: `session not found: ${session_id}` }],
-          isError: true
-        };
-      }
-      return {
-        content: [{ type: "text", text: JSON.stringify(detail, null, 2) }]
-      };
+      const bundle2 = await openBundle(resolved);
+      closeBundle(bundle2);
+      process.stdout.write(`bundle already exists at ${resolved}
+`);
+      return;
     }
-  );
+    const bundle = await initBundle(resolved);
+    closeBundle(bundle);
+    process.stdout.write(`initialized prosa bundle at ${resolved}
+`);
+  });
+}
+// src/cli/commands/mcp.ts
+import path19 from "path";
+import { Command as Command6 } from "commander";
+// src/mcp/server.ts
+init_errors();
+import { randomUUID } from "crypto";
+import http from "http";
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
+// src/mcp/guidance.ts
+var PROSA_MCP_INSTRUCTIONS = `
+prosa is a local memory over local agent session histories. Use it to find prior work, commands,
+decisions, file touches, transcripts, and analytical rollups before answering from memory.
+There are six tools:
+- search: full-text over messages, commands, paths, diffs, and previews. Start here for open-ended
+  questions with 2-5 concrete terms. Optional engine, field_kind, raw, since/until filters.
+- sessions: without session_id, lists candidates filtered by source/time/limit. With session_id,
+  opens the session: format=detail (default) returns metadata + timeline, format=summary returns
+  only the session row, format=markdown renders the full transcript.
+- tool_calls: audit commands and tool usage. Filters by tool_name, canonical_type, session_id,
+  errors_only. When path_substring is set, also returns artifacts touching that path \u2014 use this for
+  file-history questions.
+- analytics: built-in aggregate reports backed by SQLite views. Pick report=sessions|tools|errors|
+  models|projects with the matching filters. Use report=sessions with session_id or
+  source_path_substring for per-session metrics.
+- artifact: fetch full text for an artifact_id when previews are not enough. Binary artifacts return
+  a placeholder.
+- compile: with no input, returns a status snapshot (search index health). With source (and
+  optionally sessions_path), imports that provider into the bundle. Use status mode when search
+  results look stale; use import mode when local sessions may not be indexed yet.
+When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant
+snippet or event. Do not treat search snippets as the whole truth; open the session with
+\`sessions session_id=\u2026 format=detail\` when accuracy matters.
+`.trim();
+var INVESTIGATE_PRIOR_WORK_PROMPT = `
+Investigate prior work in prosa for the topic: {{topic}}
+Use this workflow:
+1. Call \`search\` with a short query built from the topic.
+2. If results are broad, search again with narrower terms from the best snippets.
+3. Open the most relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
+4. Use \`sessions session_id=\u2026 format=markdown\` only for sessions that appear directly relevant.
+5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
+`.trim();
+var FIND_FILE_HISTORY_PROMPT = `
+Investigate history for file/path: {{path}}
+Use this workflow:
+1. Call \`tool_calls\` with path_substring set to the path or its most distinctive suffix.
+2. Open returned session_ids with \`sessions session_id=\u2026 format=detail\`.
+3. Call \`tool_calls\` with session_id when you need command-level detail inside one session.
+4. Use \`sessions session_id=\u2026 format=markdown\` only for the most relevant session.
+5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
+`.trim();
+var AUDIT_TOOL_FAILURES_PROMPT = `
+Audit tool failures in prosa{{query_clause}}.
+Use this workflow:
+1. For an aggregate report, call \`analytics report=errors\` (filter by source/since/until/tool_name
+   as needed).
+2. For per-call evidence, call \`tool_calls\` with errors_only=true.
+3. If a query is provided, also call \`search\` for that query to find related context.
+4. Open relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
+5. Group failures by tool_name, command/path, and likely cause.
+6. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
+`.trim();
+// src/mcp/tools.ts
+import { z } from "zod";
+init_errors();
+init_indexing();
+init_search();
+init_sessions();
+// src/services/tool_calls.ts
+init_limits();
+function listToolCalls(bundle, filters = {}) {
+  const conds = [];
+  const params = [];
+  if (filters.toolName) {
+    conds.push("tc.tool_name = ?");
+    params.push(filters.toolName);
+  }
+  if (filters.canonicalType) {
+    conds.push("tc.canonical_tool_type = ?");
+    params.push(filters.canonicalType);
+  }
+  if (filters.sessionId) {
+    conds.push("tc.session_id = ?");
+    params.push(filters.sessionId);
+  }
+  if (filters.errorsOnly) {
+    conds.push("(tr.is_error = 1 OR tc.status = ?)");
+    params.push("error");
+  }
+  if (filters.pathSubstring) {
+    conds.push("tc.path IS NOT NULL AND tc.path LIKE ?");
+    params.push(`%${filters.pathSubstring}%`);
+  }
+  if (filters.sinceIso) {
+    conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
+    params.push(filters.sinceIso);
+  }
+  if (filters.untilIso) {
+    conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
+    params.push(filters.untilIso);
+  }
+  const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
+  const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
+  const toolCallSql = `
+    SELECT 'tool_call' AS entity_type,
+           tc.session_id,
+           tc.tool_call_id,
+           NULL AS artifact_id,
+           tc.tool_name,
+           tc.canonical_tool_type,
+           tc.command,
+           tc.path,
+           tc.status,
+           tc.timestamp_start,
+           tr.is_error,
+           tr.exit_code,
+           tr.preview
+      FROM tool_calls tc
+      LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
+      ${where}
+  `;
+  if (!filters.pathSubstring) {
+    const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
+    return bundle.db.prepare(sql2).all(...params);
+  }
+  const artifactSql = `
+    SELECT 'artifact' AS entity_type,
+           a.session_id,
+           NULL AS tool_call_id,
+           a.artifact_id,
+           NULL AS tool_name,
+           NULL AS canonical_tool_type,
+           NULL AS command,
+           a.path,
+           NULL AS status,
+           a.created_ts AS timestamp_start,
+           NULL AS is_error,
+           NULL AS exit_code,
+           NULL AS preview
+      FROM artifacts a
+     WHERE a.path IS NOT NULL AND a.path LIKE ?
+  `;
+  const sql = `
+    ${toolCallSql}
+    UNION ALL
+    ${artifactSql}
+    ORDER BY timestamp_start DESC
+    LIMIT ${limit2}
+  `;
+  return bundle.db.prepare(sql).all(...params, `%${filters.pathSubstring}%`);
+}
+// src/mcp/tools.ts
+var CANONICAL_TOOL_TYPES = [
+  "shell",
+  "read_file",
+  "write_file",
+  "edit_file",
+  "search_file",
+  "web_search",
+  "mcp",
+  "subagent",
+  "patch",
+  "other"
+];
+var FIELD_KINDS = [
+  "message_text",
+  "user_prompt",
+  "assistant_text",
+  "command",
+  "command_output_preview",
+  "error",
+  "file_path",
+  "diff",
+  "summary",
+  "artifact_text",
+  "tool_args",
+  "tool_result"
+];
+function registerProsaTools(server, bundle, options = {}) {
+  const searchEngine = options.searchEngine ?? "fts5";
+  const storePath = options.storePath ?? bundle.path;
+  const ensureStore = options.ensureStore ?? false;
+  registerProsaPrompts(server);
   server.registerTool(
-    "search_sessions",
+    "search", // Tool id in the new version; replaces search_sessions from the removed line above.
     {
       title: "Full-text search",
-      description: `Search messages, commands, paths, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms, then call get_session for relevant hits.`,
+      description: `Search messages, commands, paths, diffs, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms; then call \`sessions\` for relevant hits.`,
       inputSchema: {
         query: z.string().min(1),
+        engine: z.enum(["fts5", "tantivy"]).optional(), // Optional per-call engine; the handler falls back to the server-level searchEngine when it is omitted. NOTE(review): the description still says server-selected - confirm wording.
+        field_kind: z.enum(FIELD_KINDS).optional(), // Optional exact-match filter on hit.field_kind, applied by the handler after the search returns.
         limit: z.number().int().min(1).max(500).optional().default(50),
-        raw: z.boolean().optional().default(false)
+        raw: z.boolean().optional().default(false).describe("Pass query straight to FTS5 MATCH (allows OR/NEAR/prefixes).")
       },
       annotations: { readOnlyHint: true, idempotentHint: true }
     },
-    async ({ query, limit, raw }) => {
-      const hits = searchFullText(bundle, { query, limit: limit ?? 50, raw, engine: searchEngine });
+    async ({ query, engine, field_kind, limit: limit2, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => { // Handler body runs inside withToolBundle and searches the bundle it is handed (activeBundle).
+      const selectedEngine = engine ?? searchEngine; // Per-call engine wins over the server default.
+      const hits = searchFullText(activeBundle, {
+        query,
+        limit: limit2 ?? 50,
+        raw,
+        engine: selectedEngine
+      });
+      const filtered = field_kind ? hits.filter((hit) => hit.field_kind === field_kind) : hits; // NOTE(review): filtering happens after searchFullText received limit, so presumably fewer than limit hits can come back even if more rows of this kind exist - confirm.
       return {
         content: [
           {
             type: "text",
             text: JSON.stringify(
-              { query, engine: searchEngine, count: hits.length, hits },
+              {
+                query,
+                engine: selectedEngine,
+                field_kind: field_kind ?? null,
+                count: filtered.length, // Counts the filtered hits, not the raw hits.
+                hits: filtered
+              },
               null,
               2
             )
           }
         ]
       };
-    }
+    })
   );
   server.registerTool(
-    "export_session_markdown",
+    "sessions", // Tool id in the new version; this slot previously registered export_session_markdown.
     {
-      title: "Export session as Markdown",
-      description: "Render a selected session into a readable transcript. Use only after get_session confirms relevance; this can return much more context than snippets.",
+      title: "List or open sessions",
+      description: "Without `session_id`, lists sessions filtered by source/time/limit. With `session_id`, opens that session: `format=detail` (default) returns metadata plus timeline events; `format=summary` returns only the session row; `format=markdown` renders the readable transcript. Call after `search` to get evidence behind a hit.",
       inputSchema: {
-        session_id: z.string().min(1)
+        session_id: z.string().min(1).optional(), // Now optional: its presence switches the handler from list mode to open mode.
+        format: z.enum(["summary", "detail", "markdown"]).optional().default("detail"), // Only consulted by the handler when session_id is given.
+        source: z.enum(SOURCE_TOOLS).optional(),
+        since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
+        until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
+        limit: z.number().int().min(1).max(500).optional().default(50)
       },
       annotations: { readOnlyHint: true, idempotentHint: true }
     },
-    async ({ session_id }) => {
-      try {
-        const md = await exportSessionMarkdown(bundle, session_id);
-        return { content: [{ type: "text", text: md }] };
-      } catch (error) {
+    async ({ session_id, format, source, since, until, limit: limit2 }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
+      if (!session_id) { // List mode: no session_id, so return listSessions rows as JSON; format is not read on this path.
+        const rows = listSessions(activeBundle, {
+          sourceTool: source,
+          sinceIso: since,
+          untilIso: until,
+          limit: limit2 ?? 50
+        });
         return {
-          content: [{ type: "text", text: getErrorMessage(error) }],
+          content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
+        };
+      }
+      if (format === "markdown") { // Markdown mode: a failure in exportSessionMarkdown becomes an isError result carrying the error message.
+        try {
+          const md = await exportSessionMarkdown(activeBundle, session_id);
+          return { content: [{ type: "text", text: md }] };
+        } catch (error) {
+          return {
+            content: [{ type: "text", text: getErrorMessage(error) }],
+            isError: true
+          };
+        }
+      }
+      const detail = getSession(activeBundle, session_id); // detail and summary formats both start from getSession.
+      if (!detail) { // A falsy result is reported as an isError result.
+        return {
+          content: [{ type: "text", text: `session not found: ${session_id}` }],
           isError: true
         };
       }
-    }
+      const payload = format === "summary" ? { session: detail.session } : detail; // summary keeps only detail.session; any other remaining format value returns the whole detail object.
+      return {
+        content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
+      };
+    })
   );
   server.registerTool(
-    "list_tool_calls",
+    "tool_calls", // Tool id in the new version; replaces list_tool_calls from the removed line above.
     {
-      title: "List tool calls",
-      description: "Audit commands and tool usage by tool name, canonical type, error status, or session. Use this for failed commands, shell history, patches, and operational evidence; then open relevant sessions with get_session.",
+      title: "Audit tool calls and file touches",
+      description: "Audit commands and tool usage. Filter by tool_name, canonical_type, session_id, errors_only, or path_substring. When `path_substring` is set, also surfaces matching artifacts so file-history questions return both invocations and produced files.",
       inputSchema: {
+        session_id: z.string().min(1).optional(),
         tool_name: z.string().optional(),
-        canonical_type: z.enum([
-          "shell",
-          "read_file",
-          "write_file",
-          "edit_file",
-          "search_file",
-          "web_search",
-          "mcp",
-          "subagent",
-          "patch",
-          "other"
-        ]).optional(),
-        session_id: z.string().optional(),
+        canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional(), // Uses the shared CANONICAL_TOOL_TYPES list in place of the inline enum removed above.
+        path_substring: z.string().min(1).optional().describe("Filter rows where tool_calls.path or artifacts.path contains this substring."),
         errors_only: z.boolean().optional().default(false),
+        since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
+        until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
         limit: z.number().int().min(1).max(500).optional().default(100)
       },
       annotations: { readOnlyHint: true, idempotentHint: true }
     },
-    async ({ tool_name, canonical_type, session_id, errors_only, limit }) => {
-      const conds = [];
-      const params = [];
-      if (tool_name) {
-        conds.push("tc.tool_name = ?");
-        params.push(tool_name);
-      }
-      if (canonical_type) {
-        conds.push("tc.canonical_tool_type = ?");
-        params.push(canonical_type);
-      }
-      if (session_id) {
-        conds.push("tc.session_id = ?");
-        params.push(session_id);
-      }
-      if (errors_only) {
-        conds.push("(tr.is_error = 1 OR tc.status = ?)");
-        params.push("error");
-      }
-      const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
-      const sql = `
-        SELECT tc.tool_call_id, tc.session_id, tc.tool_name, tc.canonical_tool_type,
-               tc.command, tc.path, tc.status, tc.timestamp_start,
-               tr.is_error, tr.exit_code, tr.preview
-          FROM tool_calls tc
-          LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
-          ${where}
-         ORDER BY tc.timestamp_start DESC
-         LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
-      `;
-      const rows = bundle.db.prepare(sql).all(...params);
+    async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => { // The inline SQL removed above is replaced by a call to listToolCalls; this handler only maps snake_case inputs to camelCase filters.
+      const rows = listToolCalls(activeBundle, {
+        sessionId: input.session_id,
+        toolName: input.tool_name,
+        canonicalType: input.canonical_type,
+        pathSubstring: input.path_substring,
+        errorsOnly: input.errors_only,
+        sinceIso: input.since,
+        untilIso: input.until,
+        limit: input.limit ?? 100 // Explicit fallback of 100 when limit is null or undefined.
+      });
       return {
         content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
       };
-    }
+    })
   );
   server.registerTool(
-    "find_touched_files",
+    "analytics", // Tool id in the new version; this slot previously registered find_touched_files.
     {
-      title: "Find sessions that touched a file",
-      description: "Find sessions with tool calls or artifacts whose path contains `path_substring`. Start here for file-history questions, then open returned sessions with get_session.",
+      title: "Aggregate analytics reports",
+      description: "Run a built-in aggregation over the bundle: per-session metrics (`sessions`), tool usage rollup (`tools`), error timeline (`errors`), model usage (`models`), or project activity (`projects`). Backed by SQLite views; mirrors the `prosa analytics` CLI.",
       inputSchema: {
-        path_substring: z.string().min(1),
-        limit: z.number().int().min(1).max(500).optional().default(100)
+        report: z.enum(ANALYTICS_REPORTS), // The only required input; every other field below is an optional filter.
+        source: z.enum(SOURCE_TOOLS).optional(),
+        since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
+        until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
+        limit: z.number().int().min(1).max(500).optional().default(50),
+        session_id: z.string().min(1).optional().describe("Drill-down filter (applies to `sessions` report)."),
+        source_path_substring: z.string().min(1).optional().describe("Filter `sessions` rows by imported source file path substring."),
+        project: z.string().min(1).optional().describe("Filter by project id, name, or path substring."),
+        tool_name: z.string().min(1).optional().describe("Filter `tools`/`errors` rows by exact tool name."),
+        canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional().describe("Filter `tools` rows by canonical tool type."),
+        errors_only: z.boolean().optional().describe("`tools` report: only error rows."),
+        category: z.string().min(1).optional().describe("Filter `errors` by category: tool_result|import_error|uncertainty."),
+        model: z.string().min(1).optional().describe("Filter `models` rows by exact model name.")
       },
       annotations: { readOnlyHint: true, idempotentHint: true }
     },
-    async ({ path_substring, limit }) => {
-      const sql = `
-        SELECT tc.session_id, tc.tool_name, tc.canonical_tool_type, tc.path,
-               tc.timestamp_start, tc.command
-          FROM tool_calls tc
-         WHERE tc.path IS NOT NULL AND tc.path LIKE ?
-         UNION ALL
-        SELECT a.session_id AS session_id, NULL AS tool_name, NULL AS canonical_tool_type,
-               a.path, a.created_ts AS timestamp_start, NULL AS command
-          FROM artifacts a
-         WHERE a.path IS NOT NULL AND a.path LIKE ?
-         ORDER BY timestamp_start DESC
-         LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
-      `;
-      const like = `%${path_substring}%`;
-      const rows = bundle.db.prepare(sql).all(like, like);
-      return {
-        content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
+    async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
+      const filters = { // Copies the optional inputs into the filter object handed to runAnalyticsReportFromBundle (snake_case to camelCase).
+        source: input.source,
+        since: input.since,
+        until: input.until,
+        limit: input.limit,
+        sessionId: input.session_id,
+        sourcePathSubstring: input.source_path_substring,
+        project: input.project,
+        toolName: input.tool_name,
+        canonicalType: input.canonical_type,
+        errorsOnly: input.errors_only,
+        category: input.category,
+        model: input.model
       };
-    }
+      try { // Anything thrown while running the report is returned as an isError result instead of propagating.
+        const result = runAnalyticsReportFromBundle({
+          bundle: activeBundle,
+          report: input.report,
+          filters
+        });
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify(
+                { report: input.report, count: result.rows.length, rows: result.rows }, // count is the number of rows in result.rows.
+                null,
+                2
+              )
+            }
+          ]
+        };
+      } catch (error) {
+        return {
+          content: [{ type: "text", text: getErrorMessage(error) }],
+          isError: true
+        };
+      }
+    })
   );
   server.registerTool(
-    "get_artifact",
+    "artifact",
     {
       title: "Get artifact bytes/text",
-      description: "Retrieve full text for an artifact_id found in a session or export. Use this for detailed diffs or large tool outputs after identifying the artifact; binary artifacts return a placeholder.",
+      description: "Retrieve full text for an `artifact_id` referenced in a session, search hit, or tool_calls row. Use this when previews are not enough; binary artifacts return a placeholder.",
       inputSchema: {
         artifact_id: z.string().min(1)
       },
       annotations: { readOnlyHint: true, idempotentHint: true }
     },
-    async ({ artifact_id }) => {
-      const row = bundle.db.prepare(`SELECT text_object_id, object_id, mime_type FROM artifacts WHERE artifact_id = ?`).get(artifact_id);
+    async ({ artifact_id }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
+      const row = activeBundle.db.prepare(`SELECT text_object_id, object_id, mime_type FROM artifacts WHERE artifact_id = ?`).get(artifact_id);
       if (!row) {
         return {
           content: [{ type: "text", text: `artifact not found: ${artifact_id}` }],
@@ -6563,30 +7470,111 @@ function registerProsaTools(server, bundle, options = {}) {
         return { content: [{ type: "text", text: "[no content stored]" }] };
       }
       try {
-        const { getText: getText2 } = await Promise.resolve().then(() => (init_cas(), cas_exports));
-        const text = await getText2(bundle, objectId);
+        const text = await getText(activeBundle, objectId);
         return { content: [{ type: "text", text }] };
       } catch {
         return { content: [{ type: "text", text: `[binary artifact: ${objectId}]` }] };
       }
-    }
+    })
   );
   server.registerTool(
-    "index_status",
+    "compile", // Tool id in the new version; this slot previously registered the index_status tool.
     {
-      title: "Search index status",
-      description: "Show whether derived search indexes are ready, stale, missing, building, or failed. Use when search results are unexpectedly empty or when choosing between FTS5 and Tantivy.",
-      inputSchema: {},
-      annotations: { readOnlyHint: true, idempotentHint: true }
+      title: "Compile sessions or report bundle status",
+      description: "Without input, returns a status snapshot (search index health, last batch, schema version) without mutating anything. With `source`, imports that provider; `sessions_path` may override its default. Pass `overwrite: true` to force a full rebuild of derived indexes (Tantivy from scratch). With neither `source` nor `sessions_path`, only status is returned.",
+      inputSchema: {
+        source: z.enum(SOURCE_TOOLS).optional(),
+        sessions_path: z.string().min(1).optional(),
+        overwrite: z.boolean().optional()
+      },
+      annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true } // readOnlyHint is false here, unlike the other tools visible in this section.
     },
-    async () => {
-      const rows = getSearchIndexStatuses(bundle);
-      return {
-        content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
-      };
-    }
+    async ({ source, sessions_path, overwrite }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
+      if (sessions_path && !source) { // Reject sessions_path without source before doing any work.
+        return {
+          content: [
+            {
+              type: "text",
+              text: "sessions_path requires source because providers use incompatible source layouts"
+            }
+          ],
+          isError: true
+        };
+      }
+      if (!source && !sessions_path) { // Status mode. NOTE(review): after the guard above this condition is equivalent to !source.
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify(
+                { mode: "status", search_index: getSearchIndexStatuses(activeBundle) }, // Status payload carries only search_index. NOTE(review): the description also mentions last batch and schema version - confirm whether they belong here.
+                null,
+                2
+              )
+            }
+          ]
+        };
+      }
+      try { // Import mode: failures are returned as an isError result instead of propagating.
+        const result = await runCompileImports({
+          bundle: activeBundle,
+          providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS, // NOTE(review): both !source cases returned above, so the COMPILE_PROVIDERS fallback looks unreachable - confirm.
+          sessionsPath: sessions_path,
+          overwrite
+        });
+        const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null; // Parquet export runs only when something was imported, and is addressed by storePath rather than activeBundle.
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify(
+                {
+                  mode: "import",
+                  providers: result.providers.map((provider) => ({
+                    source: provider.source,
+                    source_path: provider.sourcePath,
+                    batch_id: provider.batchId,
+                    counts: provider.counts
+                  })),
+                  imported_any: result.importedAny,
+                  tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
+                  tantivy_error: result.tantivyError,
+                  fts5_error: result.fts5Error,
+                  parquet: parquet ? {
+                    out_dir: parquet.outDir,
+                    manifest_path: parquet.manifestPath,
+                    table_count: parquet.tableCount,
+                    files: parquet.files,
+                    counts: parquet.counts
+                  } : null,
+                  search_index: getSearchIndexStatuses(activeBundle) // Index status is read again after the import so the response reflects the post-import state.
+                },
+                null,
+                2
+              )
+            }
+          ]
+        };
+      } catch (error) {
+        return {
+          content: [{ type: "text", text: getErrorMessage(error) }],
+          isError: true
+        };
+      }
+    })
   );
 }
+async function withToolBundle(fallbackBundle, storePath, ensureStore, fn) { // Runs fn with a bundle and resolves to whatever fn returns; ensureStore selects which bundle fn receives.
+  if (!ensureStore) { // Reuse the caller-supplied bundle; it is neither opened nor closed here.
+    return await fn(fallbackBundle);
+  }
+  const bundle = await openOrInitBundle(storePath); // ensureStore path: obtain a bundle from storePath for this single call; fallbackBundle is not used.
+  try {
+    return await fn(bundle);
+  } finally {
+    closeBundle(bundle); // Always close the per-call bundle, whether fn resolved or threw.
+  }
+}
 function registerProsaPrompts(server) {
   server.registerPrompt(
     "investigate_prior_work",
@@ -6619,14 +7607,14 @@ function registerProsaPrompts(server) {
         path: z.string().min(1).describe("File path, directory, or distinctive path suffix")
       }
     },
-    ({ path: path20 }) => ({
+    ({ path: path21 }) => ({
       description: "Find sessions that touched a path and summarize the evidence.",
       messages: [
         {
           role: "user",
           content: {
             type: "text",
-            text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path20)
+            text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path21)
           }
         }
       ]
@@ -6759,7 +7747,7 @@ function createMcpServer(bundle, searchEngine, storePath) {
     },
     { instructions: PROSA_MCP_INSTRUCTIONS }
   );
-  registerProsaTools(server, bundle, { searchEngine, storePath });
+  registerProsaTools(server, bundle, { ensureStore: true, searchEngine, storePath });
   return server;
 }
 async function readBody(req) {
@@ -6796,27 +7784,12 @@ function writeError(res, error) {
   );
 }
-// src/cli/parsers.ts
-function parseSearchEngine(value) {
-  if (value === "fts5" || value === "tantivy") return value;
-  throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
-}
-function parseMcpTransport(value) {
-  if (value === "stdio" || value === "http") return value;
-  throw new Error(`invalid transport: ${value} (expected stdio or http)`);
-}
-function parseSourceTool(value) {
-  if (value === void 0) return void 0;
-  if (SOURCE_TOOLS.includes(value)) return value;
-  throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
-}
 // src/cli/commands/mcp.ts
 function mcpCommand() {
-  const serve = new Command5("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
+  const serve = new Command6("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
     async (options) => {
-      const storePath = path18.resolve(options.store);
-      const bundle = await openBundle(storePath);
+      const storePath = path19.resolve(options.store);
+      const bundle = await openOrInitBundle(storePath);
       try {
         const transport = parseMcpTransport(options.transport);
         const searchEngine = parseSearchEngine(options.searchEngine);
@@ -6846,7 +7819,7 @@ function mcpCommand() {
       }
     }
   );
-  return new Command5("mcp").description("MCP server commands.").addCommand(serve);
+  return new Command6("mcp").description("MCP server commands.").addCommand(serve);
 }
 function registerShutdown(closeServer, bundle) {
   const shutdown = async () => {
@@ -6863,13 +7836,13 @@ function registerShutdown(closeServer, bundle) {
 }
 // src/cli/commands/query.ts
-import path19 from "path";
-import { Command as Command6 } from "commander";
+import path20 from "path";
+import { Command as Command7 } from "commander";
 function queryCommand() {
-  const duckdb = new Command6("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
+  const duckdb = new Command7("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
     async (sql, options) => {
       const format = parseOutputFormat(options.outputFormat, "table");
-      const parquetDir = options.parquetDir ? path19.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
+      const parquetDir = options.parquetDir ? path20.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
       const result = await queryDuckDbParquet({ parquetDir, sql });
       printRows(result.rows, {
         format,
@@ -6878,14 +7851,14 @@ function queryCommand() {
       });
     }
   );
-  return new Command6("query").description("Run derived analytical queries.").addCommand(duckdb);
+  return new Command7("query").description("Run derived analytical queries.").addCommand(duckdb);
 }
 // src/cli/commands/search.ts
-import { Command as Command7 } from "commander";
+import { Command as Command8 } from "commander";
 init_search();
 function searchCommand() {
-  return new Command7("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
+  return new Command8("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
     async (query, options) => {
       const engine = parseSearchEngine(options.engine);
       const format = parseOutputFormat(options.outputFormat, "table");
@@ -6906,10 +7879,10 @@ function searchCommand() {
 }
 // src/cli/commands/sessions.ts
-import { Command as Command8 } from "commander";
+import { Command as Command9 } from "commander";
 init_sessions();
 function sessionsCommand() {
-  const command = new Command8("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
+  const command = new Command9("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
     async (options) => {
       const format = parseOutputFormat(options.outputFormat, "table");
       await withBundle(options.store, (bundle) => {
@@ -6936,7 +7909,7 @@ function sessionsCommand() {
     }
   );
   command.addCommand(
-    new Command8("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
+    new Command9("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
       async (options) => {
         await withBundle(options.store, (bundle) => {
           const count = countSessions(bundle, {
@@ -6954,9 +7927,9 @@ function sessionsCommand() {
 }
 // src/cli/commands/tui.ts
-import { Command as Command9 } from "commander";
+import { Command as Command10 } from "commander";
 function tuiCommand() {
-  return new Command9("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
+  return new Command10("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
     const [{ render }, React, { App: App2 }] = await Promise.all([
       import("ink"),
       import("react"),
@@ -6971,8 +7944,14 @@ function tuiCommand() {
 }
 // src/cli/main.ts
+function stripLeadingDoubleDash(argv) { // Returns a new array: argv without a "--" at index 2 when one is present; argv itself is not mutated.
+  if (argv.length >= 3 && argv[2] === "--") {
+    return [argv[0], argv[1], ...argv.slice(3)]; // Keep the first two entries, drop the "--", keep everything after it.
+  }
+  return [...argv]; // Nothing to strip: return a shallow copy.
+}
 async function runCli(argv) {
-  const program = new Command10().name("prosa").enablePositionalOptions().description(
+  const program = new Command11().name("prosa").enablePositionalOptions().description(
     "Compile, search and export local agent session histories\n(Cursor, Codex CLI, Claude Code, Gemini CLI) into one canonical store."
   ).version(PROSA_PARSER_VERSION, "-v, --version");
   program.addCommand(initCommand());
@@ -6983,9 +7962,10 @@ async function runCli(argv) {
   program.addCommand(searchCommand());
   program.addCommand(exportCommand());
   program.addCommand(queryCommand());
+  program.addCommand(analyticsCommand());
   program.addCommand(mcpCommand());
   program.addCommand(tuiCommand());
-  await program.parseAsync([...argv]);
+  await program.parseAsync(stripLeadingDoubleDash(argv));
 }
 var isEntry = import.meta.url === `file://${process.argv[1]}`;
 if (isEntry) {