@c3-oss/prosa 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/prosa.js CHANGED
@@ -98,16 +98,27 @@ var init_hash = __esm({
98
98
  // src/core/cas/index.ts
99
99
  var cas_exports = {};
100
100
  __export(cas_exports, {
101
+ createPendingObjects: () => createPendingObjects,
102
+ ensureDir: () => ensureDir,
103
+ flushPendingObjects: () => flushPendingObjects,
101
104
  getBytes: () => getBytes,
102
105
  getJson: () => getJson,
103
106
  getObjectMeta: () => getObjectMeta,
104
107
  getText: () => getText,
105
108
  putBytes: () => putBytes,
106
109
  putJson: () => putJson,
107
- putText: () => putText
110
+ putText: () => putText,
111
+ stageBytes: () => stageBytes,
112
+ stageJson: () => stageJson,
113
+ stageText: () => stageText
108
114
  });
109
115
  import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
110
116
  import path2 from "path";
117
/**
 * Create `absoluteDir` (and any missing parents) unless this module has
 * already created it.
 *
 * Paths that were created successfully are remembered in the module-level
 * `ensuredDirs` set, so repeated calls for the same path skip the mkdir call.
 * The path is only recorded after mkdir resolves; concurrent first calls for
 * the same path may therefore each issue their own recursive mkdir.
 *
 * @param {string} absoluteDir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(absoluteDir) {
  const alreadyEnsured = ensuredDirs.has(absoluteDir);
  if (!alreadyEnsured) {
    await mkdir2(absoluteDir, { recursive: true });
    ensuredDirs.add(absoluteDir);
  }
}
111
122
  async function putBytes(bundle, bytes, options = {}) {
112
123
  const hash = blake3Hex(bytes);
113
124
  const objectId = objectIdFromHash(hash);
@@ -121,7 +132,7 @@ async function putBytes(bundle, bytes, options = {}) {
121
132
  const { bytes: stored, compression } = compressBytes(bytes);
122
133
  const storagePath = objectStoragePath(hash, compression);
123
134
  const absolutePath = path2.join(bundle.path, storagePath);
124
- await mkdir2(path2.dirname(absolutePath), { recursive: true });
135
+ await ensureDir(path2.dirname(absolutePath));
125
136
  await writeFile2(absolutePath, stored);
126
137
  prepare(
127
138
  bundle.db,
@@ -185,18 +196,126 @@ function getObjectMeta(bundle, objectId) {
185
196
  FROM objects WHERE object_id = ?`
186
197
  ).get(objectId) ?? null;
187
198
  }
199
/**
 * Build an empty staging container for objects that have not been flushed yet.
 *
 * @returns {{ byId: Map<any, any> }} A fresh container whose `byId` map is
 *   keyed by object id (filled by the stage* helpers).
 */
function createPendingObjects() {
  const byId = new Map();
  return { byId };
}
202
/**
 * Stage a byte payload in `pending` and return its object id.
 *
 * The payload is hashed with `blake3Hex` and the id is derived from that hash
 * via `objectIdFromHash`. If an entry with the same id is already staged it is
 * left untouched, so the metadata from the first staging call wins.
 *
 * @param {{ byId: Map<any, any> }} pending - Staging container to add to.
 * @param {*} bytes - Payload; anything that is not already a Buffer is passed
 *   through `Buffer.from`.
 * @param {{ mimeType?: string | null, encoding?: string | null }} [options]
 *   Optional metadata; missing values are stored as `null`.
 * @returns {*} The object id derived from the payload hash.
 */
function stageBytes(pending, bytes, options = {}) {
  const payload = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  const hash = blake3Hex(payload);
  const objectId = objectIdFromHash(hash);
  if (pending.byId.has(objectId)) {
    // Same content was staged earlier; keep the existing entry as-is.
    return objectId;
  }
  const entry = {
    objectId,
    hash,
    bytes: payload,
    mimeType: options.mimeType ?? null,
    encoding: options.encoding ?? null
  };
  pending.byId.set(objectId, entry);
  return objectId;
}
217
/**
 * Stage a string as a UTF-8 encoded object and return its object id.
 *
 * The MIME type defaults to "text/plain; charset=utf-8"; the stored encoding
 * is always "utf-8" (any `options.encoding` is not consulted).
 *
 * @param {{ byId: Map<any, any> }} pending - Staging container to add to.
 * @param {string} text - Text to encode and stage.
 * @param {{ mimeType?: string | null }} [options] - Optional MIME override.
 * @returns {*} The object id returned by `stageBytes`.
 */
function stageText(pending, text, options = {}) {
  const encoded = Buffer.from(text, "utf8");
  const mimeType = options.mimeType ?? "text/plain; charset=utf-8";
  return stageBytes(pending, encoded, { mimeType, encoding: "utf-8" });
}
223
/**
 * Stage a value as its `JSON.stringify` serialization and return the object id.
 *
 * The serialized text is UTF-8 encoded and staged with MIME type
 * "application/json" and encoding "utf-8".
 *
 * @param {{ byId: Map<any, any> }} pending - Staging container to add to.
 * @param {*} value - Value to serialize.
 * @returns {*} The object id returned by `stageBytes`.
 */
function stageJson(pending, value) {
  const serialized = JSON.stringify(value);
  const encoded = Buffer.from(serialized, "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return stageBytes(pending, encoded, metadata);
}
229
/**
 * Persist every staged object that the `objects` table does not know yet.
 *
 * Steps, in order:
 *   1. Return immediately when nothing is staged.
 *   2. Look up which staged ids already have a row (`queryExistingObjectIds`).
 *   3. Compress the remaining payloads and compute their storage paths.
 *   4. Write the files (`writeFilesParallel`), then insert one row per written
 *      object with `INSERT OR IGNORE`, all sharing one `created_at` timestamp.
 *
 * The staging container itself is not modified.
 *
 * @param {{ db: *, path: string }} bundle - Bundle providing the database
 *   handle and the root path that storage paths are joined onto.
 * @param {{ byId: Map<any, any> }} pending - Staged objects to flush.
 * @returns {Promise<void>}
 */
async function flushPendingObjects(bundle, pending) {
  const stagedById = pending.byId;
  if (stagedById.size === 0) return;

  const alreadyStored = queryExistingObjectIds(bundle, Array.from(stagedById.keys()));
  const writes = Array.from(stagedById.values())
    .filter((entry) => !alreadyStored.has(entry.objectId))
    .map((entry) => {
      const compressed = compressBytes(entry.bytes);
      const storagePath = objectStoragePath(entry.hash, compressed.compression);
      return {
        staged: entry,
        compression: compressed.compression,
        compressedBytes: compressed.bytes,
        storagePath,
        absolutePath: path2.join(bundle.path, storagePath)
      };
    });

  // Files go to disk before their rows are inserted.
  if (writes.length !== 0) {
    await writeFilesParallel(writes);
  }

  // NOTE(review): whitespace inside this SQL literal is reproduced exactly as
  // the listing shows it (the listing strips indentation) — confirm against
  // the built file if the exact statement text matters.
  const insertStatement = prepare(
    bundle.db,
    `INSERT OR IGNORE INTO objects (
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
compression, mime_type, encoding, storage_path, created_at
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const createdAt = new Date().toISOString();
  for (const write of writes) {
    const { staged: entry, compression, compressedBytes, storagePath } = write;
    // The compressed size is only recorded for zstd-compressed payloads.
    const compressedSize = compression === "zstd" ? compressedBytes.byteLength : null;
    insertStatement.run(
      entry.objectId,
      entry.hash,
      entry.bytes.byteLength,
      compressedSize,
      compression,
      entry.mimeType,
      entry.encoding,
      storagePath,
      createdAt
    );
  }
}
271
/**
 * Return the subset of `ids` that already has a row in the `objects` table.
 *
 * The ids are looked up in batches of 500, one `IN (...)` query with bound
 * parameters per batch. No query is issued for an empty id list.
 *
 * @param {{ db: * }} bundle - Bundle whose `db.prepare(sql).all(...params)`
 *   is used to run each lookup.
 * @param {Array<*>} ids - Object ids to look up.
 * @returns {Set<*>} The `object_id` values found in the table.
 */
function queryExistingObjectIds(bundle, ids) {
  const BATCH_SIZE = 500;
  const present = new Set();
  let offset = 0;
  while (offset < ids.length) {
    const batch = ids.slice(offset, offset + BATCH_SIZE);
    const marks = batch.map(() => "?").join(",");
    const statement = bundle.db.prepare(
      `SELECT object_id FROM objects WHERE object_id IN (${marks})`
    );
    for (const row of statement.all(...batch)) {
      present.add(row.object_id);
    }
    offset += BATCH_SIZE;
  }
  return present;
}
285
/**
 * Write each task's `compressedBytes` to its `absolutePath`, creating the
 * parent directory first, with at most FS_WRITE_CONCURRENCY writes in flight.
 *
 * A fixed number of workers pull task indices from a shared cursor until the
 * list is exhausted.
 *
 * Failure handling: once any write fails, no further tasks are started, the
 * writes already in flight are awaited, and the first error (in time) is
 * rethrown. Previously the returned promise rejected immediately while the
 * remaining workers kept writing in the background after the caller had
 * already observed the failure.
 *
 * @param {Array<{ absolutePath: string, compressedBytes: * }>} tasks
 *   Files to write.
 * @returns {Promise<void>} Resolves when every task has been written.
 * @throws The first error raised by directory creation or a file write.
 */
async function writeFilesParallel(tasks) {
  const workerCount = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
  let cursor = 0;
  // Holds `{ error }` for the first failure; wrapped so that any thrown
  // value, including a falsy one, is still detected.
  let failure = null;

  const runWorker = async () => {
    while (failure === null) {
      const index = cursor++;
      if (index >= tasks.length) return;
      const task = tasks[index];
      try {
        await ensureDir(path2.dirname(task.absolutePath));
        await writeFile2(task.absolutePath, task.compressedBytes);
      } catch (error) {
        failure ??= { error };
        return;
      }
    }
  };

  // Workers never reject, so this waits for all in-flight writes to settle.
  await Promise.all(Array.from({ length: workerCount }, runWorker));
  if (failure !== null) throw failure.error;
}
304
// Module-level state for the CAS helpers; both are assigned in init_cas below.
var ensuredDirs, FS_WRITE_CONCURRENCY;
// Initializer for src/core/cas/index.ts.
// NOTE(review): __esm is defined outside this view — presumably the bundler's
// lazy, run-once module wrapper; confirm before relying on ordering.
var init_cas = __esm({
  "src/core/cas/index.ts"() {
    "use strict";
    // Initialize the modules this one depends on first.
    init_db();
    init_compress();
    init_hash();
    // Directory paths that ensureDir has already created.
    ensuredDirs = /* @__PURE__ */ new Set();
    // Maximum number of concurrent workers used by writeFilesParallel.
    FS_WRITE_CONCURRENCY = 16;
  }
});
196
315
 
197
316
  // src/services/indexing.ts
198
- import { mkdir as mkdir4, rm, writeFile as writeFile4 } from "fs/promises";
199
- import path12 from "path";
317
+ import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
318
+ import path13 from "path";
200
319
  function enableFts5Triggers(bundle) {
201
320
  bundle.db.exec(FTS5_TRIGGER_SQL);
202
321
  }
@@ -291,7 +410,7 @@ async function rebuildTantivyIndex(bundle) {
291
410
  try {
292
411
  const tantivy = await import("@oxdev03/node-tantivy-binding");
293
412
  const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
294
- await rm(bundle.paths.tantivy, { recursive: true, force: true });
413
+ await rm2(bundle.paths.tantivy, { recursive: true, force: true });
295
414
  await mkdir4(bundle.paths.tantivy, { recursive: true });
296
415
  const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
297
416
  const writer = index.writer(5e7, 1);
@@ -320,8 +439,8 @@ async function rebuildTantivyIndex(bundle) {
320
439
  }
321
440
  writer.commit();
322
441
  index.reload();
323
- await writeFile4(
324
- path12.join(bundle.paths.tantivy, "prosa-index.json"),
442
+ await writeFile5(
443
+ path13.join(bundle.paths.tantivy, "prosa-index.json"),
325
444
  `${JSON.stringify(
326
445
  {
327
446
  engine: "tantivy",
@@ -1001,7 +1120,6 @@ var PROSA_PARSER_VERSION = "0.1.0";
1001
1120
  var PROSA_SCHEMA_VERSION = 2;
1002
1121
 
1003
1122
  // src/cli/commands/compile.ts
1004
- import path13 from "path";
1005
1123
  import { Command } from "commander";
1006
1124
 
1007
1125
  // src/core/bundle.ts
@@ -1507,6 +1625,10 @@ function closeBundle(bundle) {
1507
1625
  closeDb(bundle.db);
1508
1626
  }
1509
1627
 
1628
+ // src/services/compile.ts
1629
+ import os2 from "os";
1630
+ import path14 from "path";
1631
+
1510
1632
  // src/importers/claude/index.ts
1511
1633
  init_cas();
1512
1634
  init_db();
@@ -1627,8 +1749,9 @@ async function recordError(bundle, batchId, args) {
1627
1749
  // src/core/ingest/idempotency.ts
1628
1750
  init_compress();
1629
1751
  init_hash();
1752
+ init_cas();
1630
1753
  init_db();
1631
- import { access as access2, mkdir as mkdir3, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
1754
+ import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
1632
1755
  import path3 from "path";
1633
1756
  async function registerSourceFile(bundle, args) {
1634
1757
  const st = await stat2(args.absolutePath);
@@ -1714,7 +1837,7 @@ async function preserveRawSourceBytes(bundle, bytes) {
1714
1837
  const { bytes: stored, compression } = compressBytes(bytes);
1715
1838
  const storagePath = rawSourceStoragePath(hash, compression);
1716
1839
  const absolutePath = path3.join(bundle.path, storagePath);
1717
- await mkdir3(path3.dirname(absolutePath), { recursive: true });
1840
+ await ensureDir(path3.dirname(absolutePath));
1718
1841
  if (!await fileExists(absolutePath)) {
1719
1842
  await writeFile3(absolutePath, stored);
1720
1843
  }
@@ -1814,17 +1937,34 @@ async function readdirSafe(dir) {
1814
1937
 
1815
1938
  // src/importers/claude/index.ts
1816
1939
  var PREVIEW_MAX = 4e3;
1817
- async function compileClaude(bundle, root) {
1940
+ async function compileClaude(bundle, root, options = {}) {
1941
+ const logger = options.logger;
1818
1942
  const batch = startBatch(bundle, "claude", [root]);
1819
1943
  const counts = emptyCounts();
1944
+ logger?.info({ batch_id: batch.batch_id, root }, "claude batch started");
1820
1945
  try {
1821
1946
  for await (const file of discoverClaudeFiles(root)) {
1822
1947
  counts.source_files_seen++;
1948
+ logger?.debug(
1949
+ {
1950
+ path: file.filePath,
1951
+ project_slug: file.projectSlug,
1952
+ is_subagent: file.isSubagent
1953
+ },
1954
+ "claude source file discovered"
1955
+ );
1823
1956
  try {
1824
- const fc = await compileClaudeFile(bundle, batch, file);
1957
+ const fc = await compileClaudeFile(bundle, batch, file, logger);
1825
1958
  addCounts(counts, fc);
1826
1959
  } catch (error) {
1827
1960
  counts.errors++;
1961
+ logger?.warn(
1962
+ {
1963
+ err: error,
1964
+ path: file.filePath
1965
+ },
1966
+ "claude source file failed"
1967
+ );
1828
1968
  await recordError(bundle, batch.batch_id, {
1829
1969
  kind: "claude_file_failed",
1830
1970
  message: error instanceof Error ? error.message : String(error),
@@ -1833,9 +1973,12 @@ async function compileClaude(bundle, root) {
1833
1973
  }
1834
1974
  }
1835
1975
  linkSubagentParents(bundle);
1976
+ logger?.debug({ batch_id: batch.batch_id }, "claude subagent parent links refreshed");
1836
1977
  finishBatch(bundle, batch, counts, "completed");
1978
+ logger?.info({ batch_id: batch.batch_id, counts }, "claude batch completed");
1837
1979
  } catch (error) {
1838
1980
  finishBatch(bundle, batch, counts, "failed");
1981
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "claude batch failed");
1839
1982
  throw error;
1840
1983
  }
1841
1984
  return { batch, counts };
@@ -1889,7 +2032,7 @@ function addCounts(target, source) {
1889
2032
  target.edges += source.edges;
1890
2033
  target.errors += source.errors;
1891
2034
  }
1892
- async function compileClaudeFile(bundle, batch, file) {
2035
+ async function compileClaudeFile(bundle, batch, file, logger) {
1893
2036
  const counts = emptyFileCounts();
1894
2037
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
1895
2038
  sourceTool: "claude",
@@ -1899,9 +2042,17 @@ async function compileClaudeFile(bundle, batch, file) {
1899
2042
  });
1900
2043
  if (alreadyKnown) {
1901
2044
  counts.source_files_skipped = 1;
2045
+ logger?.debug(
2046
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
2047
+ "claude source file skipped"
2048
+ );
1902
2049
  return counts;
1903
2050
  }
1904
2051
  counts.source_files_imported = 1;
2052
+ logger?.debug(
2053
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
2054
+ "claude source file registered"
2055
+ );
1905
2056
  const text = await readFile4(file.filePath, "utf8");
1906
2057
  const rawLines = text.split("\n");
1907
2058
  const lines = rawLines[rawLines.length - 1] === "" ? rawLines.slice(0, -1) : rawLines;
@@ -1918,7 +2069,8 @@ async function compileClaudeFile(bundle, batch, file) {
1918
2069
  artifacts: [],
1919
2070
  edges: [],
1920
2071
  searchDocs: [],
1921
- uuidToMessageId: /* @__PURE__ */ new Map()
2072
+ uuidToMessageId: /* @__PURE__ */ new Map(),
2073
+ objects: createPendingObjects()
1922
2074
  };
1923
2075
  let modelFirst = null;
1924
2076
  let modelLast = null;
@@ -1933,7 +2085,7 @@ async function compileClaudeFile(bundle, batch, file) {
1933
2085
  const lineNo = i + 1;
1934
2086
  const ordinal = i;
1935
2087
  const lineBytes = Buffer.from(line, "utf8");
1936
- const rawObjectId = await putBytes(bundle, lineBytes, {
2088
+ const rawObjectId = stageBytes(pending.objects, lineBytes, {
1937
2089
  mimeType: "application/jsonl-line",
1938
2090
  encoding: "utf-8"
1939
2091
  });
@@ -1944,7 +2096,7 @@ async function compileClaudeFile(bundle, batch, file) {
1944
2096
  } catch {
1945
2097
  parserStatus = "failed";
1946
2098
  }
1947
- const decodedObjectId = parsed != null && parserStatus === "ok" ? await putJson(bundle, parsed) : null;
2099
+ const decodedObjectId = null;
1948
2100
  const nativeId = parsed?.uuid ?? null;
1949
2101
  const rawRecordId2 = rawRecordId(sourceFile.source_file_id, ordinal, rawObjectId);
1950
2102
  pending.rawRecords.push({
@@ -2042,7 +2194,7 @@ async function compileClaudeFile(bundle, batch, file) {
2042
2194
  raw_record_id: rawRecordId2
2043
2195
  });
2044
2196
  if (content.length > PREVIEW_MAX) {
2045
- const fullId = await putText(bundle, content);
2197
+ const fullId = stageText(pending.objects, content);
2046
2198
  const last = pending.blocks[pending.blocks.length - 1];
2047
2199
  if (last) last.text_object_id = fullId;
2048
2200
  }
@@ -2182,6 +2334,7 @@ async function compileClaudeFile(bundle, batch, file) {
2182
2334
  pending.session.git_branch_initial ??= branchInitial;
2183
2335
  }
2184
2336
  buildSearchDocs(pending);
2337
+ await flushPendingObjects(bundle, pending.objects);
2185
2338
  transactional(bundle.db, () => {
2186
2339
  flushPending(bundle, pending, { modelFirst, modelLast });
2187
2340
  });
@@ -2194,6 +2347,10 @@ async function compileClaudeFile(bundle, batch, file) {
2194
2347
  counts.tool_results = pending.toolResults.length;
2195
2348
  counts.artifacts = pending.artifacts.length;
2196
2349
  counts.edges = pending.edges.length;
2350
+ logger?.debug(
2351
+ { path: file.filePath, source_file_id: sourceFile.source_file_id, counts },
2352
+ "claude source file imported"
2353
+ );
2197
2354
  return counts;
2198
2355
  }
2199
2356
  function createSessionFromFirstRecord(file, parsed, meta, ts, rawRecordId2) {
@@ -2241,7 +2398,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
2241
2398
  event_id: null,
2242
2399
  ordinal: blockOrdinal,
2243
2400
  block_type: "text",
2244
- text_object_id: text.length > PREVIEW_MAX ? await putText(bundle, text) : null,
2401
+ text_object_id: text.length > PREVIEW_MAX ? stageText(pending.objects, text) : null,
2245
2402
  text_inline: text.slice(0, PREVIEW_MAX),
2246
2403
  is_error: 0,
2247
2404
  visibility: "default",
@@ -2257,7 +2414,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
2257
2414
  event_id: null,
2258
2415
  ordinal: blockOrdinal,
2259
2416
  block_type: "thinking",
2260
- text_object_id: text.length > PREVIEW_MAX ? await putText(bundle, text) : null,
2417
+ text_object_id: text.length > PREVIEW_MAX ? stageText(pending.objects, text) : null,
2261
2418
  text_inline: text.slice(0, PREVIEW_MAX),
2262
2419
  is_error: 0,
2263
2420
  visibility: "hidden_by_default",
@@ -2269,7 +2426,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
2269
2426
  const tu = block;
2270
2427
  const sourceCallId = tu.id ?? `${blockOrdinal}`;
2271
2428
  const toolName = tu.name ?? "unknown";
2272
- const argsId = tu.input != null ? await putJson(bundle, tu.input) : null;
2429
+ const argsId = tu.input != null ? stageJson(pending.objects, tu.input) : null;
2273
2430
  const command = inferCommandFromArgs(toolName, tu.input);
2274
2431
  const filePath = inferPathFromArgs(tu.input);
2275
2432
  const tcId = toolCallId(sessionId2, sourceCallId);
@@ -2310,13 +2467,14 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
2310
2467
  const sourceCallId = tr.tool_use_id ?? null;
2311
2468
  const isError = tr.is_error === true ? 1 : 0;
2312
2469
  const text = stringifyOrNull(tr.content) ?? "";
2470
+ const overflowId = text.length > PREVIEW_MAX ? stageText(pending.objects, text) : null;
2313
2471
  pending.blocks.push({
2314
2472
  block_id: blkId,
2315
2473
  message_id: messageId2,
2316
2474
  event_id: null,
2317
2475
  ordinal: blockOrdinal,
2318
2476
  block_type: "tool_result",
2319
- text_object_id: text.length > PREVIEW_MAX ? await putText(bundle, text) : null,
2477
+ text_object_id: overflowId,
2320
2478
  text_inline: text.slice(0, PREVIEW_MAX),
2321
2479
  is_error: isError,
2322
2480
  visibility: "default",
@@ -2335,7 +2493,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
2335
2493
  duration_ms: null,
2336
2494
  stdout_object_id: null,
2337
2495
  stderr_object_id: null,
2338
- output_object_id: text.length > PREVIEW_MAX ? await putText(bundle, text) : null,
2496
+ output_object_id: overflowId,
2339
2497
  preview: text.slice(0, PREVIEW_MAX),
2340
2498
  raw_record_id: rawRecordId2
2341
2499
  });
@@ -2750,17 +2908,27 @@ async function* walk(dir) {
2750
2908
 
2751
2909
  // src/importers/codex/index.ts
2752
2910
  var PREVIEW_MAX2 = 4e3;
2753
- async function compileCodex(bundle, root) {
2911
+ async function compileCodex(bundle, root, options = {}) {
2912
+ const logger = options.logger;
2754
2913
  const batch = startBatch(bundle, "codex", [root]);
2755
2914
  const counts = emptyCounts();
2915
+ logger?.info({ batch_id: batch.batch_id, root }, "codex batch started");
2756
2916
  try {
2757
2917
  for await (const filePath of discoverCodexSessions(root)) {
2758
2918
  counts.source_files_seen++;
2919
+ logger?.debug({ path: filePath }, "codex source file discovered");
2759
2920
  try {
2760
- const fileCounts = await compileCodexFile(bundle, batch, filePath);
2921
+ const fileCounts = await compileCodexFile(bundle, batch, filePath, logger);
2761
2922
  addCounts2(counts, fileCounts);
2762
2923
  } catch (error) {
2763
2924
  counts.errors++;
2925
+ logger?.warn(
2926
+ {
2927
+ err: error,
2928
+ path: filePath
2929
+ },
2930
+ "codex source file failed"
2931
+ );
2764
2932
  await recordError(bundle, batch.batch_id, {
2765
2933
  kind: "codex_file_failed",
2766
2934
  message: error instanceof Error ? error.message : String(error),
@@ -2769,9 +2937,12 @@ async function compileCodex(bundle, root) {
2769
2937
  }
2770
2938
  }
2771
2939
  linkSubagentParents2(bundle);
2940
+ logger?.debug({ batch_id: batch.batch_id }, "codex subagent parent links refreshed");
2772
2941
  finishBatch(bundle, batch, counts, "completed");
2942
+ logger?.info({ batch_id: batch.batch_id, counts }, "codex batch completed");
2773
2943
  } catch (error) {
2774
2944
  finishBatch(bundle, batch, counts, "failed");
2945
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "codex batch failed");
2775
2946
  throw error;
2776
2947
  }
2777
2948
  return { batch, counts };
@@ -2824,7 +2995,7 @@ function addCounts2(target, source) {
2824
2995
  target.edges += source.edges;
2825
2996
  target.errors += source.errors;
2826
2997
  }
2827
- async function compileCodexFile(bundle, batch, filePath) {
2998
+ async function compileCodexFile(bundle, batch, filePath, logger) {
2828
2999
  const counts = emptyFileCounts2();
2829
3000
  const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
2830
3001
  sourceTool: "codex",
@@ -2833,9 +3004,17 @@ async function compileCodexFile(bundle, batch, filePath) {
2833
3004
  });
2834
3005
  if (alreadyKnown) {
2835
3006
  counts.source_files_skipped = 1;
3007
+ logger?.debug(
3008
+ { path: filePath, source_file_id: sourceFileRow.source_file_id },
3009
+ "codex source file skipped"
3010
+ );
2836
3011
  return counts;
2837
3012
  }
2838
3013
  counts.source_files_imported = 1;
3014
+ logger?.debug(
3015
+ { path: filePath, source_file_id: sourceFileRow.source_file_id },
3016
+ "codex source file registered"
3017
+ );
2839
3018
  const text = await readFile5(filePath, "utf8");
2840
3019
  const rawLines = text.split("\n");
2841
3020
  const lines = rawLines[rawLines.length - 1] === "" ? rawLines.slice(0, -1) : rawLines;
@@ -2852,7 +3031,8 @@ async function compileCodexFile(bundle, batch, filePath) {
2852
3031
  toolResults: [],
2853
3032
  artifacts: [],
2854
3033
  edges: [],
2855
- searchDocs: []
3034
+ searchDocs: [],
3035
+ objects: createPendingObjects()
2856
3036
  };
2857
3037
  let sessionStartTs = null;
2858
3038
  let sessionEndTs = null;
@@ -2866,7 +3046,7 @@ async function compileCodexFile(bundle, batch, filePath) {
2866
3046
  const lineNo = i + 1;
2867
3047
  const ordinal = i;
2868
3048
  const lineBytes = Buffer.from(line, "utf8");
2869
- const rawObjectId = await putBytes(bundle, lineBytes, {
3049
+ const rawObjectId = stageBytes(pending.objects, lineBytes, {
2870
3050
  mimeType: "application/jsonl-line",
2871
3051
  encoding: "utf-8"
2872
3052
  });
@@ -2877,7 +3057,7 @@ async function compileCodexFile(bundle, batch, filePath) {
2877
3057
  } catch {
2878
3058
  parserStatus = "failed";
2879
3059
  }
2880
- const decodedObjectId = parsed != null && parserStatus === "ok" ? await putJson(bundle, parsed) : null;
3060
+ const decodedObjectId = null;
2881
3061
  const nativeId = parsed ? extractNativeId(parsed) : null;
2882
3062
  const rawRecordId2 = rawRecordId(sourceFileRow.source_file_id, ordinal, rawObjectId);
2883
3063
  pending.rawRecords.push({
@@ -3046,6 +3226,7 @@ async function compileCodexFile(bundle, batch, filePath) {
3046
3226
  pending.session.start_ts ??= sessionStartTs;
3047
3227
  }
3048
3228
  buildSearchDocs2(pending);
3229
+ await flushPendingObjects(bundle, pending.objects);
3049
3230
  transactional(bundle.db, () => {
3050
3231
  flushPending2(bundle, pending, {
3051
3232
  sessionEndTs,
@@ -3064,6 +3245,10 @@ async function compileCodexFile(bundle, batch, filePath) {
3064
3245
  counts.tool_results = pending.toolResults.length;
3065
3246
  counts.artifacts = pending.artifacts.length;
3066
3247
  counts.edges = pending.edges.length;
3248
+ logger?.debug(
3249
+ { path: filePath, source_file_id: sourceFileRow.source_file_id, counts },
3250
+ "codex source file imported"
3251
+ );
3067
3252
  return counts;
3068
3253
  }
3069
3254
  function handleResponseItem(_bundle, sessionId2, currentTurnId, rawRecordId2, ordinal, ts, ri, payloadObjectId, nextMsgOrdinal, currentModel, pending) {
@@ -3223,8 +3408,8 @@ async function handleEventMsg(bundle, sessionId2, currentTurnId, rawRecordId2, o
3223
3408
  const subtype = em.type ?? "unknown";
3224
3409
  if (subtype === "exec_command_end") {
3225
3410
  const sourceCallId = em.call_id ?? null;
3226
- const stdoutId = em.stdout ? await putText(bundle, em.stdout, { mimeType: "text/plain" }) : null;
3227
- const stderrId = em.stderr ? await putText(bundle, em.stderr, { mimeType: "text/plain" }) : null;
3411
+ const stdoutId = em.stdout ? stageText(pending.objects, em.stdout, { mimeType: "text/plain" }) : null;
3412
+ const stderrId = em.stderr ? stageText(pending.objects, em.stderr, { mimeType: "text/plain" }) : null;
3228
3413
  const preview = (em.formatted_output ?? em.aggregated_output ?? em.stdout ?? "").slice(
3229
3414
  0,
3230
3415
  PREVIEW_MAX2
@@ -3871,17 +4056,34 @@ async function readdirSafe2(dir) {
3871
4056
 
3872
4057
  // src/importers/cursor/index.ts
3873
4058
  var PREVIEW_MAX3 = 4e3;
3874
- async function compileCursor(bundle, root) {
4059
+ async function compileCursor(bundle, root, options = {}) {
4060
+ const logger = options.logger;
3875
4061
  const batch = startBatch(bundle, "cursor", [root]);
3876
4062
  const counts = emptyCounts();
4063
+ logger?.info({ batch_id: batch.batch_id, root }, "cursor batch started");
3877
4064
  try {
3878
4065
  for await (const store of discoverCursorStores(root)) {
3879
4066
  counts.source_files_seen++;
4067
+ logger?.debug(
4068
+ {
4069
+ path: store.filePath,
4070
+ workspace_id: store.workspaceId,
4071
+ agent_id: store.agentId
4072
+ },
4073
+ "cursor store discovered"
4074
+ );
3880
4075
  try {
3881
- const fc = await compileCursorStore(bundle, batch, store);
4076
+ const fc = await compileCursorStore(bundle, batch, store, logger);
3882
4077
  addCounts3(counts, fc);
3883
4078
  } catch (error) {
3884
4079
  counts.errors++;
4080
+ logger?.warn(
4081
+ {
4082
+ err: error,
4083
+ path: store.filePath
4084
+ },
4085
+ "cursor store failed"
4086
+ );
3885
4087
  await recordError(bundle, batch.batch_id, {
3886
4088
  kind: "cursor_store_failed",
3887
4089
  message: error instanceof Error ? error.message : String(error),
@@ -3890,8 +4092,10 @@ async function compileCursor(bundle, root) {
3890
4092
  }
3891
4093
  }
3892
4094
  finishBatch(bundle, batch, counts, "completed");
4095
+ logger?.info({ batch_id: batch.batch_id, counts }, "cursor batch completed");
3893
4096
  } catch (error) {
3894
4097
  finishBatch(bundle, batch, counts, "failed");
4098
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "cursor batch failed");
3895
4099
  throw error;
3896
4100
  }
3897
4101
  return { batch, counts };
@@ -3926,7 +4130,7 @@ function addCounts3(target, source) {
3926
4130
  target.edges += source.edges;
3927
4131
  target.errors += source.errors;
3928
4132
  }
3929
- async function compileCursorStore(bundle, batch, store) {
4133
+ async function compileCursorStore(bundle, batch, store, logger) {
3930
4134
  const counts = emptyFileCounts3();
3931
4135
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
3932
4136
  sourceTool: "cursor",
@@ -3936,9 +4140,17 @@ async function compileCursorStore(bundle, batch, store) {
3936
4140
  });
3937
4141
  if (alreadyKnown) {
3938
4142
  counts.source_files_skipped = 1;
4143
+ logger?.debug(
4144
+ { path: store.filePath, source_file_id: sourceFile.source_file_id },
4145
+ "cursor store skipped"
4146
+ );
3939
4147
  return counts;
3940
4148
  }
3941
4149
  counts.source_files_imported = 1;
4150
+ logger?.debug(
4151
+ { path: store.filePath, source_file_id: sourceFile.source_file_id },
4152
+ "cursor store registered"
4153
+ );
3942
4154
  const cdb = new Database2(store.filePath, { readonly: true, fileMustExist: true });
3943
4155
  try {
3944
4156
  const pending = {
@@ -3951,7 +4163,8 @@ async function compileCursorStore(bundle, batch, store) {
3951
4163
  toolCallsList: [],
3952
4164
  toolResults: [],
3953
4165
  artifacts: [],
3954
- searchDocs: []
4166
+ searchDocs: [],
4167
+ objects: createPendingObjects()
3955
4168
  };
3956
4169
  const metaRow = cdb.prepare(`SELECT value FROM meta WHERE key='0'`).get();
3957
4170
  let meta = {};
@@ -3963,7 +4176,7 @@ async function compileCursorStore(bundle, batch, store) {
3963
4176
  } catch {
3964
4177
  meta = {};
3965
4178
  }
3966
- const metaObjId = await putBytes(bundle, Buffer.from(metaText, "utf8"), {
4179
+ const metaObjId = stageBytes(pending.objects, Buffer.from(metaText, "utf8"), {
3967
4180
  mimeType: "application/json",
3968
4181
  encoding: "utf-8"
3969
4182
  });
@@ -4002,7 +4215,7 @@ async function compileCursorStore(bundle, batch, store) {
4002
4215
  const blob = blobs[i];
4003
4216
  if (!blob) continue;
4004
4217
  const ordinal = i + 1;
4005
- const blobObjectId = await putBytes(bundle, blob.data);
4218
+ const blobObjectId = stageBytes(pending.objects, blob.data);
4006
4219
  const blobRawId = rawRecordId(sourceFile.source_file_id, ordinal, blobObjectId);
4007
4220
  let parsed = null;
4008
4221
  const firstByte = blob.data[0];
@@ -4022,7 +4235,7 @@ async function compileCursorStore(bundle, batch, store) {
4022
4235
  json_pointer: `blobs/${blob.id}`,
4023
4236
  native_id: blob.id,
4024
4237
  raw_object_id: blobObjectId,
4025
- decoded_json_object_id: parsed != null ? await putJson(bundle, parsed) : null,
4238
+ decoded_json_object_id: parsed != null ? stageJson(pending.objects, parsed) : null,
4026
4239
  parser_status: parsed != null ? "ok" : looksJson ? "failed" : "partial",
4027
4240
  confidence: "low",
4028
4241
  // timeline order from blob list isn't canonical
@@ -4077,6 +4290,7 @@ async function compileCursorStore(bundle, batch, store) {
4077
4290
  }
4078
4291
  }
4079
4292
  buildSearchDocs3(pending);
4293
+ await flushPendingObjects(bundle, pending.objects);
4080
4294
  transactional(bundle.db, () => {
4081
4295
  flushPending3(bundle, pending);
4082
4296
  });
@@ -4088,6 +4302,10 @@ async function compileCursorStore(bundle, batch, store) {
4088
4302
  counts.tool_calls = pending.toolCallsList.length;
4089
4303
  counts.tool_results = pending.toolResults.length;
4090
4304
  counts.artifacts = pending.artifacts.length;
4305
+ logger?.debug(
4306
+ { path: store.filePath, source_file_id: sourceFile.source_file_id, counts },
4307
+ "cursor store imported"
4308
+ );
4091
4309
  return counts;
4092
4310
  } finally {
4093
4311
  cdb.close();
@@ -4112,7 +4330,7 @@ function mapRole(role) {
4112
4330
  }
4113
4331
  async function pushTextBlock(bundle, pending, messageId2, ordinal, blockType, text, rawRecordId2, visibility = "default") {
4114
4332
  if (!text) return;
4115
- const overflow = text.length > PREVIEW_MAX3 ? await putText(bundle, text) : null;
4333
+ const overflow = text.length > PREVIEW_MAX3 ? stageText(pending.objects, text) : null;
4116
4334
  pending.blocks.push({
4117
4335
  block_id: blockId(messageId2, ordinal),
4118
4336
  message_id: messageId2,
@@ -4163,7 +4381,7 @@ async function processContentItem(bundle, sessionId2, messageId2, eventId2, ordi
4163
4381
  if (t === "tool-call") {
4164
4382
  const sourceCallId = item.toolCallId ?? `${ordinal}`;
4165
4383
  const toolName = item.toolName ?? "unknown";
4166
- const argsObjectId = item.args != null ? await putJson(bundle, item.args) : null;
4384
+ const argsObjectId = item.args != null ? stageJson(pending.objects, item.args) : null;
4167
4385
  const tcId = toolCallId(sessionId2, sourceCallId);
4168
4386
  pending.blocks.push({
4169
4387
  block_id: blockId(messageId2, ordinal),
@@ -4200,7 +4418,7 @@ async function processContentItem(bundle, sessionId2, messageId2, eventId2, ordi
4200
4418
  if (t === "tool-result") {
4201
4419
  const sourceCallId = item.toolCallId ?? `${ordinal}`;
4202
4420
  const text = stringifyOrNull3(item.result) ?? "";
4203
- const overflow = text.length > PREVIEW_MAX3 ? await putText(bundle, text) : null;
4421
+ const overflow = text.length > PREVIEW_MAX3 ? stageText(pending.objects, text) : null;
4204
4422
  const isError = readIsError(item) ? 1 : 0;
4205
4423
  pending.blocks.push({
4206
4424
  block_id: blockId(messageId2, ordinal),
@@ -4564,17 +4782,34 @@ async function readdirSafe3(dir) {
4564
4782
 
4565
4783
  // src/importers/gemini/index.ts
4566
4784
  var PREVIEW_MAX4 = 4e3;
4567
- async function compileGemini(bundle, root) {
4785
+ async function compileGemini(bundle, root, options = {}) {
4786
+ const logger = options.logger;
4568
4787
  const batch = startBatch(bundle, "gemini", [root]);
4569
4788
  const counts = emptyCounts();
4789
+ logger?.info({ batch_id: batch.batch_id, root }, "gemini batch started");
4570
4790
  try {
4571
4791
  for await (const file of discoverGeminiChats(root)) {
4572
4792
  counts.source_files_seen++;
4793
+ logger?.debug(
4794
+ {
4795
+ path: file.filePath,
4796
+ project_dir: file.projectDir,
4797
+ project_root: file.projectRoot
4798
+ },
4799
+ "gemini source file discovered"
4800
+ );
4573
4801
  try {
4574
- const fc = await compileGeminiFile(bundle, batch, file);
4802
+ const fc = await compileGeminiFile(bundle, batch, file, logger);
4575
4803
  addCounts4(counts, fc);
4576
4804
  } catch (error) {
4577
4805
  counts.errors++;
4806
+ logger?.warn(
4807
+ {
4808
+ err: error,
4809
+ path: file.filePath
4810
+ },
4811
+ "gemini source file failed"
4812
+ );
4578
4813
  await recordError(bundle, batch.batch_id, {
4579
4814
  kind: "gemini_file_failed",
4580
4815
  message: error instanceof Error ? error.message : String(error),
@@ -4583,8 +4818,10 @@ async function compileGemini(bundle, root) {
4583
4818
  }
4584
4819
  }
4585
4820
  finishBatch(bundle, batch, counts, "completed");
4821
+ logger?.info({ batch_id: batch.batch_id, counts }, "gemini batch completed");
4586
4822
  } catch (error) {
4587
4823
  finishBatch(bundle, batch, counts, "failed");
4824
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "gemini batch failed");
4588
4825
  throw error;
4589
4826
  }
4590
4827
  return { batch, counts };
@@ -4619,7 +4856,7 @@ function addCounts4(target, source) {
4619
4856
  target.edges += source.edges;
4620
4857
  target.errors += source.errors;
4621
4858
  }
4622
- async function compileGeminiFile(bundle, batch, file) {
4859
+ async function compileGeminiFile(bundle, batch, file, logger) {
4623
4860
  const counts = emptyFileCounts4();
4624
4861
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
4625
4862
  sourceTool: "gemini",
@@ -4629,12 +4866,21 @@ async function compileGeminiFile(bundle, batch, file) {
4629
4866
  });
4630
4867
  if (alreadyKnown) {
4631
4868
  counts.source_files_skipped = 1;
4869
+ logger?.debug(
4870
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
4871
+ "gemini source file skipped"
4872
+ );
4632
4873
  return counts;
4633
4874
  }
4634
4875
  counts.source_files_imported = 1;
4876
+ logger?.debug(
4877
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
4878
+ "gemini source file registered"
4879
+ );
4635
4880
  const text = await readFile7(file.filePath, "utf8");
4636
4881
  const parsed = JSON.parse(text);
4637
- const fileObjectId = await putBytes(bundle, Buffer.from(text, "utf8"), {
4882
+ const objects = createPendingObjects();
4883
+ const fileObjectId = stageBytes(objects, Buffer.from(text, "utf8"), {
4638
4884
  mimeType: "application/json",
4639
4885
  encoding: "utf-8"
4640
4886
  });
@@ -4664,7 +4910,8 @@ async function compileGeminiFile(bundle, batch, file) {
4664
4910
  toolResults: [],
4665
4911
  artifacts: [],
4666
4912
  searchDocs: [],
4667
- project: null
4913
+ project: null,
4914
+ objects
4668
4915
  };
4669
4916
  const sourceSid = parsed.sessionId ?? path11.basename(file.filePath, ".json");
4670
4917
  const sessionPk = sessionId("gemini", sourceSid);
@@ -4701,6 +4948,7 @@ async function compileGeminiFile(bundle, batch, file) {
4701
4948
  );
4702
4949
  }
4703
4950
  buildSearchDocs4(pending);
4951
+ await flushPendingObjects(bundle, pending.objects);
4704
4952
  transactional(bundle.db, () => {
4705
4953
  flushPending4(bundle, pending);
4706
4954
  });
@@ -4712,12 +4960,16 @@ async function compileGeminiFile(bundle, batch, file) {
4712
4960
  counts.tool_calls = pending.toolCallsList.length;
4713
4961
  counts.tool_results = pending.toolResults.length;
4714
4962
  counts.artifacts = pending.artifacts.length;
4963
+ logger?.debug(
4964
+ { path: file.filePath, source_file_id: sourceFile.source_file_id, counts },
4965
+ "gemini source file imported"
4966
+ );
4715
4967
  return counts;
4716
4968
  }
4717
4969
  async function processMessage(bundle, sessionId2, sourceFileId2, index, msg, batchId, pending) {
4718
4970
  const ordinal = index + 1;
4719
4971
  const ts = msg.timestamp ?? null;
4720
- const payloadId = await putJson(bundle, msg);
4972
+ const payloadId = stageJson(pending.objects, msg);
4721
4973
  const pointer = `/messages/${index}`;
4722
4974
  const rawObjectIdInput = sha256Hex(`${pointer}
4723
4975
  ${JSON.stringify(msg)}`);
@@ -4834,7 +5086,7 @@ ${JSON.stringify(msg)}`);
4834
5086
  }
4835
5087
  async function pushTextBlock2(bundle, pending, messageId2, blockOrdinal, blockType, text, rawRecordId2, visibility = "default") {
4836
5088
  if (!text) return;
4837
- const overflowId = text.length > PREVIEW_MAX4 ? await putText(bundle, text) : null;
5089
+ const overflowId = text.length > PREVIEW_MAX4 ? stageText(pending.objects, text) : null;
4838
5090
  pending.blocks.push({
4839
5091
  block_id: blockId(messageId2, blockOrdinal),
4840
5092
  message_id: messageId2,
@@ -4851,7 +5103,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
4851
5103
  const sourceCallId = tc.id ?? `${messageId2}:${index}`;
4852
5104
  const toolName = tc.name ?? "unknown";
4853
5105
  const toolCallId2 = toolCallId(sessionId2, sourceCallId);
4854
- const argsObjectId = tc.args ? await putJson(bundle, tc.args) : null;
5106
+ const argsObjectId = tc.args ? stageJson(pending.objects, tc.args) : null;
4855
5107
  pending.toolCallsList.push({
4856
5108
  tool_call_id: toolCallId2,
4857
5109
  message_id: messageId2,
@@ -4870,7 +5122,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
4870
5122
  });
4871
5123
  const isError = tc.status === "error" ? 1 : 0;
4872
5124
  const resultText = renderToolResultText(tc.result);
4873
- const overflowId = resultText.length > PREVIEW_MAX4 ? await putText(bundle, resultText) : null;
5125
+ const overflowId = resultText.length > PREVIEW_MAX4 ? stageText(pending.objects, resultText) : null;
4874
5126
  pending.toolResults.push({
4875
5127
  tool_result_id: toolResultId(sessionId2, sourceCallId),
4876
5128
  tool_call_id: toolCallId2,
@@ -4887,7 +5139,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
4887
5139
  const rd = tc.resultDisplay;
4888
5140
  if (rd.fileDiff || rd.filePath) {
4889
5141
  const diffText = rd.fileDiff ?? "";
4890
- const diffId = diffText ? await putText(bundle, diffText, { mimeType: "text/x-diff" }) : null;
5142
+ const diffId = diffText ? stageText(pending.objects, diffText, { mimeType: "text/x-diff" }) : null;
4891
5143
  pending.artifacts.push({
4892
5144
  artifact_id: artifactId(sessionId2, "gemini", `${toolCallId2}:diff`),
4893
5145
  kind: "diff",
@@ -5242,59 +5494,372 @@ function flushPending4(bundle, pending) {
5242
5494
  }
5243
5495
  }
5244
5496
 
5245
- // src/cli/commands/compile.ts
5497
+ // src/services/export/parquet.ts
5498
+ import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
5499
+ import path12 from "path";
5500
+ import { DuckDBConnection } from "@duckdb/node-api";
5501
+ var PARQUET_TABLES = [
5502
+ "objects",
5503
+ "source_files",
5504
+ "import_batches",
5505
+ "raw_records",
5506
+ "import_errors",
5507
+ "uncertainties",
5508
+ "projects",
5509
+ "sessions",
5510
+ "turns",
5511
+ "events",
5512
+ "messages",
5513
+ "content_blocks",
5514
+ "tool_calls",
5515
+ "tool_results",
5516
+ "artifacts",
5517
+ "edges",
5518
+ "search_docs"
5519
+ ];
5520
+ async function exportBundleParquet(options) {
5521
+ const snapshot = await openBundleSnapshot(options.bundlePath);
5522
+ const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
5523
+ await mkdir3(outDir, { recursive: true });
5524
+ const files = Object.fromEntries(
5525
+ PARQUET_TABLES.map((table) => [table, path12.join(outDir, `${table}.parquet`)])
5526
+ );
5527
+ const manifestPath = path12.join(outDir, "manifest.json");
5528
+ for (const file of [...Object.values(files), manifestPath]) {
5529
+ await rm(file, { force: true });
5530
+ }
5531
+ const connection = await createDuckDbConnection();
5532
+ try {
5533
+ await attachSqlite(connection, snapshot.dbPath);
5534
+ for (const table of PARQUET_TABLES) {
5535
+ await connection.run(
5536
+ `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
5537
+ );
5538
+ }
5539
+ } finally {
5540
+ connection.closeSync();
5541
+ }
5542
+ const manifest = {
5543
+ exported_at: (/* @__PURE__ */ new Date()).toISOString(),
5544
+ source_db: snapshot.dbPath,
5545
+ schema_version: snapshot.schemaVersion,
5546
+ parser_version: snapshot.parserVersion,
5547
+ tables: Object.fromEntries(
5548
+ PARQUET_TABLES.map((table) => [
5549
+ table,
5550
+ {
5551
+ file: path12.basename(files[table]),
5552
+ rows: snapshot.counts[table]
5553
+ }
5554
+ ])
5555
+ )
5556
+ };
5557
+ await writeFile4(manifestPath, `${JSON.stringify(manifest, null, 2)}
5558
+ `, "utf8");
5559
+ return { outDir, manifestPath, files, counts: snapshot.counts };
5560
+ }
5561
+ async function queryDuckDbParquet(options) {
5562
+ const parquetDir = path12.resolve(options.parquetDir);
5563
+ const connection = await createDuckDbConnection();
5564
+ try {
5565
+ for (const table of PARQUET_TABLES) {
5566
+ await connection.run(
5567
+ `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
5568
+ path12.join(parquetDir, `${table}.parquet`)
5569
+ )})`
5570
+ );
5571
+ }
5572
+ const reader = await connection.runAndReadAll(options.sql);
5573
+ return {
5574
+ columns: reader.deduplicatedColumnNames(),
5575
+ rows: reader.getRowObjectsJson()
5576
+ };
5577
+ } catch (error) {
5578
+ if (isMissingParquetError(error)) {
5579
+ throw new Error(
5580
+ `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
5581
+ );
5582
+ }
5583
+ throw error;
5584
+ } finally {
5585
+ connection.closeSync();
5586
+ }
5587
+ }
5588
+ async function createDuckDbConnection() {
5589
+ return DuckDBConnection.create();
5590
+ }
5591
+ async function attachSqlite(connection, dbPath) {
5592
+ try {
5593
+ await connection.run("INSTALL sqlite");
5594
+ await connection.run("LOAD sqlite");
5595
+ await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
5596
+ } catch (error) {
5597
+ throw new Error(
5598
+ `DuckDB could not attach prosa.sqlite via the sqlite extension: ${error instanceof Error ? error.message : String(error)}`
5599
+ );
5600
+ }
5601
+ }
5602
+ async function openBundleSnapshot(bundlePath) {
5603
+ const bundle = await openBundle(bundlePath);
5604
+ try {
5605
+ const counts = Object.fromEntries(
5606
+ PARQUET_TABLES.map((table) => {
5607
+ const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
5608
+ return [table, row?.n ?? 0];
5609
+ })
5610
+ );
5611
+ return {
5612
+ dbPath: bundle.paths.db,
5613
+ schemaVersion: bundle.manifest.schema_version,
5614
+ parserVersion: bundle.manifest.parser_version,
5615
+ defaultOutDir: bundle.paths.parquet,
5616
+ counts
5617
+ };
5618
+ } finally {
5619
+ closeBundle(bundle);
5620
+ }
5621
+ }
5622
+ function quoteIdentifier(value) {
5623
+ return `"${value.replace(/"/g, '""')}"`;
5624
+ }
5625
+ function sqlString(value) {
5626
+ return `'${value.replace(/'/g, "''")}'`;
5627
+ }
5628
+ function isMissingParquetError(error) {
5629
+ const message = error instanceof Error ? error.message : String(error);
5630
+ return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
5631
+ }
5632
+
5633
+ // src/services/compile.ts
5246
5634
  init_indexing();
5247
- function compileCommand() {
5248
- return new Command("compile").description("Import session histories from one or more agent CLIs into the bundle.").option("--codex <path>", "root of Codex CLI sessions (e.g. ~/.codex/sessions)").option("--claude <path>", "root of Claude Code projects (e.g. ~/.claude/projects)").option("--gemini <path>", "root of Gemini CLI tmp dir (e.g. ~/.gemini/tmp)").option("--cursor <path>", "root of Cursor agent stores (e.g. ~/.cursor/chats)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(
5249
- async (options) => {
5250
- if (!options.codex && !options.claude && !options.gemini && !options.cursor) {
5251
- process.stderr.write(
5252
- "no source specified \u2014 pass at least one of --codex / --claude / --gemini / --cursor\n"
5253
- );
5254
- process.exit(2);
5255
- }
5256
- const bundle = await openBundle(path13.resolve(options.store));
5257
- let importedAny = false;
5635
+ var COMPILE_PROVIDERS = [
5636
+ {
5637
+ name: "codex",
5638
+ description: "Import Codex CLI session histories into the bundle.",
5639
+ pathHelp: "root of Codex CLI sessions",
5640
+ defaultSessionsPath: () => path14.join(os2.homedir(), ".codex", "sessions"),
5641
+ compile: compileCodex
5642
+ },
5643
+ {
5644
+ name: "claude",
5645
+ description: "Import Claude Code project histories into the bundle.",
5646
+ pathHelp: "root of Claude Code projects",
5647
+ defaultSessionsPath: () => path14.join(os2.homedir(), ".claude", "projects"),
5648
+ compile: compileClaude
5649
+ },
5650
+ {
5651
+ name: "gemini",
5652
+ description: "Import Gemini CLI session histories into the bundle.",
5653
+ pathHelp: "root of Gemini CLI tmp dir",
5654
+ defaultSessionsPath: () => path14.join(os2.homedir(), ".gemini", "tmp"),
5655
+ compile: compileGemini
5656
+ },
5657
+ {
5658
+ name: "cursor",
5659
+ description: "Import Cursor agent stores into the bundle.",
5660
+ pathHelp: "root of Cursor agent stores",
5661
+ defaultSessionsPath: () => path14.join(os2.homedir(), ".cursor", "chats"),
5662
+ compile: compileCursor
5663
+ }
5664
+ ];
5665
+ function getCompileProvider(source) {
5666
+ const provider = COMPILE_PROVIDERS.find((p) => p.name === source);
5667
+ if (!provider) {
5668
+ throw new Error(`unknown compile source: ${source}`);
5669
+ }
5670
+ return provider;
5671
+ }
5672
+ function resolveCompilePath(p) {
5673
+ if (p === "~") return os2.homedir();
5674
+ if (p.startsWith("~/")) return path14.join(os2.homedir(), p.slice(2));
5675
+ return path14.resolve(p);
5676
+ }
5677
+ async function runCompileImports(options) {
5678
+ const { bundle, providers, deferIndex, logger } = options;
5679
+ let importedAny = false;
5680
+ const summaries = [];
5681
+ let tantivy = null;
5682
+ let tantivyError = null;
5683
+ try {
5684
+ if (deferIndex) {
5685
+ logger?.info("disabling FTS5 triggers for deferred indexing");
5686
+ disableFts5Triggers(bundle);
5687
+ }
5688
+ for (const provider of providers) {
5689
+ const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
5690
+ const providerLogger = logger?.child({
5691
+ source_tool: provider.name,
5692
+ source_path: sourcePath
5693
+ });
5694
+ providerLogger?.info("starting compile");
5695
+ const r = await provider.compile(bundle, sourcePath, { logger: providerLogger });
5696
+ importedAny ||= r.counts.source_files_imported > 0;
5697
+ providerLogger?.info(
5698
+ {
5699
+ batch_id: r.batch.batch_id,
5700
+ counts: r.counts
5701
+ },
5702
+ "compile finished"
5703
+ );
5704
+ const summary = {
5705
+ source: provider.name,
5706
+ sourcePath,
5707
+ batchId: r.batch.batch_id,
5708
+ batch: r.batch,
5709
+ counts: r.counts
5710
+ };
5711
+ summaries.push(summary);
5712
+ options.onProviderComplete?.(summary);
5713
+ }
5714
+ logger?.info({ changed: importedAny, fts5_deferred: deferIndex }, "marking indexes");
5715
+ markIndexesAfterImport(bundle, {
5716
+ changed: importedAny,
5717
+ fts5Deferred: deferIndex
5718
+ });
5719
+ if (importedAny) {
5258
5720
  try {
5259
- if (options.deferIndex) {
5260
- disableFts5Triggers(bundle);
5261
- }
5262
- if (options.codex) {
5263
- const r = await compileCodex(bundle, path13.resolve(options.codex));
5264
- importedAny ||= r.counts.source_files_imported > 0;
5265
- printCounts("codex", r.batch.batch_id, r.counts);
5266
- }
5267
- if (options.claude) {
5268
- const r = await compileClaude(bundle, path13.resolve(options.claude));
5269
- importedAny ||= r.counts.source_files_imported > 0;
5270
- printCounts("claude", r.batch.batch_id, r.counts);
5271
- }
5272
- if (options.gemini) {
5273
- const r = await compileGemini(bundle, path13.resolve(options.gemini));
5274
- importedAny ||= r.counts.source_files_imported > 0;
5275
- printCounts("gemini", r.batch.batch_id, r.counts);
5276
- }
5277
- if (options.cursor) {
5278
- const r = await compileCursor(bundle, path13.resolve(options.cursor));
5279
- importedAny ||= r.counts.source_files_imported > 0;
5280
- printCounts("cursor", r.batch.batch_id, r.counts);
5281
- }
5282
- markIndexesAfterImport(bundle, {
5283
- changed: importedAny,
5284
- fts5Deferred: options.deferIndex === true
5285
- });
5286
- } finally {
5287
- if (options.deferIndex) {
5288
- enableFts5Triggers(bundle);
5289
- }
5290
- closeBundle(bundle);
5721
+ logger?.info("rebuilding tantivy index");
5722
+ const status = await rebuildTantivyIndex(bundle);
5723
+ tantivy = { indexedDocCount: status.indexed_doc_count };
5724
+ options.onTantivyComplete?.(tantivy);
5725
+ } catch (error) {
5726
+ tantivyError = error instanceof Error ? error.message : String(error);
5727
+ logger?.error({ err: error }, "tantivy rebuild failed; SQLite data is intact");
5291
5728
  }
5292
5729
  }
5730
+ } finally {
5731
+ if (deferIndex) {
5732
+ logger?.info("re-enabling FTS5 triggers");
5733
+ enableFts5Triggers(bundle);
5734
+ }
5735
+ }
5736
+ return {
5737
+ providers: summaries,
5738
+ importedAny,
5739
+ tantivy,
5740
+ tantivyError
5741
+ };
5742
+ }
5743
+ async function exportCompileParquet(options) {
5744
+ const storePath = resolveCompilePath(options.storePath);
5745
+ options.logger?.info({ store_path: storePath }, "exporting parquet");
5746
+ const result = await exportBundleParquet({ bundlePath: storePath });
5747
+ return {
5748
+ outDir: result.outDir,
5749
+ manifestPath: result.manifestPath,
5750
+ tableCount: Object.keys(result.files).length,
5751
+ files: result.files,
5752
+ counts: result.counts
5753
+ };
5754
+ }
5755
+
5756
+ // src/cli/logger.ts
5757
+ import pino from "pino";
5758
+ import pretty from "pino-pretty";
5759
+ function createCliLogger(options) {
5760
+ const loggerOptions = {
5761
+ base: void 0,
5762
+ level: options.verbose === true ? "debug" : "info"
5763
+ };
5764
+ if (options.jsonLogs === true) {
5765
+ return pino(loggerOptions, pino.destination({ dest: 2, sync: true }));
5766
+ }
5767
+ return pino(
5768
+ loggerOptions,
5769
+ pretty({
5770
+ colorize: process.stderr.isTTY,
5771
+ destination: 2,
5772
+ ignore: "pid,hostname",
5773
+ singleLine: true,
5774
+ sync: true,
5775
+ translateTime: "SYS:yyyy-mm-dd HH:MM:ss.l"
5776
+ })
5777
+ );
5778
+ }
5779
+
5780
+ // src/cli/commands/compile.ts
5781
+ function compileCommand() {
5782
+ const command = addCompileLogOptions(
5783
+ new Command("compile").description(
5784
+ "Import session histories from one agent CLI into the bundle."
5785
+ )
5786
+ );
5787
+ for (const provider of COMPILE_PROVIDERS) {
5788
+ command.addCommand(providerCompileCommand(provider));
5789
+ }
5790
+ command.action(() => {
5791
+ command.help({ error: true });
5792
+ });
5793
+ return command;
5794
+ }
5795
+ function compileAllCommand() {
5796
+ return addCompileLogOptions(new Command("compile-all")).description("Import all agent CLI session histories using default source paths.").option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(async (options) => {
5797
+ await runCompiles({
5798
+ providers: COMPILE_PROVIDERS,
5799
+ storePath: defaultBundlePath(),
5800
+ deferIndex: options.deferIndex === true,
5801
+ logOptions: options
5802
+ });
5803
+ });
5804
+ }
5805
+ function providerCompileCommand(provider) {
5806
+ return addCompileLogOptions(new Command(provider.name)).description(provider.description).option(
5807
+ "--sessions-path <path>",
5808
+ `${provider.pathHelp} (default: ${provider.defaultSessionsPath()})`,
5809
+ provider.defaultSessionsPath()
5810
+ ).option("--store <path>", "bundle directory", defaultBundlePath()).option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later").action(
5811
+ async (options, command) => {
5812
+ await runCompiles({
5813
+ providers: [provider],
5814
+ storePath: options.store,
5815
+ deferIndex: options.deferIndex === true,
5816
+ sessionsPath: options.sessionsPath,
5817
+ logOptions: command.optsWithGlobals()
5818
+ });
5819
+ }
5293
5820
  );
5294
5821
  }
5295
- function printCounts(label, batchId, c) {
5822
+ function addCompileLogOptions(command) {
5823
+ return command.option("--verbose", "emit debug logs during compilation").option("--json-logs", "emit raw newline-delimited JSON logs instead of pretty logs");
5824
+ }
5825
+ async function runCompiles(options) {
5826
+ const logger = createCliLogger(options.logOptions);
5827
+ const storePath = resolveCompilePath(options.storePath);
5828
+ logger.info({ store_path: storePath }, "opening bundle");
5829
+ const bundle = await openBundle(storePath);
5830
+ let importedAny = false;
5831
+ try {
5832
+ const result = await runCompileImports({
5833
+ bundle,
5834
+ providers: options.providers,
5835
+ deferIndex: options.deferIndex,
5836
+ sessionsPath: options.sessionsPath,
5837
+ logger,
5838
+ onProviderComplete: printCounts,
5839
+ onTantivyComplete: (status) => {
5840
+ process.stdout.write(`tantivy: indexed ${status.indexedDocCount} docs
5841
+ `);
5842
+ }
5843
+ });
5844
+ importedAny = result.importedAny;
5845
+ } finally {
5846
+ closeBundle(bundle);
5847
+ logger.info({ store_path: storePath }, "bundle closed");
5848
+ }
5849
+ if (importedAny) {
5850
+ try {
5851
+ const result = await exportCompileParquet({ storePath, logger });
5852
+ process.stdout.write(`parquet: wrote ${result.tableCount} tables to ${result.outDir}
5853
+ `);
5854
+ } catch (error) {
5855
+ logger.error({ err: error }, "parquet export failed; SQLite data is intact");
5856
+ }
5857
+ }
5858
+ }
5859
+ function printCounts(summary) {
5860
+ const c = summary.counts;
5296
5861
  process.stdout.write(
5297
- `${label} import: batch=${batchId}
5862
+ `${summary.source} import: batch=${summary.batchId}
5298
5863
  source_files seen=${c.source_files_seen} imported=${c.source_files_imported} skipped=${c.source_files_skipped}
5299
5864
  sessions=${c.sessions} turns=${c.turns} messages=${c.messages} blocks=${c.content_blocks}
5300
5865
  events=${c.events} tool_calls=${c.tool_calls} tool_results=${c.tool_results}
@@ -5419,142 +5984,6 @@ function renderToolCall(c) {
5419
5984
  return lines.join("\n");
5420
5985
  }
5421
5986
 
5422
- // src/services/export/parquet.ts
5423
- import { mkdir as mkdir5, rm as rm2, writeFile as writeFile5 } from "fs/promises";
5424
- import path14 from "path";
5425
- import { DuckDBConnection } from "@duckdb/node-api";
5426
- var PARQUET_TABLES = [
5427
- "objects",
5428
- "source_files",
5429
- "import_batches",
5430
- "raw_records",
5431
- "import_errors",
5432
- "uncertainties",
5433
- "projects",
5434
- "sessions",
5435
- "turns",
5436
- "events",
5437
- "messages",
5438
- "content_blocks",
5439
- "tool_calls",
5440
- "tool_results",
5441
- "artifacts",
5442
- "edges",
5443
- "search_docs"
5444
- ];
5445
- async function exportBundleParquet(options) {
5446
- const snapshot = await openBundleSnapshot(options.bundlePath);
5447
- const outDir = path14.resolve(options.outDir ?? snapshot.defaultOutDir);
5448
- await mkdir5(outDir, { recursive: true });
5449
- const files = Object.fromEntries(
5450
- PARQUET_TABLES.map((table) => [table, path14.join(outDir, `${table}.parquet`)])
5451
- );
5452
- const manifestPath = path14.join(outDir, "manifest.json");
5453
- for (const file of [...Object.values(files), manifestPath]) {
5454
- await rm2(file, { force: true });
5455
- }
5456
- const connection = await createDuckDbConnection();
5457
- try {
5458
- await attachSqlite(connection, snapshot.dbPath);
5459
- for (const table of PARQUET_TABLES) {
5460
- await connection.run(
5461
- `COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
5462
- );
5463
- }
5464
- } finally {
5465
- connection.closeSync();
5466
- }
5467
- const manifest = {
5468
- exported_at: (/* @__PURE__ */ new Date()).toISOString(),
5469
- source_db: snapshot.dbPath,
5470
- schema_version: snapshot.schemaVersion,
5471
- parser_version: snapshot.parserVersion,
5472
- tables: Object.fromEntries(
5473
- PARQUET_TABLES.map((table) => [
5474
- table,
5475
- {
5476
- file: path14.basename(files[table]),
5477
- rows: snapshot.counts[table]
5478
- }
5479
- ])
5480
- )
5481
- };
5482
- await writeFile5(manifestPath, `${JSON.stringify(manifest, null, 2)}
5483
- `, "utf8");
5484
- return { outDir, manifestPath, files, counts: snapshot.counts };
5485
- }
5486
- async function queryDuckDbParquet(options) {
5487
- const parquetDir = path14.resolve(options.parquetDir);
5488
- const connection = await createDuckDbConnection();
5489
- try {
5490
- for (const table of PARQUET_TABLES) {
5491
- await connection.run(
5492
- `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
5493
- path14.join(parquetDir, `${table}.parquet`)
5494
- )})`
5495
- );
5496
- }
5497
- const reader = await connection.runAndReadAll(options.sql);
5498
- return {
5499
- columns: reader.deduplicatedColumnNames(),
5500
- rows: reader.getRowObjectsJson()
5501
- };
5502
- } catch (error) {
5503
- if (isMissingParquetError(error)) {
5504
- throw new Error(
5505
- `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
5506
- );
5507
- }
5508
- throw error;
5509
- } finally {
5510
- connection.closeSync();
5511
- }
5512
- }
5513
- async function createDuckDbConnection() {
5514
- return DuckDBConnection.create();
5515
- }
5516
- async function attachSqlite(connection, dbPath) {
5517
- try {
5518
- await connection.run("INSTALL sqlite");
5519
- await connection.run("LOAD sqlite");
5520
- await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
5521
- } catch (error) {
5522
- throw new Error(
5523
- `DuckDB could not attach prosa.sqlite via the sqlite extension: ${error instanceof Error ? error.message : String(error)}`
5524
- );
5525
- }
5526
- }
5527
- async function openBundleSnapshot(bundlePath) {
5528
- const bundle = await openBundle(bundlePath);
5529
- try {
5530
- const counts = Object.fromEntries(
5531
- PARQUET_TABLES.map((table) => {
5532
- const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
5533
- return [table, row?.n ?? 0];
5534
- })
5535
- );
5536
- return {
5537
- dbPath: bundle.paths.db,
5538
- schemaVersion: bundle.manifest.schema_version,
5539
- parserVersion: bundle.manifest.parser_version,
5540
- defaultOutDir: bundle.paths.parquet,
5541
- counts
5542
- };
5543
- } finally {
5544
- closeBundle(bundle);
5545
- }
5546
- }
5547
- function quoteIdentifier(value) {
5548
- return `"${value.replace(/"/g, '""')}"`;
5549
- }
5550
- function sqlString(value) {
5551
- return `'${value.replace(/'/g, "''")}'`;
5552
- }
5553
- function isMissingParquetError(error) {
5554
- const message = error instanceof Error ? error.message : String(error);
5555
- return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
5556
- }
5557
-
5558
5987
  // src/cli/commands/export.ts
5559
5988
  function exportCommand() {
5560
5989
  const session = new Command2("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
@@ -5757,10 +6186,13 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
5757
6186
 
5758
6187
  // src/mcp/guidance.ts
5759
6188
  var PROSA_MCP_INSTRUCTIONS = `
5760
- prosa is a read-only memory over local agent session histories. Use it to find prior work,
5761
- commands, decisions, file touches, and full transcripts before answering from memory.
6189
+ prosa is a local memory over local agent session histories. Use it to import recent sessions,
6190
+ find prior work, commands, decisions, file touches, and full transcripts before answering from
6191
+ memory.
5762
6192
 
5763
6193
  Recommended workflow:
6194
+ - Use compile to refresh the bundle when recent local sessions may not be indexed yet. With no
6195
+ input it imports all supported providers from default paths.
5764
6196
  - For open-ended questions, start with search_sessions using 2-5 concrete terms.
5765
6197
  - For questions about a file or path, start with find_touched_files, then inspect the returned sessions.
5766
6198
  - After search results, call get_session for the most relevant session_ids before drawing conclusions.
@@ -5810,7 +6242,76 @@ init_search();
5810
6242
  init_sessions();
5811
6243
  function registerProsaTools(server, bundle, options = {}) {
5812
6244
  const searchEngine = options.searchEngine ?? "fts5";
6245
+ const storePath = options.storePath ?? bundle.path;
5813
6246
  registerProsaPrompts(server);
6247
+ server.registerTool(
6248
+ "compile",
6249
+ {
6250
+ title: "Compile sessions",
6251
+ description: "Import local agent session histories into the active prosa bundle. With no input, compiles all providers from default paths. With source, compiles that provider; sessions_path may override that provider path.",
6252
+ inputSchema: {
6253
+ source: z.enum(["cursor", "codex", "claude", "gemini"]).optional(),
6254
+ sessions_path: z.string().min(1).optional()
6255
+ },
6256
+ annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
6257
+ },
6258
+ async ({ source, sessions_path }) => {
6259
+ if (sessions_path && !source) {
6260
+ return {
6261
+ content: [
6262
+ {
6263
+ type: "text",
6264
+ text: "sessions_path requires source because providers use incompatible source layouts"
6265
+ }
6266
+ ],
6267
+ isError: true
6268
+ };
6269
+ }
6270
+ try {
6271
+ const result = await runCompileImports({
6272
+ bundle,
6273
+ providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
6274
+ deferIndex: false,
6275
+ sessionsPath: sessions_path
6276
+ });
6277
+ const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
6278
+ return {
6279
+ content: [
6280
+ {
6281
+ type: "text",
6282
+ text: JSON.stringify(
6283
+ {
6284
+ providers: result.providers.map((provider) => ({
6285
+ source: provider.source,
6286
+ source_path: provider.sourcePath,
6287
+ batch_id: provider.batchId,
6288
+ counts: provider.counts
6289
+ })),
6290
+ imported_any: result.importedAny,
6291
+ tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
6292
+ tantivy_error: result.tantivyError,
6293
+ parquet: parquet ? {
6294
+ out_dir: parquet.outDir,
6295
+ manifest_path: parquet.manifestPath,
6296
+ table_count: parquet.tableCount,
6297
+ files: parquet.files,
6298
+ counts: parquet.counts
6299
+ } : null
6300
+ },
6301
+ null,
6302
+ 2
6303
+ )
6304
+ }
6305
+ ]
6306
+ };
6307
+ } catch (error) {
6308
+ return {
6309
+ content: [{ type: "text", text: error instanceof Error ? error.message : String(error) }],
6310
+ isError: true
6311
+ };
6312
+ }
6313
+ }
6314
+ );
5814
6315
  server.registerTool(
5815
6316
  "list_sessions",
5816
6317
  {
@@ -6123,7 +6624,7 @@ function registerProsaPrompts(server) {
6123
6624
 
6124
6625
  // src/mcp/server.ts
6125
6626
  async function listenMcpStdioServer(bundle, options = {}) {
6126
- const server = createMcpServer(bundle, options.searchEngine ?? "fts5");
6627
+ const server = createMcpServer(bundle, options.searchEngine ?? "fts5", options.storePath);
6127
6628
  const transport = new StdioServerTransport();
6128
6629
  await server.connect(transport);
6129
6630
  return {
@@ -6137,10 +6638,13 @@ async function listenMcpServer(bundle, options) {
6137
6638
  const mcpPath = options.path ?? "/mcp";
6138
6639
  const sessions = /* @__PURE__ */ new Map();
6139
6640
  const searchEngine = options.searchEngine ?? "fts5";
6641
+ const storePath = options.storePath ?? bundle.path;
6140
6642
  const httpServer = http.createServer((req, res) => {
6141
- handleRequest(req, res, mcpPath, sessions, bundle, searchEngine).catch((error) => {
6142
- writeError(res, error);
6143
- });
6643
+ handleRequest(req, res, mcpPath, sessions, bundle, searchEngine, storePath).catch(
6644
+ (error) => {
6645
+ writeError(res, error);
6646
+ }
6647
+ );
6144
6648
  });
6145
6649
  await new Promise((resolve, reject) => {
6146
6650
  httpServer.once("error", reject);
@@ -6163,7 +6667,7 @@ async function listenMcpServer(bundle, options) {
6163
6667
  }
6164
6668
  };
6165
6669
  }
6166
- async function handleRequest(req, res, mcpPath, sessions, bundle, searchEngine) {
6670
+ async function handleRequest(req, res, mcpPath, sessions, bundle, searchEngine, storePath) {
6167
6671
  if (!req.url || !req.url.startsWith(mcpPath)) {
6168
6672
  res.writeHead(404).end();
6169
6673
  return;
@@ -6185,14 +6689,14 @@ async function handleRequest(req, res, mcpPath, sessions, bundle, searchEngine)
6185
6689
  res.writeHead(404).end();
6186
6690
  return;
6187
6691
  }
6188
- entry = await openSession(bundle, sessions, searchEngine);
6692
+ entry = await openSession(bundle, sessions, searchEngine, storePath);
6189
6693
  }
6190
6694
  const bodyText = await readBody(req);
6191
6695
  const body = bodyText.length > 0 ? safeJsonParse(bodyText) : void 0;
6192
6696
  await entry.transport.handleRequest(req, res, body);
6193
6697
  }
6194
- async function openSession(bundle, store, searchEngine) {
6195
- const server = createMcpServer(bundle, searchEngine);
6698
+ async function openSession(bundle, store, searchEngine, storePath) {
6699
+ const server = createMcpServer(bundle, searchEngine, storePath);
6196
6700
  const transport = new StreamableHTTPServerTransport({
6197
6701
  sessionIdGenerator: () => randomUUID(),
6198
6702
  onsessioninitialized: (id) => {
@@ -6210,7 +6714,7 @@ async function openSession(bundle, store, searchEngine) {
6210
6714
  await server.connect(transport);
6211
6715
  return { server, transport };
6212
6716
  }
6213
- function createMcpServer(bundle, searchEngine) {
6717
+ function createMcpServer(bundle, searchEngine, storePath) {
6214
6718
  const server = new McpServer(
6215
6719
  {
6216
6720
  name: "prosa",
@@ -6218,7 +6722,7 @@ function createMcpServer(bundle, searchEngine) {
6218
6722
  },
6219
6723
  { instructions: PROSA_MCP_INSTRUCTIONS }
6220
6724
  );
6221
- registerProsaTools(server, bundle, { searchEngine });
6725
+ registerProsaTools(server, bundle, { searchEngine, storePath });
6222
6726
  return server;
6223
6727
  }
6224
6728
  async function readBody(req) {
@@ -6259,7 +6763,8 @@ function writeError(res, error) {
6259
6763
  function mcpCommand() {
6260
6764
  const serve = new Command5("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
6261
6765
  async (options) => {
6262
- const bundle = await openBundle(path18.resolve(options.store));
6766
+ const storePath = path18.resolve(options.store);
6767
+ const bundle = await openBundle(storePath);
6263
6768
  try {
6264
6769
  const transport = parseMcpTransport(options.transport);
6265
6770
  const searchEngine = parseSearchEngine(options.searchEngine);
@@ -6272,7 +6777,8 @@ function mcpCommand() {
6272
6777
  host: options.host,
6273
6778
  port,
6274
6779
  path: options.path,
6275
- searchEngine
6780
+ searchEngine,
6781
+ storePath
6276
6782
  });
6277
6783
  process.stdout.write(`prosa mcp server listening at ${server2.url}
6278
6784
  `);
@@ -6280,7 +6786,7 @@ function mcpCommand() {
6280
6786
  registerShutdown(server2.close, bundle);
6281
6787
  return;
6282
6788
  }
6283
- const server = await listenMcpStdioServer(bundle, { searchEngine });
6789
+ const server = await listenMcpStdioServer(bundle, { searchEngine, storePath });
6284
6790
  registerShutdown(server.close, bundle);
6285
6791
  } catch (error) {
6286
6792
  closeBundle(bundle);
@@ -6454,6 +6960,7 @@ async function runCli(argv) {
6454
6960
  ).version(PROSA_PARSER_VERSION, "-v, --version");
6455
6961
  program.addCommand(initCommand());
6456
6962
  program.addCommand(compileCommand());
6963
+ program.addCommand(compileAllCommand());
6457
6964
  program.addCommand(indexCommand());
6458
6965
  program.addCommand(sessionsCommand());
6459
6966
  program.addCommand(searchCommand());