@c3-oss/prosa 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -579,6 +579,12 @@ function objectStoragePath(hashHex, compression) {
579
579
  }
580
580
 
581
581
  // src/core/cas/index.ts
582
// Directories this process has already created; lets repeat callers skip the mkdir call.
var ensuredDirs = /* @__PURE__ */ new Set();
// mkdir calls currently in progress, keyed by directory, so concurrent callers
// for the same directory share one call instead of each issuing their own.
var ensureDirInFlight = /* @__PURE__ */ new Map();
/**
 * Creates `absoluteDir` (including missing parents) unless this process has
 * already done so.
 *
 * The result is cached for the lifetime of the process. Concurrent calls for
 * the same directory await a single shared mkdir; a failed mkdir is not
 * cached, so a later call will try again.
 *
 * @param {string} absoluteDir - Directory path; used verbatim as the cache key.
 * @returns {Promise<void>} Resolves once the directory exists.
 * @throws Whatever the underlying mkdir call rejects with.
 */
async function ensureDir(absoluteDir) {
  if (ensuredDirs.has(absoluteDir)) return;
  let inFlight = ensureDirInFlight.get(absoluteDir);
  if (inFlight === undefined) {
    inFlight = (async () => {
      try {
        await mkdir2(absoluteDir, { recursive: true });
        // Only remember the directory after mkdir actually succeeded.
        ensuredDirs.add(absoluteDir);
      } finally {
        ensureDirInFlight.delete(absoluteDir);
      }
    })();
    ensureDirInFlight.set(absoluteDir, inFlight);
  }
  await inFlight;
}
582
588
  async function putBytes(bundle, bytes, options = {}) {
583
589
  const hash = blake3Hex(bytes);
584
590
  const objectId = objectIdFromHash(hash);
@@ -592,7 +598,7 @@ async function putBytes(bundle, bytes, options = {}) {
592
598
  const { bytes: stored, compression } = compressBytes(bytes);
593
599
  const storagePath = objectStoragePath(hash, compression);
594
600
  const absolutePath = path2.join(bundle.path, storagePath);
595
- await mkdir2(path2.dirname(absolutePath), { recursive: true });
601
+ await ensureDir(path2.dirname(absolutePath));
596
602
  await writeFile2(absolutePath, stored);
597
603
  prepare(
598
604
  bundle.db,
@@ -656,6 +662,112 @@ function getObjectMeta(bundle, objectId) {
656
662
  FROM objects WHERE object_id = ?`
657
663
  ).get(objectId) ?? null;
658
664
  }
665
/**
 * Builds an empty staging area for objects that have not been persisted yet.
 *
 * @returns {{ byId: Map }} A fresh container whose `byId` map starts empty.
 */
function createPendingObjects() {
  const byId = /* @__PURE__ */ new Map();
  return { byId };
}
668
/**
 * Records `bytes` in the staging area under an id derived from their hash and
 * returns that id. Nothing is written to disk or to the database here.
 *
 * If an entry with the same id is already staged it is left untouched, so the
 * metadata of the first staging call is the one that is kept.
 *
 * @param {{ byId: Map }} pending - Staging area to add to.
 * @param {*} bytes - A Buffer (kept by reference) or any value accepted by `Buffer.from`.
 * @param {{ mimeType?: string|null, encoding?: string|null }} [options] - Optional metadata; missing fields are stored as `null`.
 * @returns The object id produced by `objectIdFromHash` for the content hash.
 */
function stageBytes(pending, bytes, options = {}) {
  const payload = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  const digest = blake3Hex(payload);
  const objectId = objectIdFromHash(digest);
  if (pending.byId.has(objectId)) {
    return objectId;
  }
  const { mimeType = null, encoding = null } = options;
  const entry = {
    objectId,
    hash: digest,
    bytes: payload,
    mimeType,
    encoding
  };
  pending.byId.set(objectId, entry);
  return objectId;
}
683
/**
 * Stages a string as UTF-8 bytes and returns the resulting object id.
 *
 * @param {{ byId: Map }} pending - Staging area to add to.
 * @param {string} text - Text to stage.
 * @param {{ mimeType?: string|null }} [options] - Optional MIME type; falls back to "text/plain; charset=utf-8". The stored encoding is always "utf-8".
 * @returns The object id returned by `stageBytes`.
 */
function stageText(pending, text, options = {}) {
  const utf8Bytes = Buffer.from(text, "utf8");
  const mimeType = options.mimeType ?? "text/plain; charset=utf-8";
  return stageBytes(pending, utf8Bytes, { mimeType, encoding: "utf-8" });
}
689
/**
 * Stages the `JSON.stringify` form of `value` as UTF-8 bytes tagged
 * "application/json" and returns the resulting object id.
 *
 * @param {{ byId: Map }} pending - Staging area to add to.
 * @param {*} value - Value to serialize.
 * @returns The object id returned by `stageBytes`.
 */
function stageJson(pending, value) {
  const serialized = JSON.stringify(value);
  const payload = Buffer.from(serialized, "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return stageBytes(pending, payload, metadata);
}
695
/**
 * Persists every staged object that does not yet have a row in `objects`.
 *
 * Steps: look up which staged ids already exist, compress the rest, write
 * their files under `bundle.path`, then insert one `objects` row per newly
 * written file. Files are written before rows are inserted, so a row is only
 * ever created after its file write has completed.
 *
 * The staging area itself is not cleared.
 *
 * @param bundle - Bundle handle; `bundle.path` and `bundle.db` are used here.
 * @param {{ byId: Map }} pending - Staging area filled by the stage* helpers.
 * @returns {Promise<void>}
 * @throws Rejects if a file write or a database statement fails.
 */
async function flushPendingObjects(bundle, pending) {
  if (pending.byId.size === 0) return;
  const ids = [...pending.byId.keys()];
  const existingIds = queryExistingObjectIds(bundle, ids);
  const toWrite = [];
  for (const obj of pending.byId.values()) {
    if (existingIds.has(obj.objectId)) continue;
    const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
    const storagePath = objectStoragePath(obj.hash, compression);
    toWrite.push({
      staged: obj,
      compression,
      compressedBytes,
      storagePath,
      absolutePath: path2.join(bundle.path, storagePath)
    });
  }
  // Everything staged is already stored: nothing to write or insert.
  if (toWrite.length === 0) return;
  await writeFilesParallel(toWrite);
  const insertObject = prepare(
    bundle.db,
    `INSERT OR IGNORE INTO objects (
      object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
      compression, mime_type, encoding, storage_path, created_at
    ) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const now = (/* @__PURE__ */ new Date()).toISOString();
  // Insert the whole batch through the file's `transactional` helper (the same
  // helper the importers use for their own flush) so the rows are committed
  // together instead of one implicit commit per row.
  // NOTE(review): assumes `transactional(db, fn)` runs `fn` synchronously
  // inside one transaction and is not already active at this call site.
  transactional(bundle.db, () => {
    for (const p of toWrite) {
      insertObject.run(
        p.staged.objectId,
        p.staged.hash,
        p.staged.bytes.byteLength,
        // A compressed size is only recorded for zstd-compressed payloads.
        p.compression === "zstd" ? p.compressedBytes.byteLength : null,
        p.compression,
        p.staged.mimeType,
        p.staged.encoding,
        p.storagePath,
        now
      );
    }
  });
}
737
/**
 * Returns the subset of `ids` that already have a row in the `objects` table.
 *
 * Ids are looked up in chunks of 500 bound parameters per statement. A
 * statement is prepared once per distinct chunk size (at most two per call:
 * the full chunk and the final remainder) and reused across chunks.
 *
 * @param bundle - Bundle handle; only `bundle.db.prepare` is used.
 * @param {string[]} ids - Object ids to look up.
 * @returns {Set<string>} Ids that were found; empty when `ids` is empty.
 */
function queryExistingObjectIds(bundle, ids) {
  const found = /* @__PURE__ */ new Set();
  const CHUNK = 500;
  // Prepared statements keyed by their placeholder count.
  const statements = /* @__PURE__ */ new Map();
  for (let start = 0; start < ids.length; start += CHUNK) {
    const slice = ids.slice(start, start + CHUNK);
    let statement = statements.get(slice.length);
    if (statement === undefined) {
      const placeholders = slice.map(() => "?").join(",");
      statement = bundle.db.prepare(
        `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
      );
      statements.set(slice.length, statement);
    }
    for (const row of statement.all(...slice)) found.add(row.object_id);
  }
  return found;
}
751
// Upper bound on file writes running at the same time.
var FS_WRITE_CONCURRENCY = 16;
/**
 * Writes every task's `compressedBytes` to its `absolutePath`, creating the
 * parent directory first, with at most FS_WRITE_CONCURRENCY writes in flight.
 *
 * Workers pull tasks from a shared cursor. After the first failure no further
 * tasks are started; writes already in flight are allowed to settle, and only
 * then is the first error rethrown, so no write is still running in the
 * background when this function rejects.
 *
 * @param {Array<{ absolutePath: string, compressedBytes: * }>} tasks - Files to write.
 * @returns {Promise<void>} Resolves when every task has been written.
 * @throws The first error raised by a directory creation or file write.
 */
async function writeFilesParallel(tasks) {
  let cursor = 0;
  let failed = false;
  let firstError;
  const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
  const worker = async () => {
    while (!failed) {
      const i = cursor++;
      if (i >= tasks.length) return;
      const task = tasks[i];
      try {
        await ensureDir(path2.dirname(task.absolutePath));
        await writeFile2(task.absolutePath, task.compressedBytes);
      } catch (error) {
        // Keep only the first failure; the flag stops the other workers from
        // starting new tasks.
        if (!failed) {
          failed = true;
          firstError = error;
        }
        return;
      }
    }
  };
  // Workers never reject, so this waits for all of them to finish.
  await Promise.all(Array.from({ length: limit }, () => worker()));
  if (failed) throw firstError;
}
659
771
 
660
772
  // src/core/domain/ids.ts
661
773
  var ID_PREFIX_BYTES = 16;
@@ -766,7 +878,7 @@ async function recordError(bundle, batchId, args) {
766
878
  }
767
879
 
768
880
  // src/core/ingest/idempotency.ts
769
- import { access as access2, mkdir as mkdir3, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
881
+ import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
770
882
  import path3 from "path";
771
883
  async function registerSourceFile(bundle, args) {
772
884
  const st = await stat2(args.absolutePath);
@@ -852,7 +964,7 @@ async function preserveRawSourceBytes(bundle, bytes) {
852
964
  const { bytes: stored, compression } = compressBytes(bytes);
853
965
  const storagePath = rawSourceStoragePath(hash, compression);
854
966
  const absolutePath = path3.join(bundle.path, storagePath);
855
- await mkdir3(path3.dirname(absolutePath), { recursive: true });
967
+ await ensureDir(path3.dirname(absolutePath));
856
968
  if (!await fileExists(absolutePath)) {
857
969
  await writeFile3(absolutePath, stored);
858
970
  }
@@ -994,7 +1106,7 @@ import { existsSync } from "fs";
994
1106
  import { createRequire } from "module";
995
1107
 
996
1108
  // src/services/indexing.ts
997
- import { mkdir as mkdir4, rm, writeFile as writeFile4 } from "fs/promises";
1109
+ import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
998
1110
  import path4 from "path";
999
1111
  var FTS5_TRIGGER_SQL = `
1000
1112
  CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
@@ -1109,7 +1221,7 @@ async function rebuildTantivyIndex(bundle) {
1109
1221
  const tantivy = await import("@oxdev03/node-tantivy-binding");
1110
1222
  const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
1111
1223
  await rm(bundle.paths.tantivy, { recursive: true, force: true });
1112
- await mkdir4(bundle.paths.tantivy, { recursive: true });
1224
+ await mkdir3(bundle.paths.tantivy, { recursive: true });
1113
1225
  const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
1114
1226
  const writer = index.writer(5e7, 1);
1115
1227
  let indexedDocCount = 0;
@@ -1421,7 +1533,7 @@ function renderToolCall(c) {
1421
1533
  }
1422
1534
 
1423
1535
  // src/services/export/parquet.ts
1424
- import { mkdir as mkdir5, rm as rm2, writeFile as writeFile5 } from "fs/promises";
1536
+ import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
1425
1537
  import path5 from "path";
1426
1538
  import { DuckDBConnection } from "@duckdb/node-api";
1427
1539
  var PARQUET_TABLES = [
@@ -1446,7 +1558,7 @@ var PARQUET_TABLES = [
1446
1558
  async function exportBundleParquet(options) {
1447
1559
  const snapshot = await openBundleSnapshot(options.bundlePath);
1448
1560
  const outDir = path5.resolve(options.outDir ?? snapshot.defaultOutDir);
1449
- await mkdir5(outDir, { recursive: true });
1561
+ await mkdir4(outDir, { recursive: true });
1450
1562
  const files = Object.fromEntries(
1451
1563
  PARQUET_TABLES.map((table) => [table, path5.join(outDir, `${table}.parquet`)])
1452
1564
  );
@@ -1585,17 +1697,27 @@ async function* walk(dir) {
1585
1697
 
1586
1698
  // src/importers/codex/index.ts
1587
1699
  var PREVIEW_MAX = 4e3;
1588
- async function compileCodex(bundle, root) {
1700
+ async function compileCodex(bundle, root, options = {}) {
1701
+ const logger = options.logger;
1589
1702
  const batch = startBatch(bundle, "codex", [root]);
1590
1703
  const counts = emptyCounts();
1704
+ logger?.info({ batch_id: batch.batch_id, root }, "codex batch started");
1591
1705
  try {
1592
1706
  for await (const filePath of discoverCodexSessions(root)) {
1593
1707
  counts.source_files_seen++;
1708
+ logger?.debug({ path: filePath }, "codex source file discovered");
1594
1709
  try {
1595
- const fileCounts = await compileCodexFile(bundle, batch, filePath);
1710
+ const fileCounts = await compileCodexFile(bundle, batch, filePath, logger);
1596
1711
  addCounts(counts, fileCounts);
1597
1712
  } catch (error) {
1598
1713
  counts.errors++;
1714
+ logger?.warn(
1715
+ {
1716
+ err: error,
1717
+ path: filePath
1718
+ },
1719
+ "codex source file failed"
1720
+ );
1599
1721
  await recordError(bundle, batch.batch_id, {
1600
1722
  kind: "codex_file_failed",
1601
1723
  message: error instanceof Error ? error.message : String(error),
@@ -1604,9 +1726,12 @@ async function compileCodex(bundle, root) {
1604
1726
  }
1605
1727
  }
1606
1728
  linkSubagentParents(bundle);
1729
+ logger?.debug({ batch_id: batch.batch_id }, "codex subagent parent links refreshed");
1607
1730
  finishBatch(bundle, batch, counts, "completed");
1731
+ logger?.info({ batch_id: batch.batch_id, counts }, "codex batch completed");
1608
1732
  } catch (error) {
1609
1733
  finishBatch(bundle, batch, counts, "failed");
1734
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "codex batch failed");
1610
1735
  throw error;
1611
1736
  }
1612
1737
  return { batch, counts };
@@ -1659,7 +1784,7 @@ function addCounts(target, source) {
1659
1784
  target.edges += source.edges;
1660
1785
  target.errors += source.errors;
1661
1786
  }
1662
- async function compileCodexFile(bundle, batch, filePath) {
1787
+ async function compileCodexFile(bundle, batch, filePath, logger) {
1663
1788
  const counts = emptyFileCounts();
1664
1789
  const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
1665
1790
  sourceTool: "codex",
@@ -1668,9 +1793,17 @@ async function compileCodexFile(bundle, batch, filePath) {
1668
1793
  });
1669
1794
  if (alreadyKnown) {
1670
1795
  counts.source_files_skipped = 1;
1796
+ logger?.debug(
1797
+ { path: filePath, source_file_id: sourceFileRow.source_file_id },
1798
+ "codex source file skipped"
1799
+ );
1671
1800
  return counts;
1672
1801
  }
1673
1802
  counts.source_files_imported = 1;
1803
+ logger?.debug(
1804
+ { path: filePath, source_file_id: sourceFileRow.source_file_id },
1805
+ "codex source file registered"
1806
+ );
1674
1807
  const text = await readFile4(filePath, "utf8");
1675
1808
  const rawLines = text.split("\n");
1676
1809
  const lines = rawLines[rawLines.length - 1] === "" ? rawLines.slice(0, -1) : rawLines;
@@ -1687,7 +1820,8 @@ async function compileCodexFile(bundle, batch, filePath) {
1687
1820
  toolResults: [],
1688
1821
  artifacts: [],
1689
1822
  edges: [],
1690
- searchDocs: []
1823
+ searchDocs: [],
1824
+ objects: createPendingObjects()
1691
1825
  };
1692
1826
  let sessionStartTs = null;
1693
1827
  let sessionEndTs = null;
@@ -1701,7 +1835,7 @@ async function compileCodexFile(bundle, batch, filePath) {
1701
1835
  const lineNo = i + 1;
1702
1836
  const ordinal = i;
1703
1837
  const lineBytes = Buffer.from(line, "utf8");
1704
- const rawObjectId = await putBytes(bundle, lineBytes, {
1838
+ const rawObjectId = stageBytes(pending.objects, lineBytes, {
1705
1839
  mimeType: "application/jsonl-line",
1706
1840
  encoding: "utf-8"
1707
1841
  });
@@ -1712,7 +1846,7 @@ async function compileCodexFile(bundle, batch, filePath) {
1712
1846
  } catch {
1713
1847
  parserStatus = "failed";
1714
1848
  }
1715
- const decodedObjectId = parsed != null && parserStatus === "ok" ? await putJson(bundle, parsed) : null;
1849
+ const decodedObjectId = null;
1716
1850
  const nativeId = parsed ? extractNativeId(parsed) : null;
1717
1851
  const rawRecordId2 = rawRecordId(sourceFileRow.source_file_id, ordinal, rawObjectId);
1718
1852
  pending.rawRecords.push({
@@ -1881,6 +2015,7 @@ async function compileCodexFile(bundle, batch, filePath) {
1881
2015
  pending.session.start_ts ??= sessionStartTs;
1882
2016
  }
1883
2017
  buildSearchDocs(pending);
2018
+ await flushPendingObjects(bundle, pending.objects);
1884
2019
  transactional(bundle.db, () => {
1885
2020
  flushPending(bundle, pending, {
1886
2021
  sessionEndTs,
@@ -1899,6 +2034,10 @@ async function compileCodexFile(bundle, batch, filePath) {
1899
2034
  counts.tool_results = pending.toolResults.length;
1900
2035
  counts.artifacts = pending.artifacts.length;
1901
2036
  counts.edges = pending.edges.length;
2037
+ logger?.debug(
2038
+ { path: filePath, source_file_id: sourceFileRow.source_file_id, counts },
2039
+ "codex source file imported"
2040
+ );
1902
2041
  return counts;
1903
2042
  }
1904
2043
  function handleResponseItem(_bundle, sessionId2, currentTurnId, rawRecordId2, ordinal, ts, ri, payloadObjectId, nextMsgOrdinal, currentModel, pending) {
@@ -2058,8 +2197,8 @@ async function handleEventMsg(bundle, sessionId2, currentTurnId, rawRecordId2, o
2058
2197
  const subtype = em.type ?? "unknown";
2059
2198
  if (subtype === "exec_command_end") {
2060
2199
  const sourceCallId = em.call_id ?? null;
2061
- const stdoutId = em.stdout ? await putText(bundle, em.stdout, { mimeType: "text/plain" }) : null;
2062
- const stderrId = em.stderr ? await putText(bundle, em.stderr, { mimeType: "text/plain" }) : null;
2200
+ const stdoutId = em.stdout ? stageText(pending.objects, em.stdout, { mimeType: "text/plain" }) : null;
2201
+ const stderrId = em.stderr ? stageText(pending.objects, em.stderr, { mimeType: "text/plain" }) : null;
2063
2202
  const preview = (em.formatted_output ?? em.aggregated_output ?? em.stdout ?? "").slice(
2064
2203
  0,
2065
2204
  PREVIEW_MAX
@@ -2729,17 +2868,34 @@ async function readdirSafe(dir) {
2729
2868
 
2730
2869
  // src/importers/claude/index.ts
2731
2870
  var PREVIEW_MAX2 = 4e3;
2732
- async function compileClaude(bundle, root) {
2871
+ async function compileClaude(bundle, root, options = {}) {
2872
+ const logger = options.logger;
2733
2873
  const batch = startBatch(bundle, "claude", [root]);
2734
2874
  const counts = emptyCounts();
2875
+ logger?.info({ batch_id: batch.batch_id, root }, "claude batch started");
2735
2876
  try {
2736
2877
  for await (const file of discoverClaudeFiles(root)) {
2737
2878
  counts.source_files_seen++;
2879
+ logger?.debug(
2880
+ {
2881
+ path: file.filePath,
2882
+ project_slug: file.projectSlug,
2883
+ is_subagent: file.isSubagent
2884
+ },
2885
+ "claude source file discovered"
2886
+ );
2738
2887
  try {
2739
- const fc = await compileClaudeFile(bundle, batch, file);
2888
+ const fc = await compileClaudeFile(bundle, batch, file, logger);
2740
2889
  addCounts2(counts, fc);
2741
2890
  } catch (error) {
2742
2891
  counts.errors++;
2892
+ logger?.warn(
2893
+ {
2894
+ err: error,
2895
+ path: file.filePath
2896
+ },
2897
+ "claude source file failed"
2898
+ );
2743
2899
  await recordError(bundle, batch.batch_id, {
2744
2900
  kind: "claude_file_failed",
2745
2901
  message: error instanceof Error ? error.message : String(error),
@@ -2748,9 +2904,12 @@ async function compileClaude(bundle, root) {
2748
2904
  }
2749
2905
  }
2750
2906
  linkSubagentParents2(bundle);
2907
+ logger?.debug({ batch_id: batch.batch_id }, "claude subagent parent links refreshed");
2751
2908
  finishBatch(bundle, batch, counts, "completed");
2909
+ logger?.info({ batch_id: batch.batch_id, counts }, "claude batch completed");
2752
2910
  } catch (error) {
2753
2911
  finishBatch(bundle, batch, counts, "failed");
2912
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "claude batch failed");
2754
2913
  throw error;
2755
2914
  }
2756
2915
  return { batch, counts };
@@ -2804,7 +2963,7 @@ function addCounts2(target, source) {
2804
2963
  target.edges += source.edges;
2805
2964
  target.errors += source.errors;
2806
2965
  }
2807
- async function compileClaudeFile(bundle, batch, file) {
2966
+ async function compileClaudeFile(bundle, batch, file, logger) {
2808
2967
  const counts = emptyFileCounts2();
2809
2968
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
2810
2969
  sourceTool: "claude",
@@ -2814,9 +2973,17 @@ async function compileClaudeFile(bundle, batch, file) {
2814
2973
  });
2815
2974
  if (alreadyKnown) {
2816
2975
  counts.source_files_skipped = 1;
2976
+ logger?.debug(
2977
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
2978
+ "claude source file skipped"
2979
+ );
2817
2980
  return counts;
2818
2981
  }
2819
2982
  counts.source_files_imported = 1;
2983
+ logger?.debug(
2984
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
2985
+ "claude source file registered"
2986
+ );
2820
2987
  const text = await readFile5(file.filePath, "utf8");
2821
2988
  const rawLines = text.split("\n");
2822
2989
  const lines = rawLines[rawLines.length - 1] === "" ? rawLines.slice(0, -1) : rawLines;
@@ -2833,7 +3000,8 @@ async function compileClaudeFile(bundle, batch, file) {
2833
3000
  artifacts: [],
2834
3001
  edges: [],
2835
3002
  searchDocs: [],
2836
- uuidToMessageId: /* @__PURE__ */ new Map()
3003
+ uuidToMessageId: /* @__PURE__ */ new Map(),
3004
+ objects: createPendingObjects()
2837
3005
  };
2838
3006
  let modelFirst = null;
2839
3007
  let modelLast = null;
@@ -2848,7 +3016,7 @@ async function compileClaudeFile(bundle, batch, file) {
2848
3016
  const lineNo = i + 1;
2849
3017
  const ordinal = i;
2850
3018
  const lineBytes = Buffer.from(line, "utf8");
2851
- const rawObjectId = await putBytes(bundle, lineBytes, {
3019
+ const rawObjectId = stageBytes(pending.objects, lineBytes, {
2852
3020
  mimeType: "application/jsonl-line",
2853
3021
  encoding: "utf-8"
2854
3022
  });
@@ -2859,7 +3027,7 @@ async function compileClaudeFile(bundle, batch, file) {
2859
3027
  } catch {
2860
3028
  parserStatus = "failed";
2861
3029
  }
2862
- const decodedObjectId = parsed != null && parserStatus === "ok" ? await putJson(bundle, parsed) : null;
3030
+ const decodedObjectId = null;
2863
3031
  const nativeId = parsed?.uuid ?? null;
2864
3032
  const rawRecordId2 = rawRecordId(sourceFile.source_file_id, ordinal, rawObjectId);
2865
3033
  pending.rawRecords.push({
@@ -2957,7 +3125,7 @@ async function compileClaudeFile(bundle, batch, file) {
2957
3125
  raw_record_id: rawRecordId2
2958
3126
  });
2959
3127
  if (content.length > PREVIEW_MAX2) {
2960
- const fullId = await putText(bundle, content);
3128
+ const fullId = stageText(pending.objects, content);
2961
3129
  const last = pending.blocks[pending.blocks.length - 1];
2962
3130
  if (last) last.text_object_id = fullId;
2963
3131
  }
@@ -3097,6 +3265,7 @@ async function compileClaudeFile(bundle, batch, file) {
3097
3265
  pending.session.git_branch_initial ??= branchInitial;
3098
3266
  }
3099
3267
  buildSearchDocs2(pending);
3268
+ await flushPendingObjects(bundle, pending.objects);
3100
3269
  transactional(bundle.db, () => {
3101
3270
  flushPending2(bundle, pending, { modelFirst, modelLast });
3102
3271
  });
@@ -3109,6 +3278,10 @@ async function compileClaudeFile(bundle, batch, file) {
3109
3278
  counts.tool_results = pending.toolResults.length;
3110
3279
  counts.artifacts = pending.artifacts.length;
3111
3280
  counts.edges = pending.edges.length;
3281
+ logger?.debug(
3282
+ { path: file.filePath, source_file_id: sourceFile.source_file_id, counts },
3283
+ "claude source file imported"
3284
+ );
3112
3285
  return counts;
3113
3286
  }
3114
3287
  function createSessionFromFirstRecord(file, parsed, meta, ts, rawRecordId2) {
@@ -3156,7 +3329,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
3156
3329
  event_id: null,
3157
3330
  ordinal: blockOrdinal,
3158
3331
  block_type: "text",
3159
- text_object_id: text.length > PREVIEW_MAX2 ? await putText(bundle, text) : null,
3332
+ text_object_id: text.length > PREVIEW_MAX2 ? stageText(pending.objects, text) : null,
3160
3333
  text_inline: text.slice(0, PREVIEW_MAX2),
3161
3334
  is_error: 0,
3162
3335
  visibility: "default",
@@ -3172,7 +3345,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
3172
3345
  event_id: null,
3173
3346
  ordinal: blockOrdinal,
3174
3347
  block_type: "thinking",
3175
- text_object_id: text.length > PREVIEW_MAX2 ? await putText(bundle, text) : null,
3348
+ text_object_id: text.length > PREVIEW_MAX2 ? stageText(pending.objects, text) : null,
3176
3349
  text_inline: text.slice(0, PREVIEW_MAX2),
3177
3350
  is_error: 0,
3178
3351
  visibility: "hidden_by_default",
@@ -3184,7 +3357,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
3184
3357
  const tu = block;
3185
3358
  const sourceCallId = tu.id ?? `${blockOrdinal}`;
3186
3359
  const toolName = tu.name ?? "unknown";
3187
- const argsId = tu.input != null ? await putJson(bundle, tu.input) : null;
3360
+ const argsId = tu.input != null ? stageJson(pending.objects, tu.input) : null;
3188
3361
  const command = inferCommandFromArgs2(toolName, tu.input);
3189
3362
  const filePath = inferPathFromArgs2(tu.input);
3190
3363
  const tcId = toolCallId(sessionId2, sourceCallId);
@@ -3225,13 +3398,14 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
3225
3398
  const sourceCallId = tr.tool_use_id ?? null;
3226
3399
  const isError = tr.is_error === true ? 1 : 0;
3227
3400
  const text = stringifyOrNull2(tr.content) ?? "";
3401
+ const overflowId = text.length > PREVIEW_MAX2 ? stageText(pending.objects, text) : null;
3228
3402
  pending.blocks.push({
3229
3403
  block_id: blkId,
3230
3404
  message_id: messageId2,
3231
3405
  event_id: null,
3232
3406
  ordinal: blockOrdinal,
3233
3407
  block_type: "tool_result",
3234
- text_object_id: text.length > PREVIEW_MAX2 ? await putText(bundle, text) : null,
3408
+ text_object_id: overflowId,
3235
3409
  text_inline: text.slice(0, PREVIEW_MAX2),
3236
3410
  is_error: isError,
3237
3411
  visibility: "default",
@@ -3250,7 +3424,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
3250
3424
  duration_ms: null,
3251
3425
  stdout_object_id: null,
3252
3426
  stderr_object_id: null,
3253
- output_object_id: text.length > PREVIEW_MAX2 ? await putText(bundle, text) : null,
3427
+ output_object_id: overflowId,
3254
3428
  preview: text.slice(0, PREVIEW_MAX2),
3255
3429
  raw_record_id: rawRecordId2
3256
3430
  });
@@ -3678,17 +3852,34 @@ async function readdirSafe2(dir) {
3678
3852
 
3679
3853
  // src/importers/gemini/index.ts
3680
3854
  var PREVIEW_MAX3 = 4e3;
3681
- async function compileGemini(bundle, root) {
3855
+ async function compileGemini(bundle, root, options = {}) {
3856
+ const logger = options.logger;
3682
3857
  const batch = startBatch(bundle, "gemini", [root]);
3683
3858
  const counts = emptyCounts();
3859
+ logger?.info({ batch_id: batch.batch_id, root }, "gemini batch started");
3684
3860
  try {
3685
3861
  for await (const file of discoverGeminiChats(root)) {
3686
3862
  counts.source_files_seen++;
3863
+ logger?.debug(
3864
+ {
3865
+ path: file.filePath,
3866
+ project_dir: file.projectDir,
3867
+ project_root: file.projectRoot
3868
+ },
3869
+ "gemini source file discovered"
3870
+ );
3687
3871
  try {
3688
- const fc = await compileGeminiFile(bundle, batch, file);
3872
+ const fc = await compileGeminiFile(bundle, batch, file, logger);
3689
3873
  addCounts3(counts, fc);
3690
3874
  } catch (error) {
3691
3875
  counts.errors++;
3876
+ logger?.warn(
3877
+ {
3878
+ err: error,
3879
+ path: file.filePath
3880
+ },
3881
+ "gemini source file failed"
3882
+ );
3692
3883
  await recordError(bundle, batch.batch_id, {
3693
3884
  kind: "gemini_file_failed",
3694
3885
  message: error instanceof Error ? error.message : String(error),
@@ -3697,8 +3888,10 @@ async function compileGemini(bundle, root) {
3697
3888
  }
3698
3889
  }
3699
3890
  finishBatch(bundle, batch, counts, "completed");
3891
+ logger?.info({ batch_id: batch.batch_id, counts }, "gemini batch completed");
3700
3892
  } catch (error) {
3701
3893
  finishBatch(bundle, batch, counts, "failed");
3894
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "gemini batch failed");
3702
3895
  throw error;
3703
3896
  }
3704
3897
  return { batch, counts };
@@ -3733,7 +3926,7 @@ function addCounts3(target, source) {
3733
3926
  target.edges += source.edges;
3734
3927
  target.errors += source.errors;
3735
3928
  }
3736
- async function compileGeminiFile(bundle, batch, file) {
3929
+ async function compileGeminiFile(bundle, batch, file, logger) {
3737
3930
  const counts = emptyFileCounts3();
3738
3931
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
3739
3932
  sourceTool: "gemini",
@@ -3743,12 +3936,21 @@ async function compileGeminiFile(bundle, batch, file) {
3743
3936
  });
3744
3937
  if (alreadyKnown) {
3745
3938
  counts.source_files_skipped = 1;
3939
+ logger?.debug(
3940
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
3941
+ "gemini source file skipped"
3942
+ );
3746
3943
  return counts;
3747
3944
  }
3748
3945
  counts.source_files_imported = 1;
3946
+ logger?.debug(
3947
+ { path: file.filePath, source_file_id: sourceFile.source_file_id },
3948
+ "gemini source file registered"
3949
+ );
3749
3950
  const text = await readFile7(file.filePath, "utf8");
3750
3951
  const parsed = JSON.parse(text);
3751
- const fileObjectId = await putBytes(bundle, Buffer.from(text, "utf8"), {
3952
+ const objects = createPendingObjects();
3953
+ const fileObjectId = stageBytes(objects, Buffer.from(text, "utf8"), {
3752
3954
  mimeType: "application/json",
3753
3955
  encoding: "utf-8"
3754
3956
  });
@@ -3778,7 +3980,8 @@ async function compileGeminiFile(bundle, batch, file) {
3778
3980
  toolResults: [],
3779
3981
  artifacts: [],
3780
3982
  searchDocs: [],
3781
- project: null
3983
+ project: null,
3984
+ objects
3782
3985
  };
3783
3986
  const sourceSid = parsed.sessionId ?? path11.basename(file.filePath, ".json");
3784
3987
  const sessionPk = sessionId("gemini", sourceSid);
@@ -3815,6 +4018,7 @@ async function compileGeminiFile(bundle, batch, file) {
3815
4018
  );
3816
4019
  }
3817
4020
  buildSearchDocs3(pending);
4021
+ await flushPendingObjects(bundle, pending.objects);
3818
4022
  transactional(bundle.db, () => {
3819
4023
  flushPending3(bundle, pending);
3820
4024
  });
@@ -3826,12 +4030,16 @@ async function compileGeminiFile(bundle, batch, file) {
3826
4030
  counts.tool_calls = pending.toolCallsList.length;
3827
4031
  counts.tool_results = pending.toolResults.length;
3828
4032
  counts.artifacts = pending.artifacts.length;
4033
+ logger?.debug(
4034
+ { path: file.filePath, source_file_id: sourceFile.source_file_id, counts },
4035
+ "gemini source file imported"
4036
+ );
3829
4037
  return counts;
3830
4038
  }
3831
4039
  async function processMessage(bundle, sessionId2, sourceFileId2, index, msg, batchId, pending) {
3832
4040
  const ordinal = index + 1;
3833
4041
  const ts = msg.timestamp ?? null;
3834
- const payloadId = await putJson(bundle, msg);
4042
+ const payloadId = stageJson(pending.objects, msg);
3835
4043
  const pointer = `/messages/${index}`;
3836
4044
  const rawObjectIdInput = sha256Hex(`${pointer}
3837
4045
  ${JSON.stringify(msg)}`);
@@ -3948,7 +4156,7 @@ ${JSON.stringify(msg)}`);
3948
4156
  }
3949
4157
  async function pushTextBlock(bundle, pending, messageId2, blockOrdinal, blockType, text, rawRecordId2, visibility = "default") {
3950
4158
  if (!text) return;
3951
- const overflowId = text.length > PREVIEW_MAX3 ? await putText(bundle, text) : null;
4159
+ const overflowId = text.length > PREVIEW_MAX3 ? stageText(pending.objects, text) : null;
3952
4160
  pending.blocks.push({
3953
4161
  block_id: blockId(messageId2, blockOrdinal),
3954
4162
  message_id: messageId2,
@@ -3965,7 +4173,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
3965
4173
  const sourceCallId = tc.id ?? `${messageId2}:${index}`;
3966
4174
  const toolName = tc.name ?? "unknown";
3967
4175
  const toolCallId2 = toolCallId(sessionId2, sourceCallId);
3968
- const argsObjectId = tc.args ? await putJson(bundle, tc.args) : null;
4176
+ const argsObjectId = tc.args ? stageJson(pending.objects, tc.args) : null;
3969
4177
  pending.toolCallsList.push({
3970
4178
  tool_call_id: toolCallId2,
3971
4179
  message_id: messageId2,
@@ -3984,7 +4192,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
3984
4192
  });
3985
4193
  const isError = tc.status === "error" ? 1 : 0;
3986
4194
  const resultText = renderToolResultText(tc.result);
3987
- const overflowId = resultText.length > PREVIEW_MAX3 ? await putText(bundle, resultText) : null;
4195
+ const overflowId = resultText.length > PREVIEW_MAX3 ? stageText(pending.objects, resultText) : null;
3988
4196
  pending.toolResults.push({
3989
4197
  tool_result_id: toolResultId(sessionId2, sourceCallId),
3990
4198
  tool_call_id: toolCallId2,
@@ -4001,7 +4209,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
4001
4209
  const rd = tc.resultDisplay;
4002
4210
  if (rd.fileDiff || rd.filePath) {
4003
4211
  const diffText = rd.fileDiff ?? "";
4004
- const diffId = diffText ? await putText(bundle, diffText, { mimeType: "text/x-diff" }) : null;
4212
+ const diffId = diffText ? stageText(pending.objects, diffText, { mimeType: "text/x-diff" }) : null;
4005
4213
  pending.artifacts.push({
4006
4214
  artifact_id: artifactId(sessionId2, "gemini", `${toolCallId2}:diff`),
4007
4215
  kind: "diff",
@@ -4393,17 +4601,34 @@ async function readdirSafe3(dir) {
4393
4601
 
4394
4602
  // src/importers/cursor/index.ts
4395
4603
  var PREVIEW_MAX4 = 4e3;
4396
- async function compileCursor(bundle, root) {
4604
+ async function compileCursor(bundle, root, options = {}) {
4605
+ const logger = options.logger;
4397
4606
  const batch = startBatch(bundle, "cursor", [root]);
4398
4607
  const counts = emptyCounts();
4608
+ logger?.info({ batch_id: batch.batch_id, root }, "cursor batch started");
4399
4609
  try {
4400
4610
  for await (const store of discoverCursorStores(root)) {
4401
4611
  counts.source_files_seen++;
4612
+ logger?.debug(
4613
+ {
4614
+ path: store.filePath,
4615
+ workspace_id: store.workspaceId,
4616
+ agent_id: store.agentId
4617
+ },
4618
+ "cursor store discovered"
4619
+ );
4402
4620
  try {
4403
- const fc = await compileCursorStore(bundle, batch, store);
4621
+ const fc = await compileCursorStore(bundle, batch, store, logger);
4404
4622
  addCounts4(counts, fc);
4405
4623
  } catch (error) {
4406
4624
  counts.errors++;
4625
+ logger?.warn(
4626
+ {
4627
+ err: error,
4628
+ path: store.filePath
4629
+ },
4630
+ "cursor store failed"
4631
+ );
4407
4632
  await recordError(bundle, batch.batch_id, {
4408
4633
  kind: "cursor_store_failed",
4409
4634
  message: error instanceof Error ? error.message : String(error),
@@ -4412,8 +4637,10 @@ async function compileCursor(bundle, root) {
4412
4637
  }
4413
4638
  }
4414
4639
  finishBatch(bundle, batch, counts, "completed");
4640
+ logger?.info({ batch_id: batch.batch_id, counts }, "cursor batch completed");
4415
4641
  } catch (error) {
4416
4642
  finishBatch(bundle, batch, counts, "failed");
4643
+ logger?.error({ err: error, batch_id: batch.batch_id, counts }, "cursor batch failed");
4417
4644
  throw error;
4418
4645
  }
4419
4646
  return { batch, counts };
@@ -4448,7 +4675,7 @@ function addCounts4(target, source) {
4448
4675
  target.edges += source.edges;
4449
4676
  target.errors += source.errors;
4450
4677
  }
4451
- async function compileCursorStore(bundle, batch, store) {
4678
+ async function compileCursorStore(bundle, batch, store, logger) {
4452
4679
  const counts = emptyFileCounts4();
4453
4680
  const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
4454
4681
  sourceTool: "cursor",
@@ -4458,9 +4685,17 @@ async function compileCursorStore(bundle, batch, store) {
4458
4685
  });
4459
4686
  if (alreadyKnown) {
4460
4687
  counts.source_files_skipped = 1;
4688
+ logger?.debug(
4689
+ { path: store.filePath, source_file_id: sourceFile.source_file_id },
4690
+ "cursor store skipped"
4691
+ );
4461
4692
  return counts;
4462
4693
  }
4463
4694
  counts.source_files_imported = 1;
4695
+ logger?.debug(
4696
+ { path: store.filePath, source_file_id: sourceFile.source_file_id },
4697
+ "cursor store registered"
4698
+ );
4464
4699
  const cdb = new Database2(store.filePath, { readonly: true, fileMustExist: true });
4465
4700
  try {
4466
4701
  const pending = {
@@ -4473,7 +4708,8 @@ async function compileCursorStore(bundle, batch, store) {
4473
4708
  toolCallsList: [],
4474
4709
  toolResults: [],
4475
4710
  artifacts: [],
4476
- searchDocs: []
4711
+ searchDocs: [],
4712
+ objects: createPendingObjects()
4477
4713
  };
4478
4714
  const metaRow = cdb.prepare(`SELECT value FROM meta WHERE key='0'`).get();
4479
4715
  let meta = {};
@@ -4485,7 +4721,7 @@ async function compileCursorStore(bundle, batch, store) {
4485
4721
  } catch {
4486
4722
  meta = {};
4487
4723
  }
4488
- const metaObjId = await putBytes(bundle, Buffer.from(metaText, "utf8"), {
4724
+ const metaObjId = stageBytes(pending.objects, Buffer.from(metaText, "utf8"), {
4489
4725
  mimeType: "application/json",
4490
4726
  encoding: "utf-8"
4491
4727
  });
@@ -4524,7 +4760,7 @@ async function compileCursorStore(bundle, batch, store) {
4524
4760
  const blob = blobs[i];
4525
4761
  if (!blob) continue;
4526
4762
  const ordinal = i + 1;
4527
- const blobObjectId = await putBytes(bundle, blob.data);
4763
+ const blobObjectId = stageBytes(pending.objects, blob.data);
4528
4764
  const blobRawId = rawRecordId(sourceFile.source_file_id, ordinal, blobObjectId);
4529
4765
  let parsed = null;
4530
4766
  const firstByte = blob.data[0];
@@ -4544,7 +4780,7 @@ async function compileCursorStore(bundle, batch, store) {
4544
4780
  json_pointer: `blobs/${blob.id}`,
4545
4781
  native_id: blob.id,
4546
4782
  raw_object_id: blobObjectId,
4547
- decoded_json_object_id: parsed != null ? await putJson(bundle, parsed) : null,
4783
+ decoded_json_object_id: parsed != null ? stageJson(pending.objects, parsed) : null,
4548
4784
  parser_status: parsed != null ? "ok" : looksJson ? "failed" : "partial",
4549
4785
  confidence: "low",
4550
4786
  // timeline order from blob list isn't canonical
@@ -4599,6 +4835,7 @@ async function compileCursorStore(bundle, batch, store) {
4599
4835
  }
4600
4836
  }
4601
4837
  buildSearchDocs4(pending);
4838
+ await flushPendingObjects(bundle, pending.objects);
4602
4839
  transactional(bundle.db, () => {
4603
4840
  flushPending4(bundle, pending);
4604
4841
  });
@@ -4610,6 +4847,10 @@ async function compileCursorStore(bundle, batch, store) {
4610
4847
  counts.tool_calls = pending.toolCallsList.length;
4611
4848
  counts.tool_results = pending.toolResults.length;
4612
4849
  counts.artifacts = pending.artifacts.length;
4850
+ logger?.debug(
4851
+ { path: store.filePath, source_file_id: sourceFile.source_file_id, counts },
4852
+ "cursor store imported"
4853
+ );
4613
4854
  return counts;
4614
4855
  } finally {
4615
4856
  cdb.close();
@@ -4634,7 +4875,7 @@ function mapRole(role) {
4634
4875
  }
4635
4876
  async function pushTextBlock2(bundle, pending, messageId2, ordinal, blockType, text, rawRecordId2, visibility = "default") {
4636
4877
  if (!text) return;
4637
- const overflow = text.length > PREVIEW_MAX4 ? await putText(bundle, text) : null;
4878
+ const overflow = text.length > PREVIEW_MAX4 ? stageText(pending.objects, text) : null;
4638
4879
  pending.blocks.push({
4639
4880
  block_id: blockId(messageId2, ordinal),
4640
4881
  message_id: messageId2,
@@ -4685,7 +4926,7 @@ async function processContentItem(bundle, sessionId2, messageId2, eventId2, ordi
4685
4926
  if (t === "tool-call") {
4686
4927
  const sourceCallId = item.toolCallId ?? `${ordinal}`;
4687
4928
  const toolName = item.toolName ?? "unknown";
4688
- const argsObjectId = item.args != null ? await putJson(bundle, item.args) : null;
4929
+ const argsObjectId = item.args != null ? stageJson(pending.objects, item.args) : null;
4689
4930
  const tcId = toolCallId(sessionId2, sourceCallId);
4690
4931
  pending.blocks.push({
4691
4932
  block_id: blockId(messageId2, ordinal),
@@ -4722,7 +4963,7 @@ async function processContentItem(bundle, sessionId2, messageId2, eventId2, ordi
4722
4963
  if (t === "tool-result") {
4723
4964
  const sourceCallId = item.toolCallId ?? `${ordinal}`;
4724
4965
  const text = stringifyOrNull3(item.result) ?? "";
4725
- const overflow = text.length > PREVIEW_MAX4 ? await putText(bundle, text) : null;
4966
+ const overflow = text.length > PREVIEW_MAX4 ? stageText(pending.objects, text) : null;
4726
4967
  const isError = readIsError(item) ? 1 : 0;
4727
4968
  pending.blocks.push({
4728
4969
  block_id: blockId(messageId2, ordinal),