@c3-oss/prosa 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -31
- package/dist/bin/prosa.js +601 -229
- package/dist/bin/prosa.js.map +1 -1
- package/dist/cli/main.js +601 -229
- package/dist/cli/main.js.map +1 -1
- package/dist/index.d.ts +11 -5
- package/dist/index.js +288 -47
- package/dist/index.js.map +1 -1
- package/package.json +4 -2
package/dist/cli/main.js
CHANGED
|
@@ -98,16 +98,27 @@ var init_hash = __esm({
|
|
|
98
98
|
// src/core/cas/index.ts
|
|
99
99
|
var cas_exports = {};
|
|
100
100
|
__export(cas_exports, {
|
|
101
|
+
createPendingObjects: () => createPendingObjects,
|
|
102
|
+
ensureDir: () => ensureDir,
|
|
103
|
+
flushPendingObjects: () => flushPendingObjects,
|
|
101
104
|
getBytes: () => getBytes,
|
|
102
105
|
getJson: () => getJson,
|
|
103
106
|
getObjectMeta: () => getObjectMeta,
|
|
104
107
|
getText: () => getText,
|
|
105
108
|
putBytes: () => putBytes,
|
|
106
109
|
putJson: () => putJson,
|
|
107
|
-
putText: () => putText
|
|
110
|
+
putText: () => putText,
|
|
111
|
+
stageBytes: () => stageBytes,
|
|
112
|
+
stageJson: () => stageJson,
|
|
113
|
+
stageText: () => stageText
|
|
108
114
|
});
|
|
109
115
|
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
110
116
|
import path2 from "path";
|
|
117
|
+
async function ensureDir(absoluteDir) {
|
|
118
|
+
if (ensuredDirs.has(absoluteDir)) return;
|
|
119
|
+
await mkdir2(absoluteDir, { recursive: true });
|
|
120
|
+
ensuredDirs.add(absoluteDir);
|
|
121
|
+
}
|
|
111
122
|
async function putBytes(bundle, bytes, options = {}) {
|
|
112
123
|
const hash = blake3Hex(bytes);
|
|
113
124
|
const objectId = objectIdFromHash(hash);
|
|
@@ -121,7 +132,7 @@ async function putBytes(bundle, bytes, options = {}) {
|
|
|
121
132
|
const { bytes: stored, compression } = compressBytes(bytes);
|
|
122
133
|
const storagePath = objectStoragePath(hash, compression);
|
|
123
134
|
const absolutePath = path2.join(bundle.path, storagePath);
|
|
124
|
-
await
|
|
135
|
+
await ensureDir(path2.dirname(absolutePath));
|
|
125
136
|
await writeFile2(absolutePath, stored);
|
|
126
137
|
prepare(
|
|
127
138
|
bundle.db,
|
|
@@ -185,18 +196,126 @@ function getObjectMeta(bundle, objectId) {
|
|
|
185
196
|
FROM objects WHERE object_id = ?`
|
|
186
197
|
).get(objectId) ?? null;
|
|
187
198
|
}
|
|
199
|
+
function createPendingObjects() {
|
|
200
|
+
return { byId: /* @__PURE__ */ new Map() };
|
|
201
|
+
}
|
|
202
|
+
function stageBytes(pending, bytes, options = {}) {
|
|
203
|
+
const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
|
|
204
|
+
const hash = blake3Hex(buf);
|
|
205
|
+
const objectId = objectIdFromHash(hash);
|
|
206
|
+
if (!pending.byId.has(objectId)) {
|
|
207
|
+
pending.byId.set(objectId, {
|
|
208
|
+
objectId,
|
|
209
|
+
hash,
|
|
210
|
+
bytes: buf,
|
|
211
|
+
mimeType: options.mimeType ?? null,
|
|
212
|
+
encoding: options.encoding ?? null
|
|
213
|
+
});
|
|
214
|
+
}
|
|
215
|
+
return objectId;
|
|
216
|
+
}
|
|
217
|
+
function stageText(pending, text, options = {}) {
|
|
218
|
+
return stageBytes(pending, Buffer.from(text, "utf8"), {
|
|
219
|
+
mimeType: options.mimeType ?? "text/plain; charset=utf-8",
|
|
220
|
+
encoding: "utf-8"
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
function stageJson(pending, value) {
|
|
224
|
+
return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
|
|
225
|
+
mimeType: "application/json",
|
|
226
|
+
encoding: "utf-8"
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
async function flushPendingObjects(bundle, pending) {
|
|
230
|
+
if (pending.byId.size === 0) return;
|
|
231
|
+
const ids = [...pending.byId.keys()];
|
|
232
|
+
const existingIds = queryExistingObjectIds(bundle, ids);
|
|
233
|
+
const toWrite = [];
|
|
234
|
+
for (const obj of pending.byId.values()) {
|
|
235
|
+
if (existingIds.has(obj.objectId)) continue;
|
|
236
|
+
const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
|
|
237
|
+
const storagePath = objectStoragePath(obj.hash, compression);
|
|
238
|
+
toWrite.push({
|
|
239
|
+
staged: obj,
|
|
240
|
+
compression,
|
|
241
|
+
compressedBytes,
|
|
242
|
+
storagePath,
|
|
243
|
+
absolutePath: path2.join(bundle.path, storagePath)
|
|
244
|
+
});
|
|
245
|
+
}
|
|
246
|
+
if (toWrite.length > 0) {
|
|
247
|
+
await writeFilesParallel(toWrite);
|
|
248
|
+
}
|
|
249
|
+
const insertObject = prepare(
|
|
250
|
+
bundle.db,
|
|
251
|
+
`INSERT OR IGNORE INTO objects (
|
|
252
|
+
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
253
|
+
compression, mime_type, encoding, storage_path, created_at
|
|
254
|
+
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
255
|
+
);
|
|
256
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
257
|
+
for (const p of toWrite) {
|
|
258
|
+
insertObject.run(
|
|
259
|
+
p.staged.objectId,
|
|
260
|
+
p.staged.hash,
|
|
261
|
+
p.staged.bytes.byteLength,
|
|
262
|
+
p.compression === "zstd" ? p.compressedBytes.byteLength : null,
|
|
263
|
+
p.compression,
|
|
264
|
+
p.staged.mimeType,
|
|
265
|
+
p.staged.encoding,
|
|
266
|
+
p.storagePath,
|
|
267
|
+
now
|
|
268
|
+
);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
function queryExistingObjectIds(bundle, ids) {
|
|
272
|
+
const found = /* @__PURE__ */ new Set();
|
|
273
|
+
if (ids.length === 0) return found;
|
|
274
|
+
const CHUNK = 500;
|
|
275
|
+
for (let start = 0; start < ids.length; start += CHUNK) {
|
|
276
|
+
const slice = ids.slice(start, start + CHUNK);
|
|
277
|
+
const placeholders = slice.map(() => "?").join(",");
|
|
278
|
+
const rows = bundle.db.prepare(
|
|
279
|
+
`SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
|
|
280
|
+
).all(...slice);
|
|
281
|
+
for (const row of rows) found.add(row.object_id);
|
|
282
|
+
}
|
|
283
|
+
return found;
|
|
284
|
+
}
|
|
285
|
+
async function writeFilesParallel(tasks) {
|
|
286
|
+
let cursor = 0;
|
|
287
|
+
const workers = [];
|
|
288
|
+
const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
|
|
289
|
+
for (let w = 0; w < limit; w++) {
|
|
290
|
+
workers.push(
|
|
291
|
+
(async () => {
|
|
292
|
+
while (true) {
|
|
293
|
+
const i = cursor++;
|
|
294
|
+
if (i >= tasks.length) return;
|
|
295
|
+
const task = tasks[i];
|
|
296
|
+
await ensureDir(path2.dirname(task.absolutePath));
|
|
297
|
+
await writeFile2(task.absolutePath, task.compressedBytes);
|
|
298
|
+
}
|
|
299
|
+
})()
|
|
300
|
+
);
|
|
301
|
+
}
|
|
302
|
+
await Promise.all(workers);
|
|
303
|
+
}
|
|
304
|
+
var ensuredDirs, FS_WRITE_CONCURRENCY;
|
|
188
305
|
var init_cas = __esm({
|
|
189
306
|
"src/core/cas/index.ts"() {
|
|
190
307
|
"use strict";
|
|
191
308
|
init_db();
|
|
192
309
|
init_compress();
|
|
193
310
|
init_hash();
|
|
311
|
+
ensuredDirs = /* @__PURE__ */ new Set();
|
|
312
|
+
FS_WRITE_CONCURRENCY = 16;
|
|
194
313
|
}
|
|
195
314
|
});
|
|
196
315
|
|
|
197
316
|
// src/services/indexing.ts
|
|
198
|
-
import { mkdir as mkdir4, rm, writeFile as
|
|
199
|
-
import
|
|
317
|
+
import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
|
|
318
|
+
import path13 from "path";
|
|
200
319
|
function enableFts5Triggers(bundle) {
|
|
201
320
|
bundle.db.exec(FTS5_TRIGGER_SQL);
|
|
202
321
|
}
|
|
@@ -291,7 +410,7 @@ async function rebuildTantivyIndex(bundle) {
|
|
|
291
410
|
try {
|
|
292
411
|
const tantivy = await import("@oxdev03/node-tantivy-binding");
|
|
293
412
|
const schema = new tantivy.SchemaBuilder().addTextField("doc_id", { stored: true, tokenizerName: "raw" }).addTextField("entity_type", { stored: true, tokenizerName: "raw" }).addTextField("entity_id", { stored: true, tokenizerName: "raw" }).addTextField("session_id", { stored: true, tokenizerName: "raw" }).addTextField("project_id", { stored: true, tokenizerName: "raw" }).addTextField("timestamp", { stored: true, tokenizerName: "raw" }).addTextField("role", { stored: true, tokenizerName: "raw" }).addTextField("tool_name", { stored: true, tokenizerName: "raw" }).addTextField("canonical_tool_type", { stored: true, tokenizerName: "raw" }).addTextField("field_kind", { stored: true, tokenizerName: "raw" }).addTextField("text", { stored: true }).build();
|
|
294
|
-
await
|
|
413
|
+
await rm2(bundle.paths.tantivy, { recursive: true, force: true });
|
|
295
414
|
await mkdir4(bundle.paths.tantivy, { recursive: true });
|
|
296
415
|
const index = new tantivy.Index(schema, bundle.paths.tantivy, false);
|
|
297
416
|
const writer = index.writer(5e7, 1);
|
|
@@ -320,8 +439,8 @@ async function rebuildTantivyIndex(bundle) {
|
|
|
320
439
|
}
|
|
321
440
|
writer.commit();
|
|
322
441
|
index.reload();
|
|
323
|
-
await
|
|
324
|
-
|
|
442
|
+
await writeFile5(
|
|
443
|
+
path13.join(bundle.paths.tantivy, "prosa-index.json"),
|
|
325
444
|
`${JSON.stringify(
|
|
326
445
|
{
|
|
327
446
|
engine: "tantivy",
|
|
@@ -1001,7 +1120,8 @@ var PROSA_PARSER_VERSION = "0.1.0";
|
|
|
1001
1120
|
var PROSA_SCHEMA_VERSION = 2;
|
|
1002
1121
|
|
|
1003
1122
|
// src/cli/commands/compile.ts
|
|
1004
|
-
import
|
|
1123
|
+
import os2 from "os";
|
|
1124
|
+
import path14 from "path";
|
|
1005
1125
|
import { Command } from "commander";
|
|
1006
1126
|
|
|
1007
1127
|
// src/core/bundle.ts
|
|
@@ -1627,8 +1747,9 @@ async function recordError(bundle, batchId, args) {
|
|
|
1627
1747
|
// src/core/ingest/idempotency.ts
|
|
1628
1748
|
init_compress();
|
|
1629
1749
|
init_hash();
|
|
1750
|
+
init_cas();
|
|
1630
1751
|
init_db();
|
|
1631
|
-
import { access as access2,
|
|
1752
|
+
import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
|
|
1632
1753
|
import path3 from "path";
|
|
1633
1754
|
async function registerSourceFile(bundle, args) {
|
|
1634
1755
|
const st = await stat2(args.absolutePath);
|
|
@@ -1714,7 +1835,7 @@ async function preserveRawSourceBytes(bundle, bytes) {
|
|
|
1714
1835
|
const { bytes: stored, compression } = compressBytes(bytes);
|
|
1715
1836
|
const storagePath = rawSourceStoragePath(hash, compression);
|
|
1716
1837
|
const absolutePath = path3.join(bundle.path, storagePath);
|
|
1717
|
-
await
|
|
1838
|
+
await ensureDir(path3.dirname(absolutePath));
|
|
1718
1839
|
if (!await fileExists(absolutePath)) {
|
|
1719
1840
|
await writeFile3(absolutePath, stored);
|
|
1720
1841
|
}
|
|
@@ -1814,17 +1935,34 @@ async function readdirSafe(dir) {
|
|
|
1814
1935
|
|
|
1815
1936
|
// src/importers/claude/index.ts
|
|
1816
1937
|
var PREVIEW_MAX = 4e3;
|
|
1817
|
-
async function compileClaude(bundle, root) {
|
|
1938
|
+
async function compileClaude(bundle, root, options = {}) {
|
|
1939
|
+
const logger = options.logger;
|
|
1818
1940
|
const batch = startBatch(bundle, "claude", [root]);
|
|
1819
1941
|
const counts = emptyCounts();
|
|
1942
|
+
logger?.info({ batch_id: batch.batch_id, root }, "claude batch started");
|
|
1820
1943
|
try {
|
|
1821
1944
|
for await (const file of discoverClaudeFiles(root)) {
|
|
1822
1945
|
counts.source_files_seen++;
|
|
1946
|
+
logger?.debug(
|
|
1947
|
+
{
|
|
1948
|
+
path: file.filePath,
|
|
1949
|
+
project_slug: file.projectSlug,
|
|
1950
|
+
is_subagent: file.isSubagent
|
|
1951
|
+
},
|
|
1952
|
+
"claude source file discovered"
|
|
1953
|
+
);
|
|
1823
1954
|
try {
|
|
1824
|
-
const fc = await compileClaudeFile(bundle, batch, file);
|
|
1955
|
+
const fc = await compileClaudeFile(bundle, batch, file, logger);
|
|
1825
1956
|
addCounts(counts, fc);
|
|
1826
1957
|
} catch (error) {
|
|
1827
1958
|
counts.errors++;
|
|
1959
|
+
logger?.warn(
|
|
1960
|
+
{
|
|
1961
|
+
err: error,
|
|
1962
|
+
path: file.filePath
|
|
1963
|
+
},
|
|
1964
|
+
"claude source file failed"
|
|
1965
|
+
);
|
|
1828
1966
|
await recordError(bundle, batch.batch_id, {
|
|
1829
1967
|
kind: "claude_file_failed",
|
|
1830
1968
|
message: error instanceof Error ? error.message : String(error),
|
|
@@ -1833,9 +1971,12 @@ async function compileClaude(bundle, root) {
|
|
|
1833
1971
|
}
|
|
1834
1972
|
}
|
|
1835
1973
|
linkSubagentParents(bundle);
|
|
1974
|
+
logger?.debug({ batch_id: batch.batch_id }, "claude subagent parent links refreshed");
|
|
1836
1975
|
finishBatch(bundle, batch, counts, "completed");
|
|
1976
|
+
logger?.info({ batch_id: batch.batch_id, counts }, "claude batch completed");
|
|
1837
1977
|
} catch (error) {
|
|
1838
1978
|
finishBatch(bundle, batch, counts, "failed");
|
|
1979
|
+
logger?.error({ err: error, batch_id: batch.batch_id, counts }, "claude batch failed");
|
|
1839
1980
|
throw error;
|
|
1840
1981
|
}
|
|
1841
1982
|
return { batch, counts };
|
|
@@ -1889,7 +2030,7 @@ function addCounts(target, source) {
|
|
|
1889
2030
|
target.edges += source.edges;
|
|
1890
2031
|
target.errors += source.errors;
|
|
1891
2032
|
}
|
|
1892
|
-
async function compileClaudeFile(bundle, batch, file) {
|
|
2033
|
+
async function compileClaudeFile(bundle, batch, file, logger) {
|
|
1893
2034
|
const counts = emptyFileCounts();
|
|
1894
2035
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
1895
2036
|
sourceTool: "claude",
|
|
@@ -1899,9 +2040,17 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
1899
2040
|
});
|
|
1900
2041
|
if (alreadyKnown) {
|
|
1901
2042
|
counts.source_files_skipped = 1;
|
|
2043
|
+
logger?.debug(
|
|
2044
|
+
{ path: file.filePath, source_file_id: sourceFile.source_file_id },
|
|
2045
|
+
"claude source file skipped"
|
|
2046
|
+
);
|
|
1902
2047
|
return counts;
|
|
1903
2048
|
}
|
|
1904
2049
|
counts.source_files_imported = 1;
|
|
2050
|
+
logger?.debug(
|
|
2051
|
+
{ path: file.filePath, source_file_id: sourceFile.source_file_id },
|
|
2052
|
+
"claude source file registered"
|
|
2053
|
+
);
|
|
1905
2054
|
const text = await readFile4(file.filePath, "utf8");
|
|
1906
2055
|
const rawLines = text.split("\n");
|
|
1907
2056
|
const lines = rawLines[rawLines.length - 1] === "" ? rawLines.slice(0, -1) : rawLines;
|
|
@@ -1918,7 +2067,8 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
1918
2067
|
artifacts: [],
|
|
1919
2068
|
edges: [],
|
|
1920
2069
|
searchDocs: [],
|
|
1921
|
-
uuidToMessageId: /* @__PURE__ */ new Map()
|
|
2070
|
+
uuidToMessageId: /* @__PURE__ */ new Map(),
|
|
2071
|
+
objects: createPendingObjects()
|
|
1922
2072
|
};
|
|
1923
2073
|
let modelFirst = null;
|
|
1924
2074
|
let modelLast = null;
|
|
@@ -1933,7 +2083,7 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
1933
2083
|
const lineNo = i + 1;
|
|
1934
2084
|
const ordinal = i;
|
|
1935
2085
|
const lineBytes = Buffer.from(line, "utf8");
|
|
1936
|
-
const rawObjectId =
|
|
2086
|
+
const rawObjectId = stageBytes(pending.objects, lineBytes, {
|
|
1937
2087
|
mimeType: "application/jsonl-line",
|
|
1938
2088
|
encoding: "utf-8"
|
|
1939
2089
|
});
|
|
@@ -1944,7 +2094,7 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
1944
2094
|
} catch {
|
|
1945
2095
|
parserStatus = "failed";
|
|
1946
2096
|
}
|
|
1947
|
-
const decodedObjectId =
|
|
2097
|
+
const decodedObjectId = null;
|
|
1948
2098
|
const nativeId = parsed?.uuid ?? null;
|
|
1949
2099
|
const rawRecordId2 = rawRecordId(sourceFile.source_file_id, ordinal, rawObjectId);
|
|
1950
2100
|
pending.rawRecords.push({
|
|
@@ -2042,7 +2192,7 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
2042
2192
|
raw_record_id: rawRecordId2
|
|
2043
2193
|
});
|
|
2044
2194
|
if (content.length > PREVIEW_MAX) {
|
|
2045
|
-
const fullId =
|
|
2195
|
+
const fullId = stageText(pending.objects, content);
|
|
2046
2196
|
const last = pending.blocks[pending.blocks.length - 1];
|
|
2047
2197
|
if (last) last.text_object_id = fullId;
|
|
2048
2198
|
}
|
|
@@ -2182,6 +2332,7 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
2182
2332
|
pending.session.git_branch_initial ??= branchInitial;
|
|
2183
2333
|
}
|
|
2184
2334
|
buildSearchDocs(pending);
|
|
2335
|
+
await flushPendingObjects(bundle, pending.objects);
|
|
2185
2336
|
transactional(bundle.db, () => {
|
|
2186
2337
|
flushPending(bundle, pending, { modelFirst, modelLast });
|
|
2187
2338
|
});
|
|
@@ -2194,6 +2345,10 @@ async function compileClaudeFile(bundle, batch, file) {
|
|
|
2194
2345
|
counts.tool_results = pending.toolResults.length;
|
|
2195
2346
|
counts.artifacts = pending.artifacts.length;
|
|
2196
2347
|
counts.edges = pending.edges.length;
|
|
2348
|
+
logger?.debug(
|
|
2349
|
+
{ path: file.filePath, source_file_id: sourceFile.source_file_id, counts },
|
|
2350
|
+
"claude source file imported"
|
|
2351
|
+
);
|
|
2197
2352
|
return counts;
|
|
2198
2353
|
}
|
|
2199
2354
|
function createSessionFromFirstRecord(file, parsed, meta, ts, rawRecordId2) {
|
|
@@ -2241,7 +2396,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
|
|
|
2241
2396
|
event_id: null,
|
|
2242
2397
|
ordinal: blockOrdinal,
|
|
2243
2398
|
block_type: "text",
|
|
2244
|
-
text_object_id: text.length > PREVIEW_MAX ?
|
|
2399
|
+
text_object_id: text.length > PREVIEW_MAX ? stageText(pending.objects, text) : null,
|
|
2245
2400
|
text_inline: text.slice(0, PREVIEW_MAX),
|
|
2246
2401
|
is_error: 0,
|
|
2247
2402
|
visibility: "default",
|
|
@@ -2257,7 +2412,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
|
|
|
2257
2412
|
event_id: null,
|
|
2258
2413
|
ordinal: blockOrdinal,
|
|
2259
2414
|
block_type: "thinking",
|
|
2260
|
-
text_object_id: text.length > PREVIEW_MAX ?
|
|
2415
|
+
text_object_id: text.length > PREVIEW_MAX ? stageText(pending.objects, text) : null,
|
|
2261
2416
|
text_inline: text.slice(0, PREVIEW_MAX),
|
|
2262
2417
|
is_error: 0,
|
|
2263
2418
|
visibility: "hidden_by_default",
|
|
@@ -2269,7 +2424,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
|
|
|
2269
2424
|
const tu = block;
|
|
2270
2425
|
const sourceCallId = tu.id ?? `${blockOrdinal}`;
|
|
2271
2426
|
const toolName = tu.name ?? "unknown";
|
|
2272
|
-
const argsId = tu.input != null ?
|
|
2427
|
+
const argsId = tu.input != null ? stageJson(pending.objects, tu.input) : null;
|
|
2273
2428
|
const command = inferCommandFromArgs(toolName, tu.input);
|
|
2274
2429
|
const filePath = inferPathFromArgs(tu.input);
|
|
2275
2430
|
const tcId = toolCallId(sessionId2, sourceCallId);
|
|
@@ -2310,13 +2465,14 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
|
|
|
2310
2465
|
const sourceCallId = tr.tool_use_id ?? null;
|
|
2311
2466
|
const isError = tr.is_error === true ? 1 : 0;
|
|
2312
2467
|
const text = stringifyOrNull(tr.content) ?? "";
|
|
2468
|
+
const overflowId = text.length > PREVIEW_MAX ? stageText(pending.objects, text) : null;
|
|
2313
2469
|
pending.blocks.push({
|
|
2314
2470
|
block_id: blkId,
|
|
2315
2471
|
message_id: messageId2,
|
|
2316
2472
|
event_id: null,
|
|
2317
2473
|
ordinal: blockOrdinal,
|
|
2318
2474
|
block_type: "tool_result",
|
|
2319
|
-
text_object_id:
|
|
2475
|
+
text_object_id: overflowId,
|
|
2320
2476
|
text_inline: text.slice(0, PREVIEW_MAX),
|
|
2321
2477
|
is_error: isError,
|
|
2322
2478
|
visibility: "default",
|
|
@@ -2335,7 +2491,7 @@ async function processContentBlock(bundle, sessionId2, messageId2, eventId2, blo
|
|
|
2335
2491
|
duration_ms: null,
|
|
2336
2492
|
stdout_object_id: null,
|
|
2337
2493
|
stderr_object_id: null,
|
|
2338
|
-
output_object_id:
|
|
2494
|
+
output_object_id: overflowId,
|
|
2339
2495
|
preview: text.slice(0, PREVIEW_MAX),
|
|
2340
2496
|
raw_record_id: rawRecordId2
|
|
2341
2497
|
});
|
|
@@ -2750,17 +2906,27 @@ async function* walk(dir) {
|
|
|
2750
2906
|
|
|
2751
2907
|
// src/importers/codex/index.ts
|
|
2752
2908
|
var PREVIEW_MAX2 = 4e3;
|
|
2753
|
-
async function compileCodex(bundle, root) {
|
|
2909
|
+
async function compileCodex(bundle, root, options = {}) {
|
|
2910
|
+
const logger = options.logger;
|
|
2754
2911
|
const batch = startBatch(bundle, "codex", [root]);
|
|
2755
2912
|
const counts = emptyCounts();
|
|
2913
|
+
logger?.info({ batch_id: batch.batch_id, root }, "codex batch started");
|
|
2756
2914
|
try {
|
|
2757
2915
|
for await (const filePath of discoverCodexSessions(root)) {
|
|
2758
2916
|
counts.source_files_seen++;
|
|
2917
|
+
logger?.debug({ path: filePath }, "codex source file discovered");
|
|
2759
2918
|
try {
|
|
2760
|
-
const fileCounts = await compileCodexFile(bundle, batch, filePath);
|
|
2919
|
+
const fileCounts = await compileCodexFile(bundle, batch, filePath, logger);
|
|
2761
2920
|
addCounts2(counts, fileCounts);
|
|
2762
2921
|
} catch (error) {
|
|
2763
2922
|
counts.errors++;
|
|
2923
|
+
logger?.warn(
|
|
2924
|
+
{
|
|
2925
|
+
err: error,
|
|
2926
|
+
path: filePath
|
|
2927
|
+
},
|
|
2928
|
+
"codex source file failed"
|
|
2929
|
+
);
|
|
2764
2930
|
await recordError(bundle, batch.batch_id, {
|
|
2765
2931
|
kind: "codex_file_failed",
|
|
2766
2932
|
message: error instanceof Error ? error.message : String(error),
|
|
@@ -2769,9 +2935,12 @@ async function compileCodex(bundle, root) {
|
|
|
2769
2935
|
}
|
|
2770
2936
|
}
|
|
2771
2937
|
linkSubagentParents2(bundle);
|
|
2938
|
+
logger?.debug({ batch_id: batch.batch_id }, "codex subagent parent links refreshed");
|
|
2772
2939
|
finishBatch(bundle, batch, counts, "completed");
|
|
2940
|
+
logger?.info({ batch_id: batch.batch_id, counts }, "codex batch completed");
|
|
2773
2941
|
} catch (error) {
|
|
2774
2942
|
finishBatch(bundle, batch, counts, "failed");
|
|
2943
|
+
logger?.error({ err: error, batch_id: batch.batch_id, counts }, "codex batch failed");
|
|
2775
2944
|
throw error;
|
|
2776
2945
|
}
|
|
2777
2946
|
return { batch, counts };
|
|
@@ -2824,7 +2993,7 @@ function addCounts2(target, source) {
|
|
|
2824
2993
|
target.edges += source.edges;
|
|
2825
2994
|
target.errors += source.errors;
|
|
2826
2995
|
}
|
|
2827
|
-
async function compileCodexFile(bundle, batch, filePath) {
|
|
2996
|
+
async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
2828
2997
|
const counts = emptyFileCounts2();
|
|
2829
2998
|
const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
|
|
2830
2999
|
sourceTool: "codex",
|
|
@@ -2833,9 +3002,17 @@ async function compileCodexFile(bundle, batch, filePath) {
|
|
|
2833
3002
|
});
|
|
2834
3003
|
if (alreadyKnown) {
|
|
2835
3004
|
counts.source_files_skipped = 1;
|
|
3005
|
+
logger?.debug(
|
|
3006
|
+
{ path: filePath, source_file_id: sourceFileRow.source_file_id },
|
|
3007
|
+
"codex source file skipped"
|
|
3008
|
+
);
|
|
2836
3009
|
return counts;
|
|
2837
3010
|
}
|
|
2838
3011
|
counts.source_files_imported = 1;
|
|
3012
|
+
logger?.debug(
|
|
3013
|
+
{ path: filePath, source_file_id: sourceFileRow.source_file_id },
|
|
3014
|
+
"codex source file registered"
|
|
3015
|
+
);
|
|
2839
3016
|
const text = await readFile5(filePath, "utf8");
|
|
2840
3017
|
const rawLines = text.split("\n");
|
|
2841
3018
|
const lines = rawLines[rawLines.length - 1] === "" ? rawLines.slice(0, -1) : rawLines;
|
|
@@ -2852,7 +3029,8 @@ async function compileCodexFile(bundle, batch, filePath) {
|
|
|
2852
3029
|
toolResults: [],
|
|
2853
3030
|
artifacts: [],
|
|
2854
3031
|
edges: [],
|
|
2855
|
-
searchDocs: []
|
|
3032
|
+
searchDocs: [],
|
|
3033
|
+
objects: createPendingObjects()
|
|
2856
3034
|
};
|
|
2857
3035
|
let sessionStartTs = null;
|
|
2858
3036
|
let sessionEndTs = null;
|
|
@@ -2866,7 +3044,7 @@ async function compileCodexFile(bundle, batch, filePath) {
|
|
|
2866
3044
|
const lineNo = i + 1;
|
|
2867
3045
|
const ordinal = i;
|
|
2868
3046
|
const lineBytes = Buffer.from(line, "utf8");
|
|
2869
|
-
const rawObjectId =
|
|
3047
|
+
const rawObjectId = stageBytes(pending.objects, lineBytes, {
|
|
2870
3048
|
mimeType: "application/jsonl-line",
|
|
2871
3049
|
encoding: "utf-8"
|
|
2872
3050
|
});
|
|
@@ -2877,7 +3055,7 @@ async function compileCodexFile(bundle, batch, filePath) {
|
|
|
2877
3055
|
} catch {
|
|
2878
3056
|
parserStatus = "failed";
|
|
2879
3057
|
}
|
|
2880
|
-
const decodedObjectId =
|
|
3058
|
+
const decodedObjectId = null;
|
|
2881
3059
|
const nativeId = parsed ? extractNativeId(parsed) : null;
|
|
2882
3060
|
const rawRecordId2 = rawRecordId(sourceFileRow.source_file_id, ordinal, rawObjectId);
|
|
2883
3061
|
pending.rawRecords.push({
|
|
@@ -3046,6 +3224,7 @@ async function compileCodexFile(bundle, batch, filePath) {
|
|
|
3046
3224
|
pending.session.start_ts ??= sessionStartTs;
|
|
3047
3225
|
}
|
|
3048
3226
|
buildSearchDocs2(pending);
|
|
3227
|
+
await flushPendingObjects(bundle, pending.objects);
|
|
3049
3228
|
transactional(bundle.db, () => {
|
|
3050
3229
|
flushPending2(bundle, pending, {
|
|
3051
3230
|
sessionEndTs,
|
|
@@ -3064,6 +3243,10 @@ async function compileCodexFile(bundle, batch, filePath) {
|
|
|
3064
3243
|
counts.tool_results = pending.toolResults.length;
|
|
3065
3244
|
counts.artifacts = pending.artifacts.length;
|
|
3066
3245
|
counts.edges = pending.edges.length;
|
|
3246
|
+
logger?.debug(
|
|
3247
|
+
{ path: filePath, source_file_id: sourceFileRow.source_file_id, counts },
|
|
3248
|
+
"codex source file imported"
|
|
3249
|
+
);
|
|
3067
3250
|
return counts;
|
|
3068
3251
|
}
|
|
3069
3252
|
function handleResponseItem(_bundle, sessionId2, currentTurnId, rawRecordId2, ordinal, ts, ri, payloadObjectId, nextMsgOrdinal, currentModel, pending) {
|
|
@@ -3223,8 +3406,8 @@ async function handleEventMsg(bundle, sessionId2, currentTurnId, rawRecordId2, o
|
|
|
3223
3406
|
const subtype = em.type ?? "unknown";
|
|
3224
3407
|
if (subtype === "exec_command_end") {
|
|
3225
3408
|
const sourceCallId = em.call_id ?? null;
|
|
3226
|
-
const stdoutId = em.stdout ?
|
|
3227
|
-
const stderrId = em.stderr ?
|
|
3409
|
+
const stdoutId = em.stdout ? stageText(pending.objects, em.stdout, { mimeType: "text/plain" }) : null;
|
|
3410
|
+
const stderrId = em.stderr ? stageText(pending.objects, em.stderr, { mimeType: "text/plain" }) : null;
|
|
3228
3411
|
const preview = (em.formatted_output ?? em.aggregated_output ?? em.stdout ?? "").slice(
|
|
3229
3412
|
0,
|
|
3230
3413
|
PREVIEW_MAX2
|
|
@@ -3871,17 +4054,34 @@ async function readdirSafe2(dir) {
|
|
|
3871
4054
|
|
|
3872
4055
|
// src/importers/cursor/index.ts
|
|
3873
4056
|
var PREVIEW_MAX3 = 4e3;
|
|
3874
|
-
async function compileCursor(bundle, root) {
|
|
4057
|
+
async function compileCursor(bundle, root, options = {}) {
|
|
4058
|
+
const logger = options.logger;
|
|
3875
4059
|
const batch = startBatch(bundle, "cursor", [root]);
|
|
3876
4060
|
const counts = emptyCounts();
|
|
4061
|
+
logger?.info({ batch_id: batch.batch_id, root }, "cursor batch started");
|
|
3877
4062
|
try {
|
|
3878
4063
|
for await (const store of discoverCursorStores(root)) {
|
|
3879
4064
|
counts.source_files_seen++;
|
|
4065
|
+
logger?.debug(
|
|
4066
|
+
{
|
|
4067
|
+
path: store.filePath,
|
|
4068
|
+
workspace_id: store.workspaceId,
|
|
4069
|
+
agent_id: store.agentId
|
|
4070
|
+
},
|
|
4071
|
+
"cursor store discovered"
|
|
4072
|
+
);
|
|
3880
4073
|
try {
|
|
3881
|
-
const fc = await compileCursorStore(bundle, batch, store);
|
|
4074
|
+
const fc = await compileCursorStore(bundle, batch, store, logger);
|
|
3882
4075
|
addCounts3(counts, fc);
|
|
3883
4076
|
} catch (error) {
|
|
3884
4077
|
counts.errors++;
|
|
4078
|
+
logger?.warn(
|
|
4079
|
+
{
|
|
4080
|
+
err: error,
|
|
4081
|
+
path: store.filePath
|
|
4082
|
+
},
|
|
4083
|
+
"cursor store failed"
|
|
4084
|
+
);
|
|
3885
4085
|
await recordError(bundle, batch.batch_id, {
|
|
3886
4086
|
kind: "cursor_store_failed",
|
|
3887
4087
|
message: error instanceof Error ? error.message : String(error),
|
|
@@ -3890,8 +4090,10 @@ async function compileCursor(bundle, root) {
|
|
|
3890
4090
|
}
|
|
3891
4091
|
}
|
|
3892
4092
|
finishBatch(bundle, batch, counts, "completed");
|
|
4093
|
+
logger?.info({ batch_id: batch.batch_id, counts }, "cursor batch completed");
|
|
3893
4094
|
} catch (error) {
|
|
3894
4095
|
finishBatch(bundle, batch, counts, "failed");
|
|
4096
|
+
logger?.error({ err: error, batch_id: batch.batch_id, counts }, "cursor batch failed");
|
|
3895
4097
|
throw error;
|
|
3896
4098
|
}
|
|
3897
4099
|
return { batch, counts };
|
|
@@ -3926,7 +4128,7 @@ function addCounts3(target, source) {
|
|
|
3926
4128
|
target.edges += source.edges;
|
|
3927
4129
|
target.errors += source.errors;
|
|
3928
4130
|
}
|
|
3929
|
-
async function compileCursorStore(bundle, batch, store) {
|
|
4131
|
+
async function compileCursorStore(bundle, batch, store, logger) {
|
|
3930
4132
|
const counts = emptyFileCounts3();
|
|
3931
4133
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
3932
4134
|
sourceTool: "cursor",
|
|
@@ -3936,9 +4138,17 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
3936
4138
|
});
|
|
3937
4139
|
if (alreadyKnown) {
|
|
3938
4140
|
counts.source_files_skipped = 1;
|
|
4141
|
+
logger?.debug(
|
|
4142
|
+
{ path: store.filePath, source_file_id: sourceFile.source_file_id },
|
|
4143
|
+
"cursor store skipped"
|
|
4144
|
+
);
|
|
3939
4145
|
return counts;
|
|
3940
4146
|
}
|
|
3941
4147
|
counts.source_files_imported = 1;
|
|
4148
|
+
logger?.debug(
|
|
4149
|
+
{ path: store.filePath, source_file_id: sourceFile.source_file_id },
|
|
4150
|
+
"cursor store registered"
|
|
4151
|
+
);
|
|
3942
4152
|
const cdb = new Database2(store.filePath, { readonly: true, fileMustExist: true });
|
|
3943
4153
|
try {
|
|
3944
4154
|
const pending = {
|
|
@@ -3951,7 +4161,8 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
3951
4161
|
toolCallsList: [],
|
|
3952
4162
|
toolResults: [],
|
|
3953
4163
|
artifacts: [],
|
|
3954
|
-
searchDocs: []
|
|
4164
|
+
searchDocs: [],
|
|
4165
|
+
objects: createPendingObjects()
|
|
3955
4166
|
};
|
|
3956
4167
|
const metaRow = cdb.prepare(`SELECT value FROM meta WHERE key='0'`).get();
|
|
3957
4168
|
let meta = {};
|
|
@@ -3963,7 +4174,7 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
3963
4174
|
} catch {
|
|
3964
4175
|
meta = {};
|
|
3965
4176
|
}
|
|
3966
|
-
const metaObjId =
|
|
4177
|
+
const metaObjId = stageBytes(pending.objects, Buffer.from(metaText, "utf8"), {
|
|
3967
4178
|
mimeType: "application/json",
|
|
3968
4179
|
encoding: "utf-8"
|
|
3969
4180
|
});
|
|
@@ -4002,7 +4213,7 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
4002
4213
|
const blob = blobs[i];
|
|
4003
4214
|
if (!blob) continue;
|
|
4004
4215
|
const ordinal = i + 1;
|
|
4005
|
-
const blobObjectId =
|
|
4216
|
+
const blobObjectId = stageBytes(pending.objects, blob.data);
|
|
4006
4217
|
const blobRawId = rawRecordId(sourceFile.source_file_id, ordinal, blobObjectId);
|
|
4007
4218
|
let parsed = null;
|
|
4008
4219
|
const firstByte = blob.data[0];
|
|
@@ -4022,7 +4233,7 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
4022
4233
|
json_pointer: `blobs/${blob.id}`,
|
|
4023
4234
|
native_id: blob.id,
|
|
4024
4235
|
raw_object_id: blobObjectId,
|
|
4025
|
-
decoded_json_object_id: parsed != null ?
|
|
4236
|
+
decoded_json_object_id: parsed != null ? stageJson(pending.objects, parsed) : null,
|
|
4026
4237
|
parser_status: parsed != null ? "ok" : looksJson ? "failed" : "partial",
|
|
4027
4238
|
confidence: "low",
|
|
4028
4239
|
// timeline order from blob list isn't canonical
|
|
@@ -4077,6 +4288,7 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
4077
4288
|
}
|
|
4078
4289
|
}
|
|
4079
4290
|
buildSearchDocs3(pending);
|
|
4291
|
+
await flushPendingObjects(bundle, pending.objects);
|
|
4080
4292
|
transactional(bundle.db, () => {
|
|
4081
4293
|
flushPending3(bundle, pending);
|
|
4082
4294
|
});
|
|
@@ -4088,6 +4300,10 @@ async function compileCursorStore(bundle, batch, store) {
|
|
|
4088
4300
|
counts.tool_calls = pending.toolCallsList.length;
|
|
4089
4301
|
counts.tool_results = pending.toolResults.length;
|
|
4090
4302
|
counts.artifacts = pending.artifacts.length;
|
|
4303
|
+
logger?.debug(
|
|
4304
|
+
{ path: store.filePath, source_file_id: sourceFile.source_file_id, counts },
|
|
4305
|
+
"cursor store imported"
|
|
4306
|
+
);
|
|
4091
4307
|
return counts;
|
|
4092
4308
|
} finally {
|
|
4093
4309
|
cdb.close();
|
|
@@ -4112,7 +4328,7 @@ function mapRole(role) {
|
|
|
4112
4328
|
}
|
|
4113
4329
|
async function pushTextBlock(bundle, pending, messageId2, ordinal, blockType, text, rawRecordId2, visibility = "default") {
|
|
4114
4330
|
if (!text) return;
|
|
4115
|
-
const overflow = text.length > PREVIEW_MAX3 ?
|
|
4331
|
+
const overflow = text.length > PREVIEW_MAX3 ? stageText(pending.objects, text) : null;
|
|
4116
4332
|
pending.blocks.push({
|
|
4117
4333
|
block_id: blockId(messageId2, ordinal),
|
|
4118
4334
|
message_id: messageId2,
|
|
@@ -4163,7 +4379,7 @@ async function processContentItem(bundle, sessionId2, messageId2, eventId2, ordi
|
|
|
4163
4379
|
if (t === "tool-call") {
|
|
4164
4380
|
const sourceCallId = item.toolCallId ?? `${ordinal}`;
|
|
4165
4381
|
const toolName = item.toolName ?? "unknown";
|
|
4166
|
-
const argsObjectId = item.args != null ?
|
|
4382
|
+
const argsObjectId = item.args != null ? stageJson(pending.objects, item.args) : null;
|
|
4167
4383
|
const tcId = toolCallId(sessionId2, sourceCallId);
|
|
4168
4384
|
pending.blocks.push({
|
|
4169
4385
|
block_id: blockId(messageId2, ordinal),
|
|
@@ -4200,7 +4416,7 @@ async function processContentItem(bundle, sessionId2, messageId2, eventId2, ordi
|
|
|
4200
4416
|
if (t === "tool-result") {
|
|
4201
4417
|
const sourceCallId = item.toolCallId ?? `${ordinal}`;
|
|
4202
4418
|
const text = stringifyOrNull3(item.result) ?? "";
|
|
4203
|
-
const overflow = text.length > PREVIEW_MAX3 ?
|
|
4419
|
+
const overflow = text.length > PREVIEW_MAX3 ? stageText(pending.objects, text) : null;
|
|
4204
4420
|
const isError = readIsError(item) ? 1 : 0;
|
|
4205
4421
|
pending.blocks.push({
|
|
4206
4422
|
block_id: blockId(messageId2, ordinal),
|
|
@@ -4564,17 +4780,34 @@ async function readdirSafe3(dir) {
|
|
|
4564
4780
|
|
|
4565
4781
|
// src/importers/gemini/index.ts
|
|
4566
4782
|
var PREVIEW_MAX4 = 4e3;
|
|
4567
|
-
async function compileGemini(bundle, root) {
|
|
4783
|
+
async function compileGemini(bundle, root, options = {}) {
|
|
4784
|
+
const logger = options.logger;
|
|
4568
4785
|
const batch = startBatch(bundle, "gemini", [root]);
|
|
4569
4786
|
const counts = emptyCounts();
|
|
4787
|
+
logger?.info({ batch_id: batch.batch_id, root }, "gemini batch started");
|
|
4570
4788
|
try {
|
|
4571
4789
|
for await (const file of discoverGeminiChats(root)) {
|
|
4572
4790
|
counts.source_files_seen++;
|
|
4791
|
+
logger?.debug(
|
|
4792
|
+
{
|
|
4793
|
+
path: file.filePath,
|
|
4794
|
+
project_dir: file.projectDir,
|
|
4795
|
+
project_root: file.projectRoot
|
|
4796
|
+
},
|
|
4797
|
+
"gemini source file discovered"
|
|
4798
|
+
);
|
|
4573
4799
|
try {
|
|
4574
|
-
const fc = await compileGeminiFile(bundle, batch, file);
|
|
4800
|
+
const fc = await compileGeminiFile(bundle, batch, file, logger);
|
|
4575
4801
|
addCounts4(counts, fc);
|
|
4576
4802
|
} catch (error) {
|
|
4577
4803
|
counts.errors++;
|
|
4804
|
+
logger?.warn(
|
|
4805
|
+
{
|
|
4806
|
+
err: error,
|
|
4807
|
+
path: file.filePath
|
|
4808
|
+
},
|
|
4809
|
+
"gemini source file failed"
|
|
4810
|
+
);
|
|
4578
4811
|
await recordError(bundle, batch.batch_id, {
|
|
4579
4812
|
kind: "gemini_file_failed",
|
|
4580
4813
|
message: error instanceof Error ? error.message : String(error),
|
|
@@ -4583,8 +4816,10 @@ async function compileGemini(bundle, root) {
|
|
|
4583
4816
|
}
|
|
4584
4817
|
}
|
|
4585
4818
|
finishBatch(bundle, batch, counts, "completed");
|
|
4819
|
+
logger?.info({ batch_id: batch.batch_id, counts }, "gemini batch completed");
|
|
4586
4820
|
} catch (error) {
|
|
4587
4821
|
finishBatch(bundle, batch, counts, "failed");
|
|
4822
|
+
logger?.error({ err: error, batch_id: batch.batch_id, counts }, "gemini batch failed");
|
|
4588
4823
|
throw error;
|
|
4589
4824
|
}
|
|
4590
4825
|
return { batch, counts };
|
|
@@ -4619,7 +4854,7 @@ function addCounts4(target, source) {
|
|
|
4619
4854
|
target.edges += source.edges;
|
|
4620
4855
|
target.errors += source.errors;
|
|
4621
4856
|
}
|
|
4622
|
-
async function compileGeminiFile(bundle, batch, file) {
|
|
4857
|
+
async function compileGeminiFile(bundle, batch, file, logger) {
|
|
4623
4858
|
const counts = emptyFileCounts4();
|
|
4624
4859
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
4625
4860
|
sourceTool: "gemini",
|
|
@@ -4629,12 +4864,21 @@ async function compileGeminiFile(bundle, batch, file) {
|
|
|
4629
4864
|
});
|
|
4630
4865
|
if (alreadyKnown) {
|
|
4631
4866
|
counts.source_files_skipped = 1;
|
|
4867
|
+
logger?.debug(
|
|
4868
|
+
{ path: file.filePath, source_file_id: sourceFile.source_file_id },
|
|
4869
|
+
"gemini source file skipped"
|
|
4870
|
+
);
|
|
4632
4871
|
return counts;
|
|
4633
4872
|
}
|
|
4634
4873
|
counts.source_files_imported = 1;
|
|
4874
|
+
logger?.debug(
|
|
4875
|
+
{ path: file.filePath, source_file_id: sourceFile.source_file_id },
|
|
4876
|
+
"gemini source file registered"
|
|
4877
|
+
);
|
|
4635
4878
|
const text = await readFile7(file.filePath, "utf8");
|
|
4636
4879
|
const parsed = JSON.parse(text);
|
|
4637
|
-
const
|
|
4880
|
+
const objects = createPendingObjects();
|
|
4881
|
+
const fileObjectId = stageBytes(objects, Buffer.from(text, "utf8"), {
|
|
4638
4882
|
mimeType: "application/json",
|
|
4639
4883
|
encoding: "utf-8"
|
|
4640
4884
|
});
|
|
@@ -4664,7 +4908,8 @@ async function compileGeminiFile(bundle, batch, file) {
|
|
|
4664
4908
|
toolResults: [],
|
|
4665
4909
|
artifacts: [],
|
|
4666
4910
|
searchDocs: [],
|
|
4667
|
-
project: null
|
|
4911
|
+
project: null,
|
|
4912
|
+
objects
|
|
4668
4913
|
};
|
|
4669
4914
|
const sourceSid = parsed.sessionId ?? path11.basename(file.filePath, ".json");
|
|
4670
4915
|
const sessionPk = sessionId("gemini", sourceSid);
|
|
@@ -4701,6 +4946,7 @@ async function compileGeminiFile(bundle, batch, file) {
|
|
|
4701
4946
|
);
|
|
4702
4947
|
}
|
|
4703
4948
|
buildSearchDocs4(pending);
|
|
4949
|
+
await flushPendingObjects(bundle, pending.objects);
|
|
4704
4950
|
transactional(bundle.db, () => {
|
|
4705
4951
|
flushPending4(bundle, pending);
|
|
4706
4952
|
});
|
|
@@ -4712,12 +4958,16 @@ async function compileGeminiFile(bundle, batch, file) {
|
|
|
4712
4958
|
counts.tool_calls = pending.toolCallsList.length;
|
|
4713
4959
|
counts.tool_results = pending.toolResults.length;
|
|
4714
4960
|
counts.artifacts = pending.artifacts.length;
|
|
4961
|
+
logger?.debug(
|
|
4962
|
+
{ path: file.filePath, source_file_id: sourceFile.source_file_id, counts },
|
|
4963
|
+
"gemini source file imported"
|
|
4964
|
+
);
|
|
4715
4965
|
return counts;
|
|
4716
4966
|
}
|
|
4717
4967
|
async function processMessage(bundle, sessionId2, sourceFileId2, index, msg, batchId, pending) {
|
|
4718
4968
|
const ordinal = index + 1;
|
|
4719
4969
|
const ts = msg.timestamp ?? null;
|
|
4720
|
-
const payloadId =
|
|
4970
|
+
const payloadId = stageJson(pending.objects, msg);
|
|
4721
4971
|
const pointer = `/messages/${index}`;
|
|
4722
4972
|
const rawObjectIdInput = sha256Hex(`${pointer}
|
|
4723
4973
|
${JSON.stringify(msg)}`);
|
|
@@ -4834,7 +5084,7 @@ ${JSON.stringify(msg)}`);
|
|
|
4834
5084
|
}
|
|
4835
5085
|
async function pushTextBlock2(bundle, pending, messageId2, blockOrdinal, blockType, text, rawRecordId2, visibility = "default") {
|
|
4836
5086
|
if (!text) return;
|
|
4837
|
-
const overflowId = text.length > PREVIEW_MAX4 ?
|
|
5087
|
+
const overflowId = text.length > PREVIEW_MAX4 ? stageText(pending.objects, text) : null;
|
|
4838
5088
|
pending.blocks.push({
|
|
4839
5089
|
block_id: blockId(messageId2, blockOrdinal),
|
|
4840
5090
|
message_id: messageId2,
|
|
@@ -4851,7 +5101,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
|
|
|
4851
5101
|
const sourceCallId = tc.id ?? `${messageId2}:${index}`;
|
|
4852
5102
|
const toolName = tc.name ?? "unknown";
|
|
4853
5103
|
const toolCallId2 = toolCallId(sessionId2, sourceCallId);
|
|
4854
|
-
const argsObjectId = tc.args ?
|
|
5104
|
+
const argsObjectId = tc.args ? stageJson(pending.objects, tc.args) : null;
|
|
4855
5105
|
pending.toolCallsList.push({
|
|
4856
5106
|
tool_call_id: toolCallId2,
|
|
4857
5107
|
message_id: messageId2,
|
|
@@ -4870,7 +5120,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
|
|
|
4870
5120
|
});
|
|
4871
5121
|
const isError = tc.status === "error" ? 1 : 0;
|
|
4872
5122
|
const resultText = renderToolResultText(tc.result);
|
|
4873
|
-
const overflowId = resultText.length > PREVIEW_MAX4 ?
|
|
5123
|
+
const overflowId = resultText.length > PREVIEW_MAX4 ? stageText(pending.objects, resultText) : null;
|
|
4874
5124
|
pending.toolResults.push({
|
|
4875
5125
|
tool_result_id: toolResultId(sessionId2, sourceCallId),
|
|
4876
5126
|
tool_call_id: toolCallId2,
|
|
@@ -4887,7 +5137,7 @@ async function processToolCall(bundle, sessionId2, messageId2, eventId2, index,
|
|
|
4887
5137
|
const rd = tc.resultDisplay;
|
|
4888
5138
|
if (rd.fileDiff || rd.filePath) {
|
|
4889
5139
|
const diffText = rd.fileDiff ?? "";
|
|
4890
|
-
const diffId = diffText ?
|
|
5140
|
+
const diffId = diffText ? stageText(pending.objects, diffText, { mimeType: "text/x-diff" }) : null;
|
|
4891
5141
|
pending.artifacts.push({
|
|
4892
5142
|
artifact_id: artifactId(sessionId2, "gemini", `${toolCallId2}:diff`),
|
|
4893
5143
|
kind: "diff",
|
|
@@ -5242,55 +5492,312 @@ function flushPending4(bundle, pending) {
|
|
|
5242
5492
|
}
|
|
5243
5493
|
}
|
|
5244
5494
|
|
|
5495
|
+
// src/services/export/parquet.ts
|
|
5496
|
+
import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
|
|
5497
|
+
import path12 from "path";
|
|
5498
|
+
import { DuckDBConnection } from "@duckdb/node-api";
|
|
5499
|
+
// Names of the SQLite tables handled by the parquet module. exportBundleParquet
// writes one `<table>.parquet` file per entry and queryDuckDbParquet creates one
// DuckDB view per entry, so the order here is the order tables are processed.
var PARQUET_TABLES = [
  "objects", "source_files", "import_batches", "raw_records",
  "import_errors", "uncertainties", "projects", "sessions",
  "turns", "events", "messages", "content_blocks",
  "tool_calls", "tool_results", "artifacts", "edges",
  "search_docs"
];
|
|
5518
|
+
/**
 * Export every table listed in PARQUET_TABLES from a bundle's SQLite database
 * into Parquet files, then write a `manifest.json` describing the export.
 *
 * @param {{ bundlePath: string, outDir?: string }} options
 *   `bundlePath` is passed to openBundleSnapshot; `outDir` overrides the
 *   snapshot's default output directory.
 * @returns {Promise<{ outDir: string, manifestPath: string, files: object, counts: object }>}
 *   Resolved output directory, manifest path, table -> parquet path map, and
 *   the per-table row counts captured by the snapshot.
 */
async function exportBundleParquet(options) {
  const snapshot = await openBundleSnapshot(options.bundlePath);
  const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
  await mkdir3(outDir, { recursive: true });

  // table name -> absolute path of its parquet file
  const files = {};
  for (const table of PARQUET_TABLES) {
    files[table] = path12.join(outDir, `${table}.parquet`);
  }
  const manifestPath = path12.join(outDir, "manifest.json");

  // Remove outputs of a previous export before writing new ones.
  const staleOutputs = Object.values(files);
  staleOutputs.push(manifestPath);
  for (const stale of staleOutputs) {
    await rm(stale, { force: true });
  }

  const connection = await createDuckDbConnection();
  try {
    await attachSqlite(connection, snapshot.dbPath);
    for (const table of PARQUET_TABLES) {
      const source = `prosa.${quoteIdentifier(table)}`;
      const target = sqlString(files[table]);
      await connection.run(`COPY (SELECT * FROM ${source}) TO ${target} (FORMAT parquet)`);
    }
  } finally {
    connection.closeSync();
  }

  // Row counts come from the snapshot taken before the COPY statements ran.
  const manifest = {
    exported_at: (/* @__PURE__ */ new Date()).toISOString(),
    source_db: snapshot.dbPath,
    schema_version: snapshot.schemaVersion,
    parser_version: snapshot.parserVersion,
    tables: {}
  };
  for (const table of PARQUET_TABLES) {
    manifest.tables[table] = {
      file: path12.basename(files[table]),
      rows: snapshot.counts[table]
    };
  }
  const manifestText = `${JSON.stringify(manifest, null, 2)}\n`;
  await writeFile4(manifestPath, manifestText, "utf8");

  return { outDir, manifestPath, files, counts: snapshot.counts };
}
|
|
5559
|
+
/**
 * Run a SQL statement in DuckDB against a directory of exported Parquet files.
 *
 * A view named after each entry of PARQUET_TABLES is created over
 * `<parquetDir>/<table>.parquet` before `options.sql` is executed.
 *
 * @param {{ parquetDir: string, sql: string }} options
 * @returns {Promise<{ columns: string[], rows: object[] }>} Column names and
 *   rows as produced by the DuckDB result reader.
 * @throws {Error} With a "Parquet export not found" message (and the original
 *   DuckDB error as `cause`) when the failure looks like a missing parquet
 *   file; any other error is rethrown unchanged.
 */
async function queryDuckDbParquet(options) {
  const parquetDir = path12.resolve(options.parquetDir);
  const connection = await createDuckDbConnection();
  try {
    for (const table of PARQUET_TABLES) {
      const parquetFile = path12.join(parquetDir, `${table}.parquet`);
      await connection.run(
        `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(parquetFile)})`
      );
    }
    const reader = await connection.runAndReadAll(options.sql);
    return {
      columns: reader.deduplicatedColumnNames(),
      rows: reader.getRowObjectsJson()
    };
  } catch (error) {
    if (isMissingParquetError(error)) {
      // Keep the underlying DuckDB error reachable for debugging.
      throw new Error(
        `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`,
        { cause: error }
      );
    }
    throw error;
  } finally {
    connection.closeSync();
  }
}
|
|
5586
|
+
/**
 * Open a new DuckDB connection through `DuckDBConnection.create()`.
 *
 * @returns {Promise<object>} The connection; callers in this module close it
 *   with `closeSync()`.
 */
async function createDuckDbConnection() {
  const connection = await DuckDBConnection.create();
  return connection;
}
|
|
5589
|
+
/**
 * Install and load DuckDB's sqlite extension, then attach the SQLite database
 * at `dbPath` under the schema name `prosa`.
 *
 * @param {{ run(sql: string): Promise<unknown> }} connection DuckDB connection.
 * @param {string} dbPath Path to the SQLite database file.
 * @returns {Promise<void>}
 * @throws {Error} A wrapped error whose message includes the original message
 *   and whose `cause` is the original error, if any of the three statements fail.
 */
async function attachSqlite(connection, dbPath) {
  try {
    await connection.run("INSTALL sqlite");
    await connection.run("LOAD sqlite");
    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    // Preserve the original error (and its stack) on `cause`.
    throw new Error(
      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${detail}`,
      { cause: error }
    );
  }
}
|
|
5600
|
+
/**
 * Open the bundle at `bundlePath`, read what the parquet export needs from it,
 * and close it again before returning.
 *
 * @param {string} bundlePath Bundle directory passed to openBundle.
 * @returns {Promise<{ dbPath: string, schemaVersion: *, parserVersion: *, defaultOutDir: string, counts: object }>}
 *   Paths and versions copied from the bundle, plus a `count(*)` per table in
 *   PARQUET_TABLES (0 when the query yields no row).
 */
async function openBundleSnapshot(bundlePath) {
  const bundle = await openBundle(bundlePath);
  try {
    const counts = {};
    for (const table of PARQUET_TABLES) {
      const statement = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`);
      const row = statement.get();
      counts[table] = row?.n ?? 0;
    }
    const { paths, manifest } = bundle;
    return {
      dbPath: paths.db,
      schemaVersion: manifest.schema_version,
      parserVersion: manifest.parser_version,
      defaultOutDir: paths.parquet,
      counts
    };
  } finally {
    // Always release the bundle, including when a count query throws.
    closeBundle(bundle);
  }
}
|
|
5620
|
+
/**
 * Wrap `value` in double quotes for use as a SQL identifier, doubling any
 * embedded double quote.
 *
 * @param {string} value Identifier text.
 * @returns {string} The quoted identifier.
 */
function quoteIdentifier(value) {
  return `"${value.replaceAll('"', '""')}"`;
}
|
|
5623
|
+
/**
 * Wrap `value` in single quotes for use as a SQL string literal, doubling any
 * embedded single quote.
 *
 * @param {string} value Literal text (used here for file paths).
 * @returns {string} The quoted literal.
 */
function sqlString(value) {
  return `'${value.replaceAll("'", "''")}'`;
}
|
|
5626
|
+
/**
 * Decide whether an error looks like a missing parquet file.
 *
 * The message must mention `.parquet` and contain one of "No files found",
 * "does not exist" or "not found" (all matched case-insensitively).
 *
 * @param {unknown} error Thrown value; non-Error values are stringified.
 * @returns {boolean}
 */
function isMissingParquetError(error) {
  const text = error instanceof Error ? error.message : String(error);
  if (!/\.parquet/i.test(text)) {
    return false;
  }
  return /No files found|does not exist|not found/i.test(text);
}
|
|
5630
|
+
|
|
5245
5631
|
// src/cli/commands/compile.ts
|
|
5246
5632
|
init_indexing();
|
|
5633
|
+
|
|
5634
|
+
// src/cli/logger.ts
|
|
5635
|
+
import pino from "pino";
|
|
5636
|
+
import pretty from "pino-pretty";
|
|
5637
|
+
/**
 * Build the pino logger used by the compile commands.
 *
 * Both output modes write synchronously to file descriptor 2 (stderr).
 *
 * @param {{ verbose?: boolean, jsonLogs?: boolean }} options
 *   `verbose === true` selects the "debug" level (otherwise "info");
 *   `jsonLogs === true` emits raw pino output instead of pino-pretty output.
 * @returns {object} A pino logger instance.
 */
function createCliLogger(options) {
  const level = options.verbose === true ? "debug" : "info";
  const loggerOptions = { base: void 0, level };

  let stream;
  if (options.jsonLogs === true) {
    stream = pino.destination({ dest: 2, sync: true });
  } else {
    stream = pretty({
      colorize: process.stderr.isTTY,
      destination: 2,
      ignore: "pid,hostname",
      singleLine: true,
      sync: true,
      translateTime: "SYS:yyyy-mm-dd HH:MM:ss.l"
    });
  }
  return pino(loggerOptions, stream);
}
|
|
5657
|
+
|
|
5658
|
+
// src/cli/commands/compile.ts
|
|
5659
|
+
// One entry per supported agent CLI. Each entry supplies the subcommand name,
// help text, a lazily evaluated default source directory under the user's home
// directory, and the importer function invoked by runCompiles.
var PROVIDERS = (() => {
  // Returns a thunk so the home directory is looked up when the default is requested.
  const underHome = (...segments) => () => path14.join(os2.homedir(), ...segments);
  return [
    {
      name: "codex",
      description: "Import Codex CLI session histories into the bundle.",
      pathHelp: "root of Codex CLI sessions",
      defaultSessionsPath: underHome(".codex", "sessions"),
      compile: compileCodex
    },
    {
      name: "claude",
      description: "Import Claude Code project histories into the bundle.",
      pathHelp: "root of Claude Code projects",
      defaultSessionsPath: underHome(".claude", "projects"),
      compile: compileClaude
    },
    {
      name: "gemini",
      description: "Import Gemini CLI session histories into the bundle.",
      pathHelp: "root of Gemini CLI tmp dir",
      defaultSessionsPath: underHome(".gemini", "tmp"),
      compile: compileGemini
    },
    {
      name: "cursor",
      description: "Import Cursor agent stores into the bundle.",
      pathHelp: "root of Cursor agent stores",
      defaultSessionsPath: underHome(".cursor", "chats"),
      compile: compileCursor
    }
  ];
})();
|
|
5247
5689
|
/**
 * Build the `compile` command: a parent command with the shared logging
 * options and one subcommand per entry in PROVIDERS. Running `compile` without
 * a subcommand prints help and exits with an error.
 *
 * @returns {object} The configured commander Command.
 */
function compileCommand() {
  const base = new Command("compile").description(
    "Import session histories from one agent CLI into the bundle."
  );
  const command = addCompileLogOptions(base);
  PROVIDERS.forEach((provider) => {
    command.addCommand(providerCompileCommand(provider));
  });
  const showHelpAsError = () => {
    command.help({ error: true });
  };
  command.action(showHelpAsError);
  return command;
}
|
|
5703
|
+
/**
 * Build the `compile-all` command, which runs every provider in PROVIDERS
 * against its default source path and the default bundle path.
 *
 * @returns {object} The configured commander Command.
 */
function compileAllCommand() {
  const runAll = async (options) => {
    await runCompiles({
      providers: PROVIDERS,
      storePath: defaultBundlePath(),
      deferIndex: options.deferIndex === true,
      logOptions: options
    });
  };
  const withLogOptions = addCompileLogOptions(new Command("compile-all"));
  const configured = withLogOptions
    .description("Import all agent CLI session histories using default source paths.")
    .option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later");
  return configured.action(runAll);
}
|
|
5713
|
+
/**
 * Build the `compile <provider>` subcommand for one PROVIDERS entry.
 *
 * @param {{ name: string, description: string, pathHelp: string, defaultSessionsPath: () => string }} provider
 * @returns {object} The configured commander Command.
 */
function providerCompileCommand(provider) {
  const defaultSource = provider.defaultSessionsPath();
  const runOne = async (options, command) => {
    await runCompiles({
      providers: [provider],
      storePath: options.store,
      deferIndex: options.deferIndex === true,
      sessionsPath: options.sessionsPath,
      // Includes options declared on the parent `compile` command.
      logOptions: command.optsWithGlobals()
    });
  };
  const withLogOptions = addCompileLogOptions(new Command(provider.name));
  const configured = withLogOptions
    .description(provider.description)
    .option(
      "--sessions-path <path>",
      `${provider.pathHelp} (default: ${defaultSource})`,
      defaultSource
    )
    .option("--store <path>", "bundle directory", defaultBundlePath())
    .option("--defer-index", "skip immediate FTS5 updates; run `prosa index fts5` later");
  return configured.action(runOne);
}
|
|
5730
|
+
/**
 * Register the logging flags shared by the compile commands.
 *
 * @param {object} command Object exposing a chainable `option(flags, description)`.
 * @returns {object} Whatever the final `option()` call returns.
 */
function addCompileLogOptions(command) {
  const withVerbose = command.option("--verbose", "emit debug logs during compilation");
  return withVerbose.option(
    "--json-logs",
    "emit raw newline-delimited JSON logs instead of pretty logs"
  );
}
|
|
5733
|
+
/**
 * Run one or more provider importers against a bundle, then refresh the
 * derived indexes and the parquet export.
 *
 * Steps: open the bundle; optionally disable FTS5 triggers; run each provider's
 * `compile`; mark indexes; rebuild the tantivy index if anything was imported;
 * re-enable triggers and close the bundle; finally export parquet if anything
 * was imported. Tantivy and parquet failures are logged, not thrown.
 *
 * @param {object} options
 * @param {Array<object>} options.providers Entries shaped like PROVIDERS items.
 * @param {string} options.storePath Bundle directory (`~` is expanded).
 * @param {boolean} options.deferIndex Disable FTS5 triggers during the import.
 * @param {string} [options.sessionsPath] Overrides each provider's default source path.
 * @param {object} options.logOptions Passed to createCliLogger.
 * @returns {Promise<void>}
 */
async function runCompiles(options) {
  const logger = createCliLogger(options.logOptions);
  const storePath = resolvePath(options.storePath);
  logger.info({ store_path: storePath }, "opening bundle");
  const bundle = await openBundle(storePath);
  let importedAny = false;
  try {
    if (options.deferIndex) {
      logger.info("disabling FTS5 triggers for deferred indexing");
      disableFts5Triggers(bundle);
    }
    for (const provider of options.providers) {
      const sourcePath = resolvePath(options.sessionsPath ?? provider.defaultSessionsPath());
      const providerLogger = logger.child({
        source_tool: provider.name,
        source_path: sourcePath
      });
      providerLogger.info("starting compile");
      const r = await provider.compile(bundle, sourcePath, { logger: providerLogger });
      importedAny ||= r.counts.source_files_imported > 0;
      providerLogger.info(
        {
          batch_id: r.batch.batch_id,
          counts: r.counts
        },
        "compile finished"
      );
      printCounts(provider.name, r.batch.batch_id, r.counts);
    }
    logger.info({ changed: importedAny, fts5_deferred: options.deferIndex }, "marking indexes");
    markIndexesAfterImport(bundle, {
      changed: importedAny,
      fts5Deferred: options.deferIndex
    });
    if (importedAny) {
      try {
        logger.info("rebuilding tantivy index");
        const status = await rebuildTantivyIndex(bundle);
        process.stdout.write(`tantivy: indexed ${status.indexed_doc_count} docs\n`);
      } catch (error) {
        logger.error({ err: error }, "tantivy rebuild failed; SQLite data is intact");
      }
    }
  } finally {
    // Nested try/finally: the bundle must be closed even if re-enabling the
    // FTS5 triggers throws, otherwise the bundle handle would leak.
    try {
      if (options.deferIndex) {
        logger.info("re-enabling FTS5 triggers");
        enableFts5Triggers(bundle);
      }
    } finally {
      closeBundle(bundle);
      logger.info({ store_path: storePath }, "bundle closed");
    }
  }
  // The export reopens the bundle itself, so it runs after the bundle is closed.
  if (importedAny) {
    try {
      logger.info({ store_path: storePath }, "exporting parquet");
      const result = await exportBundleParquet({ bundlePath: storePath });
      const tableCount = Object.keys(result.files).length;
      process.stdout.write(`parquet: wrote ${tableCount} tables to ${result.outDir}\n`);
    } catch (error) {
      logger.error({ err: error }, "parquet export failed; SQLite data is intact");
    }
  }
}
|
|
5797
|
+
/**
 * Resolve a user-supplied path to an absolute path, expanding a leading `~`.
 *
 * `~` alone maps to the home directory; `~/rest` (or `~` followed by the
 * platform separator, e.g. `~\rest` on Windows) maps to `rest` under the home
 * directory. Anything else, including `~user`, goes through `path.resolve`.
 *
 * @param {string} p Path as typed by the user.
 * @returns {string} Absolute path.
 */
function resolvePath(p) {
  if (p === "~") return os2.homedir();
  const isHomeRelative = p.startsWith("~/") || p.startsWith(`~${path14.sep}`);
  if (isHomeRelative) return path14.join(os2.homedir(), p.slice(2));
  return path14.resolve(p);
}
|
|
5295
5802
|
function printCounts(label, batchId, c) {
|
|
5296
5803
|
process.stdout.write(
|
|
@@ -5419,142 +5926,6 @@ function renderToolCall(c) {
|
|
|
5419
5926
|
return lines.join("\n");
|
|
5420
5927
|
}
|
|
5421
5928
|
|
|
5422
|
-
// src/services/export/parquet.ts
|
|
5423
|
-
import { mkdir as mkdir5, rm as rm2, writeFile as writeFile5 } from "fs/promises";
|
|
5424
|
-
import path14 from "path";
|
|
5425
|
-
import { DuckDBConnection } from "@duckdb/node-api";
|
|
5426
|
-
var PARQUET_TABLES = [
|
|
5427
|
-
"objects",
|
|
5428
|
-
"source_files",
|
|
5429
|
-
"import_batches",
|
|
5430
|
-
"raw_records",
|
|
5431
|
-
"import_errors",
|
|
5432
|
-
"uncertainties",
|
|
5433
|
-
"projects",
|
|
5434
|
-
"sessions",
|
|
5435
|
-
"turns",
|
|
5436
|
-
"events",
|
|
5437
|
-
"messages",
|
|
5438
|
-
"content_blocks",
|
|
5439
|
-
"tool_calls",
|
|
5440
|
-
"tool_results",
|
|
5441
|
-
"artifacts",
|
|
5442
|
-
"edges",
|
|
5443
|
-
"search_docs"
|
|
5444
|
-
];
|
|
5445
|
-
async function exportBundleParquet(options) {
|
|
5446
|
-
const snapshot = await openBundleSnapshot(options.bundlePath);
|
|
5447
|
-
const outDir = path14.resolve(options.outDir ?? snapshot.defaultOutDir);
|
|
5448
|
-
await mkdir5(outDir, { recursive: true });
|
|
5449
|
-
const files = Object.fromEntries(
|
|
5450
|
-
PARQUET_TABLES.map((table) => [table, path14.join(outDir, `${table}.parquet`)])
|
|
5451
|
-
);
|
|
5452
|
-
const manifestPath = path14.join(outDir, "manifest.json");
|
|
5453
|
-
for (const file of [...Object.values(files), manifestPath]) {
|
|
5454
|
-
await rm2(file, { force: true });
|
|
5455
|
-
}
|
|
5456
|
-
const connection = await createDuckDbConnection();
|
|
5457
|
-
try {
|
|
5458
|
-
await attachSqlite(connection, snapshot.dbPath);
|
|
5459
|
-
for (const table of PARQUET_TABLES) {
|
|
5460
|
-
await connection.run(
|
|
5461
|
-
`COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
|
|
5462
|
-
);
|
|
5463
|
-
}
|
|
5464
|
-
} finally {
|
|
5465
|
-
connection.closeSync();
|
|
5466
|
-
}
|
|
5467
|
-
const manifest = {
|
|
5468
|
-
exported_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5469
|
-
source_db: snapshot.dbPath,
|
|
5470
|
-
schema_version: snapshot.schemaVersion,
|
|
5471
|
-
parser_version: snapshot.parserVersion,
|
|
5472
|
-
tables: Object.fromEntries(
|
|
5473
|
-
PARQUET_TABLES.map((table) => [
|
|
5474
|
-
table,
|
|
5475
|
-
{
|
|
5476
|
-
file: path14.basename(files[table]),
|
|
5477
|
-
rows: snapshot.counts[table]
|
|
5478
|
-
}
|
|
5479
|
-
])
|
|
5480
|
-
)
|
|
5481
|
-
};
|
|
5482
|
-
await writeFile5(manifestPath, `${JSON.stringify(manifest, null, 2)}
|
|
5483
|
-
`, "utf8");
|
|
5484
|
-
return { outDir, manifestPath, files, counts: snapshot.counts };
|
|
5485
|
-
}
|
|
5486
|
-
async function queryDuckDbParquet(options) {
|
|
5487
|
-
const parquetDir = path14.resolve(options.parquetDir);
|
|
5488
|
-
const connection = await createDuckDbConnection();
|
|
5489
|
-
try {
|
|
5490
|
-
for (const table of PARQUET_TABLES) {
|
|
5491
|
-
await connection.run(
|
|
5492
|
-
`CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
|
|
5493
|
-
path14.join(parquetDir, `${table}.parquet`)
|
|
5494
|
-
)})`
|
|
5495
|
-
);
|
|
5496
|
-
}
|
|
5497
|
-
const reader = await connection.runAndReadAll(options.sql);
|
|
5498
|
-
return {
|
|
5499
|
-
columns: reader.deduplicatedColumnNames(),
|
|
5500
|
-
rows: reader.getRowObjectsJson()
|
|
5501
|
-
};
|
|
5502
|
-
} catch (error) {
|
|
5503
|
-
if (isMissingParquetError(error)) {
|
|
5504
|
-
throw new Error(
|
|
5505
|
-
`Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
|
|
5506
|
-
);
|
|
5507
|
-
}
|
|
5508
|
-
throw error;
|
|
5509
|
-
} finally {
|
|
5510
|
-
connection.closeSync();
|
|
5511
|
-
}
|
|
5512
|
-
}
|
|
5513
|
-
async function createDuckDbConnection() {
|
|
5514
|
-
return DuckDBConnection.create();
|
|
5515
|
-
}
|
|
5516
|
-
async function attachSqlite(connection, dbPath) {
|
|
5517
|
-
try {
|
|
5518
|
-
await connection.run("INSTALL sqlite");
|
|
5519
|
-
await connection.run("LOAD sqlite");
|
|
5520
|
-
await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
|
|
5521
|
-
} catch (error) {
|
|
5522
|
-
throw new Error(
|
|
5523
|
-
`DuckDB could not attach prosa.sqlite via the sqlite extension: ${error instanceof Error ? error.message : String(error)}`
|
|
5524
|
-
);
|
|
5525
|
-
}
|
|
5526
|
-
}
|
|
5527
|
-
async function openBundleSnapshot(bundlePath) {
|
|
5528
|
-
const bundle = await openBundle(bundlePath);
|
|
5529
|
-
try {
|
|
5530
|
-
const counts = Object.fromEntries(
|
|
5531
|
-
PARQUET_TABLES.map((table) => {
|
|
5532
|
-
const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
|
|
5533
|
-
return [table, row?.n ?? 0];
|
|
5534
|
-
})
|
|
5535
|
-
);
|
|
5536
|
-
return {
|
|
5537
|
-
dbPath: bundle.paths.db,
|
|
5538
|
-
schemaVersion: bundle.manifest.schema_version,
|
|
5539
|
-
parserVersion: bundle.manifest.parser_version,
|
|
5540
|
-
defaultOutDir: bundle.paths.parquet,
|
|
5541
|
-
counts
|
|
5542
|
-
};
|
|
5543
|
-
} finally {
|
|
5544
|
-
closeBundle(bundle);
|
|
5545
|
-
}
|
|
5546
|
-
}
|
|
5547
|
-
function quoteIdentifier(value) {
|
|
5548
|
-
return `"${value.replace(/"/g, '""')}"`;
|
|
5549
|
-
}
|
|
5550
|
-
function sqlString(value) {
|
|
5551
|
-
return `'${value.replace(/'/g, "''")}'`;
|
|
5552
|
-
}
|
|
5553
|
-
function isMissingParquetError(error) {
|
|
5554
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
5555
|
-
return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
|
|
5556
|
-
}
|
|
5557
|
-
|
|
5558
5929
|
// src/cli/commands/export.ts
|
|
5559
5930
|
function exportCommand() {
|
|
5560
5931
|
const session = new Command2("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
|
|
@@ -6454,6 +6825,7 @@ async function runCli(argv) {
|
|
|
6454
6825
|
).version(PROSA_PARSER_VERSION, "-v, --version");
|
|
6455
6826
|
program.addCommand(initCommand());
|
|
6456
6827
|
program.addCommand(compileCommand());
|
|
6828
|
+
program.addCommand(compileAllCommand());
|
|
6457
6829
|
program.addCommand(indexCommand());
|
|
6458
6830
|
program.addCommand(sessionsCommand());
|
|
6459
6831
|
program.addCommand(searchCommand());
|