@c3-oss/prosa 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -20
- package/dist/bin/prosa.js +2033 -1082
- package/dist/bin/prosa.js.map +1 -1
- package/dist/cli/main.js +2033 -1082
- package/dist/cli/main.js.map +1 -1
- package/dist/index.d.ts +88 -12
- package/dist/index.js +988 -97
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/bin/prosa.js
CHANGED
|
@@ -12,8 +12,8 @@ var __export = (target, all) => {
|
|
|
12
12
|
|
|
13
13
|
// src/core/db.ts
|
|
14
14
|
import Database from "better-sqlite3";
|
|
15
|
-
function openDb(
|
|
16
|
-
const db = new Database(
|
|
15
|
+
function openDb(path21) {
|
|
16
|
+
const db = new Database(path21);
|
|
17
17
|
db.pragma("journal_mode = WAL");
|
|
18
18
|
db.pragma("foreign_keys = ON");
|
|
19
19
|
db.pragma("synchronous = NORMAL");
|
|
@@ -48,283 +48,30 @@ var init_db = __esm({
|
|
|
48
48
|
}
|
|
49
49
|
});
|
|
50
50
|
|
|
51
|
-
// src/core/
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
"src/core/errors.ts"() {
|
|
55
|
-
"use strict";
|
|
56
|
-
getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
|
|
57
|
-
}
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// src/core/cas/compress.ts
|
|
61
|
-
import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
|
|
62
|
-
function compressBytes(input) {
|
|
63
|
-
if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
|
|
64
|
-
return { bytes: Buffer.from(input), compression: "none" };
|
|
65
|
-
}
|
|
66
|
-
const out = zstdCompress(Buffer.from(input), { compressionLevel: ZSTD_LEVEL });
|
|
67
|
-
return { bytes: out, compression: "zstd" };
|
|
68
|
-
}
|
|
69
|
-
function decompressBytes(input, compression) {
|
|
70
|
-
if (compression === "none") return input;
|
|
71
|
-
return zstdDecompress(input);
|
|
72
|
-
}
|
|
73
|
-
var COMPRESS_THRESHOLD_BYTES, ZSTD_LEVEL;
|
|
74
|
-
var init_compress = __esm({
|
|
75
|
-
"src/core/cas/compress.ts"() {
|
|
76
|
-
"use strict";
|
|
77
|
-
COMPRESS_THRESHOLD_BYTES = 256;
|
|
78
|
-
ZSTD_LEVEL = 3;
|
|
79
|
-
}
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
// src/core/cas/hash.ts
|
|
83
|
-
import { createHash } from "crypto";
|
|
84
|
-
import { blake3 } from "@noble/hashes/blake3";
|
|
85
|
-
import { bytesToHex } from "@noble/hashes/utils";
|
|
86
|
-
function blake3Hex(bytes) {
|
|
87
|
-
return bytesToHex(blake3(bytes));
|
|
88
|
-
}
|
|
89
|
-
function sha256Hex(bytes) {
|
|
90
|
-
return createHash("sha256").update(bytes).digest("hex");
|
|
91
|
-
}
|
|
92
|
-
function objectIdFromHash(hashHex) {
|
|
93
|
-
return `blake3:${hashHex}`;
|
|
94
|
-
}
|
|
95
|
-
function objectStoragePath(hashHex, compression) {
|
|
96
|
-
const ext = compression === "zstd" ? ".zst" : ".bin";
|
|
97
|
-
const a = hashHex.slice(0, 2);
|
|
98
|
-
const b = hashHex.slice(2, 4);
|
|
99
|
-
return `objects/blake3/${a}/${b}/${hashHex}${ext}`;
|
|
51
|
+
// src/core/limits.ts
|
|
52
|
+
function clampLimit(value, opts) {
|
|
53
|
+
return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
|
|
100
54
|
}
|
|
101
|
-
var
|
|
102
|
-
"src/core/
|
|
55
|
+
var init_limits = __esm({
|
|
56
|
+
"src/core/limits.ts"() {
|
|
103
57
|
"use strict";
|
|
104
58
|
}
|
|
105
59
|
});
|
|
106
60
|
|
|
107
|
-
// src/core/
|
|
108
|
-
var
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
ensureDir: () => ensureDir,
|
|
112
|
-
flushPendingObjects: () => flushPendingObjects,
|
|
113
|
-
getBytes: () => getBytes,
|
|
114
|
-
getJson: () => getJson,
|
|
115
|
-
getObjectMeta: () => getObjectMeta,
|
|
116
|
-
getText: () => getText,
|
|
117
|
-
putBytes: () => putBytes,
|
|
118
|
-
putJson: () => putJson,
|
|
119
|
-
putText: () => putText,
|
|
120
|
-
stageBytes: () => stageBytes,
|
|
121
|
-
stageJson: () => stageJson,
|
|
122
|
-
stageText: () => stageText
|
|
123
|
-
});
|
|
124
|
-
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
125
|
-
import path2 from "path";
|
|
126
|
-
async function ensureDir(absoluteDir) {
|
|
127
|
-
if (ensuredDirs.has(absoluteDir)) return;
|
|
128
|
-
await mkdir2(absoluteDir, { recursive: true });
|
|
129
|
-
ensuredDirs.add(absoluteDir);
|
|
130
|
-
}
|
|
131
|
-
async function putBytes(bundle, bytes, options = {}) {
|
|
132
|
-
const hash = blake3Hex(bytes);
|
|
133
|
-
const objectId = objectIdFromHash(hash);
|
|
134
|
-
const existing = prepare(
|
|
135
|
-
bundle.db,
|
|
136
|
-
`SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
137
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
138
|
-
FROM objects WHERE object_id = ?`
|
|
139
|
-
).get(objectId);
|
|
140
|
-
if (existing) return objectId;
|
|
141
|
-
const { bytes: stored, compression } = compressBytes(bytes);
|
|
142
|
-
const storagePath = objectStoragePath(hash, compression);
|
|
143
|
-
const absolutePath = path2.join(bundle.path, storagePath);
|
|
144
|
-
await ensureDir(path2.dirname(absolutePath));
|
|
145
|
-
await writeFile2(absolutePath, stored);
|
|
146
|
-
prepare(
|
|
147
|
-
bundle.db,
|
|
148
|
-
`INSERT INTO objects (
|
|
149
|
-
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
150
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
151
|
-
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
152
|
-
).run(
|
|
153
|
-
objectId,
|
|
154
|
-
hash,
|
|
155
|
-
bytes.byteLength,
|
|
156
|
-
compression === "zstd" ? stored.byteLength : null,
|
|
157
|
-
compression,
|
|
158
|
-
options.mimeType ?? null,
|
|
159
|
-
options.encoding ?? null,
|
|
160
|
-
storagePath,
|
|
161
|
-
(/* @__PURE__ */ new Date()).toISOString()
|
|
162
|
-
);
|
|
163
|
-
return objectId;
|
|
164
|
-
}
|
|
165
|
-
async function putText(bundle, text, options = {}) {
|
|
166
|
-
const buf = Buffer.from(text, "utf8");
|
|
167
|
-
return putBytes(bundle, buf, {
|
|
168
|
-
mimeType: options.mimeType ?? "text/plain; charset=utf-8",
|
|
169
|
-
encoding: "utf-8"
|
|
170
|
-
});
|
|
171
|
-
}
|
|
172
|
-
async function putJson(bundle, value) {
|
|
173
|
-
const text = JSON.stringify(value);
|
|
174
|
-
return putBytes(bundle, Buffer.from(text, "utf8"), {
|
|
175
|
-
mimeType: "application/json",
|
|
176
|
-
encoding: "utf-8"
|
|
177
|
-
});
|
|
178
|
-
}
|
|
179
|
-
async function getBytes(bundle, objectId) {
|
|
180
|
-
const meta = prepare(
|
|
181
|
-
bundle.db,
|
|
182
|
-
`SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
183
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
184
|
-
FROM objects WHERE object_id = ?`
|
|
185
|
-
).get(objectId);
|
|
186
|
-
if (!meta) {
|
|
187
|
-
throw new Error(`object not found: ${objectId}`);
|
|
188
|
-
}
|
|
189
|
-
const buf = await readFile2(path2.join(bundle.path, meta.storage_path));
|
|
190
|
-
return decompressBytes(buf, meta.compression);
|
|
191
|
-
}
|
|
192
|
-
async function getText(bundle, objectId) {
|
|
193
|
-
const buf = await getBytes(bundle, objectId);
|
|
194
|
-
return buf.toString("utf8");
|
|
195
|
-
}
|
|
196
|
-
async function getJson(bundle, objectId) {
|
|
197
|
-
const text = await getText(bundle, objectId);
|
|
198
|
-
return JSON.parse(text);
|
|
199
|
-
}
|
|
200
|
-
function getObjectMeta(bundle, objectId) {
|
|
201
|
-
return prepare(
|
|
202
|
-
bundle.db,
|
|
203
|
-
`SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
204
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
205
|
-
FROM objects WHERE object_id = ?`
|
|
206
|
-
).get(objectId) ?? null;
|
|
207
|
-
}
|
|
208
|
-
function createPendingObjects() {
|
|
209
|
-
return { byId: /* @__PURE__ */ new Map() };
|
|
210
|
-
}
|
|
211
|
-
function stageBytes(pending, bytes, options = {}) {
|
|
212
|
-
const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
|
|
213
|
-
const hash = blake3Hex(buf);
|
|
214
|
-
const objectId = objectIdFromHash(hash);
|
|
215
|
-
if (!pending.byId.has(objectId)) {
|
|
216
|
-
pending.byId.set(objectId, {
|
|
217
|
-
objectId,
|
|
218
|
-
hash,
|
|
219
|
-
bytes: buf,
|
|
220
|
-
mimeType: options.mimeType ?? null,
|
|
221
|
-
encoding: options.encoding ?? null
|
|
222
|
-
});
|
|
223
|
-
}
|
|
224
|
-
return objectId;
|
|
225
|
-
}
|
|
226
|
-
function stageText(pending, text, options = {}) {
|
|
227
|
-
return stageBytes(pending, Buffer.from(text, "utf8"), {
|
|
228
|
-
mimeType: options.mimeType ?? "text/plain; charset=utf-8",
|
|
229
|
-
encoding: "utf-8"
|
|
230
|
-
});
|
|
231
|
-
}
|
|
232
|
-
function stageJson(pending, value) {
|
|
233
|
-
return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
|
|
234
|
-
mimeType: "application/json",
|
|
235
|
-
encoding: "utf-8"
|
|
236
|
-
});
|
|
237
|
-
}
|
|
238
|
-
async function flushPendingObjects(bundle, pending) {
|
|
239
|
-
if (pending.byId.size === 0) return;
|
|
240
|
-
const ids = [...pending.byId.keys()];
|
|
241
|
-
const existingIds = queryExistingObjectIds(bundle, ids);
|
|
242
|
-
const toWrite = [];
|
|
243
|
-
for (const obj of pending.byId.values()) {
|
|
244
|
-
if (existingIds.has(obj.objectId)) continue;
|
|
245
|
-
const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
|
|
246
|
-
const storagePath = objectStoragePath(obj.hash, compression);
|
|
247
|
-
toWrite.push({
|
|
248
|
-
staged: obj,
|
|
249
|
-
compression,
|
|
250
|
-
compressedBytes,
|
|
251
|
-
storagePath,
|
|
252
|
-
absolutePath: path2.join(bundle.path, storagePath)
|
|
253
|
-
});
|
|
254
|
-
}
|
|
255
|
-
if (toWrite.length > 0) {
|
|
256
|
-
await writeFilesParallel(toWrite);
|
|
257
|
-
}
|
|
258
|
-
const insertObject = prepare(
|
|
259
|
-
bundle.db,
|
|
260
|
-
`INSERT OR IGNORE INTO objects (
|
|
261
|
-
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
262
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
263
|
-
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
264
|
-
);
|
|
265
|
-
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
266
|
-
for (const p of toWrite) {
|
|
267
|
-
insertObject.run(
|
|
268
|
-
p.staged.objectId,
|
|
269
|
-
p.staged.hash,
|
|
270
|
-
p.staged.bytes.byteLength,
|
|
271
|
-
p.compression === "zstd" ? p.compressedBytes.byteLength : null,
|
|
272
|
-
p.compression,
|
|
273
|
-
p.staged.mimeType,
|
|
274
|
-
p.staged.encoding,
|
|
275
|
-
p.storagePath,
|
|
276
|
-
now
|
|
277
|
-
);
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
function queryExistingObjectIds(bundle, ids) {
|
|
281
|
-
const found = /* @__PURE__ */ new Set();
|
|
282
|
-
if (ids.length === 0) return found;
|
|
283
|
-
const CHUNK = 500;
|
|
284
|
-
for (let start = 0; start < ids.length; start += CHUNK) {
|
|
285
|
-
const slice = ids.slice(start, start + CHUNK);
|
|
286
|
-
const placeholders = slice.map(() => "?").join(",");
|
|
287
|
-
const rows = bundle.db.prepare(
|
|
288
|
-
`SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
|
|
289
|
-
).all(...slice);
|
|
290
|
-
for (const row of rows) found.add(row.object_id);
|
|
291
|
-
}
|
|
292
|
-
return found;
|
|
293
|
-
}
|
|
294
|
-
async function writeFilesParallel(tasks) {
|
|
295
|
-
let cursor = 0;
|
|
296
|
-
const workers = [];
|
|
297
|
-
const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
|
|
298
|
-
for (let w = 0; w < limit; w++) {
|
|
299
|
-
workers.push(
|
|
300
|
-
(async () => {
|
|
301
|
-
while (true) {
|
|
302
|
-
const i = cursor++;
|
|
303
|
-
if (i >= tasks.length) return;
|
|
304
|
-
const task = tasks[i];
|
|
305
|
-
await ensureDir(path2.dirname(task.absolutePath));
|
|
306
|
-
await writeFile2(task.absolutePath, task.compressedBytes);
|
|
307
|
-
}
|
|
308
|
-
})()
|
|
309
|
-
);
|
|
310
|
-
}
|
|
311
|
-
await Promise.all(workers);
|
|
312
|
-
}
|
|
313
|
-
var ensuredDirs, FS_WRITE_CONCURRENCY;
|
|
314
|
-
var init_cas = __esm({
|
|
315
|
-
"src/core/cas/index.ts"() {
|
|
61
|
+
// src/core/errors.ts
|
|
62
|
+
var getErrorMessage;
|
|
63
|
+
var init_errors = __esm({
|
|
64
|
+
"src/core/errors.ts"() {
|
|
316
65
|
"use strict";
|
|
317
|
-
|
|
318
|
-
init_compress();
|
|
319
|
-
init_hash();
|
|
320
|
-
ensuredDirs = /* @__PURE__ */ new Set();
|
|
321
|
-
FS_WRITE_CONCURRENCY = 16;
|
|
66
|
+
getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
|
|
322
67
|
}
|
|
323
68
|
});
|
|
324
69
|
|
|
325
70
|
// src/services/indexing.ts
|
|
71
|
+
import { createHash as createHash2 } from "crypto";
|
|
72
|
+
import { existsSync } from "fs";
|
|
326
73
|
import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
|
|
327
|
-
import
|
|
74
|
+
import path15 from "path";
|
|
328
75
|
function enableFts5Triggers(bundle) {
|
|
329
76
|
bundle.db.exec(FTS5_TRIGGER_SQL);
|
|
330
77
|
}
|
|
@@ -338,7 +85,7 @@ function disableFts5Triggers(bundle) {
|
|
|
338
85
|
function getSearchIndexStatuses(bundle) {
|
|
339
86
|
ensureSearchIndexStatusRows(bundle);
|
|
340
87
|
return bundle.db.prepare(
|
|
341
|
-
`SELECT
|
|
88
|
+
`SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
|
|
342
89
|
FROM search_index_status
|
|
343
90
|
ORDER BY engine`
|
|
344
91
|
).all();
|
|
@@ -346,28 +93,13 @@ function getSearchIndexStatuses(bundle) {
|
|
|
346
93
|
function getSearchIndexStatus(bundle, engine) {
|
|
347
94
|
ensureSearchIndexStatusRows(bundle);
|
|
348
95
|
return bundle.db.prepare(
|
|
349
|
-
`SELECT
|
|
96
|
+
`SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
|
|
350
97
|
FROM search_index_status
|
|
351
98
|
WHERE engine = ?`
|
|
352
99
|
).get(engine) ?? null;
|
|
353
100
|
}
|
|
354
101
|
function markIndexesAfterImport(bundle, options) {
|
|
355
102
|
if (!options.changed) return;
|
|
356
|
-
if (options.fts5Deferred) {
|
|
357
|
-
updateSearchIndexStatus(bundle, "fts5", {
|
|
358
|
-
status: "stale",
|
|
359
|
-
sourceDocCount: countSearchDocs(bundle),
|
|
360
|
-
indexedDocCount: countFts5Docs(bundle),
|
|
361
|
-
errorMessage: null
|
|
362
|
-
});
|
|
363
|
-
} else {
|
|
364
|
-
updateSearchIndexStatus(bundle, "fts5", {
|
|
365
|
-
status: "ready",
|
|
366
|
-
sourceDocCount: countSearchDocs(bundle),
|
|
367
|
-
indexedDocCount: countFts5Docs(bundle),
|
|
368
|
-
errorMessage: null
|
|
369
|
-
});
|
|
370
|
-
}
|
|
371
103
|
const tantivy = getSearchIndexStatus(bundle, "tantivy");
|
|
372
104
|
if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
|
|
373
105
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
@@ -408,55 +140,93 @@ function rebuildFts5Index(bundle) {
|
|
|
408
140
|
}
|
|
409
141
|
return getSearchIndexStatus(bundle, "fts5");
|
|
410
142
|
}
|
|
411
|
-
|
|
143
|
+
function buildTantivySchema(tantivy) {
|
|
144
|
+
const builder = new tantivy.SchemaBuilder();
|
|
145
|
+
for (const field of TANTIVY_SCHEMA_FIELDS) {
|
|
146
|
+
if (field.tokenizer === "default") {
|
|
147
|
+
builder.addTextField(field.name, { stored: true });
|
|
148
|
+
} else {
|
|
149
|
+
builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return builder.build();
|
|
153
|
+
}
|
|
154
|
+
function computeSchemaFingerprint() {
|
|
155
|
+
const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
|
|
156
|
+
return createHash2("sha256").update(canonical).digest("hex");
|
|
157
|
+
}
|
|
158
|
+
function tantivyIndexLooksValid(dir) {
|
|
159
|
+
return existsSync(path15.join(dir, "meta.json"));
|
|
160
|
+
}
|
|
161
|
+
function makeTantivyDoc(tantivy, row) {
|
|
162
|
+
const doc = new tantivy.Document();
|
|
163
|
+
doc.addText("doc_id", row.doc_id);
|
|
164
|
+
doc.addText("entity_type", row.entity_type);
|
|
165
|
+
doc.addText("entity_id", row.entity_id);
|
|
166
|
+
doc.addText("session_id", row.session_id ?? "");
|
|
167
|
+
doc.addText("project_id", row.project_id ?? "");
|
|
168
|
+
doc.addText("timestamp", row.timestamp ?? "");
|
|
169
|
+
doc.addText("role", row.role ?? "");
|
|
170
|
+
doc.addText("tool_name", row.tool_name ?? "");
|
|
171
|
+
doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
|
|
172
|
+
doc.addText("field_kind", row.field_kind);
|
|
173
|
+
doc.addText("text", row.text);
|
|
174
|
+
return doc;
|
|
175
|
+
}
|
|
176
|
+
async function rebuildTantivyIndex(bundle, options = {}) {
|
|
412
177
|
ensureSearchIndexStatusRows(bundle);
|
|
178
|
+
const sourceDocCount = countSearchDocs(bundle);
|
|
179
|
+
const prev = getSearchIndexStatus(bundle, "tantivy");
|
|
180
|
+
const fingerprint = computeSchemaFingerprint();
|
|
181
|
+
const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
|
|
182
|
+
const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
|
|
183
|
+
const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
|
|
184
|
+
const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
|
|
413
185
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
414
186
|
status: "building",
|
|
415
|
-
sourceDocCount
|
|
187
|
+
sourceDocCount,
|
|
416
188
|
indexedDocCount: 0,
|
|
417
189
|
errorMessage: null
|
|
418
190
|
});
|
|
419
191
|
try {
|
|
420
192
|
const tantivy = await import("@oxdev03/node-tantivy-binding");
|
|
421
|
-
const schema =
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
doc.addText("role", row.role ?? "");
|
|
442
|
-
doc.addText("tool_name", row.tool_name ?? "");
|
|
443
|
-
doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
|
|
444
|
-
doc.addText("field_kind", row.field_kind);
|
|
445
|
-
doc.addText("text", row.text);
|
|
446
|
-
writer.addDocument(doc);
|
|
447
|
-
indexedDocCount++;
|
|
193
|
+
const schema = buildTantivySchema(tantivy);
|
|
194
|
+
let index;
|
|
195
|
+
if (wantFullRebuild) {
|
|
196
|
+
await rm2(bundle.paths.tantivy, { recursive: true, force: true });
|
|
197
|
+
await mkdir4(bundle.paths.tantivy, { recursive: true });
|
|
198
|
+
index = new tantivy.Index(schema, bundle.paths.tantivy, false);
|
|
199
|
+
} else {
|
|
200
|
+
index = tantivy.Index.open(bundle.paths.tantivy);
|
|
201
|
+
}
|
|
202
|
+
const writer = index.writer(3e8, 4);
|
|
203
|
+
const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
|
|
204
|
+
let addedDocCount = 0;
|
|
205
|
+
let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
|
|
206
|
+
for (const row of bundle.db.prepare(select).iterate()) {
|
|
207
|
+
if (!wantFullRebuild) {
|
|
208
|
+
writer.deleteDocumentsByTerm("doc_id", row.doc_id);
|
|
209
|
+
}
|
|
210
|
+
writer.addDocument(makeTantivyDoc(tantivy, row));
|
|
211
|
+
addedDocCount++;
|
|
212
|
+
if (row.rowid > maxRowid) maxRowid = row.rowid;
|
|
448
213
|
}
|
|
449
214
|
writer.commit();
|
|
450
215
|
index.reload();
|
|
216
|
+
writer.waitMergingThreads();
|
|
217
|
+
const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
|
|
451
218
|
await writeFile5(
|
|
452
|
-
|
|
219
|
+
path15.join(bundle.paths.tantivy, "prosa-index.json"),
|
|
453
220
|
`${JSON.stringify(
|
|
454
221
|
{
|
|
455
222
|
engine: "tantivy",
|
|
456
223
|
source: "search_docs",
|
|
457
224
|
built_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
458
|
-
|
|
459
|
-
|
|
225
|
+
mode: wantFullRebuild ? "full" : "incremental",
|
|
226
|
+
source_doc_count: sourceDocCount,
|
|
227
|
+
indexed_doc_count: indexedDocCount,
|
|
228
|
+
last_indexed_rowid: maxRowid,
|
|
229
|
+
schema_fingerprint: fingerprint
|
|
460
230
|
},
|
|
461
231
|
null,
|
|
462
232
|
2
|
|
@@ -466,14 +236,16 @@ async function rebuildTantivyIndex(bundle) {
|
|
|
466
236
|
);
|
|
467
237
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
468
238
|
status: "ready",
|
|
469
|
-
sourceDocCount
|
|
239
|
+
sourceDocCount,
|
|
470
240
|
indexedDocCount,
|
|
471
|
-
errorMessage: null
|
|
241
|
+
errorMessage: null,
|
|
242
|
+
lastIndexedRowid: maxRowid,
|
|
243
|
+
schemaFingerprint: fingerprint
|
|
472
244
|
});
|
|
473
245
|
} catch (error) {
|
|
474
246
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
475
247
|
status: "failed",
|
|
476
|
-
sourceDocCount
|
|
248
|
+
sourceDocCount,
|
|
477
249
|
indexedDocCount: 0,
|
|
478
250
|
errorMessage: getErrorMessage(error)
|
|
479
251
|
});
|
|
@@ -481,36 +253,53 @@ async function rebuildTantivyIndex(bundle) {
|
|
|
481
253
|
}
|
|
482
254
|
return getSearchIndexStatus(bundle, "tantivy");
|
|
483
255
|
}
|
|
256
|
+
function countTantivyDocsBest(prev, added) {
|
|
257
|
+
if (prev && typeof prev.indexed_doc_count === "number") {
|
|
258
|
+
return prev.indexed_doc_count + added;
|
|
259
|
+
}
|
|
260
|
+
return added;
|
|
261
|
+
}
|
|
484
262
|
function ensureSearchIndexStatusRows(bundle) {
|
|
485
263
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
486
264
|
const stmt = prepare(
|
|
487
265
|
bundle.db,
|
|
488
266
|
`INSERT OR IGNORE INTO search_index_status (
|
|
489
|
-
engine, status, source_doc_count, indexed_doc_count, updated_at,
|
|
490
|
-
|
|
267
|
+
engine, status, source_doc_count, indexed_doc_count, updated_at,
|
|
268
|
+
error_message, last_indexed_rowid, schema_fingerprint
|
|
269
|
+
) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
|
|
491
270
|
);
|
|
492
271
|
stmt.run("fts5", "ready", now);
|
|
493
272
|
stmt.run("tantivy", "missing", now);
|
|
494
273
|
}
|
|
495
274
|
function updateSearchIndexStatus(bundle, engine, values) {
|
|
496
275
|
ensureSearchIndexStatusRows(bundle);
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
WHERE engine = ?`
|
|
506
|
-
).run(
|
|
276
|
+
const setClauses = [
|
|
277
|
+
"status = ?",
|
|
278
|
+
"source_doc_count = ?",
|
|
279
|
+
"indexed_doc_count = ?",
|
|
280
|
+
"updated_at = ?",
|
|
281
|
+
"error_message = ?"
|
|
282
|
+
];
|
|
283
|
+
const params = [
|
|
507
284
|
values.status,
|
|
508
285
|
values.sourceDocCount,
|
|
509
286
|
values.indexedDocCount,
|
|
510
287
|
(/* @__PURE__ */ new Date()).toISOString(),
|
|
511
|
-
values.errorMessage
|
|
512
|
-
|
|
513
|
-
)
|
|
288
|
+
values.errorMessage
|
|
289
|
+
];
|
|
290
|
+
if (values.lastIndexedRowid !== void 0) {
|
|
291
|
+
setClauses.push("last_indexed_rowid = ?");
|
|
292
|
+
params.push(values.lastIndexedRowid);
|
|
293
|
+
}
|
|
294
|
+
if (values.schemaFingerprint !== void 0) {
|
|
295
|
+
setClauses.push("schema_fingerprint = ?");
|
|
296
|
+
params.push(values.schemaFingerprint);
|
|
297
|
+
}
|
|
298
|
+
params.push(engine);
|
|
299
|
+
prepare(
|
|
300
|
+
bundle.db,
|
|
301
|
+
`UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
|
|
302
|
+
).run(...params);
|
|
514
303
|
}
|
|
515
304
|
function countSearchDocs(bundle) {
|
|
516
305
|
return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
|
|
@@ -518,12 +307,16 @@ function countSearchDocs(bundle) {
|
|
|
518
307
|
function countFts5Docs(bundle) {
|
|
519
308
|
return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs_fts`).get()?.n ?? 0;
|
|
520
309
|
}
|
|
521
|
-
var FTS5_TRIGGER_SQL;
|
|
310
|
+
var SEARCH_INDEX_STATUS_COLUMNS, FTS5_TRIGGER_SQL, TANTIVY_SCHEMA_FIELDS, SEARCH_DOCS_SELECT;
|
|
522
311
|
var init_indexing = __esm({
|
|
523
312
|
"src/services/indexing.ts"() {
|
|
524
313
|
"use strict";
|
|
525
314
|
init_db();
|
|
526
315
|
init_errors();
|
|
316
|
+
SEARCH_INDEX_STATUS_COLUMNS = `
|
|
317
|
+
engine, status, source_doc_count, indexed_doc_count, updated_at,
|
|
318
|
+
error_message, last_indexed_rowid, schema_fingerprint
|
|
319
|
+
`;
|
|
527
320
|
FTS5_TRIGGER_SQL = `
|
|
528
321
|
CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
|
|
529
322
|
INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
|
|
@@ -542,21 +335,30 @@ CREATE TRIGGER IF NOT EXISTS search_docs_au AFTER UPDATE ON search_docs BEGIN
|
|
|
542
335
|
VALUES (new.rowid, new.text, new.role, new.tool_name, new.field_kind);
|
|
543
336
|
END;
|
|
544
337
|
`;
|
|
545
|
-
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
338
|
+
TANTIVY_SCHEMA_FIELDS = [
|
|
339
|
+
{ name: "doc_id", tokenizer: "raw" },
|
|
340
|
+
{ name: "entity_type", tokenizer: "raw" },
|
|
341
|
+
{ name: "entity_id", tokenizer: "raw" },
|
|
342
|
+
{ name: "session_id", tokenizer: "raw" },
|
|
343
|
+
{ name: "project_id", tokenizer: "raw" },
|
|
344
|
+
{ name: "timestamp", tokenizer: "raw" },
|
|
345
|
+
{ name: "role", tokenizer: "raw" },
|
|
346
|
+
{ name: "tool_name", tokenizer: "raw" },
|
|
347
|
+
{ name: "canonical_tool_type", tokenizer: "raw" },
|
|
348
|
+
{ name: "field_kind", tokenizer: "raw" },
|
|
349
|
+
// The text field uses tantivy's default tokenizer (en_stem in the binding).
|
|
350
|
+
{ name: "text", tokenizer: "default" }
|
|
351
|
+
];
|
|
352
|
+
SEARCH_DOCS_SELECT = `
|
|
353
|
+
SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
|
|
354
|
+
role, tool_name, canonical_tool_type, field_kind, text
|
|
355
|
+
FROM search_docs
|
|
356
|
+
`;
|
|
555
357
|
}
|
|
556
358
|
});
|
|
557
359
|
|
|
558
360
|
// src/services/search.ts
|
|
559
|
-
import { existsSync } from "fs";
|
|
361
|
+
import { existsSync as existsSync2 } from "fs";
|
|
560
362
|
import { createRequire } from "module";
|
|
561
363
|
function escapeFtsQuery(q) {
|
|
562
364
|
return q.split(/\s+/).filter((t) => t.length > 0).map((t) => `"${t.replace(/"/g, '""')}"`).join(" ");
|
|
@@ -565,7 +367,7 @@ function searchFullText(bundle, options) {
|
|
|
565
367
|
if (options.engine === "tantivy") {
|
|
566
368
|
return searchTantivy(bundle, options);
|
|
567
369
|
}
|
|
568
|
-
const
|
|
370
|
+
const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
|
|
569
371
|
const sql = `
|
|
570
372
|
SELECT d.doc_id,
|
|
571
373
|
d.entity_type,
|
|
@@ -580,14 +382,14 @@ function searchFullText(bundle, options) {
|
|
|
580
382
|
JOIN search_docs d ON d.rowid = search_docs_fts.rowid
|
|
581
383
|
WHERE search_docs_fts MATCH ?
|
|
582
384
|
ORDER BY bm25(search_docs_fts), d.timestamp DESC
|
|
583
|
-
LIMIT ${
|
|
385
|
+
LIMIT ${limit2}
|
|
584
386
|
`;
|
|
585
387
|
const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
|
|
586
388
|
if (!ftsQuery) return [];
|
|
587
389
|
return bundle.db.prepare(sql).all(ftsQuery);
|
|
588
390
|
}
|
|
589
391
|
function searchTantivy(bundle, options) {
|
|
590
|
-
if (!
|
|
392
|
+
if (!existsSync2(bundle.paths.tantivy)) {
|
|
591
393
|
throw new Error("tantivy index not found; run `prosa index tantivy` first");
|
|
592
394
|
}
|
|
593
395
|
const status = getSearchIndexStatus(bundle, "tantivy");
|
|
@@ -596,7 +398,7 @@ function searchTantivy(bundle, options) {
|
|
|
596
398
|
`tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
|
|
597
399
|
);
|
|
598
400
|
}
|
|
599
|
-
const
|
|
401
|
+
const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
|
|
600
402
|
const queryText = options.query.trim();
|
|
601
403
|
if (!queryText) return [];
|
|
602
404
|
const tantivy = requireTantivy();
|
|
@@ -605,7 +407,7 @@ function searchTantivy(bundle, options) {
|
|
|
605
407
|
const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
|
|
606
408
|
text: [true, 2, true]
|
|
607
409
|
});
|
|
608
|
-
const result = searcher.search(query,
|
|
410
|
+
const result = searcher.search(query, limit2, true);
|
|
609
411
|
const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
|
|
610
412
|
snippets.setMaxNumChars(180);
|
|
611
413
|
return result.hits.map((hit) => {
|
|
@@ -689,7 +491,7 @@ function sessionFilterWhere(filters) {
|
|
|
689
491
|
}
|
|
690
492
|
function listSessions(bundle, filters = {}) {
|
|
691
493
|
const { where, params } = sessionFilterWhere(filters);
|
|
692
|
-
const
|
|
494
|
+
const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
|
|
693
495
|
const sql = `
|
|
694
496
|
SELECT s.session_id,
|
|
695
497
|
s.source_tool,
|
|
@@ -710,7 +512,7 @@ function listSessions(bundle, filters = {}) {
|
|
|
710
512
|
FROM sessions s
|
|
711
513
|
${where}
|
|
712
514
|
ORDER BY s.start_ts DESC NULLS LAST
|
|
713
|
-
LIMIT ${
|
|
515
|
+
LIMIT ${limit2}
|
|
714
516
|
`;
|
|
715
517
|
return bundle.db.prepare(sql).all(...params);
|
|
716
518
|
}
|
|
@@ -1134,13 +936,14 @@ var init_App = __esm({
|
|
|
1134
936
|
});
|
|
1135
937
|
|
|
1136
938
|
// src/cli/main.ts
|
|
1137
|
-
import { Command as
|
|
939
|
+
import { Command as Command11 } from "commander";
|
|
1138
940
|
|
|
1139
941
|
// src/core/version.ts
|
|
1140
942
|
var PROSA_PARSER_VERSION = "0.1.0";
|
|
1141
|
-
var PROSA_SCHEMA_VERSION =
|
|
943
|
+
var PROSA_SCHEMA_VERSION = 4;
|
|
1142
944
|
|
|
1143
|
-
// src/cli/commands/
|
|
945
|
+
// src/cli/commands/analytics.ts
|
|
946
|
+
import path4 from "path";
|
|
1144
947
|
import { Command } from "commander";
|
|
1145
948
|
|
|
1146
949
|
// src/core/bundle.ts
|
|
@@ -1514,10 +1317,291 @@ INSERT OR IGNORE INTO search_index_status (
|
|
|
1514
1317
|
('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
|
|
1515
1318
|
`;
|
|
1516
1319
|
|
|
1320
|
+
// src/core/schema/sql/003_analytics_views.ts
|
|
1321
|
+
var SQL_003_ANALYTICS_VIEWS = String.raw`
|
|
1322
|
+
CREATE VIEW IF NOT EXISTS session_facts AS
|
|
1323
|
+
WITH turn_counts AS (
|
|
1324
|
+
SELECT session_id, count(*) AS turn_count
|
|
1325
|
+
FROM turns
|
|
1326
|
+
GROUP BY session_id
|
|
1327
|
+
),
|
|
1328
|
+
message_counts AS (
|
|
1329
|
+
SELECT session_id,
|
|
1330
|
+
count(*) AS message_count,
|
|
1331
|
+
sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
|
|
1332
|
+
sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
|
|
1333
|
+
FROM messages
|
|
1334
|
+
GROUP BY session_id
|
|
1335
|
+
),
|
|
1336
|
+
tool_call_counts AS (
|
|
1337
|
+
SELECT session_id,
|
|
1338
|
+
count(*) AS tool_call_count,
|
|
1339
|
+
sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
|
|
1340
|
+
FROM tool_calls
|
|
1341
|
+
GROUP BY session_id
|
|
1342
|
+
),
|
|
1343
|
+
tool_result_counts AS (
|
|
1344
|
+
SELECT session_id,
|
|
1345
|
+
count(*) AS tool_result_count,
|
|
1346
|
+
sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
|
|
1347
|
+
THEN 1 ELSE 0 END) AS tool_result_error_count,
|
|
1348
|
+
sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
|
|
1349
|
+
FROM tool_results
|
|
1350
|
+
GROUP BY session_id
|
|
1351
|
+
),
|
|
1352
|
+
search_doc_counts AS (
|
|
1353
|
+
SELECT session_id, count(*) AS search_doc_count
|
|
1354
|
+
FROM search_docs
|
|
1355
|
+
WHERE session_id IS NOT NULL
|
|
1356
|
+
GROUP BY session_id
|
|
1357
|
+
)
|
|
1358
|
+
SELECT s.session_id,
|
|
1359
|
+
s.source_tool,
|
|
1360
|
+
s.source_session_id,
|
|
1361
|
+
s.project_id,
|
|
1362
|
+
p.display_name AS project_name,
|
|
1363
|
+
p.canonical_path AS project_path,
|
|
1364
|
+
s.parent_session_id,
|
|
1365
|
+
s.is_subagent,
|
|
1366
|
+
s.agent_role,
|
|
1367
|
+
s.agent_nickname,
|
|
1368
|
+
s.title,
|
|
1369
|
+
s.start_ts,
|
|
1370
|
+
s.end_ts,
|
|
1371
|
+
CASE
|
|
1372
|
+
WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
|
|
1373
|
+
THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
|
|
1374
|
+
ELSE NULL
|
|
1375
|
+
END AS duration_seconds,
|
|
1376
|
+
s.cwd_initial,
|
|
1377
|
+
s.git_branch_initial,
|
|
1378
|
+
s.model_first,
|
|
1379
|
+
s.model_last,
|
|
1380
|
+
s.status,
|
|
1381
|
+
s.timeline_confidence,
|
|
1382
|
+
sf.path AS source_file_path,
|
|
1383
|
+
COALESCE(tc.turn_count, 0) AS turn_count,
|
|
1384
|
+
COALESCE(mc.message_count, 0) AS message_count,
|
|
1385
|
+
COALESCE(mc.user_message_count, 0) AS user_message_count,
|
|
1386
|
+
COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
|
|
1387
|
+
COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
|
|
1388
|
+
COALESCE(trc.tool_result_count, 0) AS tool_result_count,
|
|
1389
|
+
COALESCE(tcc.tool_call_error_count, 0)
|
|
1390
|
+
+ COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
|
|
1391
|
+
COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
|
|
1392
|
+
COALESCE(sdc.search_doc_count, 0) AS search_doc_count
|
|
1393
|
+
FROM sessions s
|
|
1394
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1395
|
+
LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
|
|
1396
|
+
LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
|
|
1397
|
+
LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
|
|
1398
|
+
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
|
|
1399
|
+
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
|
|
1400
|
+
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
|
|
1401
|
+
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;
|
|
1402
|
+
|
|
1403
|
+
CREATE VIEW IF NOT EXISTS tool_usage_facts AS
|
|
1404
|
+
WITH result_rollup AS (
|
|
1405
|
+
SELECT tool_call_id,
|
|
1406
|
+
session_id,
|
|
1407
|
+
count(*) AS tool_result_count,
|
|
1408
|
+
max(status) AS result_status,
|
|
1409
|
+
max(is_error) AS is_error,
|
|
1410
|
+
min(exit_code) AS exit_code,
|
|
1411
|
+
sum(COALESCE(duration_ms, 0)) AS duration_ms,
|
|
1412
|
+
max(preview) AS preview
|
|
1413
|
+
FROM tool_results
|
|
1414
|
+
GROUP BY tool_call_id, session_id
|
|
1415
|
+
)
|
|
1416
|
+
SELECT tc.tool_call_id,
|
|
1417
|
+
tc.session_id,
|
|
1418
|
+
s.source_tool,
|
|
1419
|
+
s.source_session_id,
|
|
1420
|
+
s.project_id,
|
|
1421
|
+
p.display_name AS project_name,
|
|
1422
|
+
p.canonical_path AS project_path,
|
|
1423
|
+
tc.turn_id,
|
|
1424
|
+
tc.message_id,
|
|
1425
|
+
tc.event_id,
|
|
1426
|
+
tc.source_call_id,
|
|
1427
|
+
tc.tool_name,
|
|
1428
|
+
tc.canonical_tool_type,
|
|
1429
|
+
tc.command,
|
|
1430
|
+
tc.cwd,
|
|
1431
|
+
tc.path,
|
|
1432
|
+
tc.query,
|
|
1433
|
+
tc.timestamp_start,
|
|
1434
|
+
tc.timestamp_end,
|
|
1435
|
+
CASE
|
|
1436
|
+
WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
|
|
1437
|
+
THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
|
|
1438
|
+
ELSE NULL
|
|
1439
|
+
END AS call_duration_seconds,
|
|
1440
|
+
tc.status AS call_status,
|
|
1441
|
+
rr.result_status,
|
|
1442
|
+
COALESCE(rr.is_error, 0) AS is_error,
|
|
1443
|
+
rr.exit_code,
|
|
1444
|
+
rr.duration_ms AS result_duration_ms,
|
|
1445
|
+
COALESCE(rr.tool_result_count, 0) AS tool_result_count,
|
|
1446
|
+
rr.preview,
|
|
1447
|
+
tc.raw_record_id
|
|
1448
|
+
FROM tool_calls tc
|
|
1449
|
+
LEFT JOIN sessions s ON s.session_id = tc.session_id
|
|
1450
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1451
|
+
LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;
|
|
1452
|
+
|
|
1453
|
+
CREATE VIEW IF NOT EXISTS error_facts AS
|
|
1454
|
+
SELECT 'tool_result:' || tr.tool_result_id AS error_id,
|
|
1455
|
+
'tool_result' AS error_category,
|
|
1456
|
+
s.source_tool,
|
|
1457
|
+
s.project_id,
|
|
1458
|
+
p.display_name AS project_name,
|
|
1459
|
+
tr.session_id,
|
|
1460
|
+
COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
|
|
1461
|
+
tc.tool_name,
|
|
1462
|
+
tc.canonical_tool_type,
|
|
1463
|
+
COALESCE(tr.status, tc.status) AS status,
|
|
1464
|
+
tr.exit_code,
|
|
1465
|
+
NULL AS message,
|
|
1466
|
+
tr.preview,
|
|
1467
|
+
NULL AS entity_type,
|
|
1468
|
+
NULL AS entity_id,
|
|
1469
|
+
tr.raw_record_id
|
|
1470
|
+
FROM tool_results tr
|
|
1471
|
+
LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
|
|
1472
|
+
LEFT JOIN sessions s ON s.session_id = tr.session_id
|
|
1473
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1474
|
+
WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
|
|
1475
|
+
UNION ALL
|
|
1476
|
+
SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
|
|
1477
|
+
'import_error' AS error_category,
|
|
1478
|
+
COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
|
|
1479
|
+
NULL AS project_id,
|
|
1480
|
+
NULL AS project_name,
|
|
1481
|
+
NULL AS session_id,
|
|
1482
|
+
ie.occurred_at AS timestamp,
|
|
1483
|
+
NULL AS tool_name,
|
|
1484
|
+
NULL AS canonical_tool_type,
|
|
1485
|
+
ie.kind AS status,
|
|
1486
|
+
NULL AS exit_code,
|
|
1487
|
+
ie.message,
|
|
1488
|
+
NULL AS preview,
|
|
1489
|
+
NULL AS entity_type,
|
|
1490
|
+
NULL AS entity_id,
|
|
1491
|
+
ie.raw_record_id
|
|
1492
|
+
FROM import_errors ie
|
|
1493
|
+
LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
|
|
1494
|
+
LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
|
|
1495
|
+
UNION ALL
|
|
1496
|
+
SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
|
|
1497
|
+
'uncertainty' AS error_category,
|
|
1498
|
+
NULL AS source_tool,
|
|
1499
|
+
NULL AS project_id,
|
|
1500
|
+
NULL AS project_name,
|
|
1501
|
+
CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
|
|
1502
|
+
NULL AS timestamp,
|
|
1503
|
+
NULL AS tool_name,
|
|
1504
|
+
NULL AS canonical_tool_type,
|
|
1505
|
+
u.reason AS status,
|
|
1506
|
+
NULL AS exit_code,
|
|
1507
|
+
u.reason AS message,
|
|
1508
|
+
NULL AS preview,
|
|
1509
|
+
u.entity_type,
|
|
1510
|
+
u.entity_id,
|
|
1511
|
+
NULL AS raw_record_id
|
|
1512
|
+
FROM uncertainties u;
|
|
1513
|
+
|
|
1514
|
+
CREATE VIEW IF NOT EXISTS model_usage AS
|
|
1515
|
+
WITH model_events AS (
|
|
1516
|
+
SELECT s.source_tool,
|
|
1517
|
+
s.project_id,
|
|
1518
|
+
p.display_name AS project_name,
|
|
1519
|
+
p.canonical_path AS project_path,
|
|
1520
|
+
s.session_id,
|
|
1521
|
+
NULL AS turn_id,
|
|
1522
|
+
s.model_first AS model,
|
|
1523
|
+
s.start_ts AS timestamp,
|
|
1524
|
+
'session_first' AS observation_type
|
|
1525
|
+
FROM sessions s
|
|
1526
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1527
|
+
WHERE s.model_first IS NOT NULL
|
|
1528
|
+
UNION ALL
|
|
1529
|
+
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
|
|
1530
|
+
s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
|
|
1531
|
+
'session_last' AS observation_type
|
|
1532
|
+
FROM sessions s
|
|
1533
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1534
|
+
WHERE s.model_last IS NOT NULL
|
|
1535
|
+
UNION ALL
|
|
1536
|
+
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
|
|
1537
|
+
t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
|
|
1538
|
+
FROM turns t
|
|
1539
|
+
LEFT JOIN sessions s ON s.session_id = t.session_id
|
|
1540
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1541
|
+
WHERE t.model IS NOT NULL
|
|
1542
|
+
UNION ALL
|
|
1543
|
+
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
|
|
1544
|
+
m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
|
|
1545
|
+
FROM messages m
|
|
1546
|
+
LEFT JOIN sessions s ON s.session_id = m.session_id
|
|
1547
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1548
|
+
WHERE m.model IS NOT NULL
|
|
1549
|
+
)
|
|
1550
|
+
SELECT source_tool,
|
|
1551
|
+
project_id,
|
|
1552
|
+
project_name,
|
|
1553
|
+
project_path,
|
|
1554
|
+
model,
|
|
1555
|
+
count(DISTINCT session_id) AS session_count,
|
|
1556
|
+
count(DISTINCT turn_id) AS turn_count,
|
|
1557
|
+
count(*) AS observation_count,
|
|
1558
|
+
sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
|
|
1559
|
+
min(timestamp) AS first_seen_ts,
|
|
1560
|
+
max(timestamp) AS last_seen_ts
|
|
1561
|
+
FROM model_events
|
|
1562
|
+
GROUP BY source_tool, project_id, project_name, project_path, model;
|
|
1563
|
+
|
|
1564
|
+
CREATE VIEW IF NOT EXISTS project_activity AS
|
|
1565
|
+
SELECT s.source_tool,
|
|
1566
|
+
s.project_id,
|
|
1567
|
+
COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
|
|
1568
|
+
p.canonical_path AS project_path,
|
|
1569
|
+
min(s.start_ts) AS first_session_ts,
|
|
1570
|
+
max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
|
|
1571
|
+
count(DISTINCT s.session_id) AS session_count,
|
|
1572
|
+
count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
|
|
1573
|
+
AS low_confidence_session_count,
|
|
1574
|
+
count(DISTINCT t.turn_id) AS turn_count,
|
|
1575
|
+
count(DISTINCT m.message_id) AS message_count,
|
|
1576
|
+
count(DISTINCT tc.tool_call_id) AS tool_call_count,
|
|
1577
|
+
count(DISTINCT tr.tool_result_id) AS tool_result_count,
|
|
1578
|
+
count(DISTINCT CASE
|
|
1579
|
+
WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
|
|
1580
|
+
THEN tr.tool_result_id
|
|
1581
|
+
END) AS tool_error_count,
|
|
1582
|
+
count(DISTINCT sd.doc_id) AS search_doc_count
|
|
1583
|
+
FROM sessions s
|
|
1584
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1585
|
+
LEFT JOIN turns t ON t.session_id = s.session_id
|
|
1586
|
+
LEFT JOIN messages m ON m.session_id = s.session_id
|
|
1587
|
+
LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
|
|
1588
|
+
LEFT JOIN tool_results tr ON tr.session_id = s.session_id
|
|
1589
|
+
LEFT JOIN search_docs sd ON sd.session_id = s.session_id
|
|
1590
|
+
GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
|
|
1591
|
+
`;
|
|
1592
|
+
|
|
1593
|
+
// src/core/schema/sql/004_tantivy_checkpoint.ts
|
|
1594
|
+
var SQL_004_TANTIVY_CHECKPOINT = String.raw`
|
|
1595
|
+
ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
|
|
1596
|
+
ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
|
|
1597
|
+
`;
|
|
1598
|
+
|
|
1517
1599
|
// src/core/schema/migrate.ts
|
|
1518
1600
|
// Schema migrations, listed in ascending `version` order. Each entry pairs a
// version number and a short name with the SQL text for that step. The highest
// version here (4) matches PROSA_SCHEMA_VERSION, which openBundle checks after
// migrating.
var MIGRATIONS = [
  { version: 1, name: "init", sql: SQL_001_INIT },
  { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
  { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
  { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
];
|
|
1522
1606
|
function runMigrations(db) {
|
|
1523
1607
|
db.exec(`
|
|
@@ -1625,52 +1709,1026 @@ async function openBundle(rootPath) {
|
|
|
1625
1709
|
`no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
|
|
1626
1710
|
);
|
|
1627
1711
|
}
|
|
1628
|
-
const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
|
|
1629
|
-
await mkdir(paths.search, { recursive: true });
|
|
1630
|
-
await mkdir(paths.tantivy, { recursive: true });
|
|
1631
|
-
const db = openDb(paths.db);
|
|
1632
|
-
runMigrations(db);
|
|
1633
|
-
const currentVersion = currentSchemaVersion(db);
|
|
1634
|
-
if (currentVersion !== PROSA_SCHEMA_VERSION) {
|
|
1635
|
-
closeDb(db);
|
|
1636
|
-
throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
|
|
1637
|
-
}
|
|
1638
|
-
if (manifest.parser_version !== PROSA_PARSER_VERSION) {
|
|
1639
|
-
manifest.parser_version = PROSA_PARSER_VERSION;
|
|
1640
|
-
await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
|
|
1641
|
-
`, "utf8");
|
|
1642
|
-
}
|
|
1643
|
-
return { path: resolved, db, manifest, paths };
|
|
1712
|
+
const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
|
|
1713
|
+
await mkdir(paths.search, { recursive: true });
|
|
1714
|
+
await mkdir(paths.tantivy, { recursive: true });
|
|
1715
|
+
const db = openDb(paths.db);
|
|
1716
|
+
runMigrations(db);
|
|
1717
|
+
const currentVersion = currentSchemaVersion(db);
|
|
1718
|
+
if (currentVersion !== PROSA_SCHEMA_VERSION) {
|
|
1719
|
+
closeDb(db);
|
|
1720
|
+
throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
|
|
1721
|
+
}
|
|
1722
|
+
if (manifest.parser_version !== PROSA_PARSER_VERSION) {
|
|
1723
|
+
manifest.parser_version = PROSA_PARSER_VERSION;
|
|
1724
|
+
await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}
|
|
1725
|
+
`, "utf8");
|
|
1726
|
+
}
|
|
1727
|
+
return { path: resolved, db, manifest, paths };
|
|
1728
|
+
}
|
|
1729
|
+
/**
 * Opens the bundle rooted at `rootPath`, initializing it first when needed.
 *
 * - Path cannot be stat'ed (any stat failure): the bundle is initialized.
 * - Path exists but is not a directory: an Error is thrown.
 * - Directory exists without a manifest: the bundle is initialized.
 * - Directory exists with a manifest: the bundle is opened.
 *
 * @param {string} rootPath - Bundle root; resolved to an absolute path.
 * @returns {Promise<object>} Whatever initBundle / openBundle resolve to.
 * @throws {Error} When the path exists and is not a directory.
 */
async function openOrInitBundle(rootPath) {
  const bundleRoot = path.resolve(rootPath);
  const layout = bundlePaths(bundleRoot);

  // Any stat failure is treated the same as "nothing there yet".
  let rootInfo;
  try {
    rootInfo = await stat(bundleRoot);
  } catch {
    rootInfo = null;
  }

  if (!rootInfo) {
    return await initBundle(bundleRoot);
  }
  if (!rootInfo.isDirectory()) {
    throw new Error(`bundle path not found or not a directory: ${bundleRoot}`);
  }

  const hasManifest = await exists(layout.manifest);
  if (hasManifest) {
    return await openBundle(bundleRoot);
  }
  return await initBundle(bundleRoot);
}
|
|
1741
|
+
/**
 * Releases a bundle by closing its database handle.
 *
 * @param {{ db: object }} bundle - Bundle whose `db` is passed to closeDb.
 * @returns {void}
 */
function closeBundle(bundle) {
  const { db } = bundle;
  closeDb(db);
}
|
|
1744
|
+
|
|
1745
|
+
// src/services/analytics.ts
|
|
1746
|
+
init_limits();
|
|
1747
|
+
|
|
1748
|
+
// src/services/export/parquet.ts
|
|
1749
|
+
import { mkdir as mkdir2, rm, writeFile as writeFile2 } from "fs/promises";
|
|
1750
|
+
import path2 from "path";
|
|
1751
|
+
import { DuckDBConnection } from "@duckdb/node-api";
|
|
1752
|
+
init_errors();
|
|
1753
|
+
// Tables handled by the Parquet export. exportBundleParquet writes one
// `<table>.parquet` file per entry, queryDuckDbParquet exposes each file as a
// DuckDB view of the same name, and openBundleSnapshot counts rows for each.
var PARQUET_TABLES = [
  "objects",
  "source_files",
  "import_batches",
  "raw_records",
  "import_errors",
  "uncertainties",
  "projects",
  "sessions",
  "turns",
  "events",
  "messages",
  "content_blocks",
  "tool_calls",
  "tool_results",
  "artifacts",
  "edges",
  "search_docs"
];
|
|
1772
|
+
/**
 * Exports every table in PARQUET_TABLES from a bundle's SQLite database to
 * Parquet files and writes a `manifest.json` beside them.
 *
 * Existing target files (and a previous manifest) are removed before the
 * export starts. The DuckDB connection is always closed, even on failure.
 * Row counts in the manifest come from the snapshot taken up front, not from
 * the exported files.
 *
 * @param {{ bundlePath: string, outDir?: string }} options - Bundle location
 *   and optional output directory (defaults to the snapshot's defaultOutDir).
 * @returns {Promise<{ outDir: string, manifestPath: string, files: object, counts: object }>}
 */
async function exportBundleParquet(options) {
  const snapshot = await openBundleSnapshot(options.bundlePath);
  const outDir = path2.resolve(options.outDir ?? snapshot.defaultOutDir);
  await mkdir2(outDir, { recursive: true });

  // table name -> absolute Parquet path
  const files = {};
  for (const table of PARQUET_TABLES) {
    files[table] = path2.join(outDir, `${table}.parquet`);
  }
  const manifestPath = path2.join(outDir, "manifest.json");

  // Clear leftovers from a previous export.
  const staleTargets = [...Object.values(files), manifestPath];
  for (const target of staleTargets) {
    await rm(target, { force: true });
  }

  const connection = await createDuckDbConnection();
  try {
    await attachSqlite(connection, snapshot.dbPath);
    for (const table of PARQUET_TABLES) {
      const source = `prosa.${quoteIdentifier(table)}`;
      const destination = sqlString(files[table]);
      await connection.run(
        `COPY (SELECT * FROM ${source}) TO ${destination} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
      );
    }
  } finally {
    connection.closeSync();
  }

  const tables = {};
  for (const table of PARQUET_TABLES) {
    tables[table] = {
      file: path2.basename(files[table]),
      rows: snapshot.counts[table]
    };
  }
  const manifest = {
    exported_at: (/* @__PURE__ */ new Date()).toISOString(),
    source_db: snapshot.dbPath,
    schema_version: snapshot.schemaVersion,
    parser_version: snapshot.parserVersion,
    tables
  };
  await writeFile2(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  return { outDir, manifestPath, files, counts: snapshot.counts };
}
|
|
1813
|
+
/**
 * Runs a SQL query in DuckDB against a directory of exported Parquet files.
 *
 * Each table in PARQUET_TABLES is exposed as a view over
 * `<parquetDir>/<table>.parquet`, the analytics views are created on top, and
 * then `options.sql` is executed. The connection is always closed.
 *
 * @param {{ parquetDir: string, sql: string }} options - Export directory and
 *   the SQL text to run.
 * @returns {Promise<{ columns: string[], rows: object[] }>} De-duplicated
 *   column names and JSON-compatible row objects.
 * @throws {Error} "Parquet export not found ..." when the failure looks like a
 *   missing Parquet file; the underlying DuckDB error is kept as `cause`.
 *   Any other error is rethrown unchanged.
 */
async function queryDuckDbParquet(options) {
  const parquetDir = path2.resolve(options.parquetDir);
  const connection = await createDuckDbConnection();
  try {
    for (const table of PARQUET_TABLES) {
      await connection.run(
        `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
          path2.join(parquetDir, `${table}.parquet`)
        )})`
      );
    }
    await createAnalyticsViews(connection);
    const reader = await connection.runAndReadAll(options.sql);
    return {
      columns: reader.deduplicatedColumnNames(),
      rows: reader.getRowObjectsJson()
    };
  } catch (error) {
    if (isMissingParquetError(error)) {
      // Keep the DuckDB error reachable for debugging instead of discarding it.
      throw new Error(
        `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`,
        { cause: error }
      );
    }
    throw error;
  } finally {
    connection.closeSync();
  }
}
|
|
1841
|
+
/**
 * Creates a new DuckDB connection via DuckDBConnection.create().
 *
 * @returns {Promise<object>} The connection; callers close it with closeSync().
 */
async function createDuckDbConnection() {
  const connection = await DuckDBConnection.create();
  return connection;
}
|
|
1844
|
+
/**
 * Installs and loads DuckDB's sqlite extension, then attaches the SQLite
 * database at `dbPath` under the schema name `prosa`.
 *
 * @param {object} connection - DuckDB connection exposing an async `run(sql)`.
 * @param {string} dbPath - Path to the SQLite file; escaped with sqlString.
 * @returns {Promise<void>}
 * @throws {Error} A wrapping error with a fixed prefix when any of the three
 *   statements fails; the original error is kept as `cause`.
 */
async function attachSqlite(connection, dbPath) {
  try {
    await connection.run("INSTALL sqlite");
    await connection.run("LOAD sqlite");
    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
  } catch (error) {
    // Preserve the underlying failure (and its stack) for diagnostics.
    throw new Error(
      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`,
      { cause: error }
    );
  }
}
|
|
1855
|
+
/**
 * Defines the analytics views on a DuckDB connection, one statement at a time
 * and in this order: session_facts, tool_usage_facts, error_facts,
 * model_usage, project_activity.
 *
 * Every statement is CREATE OR REPLACE VIEW and reads from relations named
 * after the base tables (sessions, turns, messages, ...), which must already
 * exist on the connection. Execution stops at the first failing statement.
 *
 * @param {object} connection - DuckDB connection exposing an async `run(sql)`.
 * @returns {Promise<void>}
 */
async function createAnalyticsViews(connection) {
  const viewStatements = [
    // session_facts: sessions joined to per-session counts.
    `
CREATE OR REPLACE VIEW session_facts AS
WITH turn_counts AS (
SELECT session_id, count(*) AS turn_count
FROM turns
GROUP BY session_id
),
message_counts AS (
SELECT session_id,
count(*) AS message_count,
sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
FROM messages
GROUP BY session_id
),
tool_call_counts AS (
SELECT session_id,
count(*) AS tool_call_count,
sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
FROM tool_calls
GROUP BY session_id
),
tool_result_counts AS (
SELECT session_id,
count(*) AS tool_result_count,
sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
THEN 1 ELSE 0 END) AS tool_result_error_count,
sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
FROM tool_results
GROUP BY session_id
),
search_doc_counts AS (
SELECT session_id, count(*) AS search_doc_count
FROM search_docs
WHERE session_id IS NOT NULL
GROUP BY session_id
)
SELECT s.session_id,
s.source_tool,
s.source_session_id,
s.project_id,
p.display_name AS project_name,
p.canonical_path AS project_path,
s.parent_session_id,
s.is_subagent,
s.agent_role,
s.agent_nickname,
s.title,
s.start_ts,
s.end_ts,
CASE
WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
ELSE NULL
END AS duration_seconds,
s.cwd_initial,
s.git_branch_initial,
s.model_first,
s.model_last,
s.status,
s.timeline_confidence,
sf.path AS source_file_path,
COALESCE(tc.turn_count, 0) AS turn_count,
COALESCE(mc.message_count, 0) AS message_count,
COALESCE(mc.user_message_count, 0) AS user_message_count,
COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
COALESCE(trc.tool_result_count, 0) AS tool_result_count,
COALESCE(tcc.tool_call_error_count, 0)
+ COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
COALESCE(sdc.search_doc_count, 0) AS search_doc_count
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
`,
    // tool_usage_facts: tool calls joined to a rollup of their results.
    `
CREATE OR REPLACE VIEW tool_usage_facts AS
WITH result_rollup AS (
SELECT tool_call_id,
session_id,
count(*) AS tool_result_count,
max(status) AS result_status,
max(is_error) AS is_error,
min(exit_code) AS exit_code,
sum(COALESCE(duration_ms, 0)) AS duration_ms,
max(preview) AS preview
FROM tool_results
GROUP BY tool_call_id, session_id
)
SELECT tc.tool_call_id,
tc.session_id,
s.source_tool,
s.source_session_id,
s.project_id,
p.display_name AS project_name,
p.canonical_path AS project_path,
tc.turn_id,
tc.message_id,
tc.event_id,
tc.source_call_id,
tc.tool_name,
tc.canonical_tool_type,
tc.command,
tc.cwd,
tc.path,
tc.query,
tc.timestamp_start,
tc.timestamp_end,
CASE
WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
ELSE NULL
END AS call_duration_seconds,
tc.status AS call_status,
rr.result_status,
COALESCE(rr.is_error, 0) AS is_error,
rr.exit_code,
rr.duration_ms AS result_duration_ms,
COALESCE(rr.tool_result_count, 0) AS tool_result_count,
rr.preview,
tc.raw_record_id
FROM tool_calls tc
LEFT JOIN sessions s ON s.session_id = tc.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
`,
    // error_facts: failing tool results + import errors + uncertainties.
    `
CREATE OR REPLACE VIEW error_facts AS
SELECT 'tool_result:' || tr.tool_result_id AS error_id,
'tool_result' AS error_category,
s.source_tool,
s.project_id,
p.display_name AS project_name,
tr.session_id,
COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
tc.tool_name,
tc.canonical_tool_type,
COALESCE(tr.status, tc.status) AS status,
tr.exit_code,
NULL AS message,
tr.preview,
NULL AS entity_type,
NULL AS entity_id,
tr.raw_record_id
FROM tool_results tr
LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
LEFT JOIN sessions s ON s.session_id = tr.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
UNION ALL
SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
'import_error' AS error_category,
COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
NULL AS project_id,
NULL AS project_name,
NULL AS session_id,
ie.occurred_at AS timestamp,
NULL AS tool_name,
NULL AS canonical_tool_type,
ie.kind AS status,
NULL AS exit_code,
ie.message,
NULL AS preview,
NULL AS entity_type,
NULL AS entity_id,
ie.raw_record_id
FROM import_errors ie
LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
UNION ALL
SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
'uncertainty' AS error_category,
NULL AS source_tool,
NULL AS project_id,
NULL AS project_name,
CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
NULL AS timestamp,
NULL AS tool_name,
NULL AS canonical_tool_type,
u.reason AS status,
NULL AS exit_code,
u.reason AS message,
NULL AS preview,
u.entity_type,
u.entity_id,
NULL AS raw_record_id
FROM uncertainties u
`,
    // model_usage: model observations from sessions, turns and messages.
    `
CREATE OR REPLACE VIEW model_usage AS
WITH model_events AS (
SELECT s.source_tool,
s.project_id,
p.display_name AS project_name,
p.canonical_path AS project_path,
s.session_id,
NULL AS turn_id,
s.model_first AS model,
s.start_ts AS timestamp,
'session_first' AS observation_type
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE s.model_first IS NOT NULL
UNION ALL
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
'session_last' AS observation_type
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE s.model_last IS NOT NULL
UNION ALL
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
FROM turns t
LEFT JOIN sessions s ON s.session_id = t.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE t.model IS NOT NULL
UNION ALL
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
FROM messages m
LEFT JOIN sessions s ON s.session_id = m.session_id
LEFT JOIN projects p ON p.project_id = s.project_id
WHERE m.model IS NOT NULL
)
SELECT source_tool,
project_id,
project_name,
project_path,
model,
count(DISTINCT session_id) AS session_count,
count(DISTINCT turn_id) AS turn_count,
count(*) AS observation_count,
sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
min(timestamp) AS first_seen_ts,
max(timestamp) AS last_seen_ts
FROM model_events
GROUP BY source_tool, project_id, project_name, project_path, model
`,
    // project_activity: per-project aggregates.
    // NOTE(review): all child tables are joined to sessions at once and the
    // counts rely on count(DISTINCT ...); this may be costly for sessions
    // with many child rows - confirm on large exports.
    `
CREATE OR REPLACE VIEW project_activity AS
SELECT s.source_tool,
s.project_id,
COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
p.canonical_path AS project_path,
min(s.start_ts) AS first_session_ts,
max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
count(DISTINCT s.session_id) AS session_count,
count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
AS low_confidence_session_count,
count(DISTINCT t.turn_id) AS turn_count,
count(DISTINCT m.message_id) AS message_count,
count(DISTINCT tc.tool_call_id) AS tool_call_count,
count(DISTINCT tr.tool_result_id) AS tool_result_count,
count(DISTINCT CASE
WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
THEN tr.tool_result_id
END) AS tool_error_count,
count(DISTINCT sd.doc_id) AS search_doc_count
FROM sessions s
LEFT JOIN projects p ON p.project_id = s.project_id
LEFT JOIN turns t ON t.session_id = s.session_id
LEFT JOIN messages m ON m.session_id = s.session_id
LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
LEFT JOIN tool_results tr ON tr.session_id = s.session_id
LEFT JOIN search_docs sd ON sd.session_id = s.session_id
GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
`
  ];

  for (const statement of viewStatements) {
    await connection.run(statement);
  }
}
|
|
2134
|
+
/**
 * Open the bundle at `bundlePath`, count the rows of every table named in
 * PARQUET_TABLES, and return a summary object. The bundle is closed before
 * the function settles, whether it succeeds or throws.
 *
 * @param {string} bundlePath - Passed straight through to `openBundle`.
 * @returns {Promise<object>} `{ dbPath, schemaVersion, parserVersion, defaultOutDir, counts }`.
 */
async function openBundleSnapshot(bundlePath) {
  const bundle = await openBundle(bundlePath);
  try {
    const entries = [];
    for (const table of PARQUET_TABLES) {
      const countSql = `SELECT count(*) AS n FROM ${quoteIdentifier(table)}`;
      const row = bundle.db.prepare(countSql).get();
      // A missing row or a null count is reported as zero.
      entries.push([table, row?.n ?? 0]);
    }
    const counts = Object.fromEntries(entries);
    const { paths, manifest } = bundle;
    return {
      dbPath: paths.db,
      schemaVersion: manifest.schema_version,
      parserVersion: manifest.parser_version,
      defaultOutDir: paths.parquet,
      counts
    };
  } finally {
    closeBundle(bundle);
  }
}
|
|
2154
|
+
/**
 * Wrap `value` in double quotes for use as a SQL identifier, doubling any
 * double quote it contains.
 *
 * @param {string} value
 * @returns {string}
 */
function quoteIdentifier(value) {
  const escaped = value.split('"').join('""');
  return '"' + escaped + '"';
}
|
|
2157
|
+
/**
 * Render `value` as a single-quoted SQL string literal, doubling any
 * single quote it contains.
 *
 * @param {string} value
 * @returns {string}
 */
function sqlString(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
|
|
2160
|
+
/**
 * Report whether an error message both mentions a `.parquet` file and
 * contains one of the "not found" phrasings (matched case-insensitively).
 *
 * @param {unknown} error - Anything accepted by `getErrorMessage`.
 * @returns {boolean}
 */
function isMissingParquetError(error) {
  const text = getErrorMessage(error);
  if (!/No files found|does not exist|not found/i.test(text)) return false;
  return /\.parquet/i.test(text);
}
|
|
2164
|
+
|
|
2165
|
+
// src/services/analytics.ts
|
|
2166
|
+
var ANALYTICS_REPORTS = ["sessions", "tools", "errors", "models", "projects"];
|
|
2167
|
+
/**
 * Build the DuckDB-dialect SQL for the requested report and run it through
 * `queryDuckDbParquet` against `options.parquetDir`.
 *
 * @param {{ parquetDir: string, report: string, filters?: object }} options
 * @returns {Promise<*>} Whatever `queryDuckDbParquet` resolves to.
 */
async function runAnalyticsReport(options) {
  const { parquetDir, report, filters } = options;
  const sql = buildAnalyticsSql(report, filters ?? {}, "duckdb");
  return queryDuckDbParquet({ parquetDir, sql });
}
|
|
2173
|
+
/**
 * Build the SQLite-dialect SQL for the requested report and execute it on
 * the bundle's database handle.
 *
 * @param {{ bundle: object, report: string, filters?: object }} options
 * @returns {{ columns: string[], rows: object[] }} Column names as reported
 *   by the prepared statement, plus every result row.
 */
function runAnalyticsReportFromBundle(options) {
  const { bundle, report, filters } = options;
  const statement = bundle.db.prepare(buildAnalyticsSql(report, filters ?? {}, "sqlite"));
  const rows = statement.all();
  const columns = statement.columns().map(({ name }) => name);
  return { columns, rows };
}
|
|
2180
|
+
/**
 * Dispatch to the SQL builder for the named analytics report.
 *
 * @param {string} report - One of "sessions", "tools", "errors", "models", "projects".
 * @param {object} filters - Filter options forwarded to the builder.
 * @param {string} dialect - Forwarded to the builder ("duckdb" or "sqlite").
 * @returns {string} The SQL text produced by the selected builder.
 * @throws {Error} If `report` is not one of the known report names.
 */
function buildAnalyticsSql(report, filters, dialect) {
  switch (report) {
    case "sessions":
      return buildSessionsSql(filters, dialect);
    case "tools":
      return buildToolsSql(filters, dialect);
    case "errors":
      return buildErrorsSql(filters, dialect);
    case "models":
      return buildModelsSql(filters, dialect);
    case "projects":
      return buildProjectsSql(filters, dialect);
    default:
      // Without this branch an unknown name yields `undefined`, which only
      // fails later when the database layer is handed a non-string query.
      throw new Error(`unknown analytics report: ${String(report)}`);
  }
}
|
|
2194
|
+
/**
 * Build the "sessions" report query over `session_facts`.
 *
 * Optional filters: source, since/until on `start_ts`, project, exact
 * `sessionId`, and a LIKE substring match on `source_file_path`.
 *
 * @param {object} filters
 * @param {string} dialect - Forwarded to `projectFilter`.
 * @returns {string} SQL text ordered by newest `start_ts` first.
 */
function buildSessionsSql(filters, dialect) {
  const { sessionId, sourcePathSubstring } = filters;
  let pathCondition = null;
  if (sourcePathSubstring) {
    const pattern = sqlString2(`%${escapeLike(sourcePathSubstring)}%`);
    pathCondition = `source_file_path LIKE ${pattern} ESCAPE '\\'`;
  }
  const where = buildWhere([
    sourceFilter(filters),
    timeFilter("start_ts", filters),
    projectFilter(filters, dialect),
    sessionId ? `session_id = ${sqlString2(sessionId)}` : null,
    pathCondition
  ]);
  const lines = [
    "",
    "SELECT start_ts, source_tool, project_name, source_file_path, session_id,",
    "source_session_id, model_last, duration_seconds,",
    "message_count, tool_call_count, tool_result_count, tool_error_count,",
    "tool_duration_ms, timeline_confidence, title",
    "FROM session_facts",
    where,
    "ORDER BY start_ts DESC NULLS LAST",
    `LIMIT ${limit(filters)}`,
    ""
  ];
  return lines.join("\n");
}
|
|
2213
|
+
/**
 * Build the "tools" report query: one aggregated row per
 * (tool_name, canonical_tool_type, source_tool, project_name) from
 * `tool_usage_facts`.
 *
 * Optional filters: source, since/until on `timestamp_start`, project,
 * exact `toolName`, exact `canonicalType`, and `errorsOnly`.
 *
 * @param {object} filters
 * @param {string} dialect - Forwarded to `projectFilter`.
 * @returns {string} SQL text ordered by call count, then error count, then name.
 */
function buildToolsSql(filters, dialect) {
  const equals = (column, wanted) => (wanted ? `${column} = ${sqlString2(wanted)}` : null);
  const where = buildWhere([
    sourceFilter(filters),
    timeFilter("timestamp_start", filters),
    projectFilter(filters, dialect),
    equals("tool_name", filters.toolName),
    equals("canonical_tool_type", filters.canonicalType),
    filters.errorsOnly ? "(is_error = 1 OR call_status = 'error')" : null
  ]);
  const lines = [
    "",
    "SELECT tool_name, canonical_tool_type, source_tool, project_name,",
    "count(*) AS call_count,",
    "sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,",
    "round(avg(result_duration_ms), 3) AS avg_result_duration_ms,",
    "max(timestamp_start) AS latest_ts",
    "FROM tool_usage_facts",
    where,
    "GROUP BY tool_name, canonical_tool_type, source_tool, project_name",
    "ORDER BY call_count DESC, error_count DESC, tool_name ASC",
    `LIMIT ${limit(filters)}`,
    ""
  ];
  return lines.join("\n");
}
|
|
2235
|
+
/**
 * Build the "errors" report query over `error_facts`.
 *
 * Optional filters: source, since/until on `timestamp`, project, exact
 * `toolName`, and exact `category` (matched against `error_category`).
 *
 * @param {object} filters
 * @param {string} dialect - Forwarded to `projectFilter`.
 * @returns {string} SQL text ordered by newest timestamp, then `error_id`.
 */
function buildErrorsSql(filters, dialect) {
  const equals = (column, wanted) => (wanted ? `${column} = ${sqlString2(wanted)}` : null);
  const where = buildWhere([
    sourceFilter(filters),
    timeFilter("timestamp", filters),
    projectFilter(filters, dialect),
    equals("tool_name", filters.toolName),
    equals("error_category", filters.category)
  ]);
  const lines = [
    "",
    "SELECT timestamp, error_category, source_tool, project_name, session_id,",
    "tool_name, status, exit_code, message, preview",
    "FROM error_facts",
    where,
    "ORDER BY timestamp DESC NULLS LAST, error_id DESC",
    `LIMIT ${limit(filters)}`,
    ""
  ];
  return lines.join("\n");
}
|
|
2252
|
+
/**
 * Build the "models" report query over the `model_usage` view.
 *
 * Optional filters: source, a since/until overlap test against
 * `first_seen_ts`/`last_seen_ts`, project, and exact `model`.
 *
 * @param {object} filters
 * @param {string} dialect - Forwarded to `projectFilter`.
 * @returns {string} SQL text ordered by session count, observation count, model.
 */
function buildModelsSql(filters, dialect) {
  const modelCondition = filters.model ? `model = ${sqlString2(filters.model)}` : null;
  const where = buildWhere([
    sourceFilter(filters),
    rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
    projectFilter(filters, dialect),
    modelCondition
  ]);
  const lines = [
    "",
    "SELECT model, source_tool, project_name, session_count, turn_count,",
    "message_count, observation_count, first_seen_ts, last_seen_ts",
    "FROM model_usage",
    where,
    "ORDER BY session_count DESC, observation_count DESC, model ASC",
    `LIMIT ${limit(filters)}`,
    ""
  ];
  return lines.join("\n");
}
|
|
2268
|
+
/**
 * Build the "projects" report query over the `project_activity` view.
 *
 * Optional filters: source, a since/until overlap test against
 * `first_session_ts`/`latest_session_ts`, and project.
 *
 * @param {object} filters
 * @param {string} dialect - Forwarded to `projectFilter`.
 * @returns {string} SQL text ordered by latest session, session count, name.
 */
function buildProjectsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_session_ts", "latest_session_ts", filters),
    projectFilter(filters, dialect)
  ];
  const lines = [
    "",
    "SELECT latest_session_ts, source_tool, project_name, project_path,",
    "session_count, message_count, tool_call_count, tool_error_count,",
    "low_confidence_session_count",
    "FROM project_activity",
    buildWhere(conditions),
    "ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC",
    `LIMIT ${limit(filters)}`,
    ""
  ];
  return lines.join("\n");
}
|
|
2284
|
+
/**
 * Produce a `source_tool = '<source>'` condition, or null when
 * `filters.source` is falsy.
 *
 * @param {{ source?: string }} filters
 * @returns {string|null}
 */
function sourceFilter(filters) {
  const { source } = filters;
  if (!source) return null;
  return `source_tool = ${sqlString2(source)}`;
}
|
|
2287
|
+
/**
 * Build the since/until conditions for one timestamp column. Rows whose
 * column is NULL pass both bounds. `since` compares with `>=`, `until`
 * with `<`.
 *
 * @param {string} column - Column name, interpolated as-is.
 * @param {{ since?: string, until?: string }} filters
 * @returns {string|null} Conditions joined by AND, or null when neither bound is set.
 */
function timeFilter(column, filters) {
  const bound = (operator, literal) =>
    `(${column} IS NULL OR ${column} ${operator} ${sqlString2(literal)})`;
  const clauses = [
    filters.since ? bound(">=", filters.since) : null,
    filters.until ? bound("<", filters.until) : null
  ].filter((clause) => clause !== null);
  return clauses.length === 0 ? null : clauses.join(" AND ");
}
|
|
2295
|
+
/**
 * Build conditions that keep rows whose [firstColumn, lastColumn] range
 * overlaps the requested window: `since` is tested against `lastColumn`
 * (`>=`), `until` against `firstColumn` (`<`). NULL columns pass.
 *
 * @param {string} firstColumn - Column holding the start of the range.
 * @param {string} lastColumn - Column holding the end of the range.
 * @param {{ since?: string, until?: string }} filters
 * @returns {string|null} Conditions joined by AND, or null when neither bound is set.
 */
function rangeOverlapFilter(firstColumn, lastColumn, filters) {
  const bound = (column, operator, literal) =>
    `(${column} IS NULL OR ${column} ${operator} ${sqlString2(literal)})`;
  const clauses = [
    filters.since ? bound(lastColumn, ">=", filters.since) : null,
    filters.until ? bound(firstColumn, "<", filters.until) : null
  ].filter((clause) => clause !== null);
  return clauses.length === 0 ? null : clauses.join(" AND ");
}
|
|
2305
|
+
/**
 * Build a parenthesised OR condition matching `filters.project` exactly
 * against `project_id`, or as an escaped substring against `project_name`
 * or `project_path`. The substring operator is ILIKE for the "duckdb"
 * dialect and LIKE otherwise.
 *
 * @param {{ project?: string }} filters
 * @param {string} dialect
 * @returns {string|null} The condition, or null when no project filter is set.
 */
function projectFilter(filters, dialect) {
  const { project } = filters;
  if (!project) return null;
  const exact = sqlString2(project);
  const pattern = sqlString2(`%${escapeLike(project)}%`);
  const operator = dialect === "duckdb" ? "ILIKE" : "LIKE";
  const alternatives = [
    `project_id = ${exact}`,
    `project_name ${operator} ${pattern} ESCAPE '\\'`,
    `project_path ${operator} ${pattern} ESCAPE '\\'`
  ];
  return `(${alternatives.join(" OR ")})`;
}
|
|
2312
|
+
/**
 * Join the truthy entries of `filters` into a `WHERE ... AND ...` clause.
 *
 * @param {Array<string|null>} filters - Conditions; falsy entries are dropped.
 * @returns {string} The clause, or an empty string when nothing remains.
 */
function buildWhere(filters) {
  const conditions = filters.filter(Boolean);
  if (conditions.length === 0) return "";
  return "WHERE " + conditions.join(" AND ");
}
|
|
2316
|
+
/**
 * Resolve the row limit for a report: a finite numeric `filters.limit` is
 * handed to `clampLimit`, anything else is passed as undefined, with
 * `{ max: 500, fallback: 50 }` as the clamp options.
 *
 * @param {{ limit?: number }} filters
 * @returns {*} Whatever `clampLimit` returns.
 */
function limit(filters) {
  const requested = filters.limit;
  const bounds = { max: 500, fallback: 50 };
  if (Number.isFinite(requested)) return clampLimit(requested, bounds);
  return clampLimit(void 0, bounds);
}
|
|
2320
|
+
/**
 * Render `value` as a single-quoted SQL string literal, doubling any
 * single quote it contains.
 *
 * @param {string} value
 * @returns {string}
 */
function sqlString2(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
|
|
2323
|
+
/**
 * Prefix every backslash, `%` and `_` in `value` with a backslash so the
 * text can be embedded in a LIKE pattern that uses `\` as its escape
 * character.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeLike(value) {
  // "$&" re-inserts the matched character after the added backslash.
  return value.replace(/[\\%_]/g, "\\$&");
}
|
|
2326
|
+
|
|
2327
|
+
// src/cli/bundle.ts
|
|
2328
|
+
import path3 from "path";
|
|
2329
|
+
/**
 * Open the bundle at `storePath` (resolved to an absolute path), run `fn`
 * with it, and close the bundle afterwards even if `fn` throws or rejects.
 *
 * @param {string} storePath
 * @param {(bundle: object) => *} fn - Its (awaited) result is returned.
 * @returns {Promise<*>}
 */
async function withBundle(storePath, fn) {
  const absoluteStore = path3.resolve(storePath);
  const bundle = await openBundle(absoluteStore);
  let outcome;
  try {
    outcome = await fn(bundle);
  } finally {
    closeBundle(bundle);
  }
  return outcome;
}
|
|
2337
|
+
|
|
2338
|
+
// src/cli/output.ts
|
|
2339
|
+
var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
|
|
2340
|
+
var COL_SEPARATOR = " ";
|
|
2341
|
+
var RULE_CHAR = "-";
|
|
2342
|
+
/**
 * Validate an `--output-format` value.
 *
 * @param {string|undefined} value - Raw option value.
 * @param {string} fallback - Returned when `value` is undefined.
 * @returns {string} `value` when it is listed in OUTPUT_FORMATS.
 * @throws {Error} When `value` is defined but not a known format.
 */
function parseOutputFormat(value, fallback) {
  if (value === void 0) return fallback;
  if (!OUTPUT_FORMATS.includes(value)) {
    throw new Error(
      `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
    );
  }
  return value;
}
|
|
2349
|
+
/**
 * Print `rows` with the printer that matches `opts.format`: "json" and
 * "csv" have their own printers, "table" and "interactive" both use the
 * table printer. Any other format prints nothing.
 *
 * @param {object[]} rows
 * @param {{ format: string }} opts - Passed through to the chosen printer.
 * @returns {void}
 */
function printRows(rows, opts) {
  const { format } = opts;
  if (format === "json") {
    printJson(rows, opts);
  } else if (format === "csv") {
    printCsv(rows, opts);
  } else if (format === "table" || format === "interactive") {
    printTable(rows, opts);
  }
}
|
|
2363
|
+
/**
 * Write `rows` to stdout as 2-space-indented JSON followed by a newline.
 * When `opts.meta` is truthy its properties are spread into a wrapper
 * object that carries the rows under `rows`.
 *
 * @param {object[]} rows
 * @param {{ meta?: object }} opts
 * @returns {void}
 */
function printJson(rows, opts) {
  const { meta } = opts;
  const payload = meta ? { ...meta, rows } : rows;
  const text = JSON.stringify(payload, null, 2);
  process.stdout.write(text + "\n");
}
|
|
2368
|
+
/**
 * Write `rows` to stdout as CSV: a header line with `opts.columns`, then
 * one line per row with the cells in column order. Each line is written
 * with its own `process.stdout.write` call.
 *
 * @param {object[]} rows
 * @param {{ columns: string[] }} opts
 * @returns {void}
 */
function printCsv(rows, opts) {
  const { columns } = opts;
  const writeLine = (fields) => {
    process.stdout.write(fields.join(",") + "\n");
  };
  writeLine(columns.map(csvField));
  for (const record of rows) {
    writeLine(columns.map((name) => csvField(formatCell(record[name]))));
  }
}
|
|
2379
|
+
/**
 * Quote a CSV field when needed.
 *
 * A field containing a double quote, comma, line feed or carriage return
 * is wrapped in double quotes with embedded quotes doubled. Carriage
 * returns are included because a bare `\r` inside an unquoted field is
 * read as a record break by CSV parsers.
 *
 * @param {string} value - Already-formatted cell text.
 * @returns {string} The field, quoted only if required.
 */
function csvField(value) {
  if (/[",\r\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
  return value;
}
|
|
2383
|
+
/**
 * Write `rows` to stdout as a padded text table: a header line and a rule
 * line (one write), then one write per row. Each column is as wide as its
 * longest cell or its header, whichever is longer.
 *
 * @param {object[]} rows
 * @param {{ columns: string[] }} opts
 * @returns {void}
 */
function printTable(rows, opts) {
  const { columns } = opts;
  const grid = rows.map((record) => columns.map((name) => formatCell(record[name])));
  const widths = columns.map((name, col) =>
    grid.reduce((widest, cells) => Math.max(widest, cells[col].length), name.length)
  );
  const padded = (cells) =>
    cells.map((text, col) => text.padEnd(widths[col] ?? 0)).join(COL_SEPARATOR);
  const rule = widths.map((width) => RULE_CHAR.repeat(width)).join(COL_SEPARATOR);
  process.stdout.write(`${padded(columns)}\n${rule}\n`);
  for (const cells of grid) {
    process.stdout.write(`${padded(cells)}\n`);
  }
}
|
|
2406
|
+
/**
 * Convert a result cell to display text.
 *
 * - null/undefined become an empty string;
 * - strings are returned unchanged;
 * - numbers, booleans and bigints are rendered with `String`;
 * - everything else is JSON-encoded, with nested bigints written as
 *   decimal strings (plain `JSON.stringify` throws on BigInt).
 *
 * Values JSON cannot represent at all (functions, symbols) fall back to
 * `String(value)` so the result is always a string.
 *
 * @param {*} value
 * @returns {string}
 */
function formatCell(value) {
  if (value == null) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
    case "bigint":
      return String(value);
    default: {
      const json = JSON.stringify(value, (_key, inner) =>
        typeof inner === "bigint" ? inner.toString() : inner
      );
      return json ?? String(value);
    }
  }
}
|
|
2412
|
+
|
|
2413
|
+
// src/core/domain/types.ts
|
|
2414
|
+
var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
|
|
2415
|
+
|
|
2416
|
+
// src/cli/parsers.ts
|
|
2417
|
+
/**
 * Validate a search-engine name.
 *
 * @param {string} value
 * @returns {string} `value` when it is "fts5" or "tantivy".
 * @throws {Error} For any other value.
 */
function parseSearchEngine(value) {
  switch (value) {
    case "fts5":
    case "tantivy":
      return value;
    default:
      throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
  }
}
|
|
2421
|
+
/**
 * Validate an MCP transport name.
 *
 * @param {string} value
 * @returns {string} `value` when it is "stdio" or "http".
 * @throws {Error} For any other value.
 */
function parseMcpTransport(value) {
  switch (value) {
    case "stdio":
    case "http":
      return value;
    default:
      throw new Error(`invalid transport: ${value} (expected stdio or http)`);
  }
}
|
|
2425
|
+
/**
 * Validate an optional source-tool name.
 *
 * @param {string|undefined} value
 * @returns {string|undefined} undefined when `value` is undefined,
 *   otherwise `value` when it is listed in SOURCE_TOOLS.
 * @throws {Error} When `value` is defined but not a known source tool.
 */
function parseSourceTool(value) {
  if (value === void 0) return void 0;
  if (!SOURCE_TOOLS.includes(value)) {
    throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
  }
  return value;
}
|
|
2430
|
+
|
|
2431
|
+
// src/cli/commands/analytics.ts
|
|
2432
|
+
/**
 * Create the `analytics` command and register one sub-command per report
 * (sessions, tools, errors, models, projects), in that order.
 *
 * @returns {Command} The configured parent command.
 */
function analyticsCommand() {
  const root = new Command("analytics").description(
    "Run high-level analytics reports over exported Parquet files."
  );
  const reports = [
    ["sessions", "Summarize sessions by source, project and model."],
    ["tools", "Summarize tool usage, status, duration and errors."],
    ["errors", "List import errors, failed tool results and uncertainties."],
    ["models", "Summarize model usage by source, project and time."],
    ["projects", "Summarize project activity and operational counts."]
  ];
  for (const [name, summary] of reports) {
    root.addCommand(reportCommand(name, summary));
  }
  return root;
}
|
|
2447
|
+
/**
 * Create the sub-command for one analytics report: the common options,
 * the report-specific options, and an action that resolves the Parquet
 * directory, runs the report and prints the rows.
 *
 * @param {string} report - Report name; also used as the command name.
 * @param {string} description - Help text for the command.
 * @returns {*} The value returned by `command.action(...)`.
 */
function reportCommand(report, description) {
  const command = addCommonOptions(new Command(report).description(description));
  const toolNameOption = ["--tool-name <name>", "filter by exact tool name"];
  const projectOption = ["--project <text>", "filter by project id, name, or path substring"];
  const extraOptions = new Map([
    [
      "tools",
      [
        toolNameOption,
        ["--canonical-type <type>", "filter by canonical tool type"],
        ["--errors-only", "only include tool calls with errors"]
      ]
    ],
    ["errors", [toolNameOption, ["--category <category>", "filter by error category"]]],
    ["models", [["--model <model>", "filter by exact model name"]]],
    ["projects", [projectOption]],
    ["sessions", [projectOption]]
  ]);
  for (const [flags, help] of extraOptions.get(report) ?? []) {
    command.option(flags, help);
  }
  return command.action(async (options) => {
    const format = parseOutputFormat(options.outputFormat, "table");
    const parquetDir = await resolveParquetDir(options);
    const filters = buildFilters(options);
    const { rows, columns } = await runAnalyticsReport({ parquetDir, report, filters });
    printRows(rows, {
      format,
      columns,
      meta: { report, count: rows.length }
    });
  });
}
|
|
2476
|
+
/**
 * Register the options shared by every analytics report on `command`,
 * chaining each `.option(...)` call on the result of the previous one.
 *
 * @param {object} command - Object exposing a chainable `option` method.
 * @returns {*} The result of the final `.option(...)` call.
 */
function addCommonOptions(command) {
  const specs = [
    ["--store <path>", "bundle directory", defaultBundlePath()],
    ["--parquet-dir <path>", "Parquet directory (default: <store>/parquet)"],
    ["--refresh", "export Parquet before running the report"],
    ["--source <tool>", "filter by source tool: cursor|codex|claude|gemini"],
    ["--since <iso>", "lower timestamp bound (inclusive)"],
    ["--until <iso>", "upper timestamp bound (exclusive)"],
    ["--limit <n>", "maximum rows", "50"],
    ["--output-format <fmt>", "interactive|table|json|csv", "table"]
  ];
  return specs.reduce((chained, spec) => chained.option(...spec), command);
}
|
|
2479
|
+
/**
 * Decide which Parquet directory a report should read.
 *
 * With `options.refresh`, the bundle is exported first and the export's
 * `outDir` is returned. Otherwise the resolved `options.parquetDir` is
 * used when given, else the bundle's own `paths.parquet`.
 *
 * @param {{ store: string, parquetDir?: string, refresh?: boolean }} options
 * @returns {Promise<string>}
 */
async function resolveParquetDir(options) {
  const { store, parquetDir, refresh } = options;
  const storePath = path4.resolve(store);
  const outDir = parquetDir ? path4.resolve(parquetDir) : void 0;
  if (!refresh) {
    return outDir ?? await withBundle(storePath, ({ paths }) => paths.parquet);
  }
  const exported = await exportBundleParquet({ bundlePath: storePath, outDir });
  return exported.outDir;
}
|
|
2488
|
+
/**
 * Translate parsed CLI options into the filter object used by the
 * analytics SQL builders. `source` is validated with `parseSourceTool`
 * and `limit` is parsed as a base-10 integer; the rest are copied as-is.
 *
 * @param {object} options - Parsed command options.
 * @returns {object} Filter object.
 */
function buildFilters(options) {
  const source = parseSourceTool(options.source);
  const parsedLimit = Number.parseInt(options.limit, 10);
  const { since, until, toolName, canonicalType, errorsOnly, category, model, project } = options;
  return {
    source,
    since,
    until,
    limit: parsedLimit,
    toolName,
    canonicalType,
    errorsOnly,
    category,
    model,
    project
  };
}
|
|
2502
|
+
|
|
2503
|
+
// src/cli/commands/compile.ts
|
|
2504
|
+
import { Command as Command2 } from "commander";
|
|
2505
|
+
|
|
2506
|
+
// src/services/compile.ts
|
|
2507
|
+
init_errors();
|
|
2508
|
+
import os2 from "os";
|
|
2509
|
+
import path16 from "path";
|
|
2510
|
+
|
|
2511
|
+
// src/importers/claude/index.ts
|
|
2512
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
2513
|
+
import path8 from "path";
|
|
2514
|
+
|
|
2515
|
+
// src/core/cas/index.ts
|
|
2516
|
+
init_db();
|
|
2517
|
+
import { mkdir as mkdir3, readFile as readFile2, writeFile as writeFile3 } from "fs/promises";
|
|
2518
|
+
import path5 from "path";
|
|
2519
|
+
|
|
2520
|
+
// src/core/cas/compress.ts
|
|
2521
|
+
import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
|
|
2522
|
+
var COMPRESS_THRESHOLD_BYTES = 256;
|
|
2523
|
+
var ZSTD_LEVEL = 3;
|
|
2524
|
+
/**
 * Prepare bytes for storage. Inputs shorter than COMPRESS_THRESHOLD_BYTES
 * are copied into a Buffer and tagged "none"; longer inputs are
 * zstd-compressed at ZSTD_LEVEL and tagged "zstd".
 *
 * @param {Uint8Array} input
 * @returns {{ bytes: Buffer, compression: string }}
 */
function compressBytes(input) {
  const belowThreshold = input.byteLength < COMPRESS_THRESHOLD_BYTES;
  const raw = Buffer.from(input);
  if (belowThreshold) {
    return { bytes: raw, compression: "none" };
  }
  const bytes = zstdCompress(raw, { compressionLevel: ZSTD_LEVEL });
  return { bytes, compression: "zstd" };
}
|
|
2531
|
+
/**
 * Undo `compressBytes`: bytes tagged "none" are returned unchanged, any
 * other tag is decoded with zstd.
 *
 * @param {Buffer} input
 * @param {string} compression
 * @returns {Buffer}
 */
function decompressBytes(input, compression) {
  return compression === "none" ? input : zstdDecompress(input);
}
|
|
2535
|
+
|
|
2536
|
+
// src/core/cas/hash.ts
|
|
2537
|
+
import { createHash } from "crypto";
|
|
2538
|
+
import { blake3 } from "@noble/hashes/blake3";
|
|
2539
|
+
import { bytesToHex } from "@noble/hashes/utils";
|
|
2540
|
+
/**
 * Hash `bytes` with BLAKE3 and return the digest as a hex string.
 *
 * @param {Uint8Array} bytes
 * @returns {string}
 */
function blake3Hex(bytes) {
  const digest = blake3(bytes);
  return bytesToHex(digest);
}
|
|
2543
|
+
/**
 * Hash `bytes` with SHA-256 and return the digest as a hex string.
 *
 * @param {string|Uint8Array} bytes
 * @returns {string}
 */
function sha256Hex(bytes) {
  const hasher = createHash("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
|
|
2546
|
+
/**
 * Build an object id by prefixing the hex hash with `blake3:`.
 *
 * @param {string} hashHex
 * @returns {string}
 */
function objectIdFromHash(hashHex) {
  return "blake3:" + hashHex;
}
|
|
2549
|
+
/**
 * Compute the bundle-relative storage path for an object: two directory
 * levels taken from the first four hash characters, then the full hash
 * with `.zst` (zstd) or `.bin` (anything else) as extension.
 *
 * @param {string} hashHex
 * @param {string} compression
 * @returns {string}
 */
function objectStoragePath(hashHex, compression) {
  const extension = compression === "zstd" ? ".zst" : ".bin";
  const shards = [hashHex.slice(0, 2), hashHex.slice(2, 4)];
  return ["objects", "blake3", ...shards, hashHex + extension].join("/");
}
|
|
2555
|
+
|
|
2556
|
+
// src/core/cas/index.ts
|
|
2557
|
+
var ensuredDirs = /* @__PURE__ */ new Set();
|
|
2558
|
+
/**
 * Create `absoluteDir` (recursively) unless this process has already done
 * so; successfully created directories are remembered in `ensuredDirs`.
 *
 * @param {string} absoluteDir
 * @returns {Promise<void>}
 */
async function ensureDir(absoluteDir) {
  if (!ensuredDirs.has(absoluteDir)) {
    await mkdir3(absoluteDir, { recursive: true });
    ensuredDirs.add(absoluteDir);
  }
}
|
|
2563
|
+
/**
 * Store `bytes` in the bundle's content-addressed object store.
 *
 * The object id is derived from the BLAKE3 hash. If a row with that id
 * already exists the id is returned immediately; otherwise the (possibly
 * compressed) bytes are written under the bundle path and a row is
 * inserted into `objects`.
 *
 * @param {object} bundle - Provides `db` and `path`.
 * @param {Uint8Array} bytes
 * @param {{ mimeType?: string, encoding?: string }} [options]
 * @returns {Promise<string>} The object id.
 */
async function putBytes(bundle, bytes, options = {}) {
  const hash = blake3Hex(bytes);
  const objectId = objectIdFromHash(hash);
  const lookupSql = [
    "SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
    "compression, mime_type, encoding, storage_path, created_at",
    "FROM objects WHERE object_id = ?"
  ].join("\n");
  const alreadyStored = prepare(bundle.db, lookupSql).get(objectId);
  if (alreadyStored) return objectId;

  const packed = compressBytes(bytes);
  const storagePath = objectStoragePath(hash, packed.compression);
  const absolutePath = path5.join(bundle.path, storagePath);
  await ensureDir(path5.dirname(absolutePath));
  await writeFile3(absolutePath, packed.bytes);

  const insertSql = [
    "INSERT INTO objects (",
    "object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
    "compression, mime_type, encoding, storage_path, created_at",
    ") VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)"
  ].join("\n");
  const insert = prepare(bundle.db, insertSql);
  // compressed_size_bytes is only recorded when the payload was compressed.
  const compressedSize = packed.compression === "zstd" ? packed.bytes.byteLength : null;
  insert.run(
    objectId,
    hash,
    bytes.byteLength,
    compressedSize,
    packed.compression,
    options.mimeType ?? null,
    options.encoding ?? null,
    storagePath,
    (/* @__PURE__ */ new Date()).toISOString()
  );
  return objectId;
}
|
|
2597
|
+
/**
 * JSON-encode `value` and store it via `putBytes` as a UTF-8
 * `application/json` object.
 *
 * @param {object} bundle
 * @param {*} value
 * @returns {Promise<string>} The object id returned by `putBytes`.
 */
async function putJson(bundle, value) {
  const payload = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return putBytes(bundle, payload, metadata);
}
|
|
2604
|
+
/**
 * Load an object's bytes: look up its row in `objects`, read the file at
 * its storage path under the bundle, and decompress according to the
 * recorded compression.
 *
 * @param {object} bundle - Provides `db` and `path`.
 * @param {string} objectId
 * @returns {Promise<Buffer>}
 * @throws {Error} When no row exists for `objectId`.
 */
async function getBytes(bundle, objectId) {
  const lookupSql = [
    "SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
    "compression, mime_type, encoding, storage_path, created_at",
    "FROM objects WHERE object_id = ?"
  ].join("\n");
  const meta = prepare(bundle.db, lookupSql).get(objectId);
  if (meta) {
    const stored = await readFile2(path5.join(bundle.path, meta.storage_path));
    return decompressBytes(stored, meta.compression);
  }
  throw new Error(`object not found: ${objectId}`);
}
|
|
2617
|
+
/**
 * Load an object and decode its bytes as UTF-8 text.
 *
 * @param {object} bundle
 * @param {string} objectId
 * @returns {Promise<string>}
 */
async function getText(bundle, objectId) {
  const bytes = await getBytes(bundle, objectId);
  const text = bytes.toString("utf8");
  return text;
}
|
|
2621
|
+
/**
 * Create an empty staging area for objects awaiting a flush.
 *
 * @returns {{ byId: Map }} Staged objects keyed by object id.
 */
function createPendingObjects() {
  const byId = /* @__PURE__ */ new Map();
  return { byId };
}
|
|
2624
|
+
/**
 * Stage `bytes` for a later flush and return their object id. Bytes whose
 * id is already staged are not staged again; the first entry (including
 * its mime type and encoding) is kept.
 *
 * @param {{ byId: Map }} pending
 * @param {Uint8Array} bytes - Converted to a Buffer when not one already.
 * @param {{ mimeType?: string, encoding?: string }} [options]
 * @returns {string} The object id.
 */
function stageBytes(pending, bytes, options = {}) {
  const buffer = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  const hash = blake3Hex(buffer);
  const objectId = objectIdFromHash(hash);
  if (pending.byId.has(objectId)) return objectId;
  const entry = {
    objectId,
    hash,
    bytes: buffer,
    mimeType: options.mimeType ?? null,
    encoding: options.encoding ?? null
  };
  pending.byId.set(objectId, entry);
  return objectId;
}
|
|
2639
|
+
/**
 * Stage a string as UTF-8 bytes. The mime type defaults to
 * `text/plain; charset=utf-8`; the encoding is always recorded as "utf-8".
 *
 * @param {{ byId: Map }} pending
 * @param {string} text
 * @param {{ mimeType?: string }} [options]
 * @returns {string} The object id from `stageBytes`.
 */
function stageText(pending, text, options = {}) {
  const mimeType = options.mimeType ?? "text/plain; charset=utf-8";
  const payload = Buffer.from(text, "utf8");
  return stageBytes(pending, payload, { mimeType, encoding: "utf-8" });
}
|
|
2645
|
+
/**
 * JSON-encode `value` and stage it as a UTF-8 `application/json` object.
 *
 * @param {{ byId: Map }} pending
 * @param {*} value
 * @returns {string} The object id from `stageBytes`.
 */
function stageJson(pending, value) {
  const payload = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return stageBytes(pending, payload, metadata);
}
|
|
2651
|
+
/**
 * Persist every staged object that the database does not already know.
 *
 * Steps: look up which staged ids already exist, compress the rest and
 * compute their storage paths, write those files via
 * `writeFilesParallel`, then insert one `objects` row per written file
 * (INSERT OR IGNORE), all sharing a single `created_at` timestamp.
 *
 * @param {object} bundle - Provides `db` and `path`.
 * @param {{ byId: Map }} pending
 * @returns {Promise<void>}
 */
async function flushPendingObjects(bundle, pending) {
  const staged = pending.byId;
  if (staged.size === 0) return;

  const known = queryExistingObjectIds(bundle, [...staged.keys()]);
  const toWrite = [...staged.values()]
    .filter((obj) => !known.has(obj.objectId))
    .map((obj) => {
      const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
      const storagePath = objectStoragePath(obj.hash, compression);
      return {
        staged: obj,
        compression,
        compressedBytes,
        storagePath,
        absolutePath: path5.join(bundle.path, storagePath)
      };
    });
  if (toWrite.length > 0) {
    await writeFilesParallel(toWrite);
  }

  const insertSql = [
    "INSERT OR IGNORE INTO objects (",
    "object_id, hash_alg, hash, size_bytes, compressed_size_bytes,",
    "compression, mime_type, encoding, storage_path, created_at",
    ") VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)"
  ].join("\n");
  const insertObject = prepare(bundle.db, insertSql);
  const now = (/* @__PURE__ */ new Date()).toISOString();
  for (const { staged: obj, compression, compressedBytes, storagePath } of toWrite) {
    insertObject.run(
      obj.objectId,
      obj.hash,
      obj.bytes.byteLength,
      // compressed_size_bytes is only recorded for compressed payloads.
      compression === "zstd" ? compressedBytes.byteLength : null,
      compression,
      obj.mimeType,
      obj.encoding,
      storagePath,
      now
    );
  }
}
|
|
1645
|
-
|
|
1646
|
-
const
|
|
1647
|
-
|
|
1648
|
-
const
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
2693
|
+
/**
 * Return the subset of `ids` that already have a row in `objects`.
 * The lookup is issued in batches of 500 ids, each bound through `?`
 * placeholders.
 *
 * @param {object} bundle - Provides `db`.
 * @param {string[]} ids
 * @returns {Set<string>} The ids found in the database.
 */
function queryExistingObjectIds(bundle, ids) {
  const found = /* @__PURE__ */ new Set();
  const CHUNK = 500;
  let offset = 0;
  while (offset < ids.length) {
    const batch = ids.slice(offset, offset + CHUNK);
    const placeholders = new Array(batch.length).fill("?").join(",");
    const statement = bundle.db.prepare(
      `SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
    );
    for (const { object_id: objectId } of statement.all(...batch)) {
      found.add(objectId);
    }
    offset += CHUNK;
  }
  return found;
}
|
|
1657
|
-
|
|
1658
|
-
|
|
2707
|
+
var FS_WRITE_CONCURRENCY = 16;
|
|
2708
|
+
/**
 * Write every task's `compressedBytes` to its `absolutePath`, creating the
 * parent directory first. Up to FS_WRITE_CONCURRENCY workers pull tasks
 * from a shared cursor until none remain.
 *
 * @param {Array<{ absolutePath: string, compressedBytes: Uint8Array }>} tasks
 * @returns {Promise<void>} Settles when all workers finish; rejects on the
 *   first failed write.
 */
async function writeFilesParallel(tasks) {
  const workerCount = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
  let next = 0;
  const drain = async () => {
    // Each worker claims the next unclaimed index until the list is exhausted.
    for (let index = next++; index < tasks.length; index = next++) {
      const { absolutePath, compressedBytes } = tasks[index];
      await ensureDir(path5.dirname(absolutePath));
      await writeFile3(absolutePath, compressedBytes);
    }
  };
  const workers = [];
  for (let started = 0; started < workerCount; started++) {
    workers.push(drain());
  }
  await Promise.all(workers);
}
|
|
1660
2727
|
|
|
1661
|
-
// src/services/compile.ts
|
|
1662
|
-
init_errors();
|
|
1663
|
-
import os2 from "os";
|
|
1664
|
-
import path14 from "path";
|
|
1665
|
-
|
|
1666
2728
|
// src/importers/claude/index.ts
|
|
1667
|
-
init_cas();
|
|
1668
2729
|
init_db();
|
|
1669
|
-
import { readFile as readFile4 } from "fs/promises";
|
|
1670
|
-
import path5 from "path";
|
|
1671
2730
|
|
|
1672
2731
|
// src/core/domain/ids.ts
|
|
1673
|
-
init_hash();
|
|
1674
2732
|
var ID_PREFIX_BYTES = 16;
|
|
1675
2733
|
function tupleId(parts) {
|
|
1676
2734
|
return sha256Hex(parts.join("\0")).slice(0, ID_PREFIX_BYTES * 2);
|
|
@@ -1716,7 +2774,6 @@ function importBatchId(sourceTool, startedAtIso) {
|
|
|
1716
2774
|
init_errors();
|
|
1717
2775
|
|
|
1718
2776
|
// src/core/ingest/batch.ts
|
|
1719
|
-
init_cas();
|
|
1720
2777
|
init_db();
|
|
1721
2778
|
function emptyCounts() {
|
|
1722
2779
|
return {
|
|
@@ -1784,12 +2841,9 @@ async function recordError(bundle, batchId, args) {
|
|
|
1784
2841
|
}
|
|
1785
2842
|
|
|
1786
2843
|
// src/core/ingest/idempotency.ts
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
init_cas();
|
|
2844
|
+
import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile4 } from "fs/promises";
|
|
2845
|
+
import path6 from "path";
|
|
1790
2846
|
init_db();
|
|
1791
|
-
import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
|
|
1792
|
-
import path3 from "path";
|
|
1793
2847
|
async function registerSourceFile(bundle, args) {
|
|
1794
2848
|
const st = await stat2(args.absolutePath);
|
|
1795
2849
|
const size = st.size;
|
|
@@ -1873,10 +2927,10 @@ async function preserveRawSourceBytes(bundle, bytes) {
|
|
|
1873
2927
|
const objectId = objectIdFromHash(hash);
|
|
1874
2928
|
const { bytes: stored, compression } = compressBytes(bytes);
|
|
1875
2929
|
const storagePath = rawSourceStoragePath(hash, compression);
|
|
1876
|
-
const absolutePath =
|
|
1877
|
-
await ensureDir(
|
|
2930
|
+
const absolutePath = path6.join(bundle.path, storagePath);
|
|
2931
|
+
await ensureDir(path6.dirname(absolutePath));
|
|
1878
2932
|
if (!await fileExists(absolutePath)) {
|
|
1879
|
-
await
|
|
2933
|
+
await writeFile4(absolutePath, stored);
|
|
1880
2934
|
}
|
|
1881
2935
|
const existing = prepare(
|
|
1882
2936
|
bundle.db,
|
|
@@ -1918,12 +2972,12 @@ async function fileExists(filePath) {
|
|
|
1918
2972
|
|
|
1919
2973
|
// src/importers/claude/discover.ts
|
|
1920
2974
|
import { readdir } from "fs/promises";
|
|
1921
|
-
import
|
|
2975
|
+
import path7 from "path";
|
|
1922
2976
|
async function* discoverClaudeFiles(root) {
|
|
1923
2977
|
const projectDirs = await readdirSafe(root);
|
|
1924
2978
|
for (const project of projectDirs) {
|
|
1925
2979
|
if (!project.isDirectory()) continue;
|
|
1926
|
-
const projectRoot =
|
|
2980
|
+
const projectRoot = path7.join(root, project.name);
|
|
1927
2981
|
yield* walkProject(projectRoot, project.name);
|
|
1928
2982
|
}
|
|
1929
2983
|
}
|
|
@@ -1932,7 +2986,7 @@ async function* walkProject(projectRoot, projectSlug) {
|
|
|
1932
2986
|
for (const entry of entries) {
|
|
1933
2987
|
if (entry.isFile() && entry.name.endsWith(".jsonl")) {
|
|
1934
2988
|
yield {
|
|
1935
|
-
filePath:
|
|
2989
|
+
filePath: path7.join(projectRoot, entry.name),
|
|
1936
2990
|
projectSlug,
|
|
1937
2991
|
isSubagent: false,
|
|
1938
2992
|
parentSessionId: null,
|
|
@@ -1942,18 +2996,18 @@ async function* walkProject(projectRoot, projectSlug) {
|
|
|
1942
2996
|
continue;
|
|
1943
2997
|
}
|
|
1944
2998
|
if (entry.isDirectory()) {
|
|
1945
|
-
const subagentsDir =
|
|
2999
|
+
const subagentsDir = path7.join(projectRoot, entry.name, "subagents");
|
|
1946
3000
|
const subagentEntries = await readdirSafe(subagentsDir);
|
|
1947
3001
|
for (const sub of subagentEntries) {
|
|
1948
3002
|
if (!sub.isFile() || !sub.name.endsWith(".jsonl")) continue;
|
|
1949
3003
|
if (!sub.name.startsWith("agent-")) continue;
|
|
1950
3004
|
const agentId = sub.name.slice("agent-".length, -".jsonl".length);
|
|
1951
|
-
const metaCandidate =
|
|
3005
|
+
const metaCandidate = path7.join(subagentsDir, `agent-${agentId}.meta.json`);
|
|
1952
3006
|
const metaExists = subagentEntries.some(
|
|
1953
3007
|
(e) => e.isFile() && e.name === `agent-${agentId}.meta.json`
|
|
1954
3008
|
);
|
|
1955
3009
|
yield {
|
|
1956
|
-
filePath:
|
|
3010
|
+
filePath: path7.join(subagentsDir, sub.name),
|
|
1957
3011
|
projectSlug,
|
|
1958
3012
|
isSubagent: true,
|
|
1959
3013
|
parentSessionId: entry.name,
|
|
@@ -2073,7 +3127,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
|
|
|
2073
3127
|
const counts = emptyFileCounts();
|
|
2074
3128
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
2075
3129
|
sourceTool: "claude",
|
|
2076
|
-
absolutePath:
|
|
3130
|
+
absolutePath: path8.resolve(file.filePath),
|
|
2077
3131
|
fileKind: "jsonl",
|
|
2078
3132
|
workspaceHint: file.projectSlug
|
|
2079
3133
|
});
|
|
@@ -2173,7 +3227,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
|
|
|
2173
3227
|
pending.session.parent_session_id_pending = parentSid;
|
|
2174
3228
|
}
|
|
2175
3229
|
}
|
|
2176
|
-
const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${
|
|
3230
|
+
const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path8.basename(file.filePath)}`);
|
|
2177
3231
|
const type = typeof parsed.type === "string" ? parsed.type : null;
|
|
2178
3232
|
if (type === "user" || type === "assistant") {
|
|
2179
3233
|
const msgRole = type === "user" ? "user" : "assistant";
|
|
@@ -2915,15 +3969,14 @@ function flushPending(bundle, pending, meta) {
|
|
|
2915
3969
|
}
|
|
2916
3970
|
|
|
2917
3971
|
// src/importers/codex/index.ts
|
|
2918
|
-
init_cas();
|
|
2919
|
-
init_db();
|
|
2920
3972
|
import { readFile as readFile5 } from "fs/promises";
|
|
2921
|
-
import
|
|
3973
|
+
import path10 from "path";
|
|
3974
|
+
init_db();
|
|
2922
3975
|
init_errors();
|
|
2923
3976
|
|
|
2924
3977
|
// src/importers/codex/discover.ts
|
|
2925
3978
|
import { readdir as readdir2 } from "fs/promises";
|
|
2926
|
-
import
|
|
3979
|
+
import path9 from "path";
|
|
2927
3980
|
async function* discoverCodexSessions(root) {
|
|
2928
3981
|
yield* walk(root);
|
|
2929
3982
|
}
|
|
@@ -2935,7 +3988,7 @@ async function* walk(dir) {
|
|
|
2935
3988
|
return;
|
|
2936
3989
|
}
|
|
2937
3990
|
for (const entry of entries) {
|
|
2938
|
-
const full =
|
|
3991
|
+
const full = path9.join(dir, entry.name);
|
|
2939
3992
|
if (entry.isDirectory()) {
|
|
2940
3993
|
yield* walk(full);
|
|
2941
3994
|
} else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
|
|
@@ -3037,7 +4090,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
|
3037
4090
|
const counts = emptyFileCounts2();
|
|
3038
4091
|
const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
|
|
3039
4092
|
sourceTool: "codex",
|
|
3040
|
-
absolutePath:
|
|
4093
|
+
absolutePath: path10.resolve(filePath),
|
|
3041
4094
|
fileKind: "jsonl"
|
|
3042
4095
|
});
|
|
3043
4096
|
if (alreadyKnown) {
|
|
@@ -3123,7 +4176,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
|
3123
4176
|
const payload = parsed.payload ?? {};
|
|
3124
4177
|
if (type === "session_meta") {
|
|
3125
4178
|
const meta = payload;
|
|
3126
|
-
const sourceSessionId = meta.id ??
|
|
4179
|
+
const sourceSessionId = meta.id ?? path10.basename(filePath, ".jsonl");
|
|
3127
4180
|
const sessionId3 = sessionId("codex", sourceSessionId);
|
|
3128
4181
|
if (!pending.session) {
|
|
3129
4182
|
const sub = parseSubagent(meta.source);
|
|
@@ -3155,11 +4208,11 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
|
3155
4208
|
}
|
|
3156
4209
|
continue;
|
|
3157
4210
|
}
|
|
3158
|
-
const sessionId2 = pending.session?.session_id ?? sessionId("codex",
|
|
4211
|
+
const sessionId2 = pending.session?.session_id ?? sessionId("codex", path10.basename(filePath, ".jsonl"));
|
|
3159
4212
|
if (!pending.session) {
|
|
3160
4213
|
pending.session = {
|
|
3161
4214
|
session_id: sessionId2,
|
|
3162
|
-
source_session_id:
|
|
4215
|
+
source_session_id: path10.basename(filePath, ".jsonl"),
|
|
3163
4216
|
parent_session_id: null,
|
|
3164
4217
|
is_subagent: 0,
|
|
3165
4218
|
agent_role: null,
|
|
@@ -4056,25 +5109,24 @@ function flushPending2(bundle, pending, meta) {
|
|
|
4056
5109
|
}
|
|
4057
5110
|
|
|
4058
5111
|
// src/importers/cursor/index.ts
|
|
4059
|
-
|
|
4060
|
-
init_db();
|
|
4061
|
-
import path9 from "path";
|
|
5112
|
+
import path12 from "path";
|
|
4062
5113
|
import Database2 from "better-sqlite3";
|
|
5114
|
+
init_db();
|
|
4063
5115
|
init_errors();
|
|
4064
5116
|
|
|
4065
5117
|
// src/importers/cursor/discover.ts
|
|
4066
5118
|
import { readdir as readdir3 } from "fs/promises";
|
|
4067
|
-
import
|
|
5119
|
+
import path11 from "path";
|
|
4068
5120
|
async function* discoverCursorStores(root) {
|
|
4069
5121
|
const workspaces = await readdirSafe2(root);
|
|
4070
5122
|
for (const ws of workspaces) {
|
|
4071
5123
|
if (!ws.isDirectory()) continue;
|
|
4072
|
-
const wsPath =
|
|
5124
|
+
const wsPath = path11.join(root, ws.name);
|
|
4073
5125
|
const agents = await readdirSafe2(wsPath);
|
|
4074
5126
|
for (const ag of agents) {
|
|
4075
5127
|
if (!ag.isDirectory()) continue;
|
|
4076
|
-
const dbPath =
|
|
4077
|
-
const dbEntries = await readdirSafe2(
|
|
5128
|
+
const dbPath = path11.join(wsPath, ag.name, "store.db");
|
|
5129
|
+
const dbEntries = await readdirSafe2(path11.join(wsPath, ag.name));
|
|
4078
5130
|
const hasStoreDb = dbEntries.some((e) => e.isFile() && e.name === "store.db");
|
|
4079
5131
|
if (!hasStoreDb) continue;
|
|
4080
5132
|
yield {
|
|
@@ -4173,7 +5225,7 @@ async function compileCursorStore(bundle, batch, store, logger) {
|
|
|
4173
5225
|
const counts = emptyFileCounts3();
|
|
4174
5226
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
4175
5227
|
sourceTool: "cursor",
|
|
4176
|
-
absolutePath:
|
|
5228
|
+
absolutePath: path12.resolve(store.filePath),
|
|
4177
5229
|
fileKind: "sqlite",
|
|
4178
5230
|
workspaceHint: store.workspaceId
|
|
4179
5231
|
});
|
|
@@ -4775,29 +5827,27 @@ function flushPending3(bundle, pending) {
|
|
|
4775
5827
|
}
|
|
4776
5828
|
|
|
4777
5829
|
// src/importers/gemini/index.ts
|
|
4778
|
-
init_hash();
|
|
4779
|
-
init_cas();
|
|
4780
|
-
init_db();
|
|
4781
5830
|
import { readFile as readFile7 } from "fs/promises";
|
|
4782
|
-
import
|
|
5831
|
+
import path14 from "path";
|
|
5832
|
+
init_db();
|
|
4783
5833
|
init_errors();
|
|
4784
5834
|
|
|
4785
5835
|
// src/importers/gemini/discover.ts
|
|
4786
5836
|
import { readFile as readFile6, readdir as readdir4 } from "fs/promises";
|
|
4787
|
-
import
|
|
5837
|
+
import path13 from "path";
|
|
4788
5838
|
async function* discoverGeminiChats(root) {
|
|
4789
5839
|
const entries = await readdirSafe3(root);
|
|
4790
5840
|
for (const entry of entries) {
|
|
4791
5841
|
if (!entry.isDirectory()) continue;
|
|
4792
5842
|
if (entry.name === "bin") continue;
|
|
4793
|
-
const projectRoot = await readProjectRoot(
|
|
4794
|
-
const chatsDir =
|
|
5843
|
+
const projectRoot = await readProjectRoot(path13.join(root, entry.name));
|
|
5844
|
+
const chatsDir = path13.join(root, entry.name, "chats");
|
|
4795
5845
|
const chatEntries = await readdirSafe3(chatsDir);
|
|
4796
5846
|
for (const c of chatEntries) {
|
|
4797
5847
|
if (!c.isFile()) continue;
|
|
4798
5848
|
if (!c.name.startsWith("session-") || !c.name.endsWith(".json")) continue;
|
|
4799
5849
|
yield {
|
|
4800
|
-
filePath:
|
|
5850
|
+
filePath: path13.join(chatsDir, c.name),
|
|
4801
5851
|
projectDir: entry.name,
|
|
4802
5852
|
projectRoot
|
|
4803
5853
|
};
|
|
@@ -4806,7 +5856,7 @@ async function* discoverGeminiChats(root) {
|
|
|
4806
5856
|
}
|
|
4807
5857
|
async function readProjectRoot(dir) {
|
|
4808
5858
|
try {
|
|
4809
|
-
const text = await readFile6(
|
|
5859
|
+
const text = await readFile6(path13.join(dir, ".project_root"), "utf8");
|
|
4810
5860
|
return text.replace(/\n+$/, "").trim() || null;
|
|
4811
5861
|
} catch {
|
|
4812
5862
|
return null;
|
|
@@ -4900,7 +5950,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
|
|
|
4900
5950
|
const counts = emptyFileCounts4();
|
|
4901
5951
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
4902
5952
|
sourceTool: "gemini",
|
|
4903
|
-
absolutePath:
|
|
5953
|
+
absolutePath: path14.resolve(file.filePath),
|
|
4904
5954
|
fileKind: "json",
|
|
4905
5955
|
workspaceHint: file.projectDir
|
|
4906
5956
|
});
|
|
@@ -4953,7 +6003,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
|
|
|
4953
6003
|
project: null,
|
|
4954
6004
|
objects
|
|
4955
6005
|
};
|
|
4956
|
-
const sourceSid = parsed.sessionId ??
|
|
6006
|
+
const sourceSid = parsed.sessionId ?? path14.basename(file.filePath, ".json");
|
|
4957
6007
|
const sessionPk = sessionId("gemini", sourceSid);
|
|
4958
6008
|
if (file.projectRoot) {
|
|
4959
6009
|
pending.project = {
|
|
@@ -5448,227 +6498,90 @@ function flushPending4(bundle, pending) {
|
|
|
5448
6498
|
insertCall.run(
|
|
5449
6499
|
c.tool_call_id,
|
|
5450
6500
|
pending.session.session_id,
|
|
5451
|
-
c.message_id,
|
|
5452
|
-
c.event_id,
|
|
5453
|
-
c.source_call_id,
|
|
5454
|
-
c.tool_name,
|
|
5455
|
-
c.canonical_tool_type,
|
|
5456
|
-
c.args_object_id,
|
|
5457
|
-
c.command,
|
|
5458
|
-
c.cwd,
|
|
5459
|
-
c.path,
|
|
5460
|
-
c.query,
|
|
5461
|
-
c.timestamp_start,
|
|
5462
|
-
c.status,
|
|
5463
|
-
c.raw_record_id
|
|
5464
|
-
);
|
|
5465
|
-
}
|
|
5466
|
-
const insertResult = prepare(
|
|
5467
|
-
bundle.db,
|
|
5468
|
-
`INSERT OR REPLACE INTO tool_results (
|
|
5469
|
-
tool_result_id, tool_call_id, session_id, message_id, event_id,
|
|
5470
|
-
source_call_id, status, is_error, exit_code, duration_ms,
|
|
5471
|
-
stdout_object_id, stderr_object_id, output_object_id, preview, raw_record_id
|
|
5472
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, ?, ?, ?)`
|
|
5473
|
-
);
|
|
5474
|
-
for (const r of pending.toolResults) {
|
|
5475
|
-
insertResult.run(
|
|
5476
|
-
r.tool_result_id,
|
|
5477
|
-
r.tool_call_id,
|
|
5478
|
-
pending.session.session_id,
|
|
5479
|
-
r.message_id,
|
|
5480
|
-
r.event_id,
|
|
5481
|
-
r.source_call_id,
|
|
5482
|
-
r.status,
|
|
5483
|
-
r.is_error,
|
|
5484
|
-
r.output_object_id,
|
|
5485
|
-
r.preview,
|
|
5486
|
-
r.raw_record_id
|
|
5487
|
-
);
|
|
5488
|
-
}
|
|
5489
|
-
const insertArtifact = prepare(
|
|
5490
|
-
bundle.db,
|
|
5491
|
-
`INSERT OR REPLACE INTO artifacts (
|
|
5492
|
-
artifact_id, session_id, project_id, source_tool, kind, path,
|
|
5493
|
-
logical_path, object_id, text_object_id, mime_type, size_bytes,
|
|
5494
|
-
created_ts, raw_record_id
|
|
5495
|
-
) VALUES (?, ?, ?, 'gemini', ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
5496
|
-
);
|
|
5497
|
-
for (const a of pending.artifacts) {
|
|
5498
|
-
insertArtifact.run(
|
|
5499
|
-
a.artifact_id,
|
|
5500
|
-
pending.session.session_id,
|
|
5501
|
-
pending.project?.project_id ?? null,
|
|
5502
|
-
a.kind,
|
|
5503
|
-
a.path,
|
|
5504
|
-
a.logical_path,
|
|
5505
|
-
a.object_id,
|
|
5506
|
-
a.text_object_id,
|
|
5507
|
-
a.mime_type,
|
|
5508
|
-
a.size_bytes,
|
|
5509
|
-
a.created_ts,
|
|
5510
|
-
a.raw_record_id
|
|
5511
|
-
);
|
|
5512
|
-
}
|
|
5513
|
-
const insertSearch = prepare(
|
|
5514
|
-
bundle.db,
|
|
5515
|
-
`INSERT OR REPLACE INTO search_docs (
|
|
5516
|
-
doc_id, entity_type, entity_id, session_id, project_id, timestamp,
|
|
5517
|
-
role, tool_name, canonical_tool_type, field_kind, text
|
|
5518
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
5519
|
-
);
|
|
5520
|
-
for (const d of pending.searchDocs) {
|
|
5521
|
-
insertSearch.run(
|
|
5522
|
-
d.doc_id,
|
|
5523
|
-
d.entity_type,
|
|
5524
|
-
d.entity_id,
|
|
5525
|
-
pending.session.session_id,
|
|
5526
|
-
pending.project?.project_id ?? null,
|
|
5527
|
-
d.timestamp,
|
|
5528
|
-
d.role,
|
|
5529
|
-
d.tool_name,
|
|
5530
|
-
d.canonical_tool_type,
|
|
5531
|
-
d.field_kind,
|
|
5532
|
-
d.text
|
|
5533
|
-
);
|
|
5534
|
-
}
|
|
5535
|
-
}
|
|
5536
|
-
|
|
5537
|
-
// src/services/export/parquet.ts
|
|
5538
|
-
import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
|
|
5539
|
-
import path12 from "path";
|
|
5540
|
-
import { DuckDBConnection } from "@duckdb/node-api";
|
|
5541
|
-
init_errors();
|
|
5542
|
-
var PARQUET_TABLES = [
|
|
5543
|
-
"objects",
|
|
5544
|
-
"source_files",
|
|
5545
|
-
"import_batches",
|
|
5546
|
-
"raw_records",
|
|
5547
|
-
"import_errors",
|
|
5548
|
-
"uncertainties",
|
|
5549
|
-
"projects",
|
|
5550
|
-
"sessions",
|
|
5551
|
-
"turns",
|
|
5552
|
-
"events",
|
|
5553
|
-
"messages",
|
|
5554
|
-
"content_blocks",
|
|
5555
|
-
"tool_calls",
|
|
5556
|
-
"tool_results",
|
|
5557
|
-
"artifacts",
|
|
5558
|
-
"edges",
|
|
5559
|
-
"search_docs"
|
|
5560
|
-
];
|
|
5561
|
-
async function exportBundleParquet(options) {
|
|
5562
|
-
const snapshot = await openBundleSnapshot(options.bundlePath);
|
|
5563
|
-
const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
|
|
5564
|
-
await mkdir3(outDir, { recursive: true });
|
|
5565
|
-
const files = Object.fromEntries(
|
|
5566
|
-
PARQUET_TABLES.map((table) => [table, path12.join(outDir, `${table}.parquet`)])
|
|
5567
|
-
);
|
|
5568
|
-
const manifestPath = path12.join(outDir, "manifest.json");
|
|
5569
|
-
for (const file of [...Object.values(files), manifestPath]) {
|
|
5570
|
-
await rm(file, { force: true });
|
|
5571
|
-
}
|
|
5572
|
-
const connection = await createDuckDbConnection();
|
|
5573
|
-
try {
|
|
5574
|
-
await attachSqlite(connection, snapshot.dbPath);
|
|
5575
|
-
for (const table of PARQUET_TABLES) {
|
|
5576
|
-
await connection.run(
|
|
5577
|
-
`COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
|
|
5578
|
-
);
|
|
5579
|
-
}
|
|
5580
|
-
} finally {
|
|
5581
|
-
connection.closeSync();
|
|
5582
|
-
}
|
|
5583
|
-
const manifest = {
|
|
5584
|
-
exported_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5585
|
-
source_db: snapshot.dbPath,
|
|
5586
|
-
schema_version: snapshot.schemaVersion,
|
|
5587
|
-
parser_version: snapshot.parserVersion,
|
|
5588
|
-
tables: Object.fromEntries(
|
|
5589
|
-
PARQUET_TABLES.map((table) => [
|
|
5590
|
-
table,
|
|
5591
|
-
{
|
|
5592
|
-
file: path12.basename(files[table]),
|
|
5593
|
-
rows: snapshot.counts[table]
|
|
5594
|
-
}
|
|
5595
|
-
])
|
|
5596
|
-
)
|
|
5597
|
-
};
|
|
5598
|
-
await writeFile4(manifestPath, `${JSON.stringify(manifest, null, 2)}
|
|
5599
|
-
`, "utf8");
|
|
5600
|
-
return { outDir, manifestPath, files, counts: snapshot.counts };
|
|
5601
|
-
}
|
|
5602
|
-
async function queryDuckDbParquet(options) {
|
|
5603
|
-
const parquetDir = path12.resolve(options.parquetDir);
|
|
5604
|
-
const connection = await createDuckDbConnection();
|
|
5605
|
-
try {
|
|
5606
|
-
for (const table of PARQUET_TABLES) {
|
|
5607
|
-
await connection.run(
|
|
5608
|
-
`CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
|
|
5609
|
-
path12.join(parquetDir, `${table}.parquet`)
|
|
5610
|
-
)})`
|
|
5611
|
-
);
|
|
5612
|
-
}
|
|
5613
|
-
const reader = await connection.runAndReadAll(options.sql);
|
|
5614
|
-
return {
|
|
5615
|
-
columns: reader.deduplicatedColumnNames(),
|
|
5616
|
-
rows: reader.getRowObjectsJson()
|
|
5617
|
-
};
|
|
5618
|
-
} catch (error) {
|
|
5619
|
-
if (isMissingParquetError(error)) {
|
|
5620
|
-
throw new Error(
|
|
5621
|
-
`Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
|
|
5622
|
-
);
|
|
5623
|
-
}
|
|
5624
|
-
throw error;
|
|
5625
|
-
} finally {
|
|
5626
|
-
connection.closeSync();
|
|
6501
|
+
c.message_id,
|
|
6502
|
+
c.event_id,
|
|
6503
|
+
c.source_call_id,
|
|
6504
|
+
c.tool_name,
|
|
6505
|
+
c.canonical_tool_type,
|
|
6506
|
+
c.args_object_id,
|
|
6507
|
+
c.command,
|
|
6508
|
+
c.cwd,
|
|
6509
|
+
c.path,
|
|
6510
|
+
c.query,
|
|
6511
|
+
c.timestamp_start,
|
|
6512
|
+
c.status,
|
|
6513
|
+
c.raw_record_id
|
|
6514
|
+
);
|
|
5627
6515
|
}
|
|
5628
|
-
|
|
5629
|
-
|
|
5630
|
-
|
|
5631
|
-
|
|
5632
|
-
|
|
5633
|
-
|
|
5634
|
-
|
|
5635
|
-
|
|
5636
|
-
|
|
5637
|
-
|
|
5638
|
-
|
|
5639
|
-
|
|
6516
|
+
const insertResult = prepare(
|
|
6517
|
+
bundle.db,
|
|
6518
|
+
`INSERT OR REPLACE INTO tool_results (
|
|
6519
|
+
tool_result_id, tool_call_id, session_id, message_id, event_id,
|
|
6520
|
+
source_call_id, status, is_error, exit_code, duration_ms,
|
|
6521
|
+
stdout_object_id, stderr_object_id, output_object_id, preview, raw_record_id
|
|
6522
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL, NULL, ?, ?, ?)`
|
|
6523
|
+
);
|
|
6524
|
+
for (const r of pending.toolResults) {
|
|
6525
|
+
insertResult.run(
|
|
6526
|
+
r.tool_result_id,
|
|
6527
|
+
r.tool_call_id,
|
|
6528
|
+
pending.session.session_id,
|
|
6529
|
+
r.message_id,
|
|
6530
|
+
r.event_id,
|
|
6531
|
+
r.source_call_id,
|
|
6532
|
+
r.status,
|
|
6533
|
+
r.is_error,
|
|
6534
|
+
r.output_object_id,
|
|
6535
|
+
r.preview,
|
|
6536
|
+
r.raw_record_id
|
|
5640
6537
|
);
|
|
5641
6538
|
}
|
|
5642
|
-
|
|
5643
|
-
|
|
5644
|
-
|
|
5645
|
-
|
|
5646
|
-
|
|
5647
|
-
|
|
5648
|
-
|
|
5649
|
-
|
|
5650
|
-
|
|
6539
|
+
const insertArtifact = prepare(
|
|
6540
|
+
bundle.db,
|
|
6541
|
+
`INSERT OR REPLACE INTO artifacts (
|
|
6542
|
+
artifact_id, session_id, project_id, source_tool, kind, path,
|
|
6543
|
+
logical_path, object_id, text_object_id, mime_type, size_bytes,
|
|
6544
|
+
created_ts, raw_record_id
|
|
6545
|
+
) VALUES (?, ?, ?, 'gemini', ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
6546
|
+
);
|
|
6547
|
+
for (const a of pending.artifacts) {
|
|
6548
|
+
insertArtifact.run(
|
|
6549
|
+
a.artifact_id,
|
|
6550
|
+
pending.session.session_id,
|
|
6551
|
+
pending.project?.project_id ?? null,
|
|
6552
|
+
a.kind,
|
|
6553
|
+
a.path,
|
|
6554
|
+
a.logical_path,
|
|
6555
|
+
a.object_id,
|
|
6556
|
+
a.text_object_id,
|
|
6557
|
+
a.mime_type,
|
|
6558
|
+
a.size_bytes,
|
|
6559
|
+
a.created_ts,
|
|
6560
|
+
a.raw_record_id
|
|
6561
|
+
);
|
|
6562
|
+
}
|
|
6563
|
+
const insertSearch = prepare(
|
|
6564
|
+
bundle.db,
|
|
6565
|
+
`INSERT OR REPLACE INTO search_docs (
|
|
6566
|
+
doc_id, entity_type, entity_id, session_id, project_id, timestamp,
|
|
6567
|
+
role, tool_name, canonical_tool_type, field_kind, text
|
|
6568
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
6569
|
+
);
|
|
6570
|
+
for (const d of pending.searchDocs) {
|
|
6571
|
+
insertSearch.run(
|
|
6572
|
+
d.doc_id,
|
|
6573
|
+
d.entity_type,
|
|
6574
|
+
d.entity_id,
|
|
6575
|
+
pending.session.session_id,
|
|
6576
|
+
pending.project?.project_id ?? null,
|
|
6577
|
+
d.timestamp,
|
|
6578
|
+
d.role,
|
|
6579
|
+
d.tool_name,
|
|
6580
|
+
d.canonical_tool_type,
|
|
6581
|
+
d.field_kind,
|
|
6582
|
+
d.text
|
|
5651
6583
|
);
|
|
5652
|
-
return {
|
|
5653
|
-
dbPath: bundle.paths.db,
|
|
5654
|
-
schemaVersion: bundle.manifest.schema_version,
|
|
5655
|
-
parserVersion: bundle.manifest.parser_version,
|
|
5656
|
-
defaultOutDir: bundle.paths.parquet,
|
|
5657
|
-
counts
|
|
5658
|
-
};
|
|
5659
|
-
} finally {
|
|
5660
|
-
closeBundle(bundle);
|
|
5661
6584
|
}
|
|
5662
|
-
}
|
|
5663
|
-
function quoteIdentifier(value) {
|
|
5664
|
-
return `"${value.replace(/"/g, '""')}"`;
|
|
5665
|
-
}
|
|
5666
|
-
function sqlString(value) {
|
|
5667
|
-
return `'${value.replace(/'/g, "''")}'`;
|
|
5668
|
-
}
|
|
5669
|
-
function isMissingParquetError(error) {
|
|
5670
|
-
const message = getErrorMessage(error);
|
|
5671
|
-
return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
|
|
5672
6585
|
}
|
|
5673
6586
|
|
|
5674
6587
|
// src/services/compile.ts
|
|
@@ -5678,28 +6591,28 @@ var COMPILE_PROVIDERS = [
|
|
|
5678
6591
|
name: "codex",
|
|
5679
6592
|
description: "Import Codex CLI session histories into the bundle.",
|
|
5680
6593
|
pathHelp: "root of Codex CLI sessions",
|
|
5681
|
-
defaultSessionsPath: () =>
|
|
6594
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".codex", "sessions"),
|
|
5682
6595
|
compile: compileCodex
|
|
5683
6596
|
},
|
|
5684
6597
|
{
|
|
5685
6598
|
name: "claude",
|
|
5686
6599
|
description: "Import Claude Code project histories into the bundle.",
|
|
5687
6600
|
pathHelp: "root of Claude Code projects",
|
|
5688
|
-
defaultSessionsPath: () =>
|
|
6601
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".claude", "projects"),
|
|
5689
6602
|
compile: compileClaude
|
|
5690
6603
|
},
|
|
5691
6604
|
{
|
|
5692
6605
|
name: "gemini",
|
|
5693
6606
|
description: "Import Gemini CLI session histories into the bundle.",
|
|
5694
6607
|
pathHelp: "root of Gemini CLI tmp dir",
|
|
5695
|
-
defaultSessionsPath: () =>
|
|
6608
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".gemini", "tmp"),
|
|
5696
6609
|
compile: compileGemini
|
|
5697
6610
|
},
|
|
5698
6611
|
{
|
|
5699
6612
|
name: "cursor",
|
|
5700
6613
|
description: "Import Cursor agent stores into the bundle.",
|
|
5701
6614
|
pathHelp: "root of Cursor agent stores",
|
|
5702
|
-
defaultSessionsPath: () =>
|
|
6615
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".cursor", "chats"),
|
|
5703
6616
|
compile: compileCursor
|
|
5704
6617
|
}
|
|
5705
6618
|
];
|
|
@@ -5712,20 +6625,20 @@ function getCompileProvider(source) {
|
|
|
5712
6625
|
}
|
|
5713
6626
|
function resolveCompilePath(p) {
|
|
5714
6627
|
if (p === "~") return os2.homedir();
|
|
5715
|
-
if (p.startsWith("~/")) return
|
|
5716
|
-
return
|
|
6628
|
+
if (p.startsWith("~/")) return path16.join(os2.homedir(), p.slice(2));
|
|
6629
|
+
return path16.resolve(p);
|
|
5717
6630
|
}
|
|
5718
6631
|
async function runCompileImports(options) {
|
|
5719
|
-
const { bundle, providers,
|
|
6632
|
+
const { bundle, providers, logger } = options;
|
|
6633
|
+
const overwrite = options.overwrite === true;
|
|
5720
6634
|
let importedAny = false;
|
|
5721
6635
|
const summaries = [];
|
|
5722
6636
|
let tantivy = null;
|
|
5723
6637
|
let tantivyError = null;
|
|
6638
|
+
let fts5Error = null;
|
|
5724
6639
|
try {
|
|
5725
|
-
|
|
5726
|
-
|
|
5727
|
-
disableFts5Triggers(bundle);
|
|
5728
|
-
}
|
|
6640
|
+
logger?.info("disabling FTS5 triggers for bulk rebuild");
|
|
6641
|
+
disableFts5Triggers(bundle);
|
|
5729
6642
|
for (const provider of providers) {
|
|
5730
6643
|
const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
|
|
5731
6644
|
const providerLogger = logger?.child({
|
|
@@ -5752,15 +6665,23 @@ async function runCompileImports(options) {
|
|
|
5752
6665
|
summaries.push(summary);
|
|
5753
6666
|
options.onProviderComplete?.(summary);
|
|
5754
6667
|
}
|
|
5755
|
-
|
|
5756
|
-
|
|
5757
|
-
|
|
5758
|
-
|
|
5759
|
-
|
|
5760
|
-
|
|
6668
|
+
const shouldRebuildIndexes = importedAny || overwrite;
|
|
6669
|
+
if (shouldRebuildIndexes) {
|
|
6670
|
+
logger?.info(
|
|
6671
|
+
{ changed: importedAny, overwrite },
|
|
6672
|
+
importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
|
|
6673
|
+
);
|
|
6674
|
+
markIndexesAfterImport(bundle, { changed: true });
|
|
6675
|
+
try {
|
|
6676
|
+
logger?.info("rebuilding fts5 index");
|
|
6677
|
+
rebuildFts5Index(bundle);
|
|
6678
|
+
} catch (error) {
|
|
6679
|
+
fts5Error = getErrorMessage(error);
|
|
6680
|
+
logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
|
|
6681
|
+
}
|
|
5761
6682
|
try {
|
|
5762
|
-
logger?.info("rebuilding tantivy index");
|
|
5763
|
-
const status = await rebuildTantivyIndex(bundle);
|
|
6683
|
+
logger?.info({ overwrite }, "rebuilding tantivy index");
|
|
6684
|
+
const status = await rebuildTantivyIndex(bundle, { overwrite });
|
|
5764
6685
|
tantivy = { indexedDocCount: status.indexed_doc_count };
|
|
5765
6686
|
options.onTantivyComplete?.(tantivy);
|
|
5766
6687
|
} catch (error) {
|
|
@@ -5769,16 +6690,14 @@ async function runCompileImports(options) {
|
|
|
5769
6690
|
}
|
|
5770
6691
|
}
|
|
5771
6692
|
} finally {
|
|
5772
|
-
|
|
5773
|
-
logger?.info("re-enabling FTS5 triggers");
|
|
5774
|
-
enableFts5Triggers(bundle);
|
|
5775
|
-
}
|
|
6693
|
+
enableFts5Triggers(bundle);
|
|
5776
6694
|
}
|
|
5777
6695
|
return {
|
|
5778
6696
|
providers: summaries,
|
|
5779
6697
|
importedAny,
|
|
5780
6698
|
tantivy,
|
|
5781
|
-
tantivyError
|
|
6699
|
+
tantivyError,
|
|
6700
|
+
fts5Error
|
|
5782
6701
|
};
|
|
5783
6702
|
}
|
|
5784
6703
|
async function exportCompileParquet(options) {
|
|
@@ -5821,7 +6740,7 @@ function createCliLogger(options) {
|
|
|
5821
6740
|
// src/cli/commands/compile.ts
|
|
5822
6741
|
function compileCommand() {
|
|
5823
6742
|
const command = addCompileLogOptions(
|
|
5824
|
-
new
|
|
6743
|
+
new Command2("compile").description(
|
|
5825
6744
|
"Import session histories from one agent CLI into the bundle."
|
|
5826
6745
|
)
|
|
5827
6746
|
);
|
|
@@ -5834,27 +6753,35 @@ function compileCommand() {
|
|
|
5834
6753
|
return command;
|
|
5835
6754
|
}
|
|
5836
6755
|
function compileAllCommand() {
|
|
5837
|
-
return addCompileLogOptions(new
|
|
6756
|
+
return addCompileLogOptions(new Command2("compile-all")).description("Import all agent CLI session histories using default source paths.").option(
|
|
6757
|
+
"--overwrite",
|
|
6758
|
+
"force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
|
|
6759
|
+
false
|
|
6760
|
+
).action(async (options) => {
|
|
5838
6761
|
await runCompiles({
|
|
5839
6762
|
providers: COMPILE_PROVIDERS,
|
|
5840
6763
|
storePath: defaultBundlePath(),
|
|
5841
|
-
|
|
6764
|
+
overwrite: options.overwrite,
|
|
5842
6765
|
logOptions: options
|
|
5843
6766
|
});
|
|
5844
6767
|
});
|
|
5845
6768
|
}
|
|
5846
6769
|
function providerCompileCommand(provider) {
|
|
5847
|
-
return addCompileLogOptions(new
|
|
6770
|
+
return addCompileLogOptions(new Command2(provider.name)).description(provider.description).option(
|
|
5848
6771
|
"--sessions-path <path>",
|
|
5849
6772
|
`${provider.pathHelp} (default: ${provider.defaultSessionsPath()})`,
|
|
5850
6773
|
provider.defaultSessionsPath()
|
|
5851
|
-
).option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
6774
|
+
).option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
6775
|
+
"--overwrite",
|
|
6776
|
+
"force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
|
|
6777
|
+
false
|
|
6778
|
+
).action(
|
|
5852
6779
|
async (options, command) => {
|
|
5853
6780
|
await runCompiles({
|
|
5854
6781
|
providers: [provider],
|
|
5855
6782
|
storePath: options.store,
|
|
5856
|
-
deferIndex: options.deferIndex ?? false,
|
|
5857
6783
|
sessionsPath: options.sessionsPath,
|
|
6784
|
+
overwrite: options.overwrite,
|
|
5858
6785
|
logOptions: command.optsWithGlobals()
|
|
5859
6786
|
});
|
|
5860
6787
|
}
|
|
@@ -5873,8 +6800,8 @@ async function runCompiles(options) {
|
|
|
5873
6800
|
const result = await runCompileImports({
|
|
5874
6801
|
bundle,
|
|
5875
6802
|
providers: options.providers,
|
|
5876
|
-
deferIndex: options.deferIndex,
|
|
5877
6803
|
sessionsPath: options.sessionsPath,
|
|
6804
|
+
overwrite: options.overwrite,
|
|
5878
6805
|
logger,
|
|
5879
6806
|
onProviderComplete: printCounts,
|
|
5880
6807
|
onTantivyComplete: (status) => {
|
|
@@ -5887,7 +6814,8 @@ async function runCompiles(options) {
|
|
|
5887
6814
|
closeBundle(bundle);
|
|
5888
6815
|
logger.info({ store_path: storePath }, "bundle closed");
|
|
5889
6816
|
}
|
|
5890
|
-
|
|
6817
|
+
const shouldExportParquet = importedAny || options.overwrite === true;
|
|
6818
|
+
if (shouldExportParquet) {
|
|
5891
6819
|
try {
|
|
5892
6820
|
const result = await exportCompileParquet({ storePath, logger });
|
|
5893
6821
|
process.stdout.write(`parquet: wrote ${result.tableCount} tables to ${result.outDir}
|
|
@@ -5911,11 +6839,10 @@ function printCounts(summary) {
|
|
|
5911
6839
|
|
|
5912
6840
|
// src/cli/commands/export.ts
|
|
5913
6841
|
import { writeFile as writeFile6 } from "fs/promises";
|
|
5914
|
-
import
|
|
5915
|
-
import { Command as
|
|
6842
|
+
import path17 from "path";
|
|
6843
|
+
import { Command as Command3 } from "commander";
|
|
5916
6844
|
|
|
5917
6845
|
// src/services/export/markdown.ts
|
|
5918
|
-
init_cas();
|
|
5919
6846
|
async function exportSessionMarkdown(bundle, sessionId2) {
|
|
5920
6847
|
const session = bundle.db.prepare(
|
|
5921
6848
|
`SELECT session_id, source_tool, source_session_id, title, start_ts, end_ts,
|
|
@@ -6025,139 +6952,60 @@ function renderToolCall(c) {
|
|
|
6025
6952
|
return lines.join("\n");
|
|
6026
6953
|
}
|
|
6027
6954
|
|
|
6028
|
-
// src/cli/bundle.ts
|
|
6029
|
-
import path15 from "path";
|
|
6030
|
-
async function withBundle(storePath, fn) {
|
|
6031
|
-
const bundle = await openBundle(path15.resolve(storePath));
|
|
6032
|
-
try {
|
|
6033
|
-
return await fn(bundle);
|
|
6034
|
-
} finally {
|
|
6035
|
-
closeBundle(bundle);
|
|
6036
|
-
}
|
|
6037
|
-
}
|
|
6038
|
-
|
|
6039
6955
|
// src/cli/commands/export.ts
|
|
6040
6956
|
function exportCommand() {
|
|
6041
|
-
const session = new
|
|
6957
|
+
const session = new Command3("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
|
|
6042
6958
|
if (options.format !== "markdown") {
|
|
6043
6959
|
throw new Error(`unsupported format: ${options.format} (try --format markdown)`);
|
|
6044
6960
|
}
|
|
6045
6961
|
await withBundle(options.store, async (bundle) => {
|
|
6046
6962
|
const markdown = await exportSessionMarkdown(bundle, sessionId2);
|
|
6047
6963
|
if (options.out) {
|
|
6048
|
-
await writeFile6(
|
|
6049
|
-
process.stdout.write(`wrote ${
|
|
6964
|
+
await writeFile6(path17.resolve(options.out), markdown, "utf8");
|
|
6965
|
+
process.stdout.write(`wrote ${path17.resolve(options.out)}
|
|
6050
6966
|
`);
|
|
6051
6967
|
} else {
|
|
6052
6968
|
process.stdout.write(markdown);
|
|
6053
6969
|
}
|
|
6054
6970
|
});
|
|
6055
6971
|
});
|
|
6056
|
-
const parquet = new
|
|
6972
|
+
const parquet = new Command3("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
|
|
6057
6973
|
const result = await exportBundleParquet({
|
|
6058
|
-
bundlePath:
|
|
6059
|
-
outDir: options.out ?
|
|
6974
|
+
bundlePath: path17.resolve(options.store),
|
|
6975
|
+
outDir: options.out ? path17.resolve(options.out) : void 0
|
|
6060
6976
|
});
|
|
6061
6977
|
process.stdout.write(`wrote parquet export to ${result.outDir}
|
|
6062
6978
|
`);
|
|
6063
6979
|
process.stdout.write(`manifest=${result.manifestPath}
|
|
6064
6980
|
`);
|
|
6065
6981
|
});
|
|
6066
|
-
return new
|
|
6982
|
+
return new Command3("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
|
|
6067
6983
|
}
|
|
6068
6984
|
|
|
6069
6985
|
// src/cli/commands/index.ts
|
|
6070
|
-
import { Command as
|
|
6986
|
+
import { Command as Command4 } from "commander";
|
|
6071
6987
|
init_indexing();
|
|
6072
|
-
|
|
6073
|
-
// src/cli/output.ts
|
|
6074
|
-
var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
|
|
6075
|
-
var COL_SEPARATOR = " ";
|
|
6076
|
-
var RULE_CHAR = "-";
|
|
6077
|
-
function parseOutputFormat(value, fallback) {
|
|
6078
|
-
if (value === void 0) return fallback;
|
|
6079
|
-
if (OUTPUT_FORMATS.includes(value)) return value;
|
|
6080
|
-
throw new Error(
|
|
6081
|
-
`invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
|
|
6082
|
-
);
|
|
6083
|
-
}
|
|
6084
|
-
function printRows(rows, opts) {
|
|
6085
|
-
switch (opts.format) {
|
|
6086
|
-
case "json":
|
|
6087
|
-
printJson(rows, opts);
|
|
6088
|
-
return;
|
|
6089
|
-
case "csv":
|
|
6090
|
-
printCsv(rows, opts);
|
|
6091
|
-
return;
|
|
6092
|
-
case "table":
|
|
6093
|
-
case "interactive":
|
|
6094
|
-
printTable(rows, opts);
|
|
6095
|
-
return;
|
|
6096
|
-
}
|
|
6097
|
-
}
|
|
6098
|
-
function printJson(rows, opts) {
|
|
6099
|
-
const out = opts.meta ? { ...opts.meta, rows } : rows;
|
|
6100
|
-
process.stdout.write(`${JSON.stringify(out, null, 2)}
|
|
6101
|
-
`);
|
|
6102
|
-
}
|
|
6103
|
-
function printCsv(rows, opts) {
|
|
6104
|
-
const columns = opts.columns;
|
|
6105
|
-
process.stdout.write(`${columns.map(csvField).join(",")}
|
|
6106
|
-
`);
|
|
6107
|
-
for (const row of rows) {
|
|
6108
|
-
const record = row;
|
|
6109
|
-
const line = columns.map((column) => csvField(formatCell(record[column]))).join(",");
|
|
6110
|
-
process.stdout.write(`${line}
|
|
6111
|
-
`);
|
|
6112
|
-
}
|
|
6113
|
-
}
|
|
6114
|
-
function csvField(value) {
|
|
6115
|
-
if (/[",\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
|
|
6116
|
-
return value;
|
|
6117
|
-
}
|
|
6118
|
-
function printTable(rows, opts) {
|
|
6119
|
-
const columns = opts.columns;
|
|
6120
|
-
const widths = columns.map((column) => column.length);
|
|
6121
|
-
const cells = rows.map((row) => {
|
|
6122
|
-
const record = row;
|
|
6123
|
-
return columns.map((column, index) => {
|
|
6124
|
-
const text = formatCell(record[column]);
|
|
6125
|
-
const width = widths[index] ?? 0;
|
|
6126
|
-
if (text.length > width) widths[index] = text.length;
|
|
6127
|
-
return text;
|
|
6128
|
-
});
|
|
6129
|
-
});
|
|
6130
|
-
const header = columns.map((column, index) => column.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
|
|
6131
|
-
const rule = columns.map((_, index) => RULE_CHAR.repeat(widths[index] ?? 0)).join(COL_SEPARATOR);
|
|
6132
|
-
process.stdout.write(`${header}
|
|
6133
|
-
${rule}
|
|
6134
|
-
`);
|
|
6135
|
-
for (const cellRow of cells) {
|
|
6136
|
-
const line = cellRow.map((cell, index) => cell.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
|
|
6137
|
-
process.stdout.write(`${line}
|
|
6138
|
-
`);
|
|
6139
|
-
}
|
|
6140
|
-
}
|
|
6141
|
-
function formatCell(value) {
|
|
6142
|
-
if (value == null) return "";
|
|
6143
|
-
if (typeof value === "string") return value;
|
|
6144
|
-
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
6145
|
-
return JSON.stringify(value);
|
|
6146
|
-
}
|
|
6147
|
-
|
|
6148
|
-
// src/cli/commands/index.ts
|
|
6149
6988
|
function indexCommand() {
|
|
6150
|
-
const fts5 = new
|
|
6989
|
+
const fts5 = new Command4("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
6990
|
+
"--overwrite",
|
|
6991
|
+
"rebuild from scratch (FTS5 always overwrites; flag accepted for parity with other index commands)",
|
|
6992
|
+
false
|
|
6993
|
+
).action(async (options) => {
|
|
6151
6994
|
await withBundle(options.store, (bundle) => {
|
|
6995
|
+
void options.overwrite;
|
|
6152
6996
|
printIndexStatus(rebuildFts5Index(bundle));
|
|
6153
6997
|
});
|
|
6154
6998
|
});
|
|
6155
|
-
const tantivy = new
|
|
6999
|
+
const tantivy = new Command4("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
7000
|
+
"--overwrite",
|
|
7001
|
+
"force a full re-index instead of the default incremental rebuild",
|
|
7002
|
+
false
|
|
7003
|
+
).action(async (options) => {
|
|
6156
7004
|
await withBundle(options.store, async (bundle) => {
|
|
6157
|
-
printIndexStatus(await rebuildTantivyIndex(bundle));
|
|
7005
|
+
printIndexStatus(await rebuildTantivyIndex(bundle, { overwrite: options.overwrite }));
|
|
6158
7006
|
});
|
|
6159
7007
|
});
|
|
6160
|
-
const status = new
|
|
7008
|
+
const status = new Command4("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
|
|
6161
7009
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
6162
7010
|
await withBundle(options.store, (bundle) => {
|
|
6163
7011
|
const rows = getSearchIndexStatuses(bundle);
|
|
@@ -6174,7 +7022,7 @@ function indexCommand() {
|
|
|
6174
7022
|
});
|
|
6175
7023
|
});
|
|
6176
7024
|
});
|
|
6177
|
-
return new
|
|
7025
|
+
return new Command4("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
|
|
6178
7026
|
}
|
|
6179
7027
|
function printIndexStatus(status) {
|
|
6180
7028
|
process.stdout.write(
|
|
@@ -6186,11 +7034,11 @@ function printIndexStatus(status) {
|
|
|
6186
7034
|
|
|
6187
7035
|
// src/cli/commands/init.ts
|
|
6188
7036
|
import { stat as stat3 } from "fs/promises";
|
|
6189
|
-
import
|
|
6190
|
-
import { Command as
|
|
7037
|
+
import path18 from "path";
|
|
7038
|
+
import { Command as Command5 } from "commander";
|
|
6191
7039
|
function initCommand() {
|
|
6192
|
-
return new
|
|
6193
|
-
const resolved =
|
|
7040
|
+
return new Command5("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
|
|
7041
|
+
const resolved = path18.resolve(options.store);
|
|
6194
7042
|
const exists2 = await stat3(`${resolved}/manifest.json`).then(() => true).catch(() => false);
|
|
6195
7043
|
if (exists2) {
|
|
6196
7044
|
if (!options.forceExisting) {
|
|
@@ -6215,8 +7063,8 @@ use --force-existing to skip without erroring
|
|
|
6215
7063
|
}
|
|
6216
7064
|
|
|
6217
7065
|
// src/cli/commands/mcp.ts
|
|
6218
|
-
import
|
|
6219
|
-
import { Command as
|
|
7066
|
+
import path19 from "path";
|
|
7067
|
+
import { Command as Command6 } from "commander";
|
|
6220
7068
|
|
|
6221
7069
|
// src/mcp/server.ts
|
|
6222
7070
|
init_errors();
|
|
@@ -6228,213 +7076,222 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
|
|
|
6228
7076
|
|
|
6229
7077
|
// src/mcp/guidance.ts
|
|
6230
7078
|
var PROSA_MCP_INSTRUCTIONS = `
|
|
6231
|
-
prosa is a local memory over local agent session histories. Use it to
|
|
6232
|
-
|
|
6233
|
-
memory.
|
|
7079
|
+
prosa is a local memory over local agent session histories. Use it to find prior work, commands,
|
|
7080
|
+
decisions, file touches, transcripts, and analytical rollups before answering from memory.
|
|
6234
7081
|
|
|
6235
|
-
|
|
6236
|
-
-
|
|
6237
|
-
|
|
6238
|
-
-
|
|
6239
|
-
|
|
6240
|
-
|
|
6241
|
-
-
|
|
6242
|
-
|
|
6243
|
-
-
|
|
6244
|
-
-
|
|
7082
|
+
There are six tools:
|
|
7083
|
+
- search: full-text over messages, commands, paths, diffs, and previews. Start here for open-ended
|
|
7084
|
+
questions with 2-5 concrete terms. Optional engine, field_kind, raw, since/until filters.
|
|
7085
|
+
- sessions: without session_id, lists candidates filtered by source/time/limit. With session_id,
|
|
7086
|
+
opens the session: format=detail (default) returns metadata + timeline, format=summary returns
|
|
7087
|
+
only the session row, format=markdown renders the full transcript.
|
|
7088
|
+
- tool_calls: audit commands and tool usage. Filters by tool_name, canonical_type, session_id,
|
|
7089
|
+
errors_only. When path_substring is set, also returns artifacts touching that path \u2014 use this for
|
|
7090
|
+
file-history questions.
|
|
7091
|
+
- analytics: built-in aggregate reports backed by SQLite views. Pick report=sessions|tools|errors|
|
|
7092
|
+
models|projects with the matching filters. Use report=sessions with session_id or
|
|
7093
|
+
source_path_substring for per-session metrics.
|
|
7094
|
+
- artifact: fetch full text for an artifact_id when previews are not enough. Binary artifacts return
|
|
7095
|
+
a placeholder.
|
|
7096
|
+
- compile: with no input, returns a status snapshot (search index health). With source (and
|
|
7097
|
+
optionally sessions_path), imports that provider into the bundle. Use status mode when search
|
|
7098
|
+
results look stale; use import mode when local sessions may not be indexed yet.
|
|
6245
7099
|
|
|
6246
|
-
When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant
|
|
6247
|
-
or event. Do not treat search snippets as the whole truth; open the session
|
|
7100
|
+
When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant
|
|
7101
|
+
snippet or event. Do not treat search snippets as the whole truth; open the session with
|
|
7102
|
+
\`sessions session_id=\u2026 format=detail\` when accuracy matters.
|
|
6248
7103
|
`.trim();
|
|
6249
7104
|
var INVESTIGATE_PRIOR_WORK_PROMPT = `
|
|
6250
7105
|
Investigate prior work in prosa for the topic: {{topic}}
|
|
6251
7106
|
|
|
6252
7107
|
Use this workflow:
|
|
6253
|
-
1. Call
|
|
7108
|
+
1. Call \`search\` with a short query built from the topic.
|
|
6254
7109
|
2. If results are broad, search again with narrower terms from the best snippets.
|
|
6255
|
-
3. Open the most relevant session_ids with
|
|
6256
|
-
4. Use
|
|
7110
|
+
3. Open the most relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
|
|
7111
|
+
4. Use \`sessions session_id=\u2026 format=markdown\` only for sessions that appear directly relevant.
|
|
6257
7112
|
5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
|
|
6258
7113
|
`.trim();
|
|
6259
7114
|
var FIND_FILE_HISTORY_PROMPT = `
|
|
6260
7115
|
Investigate history for file/path: {{path}}
|
|
6261
7116
|
|
|
6262
7117
|
Use this workflow:
|
|
6263
|
-
1. Call
|
|
6264
|
-
2. Open returned session_ids with
|
|
6265
|
-
3.
|
|
6266
|
-
4. Use
|
|
7118
|
+
1. Call \`tool_calls\` with path_substring set to the path or its most distinctive suffix.
|
|
7119
|
+
2. Open returned session_ids with \`sessions session_id=\u2026 format=detail\`.
|
|
7120
|
+
3. Call \`tool_calls\` with session_id when you need command-level detail inside one session.
|
|
7121
|
+
4. Use \`sessions session_id=\u2026 format=markdown\` only for the most relevant session.
|
|
6267
7122
|
5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
|
|
6268
7123
|
`.trim();
|
|
6269
7124
|
var AUDIT_TOOL_FAILURES_PROMPT = `
|
|
6270
7125
|
Audit tool failures in prosa{{query_clause}}.
|
|
6271
7126
|
|
|
6272
7127
|
Use this workflow:
|
|
6273
|
-
1.
|
|
6274
|
-
|
|
6275
|
-
|
|
6276
|
-
|
|
6277
|
-
|
|
7128
|
+
1. For an aggregate report, call \`analytics report=errors\` (filter by source/since/until/tool_name
|
|
7129
|
+
as needed).
|
|
7130
|
+
2. For per-call evidence, call \`tool_calls\` with errors_only=true.
|
|
7131
|
+
3. If a query is provided, also call \`search\` for that query to find related context.
|
|
7132
|
+
4. Open relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
|
|
7133
|
+
5. Group failures by tool_name, command/path, and likely cause.
|
|
7134
|
+
6. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
|
|
6278
7135
|
`.trim();
|
|
6279
7136
|
|
|
6280
7137
|
// src/mcp/tools.ts
|
|
6281
7138
|
import { z } from "zod";
|
|
6282
|
-
|
|
6283
|
-
// src/core/domain/types.ts
|
|
6284
|
-
var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
|
|
6285
|
-
|
|
6286
|
-
// src/mcp/tools.ts
|
|
6287
7139
|
init_errors();
|
|
6288
|
-
init_limits();
|
|
6289
7140
|
init_indexing();
|
|
6290
7141
|
init_search();
|
|
6291
7142
|
init_sessions();
|
|
7143
|
+
|
|
7144
|
+
// src/services/tool_calls.ts
|
|
7145
|
+
init_limits();
|
|
7146
|
+
function listToolCalls(bundle, filters = {}) {
|
|
7147
|
+
const conds = [];
|
|
7148
|
+
const params = [];
|
|
7149
|
+
if (filters.toolName) {
|
|
7150
|
+
conds.push("tc.tool_name = ?");
|
|
7151
|
+
params.push(filters.toolName);
|
|
7152
|
+
}
|
|
7153
|
+
if (filters.canonicalType) {
|
|
7154
|
+
conds.push("tc.canonical_tool_type = ?");
|
|
7155
|
+
params.push(filters.canonicalType);
|
|
7156
|
+
}
|
|
7157
|
+
if (filters.sessionId) {
|
|
7158
|
+
conds.push("tc.session_id = ?");
|
|
7159
|
+
params.push(filters.sessionId);
|
|
7160
|
+
}
|
|
7161
|
+
if (filters.errorsOnly) {
|
|
7162
|
+
conds.push("(tr.is_error = 1 OR tc.status = ?)");
|
|
7163
|
+
params.push("error");
|
|
7164
|
+
}
|
|
7165
|
+
if (filters.pathSubstring) {
|
|
7166
|
+
conds.push("tc.path IS NOT NULL AND tc.path LIKE ?");
|
|
7167
|
+
params.push(`%${filters.pathSubstring}%`);
|
|
7168
|
+
}
|
|
7169
|
+
if (filters.sinceIso) {
|
|
7170
|
+
conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
|
|
7171
|
+
params.push(filters.sinceIso);
|
|
7172
|
+
}
|
|
7173
|
+
if (filters.untilIso) {
|
|
7174
|
+
conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
|
|
7175
|
+
params.push(filters.untilIso);
|
|
7176
|
+
}
|
|
7177
|
+
const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
|
|
7178
|
+
const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
|
|
7179
|
+
const toolCallSql = `
|
|
7180
|
+
SELECT 'tool_call' AS entity_type,
|
|
7181
|
+
tc.session_id,
|
|
7182
|
+
tc.tool_call_id,
|
|
7183
|
+
NULL AS artifact_id,
|
|
7184
|
+
tc.tool_name,
|
|
7185
|
+
tc.canonical_tool_type,
|
|
7186
|
+
tc.command,
|
|
7187
|
+
tc.path,
|
|
7188
|
+
tc.status,
|
|
7189
|
+
tc.timestamp_start,
|
|
7190
|
+
tr.is_error,
|
|
7191
|
+
tr.exit_code,
|
|
7192
|
+
tr.preview
|
|
7193
|
+
FROM tool_calls tc
|
|
7194
|
+
LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
|
|
7195
|
+
${where}
|
|
7196
|
+
`;
|
|
7197
|
+
if (!filters.pathSubstring) {
|
|
7198
|
+
const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
|
|
7199
|
+
return bundle.db.prepare(sql2).all(...params);
|
|
7200
|
+
}
|
|
7201
|
+
const artifactSql = `
|
|
7202
|
+
SELECT 'artifact' AS entity_type,
|
|
7203
|
+
a.session_id,
|
|
7204
|
+
NULL AS tool_call_id,
|
|
7205
|
+
a.artifact_id,
|
|
7206
|
+
NULL AS tool_name,
|
|
7207
|
+
NULL AS canonical_tool_type,
|
|
7208
|
+
NULL AS command,
|
|
7209
|
+
a.path,
|
|
7210
|
+
NULL AS status,
|
|
7211
|
+
a.created_ts AS timestamp_start,
|
|
7212
|
+
NULL AS is_error,
|
|
7213
|
+
NULL AS exit_code,
|
|
7214
|
+
NULL AS preview
|
|
7215
|
+
FROM artifacts a
|
|
7216
|
+
WHERE a.path IS NOT NULL AND a.path LIKE ?
|
|
7217
|
+
`;
|
|
7218
|
+
const sql = `
|
|
7219
|
+
${toolCallSql}
|
|
7220
|
+
UNION ALL
|
|
7221
|
+
${artifactSql}
|
|
7222
|
+
ORDER BY timestamp_start DESC
|
|
7223
|
+
LIMIT ${limit2}
|
|
7224
|
+
`;
|
|
7225
|
+
return bundle.db.prepare(sql).all(...params, `%${filters.pathSubstring}%`);
|
|
7226
|
+
}
|
|
7227
|
+
|
|
7228
|
+
// src/mcp/tools.ts
|
|
7229
|
+
var CANONICAL_TOOL_TYPES = [
|
|
7230
|
+
"shell",
|
|
7231
|
+
"read_file",
|
|
7232
|
+
"write_file",
|
|
7233
|
+
"edit_file",
|
|
7234
|
+
"search_file",
|
|
7235
|
+
"web_search",
|
|
7236
|
+
"mcp",
|
|
7237
|
+
"subagent",
|
|
7238
|
+
"patch",
|
|
7239
|
+
"other"
|
|
7240
|
+
];
|
|
7241
|
+
var FIELD_KINDS = [
|
|
7242
|
+
"message_text",
|
|
7243
|
+
"user_prompt",
|
|
7244
|
+
"assistant_text",
|
|
7245
|
+
"command",
|
|
7246
|
+
"command_output_preview",
|
|
7247
|
+
"error",
|
|
7248
|
+
"file_path",
|
|
7249
|
+
"diff",
|
|
7250
|
+
"summary",
|
|
7251
|
+
"artifact_text",
|
|
7252
|
+
"tool_args",
|
|
7253
|
+
"tool_result"
|
|
7254
|
+
];
|
|
6292
7255
|
function registerProsaTools(server, bundle, options = {}) {
|
|
6293
7256
|
const searchEngine = options.searchEngine ?? "fts5";
|
|
6294
7257
|
const storePath = options.storePath ?? bundle.path;
|
|
6295
7258
|
const ensureStore = options.ensureStore ?? false;
|
|
6296
7259
|
registerProsaPrompts(server);
|
|
6297
7260
|
server.registerTool(
|
|
6298
|
-
"
|
|
6299
|
-
{
|
|
6300
|
-
title: "Compile sessions",
|
|
6301
|
-
description: "Import local agent session histories into the active prosa bundle. With no input, compiles all providers from default paths. With source, compiles that provider; sessions_path may override that provider path.",
|
|
6302
|
-
inputSchema: {
|
|
6303
|
-
source: z.enum(SOURCE_TOOLS).optional(),
|
|
6304
|
-
sessions_path: z.string().min(1).optional()
|
|
6305
|
-
},
|
|
6306
|
-
annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
|
|
6307
|
-
},
|
|
6308
|
-
async ({ source, sessions_path }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
6309
|
-
if (sessions_path && !source) {
|
|
6310
|
-
return {
|
|
6311
|
-
content: [
|
|
6312
|
-
{
|
|
6313
|
-
type: "text",
|
|
6314
|
-
text: "sessions_path requires source because providers use incompatible source layouts"
|
|
6315
|
-
}
|
|
6316
|
-
],
|
|
6317
|
-
isError: true
|
|
6318
|
-
};
|
|
6319
|
-
}
|
|
6320
|
-
try {
|
|
6321
|
-
const result = await runCompileImports({
|
|
6322
|
-
bundle: activeBundle,
|
|
6323
|
-
providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
|
|
6324
|
-
deferIndex: false,
|
|
6325
|
-
sessionsPath: sessions_path
|
|
6326
|
-
});
|
|
6327
|
-
const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
|
|
6328
|
-
return {
|
|
6329
|
-
content: [
|
|
6330
|
-
{
|
|
6331
|
-
type: "text",
|
|
6332
|
-
text: JSON.stringify(
|
|
6333
|
-
{
|
|
6334
|
-
providers: result.providers.map((provider) => ({
|
|
6335
|
-
source: provider.source,
|
|
6336
|
-
source_path: provider.sourcePath,
|
|
6337
|
-
batch_id: provider.batchId,
|
|
6338
|
-
counts: provider.counts
|
|
6339
|
-
})),
|
|
6340
|
-
imported_any: result.importedAny,
|
|
6341
|
-
tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
|
|
6342
|
-
tantivy_error: result.tantivyError,
|
|
6343
|
-
parquet: parquet ? {
|
|
6344
|
-
out_dir: parquet.outDir,
|
|
6345
|
-
manifest_path: parquet.manifestPath,
|
|
6346
|
-
table_count: parquet.tableCount,
|
|
6347
|
-
files: parquet.files,
|
|
6348
|
-
counts: parquet.counts
|
|
6349
|
-
} : null
|
|
6350
|
-
},
|
|
6351
|
-
null,
|
|
6352
|
-
2
|
|
6353
|
-
)
|
|
6354
|
-
}
|
|
6355
|
-
]
|
|
6356
|
-
};
|
|
6357
|
-
} catch (error) {
|
|
6358
|
-
return {
|
|
6359
|
-
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
6360
|
-
isError: true
|
|
6361
|
-
};
|
|
6362
|
-
}
|
|
6363
|
-
})
|
|
6364
|
-
);
|
|
6365
|
-
server.registerTool(
|
|
6366
|
-
"list_sessions",
|
|
6367
|
-
{
|
|
6368
|
-
title: "List sessions",
|
|
6369
|
-
description: "List recent sessions when you need candidates by source/date before deeper inspection. Next step: call get_session for relevant session_id values.",
|
|
6370
|
-
inputSchema: {
|
|
6371
|
-
source: z.enum(SOURCE_TOOLS).optional(),
|
|
6372
|
-
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
6373
|
-
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
6374
|
-
limit: z.number().int().min(1).max(500).optional().default(50)
|
|
6375
|
-
},
|
|
6376
|
-
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6377
|
-
},
|
|
6378
|
-
async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
6379
|
-
const rows = listSessions(activeBundle, {
|
|
6380
|
-
sourceTool: input.source,
|
|
6381
|
-
sinceIso: input.since,
|
|
6382
|
-
untilIso: input.until,
|
|
6383
|
-
limit: input.limit ?? 50
|
|
6384
|
-
});
|
|
6385
|
-
return {
|
|
6386
|
-
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
6387
|
-
};
|
|
6388
|
-
})
|
|
6389
|
-
);
|
|
6390
|
-
server.registerTool(
|
|
6391
|
-
"get_session",
|
|
6392
|
-
{
|
|
6393
|
-
title: "Get session detail",
|
|
6394
|
-
description: "Open one session and return metadata plus timeline events. Use this after search_sessions, list_sessions, find_touched_files, or list_tool_calls before making evidence-backed claims.",
|
|
6395
|
-
inputSchema: {
|
|
6396
|
-
session_id: z.string().min(1)
|
|
6397
|
-
},
|
|
6398
|
-
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6399
|
-
},
|
|
6400
|
-
async ({ session_id }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
6401
|
-
const detail = getSession(activeBundle, session_id);
|
|
6402
|
-
if (!detail) {
|
|
6403
|
-
return {
|
|
6404
|
-
content: [{ type: "text", text: `session not found: ${session_id}` }],
|
|
6405
|
-
isError: true
|
|
6406
|
-
};
|
|
6407
|
-
}
|
|
6408
|
-
return {
|
|
6409
|
-
content: [{ type: "text", text: JSON.stringify(detail, null, 2) }]
|
|
6410
|
-
};
|
|
6411
|
-
})
|
|
6412
|
-
);
|
|
6413
|
-
server.registerTool(
|
|
6414
|
-
"search_sessions",
|
|
7261
|
+
"search",
|
|
6415
7262
|
{
|
|
6416
7263
|
title: "Full-text search",
|
|
6417
|
-
description: `Search messages, commands, paths, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms
|
|
7264
|
+
description: `Search messages, commands, paths, diffs, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms; then call \`sessions\` for relevant hits.`,
|
|
6418
7265
|
inputSchema: {
|
|
6419
7266
|
query: z.string().min(1),
|
|
7267
|
+
engine: z.enum(["fts5", "tantivy"]).optional(),
|
|
7268
|
+
field_kind: z.enum(FIELD_KINDS).optional(),
|
|
6420
7269
|
limit: z.number().int().min(1).max(500).optional().default(50),
|
|
6421
|
-
raw: z.boolean().optional().default(false)
|
|
7270
|
+
raw: z.boolean().optional().default(false).describe("Pass query straight to FTS5 MATCH (allows OR/NEAR/prefixes).")
|
|
6422
7271
|
},
|
|
6423
7272
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6424
7273
|
},
|
|
6425
|
-
async ({ query, limit, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7274
|
+
async ({ query, engine, field_kind, limit: limit2, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7275
|
+
const selectedEngine = engine ?? searchEngine;
|
|
6426
7276
|
const hits = searchFullText(activeBundle, {
|
|
6427
7277
|
query,
|
|
6428
|
-
limit:
|
|
7278
|
+
limit: limit2 ?? 50,
|
|
6429
7279
|
raw,
|
|
6430
|
-
engine:
|
|
7280
|
+
engine: selectedEngine
|
|
6431
7281
|
});
|
|
7282
|
+
const filtered = field_kind ? hits.filter((hit) => hit.field_kind === field_kind) : hits;
|
|
6432
7283
|
return {
|
|
6433
7284
|
content: [
|
|
6434
7285
|
{
|
|
6435
7286
|
type: "text",
|
|
6436
7287
|
text: JSON.stringify(
|
|
6437
|
-
{
|
|
7288
|
+
{
|
|
7289
|
+
query,
|
|
7290
|
+
engine: selectedEngine,
|
|
7291
|
+
field_kind: field_kind ?? null,
|
|
7292
|
+
count: filtered.length,
|
|
7293
|
+
hits: filtered
|
|
7294
|
+
},
|
|
6438
7295
|
null,
|
|
6439
7296
|
2
|
|
6440
7297
|
)
|
|
@@ -6444,125 +7301,157 @@ function registerProsaTools(server, bundle, options = {}) {
|
|
|
6444
7301
|
})
|
|
6445
7302
|
);
|
|
6446
7303
|
server.registerTool(
|
|
6447
|
-
"
|
|
7304
|
+
"sessions",
|
|
6448
7305
|
{
|
|
6449
|
-
title: "
|
|
6450
|
-
description: "
|
|
7306
|
+
title: "List or open sessions",
|
|
7307
|
+
description: "Without `session_id`, lists sessions filtered by source/time/limit. With `session_id`, opens that session: `format=detail` (default) returns metadata plus timeline events; `format=summary` returns only the session row; `format=markdown` renders the readable transcript. Call after `search` to get evidence behind a hit.",
|
|
6451
7308
|
inputSchema: {
|
|
6452
|
-
session_id: z.string().min(1)
|
|
7309
|
+
session_id: z.string().min(1).optional(),
|
|
7310
|
+
format: z.enum(["summary", "detail", "markdown"]).optional().default("detail"),
|
|
7311
|
+
source: z.enum(SOURCE_TOOLS).optional(),
|
|
7312
|
+
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
7313
|
+
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
7314
|
+
limit: z.number().int().min(1).max(500).optional().default(50)
|
|
6453
7315
|
},
|
|
6454
7316
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6455
7317
|
},
|
|
6456
|
-
async ({ session_id }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
6457
|
-
|
|
6458
|
-
const
|
|
6459
|
-
|
|
6460
|
-
|
|
7318
|
+
async ({ session_id, format, source, since, until, limit: limit2 }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
7319
|
+
if (!session_id) {
|
|
7320
|
+
const rows = listSessions(activeBundle, {
|
|
7321
|
+
sourceTool: source,
|
|
7322
|
+
sinceIso: since,
|
|
7323
|
+
untilIso: until,
|
|
7324
|
+
limit: limit2 ?? 50
|
|
7325
|
+
});
|
|
6461
7326
|
return {
|
|
6462
|
-
content: [{ type: "text", text:
|
|
7327
|
+
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
7328
|
+
};
|
|
7329
|
+
}
|
|
7330
|
+
if (format === "markdown") {
|
|
7331
|
+
try {
|
|
7332
|
+
const md = await exportSessionMarkdown(activeBundle, session_id);
|
|
7333
|
+
return { content: [{ type: "text", text: md }] };
|
|
7334
|
+
} catch (error) {
|
|
7335
|
+
return {
|
|
7336
|
+
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
7337
|
+
isError: true
|
|
7338
|
+
};
|
|
7339
|
+
}
|
|
7340
|
+
}
|
|
7341
|
+
const detail = getSession(activeBundle, session_id);
|
|
7342
|
+
if (!detail) {
|
|
7343
|
+
return {
|
|
7344
|
+
content: [{ type: "text", text: `session not found: ${session_id}` }],
|
|
6463
7345
|
isError: true
|
|
6464
7346
|
};
|
|
6465
7347
|
}
|
|
7348
|
+
const payload = format === "summary" ? { session: detail.session } : detail;
|
|
7349
|
+
return {
|
|
7350
|
+
content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
|
|
7351
|
+
};
|
|
6466
7352
|
})
|
|
6467
7353
|
);
|
|
6468
7354
|
server.registerTool(
|
|
6469
|
-
"
|
|
7355
|
+
"tool_calls",
|
|
6470
7356
|
{
|
|
6471
|
-
title: "
|
|
6472
|
-
description: "Audit commands and tool usage by
|
|
7357
|
+
title: "Audit tool calls and file touches",
|
|
7358
|
+
description: "Audit commands and tool usage. Filter by tool_name, canonical_type, session_id, errors_only, or path_substring. When `path_substring` is set, also surfaces matching artifacts so file-history questions return both invocations and produced files.",
|
|
6473
7359
|
inputSchema: {
|
|
7360
|
+
session_id: z.string().min(1).optional(),
|
|
6474
7361
|
tool_name: z.string().optional(),
|
|
6475
|
-
canonical_type: z.enum(
|
|
6476
|
-
|
|
6477
|
-
"read_file",
|
|
6478
|
-
"write_file",
|
|
6479
|
-
"edit_file",
|
|
6480
|
-
"search_file",
|
|
6481
|
-
"web_search",
|
|
6482
|
-
"mcp",
|
|
6483
|
-
"subagent",
|
|
6484
|
-
"patch",
|
|
6485
|
-
"other"
|
|
6486
|
-
]).optional(),
|
|
6487
|
-
session_id: z.string().optional(),
|
|
7362
|
+
canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional(),
|
|
7363
|
+
path_substring: z.string().min(1).optional().describe("Filter rows where tool_calls.path or artifacts.path contains this substring."),
|
|
6488
7364
|
errors_only: z.boolean().optional().default(false),
|
|
7365
|
+
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
7366
|
+
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
6489
7367
|
limit: z.number().int().min(1).max(500).optional().default(100)
|
|
6490
7368
|
},
|
|
6491
7369
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6492
7370
|
},
|
|
6493
|
-
async (
|
|
6494
|
-
const
|
|
6495
|
-
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
|
|
6499
|
-
|
|
6500
|
-
|
|
6501
|
-
|
|
6502
|
-
|
|
6503
|
-
}
|
|
6504
|
-
if (session_id) {
|
|
6505
|
-
conds.push("tc.session_id = ?");
|
|
6506
|
-
params.push(session_id);
|
|
6507
|
-
}
|
|
6508
|
-
if (errors_only) {
|
|
6509
|
-
conds.push("(tr.is_error = 1 OR tc.status = ?)");
|
|
6510
|
-
params.push("error");
|
|
6511
|
-
}
|
|
6512
|
-
const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
|
|
6513
|
-
const sql = `
|
|
6514
|
-
SELECT tc.tool_call_id, tc.session_id, tc.tool_name, tc.canonical_tool_type,
|
|
6515
|
-
tc.command, tc.path, tc.status, tc.timestamp_start,
|
|
6516
|
-
tr.is_error, tr.exit_code, tr.preview
|
|
6517
|
-
FROM tool_calls tc
|
|
6518
|
-
LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
|
|
6519
|
-
${where}
|
|
6520
|
-
ORDER BY tc.timestamp_start DESC
|
|
6521
|
-
LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
|
|
6522
|
-
`;
|
|
6523
|
-
const rows = activeBundle.db.prepare(sql).all(...params);
|
|
7371
|
+
async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7372
|
+
const rows = listToolCalls(activeBundle, {
|
|
7373
|
+
sessionId: input.session_id,
|
|
7374
|
+
toolName: input.tool_name,
|
|
7375
|
+
canonicalType: input.canonical_type,
|
|
7376
|
+
pathSubstring: input.path_substring,
|
|
7377
|
+
errorsOnly: input.errors_only,
|
|
7378
|
+
sinceIso: input.since,
|
|
7379
|
+
untilIso: input.until,
|
|
7380
|
+
limit: input.limit ?? 100
|
|
7381
|
+
});
|
|
6524
7382
|
return {
|
|
6525
7383
|
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
6526
7384
|
};
|
|
6527
7385
|
})
|
|
6528
7386
|
);
|
|
6529
7387
|
server.registerTool(
|
|
6530
|
-
"
|
|
7388
|
+
"analytics",
|
|
6531
7389
|
{
|
|
6532
|
-
title: "
|
|
6533
|
-
description: "
|
|
7390
|
+
title: "Aggregate analytics reports",
|
|
7391
|
+
description: "Run a built-in aggregation over the bundle: per-session metrics (`sessions`), tool usage rollup (`tools`), error timeline (`errors`), model usage (`models`), or project activity (`projects`). Backed by SQLite views; mirrors the `prosa analytics` CLI.",
|
|
6534
7392
|
inputSchema: {
|
|
6535
|
-
|
|
6536
|
-
|
|
7393
|
+
report: z.enum(ANALYTICS_REPORTS),
|
|
7394
|
+
source: z.enum(SOURCE_TOOLS).optional(),
|
|
7395
|
+
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
7396
|
+
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
7397
|
+
limit: z.number().int().min(1).max(500).optional().default(50),
|
|
7398
|
+
session_id: z.string().min(1).optional().describe("Drill-down filter (applies to `sessions` report)."),
|
|
7399
|
+
source_path_substring: z.string().min(1).optional().describe("Filter `sessions` rows by imported source file path substring."),
|
|
7400
|
+
project: z.string().min(1).optional().describe("Filter by project id, name, or path substring."),
|
|
7401
|
+
tool_name: z.string().min(1).optional().describe("Filter `tools`/`errors` rows by exact tool name."),
|
|
7402
|
+
canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional().describe("Filter `tools` rows by canonical tool type."),
|
|
7403
|
+
errors_only: z.boolean().optional().describe("`tools` report: only error rows."),
|
|
7404
|
+
category: z.string().min(1).optional().describe("Filter `errors` by category: tool_result|import_error|uncertainty."),
|
|
7405
|
+
model: z.string().min(1).optional().describe("Filter `models` rows by exact model name.")
|
|
6537
7406
|
},
|
|
6538
7407
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6539
7408
|
},
|
|
6540
|
-
async (
|
|
6541
|
-
const
|
|
6542
|
-
|
|
6543
|
-
|
|
6544
|
-
|
|
6545
|
-
|
|
6546
|
-
|
|
6547
|
-
|
|
6548
|
-
|
|
6549
|
-
|
|
6550
|
-
|
|
6551
|
-
|
|
6552
|
-
|
|
6553
|
-
|
|
6554
|
-
const like = `%${path_substring}%`;
|
|
6555
|
-
const rows = activeBundle.db.prepare(sql).all(like, like);
|
|
6556
|
-
return {
|
|
6557
|
-
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
7409
|
+
async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7410
|
+
const filters = {
|
|
7411
|
+
source: input.source,
|
|
7412
|
+
since: input.since,
|
|
7413
|
+
until: input.until,
|
|
7414
|
+
limit: input.limit,
|
|
7415
|
+
sessionId: input.session_id,
|
|
7416
|
+
sourcePathSubstring: input.source_path_substring,
|
|
7417
|
+
project: input.project,
|
|
7418
|
+
toolName: input.tool_name,
|
|
7419
|
+
canonicalType: input.canonical_type,
|
|
7420
|
+
errorsOnly: input.errors_only,
|
|
7421
|
+
category: input.category,
|
|
7422
|
+
model: input.model
|
|
6558
7423
|
};
|
|
7424
|
+
try {
|
|
7425
|
+
const result = runAnalyticsReportFromBundle({
|
|
7426
|
+
bundle: activeBundle,
|
|
7427
|
+
report: input.report,
|
|
7428
|
+
filters
|
|
7429
|
+
});
|
|
7430
|
+
return {
|
|
7431
|
+
content: [
|
|
7432
|
+
{
|
|
7433
|
+
type: "text",
|
|
7434
|
+
text: JSON.stringify(
|
|
7435
|
+
{ report: input.report, count: result.rows.length, rows: result.rows },
|
|
7436
|
+
null,
|
|
7437
|
+
2
|
|
7438
|
+
)
|
|
7439
|
+
}
|
|
7440
|
+
]
|
|
7441
|
+
};
|
|
7442
|
+
} catch (error) {
|
|
7443
|
+
return {
|
|
7444
|
+
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
7445
|
+
isError: true
|
|
7446
|
+
};
|
|
7447
|
+
}
|
|
6559
7448
|
})
|
|
6560
7449
|
);
|
|
6561
7450
|
server.registerTool(
|
|
6562
|
-
"
|
|
7451
|
+
"artifact",
|
|
6563
7452
|
{
|
|
6564
7453
|
title: "Get artifact bytes/text",
|
|
6565
|
-
description: "Retrieve full text for an artifact_id
|
|
7454
|
+
description: "Retrieve full text for an `artifact_id` referenced in a session, search hit, or tool_calls row. Use this when previews are not enough; binary artifacts return a placeholder.",
|
|
6566
7455
|
inputSchema: {
|
|
6567
7456
|
artifact_id: z.string().min(1)
|
|
6568
7457
|
},
|
|
@@ -6581,8 +7470,7 @@ function registerProsaTools(server, bundle, options = {}) {
|
|
|
6581
7470
|
return { content: [{ type: "text", text: "[no content stored]" }] };
|
|
6582
7471
|
}
|
|
6583
7472
|
try {
|
|
6584
|
-
const
|
|
6585
|
-
const text = await getText2(activeBundle, objectId);
|
|
7473
|
+
const text = await getText(activeBundle, objectId);
|
|
6586
7474
|
return { content: [{ type: "text", text }] };
|
|
6587
7475
|
} catch {
|
|
6588
7476
|
return { content: [{ type: "text", text: `[binary artifact: ${objectId}]` }] };
|
|
@@ -6590,18 +7478,89 @@ function registerProsaTools(server, bundle, options = {}) {
|
|
|
6590
7478
|
})
|
|
6591
7479
|
);
|
|
6592
7480
|
server.registerTool(
|
|
6593
|
-
"
|
|
7481
|
+
"compile",
|
|
6594
7482
|
{
|
|
6595
|
-
title: "
|
|
6596
|
-
description: "
|
|
6597
|
-
inputSchema: {
|
|
6598
|
-
|
|
7483
|
+
title: "Compile sessions or report bundle status",
|
|
7484
|
+
description: "Without input, returns a status snapshot (search index health, last batch, schema version) without mutating anything. With `source`, imports that provider; `sessions_path` may override its default. Pass `overwrite: true` to force a full rebuild of derived indexes (Tantivy from scratch). With neither `source` nor `sessions_path`, only status is returned.",
|
|
7485
|
+
inputSchema: {
|
|
7486
|
+
source: z.enum(SOURCE_TOOLS).optional(),
|
|
7487
|
+
sessions_path: z.string().min(1).optional(),
|
|
7488
|
+
overwrite: z.boolean().optional()
|
|
7489
|
+
},
|
|
7490
|
+
annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
|
|
6599
7491
|
},
|
|
6600
|
-
async () => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
6601
|
-
|
|
6602
|
-
|
|
6603
|
-
|
|
6604
|
-
|
|
7492
|
+
async ({ source, sessions_path, overwrite }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
7493
|
+
if (sessions_path && !source) {
|
|
7494
|
+
return {
|
|
7495
|
+
content: [
|
|
7496
|
+
{
|
|
7497
|
+
type: "text",
|
|
7498
|
+
text: "sessions_path requires source because providers use incompatible source layouts"
|
|
7499
|
+
}
|
|
7500
|
+
],
|
|
7501
|
+
isError: true
|
|
7502
|
+
};
|
|
7503
|
+
}
|
|
7504
|
+
if (!source && !sessions_path) {
|
|
7505
|
+
return {
|
|
7506
|
+
content: [
|
|
7507
|
+
{
|
|
7508
|
+
type: "text",
|
|
7509
|
+
text: JSON.stringify(
|
|
7510
|
+
{ mode: "status", search_index: getSearchIndexStatuses(activeBundle) },
|
|
7511
|
+
null,
|
|
7512
|
+
2
|
|
7513
|
+
)
|
|
7514
|
+
}
|
|
7515
|
+
]
|
|
7516
|
+
};
|
|
7517
|
+
}
|
|
7518
|
+
try {
|
|
7519
|
+
const result = await runCompileImports({
|
|
7520
|
+
bundle: activeBundle,
|
|
7521
|
+
providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
|
|
7522
|
+
sessionsPath: sessions_path,
|
|
7523
|
+
overwrite
|
|
7524
|
+
});
|
|
7525
|
+
const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
|
|
7526
|
+
return {
|
|
7527
|
+
content: [
|
|
7528
|
+
{
|
|
7529
|
+
type: "text",
|
|
7530
|
+
text: JSON.stringify(
|
|
7531
|
+
{
|
|
7532
|
+
mode: "import",
|
|
7533
|
+
providers: result.providers.map((provider) => ({
|
|
7534
|
+
source: provider.source,
|
|
7535
|
+
source_path: provider.sourcePath,
|
|
7536
|
+
batch_id: provider.batchId,
|
|
7537
|
+
counts: provider.counts
|
|
7538
|
+
})),
|
|
7539
|
+
imported_any: result.importedAny,
|
|
7540
|
+
tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
|
|
7541
|
+
tantivy_error: result.tantivyError,
|
|
7542
|
+
fts5_error: result.fts5Error,
|
|
7543
|
+
parquet: parquet ? {
|
|
7544
|
+
out_dir: parquet.outDir,
|
|
7545
|
+
manifest_path: parquet.manifestPath,
|
|
7546
|
+
table_count: parquet.tableCount,
|
|
7547
|
+
files: parquet.files,
|
|
7548
|
+
counts: parquet.counts
|
|
7549
|
+
} : null,
|
|
7550
|
+
search_index: getSearchIndexStatuses(activeBundle)
|
|
7551
|
+
},
|
|
7552
|
+
null,
|
|
7553
|
+
2
|
|
7554
|
+
)
|
|
7555
|
+
}
|
|
7556
|
+
]
|
|
7557
|
+
};
|
|
7558
|
+
} catch (error) {
|
|
7559
|
+
return {
|
|
7560
|
+
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
7561
|
+
isError: true
|
|
7562
|
+
};
|
|
7563
|
+
}
|
|
6605
7564
|
})
|
|
6606
7565
|
);
|
|
6607
7566
|
}
|
|
@@ -6648,14 +7607,14 @@ function registerProsaPrompts(server) {
|
|
|
6648
7607
|
path: z.string().min(1).describe("File path, directory, or distinctive path suffix")
|
|
6649
7608
|
}
|
|
6650
7609
|
},
|
|
6651
|
-
({ path:
|
|
7610
|
+
({ path: path21 }) => ({
|
|
6652
7611
|
description: "Find sessions that touched a path and summarize the evidence.",
|
|
6653
7612
|
messages: [
|
|
6654
7613
|
{
|
|
6655
7614
|
role: "user",
|
|
6656
7615
|
content: {
|
|
6657
7616
|
type: "text",
|
|
6658
|
-
text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}",
|
|
7617
|
+
text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path21)
|
|
6659
7618
|
}
|
|
6660
7619
|
}
|
|
6661
7620
|
]
|
|
@@ -6825,26 +7784,11 @@ function writeError(res, error) {
|
|
|
6825
7784
|
);
|
|
6826
7785
|
}
|
|
6827
7786
|
|
|
6828
|
-
// src/cli/parsers.ts
|
|
6829
|
-
function parseSearchEngine(value) {
|
|
6830
|
-
if (value === "fts5" || value === "tantivy") return value;
|
|
6831
|
-
throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
|
|
6832
|
-
}
|
|
6833
|
-
function parseMcpTransport(value) {
|
|
6834
|
-
if (value === "stdio" || value === "http") return value;
|
|
6835
|
-
throw new Error(`invalid transport: ${value} (expected stdio or http)`);
|
|
6836
|
-
}
|
|
6837
|
-
function parseSourceTool(value) {
|
|
6838
|
-
if (value === void 0) return void 0;
|
|
6839
|
-
if (SOURCE_TOOLS.includes(value)) return value;
|
|
6840
|
-
throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
|
|
6841
|
-
}
|
|
6842
|
-
|
|
6843
7787
|
// src/cli/commands/mcp.ts
|
|
6844
7788
|
function mcpCommand() {
|
|
6845
|
-
const serve = new
|
|
7789
|
+
const serve = new Command6("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
|
|
6846
7790
|
async (options) => {
|
|
6847
|
-
const storePath =
|
|
7791
|
+
const storePath = path19.resolve(options.store);
|
|
6848
7792
|
const bundle = await openOrInitBundle(storePath);
|
|
6849
7793
|
try {
|
|
6850
7794
|
const transport = parseMcpTransport(options.transport);
|
|
@@ -6875,7 +7819,7 @@ function mcpCommand() {
|
|
|
6875
7819
|
}
|
|
6876
7820
|
}
|
|
6877
7821
|
);
|
|
6878
|
-
return new
|
|
7822
|
+
return new Command6("mcp").description("MCP server commands.").addCommand(serve);
|
|
6879
7823
|
}
|
|
6880
7824
|
function registerShutdown(closeServer, bundle) {
|
|
6881
7825
|
const shutdown = async () => {
|
|
@@ -6892,13 +7836,13 @@ function registerShutdown(closeServer, bundle) {
|
|
|
6892
7836
|
}
|
|
6893
7837
|
|
|
6894
7838
|
// src/cli/commands/query.ts
|
|
6895
|
-
import
|
|
6896
|
-
import { Command as
|
|
7839
|
+
import path20 from "path";
|
|
7840
|
+
import { Command as Command7 } from "commander";
|
|
6897
7841
|
function queryCommand() {
|
|
6898
|
-
const duckdb = new
|
|
7842
|
+
const duckdb = new Command7("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
|
|
6899
7843
|
async (sql, options) => {
|
|
6900
7844
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
6901
|
-
const parquetDir = options.parquetDir ?
|
|
7845
|
+
const parquetDir = options.parquetDir ? path20.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
|
|
6902
7846
|
const result = await queryDuckDbParquet({ parquetDir, sql });
|
|
6903
7847
|
printRows(result.rows, {
|
|
6904
7848
|
format,
|
|
@@ -6907,14 +7851,14 @@ function queryCommand() {
|
|
|
6907
7851
|
});
|
|
6908
7852
|
}
|
|
6909
7853
|
);
|
|
6910
|
-
return new
|
|
7854
|
+
return new Command7("query").description("Run derived analytical queries.").addCommand(duckdb);
|
|
6911
7855
|
}
|
|
6912
7856
|
|
|
6913
7857
|
// src/cli/commands/search.ts
|
|
6914
|
-
import { Command as
|
|
7858
|
+
import { Command as Command8 } from "commander";
|
|
6915
7859
|
init_search();
|
|
6916
7860
|
function searchCommand() {
|
|
6917
|
-
return new
|
|
7861
|
+
return new Command8("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
|
|
6918
7862
|
async (query, options) => {
|
|
6919
7863
|
const engine = parseSearchEngine(options.engine);
|
|
6920
7864
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
@@ -6935,10 +7879,10 @@ function searchCommand() {
|
|
|
6935
7879
|
}
|
|
6936
7880
|
|
|
6937
7881
|
// src/cli/commands/sessions.ts
|
|
6938
|
-
import { Command as
|
|
7882
|
+
import { Command as Command9 } from "commander";
|
|
6939
7883
|
init_sessions();
|
|
6940
7884
|
function sessionsCommand() {
|
|
6941
|
-
const command = new
|
|
7885
|
+
const command = new Command9("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
|
|
6942
7886
|
async (options) => {
|
|
6943
7887
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
6944
7888
|
await withBundle(options.store, (bundle) => {
|
|
@@ -6965,7 +7909,7 @@ function sessionsCommand() {
|
|
|
6965
7909
|
}
|
|
6966
7910
|
);
|
|
6967
7911
|
command.addCommand(
|
|
6968
|
-
new
|
|
7912
|
+
new Command9("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
|
|
6969
7913
|
async (options) => {
|
|
6970
7914
|
await withBundle(options.store, (bundle) => {
|
|
6971
7915
|
const count = countSessions(bundle, {
|
|
@@ -6983,9 +7927,9 @@ function sessionsCommand() {
|
|
|
6983
7927
|
}
|
|
6984
7928
|
|
|
6985
7929
|
// src/cli/commands/tui.ts
|
|
6986
|
-
import { Command as
|
|
7930
|
+
import { Command as Command10 } from "commander";
|
|
6987
7931
|
function tuiCommand() {
|
|
6988
|
-
return new
|
|
7932
|
+
return new Command10("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
|
|
6989
7933
|
const [{ render }, React, { App: App2 }] = await Promise.all([
|
|
6990
7934
|
import("ink"),
|
|
6991
7935
|
import("react"),
|
|
@@ -7000,8 +7944,14 @@ function tuiCommand() {
|
|
|
7000
7944
|
}
|
|
7001
7945
|
|
|
7002
7946
|
// src/cli/main.ts
|
|
7947
|
+
function stripLeadingDoubleDash(argv) {
|
|
7948
|
+
if (argv.length >= 3 && argv[2] === "--") {
|
|
7949
|
+
return [argv[0], argv[1], ...argv.slice(3)];
|
|
7950
|
+
}
|
|
7951
|
+
return [...argv];
|
|
7952
|
+
}
|
|
7003
7953
|
async function runCli(argv) {
|
|
7004
|
-
const program = new
|
|
7954
|
+
const program = new Command11().name("prosa").enablePositionalOptions().description(
|
|
7005
7955
|
"Compile, search and export local agent session histories\n(Cursor, Codex CLI, Claude Code, Gemini CLI) into one canonical store."
|
|
7006
7956
|
).version(PROSA_PARSER_VERSION, "-v, --version");
|
|
7007
7957
|
program.addCommand(initCommand());
|
|
@@ -7012,9 +7962,10 @@ async function runCli(argv) {
|
|
|
7012
7962
|
program.addCommand(searchCommand());
|
|
7013
7963
|
program.addCommand(exportCommand());
|
|
7014
7964
|
program.addCommand(queryCommand());
|
|
7965
|
+
program.addCommand(analyticsCommand());
|
|
7015
7966
|
program.addCommand(mcpCommand());
|
|
7016
7967
|
program.addCommand(tuiCommand());
|
|
7017
|
-
await program.parseAsync(
|
|
7968
|
+
await program.parseAsync(stripLeadingDoubleDash(argv));
|
|
7018
7969
|
}
|
|
7019
7970
|
var isEntry = import.meta.url === `file://${process.argv[1]}`;
|
|
7020
7971
|
if (isEntry) {
|