@c3-oss/prosa 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -20
- package/dist/bin/prosa.js +2046 -1066
- package/dist/bin/prosa.js.map +1 -1
- package/dist/cli/main.js +2046 -1066
- package/dist/cli/main.js.map +1 -1
- package/dist/index.d.ts +93 -12
- package/dist/index.js +1001 -97
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/bin/prosa.js
CHANGED
|
@@ -12,8 +12,8 @@ var __export = (target, all) => {
|
|
|
12
12
|
|
|
13
13
|
// src/core/db.ts
|
|
14
14
|
import Database from "better-sqlite3";
|
|
15
|
-
function openDb(
|
|
16
|
-
const db = new Database(
|
|
15
|
+
function openDb(path21) {
|
|
16
|
+
const db = new Database(path21);
|
|
17
17
|
db.pragma("journal_mode = WAL");
|
|
18
18
|
db.pragma("foreign_keys = ON");
|
|
19
19
|
db.pragma("synchronous = NORMAL");
|
|
@@ -48,283 +48,30 @@ var init_db = __esm({
|
|
|
48
48
|
}
|
|
49
49
|
});
|
|
50
50
|
|
|
51
|
-
// src/core/
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
"src/core/errors.ts"() {
|
|
55
|
-
"use strict";
|
|
56
|
-
getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
|
|
57
|
-
}
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// src/core/cas/compress.ts
|
|
61
|
-
import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
|
|
62
|
-
function compressBytes(input) {
|
|
63
|
-
if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
|
|
64
|
-
return { bytes: Buffer.from(input), compression: "none" };
|
|
65
|
-
}
|
|
66
|
-
const out = zstdCompress(Buffer.from(input), { compressionLevel: ZSTD_LEVEL });
|
|
67
|
-
return { bytes: out, compression: "zstd" };
|
|
68
|
-
}
|
|
69
|
-
function decompressBytes(input, compression) {
|
|
70
|
-
if (compression === "none") return input;
|
|
71
|
-
return zstdDecompress(input);
|
|
72
|
-
}
|
|
73
|
-
var COMPRESS_THRESHOLD_BYTES, ZSTD_LEVEL;
|
|
74
|
-
var init_compress = __esm({
|
|
75
|
-
"src/core/cas/compress.ts"() {
|
|
76
|
-
"use strict";
|
|
77
|
-
COMPRESS_THRESHOLD_BYTES = 256;
|
|
78
|
-
ZSTD_LEVEL = 3;
|
|
79
|
-
}
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
// src/core/cas/hash.ts
|
|
83
|
-
import { createHash } from "crypto";
|
|
84
|
-
import { blake3 } from "@noble/hashes/blake3";
|
|
85
|
-
import { bytesToHex } from "@noble/hashes/utils";
|
|
86
|
-
function blake3Hex(bytes) {
|
|
87
|
-
return bytesToHex(blake3(bytes));
|
|
88
|
-
}
|
|
89
|
-
function sha256Hex(bytes) {
|
|
90
|
-
return createHash("sha256").update(bytes).digest("hex");
|
|
91
|
-
}
|
|
92
|
-
function objectIdFromHash(hashHex) {
|
|
93
|
-
return `blake3:${hashHex}`;
|
|
94
|
-
}
|
|
95
|
-
function objectStoragePath(hashHex, compression) {
|
|
96
|
-
const ext = compression === "zstd" ? ".zst" : ".bin";
|
|
97
|
-
const a = hashHex.slice(0, 2);
|
|
98
|
-
const b = hashHex.slice(2, 4);
|
|
99
|
-
return `objects/blake3/${a}/${b}/${hashHex}${ext}`;
|
|
51
|
+
// src/core/limits.ts
|
|
52
|
+
function clampLimit(value, opts) {
|
|
53
|
+
return Math.max(opts.min ?? 1, Math.min(opts.max, value ?? opts.fallback));
|
|
100
54
|
}
|
|
101
|
-
var
|
|
102
|
-
"src/core/
|
|
55
|
+
var init_limits = __esm({
|
|
56
|
+
"src/core/limits.ts"() {
|
|
103
57
|
"use strict";
|
|
104
58
|
}
|
|
105
59
|
});
|
|
106
60
|
|
|
107
|
-
// src/core/
|
|
108
|
-
var
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
ensureDir: () => ensureDir,
|
|
112
|
-
flushPendingObjects: () => flushPendingObjects,
|
|
113
|
-
getBytes: () => getBytes,
|
|
114
|
-
getJson: () => getJson,
|
|
115
|
-
getObjectMeta: () => getObjectMeta,
|
|
116
|
-
getText: () => getText,
|
|
117
|
-
putBytes: () => putBytes,
|
|
118
|
-
putJson: () => putJson,
|
|
119
|
-
putText: () => putText,
|
|
120
|
-
stageBytes: () => stageBytes,
|
|
121
|
-
stageJson: () => stageJson,
|
|
122
|
-
stageText: () => stageText
|
|
123
|
-
});
|
|
124
|
-
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
125
|
-
import path2 from "path";
|
|
126
|
-
async function ensureDir(absoluteDir) {
|
|
127
|
-
if (ensuredDirs.has(absoluteDir)) return;
|
|
128
|
-
await mkdir2(absoluteDir, { recursive: true });
|
|
129
|
-
ensuredDirs.add(absoluteDir);
|
|
130
|
-
}
|
|
131
|
-
async function putBytes(bundle, bytes, options = {}) {
|
|
132
|
-
const hash = blake3Hex(bytes);
|
|
133
|
-
const objectId = objectIdFromHash(hash);
|
|
134
|
-
const existing = prepare(
|
|
135
|
-
bundle.db,
|
|
136
|
-
`SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
137
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
138
|
-
FROM objects WHERE object_id = ?`
|
|
139
|
-
).get(objectId);
|
|
140
|
-
if (existing) return objectId;
|
|
141
|
-
const { bytes: stored, compression } = compressBytes(bytes);
|
|
142
|
-
const storagePath = objectStoragePath(hash, compression);
|
|
143
|
-
const absolutePath = path2.join(bundle.path, storagePath);
|
|
144
|
-
await ensureDir(path2.dirname(absolutePath));
|
|
145
|
-
await writeFile2(absolutePath, stored);
|
|
146
|
-
prepare(
|
|
147
|
-
bundle.db,
|
|
148
|
-
`INSERT INTO objects (
|
|
149
|
-
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
150
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
151
|
-
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
152
|
-
).run(
|
|
153
|
-
objectId,
|
|
154
|
-
hash,
|
|
155
|
-
bytes.byteLength,
|
|
156
|
-
compression === "zstd" ? stored.byteLength : null,
|
|
157
|
-
compression,
|
|
158
|
-
options.mimeType ?? null,
|
|
159
|
-
options.encoding ?? null,
|
|
160
|
-
storagePath,
|
|
161
|
-
(/* @__PURE__ */ new Date()).toISOString()
|
|
162
|
-
);
|
|
163
|
-
return objectId;
|
|
164
|
-
}
|
|
165
|
-
async function putText(bundle, text, options = {}) {
|
|
166
|
-
const buf = Buffer.from(text, "utf8");
|
|
167
|
-
return putBytes(bundle, buf, {
|
|
168
|
-
mimeType: options.mimeType ?? "text/plain; charset=utf-8",
|
|
169
|
-
encoding: "utf-8"
|
|
170
|
-
});
|
|
171
|
-
}
|
|
172
|
-
async function putJson(bundle, value) {
|
|
173
|
-
const text = JSON.stringify(value);
|
|
174
|
-
return putBytes(bundle, Buffer.from(text, "utf8"), {
|
|
175
|
-
mimeType: "application/json",
|
|
176
|
-
encoding: "utf-8"
|
|
177
|
-
});
|
|
178
|
-
}
|
|
179
|
-
async function getBytes(bundle, objectId) {
|
|
180
|
-
const meta = prepare(
|
|
181
|
-
bundle.db,
|
|
182
|
-
`SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
183
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
184
|
-
FROM objects WHERE object_id = ?`
|
|
185
|
-
).get(objectId);
|
|
186
|
-
if (!meta) {
|
|
187
|
-
throw new Error(`object not found: ${objectId}`);
|
|
188
|
-
}
|
|
189
|
-
const buf = await readFile2(path2.join(bundle.path, meta.storage_path));
|
|
190
|
-
return decompressBytes(buf, meta.compression);
|
|
191
|
-
}
|
|
192
|
-
async function getText(bundle, objectId) {
|
|
193
|
-
const buf = await getBytes(bundle, objectId);
|
|
194
|
-
return buf.toString("utf8");
|
|
195
|
-
}
|
|
196
|
-
async function getJson(bundle, objectId) {
|
|
197
|
-
const text = await getText(bundle, objectId);
|
|
198
|
-
return JSON.parse(text);
|
|
199
|
-
}
|
|
200
|
-
function getObjectMeta(bundle, objectId) {
|
|
201
|
-
return prepare(
|
|
202
|
-
bundle.db,
|
|
203
|
-
`SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
204
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
205
|
-
FROM objects WHERE object_id = ?`
|
|
206
|
-
).get(objectId) ?? null;
|
|
207
|
-
}
|
|
208
|
-
function createPendingObjects() {
|
|
209
|
-
return { byId: /* @__PURE__ */ new Map() };
|
|
210
|
-
}
|
|
211
|
-
function stageBytes(pending, bytes, options = {}) {
|
|
212
|
-
const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
|
|
213
|
-
const hash = blake3Hex(buf);
|
|
214
|
-
const objectId = objectIdFromHash(hash);
|
|
215
|
-
if (!pending.byId.has(objectId)) {
|
|
216
|
-
pending.byId.set(objectId, {
|
|
217
|
-
objectId,
|
|
218
|
-
hash,
|
|
219
|
-
bytes: buf,
|
|
220
|
-
mimeType: options.mimeType ?? null,
|
|
221
|
-
encoding: options.encoding ?? null
|
|
222
|
-
});
|
|
223
|
-
}
|
|
224
|
-
return objectId;
|
|
225
|
-
}
|
|
226
|
-
function stageText(pending, text, options = {}) {
|
|
227
|
-
return stageBytes(pending, Buffer.from(text, "utf8"), {
|
|
228
|
-
mimeType: options.mimeType ?? "text/plain; charset=utf-8",
|
|
229
|
-
encoding: "utf-8"
|
|
230
|
-
});
|
|
231
|
-
}
|
|
232
|
-
function stageJson(pending, value) {
|
|
233
|
-
return stageBytes(pending, Buffer.from(JSON.stringify(value), "utf8"), {
|
|
234
|
-
mimeType: "application/json",
|
|
235
|
-
encoding: "utf-8"
|
|
236
|
-
});
|
|
237
|
-
}
|
|
238
|
-
async function flushPendingObjects(bundle, pending) {
|
|
239
|
-
if (pending.byId.size === 0) return;
|
|
240
|
-
const ids = [...pending.byId.keys()];
|
|
241
|
-
const existingIds = queryExistingObjectIds(bundle, ids);
|
|
242
|
-
const toWrite = [];
|
|
243
|
-
for (const obj of pending.byId.values()) {
|
|
244
|
-
if (existingIds.has(obj.objectId)) continue;
|
|
245
|
-
const { bytes: compressedBytes, compression } = compressBytes(obj.bytes);
|
|
246
|
-
const storagePath = objectStoragePath(obj.hash, compression);
|
|
247
|
-
toWrite.push({
|
|
248
|
-
staged: obj,
|
|
249
|
-
compression,
|
|
250
|
-
compressedBytes,
|
|
251
|
-
storagePath,
|
|
252
|
-
absolutePath: path2.join(bundle.path, storagePath)
|
|
253
|
-
});
|
|
254
|
-
}
|
|
255
|
-
if (toWrite.length > 0) {
|
|
256
|
-
await writeFilesParallel(toWrite);
|
|
257
|
-
}
|
|
258
|
-
const insertObject = prepare(
|
|
259
|
-
bundle.db,
|
|
260
|
-
`INSERT OR IGNORE INTO objects (
|
|
261
|
-
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
|
|
262
|
-
compression, mime_type, encoding, storage_path, created_at
|
|
263
|
-
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
264
|
-
);
|
|
265
|
-
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
266
|
-
for (const p of toWrite) {
|
|
267
|
-
insertObject.run(
|
|
268
|
-
p.staged.objectId,
|
|
269
|
-
p.staged.hash,
|
|
270
|
-
p.staged.bytes.byteLength,
|
|
271
|
-
p.compression === "zstd" ? p.compressedBytes.byteLength : null,
|
|
272
|
-
p.compression,
|
|
273
|
-
p.staged.mimeType,
|
|
274
|
-
p.staged.encoding,
|
|
275
|
-
p.storagePath,
|
|
276
|
-
now
|
|
277
|
-
);
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
function queryExistingObjectIds(bundle, ids) {
|
|
281
|
-
const found = /* @__PURE__ */ new Set();
|
|
282
|
-
if (ids.length === 0) return found;
|
|
283
|
-
const CHUNK = 500;
|
|
284
|
-
for (let start = 0; start < ids.length; start += CHUNK) {
|
|
285
|
-
const slice = ids.slice(start, start + CHUNK);
|
|
286
|
-
const placeholders = slice.map(() => "?").join(",");
|
|
287
|
-
const rows = bundle.db.prepare(
|
|
288
|
-
`SELECT object_id FROM objects WHERE object_id IN (${placeholders})`
|
|
289
|
-
).all(...slice);
|
|
290
|
-
for (const row of rows) found.add(row.object_id);
|
|
291
|
-
}
|
|
292
|
-
return found;
|
|
293
|
-
}
|
|
294
|
-
async function writeFilesParallel(tasks) {
|
|
295
|
-
let cursor = 0;
|
|
296
|
-
const workers = [];
|
|
297
|
-
const limit = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
|
|
298
|
-
for (let w = 0; w < limit; w++) {
|
|
299
|
-
workers.push(
|
|
300
|
-
(async () => {
|
|
301
|
-
while (true) {
|
|
302
|
-
const i = cursor++;
|
|
303
|
-
if (i >= tasks.length) return;
|
|
304
|
-
const task = tasks[i];
|
|
305
|
-
await ensureDir(path2.dirname(task.absolutePath));
|
|
306
|
-
await writeFile2(task.absolutePath, task.compressedBytes);
|
|
307
|
-
}
|
|
308
|
-
})()
|
|
309
|
-
);
|
|
310
|
-
}
|
|
311
|
-
await Promise.all(workers);
|
|
312
|
-
}
|
|
313
|
-
var ensuredDirs, FS_WRITE_CONCURRENCY;
|
|
314
|
-
var init_cas = __esm({
|
|
315
|
-
"src/core/cas/index.ts"() {
|
|
61
|
+
// src/core/errors.ts
|
|
62
|
+
var getErrorMessage;
|
|
63
|
+
var init_errors = __esm({
|
|
64
|
+
"src/core/errors.ts"() {
|
|
316
65
|
"use strict";
|
|
317
|
-
|
|
318
|
-
init_compress();
|
|
319
|
-
init_hash();
|
|
320
|
-
ensuredDirs = /* @__PURE__ */ new Set();
|
|
321
|
-
FS_WRITE_CONCURRENCY = 16;
|
|
66
|
+
getErrorMessage = (err) => err instanceof Error ? err.message : String(err);
|
|
322
67
|
}
|
|
323
68
|
});
|
|
324
69
|
|
|
325
70
|
// src/services/indexing.ts
|
|
71
|
+
import { createHash as createHash2 } from "crypto";
|
|
72
|
+
import { existsSync } from "fs";
|
|
326
73
|
import { mkdir as mkdir4, rm as rm2, writeFile as writeFile5 } from "fs/promises";
|
|
327
|
-
import
|
|
74
|
+
import path15 from "path";
|
|
328
75
|
function enableFts5Triggers(bundle) {
|
|
329
76
|
bundle.db.exec(FTS5_TRIGGER_SQL);
|
|
330
77
|
}
|
|
@@ -338,7 +85,7 @@ function disableFts5Triggers(bundle) {
|
|
|
338
85
|
function getSearchIndexStatuses(bundle) {
|
|
339
86
|
ensureSearchIndexStatusRows(bundle);
|
|
340
87
|
return bundle.db.prepare(
|
|
341
|
-
`SELECT
|
|
88
|
+
`SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
|
|
342
89
|
FROM search_index_status
|
|
343
90
|
ORDER BY engine`
|
|
344
91
|
).all();
|
|
@@ -346,28 +93,13 @@ function getSearchIndexStatuses(bundle) {
|
|
|
346
93
|
function getSearchIndexStatus(bundle, engine) {
|
|
347
94
|
ensureSearchIndexStatusRows(bundle);
|
|
348
95
|
return bundle.db.prepare(
|
|
349
|
-
`SELECT
|
|
96
|
+
`SELECT ${SEARCH_INDEX_STATUS_COLUMNS}
|
|
350
97
|
FROM search_index_status
|
|
351
98
|
WHERE engine = ?`
|
|
352
99
|
).get(engine) ?? null;
|
|
353
100
|
}
|
|
354
101
|
function markIndexesAfterImport(bundle, options) {
|
|
355
102
|
if (!options.changed) return;
|
|
356
|
-
if (options.fts5Deferred) {
|
|
357
|
-
updateSearchIndexStatus(bundle, "fts5", {
|
|
358
|
-
status: "stale",
|
|
359
|
-
sourceDocCount: countSearchDocs(bundle),
|
|
360
|
-
indexedDocCount: countFts5Docs(bundle),
|
|
361
|
-
errorMessage: null
|
|
362
|
-
});
|
|
363
|
-
} else {
|
|
364
|
-
updateSearchIndexStatus(bundle, "fts5", {
|
|
365
|
-
status: "ready",
|
|
366
|
-
sourceDocCount: countSearchDocs(bundle),
|
|
367
|
-
indexedDocCount: countFts5Docs(bundle),
|
|
368
|
-
errorMessage: null
|
|
369
|
-
});
|
|
370
|
-
}
|
|
371
103
|
const tantivy = getSearchIndexStatus(bundle, "tantivy");
|
|
372
104
|
if (tantivy?.status === "ready" || tantivy?.status === "stale" || tantivy?.status === "failed") {
|
|
373
105
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
@@ -408,55 +140,93 @@ function rebuildFts5Index(bundle) {
|
|
|
408
140
|
}
|
|
409
141
|
return getSearchIndexStatus(bundle, "fts5");
|
|
410
142
|
}
|
|
411
|
-
|
|
143
|
+
function buildTantivySchema(tantivy) {
|
|
144
|
+
const builder = new tantivy.SchemaBuilder();
|
|
145
|
+
for (const field of TANTIVY_SCHEMA_FIELDS) {
|
|
146
|
+
if (field.tokenizer === "default") {
|
|
147
|
+
builder.addTextField(field.name, { stored: true });
|
|
148
|
+
} else {
|
|
149
|
+
builder.addTextField(field.name, { stored: true, tokenizerName: field.tokenizer });
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return builder.build();
|
|
153
|
+
}
|
|
154
|
+
function computeSchemaFingerprint() {
|
|
155
|
+
const canonical = TANTIVY_SCHEMA_FIELDS.map((f) => `${f.name}:${f.tokenizer}:stored`).join("|");
|
|
156
|
+
return createHash2("sha256").update(canonical).digest("hex");
|
|
157
|
+
}
|
|
158
|
+
function tantivyIndexLooksValid(dir) {
|
|
159
|
+
return existsSync(path15.join(dir, "meta.json"));
|
|
160
|
+
}
|
|
161
|
+
function makeTantivyDoc(tantivy, row) {
|
|
162
|
+
const doc = new tantivy.Document();
|
|
163
|
+
doc.addText("doc_id", row.doc_id);
|
|
164
|
+
doc.addText("entity_type", row.entity_type);
|
|
165
|
+
doc.addText("entity_id", row.entity_id);
|
|
166
|
+
doc.addText("session_id", row.session_id ?? "");
|
|
167
|
+
doc.addText("project_id", row.project_id ?? "");
|
|
168
|
+
doc.addText("timestamp", row.timestamp ?? "");
|
|
169
|
+
doc.addText("role", row.role ?? "");
|
|
170
|
+
doc.addText("tool_name", row.tool_name ?? "");
|
|
171
|
+
doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
|
|
172
|
+
doc.addText("field_kind", row.field_kind);
|
|
173
|
+
doc.addText("text", row.text);
|
|
174
|
+
return doc;
|
|
175
|
+
}
|
|
176
|
+
async function rebuildTantivyIndex(bundle, options = {}) {
|
|
412
177
|
ensureSearchIndexStatusRows(bundle);
|
|
178
|
+
const sourceDocCount = countSearchDocs(bundle);
|
|
179
|
+
const prev = getSearchIndexStatus(bundle, "tantivy");
|
|
180
|
+
const fingerprint = computeSchemaFingerprint();
|
|
181
|
+
const indexDirValid = tantivyIndexLooksValid(bundle.paths.tantivy);
|
|
182
|
+
const fingerprintMatches = prev?.schema_fingerprint === fingerprint;
|
|
183
|
+
const lastIndexedRowid = typeof prev?.last_indexed_rowid === "number" ? prev.last_indexed_rowid : 0;
|
|
184
|
+
const wantFullRebuild = options.overwrite === true || !indexDirValid || !fingerprintMatches || lastIndexedRowid <= 0;
|
|
413
185
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
414
186
|
status: "building",
|
|
415
|
-
sourceDocCount
|
|
187
|
+
sourceDocCount,
|
|
416
188
|
indexedDocCount: 0,
|
|
417
189
|
errorMessage: null
|
|
418
190
|
});
|
|
419
191
|
try {
|
|
420
192
|
const tantivy = await import("@oxdev03/node-tantivy-binding");
|
|
421
|
-
const schema =
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
doc.addText("role", row.role ?? "");
|
|
442
|
-
doc.addText("tool_name", row.tool_name ?? "");
|
|
443
|
-
doc.addText("canonical_tool_type", row.canonical_tool_type ?? "");
|
|
444
|
-
doc.addText("field_kind", row.field_kind);
|
|
445
|
-
doc.addText("text", row.text);
|
|
446
|
-
writer.addDocument(doc);
|
|
447
|
-
indexedDocCount++;
|
|
193
|
+
const schema = buildTantivySchema(tantivy);
|
|
194
|
+
let index;
|
|
195
|
+
if (wantFullRebuild) {
|
|
196
|
+
await rm2(bundle.paths.tantivy, { recursive: true, force: true });
|
|
197
|
+
await mkdir4(bundle.paths.tantivy, { recursive: true });
|
|
198
|
+
index = new tantivy.Index(schema, bundle.paths.tantivy, false);
|
|
199
|
+
} else {
|
|
200
|
+
index = tantivy.Index.open(bundle.paths.tantivy);
|
|
201
|
+
}
|
|
202
|
+
const writer = index.writer(3e8, 4);
|
|
203
|
+
const select = wantFullRebuild ? `${SEARCH_DOCS_SELECT} ORDER BY rowid` : `${SEARCH_DOCS_SELECT} WHERE rowid > ${lastIndexedRowid} ORDER BY rowid`;
|
|
204
|
+
let addedDocCount = 0;
|
|
205
|
+
let maxRowid = wantFullRebuild ? 0 : lastIndexedRowid;
|
|
206
|
+
for (const row of bundle.db.prepare(select).iterate()) {
|
|
207
|
+
if (!wantFullRebuild) {
|
|
208
|
+
writer.deleteDocumentsByTerm("doc_id", row.doc_id);
|
|
209
|
+
}
|
|
210
|
+
writer.addDocument(makeTantivyDoc(tantivy, row));
|
|
211
|
+
addedDocCount++;
|
|
212
|
+
if (row.rowid > maxRowid) maxRowid = row.rowid;
|
|
448
213
|
}
|
|
449
214
|
writer.commit();
|
|
450
215
|
index.reload();
|
|
216
|
+
writer.waitMergingThreads();
|
|
217
|
+
const indexedDocCount = wantFullRebuild ? addedDocCount : countTantivyDocsBest(prev, addedDocCount);
|
|
451
218
|
await writeFile5(
|
|
452
|
-
|
|
219
|
+
path15.join(bundle.paths.tantivy, "prosa-index.json"),
|
|
453
220
|
`${JSON.stringify(
|
|
454
221
|
{
|
|
455
222
|
engine: "tantivy",
|
|
456
223
|
source: "search_docs",
|
|
457
224
|
built_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
458
|
-
|
|
459
|
-
|
|
225
|
+
mode: wantFullRebuild ? "full" : "incremental",
|
|
226
|
+
source_doc_count: sourceDocCount,
|
|
227
|
+
indexed_doc_count: indexedDocCount,
|
|
228
|
+
last_indexed_rowid: maxRowid,
|
|
229
|
+
schema_fingerprint: fingerprint
|
|
460
230
|
},
|
|
461
231
|
null,
|
|
462
232
|
2
|
|
@@ -466,14 +236,16 @@ async function rebuildTantivyIndex(bundle) {
|
|
|
466
236
|
);
|
|
467
237
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
468
238
|
status: "ready",
|
|
469
|
-
sourceDocCount
|
|
239
|
+
sourceDocCount,
|
|
470
240
|
indexedDocCount,
|
|
471
|
-
errorMessage: null
|
|
241
|
+
errorMessage: null,
|
|
242
|
+
lastIndexedRowid: maxRowid,
|
|
243
|
+
schemaFingerprint: fingerprint
|
|
472
244
|
});
|
|
473
245
|
} catch (error) {
|
|
474
246
|
updateSearchIndexStatus(bundle, "tantivy", {
|
|
475
247
|
status: "failed",
|
|
476
|
-
sourceDocCount
|
|
248
|
+
sourceDocCount,
|
|
477
249
|
indexedDocCount: 0,
|
|
478
250
|
errorMessage: getErrorMessage(error)
|
|
479
251
|
});
|
|
@@ -481,36 +253,53 @@ async function rebuildTantivyIndex(bundle) {
|
|
|
481
253
|
}
|
|
482
254
|
return getSearchIndexStatus(bundle, "tantivy");
|
|
483
255
|
}
|
|
256
|
+
function countTantivyDocsBest(prev, added) {
|
|
257
|
+
if (prev && typeof prev.indexed_doc_count === "number") {
|
|
258
|
+
return prev.indexed_doc_count + added;
|
|
259
|
+
}
|
|
260
|
+
return added;
|
|
261
|
+
}
|
|
484
262
|
function ensureSearchIndexStatusRows(bundle) {
|
|
485
263
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
486
264
|
const stmt = prepare(
|
|
487
265
|
bundle.db,
|
|
488
266
|
`INSERT OR IGNORE INTO search_index_status (
|
|
489
|
-
engine, status, source_doc_count, indexed_doc_count, updated_at,
|
|
490
|
-
|
|
267
|
+
engine, status, source_doc_count, indexed_doc_count, updated_at,
|
|
268
|
+
error_message, last_indexed_rowid, schema_fingerprint
|
|
269
|
+
) VALUES (?, ?, 0, 0, ?, NULL, NULL, NULL)`
|
|
491
270
|
);
|
|
492
271
|
stmt.run("fts5", "ready", now);
|
|
493
272
|
stmt.run("tantivy", "missing", now);
|
|
494
273
|
}
|
|
495
274
|
function updateSearchIndexStatus(bundle, engine, values) {
|
|
496
275
|
ensureSearchIndexStatusRows(bundle);
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
WHERE engine = ?`
|
|
506
|
-
).run(
|
|
276
|
+
const setClauses = [
|
|
277
|
+
"status = ?",
|
|
278
|
+
"source_doc_count = ?",
|
|
279
|
+
"indexed_doc_count = ?",
|
|
280
|
+
"updated_at = ?",
|
|
281
|
+
"error_message = ?"
|
|
282
|
+
];
|
|
283
|
+
const params = [
|
|
507
284
|
values.status,
|
|
508
285
|
values.sourceDocCount,
|
|
509
286
|
values.indexedDocCount,
|
|
510
287
|
(/* @__PURE__ */ new Date()).toISOString(),
|
|
511
|
-
values.errorMessage
|
|
512
|
-
|
|
513
|
-
)
|
|
288
|
+
values.errorMessage
|
|
289
|
+
];
|
|
290
|
+
if (values.lastIndexedRowid !== void 0) {
|
|
291
|
+
setClauses.push("last_indexed_rowid = ?");
|
|
292
|
+
params.push(values.lastIndexedRowid);
|
|
293
|
+
}
|
|
294
|
+
if (values.schemaFingerprint !== void 0) {
|
|
295
|
+
setClauses.push("schema_fingerprint = ?");
|
|
296
|
+
params.push(values.schemaFingerprint);
|
|
297
|
+
}
|
|
298
|
+
params.push(engine);
|
|
299
|
+
prepare(
|
|
300
|
+
bundle.db,
|
|
301
|
+
`UPDATE search_index_status SET ${setClauses.join(", ")} WHERE engine = ?`
|
|
302
|
+
).run(...params);
|
|
514
303
|
}
|
|
515
304
|
function countSearchDocs(bundle) {
|
|
516
305
|
return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs`).get()?.n ?? 0;
|
|
@@ -518,12 +307,16 @@ function countSearchDocs(bundle) {
|
|
|
518
307
|
function countFts5Docs(bundle) {
|
|
519
308
|
return bundle.db.prepare(`SELECT count(*) AS n FROM search_docs_fts`).get()?.n ?? 0;
|
|
520
309
|
}
|
|
521
|
-
var FTS5_TRIGGER_SQL;
|
|
310
|
+
var SEARCH_INDEX_STATUS_COLUMNS, FTS5_TRIGGER_SQL, TANTIVY_SCHEMA_FIELDS, SEARCH_DOCS_SELECT;
|
|
522
311
|
var init_indexing = __esm({
|
|
523
312
|
"src/services/indexing.ts"() {
|
|
524
313
|
"use strict";
|
|
525
314
|
init_db();
|
|
526
315
|
init_errors();
|
|
316
|
+
SEARCH_INDEX_STATUS_COLUMNS = `
|
|
317
|
+
engine, status, source_doc_count, indexed_doc_count, updated_at,
|
|
318
|
+
error_message, last_indexed_rowid, schema_fingerprint
|
|
319
|
+
`;
|
|
527
320
|
FTS5_TRIGGER_SQL = `
|
|
528
321
|
CREATE TRIGGER IF NOT EXISTS search_docs_ai AFTER INSERT ON search_docs BEGIN
|
|
529
322
|
INSERT INTO search_docs_fts(rowid, text, role, tool_name, field_kind)
|
|
@@ -542,21 +335,30 @@ CREATE TRIGGER IF NOT EXISTS search_docs_au AFTER UPDATE ON search_docs BEGIN
|
|
|
542
335
|
VALUES (new.rowid, new.text, new.role, new.tool_name, new.field_kind);
|
|
543
336
|
END;
|
|
544
337
|
`;
|
|
545
|
-
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
338
|
+
TANTIVY_SCHEMA_FIELDS = [
|
|
339
|
+
{ name: "doc_id", tokenizer: "raw" },
|
|
340
|
+
{ name: "entity_type", tokenizer: "raw" },
|
|
341
|
+
{ name: "entity_id", tokenizer: "raw" },
|
|
342
|
+
{ name: "session_id", tokenizer: "raw" },
|
|
343
|
+
{ name: "project_id", tokenizer: "raw" },
|
|
344
|
+
{ name: "timestamp", tokenizer: "raw" },
|
|
345
|
+
{ name: "role", tokenizer: "raw" },
|
|
346
|
+
{ name: "tool_name", tokenizer: "raw" },
|
|
347
|
+
{ name: "canonical_tool_type", tokenizer: "raw" },
|
|
348
|
+
{ name: "field_kind", tokenizer: "raw" },
|
|
349
|
+
// The text field uses tantivy's default tokenizer (en_stem in the binding).
|
|
350
|
+
{ name: "text", tokenizer: "default" }
|
|
351
|
+
];
|
|
352
|
+
SEARCH_DOCS_SELECT = `
|
|
353
|
+
SELECT rowid, doc_id, entity_type, entity_id, session_id, project_id, timestamp,
|
|
354
|
+
role, tool_name, canonical_tool_type, field_kind, text
|
|
355
|
+
FROM search_docs
|
|
356
|
+
`;
|
|
555
357
|
}
|
|
556
358
|
});
|
|
557
359
|
|
|
558
360
|
// src/services/search.ts
|
|
559
|
-
import { existsSync } from "fs";
|
|
361
|
+
import { existsSync as existsSync2 } from "fs";
|
|
560
362
|
import { createRequire } from "module";
|
|
561
363
|
function escapeFtsQuery(q) {
|
|
562
364
|
return q.split(/\s+/).filter((t) => t.length > 0).map((t) => `"${t.replace(/"/g, '""')}"`).join(" ");
|
|
@@ -565,7 +367,7 @@ function searchFullText(bundle, options) {
|
|
|
565
367
|
if (options.engine === "tantivy") {
|
|
566
368
|
return searchTantivy(bundle, options);
|
|
567
369
|
}
|
|
568
|
-
const
|
|
370
|
+
const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
|
|
569
371
|
const sql = `
|
|
570
372
|
SELECT d.doc_id,
|
|
571
373
|
d.entity_type,
|
|
@@ -580,14 +382,14 @@ function searchFullText(bundle, options) {
|
|
|
580
382
|
JOIN search_docs d ON d.rowid = search_docs_fts.rowid
|
|
581
383
|
WHERE search_docs_fts MATCH ?
|
|
582
384
|
ORDER BY bm25(search_docs_fts), d.timestamp DESC
|
|
583
|
-
LIMIT ${
|
|
385
|
+
LIMIT ${limit2}
|
|
584
386
|
`;
|
|
585
387
|
const ftsQuery = options.raw ? options.query : escapeFtsQuery(options.query);
|
|
586
388
|
if (!ftsQuery) return [];
|
|
587
389
|
return bundle.db.prepare(sql).all(ftsQuery);
|
|
588
390
|
}
|
|
589
391
|
function searchTantivy(bundle, options) {
|
|
590
|
-
if (!
|
|
392
|
+
if (!existsSync2(bundle.paths.tantivy)) {
|
|
591
393
|
throw new Error("tantivy index not found; run `prosa index tantivy` first");
|
|
592
394
|
}
|
|
593
395
|
const status = getSearchIndexStatus(bundle, "tantivy");
|
|
@@ -596,7 +398,7 @@ function searchTantivy(bundle, options) {
|
|
|
596
398
|
`tantivy index is ${status?.status ?? "missing"}; run \`prosa index tantivy\` first`
|
|
597
399
|
);
|
|
598
400
|
}
|
|
599
|
-
const
|
|
401
|
+
const limit2 = clampLimit(options.limit, { max: 500, fallback: 50 });
|
|
600
402
|
const queryText = options.query.trim();
|
|
601
403
|
if (!queryText) return [];
|
|
602
404
|
const tantivy = requireTantivy();
|
|
@@ -605,7 +407,7 @@ function searchTantivy(bundle, options) {
|
|
|
605
407
|
const [query] = options.raw ? [index.parseQuery(queryText, ["text"])] : index.parseQueryLenient(queryText, ["text"], void 0, {
|
|
606
408
|
text: [true, 2, true]
|
|
607
409
|
});
|
|
608
|
-
const result = searcher.search(query,
|
|
410
|
+
const result = searcher.search(query, limit2, true);
|
|
609
411
|
const snippets = tantivy.SnippetGenerator.create(searcher, query, index.schema, "text");
|
|
610
412
|
snippets.setMaxNumChars(180);
|
|
611
413
|
return result.hits.map((hit) => {
|
|
@@ -689,7 +491,7 @@ function sessionFilterWhere(filters) {
|
|
|
689
491
|
}
|
|
690
492
|
function listSessions(bundle, filters = {}) {
|
|
691
493
|
const { where, params } = sessionFilterWhere(filters);
|
|
692
|
-
const
|
|
494
|
+
const limit2 = clampLimit(filters.limit, { max: 1e3, fallback: 50 });
|
|
693
495
|
const sql = `
|
|
694
496
|
SELECT s.session_id,
|
|
695
497
|
s.source_tool,
|
|
@@ -710,7 +512,7 @@ function listSessions(bundle, filters = {}) {
|
|
|
710
512
|
FROM sessions s
|
|
711
513
|
${where}
|
|
712
514
|
ORDER BY s.start_ts DESC NULLS LAST
|
|
713
|
-
LIMIT ${
|
|
515
|
+
LIMIT ${limit2}
|
|
714
516
|
`;
|
|
715
517
|
return bundle.db.prepare(sql).all(...params);
|
|
716
518
|
}
|
|
@@ -1134,13 +936,14 @@ var init_App = __esm({
|
|
|
1134
936
|
});
|
|
1135
937
|
|
|
1136
938
|
// src/cli/main.ts
|
|
1137
|
-
import { Command as
|
|
939
|
+
import { Command as Command11 } from "commander";
|
|
1138
940
|
|
|
1139
941
|
// src/core/version.ts
|
|
1140
942
|
var PROSA_PARSER_VERSION = "0.1.0";
|
|
1141
|
-
var PROSA_SCHEMA_VERSION =
|
|
943
|
+
var PROSA_SCHEMA_VERSION = 4;
|
|
1142
944
|
|
|
1143
|
-
// src/cli/commands/
|
|
945
|
+
// src/cli/commands/analytics.ts
|
|
946
|
+
import path4 from "path";
|
|
1144
947
|
import { Command } from "commander";
|
|
1145
948
|
|
|
1146
949
|
// src/core/bundle.ts
|
|
@@ -1514,10 +1317,291 @@ INSERT OR IGNORE INTO search_index_status (
|
|
|
1514
1317
|
('tantivy', 'missing', 0, 0, strftime('%Y-%m-%dT%H:%M:%fZ','now'), NULL);
|
|
1515
1318
|
`;
|
|
1516
1319
|
|
|
1320
|
+
// src/core/schema/sql/003_analytics_views.ts
|
|
1321
|
+
var SQL_003_ANALYTICS_VIEWS = String.raw`
|
|
1322
|
+
CREATE VIEW IF NOT EXISTS session_facts AS
|
|
1323
|
+
WITH turn_counts AS (
|
|
1324
|
+
SELECT session_id, count(*) AS turn_count
|
|
1325
|
+
FROM turns
|
|
1326
|
+
GROUP BY session_id
|
|
1327
|
+
),
|
|
1328
|
+
message_counts AS (
|
|
1329
|
+
SELECT session_id,
|
|
1330
|
+
count(*) AS message_count,
|
|
1331
|
+
sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
|
|
1332
|
+
sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
|
|
1333
|
+
FROM messages
|
|
1334
|
+
GROUP BY session_id
|
|
1335
|
+
),
|
|
1336
|
+
tool_call_counts AS (
|
|
1337
|
+
SELECT session_id,
|
|
1338
|
+
count(*) AS tool_call_count,
|
|
1339
|
+
sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
|
|
1340
|
+
FROM tool_calls
|
|
1341
|
+
GROUP BY session_id
|
|
1342
|
+
),
|
|
1343
|
+
tool_result_counts AS (
|
|
1344
|
+
SELECT session_id,
|
|
1345
|
+
count(*) AS tool_result_count,
|
|
1346
|
+
sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
|
|
1347
|
+
THEN 1 ELSE 0 END) AS tool_result_error_count,
|
|
1348
|
+
sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
|
|
1349
|
+
FROM tool_results
|
|
1350
|
+
GROUP BY session_id
|
|
1351
|
+
),
|
|
1352
|
+
search_doc_counts AS (
|
|
1353
|
+
SELECT session_id, count(*) AS search_doc_count
|
|
1354
|
+
FROM search_docs
|
|
1355
|
+
WHERE session_id IS NOT NULL
|
|
1356
|
+
GROUP BY session_id
|
|
1357
|
+
)
|
|
1358
|
+
SELECT s.session_id,
|
|
1359
|
+
s.source_tool,
|
|
1360
|
+
s.source_session_id,
|
|
1361
|
+
s.project_id,
|
|
1362
|
+
p.display_name AS project_name,
|
|
1363
|
+
p.canonical_path AS project_path,
|
|
1364
|
+
s.parent_session_id,
|
|
1365
|
+
s.is_subagent,
|
|
1366
|
+
s.agent_role,
|
|
1367
|
+
s.agent_nickname,
|
|
1368
|
+
s.title,
|
|
1369
|
+
s.start_ts,
|
|
1370
|
+
s.end_ts,
|
|
1371
|
+
CASE
|
|
1372
|
+
WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
|
|
1373
|
+
THEN ROUND((julianday(s.end_ts) - julianday(s.start_ts)) * 86400, 3)
|
|
1374
|
+
ELSE NULL
|
|
1375
|
+
END AS duration_seconds,
|
|
1376
|
+
s.cwd_initial,
|
|
1377
|
+
s.git_branch_initial,
|
|
1378
|
+
s.model_first,
|
|
1379
|
+
s.model_last,
|
|
1380
|
+
s.status,
|
|
1381
|
+
s.timeline_confidence,
|
|
1382
|
+
sf.path AS source_file_path,
|
|
1383
|
+
COALESCE(tc.turn_count, 0) AS turn_count,
|
|
1384
|
+
COALESCE(mc.message_count, 0) AS message_count,
|
|
1385
|
+
COALESCE(mc.user_message_count, 0) AS user_message_count,
|
|
1386
|
+
COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
|
|
1387
|
+
COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
|
|
1388
|
+
COALESCE(trc.tool_result_count, 0) AS tool_result_count,
|
|
1389
|
+
COALESCE(tcc.tool_call_error_count, 0)
|
|
1390
|
+
+ COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
|
|
1391
|
+
COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
|
|
1392
|
+
COALESCE(sdc.search_doc_count, 0) AS search_doc_count
|
|
1393
|
+
FROM sessions s
|
|
1394
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1395
|
+
LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
|
|
1396
|
+
LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
|
|
1397
|
+
LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
|
|
1398
|
+
LEFT JOIN message_counts mc ON mc.session_id = s.session_id
|
|
1399
|
+
LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
|
|
1400
|
+
LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
|
|
1401
|
+
LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id;
|
|
1402
|
+
|
|
1403
|
+
CREATE VIEW IF NOT EXISTS tool_usage_facts AS
|
|
1404
|
+
WITH result_rollup AS (
|
|
1405
|
+
SELECT tool_call_id,
|
|
1406
|
+
session_id,
|
|
1407
|
+
count(*) AS tool_result_count,
|
|
1408
|
+
max(status) AS result_status,
|
|
1409
|
+
max(is_error) AS is_error,
|
|
1410
|
+
min(exit_code) AS exit_code,
|
|
1411
|
+
sum(COALESCE(duration_ms, 0)) AS duration_ms,
|
|
1412
|
+
max(preview) AS preview
|
|
1413
|
+
FROM tool_results
|
|
1414
|
+
GROUP BY tool_call_id, session_id
|
|
1415
|
+
)
|
|
1416
|
+
SELECT tc.tool_call_id,
|
|
1417
|
+
tc.session_id,
|
|
1418
|
+
s.source_tool,
|
|
1419
|
+
s.source_session_id,
|
|
1420
|
+
s.project_id,
|
|
1421
|
+
p.display_name AS project_name,
|
|
1422
|
+
p.canonical_path AS project_path,
|
|
1423
|
+
tc.turn_id,
|
|
1424
|
+
tc.message_id,
|
|
1425
|
+
tc.event_id,
|
|
1426
|
+
tc.source_call_id,
|
|
1427
|
+
tc.tool_name,
|
|
1428
|
+
tc.canonical_tool_type,
|
|
1429
|
+
tc.command,
|
|
1430
|
+
tc.cwd,
|
|
1431
|
+
tc.path,
|
|
1432
|
+
tc.query,
|
|
1433
|
+
tc.timestamp_start,
|
|
1434
|
+
tc.timestamp_end,
|
|
1435
|
+
CASE
|
|
1436
|
+
WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
|
|
1437
|
+
THEN ROUND((julianday(tc.timestamp_end) - julianday(tc.timestamp_start)) * 86400, 3)
|
|
1438
|
+
ELSE NULL
|
|
1439
|
+
END AS call_duration_seconds,
|
|
1440
|
+
tc.status AS call_status,
|
|
1441
|
+
rr.result_status,
|
|
1442
|
+
COALESCE(rr.is_error, 0) AS is_error,
|
|
1443
|
+
rr.exit_code,
|
|
1444
|
+
rr.duration_ms AS result_duration_ms,
|
|
1445
|
+
COALESCE(rr.tool_result_count, 0) AS tool_result_count,
|
|
1446
|
+
rr.preview,
|
|
1447
|
+
tc.raw_record_id
|
|
1448
|
+
FROM tool_calls tc
|
|
1449
|
+
LEFT JOIN sessions s ON s.session_id = tc.session_id
|
|
1450
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1451
|
+
LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id;
|
|
1452
|
+
|
|
1453
|
+
CREATE VIEW IF NOT EXISTS error_facts AS
|
|
1454
|
+
SELECT 'tool_result:' || tr.tool_result_id AS error_id,
|
|
1455
|
+
'tool_result' AS error_category,
|
|
1456
|
+
s.source_tool,
|
|
1457
|
+
s.project_id,
|
|
1458
|
+
p.display_name AS project_name,
|
|
1459
|
+
tr.session_id,
|
|
1460
|
+
COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
|
|
1461
|
+
tc.tool_name,
|
|
1462
|
+
tc.canonical_tool_type,
|
|
1463
|
+
COALESCE(tr.status, tc.status) AS status,
|
|
1464
|
+
tr.exit_code,
|
|
1465
|
+
NULL AS message,
|
|
1466
|
+
tr.preview,
|
|
1467
|
+
NULL AS entity_type,
|
|
1468
|
+
NULL AS entity_id,
|
|
1469
|
+
tr.raw_record_id
|
|
1470
|
+
FROM tool_results tr
|
|
1471
|
+
LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
|
|
1472
|
+
LEFT JOIN sessions s ON s.session_id = tr.session_id
|
|
1473
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1474
|
+
WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
|
|
1475
|
+
UNION ALL
|
|
1476
|
+
SELECT 'import_error:' || CAST(ie.error_id AS TEXT) AS error_id,
|
|
1477
|
+
'import_error' AS error_category,
|
|
1478
|
+
COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
|
|
1479
|
+
NULL AS project_id,
|
|
1480
|
+
NULL AS project_name,
|
|
1481
|
+
NULL AS session_id,
|
|
1482
|
+
ie.occurred_at AS timestamp,
|
|
1483
|
+
NULL AS tool_name,
|
|
1484
|
+
NULL AS canonical_tool_type,
|
|
1485
|
+
ie.kind AS status,
|
|
1486
|
+
NULL AS exit_code,
|
|
1487
|
+
ie.message,
|
|
1488
|
+
NULL AS preview,
|
|
1489
|
+
NULL AS entity_type,
|
|
1490
|
+
NULL AS entity_id,
|
|
1491
|
+
ie.raw_record_id
|
|
1492
|
+
FROM import_errors ie
|
|
1493
|
+
LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
|
|
1494
|
+
LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
|
|
1495
|
+
UNION ALL
|
|
1496
|
+
SELECT 'uncertainty:' || CAST(u.uncertainty_id AS TEXT) AS error_id,
|
|
1497
|
+
'uncertainty' AS error_category,
|
|
1498
|
+
NULL AS source_tool,
|
|
1499
|
+
NULL AS project_id,
|
|
1500
|
+
NULL AS project_name,
|
|
1501
|
+
CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
|
|
1502
|
+
NULL AS timestamp,
|
|
1503
|
+
NULL AS tool_name,
|
|
1504
|
+
NULL AS canonical_tool_type,
|
|
1505
|
+
u.reason AS status,
|
|
1506
|
+
NULL AS exit_code,
|
|
1507
|
+
u.reason AS message,
|
|
1508
|
+
NULL AS preview,
|
|
1509
|
+
u.entity_type,
|
|
1510
|
+
u.entity_id,
|
|
1511
|
+
NULL AS raw_record_id
|
|
1512
|
+
FROM uncertainties u;
|
|
1513
|
+
|
|
1514
|
+
CREATE VIEW IF NOT EXISTS model_usage AS
|
|
1515
|
+
WITH model_events AS (
|
|
1516
|
+
SELECT s.source_tool,
|
|
1517
|
+
s.project_id,
|
|
1518
|
+
p.display_name AS project_name,
|
|
1519
|
+
p.canonical_path AS project_path,
|
|
1520
|
+
s.session_id,
|
|
1521
|
+
NULL AS turn_id,
|
|
1522
|
+
s.model_first AS model,
|
|
1523
|
+
s.start_ts AS timestamp,
|
|
1524
|
+
'session_first' AS observation_type
|
|
1525
|
+
FROM sessions s
|
|
1526
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1527
|
+
WHERE s.model_first IS NOT NULL
|
|
1528
|
+
UNION ALL
|
|
1529
|
+
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
|
|
1530
|
+
s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
|
|
1531
|
+
'session_last' AS observation_type
|
|
1532
|
+
FROM sessions s
|
|
1533
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1534
|
+
WHERE s.model_last IS NOT NULL
|
|
1535
|
+
UNION ALL
|
|
1536
|
+
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
|
|
1537
|
+
t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
|
|
1538
|
+
FROM turns t
|
|
1539
|
+
LEFT JOIN sessions s ON s.session_id = t.session_id
|
|
1540
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1541
|
+
WHERE t.model IS NOT NULL
|
|
1542
|
+
UNION ALL
|
|
1543
|
+
SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
|
|
1544
|
+
m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
|
|
1545
|
+
FROM messages m
|
|
1546
|
+
LEFT JOIN sessions s ON s.session_id = m.session_id
|
|
1547
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1548
|
+
WHERE m.model IS NOT NULL
|
|
1549
|
+
)
|
|
1550
|
+
SELECT source_tool,
|
|
1551
|
+
project_id,
|
|
1552
|
+
project_name,
|
|
1553
|
+
project_path,
|
|
1554
|
+
model,
|
|
1555
|
+
count(DISTINCT session_id) AS session_count,
|
|
1556
|
+
count(DISTINCT turn_id) AS turn_count,
|
|
1557
|
+
count(*) AS observation_count,
|
|
1558
|
+
sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
|
|
1559
|
+
min(timestamp) AS first_seen_ts,
|
|
1560
|
+
max(timestamp) AS last_seen_ts
|
|
1561
|
+
FROM model_events
|
|
1562
|
+
GROUP BY source_tool, project_id, project_name, project_path, model;
|
|
1563
|
+
|
|
1564
|
+
CREATE VIEW IF NOT EXISTS project_activity AS
|
|
1565
|
+
SELECT s.source_tool,
|
|
1566
|
+
s.project_id,
|
|
1567
|
+
COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
|
|
1568
|
+
p.canonical_path AS project_path,
|
|
1569
|
+
min(s.start_ts) AS first_session_ts,
|
|
1570
|
+
max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
|
|
1571
|
+
count(DISTINCT s.session_id) AS session_count,
|
|
1572
|
+
count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
|
|
1573
|
+
AS low_confidence_session_count,
|
|
1574
|
+
count(DISTINCT t.turn_id) AS turn_count,
|
|
1575
|
+
count(DISTINCT m.message_id) AS message_count,
|
|
1576
|
+
count(DISTINCT tc.tool_call_id) AS tool_call_count,
|
|
1577
|
+
count(DISTINCT tr.tool_result_id) AS tool_result_count,
|
|
1578
|
+
count(DISTINCT CASE
|
|
1579
|
+
WHEN tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
|
|
1580
|
+
THEN tr.tool_result_id
|
|
1581
|
+
END) AS tool_error_count,
|
|
1582
|
+
count(DISTINCT sd.doc_id) AS search_doc_count
|
|
1583
|
+
FROM sessions s
|
|
1584
|
+
LEFT JOIN projects p ON p.project_id = s.project_id
|
|
1585
|
+
LEFT JOIN turns t ON t.session_id = s.session_id
|
|
1586
|
+
LEFT JOIN messages m ON m.session_id = s.session_id
|
|
1587
|
+
LEFT JOIN tool_calls tc ON tc.session_id = s.session_id
|
|
1588
|
+
LEFT JOIN tool_results tr ON tr.session_id = s.session_id
|
|
1589
|
+
LEFT JOIN search_docs sd ON sd.session_id = s.session_id
|
|
1590
|
+
GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path;
|
|
1591
|
+
`;
|
|
1592
|
+
|
|
1593
|
+
// src/core/schema/sql/004_tantivy_checkpoint.ts
|
|
1594
|
+
var SQL_004_TANTIVY_CHECKPOINT = String.raw`
|
|
1595
|
+
ALTER TABLE search_index_status ADD COLUMN last_indexed_rowid INTEGER;
|
|
1596
|
+
ALTER TABLE search_index_status ADD COLUMN schema_fingerprint TEXT;
|
|
1597
|
+
`;
|
|
1598
|
+
|
|
1517
1599
|
// src/core/schema/migrate.ts
|
|
1518
1600
|
// Schema migrations, listed in ascending `version` order. Each entry pairs a
// version number and a short name with the SQL text for that step.
// The highest version here (4) matches PROSA_SCHEMA_VERSION, which openBundle
// compares against the database after running migrations.
var MIGRATIONS = [
  { version: 1, name: "init", sql: SQL_001_INIT },
  { version: 2, name: "search_index_status", sql: SQL_002_SEARCH_INDEX_STATUS },
  { version: 3, name: "analytics_views", sql: SQL_003_ANALYTICS_VIEWS },
  { version: 4, name: "tantivy_checkpoint", sql: SQL_004_TANTIVY_CHECKPOINT }
];
|
|
1522
1606
|
function runMigrations(db) {
|
|
1523
1607
|
db.exec(`
|
|
@@ -1613,52 +1697,1038 @@ async function initBundle(rootPath) {
|
|
|
1613
1697
|
runMigrations(db);
|
|
1614
1698
|
return { path: resolved, db, manifest, paths };
|
|
1615
1699
|
}
|
|
1616
|
-
async function openBundle(rootPath) {
|
|
1617
|
-
const resolved = path.resolve(rootPath);
|
|
1618
|
-
const paths = bundlePaths(resolved);
|
|
1619
|
-
const dirStat = await stat(resolved).catch(() => null);
|
|
1620
|
-
if (!dirStat?.isDirectory()) {
|
|
1621
|
-
throw new Error(`bundle path not found or not a directory: ${resolved}`);
|
|
1700
|
+
/**
 * Opens an existing bundle directory.
 *
 * Verifies that the directory and its manifest exist, ensures the search
 * directories are present, opens the SQLite database, runs migrations and
 * checks that the resulting schema version matches PROSA_SCHEMA_VERSION.
 * When the manifest records a different parser version it is rewritten with
 * PROSA_PARSER_VERSION.
 *
 * @param {string} rootPath - Bundle directory (resolved to an absolute path).
 * @returns {Promise<{path: string, db: object, manifest: object, paths: object}>}
 *   The open bundle; the caller owns `db` and must close it (see closeBundle).
 * @throws {Error} If the directory or manifest is missing, the schema version
 *   does not match, or any step after opening the database fails.
 */
async function openBundle(rootPath) {
  const resolved = path.resolve(rootPath);
  const paths = bundlePaths(resolved);
  const dirStat = await stat(resolved).catch(() => null);
  if (!dirStat?.isDirectory()) {
    throw new Error(`bundle path not found or not a directory: ${resolved}`);
  }
  if (!await exists(paths.manifest)) {
    throw new Error(
      `no manifest.json in ${resolved} \u2014 initialize first with \`prosa init --store ${resolved}\``
    );
  }
  const manifest = JSON.parse(await readFile(paths.manifest, "utf8"));
  await mkdir(paths.search, { recursive: true });
  await mkdir(paths.tantivy, { recursive: true });
  const db = openDb(paths.db);
  try {
    runMigrations(db);
    const currentVersion = currentSchemaVersion(db);
    if (currentVersion !== PROSA_SCHEMA_VERSION) {
      throw new Error(`schema version mismatch (db=${currentVersion}, code=${PROSA_SCHEMA_VERSION})`);
    }
    if (manifest.parser_version !== PROSA_PARSER_VERSION) {
      manifest.parser_version = PROSA_PARSER_VERSION;
      await writeFile(paths.manifest, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
    }
  } catch (error) {
    // Nothing is returned on failure, so no caller could ever close this
    // handle; release it here for every failure path, not only the mismatch.
    closeDb(db);
    throw error;
  }
  return { path: resolved, db, manifest, paths };
}
|
|
1729
|
+
/**
 * Opens the bundle at `rootPath`, creating it first when it does not exist yet.
 *
 * A missing directory, or a directory without a manifest, is initialized via
 * initBundle; otherwise the bundle is opened via openBundle.
 *
 * @param {string} rootPath - Bundle directory (resolved to an absolute path).
 * @returns {Promise<object>} Whatever initBundle / openBundle resolve to.
 * @throws {Error} If `rootPath` exists but is not a directory.
 */
async function openOrInitBundle(rootPath) {
  const bundleRoot = path.resolve(rootPath);
  const layout = bundlePaths(bundleRoot);

  let rootInfo;
  try {
    rootInfo = await stat(bundleRoot);
  } catch {
    rootInfo = null;
  }

  // Nothing on disk yet: create a fresh bundle.
  if (!rootInfo) {
    return await initBundle(bundleRoot);
  }
  if (!rootInfo.isDirectory()) {
    throw new Error(`bundle path not found or not a directory: ${bundleRoot}`);
  }

  const hasManifest = await exists(layout.manifest);
  if (hasManifest) {
    return await openBundle(bundleRoot);
  }
  return await initBundle(bundleRoot);
}
|
|
1741
|
+
/**
 * Releases the database handle held by an open bundle.
 *
 * @param {{db: object}} bundle - Bundle returned by openBundle / initBundle.
 */
function closeBundle(bundle) {
  const { db } = bundle;
  closeDb(db);
}
|
|
1744
|
+
|
|
1745
|
+
// src/services/analytics.ts
|
|
1746
|
+
init_limits();
|
|
1747
|
+
|
|
1748
|
+
// src/services/export/parquet.ts
|
|
1749
|
+
import { mkdir as mkdir2, rm, writeFile as writeFile2 } from "fs/promises";
|
|
1750
|
+
import path2 from "path";
|
|
1751
|
+
import { DuckDBConnection } from "@duckdb/node-api";
|
|
1752
|
+
init_errors();
|
|
1753
|
+
// Tables included in a Parquet export. exportBundleParquet writes one
// `<table>.parquet` file per entry, and queryDuckDbParquet registers one DuckDB
// view per entry under the same name.
var PARQUET_TABLES = [
  "objects",
  "source_files",
  "import_batches",
  "raw_records",
  "import_errors",
  "uncertainties",
  "projects",
  "sessions",
  "turns",
  "events",
  "messages",
  "content_blocks",
  "tool_calls",
  "tool_results",
  "artifacts",
  "edges",
  "search_docs"
];
|
|
1772
|
+
/**
 * Exports every table in PARQUET_TABLES from a bundle's SQLite database to
 * zstd-compressed Parquet files and writes a `manifest.json` next to them.
 *
 * Existing output files for the same tables (and the old manifest) are removed
 * before the export starts.
 *
 * @param {{bundlePath: string, outDir?: string}} options - Bundle to export and
 *   optional output directory (defaults to the snapshot's `defaultOutDir`).
 * @returns {Promise<{outDir: string, manifestPath: string, files: object, counts: object}>}
 *   Output locations plus the per-table row counts taken from the snapshot.
 */
async function exportBundleParquet(options) {
  const snapshot = await openBundleSnapshot(options.bundlePath);
  const outDir = path2.resolve(options.outDir ?? snapshot.defaultOutDir);
  await mkdir2(outDir, { recursive: true });

  // Map each table name to its destination file.
  const files = {};
  for (const table of PARQUET_TABLES) {
    files[table] = path2.join(outDir, `${table}.parquet`);
  }
  const manifestPath = path2.join(outDir, "manifest.json");

  // Clear leftovers from a previous export.
  const staleTargets = Object.values(files).concat(manifestPath);
  for (const target of staleTargets) {
    await rm(target, { force: true });
  }

  const connection = await createDuckDbConnection();
  try {
    await attachSqlite(connection, snapshot.dbPath);
    for (const table of PARQUET_TABLES) {
      const source = quoteIdentifier(table);
      const destination = sqlString(files[table]);
      await connection.run(
        `COPY (SELECT * FROM prosa.${source}) TO ${destination} (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 1, ROW_GROUP_SIZE 100000)`
      );
    }
  } finally {
    connection.closeSync();
  }

  const tables = {};
  for (const table of PARQUET_TABLES) {
    tables[table] = {
      file: path2.basename(files[table]),
      rows: snapshot.counts[table]
    };
  }
  const manifest = {
    exported_at: (/* @__PURE__ */ new Date()).toISOString(),
    source_db: snapshot.dbPath,
    schema_version: snapshot.schemaVersion,
    parser_version: snapshot.parserVersion,
    tables
  };
  const serialized = `${JSON.stringify(manifest, null, 2)}
`;
  await writeFile2(manifestPath, serialized, "utf8");

  return { outDir, manifestPath, files, counts: snapshot.counts };
}
|
|
1813
|
+
/**
 * Runs a SQL query in DuckDB against a directory of exported Parquet files.
 *
 * One view per PARQUET_TABLES entry is registered over `<table>.parquet`, the
 * analytics views are created on top of them, and then `options.sql` is run.
 *
 * @param {{parquetDir: string, sql: string}} options - Export directory and the
 *   query to execute.
 * @returns {Promise<{columns: string[], rows: object[]}>} Column names and rows
 *   as reported by the DuckDB result reader.
 * @throws {Error} A "Parquet export not found" error (with the DuckDB error as
 *   `cause`) when the files are missing; any other error is rethrown unchanged.
 */
async function queryDuckDbParquet(options) {
  const parquetDir = path2.resolve(options.parquetDir);
  const connection = await createDuckDbConnection();
  try {
    for (const table of PARQUET_TABLES) {
      await connection.run(
        `CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
          path2.join(parquetDir, `${table}.parquet`)
        )})`
      );
    }
    await createAnalyticsViews(connection);
    const reader = await connection.runAndReadAll(options.sql);
    return {
      columns: reader.deduplicatedColumnNames(),
      rows: reader.getRowObjectsJson()
    };
  } catch (error) {
    if (isMissingParquetError(error)) {
      // Keep the DuckDB error attached so the offending file is still visible
      // to anyone inspecting the failure.
      throw new Error(
        `Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`,
        { cause: error }
      );
    }
    throw error;
  } finally {
    connection.closeSync();
  }
}
|
|
1841
|
+
/**
 * Creates a new DuckDB connection via `DuckDBConnection.create()`.
 *
 * @returns {Promise<object>} The connection; callers in this file close it
 *   with `closeSync()` when done.
 */
async function createDuckDbConnection() {
  const connection = await DuckDBConnection.create();
  return connection;
}
|
|
1844
|
+
/**
 * Installs and loads DuckDB's sqlite extension, then attaches the SQLite file
 * at `dbPath` under the schema name `prosa`.
 *
 * @param {object} connection - DuckDB connection exposing `run(sql)`.
 * @param {string} dbPath - Path of the SQLite database to attach.
 * @returns {Promise<void>}
 * @throws {Error} If any of the three statements fails; the underlying error
 *   is kept as `cause`.
 */
async function attachSqlite(connection, dbPath) {
  try {
    await connection.run("INSTALL sqlite");
    await connection.run("LOAD sqlite");
    await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
  } catch (error) {
    // Chain the original error so its stack and type are not lost behind the
    // summarized message.
    throw new Error(
      `DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`,
      { cause: error }
    );
  }
}
|
|
1855
|
+
/**
 * Creates (or replaces) the five analytics views on a DuckDB connection:
 * session_facts, tool_usage_facts, error_facts, model_usage and
 * project_activity. The views read from relations named after the base tables
 * (sessions, turns, messages, ...), which the caller must have registered.
 *
 * project_activity pre-aggregates each child table per session before joining.
 * Joining turns, messages, tool_calls, tool_results and search_docs directly
 * on session_id multiplies their row counts together for every session, which
 * makes the view unusably slow on real data even though count(DISTINCT ...)
 * hides the duplication in the result.
 * NOTE(review): the rewritten view returns the same numbers only if
 * session_id, project_id and the child-table ids are unique keys — confirm
 * against the schema in migration 001.
 *
 * @param {object} connection - DuckDB connection exposing `run(sql)`.
 * @returns {Promise<void>}
 */
async function createAnalyticsViews(connection) {
  const statements = [
    `
    CREATE OR REPLACE VIEW session_facts AS
    WITH turn_counts AS (
      SELECT session_id, count(*) AS turn_count
      FROM turns
      GROUP BY session_id
    ),
    message_counts AS (
      SELECT session_id,
             count(*) AS message_count,
             sum(CASE WHEN role = 'user' THEN 1 ELSE 0 END) AS user_message_count,
             sum(CASE WHEN role = 'assistant' THEN 1 ELSE 0 END) AS assistant_message_count
      FROM messages
      GROUP BY session_id
    ),
    tool_call_counts AS (
      SELECT session_id,
             count(*) AS tool_call_count,
             sum(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS tool_call_error_count
      FROM tool_calls
      GROUP BY session_id
    ),
    tool_result_counts AS (
      SELECT session_id,
             count(*) AS tool_result_count,
             sum(CASE WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
                      THEN 1 ELSE 0 END) AS tool_result_error_count,
             sum(COALESCE(duration_ms, 0)) AS tool_duration_ms
      FROM tool_results
      GROUP BY session_id
    ),
    search_doc_counts AS (
      SELECT session_id, count(*) AS search_doc_count
      FROM search_docs
      WHERE session_id IS NOT NULL
      GROUP BY session_id
    )
    SELECT s.session_id,
           s.source_tool,
           s.source_session_id,
           s.project_id,
           p.display_name AS project_name,
           p.canonical_path AS project_path,
           s.parent_session_id,
           s.is_subagent,
           s.agent_role,
           s.agent_nickname,
           s.title,
           s.start_ts,
           s.end_ts,
           CASE
             WHEN s.start_ts IS NOT NULL AND s.end_ts IS NOT NULL
             THEN date_diff('millisecond', TRY_CAST(s.start_ts AS TIMESTAMP),
                            TRY_CAST(s.end_ts AS TIMESTAMP)) / 1000.0
             ELSE NULL
           END AS duration_seconds,
           s.cwd_initial,
           s.git_branch_initial,
           s.model_first,
           s.model_last,
           s.status,
           s.timeline_confidence,
           sf.path AS source_file_path,
           COALESCE(tc.turn_count, 0) AS turn_count,
           COALESCE(mc.message_count, 0) AS message_count,
           COALESCE(mc.user_message_count, 0) AS user_message_count,
           COALESCE(mc.assistant_message_count, 0) AS assistant_message_count,
           COALESCE(tcc.tool_call_count, 0) AS tool_call_count,
           COALESCE(trc.tool_result_count, 0) AS tool_result_count,
           COALESCE(tcc.tool_call_error_count, 0)
             + COALESCE(trc.tool_result_error_count, 0) AS tool_error_count,
           COALESCE(trc.tool_duration_ms, 0) AS tool_duration_ms,
           COALESCE(sdc.search_doc_count, 0) AS search_doc_count
    FROM sessions s
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN raw_records rr ON rr.raw_record_id = s.raw_record_id
    LEFT JOIN source_files sf ON sf.source_file_id = rr.source_file_id
    LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
    LEFT JOIN message_counts mc ON mc.session_id = s.session_id
    LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
    LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
    LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
    `,
    `
    CREATE OR REPLACE VIEW tool_usage_facts AS
    WITH result_rollup AS (
      SELECT tool_call_id,
             session_id,
             count(*) AS tool_result_count,
             max(status) AS result_status,
             max(is_error) AS is_error,
             min(exit_code) AS exit_code,
             sum(COALESCE(duration_ms, 0)) AS duration_ms,
             max(preview) AS preview
      FROM tool_results
      GROUP BY tool_call_id, session_id
    )
    SELECT tc.tool_call_id,
           tc.session_id,
           s.source_tool,
           s.source_session_id,
           s.project_id,
           p.display_name AS project_name,
           p.canonical_path AS project_path,
           tc.turn_id,
           tc.message_id,
           tc.event_id,
           tc.source_call_id,
           tc.tool_name,
           tc.canonical_tool_type,
           tc.command,
           tc.cwd,
           tc.path,
           tc.query,
           tc.timestamp_start,
           tc.timestamp_end,
           CASE
             WHEN tc.timestamp_start IS NOT NULL AND tc.timestamp_end IS NOT NULL
             THEN date_diff('millisecond', TRY_CAST(tc.timestamp_start AS TIMESTAMP),
                            TRY_CAST(tc.timestamp_end AS TIMESTAMP)) / 1000.0
             ELSE NULL
           END AS call_duration_seconds,
           tc.status AS call_status,
           rr.result_status,
           COALESCE(rr.is_error, 0) AS is_error,
           rr.exit_code,
           rr.duration_ms AS result_duration_ms,
           COALESCE(rr.tool_result_count, 0) AS tool_result_count,
           rr.preview,
           tc.raw_record_id
    FROM tool_calls tc
    LEFT JOIN sessions s ON s.session_id = tc.session_id
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN result_rollup rr ON rr.tool_call_id = tc.tool_call_id
    `,
    `
    CREATE OR REPLACE VIEW error_facts AS
    SELECT 'tool_result:' || tr.tool_result_id AS error_id,
           'tool_result' AS error_category,
           s.source_tool,
           s.project_id,
           p.display_name AS project_name,
           tr.session_id,
           COALESCE(tc.timestamp_end, tc.timestamp_start) AS timestamp,
           tc.tool_name,
           tc.canonical_tool_type,
           COALESCE(tr.status, tc.status) AS status,
           tr.exit_code,
           NULL AS message,
           tr.preview,
           NULL AS entity_type,
           NULL AS entity_id,
           tr.raw_record_id
    FROM tool_results tr
    LEFT JOIN tool_calls tc ON tc.tool_call_id = tr.tool_call_id
    LEFT JOIN sessions s ON s.session_id = tr.session_id
    LEFT JOIN projects p ON p.project_id = s.project_id
    WHERE tr.is_error = 1 OR (tr.exit_code IS NOT NULL AND tr.exit_code <> 0)
    UNION ALL
    SELECT 'import_error:' || CAST(ie.error_id AS VARCHAR) AS error_id,
           'import_error' AS error_category,
           COALESCE(rr.source_tool, ib.source_tool) AS source_tool,
           NULL AS project_id,
           NULL AS project_name,
           NULL AS session_id,
           ie.occurred_at AS timestamp,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           ie.kind AS status,
           NULL AS exit_code,
           ie.message,
           NULL AS preview,
           NULL AS entity_type,
           NULL AS entity_id,
           ie.raw_record_id
    FROM import_errors ie
    LEFT JOIN import_batches ib ON ib.batch_id = ie.batch_id
    LEFT JOIN raw_records rr ON rr.raw_record_id = ie.raw_record_id
    UNION ALL
    SELECT 'uncertainty:' || CAST(u.uncertainty_id AS VARCHAR) AS error_id,
           'uncertainty' AS error_category,
           NULL AS source_tool,
           NULL AS project_id,
           NULL AS project_name,
           CASE WHEN u.entity_type = 'session' THEN u.entity_id ELSE NULL END AS session_id,
           NULL AS timestamp,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           u.reason AS status,
           NULL AS exit_code,
           u.reason AS message,
           NULL AS preview,
           u.entity_type,
           u.entity_id,
           NULL AS raw_record_id
    FROM uncertainties u
    `,
    `
    CREATE OR REPLACE VIEW model_usage AS
    WITH model_events AS (
      SELECT s.source_tool,
             s.project_id,
             p.display_name AS project_name,
             p.canonical_path AS project_path,
             s.session_id,
             NULL AS turn_id,
             s.model_first AS model,
             s.start_ts AS timestamp,
             'session_first' AS observation_type
      FROM sessions s
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE s.model_first IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             s.session_id, NULL AS turn_id, s.model_last AS model, s.end_ts AS timestamp,
             'session_last' AS observation_type
      FROM sessions s
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE s.model_last IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             t.session_id, t.turn_id, t.model, t.start_ts AS timestamp, 'turn' AS observation_type
      FROM turns t
      LEFT JOIN sessions s ON s.session_id = t.session_id
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE t.model IS NOT NULL
      UNION ALL
      SELECT s.source_tool, s.project_id, p.display_name, p.canonical_path,
             m.session_id, m.turn_id, m.model, m.timestamp, 'message' AS observation_type
      FROM messages m
      LEFT JOIN sessions s ON s.session_id = m.session_id
      LEFT JOIN projects p ON p.project_id = s.project_id
      WHERE m.model IS NOT NULL
    )
    SELECT source_tool,
           project_id,
           project_name,
           project_path,
           model,
           count(DISTINCT session_id) AS session_count,
           count(DISTINCT turn_id) AS turn_count,
           count(*) AS observation_count,
           sum(CASE WHEN observation_type = 'message' THEN 1 ELSE 0 END) AS message_count,
           min(timestamp) AS first_seen_ts,
           max(timestamp) AS last_seen_ts
    FROM model_events
    GROUP BY source_tool, project_id, project_name, project_path, model
    `,
    // Child tables are rolled up per session first so the joins below stay
    // one-row-per-session; the sums are cast back to BIGINT so the column
    // types match what count(DISTINCT ...) produced before.
    `
    CREATE OR REPLACE VIEW project_activity AS
    WITH turn_counts AS (
      SELECT session_id, count(DISTINCT turn_id) AS turn_count
      FROM turns
      GROUP BY session_id
    ),
    message_counts AS (
      SELECT session_id, count(DISTINCT message_id) AS message_count
      FROM messages
      GROUP BY session_id
    ),
    tool_call_counts AS (
      SELECT session_id, count(DISTINCT tool_call_id) AS tool_call_count
      FROM tool_calls
      GROUP BY session_id
    ),
    tool_result_counts AS (
      SELECT session_id,
             count(DISTINCT tool_result_id) AS tool_result_count,
             count(DISTINCT CASE
               WHEN is_error = 1 OR (exit_code IS NOT NULL AND exit_code <> 0)
               THEN tool_result_id
             END) AS tool_error_count
      FROM tool_results
      GROUP BY session_id
    ),
    search_doc_counts AS (
      SELECT session_id, count(DISTINCT doc_id) AS search_doc_count
      FROM search_docs
      GROUP BY session_id
    )
    SELECT s.source_tool,
           s.project_id,
           COALESCE(p.display_name, s.cwd_initial, '(unknown)') AS project_name,
           p.canonical_path AS project_path,
           min(s.start_ts) AS first_session_ts,
           max(COALESCE(s.end_ts, s.start_ts)) AS latest_session_ts,
           count(DISTINCT s.session_id) AS session_count,
           count(DISTINCT CASE WHEN s.timeline_confidence = 'low' THEN s.session_id END)
             AS low_confidence_session_count,
           CAST(COALESCE(sum(tc.turn_count), 0) AS BIGINT) AS turn_count,
           CAST(COALESCE(sum(mc.message_count), 0) AS BIGINT) AS message_count,
           CAST(COALESCE(sum(tcc.tool_call_count), 0) AS BIGINT) AS tool_call_count,
           CAST(COALESCE(sum(trc.tool_result_count), 0) AS BIGINT) AS tool_result_count,
           CAST(COALESCE(sum(trc.tool_error_count), 0) AS BIGINT) AS tool_error_count,
           CAST(COALESCE(sum(sdc.search_doc_count), 0) AS BIGINT) AS search_doc_count
    FROM sessions s
    LEFT JOIN projects p ON p.project_id = s.project_id
    LEFT JOIN turn_counts tc ON tc.session_id = s.session_id
    LEFT JOIN message_counts mc ON mc.session_id = s.session_id
    LEFT JOIN tool_call_counts tcc ON tcc.session_id = s.session_id
    LEFT JOIN tool_result_counts trc ON trc.session_id = s.session_id
    LEFT JOIN search_doc_counts sdc ON sdc.session_id = s.session_id
    GROUP BY s.source_tool, s.project_id, p.display_name, s.cwd_initial, p.canonical_path
    `
  ];
  // Run in order, one statement per call, as the original did.
  for (const statement of statements) {
    await connection.run(statement);
  }
}
|
|
2134
|
+
/**
 * Open the bundle at `bundlePath`, count the rows of every table named in
 * PARQUET_TABLES, and return those counts with the manifest versions and
 * the bundle's db / parquet paths.
 *
 * The bundle is closed before the function settles, whether or not the
 * counting succeeds.
 *
 * @param {string} bundlePath - location passed straight to `openBundle`.
 * @returns {Promise<{dbPath: *, schemaVersion: *, parserVersion: *, defaultOutDir: *, counts: Object}>}
 */
async function openBundleSnapshot(bundlePath) {
  const bundle = await openBundle(bundlePath);
  try {
    const pairs = [];
    for (const table of PARQUET_TABLES) {
      const countRow = bundle.db
        .prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`)
        .get();
      // A missing row (or missing `n`) is reported as zero.
      pairs.push([table, countRow?.n ?? 0]);
    }
    const counts = Object.fromEntries(pairs);
    const { paths, manifest } = bundle;
    return {
      dbPath: paths.db,
      schemaVersion: manifest.schema_version,
      parserVersion: manifest.parser_version,
      defaultOutDir: paths.parquet,
      counts
    };
  } finally {
    closeBundle(bundle);
  }
}
|
|
2154
|
+
/**
 * Wrap `value` in double quotes for use as an SQL identifier, doubling any
 * embedded double quote.
 *
 * @param {string} value - raw identifier.
 * @returns {string} quoted identifier.
 */
function quoteIdentifier(value) {
  const escaped = value.split('"').join('""');
  return '"' + escaped + '"';
}
|
|
2157
|
+
/**
 * Render `value` as a single-quoted SQL string literal, doubling any
 * embedded single quote.
 *
 * @param {string} value - raw text.
 * @returns {string} quoted literal.
 */
function sqlString(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
|
|
2160
|
+
/**
 * Decide whether `error` looks like a "Parquet file is missing" failure.
 * The error message must both mention a `.parquet` path and contain one of
 * the phrases "No files found", "does not exist" or "not found"
 * (case-insensitive).
 *
 * @param {*} error - thrown value; converted to text by `getErrorMessage`.
 * @returns {boolean}
 */
function isMissingParquetError(error) {
  const message = getErrorMessage(error);
  const reportsMissing = /No files found|does not exist|not found/i.test(message);
  if (!reportsMissing) return false;
  return /\.parquet/i.test(message);
}
|
|
2164
|
+
|
|
2165
|
+
// src/services/analytics.ts
|
|
2166
|
+
var ANALYTICS_REPORTS = ["sessions", "tools", "errors", "models", "projects"];
|
|
2167
|
+
/**
 * Run one analytics report against exported Parquet files.
 * Builds the report SQL in the "duckdb" dialect and hands it to
 * `queryDuckDbParquet` together with the Parquet directory.
 *
 * @param {{parquetDir: string, report: string, filters?: Object}} options
 * @returns {Promise<*>} whatever `queryDuckDbParquet` resolves to.
 */
async function runAnalyticsReport(options) {
  const { parquetDir, report } = options;
  const filters = options.filters ?? {};
  const sql = buildAnalyticsSql(report, filters, "duckdb");
  return queryDuckDbParquet({ parquetDir, sql });
}
|
|
2173
|
+
/**
 * Run one analytics report directly against the bundle's database,
 * using the "sqlite" SQL dialect.
 *
 * @param {{bundle: Object, report: string, filters?: Object}} options
 * @returns {{columns: string[], rows: Array}} column names and result rows.
 */
function runAnalyticsReportFromBundle(options) {
  const { bundle, report } = options;
  const statement = bundle.db.prepare(
    buildAnalyticsSql(report, options.filters ?? {}, "sqlite")
  );
  const rows = statement.all();
  const columns = [];
  for (const column of statement.columns()) {
    columns.push(column.name);
  }
  return { columns, rows };
}
|
|
2180
|
+
/**
 * Build the SQL text for one analytics report.
 *
 * @param {string} report - one of "sessions", "tools", "errors", "models", "projects".
 * @param {Object} filters - report filters, forwarded to the per-report builder.
 * @param {string} dialect - "duckdb" or "sqlite", forwarded to the per-report builder.
 * @returns {string} SQL statement.
 * @throws {Error} when `report` is not a known report name. Previously the
 *   function fell out of the switch and returned `undefined`, which callers
 *   then passed to the database as the SQL text.
 */
function buildAnalyticsSql(report, filters, dialect) {
  switch (report) {
    case "sessions":
      return buildSessionsSql(filters, dialect);
    case "tools":
      return buildToolsSql(filters, dialect);
    case "errors":
      return buildErrorsSql(filters, dialect);
    case "models":
      return buildModelsSql(filters, dialect);
    case "projects":
      return buildProjectsSql(filters, dialect);
    default:
      // Fail fast with a readable message instead of returning undefined SQL.
      throw new Error(
        `unknown analytics report: ${String(report)} (expected one of sessions, tools, errors, models, projects)`
      );
  }
}
|
|
2194
|
+
/**
 * Build the "sessions" report query over `session_facts`.
 * Optional filters: source, since/until on `start_ts`, project, exact
 * session id, and a substring match on `source_file_path`.
 *
 * @param {Object} filters
 * @param {string} dialect - forwarded to `projectFilter`.
 * @returns {string} SQL statement, newest sessions first.
 */
function buildSessionsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("start_ts", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.sessionId) {
    conditions.push(`session_id = ${sqlString2(filters.sessionId)}`);
  }
  if (filters.sourcePathSubstring) {
    // LIKE wildcards in the user text are escaped so they match literally.
    const pattern = `%${escapeLike(filters.sourcePathSubstring)}%`;
    conditions.push(`source_file_path LIKE ${sqlString2(pattern)} ESCAPE '\\'`);
  }
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
SELECT start_ts, source_tool, project_name, source_file_path, session_id,
source_session_id, model_last, duration_seconds,
message_count, tool_call_count, tool_result_count, tool_error_count,
tool_duration_ms, timeline_confidence, title
FROM session_facts
${where}
ORDER BY start_ts DESC NULLS LAST
LIMIT ${rowLimit}
`;
}
|
|
2213
|
+
/**
 * Build the "tools" report query: per tool / canonical type / source /
 * project aggregates over `tool_usage_facts` (call count, error count,
 * average result duration, latest timestamp).
 *
 * @param {Object} filters - source, since/until on `timestamp_start`,
 *   project, toolName, canonicalType, errorsOnly, limit.
 * @param {string} dialect - forwarded to `projectFilter`.
 * @returns {string} SQL statement, most-called tools first.
 */
function buildToolsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp_start", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.toolName) {
    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
  }
  if (filters.canonicalType) {
    conditions.push(`canonical_tool_type = ${sqlString2(filters.canonicalType)}`);
  }
  if (filters.errorsOnly) {
    conditions.push(`(is_error = 1 OR call_status = 'error')`);
  }
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
SELECT tool_name, canonical_tool_type, source_tool, project_name,
count(*) AS call_count,
sum(CASE WHEN is_error = 1 OR call_status = 'error' THEN 1 ELSE 0 END) AS error_count,
round(avg(result_duration_ms), 3) AS avg_result_duration_ms,
max(timestamp_start) AS latest_ts
FROM tool_usage_facts
${where}
GROUP BY tool_name, canonical_tool_type, source_tool, project_name
ORDER BY call_count DESC, error_count DESC, tool_name ASC
LIMIT ${rowLimit}
`;
}
|
|
2235
|
+
/**
 * Build the "errors" report query over `error_facts`.
 *
 * @param {Object} filters - source, since/until on `timestamp`, project,
 *   toolName, category, limit.
 * @param {string} dialect - forwarded to `projectFilter`.
 * @returns {string} SQL statement, newest errors first.
 */
function buildErrorsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    timeFilter("timestamp", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.toolName) {
    conditions.push(`tool_name = ${sqlString2(filters.toolName)}`);
  }
  if (filters.category) {
    conditions.push(`error_category = ${sqlString2(filters.category)}`);
  }
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
SELECT timestamp, error_category, source_tool, project_name, session_id,
tool_name, status, exit_code, message, preview
FROM error_facts
${where}
ORDER BY timestamp DESC NULLS LAST, error_id DESC
LIMIT ${rowLimit}
`;
}
|
|
2252
|
+
/**
 * Build the "models" report query over `model_usage`.
 * The since/until filters are applied as an overlap test against the
 * row's [first_seen_ts, last_seen_ts] range.
 *
 * @param {Object} filters - source, since, until, project, model, limit.
 * @param {string} dialect - forwarded to `projectFilter`.
 * @returns {string} SQL statement, models with most sessions first.
 */
function buildModelsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_seen_ts", "last_seen_ts", filters),
    projectFilter(filters, dialect)
  ];
  if (filters.model) {
    conditions.push(`model = ${sqlString2(filters.model)}`);
  }
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
SELECT model, source_tool, project_name, session_count, turn_count,
message_count, observation_count, first_seen_ts, last_seen_ts
FROM model_usage
${where}
ORDER BY session_count DESC, observation_count DESC, model ASC
LIMIT ${rowLimit}
`;
}
|
|
2268
|
+
/**
 * Build the "projects" report query over `project_activity`.
 * The since/until filters are applied as an overlap test against the
 * row's [first_session_ts, latest_session_ts] range.
 *
 * @param {Object} filters - source, since, until, project, limit.
 * @param {string} dialect - forwarded to `projectFilter`.
 * @returns {string} SQL statement, most recently active projects first.
 */
function buildProjectsSql(filters, dialect) {
  const conditions = [
    sourceFilter(filters),
    rangeOverlapFilter("first_session_ts", "latest_session_ts", filters),
    projectFilter(filters, dialect)
  ];
  const where = buildWhere(conditions);
  const rowLimit = limit(filters);
  return `
SELECT latest_session_ts, source_tool, project_name, project_path,
session_count, message_count, tool_call_count, tool_error_count,
low_confidence_session_count
FROM project_activity
${where}
ORDER BY latest_session_ts DESC NULLS LAST, session_count DESC, project_name ASC
LIMIT ${rowLimit}
`;
}
|
|
2284
|
+
/**
 * SQL condition restricting rows to one source tool.
 *
 * @param {{source?: string}} filters
 * @returns {string|null} `source_tool = '<source>'`, or null when no source is set.
 */
function sourceFilter(filters) {
  const { source } = filters;
  if (!source) return null;
  return `source_tool = ${sqlString2(source)}`;
}
|
|
2287
|
+
/**
 * SQL condition bounding a single timestamp column by `filters.since`
 * (inclusive) and `filters.until` (exclusive). Rows whose column is NULL
 * pass both bounds.
 *
 * @param {string} column - column name, interpolated verbatim.
 * @param {{since?: string, until?: string}} filters
 * @returns {string|null} the combined condition, or null when neither bound is set.
 */
function timeFilter(column, filters) {
  const { since, until } = filters;
  const bounds = [
    since ? `(${column} IS NULL OR ${column} >= ${sqlString2(since)})` : null,
    until ? `(${column} IS NULL OR ${column} < ${sqlString2(until)})` : null
  ].filter((clause) => clause !== null);
  return bounds.length > 0 ? bounds.join(" AND ") : null;
}
|
|
2295
|
+
/**
 * SQL condition selecting rows whose [firstColumn, lastColumn] range
 * overlaps the requested window: the range must end at or after
 * `filters.since` and start before `filters.until`. NULL columns pass.
 *
 * @param {string} firstColumn - range-start column, interpolated verbatim.
 * @param {string} lastColumn - range-end column, interpolated verbatim.
 * @param {{since?: string, until?: string}} filters
 * @returns {string|null} the combined condition, or null when neither bound is set.
 */
function rangeOverlapFilter(firstColumn, lastColumn, filters) {
  const { since, until } = filters;
  const bounds = [
    since ? `(${lastColumn} IS NULL OR ${lastColumn} >= ${sqlString2(since)})` : null,
    until ? `(${firstColumn} IS NULL OR ${firstColumn} < ${sqlString2(until)})` : null
  ].filter((clause) => clause !== null);
  return bounds.length > 0 ? bounds.join(" AND ") : null;
}
|
|
2305
|
+
/**
 * SQL condition matching a project by exact id, or by substring of its
 * name or path. The substring match uses ILIKE when `dialect` is "duckdb"
 * and LIKE otherwise; LIKE wildcards in the user text are escaped.
 *
 * @param {{project?: string}} filters
 * @param {string} dialect
 * @returns {string|null} parenthesized OR-condition, or null when no project is set.
 */
function projectFilter(filters, dialect) {
  const { project } = filters;
  if (!project) return null;
  const exact = sqlString2(project);
  const pattern = sqlString2(`%${escapeLike(project)}%`);
  const likeOperator = dialect === "duckdb" ? "ILIKE" : "LIKE";
  const alternatives = [
    `project_id = ${exact}`,
    `project_name ${likeOperator} ${pattern} ESCAPE '\\'`,
    `project_path ${likeOperator} ${pattern} ESCAPE '\\'`
  ];
  return `(${alternatives.join(" OR ")})`;
}
|
|
2312
|
+
/**
 * Join the truthy entries of `filters` into a WHERE clause.
 *
 * @param {Array<string|null|undefined>} filters - SQL conditions; falsy entries are dropped.
 * @returns {string} `WHERE a AND b ...`, or the empty string when nothing remains.
 */
function buildWhere(filters) {
  const conditions = [];
  for (const condition of filters) {
    if (condition) conditions.push(condition);
  }
  if (conditions.length === 0) return "";
  return `WHERE ${conditions.join(" AND ")}`;
}
|
|
2316
|
+
/**
 * Resolve the row limit for a report. A non-finite or missing
 * `filters.limit` is passed to `clampLimit` as undefined; `clampLimit` is
 * called with a maximum of 500 and a fallback of 50.
 *
 * @param {{limit?: number}} filters
 * @returns {*} whatever `clampLimit` returns.
 */
function limit(filters) {
  let requested;
  if (Number.isFinite(filters.limit)) {
    requested = filters.limit;
  }
  return clampLimit(requested, { max: 500, fallback: 50 });
}
|
|
2320
|
+
/**
 * Render `value` as a single-quoted SQL string literal, doubling any
 * embedded single quote.
 *
 * @param {string} value - raw text.
 * @returns {string} quoted literal.
 */
function sqlString2(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
|
|
2323
|
+
/**
 * Escape the LIKE metacharacters `%`, `_` and the backslash itself by
 * prefixing each with a backslash, so the text matches literally when the
 * query uses ESCAPE '\'.
 *
 * @param {string} value - raw user text.
 * @returns {string} escaped text.
 */
function escapeLike(value) {
  // "$&" in a replacement string stands for the matched character.
  return value.replace(/[\\%_]/g, "\\$&");
}
|
|
2326
|
+
|
|
2327
|
+
// src/cli/bundle.ts
|
|
2328
|
+
import path3 from "path";
|
|
2329
|
+
/**
 * Open the bundle at `storePath` (resolved to an absolute path), run `fn`
 * with it, and close the bundle afterwards even if `fn` throws or rejects.
 *
 * @param {string} storePath - bundle directory.
 * @param {function(Object): *} fn - callback receiving the open bundle; may be async.
 * @returns {Promise<*>} the value `fn` produced.
 */
async function withBundle(storePath, fn) {
  const resolvedStore = path3.resolve(storePath);
  const bundle = await openBundle(resolvedStore);
  try {
    // Await here so the bundle stays open until `fn` has fully settled.
    const outcome = await fn(bundle);
    return outcome;
  } finally {
    closeBundle(bundle);
  }
}
|
|
2337
|
+
|
|
2338
|
+
// src/cli/output.ts
|
|
2339
|
+
var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
|
|
2340
|
+
var COL_SEPARATOR = " ";
|
|
2341
|
+
var RULE_CHAR = "-";
|
|
2342
|
+
/**
 * Validate an `--output-format` value.
 *
 * @param {string|undefined} value - user-supplied format.
 * @param {string} fallback - returned when `value` is undefined.
 * @returns {string} `value` when it is listed in OUTPUT_FORMATS, else `fallback`.
 * @throws {Error} when `value` is defined but not a known format.
 */
function parseOutputFormat(value, fallback) {
  if (value === void 0) {
    return fallback;
  }
  if (!OUTPUT_FORMATS.includes(value)) {
    throw new Error(
      `invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
    );
  }
  return value;
}
|
|
2349
|
+
/**
 * Print `rows` in the format named by `opts.format`.
 * "json" and "csv" use their own printers; "table" and "interactive" both
 * use the plain-text table printer. Any other format prints nothing.
 *
 * @param {Array<Object>} rows
 * @param {{format: string, columns: string[], meta?: Object}} opts
 * @returns {void}
 */
function printRows(rows, opts) {
  const { format } = opts;
  if (format === "json") {
    printJson(rows, opts);
  } else if (format === "csv") {
    printCsv(rows, opts);
  } else if (format === "table" || format === "interactive") {
    printTable(rows, opts);
  }
}
|
|
2363
|
+
/**
 * Write `rows` to stdout as pretty-printed JSON followed by a newline.
 * When `opts.meta` is set, the output is an object holding the meta
 * fields plus a `rows` property; otherwise it is the bare rows array.
 *
 * @param {Array} rows
 * @param {{meta?: Object}} opts
 * @returns {void}
 */
function printJson(rows, opts) {
  let payload = rows;
  if (opts.meta) {
    payload = { ...opts.meta, rows };
  }
  const text = JSON.stringify(payload, null, 2);
  process.stdout.write(text + "\n");
}
|
|
2368
|
+
/**
 * Write `rows` to stdout as CSV: a header line built from `opts.columns`,
 * then one line per row with the cells in column order.
 *
 * @param {Array<Object>} rows
 * @param {{columns: string[]}} opts
 * @returns {void}
 */
function printCsv(rows, opts) {
  const { columns } = opts;
  const emit = (fields) => {
    process.stdout.write(fields.join(",") + "\n");
  };
  emit(columns.map(csvField));
  rows.forEach((record) => {
    emit(columns.map((column) => csvField(formatCell(record[column]))));
  });
}
|
|
2379
|
+
/**
 * Encode one CSV field. A field containing a double quote, comma, line
 * feed or carriage return is wrapped in double quotes with embedded quotes
 * doubled; any other field is returned unchanged.
 *
 * The carriage return is included because an unquoted bare `\r` is read as
 * (part of) a record separator by CSV parsers, splitting the row.
 *
 * @param {string} value - cell text.
 * @returns {string} CSV-safe field.
 */
function csvField(value) {
  if (/[",\r\n]/.test(value)) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
}
|
|
2383
|
+
/**
 * Write `rows` to stdout as a plain-text table: a header line, a rule line
 * of RULE_CHAR, then one line per row. Every column is padded to the width
 * of its longest cell (or its header, if longer) and columns are joined
 * with COL_SEPARATOR.
 *
 * @param {Array<Object>} rows
 * @param {{columns: string[]}} opts
 * @returns {void}
 */
function printTable(rows, opts) {
  const { columns } = opts;
  // Format every cell first, then size each column from header + cells.
  const cells = rows.map((record) => columns.map((column) => formatCell(record[column])));
  const widths = columns.map((column, index) =>
    cells.reduce((widest, cellRow) => Math.max(widest, cellRow[index].length), column.length)
  );
  const joinPadded = (texts) =>
    texts.map((text, index) => text.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
  const header = joinPadded(columns);
  const rule = widths.map((width) => RULE_CHAR.repeat(width)).join(COL_SEPARATOR);
  process.stdout.write(header + "\n" + rule + "\n");
  for (const cellRow of cells) {
    process.stdout.write(joinPadded(cellRow) + "\n");
  }
}
|
|
2406
|
+
/**
 * Convert one result cell to display text.
 *
 * - null / undefined -> empty string
 * - string -> unchanged
 * - number / boolean / bigint -> `String(value)`
 * - anything else -> JSON text; BigInt values nested inside are written as
 *   decimal strings
 *
 * The function always returns a string. Plain `JSON.stringify` throws a
 * TypeError on BigInt and yields `undefined` for functions and symbols,
 * which would crash the table and CSV printers that call string methods on
 * the result.
 *
 * @param {*} value - raw cell value.
 * @returns {string}
 */
function formatCell(value) {
  if (value == null) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
    case "bigint":
      return String(value);
    default: {
      const json = JSON.stringify(value, (_key, inner) =>
        typeof inner === "bigint" ? inner.toString() : inner
      );
      // JSON.stringify returns undefined for functions and symbols.
      return json ?? String(value);
    }
  }
}
|
|
2412
|
+
|
|
2413
|
+
// src/core/domain/types.ts
|
|
2414
|
+
var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
|
|
2415
|
+
|
|
2416
|
+
// src/cli/parsers.ts
|
|
2417
|
+
/**
 * Validate a search-engine name.
 *
 * @param {string} value
 * @returns {string} `value` when it is "fts5" or "tantivy".
 * @throws {Error} for any other value.
 */
function parseSearchEngine(value) {
  switch (value) {
    case "fts5":
    case "tantivy":
      return value;
    default:
      throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
  }
}
|
|
2421
|
+
/**
 * Validate an MCP transport name.
 *
 * @param {string} value
 * @returns {string} `value` when it is "stdio" or "http".
 * @throws {Error} for any other value.
 */
function parseMcpTransport(value) {
  switch (value) {
    case "stdio":
    case "http":
      return value;
    default:
      throw new Error(`invalid transport: ${value} (expected stdio or http)`);
  }
}
|
|
2425
|
+
/**
 * Validate an optional source-tool name against SOURCE_TOOLS.
 *
 * @param {string|undefined} value
 * @returns {string|undefined} undefined when `value` is undefined, else `value`.
 * @throws {Error} when `value` is defined but not listed in SOURCE_TOOLS.
 */
function parseSourceTool(value) {
  if (value === void 0) {
    return void 0;
  }
  if (!SOURCE_TOOLS.includes(value)) {
    throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
  }
  return value;
}
|
|
2430
|
+
|
|
2431
|
+
// src/cli/commands/analytics.ts
|
|
2432
|
+
/**
 * Build the `analytics` CLI command with one subcommand per report
 * (sessions, tools, errors, models, projects), registered in that order.
 *
 * @returns {Command} the configured command.
 */
function analyticsCommand() {
  const command = new Command("analytics").description(
    "Run high-level analytics reports over exported Parquet files."
  );
  const reports = [
    ["sessions", "Summarize sessions by source, project and model."],
    ["tools", "Summarize tool usage, status, duration and errors."],
    ["errors", "List import errors, failed tool results and uncertainties."],
    ["models", "Summarize model usage by source, project and time."],
    ["projects", "Summarize project activity and operational counts."]
  ];
  for (const [report, description] of reports) {
    command.addCommand(reportCommand(report, description));
  }
  return command;
}
|
|
2447
|
+
/**
 * Build the CLI subcommand for one analytics report: the common options,
 * the report-specific filter options, and an action that resolves the
 * Parquet directory, runs the report and prints the rows.
 *
 * @param {string} report - report name; also used as the subcommand name.
 * @param {string} description - help text for the subcommand.
 * @returns {Command} the configured subcommand.
 */
function reportCommand(report, description) {
  const command = addCommonOptions(new Command(report).description(description));
  switch (report) {
    case "tools":
      command
        .option("--tool-name <name>", "filter by exact tool name")
        .option("--canonical-type <type>", "filter by canonical tool type")
        .option("--errors-only", "only include tool calls with errors");
      break;
    case "errors":
      command
        .option("--tool-name <name>", "filter by exact tool name")
        .option("--category <category>", "filter by error category");
      break;
    case "models":
      command.option("--model <model>", "filter by exact model name");
      break;
    case "projects":
    case "sessions":
      command.option("--project <text>", "filter by project id, name, or path substring");
      break;
    default:
      break;
  }
  const runReport = async (options) => {
    // Validate the output format first so a bad flag fails before any I/O.
    const format = parseOutputFormat(options.outputFormat, "table");
    const parquetDir = await resolveParquetDir(options);
    const filters = buildFilters(options);
    const { rows, columns } = await runAnalyticsReport({ parquetDir, report, filters });
    printRows(rows, {
      format,
      columns,
      meta: { report, count: rows.length }
    });
  };
  return command.action(runReport);
}
|
|
2476
|
+
/**
 * Attach the options shared by every analytics subcommand: bundle store,
 * Parquet directory, refresh flag, source / time filters, row limit and
 * output format.
 *
 * @param {Command} command - subcommand to extend.
 * @returns {Command} the value returned by the last `.option()` call in the chain.
 */
function addCommonOptions(command) {
  return command
    .option("--store <path>", "bundle directory", defaultBundlePath())
    .option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)")
    .option("--refresh", "export Parquet before running the report")
    .option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini")
    .option("--since <iso>", "lower timestamp bound (inclusive)")
    .option("--until <iso>", "upper timestamp bound (exclusive)")
    .option("--limit <n>", "maximum rows", "50")
    .option("--output-format <fmt>", "interactive|table|json|csv", "table");
}
|
|
2479
|
+
/**
 * Work out which Parquet directory a report should read.
 *
 * With `--refresh`, Parquet is exported first (into `--parquet-dir` when
 * given) and the export's output directory is used. Otherwise an explicit
 * `--parquet-dir` wins, and failing that the bundle's own parquet path is
 * read by opening the bundle.
 *
 * @param {{store: string, parquetDir?: string, refresh?: boolean}} options
 * @returns {Promise<string>} Parquet directory.
 */
async function resolveParquetDir(options) {
  const storePath = path4.resolve(options.store);
  const outDir = options.parquetDir ? path4.resolve(options.parquetDir) : void 0;
  if (options.refresh) {
    const exported = await exportBundleParquet({ bundlePath: storePath, outDir });
    return exported.outDir;
  }
  if (outDir !== void 0) {
    return outDir;
  }
  return withBundle(storePath, (bundle) => bundle.paths.parquet);
}
|
|
2488
|
+
/**
 * Translate parsed CLI options into the filter object used by the report
 * SQL builders. `source` is validated by `parseSourceTool`; `limit` is
 * parsed as a base-10 integer (and may be NaN); every other field is
 * copied through unchanged.
 *
 * @param {Object} options - parsed CLI options.
 * @returns {Object} report filters.
 */
function buildFilters(options) {
  const {
    since,
    until,
    toolName,
    canonicalType,
    errorsOnly,
    category,
    model,
    project
  } = options;
  const source = parseSourceTool(options.source);
  const limit = Number.parseInt(options.limit, 10);
  return {
    source,
    since,
    until,
    limit,
    toolName,
    canonicalType,
    errorsOnly,
    category,
    model,
    project
  };
}
|
|
2502
|
+
|
|
2503
|
+
// src/cli/commands/compile.ts
|
|
2504
|
+
import { Command as Command2 } from "commander";
|
|
2505
|
+
|
|
2506
|
+
// src/services/compile.ts
|
|
2507
|
+
init_errors();
|
|
2508
|
+
import os2 from "os";
|
|
2509
|
+
import path16 from "path";
|
|
2510
|
+
|
|
2511
|
+
// src/importers/claude/index.ts
|
|
2512
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
2513
|
+
import path8 from "path";
|
|
2514
|
+
|
|
2515
|
+
// src/core/cas/index.ts
|
|
2516
|
+
init_db();
|
|
2517
|
+
import { mkdir as mkdir3, readFile as readFile2, writeFile as writeFile3 } from "fs/promises";
|
|
2518
|
+
import path5 from "path";
|
|
2519
|
+
|
|
2520
|
+
// src/core/cas/compress.ts
|
|
2521
|
+
import { compress as zstdCompress, decompress as zstdDecompress } from "zstd-napi";
|
|
2522
|
+
var COMPRESS_THRESHOLD_BYTES = 256;
|
|
2523
|
+
var ZSTD_LEVEL = 3;
|
|
2524
|
+
/**
 * Prepare bytes for storage. Inputs shorter than COMPRESS_THRESHOLD_BYTES
 * are copied uncompressed; anything else is compressed with zstd at
 * ZSTD_LEVEL.
 *
 * @param {Buffer|Uint8Array} input - raw bytes.
 * @returns {{bytes: Buffer, compression: "none"|"zstd"}}
 */
function compressBytes(input) {
  const copy = Buffer.from(input);
  if (input.byteLength < COMPRESS_THRESHOLD_BYTES) {
    return { bytes: copy, compression: "none" };
  }
  const compressed = zstdCompress(copy, { compressionLevel: ZSTD_LEVEL });
  return { bytes: compressed, compression: "zstd" };
}
|
|
2531
|
+
/**
 * Undo `compressBytes`. Input marked "none" is returned as-is (same
 * object); any other marker is decompressed with zstd.
 *
 * @param {Buffer} input - stored bytes.
 * @param {string} compression - compression marker stored with the object.
 * @returns {Buffer} original bytes.
 */
function decompressBytes(input, compression) {
  return compression === "none" ? input : zstdDecompress(input);
}
|
|
2535
|
+
|
|
2536
|
+
// src/core/cas/hash.ts
|
|
2537
|
+
import { createHash } from "crypto";
|
|
2538
|
+
import { blake3 } from "@noble/hashes/blake3";
|
|
2539
|
+
import { bytesToHex } from "@noble/hashes/utils";
|
|
2540
|
+
/**
 * BLAKE3 digest of `bytes` as a hex string.
 *
 * @param {Uint8Array} bytes
 * @returns {string} hex digest.
 */
function blake3Hex(bytes) {
  const digest = blake3(bytes);
  return bytesToHex(digest);
}
|
|
2543
|
+
/**
 * SHA-256 digest of `bytes` as a lowercase hex string.
 *
 * @param {string|Buffer|Uint8Array} bytes - data to hash.
 * @returns {string} 64-character hex digest.
 */
function sha256Hex(bytes) {
  const hasher = createHash("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
|
|
2546
|
+
/**
 * Build an object id by prefixing the hex hash with `blake3:`.
 *
 * @param {string} hashHex - hex digest.
 * @returns {string} object id.
 */
function objectIdFromHash(hashHex) {
  return "blake3:" + hashHex;
}
|
|
2549
|
+
/**
 * Relative storage path for an object: `objects/blake3/<aa>/<bb>/<hash><ext>`,
 * where `aa` and `bb` are the first and second pair of hash characters and
 * the extension is `.zst` for zstd-compressed objects and `.bin` otherwise.
 *
 * @param {string} hashHex - hex digest.
 * @param {string} compression - compression marker.
 * @returns {string} forward-slash separated relative path.
 */
function objectStoragePath(hashHex, compression) {
  const extension = compression === "zstd" ? ".zst" : ".bin";
  const segments = [
    "objects",
    "blake3",
    hashHex.slice(0, 2),
    hashHex.slice(2, 4),
    hashHex + extension
  ];
  return segments.join("/");
}
|
|
2555
|
+
|
|
2556
|
+
// src/core/cas/index.ts
|
|
2557
|
+
var ensuredDirs = /* @__PURE__ */ new Set();
|
|
2558
|
+
/**
 * Create `absoluteDir` (and any missing parents) unless this process has
 * already created it. Successfully created directories are remembered in
 * `ensuredDirs` so later calls skip the filesystem.
 *
 * @param {string} absoluteDir - directory to create.
 * @returns {Promise<void>}
 */
async function ensureDir(absoluteDir) {
  if (!ensuredDirs.has(absoluteDir)) {
    await mkdir3(absoluteDir, { recursive: true });
    // Recorded only after mkdir succeeded, so a failure is retried next time.
    ensuredDirs.add(absoluteDir);
  }
}
|
|
2563
|
+
/**
 * Store `bytes` in the bundle's content-addressed object store and return
 * the object id (`blake3:<hash>`).
 *
 * If an `objects` row with that id already exists nothing is written.
 * Otherwise the (possibly compressed) bytes are written under the bundle
 * directory and a metadata row is inserted.
 *
 * The insert uses `INSERT OR IGNORE`: the existence check and the insert
 * are separated by awaited filesystem calls, so two concurrent calls with
 * identical content can both pass the check. A plain INSERT would then
 * fail for the second caller; ignoring the conflict is safe because both
 * callers store identical content under the same id. This matches
 * `flushPendingObjects`.
 * NOTE(review): assumes `objects.object_id` has a uniqueness constraint —
 * confirm against the schema.
 *
 * @param {Object} bundle - open bundle (`db`, `path`).
 * @param {Buffer|Uint8Array} bytes - content to store.
 * @param {{mimeType?: string, encoding?: string}} [options] - optional metadata.
 * @returns {Promise<string>} object id.
 */
async function putBytes(bundle, bytes, options = {}) {
  const hash = blake3Hex(bytes);
  const objectId = objectIdFromHash(hash);
  const existing = prepare(
    bundle.db,
    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
compression, mime_type, encoding, storage_path, created_at
FROM objects WHERE object_id = ?`
  ).get(objectId);
  if (existing) return objectId;
  const { bytes: stored, compression } = compressBytes(bytes);
  const storagePath = objectStoragePath(hash, compression);
  const absolutePath = path5.join(bundle.path, storagePath);
  await ensureDir(path5.dirname(absolutePath));
  await writeFile3(absolutePath, stored);
  prepare(
    bundle.db,
    `INSERT OR IGNORE INTO objects (
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
compression, mime_type, encoding, storage_path, created_at
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
  ).run(
    objectId,
    hash,
    bytes.byteLength,
    // The compressed size is recorded only when compression was applied.
    compression === "zstd" ? stored.byteLength : null,
    compression,
    options.mimeType ?? null,
    options.encoding ?? null,
    storagePath,
    (/* @__PURE__ */ new Date()).toISOString()
  );
  return objectId;
}
|
|
2597
|
+
/**
 * Serialize `value` with `JSON.stringify` and store the UTF-8 bytes via
 * `putBytes`, tagged as `application/json` / `utf-8`.
 *
 * @param {Object} bundle - open bundle.
 * @param {*} value - JSON-serializable value.
 * @returns {Promise<string>} object id.
 */
async function putJson(bundle, value) {
  const payload = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return putBytes(bundle, payload, metadata);
}
|
|
2604
|
+
/**
 * Load an object's original bytes: look up its metadata row, read the
 * stored file under the bundle directory, and decompress it according to
 * the recorded compression.
 *
 * @param {Object} bundle - open bundle (`db`, `path`).
 * @param {string} objectId - id returned by a put/stage call.
 * @returns {Promise<Buffer>} decompressed content.
 * @throws {Error} `object not found: <id>` when no metadata row exists.
 */
async function getBytes(bundle, objectId) {
  const lookup = prepare(
    bundle.db,
    `SELECT object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
compression, mime_type, encoding, storage_path, created_at
FROM objects WHERE object_id = ?`
  );
  const meta = lookup.get(objectId);
  if (!meta) {
    throw new Error(`object not found: ${objectId}`);
  }
  const { storage_path: storagePath, compression } = meta;
  const stored = await readFile2(path5.join(bundle.path, storagePath));
  return decompressBytes(stored, compression);
}
|
|
2617
|
+
/**
 * Load an object and decode it as UTF-8 text.
 *
 * @param {Object} bundle - open bundle.
 * @param {string} objectId
 * @returns {Promise<string>} decoded content.
 */
async function getText(bundle, objectId) {
  const content = await getBytes(bundle, objectId);
  return content.toString("utf8");
}
|
|
2621
|
+
/**
 * Create an empty staging area for objects that will be flushed later.
 *
 * @returns {{byId: Map}} container whose `byId` map starts empty.
 */
function createPendingObjects() {
  const byId = /* @__PURE__ */ new Map();
  return { byId };
}
|
|
2624
|
+
/**
 * Stage `bytes` for a later flush and return its object id. Content that
 * is already staged under the same id is not staged again, so the first
 * staging's metadata is kept.
 *
 * @param {{byId: Map}} pending - staging area from `createPendingObjects`.
 * @param {Buffer|Uint8Array} bytes - content; non-Buffer input is copied into a Buffer.
 * @param {{mimeType?: string, encoding?: string}} [options] - optional metadata.
 * @returns {string} object id.
 */
function stageBytes(pending, bytes, options = {}) {
  const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  const hash = blake3Hex(buf);
  const objectId = objectIdFromHash(hash);
  if (pending.byId.has(objectId)) {
    return objectId;
  }
  const { mimeType = null, encoding = null } = {
    mimeType: options.mimeType ?? void 0,
    encoding: options.encoding ?? void 0
  };
  pending.byId.set(objectId, { objectId, hash, bytes: buf, mimeType, encoding });
  return objectId;
}
|
|
2639
|
+
/**
 * Stage UTF-8 text for a later flush. The mime type defaults to
 * `text/plain; charset=utf-8`; the encoding is always recorded as `utf-8`.
 *
 * @param {{byId: Map}} pending - staging area.
 * @param {string} text
 * @param {{mimeType?: string}} [options]
 * @returns {string} object id.
 */
function stageText(pending, text, options = {}) {
  const mimeType = options.mimeType ?? "text/plain; charset=utf-8";
  const encoded = Buffer.from(text, "utf8");
  return stageBytes(pending, encoded, { mimeType, encoding: "utf-8" });
}
|
|
2645
|
+
/**
 * Stage the JSON serialization of `value` for a later flush, tagged as
 * `application/json` / `utf-8`.
 *
 * @param {{byId: Map}} pending - staging area.
 * @param {*} value - JSON-serializable value.
 * @returns {string} object id.
 */
function stageJson(pending, value) {
  const encoded = Buffer.from(JSON.stringify(value), "utf8");
  const metadata = { mimeType: "application/json", encoding: "utf-8" };
  return stageBytes(pending, encoded, metadata);
}
|
|
2651
|
+
/**
 * Persist every staged object that the bundle does not already have.
 *
 * Steps: look up which staged ids already exist in `objects`; compress the
 * rest and compute their storage paths; write the files via
 * `writeFilesParallel`; then insert one metadata row per written object
 * (conflicts ignored), all sharing one `created_at` timestamp.
 *
 * @param {Object} bundle - open bundle (`db`, `path`).
 * @param {{byId: Map}} pending - staging area filled by the stage* helpers.
 * @returns {Promise<void>}
 */
async function flushPendingObjects(bundle, pending) {
  if (pending.byId.size === 0) return;
  const alreadyStored = queryExistingObjectIds(bundle, [...pending.byId.keys()]);
  const toWrite = [...pending.byId.values()]
    .filter((staged) => !alreadyStored.has(staged.objectId))
    .map((staged) => {
      const { bytes: compressedBytes, compression } = compressBytes(staged.bytes);
      const storagePath = objectStoragePath(staged.hash, compression);
      return {
        staged,
        compression,
        compressedBytes,
        storagePath,
        absolutePath: path5.join(bundle.path, storagePath)
      };
    });
  if (toWrite.length > 0) {
    // Files go to disk before their metadata rows are inserted.
    await writeFilesParallel(toWrite);
  }
  const insertObject = prepare(
    bundle.db,
    `INSERT OR IGNORE INTO objects (
object_id, hash_alg, hash, size_bytes, compressed_size_bytes,
compression, mime_type, encoding, storage_path, created_at
) VALUES (?, 'blake3', ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const now = (/* @__PURE__ */ new Date()).toISOString();
  toWrite.forEach(({ staged, compression, compressedBytes, storagePath }) => {
    insertObject.run(
      staged.objectId,
      staged.hash,
      staged.bytes.byteLength,
      compression === "zstd" ? compressedBytes.byteLength : null,
      compression,
      staged.mimeType,
      staged.encoding,
      storagePath,
      now
    );
  });
}
|
|
2693
|
+
/**
 * Return the subset of `ids` that already have a row in `objects`.
 * The ids are queried in batches of 500 placeholders per statement
 * (presumably to stay under SQLite's bound-parameter limit — confirm).
 *
 * @param {Object} bundle - open bundle (`db`).
 * @param {string[]} ids - candidate object ids.
 * @returns {Set<string>} ids found in the database.
 */
function queryExistingObjectIds(bundle, ids) {
  const found = /* @__PURE__ */ new Set();
  const CHUNK = 500;
  let offset = 0;
  while (offset < ids.length) {
    const batch = ids.slice(offset, offset + CHUNK);
    const placeholders = Array(batch.length).fill("?").join(",");
    const rows = bundle.db
      .prepare(`SELECT object_id FROM objects WHERE object_id IN (${placeholders})`)
      .all(...batch);
    rows.forEach((row) => found.add(row.object_id));
    offset += CHUNK;
  }
  return found;
}
|
|
1645
|
-
|
|
1646
|
-
|
|
2707
|
+
var FS_WRITE_CONCURRENCY = 16;
|
|
2708
|
+
/**
 * Write every task's `compressedBytes` to its `absolutePath`, creating
 * parent directories as needed, with at most FS_WRITE_CONCURRENCY writes
 * in flight. Workers pull tasks from a shared index until none remain.
 *
 * @param {Array<{absolutePath: string, compressedBytes: Buffer}>} tasks
 * @returns {Promise<void>} resolves when every file is written; rejects on the first failure.
 */
async function writeFilesParallel(tasks) {
  let nextIndex = 0;
  const runWorker = async () => {
    // The index is claimed synchronously, so no two workers take the same task.
    for (let i = nextIndex++; i < tasks.length; i = nextIndex++) {
      const { absolutePath, compressedBytes } = tasks[i];
      await ensureDir(path5.dirname(absolutePath));
      await writeFile3(absolutePath, compressedBytes);
    }
  };
  const workerCount = Math.min(FS_WRITE_CONCURRENCY, tasks.length);
  const workers = Array.from({ length: workerCount }, () => runWorker());
  await Promise.all(workers);
}
|
|
1648
2727
|
|
|
1649
|
-
// src/services/compile.ts
|
|
1650
|
-
init_errors();
|
|
1651
|
-
import os2 from "os";
|
|
1652
|
-
import path14 from "path";
|
|
1653
|
-
|
|
1654
2728
|
// src/importers/claude/index.ts
|
|
1655
|
-
init_cas();
|
|
1656
2729
|
init_db();
|
|
1657
|
-
import { readFile as readFile4 } from "fs/promises";
|
|
1658
|
-
import path5 from "path";
|
|
1659
2730
|
|
|
1660
2731
|
// src/core/domain/ids.ts
|
|
1661
|
-
init_hash();
|
|
1662
2732
|
var ID_PREFIX_BYTES = 16;
|
|
1663
2733
|
function tupleId(parts) {
|
|
1664
2734
|
return sha256Hex(parts.join("\0")).slice(0, ID_PREFIX_BYTES * 2);
|
|
@@ -1704,7 +2774,6 @@ function importBatchId(sourceTool, startedAtIso) {
|
|
|
1704
2774
|
init_errors();
|
|
1705
2775
|
|
|
1706
2776
|
// src/core/ingest/batch.ts
|
|
1707
|
-
init_cas();
|
|
1708
2777
|
init_db();
|
|
1709
2778
|
function emptyCounts() {
|
|
1710
2779
|
return {
|
|
@@ -1772,12 +2841,9 @@ async function recordError(bundle, batchId, args) {
|
|
|
1772
2841
|
}
|
|
1773
2842
|
|
|
1774
2843
|
// src/core/ingest/idempotency.ts
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
init_cas();
|
|
2844
|
+
import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile4 } from "fs/promises";
|
|
2845
|
+
import path6 from "path";
|
|
1778
2846
|
init_db();
|
|
1779
|
-
import { access as access2, readFile as readFile3, stat as stat2, writeFile as writeFile3 } from "fs/promises";
|
|
1780
|
-
import path3 from "path";
|
|
1781
2847
|
async function registerSourceFile(bundle, args) {
|
|
1782
2848
|
const st = await stat2(args.absolutePath);
|
|
1783
2849
|
const size = st.size;
|
|
@@ -1861,10 +2927,10 @@ async function preserveRawSourceBytes(bundle, bytes) {
|
|
|
1861
2927
|
const objectId = objectIdFromHash(hash);
|
|
1862
2928
|
const { bytes: stored, compression } = compressBytes(bytes);
|
|
1863
2929
|
const storagePath = rawSourceStoragePath(hash, compression);
|
|
1864
|
-
const absolutePath =
|
|
1865
|
-
await ensureDir(
|
|
2930
|
+
const absolutePath = path6.join(bundle.path, storagePath);
|
|
2931
|
+
await ensureDir(path6.dirname(absolutePath));
|
|
1866
2932
|
if (!await fileExists(absolutePath)) {
|
|
1867
|
-
await
|
|
2933
|
+
await writeFile4(absolutePath, stored);
|
|
1868
2934
|
}
|
|
1869
2935
|
const existing = prepare(
|
|
1870
2936
|
bundle.db,
|
|
@@ -1906,12 +2972,12 @@ async function fileExists(filePath) {
|
|
|
1906
2972
|
|
|
1907
2973
|
// src/importers/claude/discover.ts
|
|
1908
2974
|
import { readdir } from "fs/promises";
|
|
1909
|
-
import
|
|
2975
|
+
import path7 from "path";
|
|
1910
2976
|
async function* discoverClaudeFiles(root) {
|
|
1911
2977
|
const projectDirs = await readdirSafe(root);
|
|
1912
2978
|
for (const project of projectDirs) {
|
|
1913
2979
|
if (!project.isDirectory()) continue;
|
|
1914
|
-
const projectRoot =
|
|
2980
|
+
const projectRoot = path7.join(root, project.name);
|
|
1915
2981
|
yield* walkProject(projectRoot, project.name);
|
|
1916
2982
|
}
|
|
1917
2983
|
}
|
|
@@ -1920,7 +2986,7 @@ async function* walkProject(projectRoot, projectSlug) {
|
|
|
1920
2986
|
for (const entry of entries) {
|
|
1921
2987
|
if (entry.isFile() && entry.name.endsWith(".jsonl")) {
|
|
1922
2988
|
yield {
|
|
1923
|
-
filePath:
|
|
2989
|
+
filePath: path7.join(projectRoot, entry.name),
|
|
1924
2990
|
projectSlug,
|
|
1925
2991
|
isSubagent: false,
|
|
1926
2992
|
parentSessionId: null,
|
|
@@ -1930,18 +2996,18 @@ async function* walkProject(projectRoot, projectSlug) {
|
|
|
1930
2996
|
continue;
|
|
1931
2997
|
}
|
|
1932
2998
|
if (entry.isDirectory()) {
|
|
1933
|
-
const subagentsDir =
|
|
2999
|
+
const subagentsDir = path7.join(projectRoot, entry.name, "subagents");
|
|
1934
3000
|
const subagentEntries = await readdirSafe(subagentsDir);
|
|
1935
3001
|
for (const sub of subagentEntries) {
|
|
1936
3002
|
if (!sub.isFile() || !sub.name.endsWith(".jsonl")) continue;
|
|
1937
3003
|
if (!sub.name.startsWith("agent-")) continue;
|
|
1938
3004
|
const agentId = sub.name.slice("agent-".length, -".jsonl".length);
|
|
1939
|
-
const metaCandidate =
|
|
3005
|
+
const metaCandidate = path7.join(subagentsDir, `agent-${agentId}.meta.json`);
|
|
1940
3006
|
const metaExists = subagentEntries.some(
|
|
1941
3007
|
(e) => e.isFile() && e.name === `agent-${agentId}.meta.json`
|
|
1942
3008
|
);
|
|
1943
3009
|
yield {
|
|
1944
|
-
filePath:
|
|
3010
|
+
filePath: path7.join(subagentsDir, sub.name),
|
|
1945
3011
|
projectSlug,
|
|
1946
3012
|
isSubagent: true,
|
|
1947
3013
|
parentSessionId: entry.name,
|
|
@@ -2061,7 +3127,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
|
|
|
2061
3127
|
const counts = emptyFileCounts();
|
|
2062
3128
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
2063
3129
|
sourceTool: "claude",
|
|
2064
|
-
absolutePath:
|
|
3130
|
+
absolutePath: path8.resolve(file.filePath),
|
|
2065
3131
|
fileKind: "jsonl",
|
|
2066
3132
|
workspaceHint: file.projectSlug
|
|
2067
3133
|
});
|
|
@@ -2161,7 +3227,7 @@ async function compileClaudeFile(bundle, batch, file, logger) {
|
|
|
2161
3227
|
pending.session.parent_session_id_pending = parentSid;
|
|
2162
3228
|
}
|
|
2163
3229
|
}
|
|
2164
|
-
const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${
|
|
3230
|
+
const sessionId2 = pending.session?.session_id ?? sessionId("claude", `unknown:${path8.basename(file.filePath)}`);
|
|
2165
3231
|
const type = typeof parsed.type === "string" ? parsed.type : null;
|
|
2166
3232
|
if (type === "user" || type === "assistant") {
|
|
2167
3233
|
const msgRole = type === "user" ? "user" : "assistant";
|
|
@@ -2903,15 +3969,14 @@ function flushPending(bundle, pending, meta) {
|
|
|
2903
3969
|
}
|
|
2904
3970
|
|
|
2905
3971
|
// src/importers/codex/index.ts
|
|
2906
|
-
init_cas();
|
|
2907
|
-
init_db();
|
|
2908
3972
|
import { readFile as readFile5 } from "fs/promises";
|
|
2909
|
-
import
|
|
3973
|
+
import path10 from "path";
|
|
3974
|
+
init_db();
|
|
2910
3975
|
init_errors();
|
|
2911
3976
|
|
|
2912
3977
|
// src/importers/codex/discover.ts
|
|
2913
3978
|
import { readdir as readdir2 } from "fs/promises";
|
|
2914
|
-
import
|
|
3979
|
+
import path9 from "path";
|
|
2915
3980
|
async function* discoverCodexSessions(root) {
|
|
2916
3981
|
yield* walk(root);
|
|
2917
3982
|
}
|
|
@@ -2923,7 +3988,7 @@ async function* walk(dir) {
|
|
|
2923
3988
|
return;
|
|
2924
3989
|
}
|
|
2925
3990
|
for (const entry of entries) {
|
|
2926
|
-
const full =
|
|
3991
|
+
const full = path9.join(dir, entry.name);
|
|
2927
3992
|
if (entry.isDirectory()) {
|
|
2928
3993
|
yield* walk(full);
|
|
2929
3994
|
} else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
|
|
@@ -3025,7 +4090,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
|
3025
4090
|
const counts = emptyFileCounts2();
|
|
3026
4091
|
const { row: sourceFileRow, alreadyKnown } = await registerSourceFile(bundle, {
|
|
3027
4092
|
sourceTool: "codex",
|
|
3028
|
-
absolutePath:
|
|
4093
|
+
absolutePath: path10.resolve(filePath),
|
|
3029
4094
|
fileKind: "jsonl"
|
|
3030
4095
|
});
|
|
3031
4096
|
if (alreadyKnown) {
|
|
@@ -3111,7 +4176,7 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
|
3111
4176
|
const payload = parsed.payload ?? {};
|
|
3112
4177
|
if (type === "session_meta") {
|
|
3113
4178
|
const meta = payload;
|
|
3114
|
-
const sourceSessionId = meta.id ??
|
|
4179
|
+
const sourceSessionId = meta.id ?? path10.basename(filePath, ".jsonl");
|
|
3115
4180
|
const sessionId3 = sessionId("codex", sourceSessionId);
|
|
3116
4181
|
if (!pending.session) {
|
|
3117
4182
|
const sub = parseSubagent(meta.source);
|
|
@@ -3143,11 +4208,11 @@ async function compileCodexFile(bundle, batch, filePath, logger) {
|
|
|
3143
4208
|
}
|
|
3144
4209
|
continue;
|
|
3145
4210
|
}
|
|
3146
|
-
const sessionId2 = pending.session?.session_id ?? sessionId("codex",
|
|
4211
|
+
const sessionId2 = pending.session?.session_id ?? sessionId("codex", path10.basename(filePath, ".jsonl"));
|
|
3147
4212
|
if (!pending.session) {
|
|
3148
4213
|
pending.session = {
|
|
3149
4214
|
session_id: sessionId2,
|
|
3150
|
-
source_session_id:
|
|
4215
|
+
source_session_id: path10.basename(filePath, ".jsonl"),
|
|
3151
4216
|
parent_session_id: null,
|
|
3152
4217
|
is_subagent: 0,
|
|
3153
4218
|
agent_role: null,
|
|
@@ -4044,25 +5109,24 @@ function flushPending2(bundle, pending, meta) {
|
|
|
4044
5109
|
}
|
|
4045
5110
|
|
|
4046
5111
|
// src/importers/cursor/index.ts
|
|
4047
|
-
|
|
4048
|
-
init_db();
|
|
4049
|
-
import path9 from "path";
|
|
5112
|
+
import path12 from "path";
|
|
4050
5113
|
import Database2 from "better-sqlite3";
|
|
5114
|
+
init_db();
|
|
4051
5115
|
init_errors();
|
|
4052
5116
|
|
|
4053
5117
|
// src/importers/cursor/discover.ts
|
|
4054
5118
|
import { readdir as readdir3 } from "fs/promises";
|
|
4055
|
-
import
|
|
5119
|
+
import path11 from "path";
|
|
4056
5120
|
async function* discoverCursorStores(root) {
|
|
4057
5121
|
const workspaces = await readdirSafe2(root);
|
|
4058
5122
|
for (const ws of workspaces) {
|
|
4059
5123
|
if (!ws.isDirectory()) continue;
|
|
4060
|
-
const wsPath =
|
|
5124
|
+
const wsPath = path11.join(root, ws.name);
|
|
4061
5125
|
const agents = await readdirSafe2(wsPath);
|
|
4062
5126
|
for (const ag of agents) {
|
|
4063
5127
|
if (!ag.isDirectory()) continue;
|
|
4064
|
-
const dbPath =
|
|
4065
|
-
const dbEntries = await readdirSafe2(
|
|
5128
|
+
const dbPath = path11.join(wsPath, ag.name, "store.db");
|
|
5129
|
+
const dbEntries = await readdirSafe2(path11.join(wsPath, ag.name));
|
|
4066
5130
|
const hasStoreDb = dbEntries.some((e) => e.isFile() && e.name === "store.db");
|
|
4067
5131
|
if (!hasStoreDb) continue;
|
|
4068
5132
|
yield {
|
|
@@ -4161,7 +5225,7 @@ async function compileCursorStore(bundle, batch, store, logger) {
|
|
|
4161
5225
|
const counts = emptyFileCounts3();
|
|
4162
5226
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
4163
5227
|
sourceTool: "cursor",
|
|
4164
|
-
absolutePath:
|
|
5228
|
+
absolutePath: path12.resolve(store.filePath),
|
|
4165
5229
|
fileKind: "sqlite",
|
|
4166
5230
|
workspaceHint: store.workspaceId
|
|
4167
5231
|
});
|
|
@@ -4763,29 +5827,27 @@ function flushPending3(bundle, pending) {
|
|
|
4763
5827
|
}
|
|
4764
5828
|
|
|
4765
5829
|
// src/importers/gemini/index.ts
|
|
4766
|
-
init_hash();
|
|
4767
|
-
init_cas();
|
|
4768
|
-
init_db();
|
|
4769
5830
|
import { readFile as readFile7 } from "fs/promises";
|
|
4770
|
-
import
|
|
5831
|
+
import path14 from "path";
|
|
5832
|
+
init_db();
|
|
4771
5833
|
init_errors();
|
|
4772
5834
|
|
|
4773
5835
|
// src/importers/gemini/discover.ts
|
|
4774
5836
|
import { readFile as readFile6, readdir as readdir4 } from "fs/promises";
|
|
4775
|
-
import
|
|
5837
|
+
import path13 from "path";
|
|
4776
5838
|
async function* discoverGeminiChats(root) {
|
|
4777
5839
|
const entries = await readdirSafe3(root);
|
|
4778
5840
|
for (const entry of entries) {
|
|
4779
5841
|
if (!entry.isDirectory()) continue;
|
|
4780
5842
|
if (entry.name === "bin") continue;
|
|
4781
|
-
const projectRoot = await readProjectRoot(
|
|
4782
|
-
const chatsDir =
|
|
5843
|
+
const projectRoot = await readProjectRoot(path13.join(root, entry.name));
|
|
5844
|
+
const chatsDir = path13.join(root, entry.name, "chats");
|
|
4783
5845
|
const chatEntries = await readdirSafe3(chatsDir);
|
|
4784
5846
|
for (const c of chatEntries) {
|
|
4785
5847
|
if (!c.isFile()) continue;
|
|
4786
5848
|
if (!c.name.startsWith("session-") || !c.name.endsWith(".json")) continue;
|
|
4787
5849
|
yield {
|
|
4788
|
-
filePath:
|
|
5850
|
+
filePath: path13.join(chatsDir, c.name),
|
|
4789
5851
|
projectDir: entry.name,
|
|
4790
5852
|
projectRoot
|
|
4791
5853
|
};
|
|
@@ -4794,7 +5856,7 @@ async function* discoverGeminiChats(root) {
|
|
|
4794
5856
|
}
|
|
4795
5857
|
async function readProjectRoot(dir) {
|
|
4796
5858
|
try {
|
|
4797
|
-
const text = await readFile6(
|
|
5859
|
+
const text = await readFile6(path13.join(dir, ".project_root"), "utf8");
|
|
4798
5860
|
return text.replace(/\n+$/, "").trim() || null;
|
|
4799
5861
|
} catch {
|
|
4800
5862
|
return null;
|
|
@@ -4888,7 +5950,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
|
|
|
4888
5950
|
const counts = emptyFileCounts4();
|
|
4889
5951
|
const { row: sourceFile, alreadyKnown } = await registerSourceFile(bundle, {
|
|
4890
5952
|
sourceTool: "gemini",
|
|
4891
|
-
absolutePath:
|
|
5953
|
+
absolutePath: path14.resolve(file.filePath),
|
|
4892
5954
|
fileKind: "json",
|
|
4893
5955
|
workspaceHint: file.projectDir
|
|
4894
5956
|
});
|
|
@@ -4941,7 +6003,7 @@ async function compileGeminiFile(bundle, batch, file, logger) {
|
|
|
4941
6003
|
project: null,
|
|
4942
6004
|
objects
|
|
4943
6005
|
};
|
|
4944
|
-
const sourceSid = parsed.sessionId ??
|
|
6006
|
+
const sourceSid = parsed.sessionId ?? path14.basename(file.filePath, ".json");
|
|
4945
6007
|
const sessionPk = sessionId("gemini", sourceSid);
|
|
4946
6008
|
if (file.projectRoot) {
|
|
4947
6009
|
pending.project = {
|
|
@@ -5522,143 +6584,6 @@ function flushPending4(bundle, pending) {
|
|
|
5522
6584
|
}
|
|
5523
6585
|
}
|
|
5524
6586
|
|
|
5525
|
-
// src/services/export/parquet.ts
|
|
5526
|
-
import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
|
|
5527
|
-
import path12 from "path";
|
|
5528
|
-
import { DuckDBConnection } from "@duckdb/node-api";
|
|
5529
|
-
init_errors();
|
|
5530
|
-
var PARQUET_TABLES = [
|
|
5531
|
-
"objects",
|
|
5532
|
-
"source_files",
|
|
5533
|
-
"import_batches",
|
|
5534
|
-
"raw_records",
|
|
5535
|
-
"import_errors",
|
|
5536
|
-
"uncertainties",
|
|
5537
|
-
"projects",
|
|
5538
|
-
"sessions",
|
|
5539
|
-
"turns",
|
|
5540
|
-
"events",
|
|
5541
|
-
"messages",
|
|
5542
|
-
"content_blocks",
|
|
5543
|
-
"tool_calls",
|
|
5544
|
-
"tool_results",
|
|
5545
|
-
"artifacts",
|
|
5546
|
-
"edges",
|
|
5547
|
-
"search_docs"
|
|
5548
|
-
];
|
|
5549
|
-
async function exportBundleParquet(options) {
|
|
5550
|
-
const snapshot = await openBundleSnapshot(options.bundlePath);
|
|
5551
|
-
const outDir = path12.resolve(options.outDir ?? snapshot.defaultOutDir);
|
|
5552
|
-
await mkdir3(outDir, { recursive: true });
|
|
5553
|
-
const files = Object.fromEntries(
|
|
5554
|
-
PARQUET_TABLES.map((table) => [table, path12.join(outDir, `${table}.parquet`)])
|
|
5555
|
-
);
|
|
5556
|
-
const manifestPath = path12.join(outDir, "manifest.json");
|
|
5557
|
-
for (const file of [...Object.values(files), manifestPath]) {
|
|
5558
|
-
await rm(file, { force: true });
|
|
5559
|
-
}
|
|
5560
|
-
const connection = await createDuckDbConnection();
|
|
5561
|
-
try {
|
|
5562
|
-
await attachSqlite(connection, snapshot.dbPath);
|
|
5563
|
-
for (const table of PARQUET_TABLES) {
|
|
5564
|
-
await connection.run(
|
|
5565
|
-
`COPY (SELECT * FROM prosa.${quoteIdentifier(table)}) TO ${sqlString(files[table])} (FORMAT parquet)`
|
|
5566
|
-
);
|
|
5567
|
-
}
|
|
5568
|
-
} finally {
|
|
5569
|
-
connection.closeSync();
|
|
5570
|
-
}
|
|
5571
|
-
const manifest = {
|
|
5572
|
-
exported_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5573
|
-
source_db: snapshot.dbPath,
|
|
5574
|
-
schema_version: snapshot.schemaVersion,
|
|
5575
|
-
parser_version: snapshot.parserVersion,
|
|
5576
|
-
tables: Object.fromEntries(
|
|
5577
|
-
PARQUET_TABLES.map((table) => [
|
|
5578
|
-
table,
|
|
5579
|
-
{
|
|
5580
|
-
file: path12.basename(files[table]),
|
|
5581
|
-
rows: snapshot.counts[table]
|
|
5582
|
-
}
|
|
5583
|
-
])
|
|
5584
|
-
)
|
|
5585
|
-
};
|
|
5586
|
-
await writeFile4(manifestPath, `${JSON.stringify(manifest, null, 2)}
|
|
5587
|
-
`, "utf8");
|
|
5588
|
-
return { outDir, manifestPath, files, counts: snapshot.counts };
|
|
5589
|
-
}
|
|
5590
|
-
async function queryDuckDbParquet(options) {
|
|
5591
|
-
const parquetDir = path12.resolve(options.parquetDir);
|
|
5592
|
-
const connection = await createDuckDbConnection();
|
|
5593
|
-
try {
|
|
5594
|
-
for (const table of PARQUET_TABLES) {
|
|
5595
|
-
await connection.run(
|
|
5596
|
-
`CREATE OR REPLACE VIEW ${quoteIdentifier(table)} AS SELECT * FROM read_parquet(${sqlString(
|
|
5597
|
-
path12.join(parquetDir, `${table}.parquet`)
|
|
5598
|
-
)})`
|
|
5599
|
-
);
|
|
5600
|
-
}
|
|
5601
|
-
const reader = await connection.runAndReadAll(options.sql);
|
|
5602
|
-
return {
|
|
5603
|
-
columns: reader.deduplicatedColumnNames(),
|
|
5604
|
-
rows: reader.getRowObjectsJson()
|
|
5605
|
-
};
|
|
5606
|
-
} catch (error) {
|
|
5607
|
-
if (isMissingParquetError(error)) {
|
|
5608
|
-
throw new Error(
|
|
5609
|
-
`Parquet export not found in ${parquetDir}; run \`prosa export parquet --store <path>\` first`
|
|
5610
|
-
);
|
|
5611
|
-
}
|
|
5612
|
-
throw error;
|
|
5613
|
-
} finally {
|
|
5614
|
-
connection.closeSync();
|
|
5615
|
-
}
|
|
5616
|
-
}
|
|
5617
|
-
async function createDuckDbConnection() {
|
|
5618
|
-
return DuckDBConnection.create();
|
|
5619
|
-
}
|
|
5620
|
-
async function attachSqlite(connection, dbPath) {
|
|
5621
|
-
try {
|
|
5622
|
-
await connection.run("INSTALL sqlite");
|
|
5623
|
-
await connection.run("LOAD sqlite");
|
|
5624
|
-
await connection.run(`ATTACH ${sqlString(dbPath)} AS prosa (TYPE sqlite)`);
|
|
5625
|
-
} catch (error) {
|
|
5626
|
-
throw new Error(
|
|
5627
|
-
`DuckDB could not attach prosa.sqlite via the sqlite extension: ${getErrorMessage(error)}`
|
|
5628
|
-
);
|
|
5629
|
-
}
|
|
5630
|
-
}
|
|
5631
|
-
async function openBundleSnapshot(bundlePath) {
|
|
5632
|
-
const bundle = await openBundle(bundlePath);
|
|
5633
|
-
try {
|
|
5634
|
-
const counts = Object.fromEntries(
|
|
5635
|
-
PARQUET_TABLES.map((table) => {
|
|
5636
|
-
const row = bundle.db.prepare(`SELECT count(*) AS n FROM ${quoteIdentifier(table)}`).get();
|
|
5637
|
-
return [table, row?.n ?? 0];
|
|
5638
|
-
})
|
|
5639
|
-
);
|
|
5640
|
-
return {
|
|
5641
|
-
dbPath: bundle.paths.db,
|
|
5642
|
-
schemaVersion: bundle.manifest.schema_version,
|
|
5643
|
-
parserVersion: bundle.manifest.parser_version,
|
|
5644
|
-
defaultOutDir: bundle.paths.parquet,
|
|
5645
|
-
counts
|
|
5646
|
-
};
|
|
5647
|
-
} finally {
|
|
5648
|
-
closeBundle(bundle);
|
|
5649
|
-
}
|
|
5650
|
-
}
|
|
5651
|
-
function quoteIdentifier(value) {
|
|
5652
|
-
return `"${value.replace(/"/g, '""')}"`;
|
|
5653
|
-
}
|
|
5654
|
-
function sqlString(value) {
|
|
5655
|
-
return `'${value.replace(/'/g, "''")}'`;
|
|
5656
|
-
}
|
|
5657
|
-
function isMissingParquetError(error) {
|
|
5658
|
-
const message = getErrorMessage(error);
|
|
5659
|
-
return /No files found|does not exist|not found/i.test(message) && /\.parquet/i.test(message);
|
|
5660
|
-
}
|
|
5661
|
-
|
|
5662
6587
|
// src/services/compile.ts
|
|
5663
6588
|
init_indexing();
|
|
5664
6589
|
var COMPILE_PROVIDERS = [
|
|
@@ -5666,28 +6591,28 @@ var COMPILE_PROVIDERS = [
|
|
|
5666
6591
|
name: "codex",
|
|
5667
6592
|
description: "Import Codex CLI session histories into the bundle.",
|
|
5668
6593
|
pathHelp: "root of Codex CLI sessions",
|
|
5669
|
-
defaultSessionsPath: () =>
|
|
6594
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".codex", "sessions"),
|
|
5670
6595
|
compile: compileCodex
|
|
5671
6596
|
},
|
|
5672
6597
|
{
|
|
5673
6598
|
name: "claude",
|
|
5674
6599
|
description: "Import Claude Code project histories into the bundle.",
|
|
5675
6600
|
pathHelp: "root of Claude Code projects",
|
|
5676
|
-
defaultSessionsPath: () =>
|
|
6601
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".claude", "projects"),
|
|
5677
6602
|
compile: compileClaude
|
|
5678
6603
|
},
|
|
5679
6604
|
{
|
|
5680
6605
|
name: "gemini",
|
|
5681
6606
|
description: "Import Gemini CLI session histories into the bundle.",
|
|
5682
6607
|
pathHelp: "root of Gemini CLI tmp dir",
|
|
5683
|
-
defaultSessionsPath: () =>
|
|
6608
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".gemini", "tmp"),
|
|
5684
6609
|
compile: compileGemini
|
|
5685
6610
|
},
|
|
5686
6611
|
{
|
|
5687
6612
|
name: "cursor",
|
|
5688
6613
|
description: "Import Cursor agent stores into the bundle.",
|
|
5689
6614
|
pathHelp: "root of Cursor agent stores",
|
|
5690
|
-
defaultSessionsPath: () =>
|
|
6615
|
+
defaultSessionsPath: () => path16.join(os2.homedir(), ".cursor", "chats"),
|
|
5691
6616
|
compile: compileCursor
|
|
5692
6617
|
}
|
|
5693
6618
|
];
|
|
@@ -5700,20 +6625,20 @@ function getCompileProvider(source) {
|
|
|
5700
6625
|
}
|
|
5701
6626
|
function resolveCompilePath(p) {
|
|
5702
6627
|
if (p === "~") return os2.homedir();
|
|
5703
|
-
if (p.startsWith("~/")) return
|
|
5704
|
-
return
|
|
6628
|
+
if (p.startsWith("~/")) return path16.join(os2.homedir(), p.slice(2));
|
|
6629
|
+
return path16.resolve(p);
|
|
5705
6630
|
}
|
|
5706
6631
|
async function runCompileImports(options) {
|
|
5707
|
-
const { bundle, providers,
|
|
6632
|
+
const { bundle, providers, logger } = options;
|
|
6633
|
+
const overwrite = options.overwrite === true;
|
|
5708
6634
|
let importedAny = false;
|
|
5709
6635
|
const summaries = [];
|
|
5710
6636
|
let tantivy = null;
|
|
5711
6637
|
let tantivyError = null;
|
|
6638
|
+
let fts5Error = null;
|
|
5712
6639
|
try {
|
|
5713
|
-
|
|
5714
|
-
|
|
5715
|
-
disableFts5Triggers(bundle);
|
|
5716
|
-
}
|
|
6640
|
+
logger?.info("disabling FTS5 triggers for bulk rebuild");
|
|
6641
|
+
disableFts5Triggers(bundle);
|
|
5717
6642
|
for (const provider of providers) {
|
|
5718
6643
|
const sourcePath = resolveCompilePath(options.sessionsPath ?? provider.defaultSessionsPath());
|
|
5719
6644
|
const providerLogger = logger?.child({
|
|
@@ -5740,15 +6665,23 @@ async function runCompileImports(options) {
|
|
|
5740
6665
|
summaries.push(summary);
|
|
5741
6666
|
options.onProviderComplete?.(summary);
|
|
5742
6667
|
}
|
|
5743
|
-
|
|
5744
|
-
|
|
5745
|
-
|
|
5746
|
-
|
|
5747
|
-
|
|
5748
|
-
|
|
6668
|
+
const shouldRebuildIndexes = importedAny || overwrite;
|
|
6669
|
+
if (shouldRebuildIndexes) {
|
|
6670
|
+
logger?.info(
|
|
6671
|
+
{ changed: importedAny, overwrite },
|
|
6672
|
+
importedAny ? "marking indexes" : "overwrite forces rebuild despite no new imports"
|
|
6673
|
+
);
|
|
6674
|
+
markIndexesAfterImport(bundle, { changed: true });
|
|
6675
|
+
try {
|
|
6676
|
+
logger?.info("rebuilding fts5 index");
|
|
6677
|
+
rebuildFts5Index(bundle);
|
|
6678
|
+
} catch (error) {
|
|
6679
|
+
fts5Error = getErrorMessage(error);
|
|
6680
|
+
logger?.error({ err: error }, "fts5 rebuild failed; SQLite data is intact");
|
|
6681
|
+
}
|
|
5749
6682
|
try {
|
|
5750
|
-
logger?.info("rebuilding tantivy index");
|
|
5751
|
-
const status = await rebuildTantivyIndex(bundle);
|
|
6683
|
+
logger?.info({ overwrite }, "rebuilding tantivy index");
|
|
6684
|
+
const status = await rebuildTantivyIndex(bundle, { overwrite });
|
|
5752
6685
|
tantivy = { indexedDocCount: status.indexed_doc_count };
|
|
5753
6686
|
options.onTantivyComplete?.(tantivy);
|
|
5754
6687
|
} catch (error) {
|
|
@@ -5757,16 +6690,14 @@ async function runCompileImports(options) {
|
|
|
5757
6690
|
}
|
|
5758
6691
|
}
|
|
5759
6692
|
} finally {
|
|
5760
|
-
|
|
5761
|
-
logger?.info("re-enabling FTS5 triggers");
|
|
5762
|
-
enableFts5Triggers(bundle);
|
|
5763
|
-
}
|
|
6693
|
+
enableFts5Triggers(bundle);
|
|
5764
6694
|
}
|
|
5765
6695
|
return {
|
|
5766
6696
|
providers: summaries,
|
|
5767
6697
|
importedAny,
|
|
5768
6698
|
tantivy,
|
|
5769
|
-
tantivyError
|
|
6699
|
+
tantivyError,
|
|
6700
|
+
fts5Error
|
|
5770
6701
|
};
|
|
5771
6702
|
}
|
|
5772
6703
|
async function exportCompileParquet(options) {
|
|
@@ -5809,7 +6740,7 @@ function createCliLogger(options) {
|
|
|
5809
6740
|
// src/cli/commands/compile.ts
|
|
5810
6741
|
function compileCommand() {
|
|
5811
6742
|
const command = addCompileLogOptions(
|
|
5812
|
-
new
|
|
6743
|
+
new Command2("compile").description(
|
|
5813
6744
|
"Import session histories from one agent CLI into the bundle."
|
|
5814
6745
|
)
|
|
5815
6746
|
);
|
|
@@ -5822,27 +6753,35 @@ function compileCommand() {
|
|
|
5822
6753
|
return command;
|
|
5823
6754
|
}
|
|
5824
6755
|
function compileAllCommand() {
|
|
5825
|
-
return addCompileLogOptions(new
|
|
6756
|
+
return addCompileLogOptions(new Command2("compile-all")).description("Import all agent CLI session histories using default source paths.").option(
|
|
6757
|
+
"--overwrite",
|
|
6758
|
+
"force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
|
|
6759
|
+
false
|
|
6760
|
+
).action(async (options) => {
|
|
5826
6761
|
await runCompiles({
|
|
5827
6762
|
providers: COMPILE_PROVIDERS,
|
|
5828
6763
|
storePath: defaultBundlePath(),
|
|
5829
|
-
|
|
6764
|
+
overwrite: options.overwrite,
|
|
5830
6765
|
logOptions: options
|
|
5831
6766
|
});
|
|
5832
6767
|
});
|
|
5833
6768
|
}
|
|
5834
6769
|
function providerCompileCommand(provider) {
|
|
5835
|
-
return addCompileLogOptions(new
|
|
6770
|
+
return addCompileLogOptions(new Command2(provider.name)).description(provider.description).option(
|
|
5836
6771
|
"--sessions-path <path>",
|
|
5837
6772
|
`${provider.pathHelp} (default: ${provider.defaultSessionsPath()})`,
|
|
5838
6773
|
provider.defaultSessionsPath()
|
|
5839
|
-
).option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
6774
|
+
).option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
6775
|
+
"--overwrite",
|
|
6776
|
+
"force a full rebuild of derived indexes after import (Tantivy from scratch; FTS5 and Parquet are always full)",
|
|
6777
|
+
false
|
|
6778
|
+
).action(
|
|
5840
6779
|
async (options, command) => {
|
|
5841
6780
|
await runCompiles({
|
|
5842
6781
|
providers: [provider],
|
|
5843
6782
|
storePath: options.store,
|
|
5844
|
-
deferIndex: options.deferIndex ?? false,
|
|
5845
6783
|
sessionsPath: options.sessionsPath,
|
|
6784
|
+
overwrite: options.overwrite,
|
|
5846
6785
|
logOptions: command.optsWithGlobals()
|
|
5847
6786
|
});
|
|
5848
6787
|
}
|
|
@@ -5861,8 +6800,8 @@ async function runCompiles(options) {
|
|
|
5861
6800
|
const result = await runCompileImports({
|
|
5862
6801
|
bundle,
|
|
5863
6802
|
providers: options.providers,
|
|
5864
|
-
deferIndex: options.deferIndex,
|
|
5865
6803
|
sessionsPath: options.sessionsPath,
|
|
6804
|
+
overwrite: options.overwrite,
|
|
5866
6805
|
logger,
|
|
5867
6806
|
onProviderComplete: printCounts,
|
|
5868
6807
|
onTantivyComplete: (status) => {
|
|
@@ -5875,7 +6814,8 @@ async function runCompiles(options) {
|
|
|
5875
6814
|
closeBundle(bundle);
|
|
5876
6815
|
logger.info({ store_path: storePath }, "bundle closed");
|
|
5877
6816
|
}
|
|
5878
|
-
|
|
6817
|
+
const shouldExportParquet = importedAny || options.overwrite === true;
|
|
6818
|
+
if (shouldExportParquet) {
|
|
5879
6819
|
try {
|
|
5880
6820
|
const result = await exportCompileParquet({ storePath, logger });
|
|
5881
6821
|
process.stdout.write(`parquet: wrote ${result.tableCount} tables to ${result.outDir}
|
|
@@ -5899,11 +6839,10 @@ function printCounts(summary) {
|
|
|
5899
6839
|
|
|
5900
6840
|
// src/cli/commands/export.ts
|
|
5901
6841
|
import { writeFile as writeFile6 } from "fs/promises";
|
|
5902
|
-
import
|
|
5903
|
-
import { Command as
|
|
6842
|
+
import path17 from "path";
|
|
6843
|
+
import { Command as Command3 } from "commander";
|
|
5904
6844
|
|
|
5905
6845
|
// src/services/export/markdown.ts
|
|
5906
|
-
init_cas();
|
|
5907
6846
|
async function exportSessionMarkdown(bundle, sessionId2) {
|
|
5908
6847
|
const session = bundle.db.prepare(
|
|
5909
6848
|
`SELECT session_id, source_tool, source_session_id, title, start_ts, end_ts,
|
|
@@ -6013,139 +6952,60 @@ function renderToolCall(c) {
|
|
|
6013
6952
|
return lines.join("\n");
|
|
6014
6953
|
}
|
|
6015
6954
|
|
|
6016
|
-
// src/cli/bundle.ts
|
|
6017
|
-
import path15 from "path";
|
|
6018
|
-
async function withBundle(storePath, fn) {
|
|
6019
|
-
const bundle = await openBundle(path15.resolve(storePath));
|
|
6020
|
-
try {
|
|
6021
|
-
return await fn(bundle);
|
|
6022
|
-
} finally {
|
|
6023
|
-
closeBundle(bundle);
|
|
6024
|
-
}
|
|
6025
|
-
}
|
|
6026
|
-
|
|
6027
6955
|
// src/cli/commands/export.ts
|
|
6028
6956
|
function exportCommand() {
|
|
6029
|
-
const session = new
|
|
6957
|
+
const session = new Command3("session").description("Export a single session to a human-readable format.").argument("<session-id>", "prosa session_id").requiredOption("--format <fmt>", 'currently only "markdown" is supported').option("--out <path>", "write to file instead of stdout").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (sessionId2, options) => {
|
|
6030
6958
|
if (options.format !== "markdown") {
|
|
6031
6959
|
throw new Error(`unsupported format: ${options.format} (try --format markdown)`);
|
|
6032
6960
|
}
|
|
6033
6961
|
await withBundle(options.store, async (bundle) => {
|
|
6034
6962
|
const markdown = await exportSessionMarkdown(bundle, sessionId2);
|
|
6035
6963
|
if (options.out) {
|
|
6036
|
-
await writeFile6(
|
|
6037
|
-
process.stdout.write(`wrote ${
|
|
6964
|
+
await writeFile6(path17.resolve(options.out), markdown, "utf8");
|
|
6965
|
+
process.stdout.write(`wrote ${path17.resolve(options.out)}
|
|
6038
6966
|
`);
|
|
6039
6967
|
} else {
|
|
6040
6968
|
process.stdout.write(markdown);
|
|
6041
6969
|
}
|
|
6042
6970
|
});
|
|
6043
6971
|
});
|
|
6044
|
-
const parquet = new
|
|
6972
|
+
const parquet = new Command3("parquet").description("Export canonical tables to derived Parquet files for analytics.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--out <path>", "output directory (default: <store>/parquet)").action(async (options) => {
|
|
6045
6973
|
const result = await exportBundleParquet({
|
|
6046
|
-
bundlePath:
|
|
6047
|
-
outDir: options.out ?
|
|
6974
|
+
bundlePath: path17.resolve(options.store),
|
|
6975
|
+
outDir: options.out ? path17.resolve(options.out) : void 0
|
|
6048
6976
|
});
|
|
6049
6977
|
process.stdout.write(`wrote parquet export to ${result.outDir}
|
|
6050
6978
|
`);
|
|
6051
6979
|
process.stdout.write(`manifest=${result.manifestPath}
|
|
6052
6980
|
`);
|
|
6053
6981
|
});
|
|
6054
|
-
return new
|
|
6982
|
+
return new Command3("export").description("Export sessions / search excerpts to readable formats.").addCommand(session).addCommand(parquet);
|
|
6055
6983
|
}
|
|
6056
6984
|
|
|
6057
6985
|
// src/cli/commands/index.ts
|
|
6058
|
-
import { Command as
|
|
6986
|
+
import { Command as Command4 } from "commander";
|
|
6059
6987
|
init_indexing();
|
|
6060
|
-
|
|
6061
|
-
// src/cli/output.ts
|
|
6062
|
-
var OUTPUT_FORMATS = ["interactive", "table", "json", "csv"];
|
|
6063
|
-
var COL_SEPARATOR = " ";
|
|
6064
|
-
var RULE_CHAR = "-";
|
|
6065
|
-
function parseOutputFormat(value, fallback) {
|
|
6066
|
-
if (value === void 0) return fallback;
|
|
6067
|
-
if (OUTPUT_FORMATS.includes(value)) return value;
|
|
6068
|
-
throw new Error(
|
|
6069
|
-
`invalid --output-format: ${value} (expected one of ${OUTPUT_FORMATS.join(", ")})`
|
|
6070
|
-
);
|
|
6071
|
-
}
|
|
6072
|
-
function printRows(rows, opts) {
|
|
6073
|
-
switch (opts.format) {
|
|
6074
|
-
case "json":
|
|
6075
|
-
printJson(rows, opts);
|
|
6076
|
-
return;
|
|
6077
|
-
case "csv":
|
|
6078
|
-
printCsv(rows, opts);
|
|
6079
|
-
return;
|
|
6080
|
-
case "table":
|
|
6081
|
-
case "interactive":
|
|
6082
|
-
printTable(rows, opts);
|
|
6083
|
-
return;
|
|
6084
|
-
}
|
|
6085
|
-
}
|
|
6086
|
-
function printJson(rows, opts) {
|
|
6087
|
-
const out = opts.meta ? { ...opts.meta, rows } : rows;
|
|
6088
|
-
process.stdout.write(`${JSON.stringify(out, null, 2)}
|
|
6089
|
-
`);
|
|
6090
|
-
}
|
|
6091
|
-
function printCsv(rows, opts) {
|
|
6092
|
-
const columns = opts.columns;
|
|
6093
|
-
process.stdout.write(`${columns.map(csvField).join(",")}
|
|
6094
|
-
`);
|
|
6095
|
-
for (const row of rows) {
|
|
6096
|
-
const record = row;
|
|
6097
|
-
const line = columns.map((column) => csvField(formatCell(record[column]))).join(",");
|
|
6098
|
-
process.stdout.write(`${line}
|
|
6099
|
-
`);
|
|
6100
|
-
}
|
|
6101
|
-
}
|
|
6102
|
-
function csvField(value) {
|
|
6103
|
-
if (/[",\n]/.test(value)) return `"${value.replace(/"/g, '""')}"`;
|
|
6104
|
-
return value;
|
|
6105
|
-
}
|
|
6106
|
-
function printTable(rows, opts) {
|
|
6107
|
-
const columns = opts.columns;
|
|
6108
|
-
const widths = columns.map((column) => column.length);
|
|
6109
|
-
const cells = rows.map((row) => {
|
|
6110
|
-
const record = row;
|
|
6111
|
-
return columns.map((column, index) => {
|
|
6112
|
-
const text = formatCell(record[column]);
|
|
6113
|
-
const width = widths[index] ?? 0;
|
|
6114
|
-
if (text.length > width) widths[index] = text.length;
|
|
6115
|
-
return text;
|
|
6116
|
-
});
|
|
6117
|
-
});
|
|
6118
|
-
const header = columns.map((column, index) => column.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
|
|
6119
|
-
const rule = columns.map((_, index) => RULE_CHAR.repeat(widths[index] ?? 0)).join(COL_SEPARATOR);
|
|
6120
|
-
process.stdout.write(`${header}
|
|
6121
|
-
${rule}
|
|
6122
|
-
`);
|
|
6123
|
-
for (const cellRow of cells) {
|
|
6124
|
-
const line = cellRow.map((cell, index) => cell.padEnd(widths[index] ?? 0)).join(COL_SEPARATOR);
|
|
6125
|
-
process.stdout.write(`${line}
|
|
6126
|
-
`);
|
|
6127
|
-
}
|
|
6128
|
-
}
|
|
6129
|
-
function formatCell(value) {
|
|
6130
|
-
if (value == null) return "";
|
|
6131
|
-
if (typeof value === "string") return value;
|
|
6132
|
-
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
6133
|
-
return JSON.stringify(value);
|
|
6134
|
-
}
|
|
6135
|
-
|
|
6136
|
-
// src/cli/commands/index.ts
|
|
6137
6988
|
function indexCommand() {
|
|
6138
|
-
const fts5 = new
|
|
6989
|
+
const fts5 = new Command4("fts5").description("Rebuild the SQLite FTS5 index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
6990
|
+
"--overwrite",
|
|
6991
|
+
"rebuild from scratch (FTS5 always overwrites; flag accepted for parity with other index commands)",
|
|
6992
|
+
false
|
|
6993
|
+
).action(async (options) => {
|
|
6139
6994
|
await withBundle(options.store, (bundle) => {
|
|
6995
|
+
void options.overwrite;
|
|
6140
6996
|
printIndexStatus(rebuildFts5Index(bundle));
|
|
6141
6997
|
});
|
|
6142
6998
|
});
|
|
6143
|
-
const tantivy = new
|
|
6999
|
+
const tantivy = new Command4("tantivy").description("Rebuild the Tantivy sidecar index from search_docs.").option("--store <path>", "bundle directory", defaultBundlePath()).option(
|
|
7000
|
+
"--overwrite",
|
|
7001
|
+
"force a full re-index instead of the default incremental rebuild",
|
|
7002
|
+
false
|
|
7003
|
+
).action(async (options) => {
|
|
6144
7004
|
await withBundle(options.store, async (bundle) => {
|
|
6145
|
-
printIndexStatus(await rebuildTantivyIndex(bundle));
|
|
7005
|
+
printIndexStatus(await rebuildTantivyIndex(bundle, { overwrite: options.overwrite }));
|
|
6146
7006
|
});
|
|
6147
7007
|
});
|
|
6148
|
-
const status = new
|
|
7008
|
+
const status = new Command4("status").description("Show derived search index status.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--output-format <fmt>", "interactive|table|json|csv", "table").action(async (options) => {
|
|
6149
7009
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
6150
7010
|
await withBundle(options.store, (bundle) => {
|
|
6151
7011
|
const rows = getSearchIndexStatuses(bundle);
|
|
@@ -6162,7 +7022,7 @@ function indexCommand() {
|
|
|
6162
7022
|
});
|
|
6163
7023
|
});
|
|
6164
7024
|
});
|
|
6165
|
-
return new
|
|
7025
|
+
return new Command4("index").description("Build or inspect derived search indexes.").addCommand(fts5).addCommand(tantivy).addCommand(status);
|
|
6166
7026
|
}
|
|
6167
7027
|
function printIndexStatus(status) {
|
|
6168
7028
|
process.stdout.write(
|
|
@@ -6174,11 +7034,11 @@ function printIndexStatus(status) {
|
|
|
6174
7034
|
|
|
6175
7035
|
// src/cli/commands/init.ts
|
|
6176
7036
|
import { stat as stat3 } from "fs/promises";
|
|
6177
|
-
import
|
|
6178
|
-
import { Command as
|
|
7037
|
+
import path18 from "path";
|
|
7038
|
+
import { Command as Command5 } from "commander";
|
|
6179
7039
|
function initCommand() {
|
|
6180
|
-
return new
|
|
6181
|
-
const resolved =
|
|
7040
|
+
return new Command5("init").description("Initialize a new prosa bundle (SQLite + manifest + objects/).").option("--store <path>", "bundle directory", defaultBundlePath()).option("--force-existing", "open instead of failing if a manifest exists", false).action(async (options) => {
|
|
7041
|
+
const resolved = path18.resolve(options.store);
|
|
6182
7042
|
const exists2 = await stat3(`${resolved}/manifest.json`).then(() => true).catch(() => false);
|
|
6183
7043
|
if (exists2) {
|
|
6184
7044
|
if (!options.forceExisting) {
|
|
@@ -6189,369 +7049,416 @@ use --force-existing to skip without erroring
|
|
|
6189
7049
|
);
|
|
6190
7050
|
process.exit(2);
|
|
6191
7051
|
}
|
|
6192
|
-
const bundle2 = await openBundle(resolved);
|
|
6193
|
-
closeBundle(bundle2);
|
|
6194
|
-
process.stdout.write(`bundle already exists at ${resolved}
|
|
6195
|
-
`);
|
|
6196
|
-
return;
|
|
6197
|
-
}
|
|
6198
|
-
const bundle = await initBundle(resolved);
|
|
6199
|
-
closeBundle(bundle);
|
|
6200
|
-
process.stdout.write(`initialized prosa bundle at ${resolved}
|
|
6201
|
-
`);
|
|
6202
|
-
});
|
|
6203
|
-
}
|
|
6204
|
-
|
|
6205
|
-
// src/cli/commands/mcp.ts
|
|
6206
|
-
import path18 from "path";
|
|
6207
|
-
import { Command as Command5 } from "commander";
|
|
6208
|
-
|
|
6209
|
-
// src/mcp/server.ts
|
|
6210
|
-
init_errors();
|
|
6211
|
-
import { randomUUID } from "crypto";
|
|
6212
|
-
import http from "http";
|
|
6213
|
-
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
6214
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6215
|
-
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
6216
|
-
|
|
6217
|
-
// src/mcp/guidance.ts
|
|
6218
|
-
var PROSA_MCP_INSTRUCTIONS = `
|
|
6219
|
-
prosa is a local memory over local agent session histories. Use it to import recent sessions,
|
|
6220
|
-
find prior work, commands, decisions, file touches, and full transcripts before answering from
|
|
6221
|
-
memory.
|
|
6222
|
-
|
|
6223
|
-
Recommended workflow:
|
|
6224
|
-
- Use compile to refresh the bundle when recent local sessions may not be indexed yet. With no
|
|
6225
|
-
input it imports all supported providers from default paths.
|
|
6226
|
-
- For open-ended questions, start with search_sessions using 2-5 concrete terms.
|
|
6227
|
-
- For questions about a file or path, start with find_touched_files, then inspect the returned sessions.
|
|
6228
|
-
- After search results, call get_session for the most relevant session_ids before drawing conclusions.
|
|
6229
|
-
- Use export_session_markdown only after selecting a likely session; it can return a large transcript.
|
|
6230
|
-
- Use list_tool_calls for command history, failed tools, patches, and operational audit trails.
|
|
6231
|
-
- Use get_artifact only when a returned artifact_id is needed for full output or diff content.
|
|
6232
|
-
- Use index_status if search results look stale or unexpectedly empty.
|
|
6233
|
-
|
|
6234
|
-
When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant snippet
|
|
6235
|
-
or event. Do not treat search snippets as the whole truth; open the session when accuracy matters.
|
|
6236
|
-
`.trim();
|
|
6237
|
-
var INVESTIGATE_PRIOR_WORK_PROMPT = `
|
|
6238
|
-
Investigate prior work in prosa for the topic: {{topic}}
|
|
6239
|
-
|
|
6240
|
-
Use this workflow:
|
|
6241
|
-
1. Call search_sessions with a short query built from the topic.
|
|
6242
|
-
2. If results are broad, search again with narrower terms from the best snippets.
|
|
6243
|
-
3. Open the most relevant session_ids with get_session.
|
|
6244
|
-
4. Use export_session_markdown only for sessions that appear directly relevant.
|
|
6245
|
-
5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
|
|
6246
|
-
`.trim();
|
|
6247
|
-
var FIND_FILE_HISTORY_PROMPT = `
|
|
6248
|
-
Investigate history for file/path: {{path}}
|
|
6249
|
-
|
|
6250
|
-
Use this workflow:
|
|
6251
|
-
1. Call find_touched_files with the path or the most distinctive path suffix.
|
|
6252
|
-
2. Open returned session_ids with get_session.
|
|
6253
|
-
3. Use list_tool_calls with session_id when you need command-level detail.
|
|
6254
|
-
4. Use export_session_markdown only for the most relevant session.
|
|
6255
|
-
5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
|
|
6256
|
-
`.trim();
|
|
6257
|
-
var AUDIT_TOOL_FAILURES_PROMPT = `
|
|
6258
|
-
Audit tool failures in prosa{{query_clause}}.
|
|
6259
|
-
|
|
6260
|
-
Use this workflow:
|
|
6261
|
-
1. Call list_tool_calls with errors_only=true.
|
|
6262
|
-
2. If a query is provided, also call search_sessions for that query to find related context.
|
|
6263
|
-
3. Open relevant session_ids with get_session.
|
|
6264
|
-
4. Group failures by tool_name, command/path, and likely cause.
|
|
6265
|
-
5. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
|
|
6266
|
-
`.trim();
|
|
6267
|
-
|
|
6268
|
-
// src/mcp/tools.ts
|
|
6269
|
-
import { z } from "zod";
|
|
6270
|
-
|
|
6271
|
-
// src/core/domain/types.ts
|
|
6272
|
-
var SOURCE_TOOLS = ["cursor", "codex", "claude", "gemini"];
|
|
6273
|
-
|
|
6274
|
-
// src/mcp/tools.ts
|
|
6275
|
-
init_errors();
|
|
6276
|
-
init_limits();
|
|
6277
|
-
init_indexing();
|
|
6278
|
-
init_search();
|
|
6279
|
-
init_sessions();
|
|
6280
|
-
function registerProsaTools(server, bundle, options = {}) {
|
|
6281
|
-
const searchEngine = options.searchEngine ?? "fts5";
|
|
6282
|
-
const storePath = options.storePath ?? bundle.path;
|
|
6283
|
-
registerProsaPrompts(server);
|
|
6284
|
-
server.registerTool(
|
|
6285
|
-
"compile",
|
|
6286
|
-
{
|
|
6287
|
-
title: "Compile sessions",
|
|
6288
|
-
description: "Import local agent session histories into the active prosa bundle. With no input, compiles all providers from default paths. With source, compiles that provider; sessions_path may override that provider path.",
|
|
6289
|
-
inputSchema: {
|
|
6290
|
-
source: z.enum(SOURCE_TOOLS).optional(),
|
|
6291
|
-
sessions_path: z.string().min(1).optional()
|
|
6292
|
-
},
|
|
6293
|
-
annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
|
|
6294
|
-
},
|
|
6295
|
-
async ({ source, sessions_path }) => {
|
|
6296
|
-
if (sessions_path && !source) {
|
|
6297
|
-
return {
|
|
6298
|
-
content: [
|
|
6299
|
-
{
|
|
6300
|
-
type: "text",
|
|
6301
|
-
text: "sessions_path requires source because providers use incompatible source layouts"
|
|
6302
|
-
}
|
|
6303
|
-
],
|
|
6304
|
-
isError: true
|
|
6305
|
-
};
|
|
6306
|
-
}
|
|
6307
|
-
try {
|
|
6308
|
-
const result = await runCompileImports({
|
|
6309
|
-
bundle,
|
|
6310
|
-
providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
|
|
6311
|
-
deferIndex: false,
|
|
6312
|
-
sessionsPath: sessions_path
|
|
6313
|
-
});
|
|
6314
|
-
const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
|
|
6315
|
-
return {
|
|
6316
|
-
content: [
|
|
6317
|
-
{
|
|
6318
|
-
type: "text",
|
|
6319
|
-
text: JSON.stringify(
|
|
6320
|
-
{
|
|
6321
|
-
providers: result.providers.map((provider) => ({
|
|
6322
|
-
source: provider.source,
|
|
6323
|
-
source_path: provider.sourcePath,
|
|
6324
|
-
batch_id: provider.batchId,
|
|
6325
|
-
counts: provider.counts
|
|
6326
|
-
})),
|
|
6327
|
-
imported_any: result.importedAny,
|
|
6328
|
-
tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
|
|
6329
|
-
tantivy_error: result.tantivyError,
|
|
6330
|
-
parquet: parquet ? {
|
|
6331
|
-
out_dir: parquet.outDir,
|
|
6332
|
-
manifest_path: parquet.manifestPath,
|
|
6333
|
-
table_count: parquet.tableCount,
|
|
6334
|
-
files: parquet.files,
|
|
6335
|
-
counts: parquet.counts
|
|
6336
|
-
} : null
|
|
6337
|
-
},
|
|
6338
|
-
null,
|
|
6339
|
-
2
|
|
6340
|
-
)
|
|
6341
|
-
}
|
|
6342
|
-
]
|
|
6343
|
-
};
|
|
6344
|
-
} catch (error) {
|
|
6345
|
-
return {
|
|
6346
|
-
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
6347
|
-
isError: true
|
|
6348
|
-
};
|
|
6349
|
-
}
|
|
6350
|
-
}
|
|
6351
|
-
);
|
|
6352
|
-
server.registerTool(
|
|
6353
|
-
"list_sessions",
|
|
6354
|
-
{
|
|
6355
|
-
title: "List sessions",
|
|
6356
|
-
description: "List recent sessions when you need candidates by source/date before deeper inspection. Next step: call get_session for relevant session_id values.",
|
|
6357
|
-
inputSchema: {
|
|
6358
|
-
source: z.enum(SOURCE_TOOLS).optional(),
|
|
6359
|
-
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
6360
|
-
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
6361
|
-
limit: z.number().int().min(1).max(500).optional().default(50)
|
|
6362
|
-
},
|
|
6363
|
-
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6364
|
-
},
|
|
6365
|
-
async (input) => {
|
|
6366
|
-
const rows = listSessions(bundle, {
|
|
6367
|
-
sourceTool: input.source,
|
|
6368
|
-
sinceIso: input.since,
|
|
6369
|
-
untilIso: input.until,
|
|
6370
|
-
limit: input.limit ?? 50
|
|
6371
|
-
});
|
|
6372
|
-
return {
|
|
6373
|
-
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
6374
|
-
};
|
|
6375
|
-
}
|
|
6376
|
-
);
|
|
6377
|
-
server.registerTool(
|
|
6378
|
-
"get_session",
|
|
6379
|
-
{
|
|
6380
|
-
title: "Get session detail",
|
|
6381
|
-
description: "Open one session and return metadata plus timeline events. Use this after search_sessions, list_sessions, find_touched_files, or list_tool_calls before making evidence-backed claims.",
|
|
6382
|
-
inputSchema: {
|
|
6383
|
-
session_id: z.string().min(1)
|
|
6384
|
-
},
|
|
6385
|
-
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6386
|
-
},
|
|
6387
|
-
async ({ session_id }) => {
|
|
6388
|
-
const detail = getSession(bundle, session_id);
|
|
6389
|
-
if (!detail) {
|
|
6390
|
-
return {
|
|
6391
|
-
content: [{ type: "text", text: `session not found: ${session_id}` }],
|
|
6392
|
-
isError: true
|
|
6393
|
-
};
|
|
6394
|
-
}
|
|
6395
|
-
return {
|
|
6396
|
-
content: [{ type: "text", text: JSON.stringify(detail, null, 2) }]
|
|
6397
|
-
};
|
|
7052
|
+
const bundle2 = await openBundle(resolved);
|
|
7053
|
+
closeBundle(bundle2);
|
|
7054
|
+
process.stdout.write(`bundle already exists at ${resolved}
|
|
7055
|
+
`);
|
|
7056
|
+
return;
|
|
6398
7057
|
}
|
|
6399
|
-
|
|
7058
|
+
const bundle = await initBundle(resolved);
|
|
7059
|
+
closeBundle(bundle);
|
|
7060
|
+
process.stdout.write(`initialized prosa bundle at ${resolved}
|
|
7061
|
+
`);
|
|
7062
|
+
});
|
|
7063
|
+
}
|
|
7064
|
+
|
|
7065
|
+
// src/cli/commands/mcp.ts
|
|
7066
|
+
import path19 from "path";
|
|
7067
|
+
import { Command as Command6 } from "commander";
|
|
7068
|
+
|
|
7069
|
+
// src/mcp/server.ts
|
|
7070
|
+
init_errors();
|
|
7071
|
+
import { randomUUID } from "crypto";
|
|
7072
|
+
import http from "http";
|
|
7073
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
7074
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
7075
|
+
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
7076
|
+
|
|
7077
|
+
// src/mcp/guidance.ts
|
|
7078
|
+
var PROSA_MCP_INSTRUCTIONS = `
|
|
7079
|
+
prosa is a local memory over local agent session histories. Use it to find prior work, commands,
|
|
7080
|
+
decisions, file touches, transcripts, and analytical rollups before answering from memory.
|
|
7081
|
+
|
|
7082
|
+
There are six tools:
|
|
7083
|
+
- search: full-text over messages, commands, paths, diffs, and previews. Start here for open-ended
|
|
7084
|
+
questions with 2-5 concrete terms. Optional engine, field_kind, raw, since/until filters.
|
|
7085
|
+
- sessions: without session_id, lists candidates filtered by source/time/limit. With session_id,
|
|
7086
|
+
opens the session: format=detail (default) returns metadata + timeline, format=summary returns
|
|
7087
|
+
only the session row, format=markdown renders the full transcript.
|
|
7088
|
+
- tool_calls: audit commands and tool usage. Filters by tool_name, canonical_type, session_id,
|
|
7089
|
+
errors_only. When path_substring is set, also returns artifacts touching that path \u2014 use this for
|
|
7090
|
+
file-history questions.
|
|
7091
|
+
- analytics: built-in aggregate reports backed by SQLite views. Pick report=sessions|tools|errors|
|
|
7092
|
+
models|projects with the matching filters. Use report=sessions with session_id or
|
|
7093
|
+
source_path_substring for per-session metrics.
|
|
7094
|
+
- artifact: fetch full text for an artifact_id when previews are not enough. Binary artifacts return
|
|
7095
|
+
a placeholder.
|
|
7096
|
+
- compile: with no input, returns a status snapshot (search index health). With source (and
|
|
7097
|
+
optionally sessions_path), imports that provider into the bundle. Use status mode when search
|
|
7098
|
+
results look stale; use import mode when local sessions may not be indexed yet.
|
|
7099
|
+
|
|
7100
|
+
When answering, cite concrete evidence: session_id, timestamp, tool/file path, and the relevant
|
|
7101
|
+
snippet or event. Do not treat search snippets as the whole truth; open the session with
|
|
7102
|
+
\`sessions session_id=\u2026 format=detail\` when accuracy matters.
|
|
7103
|
+
`.trim();
|
|
7104
|
+
var INVESTIGATE_PRIOR_WORK_PROMPT = `
|
|
7105
|
+
Investigate prior work in prosa for the topic: {{topic}}
|
|
7106
|
+
|
|
7107
|
+
Use this workflow:
|
|
7108
|
+
1. Call \`search\` with a short query built from the topic.
|
|
7109
|
+
2. If results are broad, search again with narrower terms from the best snippets.
|
|
7110
|
+
3. Open the most relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
|
|
7111
|
+
4. Use \`sessions session_id=\u2026 format=markdown\` only for sessions that appear directly relevant.
|
|
7112
|
+
5. Answer with evidence: session_id, timestamp, and the decisive snippet or event.
|
|
7113
|
+
`.trim();
|
|
7114
|
+
var FIND_FILE_HISTORY_PROMPT = `
|
|
7115
|
+
Investigate history for file/path: {{path}}
|
|
7116
|
+
|
|
7117
|
+
Use this workflow:
|
|
7118
|
+
1. Call \`tool_calls\` with path_substring set to the path or its most distinctive suffix.
|
|
7119
|
+
2. Open returned session_ids with \`sessions session_id=\u2026 format=detail\`.
|
|
7120
|
+
3. Call \`tool_calls\` with session_id when you need command-level detail inside one session.
|
|
7121
|
+
4. Use \`sessions session_id=\u2026 format=markdown\` only for the most relevant session.
|
|
7122
|
+
5. Summarize what changed, who/what tool touched it, and cite session_id plus timestamp.
|
|
7123
|
+
`.trim();
|
|
7124
|
+
var AUDIT_TOOL_FAILURES_PROMPT = `
|
|
7125
|
+
Audit tool failures in prosa{{query_clause}}.
|
|
7126
|
+
|
|
7127
|
+
Use this workflow:
|
|
7128
|
+
1. For an aggregate report, call \`analytics report=errors\` (filter by source/since/until/tool_name
|
|
7129
|
+
as needed).
|
|
7130
|
+
2. For per-call evidence, call \`tool_calls\` with errors_only=true.
|
|
7131
|
+
3. If a query is provided, also call \`search\` for that query to find related context.
|
|
7132
|
+
4. Open relevant session_ids with \`sessions session_id=\u2026 format=detail\`.
|
|
7133
|
+
5. Group failures by tool_name, command/path, and likely cause.
|
|
7134
|
+
6. Answer with evidence: session_id, timestamp, command/path, exit code, and preview.
|
|
7135
|
+
`.trim();
|
|
7136
|
+
|
|
7137
|
+
// src/mcp/tools.ts
|
|
7138
|
+
import { z } from "zod";
|
|
7139
|
+
init_errors();
|
|
7140
|
+
init_indexing();
|
|
7141
|
+
init_search();
|
|
7142
|
+
init_sessions();
|
|
7143
|
+
|
|
7144
|
+
// src/services/tool_calls.ts
|
|
7145
|
+
init_limits();
|
|
7146
|
+
/**
 * Escape the LIKE wildcards (`%`, `_`) and the escape character itself so a
 * caller-supplied substring is matched literally under `LIKE ? ESCAPE '\'`.
 *
 * @param {string} text - raw substring supplied by the caller
 * @returns {string} text with `\`, `%` and `_` prefixed by a backslash
 */
function escapeLikeLiteral(text) {
  return String(text).replace(/[\\%_]/g, (ch) => `\\${ch}`);
}

/**
 * List tool calls (left-joined with their tool results), newest first.
 *
 * When `filters.pathSubstring` is set, rows from `artifacts` whose path
 * contains the substring are appended via UNION ALL, using the same column
 * layout (`entity_type` distinguishes 'tool_call' from 'artifact').
 *
 * @param {object} bundle - open bundle; only `bundle.db.prepare(sql).all(...)` is used
 * @param {object} [filters]
 * @param {string} [filters.toolName] - exact `tool_calls.tool_name`
 * @param {string} [filters.canonicalType] - exact `tool_calls.canonical_tool_type`
 * @param {string} [filters.sessionId] - exact session id (applied to both tool calls and artifacts)
 * @param {boolean} [filters.errorsOnly] - only rows with `tr.is_error = 1` or `tc.status = 'error'`
 * @param {string} [filters.pathSubstring] - literal substring of the path
 * @param {string} [filters.sinceIso] - inclusive lower bound; rows with a NULL timestamp are kept
 * @param {string} [filters.untilIso] - exclusive upper bound; rows with a NULL timestamp are kept
 * @param {number} [filters.limit] - clamped by `clampLimit` to at most 500, fallback 100
 * @returns {object[]} rows as returned by the prepared statement's `all()`
 */
function listToolCalls(bundle, filters = {}) {
  const conds = [];
  const params = [];
  // Escape wildcards once: paths routinely contain `_`, which LIKE would
  // otherwise treat as "any single character".
  const likePattern = filters.pathSubstring ? `%${escapeLikeLiteral(filters.pathSubstring)}%` : null;
  if (filters.toolName) {
    conds.push("tc.tool_name = ?");
    params.push(filters.toolName);
  }
  if (filters.canonicalType) {
    conds.push("tc.canonical_tool_type = ?");
    params.push(filters.canonicalType);
  }
  if (filters.sessionId) {
    conds.push("tc.session_id = ?");
    params.push(filters.sessionId);
  }
  if (filters.errorsOnly) {
    conds.push("(tr.is_error = 1 OR tc.status = ?)");
    params.push("error");
  }
  if (likePattern !== null) {
    conds.push("tc.path IS NOT NULL AND tc.path LIKE ? ESCAPE '\\'");
    params.push(likePattern);
  }
  if (filters.sinceIso) {
    conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start >= ?)");
    params.push(filters.sinceIso);
  }
  if (filters.untilIso) {
    conds.push("(tc.timestamp_start IS NULL OR tc.timestamp_start < ?)");
    params.push(filters.untilIso);
  }
  const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
  const limit2 = clampLimit(filters.limit, { max: 500, fallback: 100 });
  const toolCallSql = `
    SELECT 'tool_call' AS entity_type,
           tc.session_id,
           tc.tool_call_id,
           NULL AS artifact_id,
           tc.tool_name,
           tc.canonical_tool_type,
           tc.command,
           tc.path,
           tc.status,
           tc.timestamp_start,
           tr.is_error,
           tr.exit_code,
           tr.preview
    FROM tool_calls tc
    LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
    ${where}
  `;
  if (likePattern === null) {
    const sql2 = `${toolCallSql} ORDER BY tc.timestamp_start DESC LIMIT ${limit2}`;
    return bundle.db.prepare(sql2).all(...params);
  }
  // Artifact half of the union. Filters that have an equivalent column on
  // `artifacts` (session, time window) are mirrored here so a scoped query
  // does not return artifacts from unrelated sessions or time ranges.
  // Tool-specific filters (tool name, canonical type, errors) have no
  // artifact counterpart and are intentionally not applied.
  const artifactConds = ["a.path IS NOT NULL AND a.path LIKE ? ESCAPE '\\'"];
  const artifactParams = [likePattern];
  if (filters.sessionId) {
    artifactConds.push("a.session_id = ?");
    artifactParams.push(filters.sessionId);
  }
  if (filters.sinceIso) {
    artifactConds.push("(a.created_ts IS NULL OR a.created_ts >= ?)");
    artifactParams.push(filters.sinceIso);
  }
  if (filters.untilIso) {
    artifactConds.push("(a.created_ts IS NULL OR a.created_ts < ?)");
    artifactParams.push(filters.untilIso);
  }
  const artifactSql = `
    SELECT 'artifact' AS entity_type,
           a.session_id,
           NULL AS tool_call_id,
           a.artifact_id,
           NULL AS tool_name,
           NULL AS canonical_tool_type,
           NULL AS command,
           a.path,
           NULL AS status,
           a.created_ts AS timestamp_start,
           NULL AS is_error,
           NULL AS exit_code,
           NULL AS preview
    FROM artifacts a
    WHERE ${artifactConds.join(" AND ")}
  `;
  const sql = `
    ${toolCallSql}
    UNION ALL
    ${artifactSql}
    ORDER BY timestamp_start DESC
    LIMIT ${limit2}
  `;
  // Positional parameters: tool-call placeholders first, then artifact ones.
  return bundle.db.prepare(sql).all(...params, ...artifactParams);
}
|
|
7227
|
+
|
|
7228
|
+
// src/mcp/tools.ts
|
|
7229
|
+
// Closed set of canonical tool categories accepted by the `canonical_type`
// filters of the MCP tools; order is preserved for the enum schema.
var CANONICAL_TOOL_TYPES = [
  "shell", "read_file", "write_file", "edit_file", "search_file",
  "web_search", "mcp", "subagent", "patch", "other"
];
|
|
7241
|
+
// Closed set of search-document field kinds accepted by the `field_kind`
// filter of the `search` tool; order is preserved for the enum schema.
var FIELD_KINDS = [
  "message_text", "user_prompt", "assistant_text",
  "command", "command_output_preview", "error",
  "file_path", "diff", "summary",
  "artifact_text", "tool_args", "tool_result"
];
|
|
7255
|
+
function registerProsaTools(server, bundle, options = {}) {
|
|
7256
|
+
const searchEngine = options.searchEngine ?? "fts5";
|
|
7257
|
+
const storePath = options.storePath ?? bundle.path;
|
|
7258
|
+
const ensureStore = options.ensureStore ?? false;
|
|
7259
|
+
registerProsaPrompts(server);
|
|
6400
7260
|
server.registerTool(
|
|
6401
|
-
"
|
|
7261
|
+
"search",
|
|
6402
7262
|
{
|
|
6403
7263
|
title: "Full-text search",
|
|
6404
|
-
description: `Search messages, commands, paths, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms
|
|
7264
|
+
description: `Search messages, commands, paths, diffs, and result previews using the server-selected ${searchEngine} engine. Start here for open-ended questions with 2-5 concrete terms; then call \`sessions\` for relevant hits.`,
|
|
6405
7265
|
inputSchema: {
|
|
6406
7266
|
query: z.string().min(1),
|
|
7267
|
+
engine: z.enum(["fts5", "tantivy"]).optional(),
|
|
7268
|
+
field_kind: z.enum(FIELD_KINDS).optional(),
|
|
6407
7269
|
limit: z.number().int().min(1).max(500).optional().default(50),
|
|
6408
|
-
raw: z.boolean().optional().default(false)
|
|
7270
|
+
raw: z.boolean().optional().default(false).describe("Pass query straight to FTS5 MATCH (allows OR/NEAR/prefixes).")
|
|
6409
7271
|
},
|
|
6410
7272
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6411
7273
|
},
|
|
6412
|
-
async ({ query, limit, raw }) => {
|
|
6413
|
-
const
|
|
7274
|
+
async ({ query, engine, field_kind, limit: limit2, raw }) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7275
|
+
const selectedEngine = engine ?? searchEngine;
|
|
7276
|
+
const hits = searchFullText(activeBundle, {
|
|
7277
|
+
query,
|
|
7278
|
+
limit: limit2 ?? 50,
|
|
7279
|
+
raw,
|
|
7280
|
+
engine: selectedEngine
|
|
7281
|
+
});
|
|
7282
|
+
const filtered = field_kind ? hits.filter((hit) => hit.field_kind === field_kind) : hits;
|
|
6414
7283
|
return {
|
|
6415
7284
|
content: [
|
|
6416
7285
|
{
|
|
6417
7286
|
type: "text",
|
|
6418
7287
|
text: JSON.stringify(
|
|
6419
|
-
{
|
|
7288
|
+
{
|
|
7289
|
+
query,
|
|
7290
|
+
engine: selectedEngine,
|
|
7291
|
+
field_kind: field_kind ?? null,
|
|
7292
|
+
count: filtered.length,
|
|
7293
|
+
hits: filtered
|
|
7294
|
+
},
|
|
6420
7295
|
null,
|
|
6421
7296
|
2
|
|
6422
7297
|
)
|
|
6423
7298
|
}
|
|
6424
7299
|
]
|
|
6425
7300
|
};
|
|
6426
|
-
}
|
|
7301
|
+
})
|
|
6427
7302
|
);
|
|
6428
7303
|
server.registerTool(
|
|
6429
|
-
"
|
|
7304
|
+
"sessions",
|
|
6430
7305
|
{
|
|
6431
|
-
title: "
|
|
6432
|
-
description: "
|
|
7306
|
+
title: "List or open sessions",
|
|
7307
|
+
description: "Without `session_id`, lists sessions filtered by source/time/limit. With `session_id`, opens that session: `format=detail` (default) returns metadata plus timeline events; `format=summary` returns only the session row; `format=markdown` renders the readable transcript. Call after `search` to get evidence behind a hit.",
|
|
6433
7308
|
inputSchema: {
|
|
6434
|
-
session_id: z.string().min(1)
|
|
7309
|
+
session_id: z.string().min(1).optional(),
|
|
7310
|
+
format: z.enum(["summary", "detail", "markdown"]).optional().default("detail"),
|
|
7311
|
+
source: z.enum(SOURCE_TOOLS).optional(),
|
|
7312
|
+
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
7313
|
+
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
7314
|
+
limit: z.number().int().min(1).max(500).optional().default(50)
|
|
6435
7315
|
},
|
|
6436
7316
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6437
7317
|
},
|
|
6438
|
-
async ({ session_id }) => {
|
|
6439
|
-
|
|
6440
|
-
const
|
|
6441
|
-
|
|
6442
|
-
|
|
7318
|
+
async ({ session_id, format, source, since, until, limit: limit2 }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
7319
|
+
if (!session_id) {
|
|
7320
|
+
const rows = listSessions(activeBundle, {
|
|
7321
|
+
sourceTool: source,
|
|
7322
|
+
sinceIso: since,
|
|
7323
|
+
untilIso: until,
|
|
7324
|
+
limit: limit2 ?? 50
|
|
7325
|
+
});
|
|
6443
7326
|
return {
|
|
6444
|
-
content: [{ type: "text", text:
|
|
7327
|
+
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
7328
|
+
};
|
|
7329
|
+
}
|
|
7330
|
+
if (format === "markdown") {
|
|
7331
|
+
try {
|
|
7332
|
+
const md = await exportSessionMarkdown(activeBundle, session_id);
|
|
7333
|
+
return { content: [{ type: "text", text: md }] };
|
|
7334
|
+
} catch (error) {
|
|
7335
|
+
return {
|
|
7336
|
+
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
7337
|
+
isError: true
|
|
7338
|
+
};
|
|
7339
|
+
}
|
|
7340
|
+
}
|
|
7341
|
+
const detail = getSession(activeBundle, session_id);
|
|
7342
|
+
if (!detail) {
|
|
7343
|
+
return {
|
|
7344
|
+
content: [{ type: "text", text: `session not found: ${session_id}` }],
|
|
6445
7345
|
isError: true
|
|
6446
7346
|
};
|
|
6447
7347
|
}
|
|
6448
|
-
|
|
7348
|
+
const payload = format === "summary" ? { session: detail.session } : detail;
|
|
7349
|
+
return {
|
|
7350
|
+
content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
|
|
7351
|
+
};
|
|
7352
|
+
})
|
|
6449
7353
|
);
|
|
6450
7354
|
server.registerTool(
|
|
6451
|
-
"
|
|
7355
|
+
"tool_calls",
|
|
6452
7356
|
{
|
|
6453
|
-
title: "
|
|
6454
|
-
description: "Audit commands and tool usage by
|
|
7357
|
+
title: "Audit tool calls and file touches",
|
|
7358
|
+
description: "Audit commands and tool usage. Filter by tool_name, canonical_type, session_id, errors_only, or path_substring. When `path_substring` is set, also surfaces matching artifacts so file-history questions return both invocations and produced files.",
|
|
6455
7359
|
inputSchema: {
|
|
7360
|
+
session_id: z.string().min(1).optional(),
|
|
6456
7361
|
tool_name: z.string().optional(),
|
|
6457
|
-
canonical_type: z.enum(
|
|
6458
|
-
|
|
6459
|
-
"read_file",
|
|
6460
|
-
"write_file",
|
|
6461
|
-
"edit_file",
|
|
6462
|
-
"search_file",
|
|
6463
|
-
"web_search",
|
|
6464
|
-
"mcp",
|
|
6465
|
-
"subagent",
|
|
6466
|
-
"patch",
|
|
6467
|
-
"other"
|
|
6468
|
-
]).optional(),
|
|
6469
|
-
session_id: z.string().optional(),
|
|
7362
|
+
canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional(),
|
|
7363
|
+
path_substring: z.string().min(1).optional().describe("Filter rows where tool_calls.path or artifacts.path contains this substring."),
|
|
6470
7364
|
errors_only: z.boolean().optional().default(false),
|
|
7365
|
+
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
7366
|
+
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
6471
7367
|
limit: z.number().int().min(1).max(500).optional().default(100)
|
|
6472
7368
|
},
|
|
6473
7369
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6474
7370
|
},
|
|
6475
|
-
async (
|
|
6476
|
-
const
|
|
6477
|
-
|
|
6478
|
-
|
|
6479
|
-
|
|
6480
|
-
|
|
6481
|
-
|
|
6482
|
-
|
|
6483
|
-
|
|
6484
|
-
|
|
6485
|
-
}
|
|
6486
|
-
if (session_id) {
|
|
6487
|
-
conds.push("tc.session_id = ?");
|
|
6488
|
-
params.push(session_id);
|
|
6489
|
-
}
|
|
6490
|
-
if (errors_only) {
|
|
6491
|
-
conds.push("(tr.is_error = 1 OR tc.status = ?)");
|
|
6492
|
-
params.push("error");
|
|
6493
|
-
}
|
|
6494
|
-
const where = conds.length ? `WHERE ${conds.join(" AND ")}` : "";
|
|
6495
|
-
const sql = `
|
|
6496
|
-
SELECT tc.tool_call_id, tc.session_id, tc.tool_name, tc.canonical_tool_type,
|
|
6497
|
-
tc.command, tc.path, tc.status, tc.timestamp_start,
|
|
6498
|
-
tr.is_error, tr.exit_code, tr.preview
|
|
6499
|
-
FROM tool_calls tc
|
|
6500
|
-
LEFT JOIN tool_results tr ON tr.tool_call_id = tc.tool_call_id
|
|
6501
|
-
${where}
|
|
6502
|
-
ORDER BY tc.timestamp_start DESC
|
|
6503
|
-
LIMIT ${clampLimit(limit, { max: 500, fallback: 100 })}
|
|
6504
|
-
`;
|
|
6505
|
-
const rows = bundle.db.prepare(sql).all(...params);
|
|
7371
|
+
async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7372
|
+
const rows = listToolCalls(activeBundle, {
|
|
7373
|
+
sessionId: input.session_id,
|
|
7374
|
+
toolName: input.tool_name,
|
|
7375
|
+
canonicalType: input.canonical_type,
|
|
7376
|
+
pathSubstring: input.path_substring,
|
|
7377
|
+
errorsOnly: input.errors_only,
|
|
7378
|
+
sinceIso: input.since,
|
|
7379
|
+
untilIso: input.until,
|
|
7380
|
+
limit: input.limit ?? 100
|
|
7381
|
+
});
|
|
6506
7382
|
return {
|
|
6507
7383
|
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
6508
7384
|
};
|
|
6509
|
-
}
|
|
7385
|
+
})
|
|
6510
7386
|
);
|
|
6511
7387
|
server.registerTool(
|
|
6512
|
-
"
|
|
7388
|
+
"analytics",
|
|
6513
7389
|
{
|
|
6514
|
-
title: "
|
|
6515
|
-
description: "
|
|
7390
|
+
title: "Aggregate analytics reports",
|
|
7391
|
+
description: "Run a built-in aggregation over the bundle: per-session metrics (`sessions`), tool usage rollup (`tools`), error timeline (`errors`), model usage (`models`), or project activity (`projects`). Backed by SQLite views; mirrors the `prosa analytics` CLI.",
|
|
6516
7392
|
inputSchema: {
|
|
6517
|
-
|
|
6518
|
-
|
|
7393
|
+
report: z.enum(ANALYTICS_REPORTS),
|
|
7394
|
+
source: z.enum(SOURCE_TOOLS).optional(),
|
|
7395
|
+
since: z.string().optional().describe("ISO timestamp lower bound (inclusive)"),
|
|
7396
|
+
until: z.string().optional().describe("ISO timestamp upper bound (exclusive)"),
|
|
7397
|
+
limit: z.number().int().min(1).max(500).optional().default(50),
|
|
7398
|
+
session_id: z.string().min(1).optional().describe("Drill-down filter (applies to `sessions` report)."),
|
|
7399
|
+
source_path_substring: z.string().min(1).optional().describe("Filter `sessions` rows by imported source file path substring."),
|
|
7400
|
+
project: z.string().min(1).optional().describe("Filter by project id, name, or path substring."),
|
|
7401
|
+
tool_name: z.string().min(1).optional().describe("Filter `tools`/`errors` rows by exact tool name."),
|
|
7402
|
+
canonical_type: z.enum(CANONICAL_TOOL_TYPES).optional().describe("Filter `tools` rows by canonical tool type."),
|
|
7403
|
+
errors_only: z.boolean().optional().describe("`tools` report: only error rows."),
|
|
7404
|
+
category: z.string().min(1).optional().describe("Filter `errors` by category: tool_result|import_error|uncertainty."),
|
|
7405
|
+
model: z.string().min(1).optional().describe("Filter `models` rows by exact model name.")
|
|
6519
7406
|
},
|
|
6520
7407
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6521
7408
|
},
|
|
6522
|
-
async (
|
|
6523
|
-
const
|
|
6524
|
-
|
|
6525
|
-
|
|
6526
|
-
|
|
6527
|
-
|
|
6528
|
-
|
|
6529
|
-
|
|
6530
|
-
|
|
6531
|
-
|
|
6532
|
-
|
|
6533
|
-
|
|
6534
|
-
|
|
6535
|
-
|
|
6536
|
-
const like = `%${path_substring}%`;
|
|
6537
|
-
const rows = bundle.db.prepare(sql).all(like, like);
|
|
6538
|
-
return {
|
|
6539
|
-
content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
|
|
7409
|
+
async (input) => withToolBundle(bundle, storePath, ensureStore, (activeBundle) => {
|
|
7410
|
+
const filters = {
|
|
7411
|
+
source: input.source,
|
|
7412
|
+
since: input.since,
|
|
7413
|
+
until: input.until,
|
|
7414
|
+
limit: input.limit,
|
|
7415
|
+
sessionId: input.session_id,
|
|
7416
|
+
sourcePathSubstring: input.source_path_substring,
|
|
7417
|
+
project: input.project,
|
|
7418
|
+
toolName: input.tool_name,
|
|
7419
|
+
canonicalType: input.canonical_type,
|
|
7420
|
+
errorsOnly: input.errors_only,
|
|
7421
|
+
category: input.category,
|
|
7422
|
+
model: input.model
|
|
6540
7423
|
};
|
|
6541
|
-
|
|
7424
|
+
try {
|
|
7425
|
+
const result = runAnalyticsReportFromBundle({
|
|
7426
|
+
bundle: activeBundle,
|
|
7427
|
+
report: input.report,
|
|
7428
|
+
filters
|
|
7429
|
+
});
|
|
7430
|
+
return {
|
|
7431
|
+
content: [
|
|
7432
|
+
{
|
|
7433
|
+
type: "text",
|
|
7434
|
+
text: JSON.stringify(
|
|
7435
|
+
{ report: input.report, count: result.rows.length, rows: result.rows },
|
|
7436
|
+
null,
|
|
7437
|
+
2
|
|
7438
|
+
)
|
|
7439
|
+
}
|
|
7440
|
+
]
|
|
7441
|
+
};
|
|
7442
|
+
} catch (error) {
|
|
7443
|
+
return {
|
|
7444
|
+
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
7445
|
+
isError: true
|
|
7446
|
+
};
|
|
7447
|
+
}
|
|
7448
|
+
})
|
|
6542
7449
|
);
|
|
6543
7450
|
server.registerTool(
|
|
6544
|
-
"
|
|
7451
|
+
"artifact",
|
|
6545
7452
|
{
|
|
6546
7453
|
title: "Get artifact bytes/text",
|
|
6547
|
-
description: "Retrieve full text for an artifact_id
|
|
7454
|
+
description: "Retrieve full text for an `artifact_id` referenced in a session, search hit, or tool_calls row. Use this when previews are not enough; binary artifacts return a placeholder.",
|
|
6548
7455
|
inputSchema: {
|
|
6549
7456
|
artifact_id: z.string().min(1)
|
|
6550
7457
|
},
|
|
6551
7458
|
annotations: { readOnlyHint: true, idempotentHint: true }
|
|
6552
7459
|
},
|
|
6553
|
-
async ({ artifact_id }) => {
|
|
6554
|
-
const row =
|
|
7460
|
+
async ({ artifact_id }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
7461
|
+
const row = activeBundle.db.prepare(`SELECT text_object_id, object_id, mime_type FROM artifacts WHERE artifact_id = ?`).get(artifact_id);
|
|
6555
7462
|
if (!row) {
|
|
6556
7463
|
return {
|
|
6557
7464
|
content: [{ type: "text", text: `artifact not found: ${artifact_id}` }],
|
|
@@ -6563,30 +7470,111 @@ function registerProsaTools(server, bundle, options = {}) {
|
|
|
6563
7470
|
return { content: [{ type: "text", text: "[no content stored]" }] };
|
|
6564
7471
|
}
|
|
6565
7472
|
try {
|
|
6566
|
-
const
|
|
6567
|
-
const text = await getText2(bundle, objectId);
|
|
7473
|
+
const text = await getText(activeBundle, objectId);
|
|
6568
7474
|
return { content: [{ type: "text", text }] };
|
|
6569
7475
|
} catch {
|
|
6570
7476
|
return { content: [{ type: "text", text: `[binary artifact: ${objectId}]` }] };
|
|
6571
7477
|
}
|
|
6572
|
-
}
|
|
7478
|
+
})
|
|
6573
7479
|
);
|
|
6574
7480
|
server.registerTool(
|
|
6575
|
-
"
|
|
7481
|
+
"compile",
|
|
6576
7482
|
{
|
|
6577
|
-
title: "
|
|
6578
|
-
description: "
|
|
6579
|
-
inputSchema: {
|
|
6580
|
-
|
|
7483
|
+
title: "Compile sessions or report bundle status",
|
|
7484
|
+
description: "Without input, returns a status snapshot (search index health, last batch, schema version) without mutating anything. With `source`, imports that provider; `sessions_path` may override its default. Pass `overwrite: true` to force a full rebuild of derived indexes (Tantivy from scratch). With neither `source` nor `sessions_path`, only status is returned.",
|
|
7485
|
+
inputSchema: {
|
|
7486
|
+
source: z.enum(SOURCE_TOOLS).optional(),
|
|
7487
|
+
sessions_path: z.string().min(1).optional(),
|
|
7488
|
+
overwrite: z.boolean().optional()
|
|
7489
|
+
},
|
|
7490
|
+
annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true }
|
|
6581
7491
|
},
|
|
6582
|
-
async () => {
|
|
6583
|
-
|
|
6584
|
-
|
|
6585
|
-
|
|
6586
|
-
|
|
6587
|
-
|
|
7492
|
+
async ({ source, sessions_path, overwrite }) => withToolBundle(bundle, storePath, ensureStore, async (activeBundle) => {
|
|
7493
|
+
if (sessions_path && !source) {
|
|
7494
|
+
return {
|
|
7495
|
+
content: [
|
|
7496
|
+
{
|
|
7497
|
+
type: "text",
|
|
7498
|
+
text: "sessions_path requires source because providers use incompatible source layouts"
|
|
7499
|
+
}
|
|
7500
|
+
],
|
|
7501
|
+
isError: true
|
|
7502
|
+
};
|
|
7503
|
+
}
|
|
7504
|
+
if (!source && !sessions_path) {
|
|
7505
|
+
return {
|
|
7506
|
+
content: [
|
|
7507
|
+
{
|
|
7508
|
+
type: "text",
|
|
7509
|
+
text: JSON.stringify(
|
|
7510
|
+
{ mode: "status", search_index: getSearchIndexStatuses(activeBundle) },
|
|
7511
|
+
null,
|
|
7512
|
+
2
|
|
7513
|
+
)
|
|
7514
|
+
}
|
|
7515
|
+
]
|
|
7516
|
+
};
|
|
7517
|
+
}
|
|
7518
|
+
try {
|
|
7519
|
+
const result = await runCompileImports({
|
|
7520
|
+
bundle: activeBundle,
|
|
7521
|
+
providers: source ? [getCompileProvider(source)] : COMPILE_PROVIDERS,
|
|
7522
|
+
sessionsPath: sessions_path,
|
|
7523
|
+
overwrite
|
|
7524
|
+
});
|
|
7525
|
+
const parquet = result.importedAny ? await exportCompileParquet({ storePath }) : null;
|
|
7526
|
+
return {
|
|
7527
|
+
content: [
|
|
7528
|
+
{
|
|
7529
|
+
type: "text",
|
|
7530
|
+
text: JSON.stringify(
|
|
7531
|
+
{
|
|
7532
|
+
mode: "import",
|
|
7533
|
+
providers: result.providers.map((provider) => ({
|
|
7534
|
+
source: provider.source,
|
|
7535
|
+
source_path: provider.sourcePath,
|
|
7536
|
+
batch_id: provider.batchId,
|
|
7537
|
+
counts: provider.counts
|
|
7538
|
+
})),
|
|
7539
|
+
imported_any: result.importedAny,
|
|
7540
|
+
tantivy: result.tantivy ? { indexed_doc_count: result.tantivy.indexedDocCount } : null,
|
|
7541
|
+
tantivy_error: result.tantivyError,
|
|
7542
|
+
fts5_error: result.fts5Error,
|
|
7543
|
+
parquet: parquet ? {
|
|
7544
|
+
out_dir: parquet.outDir,
|
|
7545
|
+
manifest_path: parquet.manifestPath,
|
|
7546
|
+
table_count: parquet.tableCount,
|
|
7547
|
+
files: parquet.files,
|
|
7548
|
+
counts: parquet.counts
|
|
7549
|
+
} : null,
|
|
7550
|
+
search_index: getSearchIndexStatuses(activeBundle)
|
|
7551
|
+
},
|
|
7552
|
+
null,
|
|
7553
|
+
2
|
|
7554
|
+
)
|
|
7555
|
+
}
|
|
7556
|
+
]
|
|
7557
|
+
};
|
|
7558
|
+
} catch (error) {
|
|
7559
|
+
return {
|
|
7560
|
+
content: [{ type: "text", text: getErrorMessage(error) }],
|
|
7561
|
+
isError: true
|
|
7562
|
+
};
|
|
7563
|
+
}
|
|
7564
|
+
})
|
|
6588
7565
|
);
|
|
6589
7566
|
}
|
|
7567
|
+
async function withToolBundle(fallbackBundle, storePath, ensureStore, fn) {
|
|
7568
|
+
if (!ensureStore) {
|
|
7569
|
+
return await fn(fallbackBundle);
|
|
7570
|
+
}
|
|
7571
|
+
const bundle = await openOrInitBundle(storePath);
|
|
7572
|
+
try {
|
|
7573
|
+
return await fn(bundle);
|
|
7574
|
+
} finally {
|
|
7575
|
+
closeBundle(bundle);
|
|
7576
|
+
}
|
|
7577
|
+
}
|
|
6590
7578
|
function registerProsaPrompts(server) {
|
|
6591
7579
|
server.registerPrompt(
|
|
6592
7580
|
"investigate_prior_work",
|
|
@@ -6619,14 +7607,14 @@ function registerProsaPrompts(server) {
|
|
|
6619
7607
|
path: z.string().min(1).describe("File path, directory, or distinctive path suffix")
|
|
6620
7608
|
}
|
|
6621
7609
|
},
|
|
6622
|
-
({ path:
|
|
7610
|
+
({ path: path21 }) => ({
|
|
6623
7611
|
description: "Find sessions that touched a path and summarize the evidence.",
|
|
6624
7612
|
messages: [
|
|
6625
7613
|
{
|
|
6626
7614
|
role: "user",
|
|
6627
7615
|
content: {
|
|
6628
7616
|
type: "text",
|
|
6629
|
-
text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}",
|
|
7617
|
+
text: FIND_FILE_HISTORY_PROMPT.replace("{{path}}", path21)
|
|
6630
7618
|
}
|
|
6631
7619
|
}
|
|
6632
7620
|
]
|
|
@@ -6759,7 +7747,7 @@ function createMcpServer(bundle, searchEngine, storePath) {
|
|
|
6759
7747
|
},
|
|
6760
7748
|
{ instructions: PROSA_MCP_INSTRUCTIONS }
|
|
6761
7749
|
);
|
|
6762
|
-
registerProsaTools(server, bundle, { searchEngine, storePath });
|
|
7750
|
+
registerProsaTools(server, bundle, { ensureStore: true, searchEngine, storePath });
|
|
6763
7751
|
return server;
|
|
6764
7752
|
}
|
|
6765
7753
|
async function readBody(req) {
|
|
@@ -6796,27 +7784,12 @@ function writeError(res, error) {
|
|
|
6796
7784
|
);
|
|
6797
7785
|
}
|
|
6798
7786
|
|
|
6799
|
-
// src/cli/parsers.ts
|
|
6800
|
-
function parseSearchEngine(value) {
|
|
6801
|
-
if (value === "fts5" || value === "tantivy") return value;
|
|
6802
|
-
throw new Error(`invalid search engine: ${value} (expected fts5 or tantivy)`);
|
|
6803
|
-
}
|
|
6804
|
-
function parseMcpTransport(value) {
|
|
6805
|
-
if (value === "stdio" || value === "http") return value;
|
|
6806
|
-
throw new Error(`invalid transport: ${value} (expected stdio or http)`);
|
|
6807
|
-
}
|
|
6808
|
-
function parseSourceTool(value) {
|
|
6809
|
-
if (value === void 0) return void 0;
|
|
6810
|
-
if (SOURCE_TOOLS.includes(value)) return value;
|
|
6811
|
-
throw new Error(`invalid source tool: ${value} (expected one of ${SOURCE_TOOLS.join(", ")})`);
|
|
6812
|
-
}
|
|
6813
|
-
|
|
6814
7787
|
// src/cli/commands/mcp.ts
|
|
6815
7788
|
function mcpCommand() {
|
|
6816
|
-
const serve = new
|
|
7789
|
+
const serve = new Command6("serve").description("Start a local MCP server over the prosa bundle.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--transport <transport>", "MCP transport: stdio|http", "stdio").option("--host <host>", "bind host", "127.0.0.1").option("--port <port>", "bind port", "7331").option("--path <path>", "HTTP path", "/mcp").option("--search-engine <engine>", "search engine: fts5|tantivy", "fts5").action(
|
|
6817
7790
|
async (options) => {
|
|
6818
|
-
const storePath =
|
|
6819
|
-
const bundle = await
|
|
7791
|
+
const storePath = path19.resolve(options.store);
|
|
7792
|
+
const bundle = await openOrInitBundle(storePath);
|
|
6820
7793
|
try {
|
|
6821
7794
|
const transport = parseMcpTransport(options.transport);
|
|
6822
7795
|
const searchEngine = parseSearchEngine(options.searchEngine);
|
|
@@ -6846,7 +7819,7 @@ function mcpCommand() {
|
|
|
6846
7819
|
}
|
|
6847
7820
|
}
|
|
6848
7821
|
);
|
|
6849
|
-
return new
|
|
7822
|
+
return new Command6("mcp").description("MCP server commands.").addCommand(serve);
|
|
6850
7823
|
}
|
|
6851
7824
|
function registerShutdown(closeServer, bundle) {
|
|
6852
7825
|
const shutdown = async () => {
|
|
@@ -6863,13 +7836,13 @@ function registerShutdown(closeServer, bundle) {
|
|
|
6863
7836
|
}
|
|
6864
7837
|
|
|
6865
7838
|
// src/cli/commands/query.ts
|
|
6866
|
-
import
|
|
6867
|
-
import { Command as
|
|
7839
|
+
import path20 from "path";
|
|
7840
|
+
import { Command as Command7 } from "commander";
|
|
6868
7841
|
function queryCommand() {
|
|
6869
|
-
const duckdb = new
|
|
7842
|
+
const duckdb = new Command7("duckdb").description("Run a DuckDB SQL query over exported Parquet tables.").argument("<sql>", "DuckDB SQL query").option("--store <path>", "bundle directory", defaultBundlePath()).option("--parquet-dir <path>", "Parquet directory (default: <store>/parquet)").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
|
|
6870
7843
|
async (sql, options) => {
|
|
6871
7844
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
6872
|
-
const parquetDir = options.parquetDir ?
|
|
7845
|
+
const parquetDir = options.parquetDir ? path20.resolve(options.parquetDir) : await withBundle(options.store, (bundle) => bundle.paths.parquet);
|
|
6873
7846
|
const result = await queryDuckDbParquet({ parquetDir, sql });
|
|
6874
7847
|
printRows(result.rows, {
|
|
6875
7848
|
format,
|
|
@@ -6878,14 +7851,14 @@ function queryCommand() {
|
|
|
6878
7851
|
});
|
|
6879
7852
|
}
|
|
6880
7853
|
);
|
|
6881
|
-
return new
|
|
7854
|
+
return new Command7("query").description("Run derived analytical queries.").addCommand(duckdb);
|
|
6882
7855
|
}
|
|
6883
7856
|
|
|
6884
7857
|
// src/cli/commands/search.ts
|
|
6885
|
-
import { Command as
|
|
7858
|
+
import { Command as Command8 } from "commander";
|
|
6886
7859
|
init_search();
|
|
6887
7860
|
function searchCommand() {
|
|
6888
|
-
return new
|
|
7861
|
+
return new Command8("search").description("Full-text search across messages, tool calls and tool outputs.").argument("<query>", "FTS5 query string (supports MATCH syntax)").option("--store <path>", "bundle directory", defaultBundlePath()).option("--limit <n>", "maximum hits", "50").option("--engine <engine>", "search engine: fts5|tantivy", "fts5").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
|
|
6889
7862
|
async (query, options) => {
|
|
6890
7863
|
const engine = parseSearchEngine(options.engine);
|
|
6891
7864
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
@@ -6906,10 +7879,10 @@ function searchCommand() {
|
|
|
6906
7879
|
}
|
|
6907
7880
|
|
|
6908
7881
|
// src/cli/commands/sessions.ts
|
|
6909
|
-
import { Command as
|
|
7882
|
+
import { Command as Command9 } from "commander";
|
|
6910
7883
|
init_sessions();
|
|
6911
7884
|
function sessionsCommand() {
|
|
6912
|
-
const command = new
|
|
7885
|
+
const command = new Command9("sessions").description("List sessions in the bundle, with filters.").enablePositionalOptions().option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").option("--limit <n>", "maximum rows", "50").option("--output-format <fmt>", "interactive|table|json|csv", "table").action(
|
|
6913
7886
|
async (options) => {
|
|
6914
7887
|
const format = parseOutputFormat(options.outputFormat, "table");
|
|
6915
7888
|
await withBundle(options.store, (bundle) => {
|
|
@@ -6936,7 +7909,7 @@ function sessionsCommand() {
|
|
|
6936
7909
|
}
|
|
6937
7910
|
);
|
|
6938
7911
|
command.addCommand(
|
|
6939
|
-
new
|
|
7912
|
+
new Command9("count").description("Count sessions in the bundle, with filters.").option("--store <path>", "bundle directory", defaultBundlePath()).option("--source <tool>", "filter by source tool: cursor|codex|claude|gemini").option("--since <iso>", "sessions starting on/after this ISO timestamp").option("--until <iso>", "sessions starting before this ISO timestamp").action(
|
|
6940
7913
|
async (options) => {
|
|
6941
7914
|
await withBundle(options.store, (bundle) => {
|
|
6942
7915
|
const count = countSessions(bundle, {
|
|
@@ -6954,9 +7927,9 @@ function sessionsCommand() {
|
|
|
6954
7927
|
}
|
|
6955
7928
|
|
|
6956
7929
|
// src/cli/commands/tui.ts
|
|
6957
|
-
import { Command as
|
|
7930
|
+
import { Command as Command10 } from "commander";
|
|
6958
7931
|
function tuiCommand() {
|
|
6959
|
-
return new
|
|
7932
|
+
return new Command10("tui").description("Open the interactive Ink-based explorer.").option("--store <path>", "bundle directory", defaultBundlePath()).action(async (options) => {
|
|
6960
7933
|
const [{ render }, React, { App: App2 }] = await Promise.all([
|
|
6961
7934
|
import("ink"),
|
|
6962
7935
|
import("react"),
|
|
@@ -6971,8 +7944,14 @@ function tuiCommand() {
|
|
|
6971
7944
|
}
|
|
6972
7945
|
|
|
6973
7946
|
// src/cli/main.ts
|
|
7947
|
+
function stripLeadingDoubleDash(argv) {
|
|
7948
|
+
if (argv.length >= 3 && argv[2] === "--") {
|
|
7949
|
+
return [argv[0], argv[1], ...argv.slice(3)];
|
|
7950
|
+
}
|
|
7951
|
+
return [...argv];
|
|
7952
|
+
}
|
|
6974
7953
|
async function runCli(argv) {
|
|
6975
|
-
const program = new
|
|
7954
|
+
const program = new Command11().name("prosa").enablePositionalOptions().description(
|
|
6976
7955
|
"Compile, search and export local agent session histories\n(Cursor, Codex CLI, Claude Code, Gemini CLI) into one canonical store."
|
|
6977
7956
|
).version(PROSA_PARSER_VERSION, "-v, --version");
|
|
6978
7957
|
program.addCommand(initCommand());
|
|
@@ -6983,9 +7962,10 @@ async function runCli(argv) {
|
|
|
6983
7962
|
program.addCommand(searchCommand());
|
|
6984
7963
|
program.addCommand(exportCommand());
|
|
6985
7964
|
program.addCommand(queryCommand());
|
|
7965
|
+
program.addCommand(analyticsCommand());
|
|
6986
7966
|
program.addCommand(mcpCommand());
|
|
6987
7967
|
program.addCommand(tuiCommand());
|
|
6988
|
-
await program.parseAsync(
|
|
7968
|
+
await program.parseAsync(stripLeadingDoubleDash(argv));
|
|
6989
7969
|
}
|
|
6990
7970
|
var isEntry = import.meta.url === `file://${process.argv[1]}`;
|
|
6991
7971
|
if (isEntry) {
|