mobygate 0.8.3 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +511 -0
- package/bin/mobygate.js +214 -0
- package/inspector.html +200 -3
- package/lib/anthropic.js +6 -1
- package/lib/captures-index.js +524 -0
- package/lib/inference-runner.js +753 -0
- package/lib/openai-translation.js +146 -0
- package/lib/quiet.js +249 -0
- package/lib/request-capture.js +24 -0
- package/package.json +3 -1
- package/server.js +335 -1116
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Captures index — SQLite-backed search over the capture summary fields.
|
|
3
|
+
*
|
|
4
|
+
* Why: ~/.mobygate/captures/ accumulates hundreds of (json, summary.txt)
|
|
5
|
+
* pairs. Filesystem ls/grep gets unwieldy fast. This module mirrors a
|
|
6
|
+
* small set of structured fields from each capture into SQLite so the
|
|
7
|
+
* `mobygate captures query` CLI can filter by model, session, status,
|
|
8
|
+
* stop_reason, duration, recency, etc.
|
|
9
|
+
*
|
|
10
|
+
* Design constraints:
|
|
11
|
+
* - Best-effort: if better-sqlite3 fails to load (native build broke,
|
|
12
|
+
* unsupported platform), every export becomes a no-op and the proxy
|
|
13
|
+
* keeps running. Indexing is purely a convenience layer.
|
|
14
|
+
* - Synchronous via better-sqlite3. Each capture write is two ~100µs
|
|
15
|
+
* calls — well below request-handling overhead.
|
|
16
|
+
* - Single writer (the proxy) and occasional reader (the CLI). WAL
|
|
17
|
+
* mode lets the CLI read while the proxy writes.
|
|
18
|
+
 * - Schema is forward-compatible: ensureSchema creates the table and indexes
 *   with IF NOT EXISTS, and new columns are meant to be added via ALTER TABLE.
 *   Old captures keep working.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { existsSync, readdirSync, readFileSync, statSync, mkdirSync } from 'fs';
|
|
23
|
+
import { join } from 'path';
|
|
24
|
+
import { homedir } from 'os';
|
|
25
|
+
|
|
26
|
+
// Root directory for mobygate's per-user state: ~/.mobygate.
const MOBYGATE_DIR = join(homedir(), '.mobygate');
// Directory scanned by the backfill for capture files (*.json plus *.summary.txt).
const CAPTURE_DIR = join(MOBYGATE_DIR, 'captures');
// Location of the SQLite index file (exported for use outside this module).
export const INDEX_DB_PATH = join(MOBYGATE_DIR, 'captures.sqlite');
|
|
29
|
+
|
|
30
|
+
// Lazily loaded better-sqlite3 — gracefully degrades if it fails to load.
// DatabaseCtor: the better-sqlite3 constructor once imported, otherwise null.
let DatabaseCtor = null;
// loadAttempted: set by the first loadDatabase() call so the import runs once.
let loadAttempted = false;
// db: the open database handle cached by getDB(), or null when none is open.
let db = null;
// loadError: the most recent import/open failure, exposed via getLoadError().
let loadError = null;
|
|
35
|
+
|
|
36
|
+
// Shared promise for the one-time better-sqlite3 import (pending or settled).
let loadPromise = null;

/**
 * Import the optional better-sqlite3 dependency at most once.
 *
 * Every caller — including callers that arrive while the import is still in
 * flight — awaits the same promise. Previously a second caller that arrived
 * during the import saw `loadAttempted === true` and received `null`, so its
 * index write was silently dropped.
 *
 * @returns {Promise<Function|null>} the better-sqlite3 constructor, or null
 *   when the module could not be loaded (the failure is kept in `loadError`).
 */
async function loadDatabase() {
  if (!loadPromise) {
    loadAttempted = true;
    loadPromise = import('better-sqlite3').then(
      (mod) => {
        DatabaseCtor = mod.default || mod;
        return DatabaseCtor;
      },
      (e) => {
        // Best-effort: remember why loading failed and degrade to a no-op index.
        loadError = e;
        DatabaseCtor = null;
        return null;
      },
    );
  }
  return loadPromise;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Return the shared database handle, opening and initialising it on first use.
 *
 * Never throws: any failure (directory creation, open, pragma, schema) is
 * stored in `loadError` and reported as a null return so callers can treat
 * the index as unavailable.
 *
 * @returns {Promise<object|null>} the open handle, or null when unavailable.
 */
async function getDB() {
  if (db) return db;
  const Ctor = await loadDatabase();
  if (!Ctor) return null;
  // Another caller may have opened the handle while we awaited the import;
  // reuse it instead of opening (and leaking) a second connection.
  if (db) return db;

  let handle = null;
  try {
    // Inside the try so a read-only or unwritable home directory degrades to
    // "index unavailable" instead of rejecting into the request path.
    if (!existsSync(MOBYGATE_DIR)) mkdirSync(MOBYGATE_DIR, { recursive: true });
    handle = new Ctor(INDEX_DB_PATH);
    handle.pragma('journal_mode = WAL');
    handle.pragma('synchronous = NORMAL');
    ensureSchema(handle);
    db = handle;
  } catch (e) {
    loadError = e;
    db = null;
    // Release a half-initialised connection rather than leaking it.
    try {
      handle?.close();
    } catch {
      // Nothing more to do; the original error is already recorded.
    }
  }
  return db;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Create the `captures` table and its lookup indexes when they are missing.
 * Every statement uses IF NOT EXISTS, so running this on each open is safe.
 *
 * @param {object} d - open database handle exposing `exec(sql)`.
 */
function ensureSchema(d) {
  const ddl = `
    CREATE TABLE IF NOT EXISTS captures (
      request_id TEXT PRIMARY KEY,
      ts TEXT NOT NULL,
      path TEXT NOT NULL,
      session_key TEXT,
      session_source TEXT,
      model TEXT,
      resolved_model TEXT,
      stream INTEGER,
      max_tokens INTEGER,
      temperature REAL,
      system_bytes INTEGER,
      message_count INTEGER,
      message_bytes INTEGER,
      image_count INTEGER,
      tool_use_count INTEGER,
      tool_result_count INTEGER,
      tools_declared INTEGER,
      tool_schema_bytes INTEGER,
      total_input_bytes INTEGER,
      total_input_tokens INTEGER,
      first_user_text TEXT,
      last_user_text TEXT,
      status TEXT,
      duration_ms INTEGER,
      stop_reason TEXT,
      input_tokens INTEGER,
      cache_read_tokens INTEGER,
      cache_create_tokens INTEGER,
      output_tokens INTEGER,
      cache_hit_pct REAL,
      json_path TEXT,
      summary_path TEXT,
      indexed_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE INDEX IF NOT EXISTS idx_captures_ts ON captures(ts DESC);
    CREATE INDEX IF NOT EXISTS idx_captures_session ON captures(session_key, ts DESC);
    CREATE INDEX IF NOT EXISTS idx_captures_status ON captures(status, ts DESC);
    CREATE INDEX IF NOT EXISTS idx_captures_model ON captures(model);
    CREATE INDEX IF NOT EXISTS idx_captures_stop ON captures(stop_reason);
  `;
  d.exec(ddl);
}
|
|
113
|
+
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Field extraction — pull index columns out of an Anthropic-shape body.
|
|
116
|
+
// Mirrors the analyzeAnthropic() logic in request-capture.js but produces
|
|
117
|
+
// structured fields instead of formatted text.
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
/**
 * Approximate the size of a message's `content`.
 *
 * Sizes are string lengths (UTF-16 code units), not encoded byte counts.
 * A string counts as its own length; an array is summed block by block;
 * anything else counts as 0.
 *
 * @param {string|Array|*} content - message content.
 * @returns {number} the accumulated size.
 */
function bytesOfContent(content) {
  if (typeof content === 'string') return content.length;
  if (!Array.isArray(content)) return 0;

  let total = 0;
  for (const block of content) {
    if (!block) continue;
    switch (block.type) {
      case 'text':
        total += block.text?.length || 0;
        break;
      case 'tool_result': {
        const inner = typeof block.content === 'string'
          ? block.content
          : JSON.stringify(block.content || '');
        total += inner.length;
        break;
      }
      case 'tool_use':
        total += JSON.stringify(block.input || {}).length;
        break;
      case 'image':
        total += 100; // placeholder for image marker
        break;
      default:
        total += JSON.stringify(block).length;
    }
  }
  return total;
}
|
|
135
|
+
|
|
136
|
+
// Maximum number of characters kept in a text preview.
const PREVIEW_MAX_CHARS = 500;

/**
 * Build a short text preview of a message's `content`.
 *
 * Both content shapes are capped at PREVIEW_MAX_CHARS. Previously only the
 * array form was capped, so a plain-string message was returned in full.
 *
 * @param {string|Array|*} content - a string, or an array of content blocks.
 * @returns {string} the preview; '' when content is neither string nor array.
 */
function extractTextFromContent(content) {
  let text;
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    // Only text blocks contribute; images and tool blocks are skipped.
    text = content
      .filter((b) => b?.type === 'text')
      .map((b) => b.text || '')
      .join(' ');
  } else {
    return '';
  }
  return text.slice(0, PREVIEW_MAX_CHARS);
}
|
|
145
|
+
|
|
146
|
+
/**
 * Derive the structured index columns from a request body.
 *
 * Sizes are string lengths and `total_input_tokens` is a rough length / 4
 * estimate. Entries in `messages` that are null or not objects still count
 * towards `message_count` but are otherwise skipped, so one malformed entry
 * no longer throws and prevents the capture from being indexed.
 *
 * @param {object|null|undefined} body - request body (may be missing).
 * @returns {object} field values keyed by `captures` column name.
 */
export function extractIndexFields(body) {
  const f = {
    model: body?.model || null,
    stream: body?.stream ? 1 : 0,
    max_tokens: body?.max_tokens ?? body?.max_completion_tokens ?? null,
    temperature: typeof body?.temperature === 'number' ? body.temperature : null,
  };

  // System prompt: either one string or an array of blocks carrying `text`.
  const sys = body?.system;
  if (typeof sys === 'string') f.system_bytes = sys.length;
  else if (Array.isArray(sys)) f.system_bytes = sys.reduce((a, b) => a + (b?.text?.length || 0), 0);
  else f.system_bytes = 0;

  // Messages: count every entry, but only inspect well-formed ones.
  const rawMsgs = Array.isArray(body?.messages) ? body.messages : [];
  const msgs = rawMsgs.filter((m) => m != null && typeof m === 'object');
  f.message_count = rawMsgs.length;

  let messageBytes = 0;
  let imageCount = 0;
  let toolUseCount = 0;
  let toolResultCount = 0;
  for (const m of msgs) {
    messageBytes += bytesOfContent(m.content);
    if (!Array.isArray(m.content)) continue;
    for (const b of m.content) {
      if (b?.type === 'image') imageCount += 1;
      if (b?.type === 'tool_use') toolUseCount += 1;
      if (b?.type === 'tool_result') toolResultCount += 1;
    }
  }
  f.message_bytes = messageBytes;
  f.image_count = imageCount;
  f.tool_use_count = toolUseCount;
  f.tool_result_count = toolResultCount;

  // First + last user message previews — useful for "find that one request".
  const userMsgs = msgs.filter((m) => m.role === 'user');
  f.first_user_text = userMsgs.length ? extractTextFromContent(userMsgs[0].content) : null;
  f.last_user_text = userMsgs.length ? extractTextFromContent(userMsgs[userMsgs.length - 1].content) : null;

  // Tools
  if (Array.isArray(body?.tools)) {
    f.tools_declared = body.tools.length;
    f.tool_schema_bytes = JSON.stringify(body.tools).length;
  } else {
    f.tools_declared = 0;
    f.tool_schema_bytes = 0;
  }

  f.total_input_bytes = (f.system_bytes || 0) + (f.message_bytes || 0) + (f.tool_schema_bytes || 0);
  f.total_input_tokens = Math.ceil(f.total_input_bytes / 4); // rough estimate, matches summary

  return f;
}
|
|
200
|
+
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
// Write paths — called from request-capture.js
|
|
203
|
+
// ---------------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
// Prepared INSERT statements, memoised per database handle.
const insertStmtCache = new WeakMap();

/**
 * Return the prepared upsert statement for the `captures` request columns.
 *
 * The statement is prepared once per database handle and reused afterwards,
 * so each capture write no longer re-compiles the same SQL.
 *
 * @param {object} d - database handle exposing `prepare(sql)`.
 * @returns {object} the prepared statement (named `@param` bindings).
 */
const insertStmt = (d) => {
  let stmt = insertStmtCache.get(d);
  if (!stmt) {
    stmt = d.prepare(`
      INSERT OR REPLACE INTO captures (
        request_id, ts, path, session_key, session_source,
        model, stream, max_tokens, temperature,
        system_bytes, message_count, message_bytes,
        image_count, tool_use_count, tool_result_count,
        tools_declared, tool_schema_bytes,
        total_input_bytes, total_input_tokens,
        first_user_text, last_user_text,
        json_path, summary_path
      ) VALUES (
        @request_id, @ts, @path, @session_key, @session_source,
        @model, @stream, @max_tokens, @temperature,
        @system_bytes, @message_count, @message_bytes,
        @image_count, @tool_use_count, @tool_result_count,
        @tools_declared, @tool_schema_bytes,
        @total_input_bytes, @total_input_tokens,
        @first_user_text, @last_user_text,
        @json_path, @summary_path
      )
    `);
    insertStmtCache.set(d, stmt);
  }
  return stmt;
};
|
|
226
|
+
|
|
227
|
+
/**
 * Record a captured request in the index. Best-effort: never rejects.
 *
 * Opening the database now happens inside the guarded section, so a failure
 * while obtaining the handle can no longer escape into the request path.
 * Calling with no argument is tolerated as well.
 *
 * @param {object} [capture]
 * @param {string} capture.requestId - primary key of the capture row.
 * @param {string} [capture.ts] - ISO timestamp; defaults to the current time.
 * @param {string} [capture.path] - request route; defaults to ''.
 * @param {object} [capture.body] - request body passed to extractIndexFields.
 * @param {string} [capture.sessionKey]
 * @param {string} [capture.sessionSource]
 * @param {string} [capture.jsonPath] - path of the capture's .json file.
 * @param {string} [capture.summaryPath] - path of the capture's summary file.
 * @returns {Promise<void>}
 */
export async function indexCapture({ requestId, ts, path, body, sessionKey, sessionSource, jsonPath, summaryPath } = {}) {
  try {
    const d = await getDB();
    if (!d) return;
    const fields = extractIndexFields(body || {});
    insertStmt(d).run({
      request_id: requestId,
      ts: ts || new Date().toISOString(),
      path: path || '',
      session_key: sessionKey || null,
      session_source: sessionSource || null,
      json_path: jsonPath || null,
      summary_path: summaryPath || null,
      ...fields,
    });
  } catch (e) {
    // Best-effort — never break the request path.
    if (process.env.MOBY_DEBUG_INDEX) console.warn(`[captures-index] insert failed: ${e.message}`);
  }
}
|
|
247
|
+
|
|
248
|
+
// Prepared UPDATE statements, memoised per database handle.
const updateResponseStmtCache = new WeakMap();

/**
 * Return the prepared statement that fills in a capture's response columns.
 *
 * The statement is prepared once per database handle and reused afterwards.
 * `resolved_model` keeps its stored value when the new value is NULL.
 *
 * @param {object} d - database handle exposing `prepare(sql)`.
 * @returns {object} the prepared statement (named `@param` bindings).
 */
const updateResponseStmt = (d) => {
  let stmt = updateResponseStmtCache.get(d);
  if (!stmt) {
    stmt = d.prepare(`
      UPDATE captures
      SET status = @status,
      duration_ms = @duration_ms,
      stop_reason = @stop_reason,
      resolved_model = COALESCE(@resolved_model, resolved_model),
      input_tokens = @input_tokens,
      cache_read_tokens = @cache_read_tokens,
      cache_create_tokens = @cache_create_tokens,
      output_tokens = @output_tokens,
      cache_hit_pct = @cache_hit_pct
      WHERE request_id = @request_id
    `);
    updateResponseStmtCache.set(d, stmt);
  }
  return stmt;
};
|
|
261
|
+
|
|
262
|
+
/**
 * Attach response metadata (status, timing, token usage) to an indexed
 * capture. Best-effort: never rejects.
 *
 * Opening the database now happens inside the guarded section, so a failure
 * while obtaining the handle can no longer escape into the request path.
 *
 * @param {object} [response]
 * @param {string} response.requestId - capture row to update.
 * @param {object} [response.usage] - reads input_tokens,
 *   cache_read_input_tokens, cache_creation_input_tokens and output_tokens.
 * @param {number} [response.durationMs]
 * @param {string} [response.status]
 * @param {string} [response.stopReason]
 * @param {string} [response.model] - stored as resolved_model when present.
 * @returns {Promise<void>}
 */
export async function updateCaptureResponse({ requestId, usage, durationMs, status, stopReason, model } = {}) {
  try {
    const d = await getDB();
    if (!d) return;
    const u = usage || {};
    const cacheRead = u.cache_read_input_tokens || 0;
    const totalInput = (u.input_tokens || 0) + cacheRead + (u.cache_creation_input_tokens || 0);
    // Share of input tokens served from cache, as a percentage with 2 decimals.
    const cacheHitPct = totalInput > 0
      ? +((cacheRead / totalInput) * 100).toFixed(2)
      : 0;
    updateResponseStmt(d).run({
      request_id: requestId,
      status: status || null,
      duration_ms: durationMs ?? null,
      stop_reason: stopReason || null,
      resolved_model: model || null,
      input_tokens: u.input_tokens ?? 0,
      cache_read_tokens: u.cache_read_input_tokens ?? 0,
      cache_create_tokens: u.cache_creation_input_tokens ?? 0,
      output_tokens: u.output_tokens ?? 0,
      cache_hit_pct: cacheHitPct,
    });
  } catch (e) {
    // Best-effort — never break the request path.
    if (process.env.MOBY_DEBUG_INDEX) console.warn(`[captures-index] update failed: ${e.message}`);
  }
}
|
|
287
|
+
|
|
288
|
+
// ---------------------------------------------------------------------------
|
|
289
|
+
// Read paths — called from the CLI
|
|
290
|
+
// ---------------------------------------------------------------------------
|
|
291
|
+
|
|
292
|
+
/**
 * Build a SELECT with optional filters. Returns rows ordered by ts DESC.
 * Filters:
 *   - text:       LIKE %text% on first_user_text / last_user_text / session_key
 *   - sinceMs:    ts >= now() - sinceMs
 *   - model:      model LIKE %model% (matches resolved_model too)
 *   - sessionKey: exact match on session_key
 *   - status:     exact match on status
 *   - stopReason: exact match on stop_reason
 *   - minDurationMs / maxDurationMs
 *   - hasTools:   tools_declared > 0
 *   - limit:      default 20, truncated to an integer and clamped to 1..1000
 *
 * Never rejects: an unavailable index, an invalid `sinceMs`, or a failing
 * query is reported as `{ rows: [], error }`.
 *
 * @param {object} [filters]
 * @returns {Promise<{rows: object[], error?: string}>}
 */
export async function queryCaptures(filters = {}) {
  let d;
  try {
    d = await getDB();
  } catch (e) {
    return { rows: [], error: e.message };
  }
  if (!d) return { rows: [], error: loadError?.message || 'index unavailable' };

  const opts = filters ?? {};
  const where = [];
  const params = {};

  if (opts.text) {
    where.push(`(first_user_text LIKE @text OR last_user_text LIKE @text OR session_key LIKE @text)`);
    params.text = `%${opts.text}%`;
  }
  if (opts.sinceMs) {
    // A non-numeric or out-of-range window yields an invalid Date, whose
    // toISOString() would throw; report it instead.
    const cutoffDate = new Date(Date.now() - Number(opts.sinceMs));
    if (Number.isNaN(cutoffDate.getTime())) {
      return { rows: [], error: `invalid sinceMs: ${String(opts.sinceMs)}` };
    }
    where.push(`ts >= @cutoff`);
    params.cutoff = cutoffDate.toISOString();
  }
  if (opts.model) {
    where.push(`(model LIKE @model OR resolved_model LIKE @model)`);
    params.model = `%${opts.model}%`;
  }
  if (opts.sessionKey) {
    where.push(`session_key = @sessionKey`);
    params.sessionKey = opts.sessionKey;
  }
  if (opts.status) {
    where.push(`status = @status`);
    params.status = opts.status;
  }
  if (opts.stopReason) {
    where.push(`stop_reason = @stopReason`);
    params.stopReason = opts.stopReason;
  }
  if (opts.minDurationMs != null) {
    where.push(`duration_ms >= @minDur`);
    params.minDur = opts.minDurationMs;
  }
  if (opts.maxDurationMs != null) {
    where.push(`duration_ms <= @maxDur`);
    params.maxDur = opts.maxDurationMs;
  }
  if (opts.hasTools) {
    where.push(`tools_declared > 0`);
  }

  // Only a finite, non-zero number is honoured; it is truncated so the
  // interpolated LIMIT is always a plain integer between 1 and 1000.
  const requested = Number(opts.limit);
  const limit = Number.isFinite(requested) && requested !== 0
    ? Math.max(1, Math.min(Math.trunc(requested), 1000))
    : 20;

  const sql = `
    SELECT request_id, ts, path, session_key, model, resolved_model,
    status, duration_ms, stop_reason,
    total_input_tokens, output_tokens, cache_hit_pct,
    tools_declared, message_count,
    first_user_text, last_user_text,
    summary_path, json_path
    FROM captures
    ${where.length ? 'WHERE ' + where.join(' AND ') : ''}
    ORDER BY ts DESC
    LIMIT ${limit}
  `;
  try {
    const rows = d.prepare(sql).all(params);
    return { rows };
  } catch (e) {
    return { rows: [], error: e.message };
  }
}
|
|
369
|
+
|
|
370
|
+
/**
 * Fetch the full index row for a single capture.
 *
 * @param {string} requestId - primary key of the capture.
 * @returns {Promise<object|null>} the row, or null when the index is
 *   unavailable, the row does not exist, or the query fails.
 */
export async function getCapture(requestId) {
  const database = await getDB();
  if (!database) return null;

  let row;
  try {
    row = database.prepare(`SELECT * FROM captures WHERE request_id = ?`).get(requestId);
  } catch {
    return null;
  }
  return row || null;
}
|
|
379
|
+
|
|
380
|
+
/**
 * Summarise the index: row count, breakdowns by status / model / stop
 * reason, and token and latency aggregates over rows that have a status.
 *
 * @returns {Promise<object>} `{ total, byStatus, byModel, byStop, tokens }`,
 *   or `{ error }` when the index is unavailable or a query fails.
 */
export async function captureStats() {
  const database = await getDB();
  if (!database) return { error: loadError?.message || 'index unavailable' };

  // Small helpers so each aggregate below reads as a single expression.
  const one = (sql) => database.prepare(sql).get();
  const many = (sql) => database.prepare(sql).all();

  try {
    const total = one(`SELECT COUNT(*) as n FROM captures`).n;
    const byStatus = many(`SELECT status, COUNT(*) as n FROM captures GROUP BY status ORDER BY n DESC`);
    const byModel = many(`SELECT COALESCE(resolved_model, model) as m, COUNT(*) as n FROM captures GROUP BY m ORDER BY n DESC LIMIT 10`);
    const byStop = many(`SELECT stop_reason, COUNT(*) as n FROM captures WHERE stop_reason IS NOT NULL GROUP BY stop_reason ORDER BY n DESC`);
    const tokens = one(`
      SELECT SUM(input_tokens) as in_t, SUM(output_tokens) as out_t,
      SUM(cache_read_tokens) as cr_t, SUM(cache_create_tokens) as cc_t,
      AVG(cache_hit_pct) as avg_hit, AVG(duration_ms) as avg_ms
      FROM captures WHERE status IS NOT NULL
    `);
    return { total, byStatus, byModel, byStop, tokens };
  } catch (e) {
    return { error: e.message };
  }
}
|
|
399
|
+
|
|
400
|
+
// ---------------------------------------------------------------------------
|
|
401
|
+
// Backfill — rebuild index from existing capture files
|
|
402
|
+
// ---------------------------------------------------------------------------
|
|
403
|
+
|
|
404
|
+
// Capture filenames follow YYYY-MM-DD_HH-MM-SS_<route-slug>_<requestId>.json.
const CAPTURE_FILENAME_RE = /^(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_(.+?)_([0-9a-f]+)\.json$/;

// Derive ts, requestId and route path from a capture filename, falling back
// to the file's mtime and bare filename when the name does not match.
function parseCaptureFilename(fname, mtime) {
  const m = fname.match(CAPTURE_FILENAME_RE);
  if (!m) {
    return { ts: mtime.toISOString(), requestId: fname.replace(/\.json$/, ''), routePath: '' };
  }
  return {
    // NOTE(review): the filename time is stamped as UTC ("Z") — confirm the
    // writer also uses UTC.
    ts: `${m[1]}T${m[2].replace(/-/g, ':')}.000Z`,
    requestId: m[4],
    routePath: '/' + m[3].replace(/-/g, '/'),
  };
}

// Pull session, status and usage fields out of a summary file's text
// (best-effort: every field is null when its line is absent).
function parseCaptureSummary(txt) {
  let sessionKey = matchOne(txt, /session_key:\s+(\S+)/) || null;
  if (sessionKey === '(none)') sessionKey = null;
  let sessionSource = matchOne(txt, /session_source:\s+(\S+)/) || null;
  if (sessionSource === '(unknown)') sessionSource = null;
  return {
    sessionKey,
    sessionSource,
    status: matchOne(txt, /^status:\s+(\S+)/m) || null,
    stopReason: matchOne(txt, /^stop_reason:\s+(\S+)/m) || null,
    durationMs: parseIntOrNull(matchOne(txt, /^duration:\s+(\d+)\s+ms/m)),
    resolvedModel: matchOne(txt, /^model:\s+(\S+)/m) || null,
    usage: {
      input_tokens: parseIntOrNull(matchOne(txt, /input_tokens \(uncached\):\s+(\d+)/)),
      cache_read_input_tokens: parseIntOrNull(matchOne(txt, /cache_read_input_tokens:\s+(\d+)/)),
      cache_creation_input_tokens: parseIntOrNull(matchOne(txt, /cache_creation_input_tokens:\s+(\d+)/)),
      output_tokens: parseIntOrNull(matchOne(txt, /^\s*output_tokens:\s+(\d+)/m)),
    },
  };
}

// Upsert one capture file (and its response columns, when the summary has
// any) using the already-prepared statements. Throws on unreadable input.
function backfillCaptureFile(ins, upd, fname) {
  const jsonPath = join(CAPTURE_DIR, fname);
  const summaryPath = jsonPath.replace(/\.json$/, '.summary.txt');
  const stat = statSync(jsonPath);
  const body = JSON.parse(readFileSync(jsonPath, 'utf8'));
  const { ts, requestId, routePath } = parseCaptureFilename(fname, stat.mtime);

  // Checked once and reused for both parsing and the stored summary_path.
  const hasSummary = existsSync(summaryPath);
  const summary = parseCaptureSummary(hasSummary ? readFileSync(summaryPath, 'utf8') : '');
  const { status, stopReason, durationMs, usage } = summary;

  ins.run({
    request_id: requestId,
    ts,
    path: routePath,
    session_key: summary.sessionKey,
    session_source: summary.sessionSource,
    json_path: jsonPath,
    summary_path: hasSummary ? summaryPath : null,
    ...extractIndexFields(body || {}),
  });

  if (status || stopReason || durationMs != null) {
    const cacheRead = usage.cache_read_input_tokens || 0;
    const totalInput = (usage.input_tokens || 0) + cacheRead + (usage.cache_creation_input_tokens || 0);
    const cacheHitPct = totalInput > 0
      ? +((cacheRead / totalInput) * 100).toFixed(2)
      : 0;
    upd.run({
      request_id: requestId,
      status,
      duration_ms: durationMs,
      stop_reason: stopReason,
      resolved_model: summary.resolvedModel,
      input_tokens: usage.input_tokens ?? 0,
      cache_read_tokens: usage.cache_read_input_tokens ?? 0,
      cache_create_tokens: usage.cache_creation_input_tokens ?? 0,
      output_tokens: usage.output_tokens ?? 0,
      cache_hit_pct: cacheHitPct,
    });
  }
}

/**
 * Walk ~/.mobygate/captures/, parse each .json + .summary.txt pair, and
 * upsert into the index. Used for first-time setup or recovery from a
 * lost DB. Idempotent — REPLACE on request_id keeps it safe to re-run.
 *
 * A file that cannot be read or parsed is counted in `errors` and skipped.
 * If the directory listing or the transaction itself fails, the counters
 * gathered so far are returned together with an `error` message instead of
 * the promise rejecting.
 *
 * @param {Function} [onProgress] - called with the running result object
 *   after every 25 indexed files.
 * @returns {Promise<object>} `{ scanned, indexed, errors }`, plus `error` on
 *   failure; `{ error }` alone when the index is unavailable.
 */
export async function rebuildFromFilesystem(onProgress) {
  const d = await getDB();
  if (!d) return { error: loadError?.message || 'index unavailable' };

  const result = { scanned: 0, indexed: 0, errors: 0 };
  if (!existsSync(CAPTURE_DIR)) return result;

  try {
    const files = readdirSync(CAPTURE_DIR).filter((f) => f.endsWith('.json'));
    result.scanned = files.length;

    // Prepare statements once. better-sqlite3 transactions must be
    // synchronous, so we sidestep the async indexCapture /
    // updateCaptureResponse wrappers and call the statements directly.
    const ins = insertStmt(d);
    const upd = updateResponseStmt(d);

    const tx = d.transaction((jsonFiles) => {
      for (const fname of jsonFiles) {
        try {
          backfillCaptureFile(ins, upd, fname);
          result.indexed += 1;
          if (onProgress && result.indexed % 25 === 0) onProgress(result);
        } catch (e) {
          result.errors += 1;
          if (process.env.MOBY_DEBUG_INDEX) console.warn(`[captures-index] backfill ${fname}: ${e.message}`);
        }
      }
    });
    tx(files);
  } catch (e) {
    return { ...result, error: e.message };
  }
  return result;
}
|
|
508
|
+
|
|
509
|
+
/**
 * Return the first capture group of `re` applied to `text`.
 *
 * @param {string} text - text to search.
 * @param {RegExp} re - pattern whose first group holds the wanted value.
 * @returns {string|null|undefined} the group's value, or null when the
 *   pattern does not match.
 */
function matchOne(text, re) {
  const found = text.match(re);
  if (!found) return null;
  return found[1];
}
|
|
513
|
+
/**
 * Parse a base-10 integer, mapping missing or unparsable input to null.
 *
 * @param {string|null|undefined} s - text to parse.
 * @returns {number|null} the parsed integer, or null when `s` is
 *   null/undefined or does not yield a finite number.
 */
function parseIntOrNull(s) {
  if (s === null || s === undefined) return null;
  const parsed = parseInt(s, 10);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
|
|
518
|
+
|
|
519
|
+
/**
 * Report whether any load or open failure has been recorded so far.
 * Returns true as long as `loadError` is unset, including before the first
 * load attempt.
 *
 * @returns {boolean} true when no failure is recorded.
 */
export function isIndexAvailable() {
  const failed = loadError !== null && loadError !== undefined;
  return !failed;
}
|
|
522
|
+
/**
 * Expose the most recently recorded load/open failure.
 *
 * @returns {Error|null} the stored error, or null when none was recorded.
 */
export function getLoadError() {
  return loadError;
}
|