agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +34 -0
  2. package/bin/aboss.js +288 -0
  3. package/client/dist/assets/index-C1wFD_Vo.css +1 -0
  4. package/client/dist/assets/index-DBj1Ujlx.js +137 -0
  5. package/client/dist/index.html +34 -0
  6. package/package.json +64 -0
  7. package/server/analysis/daily-aggregator.js +258 -0
  8. package/server/analysis/difficulty.js +129 -0
  9. package/server/analysis/dimensions/ai-knowledge.js +172 -0
  10. package/server/analysis/dimensions/ai-tools.js +161 -0
  11. package/server/analysis/dimensions/judgement.js +107 -0
  12. package/server/analysis/dimensions/llm-merge.js +57 -0
  13. package/server/analysis/dimensions/output-quality.js +167 -0
  14. package/server/analysis/dimensions/problem-definition.js +104 -0
  15. package/server/analysis/dimensions/system-thinking.js +225 -0
  16. package/server/analysis/evidence-builder.js +104 -0
  17. package/server/analysis/job.js +273 -0
  18. package/server/analysis/report-builder.js +581 -0
  19. package/server/analysis/scoring-v2.js +72 -0
  20. package/server/analysis/text-signals.js +179 -0
  21. package/server/analysis/thresholds-v2.js +358 -0
  22. package/server/api/advice.js +124 -0
  23. package/server/api/analysis.js +141 -0
  24. package/server/api/execution.js +330 -0
  25. package/server/api/metrics.js +277 -0
  26. package/server/api/overview.js +308 -0
  27. package/server/api/project.js +255 -0
  28. package/server/api/reports.js +125 -0
  29. package/server/api/sessions.js +118 -0
  30. package/server/api/settings.js +119 -0
  31. package/server/db/connection.js +175 -0
  32. package/server/db/queries.js +1051 -0
  33. package/server/db/schema.js +487 -0
  34. package/server/etl/active-time.js +150 -0
  35. package/server/etl/backfill-subagents.js +178 -0
  36. package/server/etl/claude-code.js +826 -0
  37. package/server/etl/detect.js +341 -0
  38. package/server/etl/judge-filter.js +117 -0
  39. package/server/etl/opencode.js +606 -0
  40. package/server/execution/job.js +662 -0
  41. package/server/execution/prompt.js +227 -0
  42. package/server/execution/runner.js +218 -0
  43. package/server/index.js +94 -0
  44. package/server/llm/advice-prompt.js +339 -0
  45. package/server/llm/advice.js +384 -0
  46. package/server/llm/analysis-prompt.js +162 -0
  47. package/server/llm/cli-runner.js +249 -0
  48. package/server/llm/judge-prompts.js +179 -0
  49. package/server/llm/judge.js +118 -0
  50. package/server/llm/project-advice-prompt.js +332 -0
  51. package/server/llm/project-advice.js +491 -0
  52. package/server/llm/session-analyzer.js +122 -0
  53. package/server/utils/project.js +80 -0
@@ -0,0 +1,606 @@
1
+ /**
2
+ * OpenCode ETL collector for Agent Boss
3
+ *
4
+ * Reads session, message, part, and tool-call data from the OpenCode SQLite
5
+ * database (opencode.db — can be 2+ GB) and writes unified rows into boss.db.
6
+ *
7
+ * IMPORTANT: The source database is opened with the native `sqlite3` npm
8
+ * package (async, streaming, read-only) so that the entire file is never
9
+ * loaded into memory. The destination boss.db uses sql.js (in-memory) and
10
+ * is accessed via helpers in server/db/queries.js.
11
+ *
12
+ * @author Felix
13
+ */
14
+
15
+ const sqlite3 = require('sqlite3');
16
+ const { saveDb } = require('../db/connection');
17
+ const {
18
+ isJudgePrompt,
19
+ isInternalAbossTitle,
20
+ isPlaceholderOpencodeTitle,
21
+ } = require('./judge-filter');
22
+ const {
23
+ upsertSession,
24
+ bulkInsertMessages,
25
+ bulkInsertParts,
26
+ bulkInsertToolCalls,
27
+ getEtlState,
28
+ updateEtlState,
29
+ } = require('../db/queries');
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Constants
33
+ // ---------------------------------------------------------------------------
34
+
35
+ const SOURCE = 'opencode'; // tag written to the `source` field of every mapped row; also passed to getEtlState/updateEtlState
36
+ const BATCH_SIZE = 50; // number of session rows fetched per LIMIT/OFFSET page in collectOpenCode
37
+ const SQLITE_BUSY = 5; // SQLite error code for SQLITE_BUSY
38
+ const MAX_RETRIES = 3; // default `retries` for withRetry (retries after the first attempt)
39
+ const RETRY_DELAY_MS = 1000; // base back-off delay; withRetry multiplies it by 2^attempt
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // sqlite3 Promise wrappers
43
+ // ---------------------------------------------------------------------------
44
+
45
/**
 * Opens the SQLite file at `dbPath` with the OPEN_READONLY flag.
 *
 * @param {string} dbPath Filesystem path of the database to open
 * @returns {Promise<sqlite3.Database>} Resolves with the database handle once
 *   the open callback reports success; rejects with the error it reports
 *   otherwise.
 */
function openReadOnly(dbPath) {
  return new Promise((resolve, reject) => {
    const handle = new sqlite3.Database(dbPath, sqlite3.OPEN_READONLY, (openErr) => {
      if (openErr) {
        reject(openErr);
      } else {
        resolve(handle);
      }
    });
  });
}
58
+
59
/**
 * Promise adapter around `db.close(callback)`.
 *
 * @param {sqlite3.Database} db Handle to close
 * @returns {Promise<void>} Resolves with no value when the close callback
 *   reports no error; rejects with the reported error otherwise.
 */
function closeDb(db) {
  return new Promise((resolve, reject) => {
    db.close((closeErr) => {
      if (closeErr) {
        reject(closeErr);
      } else {
        resolve();
      }
    });
  });
}
72
+
73
/**
 * Promise adapter around `db.all(sql, params, callback)`.
 *
 * @param {sqlite3.Database} db Handle to query
 * @param {string} sql SQL text, may contain `?` placeholders
 * @param {Array} params Values bound to the placeholders (defaults to none)
 * @returns {Promise<Object[]>} Resolves with the rows handed to the callback,
 *   or with an empty array when the callback supplies a falsy row set;
 *   rejects with the callback's error.
 */
function dbAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRows = (queryErr, rows) => {
      if (queryErr) {
        reject(queryErr);
        return;
      }
      resolve(rows || []);
    };
    db.all(sql, params, onRows);
  });
}
88
+
89
/**
 * Promise adapter around `db.get(sql, params, callback)` for single-row reads.
 *
 * @param {sqlite3.Database} db Handle to query
 * @param {string} sql SQL text, may contain `?` placeholders
 * @param {Array} params Values bound to the placeholders (defaults to none)
 * @returns {Promise<Object|undefined>} Resolves with whatever row the callback
 *   receives (passed through untouched); rejects with the callback's error.
 */
function dbGet(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRow = (queryErr, row) => {
      if (queryErr) {
        reject(queryErr);
        return;
      }
      resolve(row);
    };
    db.get(sql, params, onRow);
  });
}
104
+
105
/**
 * Runs `fn`, retrying only when it fails with SQLITE_BUSY.
 *
 * A failure counts as "busy" when the error's `errno` equals SQLITE_BUSY or
 * its message contains the text SQLITE_BUSY. Any other failure, or a busy
 * failure on the final attempt, is rethrown unchanged. The wait before retry
 * number k (0-based) is RETRY_DELAY_MS * 2^k.
 *
 * @param {() => Promise<T>} fn Operation to attempt
 * @param {number} retries Maximum number of retries after the first attempt
 * @returns {Promise<T>} Result of the first successful call
 * @template T
 */
async function withRetry(fn, retries = MAX_RETRIES) {
  let attempt = 0;
  while (attempt <= retries) {
    try {
      return await fn();
    } catch (err) {
      const busy = err && (err.errno === SQLITE_BUSY || /SQLITE_BUSY/.test(err.message));
      const outOfRetries = !(attempt < retries);
      if (!busy || outOfRetries) {
        throw err;
      }
    }

    // Only reached after a retryable busy failure: back off, then try again.
    const waitMs = RETRY_DELAY_MS * Math.pow(2, attempt);
    await new Promise((wake) => setTimeout(wake, waitMs));
    attempt += 1;
  }
  return undefined;
}
127
+
128
+ // ---------------------------------------------------------------------------
129
+ // Date / time helpers
130
+ // ---------------------------------------------------------------------------
131
+
132
/**
 * Convert epoch milliseconds to an ISO 8601 string (UTC).
 *
 * Returns null instead of throwing for anything that does not yield a valid
 * Date: null/undefined, zero or negative values, NaN, +/-Infinity, values
 * outside the representable Date range, and non-numeric strings.
 *
 * @param {number|null} ms Epoch time in milliseconds
 * @returns {string|null} e.g. "2023-11-14T22:13:20.000Z", or null
 */
function msToIso(ms) {
  if (ms == null || ms <= 0) return null;

  const date = new Date(ms);
  // toISOString() throws RangeError on an invalid Date, which would abort the
  // mapping of the whole row over a single corrupt timestamp.
  if (Number.isNaN(date.getTime())) return null;

  return date.toISOString();
}
141
+
142
/**
 * Convert epoch milliseconds to a YYYY-MM-DD string (UTC calendar date).
 *
 * Returns null instead of throwing for anything that does not yield a valid
 * Date: null/undefined, zero or negative values, NaN, +/-Infinity, values
 * outside the representable Date range, and non-numeric strings.
 *
 * @param {number|null} ms Epoch time in milliseconds
 * @returns {string|null} e.g. "2023-11-14", or null
 */
function msToDate(ms) {
  if (ms == null || ms <= 0) return null;

  const date = new Date(ms);
  // toISOString() throws RangeError on an invalid Date; treat it as "no date".
  if (Number.isNaN(date.getTime())) return null;

  // The first 10 characters of the ISO string are the UTC date part.
  return date.toISOString().slice(0, 10);
}
151
+
152
+ // ---------------------------------------------------------------------------
153
+ // Safe JSON parsing
154
+ // ---------------------------------------------------------------------------
155
+
156
/**
 * Parse a JSON string, returning `fallback` when there is nothing usable.
 *
 * `fallback` is returned when `raw` is null/undefined/empty, when it is not
 * valid JSON, and when it is the JSON literal `null`. The last case matters
 * because callers in this file pass `{}` as the fallback and immediately read
 * properties off the result (`data.tokens`, `data.type`); a parsed `null`
 * would make those reads throw a TypeError. Other non-object JSON values
 * (numbers, strings, booleans) are returned as parsed.
 *
 * @param {string|null} raw JSON text
 * @param {*} fallback Value to return when no usable value can be parsed
 * @returns {*} The parsed value, or `fallback`
 */
function safeParse(raw, fallback = null) {
  if (raw == null || raw === '') return fallback;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (_) {
    return fallback;
  }

  // JSON `null` carries no data; treat it the same as a missing column value.
  return parsed === null ? fallback : parsed;
}
170
+
171
+ // ---------------------------------------------------------------------------
172
+ // Mapping functions
173
+ // ---------------------------------------------------------------------------
174
+
175
/**
 * Builds the unified session record for one OpenCode session row.
 *
 * Token, cost and model values are not read from `row`; the caller passes
 * them in through `agg` after summing them over the session's messages.
 * Missing or falsy numeric inputs become 0, missing text inputs become null.
 *
 * @param {Object} row        Raw session row from opencode.db
 * @param {number} msgCount   Number of messages in the session
 * @param {number} errCount   Number of errored tool parts
 * @param {number} toolCount  Number of tool-call parts
 * @param {Object} agg        Per-session totals: tokens_input, tokens_output,
 *                            tokens_reasoning, tokens_cache_read,
 *                            tokens_cache_write, cost_usd, model_id
 * @returns {Object} Unified session object
 */
function mapSession(row, msgCount, errCount, toolCount, agg = {}) {
  const createdMs = row.time_created;
  const updatedMs = row.time_updated;

  // Span between creation and last update, in whole minutes; 0 when either
  // timestamp is missing or zero.
  let durationMinutes = 0;
  if (updatedMs && createdMs) {
    durationMinutes = Math.round((updatedMs - createdMs) / 60000);
  }

  // Stored as the source value divided by 1000 and rounded.
  // NOTE(review): presumably milliseconds -> seconds; confirm the source unit.
  let compacting = 0;
  if (row.time_compacting) {
    compacting = Math.round(row.time_compacting / 1000);
  }

  return {
    id: row.id,
    source: SOURCE,
    date: msToDate(createdMs),
    started_at: msToIso(createdMs),
    ended_at: msToIso(updatedMs),
    duration_minutes: durationMinutes,
    active_minutes: null, // left empty here; the in-file note says it is calculated separately
    message_count: msgCount,
    tokens_input: agg.tokens_input || 0,
    tokens_output: agg.tokens_output || 0,
    tokens_reasoning: agg.tokens_reasoning || 0,
    tokens_cache_read: agg.tokens_cache_read || 0,
    tokens_cache_write: agg.tokens_cache_write || 0,
    cost_usd: agg.cost_usd || 0,
    project: row.directory || null,
    title: row.title || null,
    model: agg.model_id || null,
    error_count: errCount,
    tool_call_count: toolCount,
    summary_additions: row.summary_additions || 0,
    summary_deletions: row.summary_deletions || 0,
    summary_files: row.summary_files || 0,
    reverted: row.revert != null ? 1 : 0,
    time_compacting: compacting,
    // Subagent linkage copied from the source `session` row. Per the original
    // author's note, parent_id is set only for sessions spawned by a parent
    // through the `task` tool and `agent` names the flavour (typically
    // 'build' or 'explore'); the UI uses these to hide subagents from
    // session-list views while still counting their cost / token totals.
    parent_session_id: row.parent_id || null,
    agent_type: row.agent || null,
  };
}
234
+
235
/**
 * Builds the unified message record for one OpenCode message row.
 *
 * Role, token counts, cost and model are read from the JSON stored in
 * `row.data`; unparseable JSON is treated as an empty object so every field
 * falls back to its default.
 *
 * @param {Object} row Raw message row from opencode.db
 * @returns {Object} Unified message object
 */
function mapMessage(row) {
  const payload = safeParse(row.data, {});
  const usage = payload.tokens || {};
  const cacheUsage = usage.cache || {};

  // Prefer modelID. Without it, fall back to "<providerID>/" when a provider
  // is known (the model half is necessarily empty on this path), else null.
  let modelId = payload.modelID;
  if (!modelId) {
    modelId = payload.providerID ? `${payload.providerID}/${payload.modelID || ''}` : null;
  }

  return {
    id: row.id,
    session_id: row.session_id,
    source: SOURCE,
    role: payload.role || null,
    timestamp: msToIso(row.time_created),
    tokens_input: usage.input || 0,
    tokens_output: usage.output || 0,
    tokens_reasoning: usage.reasoning || 0,
    tokens_cache_read: cacheUsage.read || 0,
    tokens_cache_write: cacheUsage.write || 0,
    cost_usd: payload.cost || 0,
    content_length: 0, // always 0 here; not computed by this mapper
    is_error: 0, // always 0 here; not computed by this mapper
    model_id: modelId || null,
  };
}
262
+
263
/**
 * Builds the unified part record for one OpenCode part row.
 *
 * Only the part's `type` is taken from the JSON in `row.data`; the ids and
 * the timestamp come from the row's own columns.
 *
 * @param {Object} row Raw part row from opencode.db
 * @returns {Object} Unified part object
 */
function mapPart(row) {
  const { type } = safeParse(row.data, {});

  return {
    id: row.id,
    message_id: row.message_id,
    session_id: row.session_id,
    source: SOURCE,
    type: type || null,
    timestamp: msToIso(row.time_created),
  };
}
280
+
281
/**
 * Extract a target file path from tool-call part data when one is present.
 *
 * Looks at `data.state.metadata` and returns the first non-empty string found
 * under `filePath`, `path`, then `file`. Empty strings are skipped so that a
 * blank `filePath` neither hides a usable `path` / `file` nor gets returned
 * as a path.
 *
 * @param {Object} data Parsed part data JSON
 * @returns {string|null} The path, or null when none is available
 */
function extractTargetFile(data) {
  const meta = data && data.state && data.state.metadata;
  if (!meta) return null;

  // Known metadata keys, in priority order.
  for (const key of ['filePath', 'path', 'file']) {
    const candidate = meta[key];
    if (typeof candidate === 'string' && candidate !== '') return candidate;
  }

  return null;
}
298
+
299
/**
 * Builds the unified tool-call record for a part whose parsed data has
 * `type === 'tool'`.
 *
 * The part's own id is used both as the tool call's `id` and as `part_id`.
 * Status and error text come from `data.state`; a missing state yields null
 * for both.
 *
 * @param {Object} row  Raw part row from opencode.db
 * @param {Object} data Parsed part data JSON (already parsed by the caller)
 * @returns {Object} Unified tool-call object
 */
function mapToolCall(row, data) {
  const { status, error } = data.state || {};
  const partId = row.id;

  return {
    id: partId,
    part_id: partId,
    session_id: row.session_id,
    source: SOURCE,
    tool_name: data.tool || null,
    timestamp: msToIso(row.time_created),
    status: status || null,
    error_message: error || null,
    target_file: extractTargetFile(data),
  };
}
321
+
322
+ // ---------------------------------------------------------------------------
323
+ // Core ETL logic
324
+ // ---------------------------------------------------------------------------
325
+
326
/**
 * Imports one OpenCode session into boss.db.
 *
 * Steps: (1) skip sessions whose title marks them as internal or placeholder
 * runs, (2) load and map the session's messages and parts, (3) gather per-
 * message text and count errored tool calls, (4) skip sessions whose user
 * text is a judge prompt, (5) sum token/cost figures over the messages,
 * (6) write the session, messages, parts and tool calls.
 *
 * @param {sqlite3.Database} srcDb   opencode.db (native sqlite3, read-only)
 * @param {Object}           bossDb  boss.db (sql.js, in-memory)
 * @param {Object}           session Raw session row
 * @returns {Promise<Object>} `{messages, parts, toolCalls, errors}` for an
 *   imported session, or `{messages: 0, parts: 0, toolCalls: 0, skipped: true}`
 *   when the session was filtered out (nothing is written in that case).
 */
async function processSession(srcDb, bossDb, session) {
  // Per-message text is truncated to this many characters to keep boss.db small.
  const MAX_TEXT_CHARS = 4096;
  const skipResult = () => ({ messages: 0, parts: 0, toolCalls: 0, skipped: true });

  // Title-based filters run first so filtered sessions cost no extra queries:
  // our own internal `opencode run` jobs, then OpenCode's placeholder titles.
  const { title } = session;
  if (isInternalAbossTitle(title) || isPlaceholderOpencodeTitle(title)) {
    return skipResult();
  }

  // --- Load messages ---
  const messageRows = await withRetry(() =>
    dbAll(srcDb, 'SELECT * FROM message WHERE session_id = ?', [session.id])
  );
  const messages = messageRows.map((row) => mapMessage(row));

  // --- Load parts ---
  const partRows = await withRetry(() =>
    dbAll(srcDb, 'SELECT * FROM part WHERE session_id = ?', [session.id])
  );

  const parts = [];
  const toolCalls = [];
  const textByMsg = Object.create(null); // message_id -> joined text
  let errorCount = 0;

  for (const partRow of partRows) {
    parts.push(mapPart(partRow));

    const data = safeParse(partRow.data, {});
    const kind = data.type;

    if (kind === 'text' || kind === 'reasoning') {
      // Text and reasoning parts are joined per message, newline-separated.
      const piece = typeof data.text === 'string' ? data.text : '';
      if (piece) {
        const soFar = textByMsg[partRow.message_id] || '';
        const joined = soFar ? soFar + '\n' + piece : piece;
        textByMsg[partRow.message_id] = joined.slice(0, MAX_TEXT_CHARS);
      }
    } else if (kind === 'tool') {
      toolCalls.push(mapToolCall(partRow, data));

      // A tool call is an error if its state says so or carries an error value.
      const toolState = data.state || {};
      if (toolState.status === 'error' || toolState.error) {
        errorCount += 1;
      }
    }
  }

  // Attach the gathered text (or null) to every message.
  messages.forEach((message) => {
    message.text = textByMsg[message.id] || null;
  });

  // Sessions produced by our own LLM judge must not be imported back, or each
  // analysis pass would create more sessions to analyze.
  const isJudgeSession = messages.some(
    (message) => message.role === 'user' && isJudgePrompt(message.text)
  );
  if (isJudgeSession) {
    return skipResult();
  }

  // --- Totals across messages ---
  // Token and cost figures are per-message values, so they are summed here;
  // the model is taken from the first message that has one.
  const agg = {
    tokens_input: 0,
    tokens_output: 0,
    tokens_reasoning: 0,
    tokens_cache_read: 0,
    tokens_cache_write: 0,
    cost_usd: 0,
    model_id: null,
  };
  const summedFields = [
    'tokens_input',
    'tokens_output',
    'tokens_reasoning',
    'tokens_cache_read',
    'tokens_cache_write',
    'cost_usd',
  ];
  for (const message of messages) {
    for (const field of summedFields) {
      agg[field] += message[field] || 0;
    }
    if (!agg.model_id && message.model_id) {
      agg.model_id = message.model_id;
    }
  }

  // --- Write to boss.db: session first, then its children ---
  const sessionObj = mapSession(session, messages.length, errorCount, toolCalls.length, agg);
  upsertSession(bossDb, sessionObj);
  bulkInsertMessages(bossDb, messages);
  bulkInsertParts(bossDb, parts);
  bulkInsertToolCalls(bossDb, toolCalls);

  return {
    messages: messages.length,
    parts: parts.length,
    toolCalls: toolCalls.length,
    errors: errorCount,
  };
}
450
+
451
+ // ---------------------------------------------------------------------------
452
+ // Main entry point
453
+ // ---------------------------------------------------------------------------
454
+
455
/**
 * Collect data from the OpenCode database and write it to boss.db.
 *
 * Performs an incremental sync: only sessions whose `time_created` is greater
 * than the stored watermark (etl_state.last_session_time) are read. Sessions
 * are fetched in pages of {@link BATCH_SIZE}; after every page the watermark
 * is updated and boss.db is persisted with saveDb(). Each session is wrapped
 * in its own try/catch so that one bad row never stops the whole run.
 *
 * The source database is always closed before this function settles.
 *
 * @param {Object} bossDb        sql.js database instance (boss.db)
 * @param {string} opencodePath  Path to opencode.db
 * @param {Object} [options]     Options
 * @param {(msg: string) => void} [options.onProgress] Progress callback
 * @returns {Promise<{sessionCount: number, messageCount: number, partCount: number, toolCallCount: number, errorSessionCount: number, judgeSkippedCount: number}>}
 *   Totals for this run. `judgeSkippedCount` counts every session that
 *   processSession reported as skipped.
 * @throws Rethrows the error when opencode.db cannot be opened, and any error
 *   from the count / page queries or the etl_state writes.
 */
async function collectOpenCode(bossDb, opencodePath, options = {}) {
  const log = options.onProgress || (() => {});

  // -- 1. Read ETL watermark --------------------------------------------------
  const etlState = getEtlState(bossDb, SOURCE);
  const lastSessionTime = etlState ? (etlState.last_session_time || 0) : 0;
  log(`ETL watermark: last_session_time = ${lastSessionTime}`);

  // -- 2. Open source database ------------------------------------------------
  let srcDb;
  try {
    srcDb = await withRetry(() => openReadOnly(opencodePath));
  } catch (err) {
    log(`Failed to open opencode.db: ${err.message}`);
    throw err;
  }

  // Best-effort PRAGMA. The callback is handed to `resolve`, so an error
  // reported by sqlite3 is ignored on purpose and this promise never rejects;
  // the try/catch only guards against a synchronous throw from run().
  try {
    await new Promise((resolve) => srcDb.run('PRAGMA journal_mode = WAL', resolve));
  } catch (_) {
    // Best-effort; some builds may not support WAL in read-only mode
  }

  const totals = {
    sessionCount: 0,
    messageCount: 0,
    partCount: 0,
    toolCallCount: 0,
    errorSessionCount: 0,
    judgeSkippedCount: 0,
  };

  try {
    // -- 3. Count sessions to process -----------------------------------------
    const countRow = await withRetry(() =>
      dbGet(srcDb, 'SELECT COUNT(*) AS cnt FROM session WHERE time_created > ?', [lastSessionTime])
    );
    const totalSessions = countRow ? countRow.cnt : 0;
    log(`Found ${totalSessions} new session(s) to process`);

    if (totalSessions === 0) {
      // Nothing new: refresh last_sync_at only, keep the existing watermark.
      updateEtlState(bossDb, SOURCE, {
        last_sync_at: new Date().toISOString(),
        last_session_id: etlState ? etlState.last_session_id : null,
        last_session_time: lastSessionTime || null,
        status: 'idle',
      });
      return totals;
    }

    // -- 4. Process in batches ------------------------------------------------
    let offset = 0;
    let latestTime = lastSessionTime;
    let latestSessionId = etlState ? etlState.last_session_id : null;

    while (offset < totalSessions) {
      // `id` is a tie-breaker: time_created alone is not unique, and without
      // a total order LIMIT/OFFSET paging may skip or repeat rows that share
      // a timestamp across page boundaries.
      const sessions = await withRetry(() =>
        dbAll(
          srcDb,
          'SELECT * FROM session WHERE time_created > ? ORDER BY time_created ASC, id ASC LIMIT ? OFFSET ?',
          [lastSessionTime, BATCH_SIZE, offset]
        )
      );

      if (sessions.length === 0) break;

      for (const session of sessions) {
        try {
          const result = await processSession(srcDb, bossDb, session);
          if (result.skipped) {
            totals.judgeSkippedCount++;
          } else {
            totals.sessionCount++;
            totals.messageCount += result.messages;
            totals.partCount += result.parts;
            totals.toolCallCount += result.toolCalls;
          }

          // Track watermark (skipped sessions advance it too, so they are
          // never re-scanned)
          if (session.time_created > latestTime) {
            latestTime = session.time_created;
            latestSessionId = session.id;
          }
        } catch (err) {
          // NOTE(review): a failed session does not move the watermark itself,
          // but any later session that succeeds moves it past the failure, so
          // the failed session is not picked up again by a later run.
          totals.errorSessionCount++;
          log(`Error processing session ${session.id}: ${err.message}`);
        }
      }

      offset += sessions.length;

      // -- 5. Update watermark after each batch & persist boss.db -------------
      updateEtlState(bossDb, SOURCE, {
        last_sync_at: new Date().toISOString(),
        last_session_id: latestSessionId,
        last_session_time: latestTime,
        status: 'running',
      });
      saveDb();

      log(`Processed ${Math.min(offset, totalSessions)}/${totalSessions} sessions`);
    }

    // -- 6. Final watermark update --------------------------------------------
    updateEtlState(bossDb, SOURCE, {
      last_sync_at: new Date().toISOString(),
      last_session_id: latestSessionId,
      last_session_time: latestTime,
      status: 'idle',
    });
    saveDb();

    log(
      `ETL complete: ${totals.sessionCount} sessions, ` +
        `${totals.messageCount} messages, ${totals.partCount} parts, ` +
        `${totals.toolCallCount} tool calls` +
        (totals.errorSessionCount ? `, ${totals.errorSessionCount} failed` : '') +
        (totals.judgeSkippedCount ? `, ${totals.judgeSkippedCount} judge artifact(s) skipped` : '')
    );
  } finally {
    // Always close the source database; a close failure must not mask the
    // result or error of the run itself.
    try {
      await closeDb(srcDb);
    } catch (_) {
      // best-effort
    }
  }

  return totals;
}
601
+
602
+ // ---------------------------------------------------------------------------
603
+ // Exports
604
+ // ---------------------------------------------------------------------------
605
+
606
+ module.exports = { collectOpenCode };