agentboss 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/bin/aboss.js +288 -0
- package/client/dist/assets/index-C1wFD_Vo.css +1 -0
- package/client/dist/assets/index-DBj1Ujlx.js +137 -0
- package/client/dist/index.html +34 -0
- package/package.json +64 -0
- package/server/analysis/daily-aggregator.js +258 -0
- package/server/analysis/difficulty.js +129 -0
- package/server/analysis/dimensions/ai-knowledge.js +172 -0
- package/server/analysis/dimensions/ai-tools.js +161 -0
- package/server/analysis/dimensions/judgement.js +107 -0
- package/server/analysis/dimensions/llm-merge.js +57 -0
- package/server/analysis/dimensions/output-quality.js +167 -0
- package/server/analysis/dimensions/problem-definition.js +104 -0
- package/server/analysis/dimensions/system-thinking.js +225 -0
- package/server/analysis/evidence-builder.js +104 -0
- package/server/analysis/job.js +273 -0
- package/server/analysis/report-builder.js +581 -0
- package/server/analysis/scoring-v2.js +72 -0
- package/server/analysis/text-signals.js +179 -0
- package/server/analysis/thresholds-v2.js +358 -0
- package/server/api/advice.js +124 -0
- package/server/api/analysis.js +141 -0
- package/server/api/execution.js +330 -0
- package/server/api/metrics.js +277 -0
- package/server/api/overview.js +308 -0
- package/server/api/project.js +255 -0
- package/server/api/reports.js +125 -0
- package/server/api/sessions.js +118 -0
- package/server/api/settings.js +119 -0
- package/server/db/connection.js +175 -0
- package/server/db/queries.js +1051 -0
- package/server/db/schema.js +487 -0
- package/server/etl/active-time.js +150 -0
- package/server/etl/backfill-subagents.js +178 -0
- package/server/etl/claude-code.js +826 -0
- package/server/etl/detect.js +341 -0
- package/server/etl/judge-filter.js +117 -0
- package/server/etl/opencode.js +606 -0
- package/server/execution/job.js +662 -0
- package/server/execution/prompt.js +227 -0
- package/server/execution/runner.js +218 -0
- package/server/index.js +94 -0
- package/server/llm/advice-prompt.js +339 -0
- package/server/llm/advice.js +384 -0
- package/server/llm/analysis-prompt.js +162 -0
- package/server/llm/cli-runner.js +249 -0
- package/server/llm/judge-prompts.js +179 -0
- package/server/llm/judge.js +118 -0
- package/server/llm/project-advice-prompt.js +332 -0
- package/server/llm/project-advice.js +491 -0
- package/server/llm/session-analyzer.js +122 -0
- package/server/utils/project.js +80 -0
|
@@ -0,0 +1,606 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenCode ETL collector for Agent Boss
|
|
3
|
+
*
|
|
4
|
+
* Reads session, message, part, and tool-call data from the OpenCode SQLite
|
|
5
|
+
* database (opencode.db — can be 2+ GB) and writes unified rows into boss.db.
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: The source database is opened with the native `sqlite3` npm
|
|
8
|
+
* package (async, streaming, read-only) so that the entire file is never
|
|
9
|
+
* loaded into memory. The destination boss.db uses sql.js (in-memory) and
|
|
10
|
+
* is accessed via helpers in server/db/queries.js.
|
|
11
|
+
*
|
|
12
|
+
* @author Felix
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const sqlite3 = require('sqlite3');
|
|
16
|
+
const { saveDb } = require('../db/connection');
|
|
17
|
+
const {
|
|
18
|
+
isJudgePrompt,
|
|
19
|
+
isInternalAbossTitle,
|
|
20
|
+
isPlaceholderOpencodeTitle,
|
|
21
|
+
} = require('./judge-filter');
|
|
22
|
+
const {
|
|
23
|
+
upsertSession,
|
|
24
|
+
bulkInsertMessages,
|
|
25
|
+
bulkInsertParts,
|
|
26
|
+
bulkInsertToolCalls,
|
|
27
|
+
getEtlState,
|
|
28
|
+
updateEtlState,
|
|
29
|
+
} = require('../db/queries');
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Constants
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
/** Value written to the `source` field of every row produced by this collector. */
const SOURCE = 'opencode';

/** Number of sessions fetched from the source database per page (SQL LIMIT). */
const BATCH_SIZE = 50;

/** Numeric `errno` that marks a SQLITE_BUSY failure. */
const SQLITE_BUSY = 5;

/** Default number of additional attempts `withRetry` makes after a busy error. */
const MAX_RETRIES = 3;

/** Base back-off delay in milliseconds; doubled on every further attempt. */
const RETRY_DELAY_MS = 1000;
|
|
40
|
+
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
// sqlite3 Promise wrappers
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
/**
 * Opens the SQLite file at `dbPath` through the native `sqlite3` driver,
 * passing the OPEN_READONLY flag.
 *
 * @param {string} dbPath Filesystem path of the database to open
 * @returns {Promise<sqlite3.Database>} Resolves with the database handle once
 *   the driver's open callback reports success; rejects with the driver's
 *   error otherwise.
 */
function openReadOnly(dbPath) {
  const executor = (resolve, reject) => {
    // The callback fires asynchronously, after `handle` has been assigned.
    const handle = new sqlite3.Database(dbPath, sqlite3.OPEN_READONLY, (openErr) => {
      if (openErr) {
        reject(openErr);
      } else {
        resolve(handle);
      }
    });
  };
  return new Promise(executor);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Promise adapter around `db.close()`.
 *
 * @param {sqlite3.Database} db Handle to close
 * @returns {Promise<void>} Resolves with no value when the close callback
 *   reports success; rejects with the error it reports otherwise.
 */
function closeDb(db) {
  return new Promise((resolve, reject) => {
    const onClosed = (closeErr) => {
      if (closeErr) {
        reject(closeErr);
        return;
      }
      resolve();
    };
    db.close(onClosed);
  });
}
|
|
72
|
+
|
|
73
|
+
/**
 * Promise adapter around `db.all()`: runs a query and collects every row.
 *
 * @param {sqlite3.Database} db Database handle
 * @param {string} sql SQL text to execute
 * @param {Array} params Positional bind parameters (defaults to none)
 * @returns {Promise<Object[]>} Resolves with the rows handed to the callback,
 *   or an empty array when the callback supplies a falsy row set; rejects
 *   with the callback's error.
 */
function dbAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRows = (queryErr, rows) => {
      if (queryErr) {
        reject(queryErr);
        return;
      }
      resolve(rows || []);
    };
    db.all(sql, params, onRows);
  });
}
|
|
88
|
+
|
|
89
|
+
/**
 * Promise adapter around `db.get()`: runs a query and yields a single row.
 *
 * @param {sqlite3.Database} db Database handle
 * @param {string} sql SQL text to execute
 * @param {Array} params Positional bind parameters (defaults to none)
 * @returns {Promise<Object|undefined>} Resolves with whatever row value the
 *   callback receives (passed through unchanged); rejects with the
 *   callback's error.
 */
function dbGet(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRow = (queryErr, row) => {
      if (queryErr) {
        reject(queryErr);
        return;
      }
      resolve(row);
    };
    db.get(sql, params, onRow);
  });
}
|
|
104
|
+
|
|
105
|
+
/**
 * Invokes `fn`, re-invoking it after a pause whenever it fails with a
 * SQLITE_BUSY error. The pause starts at RETRY_DELAY_MS and doubles with
 * each further attempt. Any other error, or a busy error on the last
 * permitted attempt, is rethrown unchanged.
 *
 * @param {() => Promise<T>} fn Operation to run
 * @param {number} retries Maximum number of re-attempts after the first call
 * @returns {Promise<T>} The value `fn` eventually resolves with
 * @template T
 */
async function withRetry(fn, retries = MAX_RETRIES) {
  let attempt = 0;
  while (attempt <= retries) {
    try {
      return await fn();
    } catch (failure) {
      // A busy error is recognised by its numeric errno or by its message text.
      const busy =
        Boolean(failure) &&
        (failure.errno === SQLITE_BUSY || /SQLITE_BUSY/.test(failure.message));
      const attemptsRemain = attempt < retries;
      if (!busy || !attemptsRemain) {
        throw failure;
      }
      const waitMs = RETRY_DELAY_MS * 2 ** attempt;
      await new Promise((wake) => setTimeout(wake, waitMs));
    }
    attempt += 1;
  }
}
|
|
127
|
+
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// Date / time helpers
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
/**
 * Convert epoch milliseconds to an ISO 8601 string (UTC).
 *
 * Returns null instead of throwing when the value cannot be represented as
 * a date (NaN, Infinity, or outside the range `Date` supports), so that one
 * corrupt timestamp does not abort the mapping of an entire row.
 *
 * @param {number|null} ms Epoch milliseconds
 * @returns {string|null} ISO timestamp, or null when `ms` is null/undefined,
 *   not positive, or not a representable date
 */
function msToIso(ms) {
  if (ms == null || ms <= 0) return null;
  const when = new Date(ms);
  // toISOString() throws RangeError on an invalid Date; report it as "no value".
  if (Number.isNaN(when.getTime())) return null;
  return when.toISOString();
}
|
|
141
|
+
|
|
142
|
+
/**
 * Convert epoch milliseconds to a YYYY-MM-DD string (UTC).
 *
 * Returns null instead of throwing when the value cannot be represented as
 * a date (NaN, Infinity, or outside the range `Date` supports).
 *
 * @param {number|null} ms Epoch milliseconds
 * @returns {string|null} Calendar date in UTC, or null when `ms` is
 *   null/undefined, not positive, or not a representable date
 */
function msToDate(ms) {
  if (ms == null || ms <= 0) return null;
  const when = new Date(ms);
  // toISOString() throws RangeError on an invalid Date; report it as "no value".
  if (Number.isNaN(when.getTime())) return null;
  // The first ten characters of the ISO form are the date portion.
  return when.toISOString().slice(0, 10);
}
|
|
151
|
+
|
|
152
|
+
// ---------------------------------------------------------------------------
|
|
153
|
+
// Safe JSON parsing
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
/**
 * Parse a JSON string, returning a default value when no usable value can
 * be obtained.
 *
 * The fallback is returned when `raw` is null/undefined/empty, when parsing
 * throws, and also when the text is the JSON literal `null`. The last case
 * matters because callers pass `{}` as the fallback and immediately read
 * properties off the result, which would throw on a parsed `null`.
 *
 * @param {string|null} raw JSON text
 * @param {*} fallback Value to return when nothing usable was parsed
 * @returns {*} The parsed value, or `fallback`
 */
function safeParse(raw, fallback = null) {
  if (raw == null || raw === '') return fallback;
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (_) {
    return fallback;
  }
  return parsed == null ? fallback : parsed;
}
|
|
170
|
+
|
|
171
|
+
// ---------------------------------------------------------------------------
|
|
172
|
+
// Mapping functions
|
|
173
|
+
// ---------------------------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
/**
 * Build the unified session record for one OpenCode `session` row.
 *
 * Token, cost and model values are not read from `row`; they are taken from
 * `agg`, which the caller computes by summing over the session's messages.
 * Missing numeric values default to 0 and missing text values to null.
 *
 * @param {Object} row        Raw session row from opencode.db
 * @param {number} msgCount   Number of messages in this session
 * @param {number} errCount   Number of error parts
 * @param {number} toolCount  Number of tool-call parts
 * @param {Object} agg        Per-session totals derived from messages: {
 *   tokens_input, tokens_output, tokens_reasoning,
 *   tokens_cache_read, tokens_cache_write, cost_usd, model_id
 * }
 * @returns {Object} Unified session object
 */
function mapSession(row, msgCount, errCount, toolCount, agg = {}) {
  const createdMs = row.time_created;
  const updatedMs = row.time_updated;

  // Wall-clock span between creation and last update, in whole minutes;
  // 0 when either timestamp is missing.
  let durationMinutes = 0;
  if (updatedMs && createdMs) {
    durationMinutes = Math.round((updatedMs - createdMs) / 60000);
  }

  // `time_compacting` is divided by 1000 (presumably ms -> s; verify against
  // the OpenCode schema).
  let compacting = 0;
  if (row.time_compacting) {
    compacting = Math.round(row.time_compacting / 1000);
  }

  return {
    id: row.id,
    source: SOURCE,
    date: msToDate(createdMs),
    started_at: msToIso(createdMs),
    ended_at: msToIso(updatedMs),
    duration_minutes: durationMinutes,
    // Left empty here; filled in separately by the active-time calculator.
    active_minutes: null,
    message_count: msgCount,
    tokens_input: agg.tokens_input || 0,
    tokens_output: agg.tokens_output || 0,
    tokens_reasoning: agg.tokens_reasoning || 0,
    tokens_cache_read: agg.tokens_cache_read || 0,
    tokens_cache_write: agg.tokens_cache_write || 0,
    cost_usd: agg.cost_usd || 0,
    project: row.directory || null,
    title: row.title || null,
    model: agg.model_id || null,
    error_count: errCount,
    tool_call_count: toolCount,
    summary_additions: row.summary_additions || 0,
    summary_deletions: row.summary_deletions || 0,
    summary_files: row.summary_files || 0,
    reverted: row.revert == null ? 0 : 1,
    time_compacting: compacting,
    // Subagent linkage from the source `session` table. parent_id is
    // non-null only for sessions spawned by a parent through the `task`
    // tool; agent describes the flavour (typically 'build' or 'explore').
    // Used by the UI to filter subagents out of "session list" views while
    // keeping their cost / token totals in aggregate stats.
    parent_session_id: row.parent_id || null,
    agent_type: row.agent || null,
  };
}
|
|
234
|
+
|
|
235
|
+
/**
 * Map an OpenCode message row to a unified message object.
 *
 * Role, token counts, cost and model are read from the JSON stored in
 * `row.data`. A blob that is missing, unparsable, or parses to something
 * other than an object (e.g. the JSON literal `null`) is treated as empty
 * rather than raising a TypeError.
 *
 * @param {Object} row Raw message row from opencode.db
 * @returns {Object} Unified message object
 */
function mapMessage(row) {
  const parsed = safeParse(row.data, {});
  const data = parsed !== null && typeof parsed === 'object' ? parsed : {};
  const tokens = data.tokens || {};
  const cache = tokens.cache || {};

  // Prefer the model id on its own. When only a provider id is present the
  // result is "<providerID>/" (trailing slash, no model part).
  // NOTE(review): that fallback shape is preserved from the original
  // expression — confirm whether consumers expect it.
  let modelId = null;
  if (data.modelID) {
    modelId = data.modelID;
  } else if (data.providerID) {
    modelId = `${data.providerID}/`;
  }

  return {
    id: row.id,
    session_id: row.session_id,
    source: SOURCE,
    role: data.role || null,
    timestamp: msToIso(row.time_created),
    tokens_input: tokens.input || 0,
    tokens_output: tokens.output || 0,
    tokens_reasoning: tokens.reasoning || 0,
    tokens_cache_read: cache.read || 0,
    tokens_cache_write: cache.write || 0,
    cost_usd: data.cost || 0,
    content_length: 0, // estimated from part text lengths elsewhere
    is_error: 0, // derived from parts
    model_id: modelId,
  };
}
|
|
262
|
+
|
|
263
|
+
/**
 * Map an OpenCode part row to a unified part object.
 *
 * Only the part's `type` is taken from the JSON in `row.data`. A blob that
 * is missing, unparsable, or parses to something other than an object
 * (e.g. the JSON literal `null`) yields `type: null` rather than raising a
 * TypeError.
 *
 * @param {Object} row Raw part row from opencode.db
 * @returns {Object} Unified part object
 */
function mapPart(row) {
  const parsed = safeParse(row.data, {});
  const data = parsed !== null && typeof parsed === 'object' ? parsed : {};

  return {
    id: row.id,
    message_id: row.message_id,
    session_id: row.session_id,
    source: SOURCE,
    type: data.type || null,
    timestamp: msToIso(row.time_created),
  };
}
|
|
280
|
+
|
|
281
|
+
/**
 * Look up the file path a tool call operated on, if its metadata carries one.
 *
 * Inspects `data.state.metadata` and returns the first of `filePath`,
 * `path`, `file` (checked in that order) whose value is a string.
 *
 * @param {Object} data Parsed part data JSON
 * @returns {string|null} The path, or null when the metadata is absent or
 *   none of the known keys holds a string
 */
function extractTargetFile(data) {
  const meta = data && data.state && data.state.metadata;
  if (!meta) return null;

  // Known metadata shapes, in priority order.
  for (const key of ['filePath', 'path', 'file']) {
    const candidate = meta[key];
    if (typeof candidate === 'string') return candidate;
  }
  return null;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Build the unified tool-call record for a part whose data has
 * `type === 'tool'`. The part id doubles as both the record id and
 * `part_id`.
 *
 * @param {Object} row  Raw part row from opencode.db
 * @param {Object} data Parsed part data JSON
 * @returns {Object} Unified tool-call object
 */
function mapToolCall(row, data) {
  const { status, error } = data.state || {};
  const partId = row.id;

  return {
    id: partId,
    part_id: partId,
    session_id: row.session_id,
    source: SOURCE,
    tool_name: data.tool || null,
    timestamp: msToIso(row.time_created),
    status: status || null,
    error_message: error || null,
    target_file: extractTargetFile(data),
  };
}
|
|
321
|
+
|
|
322
|
+
// ---------------------------------------------------------------------------
|
|
323
|
+
// Core ETL logic
|
|
324
|
+
// ---------------------------------------------------------------------------
|
|
325
|
+
|
|
326
|
+
/**
 * Process a single session: read its messages and parts from opencode.db,
 * map them, and write the unified rows to boss.db.
 *
 * A session is skipped (nothing is written) when its title is recognised by
 * `isInternalAbossTitle` or `isPlaceholderOpencodeTitle`, or when any user
 * message's collected text is recognised by `isJudgePrompt`.
 *
 * Messages and parts are read in a deterministic order (creation time, then
 * id) because two results depend on row order: the text concatenated per
 * message, and which message supplies the session's model id.
 *
 * @param {sqlite3.Database} srcDb  opencode.db (native sqlite3, read-only)
 * @param {Object} bossDb           boss.db (sql.js, in-memory)
 * @param {Object} session          Raw session row
 * @returns {Promise<{messages: number, parts: number, toolCalls: number, errors: number, skipped?: boolean}>}
 *   Row counts for the session; `skipped: true` (with all counts 0) when the
 *   session was filtered out.
 */
async function processSession(srcDb, bossDb, session) {
  // Per-message cap on collected text, to avoid bloating boss.db.
  const MAX_TEXT_CHARS = 4096;
  const skippedResult = () => ({ messages: 0, parts: 0, toolCalls: 0, errors: 0, skipped: true });

  // Short-circuit: sessions created by our own internal `opencode run`
  // invocations (analysis / polish jobs) carry a stable title prefix, and
  // OpenCode's placeholder title marks sessions started programmatically
  // without a meaningful prompt. Filter both out before touching
  // messages/parts.
  if (isInternalAbossTitle(session.title) || isPlaceholderOpencodeTitle(session.title)) {
    return skippedResult();
  }

  // --- Messages ---
  const rawMessages = await withRetry(() =>
    dbAll(
      srcDb,
      'SELECT * FROM message WHERE session_id = ? ORDER BY time_created ASC, id ASC',
      [session.id]
    )
  );
  const messages = rawMessages.map(mapMessage);

  // --- Parts ---
  const rawParts = await withRetry(() =>
    dbAll(
      srcDb,
      'SELECT * FROM part WHERE session_id = ? ORDER BY time_created ASC, id ASC',
      [session.id]
    )
  );
  const parts = [];
  const toolCalls = [];
  const textByMsg = Object.create(null); // message_id -> concatenated text
  let errorCount = 0;

  for (const row of rawParts) {
    parts.push(mapPart(row));

    // A blob holding the JSON literal `null` parses successfully; fall back
    // to an empty object so the property reads below cannot throw.
    const data = safeParse(row.data, {}) || {};

    // Collect text content for v2 capability-model signal extraction.
    // Both 'text' and 'reasoning' parts are kept, joined with newlines.
    if (data.type === 'text' || data.type === 'reasoning') {
      const txt = typeof data.text === 'string' ? data.text : '';
      const prev = textByMsg[row.message_id] || '';
      // Once the cap is reached, appending and re-slicing would reproduce
      // `prev` exactly, so the work is skipped.
      if (txt && prev.length < MAX_TEXT_CHARS) {
        const next = prev ? prev + '\n' + txt : txt;
        textByMsg[row.message_id] =
          next.length > MAX_TEXT_CHARS ? next.slice(0, MAX_TEXT_CHARS) : next;
      }
    }

    if (data.type === 'tool') {
      toolCalls.push(mapToolCall(row, data));

      const state = data.state || {};
      if (state.status === 'error' || state.error) {
        errorCount++;
      }
    }
  }

  // Attach concatenated text to each message
  for (const m of messages) {
    m.text = textByMsg[m.id] || null;
  }

  // Skip sessions created by our own LLM judge — `opencode run` logs every
  // judge call as a session here; importing them back would create a
  // feedback loop (each analysis pass spawns more sessions to analyze).
  if (messages.some((m) => m.role === 'user' && isJudgePrompt(m.text))) {
    return skippedResult();
  }

  // --- Aggregate token / cost / model across messages ---
  // OpenCode stores these per-message inside data JSON, not on the session
  // row. We sum them here so the unified session row carries totals. The
  // model id is taken from the first message that has one.
  const agg = {
    tokens_input: 0,
    tokens_output: 0,
    tokens_reasoning: 0,
    tokens_cache_read: 0,
    tokens_cache_write: 0,
    cost_usd: 0,
    model_id: null,
  };
  for (const m of messages) {
    agg.tokens_input += m.tokens_input || 0;
    agg.tokens_output += m.tokens_output || 0;
    agg.tokens_reasoning += m.tokens_reasoning || 0;
    agg.tokens_cache_read += m.tokens_cache_read || 0;
    agg.tokens_cache_write += m.tokens_cache_write || 0;
    agg.cost_usd += m.cost_usd || 0;
    if (!agg.model_id && m.model_id) agg.model_id = m.model_id;
  }

  // --- Session (needs aggregate counts) ---
  const sessionObj = mapSession(
    session,
    messages.length,
    errorCount,
    toolCalls.length,
    agg
  );

  // --- Write to boss.db ---
  upsertSession(bossDb, sessionObj);
  bulkInsertMessages(bossDb, messages);
  bulkInsertParts(bossDb, parts);
  bulkInsertToolCalls(bossDb, toolCalls);

  return {
    messages: messages.length,
    parts: parts.length,
    toolCalls: toolCalls.length,
    errors: errorCount,
  };
}
|
|
450
|
+
|
|
451
|
+
// ---------------------------------------------------------------------------
|
|
452
|
+
// Main entry point
|
|
453
|
+
// ---------------------------------------------------------------------------
|
|
454
|
+
|
|
455
|
+
/**
 * Collect data from the OpenCode database and write to boss.db.
 *
 * Performs incremental sync based on etl_state.last_session_time: only
 * sessions whose `time_created` is greater than the stored watermark are
 * read. Sessions are processed in pages of {@link BATCH_SIZE}; after every
 * page the watermark is stored and boss.db is persisted via `saveDb()`.
 * Each session is individually wrapped in try/catch so that one bad row
 * never stops the whole ETL run.
 *
 * NOTE(review): a session whose processing throws is only counted and
 * logged. The watermark still advances when a later session succeeds, so
 * such a session is not picked up again by a subsequent run — confirm this
 * is intended.
 *
 * @param {Object} bossDb        sql.js database instance (boss.db)
 * @param {string} opencodePath  Path to opencode.db
 * @param {Object} [options]     Options
 * @param {(msg: string) => void} [options.onProgress]  Progress callback
 * @returns {Promise<{sessionCount: number, messageCount: number, partCount: number, toolCallCount: number, errorSessionCount: number, judgeSkippedCount: number}>}
 *   Totals for this run. `judgeSkippedCount` counts every session that
 *   `processSession` reported as skipped.
 * @throws Rethrows the error when the source database cannot be opened, and
 *   any error raised by the count/page queries or the boss.db state updates.
 */
async function collectOpenCode(bossDb, opencodePath, options = {}) {
  const log = options.onProgress || (() => {});

  // -- 1. Read ETL watermark --------------------------------------------------
  const etlState = getEtlState(bossDb, SOURCE);
  const lastSessionTime = etlState ? (etlState.last_session_time || 0) : 0;
  log(`ETL watermark: last_session_time = ${lastSessionTime}`);

  // -- 2. Open source database ------------------------------------------------
  let srcDb;
  try {
    srcDb = await withRetry(() => openReadOnly(opencodePath));
  } catch (err) {
    log(`Failed to open opencode.db: ${err.message}`);
    throw err;
  }

  // Best-effort pragma: any error is delivered to the callback and
  // deliberately ignored, so this promise always resolves.
  await new Promise((resolve) => {
    srcDb.run('PRAGMA journal_mode = WAL', () => resolve());
  });

  const totals = {
    sessionCount: 0,
    messageCount: 0,
    partCount: 0,
    toolCallCount: 0,
    errorSessionCount: 0,
    judgeSkippedCount: 0,
  };

  try {
    // -- 3. Count sessions to process -----------------------------------------
    const countRow = await withRetry(() =>
      dbGet(srcDb, 'SELECT COUNT(*) AS cnt FROM session WHERE time_created > ?', [lastSessionTime])
    );
    const totalSessions = countRow ? countRow.cnt : 0;
    log(`Found ${totalSessions} new session(s) to process`);

    if (totalSessions === 0) {
      updateEtlState(bossDb, SOURCE, {
        last_sync_at: new Date().toISOString(),
        last_session_id: etlState ? etlState.last_session_id : null,
        last_session_time: lastSessionTime || null,
        status: 'idle',
      });
      return totals;
    }

    // -- 4. Process in batches ------------------------------------------------
    let offset = 0;
    let latestTime = lastSessionTime;
    let latestSessionId = etlState ? etlState.last_session_id : null;

    while (offset < totalSessions) {
      // `id` is a tiebreaker: with OFFSET paging, rows sharing the same
      // time_created could otherwise land on different pages from one query
      // to the next and be processed twice or not at all.
      const sessions = await withRetry(() =>
        dbAll(
          srcDb,
          'SELECT * FROM session WHERE time_created > ? ORDER BY time_created ASC, id ASC LIMIT ? OFFSET ?',
          [lastSessionTime, BATCH_SIZE, offset]
        )
      );

      if (sessions.length === 0) break;

      for (const session of sessions) {
        try {
          const result = await processSession(srcDb, bossDb, session);
          if (result.skipped) {
            totals.judgeSkippedCount++;
          } else {
            totals.sessionCount++;
            totals.messageCount += result.messages;
            totals.partCount += result.parts;
            totals.toolCallCount += result.toolCalls;
          }

          // Track watermark (skipped sessions advance it too, so they are
          // never re-scanned)
          if (session.time_created > latestTime) {
            latestTime = session.time_created;
            latestSessionId = session.id;
          }
        } catch (err) {
          totals.errorSessionCount++;
          log(`Error processing session ${session.id}: ${err.message}`);
        }
      }

      offset += sessions.length;

      // -- 5. Update watermark after each batch & persist boss.db -------------
      updateEtlState(bossDb, SOURCE, {
        last_sync_at: new Date().toISOString(),
        last_session_id: latestSessionId,
        last_session_time: latestTime,
        status: 'running',
      });
      saveDb();

      log(`Processed ${Math.min(offset, totalSessions)}/${totalSessions} sessions`);
    }

    // -- 6. Final watermark update --------------------------------------------
    updateEtlState(bossDb, SOURCE, {
      last_sync_at: new Date().toISOString(),
      last_session_id: latestSessionId,
      last_session_time: latestTime,
      status: 'idle',
    });
    saveDb();

    log(
      `ETL complete: ${totals.sessionCount} sessions, ` +
        `${totals.messageCount} messages, ${totals.partCount} parts, ` +
        `${totals.toolCallCount} tool calls` +
        (totals.errorSessionCount ? `, ${totals.errorSessionCount} failed` : '') +
        (totals.judgeSkippedCount ? `, ${totals.judgeSkippedCount} judge artifact(s) skipped` : '')
    );
  } finally {
    // Always close the source database
    try {
      await closeDb(srcDb);
    } catch (_) {
      // best-effort
    }
  }

  return totals;
}
|
|
601
|
+
|
|
602
|
+
// ---------------------------------------------------------------------------
|
|
603
|
+
// Exports
|
|
604
|
+
// ---------------------------------------------------------------------------
|
|
605
|
+
|
|
606
|
+
module.exports = { collectOpenCode };
|