selftune 0.2.29 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +15 -0
- package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/auto-update.ts +40 -8
- package/cli/selftune/command-surface.ts +1 -1
- package/cli/selftune/constants.ts +5 -0
- package/cli/selftune/dashboard-action-events.ts +117 -0
- package/cli/selftune/dashboard-action-instrumentation.ts +103 -0
- package/cli/selftune/dashboard-action-result.ts +90 -0
- package/cli/selftune/dashboard-action-stream.ts +252 -0
- package/cli/selftune/dashboard-contract.ts +81 -1
- package/cli/selftune/dashboard-server.ts +133 -16
- package/cli/selftune/eval/hooks-to-evals.ts +157 -0
- package/cli/selftune/eval/synthetic-evals.ts +33 -2
- package/cli/selftune/eval/unit-test-cli.ts +53 -5
- package/cli/selftune/evolution/validate-host-replay.ts +191 -14
- package/cli/selftune/index.ts +4 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +117 -8
- package/cli/selftune/localdb/schema.ts +34 -0
- package/cli/selftune/registry/github-install.ts +256 -0
- package/cli/selftune/registry/index.ts +1 -1
- package/cli/selftune/registry/install.ts +58 -7
- package/cli/selftune/routes/actions.ts +273 -42
- package/cli/selftune/testing-readiness.ts +203 -10
- package/cli/selftune/utils/llm-call.ts +90 -1
- package/package.json +1 -1
- package/packages/dashboard-core/src/routes/manifest.ts +2 -2
- package/packages/ui/src/components/EvolutionTimeline.tsx +1 -1
- package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
- package/packages/ui/src/primitives/button.tsx +5 -0
- package/skill/SKILL.md +1 -1
- package/skill/workflows/Dashboard.md +50 -23
- package/skill/workflows/Registry.md +19 -13
- package/apps/local-dashboard/dist/assets/index-BcvtYmmL.js +0 -15
- package/apps/local-dashboard/dist/assets/index-BpRIxnpS.css +0 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-DqH_uxum.js +0 -1
|
@@ -12,7 +12,12 @@ import {
|
|
|
12
12
|
import { tmpdir } from "node:os";
|
|
13
13
|
import { basename, dirname, isAbsolute, join } from "node:path";
|
|
14
14
|
|
|
15
|
+
import {
|
|
16
|
+
emitDashboardActionMetrics,
|
|
17
|
+
emitDashboardActionProgress,
|
|
18
|
+
} from "../dashboard-action-events.js";
|
|
15
19
|
import type { EvalEntry, RoutingReplayEntryResult, RoutingReplayFixture } from "../types.js";
|
|
20
|
+
import type { DashboardActionMetrics } from "../dashboard-contract.js";
|
|
16
21
|
import { parseFrontmatter } from "../utils/frontmatter.js";
|
|
17
22
|
import {
|
|
18
23
|
containsWholeSkillMention,
|
|
@@ -95,6 +100,14 @@ function resolveObservedReplayPath(path: string, workspaceRoot: string): string
|
|
|
95
100
|
return resolveReplayPath(isAbsolute(path) ? path : join(workspaceRoot, path));
|
|
96
101
|
}
|
|
97
102
|
|
|
103
|
+
/**
 * Collapse all whitespace runs in `value` to single spaces, trim it, and clamp
 * the result to at most `maxLength` UTF-16 code units (ellipsis included).
 *
 * @param value - Text to normalize; non-string input yields `null`.
 * @param maxLength - Maximum length of the returned string, ellipsis included.
 * @returns The normalized (possibly truncated) text, or `null` when there is
 *   nothing to show: non-string input, whitespace-only input, or a limit that
 *   leaves no room even for the ellipsis.
 */
function truncateReplayText(value: string | null | undefined, maxLength: number): string | null {
  if (typeof value !== "string") return null;
  const normalized = value.replace(/\s+/g, " ").trim();
  if (!normalized) return null;
  if (normalized.length <= maxLength) return normalized;

  // A limit below 1 (or NaN) cannot hold the ellipsis; a negative slice end
  // would otherwise return text *longer* than the requested limit.
  if (!(maxLength >= 1)) return null;

  let cut = Math.floor(maxLength) - 1;
  if (cut > 0) {
    // Never end on a lone high surrogate: drop it so the output stays valid
    // Unicode instead of cutting an astral character (e.g. an emoji) in half.
    const lastUnit = normalized.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  }
  return `${normalized.slice(0, cut)}…`;
}
|
|
110
|
+
|
|
98
111
|
function listCompetingSkillPaths(targetSkillPath: string): string[] {
|
|
99
112
|
const normalizedTargetPath = resolveReplayPath(targetSkillPath);
|
|
100
113
|
const targetSkillDir = dirname(normalizedTargetPath);
|
|
@@ -332,6 +345,128 @@ function normalizeReplayEventType(value: unknown): string {
|
|
|
332
345
|
return typeof value === "string" ? value.replace(/[._]/g, "-").trim().toLowerCase() : "";
|
|
333
346
|
}
|
|
334
347
|
|
|
348
|
+
/**
 * Narrow an unknown value to a plain key/value record.
 *
 * Arrays are rejected: they satisfy `typeof === "object"` but are not keyed
 * records, and treating them as one would expose their indices as keys.
 *
 * @param value - Any value, typically a field of a parsed JSON document.
 * @returns The value typed as a record, or `null` for primitives, `null`, and arrays.
 */
function readObject(value: unknown): Record<string, unknown> | null {
  if (typeof value !== "object" || value === null || Array.isArray(value)) return null;
  return value as Record<string, unknown>;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Accept a value only when it is a finite number.
 *
 * @param value - Any value, typically a field of a parsed JSON document.
 * @returns The number itself, or `null` for non-numbers, `NaN`, and ±Infinity.
 */
function readNumber(value: unknown): number | null {
  if (typeof value !== "number") return null;
  return Number.isFinite(value) ? value : null;
}
|
|
355
|
+
|
|
356
|
+
/**
 * Accept a value only when it is a string with non-whitespace content.
 *
 * @param value - Any value, typically a field of a parsed JSON document.
 * @returns The trimmed string, or `null` for non-strings and blank strings.
 */
function readString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  const stripped = value.trim();
  return stripped.length === 0 ? null : stripped;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Remove a single trailing `[...]` tag from a model identifier.
 *
 * @param value - Model identifier, or `null`.
 * @returns The identifier without its trailing bracketed tag; `null` when the
 *   input is `null` or the empty string.
 */
function normalizeClaudeModel(value: string | null): string | null {
  if (!value) return null;
  // Only a bracket group anchored at the very end of the string is removed.
  const trailingBracketTag = /\[[^\]]+\]$/;
  return value.replace(trailingBracketTag, "");
}
|
|
363
|
+
|
|
364
|
+
/**
 * Derive a model name from the first key of a model-usage map.
 *
 * @param value - Candidate model-usage object (keys are read as model identifiers).
 * @returns The first key, normalized via `normalizeClaudeModel`, or `null`
 *   when `value` is not an object or has no keys.
 */
function firstModelUsageKey(value: unknown): string | null {
  const usageByModel = readObject(value);
  if (usageByModel === null) return null;
  const [leadingKey] = Object.keys(usageByModel);
  return normalizeClaudeModel(leadingKey ?? null);
}
|
|
370
|
+
|
|
371
|
+
/**
 * Parse one line of streamed JSON output and extract dashboard metrics from it.
 *
 * Three event shapes are recognized by their `type` field:
 * - `system` with `subtype: "init"` — model and session id only;
 * - `assistant` — model and token usage taken from `message`;
 * - `result` — token usage plus `total_cost_usd`, `duration_ms` and `num_turns`,
 *   with the model taken from the first key of `modelUsage`.
 *
 * @param line - A single line of output; surrounding whitespace is ignored.
 * @returns The extracted metrics (fields not present in the event are `null`),
 *   or `null` when the line is blank, is not valid JSON, is not a JSON object,
 *   or is an event type that carries no metrics.
 */
export function extractClaudeRuntimeReplayMetrics(line: string): DashboardActionMetrics | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(trimmed);
  } catch {
    return null;
  }

  // JSON.parse can legitimately return null or a primitive (e.g. the line
  // `null` or `42`); reading properties off those would throw, so bail out.
  const parsed = readObject(decoded);
  if (!parsed) return null;

  const eventType = readString(parsed.type);

  // Shared skeleton: every recognized event reports the same keys, in the same
  // order, and only overrides the fields it actually carries.
  const base: DashboardActionMetrics = {
    platform: "claude_code",
    model: null,
    session_id: readString(parsed.session_id),
    input_tokens: null,
    output_tokens: null,
    cache_creation_input_tokens: null,
    cache_read_input_tokens: null,
    total_cost_usd: null,
    duration_ms: null,
    num_turns: null,
  };

  // Token counters are laid out identically in `message.usage` and top-level `usage`.
  const readTokenUsage = (usage: Record<string, unknown> | null) => ({
    input_tokens: readNumber(usage?.input_tokens),
    output_tokens: readNumber(usage?.output_tokens),
    cache_creation_input_tokens: readNumber(usage?.cache_creation_input_tokens),
    cache_read_input_tokens: readNumber(usage?.cache_read_input_tokens),
  });

  if (eventType === "system" && readString(parsed.subtype) === "init") {
    return { ...base, model: normalizeClaudeModel(readString(parsed.model)) };
  }

  if (eventType === "assistant") {
    const message = readObject(parsed.message);
    return {
      ...base,
      model: normalizeClaudeModel(readString(message?.model)),
      ...readTokenUsage(readObject(message?.usage)),
    };
  }

  if (eventType === "result") {
    return {
      ...base,
      model: firstModelUsageKey(parsed.modelUsage),
      ...readTokenUsage(readObject(parsed.usage)),
      total_cost_usd: readNumber(parsed.total_cost_usd),
      duration_ms: readNumber(parsed.duration_ms),
      num_turns: readNumber(parsed.num_turns),
    };
  }

  return null;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Drain a byte stream as UTF-8 text, optionally reporting each line as it arrives.
 *
 * Bytes are decoded incrementally, so a multi-byte character split across two
 * chunks is decoded correctly. Lines are split on `"\n"` only (a preceding
 * `"\r"` is left in place). A final unterminated line is reported once the
 * stream ends.
 *
 * @param stream - Stream to read; `null`/`undefined` yields an empty string.
 * @param onLine - Optional callback invoked synchronously for every complete
 *   line (without its `"\n"`), and once more for a non-empty trailing fragment.
 * @returns The full decoded text of the stream.
 * @throws Whatever `reader.read()` rejects with or `onLine` throws; the reader
 *   lock is released before the error propagates.
 */
async function readStreamText(
  stream: ReadableStream<Uint8Array> | null | undefined,
  onLine?: (line: string) => void,
): Promise<string> {
  if (!stream) return "";
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let output = "";
  let buffered = "";

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;
      // `stream: true` keeps an incomplete multi-byte sequence for the next chunk.
      const chunk = decoder.decode(value, { stream: true });
      if (!chunk) continue;
      output += chunk;
      buffered += chunk;
      const lines = buffered.split("\n");
      // The last element is the (possibly empty) unterminated remainder.
      buffered = lines.pop() ?? "";
      for (const line of lines) {
        onLine?.(line);
      }
    }
  } finally {
    // Always give the stream back: without this it stays locked forever, even
    // after a failure, and nobody else can read or cancel it.
    reader.releaseLock();
  }

  // Flush any bytes the decoder was still holding on to.
  const tail = decoder.decode();
  if (tail) {
    output += tail;
    buffered += tail;
  }
  if (buffered) onLine?.(buffered);

  return output;
}
|
|
469
|
+
|
|
335
470
|
export function parseCodexRuntimeReplayOutput(
|
|
336
471
|
rawOutput: string,
|
|
337
472
|
knownSkillNames: Set<string>,
|
|
@@ -591,7 +726,10 @@ async function invokeClaudeRuntimeReplay(
|
|
|
591
726
|
const timeout = setTimeout(() => proc.kill(), CLAUDE_RUNTIME_REPLAY_TIMEOUT_MS);
|
|
592
727
|
|
|
593
728
|
const [stdoutText, stderrText, exitCode] = await Promise.all([
|
|
594
|
-
|
|
729
|
+
readStreamText(proc.stdout, (line) => {
|
|
730
|
+
const metrics = extractClaudeRuntimeReplayMetrics(line);
|
|
731
|
+
if (metrics) emitDashboardActionMetrics(metrics);
|
|
732
|
+
}),
|
|
595
733
|
new Response(proc.stderr).text(),
|
|
596
734
|
proc.exited,
|
|
597
735
|
]);
|
|
@@ -1032,20 +1170,59 @@ export async function runHostRuntimeReplayFixture(options: {
|
|
|
1032
1170
|
options.contentTarget ?? "routing",
|
|
1033
1171
|
);
|
|
1034
1172
|
const results: RoutingReplayEntryResult[] = [];
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1173
|
+
const total = options.evalSet.length;
|
|
1174
|
+
|
|
1175
|
+
for (const [index, entry] of options.evalSet.entries()) {
|
|
1176
|
+
const current = index + 1;
|
|
1177
|
+
const querySnippet = truncateReplayText(entry.query, 120);
|
|
1178
|
+
|
|
1179
|
+
emitDashboardActionProgress({
|
|
1180
|
+
current,
|
|
1181
|
+
total,
|
|
1182
|
+
status: "started",
|
|
1183
|
+
query: querySnippet,
|
|
1184
|
+
passed: null,
|
|
1185
|
+
evidence: null,
|
|
1045
1186
|
});
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1187
|
+
|
|
1188
|
+
try {
|
|
1189
|
+
const observation = await invokeRuntime({
|
|
1190
|
+
query: entry.query,
|
|
1191
|
+
platform: options.fixture.platform,
|
|
1192
|
+
workspaceRoot: workspace.rootDir,
|
|
1193
|
+
skillRegistryDir: workspace.skillRegistryDir,
|
|
1194
|
+
targetSkillName: options.fixture.target_skill_name,
|
|
1195
|
+
targetSkillPath: workspace.targetSkillPath,
|
|
1196
|
+
competingSkillPaths: workspace.competingSkillPaths,
|
|
1197
|
+
});
|
|
1198
|
+
const result = evaluateRuntimeReplayObservation(
|
|
1199
|
+
entry,
|
|
1200
|
+
options.fixture,
|
|
1201
|
+
observation,
|
|
1202
|
+
workspace,
|
|
1203
|
+
);
|
|
1204
|
+
results.push(result);
|
|
1205
|
+
|
|
1206
|
+
emitDashboardActionProgress({
|
|
1207
|
+
current,
|
|
1208
|
+
total,
|
|
1209
|
+
status: "finished",
|
|
1210
|
+
query: querySnippet,
|
|
1211
|
+
passed: result.passed,
|
|
1212
|
+
evidence: truncateReplayText(result.evidence, 180),
|
|
1213
|
+
});
|
|
1214
|
+
} catch (error) {
|
|
1215
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1216
|
+
emitDashboardActionProgress({
|
|
1217
|
+
current,
|
|
1218
|
+
total,
|
|
1219
|
+
status: "finished",
|
|
1220
|
+
query: querySnippet,
|
|
1221
|
+
passed: false,
|
|
1222
|
+
evidence: truncateReplayText(message, 180),
|
|
1223
|
+
});
|
|
1224
|
+
throw error;
|
|
1225
|
+
}
|
|
1049
1226
|
}
|
|
1050
1227
|
|
|
1051
1228
|
return results;
|
package/cli/selftune/index.ts
CHANGED
|
@@ -43,6 +43,7 @@ import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "./command-surface.js
|
|
|
43
43
|
process.on("uncaughtException", handleCLIError);
|
|
44
44
|
process.on("unhandledRejection", handleCLIError);
|
|
45
45
|
|
|
46
|
+
const originalArgv = process.argv.slice(2);
|
|
46
47
|
const command = process.argv[2];
|
|
47
48
|
|
|
48
49
|
if (command === "--help" || command === "-h") {
|
|
@@ -112,6 +113,9 @@ if (!command) {
|
|
|
112
113
|
process.exit(0);
|
|
113
114
|
}
|
|
114
115
|
|
|
116
|
+
const { startDashboardActionStream } = await import("./dashboard-action-stream.js");
|
|
117
|
+
startDashboardActionStream(originalArgv);
|
|
118
|
+
|
|
115
119
|
// Route to the appropriate subcommand module.
|
|
116
120
|
// We use dynamic imports so only the needed module is loaded.
|
|
117
121
|
// Each module exports a cliMain() function that the router calls explicitly,
|
|
@@ -179,6 +179,77 @@ function normalizeContent(rawContent: unknown): Array<Record<string, unknown>> {
|
|
|
179
179
|
return [];
|
|
180
180
|
}
|
|
181
181
|
|
|
182
|
+
/**
 * Coerce a raw epoch timestamp of unknown unit into epoch milliseconds.
 *
 * The unit is guessed from the magnitude:
 * - above 1e17: nanoseconds, divided by 1e6;
 * - above 1e14: microseconds, divided by 1e3;
 * - above 1e12: already milliseconds;
 * - above 1e9: seconds, multiplied by 1000;
 * - anything else is returned unchanged.
 *
 * Scaling down the very large units keeps the result inside the range a
 * `Date` can represent, so a later `new Date(ms).toISOString()` does not throw.
 *
 * @param rawValue - Timestamp candidate of any type.
 * @returns Epoch milliseconds; `Date.now()` when `rawValue` is not a finite number.
 */
function normalizeTimestampMs(rawValue: unknown): number {
  if (typeof rawValue !== "number" || !Number.isFinite(rawValue)) {
    return Date.now();
  }
  if (rawValue > 1e17) return Math.floor(rawValue / 1e6);
  if (rawValue > 1e14) return Math.floor(rawValue / 1e3);
  if (rawValue > 1e12) return rawValue;
  if (rawValue > 1e9) return rawValue * 1000;
  return rawValue;
}
|
|
190
|
+
|
|
191
|
+
/**
 * List the column names of a table via `PRAGMA table_info`.
 *
 * @param db - Open database handle.
 * @param tableName - Table to inspect; passed through `assertSafeIdentifier`
 *   before being interpolated into the PRAGMA statement.
 * @returns The set of `name` values reported by the PRAGMA rows.
 */
function getTableColumns(db: Database, tableName: string): Set<string> {
  const pragmaSql = `PRAGMA table_info(${assertSafeIdentifier(tableName)})`;
  const columnNames = new Set<string>();
  for (const info of db.query(pragmaSql).all() as Array<{ name: string }>) {
    columnNames.add(info.name);
  }
  return columnNames;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Choose the first candidate name that exists in a set of columns.
 *
 * @param columns - Column names that are actually present.
 * @param candidates - Names to try, in priority order.
 * @returns The first candidate contained in `columns`, or `null` when none is.
 */
function pickColumn(columns: Set<string>, candidates: string[]): string | null {
  const match = candidates.find((name) => columns.has(name));
  return match ?? null;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Turn a raw message payload into an object, decoding JSON text when needed.
 *
 * @param rawValue - Either a JSON string or an already-decoded value.
 * @returns The payload as a record; `null` when a string is not valid JSON or
 *   when the (decoded) value is not a non-null object.
 */
function parseMessagePayload(rawValue: unknown): Record<string, unknown> | null {
  let candidate: unknown = rawValue;
  if (typeof rawValue === "string") {
    try {
      candidate = JSON.parse(rawValue);
    } catch {
      // Malformed JSON is treated as "no payload" rather than an error.
      return null;
    }
  }
  const isObjectLike = typeof candidate === "object" && candidate !== null;
  return isObjectLike ? (candidate as Record<string, unknown>) : null;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Determine a message's role, preferring the row's own `role` field over the
 * one embedded in the decoded payload.
 *
 * @param row - Raw message row.
 * @param payload - Decoded payload for the row, or `null` when unavailable.
 * @returns The row's `role` (as stored, untrimmed) when it is a non-blank
 *   string; otherwise the payload's `role` when it is a string; otherwise `""`.
 */
function extractMessageRole(
  row: Record<string, unknown>,
  payload: Record<string, unknown> | null,
): string {
  const { role: roleOnRow } = row;
  if (typeof roleOnRow === "string" && roleOnRow.trim() !== "") return roleOnRow;

  const roleInPayload = payload?.role;
  if (typeof roleInPayload !== "string") return "";
  return roleInPayload;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Collect the content blocks of a message, trying progressively weaker sources.
 *
 * Order of preference:
 * 1. the row's own `content`, normalized via `normalizeContent`;
 * 2. the payload's `content`, normalized the same way;
 * 3. a single synthetic text block built from `payload.summary.title`.
 *
 * @param row - Raw message row.
 * @param payload - Decoded payload for the row, or `null` when unavailable.
 * @returns The first non-empty block list found, or `[]` when no source has content.
 */
function extractMessageBlocks(
  row: Record<string, unknown>,
  payload: Record<string, unknown> | null,
): Array<Record<string, unknown>> {
  for (const rawContent of [row.content, payload?.content]) {
    const blocks = normalizeContent(rawContent);
    if (blocks.length > 0) return blocks;
  }

  const summary = payload?.summary;
  if (typeof summary !== "object" || summary === null) return [];

  // Last resort: surface the summary title as a lone text block.
  const { title } = summary as Record<string, unknown>;
  return typeof title === "string" && title.trim() ? [{ type: "text", text: title.trim() }] : [];
}
|
|
252
|
+
|
|
182
253
|
/**
|
|
183
254
|
* Read OpenCode sessions from SQLite database.
|
|
184
255
|
*/
|
|
@@ -207,19 +278,40 @@ export function readSessionsFromSqlite(
|
|
|
207
278
|
|
|
208
279
|
const safeSessionsTable = assertSafeIdentifier(sessionsTable);
|
|
209
280
|
const safeMessagesTable = assertSafeIdentifier(messagesTable);
|
|
281
|
+
const sessionColumns = getTableColumns(db, safeSessionsTable);
|
|
282
|
+
const messageColumns = getTableColumns(db, safeMessagesTable);
|
|
283
|
+
const sessionTimeColumn = pickColumn(sessionColumns, [
|
|
284
|
+
"created",
|
|
285
|
+
"time_created",
|
|
286
|
+
"createdAt",
|
|
287
|
+
"timeCreated",
|
|
288
|
+
"updated",
|
|
289
|
+
"time_updated",
|
|
290
|
+
]);
|
|
291
|
+
const messageTimeColumn = pickColumn(messageColumns, [
|
|
292
|
+
"created",
|
|
293
|
+
"time_created",
|
|
294
|
+
"createdAt",
|
|
295
|
+
"timeCreated",
|
|
296
|
+
"updated",
|
|
297
|
+
"time_updated",
|
|
298
|
+
]);
|
|
210
299
|
|
|
211
300
|
// Get sessions
|
|
212
301
|
let whereClause = "";
|
|
213
302
|
const queryParams: number[] = [];
|
|
214
|
-
if (sinceTs) {
|
|
215
|
-
whereClause =
|
|
303
|
+
if (sinceTs && sessionTimeColumn) {
|
|
304
|
+
whereClause = `WHERE ${assertSafeIdentifier(sessionTimeColumn)} > ?`;
|
|
216
305
|
queryParams.push(Math.floor(sinceTs * 1000));
|
|
217
306
|
}
|
|
307
|
+
const orderBySessionColumn = sessionTimeColumn ? assertSafeIdentifier(sessionTimeColumn) : "id";
|
|
218
308
|
|
|
219
309
|
let sessionRows: Array<Record<string, unknown>>;
|
|
220
310
|
try {
|
|
221
311
|
sessionRows = db
|
|
222
|
-
.query(
|
|
312
|
+
.query(
|
|
313
|
+
`SELECT * FROM ${safeSessionsTable} ${whereClause} ORDER BY ${orderBySessionColumn} ASC`,
|
|
314
|
+
)
|
|
223
315
|
.all(...queryParams) as Array<Record<string, unknown>>;
|
|
224
316
|
} catch (e) {
|
|
225
317
|
console.warn(`[WARN] Could not query sessions: ${e}`);
|
|
@@ -231,14 +323,19 @@ export function readSessionsFromSqlite(
|
|
|
231
323
|
|
|
232
324
|
for (const sessionRow of sessionRows) {
|
|
233
325
|
const sessionId = String(sessionRow.id);
|
|
234
|
-
const createdMs =
|
|
326
|
+
const createdMs = normalizeTimestampMs(
|
|
327
|
+
sessionTimeColumn ? sessionRow[sessionTimeColumn] : Date.now(),
|
|
328
|
+
);
|
|
235
329
|
const timestamp = new Date(createdMs).toISOString();
|
|
236
330
|
|
|
237
331
|
// Get messages for this session
|
|
238
332
|
let msgRows: Array<Record<string, unknown>>;
|
|
239
333
|
try {
|
|
334
|
+
const orderByMessageColumn = messageTimeColumn
|
|
335
|
+
? ` ORDER BY ${assertSafeIdentifier(messageTimeColumn)} ASC`
|
|
336
|
+
: "";
|
|
240
337
|
msgRows = db
|
|
241
|
-
.query(`SELECT * FROM ${safeMessagesTable} WHERE session_id =
|
|
338
|
+
.query(`SELECT * FROM ${safeMessagesTable} WHERE session_id = ?${orderByMessageColumn}`)
|
|
242
339
|
.all(String(sessionRow.id)) as Array<Record<string, unknown>>;
|
|
243
340
|
} catch {
|
|
244
341
|
continue;
|
|
@@ -250,6 +347,7 @@ export function readSessionsFromSqlite(
|
|
|
250
347
|
const skillDetections = new Map<string, TriggeredSkillDetection>();
|
|
251
348
|
let errors = 0;
|
|
252
349
|
let assistantTurns = 0;
|
|
350
|
+
let cwd = typeof sessionRow.directory === "string" ? sessionRow.directory : "";
|
|
253
351
|
|
|
254
352
|
const noteSkillDetection = (skillName: string, hasSkillMdRead: boolean): void => {
|
|
255
353
|
const normalizedSkillName = skillName.trim();
|
|
@@ -266,8 +364,16 @@ export function readSessionsFromSqlite(
|
|
|
266
364
|
};
|
|
267
365
|
|
|
268
366
|
for (const msg of msgRows) {
|
|
269
|
-
const
|
|
270
|
-
const
|
|
367
|
+
const payload = parseMessagePayload(msg.data);
|
|
368
|
+
const role = extractMessageRole(msg, payload);
|
|
369
|
+
const blocks = extractMessageBlocks(msg, payload);
|
|
370
|
+
const payloadPath =
|
|
371
|
+
payload && typeof payload.path === "object" && payload.path !== null
|
|
372
|
+
? (payload.path as Record<string, unknown>)
|
|
373
|
+
: null;
|
|
374
|
+
if (!cwd && payloadPath && typeof payloadPath.cwd === "string") {
|
|
375
|
+
cwd = payloadPath.cwd;
|
|
376
|
+
}
|
|
271
377
|
|
|
272
378
|
if (role === "user") {
|
|
273
379
|
if (!firstUserQuery) {
|
|
@@ -291,6 +397,9 @@ export function readSessionsFromSqlite(
|
|
|
291
397
|
}
|
|
292
398
|
} else if (role === "assistant") {
|
|
293
399
|
assistantTurns += 1;
|
|
400
|
+
if (payload?.error) {
|
|
401
|
+
errors += 1;
|
|
402
|
+
}
|
|
294
403
|
for (const block of blocks) {
|
|
295
404
|
const blockType = (block.type as string) ?? "";
|
|
296
405
|
|
|
@@ -350,7 +459,7 @@ export function readSessionsFromSqlite(
|
|
|
350
459
|
session_id: sessionId,
|
|
351
460
|
source: "opencode",
|
|
352
461
|
transcript_path: dbPath,
|
|
353
|
-
cwd
|
|
462
|
+
cwd,
|
|
354
463
|
last_user_query: firstUserQuery,
|
|
355
464
|
query: firstUserQuery,
|
|
356
465
|
tool_calls: toolCalls,
|
|
@@ -245,6 +245,33 @@ CREATE TABLE IF NOT EXISTS grading_baselines (
|
|
|
245
245
|
grading_results_json TEXT
|
|
246
246
|
)`;
|
|
247
247
|
|
|
248
|
+
// -- Creator-loop artifact tables --------------------------------------------
|
|
249
|
+
|
|
250
|
+
/**
 * DDL for the `canonical_eval_sets` table.
 *
 * `skill_name` is the primary key, so the table holds at most one row per
 * skill. `eval_set_json` is presumably the JSON-serialized eval set and
 * `stored_at` its write timestamp — confirm against the code that writes rows.
 */
export const CREATE_CANONICAL_EVAL_SETS = `
CREATE TABLE IF NOT EXISTS canonical_eval_sets (
skill_name TEXT PRIMARY KEY,
stored_at TEXT NOT NULL,
eval_set_json TEXT NOT NULL
)`;
|
|
256
|
+
|
|
257
|
+
/**
 * DDL for the `unit_test_files` table.
 *
 * `skill_name` is the primary key, so the table holds at most one row per
 * skill. `tests_json` is presumably the JSON-serialized unit tests and
 * `stored_at` its write timestamp — confirm against the code that writes rows.
 */
export const CREATE_UNIT_TEST_FILES = `
CREATE TABLE IF NOT EXISTS unit_test_files (
skill_name TEXT PRIMARY KEY,
stored_at TEXT NOT NULL,
tests_json TEXT NOT NULL
)`;
|
|
263
|
+
|
|
264
|
+
/**
 * DDL for the `unit_test_run_results` table.
 *
 * `skill_name` is the primary key, so the table holds at most one row per
 * skill (NOTE(review): this looks like "latest run only" — confirm that run
 * history is intentionally not kept). Alongside the `total` / `passed` /
 * `failed` counters and `pass_rate`, `result_json` presumably carries the
 * full serialized run result.
 */
export const CREATE_UNIT_TEST_RUN_RESULTS = `
CREATE TABLE IF NOT EXISTS unit_test_run_results (
skill_name TEXT PRIMARY KEY,
run_at TEXT NOT NULL,
total INTEGER NOT NULL,
passed INTEGER NOT NULL,
failed INTEGER NOT NULL,
pass_rate REAL NOT NULL,
result_json TEXT NOT NULL
)`;
|
|
274
|
+
|
|
248
275
|
// -- Improvement signal table (from signal_log.jsonl) ------------------------
|
|
249
276
|
|
|
250
277
|
export const CREATE_IMPROVEMENT_SIGNALS = `
|
|
@@ -388,6 +415,10 @@ export const CREATE_INDEXES = [
|
|
|
388
415
|
`CREATE INDEX IF NOT EXISTS idx_grading_bl_proposal ON grading_baselines(proposal_id)`,
|
|
389
416
|
`CREATE INDEX IF NOT EXISTS idx_grading_bl_ts ON grading_baselines(measured_at)`,
|
|
390
417
|
`CREATE INDEX IF NOT EXISTS idx_grading_bl_skill_proposal ON grading_baselines(skill_name, proposal_id, measured_at)`,
|
|
418
|
+
// -- Creator-loop artifact indexes -----------------------------------------
|
|
419
|
+
`CREATE INDEX IF NOT EXISTS idx_canonical_eval_sets_stored_at ON canonical_eval_sets(stored_at)`,
|
|
420
|
+
`CREATE INDEX IF NOT EXISTS idx_unit_test_files_stored_at ON unit_test_files(stored_at)`,
|
|
421
|
+
`CREATE INDEX IF NOT EXISTS idx_unit_test_run_results_run_at ON unit_test_run_results(run_at)`,
|
|
391
422
|
// -- Improvement signal indexes ---------------------------------------------
|
|
392
423
|
`CREATE INDEX IF NOT EXISTS idx_signals_session ON improvement_signals(session_id)`,
|
|
393
424
|
`CREATE INDEX IF NOT EXISTS idx_signals_consumed ON improvement_signals(consumed)`,
|
|
@@ -506,6 +537,9 @@ export const ALL_DDL = [
|
|
|
506
537
|
CREATE_QUERIES,
|
|
507
538
|
CREATE_GRADING_RESULTS,
|
|
508
539
|
CREATE_GRADING_BASELINES,
|
|
540
|
+
CREATE_CANONICAL_EVAL_SETS,
|
|
541
|
+
CREATE_UNIT_TEST_FILES,
|
|
542
|
+
CREATE_UNIT_TEST_RUN_RESULTS,
|
|
509
543
|
CREATE_IMPROVEMENT_SIGNALS,
|
|
510
544
|
CREATE_UPLOAD_QUEUE,
|
|
511
545
|
CREATE_CREATOR_CONTRIBUTION_STAGING,
|