selftune 0.2.13 → 0.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
- package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +16 -0
- package/apps/local-dashboard/dist/index.html +2 -2
- package/cli/selftune/activation-rules.ts +24 -48
- package/cli/selftune/constants.ts +7 -0
- package/cli/selftune/contribute/bundle.ts +9 -44
- package/cli/selftune/dashboard-contract.ts +12 -0
- package/cli/selftune/eval/hooks-to-evals.ts +5 -22
- package/cli/selftune/grading/auto-grade.ts +3 -13
- package/cli/selftune/grading/grade-session.ts +3 -13
- package/cli/selftune/hooks/evolution-guard.ts +14 -24
- package/cli/selftune/hooks/prompt-log.ts +0 -8
- package/cli/selftune/hooks/session-stop.ts +0 -8
- package/cli/selftune/ingestors/codex-rollout.ts +9 -4
- package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
- package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
- package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
- package/cli/selftune/localdb/queries.ts +57 -0
- package/cli/selftune/monitoring/watch.ts +7 -22
- package/cli/selftune/normalization.ts +2 -23
- package/cli/selftune/orchestrate.ts +213 -14
- package/cli/selftune/schedule.ts +51 -5
- package/cli/selftune/utils/jsonl.ts +2 -0
- package/package.json +3 -1
- package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
- package/packages/ui/src/components/index.ts +1 -0
- package/packages/ui/src/components/section-cards.tsx +13 -0
- package/skill/SKILL.md +1 -1
- package/skill/Workflows/Orchestrate.md +11 -7
- package/skill/Workflows/Schedule.md +11 -0
- package/skill/references/logs.md +22 -21
- package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
- package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
|
@@ -34,6 +34,11 @@ import {
|
|
|
34
34
|
SKILL_LOG,
|
|
35
35
|
TELEMETRY_LOG,
|
|
36
36
|
} from "../constants.js";
|
|
37
|
+
import {
|
|
38
|
+
writeQueryToDb,
|
|
39
|
+
writeSessionTelemetryToDb,
|
|
40
|
+
writeSkillUsageToDb,
|
|
41
|
+
} from "../localdb/direct-write.js";
|
|
37
42
|
import {
|
|
38
43
|
appendCanonicalRecords,
|
|
39
44
|
buildCanonicalExecutionFact,
|
|
@@ -46,7 +51,7 @@ import {
|
|
|
46
51
|
deriveSkillInvocationId,
|
|
47
52
|
} from "../normalization.js";
|
|
48
53
|
import type { CanonicalRecord, QueryLogRecord, SkillUsageRecord } from "../types.js";
|
|
49
|
-
import {
|
|
54
|
+
import { loadMarker, saveMarker } from "../utils/jsonl.js";
|
|
50
55
|
|
|
51
56
|
export interface SessionFile {
|
|
52
57
|
agentId: string;
|
|
@@ -389,11 +394,25 @@ export function writeSession(
|
|
|
389
394
|
query: prompt,
|
|
390
395
|
source: session.source,
|
|
391
396
|
};
|
|
392
|
-
|
|
397
|
+
writeQueryToDb(queryRecord);
|
|
393
398
|
}
|
|
394
399
|
|
|
395
|
-
|
|
396
|
-
|
|
400
|
+
// Build a SessionTelemetryRecord-shaped object for SQLite
|
|
401
|
+
writeSessionTelemetryToDb({
|
|
402
|
+
timestamp: session.timestamp,
|
|
403
|
+
session_id: session.session_id,
|
|
404
|
+
cwd: session.cwd,
|
|
405
|
+
transcript_path: session.transcript_path,
|
|
406
|
+
tool_calls: session.tool_calls,
|
|
407
|
+
total_tool_calls: session.total_tool_calls,
|
|
408
|
+
bash_commands: session.bash_commands,
|
|
409
|
+
skills_triggered: session.skills_triggered,
|
|
410
|
+
assistant_turns: session.assistant_turns,
|
|
411
|
+
errors_encountered: session.errors_encountered,
|
|
412
|
+
transcript_chars: session.transcript_chars,
|
|
413
|
+
last_user_query: session.last_user_query,
|
|
414
|
+
source: session.source,
|
|
415
|
+
});
|
|
397
416
|
|
|
398
417
|
for (const skillName of skills) {
|
|
399
418
|
const skillRecord: SkillUsageRecord = {
|
|
@@ -405,7 +424,7 @@ export function writeSession(
|
|
|
405
424
|
triggered: true,
|
|
406
425
|
source: session.source,
|
|
407
426
|
};
|
|
408
|
-
|
|
427
|
+
writeSkillUsageToDb(skillRecord);
|
|
409
428
|
}
|
|
410
429
|
|
|
411
430
|
// --- Canonical normalization records (additive) ---
|
|
@@ -27,6 +27,11 @@ import { basename, join } from "node:path";
|
|
|
27
27
|
import { parseArgs } from "node:util";
|
|
28
28
|
|
|
29
29
|
import { CANONICAL_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
|
|
30
|
+
import {
|
|
31
|
+
writeQueryToDb,
|
|
32
|
+
writeSessionTelemetryToDb,
|
|
33
|
+
writeSkillUsageToDb,
|
|
34
|
+
} from "../localdb/direct-write.js";
|
|
30
35
|
import {
|
|
31
36
|
appendCanonicalRecords,
|
|
32
37
|
buildCanonicalExecutionFact,
|
|
@@ -44,7 +49,7 @@ import type {
|
|
|
44
49
|
SessionTelemetryRecord,
|
|
45
50
|
SkillUsageRecord,
|
|
46
51
|
} from "../types.js";
|
|
47
|
-
import {
|
|
52
|
+
import { loadMarker, saveMarker } from "../utils/jsonl.js";
|
|
48
53
|
|
|
49
54
|
const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share");
|
|
50
55
|
const DEFAULT_DATA_DIR = join(XDG_DATA_HOME, "opencode");
|
|
@@ -528,7 +533,7 @@ export function writeSession(
|
|
|
528
533
|
query: prompt,
|
|
529
534
|
source: session.source,
|
|
530
535
|
};
|
|
531
|
-
|
|
536
|
+
writeQueryToDb(queryRecord);
|
|
532
537
|
}
|
|
533
538
|
|
|
534
539
|
const telemetry: SessionTelemetryRecord = {
|
|
@@ -546,7 +551,7 @@ export function writeSession(
|
|
|
546
551
|
last_user_query: session.last_user_query,
|
|
547
552
|
source: session.source,
|
|
548
553
|
};
|
|
549
|
-
|
|
554
|
+
writeSessionTelemetryToDb(telemetry);
|
|
550
555
|
|
|
551
556
|
for (const skillName of skills) {
|
|
552
557
|
const skillRecord: SkillUsageRecord = {
|
|
@@ -558,7 +563,7 @@ export function writeSession(
|
|
|
558
563
|
triggered: true,
|
|
559
564
|
source: session.source,
|
|
560
565
|
};
|
|
561
|
-
|
|
566
|
+
writeSkillUsageToDb(skillRecord);
|
|
562
567
|
}
|
|
563
568
|
|
|
564
569
|
// --- Canonical normalization records (additive) ---
|
|
@@ -11,6 +11,7 @@ import type {
|
|
|
11
11
|
OrchestrateRunReport,
|
|
12
12
|
OverviewPayload,
|
|
13
13
|
PendingProposal,
|
|
14
|
+
RecentActivityItem,
|
|
14
15
|
SkillReportPayload,
|
|
15
16
|
SkillSummary,
|
|
16
17
|
} from "../dashboard-contract.js";
|
|
@@ -126,6 +127,10 @@ export function getOverviewPayload(db: Database): OverviewPayload {
|
|
|
126
127
|
// Pending proposals: created/validated but no terminal action (deduped in SQL)
|
|
127
128
|
const pending_proposals = getPendingProposals(db);
|
|
128
129
|
|
|
130
|
+
// Active sessions and recent activity
|
|
131
|
+
const active_sessions = getActiveSessionCount(db);
|
|
132
|
+
const recent_activity = getRecentActivity(db);
|
|
133
|
+
|
|
129
134
|
return {
|
|
130
135
|
telemetry,
|
|
131
136
|
skills,
|
|
@@ -133,6 +138,8 @@ export function getOverviewPayload(db: Database): OverviewPayload {
|
|
|
133
138
|
counts,
|
|
134
139
|
unmatched_queries: unmatchedRows,
|
|
135
140
|
pending_proposals,
|
|
141
|
+
active_sessions,
|
|
142
|
+
recent_activity,
|
|
136
143
|
};
|
|
137
144
|
}
|
|
138
145
|
|
|
@@ -361,6 +368,56 @@ export function getOrchestrateRuns(db: Database, limit = 20): OrchestrateRunRepo
|
|
|
361
368
|
}));
|
|
362
369
|
}
|
|
363
370
|
|
|
371
|
+
/**
 * Number of distinct sessions that appear in `queries` but have no matching
 * `session_telemetry` row, which this module treats as "still in progress".
 *
 * @param db - Open database handle the count is run against.
 * @returns The `count` column of the single aggregate row.
 */
export function getActiveSessionCount(db: Database): number {
  const activeSessionsSql = `SELECT COUNT(DISTINCT q.session_id) as count
       FROM queries q
       WHERE NOT EXISTS (
         SELECT 1 FROM session_telemetry st WHERE st.session_id = q.session_id
       )`;
  const { count } = db.query(activeSessionsSql).get() as { count: number };
  return count;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Return the most recent skill invocations, newest first, each flagged with
 * whether its session is still in progress (no `session_telemetry` row yet).
 *
 * The live flag is computed with an `EXISTS` subquery rather than a join, so a
 * session can never contribute more than one output row per invocation even if
 * several telemetry rows were to share its `session_id`.
 *
 * @param db - Open database handle to read from.
 * @param limit - Bound to the SQL `LIMIT` placeholder (default 20).
 * @returns One item per invocation row. A NULL `query` column becomes `""`;
 *   the integer `triggered` / `is_live` columns become booleans (`=== 1`).
 */
export function getRecentActivity(db: Database, limit = 20): RecentActivityItem[] {
  const rows = db
    .query(
      `SELECT si.occurred_at, si.session_id, si.skill_name, si.query, si.triggered,
              CASE WHEN EXISTS (
                SELECT 1 FROM session_telemetry st WHERE st.session_id = si.session_id
              ) THEN 0 ELSE 1 END as is_live
       FROM skill_invocations si
       ORDER BY si.occurred_at DESC
       LIMIT ?`,
    )
    .all(limit) as Array<{
    occurred_at: string;
    session_id: string;
    skill_name: string;
    // Nullable: the mapping below substitutes "" when the column is NULL.
    query: string | null;
    triggered: number;
    is_live: number;
  }>;

  return rows.map((row) => ({
    timestamp: row.occurred_at,
    session_id: row.session_id,
    skill_name: row.skill_name,
    query: row.query ?? "",
    triggered: row.triggered === 1,
    is_live: row.is_live === 1,
  }));
}
|
|
420
|
+
|
|
364
421
|
// -- Generic read queries (Phase 3: replace readJsonl calls) ------------------
|
|
365
422
|
|
|
366
423
|
/**
|
|
@@ -26,7 +26,6 @@ import type {
|
|
|
26
26
|
SessionTelemetryRecord,
|
|
27
27
|
SkillUsageRecord,
|
|
28
28
|
} from "../types.js";
|
|
29
|
-
import { readJsonl } from "../utils/jsonl.js";
|
|
30
29
|
import {
|
|
31
30
|
filterActionableQueryRecords,
|
|
32
31
|
filterActionableSkillUsageRecords,
|
|
@@ -212,27 +211,13 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
212
211
|
);
|
|
213
212
|
}
|
|
214
213
|
|
|
215
|
-
// 1. Read log files from SQLite
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
_queryLogPath === QUERY_LOG
|
|
223
|
-
) {
|
|
224
|
-
const db = getDb();
|
|
225
|
-
telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
226
|
-
// SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
|
|
227
|
-
telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
|
|
228
|
-
skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
229
|
-
queryRecords = queryQueryLog(db) as QueryLogRecord[];
|
|
230
|
-
} else {
|
|
231
|
-
// Intentional JSONL fallback: custom log path overrides bypass SQLite reads
|
|
232
|
-
telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
|
|
233
|
-
skillRecords = readJsonl<SkillUsageRecord>(_skillLogPath);
|
|
234
|
-
queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);
|
|
235
|
-
}
|
|
214
|
+
// 1. Read log files from SQLite
|
|
215
|
+
const db = getDb();
|
|
216
|
+
const telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
217
|
+
// SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
|
|
218
|
+
telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
|
|
219
|
+
const skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
220
|
+
const queryRecords = queryQueryLog(db) as QueryLogRecord[];
|
|
236
221
|
|
|
237
222
|
// 2. Determine baseline pass rate from last deployed audit entry
|
|
238
223
|
const lastDeployed = getLastDeployedProposal(skillName, _auditLogPath);
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
|
|
15
15
|
import { createHash } from "node:crypto";
|
|
16
16
|
import {
|
|
17
|
-
appendFileSync,
|
|
18
17
|
existsSync,
|
|
19
18
|
mkdirSync,
|
|
20
19
|
readFileSync,
|
|
@@ -388,32 +387,12 @@ export function getLatestPromptIdentity(
|
|
|
388
387
|
};
|
|
389
388
|
}
|
|
390
389
|
|
|
391
|
-
export function appendCanonicalRecord(record: CanonicalRecord,
|
|
390
|
+
export function appendCanonicalRecord(record: CanonicalRecord, _logPath?: string): void {
|
|
392
391
|
writeCanonicalToDb(record);
|
|
393
|
-
// JSONL append — best-effort backup for prompt state recovery
|
|
394
|
-
try {
|
|
395
|
-
const path = logPath ?? CANONICAL_LOG;
|
|
396
|
-
const dir = dirname(path);
|
|
397
|
-
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
|
|
398
|
-
appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
|
|
399
|
-
} catch {
|
|
400
|
-
/* best-effort only */
|
|
401
|
-
}
|
|
402
392
|
}
|
|
403
393
|
|
|
404
|
-
export function appendCanonicalRecords(records: CanonicalRecord[],
|
|
394
|
+
export function appendCanonicalRecords(records: CanonicalRecord[], _logPath?: string): void {
|
|
405
395
|
writeCanonicalBatchToDb(records);
|
|
406
|
-
// JSONL append — best-effort backup for prompt state recovery
|
|
407
|
-
try {
|
|
408
|
-
const path = logPath ?? CANONICAL_LOG;
|
|
409
|
-
const dir = dirname(path);
|
|
410
|
-
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
|
|
411
|
-
for (const record of records) {
|
|
412
|
-
appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
|
|
413
|
-
}
|
|
414
|
-
} catch {
|
|
415
|
-
/* best-effort only */
|
|
416
|
-
}
|
|
417
396
|
}
|
|
418
397
|
|
|
419
398
|
// ---------------------------------------------------------------------------
|
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
* explicit dry-run and review-required modes for human-in-the-loop operation.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
12
|
+
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
13
13
|
import { homedir } from "node:os";
|
|
14
|
-
import { join } from "node:path";
|
|
14
|
+
import { dirname, join } from "node:path";
|
|
15
15
|
import { parseArgs } from "node:util";
|
|
16
16
|
|
|
17
17
|
import { readAlphaIdentity } from "./alpha-identity.js";
|
|
@@ -19,9 +19,19 @@ import type { UploadCycleSummary } from "./alpha-upload/index.js";
|
|
|
19
19
|
import { ORCHESTRATE_LOCK, SELFTUNE_CONFIG_PATH } from "./constants.js";
|
|
20
20
|
import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
|
|
21
21
|
import type { EvolveResult } from "./evolution/evolve.js";
|
|
22
|
+
import {
|
|
23
|
+
buildDefaultGradingOutputPath,
|
|
24
|
+
deriveExpectationsFromSkill,
|
|
25
|
+
gradeSession,
|
|
26
|
+
resolveLatestSessionForSkill,
|
|
27
|
+
} from "./grading/grade-session.js";
|
|
22
28
|
import { readGradingResultsForSkill } from "./grading/results.js";
|
|
23
29
|
import { getDb } from "./localdb/db.js";
|
|
24
|
-
import {
|
|
30
|
+
import {
|
|
31
|
+
updateSignalConsumed,
|
|
32
|
+
writeGradingResultToDb,
|
|
33
|
+
writeOrchestrateRunToDb,
|
|
34
|
+
} from "./localdb/direct-write.js";
|
|
25
35
|
import {
|
|
26
36
|
queryEvolutionAudit,
|
|
27
37
|
queryImprovementSignals,
|
|
@@ -50,6 +60,7 @@ import {
|
|
|
50
60
|
findRepositoryClaudeSkillDirs,
|
|
51
61
|
findRepositorySkillDirs,
|
|
52
62
|
} from "./utils/skill-discovery.js";
|
|
63
|
+
import { readExcerpt } from "./utils/transcript.js";
|
|
53
64
|
|
|
54
65
|
// ---------------------------------------------------------------------------
|
|
55
66
|
// Lockfile management
|
|
@@ -156,6 +167,8 @@ export interface OrchestrateOptions {
|
|
|
156
167
|
recentWindowHours: number;
|
|
157
168
|
/** Force sync to rescan all sources. */
|
|
158
169
|
syncForce: boolean;
|
|
170
|
+
/** Max ungraded skills to auto-grade per run (default: 5). Set 0 to disable. */
|
|
171
|
+
maxAutoGrade: number;
|
|
159
172
|
}
|
|
160
173
|
|
|
161
174
|
export interface SkillAction {
|
|
@@ -178,6 +191,7 @@ export interface OrchestrateResult {
|
|
|
178
191
|
deployed: number;
|
|
179
192
|
watched: number;
|
|
180
193
|
skipped: number;
|
|
194
|
+
autoGraded: number;
|
|
181
195
|
dryRun: boolean;
|
|
182
196
|
approvalMode: "auto" | "review";
|
|
183
197
|
elapsedMs: number;
|
|
@@ -335,6 +349,7 @@ export function formatOrchestrateReport(result: OrchestrateResult): string {
|
|
|
335
349
|
|
|
336
350
|
// Final summary
|
|
337
351
|
lines.push("Summary");
|
|
352
|
+
lines.push(` Auto-graded: ${result.summary.autoGraded}`);
|
|
338
353
|
lines.push(` Evaluated: ${result.summary.evaluated} skills`);
|
|
339
354
|
lines.push(` Deployed: ${result.summary.deployed}`);
|
|
340
355
|
lines.push(` Watched: ${result.summary.watched}`);
|
|
@@ -620,6 +635,111 @@ function findRecentlyDeployedSkills(
|
|
|
620
635
|
return names;
|
|
621
636
|
}
|
|
622
637
|
|
|
638
|
+
// ---------------------------------------------------------------------------
|
|
639
|
+
// Auto-grade ungraded skills
|
|
640
|
+
// ---------------------------------------------------------------------------
|
|
641
|
+
|
|
642
|
+
/**
 * Auto-grade the top ungraded skills that have some session data.
 *
 * Candidates are skills whose status is "UNGRADED" and whose snapshot reports
 * `skill_checks > 0`, ordered by `skill_checks` descending and capped at
 * `maxAutoGrade`. Each candidate is graded against its latest resolved session.
 *
 * Fail-open: individual grading errors are logged (via `console.error`) but
 * never propagated; the loop simply moves on to the next skill.
 *
 * @param skills - Skill statuses to choose candidates from.
 * @param maxAutoGrade - Maximum number of skills to grade. Any value that is
 *   not greater than zero (0, negative, NaN) disables auto-grading.
 * @param agent - Agent identifier forwarded to `gradeSession`.
 * @param deps - Readers for telemetry and skill-usage records; invoked once per
 *   candidate skill.
 * @returns Number of skills graded AND persisted to the database.
 */
export async function autoGradeTopUngraded(
  skills: SkillStatus[],
  maxAutoGrade: number,
  agent: string,
  deps: {
    readTelemetry: () => SessionTelemetryRecord[];
    readSkillRecords: () => SkillUsageRecord[];
  },
): Promise<number> {
  // Guard the cap explicitly: slice(0, n) with a negative n counts from the
  // end of the array, so -1 would grade all-but-one skill instead of none.
  if (!(maxAutoGrade > 0)) return 0;

  // Filter: UNGRADED skills with some data (skill_checks > 0), busiest first
  const ungradedWithData = skills
    .filter((s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0)
    .sort((a, b) => (b.snapshot?.skill_checks ?? 0) - (a.snapshot?.skill_checks ?? 0))
    .slice(0, maxAutoGrade);

  if (ungradedWithData.length === 0) return 0;

  let graded = 0;

  for (const skill of ungradedWithData) {
    try {
      const telemetry = deps.readTelemetry();
      const skillUsage = deps.readSkillRecords();

      // Resolve the latest session for this skill
      const resolved = resolveLatestSessionForSkill(telemetry, skillUsage, skill.name);
      if (!resolved) {
        console.error(`  [auto-grade] ${skill.name}: no session found, skipping`);
        continue;
      }

      // Derive expectations from SKILL.md
      const derived = deriveExpectationsFromSkill(skill.name);

      // Transcript excerpt is optional; an unreadable transcript falls back to a placeholder
      let transcriptExcerpt = "(no transcript)";
      if (resolved.transcriptPath) {
        try {
          transcriptExcerpt = readExcerpt(resolved.transcriptPath);
        } catch {
          transcriptExcerpt = "(no transcript)";
        }
      }

      console.error(`  [auto-grade] Grading "${skill.name}" (session ${resolved.sessionId})...`);

      const result = await gradeSession({
        expectations: derived.expectations,
        telemetry: resolved.telemetry,
        sessionId: resolved.sessionId,
        skillName: skill.name,
        transcriptExcerpt,
        transcriptPath: resolved.transcriptPath,
        agent,
      });

      // Persist to SQLite — only count as graded if DB write succeeds
      let persisted = false;
      try {
        persisted = writeGradingResultToDb(result);
      } catch {
        persisted = false;
      }
      if (!persisted) {
        console.error(`  [auto-grade] ${skill.name}: graded but failed to persist result`);
        continue;
      }

      // Persist to file (fail-open, supplementary)
      try {
        const basePath = buildDefaultGradingOutputPath(resolved.sessionId);
        // Keep the skill name filesystem-safe before embedding it in the file name
        const safeName = skill.name.replace(/[^a-zA-Z0-9_-]/g, "_");
        const outputPath = basePath.replace(/\.json$/, `_${safeName}.json`);
        const outputDir = dirname(outputPath);
        mkdirSync(outputDir, { recursive: true });
        writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
      } catch {
        // fail-open: DB is authoritative, file is supplementary
      }

      const passRate = result.summary.pass_rate;
      console.error(
        `  [auto-grade] ${skill.name}: ${result.summary.passed}/${result.summary.total} passed (${Math.round(passRate * 100)}%)`,
      );
      graded++;
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.error(
        `  [auto-grade] ${skill.name}: error — ${msg}. Retry with: selftune grade ${skill.name}`,
      );
      // fail-open: continue to next skill
    }
  }

  return graded;
}
|
|
742
|
+
|
|
623
743
|
// ---------------------------------------------------------------------------
|
|
624
744
|
// Main orchestrator
|
|
625
745
|
// ---------------------------------------------------------------------------
|
|
@@ -665,6 +785,7 @@ export async function orchestrate(
|
|
|
665
785
|
deployed: 0,
|
|
666
786
|
watched: 0,
|
|
667
787
|
skipped: 0,
|
|
788
|
+
autoGraded: 0,
|
|
668
789
|
dryRun: options.dryRun,
|
|
669
790
|
approvalMode: options.approvalMode,
|
|
670
791
|
elapsedMs: 0,
|
|
@@ -732,7 +853,7 @@ export async function orchestrate(
|
|
|
732
853
|
const auditEntries = _readAuditEntries();
|
|
733
854
|
const doctorResult = await _doctor();
|
|
734
855
|
|
|
735
|
-
|
|
856
|
+
let statusResult = _computeStatus(
|
|
736
857
|
telemetry,
|
|
737
858
|
skillRecords,
|
|
738
859
|
queryRecords,
|
|
@@ -743,6 +864,61 @@ export async function orchestrate(
|
|
|
743
864
|
`[orchestrate] Status: ${statusResult.skills.length} skills, system=${statusResult.system.healthy ? "healthy" : "unhealthy"}`,
|
|
744
865
|
);
|
|
745
866
|
|
|
867
|
+
// -------------------------------------------------------------------------
|
|
868
|
+
// Step 2a: Auto-grade ungraded skills with sufficient data
|
|
869
|
+
// -------------------------------------------------------------------------
|
|
870
|
+
let autoGradedCount = 0;
|
|
871
|
+
const scopedSkills = options.skillFilter
|
|
872
|
+
? statusResult.skills.filter((s) => s.name === options.skillFilter)
|
|
873
|
+
: statusResult.skills;
|
|
874
|
+
const ungradedWithData = scopedSkills.filter(
|
|
875
|
+
(s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0,
|
|
876
|
+
);
|
|
877
|
+
|
|
878
|
+
if (!options.dryRun && options.maxAutoGrade > 0 && ungradedWithData.length > 0) {
|
|
879
|
+
const gradeAgent = _detectAgent();
|
|
880
|
+
if (gradeAgent) {
|
|
881
|
+
console.error(
|
|
882
|
+
`[orchestrate] Auto-grading ${Math.min(ungradedWithData.length, options.maxAutoGrade)} ungraded skill(s)...`,
|
|
883
|
+
);
|
|
884
|
+
autoGradedCount = await autoGradeTopUngraded(
|
|
885
|
+
scopedSkills,
|
|
886
|
+
options.maxAutoGrade,
|
|
887
|
+
gradeAgent,
|
|
888
|
+
{ readTelemetry: _readTelemetry, readSkillRecords: _readSkillRecords },
|
|
889
|
+
);
|
|
890
|
+
|
|
891
|
+
if (autoGradedCount > 0) {
|
|
892
|
+
// Recompute status so candidate selection sees updated grades
|
|
893
|
+
console.error(
|
|
894
|
+
`[orchestrate] Recomputing status after grading ${autoGradedCount} skill(s)...`,
|
|
895
|
+
);
|
|
896
|
+
try {
|
|
897
|
+
const freshTelemetry = _readTelemetry();
|
|
898
|
+
const freshSkillRecords = _readSkillRecords();
|
|
899
|
+
const freshQueryRecords = _readQueryRecords();
|
|
900
|
+
const freshAudit = _readAuditEntries();
|
|
901
|
+
const freshDoctor = doctorResult; // reuse — environment unchanged during grading
|
|
902
|
+
statusResult = _computeStatus(
|
|
903
|
+
freshTelemetry,
|
|
904
|
+
freshSkillRecords,
|
|
905
|
+
freshQueryRecords,
|
|
906
|
+
freshAudit,
|
|
907
|
+
freshDoctor,
|
|
908
|
+
);
|
|
909
|
+
} catch (recomputeErr) {
|
|
910
|
+
console.error(
|
|
911
|
+
`[orchestrate] Warning: failed to recompute status after grading — using pre-grade status. ${recomputeErr instanceof Error ? recomputeErr.message : String(recomputeErr)}`,
|
|
912
|
+
);
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
} else {
|
|
916
|
+
console.error(
|
|
917
|
+
"[orchestrate] No agent CLI found — skipping auto-grade. To disable, rerun with: selftune orchestrate --max-auto-grade 0",
|
|
918
|
+
);
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
|
|
746
922
|
// -------------------------------------------------------------------------
|
|
747
923
|
// Step 2b: Read pending improvement signals
|
|
748
924
|
// -------------------------------------------------------------------------
|
|
@@ -919,6 +1095,7 @@ export async function orchestrate(
|
|
|
919
1095
|
deployed: candidates.filter((c) => c.evolveResult?.deployed).length,
|
|
920
1096
|
watched: candidates.filter((c) => c.action === "watch").length,
|
|
921
1097
|
skipped: candidates.filter((c) => c.action === "skip").length,
|
|
1098
|
+
autoGraded: autoGradedCount,
|
|
922
1099
|
};
|
|
923
1100
|
|
|
924
1101
|
const result: OrchestrateResult = {
|
|
@@ -956,6 +1133,7 @@ export async function orchestrate(
|
|
|
956
1133
|
deployed: finalTotals.deployed,
|
|
957
1134
|
watched: finalTotals.watched,
|
|
958
1135
|
skipped: finalTotals.skipped,
|
|
1136
|
+
auto_graded: finalTotals.autoGraded,
|
|
959
1137
|
skill_actions: candidates.map(
|
|
960
1138
|
(c): OrchestrateRunSkillAction => ({
|
|
961
1139
|
skill: c.skill,
|
|
@@ -1023,6 +1201,7 @@ export async function cliMain(): Promise<void> {
|
|
|
1023
1201
|
"max-skills": { type: "string", default: "5" },
|
|
1024
1202
|
"recent-window": { type: "string", default: "48" },
|
|
1025
1203
|
"sync-force": { type: "boolean", default: false },
|
|
1204
|
+
"max-auto-grade": { type: "string", default: "5" },
|
|
1026
1205
|
loop: { type: "boolean", default: false },
|
|
1027
1206
|
"loop-interval": { type: "string", default: "3600" },
|
|
1028
1207
|
help: { type: "boolean", short: "h", default: false },
|
|
@@ -1033,7 +1212,7 @@ export async function cliMain(): Promise<void> {
|
|
|
1033
1212
|
if (values.help) {
|
|
1034
1213
|
console.log(`selftune orchestrate — Autonomous core loop
|
|
1035
1214
|
|
|
1036
|
-
Runs the full improvement cycle: sync → status → evolve → watch.
|
|
1215
|
+
Runs the full improvement cycle: sync → status → auto-grade → evolve → watch.
|
|
1037
1216
|
|
|
1038
1217
|
Usage:
|
|
1039
1218
|
selftune orchestrate [options]
|
|
@@ -1046,6 +1225,7 @@ Options:
|
|
|
1046
1225
|
--max-skills <n> Cap skills processed per run (default: 5)
|
|
1047
1226
|
--recent-window <hrs> Hours to look back for watch targets (default: 48)
|
|
1048
1227
|
--sync-force Force full rescan during sync
|
|
1228
|
+
--max-auto-grade <n> Max ungraded skills to auto-grade per run (default: 5, 0 to disable)
|
|
1049
1229
|
--loop Run in continuous loop mode (never stops)
|
|
1050
1230
|
--loop-interval <s> Seconds between iterations (default: 3600, min: 60)
|
|
1051
1231
|
-h, --help Show this help message
|
|
@@ -1067,23 +1247,41 @@ Examples:
|
|
|
1067
1247
|
process.exit(0);
|
|
1068
1248
|
}
|
|
1069
1249
|
|
|
1070
|
-
const
|
|
1071
|
-
if (
|
|
1072
|
-
console.error(
|
|
1250
|
+
const maxSkillsRaw = values["max-skills"] ?? "5";
|
|
1251
|
+
if (!/^\d+$/.test(maxSkillsRaw) || Number(maxSkillsRaw) < 1) {
|
|
1252
|
+
console.error(
|
|
1253
|
+
"[ERROR] --max-skills must be a positive integer. Retry with: selftune orchestrate --max-skills 5",
|
|
1254
|
+
);
|
|
1255
|
+
process.exit(1);
|
|
1256
|
+
}
|
|
1257
|
+
const maxSkills = Number(maxSkillsRaw);
|
|
1258
|
+
|
|
1259
|
+
const recentWindowRaw = values["recent-window"] ?? "48";
|
|
1260
|
+
if (!/^\d+$/.test(recentWindowRaw) || Number(recentWindowRaw) < 1) {
|
|
1261
|
+
console.error(
|
|
1262
|
+
"[ERROR] --recent-window must be a positive integer. Retry with: selftune orchestrate --recent-window 48",
|
|
1263
|
+
);
|
|
1073
1264
|
process.exit(1);
|
|
1074
1265
|
}
|
|
1266
|
+
const recentWindow = Number(recentWindowRaw);
|
|
1075
1267
|
|
|
1076
|
-
const
|
|
1077
|
-
if (
|
|
1078
|
-
console.error(
|
|
1268
|
+
const maxAutoGradeRaw = values["max-auto-grade"] ?? "5";
|
|
1269
|
+
if (!/^\d+$/.test(maxAutoGradeRaw)) {
|
|
1270
|
+
console.error(
|
|
1271
|
+
"[ERROR] --max-auto-grade must be a non-negative integer. Retry with: selftune orchestrate --max-auto-grade 5",
|
|
1272
|
+
);
|
|
1079
1273
|
process.exit(1);
|
|
1080
1274
|
}
|
|
1275
|
+
const maxAutoGrade = Number(maxAutoGradeRaw);
|
|
1081
1276
|
|
|
1082
|
-
const
|
|
1083
|
-
if (values.loop &&
|
|
1084
|
-
console.error(
|
|
1277
|
+
const loopIntervalRaw = values["loop-interval"] ?? "3600";
|
|
1278
|
+
if (!/^\d+$/.test(loopIntervalRaw) || (values.loop && Number(loopIntervalRaw) < 60)) {
|
|
1279
|
+
console.error(
|
|
1280
|
+
"[ERROR] --loop-interval must be an integer >= 60 (seconds). Retry with: selftune orchestrate --loop --loop-interval 3600",
|
|
1281
|
+
);
|
|
1085
1282
|
process.exit(1);
|
|
1086
1283
|
}
|
|
1284
|
+
const loopInterval = Number(loopIntervalRaw);
|
|
1087
1285
|
|
|
1088
1286
|
const autoApprove = values["auto-approve"] ?? false;
|
|
1089
1287
|
if (autoApprove) {
|
|
@@ -1132,6 +1330,7 @@ Examples:
|
|
|
1132
1330
|
maxSkills,
|
|
1133
1331
|
recentWindowHours: recentWindow,
|
|
1134
1332
|
syncForce: values["sync-force"] ?? false,
|
|
1333
|
+
maxAutoGrade,
|
|
1135
1334
|
});
|
|
1136
1335
|
|
|
1137
1336
|
// JSON output: include per-skill decisions for machine consumption
|