selftune 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
  2. package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +16 -0
  3. package/apps/local-dashboard/dist/index.html +2 -2
  4. package/cli/selftune/activation-rules.ts +24 -48
  5. package/cli/selftune/constants.ts +7 -0
  6. package/cli/selftune/contribute/bundle.ts +9 -44
  7. package/cli/selftune/dashboard-contract.ts +12 -0
  8. package/cli/selftune/eval/hooks-to-evals.ts +5 -22
  9. package/cli/selftune/grading/auto-grade.ts +3 -13
  10. package/cli/selftune/grading/grade-session.ts +3 -13
  11. package/cli/selftune/hooks/evolution-guard.ts +14 -24
  12. package/cli/selftune/hooks/prompt-log.ts +0 -8
  13. package/cli/selftune/hooks/session-stop.ts +0 -8
  14. package/cli/selftune/ingestors/codex-rollout.ts +9 -4
  15. package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
  16. package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
  17. package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
  18. package/cli/selftune/localdb/queries.ts +57 -0
  19. package/cli/selftune/monitoring/watch.ts +7 -22
  20. package/cli/selftune/normalization.ts +2 -23
  21. package/cli/selftune/orchestrate.ts +213 -14
  22. package/cli/selftune/schedule.ts +51 -5
  23. package/cli/selftune/utils/jsonl.ts +2 -0
  24. package/package.json +3 -1
  25. package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
  26. package/packages/ui/src/components/index.ts +1 -0
  27. package/packages/ui/src/components/section-cards.tsx +13 -0
  28. package/skill/SKILL.md +1 -1
  29. package/skill/Workflows/Orchestrate.md +11 -7
  30. package/skill/Workflows/Schedule.md +11 -0
  31. package/skill/references/logs.md +22 -21
  32. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
  33. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
@@ -34,6 +34,11 @@ import {
34
34
  SKILL_LOG,
35
35
  TELEMETRY_LOG,
36
36
  } from "../constants.js";
37
+ import {
38
+ writeQueryToDb,
39
+ writeSessionTelemetryToDb,
40
+ writeSkillUsageToDb,
41
+ } from "../localdb/direct-write.js";
37
42
  import {
38
43
  appendCanonicalRecords,
39
44
  buildCanonicalExecutionFact,
@@ -46,7 +51,7 @@ import {
46
51
  deriveSkillInvocationId,
47
52
  } from "../normalization.js";
48
53
  import type { CanonicalRecord, QueryLogRecord, SkillUsageRecord } from "../types.js";
49
- import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
54
+ import { loadMarker, saveMarker } from "../utils/jsonl.js";
50
55
 
51
56
  export interface SessionFile {
52
57
  agentId: string;
@@ -389,11 +394,25 @@ export function writeSession(
389
394
  query: prompt,
390
395
  source: session.source,
391
396
  };
392
- appendJsonl(queryLogPath, queryRecord, "all_queries");
397
+ writeQueryToDb(queryRecord);
393
398
  }
394
399
 
395
- const { query: _q, ...telemetry } = session;
396
- appendJsonl(telemetryLogPath, telemetry, "session_telemetry");
400
+ // Build a SessionTelemetryRecord-shaped object for SQLite
401
+ writeSessionTelemetryToDb({
402
+ timestamp: session.timestamp,
403
+ session_id: session.session_id,
404
+ cwd: session.cwd,
405
+ transcript_path: session.transcript_path,
406
+ tool_calls: session.tool_calls,
407
+ total_tool_calls: session.total_tool_calls,
408
+ bash_commands: session.bash_commands,
409
+ skills_triggered: session.skills_triggered,
410
+ assistant_turns: session.assistant_turns,
411
+ errors_encountered: session.errors_encountered,
412
+ transcript_chars: session.transcript_chars,
413
+ last_user_query: session.last_user_query,
414
+ source: session.source,
415
+ });
397
416
 
398
417
  for (const skillName of skills) {
399
418
  const skillRecord: SkillUsageRecord = {
@@ -405,7 +424,7 @@ export function writeSession(
405
424
  triggered: true,
406
425
  source: session.source,
407
426
  };
408
- appendJsonl(skillLogPath, skillRecord, "skill_usage");
427
+ writeSkillUsageToDb(skillRecord);
409
428
  }
410
429
 
411
430
  // --- Canonical normalization records (additive) ---
@@ -27,6 +27,11 @@ import { basename, join } from "node:path";
27
27
  import { parseArgs } from "node:util";
28
28
 
29
29
  import { CANONICAL_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
30
+ import {
31
+ writeQueryToDb,
32
+ writeSessionTelemetryToDb,
33
+ writeSkillUsageToDb,
34
+ } from "../localdb/direct-write.js";
30
35
  import {
31
36
  appendCanonicalRecords,
32
37
  buildCanonicalExecutionFact,
@@ -44,7 +49,7 @@ import type {
44
49
  SessionTelemetryRecord,
45
50
  SkillUsageRecord,
46
51
  } from "../types.js";
47
- import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
52
+ import { loadMarker, saveMarker } from "../utils/jsonl.js";
48
53
 
49
54
  const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share");
50
55
  const DEFAULT_DATA_DIR = join(XDG_DATA_HOME, "opencode");
@@ -528,7 +533,7 @@ export function writeSession(
528
533
  query: prompt,
529
534
  source: session.source,
530
535
  };
531
- appendJsonl(queryLogPath, queryRecord, "all_queries");
536
+ writeQueryToDb(queryRecord);
532
537
  }
533
538
 
534
539
  const telemetry: SessionTelemetryRecord = {
@@ -546,7 +551,7 @@ export function writeSession(
546
551
  last_user_query: session.last_user_query,
547
552
  source: session.source,
548
553
  };
549
- appendJsonl(telemetryLogPath, telemetry, "session_telemetry");
554
+ writeSessionTelemetryToDb(telemetry);
550
555
 
551
556
  for (const skillName of skills) {
552
557
  const skillRecord: SkillUsageRecord = {
@@ -558,7 +563,7 @@ export function writeSession(
558
563
  triggered: true,
559
564
  source: session.source,
560
565
  };
561
- appendJsonl(skillLogPath, skillRecord, "skill_usage");
566
+ writeSkillUsageToDb(skillRecord);
562
567
  }
563
568
 
564
569
  // --- Canonical normalization records (additive) ---
@@ -11,6 +11,7 @@ import type {
11
11
  OrchestrateRunReport,
12
12
  OverviewPayload,
13
13
  PendingProposal,
14
+ RecentActivityItem,
14
15
  SkillReportPayload,
15
16
  SkillSummary,
16
17
  } from "../dashboard-contract.js";
@@ -126,6 +127,10 @@ export function getOverviewPayload(db: Database): OverviewPayload {
126
127
  // Pending proposals: created/validated but no terminal action (deduped in SQL)
127
128
  const pending_proposals = getPendingProposals(db);
128
129
 
130
+ // Active sessions and recent activity
131
+ const active_sessions = getActiveSessionCount(db);
132
+ const recent_activity = getRecentActivity(db);
133
+
129
134
  return {
130
135
  telemetry,
131
136
  skills,
@@ -133,6 +138,8 @@ export function getOverviewPayload(db: Database): OverviewPayload {
133
138
  counts,
134
139
  unmatched_queries: unmatchedRows,
135
140
  pending_proposals,
141
+ active_sessions,
142
+ recent_activity,
136
143
  };
137
144
  }
138
145
 
@@ -361,6 +368,56 @@ export function getOrchestrateRuns(db: Database, limit = 20): OrchestrateRunRepo
361
368
  }));
362
369
  }
363
370
 
371
+ /**
372
+ * Count sessions that have queries recorded but no session_telemetry yet
373
+ * (i.e., the session is still in progress).
374
+ */
375
+ export function getActiveSessionCount(db: Database): number {
376
+ const row = db
377
+ .query(
378
+ `SELECT COUNT(DISTINCT q.session_id) as count
379
+ FROM queries q
380
+ WHERE NOT EXISTS (
381
+ SELECT 1 FROM session_telemetry st WHERE st.session_id = q.session_id
382
+ )`,
383
+ )
384
+ .get() as { count: number };
385
+ return row.count;
386
+ }
387
+
388
+ /**
389
+ * Get the most recent skill invocations with a flag indicating whether the
390
+ * session is still in progress (no session_telemetry row yet).
391
+ */
392
+ export function getRecentActivity(db: Database, limit = 20): RecentActivityItem[] {
393
+ const rows = db
394
+ .query(
395
+ `SELECT si.occurred_at, si.session_id, si.skill_name, si.query, si.triggered,
396
+ CASE WHEN st.session_id IS NULL THEN 1 ELSE 0 END as is_live
397
+ FROM skill_invocations si
398
+ LEFT JOIN session_telemetry st ON si.session_id = st.session_id
399
+ ORDER BY si.occurred_at DESC
400
+ LIMIT ?`,
401
+ )
402
+ .all(limit) as Array<{
403
+ occurred_at: string;
404
+ session_id: string;
405
+ skill_name: string;
406
+ query: string;
407
+ triggered: number;
408
+ is_live: number;
409
+ }>;
410
+
411
+ return rows.map((row) => ({
412
+ timestamp: row.occurred_at,
413
+ session_id: row.session_id,
414
+ skill_name: row.skill_name,
415
+ query: row.query ?? "",
416
+ triggered: row.triggered === 1,
417
+ is_live: row.is_live === 1,
418
+ }));
419
+ }
420
+
364
421
  // -- Generic read queries (Phase 3: replace readJsonl calls) ------------------
365
422
 
366
423
  /**
@@ -26,7 +26,6 @@ import type {
26
26
  SessionTelemetryRecord,
27
27
  SkillUsageRecord,
28
28
  } from "../types.js";
29
- import { readJsonl } from "../utils/jsonl.js";
30
29
  import {
31
30
  filterActionableQueryRecords,
32
31
  filterActionableSkillUsageRecords,
@@ -212,27 +211,13 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
212
211
  );
213
212
  }
214
213
 
215
- // 1. Read log files from SQLite (fall back to JSONL for custom paths)
216
- let telemetry: SessionTelemetryRecord[];
217
- let skillRecords: SkillUsageRecord[];
218
- let queryRecords: QueryLogRecord[];
219
- if (
220
- _telemetryLogPath === TELEMETRY_LOG &&
221
- _skillLogPath === SKILL_LOG &&
222
- _queryLogPath === QUERY_LOG
223
- ) {
224
- const db = getDb();
225
- telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
226
- // SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
227
- telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
228
- skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
229
- queryRecords = queryQueryLog(db) as QueryLogRecord[];
230
- } else {
231
- // Intentional JSONL fallback: custom log path overrides bypass SQLite reads
232
- telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
233
- skillRecords = readJsonl<SkillUsageRecord>(_skillLogPath);
234
- queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);
235
- }
214
+ // 1. Read log files from SQLite
215
+ const db = getDb();
216
+ const telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
217
+ // SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
218
+ telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
219
+ const skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
220
+ const queryRecords = queryQueryLog(db) as QueryLogRecord[];
236
221
 
237
222
  // 2. Determine baseline pass rate from last deployed audit entry
238
223
  const lastDeployed = getLastDeployedProposal(skillName, _auditLogPath);
@@ -14,7 +14,6 @@
14
14
 
15
15
  import { createHash } from "node:crypto";
16
16
  import {
17
- appendFileSync,
18
17
  existsSync,
19
18
  mkdirSync,
20
19
  readFileSync,
@@ -388,32 +387,12 @@ export function getLatestPromptIdentity(
388
387
  };
389
388
  }
390
389
 
391
- export function appendCanonicalRecord(record: CanonicalRecord, logPath?: string): void {
390
+ export function appendCanonicalRecord(record: CanonicalRecord, _logPath?: string): void {
392
391
  writeCanonicalToDb(record);
393
- // JSONL append — best-effort backup for prompt state recovery
394
- try {
395
- const path = logPath ?? CANONICAL_LOG;
396
- const dir = dirname(path);
397
- if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
398
- appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
399
- } catch {
400
- /* best-effort only */
401
- }
402
392
  }
403
393
 
404
- export function appendCanonicalRecords(records: CanonicalRecord[], logPath?: string): void {
394
+ export function appendCanonicalRecords(records: CanonicalRecord[], _logPath?: string): void {
405
395
  writeCanonicalBatchToDb(records);
406
- // JSONL append — best-effort backup for prompt state recovery
407
- try {
408
- const path = logPath ?? CANONICAL_LOG;
409
- const dir = dirname(path);
410
- if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
411
- for (const record of records) {
412
- appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
413
- }
414
- } catch {
415
- /* best-effort only */
416
- }
417
396
  }
418
397
 
419
398
  // ---------------------------------------------------------------------------
@@ -9,9 +9,9 @@
9
9
  * explicit dry-run and review-required modes for human-in-the-loop operation.
10
10
  */
11
11
 
12
- import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
12
+ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
13
13
  import { homedir } from "node:os";
14
- import { join } from "node:path";
14
+ import { dirname, join } from "node:path";
15
15
  import { parseArgs } from "node:util";
16
16
 
17
17
  import { readAlphaIdentity } from "./alpha-identity.js";
@@ -19,9 +19,19 @@ import type { UploadCycleSummary } from "./alpha-upload/index.js";
19
19
  import { ORCHESTRATE_LOCK, SELFTUNE_CONFIG_PATH } from "./constants.js";
20
20
  import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
21
21
  import type { EvolveResult } from "./evolution/evolve.js";
22
+ import {
23
+ buildDefaultGradingOutputPath,
24
+ deriveExpectationsFromSkill,
25
+ gradeSession,
26
+ resolveLatestSessionForSkill,
27
+ } from "./grading/grade-session.js";
22
28
  import { readGradingResultsForSkill } from "./grading/results.js";
23
29
  import { getDb } from "./localdb/db.js";
24
- import { updateSignalConsumed, writeOrchestrateRunToDb } from "./localdb/direct-write.js";
30
+ import {
31
+ updateSignalConsumed,
32
+ writeGradingResultToDb,
33
+ writeOrchestrateRunToDb,
34
+ } from "./localdb/direct-write.js";
25
35
  import {
26
36
  queryEvolutionAudit,
27
37
  queryImprovementSignals,
@@ -50,6 +60,7 @@ import {
50
60
  findRepositoryClaudeSkillDirs,
51
61
  findRepositorySkillDirs,
52
62
  } from "./utils/skill-discovery.js";
63
+ import { readExcerpt } from "./utils/transcript.js";
53
64
 
54
65
  // ---------------------------------------------------------------------------
55
66
  // Lockfile management
@@ -156,6 +167,8 @@ export interface OrchestrateOptions {
156
167
  recentWindowHours: number;
157
168
  /** Force sync to rescan all sources. */
158
169
  syncForce: boolean;
170
+ /** Max ungraded skills to auto-grade per run (default: 5). Set 0 to disable. */
171
+ maxAutoGrade: number;
159
172
  }
160
173
 
161
174
  export interface SkillAction {
@@ -178,6 +191,7 @@ export interface OrchestrateResult {
178
191
  deployed: number;
179
192
  watched: number;
180
193
  skipped: number;
194
+ autoGraded: number;
181
195
  dryRun: boolean;
182
196
  approvalMode: "auto" | "review";
183
197
  elapsedMs: number;
@@ -335,6 +349,7 @@ export function formatOrchestrateReport(result: OrchestrateResult): string {
335
349
 
336
350
  // Final summary
337
351
  lines.push("Summary");
352
+ lines.push(` Auto-graded: ${result.summary.autoGraded}`);
338
353
  lines.push(` Evaluated: ${result.summary.evaluated} skills`);
339
354
  lines.push(` Deployed: ${result.summary.deployed}`);
340
355
  lines.push(` Watched: ${result.summary.watched}`);
@@ -620,6 +635,111 @@ function findRecentlyDeployedSkills(
620
635
  return names;
621
636
  }
622
637
 
638
+ // ---------------------------------------------------------------------------
639
+ // Auto-grade ungraded skills
640
+ // ---------------------------------------------------------------------------
641
+
642
+ /**
643
+ * Auto-grade the top ungraded skills that have some session data.
644
+ * Fail-open: individual grading errors are logged but never propagated.
645
+ *
646
+ * @returns Number of skills successfully graded.
647
+ */
648
+ export async function autoGradeTopUngraded(
649
+ skills: SkillStatus[],
650
+ maxAutoGrade: number,
651
+ agent: string,
652
+ deps: {
653
+ readTelemetry: () => SessionTelemetryRecord[];
654
+ readSkillRecords: () => SkillUsageRecord[];
655
+ },
656
+ ): Promise<number> {
657
+ // Filter: UNGRADED skills with some data (skill_checks > 0)
658
+ const ungradedWithData = skills
659
+ .filter((s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0)
660
+ .sort((a, b) => (b.snapshot?.skill_checks ?? 0) - (a.snapshot?.skill_checks ?? 0))
661
+ .slice(0, maxAutoGrade);
662
+
663
+ if (ungradedWithData.length === 0) return 0;
664
+
665
+ let graded = 0;
666
+
667
+ for (const skill of ungradedWithData) {
668
+ try {
669
+ const telemetry = deps.readTelemetry();
670
+ const skillUsage = deps.readSkillRecords();
671
+
672
+ // Resolve the latest session for this skill
673
+ const resolved = resolveLatestSessionForSkill(telemetry, skillUsage, skill.name);
674
+ if (!resolved) {
675
+ console.error(` [auto-grade] ${skill.name}: no session found, skipping`);
676
+ continue;
677
+ }
678
+
679
+ // Derive expectations from SKILL.md
680
+ const derived = deriveExpectationsFromSkill(skill.name);
681
+ let transcriptExcerpt = "(no transcript)";
682
+ if (resolved.transcriptPath) {
683
+ try {
684
+ transcriptExcerpt = readExcerpt(resolved.transcriptPath);
685
+ } catch {
686
+ transcriptExcerpt = "(no transcript)";
687
+ }
688
+ }
689
+
690
+ console.error(` [auto-grade] Grading "${skill.name}" (session ${resolved.sessionId})...`);
691
+
692
+ const result = await gradeSession({
693
+ expectations: derived.expectations,
694
+ telemetry: resolved.telemetry,
695
+ sessionId: resolved.sessionId,
696
+ skillName: skill.name,
697
+ transcriptExcerpt,
698
+ transcriptPath: resolved.transcriptPath,
699
+ agent,
700
+ });
701
+
702
+ // Persist to SQLite — only count as graded if DB write succeeds
703
+ let persisted = false;
704
+ try {
705
+ persisted = writeGradingResultToDb(result);
706
+ } catch {
707
+ persisted = false;
708
+ }
709
+ if (!persisted) {
710
+ console.error(` [auto-grade] ${skill.name}: graded but failed to persist result`);
711
+ continue;
712
+ }
713
+
714
+ // Persist to file (fail-open, supplementary)
715
+ try {
716
+ const basePath = buildDefaultGradingOutputPath(resolved.sessionId);
717
+ const safeName = skill.name.replace(/[^a-zA-Z0-9_-]/g, "_");
718
+ const outputPath = basePath.replace(/\.json$/, `_${safeName}.json`);
719
+ const outputDir = dirname(outputPath);
720
+ mkdirSync(outputDir, { recursive: true });
721
+ writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
722
+ } catch {
723
+ // fail-open: DB is authoritative, file is supplementary
724
+ }
725
+
726
+ const passRate = result.summary.pass_rate;
727
+ console.error(
728
+ ` [auto-grade] ${skill.name}: ${result.summary.passed}/${result.summary.total} passed (${Math.round(passRate * 100)}%)`,
729
+ );
730
+ graded++;
731
+ } catch (err) {
732
+ const msg = err instanceof Error ? err.message : String(err);
733
+ console.error(
734
+ ` [auto-grade] ${skill.name}: error — ${msg}. Retry with: selftune grade ${skill.name}`,
735
+ );
736
+ // fail-open: continue to next skill
737
+ }
738
+ }
739
+
740
+ return graded;
741
+ }
742
+
623
743
  // ---------------------------------------------------------------------------
624
744
  // Main orchestrator
625
745
  // ---------------------------------------------------------------------------
@@ -665,6 +785,7 @@ export async function orchestrate(
665
785
  deployed: 0,
666
786
  watched: 0,
667
787
  skipped: 0,
788
+ autoGraded: 0,
668
789
  dryRun: options.dryRun,
669
790
  approvalMode: options.approvalMode,
670
791
  elapsedMs: 0,
@@ -732,7 +853,7 @@ export async function orchestrate(
732
853
  const auditEntries = _readAuditEntries();
733
854
  const doctorResult = await _doctor();
734
855
 
735
- const statusResult = _computeStatus(
856
+ let statusResult = _computeStatus(
736
857
  telemetry,
737
858
  skillRecords,
738
859
  queryRecords,
@@ -743,6 +864,61 @@ export async function orchestrate(
743
864
  `[orchestrate] Status: ${statusResult.skills.length} skills, system=${statusResult.system.healthy ? "healthy" : "unhealthy"}`,
744
865
  );
745
866
 
867
+ // -------------------------------------------------------------------------
868
+ // Step 2a: Auto-grade ungraded skills with sufficient data
869
+ // -------------------------------------------------------------------------
870
+ let autoGradedCount = 0;
871
+ const scopedSkills = options.skillFilter
872
+ ? statusResult.skills.filter((s) => s.name === options.skillFilter)
873
+ : statusResult.skills;
874
+ const ungradedWithData = scopedSkills.filter(
875
+ (s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0,
876
+ );
877
+
878
+ if (!options.dryRun && options.maxAutoGrade > 0 && ungradedWithData.length > 0) {
879
+ const gradeAgent = _detectAgent();
880
+ if (gradeAgent) {
881
+ console.error(
882
+ `[orchestrate] Auto-grading ${Math.min(ungradedWithData.length, options.maxAutoGrade)} ungraded skill(s)...`,
883
+ );
884
+ autoGradedCount = await autoGradeTopUngraded(
885
+ scopedSkills,
886
+ options.maxAutoGrade,
887
+ gradeAgent,
888
+ { readTelemetry: _readTelemetry, readSkillRecords: _readSkillRecords },
889
+ );
890
+
891
+ if (autoGradedCount > 0) {
892
+ // Recompute status so candidate selection sees updated grades
893
+ console.error(
894
+ `[orchestrate] Recomputing status after grading ${autoGradedCount} skill(s)...`,
895
+ );
896
+ try {
897
+ const freshTelemetry = _readTelemetry();
898
+ const freshSkillRecords = _readSkillRecords();
899
+ const freshQueryRecords = _readQueryRecords();
900
+ const freshAudit = _readAuditEntries();
901
+ const freshDoctor = doctorResult; // reuse — environment unchanged during grading
902
+ statusResult = _computeStatus(
903
+ freshTelemetry,
904
+ freshSkillRecords,
905
+ freshQueryRecords,
906
+ freshAudit,
907
+ freshDoctor,
908
+ );
909
+ } catch (recomputeErr) {
910
+ console.error(
911
+ `[orchestrate] Warning: failed to recompute status after grading — using pre-grade status. ${recomputeErr instanceof Error ? recomputeErr.message : String(recomputeErr)}`,
912
+ );
913
+ }
914
+ }
915
+ } else {
916
+ console.error(
917
+ "[orchestrate] No agent CLI found — skipping auto-grade. To disable, rerun with: selftune orchestrate --max-auto-grade 0",
918
+ );
919
+ }
920
+ }
921
+
746
922
  // -------------------------------------------------------------------------
747
923
  // Step 2b: Read pending improvement signals
748
924
  // -------------------------------------------------------------------------
@@ -919,6 +1095,7 @@ export async function orchestrate(
919
1095
  deployed: candidates.filter((c) => c.evolveResult?.deployed).length,
920
1096
  watched: candidates.filter((c) => c.action === "watch").length,
921
1097
  skipped: candidates.filter((c) => c.action === "skip").length,
1098
+ autoGraded: autoGradedCount,
922
1099
  };
923
1100
 
924
1101
  const result: OrchestrateResult = {
@@ -956,6 +1133,7 @@ export async function orchestrate(
956
1133
  deployed: finalTotals.deployed,
957
1134
  watched: finalTotals.watched,
958
1135
  skipped: finalTotals.skipped,
1136
+ auto_graded: finalTotals.autoGraded,
959
1137
  skill_actions: candidates.map(
960
1138
  (c): OrchestrateRunSkillAction => ({
961
1139
  skill: c.skill,
@@ -1023,6 +1201,7 @@ export async function cliMain(): Promise<void> {
1023
1201
  "max-skills": { type: "string", default: "5" },
1024
1202
  "recent-window": { type: "string", default: "48" },
1025
1203
  "sync-force": { type: "boolean", default: false },
1204
+ "max-auto-grade": { type: "string", default: "5" },
1026
1205
  loop: { type: "boolean", default: false },
1027
1206
  "loop-interval": { type: "string", default: "3600" },
1028
1207
  help: { type: "boolean", short: "h", default: false },
@@ -1033,7 +1212,7 @@ export async function cliMain(): Promise<void> {
1033
1212
  if (values.help) {
1034
1213
  console.log(`selftune orchestrate — Autonomous core loop
1035
1214
 
1036
- Runs the full improvement cycle: sync → status → evolve → watch.
1215
+ Runs the full improvement cycle: sync → status → auto-grade → evolve → watch.
1037
1216
 
1038
1217
  Usage:
1039
1218
  selftune orchestrate [options]
@@ -1046,6 +1225,7 @@ Options:
1046
1225
  --max-skills <n> Cap skills processed per run (default: 5)
1047
1226
  --recent-window <hrs> Hours to look back for watch targets (default: 48)
1048
1227
  --sync-force Force full rescan during sync
1228
+ --max-auto-grade <n> Max ungraded skills to auto-grade per run (default: 5, 0 to disable)
1049
1229
  --loop Run in continuous loop mode (never stops)
1050
1230
  --loop-interval <s> Seconds between iterations (default: 3600, min: 60)
1051
1231
  -h, --help Show this help message
@@ -1067,23 +1247,41 @@ Examples:
1067
1247
  process.exit(0);
1068
1248
  }
1069
1249
 
1070
- const maxSkills = Number.parseInt(values["max-skills"] ?? "5", 10);
1071
- if (Number.isNaN(maxSkills) || maxSkills < 1) {
1072
- console.error("[ERROR] --max-skills must be a positive integer");
1250
+ const maxSkillsRaw = values["max-skills"] ?? "5";
1251
+ if (!/^\d+$/.test(maxSkillsRaw) || Number(maxSkillsRaw) < 1) {
1252
+ console.error(
1253
+ "[ERROR] --max-skills must be a positive integer. Retry with: selftune orchestrate --max-skills 5",
1254
+ );
1255
+ process.exit(1);
1256
+ }
1257
+ const maxSkills = Number(maxSkillsRaw);
1258
+
1259
+ const recentWindowRaw = values["recent-window"] ?? "48";
1260
+ if (!/^\d+$/.test(recentWindowRaw) || Number(recentWindowRaw) < 1) {
1261
+ console.error(
1262
+ "[ERROR] --recent-window must be a positive integer. Retry with: selftune orchestrate --recent-window 48",
1263
+ );
1073
1264
  process.exit(1);
1074
1265
  }
1266
+ const recentWindow = Number(recentWindowRaw);
1075
1267
 
1076
- const recentWindow = Number.parseInt(values["recent-window"] ?? "48", 10);
1077
- if (Number.isNaN(recentWindow) || recentWindow < 1) {
1078
- console.error("[ERROR] --recent-window must be a positive integer");
1268
+ const maxAutoGradeRaw = values["max-auto-grade"] ?? "5";
1269
+ if (!/^\d+$/.test(maxAutoGradeRaw)) {
1270
+ console.error(
1271
+ "[ERROR] --max-auto-grade must be a non-negative integer. Retry with: selftune orchestrate --max-auto-grade 5",
1272
+ );
1079
1273
  process.exit(1);
1080
1274
  }
1275
+ const maxAutoGrade = Number(maxAutoGradeRaw);
1081
1276
 
1082
- const loopInterval = Number.parseInt(values["loop-interval"] ?? "3600", 10);
1083
- if (values.loop && (Number.isNaN(loopInterval) || loopInterval < 60)) {
1084
- console.error("[ERROR] --loop-interval must be an integer >= 60 (seconds)");
1277
+ const loopIntervalRaw = values["loop-interval"] ?? "3600";
1278
+ if (!/^\d+$/.test(loopIntervalRaw) || (values.loop && Number(loopIntervalRaw) < 60)) {
1279
+ console.error(
1280
+ "[ERROR] --loop-interval must be an integer >= 60 (seconds). Retry with: selftune orchestrate --loop --loop-interval 3600",
1281
+ );
1085
1282
  process.exit(1);
1086
1283
  }
1284
+ const loopInterval = Number(loopIntervalRaw);
1087
1285
 
1088
1286
  const autoApprove = values["auto-approve"] ?? false;
1089
1287
  if (autoApprove) {
@@ -1132,6 +1330,7 @@ Examples:
1132
1330
  maxSkills,
1133
1331
  recentWindowHours: recentWindow,
1134
1332
  syncForce: values["sync-force"] ?? false,
1333
+ maxAutoGrade,
1135
1334
  });
1136
1335
 
1137
1336
  // JSON output: include per-skill decisions for machine consumption