npm - selftune - Versions diffs - 0.2.13 → 0.2.14 - Mend

selftune 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +16 -0
package/apps/local-dashboard/dist/index.html +2 -2
package/cli/selftune/activation-rules.ts +24 -48
package/cli/selftune/constants.ts +7 -0
package/cli/selftune/contribute/bundle.ts +9 -44
package/cli/selftune/dashboard-contract.ts +12 -0
package/cli/selftune/eval/hooks-to-evals.ts +5 -22
package/cli/selftune/grading/auto-grade.ts +3 -13
package/cli/selftune/grading/grade-session.ts +3 -13
package/cli/selftune/hooks/evolution-guard.ts +14 -24
package/cli/selftune/hooks/prompt-log.ts +0 -8
package/cli/selftune/hooks/session-stop.ts +0 -8
package/cli/selftune/ingestors/codex-rollout.ts +9 -4
package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
package/cli/selftune/localdb/queries.ts +57 -0
package/cli/selftune/monitoring/watch.ts +7 -22
package/cli/selftune/normalization.ts +2 -23
package/cli/selftune/orchestrate.ts +213 -14
package/cli/selftune/schedule.ts +51 -5
package/cli/selftune/utils/jsonl.ts +2 -0
package/package.json +3 -1
package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
package/packages/ui/src/components/index.ts +1 -0
package/packages/ui/src/components/section-cards.tsx +13 -0
package/skill/SKILL.md +1 -1
package/skill/Workflows/Orchestrate.md +11 -7
package/skill/Workflows/Schedule.md +11 -0
package/skill/references/logs.md +22 -21
package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2

package/cli/selftune/ingestors/openclaw-ingest.ts CHANGED Viewed

@@ -34,6 +34,11 @@ import {
   SKILL_LOG,
   TELEMETRY_LOG,
 } from "../constants.js";
+import {
+  writeQueryToDb,
+  writeSessionTelemetryToDb,
+  writeSkillUsageToDb,
+} from "../localdb/direct-write.js";
 import {
   appendCanonicalRecords,
   buildCanonicalExecutionFact,
@@ -46,7 +51,7 @@ import {
   deriveSkillInvocationId,
 } from "../normalization.js";
 import type { CanonicalRecord, QueryLogRecord, SkillUsageRecord } from "../types.js";
-import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
+import { loadMarker, saveMarker } from "../utils/jsonl.js";
 export interface SessionFile {
   agentId: string;
@@ -389,11 +394,25 @@ export function writeSession(
       query: prompt,
       source: session.source,
     };
-    appendJsonl(queryLogPath, queryRecord, "all_queries");
+    writeQueryToDb(queryRecord);
   }
-  const { query: _q, ...telemetry } = session;
-  appendJsonl(telemetryLogPath, telemetry, "session_telemetry");
+  // Build a SessionTelemetryRecord-shaped object for SQLite
+  writeSessionTelemetryToDb({
+    timestamp: session.timestamp,
+    session_id: session.session_id,
+    cwd: session.cwd,
+    transcript_path: session.transcript_path,
+    tool_calls: session.tool_calls,
+    total_tool_calls: session.total_tool_calls,
+    bash_commands: session.bash_commands,
+    skills_triggered: session.skills_triggered,
+    assistant_turns: session.assistant_turns,
+    errors_encountered: session.errors_encountered,
+    transcript_chars: session.transcript_chars,
+    last_user_query: session.last_user_query,
+    source: session.source,
+  });
   for (const skillName of skills) {
     const skillRecord: SkillUsageRecord = {
@@ -405,7 +424,7 @@ export function writeSession(
       triggered: true,
       source: session.source,
     };
-    appendJsonl(skillLogPath, skillRecord, "skill_usage");
+    writeSkillUsageToDb(skillRecord);
   }
   // --- Canonical normalization records (additive) ---

package/cli/selftune/ingestors/opencode-ingest.ts CHANGED Viewed

@@ -27,6 +27,11 @@ import { basename, join } from "node:path";
 import { parseArgs } from "node:util";
 import { CANONICAL_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
+import {
+  writeQueryToDb,
+  writeSessionTelemetryToDb,
+  writeSkillUsageToDb,
+} from "../localdb/direct-write.js";
 import {
   appendCanonicalRecords,
   buildCanonicalExecutionFact,
@@ -44,7 +49,7 @@ import type {
   SessionTelemetryRecord,
   SkillUsageRecord,
 } from "../types.js";
-import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
+import { loadMarker, saveMarker } from "../utils/jsonl.js";
 const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share");
 const DEFAULT_DATA_DIR = join(XDG_DATA_HOME, "opencode");
@@ -528,7 +533,7 @@ export function writeSession(
       query: prompt,
       source: session.source,
     };
-    appendJsonl(queryLogPath, queryRecord, "all_queries");
+    writeQueryToDb(queryRecord);
   }
   const telemetry: SessionTelemetryRecord = {
@@ -546,7 +551,7 @@ export function writeSession(
     last_user_query: session.last_user_query,
     source: session.source,
   };
-  appendJsonl(telemetryLogPath, telemetry, "session_telemetry");
+  writeSessionTelemetryToDb(telemetry);
   for (const skillName of skills) {
     const skillRecord: SkillUsageRecord = {
@@ -558,7 +563,7 @@ export function writeSession(
       triggered: true,
       source: session.source,
     };
-    appendJsonl(skillLogPath, skillRecord, "skill_usage");
+    writeSkillUsageToDb(skillRecord);
   }
   // --- Canonical normalization records (additive) ---

package/cli/selftune/localdb/queries.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import type {
   OrchestrateRunReport,
   OverviewPayload,
   PendingProposal,
+  RecentActivityItem,
   SkillReportPayload,
   SkillSummary,
 } from "../dashboard-contract.js";
@@ -126,6 +127,10 @@ export function getOverviewPayload(db: Database): OverviewPayload {
   // Pending proposals: created/validated but no terminal action (deduped in SQL)
   const pending_proposals = getPendingProposals(db);
+  // Active sessions and recent activity
+  const active_sessions = getActiveSessionCount(db);
+  const recent_activity = getRecentActivity(db);
   return {
     telemetry,
     skills,
@@ -133,6 +138,8 @@ export function getOverviewPayload(db: Database): OverviewPayload {
     counts,
     unmatched_queries: unmatchedRows,
     pending_proposals,
+    active_sessions,
+    recent_activity,
   };
 }
@@ -361,6 +368,56 @@ export function getOrchestrateRuns(db: Database, limit = 20): OrchestrateRunRepo
   }));
 }
+/**
+ * Count sessions that have queries recorded but no session_telemetry yet
+ * (i.e., the session is still in progress).
+ *
+ * A session is counted once regardless of how many rows it has in `queries`
+ * (`COUNT(DISTINCT q.session_id)`), and only when no `session_telemetry` row
+ * shares its `session_id`.
+ *
+ * NOTE(review): the query has no time bound, so a session whose telemetry row
+ * is never written would keep being counted — confirm whether that can happen.
+ *
+ * @param db - Database handle the query is run against.
+ * @returns The number of distinct session ids matching the condition above.
+ */
+export function getActiveSessionCount(db: Database): number {
+  const row = db
+    .query(
+      `SELECT COUNT(DISTINCT q.session_id) as count
+       FROM queries q
+       WHERE NOT EXISTS (
+         SELECT 1 FROM session_telemetry st WHERE st.session_id = q.session_id
+       )`,
+    )
+    .get() as { count: number };
+  return row.count;
+}
+/**
+ * Get the most recent skill invocations with a flag indicating whether the
+ * session is still in progress (no session_telemetry row yet).
+ *
+ * Rows come from `skill_invocations`, newest `occurred_at` first, capped at
+ * `limit`. The integer `triggered` and `is_live` columns are converted to
+ * booleans (true only when the value is exactly 1), and a null `query`
+ * becomes an empty string.
+ *
+ * NOTE(review): the LEFT JOIN emits one row per matching `session_telemetry`
+ * row, so an invocation would repeat if a session can have more than one
+ * telemetry row — confirm `session_id` is unique in that table.
+ *
+ * @param db - Database handle the query is run against.
+ * @param limit - Maximum number of rows to return (default: 20).
+ * @returns Activity items ordered from newest to oldest.
+ */
+export function getRecentActivity(db: Database, limit = 20): RecentActivityItem[] {
+  const rows = db
+    .query(
+      `SELECT si.occurred_at, si.session_id, si.skill_name, si.query, si.triggered,
+              CASE WHEN st.session_id IS NULL THEN 1 ELSE 0 END as is_live
+       FROM skill_invocations si
+       LEFT JOIN session_telemetry st ON si.session_id = st.session_id
+       ORDER BY si.occurred_at DESC
+       LIMIT ?`,
+    )
+    .all(limit) as Array<{
+    occurred_at: string;
+    session_id: string;
+    skill_name: string;
+    query: string;
+    triggered: number;
+    is_live: number;
+  }>;
+  return rows.map((row) => ({
+    timestamp: row.occurred_at,
+    session_id: row.session_id,
+    skill_name: row.skill_name,
+    query: row.query ?? "",
+    triggered: row.triggered === 1,
+    is_live: row.is_live === 1,
+  }));
+}
 // -- Generic read queries (Phase 3: replace readJsonl calls) ------------------
 /**

package/cli/selftune/monitoring/watch.ts CHANGED Viewed

@@ -26,7 +26,6 @@ import type {
   SessionTelemetryRecord,
   SkillUsageRecord,
 } from "../types.js";
-import { readJsonl } from "../utils/jsonl.js";
 import {
   filterActionableQueryRecords,
   filterActionableSkillUsageRecords,
@@ -212,27 +211,13 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
     );
   }
-  // 1. Read log files from SQLite (fall back to JSONL for custom paths)
-  let telemetry: SessionTelemetryRecord[];
-  let skillRecords: SkillUsageRecord[];
-  let queryRecords: QueryLogRecord[];
-  if (
-    _telemetryLogPath === TELEMETRY_LOG &&
-    _skillLogPath === SKILL_LOG &&
-    _queryLogPath === QUERY_LOG
-  ) {
-    const db = getDb();
-    telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
-    // SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
-    telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
-    skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
-    queryRecords = queryQueryLog(db) as QueryLogRecord[];
-  } else {
-    // Intentional JSONL fallback: custom log path overrides bypass SQLite reads
-    telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
-    skillRecords = readJsonl<SkillUsageRecord>(_skillLogPath);
-    queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);
-  }
+  // 1. Read log files from SQLite
+  const db = getDb();
+  const telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
+  // SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
+  telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
+  const skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
+  const queryRecords = queryQueryLog(db) as QueryLogRecord[];
   // 2. Determine baseline pass rate from last deployed audit entry
   const lastDeployed = getLastDeployedProposal(skillName, _auditLogPath);

package/cli/selftune/normalization.ts CHANGED Viewed

@@ -14,7 +14,6 @@
 import { createHash } from "node:crypto";
 import {
-  appendFileSync,
   existsSync,
   mkdirSync,
   readFileSync,
@@ -388,32 +387,12 @@ export function getLatestPromptIdentity(
   };
 }
-export function appendCanonicalRecord(record: CanonicalRecord, logPath?: string): void {
+export function appendCanonicalRecord(record: CanonicalRecord, _logPath?: string): void {
   writeCanonicalToDb(record);
-  // JSONL append — best-effort backup for prompt state recovery
-  try {
-    const path = logPath ?? CANONICAL_LOG;
-    const dir = dirname(path);
-    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
-    appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
-  } catch {
-    /* best-effort only */
-  }
 }
-export function appendCanonicalRecords(records: CanonicalRecord[], logPath?: string): void {
+export function appendCanonicalRecords(records: CanonicalRecord[], _logPath?: string): void {
   writeCanonicalBatchToDb(records);
-  // JSONL append — best-effort backup for prompt state recovery
-  try {
-    const path = logPath ?? CANONICAL_LOG;
-    const dir = dirname(path);
-    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
-    for (const record of records) {
-      appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
-    }
-  } catch {
-    /* best-effort only */
-  }
 }
 // ---------------------------------------------------------------------------

package/cli/selftune/orchestrate.ts CHANGED Viewed

@@ -9,9 +9,9 @@
  * explicit dry-run and review-required modes for human-in-the-loop operation.
  */
-import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
+import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import { parseArgs } from "node:util";
 import { readAlphaIdentity } from "./alpha-identity.js";
@@ -19,9 +19,19 @@ import type { UploadCycleSummary } from "./alpha-upload/index.js";
 import { ORCHESTRATE_LOCK, SELFTUNE_CONFIG_PATH } from "./constants.js";
 import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
 import type { EvolveResult } from "./evolution/evolve.js";
+import {
+  buildDefaultGradingOutputPath,
+  deriveExpectationsFromSkill,
+  gradeSession,
+  resolveLatestSessionForSkill,
+} from "./grading/grade-session.js";
 import { readGradingResultsForSkill } from "./grading/results.js";
 import { getDb } from "./localdb/db.js";
-import { updateSignalConsumed, writeOrchestrateRunToDb } from "./localdb/direct-write.js";
+import {
+  updateSignalConsumed,
+  writeGradingResultToDb,
+  writeOrchestrateRunToDb,
+} from "./localdb/direct-write.js";
 import {
   queryEvolutionAudit,
   queryImprovementSignals,
@@ -50,6 +60,7 @@ import {
   findRepositoryClaudeSkillDirs,
   findRepositorySkillDirs,
 } from "./utils/skill-discovery.js";
+import { readExcerpt } from "./utils/transcript.js";
 // ---------------------------------------------------------------------------
 // Lockfile management
@@ -156,6 +167,8 @@ export interface OrchestrateOptions {
   recentWindowHours: number;
   /** Force sync to rescan all sources. */
   syncForce: boolean;
+  /** Max ungraded skills to auto-grade per run (default: 5). Set 0 to disable. */
+  maxAutoGrade: number;
 }
 export interface SkillAction {
@@ -178,6 +191,7 @@ export interface OrchestrateResult {
     deployed: number;
     watched: number;
     skipped: number;
+    autoGraded: number;
     dryRun: boolean;
     approvalMode: "auto" | "review";
     elapsedMs: number;
@@ -335,6 +349,7 @@ export function formatOrchestrateReport(result: OrchestrateResult): string {
   // Final summary
   lines.push("Summary");
+  lines.push(`  Auto-graded:  ${result.summary.autoGraded}`);
   lines.push(`  Evaluated:    ${result.summary.evaluated} skills`);
   lines.push(`  Deployed:     ${result.summary.deployed}`);
   lines.push(`  Watched:      ${result.summary.watched}`);
@@ -620,6 +635,111 @@ function findRecentlyDeployedSkills(
   return names;
 }
+// ---------------------------------------------------------------------------
+// Auto-grade ungraded skills
+// ---------------------------------------------------------------------------
+/**
+ * Auto-grade the top ungraded skills that have some session data.
+ * Fail-open: individual grading errors are logged but never propagated.
+ *
+ * Candidates are skills whose status is "UNGRADED" and whose snapshot reports
+ * `skill_checks > 0`, ordered by `skill_checks` descending and truncated to
+ * `maxAutoGrade` entries. For each candidate the latest session is resolved,
+ * expectations are derived for the skill, the session is graded, and the
+ * result is written to SQLite and then, best-effort, to a JSON file.
+ * A skill counts as graded only when the SQLite write reports success.
+ * Progress and errors are reported via `console.error`.
+ *
+ * @param skills - Skill statuses to pick candidates from.
+ * @param maxAutoGrade - Upper bound on candidates; 0 selects none. A negative
+ *   value would make `slice` count from the end of the list — NOTE(review):
+ *   callers should pass a non-negative integer.
+ * @param agent - Agent identifier forwarded to `gradeSession`.
+ * @param deps - Readers for telemetry and skill-usage records; both are
+ *   invoked once per candidate skill.
+ * @returns Number of skills successfully graded.
+ */
+export async function autoGradeTopUngraded(
+  skills: SkillStatus[],
+  maxAutoGrade: number,
+  agent: string,
+  deps: {
+    readTelemetry: () => SessionTelemetryRecord[];
+    readSkillRecords: () => SkillUsageRecord[];
+  },
+): Promise<number> {
+  // Filter: UNGRADED skills with some data (skill_checks > 0)
+  const ungradedWithData = skills
+    .filter((s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0)
+    .sort((a, b) => (b.snapshot?.skill_checks ?? 0) - (a.snapshot?.skill_checks ?? 0))
+    .slice(0, maxAutoGrade);
+  if (ungradedWithData.length === 0) return 0;
+  let graded = 0;
+  for (const skill of ungradedWithData) {
+    try {
+      const telemetry = deps.readTelemetry();
+      const skillUsage = deps.readSkillRecords();
+      // Resolve the latest session for this skill
+      const resolved = resolveLatestSessionForSkill(telemetry, skillUsage, skill.name);
+      if (!resolved) {
+        console.error(`  [auto-grade] ${skill.name}: no session found, skipping`);
+        continue;
+      }
+      // Derive expectations from SKILL.md
+      const derived = deriveExpectationsFromSkill(skill.name);
+      let transcriptExcerpt = "(no transcript)";
+      if (resolved.transcriptPath) {
+        try {
+          transcriptExcerpt = readExcerpt(resolved.transcriptPath);
+        } catch {
+          transcriptExcerpt = "(no transcript)";
+        }
+      }
+      console.error(`  [auto-grade] Grading "${skill.name}" (session ${resolved.sessionId})...`);
+      const result = await gradeSession({
+        expectations: derived.expectations,
+        telemetry: resolved.telemetry,
+        sessionId: resolved.sessionId,
+        skillName: skill.name,
+        transcriptExcerpt,
+        transcriptPath: resolved.transcriptPath,
+        agent,
+      });
+      // Persist to SQLite — only count as graded if DB write succeeds
+      let persisted = false;
+      try {
+        persisted = writeGradingResultToDb(result);
+      } catch {
+        persisted = false;
+      }
+      if (!persisted) {
+        console.error(`  [auto-grade] ${skill.name}: graded but failed to persist result`);
+        continue;
+      }
+      // Persist to file (fail-open, supplementary)
+      try {
+        const basePath = buildDefaultGradingOutputPath(resolved.sessionId);
+        const safeName = skill.name.replace(/[^a-zA-Z0-9_-]/g, "_");
+        const outputPath = basePath.replace(/\.json$/, `_${safeName}.json`);
+        const outputDir = dirname(outputPath);
+        mkdirSync(outputDir, { recursive: true });
+        writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
+      } catch {
+        // fail-open: DB is authoritative, file is supplementary
+      }
+      const passRate = result.summary.pass_rate;
+      console.error(
+        `  [auto-grade] ${skill.name}: ${result.summary.passed}/${result.summary.total} passed (${Math.round(passRate * 100)}%)`,
+      );
+      graded++;
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      console.error(
+        `  [auto-grade] ${skill.name}: error — ${msg}. Retry with: selftune grade ${skill.name}`,
+      );
+      // fail-open: continue to next skill
+    }
+  }
+  return graded;
+}
 // ---------------------------------------------------------------------------
 // Main orchestrator
 // ---------------------------------------------------------------------------
@@ -665,6 +785,7 @@ export async function orchestrate(
         deployed: 0,
         watched: 0,
         skipped: 0,
+        autoGraded: 0,
         dryRun: options.dryRun,
         approvalMode: options.approvalMode,
         elapsedMs: 0,
@@ -732,7 +853,7 @@ export async function orchestrate(
     const auditEntries = _readAuditEntries();
     const doctorResult = await _doctor();
-    const statusResult = _computeStatus(
+    let statusResult = _computeStatus(
       telemetry,
       skillRecords,
       queryRecords,
@@ -743,6 +864,61 @@ export async function orchestrate(
       `[orchestrate] Status: ${statusResult.skills.length} skills, system=${statusResult.system.healthy ? "healthy" : "unhealthy"}`,
     );
+    // -------------------------------------------------------------------------
+    // Step 2a: Auto-grade ungraded skills with sufficient data
+    // -------------------------------------------------------------------------
+    let autoGradedCount = 0;
+    const scopedSkills = options.skillFilter
+      ? statusResult.skills.filter((s) => s.name === options.skillFilter)
+      : statusResult.skills;
+    const ungradedWithData = scopedSkills.filter(
+      (s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0,
+    );
+    if (!options.dryRun && options.maxAutoGrade > 0 && ungradedWithData.length > 0) {
+      const gradeAgent = _detectAgent();
+      if (gradeAgent) {
+        console.error(
+          `[orchestrate] Auto-grading ${Math.min(ungradedWithData.length, options.maxAutoGrade)} ungraded skill(s)...`,
+        );
+        autoGradedCount = await autoGradeTopUngraded(
+          scopedSkills,
+          options.maxAutoGrade,
+          gradeAgent,
+          { readTelemetry: _readTelemetry, readSkillRecords: _readSkillRecords },
+        );
+        if (autoGradedCount > 0) {
+          // Recompute status so candidate selection sees updated grades
+          console.error(
+            `[orchestrate] Recomputing status after grading ${autoGradedCount} skill(s)...`,
+          );
+          try {
+            const freshTelemetry = _readTelemetry();
+            const freshSkillRecords = _readSkillRecords();
+            const freshQueryRecords = _readQueryRecords();
+            const freshAudit = _readAuditEntries();
+            const freshDoctor = doctorResult; // reuse — environment unchanged during grading
+            statusResult = _computeStatus(
+              freshTelemetry,
+              freshSkillRecords,
+              freshQueryRecords,
+              freshAudit,
+              freshDoctor,
+            );
+          } catch (recomputeErr) {
+            console.error(
+              `[orchestrate] Warning: failed to recompute status after grading — using pre-grade status. ${recomputeErr instanceof Error ? recomputeErr.message : String(recomputeErr)}`,
+            );
+          }
+        }
+      } else {
+        console.error(
+          "[orchestrate] No agent CLI found — skipping auto-grade. To disable, rerun with: selftune orchestrate --max-auto-grade 0",
+        );
+      }
+    }
     // -------------------------------------------------------------------------
     // Step 2b: Read pending improvement signals
     // -------------------------------------------------------------------------
@@ -919,6 +1095,7 @@ export async function orchestrate(
       deployed: candidates.filter((c) => c.evolveResult?.deployed).length,
       watched: candidates.filter((c) => c.action === "watch").length,
       skipped: candidates.filter((c) => c.action === "skip").length,
+      autoGraded: autoGradedCount,
     };
     const result: OrchestrateResult = {
@@ -956,6 +1133,7 @@ export async function orchestrate(
       deployed: finalTotals.deployed,
       watched: finalTotals.watched,
       skipped: finalTotals.skipped,
+      auto_graded: finalTotals.autoGraded,
       skill_actions: candidates.map(
         (c): OrchestrateRunSkillAction => ({
           skill: c.skill,
@@ -1023,6 +1201,7 @@ export async function cliMain(): Promise<void> {
       "max-skills": { type: "string", default: "5" },
       "recent-window": { type: "string", default: "48" },
       "sync-force": { type: "boolean", default: false },
+      "max-auto-grade": { type: "string", default: "5" },
       loop: { type: "boolean", default: false },
       "loop-interval": { type: "string", default: "3600" },
       help: { type: "boolean", short: "h", default: false },
@@ -1033,7 +1212,7 @@ export async function cliMain(): Promise<void> {
   if (values.help) {
     console.log(`selftune orchestrate — Autonomous core loop
-Runs the full improvement cycle: sync → status → evolve → watch.
+Runs the full improvement cycle: sync → status → auto-grade → evolve → watch.
 Usage:
   selftune orchestrate [options]
@@ -1046,6 +1225,7 @@ Options:
   --max-skills <n>      Cap skills processed per run (default: 5)
   --recent-window <hrs> Hours to look back for watch targets (default: 48)
   --sync-force          Force full rescan during sync
+  --max-auto-grade <n>  Max ungraded skills to auto-grade per run (default: 5, 0 to disable)
   --loop                Run in continuous loop mode (never stops)
   --loop-interval <s>   Seconds between iterations (default: 3600, min: 60)
   -h, --help            Show this help message
@@ -1067,23 +1247,41 @@ Examples:
     process.exit(0);
   }
-  const maxSkills = Number.parseInt(values["max-skills"] ?? "5", 10);
-  if (Number.isNaN(maxSkills) || maxSkills < 1) {
-    console.error("[ERROR] --max-skills must be a positive integer");
+  const maxSkillsRaw = values["max-skills"] ?? "5";
+  if (!/^\d+$/.test(maxSkillsRaw) || Number(maxSkillsRaw) < 1) {
+    console.error(
+      "[ERROR] --max-skills must be a positive integer. Retry with: selftune orchestrate --max-skills 5",
+    );
+    process.exit(1);
+  }
+  const maxSkills = Number(maxSkillsRaw);
+  const recentWindowRaw = values["recent-window"] ?? "48";
+  if (!/^\d+$/.test(recentWindowRaw) || Number(recentWindowRaw) < 1) {
+    console.error(
+      "[ERROR] --recent-window must be a positive integer. Retry with: selftune orchestrate --recent-window 48",
+    );
     process.exit(1);
   }
+  const recentWindow = Number(recentWindowRaw);
-  const recentWindow = Number.parseInt(values["recent-window"] ?? "48", 10);
-  if (Number.isNaN(recentWindow) || recentWindow < 1) {
-    console.error("[ERROR] --recent-window must be a positive integer");
+  const maxAutoGradeRaw = values["max-auto-grade"] ?? "5";
+  if (!/^\d+$/.test(maxAutoGradeRaw)) {
+    console.error(
+      "[ERROR] --max-auto-grade must be a non-negative integer. Retry with: selftune orchestrate --max-auto-grade 5",
+    );
     process.exit(1);
   }
+  const maxAutoGrade = Number(maxAutoGradeRaw);
-  const loopInterval = Number.parseInt(values["loop-interval"] ?? "3600", 10);
-  if (values.loop && (Number.isNaN(loopInterval) || loopInterval < 60)) {
-    console.error("[ERROR] --loop-interval must be an integer >= 60 (seconds)");
+  const loopIntervalRaw = values["loop-interval"] ?? "3600";
+  if (!/^\d+$/.test(loopIntervalRaw) || (values.loop && Number(loopIntervalRaw) < 60)) {
+    console.error(
+      "[ERROR] --loop-interval must be an integer >= 60 (seconds). Retry with: selftune orchestrate --loop --loop-interval 3600",
+    );
     process.exit(1);
   }
+  const loopInterval = Number(loopIntervalRaw);
   const autoApprove = values["auto-approve"] ?? false;
   if (autoApprove) {
@@ -1132,6 +1330,7 @@ Examples:
       maxSkills,
       recentWindowHours: recentWindow,
       syncForce: values["sync-force"] ?? false,
+      maxAutoGrade,
     });
     // JSON output: include per-skill decisions for machine consumption