npm - selftune - Versions diffs - 0.2.13 → 0.2.15 - Mend

selftune 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/activation-rules.ts +24 -48
package/cli/selftune/analytics.ts +13 -11
package/cli/selftune/badge/badge.ts +13 -9
package/cli/selftune/canonical-export.ts +6 -6
package/cli/selftune/constants.ts +7 -0
package/cli/selftune/contribute/bundle.ts +9 -44
package/cli/selftune/contribute/contribute.ts +2 -1
package/cli/selftune/cron/setup.ts +3 -1
package/cli/selftune/dashboard-contract.ts +22 -0
package/cli/selftune/dashboard.ts +10 -5
package/cli/selftune/eval/baseline.ts +20 -30
package/cli/selftune/eval/hooks-to-evals.ts +27 -34
package/cli/selftune/eval/import-skillsbench.ts +21 -8
package/cli/selftune/eval/unit-test-cli.ts +22 -11
package/cli/selftune/evolution/description-quality.ts +224 -0
package/cli/selftune/evolution/evolve-body.ts +17 -10
package/cli/selftune/evolution/evolve.ts +70 -57
package/cli/selftune/evolution/rollback.ts +7 -6
package/cli/selftune/grading/auto-grade.ts +27 -35
package/cli/selftune/grading/grade-session.ts +24 -30
package/cli/selftune/hooks/auto-activate.ts +12 -3
package/cli/selftune/hooks/evolution-guard.ts +14 -24
package/cli/selftune/hooks/prompt-log.ts +7 -9
package/cli/selftune/hooks/session-stop.ts +0 -8
package/cli/selftune/index.ts +66 -69
package/cli/selftune/ingestors/claude-replay.ts +29 -14
package/cli/selftune/ingestors/codex-rollout.ts +15 -5
package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
package/cli/selftune/init.ts +14 -9
package/cli/selftune/localdb/queries.ts +57 -0
package/cli/selftune/monitoring/watch.ts +39 -38
package/cli/selftune/normalization.ts +2 -23
package/cli/selftune/orchestrate.ts +224 -24
package/cli/selftune/routes/skill-report.ts +17 -0
package/cli/selftune/schedule.ts +74 -14
package/cli/selftune/sync.ts +7 -3
package/cli/selftune/types.ts +44 -10
package/cli/selftune/utils/cli-error.ts +102 -0
package/cli/selftune/utils/jsonl.ts +2 -0
package/cli/selftune/workflows/workflows.ts +23 -17
package/package.json +3 -1
package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
package/packages/ui/src/components/index.ts +1 -0
package/packages/ui/src/components/section-cards.tsx +13 -0
package/skill/SKILL.md +1 -1
package/skill/Workflows/Evolve.md +4 -0
package/skill/Workflows/Initialize.md +8 -8
package/skill/Workflows/Orchestrate.md +11 -7
package/skill/Workflows/Schedule.md +11 -0
package/skill/references/logs.md +22 -21
package/skill/settings_snippet.json +29 -6
package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12

package/cli/selftune/localdb/queries.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import type {
   OrchestrateRunReport,
   OverviewPayload,
   PendingProposal,
+  RecentActivityItem,
   SkillReportPayload,
   SkillSummary,
 } from "../dashboard-contract.js";
@@ -126,6 +127,10 @@ export function getOverviewPayload(db: Database): OverviewPayload {
   // Pending proposals: created/validated but no terminal action (deduped in SQL)
   const pending_proposals = getPendingProposals(db);
+  // Active sessions and recent activity
+  const active_sessions = getActiveSessionCount(db);
+  const recent_activity = getRecentActivity(db);
   return {
     telemetry,
     skills,
@@ -133,6 +138,8 @@ export function getOverviewPayload(db: Database): OverviewPayload {
     counts,
     unmatched_queries: unmatchedRows,
     pending_proposals,
+    active_sessions,
+    recent_activity,
   };
 }
@@ -361,6 +368,56 @@ export function getOrchestrateRuns(db: Database, limit = 20): OrchestrateRunRepo
   }));
 }
+/**
+ * Count sessions that have queries recorded but no session_telemetry yet
+ * (i.e., the session is still in progress).
+ *
+ * Implemented as a single COUNT(DISTINCT session_id) over the `queries`
+ * table, excluding (via NOT EXISTS) every session_id that already has at
+ * least one row in `session_telemetry`.
+ *
+ * NOTE(review): the query has no time bound, so a session that recorded
+ * queries but never wrote a telemetry row would be counted indefinitely;
+ * confirm that is the intended meaning of "active".
+ *
+ * @param db - Database handle the statement is run against.
+ * @returns The `count` column of the single result row.
+ */
+export function getActiveSessionCount(db: Database): number {
+  const row = db
+    .query(
+      `SELECT COUNT(DISTINCT q.session_id) as count
+       FROM queries q
+       WHERE NOT EXISTS (
+         SELECT 1 FROM session_telemetry st WHERE st.session_id = q.session_id
+       )`,
+    )
+    .get() as { count: number };
+  return row.count;
+}
+/**
+ * Get the most recent skill invocations with a flag indicating whether the
+ * session is still in progress (no session_telemetry row yet).
+ *
+ * Rows come from `skill_invocations`, LEFT JOINed to `session_telemetry` on
+ * session_id, ordered by `occurred_at` descending and capped at `limit`.
+ * Each row is mapped to a RecentActivityItem:
+ *   - `occurred_at` is exposed as `timestamp`;
+ *   - a null `query` becomes the empty string;
+ *   - the integer columns `triggered` and `is_live` become booleans
+ *     (true only when the stored value is exactly 1).
+ *
+ * NOTE(review): because this is a join rather than an EXISTS check, an
+ * invocation would appear more than once if `session_telemetry` ever holds
+ * several rows for the same session_id; confirm session_id is unique there.
+ *
+ * @param db - Database handle the statement is run against.
+ * @param limit - Maximum number of rows to return (default 20); bound to the
+ *   SQL `LIMIT ?` placeholder.
+ * @returns Recent activity items, newest first.
+ */
+export function getRecentActivity(db: Database, limit = 20): RecentActivityItem[] {
+  const rows = db
+    .query(
+      `SELECT si.occurred_at, si.session_id, si.skill_name, si.query, si.triggered,
+              CASE WHEN st.session_id IS NULL THEN 1 ELSE 0 END as is_live
+       FROM skill_invocations si
+       LEFT JOIN session_telemetry st ON si.session_id = st.session_id
+       ORDER BY si.occurred_at DESC
+       LIMIT ?`,
+    )
+    .all(limit) as Array<{
+    occurred_at: string;
+    session_id: string;
+    skill_name: string;
+    query: string;
+    triggered: number;
+    is_live: number;
+  }>;
+  return rows.map((row) => ({
+    timestamp: row.occurred_at,
+    session_id: row.session_id,
+    skill_name: row.skill_name,
+    query: row.query ?? "",
+    triggered: row.triggered === 1,
+    is_live: row.is_live === 1,
+  }));
+}
 // -- Generic read queries (Phase 3: replace readJsonl calls) ------------------
 /**

package/cli/selftune/monitoring/watch.ts CHANGED Viewed

@@ -26,7 +26,7 @@ import type {
   SessionTelemetryRecord,
   SkillUsageRecord,
 } from "../types.js";
-import { readJsonl } from "../utils/jsonl.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
 import {
   filterActionableQueryRecords,
   filterActionableSkillUsageRecords,
@@ -212,27 +212,13 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
     );
   }
-  // 1. Read log files from SQLite (fall back to JSONL for custom paths)
-  let telemetry: SessionTelemetryRecord[];
-  let skillRecords: SkillUsageRecord[];
-  let queryRecords: QueryLogRecord[];
-  if (
-    _telemetryLogPath === TELEMETRY_LOG &&
-    _skillLogPath === SKILL_LOG &&
-    _queryLogPath === QUERY_LOG
-  ) {
-    const db = getDb();
-    telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
-    // SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
-    telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
-    skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
-    queryRecords = queryQueryLog(db) as QueryLogRecord[];
-  } else {
-    // Intentional JSONL fallback: custom log path overrides bypass SQLite reads
-    telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
-    skillRecords = readJsonl<SkillUsageRecord>(_skillLogPath);
-    queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);
-  }
+  // 1. Read log files from SQLite
+  const db = getDb();
+  const telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
+  // SQLite queries return DESC order; computeMonitoringSnapshot expects chronological (ASC)
+  telemetry.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
+  const skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
+  const queryRecords = queryQueryLog(db) as QueryLogRecord[];
   // 2. Determine baseline pass rate from last deployed audit entry
   const lastDeployed = getLastDeployedProposal(skillName, _auditLogPath);
@@ -369,34 +355,52 @@ Options:
   }
   if (!values.skill || !values["skill-path"]) {
-    console.error("[ERROR] --skill and --skill-path are required");
-    process.exit(1);
+    throw new CLIError(
+      "--skill and --skill-path are required.",
+      "MISSING_FLAG",
+      "Usage: selftune watch --skill <name> --skill-path <path>",
+    );
   }
   if ((values["sync-force"] ?? false) && !(values["sync-first"] ?? false)) {
-    console.error("[ERROR] --sync-force requires --sync-first");
-    process.exit(1);
+    throw new CLIError(
+      "--sync-force requires --sync-first.",
+      "INVALID_FLAG",
+      "Add --sync-first when using --sync-force.",
+    );
   }
   const rawWindow = values.window ?? "20";
   if (!/^\d+$/.test(rawWindow)) {
-    console.error("[ERROR] --window must be a positive integer >= 1");
-    process.exit(1);
+    throw new CLIError(
+      "--window must be a positive integer >= 1.",
+      "INVALID_FLAG",
+      "selftune watch --window 20",
+    );
   }
   const windowSessions = Number.parseInt(rawWindow, 10);
   if (windowSessions < 1) {
-    console.error("[ERROR] --window must be a positive integer >= 1");
-    process.exit(1);
+    throw new CLIError(
+      "--window must be a positive integer >= 1.",
+      "INVALID_FLAG",
+      "selftune watch --window 20",
+    );
   }
   const rawThreshold = values.threshold ?? "0.1";
   if (!/^\d+(\.\d+)?$/.test(rawThreshold)) {
-    console.error("[ERROR] --threshold must be a finite number between 0 and 1");
-    process.exit(1);
+    throw new CLIError(
+      "--threshold must be a finite number between 0 and 1.",
+      "INVALID_FLAG",
+      "selftune watch --threshold 0.1",
+    );
   }
   const regressionThreshold = Number.parseFloat(rawThreshold);
   if (regressionThreshold < 0 || regressionThreshold > 1) {
-    console.error("[ERROR] --threshold must be a finite number between 0 and 1");
-    process.exit(1);
+    throw new CLIError(
+      "--threshold must be a finite number between 0 and 1.",
+      "INVALID_FLAG",
+      "selftune watch --threshold 0.1",
+    );
   }
   const result = await watch({
@@ -414,8 +418,5 @@ Options:
 }
 if (import.meta.main) {
-  cliMain().catch((err) => {
-    console.error(`[FATAL] ${err}`);
-    process.exit(1);
-  });
+  cliMain().catch(handleCLIError);
 }

package/cli/selftune/normalization.ts CHANGED Viewed

@@ -14,7 +14,6 @@
 import { createHash } from "node:crypto";
 import {
-  appendFileSync,
   existsSync,
   mkdirSync,
   readFileSync,
@@ -388,32 +387,12 @@ export function getLatestPromptIdentity(
   };
 }
-export function appendCanonicalRecord(record: CanonicalRecord, logPath?: string): void {
+export function appendCanonicalRecord(record: CanonicalRecord, _logPath?: string): void {
   writeCanonicalToDb(record);
-  // JSONL append — best-effort backup for prompt state recovery
-  try {
-    const path = logPath ?? CANONICAL_LOG;
-    const dir = dirname(path);
-    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
-    appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
-  } catch {
-    /* best-effort only */
-  }
 }
-export function appendCanonicalRecords(records: CanonicalRecord[], logPath?: string): void {
+export function appendCanonicalRecords(records: CanonicalRecord[], _logPath?: string): void {
   writeCanonicalBatchToDb(records);
-  // JSONL append — best-effort backup for prompt state recovery
-  try {
-    const path = logPath ?? CANONICAL_LOG;
-    const dir = dirname(path);
-    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
-    for (const record of records) {
-      appendFileSync(path, `${JSON.stringify(record)}\n`, "utf-8");
-    }
-  } catch {
-    /* best-effort only */
-  }
 }
 // ---------------------------------------------------------------------------

package/cli/selftune/orchestrate.ts CHANGED Viewed

@@ -9,9 +9,9 @@
  * explicit dry-run and review-required modes for human-in-the-loop operation.
  */
-import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
+import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import { parseArgs } from "node:util";
 import { readAlphaIdentity } from "./alpha-identity.js";
@@ -19,9 +19,19 @@ import type { UploadCycleSummary } from "./alpha-upload/index.js";
 import { ORCHESTRATE_LOCK, SELFTUNE_CONFIG_PATH } from "./constants.js";
 import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
 import type { EvolveResult } from "./evolution/evolve.js";
+import {
+  buildDefaultGradingOutputPath,
+  deriveExpectationsFromSkill,
+  gradeSession,
+  resolveLatestSessionForSkill,
+} from "./grading/grade-session.js";
 import { readGradingResultsForSkill } from "./grading/results.js";
 import { getDb } from "./localdb/db.js";
-import { updateSignalConsumed, writeOrchestrateRunToDb } from "./localdb/direct-write.js";
+import {
+  updateSignalConsumed,
+  writeGradingResultToDb,
+  writeOrchestrateRunToDb,
+} from "./localdb/direct-write.js";
 import {
   queryEvolutionAudit,
   queryImprovementSignals,
@@ -43,6 +53,7 @@ import type {
   SessionTelemetryRecord,
   SkillUsageRecord,
 } from "./types.js";
+import { CLIError, handleCLIError } from "./utils/cli-error.js";
 import { detectAgent } from "./utils/llm-call.js";
 import { getSelftuneVersion, readConfiguredAgentType } from "./utils/selftune-meta.js";
 import {
@@ -50,6 +61,7 @@ import {
   findRepositoryClaudeSkillDirs,
   findRepositorySkillDirs,
 } from "./utils/skill-discovery.js";
+import { readExcerpt } from "./utils/transcript.js";
 // ---------------------------------------------------------------------------
 // Lockfile management
@@ -156,6 +168,8 @@ export interface OrchestrateOptions {
   recentWindowHours: number;
   /** Force sync to rescan all sources. */
   syncForce: boolean;
+  /** Max ungraded skills to auto-grade per run (default: 5). Set 0 to disable. */
+  maxAutoGrade: number;
 }
 export interface SkillAction {
@@ -178,6 +192,7 @@ export interface OrchestrateResult {
     deployed: number;
     watched: number;
     skipped: number;
+    autoGraded: number;
     dryRun: boolean;
     approvalMode: "auto" | "review";
     elapsedMs: number;
@@ -335,6 +350,7 @@ export function formatOrchestrateReport(result: OrchestrateResult): string {
   // Final summary
   lines.push("Summary");
+  lines.push(`  Auto-graded:  ${result.summary.autoGraded}`);
   lines.push(`  Evaluated:    ${result.summary.evaluated} skills`);
   lines.push(`  Deployed:     ${result.summary.deployed}`);
   lines.push(`  Watched:      ${result.summary.watched}`);
@@ -620,6 +636,111 @@ function findRecentlyDeployedSkills(
   return names;
 }
+// ---------------------------------------------------------------------------
+// Auto-grade ungraded skills
+// ---------------------------------------------------------------------------
+/**
+ * Auto-grade the top ungraded skills that have some session data.
+ * Fail-open: individual grading errors are logged but never propagated.
+ *
+ * Selection: keeps skills whose status is "UNGRADED" and whose
+ * snapshot.skill_checks is greater than 0 (a missing snapshot counts as 0),
+ * sorts them by skill_checks descending, and takes the first `maxAutoGrade`.
+ *
+ * Per selected skill, in order:
+ *   1. Re-reads telemetry and skill usage through `deps`. The reads sit
+ *      inside the per-skill try block, so a failing read only skips that
+ *      skill.
+ *   2. Resolves the latest session via resolveLatestSessionForSkill; when
+ *      none is found the skill is logged and skipped.
+ *   3. Derives expectations via deriveExpectationsFromSkill and reads a
+ *      transcript excerpt, falling back to "(no transcript)" when there is
+ *      no transcript path or reading it throws.
+ *   4. Awaits gradeSession and writes the result with
+ *      writeGradingResultToDb. A falsy return or a thrown error is treated
+ *      as "not persisted" and the skill is not counted.
+ *   5. Best-effort writes the result as JSON to the default grading output
+ *      path with `_<sanitized skill name>` inserted before `.json`; errors
+ *      from this step are ignored.
+ *
+ * All progress and error messages go to console.error.
+ *
+ * NOTE(review): the cap is applied with Array.prototype.slice(0, n), so 0
+ * selects nothing, but a negative value would drop items from the end
+ * instead of disabling grading; callers are expected to pass n >= 0.
+ *
+ * @param skills - Skill statuses to choose candidates from.
+ * @param maxAutoGrade - Maximum number of skills to attempt in this call.
+ * @param agent - Agent identifier passed through to gradeSession.
+ * @param deps - Readers for session telemetry and skill usage records.
+ * @returns Number of skills successfully graded.
+ */
+export async function autoGradeTopUngraded(
+  skills: SkillStatus[],
+  maxAutoGrade: number,
+  agent: string,
+  deps: {
+    readTelemetry: () => SessionTelemetryRecord[];
+    readSkillRecords: () => SkillUsageRecord[];
+  },
+): Promise<number> {
+  // Filter: UNGRADED skills with some data (skill_checks > 0)
+  const ungradedWithData = skills
+    .filter((s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0)
+    .sort((a, b) => (b.snapshot?.skill_checks ?? 0) - (a.snapshot?.skill_checks ?? 0))
+    .slice(0, maxAutoGrade);
+  if (ungradedWithData.length === 0) return 0;
+  let graded = 0;
+  for (const skill of ungradedWithData) {
+    try {
+      const telemetry = deps.readTelemetry();
+      const skillUsage = deps.readSkillRecords();
+      // Resolve the latest session for this skill
+      const resolved = resolveLatestSessionForSkill(telemetry, skillUsage, skill.name);
+      if (!resolved) {
+        console.error(`  [auto-grade] ${skill.name}: no session found, skipping`);
+        continue;
+      }
+      // Derive expectations from SKILL.md
+      const derived = deriveExpectationsFromSkill(skill.name);
+      let transcriptExcerpt = "(no transcript)";
+      if (resolved.transcriptPath) {
+        try {
+          transcriptExcerpt = readExcerpt(resolved.transcriptPath);
+        } catch {
+          transcriptExcerpt = "(no transcript)";
+        }
+      }
+      console.error(`  [auto-grade] Grading "${skill.name}" (session ${resolved.sessionId})...`);
+      const result = await gradeSession({
+        expectations: derived.expectations,
+        telemetry: resolved.telemetry,
+        sessionId: resolved.sessionId,
+        skillName: skill.name,
+        transcriptExcerpt,
+        transcriptPath: resolved.transcriptPath,
+        agent,
+      });
+      // Persist to SQLite — only count as graded if DB write succeeds
+      let persisted = false;
+      try {
+        persisted = writeGradingResultToDb(result);
+      } catch {
+        persisted = false;
+      }
+      if (!persisted) {
+        console.error(`  [auto-grade] ${skill.name}: graded but failed to persist result`);
+        continue;
+      }
+      // Persist to file (fail-open, supplementary)
+      try {
+        const basePath = buildDefaultGradingOutputPath(resolved.sessionId);
+        const safeName = skill.name.replace(/[^a-zA-Z0-9_-]/g, "_");
+        const outputPath = basePath.replace(/\.json$/, `_${safeName}.json`);
+        const outputDir = dirname(outputPath);
+        mkdirSync(outputDir, { recursive: true });
+        writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
+      } catch {
+        // fail-open: DB is authoritative, file is supplementary
+      }
+      const passRate = result.summary.pass_rate;
+      console.error(
+        `  [auto-grade] ${skill.name}: ${result.summary.passed}/${result.summary.total} passed (${Math.round(passRate * 100)}%)`,
+      );
+      graded++;
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      console.error(
+        `  [auto-grade] ${skill.name}: error — ${msg}. Retry with: selftune grade ${skill.name}`,
+      );
+      // fail-open: continue to next skill
+    }
+  }
+  return graded;
+}
 // ---------------------------------------------------------------------------
 // Main orchestrator
 // ---------------------------------------------------------------------------
@@ -665,6 +786,7 @@ export async function orchestrate(
         deployed: 0,
         watched: 0,
         skipped: 0,
+        autoGraded: 0,
         dryRun: options.dryRun,
         approvalMode: options.approvalMode,
         elapsedMs: 0,
@@ -732,7 +854,7 @@ export async function orchestrate(
     const auditEntries = _readAuditEntries();
     const doctorResult = await _doctor();
-    const statusResult = _computeStatus(
+    let statusResult = _computeStatus(
       telemetry,
       skillRecords,
       queryRecords,
@@ -743,6 +865,61 @@ export async function orchestrate(
       `[orchestrate] Status: ${statusResult.skills.length} skills, system=${statusResult.system.healthy ? "healthy" : "unhealthy"}`,
     );
+    // -------------------------------------------------------------------------
+    // Step 2a: Auto-grade ungraded skills with sufficient data
+    // -------------------------------------------------------------------------
+    let autoGradedCount = 0;
+    const scopedSkills = options.skillFilter
+      ? statusResult.skills.filter((s) => s.name === options.skillFilter)
+      : statusResult.skills;
+    const ungradedWithData = scopedSkills.filter(
+      (s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0,
+    );
+    if (!options.dryRun && options.maxAutoGrade > 0 && ungradedWithData.length > 0) {
+      const gradeAgent = _detectAgent();
+      if (gradeAgent) {
+        console.error(
+          `[orchestrate] Auto-grading ${Math.min(ungradedWithData.length, options.maxAutoGrade)} ungraded skill(s)...`,
+        );
+        autoGradedCount = await autoGradeTopUngraded(
+          scopedSkills,
+          options.maxAutoGrade,
+          gradeAgent,
+          { readTelemetry: _readTelemetry, readSkillRecords: _readSkillRecords },
+        );
+        if (autoGradedCount > 0) {
+          // Recompute status so candidate selection sees updated grades
+          console.error(
+            `[orchestrate] Recomputing status after grading ${autoGradedCount} skill(s)...`,
+          );
+          try {
+            const freshTelemetry = _readTelemetry();
+            const freshSkillRecords = _readSkillRecords();
+            const freshQueryRecords = _readQueryRecords();
+            const freshAudit = _readAuditEntries();
+            const freshDoctor = doctorResult; // reuse — environment unchanged during grading
+            statusResult = _computeStatus(
+              freshTelemetry,
+              freshSkillRecords,
+              freshQueryRecords,
+              freshAudit,
+              freshDoctor,
+            );
+          } catch (recomputeErr) {
+            console.error(
+              `[orchestrate] Warning: failed to recompute status after grading — using pre-grade status. ${recomputeErr instanceof Error ? recomputeErr.message : String(recomputeErr)}`,
+            );
+          }
+        }
+      } else {
+        console.error(
+          "[orchestrate] No agent CLI found — skipping auto-grade. To disable, rerun with: selftune orchestrate --max-auto-grade 0",
+        );
+      }
+    }
     // -------------------------------------------------------------------------
     // Step 2b: Read pending improvement signals
     // -------------------------------------------------------------------------
@@ -919,6 +1096,7 @@ export async function orchestrate(
       deployed: candidates.filter((c) => c.evolveResult?.deployed).length,
       watched: candidates.filter((c) => c.action === "watch").length,
       skipped: candidates.filter((c) => c.action === "skip").length,
+      autoGraded: autoGradedCount,
     };
     const result: OrchestrateResult = {
@@ -956,6 +1134,7 @@ export async function orchestrate(
       deployed: finalTotals.deployed,
       watched: finalTotals.watched,
       skipped: finalTotals.skipped,
+      auto_graded: finalTotals.autoGraded,
       skill_actions: candidates.map(
         (c): OrchestrateRunSkillAction => ({
           skill: c.skill,
@@ -1023,6 +1202,7 @@ export async function cliMain(): Promise<void> {
       "max-skills": { type: "string", default: "5" },
       "recent-window": { type: "string", default: "48" },
       "sync-force": { type: "boolean", default: false },
+      "max-auto-grade": { type: "string", default: "5" },
       loop: { type: "boolean", default: false },
       "loop-interval": { type: "string", default: "3600" },
       help: { type: "boolean", short: "h", default: false },
@@ -1033,7 +1213,7 @@ export async function cliMain(): Promise<void> {
   if (values.help) {
     console.log(`selftune orchestrate — Autonomous core loop
-Runs the full improvement cycle: sync → status → evolve → watch.
+Runs the full improvement cycle: sync → status → auto-grade → evolve → watch.
 Usage:
   selftune orchestrate [options]
@@ -1046,6 +1226,7 @@ Options:
   --max-skills <n>      Cap skills processed per run (default: 5)
   --recent-window <hrs> Hours to look back for watch targets (default: 48)
   --sync-force          Force full rescan during sync
+  --max-auto-grade <n>  Max ungraded skills to auto-grade per run (default: 5, 0 to disable)
   --loop                Run in continuous loop mode (never stops)
   --loop-interval <s>   Seconds between iterations (default: 3600, min: 60)
   -h, --help            Show this help message
@@ -1067,23 +1248,45 @@ Examples:
     process.exit(0);
   }
-  const maxSkills = Number.parseInt(values["max-skills"] ?? "5", 10);
-  if (Number.isNaN(maxSkills) || maxSkills < 1) {
-    console.error("[ERROR] --max-skills must be a positive integer");
-    process.exit(1);
+  const maxSkillsRaw = values["max-skills"] ?? "5";
+  if (!/^\d+$/.test(maxSkillsRaw) || Number(maxSkillsRaw) < 1) {
+    throw new CLIError(
+      "--max-skills must be a positive integer",
+      "INVALID_FLAG",
+      "selftune orchestrate --max-skills 5",
+    );
   }
-  const recentWindow = Number.parseInt(values["recent-window"] ?? "48", 10);
-  if (Number.isNaN(recentWindow) || recentWindow < 1) {
-    console.error("[ERROR] --recent-window must be a positive integer");
-    process.exit(1);
+  const maxSkills = Number(maxSkillsRaw);
+  const recentWindowRaw = values["recent-window"] ?? "48";
+  if (!/^\d+$/.test(recentWindowRaw) || Number(recentWindowRaw) < 1) {
+    throw new CLIError(
+      "--recent-window must be a positive integer",
+      "INVALID_FLAG",
+      "selftune orchestrate --recent-window 48",
+    );
   }
-  const loopInterval = Number.parseInt(values["loop-interval"] ?? "3600", 10);
-  if (values.loop && (Number.isNaN(loopInterval) || loopInterval < 60)) {
-    console.error("[ERROR] --loop-interval must be an integer >= 60 (seconds)");
-    process.exit(1);
+  const recentWindow = Number(recentWindowRaw);
+  const maxAutoGradeRaw = values["max-auto-grade"] ?? "5";
+  if (!/^\d+$/.test(maxAutoGradeRaw)) {
+    throw new CLIError(
+      "--max-auto-grade must be a non-negative integer",
+      "INVALID_FLAG",
+      "selftune orchestrate --max-auto-grade 5",
+    );
+  }
+  const maxAutoGrade = Number(maxAutoGradeRaw);
+  const loopIntervalRaw = values["loop-interval"] ?? "3600";
+  if (!/^\d+$/.test(loopIntervalRaw) || (values.loop && Number(loopIntervalRaw) < 60)) {
+    throw new CLIError(
+      "--loop-interval must be an integer >= 60 (seconds)",
+      "INVALID_FLAG",
+      "selftune orchestrate --loop --loop-interval 3600",
+    );
   }
+  const loopInterval = Number(loopIntervalRaw);
   const autoApprove = values["auto-approve"] ?? false;
   if (autoApprove) {
@@ -1132,6 +1335,7 @@ Examples:
       maxSkills,
       recentWindowHours: recentWindow,
       syncForce: values["sync-force"] ?? false,
+      maxAutoGrade,
     });
     // JSON output: include per-skill decisions for machine consumption
@@ -1188,9 +1392,5 @@ Examples:
 }
 if (import.meta.main) {
-  cliMain().catch((err) => {
-    const message = err instanceof Error ? err.message : String(err);
-    console.error(`[FATAL] ${message}`);
-    process.exit(1);
-  });
+  cliMain().catch(handleCLIError);
 }

package/cli/selftune/routes/skill-report.ts CHANGED Viewed

@@ -8,6 +8,7 @@
 import type { Database } from "bun:sqlite";
+import { scoreDescription } from "../evolution/description-quality.js";
 import { getPendingProposals, getSkillReportPayload, safeParseJson } from "../localdb/queries.js";
 export function handleSkillReport(db: Database, skillName: string): Response {
@@ -203,6 +204,21 @@ export function handleSkillReport(db: Database, skillName: string): Response {
     completion_status: string | null;
   }>;
+  // 8. Description quality score — computed from latest evolution evidence
+  const latestEvidence = db
+    .query(
+      `SELECT proposed_text, original_text FROM evolution_evidence
+       WHERE skill_name = ? AND (proposed_text IS NOT NULL OR original_text IS NOT NULL)
+       ORDER BY timestamp DESC LIMIT 1`,
+    )
+    .get(skillName) as { proposed_text: string | null; original_text: string | null } | null;
+  // Use the most recent description: deployed proposed_text, or fallback to original_text
+  const currentDescriptionText = latestEvidence?.proposed_text ?? latestEvidence?.original_text;
+  const descriptionQuality = currentDescriptionText
+    ? scoreDescription(currentDescriptionText, skillName)
+    : null;
   return Response.json({
     ...report,
     evolution: evolutionWithSnapshot,
@@ -227,5 +243,6 @@ export function handleSkillReport(db: Database, skillName: string): Response {
       is_actionable: p.is_actionable === 1,
     })),
     session_metadata: sessionMeta,
+    description_quality: descriptionQuality,
   });
 }