npm - selftune - Versions diffs - 0.1.0 → 0.1.4 - Mend

selftune 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/CHANGELOG.md +11 -0
package/LICENSE +21 -0
package/README.md +62 -5
package/bin/selftune.cjs +3 -1
package/cli/selftune/constants.ts +0 -6
package/cli/selftune/dashboard.ts +176 -0
package/cli/selftune/evolution/evolve.ts +31 -20
package/cli/selftune/evolution/propose-description.ts +2 -3
package/cli/selftune/evolution/rollback.ts +1 -1
package/cli/selftune/evolution/validate-proposal.ts +3 -4
package/cli/selftune/grading/grade-session.ts +18 -62
package/cli/selftune/index.ts +21 -0
package/cli/selftune/ingestors/codex-rollout.ts +1 -1
package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
package/cli/selftune/init.ts +25 -36
package/cli/selftune/last.ts +138 -0
package/cli/selftune/observability.ts +1 -1
package/cli/selftune/status.ts +318 -0
package/cli/selftune/types.ts +1 -1
package/cli/selftune/utils/llm-call.ts +8 -57
package/dashboard/index.html +1119 -0
package/package.json +32 -3
package/skill/SKILL.md +19 -14
package/skill/Workflows/Doctor.md +2 -9
package/skill/Workflows/Evals.md +8 -17
package/skill/Workflows/Evolve.md +5 -11
package/skill/Workflows/Grade.md +5 -10
package/skill/Workflows/Ingest.md +16 -34
package/skill/Workflows/Initialize.md +32 -34
package/skill/Workflows/Rollback.md +3 -10
package/skill/Workflows/Watch.md +2 -9

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/),
 and this project adheres to [Semantic Versioning](https://semver.org/).
+## [0.1.4] - 2026-03-01
+### Added
+- `selftune status` — CLI skill health summary with pass rates, trends, and system health
+- `selftune last` — Quick insight from the most recent session
+- `selftune dashboard` — Skill-health-centric HTML dashboard with grid view and drill-down
+- CI/CD workflows: publish, auto-bump, CodeQL, scorecard
+- FOSS governance: LICENSE (MIT), CODE_OF_CONDUCT, CONTRIBUTING, SECURITY
+- npm package configuration with CJS bin entry point
 ## [0.1.0] - 2026-02-28
 ### Added

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 WellDunDun
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md CHANGED Viewed

@@ -1,9 +1,18 @@
+[![CI](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml/badge.svg)](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml)
+[![CodeQL](https://github.com/WellDunDun/selftune/actions/workflows/codeql.yml/badge.svg)](https://github.com/WellDunDun/selftune/actions/workflows/codeql.yml)
+[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/WellDunDun/selftune/badge)](https://securityscorecards.dev/viewer/?uri=github.com/WellDunDun/selftune)
+[![npm version](https://img.shields.io/npm/v/selftune)](https://www.npmjs.com/package/selftune)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+[![TypeScript](https://img.shields.io/badge/TypeScript-5.0-blue.svg)](https://www.typescriptlang.org/)
+[![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)](https://www.npmjs.com/package/selftune?activeTab=dependencies)
+[![Bun](https://img.shields.io/badge/runtime-bun%20%7C%20node-black)](https://bun.sh)
 # selftune — Skill Observability & Continuous Improvement CLI
 [![npm version](https://img.shields.io/npm/v/selftune)](https://www.npmjs.com/package/selftune)
-[![CI](https://github.com/WellDunDun/douala/actions/workflows/ci.yml/badge.svg)](https://github.com/WellDunDun/douala/actions/workflows/ci.yml)
+[![CI](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml/badge.svg)](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
-[![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)]()
+[![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)](https://www.npmjs.com/package/selftune?activeTab=dependencies)
 [![Bun](https://img.shields.io/badge/runtime-bun%20%7C%20node-black)](https://bun.sh)
 Observe real sessions, detect missed triggers, grade execution quality, and automatically evolve skill descriptions toward the language real users actually use.
@@ -54,7 +63,25 @@ selftune closes this feedback loop.
 ---
-## Quick Start
+## Setup
+### 1. Add the skill
+```bash
+npx skills add WellDunDun/selftune
+```
+### 2. Initialize
+Tell your agent: **"initialize selftune"**
+The agent will install the CLI (`npm install -g selftune`) if needed, run `selftune init` to bootstrap config, install hooks, and verify with `selftune doctor`.
+---
+## Development
+For contributors running from source.
 ### 1. Initialize
@@ -68,7 +95,7 @@ Use `--agent claude_code|codex|opencode` to override detection, `--llm-mode agen
 ### 4. Install hooks (Claude Code)
-If `init` reports hooks are not installed, merge the entries from `skill/settings_snippet.json` into `~/.claude/settings.json`. Replace `/PATH/TO/` with the absolute path to this repository.
+If `init` reports hooks are not installed, merge the entries from `skill/settings_snippet.json` into `~/.claude/settings.json`. Derive hook script paths from the `cli_path` field in `~/.selftune/config.json` — the hooks directory is at `dirname(cli_path)/hooks/`.
 ### 5. Verify setup
@@ -112,12 +139,15 @@ selftune <command> [options]
 | `evolve --skill <name> --skill-path <path>` | Analyze failures, propose and deploy improved description |
 | `rollback --skill <name> --skill-path <path>` | Restore pre-evolution description |
 | `watch --skill <name> --skill-path <path>` | Monitor post-deploy pass rates, detect regressions |
+| `status` | Show skill health summary (pass rates, trends, missed queries) |
+| `last` | Show quick insight from the most recent session |
 | `doctor` | Health checks on logs, hooks, config, and schema |
+| `dashboard` | Open skill-health-centric HTML dashboard in browser |
 | `ingest-codex` | Batch ingest Codex rollout logs |
 | `ingest-opencode` | Backfill historical OpenCode sessions from SQLite |
 | `wrap-codex -- <args>` | Real-time Codex wrapper with telemetry |
-No separate API key required — grading and evolution use whatever agent CLI you already have installed. Set `ANTHROPIC_API_KEY` to use the API directly instead.
+No separate API key required — grading and evolution use whatever agent CLI you already have installed (Claude Code, Codex, or OpenCode).
 See `skill/Workflows/` for detailed step-by-step guides for each command.
@@ -185,6 +215,9 @@ cli/selftune/
 ├── init.ts                      Agent detection, config bootstrap
 ├── types.ts, constants.ts       Shared interfaces and constants
 ├── observability.ts             Health checks (doctor command)
+├── status.ts                    Skill health summary (status command)
+├── last.ts                      Last session insight (last command)
+├── dashboard.ts                 HTML dashboard builder (dashboard command)
 ├── utils/                       JSONL, transcript parsing, LLM calls, schema validation
 ├── hooks/                       Claude Code + OpenCode telemetry capture
 ├── ingestors/                   Codex adapters + OpenCode backfill
@@ -193,6 +226,9 @@ cli/selftune/
 ├── evolution/                   Failure extraction, proposal, validation, deploy, rollback
 └── monitoring/                  Post-deploy regression detection
+dashboard/
+└── index.html                   Skill-health-centric HTML dashboard template
 skill/
 ├── SKILL.md                     Routing table (~120 lines)
 ├── settings_snippet.json        Claude Code hook config template
@@ -248,6 +284,26 @@ Zero runtime dependencies. Uses Bun built-ins only.
 ---
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, architecture rules, and PR guidelines.
+Please follow our [Code of Conduct](CODE_OF_CONDUCT.md).
+---
+## Security
+To report a vulnerability, see [SECURITY.md](SECURITY.md).
+---
+## Sponsor
+If selftune saves you time, consider [sponsoring the project](https://github.com/sponsors/WellDunDun).
+---
 ## Milestones
 | Version | Scope | Status |
@@ -257,3 +313,4 @@ Zero runtime dependencies. Uses Bun built-ins only.
 | v0.3 | Evolution loop (propose, validate, deploy, rollback) | Done |
 | v0.4 | Post-deploy monitoring, regression detection | Done |
 | v0.5 | Agent-first skill restructure, `init` command, config bootstrap | Done |
+| v0.6 | Three-layer observability: `status`, `last`, redesigned dashboard | Done |

package/bin/selftune.cjs CHANGED Viewed

@@ -15,7 +15,9 @@ for (const [cmd, args] of runners) {
     execFileSync(cmd, args, { stdio: "inherit" });
     process.exit(0);
   } catch (e) {
-    if (e.status !== undefined) {
+    // If the runner exits non-zero, propagate that status.
+    // If the runner is not found (ENOENT), e.status is null — continue to next runner.
+    if (e.status != null) {
       process.exit(e.status);
     }
   }

package/cli/selftune/constants.ts CHANGED Viewed

@@ -63,9 +63,3 @@ export const REQUIRED_FIELDS: Record<string, Set<string>> = {
 /** Agent CLI candidates in detection order. */
 export const AGENT_CANDIDATES = ["claude", "codex", "opencode"] as const;
-/** Anthropic API URL for direct grading. */
-export const API_URL = "https://api.anthropic.com/v1/messages";
-/** Default model for direct API grading. */
-export const MODEL = "claude-sonnet-4-20250514";

package/cli/selftune/dashboard.ts ADDED Viewed

@@ -0,0 +1,176 @@
+/**
+ * selftune dashboard — Exports JSONL data into a standalone HTML viewer.
+ *
+ * Usage:
+ *   selftune dashboard              — Open dashboard in default browser
+ *   selftune dashboard --export     — Export data-embedded HTML to stdout
+ *   selftune dashboard --out FILE   — Write data-embedded HTML to FILE
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { dirname, join, resolve } from "node:path";
+import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
+import { getLastDeployedProposal, readAuditTrail } from "./evolution/audit.js";
+import { computeMonitoringSnapshot } from "./monitoring/watch.js";
+import type {
+  EvolutionAuditEntry,
+  QueryLogRecord,
+  SessionTelemetryRecord,
+  SkillUsageRecord,
+} from "./types.js";
+import { readJsonl } from "./utils/jsonl.js";
+/**
+ * Locate the dashboard HTML template on disk.
+ *
+ * Probes, in order: `../dashboard/index.html` and `dashboard/index.html`
+ * relative to the parent of this module's directory, then
+ * `dashboard/index.html` resolved against the current working directory.
+ *
+ * @returns Path of the first candidate that exists.
+ * @throws Error when none of the candidates exists.
+ */
+function findViewerHTML(): string {
+  const moduleParent = dirname(import.meta.dir);
+  const searchPaths = [
+    join(moduleParent, "..", "dashboard", "index.html"),
+    join(moduleParent, "dashboard", "index.html"),
+    resolve("dashboard", "index.html"),
+  ];
+  const found = searchPaths.find((candidate) => existsSync(candidate));
+  if (found === undefined) {
+    throw new Error("Could not find dashboard/index.html. Ensure it exists in the selftune repo.");
+  }
+  return found;
+}
+/**
+ * Build the standalone dashboard document: the HTML template with all log data
+ * and derived summaries embedded as a JSON `<script>` block.
+ *
+ * Reads the telemetry, skill-usage, query and evolution-audit JSONL logs. When
+ * all four are empty, prints the checked paths to stderr and exits the process
+ * with status 1.
+ *
+ * @returns The template HTML with `<script id="embedded-data">` inserted
+ *   immediately before the first `</body>`. A template without `</body>` is
+ *   returned unchanged.
+ * @throws Error (from findViewerHTML) when the template file cannot be located.
+ */
+function buildEmbeddedHTML(): string {
+  const template = readFileSync(findViewerHTML(), "utf-8");
+  const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
+  const skills = readJsonl<SkillUsageRecord>(SKILL_LOG);
+  const queries = readJsonl<QueryLogRecord>(QUERY_LOG);
+  const evolution = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);
+  const totalRecords = telemetry.length + skills.length + queries.length + evolution.length;
+  if (totalRecords === 0) {
+    console.error("No log data found. Run some sessions first.");
+    console.error(`  Checked: ${TELEMETRY_LOG}`);
+    console.error(`           ${SKILL_LOG}`);
+    console.error(`           ${QUERY_LOG}`);
+    console.error(`           ${EVOLUTION_AUDIT_LOG}`);
+    process.exit(1);
+  }
+  // Compute per-skill monitoring snapshots
+  const skillNames = [...new Set(skills.map((r) => r.skill_name))];
+  const snapshots: Record<string, ReturnType<typeof computeMonitoringSnapshot>> = {};
+  for (const name of skillNames) {
+    const lastDeployed = getLastDeployedProposal(name);
+    // Baseline falls back to 0.5 when the skill has no deployed proposal with an eval snapshot.
+    const baselinePassRate = lastDeployed?.eval_snapshot?.pass_rate ?? 0.5;
+    snapshots[name] = computeMonitoringSnapshot(
+      name,
+      telemetry,
+      skills,
+      queries,
+      // NOTE(review): presumably the window size, i.e. "consider every session" — confirm
+      // against computeMonitoringSnapshot's signature.
+      telemetry.length,
+      baselinePassRate,
+    );
+  }
+  // Compute unmatched queries: logged queries whose normalized (lower-cased, trimmed) text
+  // never appears on a skill-usage record marked as triggered.
+  const triggeredQueries = new Set(
+    skills.filter((r) => r.triggered).map((r) => r.query.toLowerCase().trim()),
+  );
+  const unmatched = queries
+    .filter((q) => !triggeredQueries.has(q.query.toLowerCase().trim()))
+    .map((q) => ({
+      timestamp: q.timestamp,
+      session_id: q.session_id,
+      query: q.query,
+    }));
+  // Compute pending proposals: first collect every action recorded per proposal_id.
+  const auditTrail = readAuditTrail();
+  const proposalStatus: Record<string, string[]> = {};
+  for (const e of auditTrail) {
+    if (!proposalStatus[e.proposal_id]) proposalStatus[e.proposal_id] = [];
+    proposalStatus[e.proposal_id].push(e.action);
+  }
+  // Deduplicate by proposal_id: one entry per pending proposal.
+  // A proposal is pending when none of its recorded actions is terminal; the entry kept is
+  // its first "created"/"validated" audit record in trail order.
+  const terminalActions = new Set(["deployed", "rejected", "rolled_back"]);
+  const seenProposals = new Set<string>();
+  const pendingProposals = auditTrail.filter((e) => {
+    if (e.action !== "created" && e.action !== "validated") return false;
+    if (seenProposals.has(e.proposal_id)) return false;
+    const actions = proposalStatus[e.proposal_id] || [];
+    const isPending = !actions.some((a: string) => terminalActions.has(a));
+    if (isPending) seenProposals.add(e.proposal_id);
+    return isPending;
+  });
+  const data = {
+    telemetry,
+    skills,
+    queries,
+    evolution,
+    computed: {
+      snapshots,
+      unmatched,
+      pendingProposals,
+    },
+  };
+  // Inject embedded data right before </body>.
+  // Escape every "<" as \u003c so that nothing in the logged data ("</script>", "</script >",
+  // "<!--<script", ...) can terminate or re-enter the script element. \u003c is a standard
+  // JSON string escape, so JSON parsers decode it back to "<".
+  const safeJson = JSON.stringify(data).replace(/</g, "\\u003c");
+  const dataScript = `<script id="embedded-data" type="application/json">${safeJson}</script>`;
+  // Use a replacer function: with a string replacement, "$&", "$'", "$$" etc. occurring in
+  // the logged data would be expanded as substitution patterns and corrupt the JSON.
+  return template.replace("</body>", () => `${dataScript}\n</body>`);
+}
+/**
+ * CLI entry point for `selftune dashboard`.
+ *
+ * Flags are read from `process.argv` and handled in this order:
+ *   --help / -h   print usage and exit with status 0
+ *   --export      write the data-embedded HTML to stdout and return
+ *   --out FILE    write the data-embedded HTML to FILE and return
+ *   (none)        save to ~/.selftune/dashboard.html, try to open it with
+ *                 `open` (darwin) or `xdg-open` (linux), then exit with status 0
+ *
+ * If the opener is unavailable, fails, or the platform is neither darwin nor
+ * linux, a `file://` path is printed so the user can open the file manually.
+ */
+export async function cliMain(): Promise<void> {
+  const argv = process.argv.slice(2);
+  const wantsHelp = argv.includes("--help") || argv.includes("-h");
+  if (wantsHelp) {
+    console.log(`selftune dashboard — Visual data dashboard
+Usage:
+  selftune dashboard              Open dashboard in default browser
+  selftune dashboard --export     Export data-embedded HTML to stdout
+  selftune dashboard --out FILE   Write data-embedded HTML to FILE`);
+    process.exit(0);
+  }
+  // --export: stream the document to stdout; no file is written.
+  if (argv.includes("--export")) {
+    process.stdout.write(buildEmbeddedHTML());
+    return;
+  }
+  // --out FILE: the path is the argument immediately following the flag.
+  const outFlagPos = argv.indexOf("--out");
+  if (outFlagPos >= 0) {
+    const destination = argv[outFlagPos + 1];
+    if (!destination) {
+      console.error("--out requires a file path argument");
+      process.exit(1);
+    }
+    writeFileSync(destination, buildEmbeddedHTML(), "utf-8");
+    console.log(`Dashboard written to ${destination}`);
+    return;
+  }
+  // Default: write to ~/.selftune/dashboard.html and open in browser
+  const outputDir = join(homedir(), ".selftune");
+  if (!existsSync(outputDir)) {
+    mkdirSync(outputDir, { recursive: true });
+  }
+  const dashboardFile = join(outputDir, "dashboard.html");
+  writeFileSync(dashboardFile, buildEmbeddedHTML(), "utf-8");
+  console.log(`Dashboard saved to ${dashboardFile}`);
+  console.log("Opening in browser...");
+  try {
+    let opener: string | null = null;
+    if (process.platform === "darwin") {
+      opener = "open";
+    } else if (process.platform === "linux") {
+      opener = "xdg-open";
+    }
+    if (!opener) throw new Error("Unsupported platform");
+    const child = Bun.spawn([opener, dashboardFile], { stdio: ["ignore", "ignore", "ignore"] });
+    await child.exited;
+    if (child.exitCode !== 0) throw new Error(`Failed to launch ${opener}`);
+  } catch {
+    // Any failure above degrades to a manual-open hint rather than an error exit.
+    console.log(`Open manually: file://${dashboardFile}`);
+  }
+  process.exit(0);
+}

package/cli/selftune/evolution/evolve.ts CHANGED Viewed

@@ -23,8 +23,8 @@ import { readJsonl } from "../utils/jsonl.js";
 import { appendAuditEntry } from "./audit.js";
 import { extractFailurePatterns } from "./extract-patterns.js";
 import { generateProposal } from "./propose-description.js";
-import { validateProposal } from "./validate-proposal.js";
 import type { ValidationResult } from "./validate-proposal.js";
+import { validateProposal } from "./validate-proposal.js";
 // ---------------------------------------------------------------------------
 // Types
@@ -34,8 +34,7 @@ export interface EvolveOptions {
   skillName: string;
   skillPath: string;
   evalSetPath?: string;
-  mode: "agent" | "api";
-  agent?: string;
+  agent: string;
   dryRun: boolean;
   confidenceThreshold: number; // default 0.6
   maxIterations: number; // default 3
@@ -88,16 +87,8 @@ export async function evolve(
   options: EvolveOptions,
   _deps: EvolveDeps = {},
 ): Promise<EvolveResult> {
-  const {
-    skillName,
-    skillPath,
-    evalSetPath,
-    mode,
-    agent,
-    dryRun,
-    confidenceThreshold,
-    maxIterations,
-  } = options;
+  const { skillName, skillPath, evalSetPath, agent, dryRun, confidenceThreshold, maxIterations } =
+    options;
   // Resolve injectable dependencies with real-import fallbacks
   const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
@@ -201,7 +192,6 @@ export async function evolve(
         effectiveMissedQueries,
         skillName,
         skillPath,
-        mode,
         agent,
       );
@@ -238,7 +228,7 @@ export async function evolve(
       }
       // Step 10: Validate against eval set
-      const validation = await _validateProposal(proposal, evalSet, mode, agent);
+      const validation = await _validateProposal(proposal, evalSet, agent);
       lastValidation = validation;
       // Step 11: Audit "validated"
@@ -347,7 +337,6 @@ export async function cliMain(): Promise<void> {
       skill: { type: "string" },
       "skill-path": { type: "string" },
       "eval-set": { type: "string" },
-      mode: { type: "string", default: "agent" },
       agent: { type: "string" },
       "dry-run": { type: "boolean", default: false },
       confidence: { type: "string", default: "0.6" },
@@ -367,7 +356,6 @@ Options:
   --skill             Skill name (required)
   --skill-path        Path to SKILL.md (required)
   --eval-set          Path to eval set JSON (optional, builds from logs if omitted)
-  --mode              Execution mode: "agent" or "api" (default: "agent")
   --agent             Agent CLI to use (claude, codex, opencode)
   --dry-run           Validate proposal without deploying
   --confidence        Confidence threshold 0.0-1.0 (default: 0.6)
@@ -381,14 +369,37 @@ Options:
     process.exit(1);
   }
-  const mode = values.mode === "api" ? "api" : "agent";
+  const { detectAgent } = await import("../utils/llm-call.js");
+  const requestedAgent = values.agent;
+  if (requestedAgent && !Bun.which(requestedAgent)) {
+    console.error(
+      JSON.stringify({
+        level: "error",
+        code: "agent_not_in_path",
+        message: `Agent CLI '${requestedAgent}' not found in PATH.`,
+        action: "Install it or omit --agent to use auto-detection.",
+      }),
+    );
+    process.exit(1);
+  }
+  const agent = requestedAgent ?? detectAgent();
+  if (!agent) {
+    console.error(
+      JSON.stringify({
+        level: "error",
+        code: "agent_not_found",
+        message: "No agent CLI (claude/codex/opencode) found in PATH.",
+        action: "Install Claude Code, Codex, or OpenCode.",
+      }),
+    );
+    process.exit(1);
+  }
   const result = await evolve({
     skillName: values.skill,
     skillPath: values["skill-path"],
     evalSetPath: values["eval-set"],
-    mode,
-    agent: values.agent,
+    agent,
     dryRun: values["dry-run"] ?? false,
     confidenceThreshold: Number.parseFloat(values.confidence ?? "0.6"),
     maxIterations: Number.parseInt(values["max-iterations"] ?? "3", 10),

package/cli/selftune/evolution/propose-description.ts CHANGED Viewed

@@ -120,11 +120,10 @@ export async function generateProposal(
   missedQueries: string[],
   skillName: string,
   skillPath: string,
-  mode: "agent" | "api",
-  agent?: string,
+  agent: string,
 ): Promise<EvolutionProposal> {
   const prompt = buildProposalPrompt(currentDescription, failurePatterns, missedQueries, skillName);
-  const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, mode, agent);
+  const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, agent);
   const { proposed_description, rationale, confidence } = parseProposalResponse(rawResponse);
   return {

package/cli/selftune/evolution/rollback.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  * 3. Recording a "rolled_back" entry in the audit trail
  */
-import { existsSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from "node:fs";
+import { existsSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
 import { basename, dirname, join } from "node:path";
 import { parseArgs } from "node:util";

package/cli/selftune/evolution/validate-proposal.ts CHANGED Viewed

@@ -61,8 +61,7 @@ export function parseTriggerResponse(response: string): boolean {
 export async function validateProposal(
   proposal: EvolutionProposal,
   evalSet: EvalEntry[],
-  mode: "agent" | "api",
-  agent?: string,
+  agent: string,
 ): Promise<ValidationResult> {
   if (evalSet.length === 0) {
     return {
@@ -85,14 +84,14 @@ export async function validateProposal(
   for (const entry of evalSet) {
     // Check with original description
     const beforePrompt = buildTriggerCheckPrompt(proposal.original_description, entry.query);
-    const beforeRaw = await callLlm(systemPrompt, beforePrompt, mode, agent);
+    const beforeRaw = await callLlm(systemPrompt, beforePrompt, agent);
     const beforeTriggered = parseTriggerResponse(beforeRaw);
     const beforePass =
       (entry.should_trigger && beforeTriggered) || (!entry.should_trigger && !beforeTriggered);
     // Check with proposed description
     const afterPrompt = buildTriggerCheckPrompt(proposal.proposed_description, entry.query);
-    const afterRaw = await callLlm(systemPrompt, afterPrompt, mode, agent);
+    const afterRaw = await callLlm(systemPrompt, afterPrompt, agent);
     const afterTriggered = parseTriggerResponse(afterRaw);
     const afterPass =
       (entry.should_trigger && afterTriggered) || (!entry.should_trigger && !afterTriggered);

package/cli/selftune/grading/grade-session.ts CHANGED Viewed

@@ -5,9 +5,7 @@
  * Rubric-based grader for Claude Code skill sessions.
  * Migrated from grade_session.py.
  *
- * Two modes:
- *   1. --use-agent  (default when no ANTHROPIC_API_KEY) — invokes installed agent CLI
- *   2. --use-api    (default when ANTHROPIC_API_KEY set) — calls Anthropic API directly
+ * Grades via installed agent CLI (claude/codex/opencode).
  */
 import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
@@ -26,7 +24,6 @@ import {
   detectAgent as _detectAgent,
   stripMarkdownFences as _stripMarkdownFences,
   callViaAgent,
-  callViaApi,
 } from "../utils/llm-call.js";
 import { readExcerpt } from "../utils/transcript.js";
@@ -226,22 +223,6 @@ export async function gradeViaAgent(prompt: string, agent: string): Promise<Grad
   }
 }
-// ---------------------------------------------------------------------------
-// Grading via direct Anthropic API
-// ---------------------------------------------------------------------------
-export async function gradeViaApi(prompt: string): Promise<GraderOutput> {
-  const raw = await callViaApi(GRADER_SYSTEM, prompt);
-  try {
-    return JSON.parse(_stripMarkdownFences(raw)) as GraderOutput;
-  } catch (err) {
-    throw new Error(
-      `gradeViaApi: failed to parse LLM output as JSON. Raw (truncated): ${raw.slice(0, 200)}`,
-      { cause: err },
-    );
-  }
-}
 // ---------------------------------------------------------------------------
 // Result assembly
 // ---------------------------------------------------------------------------
@@ -306,8 +287,6 @@ export async function cliMain(): Promise<void> {
       transcript: { type: "string" },
       "telemetry-log": { type: "string", default: TELEMETRY_LOG },
       output: { type: "string", default: "grading.json" },
-      "use-agent": { type: "boolean", default: false },
-      "use-api": { type: "boolean", default: false },
       agent: { type: "string" },
       "show-transcript": { type: "boolean", default: false },
     },
@@ -320,50 +299,31 @@ export async function cliMain(): Promise<void> {
     process.exit(1);
   }
-  // --- Determine mode ---
-  const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY);
-  let mode: "agent" | "api";
+  // --- Determine agent ---
   let agent: string | null = null;
-  if (values["use-api"]) {
-    mode = "api";
-  } else if (values["use-agent"]) {
-    mode = "agent";
-  } else {
-    const availableAgent = _detectAgent();
-    if (availableAgent) {
-      mode = "agent";
-    } else if (hasApiKey) {
-      mode = "api";
-    } else {
+  const validAgents = ["claude", "codex", "opencode"];
+  if (values.agent) {
+    if (!validAgents.includes(values.agent)) {
       console.error(
-        "[ERROR] No agent CLI (claude/codex/opencode) found in PATH " +
-          "and ANTHROPIC_API_KEY not set.\n" +
-          "Install Claude Code, Codex, or OpenCode, or set ANTHROPIC_API_KEY.",
+        `[ERROR] Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
       );
       process.exit(1);
     }
+    agent = values.agent;
+  } else {
+    agent = _detectAgent();
   }
-  if (mode === "agent") {
-    const validAgents = ["claude", "codex", "opencode"];
-    if (values.agent && validAgents.includes(values.agent)) {
-      agent = values.agent;
-    } else {
-      agent = _detectAgent();
-    }
-    if (!agent) {
-      console.error(
-        "[ERROR] --use-agent specified but no agent found in PATH.\n" +
-          "Install claude, codex, or opencode, or use --use-api instead.",
-      );
-      process.exit(1);
-    }
-    console.error(`[INFO] Grading via agent: ${agent}`);
-  } else {
-    console.error("[INFO] Grading via direct Anthropic API");
+  if (!agent) {
+    console.error(
+      "[ERROR] No agent CLI (claude/codex/opencode) found in PATH.\n" +
+        "Install Claude Code, Codex, or OpenCode.",
+    );
+    process.exit(1);
   }
+  console.error(`[INFO] Grading via agent: ${agent}`);
   // --- Resolve expectations ---
   let expectations: string[] = [];
   if (values["evals-json"] && values["eval-id"] != null) {
@@ -427,11 +387,7 @@ export async function cliMain(): Promise<void> {
   let graderOutput: GraderOutput;
   try {
-    if (mode === "agent") {
-      graderOutput = await gradeViaAgent(prompt, agent as string);
-    } else {
-      graderOutput = await gradeViaApi(prompt);
-    }
+    graderOutput = await gradeViaAgent(prompt, agent);
   } catch (e) {
     console.error(`[ERROR] Grading failed: ${e}`);
     process.exit(1);