selftune 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
 
8
+ ## [0.6.0] - 2026-03-01
9
+
10
+ ### Added
11
+
12
+ - `selftune status` — CLI skill health summary with pass rates, trends, and system health
13
+ - `selftune last` — Quick insight from the most recent session
14
+ - `selftune dashboard` — Skill-health-centric HTML dashboard with grid view and drill-down
15
+ - CI/CD workflows: publish, auto-bump, CodeQL, scorecard
16
+ - FOSS governance: LICENSE (MIT), CODE_OF_CONDUCT, CONTRIBUTING, SECURITY
17
+ - npm package configuration with CJS bin entry point
18
+
8
19
  ## [0.1.0] - 2026-02-28
9
20
 
10
21
  ### Added
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 WellDunDun
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,9 +1,18 @@
1
+ [![CI](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml/badge.svg)](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml)
2
+ [![CodeQL](https://github.com/WellDunDun/selftune/actions/workflows/codeql.yml/badge.svg)](https://github.com/WellDunDun/selftune/actions/workflows/codeql.yml)
3
+ [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/WellDunDun/selftune/badge)](https://securityscorecards.dev/viewer/?uri=github.com/WellDunDun/selftune)
4
+ [![npm version](https://img.shields.io/npm/v/selftune)](https://www.npmjs.com/package/selftune)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
6
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.0-blue.svg)](https://www.typescriptlang.org/)
7
+ [![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)](https://www.npmjs.com/package/selftune?activeTab=dependencies)
8
+ [![Bun](https://img.shields.io/badge/runtime-bun%20%7C%20node-black)](https://bun.sh)
9
+
1
10
  # selftune — Skill Observability & Continuous Improvement CLI
2
11
 
3
12
  [![npm version](https://img.shields.io/npm/v/selftune)](https://www.npmjs.com/package/selftune)
4
- [![CI](https://github.com/WellDunDun/douala/actions/workflows/ci.yml/badge.svg)](https://github.com/WellDunDun/douala/actions/workflows/ci.yml)
13
+ [![CI](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml/badge.svg)](https://github.com/WellDunDun/selftune/actions/workflows/ci.yml)
5
14
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
- [![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)]()
15
+ [![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)](https://www.npmjs.com/package/selftune?activeTab=dependencies)
7
16
  [![Bun](https://img.shields.io/badge/runtime-bun%20%7C%20node-black)](https://bun.sh)
8
17
 
9
18
  Observe real sessions, detect missed triggers, grade execution quality, and automatically evolve skill descriptions toward the language real users actually use.
@@ -54,7 +63,25 @@ selftune closes this feedback loop.
54
63
 
55
64
  ---
56
65
 
57
- ## Quick Start
66
+ ## Setup
67
+
68
+ ### 1. Add the skill
69
+
70
+ ```bash
71
+ npx skills add WellDunDun/selftune
72
+ ```
73
+
74
+ ### 2. Initialize
75
+
76
+ Tell your agent: **"initialize selftune"**
77
+
78
+ The agent will install the CLI (`npm install -g selftune`) if needed, run `selftune init` to bootstrap config, install hooks, and verify with `selftune doctor`.
79
+
80
+ ---
81
+
82
+ ## Development
83
+
84
+ For contributors running from source.
58
85
 
59
86
  ### 1. Initialize
60
87
 
@@ -68,7 +95,7 @@ Use `--agent claude_code|codex|opencode` to override detection, `--llm-mode agen
68
95
 
69
96
  ### 4. Install hooks (Claude Code)
70
97
 
71
- If `init` reports hooks are not installed, merge the entries from `skill/settings_snippet.json` into `~/.claude/settings.json`. Replace `/PATH/TO/` with the absolute path to this repository.
98
+ If `init` reports hooks are not installed, merge the entries from `skill/settings_snippet.json` into `~/.claude/settings.json`. Derive hook script paths from the `cli_path` field in `~/.selftune/config.json`: the hooks directory is at `dirname(cli_path)/hooks/`.
72
99
 
73
100
  ### 5. Verify setup
74
101
 
@@ -112,12 +139,15 @@ selftune <command> [options]
112
139
  | `evolve --skill <name> --skill-path <path>` | Analyze failures, propose and deploy improved description |
113
140
  | `rollback --skill <name> --skill-path <path>` | Restore pre-evolution description |
114
141
  | `watch --skill <name> --skill-path <path>` | Monitor post-deploy pass rates, detect regressions |
142
+ | `status` | Show skill health summary (pass rates, trends, missed queries) |
143
+ | `last` | Show quick insight from the most recent session |
115
144
  | `doctor` | Health checks on logs, hooks, config, and schema |
145
+ | `dashboard` | Open skill-health-centric HTML dashboard in browser |
116
146
  | `ingest-codex` | Batch ingest Codex rollout logs |
117
147
  | `ingest-opencode` | Backfill historical OpenCode sessions from SQLite |
118
148
  | `wrap-codex -- <args>` | Real-time Codex wrapper with telemetry |
119
149
 
120
- No separate API key required — grading and evolution use whatever agent CLI you already have installed. Set `ANTHROPIC_API_KEY` to use the API directly instead.
150
+ No separate API key required — grading and evolution use whatever agent CLI you already have installed (Claude Code, Codex, or OpenCode).
121
151
 
122
152
  See `skill/Workflows/` for detailed step-by-step guides for each command.
123
153
 
@@ -185,6 +215,9 @@ cli/selftune/
185
215
  ├── init.ts Agent detection, config bootstrap
186
216
  ├── types.ts, constants.ts Shared interfaces and constants
187
217
  ├── observability.ts Health checks (doctor command)
218
+ ├── status.ts Skill health summary (status command)
219
+ ├── last.ts Last session insight (last command)
220
+ ├── dashboard.ts HTML dashboard builder (dashboard command)
188
221
  ├── utils/ JSONL, transcript parsing, LLM calls, schema validation
189
222
  ├── hooks/ Claude Code + OpenCode telemetry capture
190
223
  ├── ingestors/ Codex adapters + OpenCode backfill
@@ -193,6 +226,9 @@ cli/selftune/
193
226
  ├── evolution/ Failure extraction, proposal, validation, deploy, rollback
194
227
  └── monitoring/ Post-deploy regression detection
195
228
 
229
+ dashboard/
230
+ └── index.html Skill-health-centric HTML dashboard template
231
+
196
232
  skill/
197
233
  ├── SKILL.md Routing table (~120 lines)
198
234
  ├── settings_snippet.json Claude Code hook config template
@@ -248,6 +284,26 @@ Zero runtime dependencies. Uses Bun built-ins only.
248
284
 
249
285
  ---
250
286
 
287
+ ## Contributing
288
+
289
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, architecture rules, and PR guidelines.
290
+
291
+ Please follow our [Code of Conduct](CODE_OF_CONDUCT.md).
292
+
293
+ ---
294
+
295
+ ## Security
296
+
297
+ To report a vulnerability, see [SECURITY.md](SECURITY.md).
298
+
299
+ ---
300
+
301
+ ## Sponsor
302
+
303
+ If selftune saves you time, consider [sponsoring the project](https://github.com/sponsors/WellDunDun).
304
+
305
+ ---
306
+
251
307
  ## Milestones
252
308
 
253
309
  | Version | Scope | Status |
@@ -257,3 +313,4 @@ Zero runtime dependencies. Uses Bun built-ins only.
257
313
  | v0.3 | Evolution loop (propose, validate, deploy, rollback) | Done |
258
314
  | v0.4 | Post-deploy monitoring, regression detection | Done |
259
315
  | v0.5 | Agent-first skill restructure, `init` command, config bootstrap | Done |
316
+ | v0.6 | Three-layer observability: `status`, `last`, redesigned dashboard | Done |
package/bin/selftune.cjs CHANGED
@@ -15,7 +15,9 @@ for (const [cmd, args] of runners) {
15
15
  execFileSync(cmd, args, { stdio: "inherit" });
16
16
  process.exit(0);
17
17
  } catch (e) {
18
- if (e.status !== undefined) {
18
+ // If the runner exits non-zero, propagate that status.
19
+ // If the runner is not found (ENOENT), e.status is null — continue to next runner.
20
+ if (e.status != null) {
19
21
  process.exit(e.status);
20
22
  }
21
23
  }
@@ -63,9 +63,3 @@ export const REQUIRED_FIELDS: Record<string, Set<string>> = {
63
63
 
64
64
  /** Agent CLI candidates in detection order. */
65
65
  export const AGENT_CANDIDATES = ["claude", "codex", "opencode"] as const;
66
-
67
- /** Anthropic API URL for direct grading. */
68
- export const API_URL = "https://api.anthropic.com/v1/messages";
69
-
70
- /** Default model for direct API grading. */
71
- export const MODEL = "claude-sonnet-4-20250514";
@@ -0,0 +1,179 @@
1
+ /**
2
+ * selftune dashboard — Exports JSONL data into a standalone HTML viewer.
3
+ *
4
+ * Usage:
5
+ * selftune dashboard — Open dashboard in default browser
6
+ * selftune dashboard --export — Export data-embedded HTML to stdout
7
+ * selftune dashboard --out FILE — Write data-embedded HTML to FILE
8
+ */
9
+
10
+ import { execSync } from "node:child_process";
11
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
12
+ import { homedir } from "node:os";
13
+ import { dirname, join, resolve } from "node:path";
14
+ import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
15
+ import { getLastDeployedProposal, readAuditTrail } from "./evolution/audit.js";
16
+ import { computeMonitoringSnapshot } from "./monitoring/watch.js";
17
+ import type {
18
+ EvolutionAuditEntry,
19
+ QueryLogRecord,
20
+ SessionTelemetryRecord,
21
+ SkillUsageRecord,
22
+ } from "./types.js";
23
+ import { readJsonl } from "./utils/jsonl.js";
24
+
25
/**
 * Locate the dashboard HTML template on disk.
 *
 * Candidate locations are probed in this order and the first one that exists
 * wins:
 *   1. `../dashboard/index.html` relative to the parent of `import.meta.dir`
 *   2. `dashboard/index.html` inside the parent of `import.meta.dir`
 *   3. `dashboard/index.html` resolved against the current working directory
 *
 * @returns The first candidate path for which `existsSync` is true.
 * @throws Error when none of the candidates exists.
 */
function findViewerHTML(): string {
  // NOTE(review): `import.meta.dir` is a Bun extension (directory of this module) — confirm runtime.
  const moduleParent = dirname(import.meta.dir);
  const located = [
    join(moduleParent, "..", "dashboard", "index.html"),
    join(moduleParent, "dashboard", "index.html"),
    resolve("dashboard", "index.html"),
  ].find((candidate) => existsSync(candidate));

  if (located === undefined) {
    throw new Error("Could not find dashboard/index.html. Ensure it exists in the selftune repo.");
  }
  return located;
}
37
+
38
/**
 * Serialize `data` as JSON and embed it in `template` as an inert
 * `<script id="embedded-data" type="application/json">` element.
 *
 * The element is inserted immediately before the first `</body>` in the
 * template; when the template has no `</body>` it is appended at the end so
 * the data is never silently dropped.
 *
 * @param template - Raw HTML of the dashboard viewer.
 * @param data - JSON-serializable payload to embed.
 * @returns The template with the data element inserted.
 */
function embedDataIntoTemplate(template: string, data: unknown): string {
  // Escape every "<" (not just the literal "</script>") so logged text can
  // never close the script element ("</script >", "</script/") or open an
  // HTML comment ("<!--"). "\u003c" is a valid JSON escape, so JSON.parse on
  // the element's text still yields the original strings.
  const safeJson = JSON.stringify(data).replace(/</g, "\\u003c");
  const dataScript = `<script id="embedded-data" type="application/json">${safeJson}</script>`;

  // Splice by index instead of String.prototype.replace with a string
  // replacement: replace() would expand "$&", "$'", "$`" and "$$" found in
  // the logged data and corrupt the output.
  const bodyClose = template.indexOf("</body>");
  if (bodyClose === -1) {
    return `${template}\n${dataScript}\n`;
  }
  return `${template.slice(0, bodyClose)}${dataScript}\n${template.slice(bodyClose)}`;
}

/**
 * Build the standalone dashboard HTML: the viewer template with all log data
 * and a few derived views embedded as JSON.
 *
 * Reads the telemetry, skill-usage, query and evolution-audit logs. If all
 * four are empty, it reports the checked paths on stderr and exits the
 * process with status 1. Otherwise it derives:
 *   - `snapshots`: one monitoring snapshot per distinct `skill_name`
 *   - `unmatched`: queries whose normalized text never appears on a triggered
 *     skill record
 *   - `pendingProposals`: one audit entry per proposal that has been created
 *     or validated but has no deployed / rejected / rolled_back action
 *
 * @returns The complete HTML document as a string.
 * @throws Error (from `findViewerHTML`) when the template cannot be located.
 */
function buildEmbeddedHTML(): string {
  const template = readFileSync(findViewerHTML(), "utf-8");

  const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
  const skills = readJsonl<SkillUsageRecord>(SKILL_LOG);
  const queries = readJsonl<QueryLogRecord>(QUERY_LOG);
  const evolution = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);

  const totalRecords = telemetry.length + skills.length + queries.length + evolution.length;

  if (totalRecords === 0) {
    console.error("No log data found. Run some sessions first.");
    console.error(`  Checked: ${TELEMETRY_LOG}`);
    console.error(`           ${SKILL_LOG}`);
    console.error(`           ${QUERY_LOG}`);
    console.error(`           ${EVOLUTION_AUDIT_LOG}`);
    process.exit(1);
  }

  // Compute per-skill monitoring snapshots
  const skillNames = [...new Set(skills.map((r) => r.skill_name))];
  const snapshots: Record<string, ReturnType<typeof computeMonitoringSnapshot>> = {};
  for (const name of skillNames) {
    const lastDeployed = getLastDeployedProposal(name);
    // Fall back to 0.5 when the skill has no deployed proposal with an eval snapshot.
    const baselinePassRate = lastDeployed?.eval_snapshot?.pass_rate ?? 0.5;
    // NOTE(review): telemetry.length is presumably the window size (i.e. "all sessions") — confirm
    // against computeMonitoringSnapshot's signature.
    snapshots[name] = computeMonitoringSnapshot(
      name,
      telemetry,
      skills,
      queries,
      telemetry.length,
      baselinePassRate,
    );
  }

  // Compute unmatched queries (comparison is case-insensitive and trims whitespace)
  const triggeredQueries = new Set(
    skills.filter((r) => r.triggered).map((r) => r.query.toLowerCase().trim()),
  );
  const unmatched = queries
    .filter((q) => !triggeredQueries.has(q.query.toLowerCase().trim()))
    .map((q) => ({
      timestamp: q.timestamp,
      session_id: q.session_id,
      query: q.query,
    }));

  // Compute pending proposals. A Map (rather than a plain object) keeps
  // proposal ids from colliding with Object.prototype keys.
  const auditTrail = readAuditTrail();
  const actionsByProposal = new Map<string, string[]>();
  for (const e of auditTrail) {
    const recorded = actionsByProposal.get(e.proposal_id);
    if (recorded) {
      recorded.push(e.action);
    } else {
      actionsByProposal.set(e.proposal_id, [e.action]);
    }
  }
  // Deduplicate by proposal_id: one entry per pending proposal
  const terminalActions = new Set(["deployed", "rejected", "rolled_back"]);
  const seenProposals = new Set<string>();
  const pendingProposals = auditTrail.filter((e) => {
    if (e.action !== "created" && e.action !== "validated") return false;
    if (seenProposals.has(e.proposal_id)) return false;
    const actions = actionsByProposal.get(e.proposal_id) ?? [];
    const isPending = !actions.some((a: string) => terminalActions.has(a));
    if (isPending) seenProposals.add(e.proposal_id);
    return isPending;
  });

  const data = {
    telemetry,
    skills,
    queries,
    evolution,
    computed: {
      snapshots,
      unmatched,
      pendingProposals,
    },
  };

  return embedDataIntoTemplate(template, data);
}
122
+
123
/**
 * CLI entry point for `selftune dashboard`.
 *
 * Modes, selected from `process.argv.slice(2)`:
 *   - `--help` / `-h`: print usage and exit 0.
 *   - `--export`: write the data-embedded HTML to stdout and return.
 *   - `--out FILE`: write the data-embedded HTML to FILE and return; exits 1
 *     when FILE is missing.
 *   - default: write the HTML to `~/.selftune/dashboard.html`, try to open it
 *     with the platform's opener, then exit 0. When the platform has no known
 *     opener, or the opener fails, a `file://` hint is printed instead.
 */
export function cliMain(): void {
  const args = process.argv.slice(2);

  if (args.includes("--help") || args.includes("-h")) {
    console.log(`selftune dashboard — Visual data dashboard

Usage:
  selftune dashboard              Open dashboard in default browser
  selftune dashboard --export     Export data-embedded HTML to stdout
  selftune dashboard --out FILE   Write data-embedded HTML to FILE`);
    process.exit(0);
  }

  if (args.includes("--export")) {
    process.stdout.write(buildEmbeddedHTML());
    return;
  }

  const outIdx = args.indexOf("--out");
  if (outIdx !== -1) {
    const outPath = args[outIdx + 1];
    if (!outPath) {
      console.error("--out requires a file path argument");
      process.exit(1);
    }
    const html = buildEmbeddedHTML();
    writeFileSync(outPath, html, "utf-8");
    console.log(`Dashboard written to ${outPath}`);
    return;
  }

  // Default: write to ~/.selftune/dashboard.html and open in browser.
  const tmpDir = join(homedir(), ".selftune");
  // `recursive: true` makes this a no-op when the directory already exists.
  mkdirSync(tmpDir, { recursive: true });
  const tmpPath = join(tmpDir, "dashboard.html");
  writeFileSync(tmpPath, buildEmbeddedHTML(), "utf-8");

  console.log(`Dashboard saved to ${tmpPath}`);

  // Pick the opener for this platform; `undefined` means we do not know one.
  let openCommand: string | undefined;
  if (process.platform === "darwin") {
    openCommand = `open "${tmpPath}"`;
  } else if (process.platform === "linux") {
    openCommand = `xdg-open "${tmpPath}"`;
  } else if (process.platform === "win32") {
    // The empty "" is the window title argument that `start` expects first.
    openCommand = `start "" "${tmpPath}"`;
  }

  if (openCommand === undefined) {
    // Previously this case claimed to be opening a browser and then did nothing.
    console.log(`Open manually: file://${tmpPath}`);
  } else {
    console.log("Opening in browser...");
    try {
      execSync(openCommand);
    } catch {
      console.log(`Open manually: file://${tmpPath}`);
    }
  }
  process.exit(0);
}
@@ -34,8 +34,7 @@ export interface EvolveOptions {
34
34
  skillName: string;
35
35
  skillPath: string;
36
36
  evalSetPath?: string;
37
- mode: "agent" | "api";
38
- agent?: string;
37
+ agent: string;
39
38
  dryRun: boolean;
40
39
  confidenceThreshold: number; // default 0.6
41
40
  maxIterations: number; // default 3
@@ -88,16 +87,8 @@ export async function evolve(
88
87
  options: EvolveOptions,
89
88
  _deps: EvolveDeps = {},
90
89
  ): Promise<EvolveResult> {
91
- const {
92
- skillName,
93
- skillPath,
94
- evalSetPath,
95
- mode,
96
- agent,
97
- dryRun,
98
- confidenceThreshold,
99
- maxIterations,
100
- } = options;
90
+ const { skillName, skillPath, evalSetPath, agent, dryRun, confidenceThreshold, maxIterations } =
91
+ options;
101
92
 
102
93
  // Resolve injectable dependencies with real-import fallbacks
103
94
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
@@ -201,7 +192,6 @@ export async function evolve(
201
192
  effectiveMissedQueries,
202
193
  skillName,
203
194
  skillPath,
204
- mode,
205
195
  agent,
206
196
  );
207
197
 
@@ -238,7 +228,7 @@ export async function evolve(
238
228
  }
239
229
 
240
230
  // Step 10: Validate against eval set
241
- const validation = await _validateProposal(proposal, evalSet, mode, agent);
231
+ const validation = await _validateProposal(proposal, evalSet, agent);
242
232
  lastValidation = validation;
243
233
 
244
234
  // Step 11: Audit "validated"
@@ -347,7 +337,6 @@ export async function cliMain(): Promise<void> {
347
337
  skill: { type: "string" },
348
338
  "skill-path": { type: "string" },
349
339
  "eval-set": { type: "string" },
350
- mode: { type: "string", default: "agent" },
351
340
  agent: { type: "string" },
352
341
  "dry-run": { type: "boolean", default: false },
353
342
  confidence: { type: "string", default: "0.6" },
@@ -367,7 +356,6 @@ Options:
367
356
  --skill Skill name (required)
368
357
  --skill-path Path to SKILL.md (required)
369
358
  --eval-set Path to eval set JSON (optional, builds from logs if omitted)
370
- --mode Execution mode: "agent" or "api" (default: "agent")
371
359
  --agent Agent CLI to use (claude, codex, opencode)
372
360
  --dry-run Validate proposal without deploying
373
361
  --confidence Confidence threshold 0.0-1.0 (default: 0.6)
@@ -381,14 +369,25 @@ Options:
381
369
  process.exit(1);
382
370
  }
383
371
 
384
- const mode = values.mode === "api" ? "api" : "agent";
372
+ const { detectAgent } = await import("../utils/llm-call.js");
373
+ const agent = values.agent ?? detectAgent();
374
+ if (!agent) {
375
+ console.error(
376
+ JSON.stringify({
377
+ level: "error",
378
+ code: "agent_not_found",
379
+ message: "No agent CLI (claude/codex/opencode) found in PATH.",
380
+ action: "Install Claude Code, Codex, or OpenCode.",
381
+ }),
382
+ );
383
+ process.exit(1);
384
+ }
385
385
 
386
386
  const result = await evolve({
387
387
  skillName: values.skill,
388
388
  skillPath: values["skill-path"],
389
389
  evalSetPath: values["eval-set"],
390
- mode,
391
- agent: values.agent,
390
+ agent,
392
391
  dryRun: values["dry-run"] ?? false,
393
392
  confidenceThreshold: Number.parseFloat(values.confidence ?? "0.6"),
394
393
  maxIterations: Number.parseInt(values["max-iterations"] ?? "3", 10),
@@ -120,11 +120,10 @@ export async function generateProposal(
120
120
  missedQueries: string[],
121
121
  skillName: string,
122
122
  skillPath: string,
123
- mode: "agent" | "api",
124
- agent?: string,
123
+ agent: string,
125
124
  ): Promise<EvolutionProposal> {
126
125
  const prompt = buildProposalPrompt(currentDescription, failurePatterns, missedQueries, skillName);
127
- const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, mode, agent);
126
+ const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, agent);
128
127
  const { proposed_description, rationale, confidence } = parseProposalResponse(rawResponse);
129
128
 
130
129
  return {
@@ -61,8 +61,7 @@ export function parseTriggerResponse(response: string): boolean {
61
61
  export async function validateProposal(
62
62
  proposal: EvolutionProposal,
63
63
  evalSet: EvalEntry[],
64
- mode: "agent" | "api",
65
- agent?: string,
64
+ agent: string,
66
65
  ): Promise<ValidationResult> {
67
66
  if (evalSet.length === 0) {
68
67
  return {
@@ -85,14 +84,14 @@ export async function validateProposal(
85
84
  for (const entry of evalSet) {
86
85
  // Check with original description
87
86
  const beforePrompt = buildTriggerCheckPrompt(proposal.original_description, entry.query);
88
- const beforeRaw = await callLlm(systemPrompt, beforePrompt, mode, agent);
87
+ const beforeRaw = await callLlm(systemPrompt, beforePrompt, agent);
89
88
  const beforeTriggered = parseTriggerResponse(beforeRaw);
90
89
  const beforePass =
91
90
  (entry.should_trigger && beforeTriggered) || (!entry.should_trigger && !beforeTriggered);
92
91
 
93
92
  // Check with proposed description
94
93
  const afterPrompt = buildTriggerCheckPrompt(proposal.proposed_description, entry.query);
95
- const afterRaw = await callLlm(systemPrompt, afterPrompt, mode, agent);
94
+ const afterRaw = await callLlm(systemPrompt, afterPrompt, agent);
96
95
  const afterTriggered = parseTriggerResponse(afterRaw);
97
96
  const afterPass =
98
97
  (entry.should_trigger && afterTriggered) || (!entry.should_trigger && !afterTriggered);
@@ -5,9 +5,7 @@
5
5
  * Rubric-based grader for Claude Code skill sessions.
6
6
  * Migrated from grade_session.py.
7
7
  *
8
- * Two modes:
9
- * 1. --use-agent (default when no ANTHROPIC_API_KEY) — invokes installed agent CLI
10
- * 2. --use-api (default when ANTHROPIC_API_KEY set) — calls Anthropic API directly
8
+ * Grades via installed agent CLI (claude/codex/opencode).
11
9
  */
12
10
 
13
11
  import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
@@ -26,7 +24,6 @@ import {
26
24
  detectAgent as _detectAgent,
27
25
  stripMarkdownFences as _stripMarkdownFences,
28
26
  callViaAgent,
29
- callViaApi,
30
27
  } from "../utils/llm-call.js";
31
28
  import { readExcerpt } from "../utils/transcript.js";
32
29
 
@@ -226,22 +223,6 @@ export async function gradeViaAgent(prompt: string, agent: string): Promise<Grad
226
223
  }
227
224
  }
228
225
 
229
- // ---------------------------------------------------------------------------
230
- // Grading via direct Anthropic API
231
- // ---------------------------------------------------------------------------
232
-
233
- export async function gradeViaApi(prompt: string): Promise<GraderOutput> {
234
- const raw = await callViaApi(GRADER_SYSTEM, prompt);
235
- try {
236
- return JSON.parse(_stripMarkdownFences(raw)) as GraderOutput;
237
- } catch (err) {
238
- throw new Error(
239
- `gradeViaApi: failed to parse LLM output as JSON. Raw (truncated): ${raw.slice(0, 200)}`,
240
- { cause: err },
241
- );
242
- }
243
- }
244
-
245
226
  // ---------------------------------------------------------------------------
246
227
  // Result assembly
247
228
  // ---------------------------------------------------------------------------
@@ -306,8 +287,6 @@ export async function cliMain(): Promise<void> {
306
287
  transcript: { type: "string" },
307
288
  "telemetry-log": { type: "string", default: TELEMETRY_LOG },
308
289
  output: { type: "string", default: "grading.json" },
309
- "use-agent": { type: "boolean", default: false },
310
- "use-api": { type: "boolean", default: false },
311
290
  agent: { type: "string" },
312
291
  "show-transcript": { type: "boolean", default: false },
313
292
  },
@@ -320,50 +299,25 @@ export async function cliMain(): Promise<void> {
320
299
  process.exit(1);
321
300
  }
322
301
 
323
- // --- Determine mode ---
324
- const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY);
325
- let mode: "agent" | "api";
302
+ // --- Determine agent ---
326
303
  let agent: string | null = null;
327
-
328
- if (values["use-api"]) {
329
- mode = "api";
330
- } else if (values["use-agent"]) {
331
- mode = "agent";
304
+ const validAgents = ["claude", "codex", "opencode"];
305
+ if (values.agent && validAgents.includes(values.agent)) {
306
+ agent = values.agent;
332
307
  } else {
333
- const availableAgent = _detectAgent();
334
- if (availableAgent) {
335
- mode = "agent";
336
- } else if (hasApiKey) {
337
- mode = "api";
338
- } else {
339
- console.error(
340
- "[ERROR] No agent CLI (claude/codex/opencode) found in PATH " +
341
- "and ANTHROPIC_API_KEY not set.\n" +
342
- "Install Claude Code, Codex, or OpenCode, or set ANTHROPIC_API_KEY.",
343
- );
344
- process.exit(1);
345
- }
308
+ agent = _detectAgent();
346
309
  }
347
310
 
348
- if (mode === "agent") {
349
- const validAgents = ["claude", "codex", "opencode"];
350
- if (values.agent && validAgents.includes(values.agent)) {
351
- agent = values.agent;
352
- } else {
353
- agent = _detectAgent();
354
- }
355
- if (!agent) {
356
- console.error(
357
- "[ERROR] --use-agent specified but no agent found in PATH.\n" +
358
- "Install claude, codex, or opencode, or use --use-api instead.",
359
- );
360
- process.exit(1);
361
- }
362
- console.error(`[INFO] Grading via agent: ${agent}`);
363
- } else {
364
- console.error("[INFO] Grading via direct Anthropic API");
311
+ if (!agent) {
312
+ console.error(
313
+ "[ERROR] No agent CLI (claude/codex/opencode) found in PATH.\n" +
314
+ "Install Claude Code, Codex, or OpenCode.",
315
+ );
316
+ process.exit(1);
365
317
  }
366
318
 
319
+ console.error(`[INFO] Grading via agent: ${agent}`);
320
+
367
321
  // --- Resolve expectations ---
368
322
  let expectations: string[] = [];
369
323
  if (values["evals-json"] && values["eval-id"] != null) {
@@ -427,11 +381,7 @@ export async function cliMain(): Promise<void> {
427
381
 
428
382
  let graderOutput: GraderOutput;
429
383
  try {
430
- if (mode === "agent") {
431
- graderOutput = await gradeViaAgent(prompt, agent as string);
432
- } else {
433
- graderOutput = await gradeViaApi(prompt);
434
- }
384
+ graderOutput = await gradeViaAgent(prompt, agent);
435
385
  } catch (e) {
436
386
  console.error(`[ERROR] Grading failed: ${e}`);
437
387
  process.exit(1);
@@ -13,6 +13,9 @@
13
13
  * selftune rollback [options] — Rollback a skill to its pre-evolution state
14
14
  * selftune watch [options] — Monitor post-deploy skill health
15
15
  * selftune doctor — Run health checks
16
+ * selftune status — Show skill health summary
17
+ * selftune last — Show last session details
18
+ * selftune dashboard [options] — Open visual data dashboard
16
19
  */
17
20
 
18
21
  const command = process.argv[2];
@@ -34,6 +37,9 @@ Commands:
34
37
  rollback Rollback a skill to its pre-evolution state
35
38
  watch Monitor post-deploy skill health
36
39
  doctor Run health checks
40
+ status Show skill health summary
41
+ last Show last session details
42
+ dashboard Open visual data dashboard
37
43
 
38
44
  Run 'selftune <command> --help' for command-specific options.`);
39
45
  process.exit(0);
@@ -98,6 +104,21 @@ switch (command) {
98
104
  process.exit(result.healthy ? 0 : 1);
99
105
  break;
100
106
  }
107
+ case "status": {
108
+ const { cliMain } = await import("./status.js");
109
+ cliMain();
110
+ break;
111
+ }
112
+ case "last": {
113
+ const { cliMain } = await import("./last.js");
114
+ cliMain();
115
+ break;
116
+ }
117
+ case "dashboard": {
118
+ const { cliMain } = await import("./dashboard.js");
119
+ cliMain();
120
+ break;
121
+ }
101
122
  default:
102
123
  console.error(`Unknown command: ${command}\nRun 'selftune --help' for available commands.`);
103
124
  process.exit(1);