npm - selftune - Versions diffs - 0.1.2 → 0.1.4 - Mend

selftune 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/CHANGELOG.md +1 -1
package/cli/selftune/dashboard.ts +6 -9
package/cli/selftune/evolution/evolve.ts +14 -2
package/cli/selftune/evolution/rollback.ts +1 -1
package/cli/selftune/grading/grade-session.ts +7 -1
package/cli/selftune/ingestors/codex-rollout.ts +1 -1
package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
package/cli/selftune/init.ts +18 -2
package/cli/selftune/last.ts +2 -2
package/cli/selftune/status.ts +1 -2
package/dashboard/index.html +16 -11
package/package.json +10 -3
package/skill/SKILL.md +3 -2
package/skill/Workflows/Evals.md +4 -4
package/skill/Workflows/Ingest.md +12 -12
package/skill/Workflows/Initialize.md +1 -0

package/CHANGELOG.md CHANGED Viewed

@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/),
 and this project adheres to [Semantic Versioning](https://semver.org/).
-## [0.6.0] - 2026-03-01
+## [0.1.4] - 2026-03-01
 ### Added

package/cli/selftune/dashboard.ts CHANGED Viewed

@@ -7,7 +7,6 @@
  *   selftune dashboard --out FILE   — Write data-embedded HTML to FILE
  */
-import { execSync } from "node:child_process";
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
 import { dirname, join, resolve } from "node:path";
@@ -120,7 +119,7 @@ function buildEmbeddedHTML(): string {
   return template.replace("</body>", `${dataScript}\n</body>`);
 }
-export function cliMain(): void {
+export async function cliMain(): Promise<void> {
   const args = process.argv.slice(2);
   if (args.includes("--help") || args.includes("-h")) {
@@ -165,13 +164,11 @@ Usage:
   try {
     const platform = process.platform;
-    if (platform === "darwin") {
-      execSync(`open "${tmpPath}"`);
-    } else if (platform === "linux") {
-      execSync(`xdg-open "${tmpPath}"`);
-    } else if (platform === "win32") {
-      execSync(`start "" "${tmpPath}"`);
-    }
+    const cmd = platform === "darwin" ? "open" : platform === "linux" ? "xdg-open" : null;
+    if (!cmd) throw new Error("Unsupported platform");
+    const proc = Bun.spawn([cmd, tmpPath], { stdio: ["ignore", "ignore", "ignore"] });
+    await proc.exited;
+    if (proc.exitCode !== 0) throw new Error(`Failed to launch ${cmd}`);
   } catch {
     console.log(`Open manually: file://${tmpPath}`);
   }

package/cli/selftune/evolution/evolve.ts CHANGED Viewed

@@ -23,8 +23,8 @@ import { readJsonl } from "../utils/jsonl.js";
 import { appendAuditEntry } from "./audit.js";
 import { extractFailurePatterns } from "./extract-patterns.js";
 import { generateProposal } from "./propose-description.js";
-import { validateProposal } from "./validate-proposal.js";
 import type { ValidationResult } from "./validate-proposal.js";
+import { validateProposal } from "./validate-proposal.js";
 // ---------------------------------------------------------------------------
 // Types
@@ -370,7 +370,19 @@ Options:
   }
   const { detectAgent } = await import("../utils/llm-call.js");
-  const agent = values.agent ?? detectAgent();
+  const requestedAgent = values.agent;
+  if (requestedAgent && !Bun.which(requestedAgent)) {
+    console.error(
+      JSON.stringify({
+        level: "error",
+        code: "agent_not_in_path",
+        message: `Agent CLI '${requestedAgent}' not found in PATH.`,
+        action: "Install it or omit --agent to use auto-detection.",
+      }),
+    );
+    process.exit(1);
+  }
+  const agent = requestedAgent ?? detectAgent();
   if (!agent) {
     console.error(
       JSON.stringify({

package/cli/selftune/evolution/rollback.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  * 3. Recording a "rolled_back" entry in the audit trail
  */
-import { existsSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from "node:fs";
+import { existsSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
 import { basename, dirname, join } from "node:path";
 import { parseArgs } from "node:util";

package/cli/selftune/grading/grade-session.ts CHANGED Viewed

@@ -302,7 +302,13 @@ export async function cliMain(): Promise<void> {
   // --- Determine agent ---
   let agent: string | null = null;
   const validAgents = ["claude", "codex", "opencode"];
-  if (values.agent && validAgents.includes(values.agent)) {
+  if (values.agent) {
+    if (!validAgents.includes(values.agent)) {
+      console.error(
+        `[ERROR] Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
+      );
+      process.exit(1);
+    }
     agent = values.agent;
   } else {
     agent = _detectAgent();

package/cli/selftune/ingestors/codex-rollout.ts CHANGED Viewed

@@ -21,7 +21,7 @@
  *   bun codex-rollout.ts --force
  */
-import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
+import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
 import { homedir } from "node:os";
 import { basename, join } from "node:path";
 import { parseArgs } from "node:util";

package/cli/selftune/ingestors/opencode-ingest.ts CHANGED Viewed

@@ -21,12 +21,12 @@
  */
 import { Database } from "bun:sqlite";
-import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
+import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
 import { homedir } from "node:os";
 import { basename, join } from "node:path";
 import { parseArgs } from "node:util";
 import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
-import type { QueryLogRecord, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
+import type { QueryLogRecord, SkillUsageRecord } from "../types.js";
 import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
 const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share");

package/cli/selftune/init.ts CHANGED Viewed

@@ -40,6 +40,16 @@ const VALID_AGENT_TYPES: SelftuneConfig["agent_type"][] = [
   "unknown",
 ];
+const AGENT_TYPE_CLI_MAP: Record<string, string> = {
+  claude_code: "claude",
+  codex: "codex",
+  opencode: "opencode",
+};
+function agentTypeToCli(agentType: string): string | null {
+  return AGENT_TYPE_CLI_MAP[agentType] ?? null;
+}
 export function detectAgentType(
   override?: string,
   homeOverride?: string,
@@ -179,8 +189,14 @@ export function runInit(opts: InitOptions): SelftuneConfig {
   // Resolve CLI path
   const cliPath = determineCliPath(opts.cliPathOverride);
-  // Detect agent CLI
-  const agentCli = detectAgent();
+  // Detect agent CLI — when an override is provided, fall back to mapped CLI
+  // name so init works in test/CI environments without agent binaries in PATH
+  const agentCli = detectAgent() ?? (opts.agentOverride ? agentTypeToCli(agentType) : null);
+  if (!agentCli) {
+    throw new Error(
+      "No supported agent CLI detected (claude, codex, opencode). Install one, then rerun `selftune init`.",
+    );
+  }
   // Determine LLM mode
   const { llm_mode, agent_cli } = determineLlmMode(agentCli);

package/cli/selftune/last.ts CHANGED Viewed

@@ -51,7 +51,7 @@ export function computeLastInsight(
       skillRecords
         .filter((r) => r.session_id === sessionId && r.triggered)
         .map((r) => {
-          triggeredSkillQueries.add(r.query);
+          triggeredSkillQueries.add(r.query.toLowerCase().trim());
           return r.skill_name;
         }),
     ),
@@ -60,7 +60,7 @@ export function computeLastInsight(
   // Unmatched queries: session queries whose text does NOT appear in any triggered skill record
   const sessionQueries = queryRecords.filter((r) => r.session_id === sessionId);
   const unmatchedQueries = sessionQueries
-    .filter((q) => !triggeredSkillQueries.has(q.query))
+    .filter((q) => !triggeredSkillQueries.has(q.query.toLowerCase().trim()))
     .map((q) => q.query);
   const errors = latest.errors_encountered;

package/cli/selftune/status.ts CHANGED Viewed

@@ -8,7 +8,6 @@
  */
 import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
-import { getLastDeployedProposal } from "./evolution/audit.js";
 import { computeMonitoringSnapshot } from "./monitoring/watch.js";
 import { doctor } from "./observability.js";
 import type {
@@ -227,7 +226,7 @@ export function formatStatus(result: StatusResult): string {
   // Skills table
   const skillCount = result.skills.length;
   lines.push(
-    `Skills (${skillCount})${" ".repeat(36 - `Skills (${skillCount})`.length)}Last 7 days`,
+    `Skills (${skillCount})${" ".repeat(36 - `Skills (${skillCount})`.length)}Recent data`,
   );
   lines.push("  Name            Pass Rate  Trend  Missed  Status");

package/dashboard/index.html CHANGED Viewed

@@ -421,7 +421,7 @@
 <div class="header">
   <div class="header-left">
     <h1>self<span>tune</span></h1>
-    <span class="version">v0.5</span>
+    <span class="version">v0.1.4</span>
   </div>
   <div class="status" id="headerStatus">Drop log files to get started</div>
 </div>
@@ -726,6 +726,8 @@ function formatDate(ts) {
   return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
 }
+function toDayKey(ts) { return new Date(ts).toISOString().slice(0, 10); }
 function formatTimestamp(ts) {
   const d = toDate(ts);
   return d.toLocaleString('en-US', {
@@ -746,7 +748,7 @@ function escapeHtml(s) {
 function groupByDay(records) {
   const map = {};
   for (const r of records) {
-    const day = formatDate(r.timestamp);
+    const day = toDayKey(r.timestamp);
     map[day] = (map[day] || 0) + 1;
   }
   return map;
@@ -935,24 +937,25 @@ function updateDrillPassRateChart(skillName) {
   const records = state.skills.filter(r => r.skill_name === skillName);
   const byDay = {};
   for (const r of records) {
-    const day = formatDate(r.timestamp);
+    const day = toDayKey(r.timestamp);
     if (!byDay[day]) byDay[day] = { triggered: 0, total: 0 };
     byDay[day].total++;
     if (r.triggered) byDay[day].triggered++;
   }
-  const labels = Object.keys(byDay);
-  const data = labels.map(d => ((byDay[d].triggered / byDay[d].total) * 100).toFixed(1));
+  const dayKeys = Object.keys(byDay).sort();
+  const labels = dayKeys.map(d => formatDate(d + "T00:00:00Z"));
+  const data = dayKeys.map(d => ((byDay[d].triggered / byDay[d].total) * 100).toFixed(1));
   // Deploy events as annotations
   const deployDays = new Set(
     state.evolution
       .filter(e => e.action === 'deployed' && (e.details || '').toLowerCase().includes(skillName.toLowerCase()))
-      .map(e => formatDate(e.timestamp))
+      .map(e => toDayKey(e.timestamp))
   );
-  const pointColors = labels.map(d => deployDays.has(d) ? '#d97757' : '#788c5d');
-  const pointSizes = labels.map(d => deployDays.has(d) ? 8 : 3);
+  const pointColors = dayKeys.map(d => deployDays.has(d) ? '#d97757' : '#788c5d');
+  const pointSizes = dayKeys.map(d => deployDays.has(d) ? 8 : 3);
   if (charts.drillPassRate) charts.drillPassRate.destroy();
   charts.drillPassRate = new Chart(document.getElementById('chartDrillPassRate'), {
@@ -1023,12 +1026,14 @@ function updateDrillSessions(skillName) {
   const sorted = [...sessions].sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
   tbody.innerHTML = sorted.slice(0, 30).map(r => {
     const skills = (r.skills_triggered || []).join(', ') || '\u2014';
-    const errorBadge = r.errors_encountered > 0
-      ? `<span class="badge badge-red">${r.errors_encountered}</span>`
+    const errorCount = Number.isFinite(Number(r.errors_encountered)) ? Number(r.errors_encountered) : 0;
+    const totalToolCalls = Number.isFinite(Number(r.total_tool_calls)) ? Number(r.total_tool_calls) : 0;
+    const errorBadge = errorCount > 0
+      ? `<span class="badge badge-red">${errorCount}</span>`
       : '<span class="badge badge-green">0</span>';
     return `<tr>
       <td class="mono">${escapeHtml(formatTimestamp(r.timestamp))}</td>
-      <td>${r.total_tool_calls || 0}</td>
+      <td>${totalToolCalls}</td>
       <td>${escapeHtml(truncate(skills, 30))}</td>
       <td>${errorBadge}</td>
     </tr>`;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "selftune",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "description": "Skill observability and continuous improvement CLI for agent platforms",
   "type": "module",
   "license": "MIT",
@@ -36,7 +36,14 @@
   "bin": {
     "selftune": "bin/selftune.cjs"
   },
-  "files": ["bin/", "cli/selftune/", "dashboard/", "skill/", "README.md", "CHANGELOG.md"],
+  "files": [
+    "bin/",
+    "cli/selftune/",
+    "dashboard/",
+    "skill/",
+    "README.md",
+    "CHANGELOG.md"
+  ],
   "scripts": {
     "lint": "bunx biome check .",
     "lint:fix": "bunx biome check --write .",
@@ -45,7 +52,7 @@
     "check": "bun run lint && bun run lint:arch && bun test"
   },
   "devDependencies": {
-    "@biomejs/biome": "^1.9.4",
+    "@biomejs/biome": "^2.4.4",
     "@types/bun": "^1.1.0"
   }
 }

package/skill/SKILL.md CHANGED Viewed

@@ -25,8 +25,9 @@ will work. Do not proceed with other commands until initialization is complete.
 selftune <command> [options]
 ```
-All commands output deterministic JSON. Always parse JSON output -- never
-text-match against output strings.
+Most commands output deterministic JSON. Parse JSON output for machine-readable commands.
+`selftune dashboard` is an exception: it generates an HTML artifact and may print
+informational progress lines.
 ## Quick Reference

package/skill/Workflows/Evals.md CHANGED Viewed

@@ -134,7 +134,7 @@ selftune evals --skill pptx --stats
 ### 1. List Available Skills
-Run `--list-skills` to see what skills have telemetry data. If the target
+Run `selftune evals --list-skills` to see what skills have telemetry data. If the target
 skill has zero or very few queries, more sessions are needed before
 eval generation is useful.
@@ -170,15 +170,15 @@ beyond trigger coverage.
 ## Common Patterns
 **"What skills are undertriggering?"**
-> Run `--list-skills`, then for each skill with significant query counts,
+> Run `selftune evals --list-skills`, then for each skill with significant query counts,
 > generate evals and check for missed implicit/contextual queries.
 **"Generate evals for pptx"**
-> Run `evals --skill pptx`. Review the invocation type distribution.
+> Run `selftune evals --skill pptx`. Review the invocation type distribution.
 > Feed the output to `evolve` if coverage gaps exist.
 **"Show me skill stats"**
-> Run `evals --skill <name> --stats` for aggregate telemetry.
+> Run `selftune evals --skill <name> --stats` for aggregate telemetry.
 **"I want reproducible evals"**
 > Use `--seed <n>` to fix the random sampling of negative examples.

package/skill/Workflows/Ingest.md CHANGED Viewed

@@ -42,9 +42,9 @@ Writes to:
 ### Steps
 1. Verify `$CODEX_HOME/sessions/` directory exists and contains session files
-2. Run `ingest-codex`
+2. Run `selftune ingest-codex`
 3. Verify entries were written by checking log file line counts
-4. Run `doctor` to confirm logs are healthy
+4. Run `selftune doctor` to confirm logs are healthy
 ---
@@ -78,9 +78,9 @@ Writes to:
 ### Steps
 1. Verify the OpenCode database exists at the expected path
-2. Run `ingest-opencode`
+2. Run `selftune ingest-opencode`
 3. Verify entries were written by checking log file line counts
-4. Run `doctor` to confirm logs are healthy
+4. Run `selftune doctor` to confirm logs are healthy
 ---
@@ -117,25 +117,25 @@ stream for telemetry; it does not modify Codex behavior.
 1. Build the wrap-codex command with the desired Codex arguments
 2. Run the command (replaces `codex exec` in your workflow)
 3. Session telemetry is captured automatically
-4. Verify with `doctor` after first use
+4. Verify with `selftune doctor` after first use
 ---
 ## Common Patterns
 **"Ingest codex logs"**
-> Run `ingest-codex`. No options needed. Reads from `$CODEX_HOME/sessions/`.
+> Run `selftune ingest-codex`. No options needed. Reads from `$CODEX_HOME/sessions/`.
 **"Import opencode sessions"**
-> Run `ingest-opencode`. Reads from the SQLite database automatically.
+> Run `selftune ingest-opencode`. Reads from the SQLite database automatically.
 **"Run codex through selftune"**
-> Use `wrap-codex -- <codex args>` instead of `codex exec <args>` directly.
+> Use `selftune wrap-codex -- <codex args>` instead of `codex exec <args>` directly.
 **"Batch ingest vs real-time"**
-> Use `ingest-codex` or `ingest-opencode` for historical sessions.
-> Use `wrap-codex` for ongoing sessions. Both produce the same log format.
+> Use `selftune ingest-codex` or `selftune ingest-opencode` for historical sessions.
+> Use `selftune wrap-codex` for ongoing sessions. Both produce the same log format.
 **"How do I know it worked?"**
-> Run `doctor` after ingestion. Check that log files exist and are parseable.
-> Run `evals --list-skills` to see if the ingested sessions appear.
+> Run `selftune doctor` after ingestion. Check that log files exist and are parseable.
+> Run `selftune evals --list-skills` to see if the ingested sessions appear.

package/skill/Workflows/Initialize.md CHANGED Viewed

@@ -19,6 +19,7 @@ selftune init [--agent <type>] [--cli-path <path>] [--force]
 | Flag | Description | Default |
 |------|-------------|---------|
 | `--agent <type>` | Agent platform: `claude`, `codex`, `opencode` | Auto-detected |
+| `--cli-path <path>` | Override auto-detected CLI entry-point path | Auto-detected |
 | `--force` | Reinitialize even if config already exists | Off |
 ## Output Format