selftune 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
 
8
- ## [0.6.0] - 2026-03-01
8
+ ## [0.1.4] - 2026-03-01
9
9
 
10
10
  ### Added
11
11
 
@@ -7,7 +7,6 @@
7
7
  * selftune dashboard --out FILE — Write data-embedded HTML to FILE
8
8
  */
9
9
 
10
- import { execSync } from "node:child_process";
11
10
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
12
11
  import { homedir } from "node:os";
13
12
  import { dirname, join, resolve } from "node:path";
@@ -120,7 +119,7 @@ function buildEmbeddedHTML(): string {
120
119
  return template.replace("</body>", `${dataScript}\n</body>`);
121
120
  }
122
121
 
123
- export function cliMain(): void {
122
+ export async function cliMain(): Promise<void> {
124
123
  const args = process.argv.slice(2);
125
124
 
126
125
  if (args.includes("--help") || args.includes("-h")) {
@@ -165,13 +164,11 @@ Usage:
165
164
 
166
165
  try {
167
166
  const platform = process.platform;
168
- if (platform === "darwin") {
169
- execSync(`open "${tmpPath}"`);
170
- } else if (platform === "linux") {
171
- execSync(`xdg-open "${tmpPath}"`);
172
- } else if (platform === "win32") {
173
- execSync(`start "" "${tmpPath}"`);
174
- }
167
+ const cmd = platform === "darwin" ? "open" : platform === "linux" ? "xdg-open" : null;
168
+ if (!cmd) throw new Error("Unsupported platform");
169
+ const proc = Bun.spawn([cmd, tmpPath], { stdio: ["ignore", "ignore", "ignore"] });
170
+ await proc.exited;
171
+ if (proc.exitCode !== 0) throw new Error(`Failed to launch ${cmd}`);
175
172
  } catch {
176
173
  console.log(`Open manually: file://${tmpPath}`);
177
174
  }
@@ -23,8 +23,8 @@ import { readJsonl } from "../utils/jsonl.js";
23
23
  import { appendAuditEntry } from "./audit.js";
24
24
  import { extractFailurePatterns } from "./extract-patterns.js";
25
25
  import { generateProposal } from "./propose-description.js";
26
- import { validateProposal } from "./validate-proposal.js";
27
26
  import type { ValidationResult } from "./validate-proposal.js";
27
+ import { validateProposal } from "./validate-proposal.js";
28
28
 
29
29
  // ---------------------------------------------------------------------------
30
30
  // Types
@@ -370,7 +370,19 @@ Options:
370
370
  }
371
371
 
372
372
  const { detectAgent } = await import("../utils/llm-call.js");
373
- const agent = values.agent ?? detectAgent();
373
+ const requestedAgent = values.agent;
374
+ if (requestedAgent && !Bun.which(requestedAgent)) {
375
+ console.error(
376
+ JSON.stringify({
377
+ level: "error",
378
+ code: "agent_not_in_path",
379
+ message: `Agent CLI '${requestedAgent}' not found in PATH.`,
380
+ action: "Install it or omit --agent to use auto-detection.",
381
+ }),
382
+ );
383
+ process.exit(1);
384
+ }
385
+ const agent = requestedAgent ?? detectAgent();
374
386
  if (!agent) {
375
387
  console.error(
376
388
  JSON.stringify({
@@ -7,7 +7,7 @@
7
7
  * 3. Recording a "rolled_back" entry in the audit trail
8
8
  */
9
9
 
10
- import { existsSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from "node:fs";
10
+ import { existsSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
11
11
  import { basename, dirname, join } from "node:path";
12
12
  import { parseArgs } from "node:util";
13
13
 
@@ -302,7 +302,13 @@ export async function cliMain(): Promise<void> {
302
302
  // --- Determine agent ---
303
303
  let agent: string | null = null;
304
304
  const validAgents = ["claude", "codex", "opencode"];
305
- if (values.agent && validAgents.includes(values.agent)) {
305
+ if (values.agent) {
306
+ if (!validAgents.includes(values.agent)) {
307
+ console.error(
308
+ `[ERROR] Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
309
+ );
310
+ process.exit(1);
311
+ }
306
312
  agent = values.agent;
307
313
  } else {
308
314
  agent = _detectAgent();
@@ -21,7 +21,7 @@
21
21
  * bun codex-rollout.ts --force
22
22
  */
23
23
 
24
- import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
24
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
25
25
  import { homedir } from "node:os";
26
26
  import { basename, join } from "node:path";
27
27
  import { parseArgs } from "node:util";
@@ -21,12 +21,12 @@
21
21
  */
22
22
 
23
23
  import { Database } from "bun:sqlite";
24
- import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
24
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
25
25
  import { homedir } from "node:os";
26
26
  import { basename, join } from "node:path";
27
27
  import { parseArgs } from "node:util";
28
28
  import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
29
- import type { QueryLogRecord, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
29
+ import type { QueryLogRecord, SkillUsageRecord } from "../types.js";
30
30
  import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
31
31
 
32
32
  const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share");
@@ -40,6 +40,16 @@ const VALID_AGENT_TYPES: SelftuneConfig["agent_type"][] = [
40
40
  "unknown",
41
41
  ];
42
42
 
43
+ const AGENT_TYPE_CLI_MAP: Record<string, string> = {
44
+ claude_code: "claude",
45
+ codex: "codex",
46
+ opencode: "opencode",
47
+ };
48
+
49
+ function agentTypeToCli(agentType: string): string | null {
50
+ return AGENT_TYPE_CLI_MAP[agentType] ?? null;
51
+ }
52
+
43
53
  export function detectAgentType(
44
54
  override?: string,
45
55
  homeOverride?: string,
@@ -179,8 +189,14 @@ export function runInit(opts: InitOptions): SelftuneConfig {
179
189
  // Resolve CLI path
180
190
  const cliPath = determineCliPath(opts.cliPathOverride);
181
191
 
182
- // Detect agent CLI
183
- const agentCli = detectAgent();
192
+ // Detect agent CLI — when an override is provided, fall back to mapped CLI
193
+ // name so init works in test/CI environments without agent binaries in PATH
194
+ const agentCli = detectAgent() ?? (opts.agentOverride ? agentTypeToCli(agentType) : null);
195
+ if (!agentCli) {
196
+ throw new Error(
197
+ "No supported agent CLI detected (claude, codex, opencode). Install one, then rerun `selftune init`.",
198
+ );
199
+ }
184
200
 
185
201
  // Determine LLM mode
186
202
  const { llm_mode, agent_cli } = determineLlmMode(agentCli);
@@ -51,7 +51,7 @@ export function computeLastInsight(
51
51
  skillRecords
52
52
  .filter((r) => r.session_id === sessionId && r.triggered)
53
53
  .map((r) => {
54
- triggeredSkillQueries.add(r.query);
54
+ triggeredSkillQueries.add(r.query.toLowerCase().trim());
55
55
  return r.skill_name;
56
56
  }),
57
57
  ),
@@ -60,7 +60,7 @@ export function computeLastInsight(
60
60
  // Unmatched queries: session queries whose text does NOT appear in any triggered skill record
61
61
  const sessionQueries = queryRecords.filter((r) => r.session_id === sessionId);
62
62
  const unmatchedQueries = sessionQueries
63
- .filter((q) => !triggeredSkillQueries.has(q.query))
63
+ .filter((q) => !triggeredSkillQueries.has(q.query.toLowerCase().trim()))
64
64
  .map((q) => q.query);
65
65
 
66
66
  const errors = latest.errors_encountered;
@@ -8,7 +8,6 @@
8
8
  */
9
9
 
10
10
  import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
11
- import { getLastDeployedProposal } from "./evolution/audit.js";
12
11
  import { computeMonitoringSnapshot } from "./monitoring/watch.js";
13
12
  import { doctor } from "./observability.js";
14
13
  import type {
@@ -227,7 +226,7 @@ export function formatStatus(result: StatusResult): string {
227
226
  // Skills table
228
227
  const skillCount = result.skills.length;
229
228
  lines.push(
230
- `Skills (${skillCount})${" ".repeat(36 - `Skills (${skillCount})`.length)}Last 7 days`,
229
+ `Skills (${skillCount})${" ".repeat(36 - `Skills (${skillCount})`.length)}Recent data`,
231
230
  );
232
231
  lines.push(" Name Pass Rate Trend Missed Status");
233
232
 
@@ -421,7 +421,7 @@
421
421
  <div class="header">
422
422
  <div class="header-left">
423
423
  <h1>self<span>tune</span></h1>
424
- <span class="version">v0.5</span>
424
+ <span class="version">v0.1.4</span>
425
425
  </div>
426
426
  <div class="status" id="headerStatus">Drop log files to get started</div>
427
427
  </div>
@@ -726,6 +726,8 @@ function formatDate(ts) {
726
726
  return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
727
727
  }
728
728
 
729
+ function toDayKey(ts) { return new Date(ts).toISOString().slice(0, 10); }
730
+
729
731
  function formatTimestamp(ts) {
730
732
  const d = toDate(ts);
731
733
  return d.toLocaleString('en-US', {
@@ -746,7 +748,7 @@ function escapeHtml(s) {
746
748
  function groupByDay(records) {
747
749
  const map = {};
748
750
  for (const r of records) {
749
- const day = formatDate(r.timestamp);
751
+ const day = toDayKey(r.timestamp);
750
752
  map[day] = (map[day] || 0) + 1;
751
753
  }
752
754
  return map;
@@ -935,24 +937,25 @@ function updateDrillPassRateChart(skillName) {
935
937
  const records = state.skills.filter(r => r.skill_name === skillName);
936
938
  const byDay = {};
937
939
  for (const r of records) {
938
- const day = formatDate(r.timestamp);
940
+ const day = toDayKey(r.timestamp);
939
941
  if (!byDay[day]) byDay[day] = { triggered: 0, total: 0 };
940
942
  byDay[day].total++;
941
943
  if (r.triggered) byDay[day].triggered++;
942
944
  }
943
945
 
944
- const labels = Object.keys(byDay);
945
- const data = labels.map(d => ((byDay[d].triggered / byDay[d].total) * 100).toFixed(1));
946
+ const dayKeys = Object.keys(byDay).sort();
947
+ const labels = dayKeys.map(d => formatDate(d + "T00:00:00Z"));
948
+ const data = dayKeys.map(d => ((byDay[d].triggered / byDay[d].total) * 100).toFixed(1));
946
949
 
947
950
  // Deploy events as annotations
948
951
  const deployDays = new Set(
949
952
  state.evolution
950
953
  .filter(e => e.action === 'deployed' && (e.details || '').toLowerCase().includes(skillName.toLowerCase()))
951
- .map(e => formatDate(e.timestamp))
954
+ .map(e => toDayKey(e.timestamp))
952
955
  );
953
956
 
954
- const pointColors = labels.map(d => deployDays.has(d) ? '#d97757' : '#788c5d');
955
- const pointSizes = labels.map(d => deployDays.has(d) ? 8 : 3);
957
+ const pointColors = dayKeys.map(d => deployDays.has(d) ? '#d97757' : '#788c5d');
958
+ const pointSizes = dayKeys.map(d => deployDays.has(d) ? 8 : 3);
956
959
 
957
960
  if (charts.drillPassRate) charts.drillPassRate.destroy();
958
961
  charts.drillPassRate = new Chart(document.getElementById('chartDrillPassRate'), {
@@ -1023,12 +1026,14 @@ function updateDrillSessions(skillName) {
1023
1026
  const sorted = [...sessions].sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
1024
1027
  tbody.innerHTML = sorted.slice(0, 30).map(r => {
1025
1028
  const skills = (r.skills_triggered || []).join(', ') || '\u2014';
1026
- const errorBadge = r.errors_encountered > 0
1027
- ? `<span class="badge badge-red">${r.errors_encountered}</span>`
1029
+ const errorCount = Number.isFinite(Number(r.errors_encountered)) ? Number(r.errors_encountered) : 0;
1030
+ const totalToolCalls = Number.isFinite(Number(r.total_tool_calls)) ? Number(r.total_tool_calls) : 0;
1031
+ const errorBadge = errorCount > 0
1032
+ ? `<span class="badge badge-red">${errorCount}</span>`
1028
1033
  : '<span class="badge badge-green">0</span>';
1029
1034
  return `<tr>
1030
1035
  <td class="mono">${escapeHtml(formatTimestamp(r.timestamp))}</td>
1031
- <td>${r.total_tool_calls || 0}</td>
1036
+ <td>${totalToolCalls}</td>
1032
1037
  <td>${escapeHtml(truncate(skills, 30))}</td>
1033
1038
  <td>${errorBadge}</td>
1034
1039
  </tr>`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "selftune",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Skill observability and continuous improvement CLI for agent platforms",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -36,7 +36,14 @@
36
36
  "bin": {
37
37
  "selftune": "bin/selftune.cjs"
38
38
  },
39
- "files": ["bin/", "cli/selftune/", "dashboard/", "skill/", "README.md", "CHANGELOG.md"],
39
+ "files": [
40
+ "bin/",
41
+ "cli/selftune/",
42
+ "dashboard/",
43
+ "skill/",
44
+ "README.md",
45
+ "CHANGELOG.md"
46
+ ],
40
47
  "scripts": {
41
48
  "lint": "bunx biome check .",
42
49
  "lint:fix": "bunx biome check --write .",
@@ -45,7 +52,7 @@
45
52
  "check": "bun run lint && bun run lint:arch && bun test"
46
53
  },
47
54
  "devDependencies": {
48
- "@biomejs/biome": "^1.9.4",
55
+ "@biomejs/biome": "^2.4.4",
49
56
  "@types/bun": "^1.1.0"
50
57
  }
51
58
  }
package/skill/SKILL.md CHANGED
@@ -25,8 +25,9 @@ will work. Do not proceed with other commands until initialization is complete.
25
25
  selftune <command> [options]
26
26
  ```
27
27
 
28
- All commands output deterministic JSON. Always parse JSON output -- never
29
- text-match against output strings.
28
+ Most commands output deterministic JSON. Parse JSON output for machine-readable commands.
29
+ `selftune dashboard` is an exception: it generates an HTML artifact and may print
30
+ informational progress lines.
30
31
 
31
32
  ## Quick Reference
32
33
 
@@ -134,7 +134,7 @@ selftune evals --skill pptx --stats
134
134
 
135
135
  ### 1. List Available Skills
136
136
 
137
- Run `--list-skills` to see what skills have telemetry data. If the target
137
+ Run `selftune evals --list-skills` to see what skills have telemetry data. If the target
138
138
  skill has zero or very few queries, more sessions are needed before
139
139
  eval generation is useful.
140
140
 
@@ -170,15 +170,15 @@ beyond trigger coverage.
170
170
  ## Common Patterns
171
171
 
172
172
  **"What skills are undertriggering?"**
173
- > Run `--list-skills`, then for each skill with significant query counts,
173
+ > Run `selftune evals --list-skills`, then for each skill with significant query counts,
174
174
  > generate evals and check for missed implicit/contextual queries.
175
175
 
176
176
  **"Generate evals for pptx"**
177
- > Run `evals --skill pptx`. Review the invocation type distribution.
177
+ > Run `selftune evals --skill pptx`. Review the invocation type distribution.
178
178
  > Feed the output to `evolve` if coverage gaps exist.
179
179
 
180
180
  **"Show me skill stats"**
181
- > Run `evals --skill <name> --stats` for aggregate telemetry.
181
+ > Run `selftune evals --skill <name> --stats` for aggregate telemetry.
182
182
 
183
183
  **"I want reproducible evals"**
184
184
  > Use `--seed <n>` to fix the random sampling of negative examples.
@@ -42,9 +42,9 @@ Writes to:
42
42
  ### Steps
43
43
 
44
44
  1. Verify `$CODEX_HOME/sessions/` directory exists and contains session files
45
- 2. Run `ingest-codex`
45
+ 2. Run `selftune ingest-codex`
46
46
  3. Verify entries were written by checking log file line counts
47
- 4. Run `doctor` to confirm logs are healthy
47
+ 4. Run `selftune doctor` to confirm logs are healthy
48
48
 
49
49
  ---
50
50
 
@@ -78,9 +78,9 @@ Writes to:
78
78
  ### Steps
79
79
 
80
80
  1. Verify the OpenCode database exists at the expected path
81
- 2. Run `ingest-opencode`
81
+ 2. Run `selftune ingest-opencode`
82
82
  3. Verify entries were written by checking log file line counts
83
- 4. Run `doctor` to confirm logs are healthy
83
+ 4. Run `selftune doctor` to confirm logs are healthy
84
84
 
85
85
  ---
86
86
 
@@ -117,25 +117,25 @@ stream for telemetry; it does not modify Codex behavior.
117
117
  1. Build the wrap-codex command with the desired Codex arguments
118
118
  2. Run the command (replaces `codex exec` in your workflow)
119
119
  3. Session telemetry is captured automatically
120
- 4. Verify with `doctor` after first use
120
+ 4. Verify with `selftune doctor` after first use
121
121
 
122
122
  ---
123
123
 
124
124
  ## Common Patterns
125
125
 
126
126
  **"Ingest codex logs"**
127
- > Run `ingest-codex`. No options needed. Reads from `$CODEX_HOME/sessions/`.
127
+ > Run `selftune ingest-codex`. No options needed. Reads from `$CODEX_HOME/sessions/`.
128
128
 
129
129
  **"Import opencode sessions"**
130
- > Run `ingest-opencode`. Reads from the SQLite database automatically.
130
+ > Run `selftune ingest-opencode`. Reads from the SQLite database automatically.
131
131
 
132
132
  **"Run codex through selftune"**
133
- > Use `wrap-codex -- <codex args>` instead of `codex exec <args>` directly.
133
+ > Use `selftune wrap-codex -- <codex args>` instead of `codex exec <args>` directly.
134
134
 
135
135
  **"Batch ingest vs real-time"**
136
- > Use `ingest-codex` or `ingest-opencode` for historical sessions.
137
- > Use `wrap-codex` for ongoing sessions. Both produce the same log format.
136
+ > Use `selftune ingest-codex` or `selftune ingest-opencode` for historical sessions.
137
+ > Use `selftune wrap-codex` for ongoing sessions. Both produce the same log format.
138
138
 
139
139
  **"How do I know it worked?"**
140
- > Run `doctor` after ingestion. Check that log files exist and are parseable.
141
- > Run `evals --list-skills` to see if the ingested sessions appear.
140
+ > Run `selftune doctor` after ingestion. Check that log files exist and are parseable.
141
+ > Run `selftune evals --list-skills` to see if the ingested sessions appear.
@@ -19,6 +19,7 @@ selftune init [--agent <type>] [--cli-path <path>] [--force]
19
19
  | Flag | Description | Default |
20
20
  |------|-------------|---------|
21
21
  | `--agent <type>` | Agent platform: `claude`, `codex`, `opencode` | Auto-detected |
22
+ | `--cli-path <path>` | Override auto-detected CLI entry-point path | Auto-detected |
22
23
  | `--force` | Reinitialize even if config already exists | Off |
23
24
 
24
25
  ## Output Format