npm - selftune - Versions diffs - 0.2.30 → 0.2.32 - Mend

selftune 0.2.30 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

package/README.md +83 -56
package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/command-surface.ts +613 -2
package/cli/selftune/create/baseline.ts +429 -0
package/cli/selftune/create/check.ts +35 -0
package/cli/selftune/create/init.ts +115 -0
package/cli/selftune/create/package-candidate-state.ts +771 -0
package/cli/selftune/create/package-evaluator.ts +710 -0
package/cli/selftune/create/package-fingerprint.ts +142 -0
package/cli/selftune/create/package-search.ts +377 -0
package/cli/selftune/create/publish.ts +431 -0
package/cli/selftune/create/readiness.ts +495 -0
package/cli/selftune/create/replay.ts +330 -0
package/cli/selftune/create/report.ts +74 -0
package/cli/selftune/create/scaffold.ts +121 -0
package/cli/selftune/create/skills-ref-adapter.ts +177 -0
package/cli/selftune/create/status.ts +33 -0
package/cli/selftune/create/templates.ts +249 -0
package/cli/selftune/cron/setup.ts +1 -1
package/cli/selftune/dashboard-action-events.ts +4 -1
package/cli/selftune/dashboard-action-result.ts +789 -24
package/cli/selftune/dashboard-action-stream.ts +80 -0
package/cli/selftune/dashboard-contract.ts +146 -3
package/cli/selftune/dashboard-server.ts +5 -4
package/cli/selftune/eval/hooks-to-evals.ts +58 -35
package/cli/selftune/eval/synthetic-evals.ts +145 -17
package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
package/cli/selftune/evolution/evolve-body.ts +9 -36
package/cli/selftune/evolution/evolve.ts +8 -72
package/cli/selftune/evolution/stopping-criteria.ts +5 -13
package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
package/cli/selftune/evolution/validate-host-replay.ts +115 -15
package/cli/selftune/improve.ts +206 -0
package/cli/selftune/index.ts +123 -6
package/cli/selftune/init.ts +1 -1
package/cli/selftune/localdb/queries/dashboard.ts +30 -0
package/cli/selftune/localdb/schema.ts +52 -0
package/cli/selftune/monitoring/watch.ts +257 -23
package/cli/selftune/orchestrate/execute.ts +300 -1
package/cli/selftune/orchestrate/finalize.ts +14 -0
package/cli/selftune/orchestrate/plan.ts +22 -5
package/cli/selftune/orchestrate/prepare.ts +59 -4
package/cli/selftune/orchestrate/report.ts +1 -1
package/cli/selftune/orchestrate.ts +34 -1
package/cli/selftune/publish.ts +35 -0
package/cli/selftune/registry/github-install.ts +256 -0
package/cli/selftune/registry/index.ts +1 -1
package/cli/selftune/registry/install.ts +58 -7
package/cli/selftune/routes/actions.ts +81 -15
package/cli/selftune/routes/overview.ts +1 -1
package/cli/selftune/routes/skill-report.ts +147 -2
package/cli/selftune/run.ts +18 -0
package/cli/selftune/schedule.ts +3 -3
package/cli/selftune/search-run.ts +703 -0
package/cli/selftune/status.ts +35 -11
package/cli/selftune/testing-readiness.ts +431 -40
package/cli/selftune/types.ts +316 -0
package/cli/selftune/utils/eval-readiness.ts +1 -0
package/cli/selftune/utils/json-output.ts +11 -0
package/cli/selftune/utils/lifecycle-surface.ts +48 -0
package/cli/selftune/utils/query-filter.ts +82 -1
package/cli/selftune/utils/tui.ts +85 -2
package/cli/selftune/verify.ts +205 -0
package/cli/selftune/workflows/proposals.ts +1 -1
package/cli/selftune/workflows/skill-scaffold.ts +141 -63
package/cli/selftune/workflows/workflows.ts +4 -4
package/package.json +1 -1
package/packages/dashboard-core/src/routes/manifest.ts +2 -2
package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
package/packages/ui/src/primitives/button.tsx +5 -0
package/skill/SKILL.md +148 -85
package/skill/references/cli-quick-reference.md +16 -1
package/skill/references/creator-playbook.md +31 -10
package/skill/workflows/Baseline.md +8 -9
package/skill/workflows/Contributions.md +4 -4
package/skill/workflows/Create.md +173 -0
package/skill/workflows/CreateTestDeploy.md +34 -30
package/skill/workflows/Cron.md +2 -2
package/skill/workflows/Dashboard.md +3 -3
package/skill/workflows/Evals.md +13 -7
package/skill/workflows/Evolve.md +75 -32
package/skill/workflows/EvolveBody.md +22 -15
package/skill/workflows/Hook.md +1 -1
package/skill/workflows/Improve.md +168 -0
package/skill/workflows/Initialize.md +3 -3
package/skill/workflows/Orchestrate.md +49 -12
package/skill/workflows/Publish.md +100 -0
package/skill/workflows/Registry.md +19 -13
package/skill/workflows/Run.md +72 -0
package/skill/workflows/Schedule.md +2 -2
package/skill/workflows/SearchRun.md +89 -0
package/skill/workflows/SignalsDashboard.md +2 -2
package/skill/workflows/UnitTest.md +13 -4
package/skill/workflows/Verify.md +136 -0
package/skill/workflows/Watch.md +114 -47
package/skill/workflows/Workflows.md +13 -8
package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +0 -1
package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +0 -15
package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1

package/cli/selftune/create/replay.ts ADDED Viewed

@@ -0,0 +1,330 @@
+import { existsSync, readFileSync } from "node:fs";
+import { parseArgs } from "node:util";
+import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
+import { parseSkillSections } from "../evolution/deploy-proposal.js";
+import {
+  buildRoutingReplayFixture,
+  resolveRuntimeReplayPlatform,
+  runHostRuntimeReplayFixture,
+  type RuntimeReplayInvoker,
+} from "../evolution/validate-host-replay.js";
+import { writeReplayEntryResultsToDb } from "../localdb/direct-write.js";
+import { getCanonicalEvalSetPath } from "../testing-readiness.js";
+import type {
+  EvalEntry,
+  ReplayStagingMode,
+  RoutingReplayEntryResult,
+  RuntimeReplayAggregateMetrics,
+} from "../types.js";
+import { isLlmBackedAgent, detectLlmAgent } from "../utils/llm-call.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
+import { readCreateSkillContext } from "./readiness.js";
+/** Replay mode accepted by `create replay`; a direct alias of the shared `ReplayStagingMode` type. */
+export type CreateReplayMode = ReplayStagingMode;
+/** Summary returned by `runCreateReplay` for one replay run. */
+export interface CreateReplayResult {
+  /** Skill name taken from the skill context that was read for the run. */
+  skill: string;
+  /** Skill path taken from the same skill context. */
+  skill_path: string;
+  /** Replay mode the run was executed with. */
+  mode: CreateReplayMode;
+  /** Agent that was requested explicitly or auto-detected. */
+  agent: string;
+  /** Identifier of the form `create-replay-<skill>-<mode>-<epoch ms>` under which results were persisted. */
+  proposal_id: string;
+  /** Number of replay entry results. */
+  total: number;
+  /** Number of entry results whose `passed` flag is truthy. */
+  passed: number;
+  /** `total - passed`. */
+  failed: number;
+  /** `passed / total`, or 0 when there are no results. */
+  pass_rate: number;
+  /** `fixture_id` of the routing replay fixture built for the run. */
+  fixture_id: string;
+  /** Per-entry replay results. */
+  results: RoutingReplayEntryResult[];
+  /** Aggregate produced by `summarizeReplayRuntimeMetrics` over `results`. */
+  runtime_metrics: RuntimeReplayAggregateMetrics;
+}
+/** Inputs accepted by `runCreateReplay`. */
+export interface RunCreateReplayOptions {
+  /** Path handed to `readCreateSkillContext` to load the skill. */
+  skillPath: string;
+  /** Replay mode; `"routing"` replays only the "Workflow Routing" section, any other mode replays the body. */
+  mode: CreateReplayMode;
+  /** Agent CLI to use; when omitted, null, or empty the agent is auto-detected. */
+  agent?: string | null;
+  /** Eval set file to load instead of the canonical one; ignored when blank. */
+  evalSetPath?: string;
+  /** Forwarded unchanged to `runHostRuntimeReplayFixture`. */
+  includeTargetSkill?: boolean;
+  /** Forwarded unchanged to `runHostRuntimeReplayFixture`. */
+  runtimeInvoker?: RuntimeReplayInvoker;
+}
+/**
+ * Read and parse the eval set used by `create replay`.
+ *
+ * @param skillName - Skill whose canonical eval set path is used when no explicit path is given.
+ * @param explicitPath - Optional override; ignored when it is blank after trimming.
+ * @returns The parsed JSON array cast to `EvalEntry[]` (entries are not validated individually).
+ * @throws CLIError `MISSING_DATA` when the file does not exist; `INVALID_FLAG` when it cannot
+ *   be read, is not valid JSON, or is not a JSON array.
+ */
+export function loadCreateEvalSet(skillName: string, explicitPath?: string): EvalEntry[] {
+  const evalSetPath = explicitPath?.trim() || getCanonicalEvalSetPath(skillName);
+  const fileIsPresent = existsSync(evalSetPath);
+  if (!fileIsPresent) {
+    throw new CLIError(
+      `No canonical eval set found for "${skillName}" at ${evalSetPath}.`,
+      "MISSING_DATA",
+      `Run selftune eval generate --skill ${skillName} --skill-path /path/to/${skillName}/SKILL.md --auto-synthetic`,
+    );
+  }
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(readFileSync(evalSetPath, "utf-8"));
+    if (!Array.isArray(decoded)) {
+      throw new Error("expected a JSON array");
+    }
+  } catch (cause) {
+    // Read, parse, and shape failures are all reported as one invalid-eval-set error.
+    const reason = cause instanceof Error ? cause.message : String(cause);
+    throw new CLIError(
+      `Eval set at ${evalSetPath} is invalid: ${reason}`,
+      "INVALID_FLAG",
+      `Regenerate the eval set with selftune eval generate --skill ${skillName}`,
+    );
+  }
+  return decoded as EvalEntry[];
+}
+/**
+ * Decide which agent CLI runs the replay.
+ *
+ * With no requested agent, falls back to `detectLlmAgent()`. A requested agent must be
+ * accepted by `isLlmBackedAgent` and be resolvable through `Bun.which`.
+ *
+ * @throws CLIError `INVALID_FLAG` for an unsupported agent name, `AGENT_NOT_FOUND` when the
+ *   requested CLI is not in PATH or nothing could be auto-detected.
+ */
+function resolveReplayAgent(requestedAgent?: string | null): string {
+  if (!requestedAgent) {
+    const autoDetected = detectLlmAgent();
+    if (autoDetected) {
+      return autoDetected;
+    }
+    throw new CLIError(
+      "No supported runtime replay agent was found in PATH.",
+      "AGENT_NOT_FOUND",
+      "Install Claude Code, Codex, OpenCode, or Pi, or pass --agent explicitly.",
+    );
+  }
+  if (!isLlmBackedAgent(requestedAgent)) {
+    throw new CLIError(
+      `Unsupported --agent value "${requestedAgent}".`,
+      "INVALID_FLAG",
+      "Use claude, codex, opencode, or pi.",
+    );
+  }
+  const executable = Bun.which(requestedAgent);
+  if (!executable) {
+    throw new CLIError(
+      `Agent CLI '${requestedAgent}' not found in PATH`,
+      "AGENT_NOT_FOUND",
+      "Install it or omit --agent to use auto-detection",
+    );
+  }
+  return requestedAgent;
+}
+/**
+ * Extract the text to replay from a skill document.
+ *
+ * In `"routing"` mode only the "Workflow Routing" section is used (empty string when the
+ * section is absent). In any other mode the trimmed description, if non-empty, is followed
+ * by every parsed section re-rendered under a `## <name>` heading.
+ *
+ * @returns The replay text and whether it targets the routing section or the body.
+ */
+function buildReplayContent(
+  skillContent: string,
+  mode: CreateReplayMode,
+): {
+  content: string;
+  contentTarget: "routing" | "body";
+} {
+  const { description, sections } = parseSkillSections(skillContent);
+  if (mode === "routing") {
+    return { content: sections["Workflow Routing"] ?? "", contentTarget: "routing" };
+  }
+  // Each section contributes four lines: heading, blank, trimmed content, blank.
+  const sectionLines = Object.entries(sections).flatMap(([heading, text]) => [
+    `## ${heading}`,
+    "",
+    text.trim(),
+    "",
+  ]);
+  const intro = description.trim();
+  const lines = intro ? [intro, ...sectionLines] : sectionLines;
+  return { content: lines.join("\n").trim(), contentTarget: "body" };
+}
+/**
+ * Store every replay entry result through `writeReplayEntryResultsToDb`.
+ *
+ * Each row is tagged with the proposal id, the skill name, validation mode `"host_replay"`,
+ * and a phase of `current_<mode>`.
+ */
+function persistReplayResults(
+  proposalId: string,
+  skillName: string,
+  mode: CreateReplayMode,
+  results: RoutingReplayEntryResult[],
+): void {
+  writeReplayEntryResultsToDb(
+    results.map(({ query, should_trigger, triggered, passed, evidence }) => {
+      return {
+        proposal_id: proposalId,
+        skill_name: skillName,
+        validation_mode: "host_replay",
+        phase: `current_${mode}`,
+        query,
+        should_trigger,
+        triggered,
+        passed,
+        evidence,
+      };
+    }),
+  );
+}
+/**
+ * Add up the finite numbers in `values`, skipping null, undefined, NaN, and infinities.
+ *
+ * @returns `total` is the sum, or null when no value qualified; `count` is how many qualified.
+ */
+function sumKnownMetric(values: Array<number | null | undefined>): {
+  total: number | null;
+  count: number;
+} {
+  const known = values.filter(
+    (candidate): candidate is number => typeof candidate === "number" && Number.isFinite(candidate),
+  );
+  if (known.length === 0) {
+    return { total: null, count: 0 };
+  }
+  const total = known.reduce((running, next) => running + next, 0);
+  return { total, count: known.length };
+}
+/**
+ * Aggregate per-entry runtime metrics into run-level totals.
+ *
+ * Durations treat a missing value as 0 and are averaged over all results. Token, cost, and
+ * turn totals only include finite numbers and are null when no entry reported one. An entry
+ * counts as a usage observation when it has metrics with a non-null input token, output
+ * token, cost, or turn value.
+ */
+export function summarizeReplayRuntimeMetrics(
+  results: RoutingReplayEntryResult[],
+): RuntimeReplayAggregateMetrics {
+  const knownTotal = (
+    pick: (entry: RoutingReplayEntryResult) => number | null | undefined,
+  ): number | null => sumKnownMetric(results.map(pick)).total;
+  const evalRuns = results.length;
+  let totalDurationMs = 0;
+  let usageObservations = 0;
+  for (const { runtime_metrics: metrics } of results) {
+    totalDurationMs += metrics?.duration_ms ?? 0;
+    if (!metrics) continue;
+    const reportsUsage =
+      metrics.input_tokens != null ||
+      metrics.output_tokens != null ||
+      metrics.total_cost_usd != null ||
+      metrics.num_turns != null;
+    if (reportsUsage) usageObservations += 1;
+  }
+  const averageDurationMs = evalRuns > 0 ? totalDurationMs / evalRuns : 0;
+  return {
+    eval_runs: evalRuns,
+    usage_observations: usageObservations,
+    total_duration_ms: totalDurationMs,
+    avg_duration_ms: averageDurationMs,
+    total_input_tokens: knownTotal((entry) => entry.runtime_metrics?.input_tokens),
+    total_output_tokens: knownTotal((entry) => entry.runtime_metrics?.output_tokens),
+    total_cache_creation_input_tokens: knownTotal(
+      (entry) => entry.runtime_metrics?.cache_creation_input_tokens,
+    ),
+    total_cache_read_input_tokens: knownTotal(
+      (entry) => entry.runtime_metrics?.cache_read_input_tokens,
+    ),
+    total_cost_usd: knownTotal((entry) => entry.runtime_metrics?.total_cost_usd),
+    total_turns: knownTotal((entry) => entry.runtime_metrics?.num_turns),
+  };
+}
+/**
+ * Replay a skill's eval set through a host agent runtime and record the outcome.
+ *
+ * Steps, in order: read the skill context, resolve the agent and its replay platform, load
+ * the eval set, pick the replay content for the mode, build the fixture, run it, persist the
+ * per-entry results under a fresh proposal id, and summarize runtime metrics.
+ *
+ * @throws CLIError `REPLAY_UNAVAILABLE` when no replay platform exists for the agent, plus
+ *   whatever the agent-resolution and eval-set-loading helpers throw.
+ */
+export async function runCreateReplay(
+  options: RunCreateReplayOptions,
+): Promise<CreateReplayResult> {
+  const skillContext = readCreateSkillContext(options.skillPath);
+  const agent = resolveReplayAgent(options.agent);
+  const platform = resolveRuntimeReplayPlatform(agent);
+  if (!platform) {
+    throw new CLIError(
+      `Runtime replay is unavailable for agent "${agent}".`,
+      "REPLAY_UNAVAILABLE",
+      "Use claude, codex, or opencode for create replay.",
+    );
+  }
+  const { skill_name: skillName, skill_path: skillPath } = skillContext;
+  const evalSet = loadCreateEvalSet(skillName, options.evalSetPath);
+  const replayContent = buildReplayContent(skillContext.skill_content, options.mode);
+  const fixture = buildRoutingReplayFixture({
+    skillName,
+    skillPath,
+    platform,
+    stagingMode: options.mode,
+  });
+  const results = await runHostRuntimeReplayFixture({
+    routing: replayContent.content,
+    evalSet,
+    fixture,
+    contentTarget: replayContent.contentTarget,
+    includeTargetSkill: options.includeTargetSkill,
+    runtimeInvoker: options.runtimeInvoker,
+  });
+  let passed = 0;
+  for (const entry of results) {
+    if (entry.passed) passed += 1;
+  }
+  const total = results.length;
+  // The timestamp suffix distinguishes repeated replays of the same skill and mode.
+  const proposalId = `create-replay-${skillName}-${options.mode}-${Date.now()}`;
+  persistReplayResults(proposalId, skillName, options.mode, results);
+  const runtimeMetrics = summarizeReplayRuntimeMetrics(results);
+  const passRate = total > 0 ? passed / total : 0;
+  return {
+    skill: skillName,
+    skill_path: skillPath,
+    mode: options.mode,
+    agent,
+    proposal_id: proposalId,
+    total,
+    passed,
+    failed: total - passed,
+    pass_rate: passRate,
+    fixture_id: fixture.fixture_id,
+    results,
+    runtime_metrics: runtimeMetrics,
+  };
+}
+/** Render a replay result as five human-readable lines (skill, mode, agent, pass rate, record id). */
+function formatReplayResult(result: CreateReplayResult): string {
+  const { skill, mode, agent, passed, total, proposal_id: replayRecord } = result;
+  const percent = (result.pass_rate * 100).toFixed(1);
+  const lines: string[] = [];
+  lines.push(`Skill: ${skill}`);
+  lines.push(`Mode: ${mode}`);
+  lines.push(`Agent: ${agent}`);
+  lines.push(`Pass rate: ${percent}% (${passed}/${total})`);
+  lines.push(`Replay record: ${replayRecord}`);
+  return lines.join("\n");
+}
+/**
+ * CLI entry point for `selftune create replay`.
+ *
+ * Parses flags, prints help and exits 0 for `--help`, validates `--skill-path` and `--mode`,
+ * runs the replay, and prints the result as JSON (when `--json` is set or stdout is not a
+ * TTY) or as a short text summary. Exits 0 when no entry failed, 1 otherwise.
+ *
+ * @throws CLIError `MISSING_FLAG` when `--skill-path` is absent or blank, `INVALID_FLAG` when
+ *   `--mode` is neither `routing` nor `package`.
+ */
+export async function cliMain(): Promise<void> {
+  const { values } = parseArgs({
+    options: {
+      "skill-path": { type: "string" },
+      mode: { type: "string", default: "routing" },
+      agent: { type: "string" },
+      "eval-set": { type: "string" },
+      json: { type: "boolean", default: false },
+      help: { type: "boolean", short: "h", default: false },
+    },
+    strict: true,
+  });
+  if (values.help) {
+    console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.createReplay));
+    process.exit(0);
+  }
+  // Reject a missing path up front, mirroring `create report`, instead of handing "" to the
+  // skill-context reader.
+  const skillPath = values["skill-path"] ?? "";
+  if (!skillPath.trim()) {
+    throw new CLIError(
+      "--skill-path <path> is required.",
+      "MISSING_FLAG",
+      "selftune create replay --skill-path <path>",
+    );
+  }
+  const mode = values.mode;
+  if (mode !== "routing" && mode !== "package") {
+    throw new CLIError(
+      `Unsupported --mode value "${mode}".`,
+      "INVALID_FLAG",
+      "Use --mode routing or --mode package.",
+    );
+  }
+  const result = await runCreateReplay({
+    skillPath,
+    mode,
+    agent: values.agent,
+    evalSetPath: values["eval-set"],
+  });
+  if (values.json || !process.stdout.isTTY) {
+    console.log(JSON.stringify(result, null, 2));
+  } else {
+    console.log(formatReplayResult(result));
+  }
+  process.exit(result.failed === 0 ? 0 : 1);
+}
+// Run the CLI only when `import.meta.main` is set; rejections are routed to handleCLIError.
+if (import.meta.main) {
+  cliMain().catch(handleCLIError);
+}

package/cli/selftune/create/report.ts ADDED Viewed

@@ -0,0 +1,74 @@
+import { parseArgs } from "node:util";
+import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
+import {
+  formatCreatePackageBenchmarkReport,
+  runCreatePackageEvaluation,
+  type CreatePackageEvaluationDeps,
+  type CreatePackageEvaluationResult,
+} from "./package-evaluator.js";
+/** Inputs accepted by `runCreateReport`; all three are forwarded to `runCreatePackageEvaluation`. */
+export interface RunCreateReportOptions {
+  /** Skill path to evaluate; must not be blank. */
+  skillPath: string;
+  /** Optional agent name, forwarded as-is. */
+  agent?: string;
+  /** Optional eval set path, forwarded as-is. */
+  evalSetPath?: string;
+}
+/**
+ * Run the package evaluation behind `selftune create report`.
+ *
+ * @param options - Skill path plus optional agent and eval set path.
+ * @param deps - Dependency overrides passed straight to `runCreatePackageEvaluation`.
+ * @throws CLIError `MISSING_FLAG` when `options.skillPath` is blank.
+ */
+export async function runCreateReport(
+  options: RunCreateReportOptions,
+  deps: CreatePackageEvaluationDeps = {},
+): Promise<CreatePackageEvaluationResult> {
+  const { skillPath, agent, evalSetPath } = options;
+  if (skillPath.trim().length === 0) {
+    throw new CLIError(
+      "--skill-path <path> is required.",
+      "MISSING_FLAG",
+      "selftune create report --skill-path <path>",
+    );
+  }
+  const evaluationOptions = { skillPath, agent, evalSetPath };
+  return runCreatePackageEvaluation(evaluationOptions, deps);
+}
+/**
+ * CLI entry point for `selftune create report`.
+ *
+ * Prints help and exits 0 for `--help`; otherwise runs the report, prints it as JSON (when
+ * `--json` is set or stdout is not a TTY) or as the formatted benchmark report, and exits 0
+ * when `summary.evaluation_passed` is truthy, 1 otherwise.
+ */
+export async function cliMain(): Promise<void> {
+  const parsed = parseArgs({
+    options: {
+      "skill-path": { type: "string" },
+      agent: { type: "string" },
+      "eval-set": { type: "string" },
+      json: { type: "boolean", default: false },
+      help: { type: "boolean", short: "h", default: false },
+    },
+    strict: true,
+  });
+  const flags = parsed.values;
+  if (flags.help) {
+    console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.createReport));
+    process.exit(0);
+  }
+  const evaluation = await runCreateReport({
+    skillPath: flags["skill-path"] ?? "",
+    agent: flags.agent,
+    evalSetPath: flags["eval-set"],
+  });
+  const wantsJson = flags.json || !process.stdout.isTTY;
+  console.log(
+    wantsJson
+      ? JSON.stringify(evaluation, null, 2)
+      : formatCreatePackageBenchmarkReport(evaluation),
+  );
+  process.exit(evaluation.summary.evaluation_passed ? 0 : 1);
+}
+// Run the CLI only when `import.meta.main` is set; rejections are routed to handleCLIError.
+if (import.meta.main) {
+  cliMain().catch(handleCLIError);
+}

package/cli/selftune/create/scaffold.ts ADDED Viewed

@@ -0,0 +1,121 @@
+import { existsSync } from "node:fs";
+import { parseArgs } from "node:util";
+import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
+import { getDb } from "../localdb/db.js";
+import { querySessionTelemetry, querySkillUsageRecords } from "../localdb/queries.js";
+import type {
+  DiscoveredWorkflow,
+  SessionTelemetryRecord,
+  SkillUsageRecord,
+  WorkflowDiscoveryReport,
+} from "../types.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
+import { discoverWorkflows } from "../workflows/discover.js";
+import { buildWorkflowSkillDraft } from "../workflows/skill-scaffold.js";
+import { writeCreateSkillDraft } from "./init.js";
+/**
+ * Pick one discovered workflow from a `--from-workflow` value.
+ *
+ * The value is matched against `workflow_id` first. Only if no id matches, and the trimmed
+ * value consists solely of decimal digits, is it treated as a 1-based index into
+ * `report.workflows`.
+ *
+ * @param report - Discovery report whose `workflows` list is searched.
+ * @param selection - Workflow id or 1-based index as typed by the user.
+ * @returns The matching workflow.
+ * @throws CLIError `MISSING_FLAG` when `selection` is empty, `INVALID_FLAG` when nothing matches.
+ */
+function resolveWorkflowSelection(
+  report: WorkflowDiscoveryReport,
+  selection: string | undefined,
+): DiscoveredWorkflow {
+  if (!selection) {
+    throw new CLIError(
+      "--from-workflow <id|index> is required",
+      "MISSING_FLAG",
+      "selftune create scaffold --from-workflow <id|index>",
+    );
+  }
+  const matchedById = report.workflows.find((candidate) => candidate.workflow_id === selection);
+  if (matchedById) {
+    return matchedById;
+  }
+  // Number.parseInt would accept "12ab34" as 12, so a stale or mistyped id could silently
+  // select an unrelated workflow. Require the whole value to be digits before using it as
+  // an index.
+  const indexText = selection.trim();
+  if (/^\d+$/.test(indexText)) {
+    const position = Number(indexText);
+    const matchedByIndex = position >= 1 ? report.workflows[position - 1] : undefined;
+    if (matchedByIndex) {
+      return matchedByIndex;
+    }
+  }
+  throw new CLIError(
+    `No workflow found matching "${selection}".`,
+    "INVALID_FLAG",
+    "Run 'selftune workflows' to inspect discovered workflows first.",
+  );
+}
+/**
+ * CLI entry point for `selftune create scaffold`.
+ *
+ * Discovers workflows from the local session telemetry and skill usage records, resolves the
+ * one named by `--from-workflow`, and builds a skill draft from it. With `--write` the draft
+ * is written through `writeCreateSkillDraft` (passing `--force` along); without it the draft
+ * is only printed, followed by a warning when the target directory already exists. Output is
+ * JSON when `--json` is set or stdout is not a TTY.
+ *
+ * @throws CLIError `INVALID_FLAG` when `--min-occurrences` is not a non-negative integer.
+ */
+export async function cliMain(): Promise<void> {
+  const { values } = parseArgs({
+    options: {
+      "from-workflow": { type: "string" },
+      "output-dir": { type: "string" },
+      "skill-name": { type: "string" },
+      description: { type: "string" },
+      write: { type: "boolean", default: false },
+      force: { type: "boolean", default: false },
+      json: { type: "boolean", default: false },
+      "min-occurrences": { type: "string" },
+      skill: { type: "string" },
+      help: { type: "boolean", short: "h", default: false },
+    },
+    strict: true,
+  });
+  if (values.help) {
+    console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.createScaffold));
+    process.exit(0);
+  }
+  // Parse strictly: Number.parseInt would turn "3.7" or "5x" into a number, and an empty
+  // value used to be ignored silently, despite the "integer" wording of the error.
+  const rawMinOccurrences = values["min-occurrences"];
+  let minOccurrences: number | undefined;
+  if (rawMinOccurrences !== undefined) {
+    const digits = rawMinOccurrences.trim();
+    const parsedMin = /^\d+$/.test(digits) ? Number(digits) : Number.NaN;
+    if (!Number.isSafeInteger(parsedMin)) {
+      throw new CLIError("--min-occurrences must be a non-negative integer.", "INVALID_FLAG");
+    }
+    minOccurrences = parsedMin;
+  }
+  const db = getDb();
+  const telemetry = querySessionTelemetry(db) as SessionTelemetryRecord[];
+  const usage = querySkillUsageRecords(db) as SkillUsageRecord[];
+  const report = discoverWorkflows(telemetry, usage, {
+    minOccurrences,
+    skill: values.skill,
+  });
+  const workflow = resolveWorkflowSelection(report, values["from-workflow"]);
+  const draft = buildWorkflowSkillDraft(workflow, {
+    outputDir: values["output-dir"],
+    skillName: values["skill-name"],
+    description: values.description,
+    generatedBy: "selftune create scaffold",
+  });
+  const wantsJson = values.json || !process.stdout.isTTY;
+  if (values.write) {
+    const result = writeCreateSkillDraft(draft, { force: values.force });
+    if (wantsJson) {
+      console.log(JSON.stringify(result, null, 2));
+      return;
+    }
+    console.log(
+      `Scaffolded skill package "${draft.skill_name}" to ${draft.skill_dir}${result.overwritten ? " (overwritten)" : ""}`,
+    );
+    return;
+  }
+  if (wantsJson) {
+    console.log(JSON.stringify({ ...draft, written: false }, null, 2));
+    return;
+  }
+  console.log(draft.content);
+  if (existsSync(draft.skill_dir)) {
+    console.log("");
+    console.log(
+      `[WARN] ${draft.skill_dir} already exists. Re-run with --write --force to overwrite.`,
+    );
+  }
+}
+// Run the CLI only when `import.meta.main` is set; rejections are routed to handleCLIError.
+if (import.meta.main) {
+  cliMain().catch(handleCLIError);
+}

package/cli/selftune/create/skills-ref-adapter.ts ADDED Viewed

@@ -0,0 +1,177 @@
+import type { AgentSkillValidationIssue, AgentSkillValidationResult } from "../types.js";
+/** One way of invoking the skills-ref validator. */
+interface ValidatorCommand {
+  /** Human-readable command line; reported in the validation result with the skill dir appended. */
+  command: string;
+  /** Argument vector that is spawned, with the skill dir appended as the final argument. */
+  argv: string[];
+}
+/** Optional overrides for the process helpers used by `validateAgentSkill`. */
+export interface ValidateAgentSkillDeps {
+  /** Executable lookup; defaults to `Bun.which`. */
+  which?: (command: string) => string | null;
+  /** Synchronous process runner; defaults to `Bun.spawnSync`. */
+  spawnSync?: typeof Bun.spawnSync;
+}
+/**
+ * Validator invocations to try, in order of preference.
+ *
+ * No `as const` here: it would turn each `argv` into a readonly tuple, which is not
+ * assignable to the mutable `string[]` declared on `ValidatorCommand`. The
+ * `readonly ValidatorCommand[]` annotation already prevents the list from being modified.
+ */
+const VALIDATOR_COMMANDS: readonly ValidatorCommand[] = [
+  {
+    command: "uvx --from skills-ref agentskills validate",
+    argv: ["uvx", "--from", "skills-ref", "agentskills", "validate"],
+  },
+  {
+    command: "uvx skills-ref validate",
+    argv: ["uvx", "skills-ref", "validate"],
+  },
+  {
+    command: "npx skills-ref validate",
+    argv: ["npx", "skills-ref", "validate"],
+  },
+];
+/** Label an output line "warning" when it contains the whole word "warn" or "warning" (any case), else "error". */
+function classifyIssueLevel(line: string): "error" | "warning" {
+  const mentionsWarning = /\bwarn(?:ing)?\b/i.test(line);
+  if (mentionsWarning) {
+    return "warning";
+  }
+  return "error";
+}
+/**
+ * Convert validator output into a list of issues.
+ *
+ * stderr and stdout are joined (stderr first). With no output at all, a zero exit code
+ * yields no issues and any other exit code yields one generic `validation_failed` error.
+ * Otherwise each distinct non-empty trimmed line becomes one issue, coded
+ * `skills_ref_<n>` in order of first appearance.
+ */
+function normalizeIssues(
+  stdout: string,
+  stderr: string,
+  exitCode: number | null,
+): AgentSkillValidationIssue[] {
+  const combined = `${stderr}\n${stdout}`.trim();
+  if (combined === "") {
+    if (exitCode === 0) {
+      return [];
+    }
+    return [
+      {
+        level: "error",
+        code: "validation_failed",
+        message: `skills-ref exited with code ${exitCode ?? "unknown"}.`,
+      },
+    ];
+  }
+  const nonEmptyLines = combined
+    .split(/\r?\n/)
+    .map((rawLine) => rawLine.trim())
+    .filter(Boolean);
+  // A Set keeps first-insertion order, so duplicates are dropped without reordering.
+  const distinctLines = [...new Set(nonEmptyLines)];
+  return distinctLines.map((message, position) => {
+    return {
+      level: classifyIssueLevel(message),
+      code: `skills_ref_${position + 1}`,
+      message,
+    };
+  });
+}
+/** Coerce spawn output to text: strings pass through, null/undefined become "", anything else is decoded as UTF-8 bytes. */
+function readSpawnText(output: unknown): string {
+  if (output == null) {
+    return "";
+  }
+  return typeof output === "string"
+    ? output
+    : Buffer.from(output as ArrayBufferLike).toString("utf-8");
+}
+/**
+ * Tell whether a non-zero exit looks like the validator command itself failing to run,
+ * rather than the validator reporting problems with the skill.
+ *
+ * A zero exit code is never an invocation failure. A non-zero exit with no output always
+ * is. Otherwise the combined stderr/stdout text is checked against known CLI and
+ * package-resolution error phrases.
+ */
+function isValidatorInvocationFailure(
+  stdout: string,
+  stderr: string,
+  exitCode: number | null,
+): boolean {
+  if (exitCode === 0) {
+    return false;
+  }
+  const combined = `${stderr}\n${stdout}`.trim();
+  if (combined.length === 0) {
+    return true;
+  }
+  const invocationErrorPatterns = [
+    /No such option/i,
+    /unknown command/i,
+    /unknown argument/i,
+    /unrecognized (argument|option)/i,
+    /usage:\s*(uvx|npx|skills-ref|agentskills)\b/i,
+    /command not found/i,
+    /not found in PATH/i,
+    /No such file or directory/i,
+    /Unable to locate executable/i,
+    /The executable [`'"]?agentskills[`'"]? was not found/i,
+    /No package .*skills-ref/i,
+    /failed to resolve/i,
+  ];
+  for (const pattern of invocationErrorPatterns) {
+    if (pattern.test(combined)) {
+      return true;
+    }
+  }
+  return false;
+}
+/**
+ * Validate a skill directory with the skills-ref validator.
+ *
+ * Every entry of `VALIDATOR_COMMANDS` whose launcher (`argv[0]`) can be located is tried in
+ * order, with `skillDir` as both the working directory and the final argument. The first
+ * run that exits 0 is returned with only its warning-level issues. A failing run is
+ * returned immediately unless it looks like an invocation failure, in which case the next
+ * candidate is tried; if all candidates fail that way, the last failure is returned.
+ *
+ * @param skillDir - Directory of the skill to validate.
+ * @param deps - Optional replacements for executable lookup and process spawning.
+ * @returns The validation result; `ok` is true only for a zero exit code. When no launcher
+ *   is available, a single `validator_unavailable` error is returned.
+ */
+export async function validateAgentSkill(
+  skillDir: string,
+  deps: ValidateAgentSkillDeps = {},
+): Promise<AgentSkillValidationResult> {
+  const locate = deps.which ?? ((command: string) => Bun.which(command));
+  const run = deps.spawnSync ?? Bun.spawnSync;
+  const runnable = VALIDATOR_COMMANDS.filter(({ argv }) => locate(argv[0]) != null);
+  if (runnable.length === 0) {
+    return {
+      ok: false,
+      issues: [
+        {
+          level: "error",
+          code: "validator_unavailable",
+          message:
+            "No Agent Skills validator was found. Install uv/uvx or use npx so selftune can run skills-ref validate.",
+        },
+      ],
+      raw_stdout: "",
+      raw_stderr: "",
+      exit_code: null,
+      validator: "skills-ref",
+      command: null,
+    };
+  }
+  let mostRecentFailure: AgentSkillValidationResult | null = null;
+  for (const { command, argv } of runnable) {
+    const proc = run([...argv, skillDir], {
+      cwd: skillDir,
+      stdout: "pipe",
+      stderr: "pipe",
+      env: process.env,
+    });
+    const rawStdout = readSpawnText(proc.stdout);
+    const rawStderr = readSpawnText(proc.stderr);
+    const exitCode = proc.exitCode;
+    const succeeded = exitCode === 0;
+    const parsedIssues = normalizeIssues(rawStdout, rawStderr, exitCode);
+    const outcome: AgentSkillValidationResult = {
+      ok: succeeded,
+      // On success only warnings are kept; other output lines are not reported as issues.
+      issues: succeeded
+        ? parsedIssues.filter((issue) => issue.level === "warning")
+        : parsedIssues,
+      raw_stdout: rawStdout,
+      raw_stderr: rawStderr,
+      exit_code: exitCode,
+      validator: "skills-ref",
+      command: `${command} ${skillDir}`,
+    };
+    if (succeeded || !isValidatorInvocationFailure(rawStdout, rawStderr, exitCode)) {
+      return outcome;
+    }
+    // The command itself could not run; remember the failure and try the next candidate.
+    mostRecentFailure = outcome;
+  }
+  if (mostRecentFailure) {
+    return mostRecentFailure;
+  }
+  return {
+    ok: false,
+    issues: [
+      {
+        level: "error",
+        code: "validation_failed",
+        message: "skills-ref validation failed for an unknown reason.",
+      },
+    ],
+    raw_stdout: "",
+    raw_stderr: "",
+    exit_code: null,
+    validator: "skills-ref",
+    command: null,
+  };
+}