npm - selftune - Versions diffs - 0.2.29 → 0.2.30 - Mend

selftune 0.2.29 → 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +1 -0
package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +15 -0
package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +1 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/auto-update.ts +40 -8
package/cli/selftune/command-surface.ts +1 -1
package/cli/selftune/constants.ts +5 -0
package/cli/selftune/dashboard-action-events.ts +117 -0
package/cli/selftune/dashboard-action-instrumentation.ts +103 -0
package/cli/selftune/dashboard-action-result.ts +90 -0
package/cli/selftune/dashboard-action-stream.ts +252 -0
package/cli/selftune/dashboard-contract.ts +81 -1
package/cli/selftune/dashboard-server.ts +133 -16
package/cli/selftune/eval/hooks-to-evals.ts +157 -0
package/cli/selftune/eval/synthetic-evals.ts +33 -2
package/cli/selftune/eval/unit-test-cli.ts +53 -5
package/cli/selftune/evolution/validate-host-replay.ts +191 -14
package/cli/selftune/index.ts +4 -0
package/cli/selftune/ingestors/opencode-ingest.ts +117 -8
package/cli/selftune/localdb/schema.ts +34 -0
package/cli/selftune/routes/actions.ts +273 -42
package/cli/selftune/testing-readiness.ts +203 -10
package/cli/selftune/utils/llm-call.ts +90 -1
package/package.json +1 -1
package/packages/ui/src/components/EvolutionTimeline.tsx +1 -1
package/skill/SKILL.md +1 -1
package/skill/workflows/Dashboard.md +50 -23
package/apps/local-dashboard/dist/assets/index-BcvtYmmL.js +0 -15
package/apps/local-dashboard/dist/assets/index-BpRIxnpS.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-ui-DqH_uxum.js +0 -1

package/cli/selftune/routes/actions.ts CHANGED Viewed

@@ -4,44 +4,235 @@
  * Triggers selftune CLI commands as child processes and returns the result.
  */
+import { randomUUID } from "node:crypto";
 import { join } from "node:path";
+import {
+  dashboardActionContextEnv,
+  type DashboardActionContext,
+} from "../dashboard-action-events.js";
+import { resolveDashboardActionOutcome } from "../dashboard-action-result.js";
+import type { DashboardActionEvent, DashboardActionName } from "../dashboard-contract.js";
+import { getCanonicalEvalSetPath, getUnitTestPath } from "../testing-readiness.js";
 import { saveWatchedSkills } from "../watchlist.js";
+export interface ActionExecutionHooks {
   // Optional hooks a caller may hand to an ActionRunner; every member can be omitted.
   /** Action context; runAction passes it to dashboardActionContextEnv and spreads the result into the child env. */
+  actionContext?: DashboardActionContext;
   /** Called with each decoded, non-empty chunk read from the child's stdout. */
+  onStdout?: (chunk: string) => void;
   /** Called with each decoded, non-empty chunk read from the child's stderr. */
+  onStderr?: (chunk: string) => void;
+}
 /**
  * Function that executes one selftune CLI command on behalf of the dashboard.
  * It receives the command name, its arguments and optional hooks, and resolves to
  * the success flag, the captured output, an error message (or null) and the exit
  * code (or null). runAction is the default implementation used by handleAction.
  */
 export type ActionRunner = (
   command: string,
   args: string[],
-) => Promise<{ success: boolean; output: string; error: string | null }>;
+  hooks?: ActionExecutionHooks,
+) => Promise<{
+  success: boolean;
+  output: string;
+  error: string | null;
+  exitCode: number | null;
+}>;
 /** Callback that receives a single DashboardActionEvent and returns nothing. */
+export type ActionEventEmitter = (event: DashboardActionEvent) => void;
+/**
+ * Drains a byte stream to completion and returns everything it produced as one string.
+ *
+ * Bytes are decoded incrementally, so a multi-byte character split across two reads
+ * is not corrupted. Each non-empty decoded piece is also handed to `onChunk` in
+ * arrival order, followed by whatever the decoder still holds once the stream ends.
+ *
+ * @param stream - stream to read; null or undefined yields "".
+ * @param onChunk - optional observer for each non-empty decoded piece.
+ * @returns the concatenation of all decoded pieces.
+ */
+async function readProcessStream(
+  stream: ReadableStream<Uint8Array> | null | undefined,
+  onChunk?: (chunk: string) => void,
+): Promise<string> {
+  if (!stream) return "";
+  const pieces: string[] = [];
+  // Keep a decoded piece and notify the observer; empty pieces are dropped.
+  const record = (text: string): void => {
+    if (text === "") return;
+    pieces.push(text);
+    onChunk?.(text);
+  };
+  const source = stream.getReader();
+  const utf8 = new TextDecoder();
+  for (let next = await source.read(); !next.done; next = await source.read()) {
+    record(utf8.decode(next.value, { stream: true }));
+  }
+  // Final flush: emits any bytes the streaming decoder was still buffering.
+  record(utf8.decode());
+  return pieces.join("");
+}
 /**
  * Runs `bun run <index.ts> <command> ...args` as a child process and collects its result.
  *
  * stdout and stderr are piped and read while the process runs; decoded chunks are
  * forwarded to hooks.onStdout / hooks.onStderr as they arrive.
  *
  * @param command - selftune subcommand, placed right after the index.ts path.
  * @param args - arguments appended after the command.
  * @param hooks - optional stream observers and action context.
  * @returns the success flag, captured stdout as `output`, an error message or null,
  *   and the exit code (null when the catch branch below produced the result).
  */
 export async function runAction(
   command: string,
   args: string[],
-): Promise<{ success: boolean; output: string; error: string | null }> {
+  hooks?: ActionExecutionHooks,
+): Promise<{
+  success: boolean;
+  output: string;
+  error: string | null;
+  exitCode: number | null;
+}> {
   try {
     const indexPath = join(import.meta.dir, "..", "index.ts");
     const proc = Bun.spawn(["bun", "run", indexPath, command, ...args], {
       stdout: "pipe",
       stderr: "pipe",
       // Child inherits the parent environment; the two SELFTUNE_* flags are forced to "1"
       // and the action-context variables are spread last, so they win on key collisions.
+      env: {
+        ...process.env,
+        SELFTUNE_SKIP_AUTO_UPDATE: "1",
+        SELFTUNE_DASHBOARD_STREAM_DISABLE: "1",
+        ...dashboardActionContextEnv(hooks?.actionContext ?? null),
+      },
     });
-    const [stdout, stderr] = await Promise.all([
-      new Response(proc.stdout).text(),
-      new Response(proc.stderr).text(),
     // Both readers are started before awaiting so the two pipes are consumed concurrently
     // with waiting for the process to exit.
+    const stdoutPromise = readProcessStream(proc.stdout, hooks?.onStdout);
+    const stderrPromise = readProcessStream(proc.stderr, hooks?.onStderr);
+    const [exitCode, stdout, stderr] = await Promise.all([
+      proc.exited,
+      stdoutPromise,
+      stderrPromise,
     ]);
-    const exitCode = await proc.exited;
-    if (exitCode !== 0) {
-      return { success: false, output: stdout, error: stderr || `Exit code ${exitCode}` };
-    }
-    return { success: true, output: stdout, error: null };
     // Only an "evolve" run carrying --dry-run is classified by resolveDashboardActionOutcome;
     // every other command is judged by its exit code alone.
+    const action = command === "evolve" && args.includes("--dry-run") ? "replay-dry-run" : null;
+    const outcome = action
+      ? resolveDashboardActionOutcome({
+          action,
+          stdout,
+          stderr,
+          exitCode,
+        })
+      : {
+          success: exitCode === 0,
+          error: exitCode === 0 ? null : stderr || `Exit code ${exitCode}`,
+        };
+    return {
+      success: outcome.success,
+      output: stdout,
+      error: outcome.error,
+      exitCode,
+    };
   } catch (err: unknown) {
     // Anything thrown inside the try block is reported as a failed result instead of propagating.
     const message = err instanceof Error ? err.message : String(err);
-    return { success: false, output: "", error: message };
+    return { success: false, output: "", error: message, exitCode: null };
+  }
+}
+/**
+ * Extracts the `skill` and `skillPath` fields that every CLI-backed action needs.
+ *
+ * `body` is typed `Record<string, unknown>`, so both fields are checked at runtime
+ * instead of being cast: anything that is not a non-empty string (a number, object,
+ * array, …) is rejected here rather than forwarded into a child-process argument list.
+ *
+ * @param body - request payload with unvalidated field values.
+ * @returns the two validated strings, or a 400 JSON Response naming the required fields.
+ */
+function requireSkillInput(
+  body: Record<string, unknown>,
+): { skill: string; skillPath: string } | Response {
+  const { skill, skillPath } = body;
+  const isNonEmptyString = (value: unknown): value is string =>
+    typeof value === "string" && value.length > 0;
+  if (!isNonEmptyString(skill) || !isNonEmptyString(skillPath)) {
+    return Response.json(
+      { success: false, error: "Missing required fields: skill, skillPath" },
+      { status: 400 },
+    );
   }
+  return { skill, skillPath };
+}
+/** Inputs handed to a per-action builder once `skill` and `skillPath` have been validated. */
+interface ActionBuildInput {
+  skill: string;
+  skillPath: string;
+  body: Record<string, unknown>;
+}
+/** Produces the CLI command and argv for one action, or a 400 Response for a bad optional field. */
+type ActionCommandBuilder = (
+  input: ActionBuildInput,
+) => { command: string; args: string[] } | Response;
+/**
+ * Known dashboard actions mapped to the selftune CLI invocation each one triggers.
+ * A Map (rather than an object literal) so that names such as "constructor" never
+ * resolve to an inherited property.
+ */
+const ACTION_COMMAND_BUILDERS: ReadonlyMap<string, ActionCommandBuilder> = new Map<
+  string,
+  ActionCommandBuilder
+>([
+  [
+    "generate-evals",
+    ({ skill, skillPath, body }) => {
+      const args = [
+        "generate",
+        "--skill",
+        skill,
+        "--skill-path",
+        skillPath,
+        "--output",
+        getCanonicalEvalSetPath(skill),
+      ];
+      // Synthetic generation is opt-in: only the literal boolean `true` enables it.
+      if (body.autoSynthetic === true) {
+        args.push("--auto-synthetic");
+      }
+      return { command: "eval", args };
+    },
+  ],
+  [
+    "generate-unit-tests",
+    ({ skill, skillPath }) => ({
+      command: "eval",
+      args: [
+        "unit-test",
+        "--skill",
+        skill,
+        "--generate",
+        "--skill-path",
+        skillPath,
+        "--tests",
+        getUnitTestPath(skill),
+      ],
+    }),
+  ],
+  [
+    "replay-dry-run",
+    ({ skill, skillPath }) => ({
+      command: "evolve",
+      args: [
+        "--skill",
+        skill,
+        "--skill-path",
+        skillPath,
+        "--dry-run",
+        "--validation-mode",
+        "replay",
+        "--sync-first",
+      ],
+    }),
+  ],
+  [
+    "measure-baseline",
+    ({ skill, skillPath }) => ({
+      command: "grade",
+      args: ["baseline", "--skill", skill, "--skill-path", skillPath],
+    }),
+  ],
+  [
+    "deploy-candidate",
+    ({ skill, skillPath }) => ({
+      command: "evolve",
+      args: ["--skill", skill, "--skill-path", skillPath, "--sync-first"],
+    }),
+  ],
+  [
+    "watch",
+    ({ skill, skillPath }) => ({
+      command: "watch",
+      args: ["--skill", skill, "--skill-path", skillPath, "--sync-first"],
+    }),
+  ],
+  [
+    "rollback",
+    ({ skill, skillPath, body }) => {
+      const args = ["rollback", "--skill", skill, "--skill-path", skillPath];
+      const { proposalId } = body;
+      // Falsy values mean "no proposal id", as before. A truthy non-string is rejected:
+      // dropping it silently would run the rollback without the proposal the caller named.
+      if (proposalId) {
+        if (typeof proposalId !== "string") {
+          return Response.json(
+            { success: false, error: "Invalid field: proposalId must be a string" },
+            { status: 400 },
+          );
+        }
+        args.push("--proposal-id", proposalId);
+      }
+      return { command: "evolve", args };
+    },
+  ],
+]);
+/**
+ * Translates a dashboard action plus its request body into a CLI command and argv.
+ *
+ * The action name is resolved first, so an unrecognised action is always reported as
+ * "Unknown action" even when the body also lacks `skill` / `skillPath`. Only then are
+ * the skill fields validated and the per-action argument list built.
+ *
+ * @param action - action name to translate.
+ * @param body - request payload carrying `skill`, `skillPath` and per-action options.
+ * @returns the command, argv and validated skill fields, or a 400 JSON Response when the
+ *   action is unknown or an input field is missing or invalid.
+ */
+function buildActionExecution(
+  action: DashboardActionName,
+  body: Record<string, unknown>,
+): { command: string; args: string[]; skill: string; skillPath: string } | Response {
+  const build = ACTION_COMMAND_BUILDERS.get(action);
+  if (!build) {
+    return Response.json({ success: false, error: `Unknown action: ${action}` }, { status: 400 });
+  }
+  const skillInput = requireSkillInput(body);
+  if (skillInput instanceof Response) return skillInput;
+  const invocation = build({ ...skillInput, body });
+  if (invocation instanceof Response) return invocation;
+  return { ...invocation, skill: skillInput.skill, skillPath: skillInput.skillPath };
 }
 /**
  * Handles one dashboard action request and returns its JSON Response.
  *
  * "watchlist" is served in-process through saveWatchedSkills. Every other action is
  * translated by buildActionExecution into a CLI invocation, run through executeAction,
  * and the runner's result is returned as JSON. When emitEvent is supplied, "started",
  * "stdout", "stderr" and "finished" events sharing one event_id are emitted for the run.
  *
  * @param action - action name from the request.
  * @param body - request payload.
  * @param executeAction - runner used to execute the CLI command; defaults to runAction.
  * @param emitEvent - optional sink for progress events.
  */
 export async function handleAction(
   action: string,
   body: Record<string, unknown>,
   executeAction: ActionRunner = runAction,
+  emitEvent?: ActionEventEmitter,
 ): Promise<Response> {
   if (action === "watchlist") {
     const skills = body.skills;
@@ -62,7 +253,11 @@ export async function handleAction(
     }
     try {
       const saved = saveWatchedSkills(skills);
-      return Response.json({ success: true, watched_skills: saved, error: null });
+      return Response.json({
+        success: true,
+        watched_skills: saved,
+        error: null,
+      });
     } catch (error: unknown) {
       const message = error instanceof Error ? error.message : String(error);
       return Response.json(
@@ -75,37 +270,73 @@ export async function handleAction(
     }
   }
-  if (action === "watch" || action === "evolve") {
-    const skill = body.skill as string | undefined;
-    const skillPath = body.skillPath as string | undefined;
-    if (!skill || !skillPath) {
-      return Response.json(
-        { success: false, error: "Missing required fields: skill, skillPath" },
-        { status: 400 },
-      );
-    }
-    const args = ["--skill", skill, "--skill-path", skillPath, "--sync-first"];
-    const result = await executeAction(action, args);
-    return Response.json(result);
   // "evolve" is accepted as an alias and handled as "deploy-candidate".
+  const normalizedAction = action === "evolve" ? "deploy-candidate" : action;
+  const executable = buildActionExecution(normalizedAction as DashboardActionName, body);
+  if (executable instanceof Response) {
+    return executable;
   }
-  if (action === "rollback") {
-    const skill = body.skill as string | undefined;
-    const skillPath = body.skillPath as string | undefined;
-    const proposalId = body.proposalId as string | undefined;
-    if (!skill || !skillPath) {
-      return Response.json(
-        { success: false, error: "Missing required fields: skill, skillPath" },
-        { status: 400 },
-      );
-    }
-    const args = ["--skill", skill, "--skill-path", skillPath];
-    if (proposalId) {
-      args.push("--proposal-id", proposalId);
-    }
-    const result = await executeAction(action, args);
-    return Response.json(result);
-  }
   // One id ties together every event emitted for this run and is also passed to the runner.
+  const eventId = randomUUID();
+  emitEvent?.({
+    event_id: eventId,
+    action: normalizedAction as DashboardActionName,
+    stage: "started",
+    skill_name: executable.skill,
+    skill_path: executable.skillPath,
+    ts: Date.now(),
+  });
-  return Response.json({ success: false, error: `Unknown action: ${action}` }, { status: 400 });
+  const result = await executeAction(executable.command, executable.args, {
+    actionContext: {
+      eventId,
+      action: normalizedAction as DashboardActionName,
+      skillName: executable.skill,
+      skillPath: executable.skillPath,
+    },
+    onStdout(chunk) {
+      emitEvent?.({
+        event_id: eventId,
+        action: normalizedAction as DashboardActionName,
+        stage: "stdout",
+        skill_name: executable.skill,
+        skill_path: executable.skillPath,
+        ts: Date.now(),
+        chunk,
+      });
+    },
+    onStderr(chunk) {
+      emitEvent?.({
+        event_id: eventId,
+        action: normalizedAction as DashboardActionName,
+        stage: "stderr",
+        skill_name: executable.skill,
+        skill_path: executable.skillPath,
+        ts: Date.now(),
+        chunk,
+      });
+    },
+  });
   // NOTE(review): for dry-run evolve the summary is recomputed here with result.error passed
   // as `stderr` and a null exit code coalesced to 0 — confirm resolveDashboardActionOutcome
   // is meant to receive those values rather than the raw stderr and exit code.
+  emitEvent?.({
+    event_id: eventId,
+    action: normalizedAction as DashboardActionName,
+    stage: "finished",
+    skill_name: executable.skill,
+    skill_path: executable.skillPath,
+    ts: Date.now(),
+    success: result.success,
+    exit_code: result.exitCode,
+    error: result.error,
+    summary:
+      executable.command === "evolve" && executable.args.includes("--dry-run")
+        ? resolveDashboardActionOutcome({
+            action: "replay-dry-run",
+            stdout: result.output,
+            stderr: result.error,
+            exitCode: result.exitCode ?? 0,
+          }).summary
+        : null,
+  });
+  return Response.json(result);
 }

package/cli/selftune/testing-readiness.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import type { Database } from "bun:sqlite";
 import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import { SELFTUNE_CONFIG_DIR } from "./constants.js";
 import type {
@@ -11,7 +11,8 @@ import type {
   SkillEvalReadiness,
   SkillTestingReadiness,
 } from "./dashboard-contract.js";
-import type { EvalEntry, UnitTestSuiteResult } from "./types.js";
+import { getDb } from "./localdb/db.js";
+import type { EvalEntry, SkillUnitTest, UnitTestSuiteResult } from "./types.js";
 import { queryEvolutionEvidence } from "./localdb/queries/evolution.js";
 import { queryTrustedSkillObservationRows } from "./localdb/queries/trust.js";
 import {
@@ -27,6 +28,7 @@ interface TrustedSkillObservationSummary {
 }
 interface TestingReadinessContext {
+  db: Database;
   knownSkills: Set<string>;
   searchDirs: string[];
   trustedRowsBySkill: Map<string, TrustedSkillObservationSummary[]>;
@@ -64,14 +66,188 @@ export function getUnitTestResultPath(skillName: string): string {
   return join(getUnitTestDir(), `${skillName}.last-run.json`);
 }
+/**
+ * Returns the handle produced by getDb(), or null when getDb() throws.
+ * Lets callers treat the database as optional instead of handling the error themselves.
+ */
+function getOptionalDb(): Database | null {
+  let handle: Database | null = null;
+  try {
+    handle = getDb();
+  } catch {
+    // Deliberately swallowed: any failure from getDb() is reported to the caller as null.
+  }
+  return handle;
+}
+/**
+ * Parses a JSON string that is expected to hold an array.
+ *
+ * @param value - JSON text; null, undefined and "" are treated as "no data".
+ * @returns the parsed array, or [] when there is no data, the text is not valid JSON,
+ *   or the parsed value is not an array.
+ */
+function parseJsonArray(value: string | null | undefined): unknown[] {
+  if (value == null || value === "") return [];
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(value);
+  } catch {
+    return [];
+  }
+  return Array.isArray(decoded) ? decoded : [];
+}
+/**
+ * Inserts the eval set for `skillName` into canonical_eval_sets, or overwrites the
+ * existing row for that skill. `stored_at` is set to the current time as an ISO string
+ * and the entries are stored as JSON text.
+ */
+function upsertCanonicalEvalSet(db: Database, skillName: string, evalSet: EvalEntry[]): void {
+  const statement = `INSERT INTO canonical_eval_sets (skill_name, stored_at, eval_set_json)
+     VALUES (?, ?, ?)
+     ON CONFLICT(skill_name) DO UPDATE SET
+       stored_at = excluded.stored_at,
+       eval_set_json = excluded.eval_set_json`;
+  const storedAt = new Date().toISOString();
+  const serialized = JSON.stringify(evalSet);
+  db.run(statement, [skillName, storedAt, serialized]);
+}
+/**
+ * Inserts the unit tests for `skillName` into unit_test_files, or overwrites the
+ * existing row for that skill. `stored_at` is set to the current time as an ISO string
+ * and the tests are stored as JSON text.
+ */
+function upsertUnitTestFile(db: Database, skillName: string, tests: SkillUnitTest[]): void {
+  const statement = `INSERT INTO unit_test_files (skill_name, stored_at, tests_json)
+     VALUES (?, ?, ?)
+     ON CONFLICT(skill_name) DO UPDATE SET
+       stored_at = excluded.stored_at,
+       tests_json = excluded.tests_json`;
+  const storedAt = new Date().toISOString();
+  const serialized = JSON.stringify(tests);
+  db.run(statement, [skillName, storedAt, serialized]);
+}
+/**
+ * Inserts the latest unit-test run for `skillName` into unit_test_run_results, or
+ * overwrites the existing row for that skill. The summary counters are stored in their
+ * own columns and the whole suite is additionally stored as JSON text.
+ */
+function upsertUnitTestRunResult(
+  db: Database,
+  skillName: string,
+  suite: UnitTestSuiteResult,
+): void {
+  const statement = `INSERT INTO unit_test_run_results
+      (skill_name, run_at, total, passed, failed, pass_rate, result_json)
+     VALUES (?, ?, ?, ?, ?, ?, ?)
+     ON CONFLICT(skill_name) DO UPDATE SET
+       run_at = excluded.run_at,
+       total = excluded.total,
+       passed = excluded.passed,
+       failed = excluded.failed,
+       pass_rate = excluded.pass_rate,
+       result_json = excluded.result_json`;
+  const { run_at, total, passed, failed, pass_rate } = suite;
+  const serialized = JSON.stringify(suite);
+  db.run(statement, [skillName, run_at, total, passed, failed, pass_rate, serialized]);
+}
+/**
+ * Loads the stored eval set for `skillName` from canonical_eval_sets.
+ *
+ * @returns null when no row exists for the skill; otherwise the entries decoded from the
+ *   stored JSON (an empty list when that JSON is unreadable or not an array) together
+ *   with the row's stored_at value.
+ */
+function readCanonicalEvalSetFromDb(
+  db: Database,
+  skillName: string,
+): {
+  entries: EvalEntry[];
+  storedAt: string | null;
+} | null {
+  const lookupSql = `SELECT eval_set_json, stored_at
+       FROM canonical_eval_sets
+       WHERE skill_name = ?`;
+  const match = db.query(lookupSql).get(skillName) as {
+    eval_set_json: string;
+    stored_at: string;
+  } | null;
+  if (!match) return null;
+  const entries = parseJsonArray(match.eval_set_json) as EvalEntry[];
+  const storedAt = match.stored_at ?? null;
+  return { entries, storedAt };
+}
+/**
+ * Loads the stored unit tests for `skillName` from unit_test_files.
+ *
+ * @returns null when no row exists for the skill; otherwise the tests decoded from the
+ *   stored JSON (an empty list when that JSON is unreadable or not an array) together
+ *   with the row's stored_at value.
+ */
+function readUnitTestsFromDb(
+  db: Database,
+  skillName: string,
+): {
+  tests: SkillUnitTest[];
+  storedAt: string | null;
+} | null {
+  const lookupSql = `SELECT tests_json, stored_at
+       FROM unit_test_files
+       WHERE skill_name = ?`;
+  const match = db.query(lookupSql).get(skillName) as {
+    tests_json: string;
+    stored_at: string;
+  } | null;
+  if (!match) return null;
+  const tests = parseJsonArray(match.tests_json) as SkillUnitTest[];
+  const storedAt = match.stored_at ?? null;
+  return { tests, storedAt };
+}
+/**
+ * Loads the stored unit-test run for `skillName` from unit_test_run_results.
+ *
+ * The stored JSON is only trusted after a shape check: skill_name and run_at must be
+ * strings, and total, passed, failed and pass_rate must be numbers.
+ *
+ * @returns the parsed suite, or null when there is no row, the JSON column is empty,
+ *   the JSON cannot be parsed, or the shape check fails.
+ */
+function readUnitTestRunResultFromDb(db: Database, skillName: string): UnitTestSuiteResult | null {
+  const lookupSql = `SELECT result_json
+       FROM unit_test_run_results
+       WHERE skill_name = ?`;
+  const match = db.query(lookupSql).get(skillName) as { result_json: string } | null;
+  if (!match?.result_json) return null;
+  let candidate: Partial<UnitTestSuiteResult>;
+  try {
+    candidate = JSON.parse(match.result_json) as Partial<UnitTestSuiteResult>;
+  } catch {
+    return null;
+  }
+  if (typeof candidate !== "object" || candidate == null) return null;
+  const stringsOk =
+    typeof candidate.skill_name === "string" && typeof candidate.run_at === "string";
+  const numbersOk = [
+    candidate.total,
+    candidate.passed,
+    candidate.failed,
+    candidate.pass_rate,
+  ].every((field) => typeof field === "number");
+  return stringsOk && numbersOk ? (candidate as UnitTestSuiteResult) : null;
+}
+/** Shape of a plain SQL identifier: letters, digits and underscores, not starting with a digit. */
+const SQL_IDENTIFIER_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
+/**
+ * Collects the distinct, non-empty skill_name values stored in one table.
+ *
+ * A table name cannot be bound as a query parameter, so it is interpolated into the
+ * SQL text. It is therefore checked against a plain-identifier pattern first, which
+ * keeps the helper safe if it is ever called with something other than a literal.
+ *
+ * @param db - database to query.
+ * @param tableName - table that has a skill_name column.
+ * @returns the set of skill names found in the table.
+ * @throws Error when tableName is not a plain SQL identifier.
+ */
+function listStoredSkillNames(db: Database, tableName: string): Set<string> {
+  if (!SQL_IDENTIFIER_PATTERN.test(tableName)) {
+    throw new Error(`Invalid table name: ${tableName}`);
+  }
+  const rows = db.query(`SELECT skill_name FROM ${tableName}`).all() as Array<{
+    skill_name: string;
+  }>;
+  const names = new Set<string>();
+  for (const { skill_name } of rows) {
+    // Rows with an empty or null name are skipped.
+    if (skill_name) names.add(skill_name);
+  }
+  return names;
+}
 /**
  * Persists the canonical eval set for a skill and returns the path of the JSON file.
  *
  * When getOptionalDb() yields a database the set is upserted there first; the JSON file
  * under the eval-set directory is then written regardless of whether a database exists.
  *
  * @param skillName - skill the eval set belongs to.
  * @param evalSet - entries to store.
  * @returns the canonical eval-set file path for the skill.
  */
 export function writeCanonicalEvalSet(skillName: string, evalSet: EvalEntry[]): string {
-  mkdirSync(getEvalSetDir(), { recursive: true });
   const path = getCanonicalEvalSetPath(skillName);
   // The upsert runs before the file write, so an exception thrown by it propagates
   // before anything is written to disk.
+  const db = getOptionalDb();
+  if (db) {
+    upsertCanonicalEvalSet(db, skillName, evalSet);
+  }
+  mkdirSync(getEvalSetDir(), { recursive: true });
   writeFileSync(path, JSON.stringify(evalSet, null, 2), "utf-8");
   return path;
 }
+/**
+ * Persists the unit tests for a skill.
+ *
+ * The tests are upserted into the database when one is available, always written to the
+ * canonical unit-test file, and additionally written to `outputPath` when that is given
+ * and differs from the canonical path.
+ *
+ * @param skillName - skill the tests belong to.
+ * @param tests - unit tests to store.
+ * @param outputPath - optional extra destination for the same JSON.
+ * @returns `outputPath` when the extra copy was written, otherwise the canonical path.
+ */
+export function writeCanonicalUnitTests(
+  skillName: string,
+  tests: SkillUnitTest[],
+  outputPath?: string,
+): string {
+  const canonicalPath = getUnitTestPath(skillName);
+  const db = getOptionalDb();
+  if (db !== null) upsertUnitTestFile(db, skillName, tests);
+  // Ensure the parent directory exists, then write the tests as pretty-printed JSON.
+  const persist = (target: string, parentDir: string): void => {
+    mkdirSync(parentDir, { recursive: true });
+    writeFileSync(target, JSON.stringify(tests, null, 2), "utf-8");
+  };
+  persist(canonicalPath, getUnitTestDir());
+  if (!outputPath || outputPath === canonicalPath) return canonicalPath;
+  persist(outputPath, dirname(outputPath));
+  return outputPath;
+}
 export function writeUnitTestRunResult(skillName: string, suite: UnitTestSuiteResult): string {
+  const db = getOptionalDb();
+  if (db) {
+    upsertUnitTestRunResult(db, skillName, suite);
+  }
   mkdirSync(getUnitTestDir(), { recursive: true });
   const path = getUnitTestResultPath(skillName);
   writeFileSync(path, JSON.stringify(suite, null, 2), "utf-8");
@@ -188,14 +364,14 @@ function summarizeReadiness(
   switch (nextStep) {
     case "generate_evals":
       if (evalReadiness === "log_ready") {
-        return "Trusted telemetry exists, but no canonical eval set is saved yet.";
+        return "Trusted telemetry exists, but no canonical eval set is stored yet.";
       }
       if (evalReadiness === "cold_start_ready") {
         return "Installed locally but still cold-start. Generate synthetic evals before you evolve it.";
       }
       return "Telemetry exists, but selftune cannot resolve a local SKILL.md yet. Point it at the skill and generate evals.";
     case "run_unit_tests":
-      return `Eval coverage is present (${evalSetEntries} entries), but no unit test file is saved yet.`;
+      return `Eval coverage is present (${evalSetEntries} entries), but no unit tests are stored yet.`;
     case "run_replay_dry_run": {
       const passRateText =
         unitTestPassRate != null
@@ -331,6 +507,9 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
     if (!entry.endsWith(".json")) return null;
     return entry.slice(0, -".json".length);
   });
+  const storedEvalNames = listStoredSkillNames(db, "canonical_eval_sets");
+  const storedUnitTestNames = listStoredSkillNames(db, "unit_test_files");
+  const storedUnitTestRunNames = listStoredSkillNames(db, "unit_test_run_results");
   const evidenceRows = queryEvolutionEvidence(db);
   const evalEvidenceBySkill = new Map<string, { count: number; latestAt: string | null }>();
@@ -445,6 +624,9 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
     ...unitTestNames,
     ...unitTestResultNames,
     ...canonicalEvalNames,
+    ...storedEvalNames,
+    ...storedUnitTestNames,
+    ...storedUnitTestRunNames,
     ...evalEvidenceBySkill.keys(),
     ...replayBySkill.keys(),
     ...baselineBySkill.keys(),
@@ -452,6 +634,7 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
   ]);
   return {
+    db,
     knownSkills,
     searchDirs,
     trustedRowsBySkill,
@@ -480,16 +663,26 @@ function buildSkillTestingReadinessRow(
   const evalReadiness = deriveEvalReadiness(skillPath, trustedTriggerCount);
   const canonicalEvalPath = getCanonicalEvalSetPath(skillName);
-  const canonicalEvalEntries = readJsonArrayFile(canonicalEvalPath);
-  const canonicalEvalStat = existsSync(canonicalEvalPath) ? statSync(canonicalEvalPath) : null;
+  const storedEvalSet = readCanonicalEvalSetFromDb(context.db, skillName);
+  const canonicalEvalEntries =
+    storedEvalSet?.entries ?? (readJsonArrayFile(canonicalEvalPath) as EvalEntry[]);
+  const canonicalEvalStat =
+    !storedEvalSet && existsSync(canonicalEvalPath) ? statSync(canonicalEvalPath) : null;
   const evidenceEval = context.evalEvidenceBySkill.get(skillName) ?? { count: 0, latestAt: null };
   const evalSetEntries =
     canonicalEvalEntries.length > 0 ? canonicalEvalEntries.length : evidenceEval.count;
-  const latestEvalAt = canonicalEvalStat?.mtime.toISOString?.() ?? evidenceEval.latestAt ?? null;
+  const latestEvalAt =
+    storedEvalSet?.storedAt ??
+    canonicalEvalStat?.mtime.toISOString?.() ??
+    evidenceEval.latestAt ??
+    null;
   const unitTestPath = getUnitTestPath(skillName);
-  const unitTestCases = readJsonArrayFile(unitTestPath).length;
-  const unitTestResult = readUnitTestResult(getUnitTestResultPath(skillName));
+  const storedUnitTests = readUnitTestsFromDb(context.db, skillName);
+  const unitTestCases = storedUnitTests?.tests.length ?? readJsonArrayFile(unitTestPath).length;
+  const unitTestResult =
+    readUnitTestRunResultFromDb(context.db, skillName) ??
+    readUnitTestResult(getUnitTestResultPath(skillName));
   const replay = context.replayBySkill.get(skillName) ?? {
     check_count: 0,