npm - selftune - Versions diffs - 0.2.31 → 0.2.32 - Mend

selftune 0.2.31 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

package/README.md +83 -56
package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/command-surface.ts +613 -2
package/cli/selftune/create/baseline.ts +429 -0
package/cli/selftune/create/check.ts +35 -0
package/cli/selftune/create/init.ts +115 -0
package/cli/selftune/create/package-candidate-state.ts +771 -0
package/cli/selftune/create/package-evaluator.ts +710 -0
package/cli/selftune/create/package-fingerprint.ts +142 -0
package/cli/selftune/create/package-search.ts +377 -0
package/cli/selftune/create/publish.ts +431 -0
package/cli/selftune/create/readiness.ts +495 -0
package/cli/selftune/create/replay.ts +330 -0
package/cli/selftune/create/report.ts +74 -0
package/cli/selftune/create/scaffold.ts +121 -0
package/cli/selftune/create/skills-ref-adapter.ts +177 -0
package/cli/selftune/create/status.ts +33 -0
package/cli/selftune/create/templates.ts +249 -0
package/cli/selftune/cron/setup.ts +1 -1
package/cli/selftune/dashboard-action-events.ts +4 -1
package/cli/selftune/dashboard-action-result.ts +789 -24
package/cli/selftune/dashboard-action-stream.ts +80 -0
package/cli/selftune/dashboard-contract.ts +146 -3
package/cli/selftune/dashboard-server.ts +5 -4
package/cli/selftune/eval/hooks-to-evals.ts +58 -35
package/cli/selftune/eval/synthetic-evals.ts +145 -17
package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
package/cli/selftune/evolution/evolve-body.ts +9 -36
package/cli/selftune/evolution/evolve.ts +8 -72
package/cli/selftune/evolution/stopping-criteria.ts +5 -13
package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
package/cli/selftune/evolution/validate-host-replay.ts +115 -15
package/cli/selftune/improve.ts +206 -0
package/cli/selftune/index.ts +123 -6
package/cli/selftune/init.ts +1 -1
package/cli/selftune/localdb/queries/dashboard.ts +30 -0
package/cli/selftune/localdb/schema.ts +52 -0
package/cli/selftune/monitoring/watch.ts +257 -23
package/cli/selftune/orchestrate/execute.ts +300 -1
package/cli/selftune/orchestrate/finalize.ts +14 -0
package/cli/selftune/orchestrate/plan.ts +22 -5
package/cli/selftune/orchestrate/prepare.ts +59 -4
package/cli/selftune/orchestrate/report.ts +1 -1
package/cli/selftune/orchestrate.ts +34 -1
package/cli/selftune/publish.ts +35 -0
package/cli/selftune/routes/actions.ts +81 -15
package/cli/selftune/routes/overview.ts +1 -1
package/cli/selftune/routes/skill-report.ts +147 -2
package/cli/selftune/run.ts +18 -0
package/cli/selftune/schedule.ts +3 -3
package/cli/selftune/search-run.ts +703 -0
package/cli/selftune/status.ts +35 -11
package/cli/selftune/testing-readiness.ts +431 -40
package/cli/selftune/types.ts +316 -0
package/cli/selftune/utils/eval-readiness.ts +1 -0
package/cli/selftune/utils/json-output.ts +11 -0
package/cli/selftune/utils/lifecycle-surface.ts +48 -0
package/cli/selftune/utils/query-filter.ts +82 -1
package/cli/selftune/utils/tui.ts +85 -2
package/cli/selftune/verify.ts +205 -0
package/cli/selftune/workflows/proposals.ts +1 -1
package/cli/selftune/workflows/skill-scaffold.ts +141 -63
package/cli/selftune/workflows/workflows.ts +4 -4
package/package.json +1 -1
package/skill/SKILL.md +148 -85
package/skill/references/cli-quick-reference.md +16 -1
package/skill/references/creator-playbook.md +31 -10
package/skill/workflows/Baseline.md +8 -9
package/skill/workflows/Contributions.md +4 -4
package/skill/workflows/Create.md +173 -0
package/skill/workflows/CreateTestDeploy.md +34 -30
package/skill/workflows/Cron.md +2 -2
package/skill/workflows/Dashboard.md +3 -3
package/skill/workflows/Evals.md +13 -7
package/skill/workflows/Evolve.md +75 -32
package/skill/workflows/EvolveBody.md +22 -15
package/skill/workflows/Hook.md +1 -1
package/skill/workflows/Improve.md +168 -0
package/skill/workflows/Initialize.md +3 -3
package/skill/workflows/Orchestrate.md +49 -12
package/skill/workflows/Publish.md +100 -0
package/skill/workflows/Run.md +72 -0
package/skill/workflows/Schedule.md +2 -2
package/skill/workflows/SearchRun.md +89 -0
package/skill/workflows/SignalsDashboard.md +2 -2
package/skill/workflows/UnitTest.md +13 -4
package/skill/workflows/Verify.md +136 -0
package/skill/workflows/Watch.md +114 -47
package/skill/workflows/Workflows.md +13 -8
package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +0 -15
package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1

package/cli/selftune/create/package-fingerprint.ts ADDED Viewed

@@ -0,0 +1,142 @@
+import { createHash } from "node:crypto";
+import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
+import { dirname, join, relative, resolve } from "node:path";
+import { buildCreateSkillManifest, type CreateSkillManifest } from "./templates.js";
+/**
+ * Resolve a user-supplied draft skill argument into its directory and skill file.
+ *
+ * @param skillPathArg - Path to a skill directory, or directly to a skill file.
+ * @returns The containing directory and the skill file path, or `null` when the
+ *   argument is blank, the path does not exist, or it is a directory that has
+ *   no `SKILL.md` inside it.
+ */
+function resolveDraftSkillPaths(
+  skillPathArg: string,
+): { skillDir: string; skillPath: string } | null {
+  const candidate = skillPathArg.trim();
+  if (candidate.length === 0) return null;
+  const target = resolve(candidate);
+  if (!existsSync(target)) return null;
+  if (!statSync(target).isDirectory()) {
+    // Anything that is not a directory is taken to be the skill file itself.
+    return { skillDir: dirname(target), skillPath: target };
+  }
+  const nestedSkillFile = join(target, "SKILL.md");
+  if (!existsSync(nestedSkillFile)) return null;
+  return { skillDir: target, skillPath: nestedSkillFile };
+}
+/**
+ * Load `selftune.create.json` from a draft skill directory, field by field.
+ *
+ * A field is taken from the file only when it has the expected primitive type
+ * (non-blank string for `entry_workflow`, boolean for the flags); every other
+ * field uses the value from `buildCreateSkillManifest()`. `version` is always 1.
+ *
+ * @param skillDir - Directory expected to contain the manifest file.
+ * @returns The effective manifest, plus `present: true` only when the file
+ *   exists and was read and parsed without throwing.
+ */
+function loadDraftManifest(skillDir: string): { manifest: CreateSkillManifest; present: boolean } {
+  const defaults = buildCreateSkillManifest();
+  const manifestFile = join(skillDir, "selftune.create.json");
+  if (!existsSync(manifestFile)) {
+    return { manifest: defaults, present: false };
+  }
+  // Keep a boolean from the file; anything else falls back to the default.
+  const boolOr = (value: unknown, otherwise: boolean): boolean =>
+    typeof value === "boolean" ? value : otherwise;
+  try {
+    const raw = JSON.parse(readFileSync(manifestFile, "utf-8")) as Partial<CreateSkillManifest>;
+    const entry = raw.entry_workflow;
+    const resources = raw.expected_resources;
+    const manifest: CreateSkillManifest = {
+      version: 1,
+      entry_workflow:
+        typeof entry === "string" && entry.trim().length > 0 ? entry : defaults.entry_workflow,
+      supports_package_replay: boolOr(
+        raw.supports_package_replay,
+        defaults.supports_package_replay,
+      ),
+      expected_resources: {
+        workflows: boolOr(resources?.workflows, defaults.expected_resources.workflows),
+        references: boolOr(resources?.references, defaults.expected_resources.references),
+        scripts: boolOr(resources?.scripts, defaults.expected_resources.scripts),
+        assets: boolOr(resources?.assets, defaults.expected_resources.assets),
+      },
+    };
+    return { manifest, present: true };
+  } catch {
+    // Unreadable or malformed manifest: behave as if no manifest were there.
+    return { manifest: defaults, present: false };
+  }
+}
+/**
+ * Recursively list the regular files under `dir`, as paths relative to `root`.
+ *
+ * Entries that cannot be stat'ed are skipped instead of aborting the walk.
+ * `statSync` follows symlinks, so a dangling symlink (or an entry removed
+ * between `readdirSync` and `statSync`) would otherwise throw and take the
+ * whole fingerprint computation down with it.
+ *
+ * @param root - Base directory that returned paths are made relative to.
+ * @param dir - Directory to walk; a missing directory yields an empty list.
+ * @returns Relative paths of every regular file found, in directory-read order.
+ */
+function collectFiles(root: string, dir: string): string[] {
+  if (!existsSync(dir)) return [];
+  // Returns null instead of throwing when the entry cannot be inspected.
+  const statOrNull = (path: string) => {
+    try {
+      return statSync(path);
+    } catch {
+      return null;
+    }
+  };
+  const discovered: string[] = [];
+  for (const entry of readdirSync(dir)) {
+    const absolute = join(dir, entry);
+    const stat = statOrNull(absolute);
+    if (stat === null) continue;
+    if (stat.isDirectory()) {
+      discovered.push(...collectFiles(root, absolute));
+    } else if (stat.isFile()) {
+      discovered.push(relative(root, absolute));
+    }
+  }
+  return discovered;
+}
+/**
+ * Compute a content fingerprint for a draft skill package.
+ *
+ * The tracked set is `SKILL.md`, the manifest file (only when one was loaded),
+ * the manifest's entry workflow, and every file under each resource directory
+ * the manifest enables. Tracked paths are sorted, then each existing regular
+ * file contributes its relative path and raw bytes to a SHA-256 digest.
+ *
+ * @param skillPathArg - Path to the draft skill directory or its skill file.
+ * @returns `pkg_sha256_` followed by the first 16 hex characters of the digest,
+ *   or `null` when the skill path cannot be resolved.
+ */
+export function computeCreatePackageFingerprint(skillPathArg: string): string | null {
+  const draft = resolveDraftSkillPaths(skillPathArg);
+  if (draft === null) return null;
+  const { skillDir, skillPath } = draft;
+  const loaded = loadDraftManifest(skillDir);
+  const manifest = loaded.manifest;
+  const tracked = new Set<string>(["SKILL.md"]);
+  if (loaded.present) tracked.add("selftune.create.json");
+  if (manifest.entry_workflow.trim().length > 0) tracked.add(manifest.entry_workflow);
+  const resourceDirs = ["workflows", "references", "scripts", "assets"] as const;
+  for (const resourceDir of resourceDirs) {
+    if (!manifest.expected_resources[resourceDir]) continue;
+    for (const file of collectFiles(skillDir, join(skillDir, resourceDir))) {
+      tracked.add(file);
+    }
+  }
+  const digest = createHash("sha256");
+  // Domain-separation prefix, then the skill file's path relative to its directory.
+  digest.update("selftune:create-package:v1\0");
+  digest.update(`${relative(skillDir, skillPath) || "SKILL.md"}\0`);
+  // Sorting makes the digest independent of the order paths were discovered in.
+  for (const trackedPath of Array.from(tracked).sort()) {
+    const onDisk = join(skillDir, trackedPath);
+    const isRegularFile = existsSync(onDisk) && statSync(onDisk).isFile();
+    if (!isRegularFile) continue;
+    digest.update(trackedPath).update("\0").update(readFileSync(onDisk)).update("\0");
+  }
+  return `pkg_sha256_${digest.digest("hex").slice(0, 16)}`;
+}

package/cli/selftune/create/package-search.ts ADDED Viewed

@@ -0,0 +1,377 @@
+/**
+ * Bounded package search runner.
+ *
+ * Orchestrates a minibatch of candidate evaluations against the accepted
+ * frontier parent. Candidates are passed in (mutation is external);
+ * this module only evaluates, compares, and persists results.
+ */
+import { cpSync, mkdtempSync, readFileSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { basename, dirname, join } from "node:path";
+import { randomUUIDv7 } from "bun";
+import type { Database } from "bun:sqlite";
+import type { PackageSearchProvenance, PackageSearchRunResult } from "../types.js";
+import { parseSkillSections, replaceSection } from "../evolution/deploy-proposal.js";
+import {
+  listAcceptedPackageFrontierCandidates,
+  readPackageCandidateArtifactByFingerprint,
+  selectAcceptedPackageFrontierCandidate,
+} from "./package-candidate-state.js";
+import { computeCreatePackageFingerprint } from "./package-fingerprint.js";
+import {
+  runCreatePackageEvaluation,
+  type CreatePackageEvaluationDeps,
+} from "./package-evaluator.js";
+// ---------------------------------------------------------------------------
+// Search options
+// ---------------------------------------------------------------------------
+/** Inputs accepted by `runPackageSearch`. */
+export interface PackageSearchOptions {
+  /** Skill name to search packages for. */
+  skill_name: string;
+  /** Candidate variant paths to evaluate this run. */
+  candidate_paths: Array<{
+    /** Path handed to the package evaluator as the candidate's skill path. */
+    skill_path: string;
+    /**
+     * Package fingerprint of the candidate. Used to skip candidates that already
+     * have a stored artifact, and as the candidate id when the evaluator's
+     * summary carries none.
+     */
+    fingerprint: string;
+    /**
+     * Which surface the mutation touched. `runPackageSearch` itself tags the
+     * merged routing+body variant it builds as "merged".
+     */
+    mutation_surface?: "routing" | "body" | "merged";
+  }>;
+  /** Maximum candidates to evaluate per run (minibatch size). Default 5. */
+  max_candidates?: number;
+  /** Optional measured routing/body budget used to build this search run. */
+  surface_plan?: PackageSearchProvenance["surface_plan"];
+  /** Database handle. Also exposed to the evaluator through its `getDb` dependency. */
+  db: Database;
+  /** Agent identifier for replay. */
+  agent?: string;
+  /** Optional eval-set override for package evaluation. */
+  evalSetPath?: string;
+  /** Optional evaluator dependency overrides. Any `getDb` supplied here is replaced. */
+  evaluator_deps?: CreatePackageEvaluationDeps;
+}
+/** In-memory record of one candidate evaluated during a `runPackageSearch` call. */
+type EvaluatedCandidate = {
+  /** Candidate id from the evaluation summary, or the fingerprint when it has none. */
+  candidateId: string;
+  /** Acceptance decision; `runPackageSearch` stores "rejected" when no summary exists. */
+  decision: string;
+  /** Human-readable reason attached to the decision. */
+  rationale: string;
+  /** Skill path that was passed to the evaluator. */
+  skillPath: string;
+  /** Package fingerprint of the evaluated variant. */
+  fingerprint: string;
+  /** Mutation surface of the candidate, or `null` when the caller did not supply one. */
+  mutationSurface: "routing" | "body" | "merged" | null;
+  /** Full result returned by `runCreatePackageEvaluation`. */
+  evaluation: Awaited<ReturnType<typeof runCreatePackageEvaluation>>;
+};
+/**
+ * Build a merged skill variant from a routing variant and a body variant.
+ *
+ * The body variant's directory is copied into a fresh temporary directory and
+ * its skill file is rewritten with the "Workflow Routing" section taken from
+ * the routing variant. The temporary directory is not removed here.
+ *
+ * @param routingSkillPath - Skill file that supplies the Workflow Routing section.
+ * @param bodySkillPath - Skill file (and sibling files) that supply everything else.
+ * @returns Path to the merged skill file inside the temporary copy.
+ * @throws Error when the routing variant has no non-blank Workflow Routing section.
+ */
+function mergeComplementarySkillCandidates(
+  routingSkillPath: string,
+  bodySkillPath: string,
+): string {
+  const routingSource = readFileSync(routingSkillPath, "utf-8");
+  const bodySource = readFileSync(bodySkillPath, "utf-8");
+  const { sections } = parseSkillSections(routingSource);
+  const routingSection = (sections["Workflow Routing"] ?? "").trim();
+  if (routingSection.length === 0) {
+    throw new Error(
+      `Routing variant at ${routingSkillPath} does not contain a Workflow Routing section`,
+    );
+  }
+  const mergedSource = replaceSection(bodySource, "Workflow Routing", routingSection);
+  const sourceDir = dirname(bodySkillPath);
+  // Keep the original directory name under a unique temp root.
+  const scratchRoot = mkdtempSync(join(tmpdir(), "selftune-package-search-merged-"));
+  const mergedDir = join(scratchRoot, basename(sourceDir));
+  cpSync(sourceDir, mergedDir, { recursive: true });
+  const mergedSkillPath = join(mergedDir, basename(bodySkillPath));
+  writeFileSync(mergedSkillPath, mergedSource, "utf-8");
+  return mergedSkillPath;
+}
+/**
+ * Pick the strongest accepted candidate for one mutation surface.
+ *
+ * Routing candidates are ranked by routing pass rate, falling back to replay
+ * pass rate when no routing summary exists. Body candidates are ranked by
+ * validity first, then by quality score (a missing score counts as -1).
+ *
+ * @param candidates - Every candidate evaluated so far in this run.
+ * @param surface - Mutation surface to select for.
+ * @returns The top-ranked accepted candidate, or `null` when none matches.
+ */
+function pickBestAcceptedCandidate(
+  candidates: EvaluatedCandidate[],
+  surface: "routing" | "body",
+): EvaluatedCandidate | null {
+  const routingScore = (candidate: EvaluatedCandidate): number =>
+    candidate.evaluation.summary.routing?.pass_rate ??
+    candidate.evaluation.summary.replay.pass_rate;
+  const byRoutingScore = (left: EvaluatedCandidate, right: EvaluatedCandidate): number => {
+    const leftScore = routingScore(left);
+    const rightScore = routingScore(right);
+    return rightScore - leftScore;
+  };
+  const byBodyQuality = (left: EvaluatedCandidate, right: EvaluatedCandidate): number => {
+    const leftBody = left.evaluation.summary.body;
+    const rightBody = right.evaluation.summary.body;
+    const validityGap = (rightBody?.valid ? 1 : 0) - (leftBody?.valid ? 1 : 0);
+    if (validityGap !== 0) return validityGap;
+    return (rightBody?.quality_score ?? -1) - (leftBody?.quality_score ?? -1);
+  };
+  const accepted = candidates.filter(
+    (candidate) => candidate.mutationSurface === surface && candidate.decision === "accepted",
+  );
+  // Descending order: index 0 is the best; an empty list yields null.
+  const ranked = accepted.toSorted(surface === "routing" ? byRoutingScore : byBodyQuality);
+  return ranked[0] ?? null;
+}
+// ---------------------------------------------------------------------------
+// Search persistence
+// ---------------------------------------------------------------------------
+/**
+ * Persist a search run result to the package_search_runs table.
+ *
+ * @param db - Database handle to write through.
+ * @param result - Completed search run; `provenance` is stored as a JSON string.
+ */
+export function insertSearchRun(db: Database, result: PackageSearchRunResult): void {
+  // Order must match the column list in the INSERT statement below.
+  const bindings = [
+    result.search_id,
+    result.skill_name,
+    result.parent_candidate_id,
+    result.winner_candidate_id,
+    result.winner_rationale,
+    result.candidates_evaluated,
+    JSON.stringify(result.provenance),
+    result.started_at,
+    result.completed_at,
+  ];
+  db.run(
+    `INSERT INTO package_search_runs
+       (search_id, skill_name, parent_candidate_id, winner_candidate_id,
+        winner_rationale, candidates_evaluated, provenance_json,
+        started_at, completed_at)
+     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+    bindings,
+  );
+}
+/**
+ * Read all search runs for a skill, newest first.
+ *
+ * @param db - Database handle to read through.
+ * @param skillName - Skill whose search runs should be returned.
+ * @returns Stored runs ordered by `started_at` descending, with the stored
+ *   provenance JSON parsed back into an object.
+ */
+export function readSearchRuns(db: Database, skillName: string): PackageSearchRunResult[] {
+  type SearchRunRow = {
+    search_id: string;
+    skill_name: string;
+    parent_candidate_id: string | null;
+    winner_candidate_id: string | null;
+    winner_rationale: string | null;
+    candidates_evaluated: number;
+    provenance_json: string;
+    started_at: string;
+    completed_at: string;
+  };
+  const statement = db.query(
+      `SELECT search_id, skill_name, parent_candidate_id, winner_candidate_id,
+              winner_rationale, candidates_evaluated, provenance_json,
+              started_at, completed_at
+       FROM package_search_runs
+       WHERE skill_name = ?
+       ORDER BY started_at DESC`,
+  );
+  const rows = statement.all(skillName) as SearchRunRow[];
+  const runs: PackageSearchRunResult[] = [];
+  for (const row of rows) {
+    runs.push({
+      search_id: row.search_id,
+      skill_name: row.skill_name,
+      parent_candidate_id: row.parent_candidate_id,
+      candidates_evaluated: row.candidates_evaluated,
+      winner_candidate_id: row.winner_candidate_id,
+      winner_rationale: row.winner_rationale,
+      started_at: row.started_at,
+      completed_at: row.completed_at,
+      provenance: JSON.parse(row.provenance_json) as PackageSearchProvenance,
+    });
+  }
+  return runs;
+}
+/**
+ * Choose this run's winner from the accepted frontier.
+ *
+ * The winner is the first entry of `listAcceptedPackageFrontierCandidates`
+ * whose id is in `evaluatedCandidateIds`. The frontier is not queried at all
+ * when the id set is empty.
+ *
+ * @param skillName - Skill whose frontier is consulted.
+ * @param evaluatedCandidateIds - Candidate ids eligible to win.
+ * @param db - Database handle used for the frontier lookup.
+ * @returns Winner id and rationale, both `null` when no eligible candidate is found.
+ */
+function selectWinningCandidate(
+  skillName: string,
+  evaluatedCandidateIds: Set<string>,
+  db: Database,
+): {
+  winnerCandidateId: string | null;
+  winnerRationale: string | null;
+} {
+  const noWinner = { winnerCandidateId: null, winnerRationale: null };
+  if (evaluatedCandidateIds.size === 0) return noWinner;
+  for (const candidate of listAcceptedPackageFrontierCandidates(skillName, db)) {
+    if (!evaluatedCandidateIds.has(candidate.candidate_id)) continue;
+    return {
+      winnerCandidateId: candidate.candidate_id ?? null,
+      winnerRationale: candidate.summary.candidate_acceptance?.rationale ?? null,
+    };
+  }
+  return noWinner;
+}
+// ---------------------------------------------------------------------------
+// Search runner
+// ---------------------------------------------------------------------------
+/**
+ * Run a bounded package search.
+ *
+ * 1. Reads the accepted frontier for the skill and selects a parent from it
+ *    (no parent on a first-ever run)
+ * 2. Drops candidates whose fingerprint already has a stored artifact, then
+ *    caps the remainder at max_candidates (default 5)
+ * 3. Evaluates each remaining candidate, one at a time, through the evaluator
+ * 4. When both a routing and a body candidate were accepted, builds a merged
+ *    variant from the best of each and evaluates it as one extra candidate
+ * 5. Picks the winner among the accepted candidates using frontier ranking
+ * 6. Persists the search run with full provenance
+ *
+ * The merged variant is counted in `candidates_evaluated` and listed in
+ * `provenance.candidate_fingerprints`, so both can exceed max_candidates by one.
+ *
+ * @param opts - Search inputs; see `PackageSearchOptions`.
+ * @returns The search run record that was written to the database.
+ */
+export async function runPackageSearch(
+  opts: PackageSearchOptions,
+): Promise<PackageSearchRunResult> {
+  const startedAt = new Date().toISOString();
+  const searchId = randomUUIDv7();
+  const maxCandidates = opts.max_candidates ?? 5;
+  // 1. Read frontier and select parent.
+  //    `frontier` is only used for its length in the provenance record.
+  const frontier = listAcceptedPackageFrontierCandidates(opts.skill_name, opts.db);
+  const parent = selectAcceptedPackageFrontierCandidate(opts.skill_name, { db: opts.db });
+  // 2. Filter candidates: skip already-evaluated fingerprints, cap at maxCandidates
+  //    (the cap is applied after the filter, so skipped candidates do not use up slots)
+  const candidatesToEvaluate = opts.candidate_paths
+    .filter((c) => {
+      const existing = readPackageCandidateArtifactByFingerprint(opts.skill_name, c.fingerprint, {
+        db: opts.db,
+      });
+      return existing === null;
+    })
+    .slice(0, maxCandidates);
+  // 3. Evaluate each candidate through the shared package evaluator
+  const evaluationSummaries: PackageSearchProvenance["evaluation_summaries"] = [];
+  const acceptedCandidateIds = new Set<string>();
+  const evaluatedCandidates: EvaluatedCandidate[] = [];
+  // `getDb` comes after the spread so the evaluator always uses opts.db,
+  // even when evaluator_deps supplies its own getDb.
+  const deps: CreatePackageEvaluationDeps = {
+    ...opts.evaluator_deps,
+    getDb: () => opts.db,
+  };
+  for (const candidate of candidatesToEvaluate) {
+    const evaluation = await runCreatePackageEvaluation(
+      {
+        skillPath: candidate.skill_path,
+        skillName: opts.skill_name,
+        mode: "package",
+        agent: opts.agent,
+        evalSetPath: opts.evalSetPath,
+      },
+      deps,
+    );
+    const acceptance = evaluation.summary.candidate_acceptance;
+    // A missing acceptance summary is recorded as a rejection.
+    const decision = acceptance?.decision ?? "rejected";
+    const rationale = acceptance?.rationale ?? "No acceptance summary produced.";
+    // Fall back to the caller-supplied fingerprint when the summary has no id.
+    const candidateId = evaluation.summary.candidate_id ?? candidate.fingerprint;
+    const mutationSurface = candidate.mutation_surface ?? null;
+    evaluationSummaries.push({
+      candidate_id: candidateId,
+      decision,
+      rationale,
+    });
+    evaluatedCandidates.push({
+      candidateId,
+      decision,
+      rationale,
+      skillPath: candidate.skill_path,
+      fingerprint: candidate.fingerprint,
+      mutationSurface,
+      evaluation,
+    });
+    if (decision === "accepted") {
+      acceptedCandidateIds.add(candidateId);
+    }
+  }
+  // 4. If both surfaces produced an accepted candidate, merge the best routing
+  //    variant into the best body variant and evaluate the merged result too.
+  const acceptedRoutingCandidate = pickBestAcceptedCandidate(evaluatedCandidates, "routing");
+  const acceptedBodyCandidate = pickBestAcceptedCandidate(evaluatedCandidates, "body");
+  if (acceptedRoutingCandidate && acceptedBodyCandidate) {
+    const mergedVariantPath = mergeComplementarySkillCandidates(
+      acceptedRoutingCandidate.skillPath,
+      acceptedBodyCandidate.skillPath,
+    );
+    const mergedFingerprint = computeCreatePackageFingerprint(mergedVariantPath);
+    // No fingerprint means the merged path could not be resolved; skip the merge silently.
+    if (mergedFingerprint) {
+      const mergedEvaluation = await runCreatePackageEvaluation(
+        {
+          skillPath: mergedVariantPath,
+          skillName: opts.skill_name,
+          mode: "package",
+          agent: opts.agent,
+          evalSetPath: opts.evalSetPath,
+        },
+        deps,
+      );
+      const mergedAcceptance = mergedEvaluation.summary.candidate_acceptance;
+      const mergedDecision = mergedAcceptance?.decision ?? "rejected";
+      // The rationale always names the two source candidates, then appends the
+      // evaluator's own rationale when there is one.
+      const mergedRationalePrefix = `Merged accepted routing ${acceptedRoutingCandidate.candidateId} with accepted body ${acceptedBodyCandidate.candidateId}.`;
+      const mergedRationale = mergedAcceptance?.rationale
+        ? `${mergedRationalePrefix} ${mergedAcceptance.rationale}`
+        : mergedRationalePrefix;
+      const mergedCandidateId = mergedEvaluation.summary.candidate_id ?? mergedFingerprint;
+      evaluationSummaries.push({
+        candidate_id: mergedCandidateId,
+        decision: mergedDecision,
+        rationale: mergedRationale,
+      });
+      evaluatedCandidates.push({
+        candidateId: mergedCandidateId,
+        decision: mergedDecision,
+        rationale: mergedRationale,
+        skillPath: mergedVariantPath,
+        fingerprint: mergedFingerprint,
+        mutationSurface: "merged",
+        evaluation: mergedEvaluation,
+      });
+      // Appended after the max_candidates cap, so the merged variant shows up in
+      // candidates_evaluated and candidate_fingerprints as one extra entry.
+      candidatesToEvaluate.push({
+        skill_path: mergedVariantPath,
+        fingerprint: mergedFingerprint,
+        mutation_surface: "merged",
+      });
+      if (mergedDecision === "accepted") {
+        acceptedCandidateIds.add(mergedCandidateId);
+      }
+    }
+  }
+  // Completion time is captured before winner selection and persistence.
+  const completedAt = new Date().toISOString();
+  // 5. Pick the winner: only candidates accepted in this run are eligible.
+  const { winnerCandidateId, winnerRationale } = selectWinningCandidate(
+    opts.skill_name,
+    acceptedCandidateIds,
+    opts.db,
+  );
+  // Build result with provenance
+  const provenance: PackageSearchProvenance = {
+    frontier_size: frontier.length,
+    parent_selection_method: parent ? "highest_ranked_frontier" : "none_first_run",
+    candidate_fingerprints: candidatesToEvaluate.map((c) => c.fingerprint),
+    ...(opts.surface_plan ? { surface_plan: opts.surface_plan } : {}),
+    evaluation_summaries: evaluationSummaries,
+  };
+  const result: PackageSearchRunResult = {
+    search_id: searchId,
+    skill_name: opts.skill_name,
+    parent_candidate_id: parent?.candidate_id ?? null,
+    candidates_evaluated: candidatesToEvaluate.length,
+    winner_candidate_id: winnerCandidateId,
+    winner_rationale: winnerRationale,
+    started_at: startedAt,
+    completed_at: completedAt,
+    provenance,
+  };
+  // 6. Persist the search run
+  insertSearchRun(opts.db, result);
+  return result;
+}