npm - selftune - Versions diffs - 0.2.13 → 0.2.15 - Mend

selftune 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/activation-rules.ts +24 -48
package/cli/selftune/analytics.ts +13 -11
package/cli/selftune/badge/badge.ts +13 -9
package/cli/selftune/canonical-export.ts +6 -6
package/cli/selftune/constants.ts +7 -0
package/cli/selftune/contribute/bundle.ts +9 -44
package/cli/selftune/contribute/contribute.ts +2 -1
package/cli/selftune/cron/setup.ts +3 -1
package/cli/selftune/dashboard-contract.ts +22 -0
package/cli/selftune/dashboard.ts +10 -5
package/cli/selftune/eval/baseline.ts +20 -30
package/cli/selftune/eval/hooks-to-evals.ts +27 -34
package/cli/selftune/eval/import-skillsbench.ts +21 -8
package/cli/selftune/eval/unit-test-cli.ts +22 -11
package/cli/selftune/evolution/description-quality.ts +224 -0
package/cli/selftune/evolution/evolve-body.ts +17 -10
package/cli/selftune/evolution/evolve.ts +70 -57
package/cli/selftune/evolution/rollback.ts +7 -6
package/cli/selftune/grading/auto-grade.ts +27 -35
package/cli/selftune/grading/grade-session.ts +24 -30
package/cli/selftune/hooks/auto-activate.ts +12 -3
package/cli/selftune/hooks/evolution-guard.ts +14 -24
package/cli/selftune/hooks/prompt-log.ts +7 -9
package/cli/selftune/hooks/session-stop.ts +0 -8
package/cli/selftune/index.ts +66 -69
package/cli/selftune/ingestors/claude-replay.ts +29 -14
package/cli/selftune/ingestors/codex-rollout.ts +15 -5
package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
package/cli/selftune/init.ts +14 -9
package/cli/selftune/localdb/queries.ts +57 -0
package/cli/selftune/monitoring/watch.ts +39 -38
package/cli/selftune/normalization.ts +2 -23
package/cli/selftune/orchestrate.ts +224 -24
package/cli/selftune/routes/skill-report.ts +17 -0
package/cli/selftune/schedule.ts +74 -14
package/cli/selftune/sync.ts +7 -3
package/cli/selftune/types.ts +44 -10
package/cli/selftune/utils/cli-error.ts +102 -0
package/cli/selftune/utils/jsonl.ts +2 -0
package/cli/selftune/workflows/workflows.ts +23 -17
package/package.json +3 -1
package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
package/packages/ui/src/components/index.ts +1 -0
package/packages/ui/src/components/section-cards.tsx +13 -0
package/skill/SKILL.md +1 -1
package/skill/Workflows/Evolve.md +4 -0
package/skill/Workflows/Initialize.md +8 -8
package/skill/Workflows/Orchestrate.md +11 -7
package/skill/Workflows/Schedule.md +11 -0
package/skill/references/logs.md +22 -21
package/skill/settings_snippet.json +29 -6
package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12

package/cli/selftune/eval/hooks-to-evals.ts CHANGED Viewed

@@ -36,7 +36,7 @@ import type {
   SessionTelemetryRecord,
   SkillUsageRecord,
 } from "../types.js";
-import { readJsonl } from "../utils/jsonl.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
 import { detectAgent } from "../utils/llm-call.js";
 import {
   filterActionableQueryRecords,
@@ -410,18 +410,27 @@ export async function cliMain(): Promise<void> {
   // --- Synthetic mode: generate evals from SKILL.md via LLM ---
   if (values.synthetic) {
     if (!values.skill) {
-      console.error("[ERROR] --skill required with --synthetic");
-      process.exit(1);
+      throw new CLIError(
+        "--skill required with --synthetic",
+        "MISSING_FLAG",
+        "selftune evals --synthetic --skill <name> --skill-path <path>",
+      );
     }
     if (!values["skill-path"]) {
-      console.error("[ERROR] --skill-path required with --synthetic");
-      process.exit(1);
+      throw new CLIError(
+        "--skill-path required with --synthetic",
+        "MISSING_FLAG",
+        "selftune evals --synthetic --skill <name> --skill-path <path>",
+      );
     }
     const agent = detectAgent();
     if (!agent) {
-      console.error("[ERROR] No agent CLI found (claude/codex/opencode). Install one first.");
-      process.exit(1);
+      throw new CLIError(
+        "No agent CLI found (claude/codex/opencode)",
+        "AGENT_NOT_FOUND",
+        "Install one of the supported agent CLIs",
+      );
     }
     const maxPerSide = Number.parseInt(values.max ?? "50", 10);
@@ -464,31 +473,15 @@ export async function cliMain(): Promise<void> {
     return;
   }
-  // --- Log-based mode (original behavior) ---
-  const skillLogPath = values["skill-log"] ?? SKILL_LOG;
-  const queryLogPath = values["query-log"] ?? QUERY_LOG;
-  const telemetryLogPath = values["telemetry-log"] ?? TELEMETRY_LOG;
+  // --- SQLite-based mode ---
   let skillRecords: SkillUsageRecord[];
   let queryRecords: QueryLogRecord[];
   let telemetryRecords: SessionTelemetryRecord[];
-  // SQLite is the default path; JSONL fallback only for custom --*-log overrides
-  if (
-    skillLogPath === SKILL_LOG &&
-    queryLogPath === QUERY_LOG &&
-    telemetryLogPath === TELEMETRY_LOG
-  ) {
-    const db = getDb();
-    skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
-    queryRecords = queryQueryLog(db) as QueryLogRecord[];
-    telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
-  } else {
-    // test/custom-path fallback
-    skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
-    queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
-    telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
-  }
+  const db = getDb();
+  skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
+  queryRecords = queryQueryLog(db) as QueryLogRecord[];
+  telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
   if (values["list-skills"]) {
     listSkills(skillRecords, queryRecords, telemetryRecords);
@@ -496,8 +489,11 @@ export async function cliMain(): Promise<void> {
   }
   if (!values.skill) {
-    console.error("[ERROR] --skill required (or use --list-skills)");
-    process.exit(1);
+    throw new CLIError(
+      "--skill required (or use --list-skills)",
+      "MISSING_FLAG",
+      "selftune evals --skill <name> or selftune evals --list-skills",
+    );
   }
   if (values.stats) {
@@ -525,8 +521,5 @@ export async function cliMain(): Promise<void> {
 }
 if (import.meta.main) {
-  cliMain().catch((err) => {
-    console.error(err);
-    process.exit(1);
-  });
+  cliMain().catch(handleCLIError);
 }

package/cli/selftune/eval/import-skillsbench.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import { join } from "node:path";
 import { parseArgs } from "node:util";
 import type { EvalEntry, SkillsBenchTask } from "../types.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
 // ---------------------------------------------------------------------------
 // Minimal TOML parser (handles the subset used by SkillsBench task.toml files)
@@ -175,13 +176,19 @@ export function cliMain(): void {
   });
   if (!values.dir) {
-    console.error("[ERROR] --dir required (path to SkillsBench corpus directory)");
-    process.exit(1);
+    throw new CLIError(
+      "--dir required (path to SkillsBench corpus directory)",
+      "MISSING_FLAG",
+      "selftune import-skillsbench --dir <path> --skill <name>",
+    );
   }
   if (!values.skill) {
-    console.error("[ERROR] --skill required (target skill name)");
-    process.exit(1);
+    throw new CLIError(
+      "--skill required (target skill name)",
+      "MISSING_FLAG",
+      "selftune import-skillsbench --dir <path> --skill <name>",
+    );
   }
   const matchStrategy = values["match-strategy"] === "fuzzy" ? "fuzzy" : "exact";
@@ -189,9 +196,11 @@ export function cliMain(): void {
   const tasks = parseSkillsBenchDir(values.dir);
   if (tasks.length === 0) {
-    console.error(`[WARN] No tasks found in ${values.dir}/tasks/`);
-    console.error("Expected structure: <dir>/tasks/<task-id>/instruction.md");
-    process.exit(1);
+    throw new CLIError(
+      `No tasks found in ${values.dir}/tasks/`,
+      "MISSING_DATA",
+      "Expected structure: <dir>/tasks/<task-id>/instruction.md",
+    );
   }
   console.log(`Parsed ${tasks.length} tasks from ${values.dir}`);
@@ -218,5 +227,9 @@ export function cliMain(): void {
 }
 if (import.meta.main) {
-  cliMain();
+  try {
+    cliMain();
+  } catch (err) {
+    handleCLIError(err);
+  }
 }

package/cli/selftune/eval/unit-test-cli.ts CHANGED Viewed

@@ -19,6 +19,7 @@ import { parseArgs } from "node:util";
 import { SELFTUNE_CONFIG_DIR } from "../constants.js";
 import type { EvalEntry } from "../types.js";
+import { CLIError } from "../utils/cli-error.js";
 import { callLlm, detectAgent } from "../utils/llm-call.js";
 import { generateUnitTests } from "./generate-unit-tests.js";
 import type { AgentRunner } from "./unit-test.js";
@@ -43,8 +44,11 @@ export async function cliMain(): Promise<void> {
   });
   if (!values.skill) {
-    console.error("[ERROR] --skill <name> is required.");
-    process.exit(1);
+    throw new CLIError(
+      "--skill <name> is required",
+      "MISSING_FLAG",
+      "selftune eval unit-test --skill <name>",
+    );
   }
   const skillName = values.skill;
@@ -56,8 +60,11 @@ export async function cliMain(): Promise<void> {
   if (values.generate) {
     const agent = detectAgent();
     if (!agent) {
-      console.error("[ERROR] No agent CLI found (claude/codex/opencode). Cannot generate tests.");
-      process.exit(1);
+      throw new CLIError(
+        "No agent CLI found (claude/codex/opencode). Cannot generate tests",
+        "AGENT_NOT_FOUND",
+        "Install one of the supported agent CLIs",
+      );
     }
     let skillContent = `Skill: ${skillName}`;
@@ -86,8 +93,7 @@ export async function cliMain(): Promise<void> {
     const tests = await generateUnitTests(skillName, skillContent, evalFailures, llmCaller);
     if (tests.length === 0) {
-      console.error("[ERROR] No tests generated. Check agent/LLM availability.");
-      process.exit(1);
+      throw new CLIError("No tests generated", "OPERATION_FAILED", "Check agent/LLM availability");
     }
     // Ensure output directory exists
@@ -100,9 +106,11 @@ export async function cliMain(): Promise<void> {
   // Load and run tests
   const tests = loadUnitTests(testsPath);
   if (tests.length === 0) {
-    console.error(`[ERROR] No tests found at ${testsPath}`);
-    console.error("  Use --generate to create tests, or provide --tests <path>.");
-    process.exit(1);
+    throw new CLIError(
+      `No tests found at ${testsPath}`,
+      "FILE_NOT_FOUND",
+      "Use --generate to create tests, or provide --tests <path>",
+    );
   }
   console.log(`Loaded ${tests.length} unit tests for skill '${skillName}'`);
@@ -112,8 +120,11 @@ export async function cliMain(): Promise<void> {
   if (values["run-agent"]) {
     const agent = detectAgent();
     if (!agent) {
-      console.error("[ERROR] No agent CLI found. Cannot run agent-based tests.");
-      process.exit(1);
+      throw new CLIError(
+        "No agent CLI found. Cannot run agent-based tests",
+        "AGENT_NOT_FOUND",
+        "Install one of the supported agent CLIs",
+      );
     }
     const modelFlag = values.model;
     agentRunner = async (query: string): Promise<string> => {

package/cli/selftune/evolution/description-quality.ts ADDED Viewed

@@ -0,0 +1,224 @@
+/**
+ * description-quality.ts
+ *
+ * Pure, deterministic scoring function that evaluates the quality of a skill
+ * description for routing accuracy. No LLM calls — heuristic-only.
+ *
+ * Inspired by OpenAI's finding that "writing better skill descriptions improved
+ * routing accuracy more than any change to the underlying skill logic itself."
+ */
+import type { DescriptionQualityScore } from "../types.js";
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+/**
+ * Length thresholds, in characters, consumed by scoreLengthCriterion.
+ * IDEAL_MIN..IDEAL_MAX (inclusive) is the band that scores 1.0; MIN_LENGTH and
+ * MAX_LENGTH bound the neighbouring bands in which the score ramps between
+ * 0.7 and 1.0.
+ */
+const MIN_LENGTH = 40; // below this the score is len / MIN_LENGTH
+const MAX_LENGTH = 500; // above this the score keeps falling towards a 0.3 floor
+const IDEAL_MIN = 80; // lower edge of the full-score band
+const IDEAL_MAX = 300; // upper edge of the full-score band
+/**
+ * Words and phrases that signal trigger context, i.e. the description states
+ * *when* the skill should fire. Compiled into TRIGGER_PATTERNS and counted by
+ * scoreTriggerContextCriterion as whole-word, case-insensitive matches.
+ *
+ * NOTE(review): any text matching "use when" also matches "when", so that
+ * phrase is counted as two trigger matches — confirm this is intended.
+ */
+const TRIGGER_CONTEXT_WORDS = [
+  "when",
+  "if",
+  "after",
+  "before",
+  "during",
+  "while",
+  "upon",
+  "whenever",
+  "use when",
+  "trigger",
+  "activate",
+];
+/**
+ * Vague words and phrases that weaken routing precision. Compiled into
+ * VAGUE_PATTERNS; scoreVaguenessCriterion counts how many distinct entries
+ * occur in a description and lowers its score accordingly. Multi-word entries
+ * ("and more", "and so on") match across any run of whitespace.
+ */
+const VAGUE_WORDS = [
+  "various",
+  "general",
+  "misc",
+  "miscellaneous",
+  "stuff",
+  "things",
+  "etc",
+  "and more",
+  "and so on",
+  "other",
+  "multiple",
+  "several",
+  "many",
+  "some",
+  "certain",
+  "related",
+];
+/**
+ * Common filler phrases that add no routing signal. scoreSpecificityCriterion
+ * looks for these with a plain substring test on the lowercased description
+ * (String.prototype.includes), not with the word-boundary patterns that the
+ * other word lists are compiled into.
+ */
+const FILLER_PHRASES = [
+  "this skill",
+  "a tool for",
+  "a tool that",
+  "helps with",
+  "is used for",
+  "can be used",
+  "is designed to",
+];
+/**
+ * Action verbs that signal concrete behavior. Compiled into ACTION_PATTERNS and
+ * matched as whole words, so only the base forms listed here count — an
+ * inflected form such as "runs" or "testing" does not match "run" or "test".
+ */
+const ACTION_VERBS = [
+  "run",
+  "execute",
+  "analyze",
+  "generate",
+  "create",
+  "deploy",
+  "validate",
+  "check",
+  "build",
+  "test",
+  "scan",
+  "extract",
+  "transform",
+  "monitor",
+  "grade",
+  "evolve",
+  "sync",
+  "watch",
+  "review",
+  "audit",
+  "parse",
+  "format",
+  "search",
+  "fetch",
+  "publish",
+  "install",
+  "configure",
+  "diagnose",
+  "debug",
+  "fix",
+  "optimize",
+  "measure",
+];
+// ---------------------------------------------------------------------------
+// Pre-compiled word-boundary patterns
+// ---------------------------------------------------------------------------
+/**
+ * Compile a word list into one case-insensitive RegExp per entry.
+ *
+ * Each pattern is wrapped in word boundaries so it only matches whole words,
+ * and every run of whitespace inside an entry is replaced by a whitespace-run
+ * matcher so multi-word phrases tolerate variable spacing and line breaks.
+ * The patterns carry only the "i" flag (no "g"), so repeated test() calls on
+ * them do not depend on lastIndex.
+ *
+ * NOTE(review): entries are interpolated into the pattern without escaping, so
+ * a word containing regex metacharacters would be read as regex syntax. The
+ * lists passed in this file contain only letters and spaces.
+ *
+ * @param words - Words or phrases to compile.
+ * @returns One RegExp per input entry, in the same order.
+ */
+function compileWordPatterns(words: string[]): RegExp[] {
+  return words.map((w) => new RegExp(`\\b${w.replace(/\s+/g, "\\s+")}\\b`, "i"));
+}
+const TRIGGER_PATTERNS = compileWordPatterns(TRIGGER_CONTEXT_WORDS); // counted by scoreTriggerContextCriterion
+const VAGUE_PATTERNS = compileWordPatterns(VAGUE_WORDS); // counted by scoreVaguenessCriterion
+const ACTION_PATTERNS = compileWordPatterns(ACTION_VERBS); // probed by scoreSpecificityCriterion
+/**
+ * Count how many of the given patterns match the text.
+ * Each pattern contributes at most 1 to the total, however often it occurs.
+ *
+ * @param text - String to test.
+ * @param patterns - Pre-compiled patterns to try against the text.
+ * @returns Number of patterns with at least one match (0 to patterns.length).
+ */
+function countWordMatches(text: string, patterns: RegExp[]): number {
+  let count = 0;
+  for (const p of patterns) {
+    if (p.test(text)) count++;
+  }
+  return count;
+}
+// ---------------------------------------------------------------------------
+// Criterion scorers
+// ---------------------------------------------------------------------------
+/**
+ * Score description length on a piecewise scale, where len is
+ * description.length (UTF-16 code units):
+ *   - len < MIN_LENGTH:               len / MIN_LENGTH
+ *   - IDEAL_MIN <= len <= IDEAL_MAX:  1.0
+ *   - MIN_LENGTH <= len < IDEAL_MIN:  linear ramp from 0.7 up towards 1.0
+ *   - IDEAL_MAX < len <= MAX_LENGTH:  linear ramp from 1.0 down to 0.7
+ *   - len > MAX_LENGTH:               keeps falling from 0.7, floored at 0.3
+ *
+ * NOTE(review): the first branch peaks just below 1.0 right before MIN_LENGTH,
+ * while the ramp that starts at MIN_LENGTH begins at 0.7, so the score drops
+ * when a description grows from MIN_LENGTH - 1 to MIN_LENGTH characters.
+ * Scaling the first branch by 0.7 would make the curve continuous — confirm
+ * the intended shape before changing it.
+ *
+ * @param description - Skill description text.
+ * @returns Score between 0.0 and 1.0.
+ */
+export function scoreLengthCriterion(description: string): number {
+  const len = description.length;
+  if (len < MIN_LENGTH) return len / MIN_LENGTH; // too short: proportional credit
+  if (len >= IDEAL_MIN && len <= IDEAL_MAX) return 1.0; // ideal band
+  if (len < IDEAL_MIN) return 0.7 + 0.3 * ((len - MIN_LENGTH) / (IDEAL_MIN - MIN_LENGTH)); // short side ramp
+  if (len <= MAX_LENGTH) return 0.7 + 0.3 * ((MAX_LENGTH - len) / (MAX_LENGTH - IDEAL_MAX)); // long side ramp
+  return Math.max(0.3, 0.7 - 0.4 * ((len - MAX_LENGTH) / MAX_LENGTH)); // over-long: floor at 0.3
+}
+/**
+ * Score the presence of trigger-context words (when/if/before/after etc.).
+ *
+ * Counts how many distinct TRIGGER_PATTERNS entries occur in the description:
+ * none scores 0.0, one scores 0.7, and each further distinct entry adds 0.15,
+ * capped at 1.0.
+ *
+ * The toLowerCase() call is redundant for matching because the patterns are
+ * compiled with the case-insensitive flag.
+ *
+ * @param description - Skill description text.
+ * @returns Score between 0.0 and 1.0.
+ */
+export function scoreTriggerContextCriterion(description: string): number {
+  const matches = countWordMatches(description.toLowerCase(), TRIGGER_PATTERNS);
+  if (matches === 0) return 0.0;
+  if (matches === 1) return 0.7;
+  return Math.min(1.0, 0.7 + 0.15 * (matches - 1));
+}
+/**
+ * Score the absence of vague words; a lower score means a vaguer description.
+ *
+ * Counts how many distinct VAGUE_PATTERNS entries occur in the description:
+ * none scores 1.0, one scores 0.6, and each further distinct entry subtracts
+ * 0.15, never going below 0.1.
+ *
+ * @param description - Skill description text.
+ * @returns Score between 0.1 and 1.0.
+ */
+export function scoreVaguenessCriterion(description: string): number {
+  const matches = countWordMatches(description.toLowerCase(), VAGUE_PATTERNS);
+  if (matches === 0) return 1.0;
+  if (matches === 1) return 0.6;
+  return Math.max(0.1, 0.6 - 0.15 * (matches - 1));
+}
+/**
+ * Score whether the description names at least one concrete action, with a
+ * penalty for filler phrases.
+ *
+ * Without any ACTION_PATTERNS match the score is a flat 0.2, regardless of
+ * filler. Otherwise the score is 1.0 minus 0.3 times the filler ratio, floored
+ * at 0.3, where the filler ratio is the number of distinct FILLER_PHRASES
+ * found divided by max(1, words / 10).
+ *
+ * NOTE(review): only action verbs are checked here; there is no separate
+ * "domain" check in this function.
+ *
+ * @param description - Skill description text.
+ * @returns 0.2 when no action verb is present, otherwise 0.3 to 1.0.
+ */
+export function scoreSpecificityCriterion(description: string): number {
+  const lower = description.toLowerCase();
+  const hasAction = ACTION_PATTERNS.some((p) => p.test(lower));
+  const fillerCount = FILLER_PHRASES.filter((f) => lower.includes(f)).length; // substring match, one per phrase
+  const words = description.split(/\s+/).length; // untrimmed split: an empty string still counts as 1
+  const fillerRatio = fillerCount > 0 ? fillerCount / Math.max(1, words / 10) : 0;
+  if (!hasAction) return 0.2;
+  return Math.max(0.3, 1.0 - fillerRatio * 0.3);
+}
+/**
+ * Score whether the description is more than the skill name restated.
+ *
+ * Both strings are lowercased, trimmed and stripped of every character other
+ * than a-z, 0-9 and whitespace before comparison. The name is additionally
+ * compared in a form where hyphens and underscores become spaces.
+ *
+ * @param description - Skill description text.
+ * @param skillName - Skill name; when omitted or empty the check is skipped.
+ * @returns 1.0 when no name is given or the description is clearly longer
+ *   than the name, 0.0 when the normalized description equals the name, and
+ *   0.3 when it is fewer than 10 characters longer than the normalized name.
+ */
+export function scoreNotJustNameCriterion(description: string, skillName?: string): number {
+  if (!skillName) return 1.0;
+  const descNorm = description
+    .toLowerCase()
+    .trim()
+    .replace(/[^a-z0-9\s]/g, "");
+  const nameNorm = skillName
+    .toLowerCase()
+    .trim()
+    .replace(/[^a-z0-9\s]/g, ""); // separators removed: "my-skill" becomes "myskill"
+  const nameFromKebab = skillName.replace(/[-_]/g, " ").toLowerCase().trim(); // separators spaced: "my-skill" becomes "my skill"
+  if (descNorm === nameNorm || descNorm === nameFromKebab) return 0.0;
+  if (descNorm.length < nameNorm.length + 10) return 0.3;
+  return 1.0;
+}
+// ---------------------------------------------------------------------------
+// Main scoring function
+// ---------------------------------------------------------------------------
+/**
+ * Criterion weights used by scoreDescription — trigger context is weighted
+ * highest per OpenAI's finding. The five weights add up to 1.0, so a weighted
+ * sum of per-criterion scores in the 0.0-1.0 range stays in that range.
+ */
+const WEIGHTS = {
+  length: 0.15,
+  trigger_context: 0.3,
+  vagueness: 0.2,
+  specificity: 0.2,
+  not_just_name: 0.15,
+} as const;
+/**
+ * Score a skill description on heuristic quality criteria.
+ * Returns a 0.0-1.0 composite score with per-criterion breakdown.
+ * Pure function — no I/O, no LLM calls.
+ *
+ * The composite is the sum of each criterion score multiplied by its entry in
+ * WEIGHTS, rounded to three decimal places.
+ *
+ * @param description - Skill description text to evaluate.
+ * @param skillName - Optional skill name, forwarded to the not-just-name check.
+ * @returns Object holding the rounded composite and the raw per-criterion scores.
+ */
+export function scoreDescription(description: string, skillName?: string): DescriptionQualityScore {
+  const criteria = {
+    length: scoreLengthCriterion(description),
+    trigger_context: scoreTriggerContextCriterion(description),
+    vagueness: scoreVaguenessCriterion(description),
+    specificity: scoreSpecificityCriterion(description),
+    not_just_name: scoreNotJustNameCriterion(description, skillName),
+  };
+  const composite = (Object.keys(WEIGHTS) as (keyof typeof WEIGHTS)[]).reduce(
+    (sum, key) => sum + criteria[key] * WEIGHTS[key],
+    0,
+  );
+  return {
+    composite: +composite.toFixed(3), // toFixed yields a string; unary plus converts it back to a number
+    criteria,
+  };
+}

package/cli/selftune/evolution/evolve-body.ts CHANGED Viewed

@@ -25,6 +25,7 @@ import type {
   QueryLogRecord,
   SkillUsageRecord,
 } from "../types.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
 import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
 import { callViaSubagent } from "../utils/llm-call.js";
 import { appendAuditEntry } from "./audit.js";
@@ -710,8 +711,11 @@ Options:
   }
   if (!values.skill || !values["skill-path"]) {
-    console.error("[ERROR] --skill and --skill-path are required");
-    process.exit(1);
+    throw new CLIError(
+      "--skill and --skill-path are required",
+      "MISSING_FLAG",
+      "selftune evolve body --skill <name> --skill-path <path>",
+    );
   }
   const { detectAgent } = await import("../utils/llm-call.js");
@@ -719,15 +723,21 @@ Options:
   const studentAgent = values["student-agent"] ?? teacherAgent;
   if (!teacherAgent) {
-    console.error("[ERROR] No agent CLI found. Install Claude Code, Codex, or OpenCode.");
-    process.exit(1);
+    throw new CLIError(
+      "No agent CLI found. Install Claude Code, Codex, or OpenCode.",
+      "AGENT_NOT_FOUND",
+      "Install Claude Code, Codex, or OpenCode.",
+    );
   }
   // Parse target
   const targetStr = values.target ?? "body";
   if (targetStr !== "body" && targetStr !== "routing") {
-    console.error("[ERROR] --target must be 'body' or 'routing'");
-    process.exit(1);
+    throw new CLIError(
+      "--target must be 'body' or 'routing'",
+      "INVALID_FLAG",
+      "Use --target body or --target routing",
+    );
   }
   // Parse few-shot examples
@@ -763,8 +773,5 @@ Options:
 }
 if (import.meta.main) {
-  cliMain().catch((err) => {
-    console.error(`[FATAL] ${err}`);
-    process.exit(1);
-  });
+  cliMain().catch(handleCLIError);
 }