npm - selftune - Versions diffs - 0.2.18 → 0.2.19 - Mend

selftune 0.2.18 → 0.2.19

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (65)

package/README.md +9 -4
package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
package/apps/local-dashboard/dist/index.html +5 -5
package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
package/cli/selftune/constants.ts +10 -0
package/cli/selftune/contribute/contribute.ts +30 -2
package/cli/selftune/contribution-config.ts +249 -0
package/cli/selftune/contribution-relay.ts +177 -0
package/cli/selftune/contribution-signals.ts +219 -0
package/cli/selftune/contribution-staging.ts +147 -0
package/cli/selftune/contributions.ts +532 -0
package/cli/selftune/creator-contributions.ts +333 -0
package/cli/selftune/dashboard-contract.ts +205 -1
package/cli/selftune/dashboard-server.ts +45 -11
package/cli/selftune/eval/family-overlap.ts +395 -0
package/cli/selftune/eval/hooks-to-evals.ts +182 -28
package/cli/selftune/eval/synthetic-evals.ts +298 -11
package/cli/selftune/export.ts +2 -2
package/cli/selftune/index.ts +41 -5
package/cli/selftune/ingestors/codex-rollout.ts +31 -35
package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
package/cli/selftune/localdb/db.ts +2 -2
package/cli/selftune/localdb/queries.ts +701 -30
package/cli/selftune/localdb/schema.ts +20 -0
package/cli/selftune/recover.ts +153 -0
package/cli/selftune/repair/skill-usage.ts +363 -4
package/cli/selftune/routes/actions.ts +35 -1
package/cli/selftune/routes/analytics.ts +14 -0
package/cli/selftune/routes/index.ts +1 -0
package/cli/selftune/routes/overview.ts +112 -4
package/cli/selftune/routes/skill-report.ts +569 -10
package/cli/selftune/status.ts +81 -2
package/cli/selftune/sync.ts +56 -2
package/cli/selftune/trust-model.ts +66 -0
package/cli/selftune/types.ts +49 -0
package/cli/selftune/utils/skill-detection.ts +43 -0
package/cli/selftune/watchlist.ts +65 -0
package/package.json +1 -1
package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
package/packages/ui/src/components/section-cards.tsx +12 -9
package/packages/ui/src/primitives/card.tsx +1 -1
package/skill/SKILL.md +11 -1
package/skill/Workflows/AlphaUpload.md +4 -0
package/skill/Workflows/Composability.md +64 -0
package/skill/Workflows/Contribute.md +6 -3
package/skill/Workflows/Contributions.md +97 -0
package/skill/Workflows/CreatorContributions.md +74 -0
package/skill/Workflows/Dashboard.md +31 -0
package/skill/Workflows/Evals.md +57 -8
package/skill/Workflows/Ingest.md +7 -0
package/skill/Workflows/Initialize.md +20 -1
package/skill/Workflows/Recover.md +84 -0
package/skill/Workflows/RepairSkillUsage.md +12 -4
package/skill/Workflows/Sync.md +18 -12
package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12

package/cli/selftune/eval/synthetic-evals.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { readFileSync } from "node:fs";
 import type { EvalEntry, InvocationType } from "../types.js";
 import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { findInstalledSkillNames } from "../utils/skill-discovery.js";
 import { classifyInvocation } from "./hooks-to-evals.js";
 // ---------------------------------------------------------------------------
@@ -28,6 +29,181 @@ interface RawSyntheticEntry {
   invocation_type?: string;
 }
+interface SyntheticPromptRealExamples {
+  positive: string[];
+  negative: string[];
+}
+interface PromptFamilyTargets {
+  explicitCount: number;
+  implicitCount: number;
+  contextualCount: number;
+  siblingNegativeCount: number;
+  adjacentNegativeCount: number;
+  unrelatedNegativeCount: number;
+}
+function getSyntheticSkillSearchDirs(): string[] {
+  const cwd = process.cwd();
+  const homeDir = process.env.HOME ?? "";
+  const codexHome = process.env.CODEX_HOME ?? `${homeDir}/.codex`;
+  return [
+    `${cwd}/.agents/skills`,
+    `${cwd}/.claude/skills`,
+    `${homeDir}/.agents/skills`,
+    `${homeDir}/.claude/skills`,
+    `${codexHome}/skills`,
+  ];
+}
+function inferSiblingSkills(
+  skillName: string,
+  searchDirs: string[] = getSyntheticSkillSearchDirs(),
+): string[] {
+  const normalized = skillName.trim().toLowerCase();
+  if (!normalized) return [];
+  const familyPrefix = normalized.includes("-") ? normalized.split("-")[0] : "";
+  const installedNames = [...findInstalledSkillNames(searchDirs)];
+  const sameFamily = installedNames
+    .filter((name) => name.toLowerCase() !== normalized)
+    .filter((name) => familyPrefix && name.toLowerCase().startsWith(`${familyPrefix}-`))
+    .sort((a, b) => a.localeCompare(b));
+  if (sameFamily.length >= 5) return sameFamily.slice(0, 5);
+  const adjacent = installedNames
+    .filter((name) => name.toLowerCase() !== normalized)
+    .filter((name) => !sameFamily.includes(name))
+    .sort((a, b) => a.localeCompare(b));
+  return [...sameFamily, ...adjacent].slice(0, 5);
+}
+function buildPromptFamilyTargets(
+  maxPositives: number,
+  maxNegatives: number,
+  hasSiblingSkills: boolean,
+): PromptFamilyTargets {
+  const explicitCount = Math.max(1, Math.round(maxPositives * 0.2));
+  const contextualCount = Math.max(1, Math.round(maxPositives * 0.4));
+  const implicitCount = Math.max(1, maxPositives - explicitCount - contextualCount);
+  const siblingNegativeCount =
+    hasSiblingSkills && maxNegatives > 0 ? Math.max(1, Math.round(maxNegatives * 0.4)) : 0;
+  const adjacentNegativeCount = Math.max(
+    1,
+    maxNegatives - siblingNegativeCount - Math.max(1, Math.round(maxNegatives * 0.2)),
+  );
+  const unrelatedNegativeCount = Math.max(
+    1,
+    maxNegatives - siblingNegativeCount - adjacentNegativeCount,
+  );
+  return {
+    explicitCount,
+    implicitCount,
+    contextualCount,
+    siblingNegativeCount,
+    adjacentNegativeCount,
+    unrelatedNegativeCount,
+  };
+}
+function normalizeEvalQuery(query: string): string {
+  return query.trim().toLowerCase().replace(/\s+/g, " ");
+}
+function dedupeEvalEntries(entries: EvalEntry[]): EvalEntry[] {
+  const seen = new Set<string>();
+  const deduped: EvalEntry[] = [];
+  for (const entry of entries) {
+    const key = `${entry.should_trigger ? "p" : "n"}:${normalizeEvalQuery(entry.query)}`;
+    if (seen.has(key)) continue;
+    seen.add(key);
+    deduped.push(entry);
+  }
+  return deduped;
+}
+function takeEntries(entries: EvalEntry[], count: number): EvalEntry[] {
+  if (count <= 0) return [];
+  return entries.slice(0, count);
+}
+export function selectBalancedEvalEntries(
+  entries: EvalEntry[],
+  maxPositives: number,
+  maxNegatives: number,
+  siblingSkills: string[] | boolean,
+): EvalEntry[] {
+  const normalizedSiblingSkills = Array.isArray(siblingSkills)
+    ? siblingSkills.map((skill) => skill.trim().toLowerCase()).filter(Boolean)
+    : [];
+  const hasSiblingSkills = normalizedSiblingSkills.length > 0;
+  const targets = buildPromptFamilyTargets(maxPositives, maxNegatives, hasSiblingSkills);
+  const positives = entries.filter((entry) => entry.should_trigger);
+  const negatives = entries.filter((entry) => !entry.should_trigger);
+  const explicit = positives.filter((entry) => entry.invocation_type === "explicit");
+  const implicit = positives.filter((entry) => entry.invocation_type === "implicit");
+  const contextual = positives.filter((entry) => entry.invocation_type === "contextual");
+  const remainingPositive = positives.filter(
+    (entry) => !["explicit", "implicit", "contextual"].includes(entry.invocation_type ?? ""),
+  );
+  const selectedPositives = [
+    ...takeEntries(explicit, targets.explicitCount),
+    ...takeEntries(implicit, targets.implicitCount),
+    ...takeEntries(contextual, targets.contextualCount),
+  ];
+  const selectedPositiveKeys = new Set(
+    selectedPositives.map((entry) => normalizeEvalQuery(entry.query)),
+  );
+  for (const entry of [...positives, ...remainingPositive]) {
+    if (selectedPositives.length >= maxPositives) break;
+    const key = normalizeEvalQuery(entry.query);
+    if (selectedPositiveKeys.has(key)) continue;
+    selectedPositiveKeys.add(key);
+    selectedPositives.push(entry);
+  }
+  const siblingMentions = hasSiblingSkills
+    ? negatives.filter((entry) => {
+        const normalizedQuery = entry.query.toLowerCase();
+        return normalizedSiblingSkills.some((skill) => normalizedQuery.includes(skill));
+      })
+    : siblingSkills === true
+      ? negatives.filter((entry) =>
+          /(^|[\s/$-])(sc-[a-z0-9-]+|mentor cli|State Change mentor CLI|resource\s+\d+|mental model)/i.test(
+            entry.query,
+          ),
+        )
+      : [];
+  const nonSiblingNegatives = negatives.filter((entry) => !siblingMentions.includes(entry));
+  const selectedNegatives = [
+    ...takeEntries(siblingMentions, targets.siblingNegativeCount),
+    ...takeEntries(
+      nonSiblingNegatives,
+      maxNegatives - Math.min(targets.siblingNegativeCount, siblingMentions.length),
+    ),
+  ];
+  const selectedNegativeKeys = new Set(
+    selectedNegatives.map((entry) => normalizeEvalQuery(entry.query)),
+  );
+  for (const entry of negatives) {
+    if (selectedNegatives.length >= maxNegatives) break;
+    const key = normalizeEvalQuery(entry.query);
+    if (selectedNegativeKeys.has(key)) continue;
+    selectedNegativeKeys.add(key);
+    selectedNegatives.push(entry);
+  }
+  return [...selectedPositives.slice(0, maxPositives), ...selectedNegatives.slice(0, maxNegatives)];
+}
 // ---------------------------------------------------------------------------
 // Prompt building
 // ---------------------------------------------------------------------------
@@ -37,21 +213,38 @@ export function buildSyntheticPrompt(
   skillName: string,
   maxPositives: number,
   maxNegatives: number,
-  realExamples?: { positive: string[]; negative: string[] },
+  realExamples?: SyntheticPromptRealExamples,
+  siblingSkills: string[] = [],
 ): { system: string; user: string } {
+  const {
+    explicitCount,
+    implicitCount,
+    contextualCount,
+    siblingNegativeCount,
+    adjacentNegativeCount,
+    unrelatedNegativeCount,
+  } = buildPromptFamilyTargets(maxPositives, maxNegatives, siblingSkills.length > 0);
   const system = `You are generating test queries for a coding agent skill. Given the skill description below, generate realistic user queries.
+Your job is to create a SMALL, TARGETED benchmark for cold-start routing quality.
 For POSITIVE queries (should trigger this skill):
-- Generate a mix of:
+- Generate a balanced mix of:
   - Explicit: directly names the skill or uses $${skillName} syntax
   - Implicit: describes the task without naming the skill
-  - Contextual: natural language with domain context, proper nouns, dates, filenames
-- Vary phrasing, formality, and specificity
+  - Contextual: realistic natural language with domain context, proper nouns, filenames, or setup noise
+- Avoid merely paraphrasing bullet points from the skill
+- Prefer realistic user phrasing over polished product copy
+- Include at least a few prompts that test the edge of the skill's scope, not just the obvious center
 For NEGATIVE queries (should NOT trigger this skill):
-- Queries that are topically adjacent but wrong intent
-- Queries for different skills that share keywords
-- Generic queries unrelated to this skill
+- Include hard negative controls:
+  - sibling-skill confusion cases
+  - topically adjacent but wrong-intent cases
+  - clearly unrelated cases
+- Make the hard negatives plausible, not cartoonishly unrelated
+- If a query belongs to another installed skill, make that obvious from the task itself
 Output as JSON array with no surrounding text:
 [{"query": "...", "should_trigger": true, "invocation_type": "explicit|implicit|contextual|negative"}]`;
@@ -61,7 +254,19 @@ Output as JSON array with no surrounding text:
 Skill content:
 ${skillContent}
-Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${maxNegatives} negative queries (should_trigger: false). Return ONLY the JSON array.`;
+Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${maxNegatives} negative queries (should_trigger: false).
+Required positive mix:
+- ${explicitCount} explicit
+- ${implicitCount} implicit
+- ${contextualCount} contextual
+Required negative mix:
+- ${siblingNegativeCount} sibling-skill confusion cases
+- ${adjacentNegativeCount} adjacent but wrong-intent cases
+- ${unrelatedNegativeCount} clearly unrelated cases
+Return ONLY the JSON array.`;
   if (realExamples && (realExamples.positive.length > 0 || realExamples.negative.length > 0)) {
     const parts: string[] = ["\n\nReal user queries for style and phrasing reference:"];
@@ -77,6 +282,61 @@ Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${m
     user += parts.join("\n");
   }
+  if (siblingSkills.length > 0) {
+    user += `\n\nNearby installed skills to use for boundary-setting hard negatives:\n${siblingSkills
+      .map((skill) => `- ${skill}`)
+      .join(
+        "\n",
+      )}\n\nAt least ${siblingNegativeCount} negative queries should clearly belong to one of these sibling skills instead of ${skillName}.`;
+  }
+  return { system, user };
+}
+export function buildSyntheticRefinementPrompt(
+  skillContent: string,
+  skillName: string,
+  candidates: EvalEntry[],
+  maxPositives: number,
+  maxNegatives: number,
+  siblingSkills: string[] = [],
+): { system: string; user: string } {
+  const targets = buildPromptFamilyTargets(maxPositives, maxNegatives, siblingSkills.length > 0);
+  const system = `You are refining a cold-start eval benchmark for a coding agent skill.
+Your job is to critique and prune a candidate pool into a SMALL, SHARP benchmark.
+For each candidate, reason using binary questions:
+- Is this realistic user phrasing?
+- Is this more than a trivial paraphrase of the skill bullets?
+- Does this clearly test in-scope behavior, or clearly test a boundary?
+- For negatives: does it clearly belong elsewhere or represent a plausible wrong-intent adjacent request?
+- Is it sufficiently distinct from the other selected prompts?
+Return ONLY a JSON array with the final benchmark.`;
+  const user = `Skill name: ${skillName}
+Skill content:
+${skillContent}
+Target final benchmark:
+- ${maxPositives} positives
+- ${maxNegatives} negatives
+- Positive mix: ${targets.explicitCount} explicit, ${targets.implicitCount} implicit, ${targets.contextualCount} contextual
+- Negative mix: ${targets.siblingNegativeCount} sibling-skill confusion, ${targets.adjacentNegativeCount} adjacent wrong-intent, ${targets.unrelatedNegativeCount} unrelated
+${siblingSkills.length > 0 ? `Sibling skills for hard-negative boundaries:\n${siblingSkills.map((skill) => `- ${skill}`).join("\n")}\n` : ""}
+Candidate pool:
+${JSON.stringify(candidates, null, 2)}
+Instructions:
+- Remove duplicates and near-duplicates
+- Prefer prompts that test trigger boundaries, not just center-of-mass obvious usage
+- Keep sibling-skill negatives if they are strong boundary tests
+- Keep the final set compact, diverse, and realistic
+- Return ONLY the final JSON array`;
   return { system, user };
 }
@@ -172,8 +432,10 @@ export async function generateSyntheticEvals(
 ): Promise<EvalEntry[]> {
   const maxPositives = options.maxPositives ?? 15;
   const maxNegatives = options.maxNegatives ?? 10;
+  const oversampleFactor = 2;
   const skillContent = readFileSync(skillPath, "utf-8");
+  const siblingSkills = inferSiblingSkills(skillName);
   // Load real query examples from the database for few-shot style guidance.
   // Uses dynamic imports since SQLite may not be available in all contexts.
@@ -214,11 +476,36 @@ export async function generateSyntheticEvals(
   const { system, user } = buildSyntheticPrompt(
     skillContent,
     skillName,
-    maxPositives,
-    maxNegatives,
+    maxPositives * oversampleFactor,
+    maxNegatives * oversampleFactor,
     realExamples,
+    siblingSkills,
   );
   const raw = await callLlm(system, user, agent, options.modelFlag);
-  return parseSyntheticResponse(raw, skillName);
+  const firstPass = dedupeEvalEntries(parseSyntheticResponse(raw, skillName));
+  try {
+    const refinement = buildSyntheticRefinementPrompt(
+      skillContent,
+      skillName,
+      firstPass,
+      maxPositives,
+      maxNegatives,
+      siblingSkills,
+    );
+    const refinedRaw = await callLlm(refinement.system, refinement.user, agent, options.modelFlag);
+    const refined = dedupeEvalEntries(parseSyntheticResponse(refinedRaw, skillName));
+    const selected = selectBalancedEvalEntries(refined, maxPositives, maxNegatives, siblingSkills);
+    if (
+      selected.filter((entry) => entry.should_trigger).length >= maxPositives &&
+      selected.filter((entry) => !entry.should_trigger).length >= maxNegatives
+    ) {
+      return selected;
+    }
+  } catch {
+    // fall through to first-pass selection
+  }
+  return selectBalancedEvalEntries(firstPass, maxPositives, maxNegatives, siblingSkills);
 }

package/cli/selftune/export.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * Export SQLite data to JSONL format.
- * Replaces the removed JSONL write path -- use this when you need
- * JSONL files for debugging, the contribute workflow, or external tools.
+ * Use this only when you explicitly need portable/debuggable JSONL snapshots
+ * for recovery, the contribute workflow, or external tools.
  */
 import { mkdirSync, writeFileSync } from "node:fs";
 import { join } from "node:path";

package/cli/selftune/index.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  *   selftune ingest <agent>     — Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
  *   selftune grade [mode]       — Grade skill sessions (auto, baseline)
  *   selftune evolve [target]    — Evolve skill descriptions (body, rollback)
- *   selftune eval <action>      — Evaluation tools (generate, unit-test, import, composability)
+ *   selftune eval <action>      — Evaluation tools (generate, unit-test, import, composability, family-overlap)
  *   selftune sync               — Sync source-truth telemetry across supported agents
  *   selftune orchestrate        — Run autonomous core loop (sync → status → evolve → watch)
  *   selftune init               — Initialize agent identity and config
@@ -19,11 +19,14 @@
  *   selftune cron               — Scheduling & automation (setup, list, remove)
  *   selftune badge              — Generate skill health badges for READMEs
  *   selftune contribute         — Export anonymized skill data for community
+ *   selftune contributions      — Manage creator-directed sharing preferences
+ *   selftune creator-contributions — Manage creator-side contribution configs
  *   selftune workflows          — Discover and manage multi-skill workflows
  *   selftune quickstart         — Guided onboarding: init, ingest, status, and suggestions
  *   selftune repair-skill-usage — Rebuild trustworthy skill usage from transcripts
- *   selftune export             — Export SQLite data to JSONL files
+ *   selftune export             — Export SQLite data to JSONL snapshots
  *   selftune export-canonical   — Export canonical telemetry for downstream ingestion
+ *   selftune recover            — Recover SQLite from legacy/exported JSONL
  *   selftune telemetry          — Manage anonymous usage analytics (status, enable, disable)
  *   selftune alpha <subcommand> — Alpha program management (upload)
  *   selftune hook <name>        — Run a hook by name (prompt-log, session-stop, etc.)
@@ -46,7 +49,7 @@ Commands:
   ingest <agent>     Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
   grade [mode]       Grade skill sessions (auto, baseline)
   evolve [target]    Evolve skill descriptions (body, rollback)
-  eval <action>      Evaluation tools (generate, unit-test, import, composability)
+  eval <action>      Evaluation tools (generate, unit-test, import, composability, family-overlap)
   sync               Sync source-truth telemetry across supported agents
   orchestrate        Run autonomous core loop (sync → status → evolve → watch)
   init               Initialize agent identity and config
@@ -59,11 +62,14 @@ Commands:
   cron               Scheduling & automation (setup, list, remove)
   badge              Generate skill health badges for READMEs
   contribute         Export anonymized skill data for community
+  contributions      Manage creator-directed sharing preferences
+  creator-contributions Manage creator-side contribution configs
   workflows          Discover and manage multi-skill workflows
   quickstart         Guided onboarding: init, ingest, status, and suggestions
   repair-skill-usage Rebuild trustworthy skill usage from transcripts
-  export             Export SQLite data to JSONL files
+  export             Export SQLite data to JSONL snapshots
   export-canonical   Export canonical telemetry for downstream ingestion
+  recover            Recover SQLite from legacy/exported JSONL
   alpha <subcommand> Alpha program management (upload)
   telemetry          Manage anonymous usage analytics (status, enable, disable)
   hook <name>        Run a hook by name (prompt-log, session-stop, etc.)
@@ -254,6 +260,7 @@ Actions:
   unit-test      Run or generate skill unit tests
   import         Import SkillsBench task corpus as eval entries
   composability  Analyze skill co-occurrence conflicts
+  family-overlap Detect sibling-skill overlap and consolidation pressure
 Run 'selftune eval <action> --help' for action-specific options.`);
       process.exit(0);
@@ -341,6 +348,17 @@ Run 'selftune eval <action> --help' for action-specific options.`);
         console.log(JSON.stringify(report, null, 2));
         break;
       }
+      case "family-overlap": {
+        if (process.argv[2] === "--help" || process.argv[2] === "-h") {
+          console.log(
+            "selftune eval family-overlap --prefix <family-> | --skills <a,b,c> [--parent-skill <name>] [--min-overlap 0.3] [--min-shared 2]",
+          );
+          process.exit(0);
+        }
+        const { cliMain } = await import("./eval/family-overlap.js");
+        await cliMain();
+        break;
+      }
       default:
         throw new CLIError(
           `Unknown eval action: ${sub}`,
@@ -368,6 +386,16 @@ Run 'selftune eval <action> --help' for action-specific options.`);
     await cliMain();
     break;
   }
+  case "contributions": {
+    const { cliMain } = await import("./contributions.js");
+    await cliMain();
+    break;
+  }
+  case "creator-contributions": {
+    const { cliMain } = await import("./creator-contributions.js");
+    await cliMain();
+    break;
+  }
   case "watch": {
     const { cliMain } = await import("./monitoring/watch.js");
     await cliMain();
@@ -527,11 +555,14 @@ Run 'selftune cron <subcommand> --help' for subcommand-specific options.`);
       throw new CLIError(`Invalid arguments: ${message}`, "INVALID_FLAG", "selftune export --help");
     }
     if (values.help) {
-      console.log(`selftune export — Export SQLite data to JSONL files
+      console.log(`selftune export — Export SQLite data to JSONL snapshots
 Usage:
   selftune export [tables...] [options]
+Use this for portability, debugging, contribute flows, or explicit recovery
+snapshots. Normal runtime reads and writes stay in SQLite.
 Tables (default: all):
   telemetry    Session telemetry records
   skills       Skill usage records
@@ -570,6 +601,11 @@ Options:
     cliMain();
     break;
   }
+  case "recover": {
+    const { cliMain } = await import("./recover.js");
+    cliMain();
+    break;
+  }
   case "orchestrate": {
     const { cliMain } = await import("./orchestrate.js");
     await cliMain();

package/cli/selftune/ingestors/codex-rollout.ts CHANGED Viewed

@@ -52,9 +52,9 @@ import type {
 import { handleCLIError } from "../utils/cli-error.js";
 import { loadMarker, saveMarker } from "../utils/jsonl.js";
 import { extractActionableQueryText } from "../utils/query-filter.js";
+import { getInternalPromptTargetSkill, isWrappedNonUserPart } from "../utils/skill-detection.js";
 import {
   classifySkillPath,
-  containsWholeSkillMention,
   extractExplicitSkillMentions,
   extractSkillNamesFromInstructions,
   extractSkillNamesFromPathReferences,
@@ -228,6 +228,15 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
   let observedCwd: string | undefined;
   const sessionSkillNames = new Set(skillNames);
   let hasActionablePrompt = false;
+  const markSkillTriggered = (skillName: string, evidence: "explicit" | "inferred"): void => {
+    if (!skillsTriggered.includes(skillName)) {
+      skillsTriggered.push(skillName);
+    }
+    const existingEvidence = skillEvidence.get(skillName);
+    if (existingEvidence !== "explicit") {
+      skillEvidence.set(skillName, evidence);
+    }
+  };
   const rememberSessionSkillNames = (text: unknown): void => {
     if (typeof text !== "string" || !text) return;
     for (const skillName of extractSkillNamesFromInstructions(text, sessionSkillNames)) {
@@ -240,33 +249,23 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
       sessionSkillNames.add(skillName);
     }
   };
-  const detectTriggeredSkills = (text: unknown): void => {
-    if (typeof text !== "string" || !text) return;
-    for (const skillName of sessionSkillNames) {
-      if (containsWholeSkillMention(text, skillName) && !skillsTriggered.includes(skillName)) {
-        skillsTriggered.push(skillName);
-      }
-      if (containsWholeSkillMention(text, skillName) && !skillEvidence.has(skillName)) {
-        skillEvidence.set(skillName, "inferred");
-      }
-    }
-  };
   const detectExplicitPromptSkillMentions = (text: unknown): void => {
     if (typeof text !== "string" || !text) return;
-    for (const skillName of extractExplicitSkillMentions(text, sessionSkillNames)) {
-      if (!skillsTriggered.includes(skillName)) {
-        skillsTriggered.push(skillName);
-      }
-      skillEvidence.set(skillName, "explicit");
+    if (isWrappedNonUserPart(text)) return;
+    const actionableText = extractActionableQueryText(text) ?? text;
+    const internalTargetSkill = getInternalPromptTargetSkill(actionableText, sessionSkillNames);
+    if (internalTargetSkill) {
+      markSkillTriggered(internalTargetSkill, "explicit");
+      return;
+    }
+    for (const skillName of extractExplicitSkillMentions(actionableText, sessionSkillNames)) {
+      markSkillTriggered(skillName, "explicit");
     }
   };
   const detectExplicitSkillReads = (text: unknown): void => {
     if (typeof text !== "string" || !text) return;
     for (const skillName of extractSkillNamesFromPathReferences(text, sessionSkillNames)) {
-      if (!skillsTriggered.includes(skillName)) {
-        skillsTriggered.push(skillName);
-      }
-      skillEvidence.set(skillName, "explicit");
+      markSkillTriggered(skillName, "explicit");
     }
   };
   const rememberPromptCandidate = (value: unknown): void => {
@@ -352,27 +351,26 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
       if (itemType === "function_call") {
         const fnName = (payload.name as string) ?? "function_call";
         toolCalls[fnName] = (toolCalls[fnName] ?? 0) + 1;
-        // Check for skill mentions in function arguments
+        // Only path-based skill references count as triggers here.
         detectExplicitSkillReads(payload.arguments);
-        detectTriggeredSkills(payload.arguments);
       } else if (itemType === "agent_reasoning") {
         toolCalls.reasoning = (toolCalls.reasoning ?? 0) + 1;
-        detectTriggeredSkills(payload.text);
       } else if (itemType === "message") {
-        const content = Array.isArray(payload.content)
+        const parts = Array.isArray(payload.content)
           ? payload.content
               .map((part) =>
                 typeof part === "object" && part
                   ? (((part as Record<string, unknown>).text as string | undefined) ?? "")
                   : "",
               )
-              .join("\n")
-          : "";
+              .filter(Boolean)
+          : [];
+        const content = parts.join("\n");
         rememberSessionSkillNames(content);
-        if ((payload.role as string) === "assistant") {
-          detectTriggeredSkills(content);
-        } else if ((payload.role as string) === "user") {
-          detectExplicitPromptSkillMentions(content);
+        if ((payload.role as string) === "user") {
+          for (const part of parts) {
+            detectExplicitPromptSkillMentions(part);
+          }
         }
       }
     } else if (etype === "turn.started") {
@@ -410,10 +408,8 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
       }
       // Detect skill names in text content on completed events
-      const textContent = ((item.text as string) ?? "") + ((item.command as string) ?? "");
-      detectExplicitSkillReads(textContent);
-      if (etype === "item.completed") {
-        detectTriggeredSkills(textContent);
+      if (itemType === "command_execution") {
+        detectExplicitSkillReads(item.command);
       }
     } else if (etype === "error") {
       errors += 1;