selftune 0.2.18 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +9 -4
  2. package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
  3. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
  6. package/apps/local-dashboard/dist/index.html +5 -5
  7. package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
  8. package/cli/selftune/constants.ts +10 -0
  9. package/cli/selftune/contribute/contribute.ts +30 -2
  10. package/cli/selftune/contribution-config.ts +249 -0
  11. package/cli/selftune/contribution-relay.ts +177 -0
  12. package/cli/selftune/contribution-signals.ts +219 -0
  13. package/cli/selftune/contribution-staging.ts +147 -0
  14. package/cli/selftune/contributions.ts +532 -0
  15. package/cli/selftune/creator-contributions.ts +333 -0
  16. package/cli/selftune/dashboard-contract.ts +205 -1
  17. package/cli/selftune/dashboard-server.ts +45 -11
  18. package/cli/selftune/eval/family-overlap.ts +395 -0
  19. package/cli/selftune/eval/hooks-to-evals.ts +182 -28
  20. package/cli/selftune/eval/synthetic-evals.ts +298 -11
  21. package/cli/selftune/export.ts +2 -2
  22. package/cli/selftune/index.ts +41 -5
  23. package/cli/selftune/ingestors/codex-rollout.ts +31 -35
  24. package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
  25. package/cli/selftune/localdb/db.ts +2 -2
  26. package/cli/selftune/localdb/queries.ts +701 -30
  27. package/cli/selftune/localdb/schema.ts +20 -0
  28. package/cli/selftune/recover.ts +153 -0
  29. package/cli/selftune/repair/skill-usage.ts +363 -4
  30. package/cli/selftune/routes/actions.ts +35 -1
  31. package/cli/selftune/routes/analytics.ts +14 -0
  32. package/cli/selftune/routes/index.ts +1 -0
  33. package/cli/selftune/routes/overview.ts +112 -4
  34. package/cli/selftune/routes/skill-report.ts +569 -10
  35. package/cli/selftune/status.ts +81 -2
  36. package/cli/selftune/sync.ts +56 -2
  37. package/cli/selftune/trust-model.ts +66 -0
  38. package/cli/selftune/types.ts +49 -0
  39. package/cli/selftune/utils/skill-detection.ts +43 -0
  40. package/cli/selftune/watchlist.ts +65 -0
  41. package/package.json +1 -1
  42. package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
  43. package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
  44. package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
  45. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
  46. package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
  47. package/packages/ui/src/components/section-cards.tsx +12 -9
  48. package/packages/ui/src/primitives/card.tsx +1 -1
  49. package/skill/SKILL.md +11 -1
  50. package/skill/Workflows/AlphaUpload.md +4 -0
  51. package/skill/Workflows/Composability.md +64 -0
  52. package/skill/Workflows/Contribute.md +6 -3
  53. package/skill/Workflows/Contributions.md +97 -0
  54. package/skill/Workflows/CreatorContributions.md +74 -0
  55. package/skill/Workflows/Dashboard.md +31 -0
  56. package/skill/Workflows/Evals.md +57 -8
  57. package/skill/Workflows/Ingest.md +7 -0
  58. package/skill/Workflows/Initialize.md +20 -1
  59. package/skill/Workflows/Recover.md +84 -0
  60. package/skill/Workflows/RepairSkillUsage.md +12 -4
  61. package/skill/Workflows/Sync.md +18 -12
  62. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
  63. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
  64. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
  65. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
@@ -10,6 +10,7 @@ import { readFileSync } from "node:fs";
10
10
 
11
11
  import type { EvalEntry, InvocationType } from "../types.js";
12
12
  import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
13
+ import { findInstalledSkillNames } from "../utils/skill-discovery.js";
13
14
  import { classifyInvocation } from "./hooks-to-evals.js";
14
15
 
15
16
  // ---------------------------------------------------------------------------
@@ -28,6 +29,181 @@ interface RawSyntheticEntry {
28
29
  invocation_type?: string;
29
30
  }
30
31
 
/**
 * Real user queries passed to the prompt builder as a style and phrasing
 * reference, grouped by whether they should trigger the skill.
 */
interface SyntheticPromptRealExamples {
  /** Queries that should trigger the skill. */
  positive: string[];
  /** Queries that should not trigger the skill. */
  negative: string[];
}
36
+
/**
 * Per-family quotas requested from the model: three positive invocation
 * styles and three kinds of negative control.
 */
interface PromptFamilyTargets {
  /** Positives that name the skill directly. */
  explicitCount: number;
  /** Positives that describe the task without naming the skill. */
  implicitCount: number;
  /** Positives phrased with surrounding domain context. */
  contextualCount: number;
  /** Negatives that belong to a sibling skill. */
  siblingNegativeCount: number;
  /** Negatives that are topically adjacent but have the wrong intent. */
  adjacentNegativeCount: number;
  /** Negatives that are clearly unrelated to the skill. */
  unrelatedNegativeCount: number;
}
45
+
/**
 * List the directories searched for installed skills, in priority order:
 * project-local skill folders first, then per-user folders, then the Codex
 * home.
 *
 * Home-based entries are omitted when no home directory can be resolved, so
 * the result never contains root-relative paths such as `/.agents/skills`.
 *
 * @returns Candidate skill directories; they are not checked for existence.
 */
function getSyntheticSkillSearchDirs(): string[] {
  const cwd = process.cwd();
  // HOME is usually unset on Windows, where USERPROFILE plays the same role.
  // `||` is deliberate: an empty value is as unusable as a missing one.
  const homeDir = process.env.HOME || process.env.USERPROFILE || "";
  const codexHome = process.env.CODEX_HOME || (homeDir ? `${homeDir}/.codex` : "");

  const dirs = [`${cwd}/.agents/skills`, `${cwd}/.claude/skills`];
  if (homeDir) {
    dirs.push(`${homeDir}/.agents/skills`, `${homeDir}/.claude/skills`);
  }
  if (codexHome) {
    dirs.push(`${codexHome}/skills`);
  }
  return dirs;
}
58
+
/**
 * Pick up to five installed skills to use as sibling hard-negative anchors.
 *
 * Skills sharing the target's family prefix (the text before its first `-`)
 * come first; remaining installed skills fill the leftover slots. Both groups
 * are ordered with `localeCompare`, and the target itself is never returned.
 *
 * @param skillName - Skill whose siblings are wanted; blank yields `[]`.
 * @param searchDirs - Directories scanned for installed skills.
 * @returns At most five sibling skill names.
 */
function inferSiblingSkills(
  skillName: string,
  searchDirs: string[] = getSyntheticSkillSearchDirs(),
): string[] {
  const limit = 5;
  const target = skillName.trim().toLowerCase();
  if (target === "") return [];

  const dashIndex = target.indexOf("-");
  const familyPrefix = dashIndex === -1 ? "" : target.slice(0, dashIndex);

  const family: string[] = [];
  const others: string[] = [];
  for (const candidate of [...findInstalledSkillNames(searchDirs)]) {
    const lowered = candidate.toLowerCase();
    if (lowered === target) continue;
    const inFamily = familyPrefix !== "" && lowered.startsWith(`${familyPrefix}-`);
    (inFamily ? family : others).push(candidate);
  }

  family.sort((left, right) => left.localeCompare(right));
  if (family.length >= limit) return family.slice(0, limit);

  others.sort((left, right) => left.localeCompare(right));
  return [...family, ...others].slice(0, limit);
}
83
+
/**
 * Split the requested totals into per-family quotas.
 *
 * Positives aim for roughly 20% explicit and 40% contextual, with the rest
 * implicit. Negatives reserve roughly 40% for sibling-skill confusion (only
 * when sibling skills are known) and roughly 20% for unrelated queries, with
 * the rest adjacent wrong-intent.
 *
 * The positive quotas always sum to `maxPositives` and the negative quotas to
 * `maxNegatives`. When a total is too small to give every family a slot, the
 * families listed first win. A total of zero yields all-zero quotas.
 *
 * @param maxPositives - Number of positive queries wanted.
 * @param maxNegatives - Number of negative queries wanted.
 * @param hasSiblingSkills - Whether sibling-skill negatives should get a quota.
 * @returns The six per-family counts.
 */
function buildPromptFamilyTargets(
  maxPositives: number,
  maxNegatives: number,
  hasSiblingSkills: boolean,
): PromptFamilyTargets {
  const positiveTotal = Math.max(0, Math.floor(maxPositives));
  const negativeTotal = Math.max(0, Math.floor(maxNegatives));

  // Each family gets at least one slot while slots remain, never more.
  const explicitCount = positiveTotal > 0 ? Math.max(1, Math.round(positiveTotal * 0.2)) : 0;
  const contextualCount = Math.min(
    positiveTotal - explicitCount,
    Math.max(1, Math.round(positiveTotal * 0.4)),
  );
  const implicitCount = positiveTotal - explicitCount - contextualCount;

  const siblingNegativeCount =
    hasSiblingSkills && negativeTotal > 0 ? Math.max(1, Math.round(negativeTotal * 0.4)) : 0;
  const remainingNegatives = negativeTotal - siblingNegativeCount;
  const unrelatedShare = Math.max(1, Math.round(negativeTotal * 0.2));
  const adjacentNegativeCount = Math.min(
    remainingNegatives,
    Math.max(1, remainingNegatives - unrelatedShare),
  );
  const unrelatedNegativeCount = remainingNegatives - adjacentNegativeCount;

  return {
    explicitCount,
    implicitCount,
    contextualCount,
    siblingNegativeCount,
    adjacentNegativeCount,
    unrelatedNegativeCount,
  };
}
113
+
/**
 * Canonicalise a query for duplicate detection: surrounding whitespace is
 * dropped, letters are lower-cased, and every run of whitespace becomes a
 * single space.
 */
function normalizeEvalQuery(query: string): string {
  const lowered = query.toLowerCase().trim();
  return lowered.split(/\s+/).join(" ");
}
117
+
/**
 * Drop entries whose normalised query repeats an earlier entry with the same
 * `should_trigger` value. The first occurrence is kept and input order is
 * preserved; a positive and a negative with the same query text both survive.
 */
function dedupeEvalEntries(entries: EvalEntry[]): EvalEntry[] {
  const seenKeys = new Set<string>();
  return entries.filter((entry) => {
    const polarity = entry.should_trigger ? "p" : "n";
    const key = `${polarity}:${normalizeEvalQuery(entry.query)}`;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
}
129
+
/**
 * Return the first `count` entries, or an empty list when `count` is zero or
 * negative (a negative end index would otherwise make `slice` count from the
 * end of the array).
 */
function takeEntries(entries: EvalEntry[], count: number): EvalEntry[] {
  return count > 0 ? entries.slice(0, count) : [];
}
134
+
/**
 * Select at most `maxPositives` positive and `maxNegatives` negative entries
 * from a candidate pool, balancing invocation styles and reserving room for
 * sibling-skill hard negatives.
 *
 * Selection works in three steps:
 * 1. The pool is deduplicated by polarity and normalised query.
 * 2. Each family (explicit / implicit / contextual positives, sibling-mention
 *    negatives) is filled up to its quota from `buildPromptFamilyTargets`.
 * 3. Remaining slots are topped up from the rest of the pool in input order.
 *
 * @param entries - Candidate pool; not mutated.
 * @param maxPositives - Upper bound on returned positives.
 * @param maxNegatives - Upper bound on returned negatives.
 * @param siblingSkills - Sibling skill names to look for in negative queries,
 *   or `true` to use the built-in pattern heuristic. `false`, an empty array,
 *   or an array of blank names disables sibling prioritisation.
 * @returns Selected positives followed by selected negatives.
 */
export function selectBalancedEvalEntries(
  entries: EvalEntry[],
  maxPositives: number,
  maxNegatives: number,
  siblingSkills: string[] | boolean,
): EvalEntry[] {
  const normalizedSiblingSkills = Array.isArray(siblingSkills)
    ? siblingSkills.map((skill) => skill.trim().toLowerCase()).filter(Boolean)
    : [];
  const useSiblingHeuristic = siblingSkills === true;
  // The heuristic mode must also earn a sibling quota; otherwise its matches
  // would be pushed behind every other negative instead of being reserved.
  const hasSiblingSkills = useSiblingHeuristic || normalizedSiblingSkills.length > 0;
  const targets = buildPromptFamilyTargets(maxPositives, maxNegatives, hasSiblingSkills);

  // Deduplicating up front lets the rest of the function track picks by
  // identity: no two pool entries share a polarity and normalised query.
  const pool = dedupeEvalEntries(entries);
  const positives = pool.filter((entry) => entry.should_trigger);
  const negatives = pool.filter((entry) => !entry.should_trigger);

  const positivesOfType = (invocationType: string): EvalEntry[] =>
    positives.filter((entry) => entry.invocation_type === invocationType);

  const selectedPositives = [
    ...takeEntries(positivesOfType("explicit"), targets.explicitCount),
    ...takeEntries(positivesOfType("implicit"), targets.implicitCount),
    ...takeEntries(positivesOfType("contextual"), targets.contextualCount),
  ];
  const pickedPositives = new Set(selectedPositives);
  // Top up with any remaining positives, including untyped ones.
  for (const entry of positives) {
    if (selectedPositives.length >= maxPositives) break;
    if (pickedPositives.has(entry)) continue;
    pickedPositives.add(entry);
    selectedPositives.push(entry);
  }

  const siblingHeuristicPattern =
    /(^|[\s/$-])(sc-[a-z0-9-]+|mentor cli|State Change mentor CLI|resource\s+\d+|mental model)/i;
  const mentionsSibling = (entry: EvalEntry): boolean => {
    if (useSiblingHeuristic) return siblingHeuristicPattern.test(entry.query);
    const loweredQuery = entry.query.toLowerCase();
    return normalizedSiblingSkills.some((skill) => loweredQuery.includes(skill));
  };

  const siblingMentions = hasSiblingSkills ? negatives.filter(mentionsSibling) : [];
  const siblingMentionSet = new Set(siblingMentions);
  const nonSiblingNegatives = negatives.filter((entry) => !siblingMentionSet.has(entry));

  const siblingPicks = takeEntries(siblingMentions, targets.siblingNegativeCount);
  const selectedNegatives = [
    ...siblingPicks,
    ...takeEntries(nonSiblingNegatives, maxNegatives - siblingPicks.length),
  ];
  const pickedNegatives = new Set(selectedNegatives);
  // Top up with leftover sibling mentions when other negatives ran short.
  for (const entry of negatives) {
    if (selectedNegatives.length >= maxNegatives) break;
    if (pickedNegatives.has(entry)) continue;
    pickedNegatives.add(entry);
    selectedNegatives.push(entry);
  }

  // Non-negative bounds guard against `slice` counting from the end.
  return [
    ...selectedPositives.slice(0, Math.max(0, maxPositives)),
    ...selectedNegatives.slice(0, Math.max(0, maxNegatives)),
  ];
}
206
+
31
207
  // ---------------------------------------------------------------------------
32
208
  // Prompt building
33
209
  // ---------------------------------------------------------------------------
@@ -37,21 +213,38 @@ export function buildSyntheticPrompt(
37
213
  skillName: string,
38
214
  maxPositives: number,
39
215
  maxNegatives: number,
40
- realExamples?: { positive: string[]; negative: string[] },
216
+ realExamples?: SyntheticPromptRealExamples,
217
+ siblingSkills: string[] = [],
41
218
  ): { system: string; user: string } {
219
+ const {
220
+ explicitCount,
221
+ implicitCount,
222
+ contextualCount,
223
+ siblingNegativeCount,
224
+ adjacentNegativeCount,
225
+ unrelatedNegativeCount,
226
+ } = buildPromptFamilyTargets(maxPositives, maxNegatives, siblingSkills.length > 0);
227
+
42
228
  const system = `You are generating test queries for a coding agent skill. Given the skill description below, generate realistic user queries.
43
229
 
230
+ Your job is to create a SMALL, TARGETED benchmark for cold-start routing quality.
231
+
44
232
  For POSITIVE queries (should trigger this skill):
45
- - Generate a mix of:
233
+ - Generate a balanced mix of:
46
234
  - Explicit: directly names the skill or uses $${skillName} syntax
47
235
  - Implicit: describes the task without naming the skill
48
- - Contextual: natural language with domain context, proper nouns, dates, filenames
49
- - Vary phrasing, formality, and specificity
236
+ - Contextual: realistic natural language with domain context, proper nouns, filenames, or setup noise
237
+ - Avoid merely paraphrasing bullet points from the skill
238
+ - Prefer realistic user phrasing over polished product copy
239
+ - Include at least a few prompts that test the edge of the skill's scope, not just the obvious center
50
240
 
51
241
  For NEGATIVE queries (should NOT trigger this skill):
52
- - Queries that are topically adjacent but wrong intent
53
- - Queries for different skills that share keywords
54
- - Generic queries unrelated to this skill
242
+ - Include hard negative controls:
243
+ - sibling-skill confusion cases
244
+ - topically adjacent but wrong-intent cases
245
+ - clearly unrelated cases
246
+ - Make the hard negatives plausible, not cartoonishly unrelated
247
+ - If a query belongs to another installed skill, make that obvious from the task itself
55
248
 
56
249
  Output as JSON array with no surrounding text:
57
250
  [{"query": "...", "should_trigger": true, "invocation_type": "explicit|implicit|contextual|negative"}]`;
@@ -61,7 +254,19 @@ Output as JSON array with no surrounding text:
61
254
  Skill content:
62
255
  ${skillContent}
63
256
 
64
- Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${maxNegatives} negative queries (should_trigger: false). Return ONLY the JSON array.`;
257
+ Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${maxNegatives} negative queries (should_trigger: false).
258
+
259
+ Required positive mix:
260
+ - ${explicitCount} explicit
261
+ - ${implicitCount} implicit
262
+ - ${contextualCount} contextual
263
+
264
+ Required negative mix:
265
+ - ${siblingNegativeCount} sibling-skill confusion cases
266
+ - ${adjacentNegativeCount} adjacent but wrong-intent cases
267
+ - ${unrelatedNegativeCount} clearly unrelated cases
268
+
269
+ Return ONLY the JSON array.`;
65
270
 
66
271
  if (realExamples && (realExamples.positive.length > 0 || realExamples.negative.length > 0)) {
67
272
  const parts: string[] = ["\n\nReal user queries for style and phrasing reference:"];
@@ -77,6 +282,61 @@ Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${m
77
282
  user += parts.join("\n");
78
283
  }
79
284
 
285
+ if (siblingSkills.length > 0) {
286
+ user += `\n\nNearby installed skills to use for boundary-setting hard negatives:\n${siblingSkills
287
+ .map((skill) => `- ${skill}`)
288
+ .join(
289
+ "\n",
290
+ )}\n\nAt least ${siblingNegativeCount} negative queries should clearly belong to one of these sibling skills instead of ${skillName}.`;
291
+ }
292
+
293
+ return { system, user };
294
+ }
295
+
/**
 * Build the prompt pair that asks the model to critique a candidate pool and
 * prune it into the final benchmark.
 *
 * @param skillContent - Full text of the skill definition.
 * @param skillName - Name of the skill under evaluation.
 * @param candidates - Candidate entries, embedded as pretty-printed JSON.
 * @param maxPositives - Number of positives the final benchmark should hold.
 * @param maxNegatives - Number of negatives the final benchmark should hold.
 * @param siblingSkills - Sibling skill names listed as hard-negative anchors;
 *   when empty the sibling section is left out.
 * @returns The system and user prompt strings.
 */
export function buildSyntheticRefinementPrompt(
  skillContent: string,
  skillName: string,
  candidates: EvalEntry[],
  maxPositives: number,
  maxNegatives: number,
  siblingSkills: string[] = [],
): { system: string; user: string } {
  const {
    explicitCount,
    implicitCount,
    contextualCount,
    siblingNegativeCount,
    adjacentNegativeCount,
    unrelatedNegativeCount,
  } = buildPromptFamilyTargets(maxPositives, maxNegatives, siblingSkills.length > 0);

  const siblingSection =
    siblingSkills.length > 0
      ? `Sibling skills for hard-negative boundaries:\n${siblingSkills.map((skill) => `- ${skill}`).join("\n")}\n`
      : "";
  const candidatePool = JSON.stringify(candidates, null, 2);

  return {
    system: `You are refining a cold-start eval benchmark for a coding agent skill.

Your job is to critique and prune a candidate pool into a SMALL, SHARP benchmark.

For each candidate, reason using binary questions:
- Is this realistic user phrasing?
- Is this more than a trivial paraphrase of the skill bullets?
- Does this clearly test in-scope behavior, or clearly test a boundary?
- For negatives: does it clearly belong elsewhere or represent a plausible wrong-intent adjacent request?
- Is it sufficiently distinct from the other selected prompts?

Return ONLY a JSON array with the final benchmark.`,
    user: `Skill name: ${skillName}

Skill content:
${skillContent}

Target final benchmark:
- ${maxPositives} positives
- ${maxNegatives} negatives
- Positive mix: ${explicitCount} explicit, ${implicitCount} implicit, ${contextualCount} contextual
- Negative mix: ${siblingNegativeCount} sibling-skill confusion, ${adjacentNegativeCount} adjacent wrong-intent, ${unrelatedNegativeCount} unrelated

${siblingSection}
Candidate pool:
${candidatePool}

Instructions:
- Remove duplicates and near-duplicates
- Prefer prompts that test trigger boundaries, not just center-of-mass obvious usage
- Keep sibling-skill negatives if they are strong boundary tests
- Keep the final set compact, diverse, and realistic
- Return ONLY the final JSON array`,
  };
}
82
342
 
@@ -172,8 +432,10 @@ export async function generateSyntheticEvals(
172
432
  ): Promise<EvalEntry[]> {
173
433
  const maxPositives = options.maxPositives ?? 15;
174
434
  const maxNegatives = options.maxNegatives ?? 10;
435
+ const oversampleFactor = 2;
175
436
 
176
437
  const skillContent = readFileSync(skillPath, "utf-8");
438
+ const siblingSkills = inferSiblingSkills(skillName);
177
439
 
178
440
  // Load real query examples from the database for few-shot style guidance.
179
441
  // Uses dynamic imports since SQLite may not be available in all contexts.
@@ -214,11 +476,36 @@ export async function generateSyntheticEvals(
214
476
  const { system, user } = buildSyntheticPrompt(
215
477
  skillContent,
216
478
  skillName,
217
- maxPositives,
218
- maxNegatives,
479
+ maxPositives * oversampleFactor,
480
+ maxNegatives * oversampleFactor,
219
481
  realExamples,
482
+ siblingSkills,
220
483
  );
221
484
 
222
485
  const raw = await callLlm(system, user, agent, options.modelFlag);
223
- return parseSyntheticResponse(raw, skillName);
486
+ const firstPass = dedupeEvalEntries(parseSyntheticResponse(raw, skillName));
487
+
488
+ try {
489
+ const refinement = buildSyntheticRefinementPrompt(
490
+ skillContent,
491
+ skillName,
492
+ firstPass,
493
+ maxPositives,
494
+ maxNegatives,
495
+ siblingSkills,
496
+ );
497
+ const refinedRaw = await callLlm(refinement.system, refinement.user, agent, options.modelFlag);
498
+ const refined = dedupeEvalEntries(parseSyntheticResponse(refinedRaw, skillName));
499
+ const selected = selectBalancedEvalEntries(refined, maxPositives, maxNegatives, siblingSkills);
500
+ if (
501
+ selected.filter((entry) => entry.should_trigger).length >= maxPositives &&
502
+ selected.filter((entry) => !entry.should_trigger).length >= maxNegatives
503
+ ) {
504
+ return selected;
505
+ }
506
+ } catch {
507
+ // fall through to first-pass selection
508
+ }
509
+
510
+ return selectBalancedEvalEntries(firstPass, maxPositives, maxNegatives, siblingSkills);
224
511
  }
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Export SQLite data to JSONL format.
3
- * Replaces the removed JSONL write path -- use this when you need
4
- * JSONL files for debugging, the contribute workflow, or external tools.
3
+ * Use this only when you explicitly need portable/debuggable JSONL snapshots
4
+ * for recovery, the contribute workflow, or external tools.
5
5
  */
6
6
  import { mkdirSync, writeFileSync } from "node:fs";
7
7
  import { join } from "node:path";
@@ -6,7 +6,7 @@
6
6
  * selftune ingest <agent> — Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
7
7
  * selftune grade [mode] — Grade skill sessions (auto, baseline)
8
8
  * selftune evolve [target] — Evolve skill descriptions (body, rollback)
9
- * selftune eval <action> — Evaluation tools (generate, unit-test, import, composability)
9
+ * selftune eval <action> — Evaluation tools (generate, unit-test, import, composability, family-overlap)
10
10
  * selftune sync — Sync source-truth telemetry across supported agents
11
11
  * selftune orchestrate — Run autonomous core loop (sync → status → evolve → watch)
12
12
  * selftune init — Initialize agent identity and config
@@ -19,11 +19,14 @@
19
19
  * selftune cron — Scheduling & automation (setup, list, remove)
20
20
  * selftune badge — Generate skill health badges for READMEs
21
21
  * selftune contribute — Export anonymized skill data for community
22
+ * selftune contributions — Manage creator-directed sharing preferences
23
+ * selftune creator-contributions — Manage creator-side contribution configs
22
24
  * selftune workflows — Discover and manage multi-skill workflows
23
25
  * selftune quickstart — Guided onboarding: init, ingest, status, and suggestions
24
26
  * selftune repair-skill-usage — Rebuild trustworthy skill usage from transcripts
25
- * selftune export — Export SQLite data to JSONL files
27
+ * selftune export — Export SQLite data to JSONL snapshots
26
28
  * selftune export-canonical — Export canonical telemetry for downstream ingestion
29
+ * selftune recover — Recover SQLite from legacy/exported JSONL
27
30
  * selftune telemetry — Manage anonymous usage analytics (status, enable, disable)
28
31
  * selftune alpha <subcommand> — Alpha program management (upload)
29
32
  * selftune hook <name> — Run a hook by name (prompt-log, session-stop, etc.)
@@ -46,7 +49,7 @@ Commands:
46
49
  ingest <agent> Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
47
50
  grade [mode] Grade skill sessions (auto, baseline)
48
51
  evolve [target] Evolve skill descriptions (body, rollback)
49
- eval <action> Evaluation tools (generate, unit-test, import, composability)
52
+ eval <action> Evaluation tools (generate, unit-test, import, composability, family-overlap)
50
53
  sync Sync source-truth telemetry across supported agents
51
54
  orchestrate Run autonomous core loop (sync → status → evolve → watch)
52
55
  init Initialize agent identity and config
@@ -59,11 +62,14 @@ Commands:
59
62
  cron Scheduling & automation (setup, list, remove)
60
63
  badge Generate skill health badges for READMEs
61
64
  contribute Export anonymized skill data for community
65
+ contributions Manage creator-directed sharing preferences
66
+ creator-contributions Manage creator-side contribution configs
62
67
  workflows Discover and manage multi-skill workflows
63
68
  quickstart Guided onboarding: init, ingest, status, and suggestions
64
69
  repair-skill-usage Rebuild trustworthy skill usage from transcripts
65
- export Export SQLite data to JSONL files
70
+ export Export SQLite data to JSONL snapshots
66
71
  export-canonical Export canonical telemetry for downstream ingestion
72
+ recover Recover SQLite from legacy/exported JSONL
67
73
  alpha <subcommand> Alpha program management (upload)
68
74
  telemetry Manage anonymous usage analytics (status, enable, disable)
69
75
  hook <name> Run a hook by name (prompt-log, session-stop, etc.)
@@ -254,6 +260,7 @@ Actions:
254
260
  unit-test Run or generate skill unit tests
255
261
  import Import SkillsBench task corpus as eval entries
256
262
  composability Analyze skill co-occurrence conflicts
263
+ family-overlap Detect sibling-skill overlap and consolidation pressure
257
264
 
258
265
  Run 'selftune eval <action> --help' for action-specific options.`);
259
266
  process.exit(0);
@@ -341,6 +348,17 @@ Run 'selftune eval <action> --help' for action-specific options.`);
341
348
  console.log(JSON.stringify(report, null, 2));
342
349
  break;
343
350
  }
351
+ case "family-overlap": {
352
+ if (process.argv[2] === "--help" || process.argv[2] === "-h") {
353
+ console.log(
354
+ "selftune eval family-overlap --prefix <family-> | --skills <a,b,c> [--parent-skill <name>] [--min-overlap 0.3] [--min-shared 2]",
355
+ );
356
+ process.exit(0);
357
+ }
358
+ const { cliMain } = await import("./eval/family-overlap.js");
359
+ await cliMain();
360
+ break;
361
+ }
344
362
  default:
345
363
  throw new CLIError(
346
364
  `Unknown eval action: ${sub}`,
@@ -368,6 +386,16 @@ Run 'selftune eval <action> --help' for action-specific options.`);
368
386
  await cliMain();
369
387
  break;
370
388
  }
389
+ case "contributions": {
390
+ const { cliMain } = await import("./contributions.js");
391
+ await cliMain();
392
+ break;
393
+ }
394
+ case "creator-contributions": {
395
+ const { cliMain } = await import("./creator-contributions.js");
396
+ await cliMain();
397
+ break;
398
+ }
371
399
  case "watch": {
372
400
  const { cliMain } = await import("./monitoring/watch.js");
373
401
  await cliMain();
@@ -527,11 +555,14 @@ Run 'selftune cron <subcommand> --help' for subcommand-specific options.`);
527
555
  throw new CLIError(`Invalid arguments: ${message}`, "INVALID_FLAG", "selftune export --help");
528
556
  }
529
557
  if (values.help) {
530
- console.log(`selftune export — Export SQLite data to JSONL files
558
+ console.log(`selftune export — Export SQLite data to JSONL snapshots
531
559
 
532
560
  Usage:
533
561
  selftune export [tables...] [options]
534
562
 
563
+ Use this for portability, debugging, contribute flows, or explicit recovery
564
+ snapshots. Normal runtime reads and writes stay in SQLite.
565
+
535
566
  Tables (default: all):
536
567
  telemetry Session telemetry records
537
568
  skills Skill usage records
@@ -570,6 +601,11 @@ Options:
570
601
  cliMain();
571
602
  break;
572
603
  }
604
+ case "recover": {
605
+ const { cliMain } = await import("./recover.js");
606
+ cliMain();
607
+ break;
608
+ }
573
609
  case "orchestrate": {
574
610
  const { cliMain } = await import("./orchestrate.js");
575
611
  await cliMain();
@@ -52,9 +52,9 @@ import type {
52
52
  import { handleCLIError } from "../utils/cli-error.js";
53
53
  import { loadMarker, saveMarker } from "../utils/jsonl.js";
54
54
  import { extractActionableQueryText } from "../utils/query-filter.js";
55
+ import { getInternalPromptTargetSkill, isWrappedNonUserPart } from "../utils/skill-detection.js";
55
56
  import {
56
57
  classifySkillPath,
57
- containsWholeSkillMention,
58
58
  extractExplicitSkillMentions,
59
59
  extractSkillNamesFromInstructions,
60
60
  extractSkillNamesFromPathReferences,
@@ -228,6 +228,15 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
228
228
  let observedCwd: string | undefined;
229
229
  const sessionSkillNames = new Set(skillNames);
230
230
  let hasActionablePrompt = false;
231
+ const markSkillTriggered = (skillName: string, evidence: "explicit" | "inferred"): void => {
232
+ if (!skillsTriggered.includes(skillName)) {
233
+ skillsTriggered.push(skillName);
234
+ }
235
+ const existingEvidence = skillEvidence.get(skillName);
236
+ if (existingEvidence !== "explicit") {
237
+ skillEvidence.set(skillName, evidence);
238
+ }
239
+ };
231
240
  const rememberSessionSkillNames = (text: unknown): void => {
232
241
  if (typeof text !== "string" || !text) return;
233
242
  for (const skillName of extractSkillNamesFromInstructions(text, sessionSkillNames)) {
@@ -240,33 +249,23 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
240
249
  sessionSkillNames.add(skillName);
241
250
  }
242
251
  };
243
- const detectTriggeredSkills = (text: unknown): void => {
244
- if (typeof text !== "string" || !text) return;
245
- for (const skillName of sessionSkillNames) {
246
- if (containsWholeSkillMention(text, skillName) && !skillsTriggered.includes(skillName)) {
247
- skillsTriggered.push(skillName);
248
- }
249
- if (containsWholeSkillMention(text, skillName) && !skillEvidence.has(skillName)) {
250
- skillEvidence.set(skillName, "inferred");
251
- }
252
- }
253
- };
254
252
  const detectExplicitPromptSkillMentions = (text: unknown): void => {
255
253
  if (typeof text !== "string" || !text) return;
256
- for (const skillName of extractExplicitSkillMentions(text, sessionSkillNames)) {
257
- if (!skillsTriggered.includes(skillName)) {
258
- skillsTriggered.push(skillName);
259
- }
260
- skillEvidence.set(skillName, "explicit");
254
+ if (isWrappedNonUserPart(text)) return;
255
+ const actionableText = extractActionableQueryText(text) ?? text;
256
+ const internalTargetSkill = getInternalPromptTargetSkill(actionableText, sessionSkillNames);
257
+ if (internalTargetSkill) {
258
+ markSkillTriggered(internalTargetSkill, "explicit");
259
+ return;
260
+ }
261
+ for (const skillName of extractExplicitSkillMentions(actionableText, sessionSkillNames)) {
262
+ markSkillTriggered(skillName, "explicit");
261
263
  }
262
264
  };
263
265
  const detectExplicitSkillReads = (text: unknown): void => {
264
266
  if (typeof text !== "string" || !text) return;
265
267
  for (const skillName of extractSkillNamesFromPathReferences(text, sessionSkillNames)) {
266
- if (!skillsTriggered.includes(skillName)) {
267
- skillsTriggered.push(skillName);
268
- }
269
- skillEvidence.set(skillName, "explicit");
268
+ markSkillTriggered(skillName, "explicit");
270
269
  }
271
270
  };
272
271
  const rememberPromptCandidate = (value: unknown): void => {
@@ -352,27 +351,26 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
352
351
  if (itemType === "function_call") {
353
352
  const fnName = (payload.name as string) ?? "function_call";
354
353
  toolCalls[fnName] = (toolCalls[fnName] ?? 0) + 1;
355
- // Check for skill mentions in function arguments
354
+ // Only path-based skill references count as triggers here.
356
355
  detectExplicitSkillReads(payload.arguments);
357
- detectTriggeredSkills(payload.arguments);
358
356
  } else if (itemType === "agent_reasoning") {
359
357
  toolCalls.reasoning = (toolCalls.reasoning ?? 0) + 1;
360
- detectTriggeredSkills(payload.text);
361
358
  } else if (itemType === "message") {
362
- const content = Array.isArray(payload.content)
359
+ const parts = Array.isArray(payload.content)
363
360
  ? payload.content
364
361
  .map((part) =>
365
362
  typeof part === "object" && part
366
363
  ? (((part as Record<string, unknown>).text as string | undefined) ?? "")
367
364
  : "",
368
365
  )
369
- .join("\n")
370
- : "";
366
+ .filter(Boolean)
367
+ : [];
368
+ const content = parts.join("\n");
371
369
  rememberSessionSkillNames(content);
372
- if ((payload.role as string) === "assistant") {
373
- detectTriggeredSkills(content);
374
- } else if ((payload.role as string) === "user") {
375
- detectExplicitPromptSkillMentions(content);
370
+ if ((payload.role as string) === "user") {
371
+ for (const part of parts) {
372
+ detectExplicitPromptSkillMentions(part);
373
+ }
376
374
  }
377
375
  }
378
376
  } else if (etype === "turn.started") {
@@ -410,10 +408,8 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
410
408
  }
411
409
 
412
410
  // Detect path-based skill references in executed commands
413
- const textContent = ((item.text as string) ?? "") + ((item.command as string) ?? "");
414
- detectExplicitSkillReads(textContent);
415
- if (etype === "item.completed") {
416
- detectTriggeredSkills(textContent);
411
+ if (itemType === "command_execution") {
412
+ detectExplicitSkillReads(item.command);
417
413
  }
418
414
  } else if (etype === "error") {
419
415
  errors += 1;