selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -7,8 +7,8 @@
7
7
  * - cliMain() (reads logs, runs doctor, prints output)
8
8
  */
9
9
 
10
- import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
11
- import { computeMonitoringSnapshot } from "./monitoring/watch.js";
10
+ import { EVOLUTION_AUDIT_LOG, QUERY_LOG, TELEMETRY_LOG } from "./constants.js";
11
+ import { computeMonitoringSnapshot, MIN_MONITORING_SKILL_CHECKS } from "./monitoring/watch.js";
12
12
  import { doctor } from "./observability.js";
13
13
  import type {
14
14
  DoctorResult,
@@ -19,6 +19,11 @@ import type {
19
19
  SkillUsageRecord,
20
20
  } from "./types.js";
21
21
  import { readJsonl } from "./utils/jsonl.js";
22
+ import {
23
+ filterActionableQueryRecords,
24
+ filterActionableSkillUsageRecords,
25
+ } from "./utils/query-filter.js";
26
+ import { readEffectiveSkillUsageRecords } from "./utils/skill-log.js";
22
27
 
23
28
  // ---------------------------------------------------------------------------
24
29
  // Result types
@@ -29,7 +34,7 @@ export interface SkillStatus {
29
34
  passRate: number | null;
30
35
  trend: "up" | "down" | "stable" | "unknown";
31
36
  missedQueries: number;
32
- status: "HEALTHY" | "REGRESSED" | "NO DATA";
37
+ status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
33
38
  snapshot: MonitoringSnapshot | null;
34
39
  }
35
40
 
@@ -50,7 +55,7 @@ export interface StatusResult {
50
55
  // Constants
51
56
  // ---------------------------------------------------------------------------
52
57
 
53
- const DEFAULT_WINDOW_SESSIONS = 20;
58
+ export const DEFAULT_WINDOW_SESSIONS = 20;
54
59
  const DEFAULT_BASELINE_PASS_RATE = 0.5;
55
60
 
56
61
  // ---------------------------------------------------------------------------
@@ -64,13 +69,14 @@ export function computeStatus(
64
69
  auditEntries: EvolutionAuditEntry[],
65
70
  doctorResult: DoctorResult,
66
71
  ): StatusResult {
72
+ const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
73
+ const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
67
74
  // Derive unique skill names from skill records
68
- const skillNames = [...new Set(skillRecords.map((r) => r.skill_name))];
75
+ const skillNames = [...new Set(actionableSkillRecords.map((r) => r.skill_name))];
69
76
 
70
77
  // Build per-skill status
71
78
  const skills: SkillStatus[] = skillNames.map((skillName) => {
72
- const skillSpecificRecords = skillRecords.filter((r) => r.skill_name === skillName);
73
- const triggeredRecords = skillSpecificRecords.filter((r) => r.triggered);
79
+ const skillSpecificRecords = actionableSkillRecords.filter((r) => r.skill_name === skillName);
74
80
 
75
81
  // Get baseline from last deployed proposal
76
82
  const lastDeployed = getLastDeployedProposalFromEntries(auditEntries, skillName);
@@ -80,21 +86,19 @@ export function computeStatus(
80
86
  const snapshot = computeMonitoringSnapshot(
81
87
  skillName,
82
88
  telemetry,
83
- skillRecords,
84
- queryRecords,
89
+ actionableSkillRecords,
90
+ actionableQueryRecords,
85
91
  DEFAULT_WINDOW_SESSIONS,
86
92
  baselinePassRate,
87
93
  );
88
94
 
89
- // Determine if there's any meaningful data
90
- const totalQueries = queryRecords.length;
91
- const hasData = triggeredRecords.length > 0 || totalQueries > 0;
95
+ // A skill has data when it has explicit check records, regardless of whether any passed.
96
+ // Using triggered-only rows would incorrectly hide meaningful all-false samples.
97
+ const hasData = skillSpecificRecords.length > 0;
98
+ const hasEnoughSamples = snapshot.skill_checks >= MIN_MONITORING_SKILL_CHECKS;
92
99
 
93
- // Compute pass rate (null if no data)
94
- let passRate: number | null = null;
95
- if (hasData && totalQueries > 0) {
96
- passRate = snapshot.pass_rate;
97
- }
100
+ // Compute pass rate (null only if this skill has no graded checks at all)
101
+ const passRate = hasData ? snapshot.pass_rate : null;
98
102
 
99
103
  // Determine trend: compare first-half vs second-half pass rates
100
104
  const trend = computeTrend(skillSpecificRecords);
@@ -102,12 +106,15 @@ export function computeStatus(
102
106
  // Count missed queries for this skill (queries where skill was checked but not triggered)
103
107
  const missedQueries = skillSpecificRecords.filter((r) => !r.triggered).length;
104
108
 
105
- // Determine status
106
- let status: "HEALTHY" | "REGRESSED" | "NO DATA";
107
- if (!hasData || passRate === null) {
108
- status = "NO DATA";
109
- } else if (snapshot.regression_detected) {
110
- status = "REGRESSED";
109
+ // Determine status (5-state)
110
+ let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
111
+ if (!hasData || passRate === null || !hasEnoughSamples) {
112
+ // Skill exists in logs but has too little data for a meaningful health label
113
+ status = skillSpecificRecords.length > 0 ? "UNGRADED" : "UNKNOWN";
114
+ } else if (snapshot.regression_detected || passRate < 0.4) {
115
+ status = "CRITICAL";
116
+ } else if (passRate < 0.7) {
117
+ status = "WARNING";
111
118
  } else {
112
119
  status = "HEALTHY";
113
120
  }
@@ -115,15 +122,23 @@ export function computeStatus(
115
122
  return { name: skillName, passRate, trend, missedQueries, status, snapshot };
116
123
  });
117
124
 
118
- // Sort: REGRESSED first, then HEALTHY, then NO DATA
119
- const statusOrder = { REGRESSED: 0, HEALTHY: 1, "NO DATA": 2 };
125
+ // Sort: CRITICAL first, then WARNING, then HEALTHY, then UNGRADED, then UNKNOWN
126
+ const statusOrder: Record<string, number> = {
127
+ CRITICAL: 0,
128
+ WARNING: 1,
129
+ HEALTHY: 2,
130
+ UNGRADED: 3,
131
+ UNKNOWN: 4,
132
+ };
120
133
  skills.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);
121
134
 
122
135
  // Unmatched queries: queries whose text appears in zero triggered skill_usage_log entries
123
136
  const triggeredQueryTexts = new Set(
124
- skillRecords.filter((r) => r.triggered).map((r) => r.query.toLowerCase().trim()),
137
+ actionableSkillRecords
138
+ .filter((r) => r.triggered && typeof r.query === "string")
139
+ .map((r) => r.query.toLowerCase().trim()),
125
140
  );
126
- const unmatchedQueries = queryRecords.filter(
141
+ const unmatchedQueries = actionableQueryRecords.filter(
127
142
  (q) => !triggeredQueryTexts.has(q.query.toLowerCase().trim()),
128
143
  ).length;
129
144
 
@@ -231,7 +246,7 @@ export function formatStatus(result: StatusResult): string {
231
246
  lines.push(" Name Pass Rate Trend Missed Status");
232
247
 
233
248
  for (const skill of result.skills) {
234
- const name = skill.name.padEnd(16);
249
+ const name = skill.name.slice(0, 16).padEnd(16);
235
250
  const passRate =
236
251
  skill.passRate !== null
237
252
  ? `${Math.round(skill.passRate * 100)}%`.padEnd(11)
@@ -239,14 +254,25 @@ export function formatStatus(result: StatusResult): string {
239
254
  const trend = TREND_SYMBOLS[skill.trend].padEnd(7);
240
255
  const missed = String(skill.missedQueries).padEnd(8);
241
256
  const statusText =
242
- skill.status === "REGRESSED"
257
+ skill.status === "CRITICAL"
243
258
  ? red(skill.status)
244
- : skill.status === "HEALTHY"
245
- ? green(skill.status)
246
- : amber(skill.status);
259
+ : skill.status === "WARNING"
260
+ ? amber(skill.status)
261
+ : skill.status === "HEALTHY"
262
+ ? green(skill.status)
263
+ : skill.status === "UNGRADED"
264
+ ? amber(skill.status)
265
+ : amber(skill.status);
247
266
  lines.push(` ${name}${passRate}${trend}${missed}${statusText}`);
248
267
  }
249
268
 
269
+ // Onboarding hint for ungraded skills
270
+ const ungradedSkills = result.skills.filter((s) => s.status === "UNGRADED");
271
+ if (ungradedSkills.length > 0) {
272
+ lines.push("");
273
+ lines.push(` Hint: Run \`selftune grade --skill <name>\` to establish baselines`);
274
+ }
275
+
250
276
  lines.push("");
251
277
 
252
278
  // Summary stats
@@ -301,7 +327,7 @@ function colorize(text: string, hex: string): string {
301
327
  export function cliMain(): void {
302
328
  try {
303
329
  const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
304
- const skillRecords = readJsonl<SkillUsageRecord>(SKILL_LOG);
330
+ const skillRecords = readEffectiveSkillUsageRecords();
305
331
  const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
306
332
  const auditEntries = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);
307
333
  const doctorResult = doctor();