selftune 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/.claude/agents/diagnosis-analyst.md +20 -10
  2. package/.claude/agents/evolution-reviewer.md +14 -1
  3. package/.claude/agents/integration-guide.md +18 -6
  4. package/.claude/agents/pattern-analyst.md +18 -5
  5. package/CHANGELOG.md +12 -4
  6. package/README.md +43 -35
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/cli/selftune/badge/badge-data.ts +1 -1
  20. package/cli/selftune/badge/badge.ts +4 -8
  21. package/cli/selftune/canonical-export.ts +183 -0
  22. package/cli/selftune/constants.ts +28 -0
  23. package/cli/selftune/contribute/contribute.ts +1 -1
  24. package/cli/selftune/cron/setup.ts +17 -17
  25. package/cli/selftune/dashboard-contract.ts +202 -0
  26. package/cli/selftune/dashboard-server.ts +653 -186
  27. package/cli/selftune/dashboard.ts +41 -176
  28. package/cli/selftune/eval/baseline.ts +5 -4
  29. package/cli/selftune/eval/composability-v2.ts +273 -0
  30. package/cli/selftune/eval/hooks-to-evals.ts +34 -15
  31. package/cli/selftune/eval/unit-test-cli.ts +1 -1
  32. package/cli/selftune/evolution/evidence.ts +26 -0
  33. package/cli/selftune/evolution/evolve-body.ts +105 -11
  34. package/cli/selftune/evolution/evolve.ts +371 -25
  35. package/cli/selftune/evolution/extract-patterns.ts +87 -29
  36. package/cli/selftune/evolution/rollback.ts +2 -2
  37. package/cli/selftune/grading/auto-grade.ts +200 -0
  38. package/cli/selftune/grading/grade-session.ts +448 -97
  39. package/cli/selftune/grading/results.ts +42 -0
  40. package/cli/selftune/hooks/prompt-log.ts +172 -2
  41. package/cli/selftune/hooks/session-stop.ts +123 -3
  42. package/cli/selftune/hooks/skill-eval.ts +119 -3
  43. package/cli/selftune/index.ts +395 -116
  44. package/cli/selftune/ingestors/claude-replay.ts +140 -114
  45. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  46. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  47. package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
  48. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  49. package/cli/selftune/init.ts +227 -14
  50. package/cli/selftune/last.ts +14 -5
  51. package/cli/selftune/localdb/db.ts +63 -0
  52. package/cli/selftune/localdb/materialize.ts +428 -0
  53. package/cli/selftune/localdb/queries.ts +376 -0
  54. package/cli/selftune/localdb/schema.ts +204 -0
  55. package/cli/selftune/monitoring/watch.ts +66 -15
  56. package/cli/selftune/normalization.ts +682 -0
  57. package/cli/selftune/observability.ts +19 -44
  58. package/cli/selftune/orchestrate.ts +1073 -0
  59. package/cli/selftune/quickstart.ts +203 -0
  60. package/cli/selftune/repair/skill-usage.ts +576 -0
  61. package/cli/selftune/schedule.ts +561 -0
  62. package/cli/selftune/status.ts +48 -26
  63. package/cli/selftune/sync.ts +627 -0
  64. package/cli/selftune/types.ts +148 -0
  65. package/cli/selftune/utils/canonical-log.ts +45 -0
  66. package/cli/selftune/utils/hooks.ts +41 -0
  67. package/cli/selftune/utils/html.ts +27 -0
  68. package/cli/selftune/utils/llm-call.ts +78 -20
  69. package/cli/selftune/utils/math.ts +10 -0
  70. package/cli/selftune/utils/query-filter.ts +139 -0
  71. package/cli/selftune/utils/skill-discovery.ts +340 -0
  72. package/cli/selftune/utils/skill-log.ts +68 -0
  73. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  74. package/cli/selftune/utils/transcript.ts +272 -26
  75. package/cli/selftune/workflows/discover.ts +254 -0
  76. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  77. package/cli/selftune/workflows/workflows.ts +188 -0
  78. package/package.json +21 -8
  79. package/packages/telemetry-contract/README.md +11 -0
  80. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  81. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  82. package/packages/telemetry-contract/index.ts +1 -0
  83. package/packages/telemetry-contract/package.json +19 -0
  84. package/packages/telemetry-contract/src/index.ts +2 -0
  85. package/packages/telemetry-contract/src/types.ts +163 -0
  86. package/packages/telemetry-contract/src/validators.ts +109 -0
  87. package/skill/SKILL.md +84 -53
  88. package/skill/Workflows/AutoActivation.md +17 -16
  89. package/skill/Workflows/Badge.md +6 -0
  90. package/skill/Workflows/Baseline.md +46 -23
  91. package/skill/Workflows/Composability.md +12 -5
  92. package/skill/Workflows/Contribute.md +17 -14
  93. package/skill/Workflows/Cron.md +56 -79
  94. package/skill/Workflows/Dashboard.md +45 -34
  95. package/skill/Workflows/Doctor.md +30 -17
  96. package/skill/Workflows/Evals.md +64 -40
  97. package/skill/Workflows/EvolutionMemory.md +2 -0
  98. package/skill/Workflows/Evolve.md +102 -47
  99. package/skill/Workflows/EvolveBody.md +6 -6
  100. package/skill/Workflows/Grade.md +36 -31
  101. package/skill/Workflows/ImportSkillsBench.md +11 -5
  102. package/skill/Workflows/Ingest.md +43 -36
  103. package/skill/Workflows/Initialize.md +44 -30
  104. package/skill/Workflows/Orchestrate.md +139 -0
  105. package/skill/Workflows/Replay.md +39 -18
  106. package/skill/Workflows/Rollback.md +3 -3
  107. package/skill/Workflows/Schedule.md +61 -0
  108. package/skill/Workflows/Sync.md +88 -0
  109. package/skill/Workflows/UnitTest.md +34 -22
  110. package/skill/Workflows/Watch.md +14 -4
  111. package/skill/Workflows/Workflows.md +129 -0
  112. package/skill/assets/activation-rules-default.json +26 -0
  113. package/skill/assets/multi-skill-settings.json +63 -0
  114. package/skill/assets/single-skill-settings.json +57 -0
  115. package/skill/references/invocation-taxonomy.md +2 -2
  116. package/skill/references/logs.md +164 -2
  117. package/skill/references/setup-patterns.md +65 -0
  118. package/skill/references/version-history.md +40 -0
  119. package/skill/settings_snippet.json +1 -1
  120. package/templates/multi-skill-settings.json +7 -7
  121. package/templates/single-skill-settings.json +6 -6
  122. package/dashboard/index.html +0 -1680
@@ -7,8 +7,8 @@
7
7
  * - cliMain() (reads logs, runs doctor, prints output)
8
8
  */
9
9
 
10
- import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
11
- import { computeMonitoringSnapshot } from "./monitoring/watch.js";
10
+ import { EVOLUTION_AUDIT_LOG, QUERY_LOG, TELEMETRY_LOG } from "./constants.js";
11
+ import { computeMonitoringSnapshot, MIN_MONITORING_SKILL_CHECKS } from "./monitoring/watch.js";
12
12
  import { doctor } from "./observability.js";
13
13
  import type {
14
14
  DoctorResult,
@@ -19,6 +19,11 @@ import type {
19
19
  SkillUsageRecord,
20
20
  } from "./types.js";
21
21
  import { readJsonl } from "./utils/jsonl.js";
22
+ import {
23
+ filterActionableQueryRecords,
24
+ filterActionableSkillUsageRecords,
25
+ } from "./utils/query-filter.js";
26
+ import { readEffectiveSkillUsageRecords } from "./utils/skill-log.js";
22
27
 
23
28
  // ---------------------------------------------------------------------------
24
29
  // Result types
@@ -29,7 +34,7 @@ export interface SkillStatus {
29
34
  passRate: number | null;
30
35
  trend: "up" | "down" | "stable" | "unknown";
31
36
  missedQueries: number;
32
- status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
37
+ status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
33
38
  snapshot: MonitoringSnapshot | null;
34
39
  }
35
40
 
@@ -50,7 +55,7 @@ export interface StatusResult {
50
55
  // Constants
51
56
  // ---------------------------------------------------------------------------
52
57
 
53
- const DEFAULT_WINDOW_SESSIONS = 20;
58
+ export const DEFAULT_WINDOW_SESSIONS = 20;
54
59
  const DEFAULT_BASELINE_PASS_RATE = 0.5;
55
60
 
56
61
  // ---------------------------------------------------------------------------
@@ -64,13 +69,14 @@ export function computeStatus(
64
69
  auditEntries: EvolutionAuditEntry[],
65
70
  doctorResult: DoctorResult,
66
71
  ): StatusResult {
72
+ const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
73
+ const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
67
74
  // Derive unique skill names from skill records
68
- const skillNames = [...new Set(skillRecords.map((r) => r.skill_name))];
75
+ const skillNames = [...new Set(actionableSkillRecords.map((r) => r.skill_name))];
69
76
 
70
77
  // Build per-skill status
71
78
  const skills: SkillStatus[] = skillNames.map((skillName) => {
72
- const skillSpecificRecords = skillRecords.filter((r) => r.skill_name === skillName);
73
- const triggeredRecords = skillSpecificRecords.filter((r) => r.triggered);
79
+ const skillSpecificRecords = actionableSkillRecords.filter((r) => r.skill_name === skillName);
74
80
 
75
81
  // Get baseline from last deployed proposal
76
82
  const lastDeployed = getLastDeployedProposalFromEntries(auditEntries, skillName);
@@ -80,21 +86,19 @@ export function computeStatus(
80
86
  const snapshot = computeMonitoringSnapshot(
81
87
  skillName,
82
88
  telemetry,
83
- skillRecords,
84
- queryRecords,
89
+ actionableSkillRecords,
90
+ actionableQueryRecords,
85
91
  DEFAULT_WINDOW_SESSIONS,
86
92
  baselinePassRate,
87
93
  );
88
94
 
89
- // Determine if there's any meaningful data
90
- const totalQueries = queryRecords.length;
91
- const hasData = triggeredRecords.length > 0 || totalQueries > 0;
95
+ // A skill has data when it has explicit check records, regardless of whether any passed.
96
+ // Using triggered-only rows would incorrectly hide meaningful all-false samples.
97
+ const hasData = skillSpecificRecords.length > 0;
98
+ const hasEnoughSamples = snapshot.skill_checks >= MIN_MONITORING_SKILL_CHECKS;
92
99
 
93
- // Compute pass rate (null if no data)
94
- let passRate: number | null = null;
95
- if (hasData && totalQueries > 0) {
96
- passRate = snapshot.pass_rate;
97
- }
100
+ // Compute pass rate (null only if this skill has no graded checks at all)
101
+ const passRate = hasData ? snapshot.pass_rate : null;
98
102
 
99
103
  // Determine trend: compare first-half vs second-half pass rates
100
104
  const trend = computeTrend(skillSpecificRecords);
@@ -102,10 +106,11 @@ export function computeStatus(
102
106
  // Count missed queries for this skill (queries where skill was checked but not triggered)
103
107
  const missedQueries = skillSpecificRecords.filter((r) => !r.triggered).length;
104
108
 
105
- // Determine status (4-state)
106
- let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
107
- if (!hasData || passRate === null) {
108
- status = "UNKNOWN";
109
+ // Determine status (5-state)
110
+ let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
111
+ if (!hasData || passRate === null || !hasEnoughSamples) {
112
+ // Skill exists in logs but has too little data for a meaningful health label
113
+ status = skillSpecificRecords.length > 0 ? "UNGRADED" : "UNKNOWN";
109
114
  } else if (snapshot.regression_detected || passRate < 0.4) {
110
115
  status = "CRITICAL";
111
116
  } else if (passRate < 0.7) {
@@ -118,14 +123,22 @@ export function computeStatus(
118
123
  });
119
124
 
120
125
  // Sort: CRITICAL first, then WARNING, then HEALTHY, then UNKNOWN
121
- const statusOrder: Record<string, number> = { CRITICAL: 0, WARNING: 1, HEALTHY: 2, UNKNOWN: 3 };
126
+ const statusOrder: Record<string, number> = {
127
+ CRITICAL: 0,
128
+ WARNING: 1,
129
+ HEALTHY: 2,
130
+ UNGRADED: 3,
131
+ UNKNOWN: 4,
132
+ };
122
133
  skills.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);
123
134
 
124
135
  // Unmatched queries: queries whose text appears in zero triggered skill_usage_log entries
125
136
  const triggeredQueryTexts = new Set(
126
- skillRecords.filter((r) => r.triggered).map((r) => r.query.toLowerCase().trim()),
137
+ actionableSkillRecords
138
+ .filter((r) => r.triggered && typeof r.query === "string")
139
+ .map((r) => r.query.toLowerCase().trim()),
127
140
  );
128
- const unmatchedQueries = queryRecords.filter(
141
+ const unmatchedQueries = actionableQueryRecords.filter(
129
142
  (q) => !triggeredQueryTexts.has(q.query.toLowerCase().trim()),
130
143
  ).length;
131
144
 
@@ -247,10 +260,19 @@ export function formatStatus(result: StatusResult): string {
247
260
  ? amber(skill.status)
248
261
  : skill.status === "HEALTHY"
249
262
  ? green(skill.status)
250
- : amber(skill.status);
263
+ : skill.status === "UNGRADED"
264
+ ? amber(skill.status)
265
+ : amber(skill.status);
251
266
  lines.push(` ${name}${passRate}${trend}${missed}${statusText}`);
252
267
  }
253
268
 
269
+ // Onboarding hint for ungraded skills
270
+ const ungradedSkills = result.skills.filter((s) => s.status === "UNGRADED");
271
+ if (ungradedSkills.length > 0) {
272
+ lines.push("");
273
+ lines.push(` Hint: Run \`selftune grade --skill <name>\` to establish baselines`);
274
+ }
275
+
254
276
  lines.push("");
255
277
 
256
278
  // Summary stats
@@ -305,7 +327,7 @@ function colorize(text: string, hex: string): string {
305
327
  export function cliMain(): void {
306
328
  try {
307
329
  const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
308
- const skillRecords = readJsonl<SkillUsageRecord>(SKILL_LOG);
330
+ const skillRecords = readEffectiveSkillUsageRecords();
309
331
  const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
310
332
  const auditEntries = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);
311
333
  const doctorResult = doctor();