selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -0,0 +1,447 @@
1
+ /**
2
+ * Memory writer — helper functions for reading/writing evolution memory files on disk.
3
+ *
4
+ * Memory files live at ~/.selftune/memory/ and provide human-readable session
5
+ * context that survives context resets. Three files:
6
+ * - context.md — active evolutions, known issues
7
+ * - plan.md — current priorities, strategy
8
+ * - decisions.md — append-only decision log
9
+ *
10
+ * All functions accept an optional memoryDir parameter for testability.
11
+ * Default: MEMORY_DIR from constants.
12
+ */
13
+
14
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
15
+ import { join } from "node:path";
16
+
17
+ import { MEMORY_DIR } from "../constants.js";
18
+ import type { EvolveResult } from "../evolution/evolve.js";
19
+ import type { RollbackResult } from "../evolution/rollback.js";
20
+ import type {
21
+ DecisionRecord,
22
+ EvolutionProposal,
23
+ MemoryContext,
24
+ MemoryPlan,
25
+ MonitoringSnapshot,
26
+ } from "../types.js";
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Directory management
30
+ // ---------------------------------------------------------------------------
31
+
32
/**
 * Make sure the memory directory exists, creating it (and any missing
 * parent directories) when it is absent.
 *
 * @param memoryDir - Directory to create; defaults to MEMORY_DIR.
 */
export function ensureMemoryDir(memoryDir: string = MEMORY_DIR): void {
  const alreadyPresent = existsSync(memoryDir);
  if (alreadyPresent) {
    return;
  }
  mkdirSync(memoryDir, { recursive: true });
}
37
+
38
+ // ---------------------------------------------------------------------------
39
+ // context.md
40
+ // ---------------------------------------------------------------------------
41
+
42
/**
 * Render a MemoryContext as the markdown text stored in context.md.
 *
 * Layout: a "# Selftune Context" title followed by three "##" sections —
 * Active Evolutions (one "- skill: status — description" bullet per entry),
 * Known Issues (one bullet per issue) and Last Updated. Empty lists are
 * rendered as a single "- (none)" bullet.
 *
 * @param data - Context to serialize.
 * @returns The markdown document, terminated by a newline.
 */
function formatContext(data: MemoryContext): string {
  // The reader consumes this file one line at a time, so a value that
  // contains a line break would be truncated (or misread as a heading or
  // bullet) on the next read. Collapse embedded line breaks to a space.
  const toSingleLine = (value: unknown): string => String(value).replace(/\s*[\r\n]+\s*/g, " ");

  const evolutionLines =
    data.activeEvolutions.length === 0
      ? ["- (none)"]
      : data.activeEvolutions.map(
          (evo) =>
            `- ${toSingleLine(evo.skillName)}: ${toSingleLine(evo.status)} — ${toSingleLine(evo.description)}`,
        );

  const issueLines =
    data.knownIssues.length === 0
      ? ["- (none)"]
      : data.knownIssues.map((issue) => `- ${toSingleLine(issue)}`);

  return [
    "# Selftune Context",
    "",
    "## Active Evolutions",
    ...evolutionLines,
    "",
    "## Known Issues",
    ...issueLines,
    "",
    "## Last Updated",
    data.lastUpdated,
    "",
  ].join("\n");
}
71
+
72
/**
 * Parse the markdown text of context.md back into a MemoryContext.
 *
 * The text is scanned line by line. The three known "##" headings switch the
 * current section, any line starting with "# " leaves the current section,
 * and every other line is interpreted according to the section it falls in.
 *
 * @param content - Raw file contents.
 * @returns The parsed context; fields that are not found keep their empty defaults.
 */
function parseContext(content: string): MemoryContext {
  const sectionByHeading = new Map<string, string>([
    ["## Active Evolutions", "evolutions"],
    ["## Known Issues", "issues"],
    ["## Last Updated", "updated"],
  ]);

  const parsed: MemoryContext = {
    activeEvolutions: [],
    knownIssues: [],
    lastUpdated: "",
  };

  let current = "";

  for (const rawLine of content.split("\n")) {
    const text = rawLine.trim();

    const nextSection = sectionByHeading.get(text);
    if (nextSection !== undefined) {
      current = nextSection;
      continue;
    }
    if (text.startsWith("# ")) {
      current = "";
      continue;
    }

    // A real bullet, as opposed to the "- (none)" placeholder.
    const isBullet = text.startsWith("- ") && text !== "- (none)";

    switch (current) {
      case "evolutions": {
        if (!isBullet) break;
        // Bullet shape: "- skillName: status — description"
        const entry = text.slice(2);
        const colonAt = entry.indexOf(":");
        if (colonAt === -1) break;
        const skillName = entry.slice(0, colonAt).trim();
        const remainder = entry.slice(colonAt + 1).trim();
        const dashAt = remainder.indexOf("—");
        const status = dashAt === -1 ? remainder : remainder.slice(0, dashAt).trim();
        const description = dashAt === -1 ? "" : remainder.slice(dashAt + 1).trim();
        parsed.activeEvolutions.push({ skillName, status, description });
        break;
      }
      case "issues":
        if (isBullet) {
          parsed.knownIssues.push(text.slice(2));
        }
        break;
      case "updated":
        // Only the first non-empty line of the section is the timestamp.
        if (text.length > 0) {
          parsed.lastUpdated = text;
          current = "";
        }
        break;
      default:
        break;
    }
  }

  return parsed;
}
131
+
132
/**
 * Serialize the context and write it to context.md, creating the memory
 * directory first when needed. Any existing file is overwritten.
 *
 * @param data - Context to persist.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function writeContext(data: MemoryContext, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatContext(data);
  writeFileSync(join(memoryDir, "context.md"), markdown, "utf-8");
}
137
+
138
/**
 * Load context.md from the memory directory.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed context, or an empty context when the file does not exist.
 */
export function readContext(memoryDir: string = MEMORY_DIR): MemoryContext {
  const contextFile = join(memoryDir, "context.md");
  return existsSync(contextFile)
    ? parseContext(readFileSync(contextFile, "utf-8"))
    : { activeEvolutions: [], knownIssues: [], lastUpdated: "" };
}
146
+
147
+ // ---------------------------------------------------------------------------
148
+ // plan.md
149
+ // ---------------------------------------------------------------------------
150
+
151
/**
 * Render a MemoryPlan as the markdown text stored in plan.md.
 *
 * Layout: an "# Evolution Plan" title followed by three "##" sections —
 * Current Priorities (a numbered list, or "1. (none)" when empty), Strategy
 * (or "(no strategy defined)" when empty) and Last Updated.
 *
 * @param data - Plan to serialize.
 * @returns The markdown document, terminated by a newline.
 */
function formatPlan(data: MemoryPlan): string {
  const priorityLines: string[] = [];
  if (data.currentPriorities.length === 0) {
    priorityLines.push("1. (none)");
  } else {
    for (const [position, priority] of data.currentPriorities.entries()) {
      priorityLines.push(`${position + 1}. ${priority}`);
    }
  }

  const strategyLine = data.strategy || "(no strategy defined)";

  return [
    "# Evolution Plan",
    "",
    "## Current Priorities",
    ...priorityLines,
    "",
    "## Strategy",
    strategyLine,
    "",
    "## Last Updated",
    data.lastUpdated,
    "",
  ].join("\n");
}
174
+
175
/**
 * Parse the markdown text of plan.md back into a MemoryPlan.
 *
 * The text is scanned line by line. The three known "##" headings switch the
 * current section, any line starting with "# " leaves the current section,
 * and every other line is interpreted according to the section it falls in.
 *
 * @param content - Raw file contents.
 * @returns The parsed plan; fields that are not found keep their empty defaults.
 */
function parsePlan(content: string): MemoryPlan {
  const result: MemoryPlan = {
    currentPriorities: [],
    strategy: "",
    lastUpdated: "",
  };

  const lines = content.split("\n");
  let section = "";

  for (const line of lines) {
    const trimmed = line.trim();

    if (trimmed === "## Current Priorities") {
      section = "priorities";
      continue;
    }
    if (trimmed === "## Strategy") {
      section = "strategy";
      continue;
    }
    if (trimmed === "## Last Updated") {
      section = "updated";
      continue;
    }
    if (trimmed.startsWith("# ")) {
      section = "";
      continue;
    }

    if (section === "priorities") {
      // Format: "1. priority text"; "(none)" is the empty-list placeholder.
      const match = trimmed.match(/^\d+\.\s+(.+)$/);
      if (match && match[1] !== "(none)") {
        result.currentPriorities.push(match[1]);
      }
    }

    // Intentionally captures only the first non-empty line as the strategy
    // for simplicity. The `result.strategy === ""` guard is what enforces
    // "first": without it every later line in the section would overwrite
    // the value and the LAST line would win instead.
    if (
      section === "strategy" &&
      result.strategy === "" &&
      trimmed.length > 0 &&
      trimmed !== "(no strategy defined)"
    ) {
      result.strategy = trimmed;
    }

    if (section === "updated" && trimmed.length > 0) {
      // Only the first non-empty line of the section is the timestamp.
      result.lastUpdated = trimmed;
      section = "";
    }
  }

  return result;
}
226
+
227
/**
 * Serialize the plan and write it to plan.md, creating the memory directory
 * first when needed. Any existing file is overwritten.
 *
 * @param data - Plan to persist.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function writePlan(data: MemoryPlan, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatPlan(data);
  writeFileSync(join(memoryDir, "plan.md"), markdown, "utf-8");
}
232
+
233
/**
 * Load plan.md from the memory directory.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed plan, or an empty plan when the file does not exist.
 */
export function readPlan(memoryDir: string = MEMORY_DIR): MemoryPlan {
  const planFile = join(memoryDir, "plan.md");
  return existsSync(planFile)
    ? parsePlan(readFileSync(planFile, "utf-8"))
    : { currentPriorities: [], strategy: "", lastUpdated: "" };
}
241
+
242
+ // ---------------------------------------------------------------------------
243
+ // decisions.md (append-only)
244
+ // ---------------------------------------------------------------------------
245
+
246
/**
 * Render one decision record as a markdown entry for decisions.md.
 *
 * The entry is a "## <timestamp> — <actionType>" heading, four bold-labelled
 * bullets (Skill, Action, Rationale, Result), and a trailing "---" separator
 * line surrounded by blank lines.
 *
 * @param record - Decision to serialize.
 * @returns The markdown entry, terminated by a blank line.
 */
function formatDecisionEntry(record: DecisionRecord): string {
  // The log is read back by splitting on lines that are exactly "---" and
  // then matching one field per line. A field that carries a line break
  // (free-text rationale, for example) would be cut off at the break, and a
  // line consisting only of "---" would split the record in two. Collapse
  // embedded line breaks so each field stays on its own single line.
  const toSingleLine = (value: unknown): string => String(value).replace(/\s*[\r\n]+\s*/g, " ");

  const lines: string[] = [
    `## ${toSingleLine(record.timestamp)} — ${toSingleLine(record.actionType)}`,
    `- **Skill:** ${toSingleLine(record.skillName)}`,
    `- **Action:** ${toSingleLine(record.action)}`,
    `- **Rationale:** ${toSingleLine(record.rationale)}`,
    `- **Result:** ${toSingleLine(record.result)}`,
    "",
    "---",
    "",
  ];
  return lines.join("\n");
}
259
+
260
/**
 * Parse the markdown text of decisions.md into decision records.
 *
 * The text is split into chunks on lines that are exactly "---". Within each
 * chunk, a "## <timestamp> — <actionType>" heading and the four bold-labelled
 * bullets are picked out; a chunk yields a record only when it has both a
 * timestamp and a skill name. An unrecognised action value leaves the action
 * at its "watched" default.
 *
 * @param content - Raw file contents.
 * @returns The records in file order.
 */
function parseDecisions(content: string): DecisionRecord[] {
  const SKILL_LABEL = "- **Skill:**";
  const ACTION_LABEL = "- **Action:**";
  const RATIONALE_LABEL = "- **Rationale:**";
  const RESULT_LABEL = "- **Result:**";

  const parsed: DecisionRecord[] = [];

  for (const chunk of content.split(/^---$/m)) {
    const draft: DecisionRecord = {
      timestamp: "",
      actionType: "",
      skillName: "",
      action: "watched",
      rationale: "",
      result: "",
    };

    for (const rawLine of chunk.split("\n")) {
      const text = rawLine.trim();
      if (text.length === 0) {
        continue;
      }

      // Heading shape: "## 2026-03-01T00:00:00Z — evolve"
      const heading = /^## (.+?) — (.+)$/.exec(text);
      if (heading !== null) {
        draft.timestamp = heading[1];
        draft.actionType = heading[2];
        continue;
      }

      if (text.startsWith(SKILL_LABEL)) {
        draft.skillName = text.slice(SKILL_LABEL.length).trim();
      } else if (text.startsWith(ACTION_LABEL)) {
        const value = text.slice(ACTION_LABEL.length).trim();
        if (value === "evolved" || value === "rolled-back" || value === "watched") {
          draft.action = value;
        }
      } else if (text.startsWith(RATIONALE_LABEL)) {
        draft.rationale = text.slice(RATIONALE_LABEL.length).trim();
      } else if (text.startsWith(RESULT_LABEL)) {
        draft.result = text.slice(RESULT_LABEL.length).trim();
      }
    }

    if (draft.timestamp && draft.skillName) {
      parsed.push(draft);
    }
  }

  return parsed;
}
308
+
309
/**
 * Append one decision record to decisions.md.
 *
 * The memory directory is created when needed, and a "# Decision Log" title
 * is written first when the file does not exist yet. Existing entries are
 * never rewritten.
 *
 * @param record - Decision to append.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function appendDecision(record: DecisionRecord, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const logFile = join(memoryDir, "decisions.md");

  const isNewLog = !existsSync(logFile);
  if (isNewLog) {
    writeFileSync(logFile, "# Decision Log\n\n", "utf-8");
  }

  appendFileSync(logFile, formatDecisionEntry(record), "utf-8");
}
320
+
321
/**
 * Load every decision record from decisions.md.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed records, or an empty array when the file does not exist.
 */
export function readDecisions(memoryDir: string = MEMORY_DIR): DecisionRecord[] {
  const logFile = join(memoryDir, "decisions.md");
  return existsSync(logFile) ? parseDecisions(readFileSync(logFile, "utf-8")) : [];
}
329
+
330
+ // ---------------------------------------------------------------------------
331
+ // High-level helpers for integration
332
+ // ---------------------------------------------------------------------------
333
+
334
/**
 * Record the outcome of an evolve run in the memory files.
 *
 * Rewrites context.md with the skill's entry set to "deployed" or "failed"
 * (replacing an existing entry for the same skill, otherwise adding one) and
 * appends an "evolve" record to decisions.md.
 *
 * @param skillName - Skill that was evolved.
 * @param proposal - Proposal whose rationale is used as the description.
 * @param result - Evolve outcome; `deployed` picks the status, `reason` is the fallback description and the decision result.
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterEvolve(
  skillName: string,
  proposal: EvolutionProposal,
  result: EvolveResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const context = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.deployed ? "deployed" : "failed",
    description: proposal.rationale || result.reason,
  };

  // Upsert: one entry per skill.
  const existingAt = context.activeEvolutions.findIndex((evo) => evo.skillName === skillName);
  if (existingAt < 0) {
    context.activeEvolutions.push(entry);
  } else {
    context.activeEvolutions[existingAt] = entry;
  }

  context.lastUpdated = timestamp;
  writeContext(context, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "evolve",
    skillName,
    action: "evolved",
    rationale: proposal.rationale || "Evolution triggered",
    result: result.reason,
  };
  appendDecision(decision, memoryDir);
}
370
+
371
/**
 * Record the outcome of a rollback in the memory files.
 *
 * Rewrites context.md with the skill's entry set to "rolled-back" or
 * "rollback-failed" (replacing an existing entry for the same skill,
 * otherwise adding one) and appends a "rollback" record to decisions.md.
 *
 * @param skillName - Skill that was rolled back.
 * @param result - Rollback outcome; `rolledBack` picks the status, `reason` is the description and rationale.
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterRollback(
  skillName: string,
  result: RollbackResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const context = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.rolledBack ? "rolled-back" : "rollback-failed",
    description: result.reason,
  };

  // Upsert: one entry per skill.
  const existingAt = context.activeEvolutions.findIndex((evo) => evo.skillName === skillName);
  if (existingAt < 0) {
    context.activeEvolutions.push(entry);
  } else {
    context.activeEvolutions[existingAt] = entry;
  }

  context.lastUpdated = timestamp;
  writeContext(context, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "rollback",
    skillName,
    action: "rolled-back",
    rationale: result.reason,
    result: result.rolledBack ? "Successfully rolled back" : "Rollback failed",
  };
  appendDecision(decision, memoryDir);
}
404
+
405
/**
 * Record the outcome of a monitoring check in the memory files.
 *
 * Rewrites context.md with the skill's entry set to "regression" or
 * "healthy" (replacing an existing entry for the same skill, otherwise
 * adding one), adds a known issue when a regression is detected and none is
 * recorded for this skill yet, and appends a "watch" record to decisions.md.
 *
 * @param skillName - Skill that was monitored.
 * @param snapshot - Monitoring snapshot; reads `pass_rate`, `baseline_pass_rate` and `regression_detected`.
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterWatch(
  skillName: string,
  snapshot: MonitoringSnapshot,
  memoryDir: string = MEMORY_DIR,
): void {
  const now = new Date().toISOString();
  const context = readContext(memoryDir);

  const passRate = snapshot.pass_rate.toFixed(2);
  const baseline = snapshot.baseline_pass_rate.toFixed(2);

  const status = snapshot.regression_detected ? "regression" : "healthy";
  const description = `pass_rate=${passRate}, baseline=${baseline}`;

  // Upsert: one entry per skill.
  const idx = context.activeEvolutions.findIndex((e) => e.skillName === skillName);
  if (idx >= 0) {
    context.activeEvolutions[idx] = { skillName, status, description };
  } else {
    context.activeEvolutions.push({ skillName, status, description });
  }

  // Add known issue if regression detected
  if (snapshot.regression_detected) {
    // Match on the exact "Regression detected for <skill>:" prefix. A plain
    // substring test on the skill name would treat an existing issue for
    // skill "foobar" as covering skill "foo" and silently drop the new issue.
    const issuePrefix = `Regression detected for ${skillName}:`;
    const alreadyRecorded = context.knownIssues.some((issue) => issue.startsWith(issuePrefix));
    if (!alreadyRecorded) {
      context.knownIssues.push(`${issuePrefix} pass_rate=${passRate} below baseline=${baseline}`);
    }
  }

  context.lastUpdated = now;
  writeContext(context, memoryDir);

  appendDecision(
    {
      timestamp: now,
      actionType: "watch",
      skillName,
      action: "watched",
      rationale: `Monitoring check: pass_rate=${passRate}, regression=${snapshot.regression_detected}`,
      result: snapshot.regression_detected
        ? `Regression detected (pass_rate=${passRate})`
        : `Healthy (pass_rate=${passRate})`,
    },
    memoryDir,
  );
}
@@ -9,7 +9,10 @@
9
9
  import { parseArgs } from "node:util";
10
10
 
11
11
  import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
12
+ import { classifyInvocation } from "../eval/hooks-to-evals.js";
12
13
  import { getLastDeployedProposal } from "../evolution/audit.js";
14
+ import { updateContextAfterWatch } from "../memory/writer.js";
15
+ import type { SyncResult } from "../sync.js";
13
16
  import type {
14
17
  InvocationType,
15
18
  MonitoringSnapshot,
@@ -18,6 +21,11 @@ import type {
18
21
  SkillUsageRecord,
19
22
  } from "../types.js";
20
23
  import { readJsonl } from "../utils/jsonl.js";
24
+ import {
25
+ filterActionableQueryRecords,
26
+ filterActionableSkillUsageRecords,
27
+ } from "../utils/query-filter.js";
28
+ import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
21
29
 
22
30
  // ---------------------------------------------------------------------------
23
31
  // Public interfaces
@@ -40,6 +48,10 @@ export interface WatchOptions {
40
48
  skillPath: string;
41
49
  proposalId?: string;
42
50
  }) => Promise<{ rolledBack: boolean; restoredDescription: string; reason: string }>;
51
+ /** Source-truth refresh before reading logs. */
52
+ syncFirst?: boolean;
53
+ syncForce?: boolean;
54
+ _syncFn?: typeof import("../sync.js").syncSources;
43
55
  }
44
56
 
45
57
  export interface WatchResult {
@@ -47,6 +59,7 @@ export interface WatchResult {
47
59
  alert: string | null;
48
60
  rolledBack: boolean;
49
61
  recommendation: string;
62
+ sync_result?: SyncResult;
50
63
  }
51
64
 
52
65
  // ---------------------------------------------------------------------------
@@ -55,6 +68,7 @@ export interface WatchResult {
55
68
 
56
69
  const DEFAULT_BASELINE_PASS_RATE = 0.5;
57
70
  const DEFAULT_REGRESSION_THRESHOLD = 0.1;
71
+ export const MIN_MONITORING_SKILL_CHECKS = 3;
58
72
 
59
73
  // ---------------------------------------------------------------------------
60
74
  // computeMonitoringSnapshot - pure function
@@ -64,9 +78,9 @@ const DEFAULT_REGRESSION_THRESHOLD = 0.1;
64
78
  * Compute a monitoring snapshot from raw log records.
65
79
  *
66
80
  * The function windows telemetry to the last `windowSessions` entries, then
67
- * scopes skill and query records to those sessions. If telemetry is empty or
68
- * no records match the windowed session IDs, all provided skill/query records
69
- * are used directly (unfiltered by session).
81
+ * scopes skill and actionable query records to those sessions. If telemetry is
82
+ * empty or no records match the windowed session IDs, all provided skill/query
83
+ * records are used directly (unfiltered by session).
70
84
  *
71
85
  * @param skillName - The skill to monitor
72
86
  * @param telemetry - All session telemetry records
@@ -86,43 +100,53 @@ export function computeMonitoringSnapshot(
86
100
  regressionThreshold: number = DEFAULT_REGRESSION_THRESHOLD,
87
101
  ): MonitoringSnapshot {
88
102
  // 1. Window the telemetry to the last N sessions (by array order, assumed chronological)
103
+ const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
104
+ const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
89
105
  const windowedTelemetry = telemetry.slice(-windowSessions);
90
106
  const windowedSessionIds = new Set(windowedTelemetry.map((t) => t.session_id));
91
107
 
92
108
  // 2. Filter skill records by skill name first
93
- const skillNameFiltered = skillRecords.filter((r) => r.skill_name === skillName);
109
+ const skillNameFiltered = actionableSkillRecords.filter((r) => r.skill_name === skillName);
94
110
 
95
111
  // 3. Apply session ID windowing only if telemetry is present and overlaps
96
112
  const hasSessionOverlap =
97
113
  windowedSessionIds.size > 0 &&
98
114
  (skillNameFiltered.some((r) => windowedSessionIds.has(r.session_id)) ||
99
- queryRecords.some((r) => windowedSessionIds.has(r.session_id)));
115
+ actionableQueryRecords.some((r) => windowedSessionIds.has(r.session_id)));
100
116
 
101
117
  const filteredSkillRecords = hasSessionOverlap
102
118
  ? skillNameFiltered.filter((r) => windowedSessionIds.has(r.session_id))
103
119
  : skillNameFiltered;
104
-
105
120
  const filteredQueryRecords = hasSessionOverlap
106
- ? queryRecords.filter((r) => windowedSessionIds.has(r.session_id))
107
- : queryRecords;
121
+ ? actionableQueryRecords.filter((r) => windowedSessionIds.has(r.session_id))
122
+ : actionableQueryRecords;
108
123
 
109
- // 4. Compute pass rate: triggered_count / total_query_count
124
+ // 4. Compute pass rate from explicit skill checks, not from all queries.
110
125
  const triggeredCount = filteredSkillRecords.filter((r) => r.triggered).length;
111
- const totalQueries = filteredQueryRecords.length;
112
- const passRate = totalQueries === 0 ? 1.0 : triggeredCount / totalQueries;
126
+ const totalSkillChecks = filteredSkillRecords.length;
127
+ const passRate = totalSkillChecks === 0 ? 0 : triggeredCount / totalSkillChecks;
113
128
 
114
129
  // 5. Compute false negative rate from skill usage records
115
- const totalSkillChecks = filteredSkillRecords.length;
116
130
  const falseNegatives = filteredSkillRecords.filter((r) => !r.triggered).length;
117
131
  const falseNegativeRate = totalSkillChecks === 0 ? 0 : falseNegatives / totalSkillChecks;
118
132
 
119
- // 6. by_invocation_type: MVP classifies everything as "implicit"
133
+ // 6. by_invocation_type: classify each skill record using classifyInvocation
120
134
  const byInvocationType: Record<InvocationType, { passed: number; total: number }> = {
121
135
  explicit: { passed: 0, total: 0 },
122
- implicit: { passed: triggeredCount, total: totalSkillChecks },
136
+ implicit: { passed: 0, total: 0 },
123
137
  contextual: { passed: 0, total: 0 },
124
138
  negative: { passed: 0, total: 0 },
125
139
  };
140
+ for (const record of filteredSkillRecords) {
141
+ const invType = classifyInvocation(
142
+ typeof record.query === "string" ? record.query : "",
143
+ skillName,
144
+ );
145
+ byInvocationType[invType].total++;
146
+ if (record.triggered) {
147
+ byInvocationType[invType].passed++;
148
+ }
149
+ }
126
150
 
127
151
  // 7. Regression detection: pass_rate < baseline - threshold
128
152
  // Use rounding to avoid floating-point boundary issues (e.g. 0.8 - 0.1 = 0.7000000000000001)
@@ -130,12 +154,16 @@ export function computeMonitoringSnapshot(
130
154
  const adjustedThreshold =
131
155
  Math.round((baselinePassRate - regressionThreshold) * precision) / precision;
132
156
  const roundedPassRate = Math.round(passRate * precision) / precision;
133
- const regressionDetected = roundedPassRate < adjustedThreshold;
157
+ const hasEnoughSignalForRegression =
158
+ totalSkillChecks >= MIN_MONITORING_SKILL_CHECKS ||
159
+ (totalSkillChecks === 0 && filteredQueryRecords.length >= MIN_MONITORING_SKILL_CHECKS);
160
+ const regressionDetected = hasEnoughSignalForRegression && roundedPassRate < adjustedThreshold;
134
161
 
135
162
  return {
136
163
  timestamp: new Date().toISOString(),
137
164
  skill_name: skillName,
138
165
  window_sessions: windowSessions,
166
+ skill_checks: totalSkillChecks,
139
167
  pass_rate: passRate,
140
168
  false_negative_rate: falseNegativeRate,
141
169
  by_invocation_type: byInvocationType,
@@ -163,11 +191,28 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
163
191
  _queryLogPath = QUERY_LOG,
164
192
  _auditLogPath,
165
193
  _rollbackFn,
194
+ syncFirst = false,
195
+ syncForce = false,
196
+ _syncFn,
166
197
  } = options;
167
198
 
199
+ let syncResult: SyncResult | undefined;
200
+ if (syncFirst) {
201
+ const { createDefaultSyncOptions, syncSources: realSyncSources } = await import("../sync.js");
202
+ const syncRunner = _syncFn ?? realSyncSources;
203
+ syncResult = syncRunner(
204
+ createDefaultSyncOptions({
205
+ force: syncForce,
206
+ }),
207
+ );
208
+ }
209
+
168
210
  // 1. Read log files
169
211
  const telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
170
- const skillRecords = readJsonl<SkillUsageRecord>(_skillLogPath);
212
+ const skillRecords =
213
+ _skillLogPath === SKILL_LOG
214
+ ? readEffectiveSkillUsageRecords()
215
+ : readJsonl<SkillUsageRecord>(_skillLogPath);
171
216
  const queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);
172
217
 
173
218
  // 2. Determine baseline pass rate from last deployed audit entry
@@ -208,15 +253,34 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
208
253
  recommendation = rolledBack
209
254
  ? `Rolled back "${skillName}" to previous version. Monitor to confirm recovery.`
210
255
  : `Consider running: selftune rollback --skill "${skillName}" --skill-path "${skillPath}"`;
256
+ } else if (snapshot.skill_checks < MIN_MONITORING_SKILL_CHECKS) {
257
+ recommendation =
258
+ `Skill "${skillName}" has only ${snapshot.skill_checks} actionable check(s) in the current window. ` +
259
+ `Need at least ${MIN_MONITORING_SKILL_CHECKS} before calling it stable.`;
211
260
  } else {
212
261
  recommendation = `Skill "${skillName}" is stable. Pass rate ${snapshot.pass_rate.toFixed(2)} is within acceptable range of baseline ${baselinePassRate.toFixed(2)}.`;
213
262
  }
214
263
 
264
+ // Update evolution memory (fail-open)
265
+ try {
266
+ updateContextAfterWatch(skillName, snapshot);
267
+ } catch (err) {
268
+ // Fail-open: memory writes should never fail the main operation
269
+ console.error(
270
+ JSON.stringify({
271
+ level: "debug",
272
+ code: "memory_write_failed",
273
+ message: `Failed to update memory after watch for "${skillName}": ${err instanceof Error ? err.message : String(err)}`,
274
+ }),
275
+ );
276
+ }
277
+
215
278
  return {
216
279
  snapshot,
217
280
  alert,
218
281
  rolledBack,
219
282
  recommendation,
283
+ ...(syncResult ? { sync_result: syncResult } : {}),
220
284
  };
221
285
  }
222
286
 
@@ -260,6 +324,8 @@ export async function cliMain(): Promise<void> {
260
324
  window: { type: "string", default: "20" },
261
325
  threshold: { type: "string", default: "0.1" },
262
326
  "auto-rollback": { type: "boolean", default: false },
327
+ "sync-first": { type: "boolean", default: false },
328
+ "sync-force": { type: "boolean", default: false },
263
329
  help: { type: "boolean", default: false },
264
330
  },
265
331
  strict: true,
@@ -277,6 +343,8 @@ Options:
277
343
  --window Number of recent sessions to consider (default: 20)
278
344
  --threshold Regression threshold below baseline (default: 0.1)
279
345
  --auto-rollback Automatically rollback on regression detection
346
+ --sync-first Refresh source-truth telemetry before reading watch inputs
347
+ --sync-force Force a full rescan during --sync-first
280
348
  --help Show this help message`);
281
349
  process.exit(0);
282
350
  }
@@ -285,6 +353,10 @@ Options:
285
353
  console.error("[ERROR] --skill and --skill-path are required");
286
354
  process.exit(1);
287
355
  }
356
+ if ((values["sync-force"] ?? false) && !(values["sync-first"] ?? false)) {
357
+ console.error("[ERROR] --sync-force requires --sync-first");
358
+ process.exit(1);
359
+ }
288
360
 
289
361
  const rawWindow = values.window ?? "20";
290
362
  if (!/^\d+$/.test(rawWindow)) {
@@ -314,6 +386,8 @@ Options:
314
386
  windowSessions,
315
387
  regressionThreshold,
316
388
  autoRollback: values["auto-rollback"] ?? false,
389
+ syncFirst: values["sync-first"] ?? false,
390
+ syncForce: values["sync-force"] ?? false,
317
391
  });
318
392
 
319
393
  console.log(JSON.stringify(result, null, 2));