selftune 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +146 -0
- package/.claude/agents/evolution-reviewer.md +167 -0
- package/.claude/agents/integration-guide.md +200 -0
- package/.claude/agents/pattern-analyst.md +147 -0
- package/CHANGELOG.md +37 -0
- package/README.md +96 -256
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +103 -0
- package/cli/selftune/constants.ts +75 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-server.ts +582 -0
- package/cli/selftune/dashboard.ts +25 -3
- package/cli/selftune/eval/baseline.ts +247 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +68 -2
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evolve-body.ts +492 -0
- package/cli/selftune/evolution/evolve.ts +466 -103
- package/cli/selftune/evolution/extract-patterns.ts +32 -1
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +19 -2
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/grade-session.ts +138 -18
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/index.ts +88 -0
- package/cli/selftune/ingestors/claude-replay.ts +351 -0
- package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
- package/cli/selftune/init.ts +150 -3
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +25 -2
- package/cli/selftune/status.ts +17 -13
- package/cli/selftune/types.ts +377 -5
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/llm-call.ts +29 -3
- package/cli/selftune/utils/transcript.ts +35 -0
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/dashboard/index.html +569 -8
- package/package.json +8 -4
- package/skill/SKILL.md +124 -8
- package/skill/Workflows/AutoActivation.md +144 -0
- package/skill/Workflows/Badge.md +118 -0
- package/skill/Workflows/Baseline.md +121 -0
- package/skill/Workflows/Composability.md +100 -0
- package/skill/Workflows/Contribute.md +91 -0
- package/skill/Workflows/Cron.md +155 -0
- package/skill/Workflows/Dashboard.md +203 -0
- package/skill/Workflows/Doctor.md +37 -1
- package/skill/Workflows/Evals.md +69 -1
- package/skill/Workflows/EvolutionMemory.md +152 -0
- package/skill/Workflows/Evolve.md +111 -6
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/ImportSkillsBench.md +111 -0
- package/skill/Workflows/Ingest.md +117 -3
- package/skill/Workflows/Initialize.md +57 -3
- package/skill/Workflows/Replay.md +70 -0
- package/skill/Workflows/Rollback.md +20 -1
- package/skill/Workflows/UnitTest.md +138 -0
- package/skill/Workflows/Watch.md +22 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory writer — helper functions for reading/writing evolution memory files.
|
|
3
|
+
*
|
|
4
|
+
* Memory files live at ~/.selftune/memory/ and provide human-readable session
|
|
5
|
+
* context that survives context resets. Three files:
|
|
6
|
+
* - context.md — active evolutions, known issues
|
|
7
|
+
* - plan.md — current priorities, strategy
|
|
8
|
+
* - decisions.md — append-only decision log
|
|
9
|
+
*
|
|
10
|
+
* All functions accept an optional memoryDir parameter for testability.
|
|
11
|
+
* Default: MEMORY_DIR from constants.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
|
|
17
|
+
import { MEMORY_DIR } from "../constants.js";
|
|
18
|
+
import type { EvolveResult } from "../evolution/evolve.js";
|
|
19
|
+
import type { RollbackResult } from "../evolution/rollback.js";
|
|
20
|
+
import type {
|
|
21
|
+
DecisionRecord,
|
|
22
|
+
EvolutionProposal,
|
|
23
|
+
MemoryContext,
|
|
24
|
+
MemoryPlan,
|
|
25
|
+
MonitoringSnapshot,
|
|
26
|
+
} from "../types.js";
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Directory management
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
 * Make sure the memory directory exists, creating it and any missing parent
 * directories when necessary.
 *
 * `mkdirSync` with `recursive: true` is already a no-op for an existing
 * directory, so no separate existence probe is performed. This avoids the
 * check-then-create race and reports an error right away when the path is
 * occupied by something that is not a directory.
 *
 * @param memoryDir - Directory to create. Defaults to MEMORY_DIR.
 * @throws If the directory cannot be created (e.g. a regular file is in the way).
 */
export function ensureMemoryDir(memoryDir: string = MEMORY_DIR): void {
  mkdirSync(memoryDir, { recursive: true });
}
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// context.md
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Render a MemoryContext as the markdown text stored in context.md.
 *
 * Layout: a "# Selftune Context" title followed by "## Active Evolutions",
 * "## Known Issues" and "## Last Updated" sections. An empty list is written
 * as a single "- (none)" placeholder bullet.
 *
 * parseContext reads the file one line at a time, so every entry must occupy
 * exactly one line. Line breaks embedded in a skill name, status, description
 * or issue (for example a multi-line rationale) are therefore collapsed to a
 * single space; otherwise the continuation lines would be dropped or misread
 * on the next read.
 *
 * @param data - Context to serialize.
 * @returns Markdown text ending with a newline.
 */
function formatContext(data: MemoryContext): string {
  // Collapse any run of line breaks (plus surrounding whitespace) into one space.
  const oneLine = (text: string): string => text.replace(/\s*[\r\n]+\s*/g, " ").trim();

  const lines: string[] = ["# Selftune Context", ""];

  lines.push("## Active Evolutions");
  if (data.activeEvolutions.length === 0) {
    lines.push("- (none)");
  } else {
    for (const evo of data.activeEvolutions) {
      lines.push(
        `- ${oneLine(evo.skillName)}: ${oneLine(evo.status)} — ${oneLine(evo.description)}`,
      );
    }
  }
  lines.push("");

  lines.push("## Known Issues");
  if (data.knownIssues.length === 0) {
    lines.push("- (none)");
  } else {
    for (const issue of data.knownIssues) {
      lines.push(`- ${oneLine(issue)}`);
    }
  }
  lines.push("");

  lines.push("## Last Updated");
  lines.push(data.lastUpdated);
  lines.push("");

  return lines.join("\n");
}
|
|
71
|
+
|
|
72
|
+
/**
 * Parse the markdown stored in context.md back into a MemoryContext.
 *
 * The parser is line-oriented and recognises three sections:
 *  - "## Active Evolutions": bullets of the form "- skillName: status — description"
 *  - "## Known Issues": one bullet per issue
 *  - "## Last Updated": the first non-empty line after the heading
 *
 * "- (none)" placeholder bullets are ignored. Any other markdown heading ends
 * the current section, so bullets under an unrecognised heading are not
 * absorbed into the preceding list and a stray heading is never mistaken for
 * the last-updated value.
 *
 * @param content - Raw file content.
 * @returns The parsed context; fields that are absent from the text stay empty.
 */
function parseContext(content: string): MemoryContext {
  const result: MemoryContext = {
    activeEvolutions: [],
    knownIssues: [],
    lastUpdated: "",
  };

  let section: "" | "evolutions" | "issues" | "updated" = "";

  for (const line of content.split("\n")) {
    const trimmed = line.trim();

    if (trimmed === "## Active Evolutions") {
      section = "evolutions";
      continue;
    }
    if (trimmed === "## Known Issues") {
      section = "issues";
      continue;
    }
    if (trimmed === "## Last Updated") {
      section = "updated";
      continue;
    }
    // Any other heading (title or unknown section) closes the current section.
    if (/^#{1,6}\s/.test(trimmed)) {
      section = "";
      continue;
    }

    const isBullet = trimmed.startsWith("- ") && trimmed !== "- (none)";

    if (section === "evolutions" && isBullet) {
      // Format: "- skillName: status — description"
      const body = trimmed.slice(2);
      const colonIdx = body.indexOf(":");
      if (colonIdx === -1) continue; // not an evolution entry
      const skillName = body.slice(0, colonIdx).trim();
      const rest = body.slice(colonIdx + 1).trim();
      const dashIdx = rest.indexOf("—");
      if (dashIdx === -1) {
        // No em dash: the whole remainder is the status.
        result.activeEvolutions.push({ skillName, status: rest, description: "" });
      } else {
        result.activeEvolutions.push({
          skillName,
          status: rest.slice(0, dashIdx).trim(),
          description: rest.slice(dashIdx + 1).trim(),
        });
      }
    } else if (section === "issues" && isBullet) {
      result.knownIssues.push(trimmed.slice(2));
    } else if (section === "updated" && trimmed.length > 0) {
      // Only the first non-empty line counts; leave the section afterwards.
      result.lastUpdated = trimmed;
      section = "";
    }
  }

  return result;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Serialize the given context and store it as context.md, replacing any
 * previous file. The memory directory is created first when it is missing.
 *
 * @param data - Context to persist.
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 */
export function writeContext(data: MemoryContext, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatContext(data);
  writeFileSync(join(memoryDir, "context.md"), markdown, "utf-8");
}
|
|
137
|
+
|
|
138
|
+
/**
 * Load context.md from the memory directory.
 *
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 * @returns The parsed context, or an empty context when the file does not exist.
 */
export function readContext(memoryDir: string = MEMORY_DIR): MemoryContext {
  const contextFile = join(memoryDir, "context.md");
  return existsSync(contextFile)
    ? parseContext(readFileSync(contextFile, "utf-8"))
    : { activeEvolutions: [], knownIssues: [], lastUpdated: "" };
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// plan.md
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
/**
 * Render a MemoryPlan as the markdown text stored in plan.md.
 *
 * Priorities become a numbered list starting at 1 (or the placeholder
 * "1. (none)" when there are none); an empty strategy is written as
 * "(no strategy defined)".
 *
 * @param data - Plan to serialize.
 * @returns Markdown text ending with a newline.
 */
function formatPlan(data: MemoryPlan): string {
  const priorityLines =
    data.currentPriorities.length === 0
      ? ["1. (none)"]
      : data.currentPriorities.map((priority, position) => `${position + 1}. ${priority}`);

  const document: string[] = [
    "# Evolution Plan",
    "",
    "## Current Priorities",
    ...priorityLines,
    "",
    "## Strategy",
    data.strategy || "(no strategy defined)",
    "",
    "## Last Updated",
    data.lastUpdated,
    "",
  ];

  return document.join("\n");
}
|
|
174
|
+
|
|
175
|
+
/**
 * Parse the markdown stored in plan.md back into a MemoryPlan.
 *
 * Recognised sections:
 *  - "## Current Priorities": numbered lines ("1. text"); the "(none)"
 *    placeholder is skipped
 *  - "## Strategy": the first non-empty line that is not the
 *    "(no strategy defined)" placeholder
 *  - "## Last Updated": the first non-empty line after the heading
 *
 * Any other markdown heading ends the current section, so an unrecognised
 * heading is never captured as strategy or last-updated text.
 *
 * @param content - Raw file content.
 * @returns The parsed plan; fields that are absent from the text stay empty.
 */
function parsePlan(content: string): MemoryPlan {
  const result: MemoryPlan = {
    currentPriorities: [],
    strategy: "",
    lastUpdated: "",
  };

  let section: "" | "priorities" | "strategy" | "updated" = "";

  for (const line of content.split("\n")) {
    const trimmed = line.trim();

    if (trimmed === "## Current Priorities") {
      section = "priorities";
      continue;
    }
    if (trimmed === "## Strategy") {
      section = "strategy";
      continue;
    }
    if (trimmed === "## Last Updated") {
      section = "updated";
      continue;
    }
    // Any other heading (title or unknown section) closes the current section.
    if (/^#{1,6}\s/.test(trimmed)) {
      section = "";
      continue;
    }

    if (section === "priorities") {
      // Format: "1. priority text"
      const priority = trimmed.match(/^\d+\.\s+(.+)$/)?.[1];
      if (priority !== undefined && priority !== "(none)") {
        result.currentPriorities.push(priority);
      }
    } else if (
      section === "strategy" &&
      trimmed.length > 0 &&
      trimmed !== "(no strategy defined)"
    ) {
      // Keep only the first non-empty line; leaving the section stops later
      // lines from overwriting it.
      result.strategy = trimmed;
      section = "";
    } else if (section === "updated" && trimmed.length > 0) {
      result.lastUpdated = trimmed;
      section = "";
    }
  }

  return result;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Serialize the given plan and store it as plan.md, replacing any previous
 * file. The memory directory is created first when it is missing.
 *
 * @param data - Plan to persist.
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 */
export function writePlan(data: MemoryPlan, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatPlan(data);
  writeFileSync(join(memoryDir, "plan.md"), markdown, "utf-8");
}
|
|
232
|
+
|
|
233
|
+
/**
 * Load plan.md from the memory directory.
 *
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 * @returns The parsed plan, or an empty plan when the file does not exist.
 */
export function readPlan(memoryDir: string = MEMORY_DIR): MemoryPlan {
  const planFile = join(memoryDir, "plan.md");
  return existsSync(planFile)
    ? parsePlan(readFileSync(planFile, "utf-8"))
    : { currentPriorities: [], strategy: "", lastUpdated: "" };
}
|
|
241
|
+
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
// decisions.md (append-only)
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
/**
 * Render one decision record as a markdown entry for decisions.md.
 *
 * The entry is a "## <timestamp> — <actionType>" heading, four "- **Label:**"
 * bullets (Skill, Action, Rationale, Result) and a closing "---" separator.
 *
 * parseDecisions splits the log on "---" lines and reads each field from a
 * single line, so line breaks embedded in any field are collapsed to a single
 * space. This keeps multi-line rationales intact on re-read and prevents a
 * "---" line inside a field from splitting the entry.
 *
 * @param record - Decision to serialize.
 * @returns Markdown text ending with a newline, ready to append to the log.
 */
function formatDecisionEntry(record: DecisionRecord): string {
  // Collapse any run of line breaks (plus surrounding whitespace) into one space.
  const oneLine = (text: string): string => text.replace(/\s*[\r\n]+\s*/g, " ").trim();

  const lines: string[] = [
    `## ${oneLine(record.timestamp)} — ${oneLine(record.actionType)}`,
    `- **Skill:** ${oneLine(record.skillName)}`,
    `- **Action:** ${oneLine(record.action)}`,
    `- **Rationale:** ${oneLine(record.rationale)}`,
    `- **Result:** ${oneLine(record.result)}`,
    "",
    "---",
    "",
  ];
  return lines.join("\n");
}
|
|
259
|
+
|
|
260
|
+
/**
 * Parse the decision log (decisions.md) into an array of records.
 *
 * The text is cut into entries at lines consisting solely of "---". Within an
 * entry, the "## <timestamp> — <actionType>" heading and the "- **Skill:**",
 * "- **Action:**", "- **Rationale:**" and "- **Result:**" bullets are read;
 * all other lines are ignored. An action value other than "evolved",
 * "rolled-back" or "watched" leaves the default "watched" in place. Entries
 * lacking a timestamp or a skill name are discarded.
 *
 * @param content - Raw file content.
 * @returns Parsed records in file order.
 */
function parseDecisions(content: string): DecisionRecord[] {
  const parsed: DecisionRecord[] = [];
  const labels = ["Skill", "Action", "Rationale", "Result"] as const;

  for (const chunk of content.split(/^---$/m)) {
    let timestamp = "";
    let actionType = "";
    let skillName = "";
    let action: DecisionRecord["action"] = "watched";
    let rationale = "";
    let result = "";

    for (const rawLine of chunk.split("\n")) {
      const line = rawLine.trim();
      if (line.length === 0) continue;

      // Heading: "## 2026-03-01T00:00:00Z — evolve"
      const heading = /^## (.+?) — (.+)$/.exec(line);
      if (heading !== null) {
        timestamp = heading[1];
        actionType = heading[2];
        continue;
      }

      // Field bullets: find which label (if any) this line starts with.
      const label = labels.find((candidate) => line.startsWith(`- **${candidate}:**`));
      if (label === undefined) continue;
      const value = line.slice(`- **${label}:**`.length).trim();

      switch (label) {
        case "Skill":
          skillName = value;
          break;
        case "Action":
          // Unknown action strings are ignored, keeping the current value.
          if (value === "evolved" || value === "rolled-back" || value === "watched") {
            action = value;
          }
          break;
        case "Rationale":
          rationale = value;
          break;
        case "Result":
          result = value;
          break;
      }
    }

    if (timestamp && skillName) {
      parsed.push({ timestamp, actionType, skillName, action, rationale, result });
    }
  }

  return parsed;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Append one decision record to decisions.md.
 *
 * The memory directory is created when missing, and a new log file is started
 * with a "# Decision Log" title before the first entry is appended.
 *
 * @param record - Decision to record.
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 */
export function appendDecision(record: DecisionRecord, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);

  const logPath = join(memoryDir, "decisions.md");
  const logIsMissing = !existsSync(logPath);
  if (logIsMissing) {
    // First entry ever: start the file with its title.
    writeFileSync(logPath, "# Decision Log\n\n", "utf-8");
  }

  appendFileSync(logPath, formatDecisionEntry(record), "utf-8");
}
|
|
320
|
+
|
|
321
|
+
/**
 * Load every decision recorded in decisions.md.
 *
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 * @returns Parsed records in file order, or an empty array when the file does not exist.
 */
export function readDecisions(memoryDir: string = MEMORY_DIR): DecisionRecord[] {
  const logPath = join(memoryDir, "decisions.md");
  return existsSync(logPath) ? parseDecisions(readFileSync(logPath, "utf-8")) : [];
}
|
|
329
|
+
|
|
330
|
+
// ---------------------------------------------------------------------------
|
|
331
|
+
// High-level helpers for integration
|
|
332
|
+
// ---------------------------------------------------------------------------
|
|
333
|
+
|
|
334
|
+
/**
 * Record the outcome of an evolve run in the memory files.
 *
 * Upserts the skill's entry in context.md with status "deployed" or "failed"
 * (taken from `result.deployed`) and a description taken from the proposal's
 * rationale, falling back to the result's reason. Then refreshes the
 * last-updated timestamp and appends an "evolve" decision to decisions.md.
 *
 * @param skillName - Skill the evolution targeted.
 * @param proposal - Proposal that was evaluated.
 * @param result - Outcome of the evolve run.
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 */
export function updateContextAfterEvolve(
  skillName: string,
  proposal: EvolutionProposal,
  result: EvolveResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const context = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.deployed ? "deployed" : "failed",
    description: proposal.rationale || result.reason,
  };

  // Replace the skill's existing entry in place, or add a new one at the end.
  const position = context.activeEvolutions.findIndex((evo) => evo.skillName === skillName);
  if (position === -1) {
    context.activeEvolutions.push(entry);
  } else {
    context.activeEvolutions[position] = entry;
  }

  context.lastUpdated = timestamp;
  writeContext(context, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "evolve",
    skillName,
    action: "evolved",
    rationale: proposal.rationale || "Evolution triggered",
    result: result.reason,
  };
  appendDecision(decision, memoryDir);
}
|
|
370
|
+
|
|
371
|
+
/**
 * Record the outcome of a rollback in the memory files.
 *
 * Upserts the skill's entry in context.md with status "rolled-back" or
 * "rollback-failed" (taken from `result.rolledBack`) and the result's reason as
 * description. Then refreshes the last-updated timestamp and appends a
 * "rollback" decision to decisions.md.
 *
 * @param skillName - Skill that was rolled back.
 * @param result - Outcome of the rollback.
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 */
export function updateContextAfterRollback(
  skillName: string,
  result: RollbackResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const context = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.rolledBack ? "rolled-back" : "rollback-failed",
    description: result.reason,
  };

  // Replace the skill's existing entry in place, or add a new one at the end.
  const position = context.activeEvolutions.findIndex((evo) => evo.skillName === skillName);
  if (position === -1) {
    context.activeEvolutions.push(entry);
  } else {
    context.activeEvolutions[position] = entry;
  }

  context.lastUpdated = timestamp;
  writeContext(context, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "rollback",
    skillName,
    action: "rolled-back",
    rationale: result.reason,
    result: result.rolledBack ? "Successfully rolled back" : "Rollback failed",
  };
  appendDecision(decision, memoryDir);
}
|
|
404
|
+
|
|
405
|
+
/**
 * Record the outcome of a monitoring check in the memory files.
 *
 * Upserts the skill's entry in context.md with status "regression" or
 * "healthy" (taken from `snapshot.regression_detected`) and a description
 * showing the pass rate against the baseline. When a regression is detected
 * and no regression issue is recorded for this exact skill yet, one is added
 * to the known issues. Then refreshes the last-updated timestamp and appends a
 * "watch" decision to decisions.md.
 *
 * @param skillName - Skill that was monitored.
 * @param snapshot - Monitoring snapshot produced by the check.
 * @param memoryDir - Directory holding the memory files. Defaults to MEMORY_DIR.
 */
export function updateContextAfterWatch(
  skillName: string,
  snapshot: MonitoringSnapshot,
  memoryDir: string = MEMORY_DIR,
): void {
  const now = new Date().toISOString();
  const context = readContext(memoryDir);

  // Both rates are always shown with two decimals; format them once.
  const passRate = snapshot.pass_rate.toFixed(2);
  const baseline = snapshot.baseline_pass_rate.toFixed(2);

  const status = snapshot.regression_detected ? "regression" : "healthy";
  const description = `pass_rate=${passRate}, baseline=${baseline}`;

  // Replace the skill's existing entry in place, or add a new one at the end.
  const idx = context.activeEvolutions.findIndex((e) => e.skillName === skillName);
  if (idx >= 0) {
    context.activeEvolutions[idx] = { skillName, status, description };
  } else {
    context.activeEvolutions.push({ skillName, status, description });
  }

  // Add known issue if regression detected
  if (snapshot.regression_detected) {
    // Match on the exact issue prefix rather than a substring, so an existing
    // issue for a skill whose name merely contains this one (e.g. "foobar" vs
    // "foo") does not suppress the new issue.
    const issuePrefix = `Regression detected for ${skillName}:`;
    const alreadyRecorded = context.knownIssues.some((issue) => issue.startsWith(issuePrefix));
    if (!alreadyRecorded) {
      context.knownIssues.push(`${issuePrefix} pass_rate=${passRate} below baseline=${baseline}`);
    }
  }

  context.lastUpdated = now;
  writeContext(context, memoryDir);

  appendDecision(
    {
      timestamp: now,
      actionType: "watch",
      skillName,
      action: "watched",
      rationale: `Monitoring check: pass_rate=${passRate}, regression=${snapshot.regression_detected}`,
      result: snapshot.regression_detected
        ? `Regression detected (pass_rate=${passRate})`
        : `Healthy (pass_rate=${passRate})`,
    },
    memoryDir,
  );
}
|
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
import { parseArgs } from "node:util";
|
|
10
10
|
|
|
11
11
|
import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
|
|
12
|
+
import { classifyInvocation } from "../eval/hooks-to-evals.js";
|
|
12
13
|
import { getLastDeployedProposal } from "../evolution/audit.js";
|
|
14
|
+
import { updateContextAfterWatch } from "../memory/writer.js";
|
|
13
15
|
import type {
|
|
14
16
|
InvocationType,
|
|
15
17
|
MonitoringSnapshot,
|
|
@@ -116,13 +118,20 @@ export function computeMonitoringSnapshot(
|
|
|
116
118
|
const falseNegatives = filteredSkillRecords.filter((r) => !r.triggered).length;
|
|
117
119
|
const falseNegativeRate = totalSkillChecks === 0 ? 0 : falseNegatives / totalSkillChecks;
|
|
118
120
|
|
|
119
|
-
// 6. by_invocation_type:
|
|
121
|
+
// 6. by_invocation_type: classify each skill record using classifyInvocation
|
|
120
122
|
const byInvocationType: Record<InvocationType, { passed: number; total: number }> = {
|
|
121
123
|
explicit: { passed: 0, total: 0 },
|
|
122
|
-
implicit: { passed:
|
|
124
|
+
implicit: { passed: 0, total: 0 },
|
|
123
125
|
contextual: { passed: 0, total: 0 },
|
|
124
126
|
negative: { passed: 0, total: 0 },
|
|
125
127
|
};
|
|
128
|
+
for (const record of filteredSkillRecords) {
|
|
129
|
+
const invType = classifyInvocation(record.query, skillName);
|
|
130
|
+
byInvocationType[invType].total++;
|
|
131
|
+
if (record.triggered) {
|
|
132
|
+
byInvocationType[invType].passed++;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
126
135
|
|
|
127
136
|
// 7. Regression detection: pass_rate < baseline - threshold
|
|
128
137
|
// Use rounding to avoid floating-point boundary issues (e.g. 0.8 - 0.1 = 0.7000000000000001)
|
|
@@ -212,6 +221,20 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
212
221
|
recommendation = `Skill "${skillName}" is stable. Pass rate ${snapshot.pass_rate.toFixed(2)} is within acceptable range of baseline ${baselinePassRate.toFixed(2)}.`;
|
|
213
222
|
}
|
|
214
223
|
|
|
224
|
+
// Update evolution memory (fail-open)
|
|
225
|
+
try {
|
|
226
|
+
updateContextAfterWatch(skillName, snapshot);
|
|
227
|
+
} catch (err) {
|
|
228
|
+
// Fail-open: memory writes should never fail the main operation
|
|
229
|
+
console.error(
|
|
230
|
+
JSON.stringify({
|
|
231
|
+
level: "debug",
|
|
232
|
+
code: "memory_write_failed",
|
|
233
|
+
message: `Failed to update memory after watch for "${skillName}": ${err instanceof Error ? err.message : String(err)}`,
|
|
234
|
+
}),
|
|
235
|
+
);
|
|
236
|
+
}
|
|
237
|
+
|
|
215
238
|
return {
|
|
216
239
|
snapshot,
|
|
217
240
|
alert,
|
package/cli/selftune/status.ts
CHANGED
|
@@ -29,7 +29,7 @@ export interface SkillStatus {
|
|
|
29
29
|
passRate: number | null;
|
|
30
30
|
trend: "up" | "down" | "stable" | "unknown";
|
|
31
31
|
missedQueries: number;
|
|
32
|
-
status: "HEALTHY" | "
|
|
32
|
+
status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
|
|
33
33
|
snapshot: MonitoringSnapshot | null;
|
|
34
34
|
}
|
|
35
35
|
|
|
@@ -102,12 +102,14 @@ export function computeStatus(
|
|
|
102
102
|
// Count missed queries for this skill (queries where skill was checked but not triggered)
|
|
103
103
|
const missedQueries = skillSpecificRecords.filter((r) => !r.triggered).length;
|
|
104
104
|
|
|
105
|
-
// Determine status
|
|
106
|
-
let status: "HEALTHY" | "
|
|
105
|
+
// Determine status (4-state)
|
|
106
|
+
let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
|
|
107
107
|
if (!hasData || passRate === null) {
|
|
108
|
-
status = "
|
|
109
|
-
} else if (snapshot.regression_detected) {
|
|
110
|
-
status = "
|
|
108
|
+
status = "UNKNOWN";
|
|
109
|
+
} else if (snapshot.regression_detected || passRate < 0.4) {
|
|
110
|
+
status = "CRITICAL";
|
|
111
|
+
} else if (passRate < 0.7) {
|
|
112
|
+
status = "WARNING";
|
|
111
113
|
} else {
|
|
112
114
|
status = "HEALTHY";
|
|
113
115
|
}
|
|
@@ -115,8 +117,8 @@ export function computeStatus(
|
|
|
115
117
|
return { name: skillName, passRate, trend, missedQueries, status, snapshot };
|
|
116
118
|
});
|
|
117
119
|
|
|
118
|
-
// Sort:
|
|
119
|
-
const statusOrder = {
|
|
120
|
+
// Sort: CRITICAL first, then WARNING, then HEALTHY, then UNKNOWN
|
|
121
|
+
const statusOrder: Record<string, number> = { CRITICAL: 0, WARNING: 1, HEALTHY: 2, UNKNOWN: 3 };
|
|
120
122
|
skills.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);
|
|
121
123
|
|
|
122
124
|
// Unmatched queries: queries whose text appears in zero triggered skill_usage_log entries
|
|
@@ -231,7 +233,7 @@ export function formatStatus(result: StatusResult): string {
|
|
|
231
233
|
lines.push(" Name Pass Rate Trend Missed Status");
|
|
232
234
|
|
|
233
235
|
for (const skill of result.skills) {
|
|
234
|
-
const name = skill.name.padEnd(16);
|
|
236
|
+
const name = skill.name.slice(0, 16).padEnd(16);
|
|
235
237
|
const passRate =
|
|
236
238
|
skill.passRate !== null
|
|
237
239
|
? `${Math.round(skill.passRate * 100)}%`.padEnd(11)
|
|
@@ -239,11 +241,13 @@ export function formatStatus(result: StatusResult): string {
|
|
|
239
241
|
const trend = TREND_SYMBOLS[skill.trend].padEnd(7);
|
|
240
242
|
const missed = String(skill.missedQueries).padEnd(8);
|
|
241
243
|
const statusText =
|
|
242
|
-
skill.status === "
|
|
244
|
+
skill.status === "CRITICAL"
|
|
243
245
|
? red(skill.status)
|
|
244
|
-
: skill.status === "
|
|
245
|
-
?
|
|
246
|
-
:
|
|
246
|
+
: skill.status === "WARNING"
|
|
247
|
+
? amber(skill.status)
|
|
248
|
+
: skill.status === "HEALTHY"
|
|
249
|
+
? green(skill.status)
|
|
250
|
+
: amber(skill.status);
|
|
247
251
|
lines.push(` ${name}${passRate}${trend}${missed}${statusText}`);
|
|
248
252
|
}
|
|
249
253
|
|