selftune 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +146 -0
- package/.claude/agents/evolution-reviewer.md +167 -0
- package/.claude/agents/integration-guide.md +200 -0
- package/.claude/agents/pattern-analyst.md +147 -0
- package/CHANGELOG.md +38 -1
- package/README.md +96 -256
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +103 -0
- package/cli/selftune/constants.ts +75 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-server.ts +582 -0
- package/cli/selftune/dashboard.ts +31 -12
- package/cli/selftune/eval/baseline.ts +247 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +68 -2
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evolve-body.ts +492 -0
- package/cli/selftune/evolution/evolve.ts +479 -104
- package/cli/selftune/evolution/extract-patterns.ts +32 -1
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +20 -3
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/grade-session.ts +145 -19
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/index.ts +88 -0
- package/cli/selftune/ingestors/claude-replay.ts +351 -0
- package/cli/selftune/ingestors/codex-rollout.ts +1 -1
- package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/init.ts +168 -5
- package/cli/selftune/last.ts +2 -2
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +25 -2
- package/cli/selftune/status.ts +18 -15
- package/cli/selftune/types.ts +377 -5
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/llm-call.ts +29 -3
- package/cli/selftune/utils/transcript.ts +35 -0
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/dashboard/index.html +585 -19
- package/package.json +17 -6
- package/skill/SKILL.md +127 -10
- package/skill/Workflows/AutoActivation.md +144 -0
- package/skill/Workflows/Badge.md +118 -0
- package/skill/Workflows/Baseline.md +121 -0
- package/skill/Workflows/Composability.md +100 -0
- package/skill/Workflows/Contribute.md +91 -0
- package/skill/Workflows/Cron.md +155 -0
- package/skill/Workflows/Dashboard.md +203 -0
- package/skill/Workflows/Doctor.md +37 -1
- package/skill/Workflows/Evals.md +73 -5
- package/skill/Workflows/EvolutionMemory.md +152 -0
- package/skill/Workflows/Evolve.md +111 -6
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/ImportSkillsBench.md +111 -0
- package/skill/Workflows/Ingest.md +129 -15
- package/skill/Workflows/Initialize.md +58 -3
- package/skill/Workflows/Replay.md +70 -0
- package/skill/Workflows/Rollback.md +20 -1
- package/skill/Workflows/UnitTest.md +138 -0
- package/skill/Workflows/Watch.md +22 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory writer — pure functions for reading/writing evolution memory files.
|
|
3
|
+
*
|
|
4
|
+
* Memory files live at ~/.selftune/memory/ and provide human-readable session
|
|
5
|
+
* context that survives context resets. Three files:
|
|
6
|
+
* - context.md — active evolutions, known issues
|
|
7
|
+
* - plan.md — current priorities, strategy
|
|
8
|
+
* - decisions.md — append-only decision log
|
|
9
|
+
*
|
|
10
|
+
* All functions accept an optional memoryDir parameter for testability.
|
|
11
|
+
* Default: MEMORY_DIR from constants.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
|
|
17
|
+
import { MEMORY_DIR } from "../constants.js";
|
|
18
|
+
import type { EvolveResult } from "../evolution/evolve.js";
|
|
19
|
+
import type { RollbackResult } from "../evolution/rollback.js";
|
|
20
|
+
import type {
|
|
21
|
+
DecisionRecord,
|
|
22
|
+
EvolutionProposal,
|
|
23
|
+
MemoryContext,
|
|
24
|
+
MemoryPlan,
|
|
25
|
+
MonitoringSnapshot,
|
|
26
|
+
} from "../types.js";
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Directory management
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
 * Make sure the memory directory is present on disk.
 *
 * @param memoryDir - Directory to create when missing; defaults to MEMORY_DIR.
 */
export function ensureMemoryDir(memoryDir: string = MEMORY_DIR): void {
  // Something already lives at this path — leave it untouched.
  if (existsSync(memoryDir)) return;

  // `recursive` also creates any missing parent directories.
  mkdirSync(memoryDir, { recursive: true });
}
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// context.md
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Render a MemoryContext as the markdown stored in context.md.
 *
 * Layout: a title, then "## Active Evolutions" and "## Known Issues" bullet
 * lists (each showing "- (none)" when empty), then "## Last Updated".
 *
 * Every bullet is forced onto a single physical line. parseContext reads the
 * file line by line and only recognises lines that start with "- ", so a
 * description or issue containing a line break (descriptions are free text
 * such as a proposal rationale) would otherwise lose everything after the
 * first line when read back.
 *
 * @param data - Context to serialise.
 * @returns Markdown text terminated by a newline.
 */
function formatContext(data: MemoryContext): string {
  // Collapse embedded line breaks (and the whitespace around them) to one
  // space; text without line breaks is returned untouched.
  const oneLine = (text: string): string =>
    /[\r\n]/.test(text) ? text.replace(/\s*[\r\n]+\s*/g, " ").trim() : text;

  const lines: string[] = ["# Selftune Context", ""];

  lines.push("## Active Evolutions");
  if (data.activeEvolutions.length === 0) {
    lines.push("- (none)");
  } else {
    for (const evo of data.activeEvolutions) {
      lines.push(
        `- ${oneLine(evo.skillName)}: ${oneLine(evo.status)} — ${oneLine(evo.description)}`,
      );
    }
  }
  lines.push("");

  lines.push("## Known Issues");
  if (data.knownIssues.length === 0) {
    lines.push("- (none)");
  } else {
    for (const issue of data.knownIssues) {
      lines.push(`- ${oneLine(issue)}`);
    }
  }
  lines.push("");

  lines.push("## Last Updated");
  lines.push(data.lastUpdated);
  lines.push("");

  return lines.join("\n");
}
|
|
71
|
+
|
|
72
|
+
/**
 * Parse the markdown of context.md back into a MemoryContext.
 *
 * The text is scanned line by line. The three known "## ..." headings switch
 * the active section, any "# " heading clears it, and other lines are
 * interpreted according to the section currently active.
 *
 * @param content - Raw file text.
 * @returns The parsed context; anything not found keeps its empty default.
 */
function parseContext(content: string): MemoryContext {
  type Section = "" | "evolutions" | "issues" | "updated";

  const headings = new Map<string, Section>([
    ["## Active Evolutions", "evolutions"],
    ["## Known Issues", "issues"],
    ["## Last Updated", "updated"],
  ]);

  const parsed: MemoryContext = { activeEvolutions: [], knownIssues: [], lastUpdated: "" };
  let current: Section = "";

  for (const rawLine of content.split("\n")) {
    const text = rawLine.trim();

    const heading = headings.get(text);
    if (heading !== undefined) {
      current = heading;
      continue;
    }
    if (text.startsWith("# ")) {
      current = "";
      continue;
    }

    // A real list item: a bullet that is not the "- (none)" placeholder.
    const isItem = text.startsWith("- ") && text !== "- (none)";

    switch (current) {
      case "evolutions": {
        if (!isItem) break;
        // Expected shape: "- skillName: status — description"
        const entry = text.slice(2);
        const colonAt = entry.indexOf(":");
        if (colonAt === -1) break; // no skill name — ignore the line
        const skillName = entry.slice(0, colonAt).trim();
        const tail = entry.slice(colonAt + 1).trim();
        const dashAt = tail.indexOf("—");
        // Without an em dash the whole tail is the status.
        const status = dashAt === -1 ? tail.trim() : tail.slice(0, dashAt).trim();
        const description = dashAt === -1 ? "" : tail.slice(dashAt + 1).trim();
        parsed.activeEvolutions.push({ skillName, status, description });
        break;
      }
      case "issues":
        if (isItem) parsed.knownIssues.push(text.slice(2));
        break;
      case "updated":
        // Only the first non-empty line under the heading is the timestamp.
        if (text.length > 0) {
          parsed.lastUpdated = text;
          current = "";
        }
        break;
    }
  }

  return parsed;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Serialise `data` and overwrite context.md inside the memory directory,
 * creating the directory first when needed.
 *
 * @param data - Context to persist.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function writeContext(data: MemoryContext, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatContext(data);
  writeFileSync(join(memoryDir, "context.md"), markdown, "utf-8");
}
|
|
137
|
+
|
|
138
|
+
/**
 * Load context.md from the memory directory.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed context, or an empty context when the file is absent.
 */
export function readContext(memoryDir: string = MEMORY_DIR): MemoryContext {
  const target = join(memoryDir, "context.md");
  return existsSync(target)
    ? parseContext(readFileSync(target, "utf-8"))
    : { activeEvolutions: [], knownIssues: [], lastUpdated: "" };
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// plan.md
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
/**
 * Render a MemoryPlan as the markdown stored in plan.md.
 *
 * Priorities become a numbered list ("1. (none)" when empty); an empty
 * strategy is written as "(no strategy defined)".
 *
 * @param data - Plan to serialise.
 * @returns Markdown text terminated by a newline.
 */
function formatPlan(data: MemoryPlan): string {
  const priorityLines =
    data.currentPriorities.length === 0
      ? ["1. (none)"]
      : data.currentPriorities.map((item, position) => `${position + 1}. ${item}`);

  return [
    "# Evolution Plan",
    "",
    "## Current Priorities",
    ...priorityLines,
    "",
    "## Strategy",
    data.strategy || "(no strategy defined)",
    "",
    "## Last Updated",
    data.lastUpdated,
    "",
  ].join("\n");
}
|
|
174
|
+
|
|
175
|
+
/**
 * Parse the markdown of plan.md back into a MemoryPlan.
 *
 * The text is scanned line by line. The three known "## ..." headings switch
 * the active section and any "# " heading clears it.
 *
 * - Priorities: lines shaped like "1. text"; the "(none)" placeholder is skipped.
 * - Strategy: only the FIRST non-empty line is kept (the
 *   "(no strategy defined)" placeholder is skipped). Later lines in the
 *   section are ignored rather than overwriting the value.
 * - Last Updated: the first non-empty line under the heading.
 *
 * @param content - Raw file text.
 * @returns The parsed plan; anything not found keeps its empty default.
 */
function parsePlan(content: string): MemoryPlan {
  const result: MemoryPlan = {
    currentPriorities: [],
    strategy: "",
    lastUpdated: "",
  };

  let section = "";

  for (const line of content.split("\n")) {
    const trimmed = line.trim();

    if (trimmed === "## Current Priorities") {
      section = "priorities";
      continue;
    }
    if (trimmed === "## Strategy") {
      section = "strategy";
      continue;
    }
    if (trimmed === "## Last Updated") {
      section = "updated";
      continue;
    }
    if (trimmed.startsWith("# ")) {
      section = "";
      continue;
    }

    if (section === "priorities") {
      // Format: "1. priority text"
      const match = trimmed.match(/^\d+\.\s+(.+)$/);
      if (match && match[1] !== "(none)") {
        result.currentPriorities.push(match[1]);
      }
    }

    // Keep only the first non-empty line as the strategy. The
    // `result.strategy === ""` guard stops subsequent lines from replacing it.
    if (
      section === "strategy" &&
      result.strategy === "" &&
      trimmed.length > 0 &&
      trimmed !== "(no strategy defined)"
    ) {
      result.strategy = trimmed;
    }

    if (section === "updated" && trimmed.length > 0) {
      result.lastUpdated = trimmed;
      section = "";
    }
  }

  return result;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Serialise `data` and overwrite plan.md inside the memory directory,
 * creating the directory first when needed.
 *
 * @param data - Plan to persist.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function writePlan(data: MemoryPlan, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatPlan(data);
  writeFileSync(join(memoryDir, "plan.md"), markdown, "utf-8");
}
|
|
232
|
+
|
|
233
|
+
/**
 * Load plan.md from the memory directory.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed plan, or an empty plan when the file is absent.
 */
export function readPlan(memoryDir: string = MEMORY_DIR): MemoryPlan {
  const target = join(memoryDir, "plan.md");
  return existsSync(target)
    ? parsePlan(readFileSync(target, "utf-8"))
    : { currentPriorities: [], strategy: "", lastUpdated: "" };
}
|
|
241
|
+
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
// decisions.md (append-only)
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
/**
 * Render one decision record as a markdown entry for decisions.md.
 *
 * The entry is a "## <timestamp> — <actionType>" heading, four "- **Field:**"
 * bullets, and a closing "---" separator line.
 *
 * Free-text fields (skill, rationale, result) are collapsed onto a single
 * line. parseDecisions reads one line per field and splits entries on lines
 * that are exactly "---", so an embedded line break would truncate the field
 * on read, and an embedded "---" line would cut the entry in two.
 *
 * @param record - Decision to serialise.
 * @returns Markdown text terminated by a newline.
 */
function formatDecisionEntry(record: DecisionRecord): string {
  // Collapse embedded line breaks (and the whitespace around them) to one
  // space; text without line breaks is returned untouched.
  const oneLine = (text: string): string =>
    /[\r\n]/.test(text) ? text.replace(/\s*[\r\n]+\s*/g, " ").trim() : text;

  const lines: string[] = [
    `## ${record.timestamp} — ${record.actionType}`,
    `- **Skill:** ${oneLine(record.skillName)}`,
    `- **Action:** ${record.action}`,
    `- **Rationale:** ${oneLine(record.rationale)}`,
    `- **Result:** ${oneLine(record.result)}`,
    "",
    "---",
    "",
  ];
  return lines.join("\n");
}
|
|
259
|
+
|
|
260
|
+
/**
 * Parse the markdown of decisions.md into decision records.
 *
 * The text is split into entries on separator lines consisting of "---"
 * (optionally followed by blanks and/or a carriage return, so a log saved
 * with CRLF line endings still splits correctly). Within each entry the
 * "## <timestamp> — <actionType>" heading and the "- **Field:**" bullets are
 * read. An entry is kept only when it has both a timestamp and a skill name.
 *
 * An unrecognised or missing Action value leaves the action at "watched".
 *
 * @param content - Raw file text.
 * @returns Records in file order.
 */
function parseDecisions(content: string): DecisionRecord[] {
  const records: DecisionRecord[] = [];

  // `\r?` before `$`: in multiline mode `$` only matches right before "\n",
  // so without it "---\r\n" would never be recognised as a separator.
  const blocks = content.split(/^---[ \t]*\r?$/m);

  for (const block of blocks) {
    const lines = block
      .split("\n")
      .map((l) => l.trim())
      .filter((l) => l.length > 0);

    let timestamp = "";
    let actionType = "";
    let skillName = "";
    let action: DecisionRecord["action"] = "watched";
    let rationale = "";
    let result = "";

    for (const line of lines) {
      // Header: "## 2026-03-01T00:00:00Z — evolve"
      const headerMatch = line.match(/^## (.+?) — (.+)$/);
      if (headerMatch) {
        timestamp = headerMatch[1];
        actionType = headerMatch[2];
        continue;
      }

      if (line.startsWith("- **Skill:**")) {
        skillName = line.replace("- **Skill:**", "").trim();
      } else if (line.startsWith("- **Action:**")) {
        const raw = line.replace("- **Action:**", "").trim();
        // Only accept the known action values.
        if (raw === "evolved" || raw === "rolled-back" || raw === "watched") {
          action = raw;
        }
      } else if (line.startsWith("- **Rationale:**")) {
        rationale = line.replace("- **Rationale:**", "").trim();
      } else if (line.startsWith("- **Result:**")) {
        result = line.replace("- **Result:**", "").trim();
      }
    }

    // Skips the "# Decision Log" preamble and any incomplete entry.
    if (timestamp && skillName) {
      records.push({ timestamp, actionType, skillName, action, rationale, result });
    }
  }

  return records;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Append one decision entry to decisions.md in the memory directory.
 *
 * The directory is created when needed, and a "# Decision Log" title is
 * written first when the file does not exist yet.
 *
 * @param record - Decision to append.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function appendDecision(record: DecisionRecord, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);

  const logPath = join(memoryDir, "decisions.md");
  const logMissing = !existsSync(logPath);
  if (logMissing) {
    // Start a fresh log with its title.
    writeFileSync(logPath, "# Decision Log\n\n", "utf-8");
  }

  appendFileSync(logPath, formatDecisionEntry(record), "utf-8");
}
|
|
320
|
+
|
|
321
|
+
/**
 * Load every decision recorded in decisions.md.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed records, or an empty array when the file is absent.
 */
export function readDecisions(memoryDir: string = MEMORY_DIR): DecisionRecord[] {
  const logPath = join(memoryDir, "decisions.md");
  return existsSync(logPath) ? parseDecisions(readFileSync(logPath, "utf-8")) : [];
}
|
|
329
|
+
|
|
330
|
+
// ---------------------------------------------------------------------------
|
|
331
|
+
// High-level helpers for integration
|
|
332
|
+
// ---------------------------------------------------------------------------
|
|
333
|
+
|
|
334
|
+
/**
 * Record the outcome of an evolve run in evolution memory.
 *
 * Inserts or replaces the skill's entry under active evolutions (status
 * "deployed" or "failed"), stamps the context with the current time, saves
 * it, and appends an "evolve" entry to the decision log.
 *
 * @param skillName - Skill that was evolved.
 * @param proposal - Proposal whose rationale is used as the description.
 * @param result - Evolve outcome (`deployed` flag and `reason`).
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterEvolve(
  skillName: string,
  proposal: EvolutionProposal,
  result: EvolveResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const memory = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.deployed ? "deployed" : "failed",
    // Fall back to the run's reason when the proposal carries no rationale.
    description: proposal.rationale || result.reason,
  };

  // One entry per skill: overwrite in place when present, otherwise add.
  const existingAt = memory.activeEvolutions.findIndex((evo) => evo.skillName === skillName);
  if (existingAt === -1) {
    memory.activeEvolutions.push(entry);
  } else {
    memory.activeEvolutions[existingAt] = entry;
  }

  memory.lastUpdated = timestamp;
  writeContext(memory, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "evolve",
    skillName,
    action: "evolved",
    rationale: proposal.rationale || "Evolution triggered",
    result: result.reason,
  };
  appendDecision(decision, memoryDir);
}
|
|
370
|
+
|
|
371
|
+
/**
 * Record the outcome of a rollback in evolution memory.
 *
 * Inserts or replaces the skill's entry under active evolutions (status
 * "rolled-back" or "rollback-failed"), stamps the context with the current
 * time, saves it, and appends a "rollback" entry to the decision log.
 *
 * @param skillName - Skill that was rolled back.
 * @param result - Rollback outcome (`rolledBack` flag and `reason`).
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterRollback(
  skillName: string,
  result: RollbackResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const memory = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.rolledBack ? "rolled-back" : "rollback-failed",
    description: result.reason,
  };

  // One entry per skill: overwrite in place when present, otherwise add.
  const existingAt = memory.activeEvolutions.findIndex((evo) => evo.skillName === skillName);
  if (existingAt === -1) {
    memory.activeEvolutions.push(entry);
  } else {
    memory.activeEvolutions[existingAt] = entry;
  }

  memory.lastUpdated = timestamp;
  writeContext(memory, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "rollback",
    skillName,
    action: "rolled-back",
    rationale: result.reason,
    result: result.rolledBack ? "Successfully rolled back" : "Rollback failed",
  };
  appendDecision(decision, memoryDir);
}
|
|
404
|
+
|
|
405
|
+
/**
 * Record the outcome of a monitoring check in evolution memory.
 *
 * Inserts or replaces the skill's entry under active evolutions (status
 * "regression" or "healthy"), adds a known issue when a regression is
 * detected and none is recorded for this skill yet, stamps the context with
 * the current time, saves it, and appends a "watch" entry to the decision log.
 *
 * The "already recorded" check matches the exact prefix this function
 * generates ("Regression detected for <skillName>:"). A plain substring test
 * on the skill name would let an issue for e.g. "pdf-export" suppress the
 * first regression report for a skill named "pdf".
 *
 * @param skillName - Skill that was checked.
 * @param snapshot - Monitoring snapshot (`pass_rate`, `baseline_pass_rate`,
 *   `regression_detected`).
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterWatch(
  skillName: string,
  snapshot: MonitoringSnapshot,
  memoryDir: string = MEMORY_DIR,
): void {
  const now = new Date().toISOString();
  const context = readContext(memoryDir);

  const passRate = snapshot.pass_rate.toFixed(2);
  const baseline = snapshot.baseline_pass_rate.toFixed(2);

  const status = snapshot.regression_detected ? "regression" : "healthy";
  const description = `pass_rate=${passRate}, baseline=${baseline}`;

  // One entry per skill: overwrite in place when present, otherwise add.
  const idx = context.activeEvolutions.findIndex((e) => e.skillName === skillName);
  if (idx >= 0) {
    context.activeEvolutions[idx] = { skillName, status, description };
  } else {
    context.activeEvolutions.push({ skillName, status, description });
  }

  // Add known issue if regression detected
  if (snapshot.regression_detected) {
    const issuePrefix = `Regression detected for ${skillName}:`;
    const issue = `${issuePrefix} pass_rate=${passRate} below baseline=${baseline}`;
    // Exact-prefix match so similarly named skills do not mask each other.
    if (!context.knownIssues.some((i) => i.startsWith(issuePrefix))) {
      context.knownIssues.push(issue);
    }
  }

  context.lastUpdated = now;
  writeContext(context, memoryDir);

  appendDecision(
    {
      timestamp: now,
      actionType: "watch",
      skillName,
      action: "watched",
      rationale: `Monitoring check: pass_rate=${passRate}, regression=${snapshot.regression_detected}`,
      result: snapshot.regression_detected
        ? `Regression detected (pass_rate=${passRate})`
        : `Healthy (pass_rate=${passRate})`,
    },
    memoryDir,
  );
}
|
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
import { parseArgs } from "node:util";
|
|
10
10
|
|
|
11
11
|
import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
|
|
12
|
+
import { classifyInvocation } from "../eval/hooks-to-evals.js";
|
|
12
13
|
import { getLastDeployedProposal } from "../evolution/audit.js";
|
|
14
|
+
import { updateContextAfterWatch } from "../memory/writer.js";
|
|
13
15
|
import type {
|
|
14
16
|
InvocationType,
|
|
15
17
|
MonitoringSnapshot,
|
|
@@ -116,13 +118,20 @@ export function computeMonitoringSnapshot(
|
|
|
116
118
|
const falseNegatives = filteredSkillRecords.filter((r) => !r.triggered).length;
|
|
117
119
|
const falseNegativeRate = totalSkillChecks === 0 ? 0 : falseNegatives / totalSkillChecks;
|
|
118
120
|
|
|
119
|
-
// 6. by_invocation_type:
|
|
121
|
+
// 6. by_invocation_type: classify each skill record using classifyInvocation
|
|
120
122
|
const byInvocationType: Record<InvocationType, { passed: number; total: number }> = {
|
|
121
123
|
explicit: { passed: 0, total: 0 },
|
|
122
|
-
implicit: { passed:
|
|
124
|
+
implicit: { passed: 0, total: 0 },
|
|
123
125
|
contextual: { passed: 0, total: 0 },
|
|
124
126
|
negative: { passed: 0, total: 0 },
|
|
125
127
|
};
|
|
128
|
+
for (const record of filteredSkillRecords) {
|
|
129
|
+
const invType = classifyInvocation(record.query, skillName);
|
|
130
|
+
byInvocationType[invType].total++;
|
|
131
|
+
if (record.triggered) {
|
|
132
|
+
byInvocationType[invType].passed++;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
126
135
|
|
|
127
136
|
// 7. Regression detection: pass_rate < baseline - threshold
|
|
128
137
|
// Use rounding to avoid floating-point boundary issues (e.g. 0.8 - 0.1 = 0.7000000000000001)
|
|
@@ -212,6 +221,20 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
212
221
|
recommendation = `Skill "${skillName}" is stable. Pass rate ${snapshot.pass_rate.toFixed(2)} is within acceptable range of baseline ${baselinePassRate.toFixed(2)}.`;
|
|
213
222
|
}
|
|
214
223
|
|
|
224
|
+
// Update evolution memory (fail-open)
|
|
225
|
+
try {
|
|
226
|
+
updateContextAfterWatch(skillName, snapshot);
|
|
227
|
+
} catch (err) {
|
|
228
|
+
// Fail-open: memory writes should never fail the main operation
|
|
229
|
+
console.error(
|
|
230
|
+
JSON.stringify({
|
|
231
|
+
level: "debug",
|
|
232
|
+
code: "memory_write_failed",
|
|
233
|
+
message: `Failed to update memory after watch for "${skillName}": ${err instanceof Error ? err.message : String(err)}`,
|
|
234
|
+
}),
|
|
235
|
+
);
|
|
236
|
+
}
|
|
237
|
+
|
|
215
238
|
return {
|
|
216
239
|
snapshot,
|
|
217
240
|
alert,
|
package/cli/selftune/status.ts
CHANGED
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
|
|
11
|
-
import { getLastDeployedProposal } from "./evolution/audit.js";
|
|
12
11
|
import { computeMonitoringSnapshot } from "./monitoring/watch.js";
|
|
13
12
|
import { doctor } from "./observability.js";
|
|
14
13
|
import type {
|
|
@@ -30,7 +29,7 @@ export interface SkillStatus {
|
|
|
30
29
|
passRate: number | null;
|
|
31
30
|
trend: "up" | "down" | "stable" | "unknown";
|
|
32
31
|
missedQueries: number;
|
|
33
|
-
status: "HEALTHY" | "
|
|
32
|
+
status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
|
|
34
33
|
snapshot: MonitoringSnapshot | null;
|
|
35
34
|
}
|
|
36
35
|
|
|
@@ -103,12 +102,14 @@ export function computeStatus(
|
|
|
103
102
|
// Count missed queries for this skill (queries where skill was checked but not triggered)
|
|
104
103
|
const missedQueries = skillSpecificRecords.filter((r) => !r.triggered).length;
|
|
105
104
|
|
|
106
|
-
// Determine status
|
|
107
|
-
let status: "HEALTHY" | "
|
|
105
|
+
// Determine status (4-state)
|
|
106
|
+
let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
|
|
108
107
|
if (!hasData || passRate === null) {
|
|
109
|
-
status = "
|
|
110
|
-
} else if (snapshot.regression_detected) {
|
|
111
|
-
status = "
|
|
108
|
+
status = "UNKNOWN";
|
|
109
|
+
} else if (snapshot.regression_detected || passRate < 0.4) {
|
|
110
|
+
status = "CRITICAL";
|
|
111
|
+
} else if (passRate < 0.7) {
|
|
112
|
+
status = "WARNING";
|
|
112
113
|
} else {
|
|
113
114
|
status = "HEALTHY";
|
|
114
115
|
}
|
|
@@ -116,8 +117,8 @@ export function computeStatus(
|
|
|
116
117
|
return { name: skillName, passRate, trend, missedQueries, status, snapshot };
|
|
117
118
|
});
|
|
118
119
|
|
|
119
|
-
// Sort:
|
|
120
|
-
const statusOrder = {
|
|
120
|
+
// Sort: CRITICAL first, then WARNING, then HEALTHY, then UNKNOWN
|
|
121
|
+
const statusOrder: Record<string, number> = { CRITICAL: 0, WARNING: 1, HEALTHY: 2, UNKNOWN: 3 };
|
|
121
122
|
skills.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);
|
|
122
123
|
|
|
123
124
|
// Unmatched queries: queries whose text appears in zero triggered skill_usage_log entries
|
|
@@ -227,12 +228,12 @@ export function formatStatus(result: StatusResult): string {
|
|
|
227
228
|
// Skills table
|
|
228
229
|
const skillCount = result.skills.length;
|
|
229
230
|
lines.push(
|
|
230
|
-
`Skills (${skillCount})${" ".repeat(36 - `Skills (${skillCount})`.length)}
|
|
231
|
+
`Skills (${skillCount})${" ".repeat(36 - `Skills (${skillCount})`.length)}Recent data`,
|
|
231
232
|
);
|
|
232
233
|
lines.push(" Name Pass Rate Trend Missed Status");
|
|
233
234
|
|
|
234
235
|
for (const skill of result.skills) {
|
|
235
|
-
const name = skill.name.padEnd(16);
|
|
236
|
+
const name = skill.name.slice(0, 16).padEnd(16);
|
|
236
237
|
const passRate =
|
|
237
238
|
skill.passRate !== null
|
|
238
239
|
? `${Math.round(skill.passRate * 100)}%`.padEnd(11)
|
|
@@ -240,11 +241,13 @@ export function formatStatus(result: StatusResult): string {
|
|
|
240
241
|
const trend = TREND_SYMBOLS[skill.trend].padEnd(7);
|
|
241
242
|
const missed = String(skill.missedQueries).padEnd(8);
|
|
242
243
|
const statusText =
|
|
243
|
-
skill.status === "
|
|
244
|
+
skill.status === "CRITICAL"
|
|
244
245
|
? red(skill.status)
|
|
245
|
-
: skill.status === "
|
|
246
|
-
?
|
|
247
|
-
:
|
|
246
|
+
: skill.status === "WARNING"
|
|
247
|
+
? amber(skill.status)
|
|
248
|
+
: skill.status === "HEALTHY"
|
|
249
|
+
? green(skill.status)
|
|
250
|
+
: amber(skill.status);
|
|
248
251
|
lines.push(` ${name}${passRate}${trend}${missed}${statusText}`);
|
|
249
252
|
}
|
|
250
253
|
|