selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory writer — pure functions for reading/writing evolution memory files.
|
|
3
|
+
*
|
|
4
|
+
* Memory files live at ~/.selftune/memory/ and provide human-readable session
|
|
5
|
+
* context that survives context resets. Three files:
|
|
6
|
+
* - context.md — active evolutions, known issues
|
|
7
|
+
* - plan.md — current priorities, strategy
|
|
8
|
+
* - decisions.md — append-only decision log
|
|
9
|
+
*
|
|
10
|
+
* All functions accept an optional memoryDir parameter for testability.
|
|
11
|
+
* Default: MEMORY_DIR from constants.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
|
|
17
|
+
import { MEMORY_DIR } from "../constants.js";
|
|
18
|
+
import type { EvolveResult } from "../evolution/evolve.js";
|
|
19
|
+
import type { RollbackResult } from "../evolution/rollback.js";
|
|
20
|
+
import type {
|
|
21
|
+
DecisionRecord,
|
|
22
|
+
EvolutionProposal,
|
|
23
|
+
MemoryContext,
|
|
24
|
+
MemoryPlan,
|
|
25
|
+
MonitoringSnapshot,
|
|
26
|
+
} from "../types.js";
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Directory management
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
 * Make sure the memory directory exists, creating it (and any missing
 * parent directories) when it does not.
 *
 * @param memoryDir - Directory to create; defaults to MEMORY_DIR.
 */
export function ensureMemoryDir(memoryDir: string = MEMORY_DIR): void {
  // Nothing to do when the path is already present.
  if (existsSync(memoryDir)) return;

  mkdirSync(memoryDir, { recursive: true });
}
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// context.md
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Render a MemoryContext as the markdown text stored in context.md.
 *
 * Layout: a "# Selftune Context" title followed by three sections —
 * "## Active Evolutions" (one "- skill: status — description" bullet per
 * entry), "## Known Issues" (one bullet per issue) and "## Last Updated".
 * Empty lists are rendered as a single "- (none)" bullet.
 *
 * @param data - Context to serialise.
 * @returns The markdown document, terminated by a newline.
 */
function formatContext(data: MemoryContext): string {
  // The file is read back one line at a time, so a value containing a line
  // break would be cut off (or misread as a separate bullet/heading) on the
  // next read. Flatten line breaks to a single space; values that are already
  // single-line are emitted unchanged.
  const oneLine = (value: string): string => String(value).replace(/\s*[\r\n]+\s*/g, " ");

  const evolutionLines = data.activeEvolutions.map(
    (evo) => `- ${oneLine(evo.skillName)}: ${oneLine(evo.status)} — ${oneLine(evo.description)}`,
  );
  const issueLines = data.knownIssues.map((issue) => `- ${oneLine(issue)}`);

  return [
    "# Selftune Context",
    "",
    "## Active Evolutions",
    ...(evolutionLines.length > 0 ? evolutionLines : ["- (none)"]),
    "",
    "## Known Issues",
    ...(issueLines.length > 0 ? issueLines : ["- (none)"]),
    "",
    "## Last Updated",
    data.lastUpdated,
    "",
  ].join("\n");
}
|
|
71
|
+
|
|
72
|
+
/**
 * Parse the markdown text of context.md back into a MemoryContext.
 *
 * Recognised sections are "## Active Evolutions", "## Known Issues" and
 * "## Last Updated". Any other markdown heading closes the current section,
 * so bullets under a heading this parser does not know are ignored instead
 * of being attributed to the preceding section.
 *
 * @param content - Raw file content.
 * @returns The parsed context; fields that are absent keep their empty defaults.
 */
function parseContext(content: string): MemoryContext {
  type Section = "" | "evolutions" | "issues" | "updated";

  const parsed: MemoryContext = {
    activeEvolutions: [],
    knownIssues: [],
    lastUpdated: "",
  };

  const sectionByHeading = new Map<string, Section>([
    ["## Active Evolutions", "evolutions"],
    ["## Known Issues", "issues"],
    ["## Last Updated", "updated"],
  ]);

  let section: Section = "";

  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();

    const knownSection = sectionByHeading.get(line);
    if (knownSection !== undefined) {
      section = knownSection;
      continue;
    }
    // Any other heading (title or unknown section) ends the current section.
    if (/^#{1,6}\s/.test(line)) {
      section = "";
      continue;
    }

    const isBullet = line.startsWith("- ") && line !== "- (none)";

    if (section === "evolutions" && isBullet) {
      // Bullet format: "- skillName: status — description"
      const body = line.slice(2);
      const colonAt = body.indexOf(":");
      if (colonAt === -1) continue;

      const skillName = body.slice(0, colonAt).trim();
      const remainder = body.slice(colonAt + 1).trim();
      // Split on the first em dash only, so the description may contain more.
      const dashAt = remainder.indexOf("—");
      if (dashAt === -1) {
        parsed.activeEvolutions.push({ skillName, status: remainder, description: "" });
      } else {
        parsed.activeEvolutions.push({
          skillName,
          status: remainder.slice(0, dashAt).trim(),
          description: remainder.slice(dashAt + 1).trim(),
        });
      }
    } else if (section === "issues" && isBullet) {
      parsed.knownIssues.push(line.slice(2));
    } else if (section === "updated" && line.length > 0) {
      // Only the first non-empty line after the heading is the timestamp.
      parsed.lastUpdated = line;
      section = "";
    }
  }

  return parsed;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Serialise a MemoryContext and write it to context.md, replacing any
 * previous content. The memory directory is created first if needed.
 *
 * @param data - Context to persist.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function writeContext(data: MemoryContext, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatContext(data);
  writeFileSync(join(memoryDir, "context.md"), markdown, "utf-8");
}
|
|
137
|
+
|
|
138
|
+
/**
 * Load context.md from the memory directory.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed context, or an empty context when the file is missing.
 */
export function readContext(memoryDir: string = MEMORY_DIR): MemoryContext {
  const contextPath = join(memoryDir, "context.md");
  return existsSync(contextPath)
    ? parseContext(readFileSync(contextPath, "utf-8"))
    : { activeEvolutions: [], knownIssues: [], lastUpdated: "" };
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// plan.md
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
/**
 * Render a MemoryPlan as the markdown text stored in plan.md.
 *
 * Priorities become a numbered list ("1. (none)" when empty), and an empty
 * strategy is written as the placeholder "(no strategy defined)".
 *
 * @param data - Plan to serialise.
 * @returns The markdown document, terminated by a newline.
 */
function formatPlan(data: MemoryPlan): string {
  const priorityLines =
    data.currentPriorities.length === 0
      ? ["1. (none)"]
      : data.currentPriorities.map((priority, position) => `${position + 1}. ${priority}`);

  return [
    "# Evolution Plan",
    "",
    "## Current Priorities",
    ...priorityLines,
    "",
    "## Strategy",
    data.strategy || "(no strategy defined)",
    "",
    "## Last Updated",
    data.lastUpdated,
    "",
  ].join("\n");
}
|
|
174
|
+
|
|
175
|
+
/**
 * Parse the markdown text of plan.md back into a MemoryPlan.
 *
 * Recognised sections are "## Current Priorities", "## Strategy" and
 * "## Last Updated"; a line starting with "# " closes the current section.
 *
 * @param content - Raw file content.
 * @returns The parsed plan; fields that are absent keep their empty defaults.
 */
function parsePlan(content: string): MemoryPlan {
  const plan: MemoryPlan = {
    currentPriorities: [],
    strategy: "",
    lastUpdated: "",
  };

  let section = "";

  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();

    switch (line) {
      case "## Current Priorities":
        section = "priorities";
        continue;
      case "## Strategy":
        section = "strategy";
        continue;
      case "## Last Updated":
        section = "updated";
        continue;
      default:
        break;
    }
    if (line.startsWith("# ")) {
      section = "";
      continue;
    }

    if (section === "priorities") {
      // Item format: "1. priority text"; the "(none)" placeholder is skipped.
      const numbered = line.match(/^\d+\.\s+(.+)$/);
      if (numbered && numbered[1] !== "(none)") {
        plan.currentPriorities.push(numbered[1]);
      }
    } else if (section === "strategy") {
      // Only the FIRST non-empty, non-placeholder line is the strategy.
      // Closing the section after capturing it keeps later lines from
      // overwriting the value (previously the last line won).
      if (line.length > 0 && line !== "(no strategy defined)") {
        plan.strategy = line;
        section = "";
      }
    } else if (section === "updated" && line.length > 0) {
      plan.lastUpdated = line;
      section = "";
    }
  }

  return plan;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Serialise a MemoryPlan and write it to plan.md, replacing any previous
 * content. The memory directory is created first if needed.
 *
 * @param data - Plan to persist.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function writePlan(data: MemoryPlan, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);
  const markdown = formatPlan(data);
  writeFileSync(join(memoryDir, "plan.md"), markdown, "utf-8");
}
|
|
232
|
+
|
|
233
|
+
/**
 * Load plan.md from the memory directory.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed plan, or an empty plan when the file is missing.
 */
export function readPlan(memoryDir: string = MEMORY_DIR): MemoryPlan {
  const planPath = join(memoryDir, "plan.md");
  return existsSync(planPath)
    ? parsePlan(readFileSync(planPath, "utf-8"))
    : { currentPriorities: [], strategy: "", lastUpdated: "" };
}
|
|
241
|
+
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
// decisions.md (append-only)
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
/**
 * Render one DecisionRecord as a markdown entry for decisions.md.
 *
 * The entry is a "## timestamp — actionType" heading, four labelled bullets
 * (Skill, Action, Rationale, Result), a blank line and a "---" separator.
 *
 * @param record - Decision to serialise.
 * @returns The entry text, terminated by a newline.
 */
function formatDecisionEntry(record: DecisionRecord): string {
  // Entries are read back one line per field and split on "---" lines, so a
  // free-text value containing a line break would be truncated on re-read,
  // and a line that is exactly "---" would split the entry in two. Flatten
  // line breaks to a single space; single-line values are emitted unchanged.
  const oneLine = (value: string): string => String(value).replace(/\s*[\r\n]+\s*/g, " ");

  return [
    `## ${record.timestamp} — ${record.actionType}`,
    `- **Skill:** ${oneLine(record.skillName)}`,
    `- **Action:** ${record.action}`,
    `- **Rationale:** ${oneLine(record.rationale)}`,
    `- **Result:** ${oneLine(record.result)}`,
    "",
    "---",
    "",
  ].join("\n");
}
|
|
259
|
+
|
|
260
|
+
/**
 * Parse the markdown text of decisions.md into DecisionRecords.
 *
 * The log is split into blocks on separator lines consisting of "---".
 * Within a block, the "## timestamp — actionType" heading and the labelled
 * bullets (Skill, Action, Rationale, Result) are read. A block is kept only
 * when it has both a timestamp and a skill name, which drops the file title
 * and any trailing empty block.
 *
 * @param content - Raw file content.
 * @returns Records in file order.
 */
function parseDecisions(content: string): DecisionRecord[] {
  const SKILL = "- **Skill:**";
  const ACTION = "- **Action:**";
  const RATIONALE = "- **Rationale:**";
  const RESULT = "- **Result:**";

  const parsed: DecisionRecord[] = [];

  // Accept trailing blanks and a carriage return on the separator line so a
  // log saved with CRLF line endings still splits into individual entries.
  const blocks = content.split(/^---[ \t]*\r?$/m);

  for (const block of blocks) {
    let timestamp = "";
    let actionType = "";
    let skillName = "";
    // An action value outside the known set leaves this default in place.
    let action: DecisionRecord["action"] = "watched";
    let rationale = "";
    let result = "";

    for (const rawLine of block.split("\n")) {
      const line = rawLine.trim();
      if (line.length === 0) continue;

      // Heading: "## 2026-03-01T00:00:00Z — evolve"
      const heading = line.match(/^## (.+?) — (.+)$/);
      if (heading) {
        timestamp = heading[1];
        actionType = heading[2];
        continue;
      }

      if (line.startsWith(SKILL)) {
        skillName = line.slice(SKILL.length).trim();
      } else if (line.startsWith(ACTION)) {
        const raw = line.slice(ACTION.length).trim();
        if (raw === "evolved" || raw === "rolled-back" || raw === "watched") {
          action = raw;
        }
      } else if (line.startsWith(RATIONALE)) {
        rationale = line.slice(RATIONALE.length).trim();
      } else if (line.startsWith(RESULT)) {
        result = line.slice(RESULT.length).trim();
      }
    }

    if (timestamp && skillName) {
      parsed.push({ timestamp, actionType, skillName, action, rationale, result });
    }
  }

  return parsed;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Append one decision entry to decisions.md.
 *
 * The memory directory is created if needed, and a new log file is started
 * with a "# Decision Log" title before the first entry is appended.
 *
 * @param record - Decision to record.
 * @param memoryDir - Target directory; defaults to MEMORY_DIR.
 */
export function appendDecision(record: DecisionRecord, memoryDir: string = MEMORY_DIR): void {
  ensureMemoryDir(memoryDir);

  const logPath = join(memoryDir, "decisions.md");
  const isNewLog = !existsSync(logPath);
  if (isNewLog) {
    writeFileSync(logPath, "# Decision Log\n\n", "utf-8");
  }

  appendFileSync(logPath, formatDecisionEntry(record), "utf-8");
}
|
|
320
|
+
|
|
321
|
+
/**
 * Load every decision recorded in decisions.md.
 *
 * @param memoryDir - Directory to read from; defaults to MEMORY_DIR.
 * @returns The parsed records, or an empty array when the file is missing.
 */
export function readDecisions(memoryDir: string = MEMORY_DIR): DecisionRecord[] {
  const logPath = join(memoryDir, "decisions.md");
  return existsSync(logPath) ? parseDecisions(readFileSync(logPath, "utf-8")) : [];
}
|
|
329
|
+
|
|
330
|
+
// ---------------------------------------------------------------------------
|
|
331
|
+
// High-level helpers for integration
|
|
332
|
+
// ---------------------------------------------------------------------------
|
|
333
|
+
|
|
334
|
+
/**
 * Record the outcome of an evolve run in the memory files.
 *
 * Replaces (or adds) the skill's entry in context.md with status "deployed"
 * or "failed", refreshes the context timestamp, and appends an "evolve"
 * entry to the decision log.
 *
 * @param skillName - Skill that was evolved.
 * @param proposal - Proposal whose rationale is used as the description.
 * @param result - Evolve outcome; `deployed` and `reason` are read.
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterEvolve(
  skillName: string,
  proposal: EvolutionProposal,
  result: EvolveResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const context = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.deployed ? "deployed" : "failed",
    // Fall back to the result's reason when the proposal has no rationale.
    description: proposal.rationale || result.reason,
  };

  // Upsert: one entry per skill.
  const position = context.activeEvolutions.findIndex((e) => e.skillName === skillName);
  if (position === -1) {
    context.activeEvolutions.push(entry);
  } else {
    context.activeEvolutions[position] = entry;
  }

  context.lastUpdated = timestamp;
  writeContext(context, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "evolve",
    skillName,
    action: "evolved",
    rationale: proposal.rationale || "Evolution triggered",
    result: result.reason,
  };
  appendDecision(decision, memoryDir);
}
|
|
370
|
+
|
|
371
|
+
/**
 * Record the outcome of a rollback in the memory files.
 *
 * Replaces (or adds) the skill's entry in context.md with status
 * "rolled-back" or "rollback-failed", refreshes the context timestamp, and
 * appends a "rollback" entry to the decision log.
 *
 * @param skillName - Skill that was rolled back.
 * @param result - Rollback outcome; `rolledBack` and `reason` are read.
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterRollback(
  skillName: string,
  result: RollbackResult,
  memoryDir: string = MEMORY_DIR,
): void {
  const timestamp = new Date().toISOString();
  const context = readContext(memoryDir);

  const entry = {
    skillName,
    status: result.rolledBack ? "rolled-back" : "rollback-failed",
    description: result.reason,
  };

  // Upsert: one entry per skill.
  const position = context.activeEvolutions.findIndex((e) => e.skillName === skillName);
  if (position === -1) {
    context.activeEvolutions.push(entry);
  } else {
    context.activeEvolutions[position] = entry;
  }

  context.lastUpdated = timestamp;
  writeContext(context, memoryDir);

  const decision: DecisionRecord = {
    timestamp,
    actionType: "rollback",
    skillName,
    action: "rolled-back",
    rationale: result.reason,
    result: result.rolledBack ? "Successfully rolled back" : "Rollback failed",
  };
  appendDecision(decision, memoryDir);
}
|
|
404
|
+
|
|
405
|
+
/**
 * Record the outcome of a monitoring check in the memory files.
 *
 * Replaces (or adds) the skill's entry in context.md with status
 * "regression" or "healthy", adds a known issue the first time a regression
 * is seen for the skill, refreshes the context timestamp, and appends a
 * "watch" entry to the decision log.
 *
 * @param skillName - Skill that was monitored.
 * @param snapshot - Monitoring snapshot; `pass_rate`, `baseline_pass_rate`
 *   and `regression_detected` are read.
 * @param memoryDir - Memory directory; defaults to MEMORY_DIR.
 */
export function updateContextAfterWatch(
  skillName: string,
  snapshot: MonitoringSnapshot,
  memoryDir: string = MEMORY_DIR,
): void {
  const now = new Date().toISOString();
  const context = readContext(memoryDir);

  const passRate = snapshot.pass_rate.toFixed(2);
  const baselineRate = snapshot.baseline_pass_rate.toFixed(2);

  const entry = {
    skillName,
    status: snapshot.regression_detected ? "regression" : "healthy",
    description: `pass_rate=${passRate}, baseline=${baselineRate}`,
  };

  // Upsert: one entry per skill.
  const idx = context.activeEvolutions.findIndex((e) => e.skillName === skillName);
  if (idx >= 0) {
    context.activeEvolutions[idx] = entry;
  } else {
    context.activeEvolutions.push(entry);
  }

  // Add a known issue if a regression was detected and none is recorded yet.
  if (snapshot.regression_detected) {
    // Match the exact issue prefix, colon included. A substring test on the
    // skill name would let an issue for "pdf-tools" suppress the issue for
    // "pdf".
    const issuePrefix = `Regression detected for ${skillName}:`;
    const alreadyRecorded = context.knownIssues.some((i) => i.startsWith(issuePrefix));
    if (!alreadyRecorded) {
      context.knownIssues.push(
        `${issuePrefix} pass_rate=${passRate} below baseline=${baselineRate}`,
      );
    }
  }

  context.lastUpdated = now;
  writeContext(context, memoryDir);

  appendDecision(
    {
      timestamp: now,
      actionType: "watch",
      skillName,
      action: "watched",
      rationale: `Monitoring check: pass_rate=${passRate}, regression=${snapshot.regression_detected}`,
      result: snapshot.regression_detected
        ? `Regression detected (pass_rate=${passRate})`
        : `Healthy (pass_rate=${passRate})`,
    },
    memoryDir,
  );
}
|
|
@@ -9,7 +9,10 @@
|
|
|
9
9
|
import { parseArgs } from "node:util";
|
|
10
10
|
|
|
11
11
|
import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
|
|
12
|
+
import { classifyInvocation } from "../eval/hooks-to-evals.js";
|
|
12
13
|
import { getLastDeployedProposal } from "../evolution/audit.js";
|
|
14
|
+
import { updateContextAfterWatch } from "../memory/writer.js";
|
|
15
|
+
import type { SyncResult } from "../sync.js";
|
|
13
16
|
import type {
|
|
14
17
|
InvocationType,
|
|
15
18
|
MonitoringSnapshot,
|
|
@@ -18,6 +21,11 @@ import type {
|
|
|
18
21
|
SkillUsageRecord,
|
|
19
22
|
} from "../types.js";
|
|
20
23
|
import { readJsonl } from "../utils/jsonl.js";
|
|
24
|
+
import {
|
|
25
|
+
filterActionableQueryRecords,
|
|
26
|
+
filterActionableSkillUsageRecords,
|
|
27
|
+
} from "../utils/query-filter.js";
|
|
28
|
+
import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
|
|
21
29
|
|
|
22
30
|
// ---------------------------------------------------------------------------
|
|
23
31
|
// Public interfaces
|
|
@@ -40,6 +48,10 @@ export interface WatchOptions {
|
|
|
40
48
|
skillPath: string;
|
|
41
49
|
proposalId?: string;
|
|
42
50
|
}) => Promise<{ rolledBack: boolean; restoredDescription: string; reason: string }>;
|
|
51
|
+
/** Source-truth refresh before reading logs. */
|
|
52
|
+
syncFirst?: boolean;
|
|
53
|
+
syncForce?: boolean;
|
|
54
|
+
_syncFn?: typeof import("../sync.js").syncSources;
|
|
43
55
|
}
|
|
44
56
|
|
|
45
57
|
export interface WatchResult {
|
|
@@ -47,6 +59,7 @@ export interface WatchResult {
|
|
|
47
59
|
alert: string | null;
|
|
48
60
|
rolledBack: boolean;
|
|
49
61
|
recommendation: string;
|
|
62
|
+
sync_result?: SyncResult;
|
|
50
63
|
}
|
|
51
64
|
|
|
52
65
|
// ---------------------------------------------------------------------------
|
|
@@ -55,6 +68,7 @@ export interface WatchResult {
|
|
|
55
68
|
|
|
56
69
|
const DEFAULT_BASELINE_PASS_RATE = 0.5;
|
|
57
70
|
const DEFAULT_REGRESSION_THRESHOLD = 0.1;
|
|
71
|
+
export const MIN_MONITORING_SKILL_CHECKS = 3;
|
|
58
72
|
|
|
59
73
|
// ---------------------------------------------------------------------------
|
|
60
74
|
// computeMonitoringSnapshot - pure function
|
|
@@ -64,9 +78,9 @@ const DEFAULT_REGRESSION_THRESHOLD = 0.1;
|
|
|
64
78
|
* Compute a monitoring snapshot from raw log records.
|
|
65
79
|
*
|
|
66
80
|
* The function windows telemetry to the last `windowSessions` entries, then
|
|
67
|
-
* scopes skill and query records to those sessions. If telemetry is
|
|
68
|
-
* no records match the windowed session IDs, all provided skill/query
|
|
69
|
-
* are used directly (unfiltered by session).
|
|
81
|
+
* scopes skill and actionable query records to those sessions. If telemetry is
|
|
82
|
+
* empty or no records match the windowed session IDs, all provided skill/query
|
|
83
|
+
* records are used directly (unfiltered by session).
|
|
70
84
|
*
|
|
71
85
|
* @param skillName - The skill to monitor
|
|
72
86
|
* @param telemetry - All session telemetry records
|
|
@@ -86,43 +100,53 @@ export function computeMonitoringSnapshot(
|
|
|
86
100
|
regressionThreshold: number = DEFAULT_REGRESSION_THRESHOLD,
|
|
87
101
|
): MonitoringSnapshot {
|
|
88
102
|
// 1. Window the telemetry to the last N sessions (by array order, assumed chronological)
|
|
103
|
+
const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
|
|
104
|
+
const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
|
|
89
105
|
const windowedTelemetry = telemetry.slice(-windowSessions);
|
|
90
106
|
const windowedSessionIds = new Set(windowedTelemetry.map((t) => t.session_id));
|
|
91
107
|
|
|
92
108
|
// 2. Filter skill records by skill name first
|
|
93
|
-
const skillNameFiltered =
|
|
109
|
+
const skillNameFiltered = actionableSkillRecords.filter((r) => r.skill_name === skillName);
|
|
94
110
|
|
|
95
111
|
// 3. Apply session ID windowing only if telemetry is present and overlaps
|
|
96
112
|
const hasSessionOverlap =
|
|
97
113
|
windowedSessionIds.size > 0 &&
|
|
98
114
|
(skillNameFiltered.some((r) => windowedSessionIds.has(r.session_id)) ||
|
|
99
|
-
|
|
115
|
+
actionableQueryRecords.some((r) => windowedSessionIds.has(r.session_id)));
|
|
100
116
|
|
|
101
117
|
const filteredSkillRecords = hasSessionOverlap
|
|
102
118
|
? skillNameFiltered.filter((r) => windowedSessionIds.has(r.session_id))
|
|
103
119
|
: skillNameFiltered;
|
|
104
|
-
|
|
105
120
|
const filteredQueryRecords = hasSessionOverlap
|
|
106
|
-
?
|
|
107
|
-
:
|
|
121
|
+
? actionableQueryRecords.filter((r) => windowedSessionIds.has(r.session_id))
|
|
122
|
+
: actionableQueryRecords;
|
|
108
123
|
|
|
109
|
-
// 4. Compute pass rate
|
|
124
|
+
// 4. Compute pass rate from explicit skill checks, not from all queries.
|
|
110
125
|
const triggeredCount = filteredSkillRecords.filter((r) => r.triggered).length;
|
|
111
|
-
const
|
|
112
|
-
const passRate =
|
|
126
|
+
const totalSkillChecks = filteredSkillRecords.length;
|
|
127
|
+
const passRate = totalSkillChecks === 0 ? 0 : triggeredCount / totalSkillChecks;
|
|
113
128
|
|
|
114
129
|
// 5. Compute false negative rate from skill usage records
|
|
115
|
-
const totalSkillChecks = filteredSkillRecords.length;
|
|
116
130
|
const falseNegatives = filteredSkillRecords.filter((r) => !r.triggered).length;
|
|
117
131
|
const falseNegativeRate = totalSkillChecks === 0 ? 0 : falseNegatives / totalSkillChecks;
|
|
118
132
|
|
|
119
|
-
// 6. by_invocation_type:
|
|
133
|
+
// 6. by_invocation_type: classify each skill record using classifyInvocation
|
|
120
134
|
const byInvocationType: Record<InvocationType, { passed: number; total: number }> = {
|
|
121
135
|
explicit: { passed: 0, total: 0 },
|
|
122
|
-
implicit: { passed:
|
|
136
|
+
implicit: { passed: 0, total: 0 },
|
|
123
137
|
contextual: { passed: 0, total: 0 },
|
|
124
138
|
negative: { passed: 0, total: 0 },
|
|
125
139
|
};
|
|
140
|
+
for (const record of filteredSkillRecords) {
|
|
141
|
+
const invType = classifyInvocation(
|
|
142
|
+
typeof record.query === "string" ? record.query : "",
|
|
143
|
+
skillName,
|
|
144
|
+
);
|
|
145
|
+
byInvocationType[invType].total++;
|
|
146
|
+
if (record.triggered) {
|
|
147
|
+
byInvocationType[invType].passed++;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
126
150
|
|
|
127
151
|
// 7. Regression detection: pass_rate < baseline - threshold
|
|
128
152
|
// Use rounding to avoid floating-point boundary issues (e.g. 0.8 - 0.1 = 0.7000000000000001)
|
|
@@ -130,12 +154,16 @@ export function computeMonitoringSnapshot(
|
|
|
130
154
|
const adjustedThreshold =
|
|
131
155
|
Math.round((baselinePassRate - regressionThreshold) * precision) / precision;
|
|
132
156
|
const roundedPassRate = Math.round(passRate * precision) / precision;
|
|
133
|
-
const
|
|
157
|
+
const hasEnoughSignalForRegression =
|
|
158
|
+
totalSkillChecks >= MIN_MONITORING_SKILL_CHECKS ||
|
|
159
|
+
(totalSkillChecks === 0 && filteredQueryRecords.length >= MIN_MONITORING_SKILL_CHECKS);
|
|
160
|
+
const regressionDetected = hasEnoughSignalForRegression && roundedPassRate < adjustedThreshold;
|
|
134
161
|
|
|
135
162
|
return {
|
|
136
163
|
timestamp: new Date().toISOString(),
|
|
137
164
|
skill_name: skillName,
|
|
138
165
|
window_sessions: windowSessions,
|
|
166
|
+
skill_checks: totalSkillChecks,
|
|
139
167
|
pass_rate: passRate,
|
|
140
168
|
false_negative_rate: falseNegativeRate,
|
|
141
169
|
by_invocation_type: byInvocationType,
|
|
@@ -163,11 +191,28 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
163
191
|
_queryLogPath = QUERY_LOG,
|
|
164
192
|
_auditLogPath,
|
|
165
193
|
_rollbackFn,
|
|
194
|
+
syncFirst = false,
|
|
195
|
+
syncForce = false,
|
|
196
|
+
_syncFn,
|
|
166
197
|
} = options;
|
|
167
198
|
|
|
199
|
+
let syncResult: SyncResult | undefined;
|
|
200
|
+
if (syncFirst) {
|
|
201
|
+
const { createDefaultSyncOptions, syncSources: realSyncSources } = await import("../sync.js");
|
|
202
|
+
const syncRunner = _syncFn ?? realSyncSources;
|
|
203
|
+
syncResult = syncRunner(
|
|
204
|
+
createDefaultSyncOptions({
|
|
205
|
+
force: syncForce,
|
|
206
|
+
}),
|
|
207
|
+
);
|
|
208
|
+
}
|
|
209
|
+
|
|
168
210
|
// 1. Read log files
|
|
169
211
|
const telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
|
|
170
|
-
const skillRecords =
|
|
212
|
+
const skillRecords =
|
|
213
|
+
_skillLogPath === SKILL_LOG
|
|
214
|
+
? readEffectiveSkillUsageRecords()
|
|
215
|
+
: readJsonl<SkillUsageRecord>(_skillLogPath);
|
|
171
216
|
const queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);
|
|
172
217
|
|
|
173
218
|
// 2. Determine baseline pass rate from last deployed audit entry
|
|
@@ -208,15 +253,34 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
208
253
|
recommendation = rolledBack
|
|
209
254
|
? `Rolled back "${skillName}" to previous version. Monitor to confirm recovery.`
|
|
210
255
|
: `Consider running: selftune rollback --skill "${skillName}" --skill-path "${skillPath}"`;
|
|
256
|
+
} else if (snapshot.skill_checks < MIN_MONITORING_SKILL_CHECKS) {
|
|
257
|
+
recommendation =
|
|
258
|
+
`Skill "${skillName}" has only ${snapshot.skill_checks} actionable check(s) in the current window. ` +
|
|
259
|
+
`Need at least ${MIN_MONITORING_SKILL_CHECKS} before calling it stable.`;
|
|
211
260
|
} else {
|
|
212
261
|
recommendation = `Skill "${skillName}" is stable. Pass rate ${snapshot.pass_rate.toFixed(2)} is within acceptable range of baseline ${baselinePassRate.toFixed(2)}.`;
|
|
213
262
|
}
|
|
214
263
|
|
|
264
|
+
// Update evolution memory (fail-open)
|
|
265
|
+
try {
|
|
266
|
+
updateContextAfterWatch(skillName, snapshot);
|
|
267
|
+
} catch (err) {
|
|
268
|
+
// Fail-open: memory writes should never fail the main operation
|
|
269
|
+
console.error(
|
|
270
|
+
JSON.stringify({
|
|
271
|
+
level: "debug",
|
|
272
|
+
code: "memory_write_failed",
|
|
273
|
+
message: `Failed to update memory after watch for "${skillName}": ${err instanceof Error ? err.message : String(err)}`,
|
|
274
|
+
}),
|
|
275
|
+
);
|
|
276
|
+
}
|
|
277
|
+
|
|
215
278
|
return {
|
|
216
279
|
snapshot,
|
|
217
280
|
alert,
|
|
218
281
|
rolledBack,
|
|
219
282
|
recommendation,
|
|
283
|
+
...(syncResult ? { sync_result: syncResult } : {}),
|
|
220
284
|
};
|
|
221
285
|
}
|
|
222
286
|
|
|
@@ -260,6 +324,8 @@ export async function cliMain(): Promise<void> {
|
|
|
260
324
|
window: { type: "string", default: "20" },
|
|
261
325
|
threshold: { type: "string", default: "0.1" },
|
|
262
326
|
"auto-rollback": { type: "boolean", default: false },
|
|
327
|
+
"sync-first": { type: "boolean", default: false },
|
|
328
|
+
"sync-force": { type: "boolean", default: false },
|
|
263
329
|
help: { type: "boolean", default: false },
|
|
264
330
|
},
|
|
265
331
|
strict: true,
|
|
@@ -277,6 +343,8 @@ Options:
|
|
|
277
343
|
--window Number of recent sessions to consider (default: 20)
|
|
278
344
|
--threshold Regression threshold below baseline (default: 0.1)
|
|
279
345
|
--auto-rollback Automatically rollback on regression detection
|
|
346
|
+
--sync-first Refresh source-truth telemetry before reading watch inputs
|
|
347
|
+
--sync-force Force a full rescan during --sync-first
|
|
280
348
|
--help Show this help message`);
|
|
281
349
|
process.exit(0);
|
|
282
350
|
}
|
|
@@ -285,6 +353,10 @@ Options:
|
|
|
285
353
|
console.error("[ERROR] --skill and --skill-path are required");
|
|
286
354
|
process.exit(1);
|
|
287
355
|
}
|
|
356
|
+
if ((values["sync-force"] ?? false) && !(values["sync-first"] ?? false)) {
|
|
357
|
+
console.error("[ERROR] --sync-force requires --sync-first");
|
|
358
|
+
process.exit(1);
|
|
359
|
+
}
|
|
288
360
|
|
|
289
361
|
const rawWindow = values.window ?? "20";
|
|
290
362
|
if (!/^\d+$/.test(rawWindow)) {
|
|
@@ -314,6 +386,8 @@ Options:
|
|
|
314
386
|
windowSessions,
|
|
315
387
|
regressionThreshold,
|
|
316
388
|
autoRollback: values["auto-rollback"] ?? false,
|
|
389
|
+
syncFirst: values["sync-first"] ?? false,
|
|
390
|
+
syncForce: values["sync-force"] ?? false,
|
|
317
391
|
});
|
|
318
392
|
|
|
319
393
|
console.log(JSON.stringify(result, null, 2));
|