selftune 0.2.5 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +15 -0
- package/apps/local-dashboard/dist/assets/index-CRtLkBTi.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
- package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/activation-rules.ts +30 -9
- package/cli/selftune/agent-guidance.ts +96 -0
- package/cli/selftune/alpha-identity.ts +157 -0
- package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
- package/cli/selftune/alpha-upload/client.ts +113 -0
- package/cli/selftune/alpha-upload/flush.ts +191 -0
- package/cli/selftune/alpha-upload/index.ts +194 -0
- package/cli/selftune/alpha-upload/queue.ts +252 -0
- package/cli/selftune/alpha-upload/stage-canonical.ts +242 -0
- package/cli/selftune/alpha-upload-contract.ts +52 -0
- package/cli/selftune/auth/device-code.ts +110 -0
- package/cli/selftune/auto-update.ts +130 -0
- package/cli/selftune/badge/badge.ts +19 -9
- package/cli/selftune/canonical-export.ts +16 -3
- package/cli/selftune/constants.ts +28 -8
- package/cli/selftune/contribute/bundle.ts +32 -5
- package/cli/selftune/dashboard-contract.ts +32 -1
- package/cli/selftune/dashboard-server.ts +256 -692
- package/cli/selftune/dashboard.ts +1 -1
- package/cli/selftune/eval/baseline.ts +11 -7
- package/cli/selftune/eval/hooks-to-evals.ts +27 -9
- package/cli/selftune/eval/synthetic-evals.ts +54 -1
- package/cli/selftune/evolution/audit.ts +24 -19
- package/cli/selftune/evolution/constitutional.ts +176 -0
- package/cli/selftune/evolution/evidence.ts +18 -13
- package/cli/selftune/evolution/evolve-body.ts +104 -7
- package/cli/selftune/evolution/evolve.ts +195 -22
- package/cli/selftune/evolution/propose-body.ts +18 -1
- package/cli/selftune/evolution/propose-description.ts +27 -2
- package/cli/selftune/evolution/rollback.ts +11 -15
- package/cli/selftune/export.ts +84 -0
- package/cli/selftune/grading/auto-grade.ts +13 -4
- package/cli/selftune/grading/grade-session.ts +16 -6
- package/cli/selftune/hooks/evolution-guard.ts +26 -9
- package/cli/selftune/hooks/prompt-log.ts +23 -9
- package/cli/selftune/hooks/session-stop.ts +78 -15
- package/cli/selftune/hooks/skill-eval.ts +189 -10
- package/cli/selftune/index.ts +274 -2
- package/cli/selftune/ingestors/claude-replay.ts +48 -21
- package/cli/selftune/init.ts +249 -47
- package/cli/selftune/last.ts +7 -7
- package/cli/selftune/localdb/db.ts +90 -10
- package/cli/selftune/localdb/direct-write.ts +531 -0
- package/cli/selftune/localdb/materialize.ts +296 -42
- package/cli/selftune/localdb/queries.ts +325 -32
- package/cli/selftune/localdb/schema.ts +109 -0
- package/cli/selftune/monitoring/watch.ts +26 -8
- package/cli/selftune/normalization.ts +85 -15
- package/cli/selftune/observability.ts +248 -2
- package/cli/selftune/orchestrate.ts +165 -20
- package/cli/selftune/quickstart.ts +34 -10
- package/cli/selftune/repair/skill-usage.ts +12 -2
- package/cli/selftune/routes/actions.ts +77 -0
- package/cli/selftune/routes/badge.ts +66 -0
- package/cli/selftune/routes/doctor.ts +12 -0
- package/cli/selftune/routes/index.ts +14 -0
- package/cli/selftune/routes/orchestrate-runs.ts +13 -0
- package/cli/selftune/routes/overview.ts +14 -0
- package/cli/selftune/routes/report.ts +293 -0
- package/cli/selftune/routes/skill-report.ts +230 -0
- package/cli/selftune/status.ts +203 -7
- package/cli/selftune/sync.ts +13 -1
- package/cli/selftune/types.ts +50 -0
- package/cli/selftune/utils/jsonl.ts +58 -1
- package/cli/selftune/utils/selftune-meta.ts +38 -0
- package/cli/selftune/utils/skill-log.ts +30 -4
- package/cli/selftune/utils/transcript.ts +15 -0
- package/cli/selftune/workflows/workflows.ts +7 -6
- package/package.json +11 -7
- package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
- package/packages/telemetry-contract/fixtures/golden.json +1 -0
- package/packages/telemetry-contract/fixtures/index.ts +4 -0
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
- package/packages/telemetry-contract/package.json +6 -1
- package/packages/telemetry-contract/src/index.ts +1 -0
- package/packages/telemetry-contract/src/schemas.ts +215 -0
- package/packages/telemetry-contract/src/types.ts +3 -1
- package/packages/telemetry-contract/src/validators.ts +3 -1
- package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
- package/packages/ui/package.json +4 -0
- package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
- package/packages/ui/src/components/section-cards.tsx +31 -14
- package/packages/ui/src/types.ts +1 -0
- package/skill/SKILL.md +214 -174
- package/skill/Workflows/AlphaUpload.md +45 -0
- package/skill/Workflows/Baseline.md +18 -12
- package/skill/Workflows/Composability.md +3 -3
- package/skill/Workflows/Dashboard.md +44 -91
- package/skill/Workflows/Doctor.md +93 -66
- package/skill/Workflows/Evals.md +49 -40
- package/skill/Workflows/Evolve.md +76 -28
- package/skill/Workflows/EvolveBody.md +37 -38
- package/skill/Workflows/Initialize.md +172 -26
- package/skill/Workflows/Orchestrate.md +11 -2
- package/skill/Workflows/Sync.md +23 -0
- package/skill/Workflows/Watch.md +2 -5
- package/skill/agents/diagnosis-analyst.md +163 -0
- package/skill/agents/evolution-reviewer.md +149 -0
- package/skill/agents/integration-guide.md +154 -0
- package/skill/agents/pattern-analyst.md +149 -0
- package/skill/assets/multi-skill-settings.json +1 -1
- package/skill/assets/single-skill-settings.json +1 -1
- package/skill/references/interactive-config.md +39 -0
- package/skill/references/invocation-taxonomy.md +34 -0
- package/skill/references/logs.md +9 -1
- package/skill/references/setup-patterns.md +3 -3
- package/skill/settings_snippet.json +1 -1
- package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
- package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Route handler: GET /report/:name
|
|
3
|
+
*
|
|
4
|
+
* Returns an HTML skill health report page with evolution evidence,
|
|
5
|
+
* validation results, and monitoring snapshot.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { SkillStatus, StatusResult } from "../status.js";
|
|
9
|
+
import type { EvolutionEvidenceEntry } from "../types.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Evidence for one proposal, folded together from every
 * `EvolutionEvidenceEntry` that shares the same `proposal_id`.
 */
interface MergedEvidenceEntry {
  /** Identifier shared by all evidence rows merged into this record. */
  proposal_id: string;
  /** Target of the proposal, copied from the most recent evidence row. */
  target: string;
  /** Rationale text; empty string when no row supplied one. */
  rationale: string;
  /** Confidence value, when any row recorded one. */
  confidence?: number;
  /** Text before the proposed change; empty string when not recorded. */
  original_text: string;
  /** Text after the proposed change; empty string when not recorded. */
  proposed_text: string;
  /** Eval entries attached to the proposal; empty when none were recorded. */
  eval_set: import("../types.js").EvalEntry[];
  /** Validation results, or `null` when no row carried any. */
  validation: import("../types.js").EvolutionEvidenceValidation | null;
  /** One element per merged evidence row: its stage, timestamp and details. */
  stages: { stage: string; timestamp: string; details: string }[];
  /** Timestamp of the most recent evidence row for this proposal. */
  latest_timestamp: string;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Collapses per-stage evidence rows into one record per proposal.
 *
 * Rows are processed newest-first (by string comparison of `timestamp`), so the
 * most recent row for a proposal seeds the record and supplies `target` and
 * `latest_timestamp`. Older rows only fill in fields that are still empty
 * (rationale, confidence, original/proposed text, eval set, validation).
 * Every row contributes one element to `stages`.
 *
 * @param entries Evidence rows, in any order. The array itself is not mutated.
 * @returns One merged record per distinct `proposal_id`, newest proposal first.
 */
function mergeEvidenceEntries(entries: EvolutionEvidenceEntry[]): MergedEvidenceEntry[] {
  const merged = new Map<string, MergedEvidenceEntry>();
  // Sort a copy so the caller's array keeps its original order.
  const newestFirst = [...entries].sort((a, b) => b.timestamp.localeCompare(a.timestamp));

  for (const entry of newestFirst) {
    // Single get-or-create lookup; the record always exists past this point.
    let current = merged.get(entry.proposal_id);
    if (current === undefined) {
      current = {
        proposal_id: entry.proposal_id,
        target: entry.target,
        rationale: entry.rationale ?? "",
        confidence: entry.confidence,
        original_text: entry.original_text ?? "",
        proposed_text: entry.proposed_text ?? "",
        eval_set: entry.eval_set ?? [],
        validation: entry.validation ?? null,
        stages: [],
        latest_timestamp: entry.timestamp,
      };
      merged.set(entry.proposal_id, current);
    }

    current.stages.push({
      stage: entry.stage,
      timestamp: entry.timestamp,
      details: entry.details ?? "",
    });

    // Backfill from older rows only where the newer rows left a gap.
    if (!current.rationale && entry.rationale) current.rationale = entry.rationale;
    if (current.confidence === undefined && entry.confidence !== undefined) {
      current.confidence = entry.confidence;
    }
    if (!current.original_text && entry.original_text) current.original_text = entry.original_text;
    if (!current.proposed_text && entry.proposed_text) current.proposed_text = entry.proposed_text;
    if (current.eval_set.length === 0 && entry.eval_set) current.eval_set = entry.eval_set;
    if (!current.validation && entry.validation) current.validation = entry.validation;
  }

  return [...merged.values()].sort((a, b) => b.latest_timestamp.localeCompare(a.latest_timestamp));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Escapes text for interpolation into HTML element content or a quoted
 * attribute value.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param text Raw text to embed in the page.
 * @returns `text` with `&`, `<`, `>`, `"` and `'` replaced by HTML entities.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
71
|
+
|
|
72
|
+
/**
 * Renders the standalone HTML report page for one skill.
 *
 * The page contains a health summary, an optional monitoring-snapshot table
 * (only when `skill.snapshot` is set), system-wide counters from
 * `statusResult`, up to six merged proposal records with original/proposed
 * text, and up to 100 per-entry validation rows taken from the most recent
 * proposal that has any.
 *
 * Every string value interpolated into the markup goes through `escapeHtml`;
 * the skill name in the badge URL goes through `encodeURIComponent`.
 *
 * @param skillName Name shown in the title/heading and used for the badge URL.
 * @param skill Status record for the skill being reported.
 * @param statusResult Status result the system-overview table is read from.
 * @param evidenceEntries Evidence rows to merge per proposal and render.
 * @returns The complete HTML document as a string.
 */
function buildReportHTML(
  skillName: string,
  skill: SkillStatus,
  statusResult: StatusResult,
  evidenceEntries: EvolutionEvidenceEntry[],
): string {
  const mergedEvidence = mergeEvidenceEntries(evidenceEntries);
  // mergedEvidence is newest-first, so this is the latest proposal that has
  // at least one per-entry validation result.
  const latestValidation = mergedEvidence.find(
    (entry) => entry.validation?.per_entry_results?.length,
  );
  const passRateDisplay =
    skill.passRate !== null ? `${Math.round(skill.passRate * 100)}%` : "No data";
  const trendArrows: Record<string, string> = {
    up: "\u2191",
    down: "\u2193",
    stable: "\u2192",
    unknown: "?",
  };
  const trendDisplay = trendArrows[skill.trend] ?? "?";
  const statusColor =
    skill.status === "HEALTHY"
      ? "#4c1"
      : skill.status === "CRITICAL"
        ? "#e05d44"
        : skill.status === "WARNING"
          ? "#dfb317"
          : "#9f9f9f";

  // Chip formatters. `typeof` guards (rather than `!== undefined`) keep a
  // null value from rendering as "0.0%" or throwing on `.toFixed`.
  // NOTE(review): whether persisted evidence can actually carry nulls is not
  // visible from this file; the guard is defensive.
  const formatPercent = (value: number | null | undefined): string =>
    typeof value === "number" ? `${(value * 100).toFixed(1)}%` : "\u2014";
  const formatNetChange = (value: number | null | undefined): string =>
    typeof value === "number"
      ? `${value >= 0 ? "+" : ""}${(value * 100).toFixed(1)}pp`
      : "\u2014";

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>selftune report: ${escapeHtml(skillName)}</title>
  <style>
    body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 1100px; margin: 40px auto; padding: 0 20px; color: #333; background: #fafafa; }
    h1 { font-size: 1.5rem; margin-bottom: 8px; }
    .badge { margin: 16px 0; }
    .card { background: #fff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 20px; margin: 16px 0; }
    .card h2 { font-size: 1.1rem; margin-top: 0; }
    .stat { display: inline-block; margin-right: 32px; }
    .stat-value { font-size: 2rem; font-weight: bold; }
    .stat-label { font-size: 0.85rem; color: #666; }
    table { width: 100%; border-collapse: collapse; margin: 12px 0; }
    th, td { text-align: left; padding: 8px 12px; border-bottom: 1px solid #eee; }
    th { font-weight: 600; font-size: 0.85rem; color: #666; text-transform: uppercase; }
    a { color: #0366d6; text-decoration: none; }
    a:hover { text-decoration: underline; }
    .status-badge { display: inline-block; padding: 2px 8px; border-radius: 4px; color: #fff; font-size: 0.85rem; font-weight: 600; }
    .grid { display: grid; gap: 16px; grid-template-columns: repeat(2, minmax(0, 1fr)); }
    .muted { color: #666; font-size: 0.9rem; }
    .chip { display: inline-flex; align-items: center; padding: 4px 8px; border-radius: 999px; border: 1px solid #e2e8f0; background: #f8fafc; color: #475569; font-size: 0.75rem; margin-right: 6px; margin-bottom: 6px; }
    .artifact { border: 1px solid #e2e8f0; border-radius: 8px; padding: 16px; margin-top: 12px; background: #f8fafc; }
    .artifact pre { white-space: pre-wrap; word-break: break-word; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 0.8rem; line-height: 1.5; margin: 0; }
    .diff { display: grid; gap: 12px; grid-template-columns: repeat(2, minmax(0, 1fr)); margin-top: 12px; }
    .empty { color: #666; font-size: 0.9rem; }
    @media (max-width: 800px) {
      .grid, .diff { grid-template-columns: 1fr; }
      .stat { display: block; margin-right: 0; margin-bottom: 16px; }
    }
  </style>
</head>
<body>
  <a href="/">\u2190 Dashboard</a>
  <h1>Skill Report: ${escapeHtml(skillName)}</h1>
  <div class="badge">
    <img src="/badge/${encodeURIComponent(skillName)}" alt="Skill Health Badge" />
  </div>

  <div class="card">
    <h2>Health Summary</h2>
    <div class="stat">
      <div class="stat-value">${passRateDisplay}</div>
      <div class="stat-label">Pass Rate</div>
    </div>
    <div class="stat">
      <div class="stat-value">${trendDisplay}</div>
      <div class="stat-label">Trend</div>
    </div>
    <div class="stat">
      <div class="stat-value">${skill.missedQueries}</div>
      <div class="stat-label">Missed Queries</div>
    </div>
    <div class="stat">
      <span class="status-badge" style="background: ${statusColor}">${escapeHtml(String(skill.status))}</span>
    </div>
  </div>

  ${
    skill.snapshot
      ? `
  <div class="card">
    <h2>Monitoring Snapshot</h2>
    <table>
      <tr><th>Metric</th><th>Value</th></tr>
      <tr><td>Window Sessions</td><td>${skill.snapshot.window_sessions}</td></tr>
      <tr><td>Pass Rate</td><td>${(skill.snapshot.pass_rate * 100).toFixed(1)}%</td></tr>
      <tr><td>False Negative Rate</td><td>${(skill.snapshot.false_negative_rate * 100).toFixed(1)}%</td></tr>
      <tr><td>Regression Detected</td><td>${skill.snapshot.regression_detected ? "Yes" : "No"}</td></tr>
      <tr><td>Baseline Pass Rate</td><td>${(skill.snapshot.baseline_pass_rate * 100).toFixed(1)}%</td></tr>
    </table>
  </div>`
      : ""
  }

  <div class="card">
    <h2>System Overview</h2>
    <table>
      <tr><th>Metric</th><th>Value</th></tr>
      <tr><td>Total Skills</td><td>${statusResult.skills.length}</td></tr>
      <tr><td>Unmatched Queries</td><td>${statusResult.unmatchedQueries}</td></tr>
      <tr><td>Pending Proposals</td><td>${statusResult.pendingProposals}</td></tr>
      <tr><td>Last Session</td><td>${escapeHtml(statusResult.lastSession ?? "\u2014")}</td></tr>
    </table>
  </div>

  <div class="card">
    <h2>Description Versions</h2>
    ${
      mergedEvidence.length === 0
        ? '<p class="empty">No proposal evidence recorded for this skill yet.</p>'
        : mergedEvidence
            .slice(0, 6)
            .map((entry) => {
              const before = entry.validation?.before_pass_rate;
              const after = entry.validation?.after_pass_rate;
              const net = entry.validation?.net_change;
              // Sort a copy: rendering must not reorder the merged record.
              const stageSummary = [...entry.stages]
                .sort((a, b) => b.timestamp.localeCompare(a.timestamp))
                .map(
                  (stage) =>
                    `${stage.stage} ${new Date(stage.timestamp).toLocaleString("en-US")}`,
                )
                .join(" \u00b7 ");
              return `<div class="artifact">
      <div><strong>${escapeHtml(entry.proposal_id)}</strong></div>
      <div class="muted" style="margin-top:6px;">${escapeHtml(stageSummary)}</div>
      <div style="margin-top:10px;">
        <span class="chip">${escapeHtml(entry.target)}</span>
        ${
          typeof entry.confidence === "number"
            ? `<span class="chip">conf ${entry.confidence.toFixed(2)}</span>`
            : ""
        }
        <span class="chip">before ${formatPercent(before)}</span>
        <span class="chip">after ${formatPercent(after)}</span>
        <span class="chip">net ${formatNetChange(net)}</span>
      </div>
      <p class="muted" style="margin-top:10px;">${escapeHtml(entry.rationale || "No rationale recorded")}</p>
      <div class="diff">
        <div>
          <h3 style="font-size:0.8rem;text-transform:uppercase;color:#666;">Original</h3>
          <pre>${escapeHtml(entry.original_text || "No original text recorded")}</pre>
        </div>
        <div>
          <h3 style="font-size:0.8rem;text-transform:uppercase;color:#666;">Proposed</h3>
          <pre>${escapeHtml(entry.proposed_text || "No proposed text recorded")}</pre>
        </div>
      </div>
    </div>`;
            })
            .join("")
    }
  </div>

  <div class="card">
    <h2>Validation Evidence</h2>
    ${
      latestValidation?.validation?.per_entry_results?.length
        ? `<p class="muted">Latest proposal with per-entry validation: ${escapeHtml(latestValidation.proposal_id)}</p>
    <table>
      <tr><th>Query</th><th>Expected</th><th>Before</th><th>After</th><th>Delta</th></tr>
      ${latestValidation.validation.per_entry_results
        .slice(0, 100)
        .map((result) => {
          const delta =
            result.before_pass === result.after_pass
              ? "Unchanged"
              : result.after_pass
                ? "New pass"
                : "Regression";
          return `<tr>
        <td>${escapeHtml(result.entry.query)}</td>
        <td>${result.entry.should_trigger ? "Yes" : "No"}</td>
        <td>${result.before_pass ? "Yes" : "No"}</td>
        <td>${result.after_pass ? "Yes" : "No"}</td>
        <td>${delta}</td>
      </tr>`;
        })
        .join("")}
    </table>`
        : '<p class="empty">No per-entry validation evidence recorded for this skill yet.</p>'
    }
  </div>
</body>
</html>`;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Serves the HTML health report for a single skill (GET /report/:name).
 *
 * @param statusResult Status result whose `skills` list is searched by name.
 * @param skillName Name of the skill to report on.
 * @param evidenceEntries Evidence rows; only those whose `skill_name` equals
 *   `skillName` are rendered.
 * @returns A plain-text 404 response when no skill has that name; otherwise
 *   the rendered page as `text/html` with caching disabled.
 */
export function handleReport(
  statusResult: StatusResult,
  skillName: string,
  evidenceEntries: EvolutionEvidenceEntry[],
): Response {
  const matchedSkill = statusResult.skills.find((candidate) => candidate.name === skillName);
  const skillEvidence = evidenceEntries.filter((row) => row.skill_name === skillName);

  if (matchedSkill) {
    const page = buildReportHTML(skillName, matchedSkill, statusResult, skillEvidence);
    const htmlHeaders = {
      "Content-Type": "text/html; charset=utf-8",
      "Cache-Control": "no-cache, no-store",
    };
    return new Response(page, { headers: htmlHeaders });
  }

  return new Response("Skill not found", {
    status: 404,
    headers: { "Content-Type": "text/plain" },
  });
}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Route handler: GET /api/v2/skills/:name
|
|
3
|
+
*
|
|
4
|
+
* Returns SQLite-backed per-skill report with evolution audit, pending proposals,
|
|
5
|
+
* invocation details, duration stats, selftune resource usage, prompt samples,
|
|
6
|
+
* and session metadata.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { Database } from "bun:sqlite";
|
|
10
|
+
import { getPendingProposals, getSkillReportPayload, safeParseJson } from "../localdb/queries.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Escapes the LIKE metacharacters `\`, `%` and `_` so `value` matches
 * literally inside a pattern evaluated with `ESCAPE '\'`.
 */
function escapeLikePattern(value: string): string {
  return value.replace(/[\\%_]/g, "\\$&");
}

/**
 * Builds the JSON per-skill report (GET /api/v2/skills/:name).
 *
 * Starts from `getSkillReportPayload` and adds: evolution audit rows (with the
 * parsed eval snapshot), pending proposals, resource usage summed from
 * orchestrate runs, recent invocations, execution duration and token totals,
 * the missed-trigger count, prompt samples, and session metadata.
 *
 * @param db Open SQLite database the queries run against.
 * @param skillName Skill to report on; always passed as a bound parameter.
 * @returns A 404 JSON response when none of the base report, evolution rows,
 *   pending proposals or invocations contain data for the skill; otherwise the
 *   assembled report as JSON.
 */
export function handleSkillReport(db: Database, skillName: string): Response {
  const report = getSkillReportPayload(db, skillName);
  // Skill name with LIKE wildcards neutralised, for both LIKE queries below.
  const likeSafeSkillName = escapeLikePattern(skillName);

  // 1. Evolution audit with eval_snapshot.
  // Rows without a skill_name are matched by the `evo-<skill>-` proposal-id
  // prefix. The name is escaped so `_` or `%` in it cannot act as a wildcard
  // and pull in rows that belong to other skills.
  const evolution = db
    .query(
      `SELECT timestamp, proposal_id, skill_name, action, details, eval_snapshot_json
       FROM evolution_audit
       WHERE skill_name = ? OR (skill_name IS NULL AND proposal_id LIKE 'evo-' || ? || '-%' ESCAPE '\\')
       ORDER BY timestamp DESC
       LIMIT 100`,
    )
    .all(skillName, likeSafeSkillName) as Array<{
    timestamp: string;
    proposal_id: string;
    skill_name: string | null;
    action: string;
    details: string;
    eval_snapshot_json: string | null;
  }>;
  const evolutionWithSnapshot = evolution.map((e) => ({
    ...e,
    eval_snapshot: e.eval_snapshot_json ? safeParseJson(e.eval_snapshot_json) : null,
    // Set to undefined so the raw JSON string is dropped from the serialized response.
    eval_snapshot_json: undefined,
  }));

  // 2. Pending proposals (shared helper from queries.ts)
  const pending_proposals = getPendingProposals(db, skillName);

  // CTE subquery for session IDs — avoids expanding bind parameters
  const skillSessionsCte = `
    WITH skill_sessions AS (
      SELECT DISTINCT session_id FROM skill_invocations WHERE skill_name = ?
    )`;

  // 3. Selftune resource usage from orchestrate runs that touched this skill.
  // The LIKE is only a coarse prefilter; each action's `skill` is compared
  // exactly after parsing.
  const orchestrateRows = db
    .query(
      `SELECT skill_actions_json FROM orchestrate_runs
       WHERE skill_actions_json LIKE ? ESCAPE '\\'`,
    )
    .all(`%${likeSafeSkillName}%`) as Array<{
    skill_actions_json: string;
  }>;

  let totalLlmCalls = 0;
  let totalSelftuneElapsedMs = 0;
  let selftuneRunCount = 0;
  for (const row of orchestrateRows) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(row.skill_actions_json);
    } catch {
      // skip malformed JSON
      continue;
    }
    if (!Array.isArray(parsed)) continue;

    const actions = parsed as Array<{
      skill: string;
      action?: string;
      elapsed_ms?: number;
      llm_calls?: number;
    } | null>;
    for (const a of actions) {
      // Ignore non-object elements instead of abandoning the rest of the run.
      if (a === null || typeof a !== "object") continue;
      if (a.skill !== skillName || a.action === "skip" || a.action === "watch") continue;
      if (a.elapsed_ms === undefined && a.llm_calls === undefined) continue;
      totalSelftuneElapsedMs += a.elapsed_ms ?? 0;
      totalLlmCalls += a.llm_calls ?? 0;
      selftuneRunCount++;
    }
  }
  const selftuneStats = {
    total_llm_calls: totalLlmCalls,
    total_elapsed_ms: totalSelftuneElapsedMs,
    avg_elapsed_ms: selftuneRunCount > 0 ? totalSelftuneElapsedMs / selftuneRunCount : 0,
    run_count: selftuneRunCount,
  };

  // 4. Skill invocations — single source of truth
  // JOIN prompts to recover query text when si.query is null (canonical records
  // don't carry query; it's only populated via the direct-write hook path).
  const invocationsWithConfidence = db
    .query(
      `SELECT si.occurred_at as timestamp, si.session_id, si.skill_name,
              si.invocation_mode, si.triggered, si.confidence, si.tool_name,
              si.agent_type, COALESCE(si.query, p.prompt_text) as query, si.source
       FROM skill_invocations si
       LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id
       WHERE si.skill_name = ?
       ORDER BY si.occurred_at DESC
       LIMIT 100`,
    )
    .all(skillName) as Array<{
    timestamp: string;
    session_id: string;
    skill_name: string;
    invocation_mode: string | null;
    triggered: number;
    confidence: number | null;
    tool_name: string | null;
    agent_type: string | null;
    query: string | null;
    source: string | null;
  }>;

  // Not-found check — after all enrichment queries so evidence-only skills aren't 404'd
  const hasData =
    report.usage.total_checks > 0 ||
    report.recent_invocations.length > 0 ||
    report.evidence.length > 0 ||
    evolution.length > 0 ||
    pending_proposals.length > 0 ||
    invocationsWithConfidence.length > 0;
  if (!hasData) {
    return Response.json({ error: "Skill not found" }, { status: 404 });
  }

  // 5. Duration stats from execution_facts + missed trigger count
  const executionRow = db
    .query(
      `${skillSessionsCte}
       SELECT
         COALESCE(AVG(ef.duration_ms), 0) AS avg_duration_ms,
         COALESCE(SUM(ef.duration_ms), 0) AS total_duration_ms,
         COUNT(ef.duration_ms) AS execution_count,
         COALESCE(SUM(ef.input_tokens), 0) AS total_input_tokens,
         COALESCE(SUM(ef.output_tokens), 0) AS total_output_tokens
       FROM execution_facts ef
       WHERE ef.session_id IN (SELECT session_id FROM skill_sessions)`,
    )
    .get(skillName) as {
    avg_duration_ms: number;
    total_duration_ms: number;
    execution_count: number;
    total_input_tokens: number;
    total_output_tokens: number;
  } | null;

  // Missed triggers: checks where the skill was evaluated but did not fire
  const missedRow = db
    .query(
      `SELECT COUNT(*) AS missed_triggers
       FROM skill_invocations
       WHERE skill_name = ? AND triggered = 0`,
    )
    .get(skillName) as { missed_triggers: number } | null;

  // 6. Prompt texts — prefer matched prompts (the prompt that invoked the skill),
  //    fall back to all prompts from sessions that used the skill.
  // NOTE(review): the LEFT JOIN yields one row per matching invocation, so a
  // prompt matched by several invocations of this skill would appear more than
  // once — confirm whether that can happen in practice.
  const promptSamples = db
    .query(
      `${skillSessionsCte}
       SELECT p.prompt_text, p.prompt_kind, p.is_actionable, p.occurred_at, p.session_id,
              CASE WHEN si.matched_prompt_id IS NOT NULL THEN 1 ELSE 0 END AS is_matched
       FROM prompts p
       LEFT JOIN skill_invocations si ON si.matched_prompt_id = p.prompt_id
         AND si.skill_name = ?
       WHERE p.session_id IN (SELECT session_id FROM skill_sessions)
         AND p.prompt_text IS NOT NULL
         AND p.prompt_text != ''
       ORDER BY is_matched DESC, p.occurred_at DESC
       LIMIT 50`,
    )
    // Two placeholders: the CTE's skill filter, then the join's skill filter.
    .all(skillName, skillName) as Array<{
    prompt_text: string;
    prompt_kind: string | null;
    is_actionable: number;
    occurred_at: string;
    session_id: string;
    is_matched: number;
  }>;

  // 7. Session metadata for sessions that used this skill
  const sessionMeta = db
    .query(
      `${skillSessionsCte}
       SELECT s.session_id, s.platform, s.model, s.agent_cli, s.branch,
              s.workspace_path, s.started_at, s.ended_at, s.completion_status
       FROM sessions s
       WHERE s.session_id IN (SELECT session_id FROM skill_sessions)
       ORDER BY s.started_at DESC
       LIMIT 50`,
    )
    .all(skillName) as Array<{
    session_id: string;
    platform: string | null;
    model: string | null;
    agent_cli: string | null;
    branch: string | null;
    workspace_path: string | null;
    started_at: string | null;
    ended_at: string | null;
    completion_status: string | null;
  }>;

  return Response.json({
    ...report,
    evolution: evolutionWithSnapshot,
    pending_proposals,
    token_usage: {
      total_input_tokens: executionRow?.total_input_tokens ?? 0,
      total_output_tokens: executionRow?.total_output_tokens ?? 0,
    },
    // SQLite returns 0/1 integers; expose `triggered` as a real boolean.
    canonical_invocations: invocationsWithConfidence.map((i) => ({
      ...i,
      triggered: i.triggered === 1,
    })),
    duration_stats: {
      avg_duration_ms: executionRow?.avg_duration_ms ?? 0,
      total_duration_ms: executionRow?.total_duration_ms ?? 0,
      execution_count: executionRow?.execution_count ?? 0,
      missed_triggers: missedRow?.missed_triggers ?? 0,
    },
    selftune_stats: selftuneStats,
    prompt_samples: promptSamples.map((p) => ({
      ...p,
      is_actionable: p.is_actionable === 1,
    })),
    session_metadata: sessionMeta,
  });
}
|