selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
package/cli/selftune/status.ts
CHANGED
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
* - cliMain() (reads logs, runs doctor, prints output)
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import { EVOLUTION_AUDIT_LOG, QUERY_LOG,
|
|
11
|
-
import { computeMonitoringSnapshot } from "./monitoring/watch.js";
|
|
10
|
+
import { EVOLUTION_AUDIT_LOG, QUERY_LOG, TELEMETRY_LOG } from "./constants.js";
|
|
11
|
+
import { computeMonitoringSnapshot, MIN_MONITORING_SKILL_CHECKS } from "./monitoring/watch.js";
|
|
12
12
|
import { doctor } from "./observability.js";
|
|
13
13
|
import type {
|
|
14
14
|
DoctorResult,
|
|
@@ -19,6 +19,11 @@ import type {
|
|
|
19
19
|
SkillUsageRecord,
|
|
20
20
|
} from "./types.js";
|
|
21
21
|
import { readJsonl } from "./utils/jsonl.js";
|
|
22
|
+
import {
|
|
23
|
+
filterActionableQueryRecords,
|
|
24
|
+
filterActionableSkillUsageRecords,
|
|
25
|
+
} from "./utils/query-filter.js";
|
|
26
|
+
import { readEffectiveSkillUsageRecords } from "./utils/skill-log.js";
|
|
22
27
|
|
|
23
28
|
// ---------------------------------------------------------------------------
|
|
24
29
|
// Result types
|
|
@@ -29,7 +34,7 @@ export interface SkillStatus {
|
|
|
29
34
|
passRate: number | null;
|
|
30
35
|
trend: "up" | "down" | "stable" | "unknown";
|
|
31
36
|
missedQueries: number;
|
|
32
|
-
status: "HEALTHY" | "
|
|
37
|
+
status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
|
|
33
38
|
snapshot: MonitoringSnapshot | null;
|
|
34
39
|
}
|
|
35
40
|
|
|
@@ -50,7 +55,7 @@ export interface StatusResult {
|
|
|
50
55
|
// Constants
|
|
51
56
|
// ---------------------------------------------------------------------------
|
|
52
57
|
|
|
53
|
-
const DEFAULT_WINDOW_SESSIONS = 20;
|
|
58
|
+
export const DEFAULT_WINDOW_SESSIONS = 20;
|
|
54
59
|
const DEFAULT_BASELINE_PASS_RATE = 0.5;
|
|
55
60
|
|
|
56
61
|
// ---------------------------------------------------------------------------
|
|
@@ -64,13 +69,14 @@ export function computeStatus(
|
|
|
64
69
|
auditEntries: EvolutionAuditEntry[],
|
|
65
70
|
doctorResult: DoctorResult,
|
|
66
71
|
): StatusResult {
|
|
72
|
+
const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
|
|
73
|
+
const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
|
|
67
74
|
// Derive unique skill names from skill records
|
|
68
|
-
const skillNames = [...new Set(
|
|
75
|
+
const skillNames = [...new Set(actionableSkillRecords.map((r) => r.skill_name))];
|
|
69
76
|
|
|
70
77
|
// Build per-skill status
|
|
71
78
|
const skills: SkillStatus[] = skillNames.map((skillName) => {
|
|
72
|
-
const skillSpecificRecords =
|
|
73
|
-
const triggeredRecords = skillSpecificRecords.filter((r) => r.triggered);
|
|
79
|
+
const skillSpecificRecords = actionableSkillRecords.filter((r) => r.skill_name === skillName);
|
|
74
80
|
|
|
75
81
|
// Get baseline from last deployed proposal
|
|
76
82
|
const lastDeployed = getLastDeployedProposalFromEntries(auditEntries, skillName);
|
|
@@ -80,21 +86,19 @@ export function computeStatus(
|
|
|
80
86
|
const snapshot = computeMonitoringSnapshot(
|
|
81
87
|
skillName,
|
|
82
88
|
telemetry,
|
|
83
|
-
|
|
84
|
-
|
|
89
|
+
actionableSkillRecords,
|
|
90
|
+
actionableQueryRecords,
|
|
85
91
|
DEFAULT_WINDOW_SESSIONS,
|
|
86
92
|
baselinePassRate,
|
|
87
93
|
);
|
|
88
94
|
|
|
89
|
-
//
|
|
90
|
-
|
|
91
|
-
const hasData =
|
|
95
|
+
// A skill has data when it has explicit check records, regardless of whether any passed.
|
|
96
|
+
// Using triggered-only rows would incorrectly hide meaningful all-false samples.
|
|
97
|
+
const hasData = skillSpecificRecords.length > 0;
|
|
98
|
+
const hasEnoughSamples = snapshot.skill_checks >= MIN_MONITORING_SKILL_CHECKS;
|
|
92
99
|
|
|
93
|
-
// Compute pass rate (null if no
|
|
94
|
-
|
|
95
|
-
if (hasData && totalQueries > 0) {
|
|
96
|
-
passRate = snapshot.pass_rate;
|
|
97
|
-
}
|
|
100
|
+
// Compute pass rate (null only if this skill has no graded checks at all)
|
|
101
|
+
const passRate = hasData ? snapshot.pass_rate : null;
|
|
98
102
|
|
|
99
103
|
// Determine trend: compare first-half vs second-half pass rates
|
|
100
104
|
const trend = computeTrend(skillSpecificRecords);
|
|
@@ -102,12 +106,15 @@ export function computeStatus(
|
|
|
102
106
|
// Count missed queries for this skill (queries where skill was checked but not triggered)
|
|
103
107
|
const missedQueries = skillSpecificRecords.filter((r) => !r.triggered).length;
|
|
104
108
|
|
|
105
|
-
// Determine status
|
|
106
|
-
let status: "HEALTHY" | "
|
|
107
|
-
if (!hasData || passRate === null) {
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
109
|
+
// Determine status (5-state)
|
|
110
|
+
let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
|
|
111
|
+
if (!hasData || passRate === null || !hasEnoughSamples) {
|
|
112
|
+
// Skill exists in logs but has too little data for a meaningful health label
|
|
113
|
+
status = skillSpecificRecords.length > 0 ? "UNGRADED" : "UNKNOWN";
|
|
114
|
+
} else if (snapshot.regression_detected || passRate < 0.4) {
|
|
115
|
+
status = "CRITICAL";
|
|
116
|
+
} else if (passRate < 0.7) {
|
|
117
|
+
status = "WARNING";
|
|
111
118
|
} else {
|
|
112
119
|
status = "HEALTHY";
|
|
113
120
|
}
|
|
@@ -115,15 +122,23 @@ export function computeStatus(
|
|
|
115
122
|
return { name: skillName, passRate, trend, missedQueries, status, snapshot };
|
|
116
123
|
});
|
|
117
124
|
|
|
118
|
-
// Sort:
|
|
119
|
-
const statusOrder
|
|
125
|
+
// Sort: CRITICAL first, then WARNING, then HEALTHY, then UNGRADED, then UNKNOWN
|
|
126
|
+
const statusOrder: Record<string, number> = {
|
|
127
|
+
CRITICAL: 0,
|
|
128
|
+
WARNING: 1,
|
|
129
|
+
HEALTHY: 2,
|
|
130
|
+
UNGRADED: 3,
|
|
131
|
+
UNKNOWN: 4,
|
|
132
|
+
};
|
|
120
133
|
skills.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);
|
|
121
134
|
|
|
122
135
|
// Unmatched queries: queries whose text appears in zero triggered skill_usage_log entries
|
|
123
136
|
const triggeredQueryTexts = new Set(
|
|
124
|
-
|
|
137
|
+
actionableSkillRecords
|
|
138
|
+
.filter((r) => r.triggered && typeof r.query === "string")
|
|
139
|
+
.map((r) => r.query.toLowerCase().trim()),
|
|
125
140
|
);
|
|
126
|
-
const unmatchedQueries =
|
|
141
|
+
const unmatchedQueries = actionableQueryRecords.filter(
|
|
127
142
|
(q) => !triggeredQueryTexts.has(q.query.toLowerCase().trim()),
|
|
128
143
|
).length;
|
|
129
144
|
|
|
@@ -231,7 +246,7 @@ export function formatStatus(result: StatusResult): string {
|
|
|
231
246
|
lines.push(" Name Pass Rate Trend Missed Status");
|
|
232
247
|
|
|
233
248
|
for (const skill of result.skills) {
|
|
234
|
-
const name = skill.name.padEnd(16);
|
|
249
|
+
const name = skill.name.slice(0, 16).padEnd(16);
|
|
235
250
|
const passRate =
|
|
236
251
|
skill.passRate !== null
|
|
237
252
|
? `${Math.round(skill.passRate * 100)}%`.padEnd(11)
|
|
@@ -239,14 +254,25 @@ export function formatStatus(result: StatusResult): string {
|
|
|
239
254
|
const trend = TREND_SYMBOLS[skill.trend].padEnd(7);
|
|
240
255
|
const missed = String(skill.missedQueries).padEnd(8);
|
|
241
256
|
const statusText =
|
|
242
|
-
skill.status === "
|
|
257
|
+
skill.status === "CRITICAL"
|
|
243
258
|
? red(skill.status)
|
|
244
|
-
: skill.status === "
|
|
245
|
-
?
|
|
246
|
-
:
|
|
259
|
+
: skill.status === "WARNING"
|
|
260
|
+
? amber(skill.status)
|
|
261
|
+
: skill.status === "HEALTHY"
|
|
262
|
+
? green(skill.status)
|
|
263
|
+
: skill.status === "UNGRADED"
|
|
264
|
+
? amber(skill.status)
|
|
265
|
+
: amber(skill.status);
|
|
247
266
|
lines.push(` ${name}${passRate}${trend}${missed}${statusText}`);
|
|
248
267
|
}
|
|
249
268
|
|
|
269
|
+
// Onboarding hint for ungraded skills
|
|
270
|
+
const ungradedSkills = result.skills.filter((s) => s.status === "UNGRADED");
|
|
271
|
+
if (ungradedSkills.length > 0) {
|
|
272
|
+
lines.push("");
|
|
273
|
+
lines.push(` Hint: Run \`selftune grade --skill <name>\` to establish baselines`);
|
|
274
|
+
}
|
|
275
|
+
|
|
250
276
|
lines.push("");
|
|
251
277
|
|
|
252
278
|
// Summary stats
|
|
@@ -301,7 +327,7 @@ function colorize(text: string, hex: string): string {
|
|
|
301
327
|
export function cliMain(): void {
|
|
302
328
|
try {
|
|
303
329
|
const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
|
|
304
|
-
const skillRecords =
|
|
330
|
+
const skillRecords = readEffectiveSkillUsageRecords();
|
|
305
331
|
const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
|
|
306
332
|
const auditEntries = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);
|
|
307
333
|
const doctorResult = doctor();
|