selftune 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +20 -10
- package/.claude/agents/evolution-reviewer.md +14 -1
- package/.claude/agents/integration-guide.md +18 -6
- package/.claude/agents/pattern-analyst.md +18 -5
- package/CHANGELOG.md +12 -4
- package/README.md +43 -35
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/cli/selftune/badge/badge-data.ts +1 -1
- package/cli/selftune/badge/badge.ts +4 -8
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +28 -0
- package/cli/selftune/contribute/contribute.ts +1 -1
- package/cli/selftune/cron/setup.ts +17 -17
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +653 -186
- package/cli/selftune/dashboard.ts +41 -176
- package/cli/selftune/eval/baseline.ts +5 -4
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/hooks-to-evals.ts +34 -15
- package/cli/selftune/eval/unit-test-cli.ts +1 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +105 -11
- package/cli/selftune/evolution/evolve.ts +371 -25
- package/cli/selftune/evolution/extract-patterns.ts +87 -29
- package/cli/selftune/evolution/rollback.ts +2 -2
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +448 -97
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +395 -116
- package/cli/selftune/ingestors/claude-replay.ts +140 -114
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +227 -14
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/monitoring/watch.ts +66 -15
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +48 -26
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +148 -0
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +78 -20
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +272 -26
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +21 -8
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +84 -53
- package/skill/Workflows/AutoActivation.md +17 -16
- package/skill/Workflows/Badge.md +6 -0
- package/skill/Workflows/Baseline.md +46 -23
- package/skill/Workflows/Composability.md +12 -5
- package/skill/Workflows/Contribute.md +17 -14
- package/skill/Workflows/Cron.md +56 -79
- package/skill/Workflows/Dashboard.md +45 -34
- package/skill/Workflows/Doctor.md +30 -17
- package/skill/Workflows/Evals.md +64 -40
- package/skill/Workflows/EvolutionMemory.md +2 -0
- package/skill/Workflows/Evolve.md +102 -47
- package/skill/Workflows/EvolveBody.md +6 -6
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +11 -5
- package/skill/Workflows/Ingest.md +43 -36
- package/skill/Workflows/Initialize.md +44 -30
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +39 -18
- package/skill/Workflows/Rollback.md +3 -3
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +34 -22
- package/skill/Workflows/Watch.md +14 -4
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +1 -1
- package/templates/multi-skill-settings.json +7 -7
- package/templates/single-skill-settings.json +6 -6
- package/dashboard/index.html +0 -1680
package/cli/selftune/status.ts
CHANGED
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
* - cliMain() (reads logs, runs doctor, prints output)
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import { EVOLUTION_AUDIT_LOG, QUERY_LOG,
|
|
11
|
-
import { computeMonitoringSnapshot } from "./monitoring/watch.js";
|
|
10
|
+
import { EVOLUTION_AUDIT_LOG, QUERY_LOG, TELEMETRY_LOG } from "./constants.js";
|
|
11
|
+
import { computeMonitoringSnapshot, MIN_MONITORING_SKILL_CHECKS } from "./monitoring/watch.js";
|
|
12
12
|
import { doctor } from "./observability.js";
|
|
13
13
|
import type {
|
|
14
14
|
DoctorResult,
|
|
@@ -19,6 +19,11 @@ import type {
|
|
|
19
19
|
SkillUsageRecord,
|
|
20
20
|
} from "./types.js";
|
|
21
21
|
import { readJsonl } from "./utils/jsonl.js";
|
|
22
|
+
import {
|
|
23
|
+
filterActionableQueryRecords,
|
|
24
|
+
filterActionableSkillUsageRecords,
|
|
25
|
+
} from "./utils/query-filter.js";
|
|
26
|
+
import { readEffectiveSkillUsageRecords } from "./utils/skill-log.js";
|
|
22
27
|
|
|
23
28
|
// ---------------------------------------------------------------------------
|
|
24
29
|
// Result types
|
|
@@ -29,7 +34,7 @@ export interface SkillStatus {
|
|
|
29
34
|
passRate: number | null;
|
|
30
35
|
trend: "up" | "down" | "stable" | "unknown";
|
|
31
36
|
missedQueries: number;
|
|
32
|
-
status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
|
|
37
|
+
status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
|
|
33
38
|
snapshot: MonitoringSnapshot | null;
|
|
34
39
|
}
|
|
35
40
|
|
|
@@ -50,7 +55,7 @@ export interface StatusResult {
|
|
|
50
55
|
// Constants
|
|
51
56
|
// ---------------------------------------------------------------------------
|
|
52
57
|
|
|
53
|
-
const DEFAULT_WINDOW_SESSIONS = 20;
|
|
58
|
+
export const DEFAULT_WINDOW_SESSIONS = 20;
|
|
54
59
|
const DEFAULT_BASELINE_PASS_RATE = 0.5;
|
|
55
60
|
|
|
56
61
|
// ---------------------------------------------------------------------------
|
|
@@ -64,13 +69,14 @@ export function computeStatus(
|
|
|
64
69
|
auditEntries: EvolutionAuditEntry[],
|
|
65
70
|
doctorResult: DoctorResult,
|
|
66
71
|
): StatusResult {
|
|
72
|
+
const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
|
|
73
|
+
const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
|
|
67
74
|
// Derive unique skill names from skill records
|
|
68
|
-
const skillNames = [...new Set(
|
|
75
|
+
const skillNames = [...new Set(actionableSkillRecords.map((r) => r.skill_name))];
|
|
69
76
|
|
|
70
77
|
// Build per-skill status
|
|
71
78
|
const skills: SkillStatus[] = skillNames.map((skillName) => {
|
|
72
|
-
const skillSpecificRecords =
|
|
73
|
-
const triggeredRecords = skillSpecificRecords.filter((r) => r.triggered);
|
|
79
|
+
const skillSpecificRecords = actionableSkillRecords.filter((r) => r.skill_name === skillName);
|
|
74
80
|
|
|
75
81
|
// Get baseline from last deployed proposal
|
|
76
82
|
const lastDeployed = getLastDeployedProposalFromEntries(auditEntries, skillName);
|
|
@@ -80,21 +86,19 @@ export function computeStatus(
|
|
|
80
86
|
const snapshot = computeMonitoringSnapshot(
|
|
81
87
|
skillName,
|
|
82
88
|
telemetry,
|
|
83
|
-
|
|
84
|
-
|
|
89
|
+
actionableSkillRecords,
|
|
90
|
+
actionableQueryRecords,
|
|
85
91
|
DEFAULT_WINDOW_SESSIONS,
|
|
86
92
|
baselinePassRate,
|
|
87
93
|
);
|
|
88
94
|
|
|
89
|
-
//
|
|
90
|
-
|
|
91
|
-
const hasData =
|
|
95
|
+
// A skill has data when it has explicit check records, regardless of whether any passed.
|
|
96
|
+
// Using triggered-only rows would incorrectly hide meaningful all-false samples.
|
|
97
|
+
const hasData = skillSpecificRecords.length > 0;
|
|
98
|
+
const hasEnoughSamples = snapshot.skill_checks >= MIN_MONITORING_SKILL_CHECKS;
|
|
92
99
|
|
|
93
|
-
// Compute pass rate (null if no
|
|
94
|
-
|
|
95
|
-
if (hasData && totalQueries > 0) {
|
|
96
|
-
passRate = snapshot.pass_rate;
|
|
97
|
-
}
|
|
100
|
+
// Compute pass rate (null only if this skill has no graded checks at all)
|
|
101
|
+
const passRate = hasData ? snapshot.pass_rate : null;
|
|
98
102
|
|
|
99
103
|
// Determine trend: compare first-half vs second-half pass rates
|
|
100
104
|
const trend = computeTrend(skillSpecificRecords);
|
|
@@ -102,10 +106,11 @@ export function computeStatus(
|
|
|
102
106
|
// Count missed queries for this skill (queries where skill was checked but not triggered)
|
|
103
107
|
const missedQueries = skillSpecificRecords.filter((r) => !r.triggered).length;
|
|
104
108
|
|
|
105
|
-
// Determine status (
|
|
106
|
-
let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNKNOWN";
|
|
107
|
-
if (!hasData || passRate === null) {
|
|
108
|
-
|
|
109
|
+
// Determine status (5-state)
|
|
110
|
+
let status: "HEALTHY" | "WARNING" | "CRITICAL" | "UNGRADED" | "UNKNOWN";
|
|
111
|
+
if (!hasData || passRate === null || !hasEnoughSamples) {
|
|
112
|
+
// Skill exists in logs but has too little data for a meaningful health label
|
|
113
|
+
status = skillSpecificRecords.length > 0 ? "UNGRADED" : "UNKNOWN";
|
|
109
114
|
} else if (snapshot.regression_detected || passRate < 0.4) {
|
|
110
115
|
status = "CRITICAL";
|
|
111
116
|
} else if (passRate < 0.7) {
|
|
@@ -118,14 +123,22 @@ export function computeStatus(
|
|
|
118
123
|
});
|
|
119
124
|
|
|
120
125
|
// Sort: CRITICAL first, then WARNING, then HEALTHY, then UNGRADED, then UNKNOWN
|
|
121
|
-
const statusOrder: Record<string, number> = {
|
|
126
|
+
const statusOrder: Record<string, number> = {
|
|
127
|
+
CRITICAL: 0,
|
|
128
|
+
WARNING: 1,
|
|
129
|
+
HEALTHY: 2,
|
|
130
|
+
UNGRADED: 3,
|
|
131
|
+
UNKNOWN: 4,
|
|
132
|
+
};
|
|
122
133
|
skills.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);
|
|
123
134
|
|
|
124
135
|
// Unmatched queries: queries whose text appears in zero triggered skill_usage_log entries
|
|
125
136
|
const triggeredQueryTexts = new Set(
|
|
126
|
-
|
|
137
|
+
actionableSkillRecords
|
|
138
|
+
.filter((r) => r.triggered && typeof r.query === "string")
|
|
139
|
+
.map((r) => r.query.toLowerCase().trim()),
|
|
127
140
|
);
|
|
128
|
-
const unmatchedQueries =
|
|
141
|
+
const unmatchedQueries = actionableQueryRecords.filter(
|
|
129
142
|
(q) => !triggeredQueryTexts.has(q.query.toLowerCase().trim()),
|
|
130
143
|
).length;
|
|
131
144
|
|
|
@@ -247,10 +260,19 @@ export function formatStatus(result: StatusResult): string {
|
|
|
247
260
|
? amber(skill.status)
|
|
248
261
|
: skill.status === "HEALTHY"
|
|
249
262
|
? green(skill.status)
|
|
250
|
-
:
|
|
263
|
+
: skill.status === "UNGRADED"
|
|
264
|
+
? amber(skill.status)
|
|
265
|
+
: amber(skill.status);
|
|
251
266
|
lines.push(` ${name}${passRate}${trend}${missed}${statusText}`);
|
|
252
267
|
}
|
|
253
268
|
|
|
269
|
+
// Onboarding hint for ungraded skills
|
|
270
|
+
const ungradedSkills = result.skills.filter((s) => s.status === "UNGRADED");
|
|
271
|
+
if (ungradedSkills.length > 0) {
|
|
272
|
+
lines.push("");
|
|
273
|
+
lines.push(` Hint: Run \`selftune grade --skill <name>\` to establish baselines`);
|
|
274
|
+
}
|
|
275
|
+
|
|
254
276
|
lines.push("");
|
|
255
277
|
|
|
256
278
|
// Summary stats
|
|
@@ -305,7 +327,7 @@ function colorize(text: string, hex: string): string {
|
|
|
305
327
|
export function cliMain(): void {
|
|
306
328
|
try {
|
|
307
329
|
const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
|
|
308
|
-
const skillRecords =
|
|
330
|
+
const skillRecords = readEffectiveSkillUsageRecords();
|
|
309
331
|
const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
|
|
310
332
|
const auditEntries = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);
|
|
311
333
|
const doctorResult = doctor();
|