selftune 0.2.13 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/activation-rules.ts +24 -48
- package/cli/selftune/analytics.ts +13 -11
- package/cli/selftune/badge/badge.ts +13 -9
- package/cli/selftune/canonical-export.ts +6 -6
- package/cli/selftune/constants.ts +7 -0
- package/cli/selftune/contribute/bundle.ts +9 -44
- package/cli/selftune/contribute/contribute.ts +2 -1
- package/cli/selftune/cron/setup.ts +3 -1
- package/cli/selftune/dashboard-contract.ts +22 -0
- package/cli/selftune/dashboard.ts +10 -5
- package/cli/selftune/eval/baseline.ts +20 -30
- package/cli/selftune/eval/hooks-to-evals.ts +27 -34
- package/cli/selftune/eval/import-skillsbench.ts +21 -8
- package/cli/selftune/eval/unit-test-cli.ts +22 -11
- package/cli/selftune/evolution/description-quality.ts +224 -0
- package/cli/selftune/evolution/evolve-body.ts +17 -10
- package/cli/selftune/evolution/evolve.ts +70 -57
- package/cli/selftune/evolution/rollback.ts +7 -6
- package/cli/selftune/grading/auto-grade.ts +27 -35
- package/cli/selftune/grading/grade-session.ts +24 -30
- package/cli/selftune/hooks/auto-activate.ts +12 -3
- package/cli/selftune/hooks/evolution-guard.ts +14 -24
- package/cli/selftune/hooks/prompt-log.ts +7 -9
- package/cli/selftune/hooks/session-stop.ts +0 -8
- package/cli/selftune/index.ts +66 -69
- package/cli/selftune/ingestors/claude-replay.ts +29 -14
- package/cli/selftune/ingestors/codex-rollout.ts +15 -5
- package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
- package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
- package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
- package/cli/selftune/init.ts +14 -9
- package/cli/selftune/localdb/queries.ts +57 -0
- package/cli/selftune/monitoring/watch.ts +39 -38
- package/cli/selftune/normalization.ts +2 -23
- package/cli/selftune/orchestrate.ts +224 -24
- package/cli/selftune/routes/skill-report.ts +17 -0
- package/cli/selftune/schedule.ts +74 -14
- package/cli/selftune/sync.ts +7 -3
- package/cli/selftune/types.ts +44 -10
- package/cli/selftune/utils/cli-error.ts +102 -0
- package/cli/selftune/utils/jsonl.ts +2 -0
- package/cli/selftune/workflows/workflows.ts +23 -17
- package/package.json +3 -1
- package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
- package/packages/ui/src/components/index.ts +1 -0
- package/packages/ui/src/components/section-cards.tsx +13 -0
- package/skill/SKILL.md +1 -1
- package/skill/Workflows/Evolve.md +4 -0
- package/skill/Workflows/Initialize.md +8 -8
- package/skill/Workflows/Orchestrate.md +11 -7
- package/skill/Workflows/Schedule.md +11 -0
- package/skill/references/logs.md +22 -21
- package/skill/settings_snippet.json +29 -6
- package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
- package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
- package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
|
@@ -36,7 +36,7 @@ import type {
|
|
|
36
36
|
SessionTelemetryRecord,
|
|
37
37
|
SkillUsageRecord,
|
|
38
38
|
} from "../types.js";
|
|
39
|
-
import {
|
|
39
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
40
40
|
import { detectAgent } from "../utils/llm-call.js";
|
|
41
41
|
import {
|
|
42
42
|
filterActionableQueryRecords,
|
|
@@ -410,18 +410,27 @@ export async function cliMain(): Promise<void> {
|
|
|
410
410
|
// --- Synthetic mode: generate evals from SKILL.md via LLM ---
|
|
411
411
|
if (values.synthetic) {
|
|
412
412
|
if (!values.skill) {
|
|
413
|
-
|
|
414
|
-
|
|
413
|
+
throw new CLIError(
|
|
414
|
+
"--skill required with --synthetic",
|
|
415
|
+
"MISSING_FLAG",
|
|
416
|
+
"selftune evals --synthetic --skill <name> --skill-path <path>",
|
|
417
|
+
);
|
|
415
418
|
}
|
|
416
419
|
if (!values["skill-path"]) {
|
|
417
|
-
|
|
418
|
-
|
|
420
|
+
throw new CLIError(
|
|
421
|
+
"--skill-path required with --synthetic",
|
|
422
|
+
"MISSING_FLAG",
|
|
423
|
+
"selftune evals --synthetic --skill <name> --skill-path <path>",
|
|
424
|
+
);
|
|
419
425
|
}
|
|
420
426
|
|
|
421
427
|
const agent = detectAgent();
|
|
422
428
|
if (!agent) {
|
|
423
|
-
|
|
424
|
-
|
|
429
|
+
throw new CLIError(
|
|
430
|
+
"No agent CLI found (claude/codex/opencode)",
|
|
431
|
+
"AGENT_NOT_FOUND",
|
|
432
|
+
"Install one of the supported agent CLIs",
|
|
433
|
+
);
|
|
425
434
|
}
|
|
426
435
|
|
|
427
436
|
const maxPerSide = Number.parseInt(values.max ?? "50", 10);
|
|
@@ -464,31 +473,15 @@ export async function cliMain(): Promise<void> {
|
|
|
464
473
|
return;
|
|
465
474
|
}
|
|
466
475
|
|
|
467
|
-
// ---
|
|
468
|
-
const skillLogPath = values["skill-log"] ?? SKILL_LOG;
|
|
469
|
-
const queryLogPath = values["query-log"] ?? QUERY_LOG;
|
|
470
|
-
const telemetryLogPath = values["telemetry-log"] ?? TELEMETRY_LOG;
|
|
471
|
-
|
|
476
|
+
// --- SQLite-based mode ---
|
|
472
477
|
let skillRecords: SkillUsageRecord[];
|
|
473
478
|
let queryRecords: QueryLogRecord[];
|
|
474
479
|
let telemetryRecords: SessionTelemetryRecord[];
|
|
475
480
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
telemetryLogPath === TELEMETRY_LOG
|
|
481
|
-
) {
|
|
482
|
-
const db = getDb();
|
|
483
|
-
skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
484
|
-
queryRecords = queryQueryLog(db) as QueryLogRecord[];
|
|
485
|
-
telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
486
|
-
} else {
|
|
487
|
-
// test/custom-path fallback
|
|
488
|
-
skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
|
|
489
|
-
queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
|
|
490
|
-
telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
|
|
491
|
-
}
|
|
481
|
+
const db = getDb();
|
|
482
|
+
skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
483
|
+
queryRecords = queryQueryLog(db) as QueryLogRecord[];
|
|
484
|
+
telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
492
485
|
|
|
493
486
|
if (values["list-skills"]) {
|
|
494
487
|
listSkills(skillRecords, queryRecords, telemetryRecords);
|
|
@@ -496,8 +489,11 @@ export async function cliMain(): Promise<void> {
|
|
|
496
489
|
}
|
|
497
490
|
|
|
498
491
|
if (!values.skill) {
|
|
499
|
-
|
|
500
|
-
|
|
492
|
+
throw new CLIError(
|
|
493
|
+
"--skill required (or use --list-skills)",
|
|
494
|
+
"MISSING_FLAG",
|
|
495
|
+
"selftune evals --skill <name> or selftune evals --list-skills",
|
|
496
|
+
);
|
|
501
497
|
}
|
|
502
498
|
|
|
503
499
|
if (values.stats) {
|
|
@@ -525,8 +521,5 @@ export async function cliMain(): Promise<void> {
|
|
|
525
521
|
}
|
|
526
522
|
|
|
527
523
|
if (import.meta.main) {
|
|
528
|
-
cliMain().catch(
|
|
529
|
-
console.error(err);
|
|
530
|
-
process.exit(1);
|
|
531
|
-
});
|
|
524
|
+
cliMain().catch(handleCLIError);
|
|
532
525
|
}
|
|
@@ -15,6 +15,7 @@ import { join } from "node:path";
|
|
|
15
15
|
import { parseArgs } from "node:util";
|
|
16
16
|
|
|
17
17
|
import type { EvalEntry, SkillsBenchTask } from "../types.js";
|
|
18
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
18
19
|
|
|
19
20
|
// ---------------------------------------------------------------------------
|
|
20
21
|
// Minimal TOML parser (handles the subset used by SkillsBench task.toml files)
|
|
@@ -175,13 +176,19 @@ export function cliMain(): void {
|
|
|
175
176
|
});
|
|
176
177
|
|
|
177
178
|
if (!values.dir) {
|
|
178
|
-
|
|
179
|
-
|
|
179
|
+
throw new CLIError(
|
|
180
|
+
"--dir required (path to SkillsBench corpus directory)",
|
|
181
|
+
"MISSING_FLAG",
|
|
182
|
+
"selftune import-skillsbench --dir <path> --skill <name>",
|
|
183
|
+
);
|
|
180
184
|
}
|
|
181
185
|
|
|
182
186
|
if (!values.skill) {
|
|
183
|
-
|
|
184
|
-
|
|
187
|
+
throw new CLIError(
|
|
188
|
+
"--skill required (target skill name)",
|
|
189
|
+
"MISSING_FLAG",
|
|
190
|
+
"selftune import-skillsbench --dir <path> --skill <name>",
|
|
191
|
+
);
|
|
185
192
|
}
|
|
186
193
|
|
|
187
194
|
const matchStrategy = values["match-strategy"] === "fuzzy" ? "fuzzy" : "exact";
|
|
@@ -189,9 +196,11 @@ export function cliMain(): void {
|
|
|
189
196
|
const tasks = parseSkillsBenchDir(values.dir);
|
|
190
197
|
|
|
191
198
|
if (tasks.length === 0) {
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
199
|
+
throw new CLIError(
|
|
200
|
+
`No tasks found in ${values.dir}/tasks/`,
|
|
201
|
+
"MISSING_DATA",
|
|
202
|
+
"Expected structure: <dir>/tasks/<task-id>/instruction.md",
|
|
203
|
+
);
|
|
195
204
|
}
|
|
196
205
|
|
|
197
206
|
console.log(`Parsed ${tasks.length} tasks from ${values.dir}`);
|
|
@@ -218,5 +227,9 @@ export function cliMain(): void {
|
|
|
218
227
|
}
|
|
219
228
|
|
|
220
229
|
if (import.meta.main) {
|
|
221
|
-
|
|
230
|
+
try {
|
|
231
|
+
cliMain();
|
|
232
|
+
} catch (err) {
|
|
233
|
+
handleCLIError(err);
|
|
234
|
+
}
|
|
222
235
|
}
|
|
@@ -19,6 +19,7 @@ import { parseArgs } from "node:util";
|
|
|
19
19
|
|
|
20
20
|
import { SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
21
21
|
import type { EvalEntry } from "../types.js";
|
|
22
|
+
import { CLIError } from "../utils/cli-error.js";
|
|
22
23
|
import { callLlm, detectAgent } from "../utils/llm-call.js";
|
|
23
24
|
import { generateUnitTests } from "./generate-unit-tests.js";
|
|
24
25
|
import type { AgentRunner } from "./unit-test.js";
|
|
@@ -43,8 +44,11 @@ export async function cliMain(): Promise<void> {
|
|
|
43
44
|
});
|
|
44
45
|
|
|
45
46
|
if (!values.skill) {
|
|
46
|
-
|
|
47
|
-
|
|
47
|
+
throw new CLIError(
|
|
48
|
+
"--skill <name> is required",
|
|
49
|
+
"MISSING_FLAG",
|
|
50
|
+
"selftune eval unit-test --skill <name>",
|
|
51
|
+
);
|
|
48
52
|
}
|
|
49
53
|
|
|
50
54
|
const skillName = values.skill;
|
|
@@ -56,8 +60,11 @@ export async function cliMain(): Promise<void> {
|
|
|
56
60
|
if (values.generate) {
|
|
57
61
|
const agent = detectAgent();
|
|
58
62
|
if (!agent) {
|
|
59
|
-
|
|
60
|
-
|
|
63
|
+
throw new CLIError(
|
|
64
|
+
"No agent CLI found (claude/codex/opencode). Cannot generate tests",
|
|
65
|
+
"AGENT_NOT_FOUND",
|
|
66
|
+
"Install one of the supported agent CLIs",
|
|
67
|
+
);
|
|
61
68
|
}
|
|
62
69
|
|
|
63
70
|
let skillContent = `Skill: ${skillName}`;
|
|
@@ -86,8 +93,7 @@ export async function cliMain(): Promise<void> {
|
|
|
86
93
|
const tests = await generateUnitTests(skillName, skillContent, evalFailures, llmCaller);
|
|
87
94
|
|
|
88
95
|
if (tests.length === 0) {
|
|
89
|
-
|
|
90
|
-
process.exit(1);
|
|
96
|
+
throw new CLIError("No tests generated", "OPERATION_FAILED", "Check agent/LLM availability");
|
|
91
97
|
}
|
|
92
98
|
|
|
93
99
|
// Ensure output directory exists
|
|
@@ -100,9 +106,11 @@ export async function cliMain(): Promise<void> {
|
|
|
100
106
|
// Load and run tests
|
|
101
107
|
const tests = loadUnitTests(testsPath);
|
|
102
108
|
if (tests.length === 0) {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
109
|
+
throw new CLIError(
|
|
110
|
+
`No tests found at ${testsPath}`,
|
|
111
|
+
"FILE_NOT_FOUND",
|
|
112
|
+
"Use --generate to create tests, or provide --tests <path>",
|
|
113
|
+
);
|
|
106
114
|
}
|
|
107
115
|
|
|
108
116
|
console.log(`Loaded ${tests.length} unit tests for skill '${skillName}'`);
|
|
@@ -112,8 +120,11 @@ export async function cliMain(): Promise<void> {
|
|
|
112
120
|
if (values["run-agent"]) {
|
|
113
121
|
const agent = detectAgent();
|
|
114
122
|
if (!agent) {
|
|
115
|
-
|
|
116
|
-
|
|
123
|
+
throw new CLIError(
|
|
124
|
+
"No agent CLI found. Cannot run agent-based tests",
|
|
125
|
+
"AGENT_NOT_FOUND",
|
|
126
|
+
"Install one of the supported agent CLIs",
|
|
127
|
+
);
|
|
117
128
|
}
|
|
118
129
|
const modelFlag = values.model;
|
|
119
130
|
agentRunner = async (query: string): Promise<string> => {
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* description-quality.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure, deterministic scoring function that evaluates the quality of a skill
|
|
5
|
+
* description for routing accuracy. No LLM calls — heuristic-only.
|
|
6
|
+
*
|
|
7
|
+
* Inspired by OpenAI's finding that "writing better skill descriptions improved
|
|
8
|
+
* routing accuracy more than any change to the underlying skill logic itself."
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { DescriptionQualityScore } from "../types.js";
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Constants
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/**
 * Description length thresholds, in characters.
 *
 * Lengths within [IDEAL_MIN, IDEAL_MAX] earn the full length score;
 * MIN_LENGTH and MAX_LENGTH mark where the length score drops further.
 */
const MIN_LENGTH = 40;
const IDEAL_MIN = 80;
const IDEAL_MAX = 300;
const MAX_LENGTH = 500;
|
|
22
|
+
|
|
23
|
+
/**
 * Cue words and phrases indicating trigger context — the description says
 * *when* the skill fires.
 */
const TRIGGER_CONTEXT_WORDS = [
  "when", "if", "after", "before", "during", "while", "upon", "whenever",
  "use when", "trigger", "activate",
];
|
|
37
|
+
|
|
38
|
+
/** Catch-all words and phrases that weaken routing precision. */
const VAGUE_WORDS = [
  "various", "general", "misc", "miscellaneous", "stuff", "things", "etc",
  "and more", "and so on", "other", "multiple", "several", "many", "some",
  "certain", "related",
];
|
|
57
|
+
|
|
58
|
+
/**
 * Boilerplate phrases that add no routing signal.
 * Entries are lowercase because they are compared against lowercased text.
 */
const FILLER_PHRASES = [
  "this skill", "a tool for", "a tool that", "helps with",
  "is used for", "can be used", "is designed to",
];
|
|
68
|
+
|
|
69
|
+
/** Base-form action verbs that signal concrete behavior. */
const ACTION_VERBS = [
  "run", "execute", "analyze", "generate", "create", "deploy", "validate", "check",
  "build", "test", "scan", "extract", "transform", "monitor", "grade", "evolve",
  "sync", "watch", "review", "audit", "parse", "format", "search", "fetch",
  "publish", "install", "configure", "diagnose", "debug", "fix", "optimize", "measure",
];
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Pre-compiled word-boundary patterns
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
/**
 * Compile a word list into case-insensitive, word-boundary-anchored patterns.
 *
 * Regex metacharacters inside an entry are escaped so the entry matches
 * literally; without that, an entry such as "c++" would throw while compiling
 * and "node.js" would also match "nodexjs". Any run of whitespace inside an
 * entry matches one or more whitespace characters in the scanned text.
 *
 * @param words - Words or phrases to compile.
 * @returns One RegExp per entry, in the same order as `words`.
 */
function compileWordPatterns(words: readonly string[]): RegExp[] {
  return words.map((word) => {
    const source = word
      .split(/\s+/)
      // Escape each whitespace-separated piece, then rejoin with a flexible gap.
      .map((part) => part.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join("\\s+");
    return new RegExp(`\\b${source}\\b`, "i");
  });
}
|
|
113
|
+
|
|
114
|
+
// Compiled once at module load so the criterion scorers never rebuild regexes per call.
const ACTION_PATTERNS = compileWordPatterns(ACTION_VERBS);
const VAGUE_PATTERNS = compileWordPatterns(VAGUE_WORDS);
const TRIGGER_PATTERNS = compileWordPatterns(TRIGGER_CONTEXT_WORDS);
|
|
117
|
+
|
|
118
|
+
/**
 * Count how many of the given patterns match `text` at least once.
 *
 * Each pattern contributes at most 1 to the total, however often it occurs.
 *
 * @param text - Text to scan.
 * @param patterns - Pre-compiled patterns to test against `text`.
 * @returns Number of patterns with at least one match.
 */
function countWordMatches(text: string, patterns: RegExp[]): number {
  return patterns.reduce((hits, pattern) => (pattern.test(text) ? hits + 1 : hits), 0);
}
|
|
126
|
+
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
// Criterion scorers
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
/**
 * Score description length on a 0.0–1.0 scale.
 *
 * The score is piecewise linear and continuous in the description length:
 *  - below MIN_LENGTH: ramps from 0.0 up to 0.7
 *  - MIN_LENGTH up to IDEAL_MIN: ramps from 0.7 up to 1.0
 *  - IDEAL_MIN through IDEAL_MAX: 1.0
 *  - IDEAL_MAX through MAX_LENGTH: falls from 1.0 down to 0.7
 *  - above MAX_LENGTH: keeps falling, floored at 0.3
 *
 * @param description - Description text to evaluate.
 * @returns Length score between 0.0 and 1.0.
 */
export function scoreLengthCriterion(description: string): number {
  const len = description.length;
  // Scale the short-text ramp so it meets the next segment at 0.7. An unscaled
  // len / MIN_LENGTH would rate a 39-character description (0.975) above a
  // 40-character one (0.7).
  if (len < MIN_LENGTH) return 0.7 * (len / MIN_LENGTH);
  if (len < IDEAL_MIN) return 0.7 + 0.3 * ((len - MIN_LENGTH) / (IDEAL_MIN - MIN_LENGTH));
  if (len <= IDEAL_MAX) return 1.0;
  if (len <= MAX_LENGTH) return 0.7 + 0.3 * ((MAX_LENGTH - len) / (MAX_LENGTH - IDEAL_MAX));
  return Math.max(0.3, 0.7 - 0.4 * ((len - MAX_LENGTH) / MAX_LENGTH));
}
|
|
140
|
+
|
|
141
|
+
/**
 * Score the presence of trigger context cues (when/if/before/after etc).
 *
 * Returns 0.0 when no cue matches, 0.7 for one, and 0.15 more for each
 * additional distinct cue, capped at 1.0.
 */
export function scoreTriggerContextCriterion(description: string): number {
  const cueCount = countWordMatches(description.toLowerCase(), TRIGGER_PATTERNS);
  return cueCount === 0 ? 0.0 : Math.min(1.0, 0.7 + 0.15 * (cueCount - 1));
}
|
|
148
|
+
|
|
149
|
+
/**
 * Score the absence of vague words (a lower score means a vaguer description).
 *
 * Returns 1.0 when no vague word matches, 0.6 for one, and 0.15 less for each
 * additional distinct vague word, floored at 0.1.
 */
export function scoreVaguenessCriterion(description: string): number {
  const vagueCount = countWordMatches(description.toLowerCase(), VAGUE_PATTERNS);
  return vagueCount === 0 ? 1.0 : Math.max(0.1, 0.6 - 0.15 * (vagueCount - 1));
}
|
|
156
|
+
|
|
157
|
+
/**
 * Score whether the description names at least one concrete action.
 *
 * Returns a flat 0.2 when no action verb matches. Otherwise starts from 1.0
 * and subtracts a penalty that grows with the number of distinct filler
 * phrases relative to the description's word count, floored at 0.3.
 */
export function scoreSpecificityCriterion(description: string): number {
  const lower = description.toLowerCase();

  // No recognized action verb: filler density is irrelevant.
  if (!ACTION_PATTERNS.some((pattern) => pattern.test(lower))) return 0.2;

  let fillerCount = 0;
  for (const phrase of FILLER_PHRASES) {
    if (lower.includes(phrase)) fillerCount += 1;
  }
  if (fillerCount === 0) return 1.0;

  // Fillers are measured per 10 words, with the divisor never below 1.
  const wordCount = description.split(/\s+/).length;
  const fillerRatio = fillerCount / Math.max(1, wordCount / 10);
  return Math.max(0.3, 1.0 - fillerRatio * 0.3);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Score whether the description is more than the skill name restated.
 *
 * Both strings are normalized before comparison: lowercased, stripped of
 * everything except letters, digits and whitespace, whitespace runs collapsed,
 * then trimmed. Trimming happens last so that punctuation at either end
 * (e.g. "my skill -") cannot leave stray whitespace that hides a restated name.
 *
 * @param description - Description text to evaluate.
 * @param skillName - Skill name to compare against; when omitted the check is skipped.
 * @returns 1.0 when no name is given or the description clearly goes beyond the
 *   name, 0.0 when it equals the name (compact or space-separated form), and
 *   0.3 when it is fewer than 10 characters longer than the compact name.
 */
export function scoreNotJustNameCriterion(description: string, skillName?: string): number {
  if (!skillName) return 1.0;

  const normalize = (text: string): string =>
    text
      .toLowerCase()
      .replace(/[^a-z0-9\s]/g, "")
      .replace(/\s+/g, " ")
      .trim();

  const descNorm = normalize(description);
  // Compact form: "my-skill" -> "myskill".
  const nameNorm = normalize(skillName);
  // Spaced form: "my-skill" / "my_skill" -> "my skill".
  const nameFromKebab = normalize(skillName.replace(/[-_]/g, " "));

  if (descNorm === nameNorm || descNorm === nameFromKebab) return 0.0;
  if (descNorm.length < nameNorm.length + 10) return 0.3;
  return 1.0;
}
|
|
187
|
+
|
|
188
|
+
// ---------------------------------------------------------------------------
|
|
189
|
+
// Main scoring function
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
/**
 * Per-criterion weights used for the composite score.
 * Trigger context is weighted highest per OpenAI's finding.
 * Key order is the order in which the weighted terms are summed.
 */
const WEIGHTS = {
  length: 0.15,
  trigger_context: 0.3,
  vagueness: 0.2,
  specificity: 0.2,
  not_just_name: 0.15,
} as const;
|
|
200
|
+
|
|
201
|
+
/**
 * Score a skill description on heuristic quality criteria.
 *
 * Runs every criterion scorer and combines the results as a weighted sum
 * using WEIGHTS. Pure function — no I/O, no LLM calls.
 *
 * @param description - Description text to evaluate.
 * @param skillName - Optional skill name, forwarded to the not-just-name check.
 * @returns The composite score rounded to three decimals, plus the
 *   per-criterion breakdown.
 */
export function scoreDescription(description: string, skillName?: string): DescriptionQualityScore {
  const criteria = {
    length: scoreLengthCriterion(description),
    trigger_context: scoreTriggerContextCriterion(description),
    vagueness: scoreVaguenessCriterion(description),
    specificity: scoreSpecificityCriterion(description),
    not_just_name: scoreNotJustNameCriterion(description, skillName),
  };

  // Accumulate in WEIGHTS key order so the floating-point sum is reproducible.
  let weightedSum = 0;
  for (const key of Object.keys(WEIGHTS) as (keyof typeof WEIGHTS)[]) {
    weightedSum += criteria[key] * WEIGHTS[key];
  }

  return {
    composite: Number(weightedSum.toFixed(3)),
    criteria,
  };
}
|
|
@@ -25,6 +25,7 @@ import type {
|
|
|
25
25
|
QueryLogRecord,
|
|
26
26
|
SkillUsageRecord,
|
|
27
27
|
} from "../types.js";
|
|
28
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
28
29
|
import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
|
|
29
30
|
import { callViaSubagent } from "../utils/llm-call.js";
|
|
30
31
|
import { appendAuditEntry } from "./audit.js";
|
|
@@ -710,8 +711,11 @@ Options:
|
|
|
710
711
|
}
|
|
711
712
|
|
|
712
713
|
if (!values.skill || !values["skill-path"]) {
|
|
713
|
-
|
|
714
|
-
|
|
714
|
+
throw new CLIError(
|
|
715
|
+
"--skill and --skill-path are required",
|
|
716
|
+
"MISSING_FLAG",
|
|
717
|
+
"selftune evolve body --skill <name> --skill-path <path>",
|
|
718
|
+
);
|
|
715
719
|
}
|
|
716
720
|
|
|
717
721
|
const { detectAgent } = await import("../utils/llm-call.js");
|
|
@@ -719,15 +723,21 @@ Options:
|
|
|
719
723
|
const studentAgent = values["student-agent"] ?? teacherAgent;
|
|
720
724
|
|
|
721
725
|
if (!teacherAgent) {
|
|
722
|
-
|
|
723
|
-
|
|
726
|
+
throw new CLIError(
|
|
727
|
+
"No agent CLI found. Install Claude Code, Codex, or OpenCode.",
|
|
728
|
+
"AGENT_NOT_FOUND",
|
|
729
|
+
"Install Claude Code, Codex, or OpenCode.",
|
|
730
|
+
);
|
|
724
731
|
}
|
|
725
732
|
|
|
726
733
|
// Parse target
|
|
727
734
|
const targetStr = values.target ?? "body";
|
|
728
735
|
if (targetStr !== "body" && targetStr !== "routing") {
|
|
729
|
-
|
|
730
|
-
|
|
736
|
+
throw new CLIError(
|
|
737
|
+
"--target must be 'body' or 'routing'",
|
|
738
|
+
"INVALID_FLAG",
|
|
739
|
+
"Use --target body or --target routing",
|
|
740
|
+
);
|
|
731
741
|
}
|
|
732
742
|
|
|
733
743
|
// Parse few-shot examples
|
|
@@ -763,8 +773,5 @@ Options:
|
|
|
763
773
|
}
|
|
764
774
|
|
|
765
775
|
if (import.meta.main) {
|
|
766
|
-
cliMain().catch(
|
|
767
|
-
console.error(`[FATAL] ${err}`);
|
|
768
|
-
process.exit(1);
|
|
769
|
-
});
|
|
776
|
+
cliMain().catch(handleCLIError);
|
|
770
777
|
}
|