selftune 0.2.9 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -35
- package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +16 -0
- package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +2 -0
- package/apps/local-dashboard/dist/assets/rolldown-runtime-Dw2cE7zH.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +11 -0
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +8 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +12 -0
- package/apps/local-dashboard/dist/index.html +16 -15
- package/bin/selftune.cjs +1 -1
- package/cli/selftune/activation-rules.ts +1 -0
- package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
- package/cli/selftune/alpha-upload/stage-canonical.ts +94 -0
- package/cli/selftune/auth/device-code.ts +32 -0
- package/cli/selftune/auto-update.ts +12 -0
- package/cli/selftune/badge/badge.ts +1 -0
- package/cli/selftune/canonical-export.ts +5 -0
- package/cli/selftune/claude-agents.ts +154 -0
- package/cli/selftune/contribute/bundle.ts +1 -0
- package/cli/selftune/contribute/contribute.ts +1 -0
- package/cli/selftune/cron/setup.ts +2 -2
- package/cli/selftune/dashboard-server.ts +1 -0
- package/cli/selftune/eval/hooks-to-evals.ts +1 -0
- package/cli/selftune/eval/import-skillsbench.ts +1 -0
- package/cli/selftune/eval/synthetic-evals.ts +2 -3
- package/cli/selftune/eval/unit-test.ts +1 -0
- package/cli/selftune/evolution/deploy-proposal.ts +9 -238
- package/cli/selftune/evolution/evolve-body.ts +93 -6
- package/cli/selftune/evolution/evolve.ts +3 -7
- package/cli/selftune/evolution/propose-body.ts +3 -2
- package/cli/selftune/evolution/propose-routing.ts +3 -2
- package/cli/selftune/evolution/refine-body.ts +3 -2
- package/cli/selftune/evolution/rollback.ts +1 -1
- package/cli/selftune/export.ts +1 -0
- package/cli/selftune/grading/grade-session.ts +8 -0
- package/cli/selftune/hooks/auto-activate.ts +1 -0
- package/cli/selftune/hooks/evolution-guard.ts +1 -1
- package/cli/selftune/hooks/prompt-log.ts +1 -0
- package/cli/selftune/hooks/session-stop.ts +34 -40
- package/cli/selftune/hooks/skill-change-guard.ts +1 -0
- package/cli/selftune/hooks/skill-eval.ts +1 -1
- package/cli/selftune/index.ts +23 -14
- package/cli/selftune/ingestors/claude-replay.ts +1 -0
- package/cli/selftune/ingestors/codex-rollout.ts +1 -0
- package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
- package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
- package/cli/selftune/init.ts +121 -29
- package/cli/selftune/localdb/db.ts +1 -0
- package/cli/selftune/localdb/direct-write.ts +39 -0
- package/cli/selftune/localdb/materialize.ts +2 -0
- package/cli/selftune/localdb/queries.ts +53 -0
- package/cli/selftune/localdb/schema.ts +28 -0
- package/cli/selftune/normalization.ts +1 -0
- package/cli/selftune/observability.ts +1 -0
- package/cli/selftune/repair/skill-usage.ts +1 -0
- package/cli/selftune/routes/orchestrate-runs.ts +1 -0
- package/cli/selftune/routes/overview.ts +1 -0
- package/cli/selftune/routes/report.ts +1 -1
- package/cli/selftune/routes/skill-report.ts +2 -1
- package/cli/selftune/status.ts +1 -1
- package/cli/selftune/sync.ts +30 -1
- package/cli/selftune/uninstall.ts +412 -0
- package/cli/selftune/utils/canonical-log.ts +2 -0
- package/cli/selftune/utils/frontmatter.ts +50 -7
- package/cli/selftune/utils/jsonl.ts +1 -0
- package/cli/selftune/utils/llm-call.ts +131 -3
- package/cli/selftune/utils/skill-log.ts +1 -0
- package/cli/selftune/utils/transcript.ts +1 -0
- package/cli/selftune/utils/trigger-check.ts +1 -1
- package/cli/selftune/workflows/skill-md-writer.ts +5 -5
- package/cli/selftune/workflows/workflows.ts +1 -0
- package/package.json +37 -33
- package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
- package/packages/telemetry-contract/package.json +1 -1
- package/packages/telemetry-contract/src/schemas.ts +1 -0
- package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
- package/packages/ui/README.md +35 -34
- package/packages/ui/package.json +3 -3
- package/packages/ui/src/components/ActivityTimeline.tsx +50 -43
- package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
- package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
- package/packages/ui/src/components/InfoTip.tsx +4 -3
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
- package/packages/ui/src/components/section-cards.tsx +20 -25
- package/packages/ui/src/components/skill-health-grid.tsx +213 -193
- package/packages/ui/src/lib/constants.tsx +1 -0
- package/packages/ui/src/primitives/badge.tsx +12 -15
- package/packages/ui/src/primitives/button.tsx +7 -7
- package/packages/ui/src/primitives/card.tsx +15 -26
- package/packages/ui/src/primitives/checkbox.tsx +7 -8
- package/packages/ui/src/primitives/collapsible.tsx +5 -5
- package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
- package/packages/ui/src/primitives/label.tsx +6 -6
- package/packages/ui/src/primitives/select.tsx +28 -37
- package/packages/ui/src/primitives/table.tsx +17 -44
- package/packages/ui/src/primitives/tabs.tsx +14 -21
- package/packages/ui/src/primitives/tooltip.tsx +10 -22
- package/skill/SKILL.md +70 -57
- package/skill/Workflows/AlphaUpload.md +4 -4
- package/skill/Workflows/AutoActivation.md +11 -6
- package/skill/Workflows/Badge.md +22 -16
- package/skill/Workflows/Baseline.md +34 -36
- package/skill/Workflows/Composability.md +16 -11
- package/skill/Workflows/Contribute.md +26 -21
- package/skill/Workflows/Cron.md +23 -22
- package/skill/Workflows/Dashboard.md +32 -27
- package/skill/Workflows/Doctor.md +33 -27
- package/skill/Workflows/Evals.md +48 -47
- package/skill/Workflows/EvolutionMemory.md +31 -21
- package/skill/Workflows/Evolve.md +84 -82
- package/skill/Workflows/EvolveBody.md +58 -47
- package/skill/Workflows/Grade.md +16 -13
- package/skill/Workflows/ImportSkillsBench.md +9 -6
- package/skill/Workflows/Ingest.md +36 -21
- package/skill/Workflows/Initialize.md +108 -40
- package/skill/Workflows/Orchestrate.md +22 -16
- package/skill/Workflows/Replay.md +12 -7
- package/skill/Workflows/Rollback.md +13 -6
- package/skill/Workflows/Schedule.md +6 -6
- package/skill/Workflows/Sync.md +18 -11
- package/skill/Workflows/UnitTest.md +28 -17
- package/skill/Workflows/Watch.md +28 -21
- package/skill/agents/diagnosis-analyst.md +11 -0
- package/skill/agents/evolution-reviewer.md +15 -1
- package/skill/agents/integration-guide.md +10 -0
- package/skill/agents/pattern-analyst.md +12 -1
- package/skill/references/grading-methodology.md +23 -24
- package/skill/references/interactive-config.md +7 -7
- package/skill/references/invocation-taxonomy.md +22 -20
- package/skill/references/logs.md +14 -6
- package/skill/references/setup-patterns.md +4 -2
- package/.claude/agents/diagnosis-analyst.md +0 -156
- package/.claude/agents/evolution-reviewer.md +0 -180
- package/.claude/agents/integration-guide.md +0 -212
- package/.claude/agents/pattern-analyst.md +0 -160
- package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +0 -1
- package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +0 -60
- package/apps/local-dashboard/dist/assets/vendor-table-dK1QMLq9.js +0 -26
- package/apps/local-dashboard/dist/assets/vendor-ui-CO2mrx6e.js +0 -341
|
@@ -1,97 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* deploy-proposal.ts
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
10
|
-
import type { EvolutionProposal, SkillSections } from "../types.js";
|
|
11
|
-
import type { ValidationResult } from "./validate-proposal.js";
|
|
12
|
-
|
|
13
|
-
// ---------------------------------------------------------------------------
|
|
14
|
-
// Types
|
|
15
|
-
// ---------------------------------------------------------------------------
|
|
16
|
-
|
|
17
|
-
export interface DeployOptions {
|
|
18
|
-
proposal: EvolutionProposal;
|
|
19
|
-
validation: ValidationResult;
|
|
20
|
-
skillPath: string;
|
|
21
|
-
createPr: boolean;
|
|
22
|
-
branchPrefix?: string; // default "selftune/evolve"
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
export interface DeployResult {
|
|
26
|
-
skillMdUpdated: boolean;
|
|
27
|
-
backupPath: string | null;
|
|
28
|
-
branchName: string | null;
|
|
29
|
-
commitMessage: string;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
// ---------------------------------------------------------------------------
|
|
33
|
-
// SKILL.md reading
|
|
34
|
-
// ---------------------------------------------------------------------------
|
|
35
|
-
|
|
36
|
-
/** Read the contents of a SKILL.md file. Throws if the file does not exist. */
|
|
37
|
-
export function readSkillMd(skillPath: string): string {
|
|
38
|
-
if (!existsSync(skillPath)) {
|
|
39
|
-
throw new Error(`SKILL.md not found at ${skillPath}`);
|
|
40
|
-
}
|
|
41
|
-
return readFileSync(skillPath, "utf-8");
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
// ---------------------------------------------------------------------------
|
|
45
|
-
// Description replacement
|
|
46
|
-
// ---------------------------------------------------------------------------
|
|
47
|
-
|
|
48
|
-
/**
|
|
49
|
-
* Replace the description section of a SKILL.md file.
|
|
4
|
+
* SKILL.md manipulation utilities for the evolution pipeline: description
|
|
5
|
+
* replacement, structured section parsing, section replacement, and full
|
|
6
|
+
* body replacement.
|
|
50
7
|
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
8
|
+
* Evolution is a local personalization — the evolved description reflects how
|
|
9
|
+
* *this user* works, not a change the skill creator should adopt. A future
|
|
10
|
+
* upstream feedback channel (anonymized patterns, not raw descriptions) may
|
|
11
|
+
* let end-users send useful signal back to skill creators, but that's a
|
|
12
|
+
* separate concern from deploy. See TD-019 in tech-debt-tracker.md.
|
|
54
13
|
*/
|
|
55
|
-
export function replaceDescription(currentContent: string, newDescription: string): string {
|
|
56
|
-
const lines = currentContent.split("\n");
|
|
57
|
-
|
|
58
|
-
// Find the first # heading line
|
|
59
|
-
let headingIndex = -1;
|
|
60
|
-
for (let i = 0; i < lines.length; i++) {
|
|
61
|
-
if (lines[i].startsWith("# ") && !lines[i].startsWith("## ")) {
|
|
62
|
-
headingIndex = i;
|
|
63
|
-
break;
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
14
|
|
|
67
|
-
|
|
68
|
-
if (headingIndex === -1) {
|
|
69
|
-
return `${newDescription}\n${currentContent}`;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
// Find the first ## heading after the main heading
|
|
73
|
-
let subHeadingIndex = -1;
|
|
74
|
-
for (let i = headingIndex + 1; i < lines.length; i++) {
|
|
75
|
-
if (lines[i].startsWith("## ")) {
|
|
76
|
-
subHeadingIndex = i;
|
|
77
|
-
break;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
// Build the new content, preserving any preamble before the first heading
|
|
82
|
-
const preamble = headingIndex > 0 ? `${lines.slice(0, headingIndex).join("\n")}\n` : "";
|
|
83
|
-
const headingLine = lines[headingIndex];
|
|
84
|
-
const descriptionBlock = newDescription.length > 0 ? `\n${newDescription}\n` : "\n";
|
|
85
|
-
|
|
86
|
-
if (subHeadingIndex === -1) {
|
|
87
|
-
// No sub-heading: preamble + heading + new description + trailing newline
|
|
88
|
-
return `${preamble}${headingLine}\n${descriptionBlock}\n`;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
// Preamble + heading + description + everything from the first ## onward
|
|
92
|
-
const afterSubHeading = lines.slice(subHeadingIndex).join("\n");
|
|
93
|
-
return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
|
|
94
|
-
}
|
|
15
|
+
import type { SkillSections } from "../types.js";
|
|
95
16
|
|
|
96
17
|
// ---------------------------------------------------------------------------
|
|
97
18
|
// Structured SKILL.md parsing
|
|
@@ -233,153 +154,3 @@ export function replaceBody(currentContent: string, proposedBody: string): strin
|
|
|
233
154
|
|
|
234
155
|
return `${parts.join("\n").trimEnd()}\n`;
|
|
235
156
|
}
|
|
236
|
-
|
|
237
|
-
// ---------------------------------------------------------------------------
|
|
238
|
-
// Commit message builder
|
|
239
|
-
// ---------------------------------------------------------------------------
|
|
240
|
-
|
|
241
|
-
/** Build a commit message that includes the skill name and pass rate change. */
|
|
242
|
-
export function buildCommitMessage(
|
|
243
|
-
proposal: EvolutionProposal,
|
|
244
|
-
validation: ValidationResult,
|
|
245
|
-
): string {
|
|
246
|
-
const changePercent = Math.round(validation.net_change * 100);
|
|
247
|
-
const sign = changePercent >= 0 ? "+" : "";
|
|
248
|
-
const passRateStr = `${sign}${changePercent}% pass rate`;
|
|
249
|
-
|
|
250
|
-
return `evolve(${proposal.skill_name}): ${passRateStr}`;
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// ---------------------------------------------------------------------------
|
|
254
|
-
// Git/GH operations (PR creation)
|
|
255
|
-
// ---------------------------------------------------------------------------
|
|
256
|
-
|
|
257
|
-
/** Sanitize a string for use in a git branch name. */
|
|
258
|
-
function sanitizeForGitRef(name: string): string {
|
|
259
|
-
return name
|
|
260
|
-
.replace(/[^a-zA-Z0-9._-]/g, "-")
|
|
261
|
-
.replace(/\.{2,}/g, ".")
|
|
262
|
-
.replace(/^[.-]|[.-]$/g, "")
|
|
263
|
-
.replace(/-{2,}/g, "-");
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
/** Generate a branch name from the prefix and skill name. */
|
|
267
|
-
function makeBranchName(prefix: string, skillName: string): string {
|
|
268
|
-
const timestamp = Date.now();
|
|
269
|
-
const safeName = sanitizeForGitRef(skillName) || "untitled";
|
|
270
|
-
return `${prefix}/${safeName}-${timestamp}`;
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
/**
|
|
274
|
-
* Run a git/gh command via Bun.spawn. Returns stdout on success.
|
|
275
|
-
* Throws on non-zero exit code or if the command exceeds timeoutMs.
|
|
276
|
-
*/
|
|
277
|
-
async function runCommand(args: string[], cwd?: string, timeoutMs = 30_000): Promise<string> {
|
|
278
|
-
const proc = Bun.spawn(args, {
|
|
279
|
-
cwd,
|
|
280
|
-
stdout: "pipe",
|
|
281
|
-
stderr: "pipe",
|
|
282
|
-
});
|
|
283
|
-
|
|
284
|
-
let timedOut = false;
|
|
285
|
-
const timer = setTimeout(() => {
|
|
286
|
-
timedOut = true;
|
|
287
|
-
proc.kill();
|
|
288
|
-
}, timeoutMs);
|
|
289
|
-
|
|
290
|
-
try {
|
|
291
|
-
// Read stdout and stderr concurrently to avoid deadlock when both pipes fill.
|
|
292
|
-
const [stdout, stderr] = await Promise.all([
|
|
293
|
-
new Response(proc.stdout).text(),
|
|
294
|
-
new Response(proc.stderr).text(),
|
|
295
|
-
]);
|
|
296
|
-
const exitCode = await proc.exited;
|
|
297
|
-
|
|
298
|
-
if (timedOut) {
|
|
299
|
-
throw new Error(`Command timed out after ${timeoutMs}ms: ${args.join(" ")}`);
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
if (exitCode !== 0) {
|
|
303
|
-
throw new Error(`Command failed (exit ${exitCode}): ${args.join(" ")}\n${stderr}`);
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
return stdout.trim();
|
|
307
|
-
} finally {
|
|
308
|
-
clearTimeout(timer);
|
|
309
|
-
}
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
// ---------------------------------------------------------------------------
|
|
313
|
-
// Main deploy function
|
|
314
|
-
// ---------------------------------------------------------------------------
|
|
315
|
-
|
|
316
|
-
/** Deploy a validated evolution proposal to SKILL.md and optionally create a PR. */
|
|
317
|
-
export async function deployProposal(options: DeployOptions): Promise<DeployResult> {
|
|
318
|
-
const { proposal, validation, skillPath, createPr, branchPrefix = "selftune/evolve" } = options;
|
|
319
|
-
|
|
320
|
-
// Step 1: Read current SKILL.md
|
|
321
|
-
const currentContent = readSkillMd(skillPath);
|
|
322
|
-
|
|
323
|
-
// Step 2: Create backup (unique per deploy to avoid overwriting previous backups)
|
|
324
|
-
const backupTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
325
|
-
const backupPath = `${skillPath}.${backupTimestamp}.bak`;
|
|
326
|
-
copyFileSync(skillPath, backupPath);
|
|
327
|
-
|
|
328
|
-
// Step 3: Replace description and write
|
|
329
|
-
const updatedContent = replaceDescription(currentContent, proposal.proposed_description);
|
|
330
|
-
writeFileSync(skillPath, updatedContent, "utf-8");
|
|
331
|
-
|
|
332
|
-
// Step 4: Build commit message
|
|
333
|
-
const commitMessage = buildCommitMessage(proposal, validation);
|
|
334
|
-
|
|
335
|
-
// Step 5: Optionally create branch and PR
|
|
336
|
-
let branchName: string | null = null;
|
|
337
|
-
|
|
338
|
-
if (createPr) {
|
|
339
|
-
branchName = makeBranchName(branchPrefix, proposal.skill_name);
|
|
340
|
-
|
|
341
|
-
try {
|
|
342
|
-
// Create and checkout branch
|
|
343
|
-
await runCommand(["git", "checkout", "-b", branchName]);
|
|
344
|
-
|
|
345
|
-
// Stage the SKILL.md
|
|
346
|
-
await runCommand(["git", "add", skillPath]);
|
|
347
|
-
|
|
348
|
-
// Commit
|
|
349
|
-
await runCommand(["git", "commit", "-m", commitMessage]);
|
|
350
|
-
|
|
351
|
-
// Push
|
|
352
|
-
await runCommand(["git", "push", "-u", "origin", branchName]);
|
|
353
|
-
|
|
354
|
-
// Create PR
|
|
355
|
-
await runCommand([
|
|
356
|
-
"gh",
|
|
357
|
-
"pr",
|
|
358
|
-
"create",
|
|
359
|
-
"--title",
|
|
360
|
-
commitMessage,
|
|
361
|
-
"--body",
|
|
362
|
-
`Proposal: ${proposal.proposal_id}\nRationale: ${proposal.rationale}\nNet change: ${validation.net_change > 0 ? "+" : ""}${Math.round(validation.net_change * 100)}%`,
|
|
363
|
-
]);
|
|
364
|
-
} catch (err) {
|
|
365
|
-
// Git/GH operations are best-effort in test environments.
|
|
366
|
-
// The branch name is still returned for tracking.
|
|
367
|
-
console.error(`[WARN] Git/GH operation failed: ${err instanceof Error ? err.message : err}`);
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
return {
|
|
372
|
-
skillMdUpdated: true,
|
|
373
|
-
backupPath,
|
|
374
|
-
branchName,
|
|
375
|
-
commitMessage,
|
|
376
|
-
};
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
// ---------------------------------------------------------------------------
|
|
380
|
-
// CLI entry guard
|
|
381
|
-
// ---------------------------------------------------------------------------
|
|
382
|
-
|
|
383
|
-
if (import.meta.main) {
|
|
384
|
-
console.log("deploy-proposal: use deployProposal() programmatically or via evolve CLI");
|
|
385
|
-
}
|
|
@@ -25,7 +25,8 @@ import type {
|
|
|
25
25
|
QueryLogRecord,
|
|
26
26
|
SkillUsageRecord,
|
|
27
27
|
} from "../types.js";
|
|
28
|
-
|
|
28
|
+
import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
|
|
29
|
+
import { callViaSubagent } from "../utils/llm-call.js";
|
|
29
30
|
import { appendAuditEntry } from "./audit.js";
|
|
30
31
|
import { checkConstitutionSizeOnly } from "./constitutional.js";
|
|
31
32
|
import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
|
|
@@ -57,6 +58,9 @@ export interface EvolveBodyOptions {
|
|
|
57
58
|
fewShotExamples?: string[];
|
|
58
59
|
gradingResults?: GradingResult[];
|
|
59
60
|
validationModel?: string;
|
|
61
|
+
teacherEffort?: EffortLevel;
|
|
62
|
+
/** Run evolution-reviewer subagent as Gate 4 before deployment. */
|
|
63
|
+
useReviewer?: boolean;
|
|
60
64
|
}
|
|
61
65
|
|
|
62
66
|
export interface EvolveBodyResult {
|
|
@@ -89,6 +93,7 @@ export interface EvolveBodyDeps {
|
|
|
89
93
|
readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
|
|
90
94
|
readFileSync?: typeof readFileSync;
|
|
91
95
|
writeFileSync?: (path: string, data: string, encoding: string) => void;
|
|
96
|
+
callViaSubagent?: (options: SubagentCallOptions) => Promise<string>;
|
|
92
97
|
}
|
|
93
98
|
|
|
94
99
|
// ---------------------------------------------------------------------------
|
|
@@ -110,6 +115,19 @@ function createAuditEntry(
|
|
|
110
115
|
};
|
|
111
116
|
}
|
|
112
117
|
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
// Pipeline defaults — enforced even when the calling agent omits flags
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
/** Default teacher model: the unversioned "opus" model flag, chosen for highest-quality proposals. */
|
|
123
|
+
const DEFAULT_TEACHER_MODEL = "opus";
|
|
124
|
+
|
|
125
|
+
/** Default student model: Haiku for cheap, fast validation gates. */
|
|
126
|
+
const DEFAULT_STUDENT_MODEL = "haiku";
|
|
127
|
+
|
|
128
|
+
/** Default teacher effort: extended thinking for multi-constraint reasoning. */
|
|
129
|
+
const DEFAULT_TEACHER_EFFORT: EffortLevel = "high";
|
|
130
|
+
|
|
113
131
|
// ---------------------------------------------------------------------------
|
|
114
132
|
// Main orchestrator
|
|
115
133
|
// ---------------------------------------------------------------------------
|
|
@@ -124,8 +142,6 @@ export async function evolveBody(
|
|
|
124
142
|
target,
|
|
125
143
|
teacherAgent,
|
|
126
144
|
studentAgent,
|
|
127
|
-
teacherModel,
|
|
128
|
-
studentModel,
|
|
129
145
|
evalSetPath,
|
|
130
146
|
dryRun,
|
|
131
147
|
maxIterations,
|
|
@@ -133,6 +149,11 @@ export async function evolveBody(
|
|
|
133
149
|
fewShotExamples,
|
|
134
150
|
} = options;
|
|
135
151
|
|
|
152
|
+
// Apply pipeline defaults for models/effort when not explicitly provided
|
|
153
|
+
const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
|
|
154
|
+
const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
|
|
155
|
+
const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
|
|
156
|
+
|
|
136
157
|
// Resolve injectable dependencies
|
|
137
158
|
const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
|
|
138
159
|
const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
|
|
@@ -151,6 +172,7 @@ export async function evolveBody(
|
|
|
151
172
|
});
|
|
152
173
|
const _readFileSync = _deps.readFileSync ?? readFileSync;
|
|
153
174
|
const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
|
|
175
|
+
const _callViaSubagent = _deps.callViaSubagent ?? callViaSubagent;
|
|
154
176
|
|
|
155
177
|
const auditEntries: EvolutionAuditEntry[] = [];
|
|
156
178
|
|
|
@@ -306,6 +328,7 @@ export async function evolveBody(
|
|
|
306
328
|
skillPath,
|
|
307
329
|
teacherAgent,
|
|
308
330
|
teacherModel,
|
|
331
|
+
teacherEffort,
|
|
309
332
|
);
|
|
310
333
|
} else {
|
|
311
334
|
proposal = await _generateBodyProposal(
|
|
@@ -318,6 +341,7 @@ export async function evolveBody(
|
|
|
318
341
|
teacherModel,
|
|
319
342
|
fewShotExamples,
|
|
320
343
|
executionContext,
|
|
344
|
+
teacherEffort,
|
|
321
345
|
);
|
|
322
346
|
}
|
|
323
347
|
} else if (lastProposal && lastValidation) {
|
|
@@ -327,6 +351,7 @@ export async function evolveBody(
|
|
|
327
351
|
lastValidation,
|
|
328
352
|
teacherAgent,
|
|
329
353
|
teacherModel,
|
|
354
|
+
// Pass the defaulted effort (not the raw option) so refinement uses the same pipeline default as the proposal calls.
teacherEffort,
|
|
330
355
|
);
|
|
331
356
|
} else {
|
|
332
357
|
break;
|
|
@@ -496,7 +521,63 @@ export async function evolveBody(
|
|
|
496
521
|
}
|
|
497
522
|
}
|
|
498
523
|
|
|
499
|
-
// Step 5:
|
|
524
|
+
// Step 5: Optional evolution-reviewer gate (Gate 4)
|
|
525
|
+
if (options.useReviewer && lastProposal && lastValidation?.improved) {
|
|
526
|
+
try {
|
|
527
|
+
const reviewPrompt = [
|
|
528
|
+
`Review this ${target} evolution proposal for the "${skillName}" skill.`,
|
|
529
|
+
``,
|
|
530
|
+
`Proposal ID: ${lastProposal.proposal_id}`,
|
|
531
|
+
`Skill path: ${skillPath}`,
|
|
532
|
+
`Target: ${target}`,
|
|
533
|
+
`Confidence: ${lastProposal.confidence}`,
|
|
534
|
+
`Validation: ${lastValidation.gates_passed}/${lastValidation.gates_total} gates passed`,
|
|
535
|
+
`Regressions: ${lastValidation.regressions.length > 0 ? lastValidation.regressions.join(", ") : "none"}`,
|
|
536
|
+
``,
|
|
537
|
+
`Original content:`,
|
|
538
|
+
lastProposal.original_body,
|
|
539
|
+
``,
|
|
540
|
+
`Proposed content:`,
|
|
541
|
+
lastProposal.proposed_body,
|
|
542
|
+
``,
|
|
543
|
+
`Rationale: ${lastProposal.rationale}`,
|
|
544
|
+
].join("\n");
|
|
545
|
+
|
|
546
|
+
const reviewOutput = await _callViaSubagent({
|
|
547
|
+
agentName: "evolution-reviewer",
|
|
548
|
+
prompt: reviewPrompt,
|
|
549
|
+
maxTurns: 8,
|
|
550
|
+
allowedTools: ["Read", "Grep", "Glob", "Bash"],
|
|
551
|
+
});
|
|
552
|
+
|
|
553
|
+
const isRejected = /\bREJECT\b/.test(reviewOutput) && !/\bAPPROVE\b/.test(reviewOutput);
|
|
554
|
+
recordAudit(
|
|
555
|
+
lastProposal.proposal_id,
|
|
556
|
+
isRejected ? "rejected" : "validated",
|
|
557
|
+
`Evolution reviewer: ${isRejected ? "REJECTED" : "APPROVED"}`,
|
|
558
|
+
);
|
|
559
|
+
|
|
560
|
+
if (isRejected) {
|
|
561
|
+
return {
|
|
562
|
+
proposal: lastProposal,
|
|
563
|
+
validation: lastValidation,
|
|
564
|
+
deployed: false,
|
|
565
|
+
auditEntries,
|
|
566
|
+
reason: `Evolution reviewer rejected proposal: ${reviewOutput.slice(0, 500)}`,
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
} catch (reviewError) {
|
|
570
|
+
// Fail-open: if reviewer crashes, log it and continue to deploy
|
|
571
|
+
const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
|
|
572
|
+
recordAudit(
|
|
573
|
+
lastProposal.proposal_id,
|
|
574
|
+
"validated",
|
|
575
|
+
`Evolution reviewer failed (fail-open): ${msg}`,
|
|
576
|
+
);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
// Step 6: Deploy or dry-run
|
|
500
581
|
if (dryRun) {
|
|
501
582
|
return {
|
|
502
583
|
proposal: lastProposal,
|
|
@@ -594,6 +675,8 @@ export async function cliMain(): Promise<void> {
|
|
|
594
675
|
"task-description": { type: "string" },
|
|
595
676
|
"few-shot": { type: "string" },
|
|
596
677
|
"validation-model": { type: "string" },
|
|
678
|
+
"teacher-effort": { type: "string", default: "high" },
|
|
679
|
+
review: { type: "boolean", default: false },
|
|
597
680
|
help: { type: "boolean", default: false },
|
|
598
681
|
},
|
|
599
682
|
strict: true,
|
|
@@ -611,8 +694,8 @@ Options:
|
|
|
611
694
|
--target Evolution target: body, routing (default: body)
|
|
612
695
|
--teacher-agent Teacher agent CLI (claude, codex, etc.)
|
|
613
696
|
--student-agent Student agent CLI for validation
|
|
614
|
-
--teacher-model Model flag for teacher agent
|
|
615
|
-
--student-model Model flag for student agent
|
|
697
|
+
--teacher-model Model flag for teacher agent (default: opus)
|
|
698
|
+
--student-model Model flag for student agent (default: haiku)
|
|
616
699
|
--eval-set Path to eval set JSON
|
|
617
700
|
--dry-run Validate without deploying
|
|
618
701
|
--max-iterations Max refinement iterations (default: 3)
|
|
@@ -620,6 +703,8 @@ Options:
|
|
|
620
703
|
--task-description Optional task description context
|
|
621
704
|
--few-shot Comma-separated paths to example skill files
|
|
622
705
|
--validation-model Model for trigger-check validation calls (overrides --student-model for validation)
|
|
706
|
+
--teacher-effort Effort level for teacher LLM: low, medium, high, max (default: high)
|
|
707
|
+
--review Run evolution-reviewer subagent before deployment (Gate 4)
|
|
623
708
|
--help Show this help message`);
|
|
624
709
|
process.exit(0);
|
|
625
710
|
}
|
|
@@ -669,6 +754,8 @@ Options:
|
|
|
669
754
|
fewShotExamples,
|
|
670
755
|
gradingResults,
|
|
671
756
|
validationModel: values["validation-model"],
|
|
757
|
+
teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
|
|
758
|
+
useReviewer: values.review ?? false,
|
|
672
759
|
});
|
|
673
760
|
|
|
674
761
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -36,8 +36,7 @@ import type {
|
|
|
36
36
|
SessionTelemetryRecord,
|
|
37
37
|
SkillUsageRecord,
|
|
38
38
|
} from "../types.js";
|
|
39
|
-
import { parseFrontmatter,
|
|
40
|
-
|
|
39
|
+
import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
|
|
41
40
|
import { createEvolveTUI } from "../utils/tui.js";
|
|
42
41
|
import { appendAuditEntry } from "./audit.js";
|
|
43
42
|
import { checkConstitution } from "./constitutional.js";
|
|
@@ -959,11 +958,8 @@ export async function evolve(
|
|
|
959
958
|
copyFileSync(skillPath, backupPath);
|
|
960
959
|
tui.done(`Backup created at ${backupPath}`);
|
|
961
960
|
|
|
962
|
-
// Replace the frontmatter
|
|
963
|
-
const updatedContent =
|
|
964
|
-
rawContent,
|
|
965
|
-
lastProposal.proposed_description,
|
|
966
|
-
);
|
|
961
|
+
// Replace the description (handles both frontmatter and plain markdown)
|
|
962
|
+
const updatedContent = replaceDescription(rawContent, lastProposal.proposed_description);
|
|
967
963
|
writeFileSync(skillPath, updatedContent, "utf-8");
|
|
968
964
|
tui.done(`Deployed updated description to ${skillPath}`);
|
|
969
965
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
|
|
10
|
-
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
|
+
import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
11
11
|
|
|
12
12
|
// ---------------------------------------------------------------------------
|
|
13
13
|
// System prompt
|
|
@@ -160,6 +160,7 @@ export async function generateBodyProposal(
|
|
|
160
160
|
modelFlag?: string,
|
|
161
161
|
fewShotExamples?: string[],
|
|
162
162
|
executionContext?: ExecutionContext,
|
|
163
|
+
effort?: EffortLevel,
|
|
163
164
|
): Promise<BodyEvolutionProposal> {
|
|
164
165
|
const prompt = buildBodyGenerationPrompt(
|
|
165
166
|
currentContent,
|
|
@@ -169,7 +170,7 @@ export async function generateBodyProposal(
|
|
|
169
170
|
fewShotExamples,
|
|
170
171
|
executionContext,
|
|
171
172
|
);
|
|
172
|
-
const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag);
|
|
173
|
+
const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag, effort);
|
|
173
174
|
const { proposed_body, rationale, confidence } = parseBodyProposalResponse(rawResponse);
|
|
174
175
|
|
|
175
176
|
return {
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
|
|
9
|
-
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
9
|
+
import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
10
|
|
|
11
11
|
// ---------------------------------------------------------------------------
|
|
12
12
|
// System prompt
|
|
@@ -139,6 +139,7 @@ export async function generateRoutingProposal(
|
|
|
139
139
|
skillPath: string,
|
|
140
140
|
agent: string,
|
|
141
141
|
modelFlag?: string,
|
|
142
|
+
effort?: EffortLevel,
|
|
142
143
|
): Promise<BodyEvolutionProposal> {
|
|
143
144
|
const prompt = buildRoutingProposalPrompt(
|
|
144
145
|
currentRouting,
|
|
@@ -147,7 +148,7 @@ export async function generateRoutingProposal(
|
|
|
147
148
|
missedQueries,
|
|
148
149
|
skillName,
|
|
149
150
|
);
|
|
150
|
-
const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag);
|
|
151
|
+
const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag, effort);
|
|
151
152
|
const { proposed_routing, rationale, confidence } = parseRoutingProposalResponse(rawResponse);
|
|
152
153
|
|
|
153
154
|
return {
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
|
|
9
|
-
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
9
|
+
import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
10
|
|
|
11
11
|
// ---------------------------------------------------------------------------
|
|
12
12
|
// System prompt
|
|
@@ -118,6 +118,7 @@ export async function refineBodyProposal(
|
|
|
118
118
|
validationResult: BodyValidationResult,
|
|
119
119
|
agent: string,
|
|
120
120
|
modelFlag?: string,
|
|
121
|
+
effort?: EffortLevel,
|
|
121
122
|
): Promise<BodyEvolutionProposal> {
|
|
122
123
|
const prompt = buildRefinementPrompt(
|
|
123
124
|
proposal.proposed_body,
|
|
@@ -126,7 +127,7 @@ export async function refineBodyProposal(
|
|
|
126
127
|
validationResult.regressions,
|
|
127
128
|
);
|
|
128
129
|
|
|
129
|
-
const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag);
|
|
130
|
+
const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag, effort);
|
|
130
131
|
const { refined_body, changes_made, confidence } = parseRefinementResponse(rawResponse);
|
|
131
132
|
|
|
132
133
|
return {
|
|
@@ -13,8 +13,8 @@ import { parseArgs } from "node:util";
|
|
|
13
13
|
|
|
14
14
|
import { updateContextAfterRollback } from "../memory/writer.js";
|
|
15
15
|
import type { EvolutionAuditEntry } from "../types.js";
|
|
16
|
+
import { replaceDescription } from "../utils/frontmatter.js";
|
|
16
17
|
import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
|
|
17
|
-
import { replaceDescription } from "./deploy-proposal.js";
|
|
18
18
|
|
|
19
19
|
// ---------------------------------------------------------------------------
|
|
20
20
|
// Types
|
package/cli/selftune/export.ts
CHANGED
|
@@ -884,6 +884,14 @@ Options:
|
|
|
884
884
|
}
|
|
885
885
|
writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
|
|
886
886
|
|
|
887
|
+
// Persist to SQLite for upload staging (fail-open)
|
|
888
|
+
try {
|
|
889
|
+
const { writeGradingResultToDb } = await import("../localdb/direct-write.js");
|
|
890
|
+
writeGradingResultToDb(result);
|
|
891
|
+
} catch {
|
|
892
|
+
// fail-open: grading file is already written above
|
|
893
|
+
}
|
|
894
|
+
|
|
887
895
|
printSummary(result);
|
|
888
896
|
console.log(`\nWrote ${outputPath}`);
|
|
889
897
|
}
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
|
|
17
17
|
import { existsSync, readFileSync } from "node:fs";
|
|
18
18
|
import { basename, dirname, join } from "node:path";
|
|
19
|
-
import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
20
19
|
|
|
20
|
+
import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
21
21
|
import type { PreToolUsePayload } from "../types.js";
|
|
22
22
|
import { readJsonl } from "../utils/jsonl.js";
|
|
23
23
|
|