selftune 0.2.8 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -35
- package/apps/local-dashboard/dist/assets/index-BZVLv70T.js +16 -0
- package/apps/local-dashboard/dist/assets/{index-CRtLkBTi.css → index-Bs3Y4ixf.css} +1 -1
- package/apps/local-dashboard/dist/assets/{vendor-react-BQH_6WrG.js → vendor-react-BXP54cYo.js} +4 -4
- package/apps/local-dashboard/dist/assets/{vendor-table-dK1QMLq9.js → vendor-table-DTF_SXoy.js} +1 -1
- package/apps/local-dashboard/dist/assets/{vendor-ui-CO2mrx6e.js → vendor-ui-CWU0d1wd.js} +66 -66
- package/apps/local-dashboard/dist/index.html +15 -15
- package/bin/selftune.cjs +1 -1
- package/cli/selftune/activation-rules.ts +37 -18
- package/cli/selftune/agent-guidance.ts +16 -16
- package/cli/selftune/alpha-identity.ts +1 -2
- package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
- package/cli/selftune/alpha-upload/flush.ts +2 -2
- package/cli/selftune/alpha-upload/stage-canonical.ts +106 -3
- package/cli/selftune/auth/device-code.ts +32 -0
- package/cli/selftune/auto-update.ts +12 -0
- package/cli/selftune/badge/badge.ts +1 -0
- package/cli/selftune/canonical-export.ts +5 -0
- package/cli/selftune/claude-agents.ts +154 -0
- package/cli/selftune/contribute/bundle.ts +2 -0
- package/cli/selftune/contribute/contribute.ts +1 -0
- package/cli/selftune/cron/setup.ts +2 -2
- package/cli/selftune/dashboard-contract.ts +1 -1
- package/cli/selftune/dashboard-server.ts +11 -52
- package/cli/selftune/eval/hooks-to-evals.ts +13 -6
- package/cli/selftune/eval/import-skillsbench.ts +1 -0
- package/cli/selftune/eval/synthetic-evals.ts +2 -3
- package/cli/selftune/eval/unit-test.ts +1 -0
- package/cli/selftune/evolution/deploy-proposal.ts +1 -0
- package/cli/selftune/evolution/evolve-body.ts +93 -6
- package/cli/selftune/evolution/evolve.ts +0 -1
- package/cli/selftune/evolution/propose-body.ts +3 -2
- package/cli/selftune/evolution/propose-routing.ts +3 -2
- package/cli/selftune/evolution/refine-body.ts +3 -2
- package/cli/selftune/export.ts +1 -0
- package/cli/selftune/grading/auto-grade.ts +1 -0
- package/cli/selftune/grading/grade-session.ts +9 -0
- package/cli/selftune/hooks/auto-activate.ts +6 -0
- package/cli/selftune/hooks/evolution-guard.ts +12 -15
- package/cli/selftune/hooks/prompt-log.ts +1 -0
- package/cli/selftune/hooks/session-stop.ts +34 -40
- package/cli/selftune/hooks/skill-change-guard.ts +1 -0
- package/cli/selftune/hooks/skill-eval.ts +1 -1
- package/cli/selftune/index.ts +23 -14
- package/cli/selftune/ingestors/claude-replay.ts +1 -0
- package/cli/selftune/ingestors/codex-rollout.ts +1 -0
- package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
- package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
- package/cli/selftune/init.ts +197 -96
- package/cli/selftune/localdb/db.ts +1 -0
- package/cli/selftune/localdb/direct-write.ts +93 -12
- package/cli/selftune/localdb/materialize.ts +2 -0
- package/cli/selftune/localdb/queries.ts +210 -0
- package/cli/selftune/localdb/schema.ts +72 -1
- package/cli/selftune/monitoring/watch.ts +1 -0
- package/cli/selftune/normalization.ts +4 -0
- package/cli/selftune/observability.ts +14 -7
- package/cli/selftune/orchestrate.ts +15 -37
- package/cli/selftune/repair/skill-usage.ts +7 -3
- package/cli/selftune/routes/orchestrate-runs.ts +1 -0
- package/cli/selftune/routes/overview.ts +1 -0
- package/cli/selftune/routes/skill-report.ts +1 -0
- package/cli/selftune/sync.ts +31 -1
- package/cli/selftune/types.ts +2 -2
- package/cli/selftune/uninstall.ts +412 -0
- package/cli/selftune/utils/canonical-log.ts +2 -0
- package/cli/selftune/utils/jsonl.ts +1 -0
- package/cli/selftune/utils/llm-call.ts +131 -3
- package/cli/selftune/utils/skill-log.ts +1 -0
- package/cli/selftune/utils/transcript.ts +1 -0
- package/cli/selftune/utils/trigger-check.ts +1 -1
- package/cli/selftune/workflows/skill-md-writer.ts +5 -5
- package/cli/selftune/workflows/workflows.ts +1 -0
- package/package.json +38 -33
- package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
- package/packages/telemetry-contract/package.json +3 -3
- package/packages/telemetry-contract/src/index.ts +0 -1
- package/packages/telemetry-contract/src/schemas.ts +6 -24
- package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
- package/packages/ui/README.md +35 -34
- package/packages/ui/package.json +3 -3
- package/packages/ui/src/components/ActivityTimeline.tsx +49 -42
- package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
- package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
- package/packages/ui/src/components/InfoTip.tsx +4 -3
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
- package/packages/ui/src/components/section-cards.tsx +19 -24
- package/packages/ui/src/components/skill-health-grid.tsx +213 -193
- package/packages/ui/src/lib/constants.tsx +1 -0
- package/packages/ui/src/primitives/badge.tsx +12 -15
- package/packages/ui/src/primitives/button.tsx +7 -7
- package/packages/ui/src/primitives/card.tsx +15 -26
- package/packages/ui/src/primitives/checkbox.tsx +7 -8
- package/packages/ui/src/primitives/collapsible.tsx +5 -5
- package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
- package/packages/ui/src/primitives/label.tsx +6 -6
- package/packages/ui/src/primitives/select.tsx +28 -37
- package/packages/ui/src/primitives/table.tsx +17 -44
- package/packages/ui/src/primitives/tabs.tsx +14 -21
- package/packages/ui/src/primitives/tooltip.tsx +10 -22
- package/skill/SKILL.md +72 -59
- package/skill/Workflows/AlphaUpload.md +4 -4
- package/skill/Workflows/AutoActivation.md +11 -6
- package/skill/Workflows/Badge.md +22 -16
- package/skill/Workflows/Baseline.md +34 -36
- package/skill/Workflows/Composability.md +16 -11
- package/skill/Workflows/Contribute.md +26 -21
- package/skill/Workflows/Cron.md +23 -22
- package/skill/Workflows/Dashboard.md +40 -40
- package/skill/Workflows/Doctor.md +40 -34
- package/skill/Workflows/Evals.md +48 -47
- package/skill/Workflows/EvolutionMemory.md +31 -21
- package/skill/Workflows/Evolve.md +84 -82
- package/skill/Workflows/EvolveBody.md +58 -47
- package/skill/Workflows/Grade.md +16 -13
- package/skill/Workflows/ImportSkillsBench.md +9 -6
- package/skill/Workflows/Ingest.md +36 -21
- package/skill/Workflows/Initialize.md +138 -97
- package/skill/Workflows/Orchestrate.md +22 -16
- package/skill/Workflows/Replay.md +12 -7
- package/skill/Workflows/Rollback.md +13 -6
- package/skill/Workflows/Schedule.md +6 -6
- package/skill/Workflows/Sync.md +18 -11
- package/skill/Workflows/UnitTest.md +28 -17
- package/skill/Workflows/Watch.md +28 -21
- package/skill/agents/diagnosis-analyst.md +11 -0
- package/skill/agents/evolution-reviewer.md +15 -1
- package/skill/agents/integration-guide.md +10 -0
- package/skill/agents/pattern-analyst.md +12 -1
- package/skill/references/grading-methodology.md +23 -24
- package/skill/references/interactive-config.md +7 -7
- package/skill/references/invocation-taxonomy.md +22 -20
- package/skill/references/logs.md +20 -6
- package/skill/references/setup-patterns.md +4 -2
- package/.claude/agents/diagnosis-analyst.md +0 -156
- package/.claude/agents/evolution-reviewer.md +0 -180
- package/.claude/agents/integration-guide.md +0 -212
- package/.claude/agents/pattern-analyst.md +0 -160
- package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +0 -15
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { dirname, join, resolve } from "node:path";
|
|
4
|
+
|
|
5
|
+
/** Name of the bookkeeping file stored next to the installed agent files. */
const MANIFEST_FILENAME = ".selftune-manifest.json";

/**
 * Agent file names that are always treated as selftune-managed, whether or
 * not a manifest lists them.
 */
const LEGACY_SELFTUNE_AGENT_FILES = [
  "diagnosis-analyst.md",
  "evolution-reviewer.md",
  "integration-guide.md",
  "pattern-analyst.md",
] as const;

/**
 * Directory holding the agent markdown files bundled with the package:
 * two levels above this module's directory, then `skill/agents`.
 */
const BUNDLED_AGENT_DIR = resolve(
  dirname(import.meta.path),
  "..",
  "..",
  "skill",
  "agents",
);

/** On-disk shape of the manifest recording which agent files were synced. */
interface AgentManifest {
  /** Manifest format version; only `1` is defined. */
  version: 1;
  /** File names of the agent files recorded by the last sync. */
  files: string[];
  /** ISO-8601 timestamp of the last sync (empty string when unknown). */
  synced_at: string;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Report whether `name` is a bare file name that is safe to join onto the
 * agents directory: non-empty, not `.`/`..`, and free of path separators and
 * NUL bytes.
 */
function isPlainManifestEntry(name: string): boolean {
  if (name.length === 0 || name === "." || name === "..") return false;
  return !/[\\/\0]/.test(name);
}

/**
 * Load and sanitize the agent manifest stored at `path`.
 *
 * Manifest entries are later joined onto the agents directory and deleted, so
 * every entry that is not a plain file name (for example `../other.md` or an
 * absolute path) is discarded here. This keeps a corrupted or hand-edited
 * manifest from pointing removals outside the agents directory.
 *
 * @param path - Absolute path of the manifest JSON file.
 * @returns The normalized manifest, or `null` when the file is missing,
 *   unreadable, not valid JSON, not an object, or has no `files` array.
 */
function readManifest(path: string): AgentManifest | null {
  try {
    if (!existsSync(path)) return null;

    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
    if (typeof parsed !== "object" || parsed === null) return null;

    const { files, synced_at: syncedAt } = parsed as Partial<AgentManifest>;
    if (!Array.isArray(files)) return null;

    return {
      version: 1,
      files: files.filter(
        (name): name is string => typeof name === "string" && isPlainManifestEntry(name),
      ),
      synced_at: typeof syncedAt === "string" ? syncedAt : "",
    };
  } catch {
    // Best effort: an unreadable or malformed manifest is treated as absent.
    return null;
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Persist a version-1 manifest at `path`.
 *
 * @param path - Destination of the manifest JSON file.
 * @param files - Agent file names to record; stored sorted, and the caller's
 *   array is left untouched.
 */
function writeManifest(path: string, files: string[]): void {
  const sortedNames = files.slice().sort();
  const syncedAt = new Date().toISOString();
  const payload: AgentManifest = {
    version: 1,
    files: sortedNames,
    synced_at: syncedAt,
  };
  const serialized = JSON.stringify(payload, null, 2);
  writeFileSync(path, serialized, "utf-8");
}
|
|
45
|
+
|
|
46
|
+
/**
 * Read a UTF-8 text file, tolerating absence and read failures.
 *
 * @param path - File to read.
 * @returns The file contents, or `null` when the path does not exist or the
 *   read throws.
 */
function readTextIfExists(path: string): string | null {
  let text: string | null = null;
  try {
    if (existsSync(path)) {
      text = readFileSync(path, "utf-8");
    }
  } catch {
    text = null;
  }
  return text;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Resolve the directory where Claude agent files are installed.
 *
 * @param homeDir - Home directory to resolve against; defaults to the current
 *   user's home directory.
 * @returns `<homeDir>/.claude/agents`.
 */
export function getClaudeAgentsDir(homeDir = homedir()): string {
  const segments = [homeDir, ".claude", "agents"];
  return join(...segments);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Resolve the path of the selftune manifest inside the Claude agents directory.
 *
 * @param homeDir - Home directory to resolve against; defaults to the current
 *   user's home directory.
 * @returns The manifest file path under the agents directory.
 */
export function getClaudeAgentManifestPath(homeDir = homedir()): string {
  const agentsDir = getClaudeAgentsDir(homeDir);
  return join(agentsDir, MANIFEST_FILENAME);
}
|
|
62
|
+
|
|
63
|
+
/**
 * List the markdown agent files available in a source directory.
 *
 * @param sourceDir - Directory to scan; defaults to the bundled agents
 *   directory.
 * @returns Sorted names of entries ending in `.md`, or an empty array when
 *   the directory is missing or cannot be listed.
 */
export function listBundledAgentFiles(sourceDir = BUNDLED_AGENT_DIR): string[] {
  const markdownNames: string[] = [];
  try {
    if (existsSync(sourceDir)) {
      for (const entry of readdirSync(sourceDir)) {
        if (entry.endsWith(".md")) markdownNames.push(entry);
      }
    }
  } catch {
    return [];
  }
  return markdownNames.sort();
}
|
|
73
|
+
|
|
74
|
+
/**
 * Synchronize the bundled agent files into the Claude agents directory.
 *
 * Managed files (the legacy names plus those listed in the existing manifest)
 * that are no longer bundled are deleted. Each bundled file is then written
 * when its content differs from the installed copy, or unconditionally when
 * `force` is set. Finally the manifest is rewritten with the bundled file
 * names.
 *
 * @param options.homeDir - Home directory to install under; defaults to the
 *   current user's home directory.
 * @param options.force - Rewrite every bundled file even if unchanged.
 * @param options.sourceDir - Directory to copy from; defaults to the bundled
 *   agents directory.
 * @returns Sorted names of files that were removed or written. Empty when the
 *   source directory yields no agent files (nothing is touched in that case).
 */
export function installAgentFiles(options?: {
  homeDir?: string;
  force?: boolean;
  sourceDir?: string;
}): string[] {
  const home = options?.homeDir ?? homedir();
  const agentsDir = getClaudeAgentsDir(home);
  const manifestFile = getClaudeAgentManifestPath(home);
  const bundleDir = options?.sourceDir ?? BUNDLED_AGENT_DIR;

  const bundled = listBundledAgentFiles(bundleDir);
  if (bundled.length === 0) return [];

  mkdirSync(agentsDir, { recursive: true });

  const previouslySynced = readManifest(manifestFile)?.files ?? [];
  const managed = new Set<string>([...LEGACY_SELFTUNE_AGENT_FILES, ...previouslySynced]);
  const bundledNames = new Set(bundled);
  const touched = new Set<string>();

  // Pass 1: drop managed files that the bundle no longer ships.
  for (const name of managed) {
    const candidate = join(agentsDir, name);
    if (bundledNames.has(name) || !existsSync(candidate)) continue;
    rmSync(candidate, { force: true });
    touched.add(name);
  }

  // Pass 2: copy bundled files whose content changed (or all, when forced).
  for (const name of bundled) {
    const incoming = readTextIfExists(join(bundleDir, name));
    if (incoming === null) continue;

    const destination = join(agentsDir, name);
    const installed = readTextIfExists(destination);
    if (!options?.force && installed === incoming) continue;

    writeFileSync(destination, incoming, "utf-8");
    touched.add(name);
  }

  writeManifest(manifestFile, bundled);
  return Array.from(touched).sort();
}
|
|
121
|
+
|
|
122
|
+
/**
 * Remove every selftune-managed agent file, and the manifest itself, from the
 * Claude agents directory.
 *
 * The managed set is the union of the legacy file names, the currently
 * bundled agent files, and the names recorded in the manifest.
 *
 * @param options.homeDir - Home directory to operate under; defaults to the
 *   current user's home directory.
 * @param options.dryRun - When set, nothing is deleted; existing paths are
 *   only reported.
 * @returns The number of existing paths found, and their full paths (agent
 *   files first, manifest last).
 */
export function removeInstalledAgentFiles(options?: { homeDir?: string; dryRun?: boolean }): {
  removed: number;
  files: string[];
} {
  const home = options?.homeDir ?? homedir();
  const agentsDir = getClaudeAgentsDir(home);
  const manifestFile = getClaudeAgentManifestPath(home);

  const recorded = readManifest(manifestFile)?.files ?? [];
  const managed = new Set<string>([
    ...LEGACY_SELFTUNE_AGENT_FILES,
    ...listBundledAgentFiles(),
    ...recorded,
  ]);

  // Agent files are handled first; the manifest is always the final target.
  const targets = Array.from(managed, (name) => join(agentsDir, name));
  targets.push(manifestFile);

  const affected: string[] = [];
  for (const target of targets) {
    if (!existsSync(target)) continue;
    if (!options?.dryRun) {
      rmSync(target, { force: true });
    }
    affected.push(target);
  }

  return { removed: affected.length, files: affected };
}
|
|
@@ -8,6 +8,7 @@ import { randomUUID } from "node:crypto";
|
|
|
8
8
|
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
|
9
9
|
import { homedir } from "node:os";
|
|
10
10
|
import { join } from "node:path";
|
|
11
|
+
|
|
11
12
|
import {
|
|
12
13
|
EVOLUTION_AUDIT_LOG,
|
|
13
14
|
QUERY_LOG,
|
|
@@ -224,6 +225,7 @@ export function assembleBundle(options: {
|
|
|
224
225
|
let allEvolutionRecords: EvolutionAuditEntry[];
|
|
225
226
|
|
|
226
227
|
if (useJsonl) {
|
|
228
|
+
// JSONL fallback: only used when custom (non-default) log paths are provided (test isolation)
|
|
227
229
|
allSkillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
|
|
228
230
|
allQueryRecords = readJsonl<QueryLogRecord>(queryLogPath);
|
|
229
231
|
allTelemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { spawnSync } from "node:child_process";
|
|
11
11
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
12
12
|
import { parseArgs } from "node:util";
|
|
13
|
+
|
|
13
14
|
import { CONTRIBUTIONS_DIR } from "../constants.js";
|
|
14
15
|
import { assembleBundle } from "./bundle.js";
|
|
15
16
|
import { sanitizeBundle } from "./sanitize.js";
|
|
@@ -46,10 +46,10 @@ export const DEFAULT_CRON_JOBS: CronJobConfig[] = [
|
|
|
46
46
|
},
|
|
47
47
|
{
|
|
48
48
|
name: "selftune-orchestrate",
|
|
49
|
-
cron: "0 */
|
|
49
|
+
cron: "0 */2 * * *",
|
|
50
50
|
message:
|
|
51
51
|
"Run selftune orchestrate --max-skills 3. This performs source-truth sync, selects candidate skills, evolves validated low-risk descriptions autonomously, and watches recent deployments for regressions.",
|
|
52
|
-
description: "Autonomous improvement loop every
|
|
52
|
+
description: "Autonomous improvement loop every 2 hours",
|
|
53
53
|
},
|
|
54
54
|
];
|
|
55
55
|
|
|
@@ -199,7 +199,7 @@ export interface HealthResponse {
|
|
|
199
199
|
db_path: string;
|
|
200
200
|
log_dir: string;
|
|
201
201
|
config_dir: string;
|
|
202
|
-
watcher_mode: "jsonl" | "none";
|
|
202
|
+
watcher_mode: "wal" | "jsonl" | "none";
|
|
203
203
|
process_mode: "standalone" | "dev-server" | "test";
|
|
204
204
|
host: string;
|
|
205
205
|
port: number;
|
|
@@ -17,16 +17,11 @@
|
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
import type { Database } from "bun:sqlite";
|
|
20
|
-
import { existsSync,
|
|
20
|
+
import { existsSync, readFileSync, unwatchFile, watchFile } from "node:fs";
|
|
21
21
|
import { dirname, extname, isAbsolute, join, relative, resolve } from "node:path";
|
|
22
|
+
|
|
22
23
|
import type { BadgeFormat } from "./badge/badge-svg.js";
|
|
23
|
-
import {
|
|
24
|
-
EVOLUTION_AUDIT_LOG,
|
|
25
|
-
LOG_DIR,
|
|
26
|
-
QUERY_LOG,
|
|
27
|
-
SELFTUNE_CONFIG_DIR,
|
|
28
|
-
TELEMETRY_LOG,
|
|
29
|
-
} from "./constants.js";
|
|
24
|
+
import { LOG_DIR, SELFTUNE_CONFIG_DIR } from "./constants.js";
|
|
30
25
|
import type {
|
|
31
26
|
HealthResponse,
|
|
32
27
|
OverviewResponse,
|
|
@@ -237,14 +232,14 @@ export async function startDashboardServer(
|
|
|
237
232
|
}
|
|
238
233
|
}, SSE_KEEPALIVE_MS);
|
|
239
234
|
|
|
240
|
-
// --
|
|
241
|
-
const
|
|
242
|
-
|
|
235
|
+
// -- SQLite WAL watcher for push-based updates ------------------------------
|
|
236
|
+
const walPath = `${DB_PATH}-wal`;
|
|
237
|
+
let walWatcherActive = false;
|
|
243
238
|
|
|
244
239
|
let fsDebounceTimer: ReturnType<typeof setTimeout> | null = null;
|
|
245
240
|
const FS_DEBOUNCE_MS = 500;
|
|
246
241
|
|
|
247
|
-
function
|
|
242
|
+
function onWALChange(): void {
|
|
248
243
|
if (fsDebounceTimer) return;
|
|
249
244
|
fsDebounceTimer = setTimeout(() => {
|
|
250
245
|
fsDebounceTimer = null;
|
|
@@ -253,47 +248,11 @@ export async function startDashboardServer(
|
|
|
253
248
|
}, FS_DEBOUNCE_MS);
|
|
254
249
|
}
|
|
255
250
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
let directoryWatcherActive = false;
|
|
259
|
-
|
|
260
|
-
function registerFileWatcher(logPath: string): void {
|
|
261
|
-
if (watchedFiles.has(logPath) || !existsSync(logPath)) return;
|
|
262
|
-
try {
|
|
263
|
-
fileWatchers.push(fsWatch(logPath, onLogFileChange));
|
|
264
|
-
watchedFiles.add(logPath);
|
|
265
|
-
} catch {
|
|
266
|
-
// Non-fatal: fall back to polling if watch fails
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
for (const logPath of WATCHED_LOGS) {
|
|
271
|
-
registerFileWatcher(logPath);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
try {
|
|
275
|
-
fileWatchers.push(
|
|
276
|
-
fsWatch(LOG_DIR, (_eventType, filename) => {
|
|
277
|
-
if (typeof filename !== "string" || filename.length === 0) return;
|
|
278
|
-
const fullPath = join(LOG_DIR, filename);
|
|
279
|
-
if (!watchedLogPaths.has(fullPath)) return;
|
|
280
|
-
registerFileWatcher(fullPath);
|
|
281
|
-
onLogFileChange();
|
|
282
|
-
}),
|
|
283
|
-
);
|
|
284
|
-
directoryWatcherActive = true;
|
|
285
|
-
} catch {
|
|
286
|
-
directoryWatcherActive = false;
|
|
287
|
-
}
|
|
251
|
+
watchFile(walPath, { interval: 500 }, onWALChange);
|
|
252
|
+
walWatcherActive = true;
|
|
288
253
|
|
|
289
254
|
function getWatcherMode(): HealthResponse["watcher_mode"] {
|
|
290
|
-
return
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
if (runtimeMode !== "test" && getWatcherMode() === "jsonl") {
|
|
294
|
-
console.warn(
|
|
295
|
-
"Dashboard freshness mode: JSONL watcher invalidation (legacy). Live updates can miss SQLite-only writes until WAL cutover lands.",
|
|
296
|
-
);
|
|
255
|
+
return walWatcherActive ? "wal" : "none";
|
|
297
256
|
}
|
|
298
257
|
|
|
299
258
|
let cachedStatusResult: StatusResult | null = null;
|
|
@@ -572,7 +531,7 @@ export async function startDashboardServer(
|
|
|
572
531
|
|
|
573
532
|
// Graceful shutdown
|
|
574
533
|
const shutdownHandler = () => {
|
|
575
|
-
|
|
534
|
+
unwatchFile(walPath, onWALChange);
|
|
576
535
|
clearInterval(sseKeepaliveTimer);
|
|
577
536
|
for (const c of sseClients) {
|
|
578
537
|
try {
|
|
@@ -4,19 +4,24 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Converts hook logs into trigger eval sets compatible with run_eval / run_loop.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
7
|
+
* Default read path is SQLite (via localdb/queries). JSONL fallback is used only
|
|
8
|
+
* when custom --skill-log / --query-log / --telemetry-log paths are supplied
|
|
9
|
+
* (test/custom-path override).
|
|
10
|
+
*
|
|
11
|
+
* Three underlying log sources (all written automatically by hooks):
|
|
12
|
+
* skill_usage - queries that DID trigger a skill
|
|
13
|
+
* query_log - ALL queries, triggered or not
|
|
14
|
+
* session_telemetry - per-session process metrics (Stop hook)
|
|
11
15
|
*
|
|
12
16
|
* For a given skill:
|
|
13
|
-
* Positives (should_trigger=true) -> queries in
|
|
14
|
-
* Negatives (should_trigger=false) -> queries in
|
|
17
|
+
* Positives (should_trigger=true) -> queries in skill_usage for that skill
|
|
18
|
+
* Negatives (should_trigger=false) -> queries in query_log that never triggered
|
|
15
19
|
* that skill (cross-skill AND untriggered queries)
|
|
16
20
|
*/
|
|
17
21
|
|
|
18
22
|
import { writeFileSync } from "node:fs";
|
|
19
23
|
import { parseArgs } from "node:util";
|
|
24
|
+
|
|
20
25
|
import { GENERIC_NEGATIVES, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
|
|
21
26
|
import { getDb } from "../localdb/db.js";
|
|
22
27
|
import {
|
|
@@ -468,6 +473,7 @@ export async function cliMain(): Promise<void> {
|
|
|
468
473
|
let queryRecords: QueryLogRecord[];
|
|
469
474
|
let telemetryRecords: SessionTelemetryRecord[];
|
|
470
475
|
|
|
476
|
+
// SQLite is the default path; JSONL fallback only for custom --*-log overrides
|
|
471
477
|
if (
|
|
472
478
|
skillLogPath === SKILL_LOG &&
|
|
473
479
|
queryLogPath === QUERY_LOG &&
|
|
@@ -478,6 +484,7 @@ export async function cliMain(): Promise<void> {
|
|
|
478
484
|
queryRecords = queryQueryLog(db) as QueryLogRecord[];
|
|
479
485
|
telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
480
486
|
} else {
|
|
487
|
+
// test/custom-path fallback
|
|
481
488
|
skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
|
|
482
489
|
queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
|
|
483
490
|
telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
14
14
|
import { join } from "node:path";
|
|
15
15
|
import { parseArgs } from "node:util";
|
|
16
|
+
|
|
16
17
|
import type { EvalEntry, SkillsBenchTask } from "../types.js";
|
|
17
18
|
|
|
18
19
|
// ---------------------------------------------------------------------------
|
|
@@ -181,9 +181,8 @@ export async function generateSyntheticEvals(
|
|
|
181
181
|
try {
|
|
182
182
|
const { getDb } = await import("../localdb/db.js");
|
|
183
183
|
const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
|
|
184
|
-
const { isHighConfidencePositiveSkillRecord } =
|
|
185
|
-
"../utils/skill-usage-confidence.js"
|
|
186
|
-
);
|
|
184
|
+
const { isHighConfidencePositiveSkillRecord } =
|
|
185
|
+
await import("../utils/skill-usage-confidence.js");
|
|
187
186
|
|
|
188
187
|
const db = getDb();
|
|
189
188
|
|
|
@@ -25,7 +25,8 @@ import type {
|
|
|
25
25
|
QueryLogRecord,
|
|
26
26
|
SkillUsageRecord,
|
|
27
27
|
} from "../types.js";
|
|
28
|
-
|
|
28
|
+
import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
|
|
29
|
+
import { callViaSubagent } from "../utils/llm-call.js";
|
|
29
30
|
import { appendAuditEntry } from "./audit.js";
|
|
30
31
|
import { checkConstitutionSizeOnly } from "./constitutional.js";
|
|
31
32
|
import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
|
|
@@ -57,6 +58,9 @@ export interface EvolveBodyOptions {
|
|
|
57
58
|
fewShotExamples?: string[];
|
|
58
59
|
gradingResults?: GradingResult[];
|
|
59
60
|
validationModel?: string;
|
|
61
|
+
teacherEffort?: EffortLevel;
|
|
62
|
+
/** Run evolution-reviewer subagent as Gate 4 before deployment. */
|
|
63
|
+
useReviewer?: boolean;
|
|
60
64
|
}
|
|
61
65
|
|
|
62
66
|
export interface EvolveBodyResult {
|
|
@@ -89,6 +93,7 @@ export interface EvolveBodyDeps {
|
|
|
89
93
|
readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
|
|
90
94
|
readFileSync?: typeof readFileSync;
|
|
91
95
|
writeFileSync?: (path: string, data: string, encoding: string) => void;
|
|
96
|
+
callViaSubagent?: (options: SubagentCallOptions) => Promise<string>;
|
|
92
97
|
}
|
|
93
98
|
|
|
94
99
|
// ---------------------------------------------------------------------------
|
|
@@ -110,6 +115,19 @@ function createAuditEntry(
|
|
|
110
115
|
};
|
|
111
116
|
}
|
|
112
117
|
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
// Pipeline defaults — enforced even when the calling agent omits flags
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
/** Default teacher model: Opus 4.6 for highest-quality proposals. */
|
|
123
|
+
const DEFAULT_TEACHER_MODEL = "opus";
|
|
124
|
+
|
|
125
|
+
/** Default student model: Haiku for cheap, fast validation gates. */
|
|
126
|
+
const DEFAULT_STUDENT_MODEL = "haiku";
|
|
127
|
+
|
|
128
|
+
/** Default teacher effort: extended thinking for multi-constraint reasoning. */
|
|
129
|
+
const DEFAULT_TEACHER_EFFORT: EffortLevel = "high";
|
|
130
|
+
|
|
113
131
|
// ---------------------------------------------------------------------------
|
|
114
132
|
// Main orchestrator
|
|
115
133
|
// ---------------------------------------------------------------------------
|
|
@@ -124,8 +142,6 @@ export async function evolveBody(
|
|
|
124
142
|
target,
|
|
125
143
|
teacherAgent,
|
|
126
144
|
studentAgent,
|
|
127
|
-
teacherModel,
|
|
128
|
-
studentModel,
|
|
129
145
|
evalSetPath,
|
|
130
146
|
dryRun,
|
|
131
147
|
maxIterations,
|
|
@@ -133,6 +149,11 @@ export async function evolveBody(
|
|
|
133
149
|
fewShotExamples,
|
|
134
150
|
} = options;
|
|
135
151
|
|
|
152
|
+
// Apply pipeline defaults for models/effort when not explicitly provided
|
|
153
|
+
const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
|
|
154
|
+
const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
|
|
155
|
+
const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
|
|
156
|
+
|
|
136
157
|
// Resolve injectable dependencies
|
|
137
158
|
const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
|
|
138
159
|
const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
|
|
@@ -151,6 +172,7 @@ export async function evolveBody(
|
|
|
151
172
|
});
|
|
152
173
|
const _readFileSync = _deps.readFileSync ?? readFileSync;
|
|
153
174
|
const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
|
|
175
|
+
const _callViaSubagent = _deps.callViaSubagent ?? callViaSubagent;
|
|
154
176
|
|
|
155
177
|
const auditEntries: EvolutionAuditEntry[] = [];
|
|
156
178
|
|
|
@@ -306,6 +328,7 @@ export async function evolveBody(
|
|
|
306
328
|
skillPath,
|
|
307
329
|
teacherAgent,
|
|
308
330
|
teacherModel,
|
|
331
|
+
teacherEffort,
|
|
309
332
|
);
|
|
310
333
|
} else {
|
|
311
334
|
proposal = await _generateBodyProposal(
|
|
@@ -318,6 +341,7 @@ export async function evolveBody(
|
|
|
318
341
|
teacherModel,
|
|
319
342
|
fewShotExamples,
|
|
320
343
|
executionContext,
|
|
344
|
+
teacherEffort,
|
|
321
345
|
);
|
|
322
346
|
}
|
|
323
347
|
} else if (lastProposal && lastValidation) {
|
|
@@ -327,6 +351,7 @@ export async function evolveBody(
|
|
|
327
351
|
lastValidation,
|
|
328
352
|
teacherAgent,
|
|
329
353
|
teacherModel,
|
|
354
|
+
options.teacherEffort,
|
|
330
355
|
);
|
|
331
356
|
} else {
|
|
332
357
|
break;
|
|
@@ -496,7 +521,63 @@ export async function evolveBody(
|
|
|
496
521
|
}
|
|
497
522
|
}
|
|
498
523
|
|
|
499
|
-
// Step 5:
|
|
524
|
+
// Step 5: Optional evolution-reviewer gate (Gate 4)
|
|
525
|
+
if (options.useReviewer && lastProposal && lastValidation?.improved) {
|
|
526
|
+
try {
|
|
527
|
+
const reviewPrompt = [
|
|
528
|
+
`Review this ${target} evolution proposal for the "${skillName}" skill.`,
|
|
529
|
+
``,
|
|
530
|
+
`Proposal ID: ${lastProposal.proposal_id}`,
|
|
531
|
+
`Skill path: ${skillPath}`,
|
|
532
|
+
`Target: ${target}`,
|
|
533
|
+
`Confidence: ${lastProposal.confidence}`,
|
|
534
|
+
`Validation: ${lastValidation.gates_passed}/${lastValidation.gates_total} gates passed`,
|
|
535
|
+
`Regressions: ${lastValidation.regressions.length > 0 ? lastValidation.regressions.join(", ") : "none"}`,
|
|
536
|
+
``,
|
|
537
|
+
`Original content:`,
|
|
538
|
+
lastProposal.original_body,
|
|
539
|
+
``,
|
|
540
|
+
`Proposed content:`,
|
|
541
|
+
lastProposal.proposed_body,
|
|
542
|
+
``,
|
|
543
|
+
`Rationale: ${lastProposal.rationale}`,
|
|
544
|
+
].join("\n");
|
|
545
|
+
|
|
546
|
+
const reviewOutput = await _callViaSubagent({
|
|
547
|
+
agentName: "evolution-reviewer",
|
|
548
|
+
prompt: reviewPrompt,
|
|
549
|
+
maxTurns: 8,
|
|
550
|
+
allowedTools: ["Read", "Grep", "Glob", "Bash"],
|
|
551
|
+
});
|
|
552
|
+
|
|
553
|
+
const isRejected = /\bREJECT\b/.test(reviewOutput) && !/\bAPPROVE\b/.test(reviewOutput);
|
|
554
|
+
recordAudit(
|
|
555
|
+
lastProposal.proposal_id,
|
|
556
|
+
isRejected ? "rejected" : "validated",
|
|
557
|
+
`Evolution reviewer: ${isRejected ? "REJECTED" : "APPROVED"}`,
|
|
558
|
+
);
|
|
559
|
+
|
|
560
|
+
if (isRejected) {
|
|
561
|
+
return {
|
|
562
|
+
proposal: lastProposal,
|
|
563
|
+
validation: lastValidation,
|
|
564
|
+
deployed: false,
|
|
565
|
+
auditEntries,
|
|
566
|
+
reason: `Evolution reviewer rejected proposal: ${reviewOutput.slice(0, 500)}`,
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
} catch (reviewError) {
|
|
570
|
+
// Fail-open: if reviewer crashes, log it and continue to deploy
|
|
571
|
+
const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
|
|
572
|
+
recordAudit(
|
|
573
|
+
lastProposal.proposal_id,
|
|
574
|
+
"validated",
|
|
575
|
+
`Evolution reviewer failed (fail-open): ${msg}`,
|
|
576
|
+
);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
// Step 6: Deploy or dry-run
|
|
500
581
|
if (dryRun) {
|
|
501
582
|
return {
|
|
502
583
|
proposal: lastProposal,
|
|
@@ -594,6 +675,8 @@ export async function cliMain(): Promise<void> {
|
|
|
594
675
|
"task-description": { type: "string" },
|
|
595
676
|
"few-shot": { type: "string" },
|
|
596
677
|
"validation-model": { type: "string" },
|
|
678
|
+
"teacher-effort": { type: "string", default: "high" },
|
|
679
|
+
review: { type: "boolean", default: false },
|
|
597
680
|
help: { type: "boolean", default: false },
|
|
598
681
|
},
|
|
599
682
|
strict: true,
|
|
@@ -611,8 +694,8 @@ Options:
|
|
|
611
694
|
--target Evolution target: body, routing (default: body)
|
|
612
695
|
--teacher-agent Teacher agent CLI (claude, codex, etc.)
|
|
613
696
|
--student-agent Student agent CLI for validation
|
|
614
|
-
--teacher-model Model flag for teacher agent
|
|
615
|
-
--student-model Model flag for student agent
|
|
697
|
+
--teacher-model Model flag for teacher agent (default: opus)
|
|
698
|
+
--student-model Model flag for student agent (default: haiku)
|
|
616
699
|
--eval-set Path to eval set JSON
|
|
617
700
|
--dry-run Validate without deploying
|
|
618
701
|
--max-iterations Max refinement iterations (default: 3)
|
|
@@ -620,6 +703,8 @@ Options:
|
|
|
620
703
|
--task-description Optional task description context
|
|
621
704
|
--few-shot Comma-separated paths to example skill files
|
|
622
705
|
--validation-model Model for trigger-check validation calls (overrides --student-model for validation)
|
|
706
|
+
--teacher-effort Effort level for teacher LLM: low, medium, high, max (default: high)
|
|
707
|
+
--review Run evolution-reviewer subagent before deployment (Gate 4)
|
|
623
708
|
--help Show this help message`);
|
|
624
709
|
process.exit(0);
|
|
625
710
|
}
|
|
@@ -669,6 +754,8 @@ Options:
|
|
|
669
754
|
fewShotExamples,
|
|
670
755
|
gradingResults,
|
|
671
756
|
validationModel: values["validation-model"],
|
|
757
|
+
teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
|
|
758
|
+
useReviewer: values.review ?? false,
|
|
672
759
|
});
|
|
673
760
|
|
|
674
761
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -37,7 +37,6 @@ import type {
|
|
|
37
37
|
SkillUsageRecord,
|
|
38
38
|
} from "../types.js";
|
|
39
39
|
import { parseFrontmatter, replaceFrontmatterDescription } from "../utils/frontmatter.js";
|
|
40
|
-
|
|
41
40
|
import { createEvolveTUI } from "../utils/tui.js";
|
|
42
41
|
import { appendAuditEntry } from "./audit.js";
|
|
43
42
|
import { checkConstitution } from "./constitutional.js";
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
|
|
10
|
-
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
|
+
import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
11
11
|
|
|
12
12
|
// ---------------------------------------------------------------------------
|
|
13
13
|
// System prompt
|
|
@@ -160,6 +160,7 @@ export async function generateBodyProposal(
|
|
|
160
160
|
modelFlag?: string,
|
|
161
161
|
fewShotExamples?: string[],
|
|
162
162
|
executionContext?: ExecutionContext,
|
|
163
|
+
effort?: EffortLevel,
|
|
163
164
|
): Promise<BodyEvolutionProposal> {
|
|
164
165
|
const prompt = buildBodyGenerationPrompt(
|
|
165
166
|
currentContent,
|
|
@@ -169,7 +170,7 @@ export async function generateBodyProposal(
|
|
|
169
170
|
fewShotExamples,
|
|
170
171
|
executionContext,
|
|
171
172
|
);
|
|
172
|
-
const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag);
|
|
173
|
+
const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag, effort);
|
|
173
174
|
const { proposed_body, rationale, confidence } = parseBodyProposalResponse(rawResponse);
|
|
174
175
|
|
|
175
176
|
return {
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
|
|
9
|
-
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
9
|
+
import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
10
|
|
|
11
11
|
// ---------------------------------------------------------------------------
|
|
12
12
|
// System prompt
|
|
@@ -139,6 +139,7 @@ export async function generateRoutingProposal(
|
|
|
139
139
|
skillPath: string,
|
|
140
140
|
agent: string,
|
|
141
141
|
modelFlag?: string,
|
|
142
|
+
effort?: EffortLevel,
|
|
142
143
|
): Promise<BodyEvolutionProposal> {
|
|
143
144
|
const prompt = buildRoutingProposalPrompt(
|
|
144
145
|
currentRouting,
|
|
@@ -147,7 +148,7 @@ export async function generateRoutingProposal(
|
|
|
147
148
|
missedQueries,
|
|
148
149
|
skillName,
|
|
149
150
|
);
|
|
150
|
-
const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag);
|
|
151
|
+
const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag, effort);
|
|
151
152
|
const { proposed_routing, rationale, confidence } = parseRoutingProposalResponse(rawResponse);
|
|
152
153
|
|
|
153
154
|
return {
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
|
|
9
|
-
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
9
|
+
import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
10
|
|
|
11
11
|
// ---------------------------------------------------------------------------
|
|
12
12
|
// System prompt
|
|
@@ -118,6 +118,7 @@ export async function refineBodyProposal(
|
|
|
118
118
|
validationResult: BodyValidationResult,
|
|
119
119
|
agent: string,
|
|
120
120
|
modelFlag?: string,
|
|
121
|
+
effort?: EffortLevel,
|
|
121
122
|
): Promise<BodyEvolutionProposal> {
|
|
122
123
|
const prompt = buildRefinementPrompt(
|
|
123
124
|
proposal.proposed_body,
|
|
@@ -126,7 +127,7 @@ export async function refineBodyProposal(
|
|
|
126
127
|
validationResult.regressions,
|
|
127
128
|
);
|
|
128
129
|
|
|
129
|
-
const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag);
|
|
130
|
+
const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag, effort);
|
|
130
131
|
const { refined_body, changes_made, confidence } = parseRefinementResponse(rawResponse);
|
|
131
132
|
|
|
132
133
|
return {
|