selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
|
@@ -0,0 +1,1073 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* selftune orchestrate — Autonomous core loop: sync → status → evolve → watch.
|
|
3
|
+
*
|
|
4
|
+
* This is the single entry point for the closed-loop improvement cycle.
|
|
5
|
+
* It chains existing modules (sync, status, evolve, watch) into one
|
|
6
|
+
* coordinated run with explicit candidate selection and safety controls.
|
|
7
|
+
*
|
|
8
|
+
* Default behavior is autonomous for low-risk description evolution, with
|
|
9
|
+
* explicit dry-run and review-required modes for human-in-the-loop operation.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
13
|
+
import { homedir } from "node:os";
|
|
14
|
+
import { join } from "node:path";
|
|
15
|
+
import { parseArgs } from "node:util";
|
|
16
|
+
|
|
17
|
+
import {
|
|
18
|
+
EVOLUTION_AUDIT_LOG,
|
|
19
|
+
ORCHESTRATE_LOCK,
|
|
20
|
+
ORCHESTRATE_RUN_LOG,
|
|
21
|
+
QUERY_LOG,
|
|
22
|
+
SIGNAL_LOG,
|
|
23
|
+
TELEMETRY_LOG,
|
|
24
|
+
} from "./constants.js";
|
|
25
|
+
import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
|
|
26
|
+
import type { EvolveResult } from "./evolution/evolve.js";
|
|
27
|
+
import { readGradingResultsForSkill } from "./grading/results.js";
|
|
28
|
+
import type { WatchResult } from "./monitoring/watch.js";
|
|
29
|
+
import { doctor } from "./observability.js";
|
|
30
|
+
import type { SkillStatus, StatusResult } from "./status.js";
|
|
31
|
+
import { computeStatus } from "./status.js";
|
|
32
|
+
import type { SyncResult } from "./sync.js";
|
|
33
|
+
import { createDefaultSyncOptions, syncSources } from "./sync.js";
|
|
34
|
+
import type {
|
|
35
|
+
EvolutionAuditEntry,
|
|
36
|
+
ImprovementSignalRecord,
|
|
37
|
+
QueryLogRecord,
|
|
38
|
+
SessionTelemetryRecord,
|
|
39
|
+
} from "./types.js";
|
|
40
|
+
import { appendJsonl, readJsonl } from "./utils/jsonl.js";
|
|
41
|
+
import { detectAgent } from "./utils/llm-call.js";
|
|
42
|
+
import {
|
|
43
|
+
findInstalledSkillPath,
|
|
44
|
+
findRepositoryClaudeSkillDirs,
|
|
45
|
+
findRepositorySkillDirs,
|
|
46
|
+
} from "./utils/skill-discovery.js";
|
|
47
|
+
import { readEffectiveSkillUsageRecords } from "./utils/skill-log.js";
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Lockfile management
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
interface LockInfo {
|
|
54
|
+
pid: number;
|
|
55
|
+
timestamp: string;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
const LOCK_STALE_MS = 30 * 60 * 1000; // 30 minutes

/**
 * Try to take the orchestrate lock by creating a lockfile at `lockPath`.
 *
 * A lock younger than LOCK_STALE_MS blocks acquisition. A lock that is older,
 * unreadable, or whose timestamp cannot be parsed is treated as stale and replaced.
 *
 * The lockfile is created with the exclusive "wx" flag, so when two runs start
 * at the same moment only one of them can create it; the other sees EEXIST and
 * is told the lock is held. (Replacing a *stale* lock still involves a small
 * unlink-then-create window; that path is narrowed, not eliminated.)
 *
 * @param lockPath - Lockfile location; defaults to ORCHESTRATE_LOCK.
 * @returns `true` when the lock was acquired, or when an unexpected filesystem
 *   error prevented checking/writing it (fail-open); `false` when another run
 *   holds a fresh lock.
 */
export function acquireLock(lockPath: string = ORCHESTRATE_LOCK): boolean {
  try {
    if (existsSync(lockPath)) {
      let fresh = false;
      try {
        const info: LockInfo = JSON.parse(readFileSync(lockPath, "utf-8"));
        const lockAge = Date.now() - Date.parse(info.timestamp);
        // A missing/invalid timestamp yields NaN, which compares false here and
        // therefore counts as stale.
        fresh = lockAge < LOCK_STALE_MS;
      } catch {
        // Corrupted lock file, treat as stale and replace it
      }
      if (fresh) {
        return false; // lock is fresh, cannot acquire
      }

      // Remove the stale lock so the exclusive create below can succeed.
      try {
        unlinkSync(lockPath);
      } catch (err) {
        // Someone else already removed it: fine. Anything else falls through
        // to the fail-open handler at the bottom.
        if ((err as { code?: string }).code !== "ENOENT") throw err;
      }
    }

    const lock: LockInfo = { pid: process.pid, timestamp: new Date().toISOString() };
    try {
      // "wx" fails with EEXIST instead of overwriting an existing file.
      writeFileSync(lockPath, JSON.stringify(lock), { flag: "wx" });
    } catch (err) {
      if ((err as { code?: string }).code === "EEXIST") {
        return false; // another run created the lock first
      }
      throw err;
    }
    return true;
  } catch {
    // Fail-open: if we can't check/write, allow the run
    return true;
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Delete the orchestrate lockfile.
 *
 * Best-effort: any error raised while deleting (for example because the file
 * does not exist) is swallowed.
 *
 * @param lockPath - Lockfile to delete; defaults to ORCHESTRATE_LOCK.
 */
export function releaseLock(lockPath: string = ORCHESTRATE_LOCK): void {
  try {
    unlinkSync(lockPath);
  } catch {
    // Nothing to do: releasing a lock that is already gone is not an error.
  }
}
|
|
91
|
+
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
// Signal reading helpers
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
/**
 * Load the improvement signals that have not been consumed yet.
 *
 * @param reader - Optional source of signal records. When omitted, records are
 *   read from SIGNAL_LOG with readJsonl.
 * @returns The records whose `consumed` flag is falsy, or an empty array if
 *   reading or filtering throws.
 */
function readPendingSignals(reader?: () => ImprovementSignalRecord[]): ImprovementSignalRecord[] {
  const loadSignals = reader ?? (() => readJsonl<ImprovementSignalRecord>(SIGNAL_LOG));
  const isPending = (signal: ImprovementSignalRecord): boolean => !signal.consumed;
  try {
    return loadSignals().filter(isPending);
  } catch {
    return [];
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Count improvement signals per mentioned skill.
 *
 * Skill names are lower-cased before counting, so "Foo" and "foo" share one
 * entry. Signals without a `mentioned_skill` are ignored.
 *
 * @param signals - Signal records to tally.
 * @returns Map from lower-cased skill name to the number of signals naming it.
 */
export function groupSignalsBySkill(signals: ImprovementSignalRecord[]): Map<string, number> {
  const counts = new Map<string, number>();
  for (const { mentioned_skill: skillName } of signals) {
    if (!skillName) continue;
    const normalized = skillName.toLowerCase();
    const seenSoFar = counts.get(normalized) ?? 0;
    counts.set(normalized, seenSoFar + 1);
  }
  return counts;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Mark the given pending signals as consumed in the signal log and rewrite the log.
 *
 * Records are matched on `timestamp|session_id`; every unconsumed log record
 * with a matching key gets `consumed: true`, a `consumed_at` timestamp and
 * `consumed_by_run` set to `runId`. All other records are written back unchanged.
 *
 * Best-effort: does nothing when `signals` is empty or the log file is missing,
 * and any error while reading or writing is swallowed.
 *
 * @param signals - Pending signals that this run has handled.
 * @param runId - Identifier stored in `consumed_by_run`.
 * @param signalLogPath - JSONL log to rewrite; defaults to SIGNAL_LOG.
 */
export function markSignalsConsumed(
  signals: ImprovementSignalRecord[],
  runId: string,
  signalLogPath: string = SIGNAL_LOG,
): void {
  try {
    if (signals.length === 0) return;
    if (!existsSync(signalLogPath)) return;

    const keyOf = (r: ImprovementSignalRecord): string => `${r.timestamp}|${r.session_id}`;

    // Build lookup set for matching pending signals
    const pendingKeys = new Set(signals.map(keyOf));

    const allRecords = readJsonl<ImprovementSignalRecord>(signalLogPath);
    const now = new Date().toISOString();
    const updated = allRecords.map((record) =>
      pendingKeys.has(keyOf(record)) && !record.consumed
        ? { ...record, consumed: true, consumed_at: now, consumed_by_run: runId }
        : record,
    );

    // Re-read to capture any signals appended between our read and write.
    // Appended records are taken positionally (everything past the length we
    // first read) rather than by key: a key-based comparison would silently
    // drop a new record that happens to share timestamp and session_id with an
    // existing one. Assumes other writers only ever append to this log — TODO confirm.
    const freshRecords = readJsonl<ImprovementSignalRecord>(signalLogPath);
    const newlyAppended = freshRecords.slice(allRecords.length);
    const merged = [...updated, ...newlyAppended];

    writeFileSync(signalLogPath, `${merged.map((r) => JSON.stringify(r)).join("\n")}\n`);
  } catch {
    // Silent on errors: consuming signals must never fail the run
  }
}
|
|
156
|
+
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
// Types
|
|
159
|
+
// ---------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
/** Caller-supplied settings for a single orchestrate run. */
export interface OrchestrateOptions {
  /** When true, the sync → status → evolve → watch loop runs without writing changes. */
  dryRun: boolean;
  /**
   * How low-risk description evolution is approved. The report describes
   * "auto" as deploying validated changes automatically and "review" as
   * validating proposals without deploying them.
   */
  approvalMode: "auto" | "review";
  /** Restrict the run to the one skill with this name. */
  skillFilter?: string;
  /** Upper bound on the number of skills processed in one run. */
  maxSkills: number;
  /** Look-back window, in hours, for finding recently evolved skills to watch. */
  recentWindowHours: number;
  /** When true, sync rescans every source. */
  syncForce: boolean;
}
|
|
175
|
+
|
|
176
|
+
/** The decision taken for one skill, plus the outcome of acting on it. */
export interface SkillAction {
  /** Name of the skill the decision applies to. */
  skill: string;
  /** What the orchestrator decided to do with the skill. */
  action: "evolve" | "watch" | "skip";
  /** Human-readable explanation of the decision; printed in the decision report. */
  reason: string;
  /** Outcome of the evolve step; the report's evolution phase only lists candidates where this is set. */
  evolveResult?: EvolveResult;
  /** Outcome of the watch step; its snapshot, alert and rolledBack fields feed the report's watch phase. */
  watchResult?: WatchResult;
}
|
|
183
|
+
|
|
184
|
+
/** Everything a single orchestrate run produced; input to formatOrchestrateReport(). */
export interface OrchestrateResult {
  /** Result of the sync phase. */
  syncResult: SyncResult;
  /** Result of the status phase. */
  statusResult: StatusResult;
  /** One entry per skill that was considered, with the decision taken for it. */
  candidates: SkillAction[];
  /** Aggregate counters and run settings shown in the report's summary section. */
  summary: {
    totalSkills: number;
    evaluated: number;
    evolved: number;
    deployed: number;
    watched: number;
    skipped: number;
    /** Mirrors the run's dry-run setting; selects the report's mode banner. */
    dryRun: boolean;
    /** Mirrors the run's approval mode; selects the report's mode banner. */
    approvalMode: "auto" | "review";
    /** Run duration in milliseconds (the report prints it in seconds). */
    elapsedMs: number;
  };
}
|
|
200
|
+
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
// Human-readable decision report
|
|
203
|
+
// ---------------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
/**
 * Render the "Phase 1: Sync" section of the decision report.
 *
 * Emits one line per source (Claude, Codex, OpenCode, OpenClaw) and, when the
 * repair step ran and repaired at least one record, a trailing repair line.
 *
 * @param syncResult - Result of the sync phase.
 * @returns The section's lines, heading first.
 */
function formatSyncPhase(syncResult: SyncResult): string[] {
  const labelledSources: [string, keyof SyncResult["sources"]][] = [
    ["Claude", "claude"],
    ["Codex", "codex"],
    ["OpenCode", "opencode"],
    ["OpenClaw", "openclaw"],
  ];

  const describeSource = ([label, key]: [string, keyof SyncResult["sources"]]): string => {
    const stats = syncResult.sources[key];
    if (!stats.available) {
      return ` ${label.padEnd(12)}not available`;
    }
    return stats.synced > 0
      ? ` ${label.padEnd(12)}scanned ${stats.scanned}, synced ${stats.synced}`
      : ` ${label.padEnd(12)}scanned ${stats.scanned}, up to date`;
  };

  const section: string[] = ["Phase 1: Sync", ...labelledSources.map(describeSource)];

  const repairedAnything = syncResult.repair.ran && syncResult.repair.repaired_records > 0;
  if (repairedAnything) {
    section.push(
      ` Repair ${syncResult.repair.repaired_records} records across ${syncResult.repair.repaired_sessions} sessions`,
    );
  }

  return section;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Render the "Phase 2: Status" section of the decision report.
 *
 * Shows the number of skills and the system health flag, followed by a
 * per-status breakdown. Only CRITICAL, WARNING, HEALTHY, UNGRADED and UNKNOWN
 * appear in the breakdown, in that order, and only when their count is non-zero.
 *
 * @param statusResult - Result of the status phase.
 * @returns The section's lines, heading first.
 */
function formatStatusPhase(statusResult: StatusResult): string[] {
  const counts: Record<string, number> = {};
  statusResult.skills.forEach(({ status }) => {
    counts[status] = (counts[status] ?? 0) + 1;
  });

  const section: string[] = ["Phase 2: Status"];
  const healthLabel = statusResult.system.healthy ? "healthy" : "UNHEALTHY";
  section.push(` ${statusResult.skills.length} skills found, system ${healthLabel}`);

  const breakdown = ["CRITICAL", "WARNING", "HEALTHY", "UNGRADED", "UNKNOWN"]
    .filter((label) => counts[label])
    .map((label) => `${counts[label]} ${label}`);
  if (breakdown.length > 0) {
    section.push(` ${breakdown.join(", ")}`);
  }

  return section;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Render the "Phase 3: Skill Decisions" section of the decision report.
 *
 * Each candidate gets one line: an icon for its action, the skill name, the
 * upper-cased action and the reason. An empty candidate list yields a
 * placeholder line instead.
 *
 * @param candidates - Decisions taken for each skill.
 * @returns The section's lines, heading first.
 */
function formatDecisionPhase(candidates: SkillAction[]): string[] {
  const heading = "Phase 3: Skill Decisions";
  if (candidates.length === 0) {
    return [heading, " (no skills to evaluate)"];
  }

  const iconFor = (action: SkillAction["action"]): string => {
    switch (action) {
      case "skip":
        return "⊘";
      case "watch":
        return "○";
      default:
        return "→";
    }
  };

  const rows = candidates.map((candidate) => {
    const actionLabel = candidate.action.toUpperCase().padEnd(7);
    return ` ${iconFor(candidate.action)} ${candidate.skill.padEnd(20)} ${actionLabel} ${candidate.reason}`;
  });

  return [heading, ...rows];
}
|
|
267
|
+
|
|
268
|
+
/**
 * Render the "Phase 4: Evolution Results" section of the decision report.
 *
 * Only candidates whose action is "evolve" and that carry an `evolveResult`
 * are listed. Each contributes two lines: deployment status (with the
 * before → after pass rate when validation data exists) and the result's reason.
 *
 * @param candidates - Decisions taken for each skill.
 * @returns The section's lines, or an empty array when nothing was evolved.
 */
function formatEvolutionPhase(candidates: SkillAction[]): string[] {
  const asPercent = (rate: number): string => (rate * 100).toFixed(0);
  const section: string[] = [];

  for (const candidate of candidates) {
    const outcome = candidate.evolveResult;
    if (candidate.action !== "evolve" || outcome === undefined) continue;

    // Heading is added lazily so that "nothing evolved" yields an empty section.
    if (section.length === 0) section.push("Phase 4: Evolution Results");

    let validation = "";
    if (outcome.validation) {
      const before = asPercent(outcome.validation.before_pass_rate);
      const after = asPercent(outcome.validation.after_pass_rate);
      validation = ` (${before}% → ${after}%)`;
    }
    const status = outcome.deployed ? "deployed" : "not deployed";

    section.push(` ${candidate.skill.padEnd(20)} ${status}${validation}`);
    section.push(` ${"".padEnd(20)} ${outcome.reason}`);
  }

  return section;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Render the "Phase 5: Watch" section of the decision report.
 *
 * Lists every candidate whose action is "watch": skill name, reason, an
 * [ALERT] tag when the watch result has an alert, a [ROLLED BACK] tag when it
 * was rolled back, and pass-rate metrics when a snapshot is present.
 *
 * @param candidates - Decisions taken for each skill.
 * @returns The section's lines, or an empty array when nothing was watched.
 */
function formatWatchPhase(candidates: SkillAction[]): string[] {
  const section: string[] = [];

  for (const candidate of candidates) {
    if (candidate.action !== "watch") continue;
    if (section.length === 0) section.push("Phase 5: Watch");

    const observed = candidate.watchResult;
    const tags = [
      observed?.alert ? " [ALERT]" : "",
      observed?.rolledBack ? " [ROLLED BACK]" : "",
    ].join("");

    const snapshot = observed?.snapshot;
    let metrics = "";
    if (snapshot) {
      metrics = ` (pass_rate=${snapshot.pass_rate.toFixed(2)}, baseline=${snapshot.baseline_pass_rate.toFixed(2)})`;
    }

    section.push(` ${candidate.skill.padEnd(20)} ${candidate.reason}${tags}${metrics}`);
  }

  return section;
}
|
|
304
|
+
|
|
305
|
+
/**
 * Build the full human-readable decision report for an orchestrate run.
 *
 * Layout: banner, mode line, the sync / status / decision sections (always
 * present), the evolution and watch sections (only when they have content),
 * a summary, and a rerun hint when the run was a dry run or review-only and
 * evaluated at least one skill.
 *
 * @param result - Outcome of the orchestrate run.
 * @returns The report as a single newline-joined string.
 */
export function formatOrchestrateReport(result: OrchestrateResult): string {
  const { summary, candidates } = result;
  const rule = "═".repeat(48);

  // Mode banner
  let modeLine: string;
  if (summary.dryRun) {
    modeLine = "Mode: DRY RUN (no mutations applied)";
  } else if (summary.approvalMode === "review") {
    modeLine = "Mode: REVIEW (proposals validated but not deployed)";
  } else {
    modeLine = "Mode: AUTONOMOUS (validated changes deployed automatically)";
  }

  const report: string[] = [rule, "selftune orchestrate — decision report", rule, "", modeLine, ""];

  // Phases 1-3 are always shown, each followed by a blank separator line.
  report.push(...formatSyncPhase(result.syncResult), "");
  report.push(...formatStatusPhase(result.statusResult), "");
  report.push(...formatDecisionPhase(candidates), "");

  // Phases 4-5 are shown only when they produced any lines.
  const optionalSections = [formatEvolutionPhase(candidates), formatWatchPhase(candidates)];
  for (const section of optionalSections) {
    if (section.length > 0) {
      report.push(...section, "");
    }
  }

  // Final summary
  report.push(
    "Summary",
    ` Evaluated: ${summary.evaluated} skills`,
    ` Deployed: ${summary.deployed}`,
    ` Watched: ${summary.watched}`,
    ` Skipped: ${summary.skipped}`,
    ` Elapsed: ${(summary.elapsedMs / 1000).toFixed(1)}s`,
  );

  // Rerun hint: dry-run takes precedence over review mode.
  if (summary.evaluated > 0) {
    if (summary.dryRun) {
      report.push("", " Rerun without --dry-run to allow validated deployments.");
    } else if (summary.approvalMode === "review") {
      report.push("", " Rerun without --review-required to allow validated deployments.");
    }
  }

  return report.join("\n");
}
|
|
368
|
+
|
|
369
|
+
/** Skill statuses that make a skill eligible for evolution; any other status is skipped. */
const CANDIDATE_STATUSES = new Set<string>(["CRITICAL", "WARNING", "UNGRADED"]);

/** Fewest skill_checks a skill needs before autonomous evolution is allowed. */
export const MIN_CANDIDATE_EVIDENCE = 3;

/** Hours to wait after a deploy before the same skill may be evolved again (default). */
export const DEFAULT_COOLDOWN_HOURS = 24;
|
|
377
|
+
|
|
378
|
+
/**
 * Score a skill for evolution ordering; higher scores are handled first.
 *
 * The score is the sum of:
 * - status weight: 300 for CRITICAL, 200 for WARNING, 100 otherwise;
 * - missed queries, capped at 50;
 * - pass-rate penalty: (1 - passRate) * 100, rounded; 0 when passRate is null;
 * - 30 when the trend is "down";
 * - 150 per improvement signal, capped at 450.
 *
 * @param skill - Status record of the skill.
 * @param signalCount - Number of pending improvement signals for the skill.
 * @returns The priority score.
 */
function candidatePriority(skill: SkillStatus, signalCount = 0): number {
  let score: number;
  switch (skill.status) {
    case "CRITICAL":
      score = 300;
      break;
    case "WARNING":
      score = 200;
      break;
    default:
      score = 100;
  }

  score += Math.min(skill.missedQueries, 50);
  if (skill.passRate !== null) {
    score += Math.round((1 - skill.passRate) * 100);
  }
  if (skill.trend === "down") {
    score += 30;
  }
  score += Math.min(signalCount * 150, 450);

  return score;
}
|
|
386
|
+
|
|
387
|
+
/**
 * Injectable collaborators for orchestrate().
 *
 * Every member is optional; tests pass overrides for the pieces they need to
 * control.
 */
export interface OrchestrateDeps {
  /** Override for syncSources (./sync.js). */
  syncSources?: typeof syncSources;
  /** Override for computeStatus (./status.js). */
  computeStatus?: typeof computeStatus;
  /** Override for evolve (./evolution/evolve.js). */
  evolve?: typeof import("./evolution/evolve.js").evolve;
  /** Override for watch (./monitoring/watch.js). */
  watch?: typeof import("./monitoring/watch.js").watch;
  /** Override for detectAgent (./utils/llm-call.js). */
  detectAgent?: typeof detectAgent;
  /** Override for doctor (./observability.js). */
  doctor?: typeof doctor;
  /** Supplies session telemetry records. */
  readTelemetry?: () => SessionTelemetryRecord[];
  /** Supplies skill usage records, in the shape readEffectiveSkillUsageRecords returns. */
  readSkillRecords?: () => ReturnType<typeof readEffectiveSkillUsageRecords>;
  /** Supplies query log records. */
  readQueryRecords?: () => QueryLogRecord[];
  /** Supplies evolution audit entries. */
  readAuditEntries?: () => EvolutionAuditEntry[];
  /** Maps a skill name to a path, or undefined when no path is found. */
  resolveSkillPath?: (skillName: string) => string | undefined;
  /** Supplies grading results for one skill, in the shape readGradingResultsForSkill returns. */
  readGradingResults?: (skillName: string) => ReturnType<typeof readGradingResultsForSkill>;
  /** Supplies improvement signal records. */
  readSignals?: () => ImprovementSignalRecord[];
}
|
|
405
|
+
|
|
406
|
+
// ---------------------------------------------------------------------------
|
|
407
|
+
// Skill path resolution
|
|
408
|
+
// ---------------------------------------------------------------------------
|
|
409
|
+
|
|
410
|
+
/**
 * List the directories searched for installed skills.
 *
 * Order: the `skills` folders under `.claude`, `.agents` and `.codex` in the
 * user's home directory, followed by the directories reported by
 * findRepositorySkillDirs and findRepositoryClaudeSkillDirs for the current
 * working directory.
 *
 * @returns The search directories, in lookup order.
 */
function getSkillSearchDirs(): string[] {
  const home = homedir();
  const cwd = process.cwd();

  const homeSkillDirs = [".claude", ".agents", ".codex"].map((toolDir) =>
    join(home, toolDir, "skills"),
  );
  const repoSkillDirs = findRepositorySkillDirs(cwd);
  const repoClaudeSkillDirs = findRepositoryClaudeSkillDirs(cwd);

  return [...homeSkillDirs, ...repoSkillDirs, ...repoClaudeSkillDirs];
}
|
|
421
|
+
|
|
422
|
+
/**
 * Default skill path resolver: look the skill up in the standard search directories.
 *
 * @param skillName - Name of the skill to locate.
 * @returns Whatever findInstalledSkillPath returns for the skill, or undefined.
 */
function defaultResolveSkillPath(skillName: string): string | undefined {
  const searchDirs = getSkillSearchDirs();
  return findInstalledSkillPath(skillName, searchDirs);
}
|
|
425
|
+
|
|
426
|
+
// ---------------------------------------------------------------------------
|
|
427
|
+
// Candidate selection
|
|
428
|
+
// ---------------------------------------------------------------------------
|
|
429
|
+
|
|
430
|
+
/** Inputs to selectCandidates() beyond the per-skill status records. */
export interface CandidateContext {
  /** When set, every skill whose name differs from this value is skipped. */
  skillFilter?: string;
  /** Maximum number of skills allowed to evolve; evolve candidates beyond it are skipped. */
  maxSkills: number;
  /** Audit entries used to find recent deployments for the cooldown gate; treated as empty when omitted. */
  auditEntries?: EvolutionAuditEntry[];
  /** Hours that must pass after a deploy before the skill can be re-evolved; defaults to DEFAULT_COOLDOWN_HOURS. */
  cooldownHours?: number;
  /** Improvement signal count per skill, keyed by lower-cased skill name. */
  signaledSkills?: Map<string, number>;
}
|
|
440
|
+
|
|
441
|
+
/**
 * Decide, for every skill, whether it should be evolved or skipped.
 *
 * Skills are ranked by candidatePriority (highest first) and the returned
 * actions follow that order. A skill is skipped when any of these applies,
 * checked in this order:
 *  1. it does not match `options.skillFilter`;
 *  2. its status is not in CANDIDATE_STATUSES;
 *  3. it was deployed within the cooldown window;
 *  4. it has fewer than MIN_CANDIDATE_EVIDENCE skill checks, is not UNGRADED,
 *     and has no improvement signals;
 *  5. it is UNGRADED with no missed queries and no improvement signals;
 *  6. it is WARNING with no missed queries and a trend other than "down".
 * Remaining skills are marked "evolve" until `options.maxSkills` is reached;
 * later ones are skipped as capped.
 *
 * @param skills - Status records for all known skills.
 * @param options - Filter, cap, cooldown and signal context.
 * @returns One action per input skill, in priority order.
 */
export function selectCandidates(skills: SkillStatus[], options: CandidateContext): SkillAction[] {
  const signalsFor = (skill: SkillStatus): number =>
    options.signaledSkills?.get(skill.name.toLowerCase()) ?? 0;

  const ranked = [...skills].sort(
    (a, b) => candidatePriority(b, signalsFor(b)) - candidatePriority(a, signalsFor(a)),
  );

  const cooldownHours = options.cooldownHours ?? DEFAULT_COOLDOWN_HOURS;
  const recentlyDeployed = findRecentlyDeployedSkills(options.auditEntries ?? [], cooldownHours);

  /** Returns why the skill must be skipped, or undefined when it may evolve. */
  const skipReasonFor = (skill: SkillStatus): string | undefined => {
    // Apply skill filter
    if (options.skillFilter && skill.name !== options.skillFilter) {
      return `filtered out (--skill ${options.skillFilter})`;
    }

    // Only certain statuses are candidates at all
    if (!CANDIDATE_STATUSES.has(skill.status)) {
      return `status=${skill.status} — no action needed`;
    }

    // Gate: cooldown — the skill was deployed recently
    if (recentlyDeployed.has(skill.name)) {
      return `recently evolved (cooldown ${cooldownHours}h) — let it bake`;
    }

    const noSignals = signalsFor(skill) === 0;

    // Gate: insufficient evidence, bypassed by improvement signals
    const skillChecks = skill.snapshot?.skill_checks ?? 0;
    if (skillChecks < MIN_CANDIDATE_EVIDENCE && skill.status !== "UNGRADED" && noSignals) {
      return `insufficient evidence (${skillChecks}/${MIN_CANDIDATE_EVIDENCE} checks) — need more data`;
    }

    // Gate: UNGRADED needs missed queries, bypassed by improvement signals
    if (skill.status === "UNGRADED" && skill.missedQueries === 0 && noSignals) {
      return "UNGRADED with 0 missed queries — insufficient signal";
    }

    // Gate: weak WARNING signal — no missed queries and trend isn't declining.
    // NOTE(review): unlike the two gates above, this one is not bypassed by
    // improvement signals — confirm that is intended.
    if (skill.status === "WARNING" && skill.missedQueries === 0 && skill.trend !== "down") {
      return `WARNING but no missed queries and trend=${skill.trend} — weak signal`;
    }

    return undefined;
  };

  // The max-skills cap counts evolve candidates only; skips never use up budget.
  let evolveCount = 0;
  return ranked.map((skill): SkillAction => {
    const skipReason = skipReasonFor(skill);
    if (skipReason !== undefined) {
      return { skill: skill.name, action: "skip", reason: skipReason };
    }

    evolveCount += 1;
    if (evolveCount > options.maxSkills) {
      return {
        skill: skill.name,
        action: "skip",
        reason: `capped by --max-skills ${options.maxSkills}`,
      };
    }

    const passRateLabel = skill.passRate !== null ? `${(skill.passRate * 100).toFixed(0)}%` : "—";
    return {
      skill: skill.name,
      action: "evolve",
      reason: `status=${skill.status}, passRate=${passRateLabel}, missed=${skill.missedQueries}, trend=${skill.trend}`,
    };
  });
}
|
|
539
|
+
|
|
540
|
+
/**
 * Collect the names of skills with a "deployed" audit entry inside the window.
 *
 * An entry counts when its action is "deployed", it has a skill name, its
 * timestamp parses to a finite time, and that time is no older than
 * `windowHours` before now. Serves both the cooldown gate (don't re-evolve)
 * and watch targeting (monitor recently deployed skills for regressions).
 *
 * @param auditEntries - Evolution audit entries to scan.
 * @param windowHours - Size of the look-back window in hours.
 * @returns Set of skill names deployed within the window.
 */
function findRecentlyDeployedSkills(
  auditEntries: EvolutionAuditEntry[],
  windowHours: number,
): Set<string> {
  const earliestMs = Date.now() - windowHours * 60 * 60 * 1000;

  const recentDeploys = auditEntries.filter((entry) => {
    if (entry.action !== "deployed" || !entry.skill_name) return false;
    const deployedAtMs = Date.parse(entry.timestamp);
    // Unparseable timestamps give NaN and are rejected by the finiteness check.
    return Number.isFinite(deployedAtMs) && deployedAtMs >= earliestMs;
  });

  return new Set<string>(recentDeploys.map((entry) => entry.skill_name));
}
|
|
564
|
+
|
|
565
|
+
// ---------------------------------------------------------------------------
|
|
566
|
+
// Main orchestrator
|
|
567
|
+
// ---------------------------------------------------------------------------
|
|
568
|
+
|
|
569
|
+
/**
 * Run one full improvement cycle: sync → status → select → evolve → watch → report.
 *
 * The run is guarded by a lock (`acquireLock` / `releaseLock`). When the lock
 * cannot be acquired, nothing is executed and a zeroed result is returned.
 * Otherwise the lock is always released in a `finally` block.
 *
 * Steps, in order:
 *  1. Sync source-truth telemetry.
 *  2. Read logs and compute skill status; read pending improvement signals.
 *  3. Select evolve/skip candidates.
 *  4. Detect the agent CLI; without one, every evolve candidate becomes a skip.
 *  5. Evolve each remaining candidate. A thrown error is caught per skill and
 *     recorded as a skip with the error message as reason.
 *  6. Watch skills deployed within `options.recentWindowHours` that were not
 *     evolve candidates in this run. A thrown error is logged only.
 *  7. Build the summary and mark pending signals as consumed. Consumption is
 *     skipped when `options.dryRun` is set, so a preview run does not discard
 *     signals that the next real run still needs.
 *  8. Append the run report to `ORCHESTRATE_RUN_LOG`; a failure here is logged
 *     as a warning and does not fail the run.
 *
 * All progress output goes to stderr via `console.error`.
 *
 * @param options Run configuration. Fields read here: `dryRun`, `approvalMode`,
 *                `syncForce`, `skillFilter`, `maxSkills`, `recentWindowHours`.
 * @param deps    Optional overrides for the collaborators used by the run; any
 *                field left undefined falls back to the module implementation.
 * @returns The sync result, status result, per-skill candidate decisions and an
 *          aggregate summary.
 */
export async function orchestrate(
  options: OrchestrateOptions,
  deps: OrchestrateDeps = {},
): Promise<OrchestrateResult> {
  if (!acquireLock()) {
    // Another orchestrate run is in progress
    console.error("[orchestrate] Another run is in progress (lock held). Exiting.");
    return {
      syncResult: {
        since: null,
        dry_run: options.dryRun,
        sources: {
          claude: { available: false, scanned: 0, synced: 0, skipped: 0 },
          codex: { available: false, scanned: 0, synced: 0, skipped: 0 },
          opencode: { available: false, scanned: 0, synced: 0, skipped: 0 },
          openclaw: { available: false, scanned: 0, synced: 0, skipped: 0 },
        },
        repair: {
          ran: false,
          repaired_sessions: 0,
          repaired_records: 0,
          codex_repaired_records: 0,
        },
        timings: [],
        total_elapsed_ms: 0,
      },
      statusResult: {
        skills: [],
        unmatchedQueries: 0,
        pendingProposals: 0,
        lastSession: null,
        system: { healthy: true, pass: 0, fail: 0, warn: 0 },
      },
      candidates: [],
      summary: {
        totalSkills: 0,
        evaluated: 0,
        evolved: 0,
        deployed: 0,
        watched: 0,
        skipped: 0,
        dryRun: options.dryRun,
        approvalMode: options.approvalMode,
        elapsedMs: 0,
      },
    };
  }

  try {
    const startTime = Date.now();

    // Resolve collaborators: injected override first, module default otherwise.
    const _syncSources = deps.syncSources ?? syncSources;
    const _computeStatus = deps.computeStatus ?? computeStatus;
    const _detectAgent = deps.detectAgent ?? detectAgent;
    const _doctor = deps.doctor ?? doctor;
    const _readTelemetry =
      deps.readTelemetry ?? (() => readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG));
    const _readSkillRecords = deps.readSkillRecords ?? readEffectiveSkillUsageRecords;
    const _readQueryRecords = deps.readQueryRecords ?? (() => readJsonl<QueryLogRecord>(QUERY_LOG));
    const _readAuditEntries =
      deps.readAuditEntries ?? (() => readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG));
    const _resolveSkillPath = deps.resolveSkillPath ?? defaultResolveSkillPath;
    const _readGradingResults = deps.readGradingResults ?? readGradingResultsForSkill;

    // Lazy-load evolve and watch to avoid circular imports
    const _evolve = deps.evolve ?? (await import("./evolution/evolve.js")).evolve;
    const _watch = deps.watch ?? (await import("./monitoring/watch.js")).watch;

    // -------------------------------------------------------------------------
    // Step 1: Sync source-truth telemetry (mandatory)
    // -------------------------------------------------------------------------
    console.error("[orchestrate] Syncing source-truth telemetry...");
    const syncResult = _syncSources(createDefaultSyncOptions({ force: options.syncForce }));
    const sourceSynced = Object.values(syncResult.sources).reduce((sum, s) => sum + s.synced, 0);
    console.error(
      `[orchestrate] Sync complete: ${sourceSynced} sessions synced, ${syncResult.repair.repaired_records} repaired`,
    );

    // -------------------------------------------------------------------------
    // Step 2: Compute status
    // -------------------------------------------------------------------------
    console.error("[orchestrate] Computing skill status...");
    const telemetry = _readTelemetry();
    const skillRecords = _readSkillRecords();
    const queryRecords = _readQueryRecords();
    const auditEntries = _readAuditEntries();
    const doctorResult = _doctor();

    const statusResult = _computeStatus(
      telemetry,
      skillRecords,
      queryRecords,
      auditEntries,
      doctorResult,
    );
    console.error(
      `[orchestrate] Status: ${statusResult.skills.length} skills, system=${statusResult.system.healthy ? "healthy" : "unhealthy"}`,
    );

    // -------------------------------------------------------------------------
    // Step 2b: Read pending improvement signals
    // -------------------------------------------------------------------------
    const pendingSignals = readPendingSignals(deps.readSignals);
    const signaledSkills = groupSignalsBySkill(pendingSignals);
    if (signaledSkills.size > 0) {
      console.error(
        `[orchestrate] Improvement signals: ${pendingSignals.length} pending for ${signaledSkills.size} skill(s)`,
      );
    }

    // -------------------------------------------------------------------------
    // Step 3: Select candidates
    // -------------------------------------------------------------------------
    const candidates = selectCandidates(statusResult.skills, {
      skillFilter: options.skillFilter,
      maxSkills: options.maxSkills,
      auditEntries,
      signaledSkills,
    });

    const evolveCandidates = candidates.filter((c) => c.action === "evolve");
    const skipCount = candidates.filter((c) => c.action === "skip").length;
    console.error(
      `[orchestrate] Candidates: ${evolveCandidates.length} to evolve, ${skipCount} skipped`,
    );

    // Log each decision
    for (const c of candidates) {
      console.error(` ${c.action === "skip" ? "⊘" : "→"} ${c.skill}: ${c.reason}`);
    }

    // -------------------------------------------------------------------------
    // Step 4: Detect agent
    // -------------------------------------------------------------------------
    const agent = _detectAgent();
    if (!agent && evolveCandidates.length > 0) {
      console.error("[orchestrate] WARNING: No agent CLI found in PATH. Evolve will be skipped.");
      // These objects are shared with `candidates`, so the skip is visible in the final report.
      for (const c of evolveCandidates) {
        c.action = "skip";
        c.reason = "no agent CLI available";
      }
    }

    // -------------------------------------------------------------------------
    // Step 5: Evolve candidates
    // -------------------------------------------------------------------------
    // Review mode validates candidates but must not deploy, so it runs evolve as a dry run.
    const effectiveDryRun = options.dryRun || options.approvalMode === "review";

    for (const candidate of evolveCandidates) {
      // Skip if agent detection marked this candidate as skip
      if (candidate.action === "skip") continue;

      const skillPath = _resolveSkillPath(candidate.skill);
      if (!skillPath) {
        candidate.action = "skip";
        candidate.reason = `SKILL.md not found for "${candidate.skill}"`;
        console.error(` ⊘ ${candidate.skill}: ${candidate.reason}`);
        continue;
      }

      console.error(
        `[orchestrate] Evolving "${candidate.skill}"${effectiveDryRun ? " (dry-run)" : ""}...`,
      );

      try {
        const evolveResult = await _evolve({
          skillName: candidate.skill,
          skillPath,
          // Non-null here: candidates are all marked "skip" above when no agent was detected.
          agent: agent as string,
          dryRun: effectiveDryRun,
          confidenceThreshold: 0.6,
          maxIterations: 3,
          gradingResults: _readGradingResults(candidate.skill),
          syncFirst: false, // We already synced
        });

        candidate.evolveResult = evolveResult;

        if (evolveResult.deployed) {
          console.error(` ✓ ${candidate.skill}: deployed (${evolveResult.reason})`);
        } else {
          console.error(` ✗ ${candidate.skill}: not deployed (${evolveResult.reason})`);
        }
      } catch (err) {
        // One failing skill must not abort the remaining candidates.
        const msg = err instanceof Error ? err.message : String(err);
        candidate.action = "skip";
        candidate.reason = `evolve error: ${msg}`;
        console.error(` ✗ ${candidate.skill}: error — ${msg}`);
      }
    }

    // -------------------------------------------------------------------------
    // Step 6: Watch recently evolved skills
    // -------------------------------------------------------------------------
    // Re-read audit entries to capture any newly-deployed entries from the evolve loop above.
    // evolve() writes audit entries synchronously, so a fresh read is needed.
    const freshAuditEntries = _readAuditEntries();
    const recentlyEvolved = findRecentlyDeployedSkills(
      freshAuditEntries,
      options.recentWindowHours,
    );

    // O(1) lookup for skills already processed as evolve candidates
    const evolvedSkillNames = new Set(
      candidates.filter((c) => c.action === "evolve").map((c) => c.skill),
    );

    for (const skillName of recentlyEvolved) {
      // Skip if already processed in this run as evolve candidate
      if (evolvedSkillNames.has(skillName)) {
        continue;
      }

      // Apply skill filter
      if (options.skillFilter && skillName !== options.skillFilter) continue;

      const skillPath = _resolveSkillPath(skillName);
      if (!skillPath) continue;

      console.error(`[orchestrate] Watching "${skillName}" (recently evolved)...`);

      try {
        const watchResult = await _watch({
          skillName,
          skillPath,
          windowSessions: 20,
          regressionThreshold: 0.1,
          autoRollback: true,
          syncFirst: false,
        });

        candidates.push({
          skill: skillName,
          action: "watch",
          reason: watchResult.alert ?? "stable",
          watchResult,
        });

        console.error(
          ` ${watchResult.alert ? "⚠" : "✓"} ${skillName}: ${watchResult.recommendation}`,
        );
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        console.error(` ✗ ${skillName}: watch error — ${msg}`);
      }
    }

    // -------------------------------------------------------------------------
    // Step 7: Build summary (single source of truth for both CLI and dashboard)
    // -------------------------------------------------------------------------
    const finalTotals = {
      totalSkills: statusResult.skills.length,
      evaluated: candidates.filter((c) => c.action === "evolve").length,
      evolved: candidates.filter((c) => c.action === "evolve" && c.evolveResult !== undefined)
        .length,
      deployed: candidates.filter((c) => c.evolveResult?.deployed).length,
      watched: candidates.filter((c) => c.action === "watch").length,
      skipped: candidates.filter((c) => c.action === "skip").length,
    };

    const result: OrchestrateResult = {
      syncResult,
      statusResult,
      candidates,
      summary: {
        ...finalTotals,
        dryRun: options.dryRun,
        approvalMode: options.approvalMode,
        elapsedMs: Date.now() - startTime,
      },
    };

    // -------------------------------------------------------------------------
    // Step 7b: Mark consumed signals
    // -------------------------------------------------------------------------
    const runId = `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    // A dry run is a preview: consuming the signals here would hide them from
    // the next real run, so they are left pending.
    if (!options.dryRun && pendingSignals.length > 0) {
      markSignalsConsumed(pendingSignals, runId);
    }

    // -------------------------------------------------------------------------
    // Step 8: Persist run report
    // -------------------------------------------------------------------------
    const runReport: OrchestrateRunReport = {
      run_id: runId,
      timestamp: new Date().toISOString(),
      elapsed_ms: result.summary.elapsedMs,
      dry_run: result.summary.dryRun,
      approval_mode: result.summary.approvalMode,
      total_skills: finalTotals.totalSkills,
      evaluated: finalTotals.evaluated,
      evolved: finalTotals.evolved,
      deployed: finalTotals.deployed,
      watched: finalTotals.watched,
      skipped: finalTotals.skipped,
      skill_actions: candidates.map(
        (c): OrchestrateRunSkillAction => ({
          skill: c.skill,
          action: c.action,
          reason: c.reason,
          deployed: c.evolveResult?.deployed,
          rolledBack: c.watchResult?.rolledBack,
          alert: c.watchResult?.alert,
          elapsed_ms: c.evolveResult?.elapsedMs,
          llm_calls: c.evolveResult?.llmCallCount,
        }),
      ),
    };

    try {
      appendJsonl(ORCHESTRATE_RUN_LOG, runReport);
    } catch (err) {
      // Best effort: a failed report write must not discard the completed run.
      const message = err instanceof Error ? err.message : String(err);
      console.error(`[orchestrate] Warning: failed to persist run report: ${message}`);
    }

    return result;
  } finally {
    releaseLock();
  }
}
|
|
889
|
+
|
|
890
|
+
// ---------------------------------------------------------------------------
|
|
891
|
+
// CLI entry point
|
|
892
|
+
// ---------------------------------------------------------------------------
|
|
893
|
+
|
|
894
|
+
/**
 * CLI entry point for `selftune orchestrate`.
 *
 * Parses the command-line flags, validates the numeric ones, then runs
 * `orchestrate()` once, or repeatedly when `--loop` is given. After every run
 * the machine-readable summary (with per-skill decisions) is printed to stdout
 * as JSON and the human-readable report is printed to stderr.
 *
 * In loop mode SIGINT/SIGTERM request a stop: a pending sleep is cut short,
 * and a cycle that is currently running is allowed to finish first.
 *
 * This function terminates the process: exit code 0 after `--help` or a
 * completed run/loop, exit code 1 on an invalid flag value.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      "dry-run": { type: "boolean", default: false },
      "review-required": { type: "boolean", default: false },
      "auto-approve": { type: "boolean", default: false },
      skill: { type: "string" },
      "max-skills": { type: "string", default: "5" },
      "recent-window": { type: "string", default: "48" },
      "sync-force": { type: "boolean", default: false },
      loop: { type: "boolean", default: false },
      "loop-interval": { type: "string", default: "3600" },
      help: { type: "boolean", short: "h", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune orchestrate — Autonomous core loop

Runs the full improvement cycle: sync → status → evolve → watch.

Usage:
  selftune orchestrate [options]

Options:
  --dry-run              Preview actions without mutations
  --review-required      Validate candidates but require human review before deploy
  --auto-approve         Deprecated alias; autonomous mode is now the default
  --skill <name>         Scope to a single skill
  --max-skills <n>       Cap skills processed per run (default: 5)
  --recent-window <hrs>  Hours to look back for watch targets (default: 48)
  --sync-force           Force full rescan during sync
  --loop                 Run in continuous loop mode (never stops)
  --loop-interval <s>    Seconds between iterations (default: 3600, min: 60)
  -h, --help             Show this help message

Safety:
  By default, low-risk description evolution runs autonomously after
  validation. Use --review-required to keep a human in the loop, or
  --dry-run to preview the whole loop without mutations. Every deploy
  still passes validation gates first.

Examples:
  selftune orchestrate                            # autonomous description evolution
  selftune orchestrate --review-required          # validate but do not deploy
  selftune orchestrate --dry-run                  # preview only
  selftune orchestrate --skill Research           # single skill
  selftune orchestrate --max-skills 3             # limit scope
  selftune orchestrate --loop                     # continuous loop (hourly)
  selftune orchestrate --loop --loop-interval 600 # every 10 minutes`);
    process.exit(0);
  }

  // Number.parseInt on its own accepts trailing garbage ("3abc" → 3, "2.9" → 2).
  // Only a plain, optionally signed, run of digits counts as an integer here;
  // anything else becomes NaN so the range checks below reject it.
  const parseIntegerFlag = (raw: string | undefined, fallback: string): number => {
    const text = (raw ?? fallback).trim();
    return /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  };

  const maxSkills = parseIntegerFlag(values["max-skills"], "5");
  if (Number.isNaN(maxSkills) || maxSkills < 1) {
    console.error("[ERROR] --max-skills must be a positive integer");
    process.exit(1);
  }

  const recentWindow = parseIntegerFlag(values["recent-window"], "48");
  if (Number.isNaN(recentWindow) || recentWindow < 1) {
    console.error("[ERROR] --recent-window must be a positive integer");
    process.exit(1);
  }

  // The interval is only validated when it is actually used (loop mode).
  const loopInterval = parseIntegerFlag(values["loop-interval"], "3600");
  if (values.loop && (Number.isNaN(loopInterval) || loopInterval < 60)) {
    console.error("[ERROR] --loop-interval must be an integer >= 60 (seconds)");
    process.exit(1);
  }

  // Timer delays are limited to a signed 32-bit millisecond count. In Node.js a
  // larger delay is replaced by 1 ms, which would turn the loop into a busy loop.
  const maxLoopIntervalSeconds = Math.floor(2147483647 / 1000);
  if (values.loop && loopInterval > maxLoopIntervalSeconds) {
    console.error(`[ERROR] --loop-interval must be <= ${maxLoopIntervalSeconds} (seconds)`);
    process.exit(1);
  }

  const autoApprove = values["auto-approve"] ?? false;
  if (autoApprove) {
    console.error(
      "[orchestrate] --auto-approve is deprecated; autonomous mode is now the default.",
    );
  }

  const reviewRequired = values["review-required"] ?? false;
  const dryRun = values["dry-run"] ?? false;
  const approvalMode: "auto" | "review" = reviewRequired ? "review" : "auto";

  const isLoop = values.loop ?? false;
  let stopRequested = false;
  // Handles for the in-flight sleep between cycles, so a signal can cut it short.
  let sleepTimer: ReturnType<typeof setTimeout> | null = null;
  let sleepResolve: (() => void) | null = null;

  if (isLoop) {
    const requestStop = () => {
      stopRequested = true;
      if (sleepTimer) {
        clearTimeout(sleepTimer);
        sleepTimer = null;
      }
      // Resolve the pending sleep so the loop condition is re-checked immediately.
      if (sleepResolve) {
        sleepResolve();
        sleepResolve = null;
      }
      console.error("\n[orchestrate] Loop interrupted. Finishing current cycle...");
    };
    process.on("SIGINT", requestStop);
    process.on("SIGTERM", requestStop);
  }

  let iteration = 0;
  do {
    iteration++;
    if (isLoop && iteration > 1) {
      console.error(`\n[orchestrate] === Loop iteration ${iteration} ===`);
    }

    const result = await orchestrate({
      dryRun,
      approvalMode,
      skillFilter: values.skill,
      maxSkills,
      recentWindowHours: recentWindow,
      syncForce: values["sync-force"] ?? false,
    });

    // JSON output: include per-skill decisions for machine consumption
    const jsonOutput = {
      ...result.summary,
      decisions: result.candidates.map((c) => ({
        skill: c.skill,
        action: c.action,
        reason: c.reason,
        ...(c.evolveResult
          ? {
              deployed: c.evolveResult.deployed,
              evolveReason: c.evolveResult.reason,
              validation: c.evolveResult.validation
                ? {
                    before: c.evolveResult.validation.before_pass_rate,
                    after: c.evolveResult.validation.after_pass_rate,
                    improved: c.evolveResult.validation.improved,
                  }
                : null,
            }
          : {}),
        ...(c.watchResult
          ? {
              alert: c.watchResult.alert,
              rolledBack: c.watchResult.rolledBack,
              passRate: c.watchResult.snapshot?.pass_rate ?? null,
              recommendation: c.watchResult.recommendation,
            }
          : {}),
      })),
    };
    console.log(JSON.stringify(jsonOutput, null, 2));

    // Print human-readable decision report to stderr
    console.error(`\n${formatOrchestrateReport(result)}`);

    if (!isLoop || stopRequested) break;

    const nextMinutes = Math.round(loopInterval / 60);
    console.error(`\n[orchestrate] Next cycle in ${nextMinutes} minute(s)... (Ctrl+C to stop)`);
    // Interruptible sleep: resolved by the timer, or early by requestStop().
    await new Promise<void>((resolve) => {
      sleepResolve = resolve;
      sleepTimer = setTimeout(() => {
        sleepTimer = null;
        sleepResolve = null;
        resolve();
      }, loopInterval * 1000);
    });
  } while (isLoop && !stopRequested);

  process.exit(0);
}
|
|
1066
|
+
|
|
1067
|
+
// Run the CLI only when this module is the program entry point (not when it
// is imported). Any rejection from cliMain() is reported and exits with code 1.
if (import.meta.main) {
  const reportFatal = (failure: unknown): void => {
    const detail = failure instanceof Error ? failure.message : String(failure);
    console.error(`[FATAL] ${detail}`);
    process.exit(1);
  };

  cliMain().catch(reportFatal);
}
|