closed-loop-cli 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of closed-loop-cli might be problematic. Click here for more details.
- package/CLAUDE.md +17 -0
- package/Learnings.md +73 -0
- package/dist/index.js +56 -126
- package/package.json +1 -7
- package/src/index.ts +356 -425
- package/tsconfig.json +16 -0
- package/dist/orchestrator/autogenesis.js +0 -973
- package/dist/orchestrator/dgm-archive.js +0 -223
- package/dist/orchestrator/fitness-evaluator.js +0 -99
- package/dist/orchestrator/mutation-strategies.js +0 -174
- package/dist/orchestrator/prompt-benchmark.js +0 -102
- package/dist/orchestrator/prompt-optimizer.js +0 -169
- package/dist/orchestrator/refactor-scanner.js +0 -222
- package/src/orchestrator/autogenesis.ts +0 -1078
- package/src/orchestrator/dgm-archive.ts +0 -257
- package/src/orchestrator/fitness-evaluator.ts +0 -154
- package/src/orchestrator/mutation-strategies.ts +0 -214
|
@@ -1,1078 +0,0 @@
|
|
|
1
|
-
import * as fs from 'fs';
|
|
2
|
-
import * as path from 'path';
|
|
3
|
-
import { spawn } from 'child_process';
|
|
4
|
-
import { runCommand, CommandResult } from '../tools/shell-tools';
|
|
5
|
-
import { runSelfImprovingTask } from './meta-agent';
|
|
6
|
-
import { Spinner } from '../tools/tui-tools';
|
|
7
|
-
import { generateRepoMap } from '../tools/repo-map';
|
|
8
|
-
import Anthropic from '@anthropic-ai/sdk';
|
|
9
|
-
import * as dotenv from 'dotenv';
|
|
10
|
-
import { runTaskAgent } from './task-agent';
|
|
11
|
-
import { updateEvolutionState, getEvolutionState, appendHistory } from './state-manager';
|
|
12
|
-
import { ResearchManager } from './research-manager';
|
|
13
|
-
import { DGMArchive } from './dgm-archive';
|
|
14
|
-
import { evaluateFitness, compareToBaseline } from './fitness-evaluator';
|
|
15
|
-
import { selectMutationStrategy, generateMutationPrompt } from './mutation-strategies';
|
|
16
|
-
|
|
17
|
-
// Populate process.env from a local .env file before any of the settings below are read.
dotenv.config();

// Credential handed to the Anthropic client: API key first, auth token second, '' when neither is set.
const apiKey = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN || '';
// Optional endpoint override; an unset or empty variable becomes undefined.
const baseURL = process.env.ANTHROPIC_BASE_URL ? process.env.ANTHROPIC_BASE_URL : undefined;
// Model identifier passed to messages.create; overridable through ANTHROPIC_MODEL.
const defaultModel = process.env.ANTHROPIC_MODEL ? process.env.ANTHROPIC_MODEL : 'mimo-v2.5-pro[1m]';
|
|
22
|
-
|
|
23
|
-
// RSPL (Resource Substrate Protocol Layer) Types
|
|
24
|
-
/** A versioned prompt template held in the substrate's prompt registry. */
export interface PromptResource {
  /** Identifier of the prompt (the baseline entry uses the same value as its registry key). */
  id: string;
  /** The prompt text itself. */
  template: string;
  /** Version string of this prompt. */
  version: string;
}
|
|
29
|
-
|
|
30
|
-
/** A versioned description of a tool held in the substrate's tool registry. */
export interface ToolResource {
  /** Tool name (the baseline entries use the same value as their registry key). */
  name: string;
  /** Short human-readable summary of what the tool does. */
  description: string;
  /** Version string of this tool entry. */
  version: string;
}
|
|
35
|
-
|
|
36
|
-
/** A versioned agent entry held in the substrate's agent registry. */
export interface AgentResource {
  /** Identifier of the agent. */
  id: string;
  /** Role label of the agent. */
  role: string;
  /** Model identifier associated with the agent. */
  model: string;
  /** Version string of this agent entry. */
  version: string;
}
|
|
42
|
-
|
|
43
|
-
/** The set of string-keyed resource registries an engine works with, plus a workspace version. */
export interface RSPLSubstrate {
  /** Registered prompts by key. */
  prompts: Map<string, PromptResource>;
  /** Registered tools by key. */
  tools: Map<string, ToolResource>;
  /** Registered agents by key. */
  agents: Map<string, AgentResource>;
  /** Version string of the workspace as a whole. */
  workspaceVersion: string;
}
|
|
49
|
-
|
|
50
|
-
// SEPL (Self-Evolution Protocol Layer) Engine
|
|
51
|
-
export class AutogenesisEngine {
|
|
52
|
-
private substrate: RSPLSubstrate;
|
|
53
|
-
private effort: 'standard' | 'ultracode' = 'standard';
|
|
54
|
-
public codeactMode = false;
|
|
55
|
-
|
|
56
|
-
/**
 * Creates an engine with empty prompt/tool/agent registries at workspace version 1.0.0,
 * derives the effort level from CLAUDE_CODE_EFFORT_LEVEL, then registers the baseline resources.
 */
constructor() {
  const requestedEffort = process.env.CLAUDE_CODE_EFFORT_LEVEL || 'standard';
  // 'max' is accepted as an alias of 'ultracode'; every other value means 'standard'.
  const ultracodeAliases = new Set(['max', 'ultracode']);

  this.substrate = {
    prompts: new Map(),
    tools: new Map(),
    agents: new Map(),
    workspaceVersion: '1.0.0'
  };
  this.effort = ultracodeAliases.has(requestedEffort) ? 'ultracode' : 'standard';
  this.initializeSubstrates();
}
|
|
67
|
-
|
|
68
|
-
/**
 * Overrides the engine's effort level and prints the new level, upper-cased, to the console.
 *
 * @param effort The effort level to use from now on.
 */
setEffort(effort: 'standard' | 'ultracode') {
  this.effort = effort;
  const label = this.effort.toUpperCase();
  console.log(`\x1b[36m[AutogenesisEngine]\x1b[0m Effort level set to: \x1b[32m${label}\x1b[0m`);
}
|
|
72
|
-
|
|
73
|
-
/**
 * Registers the baseline resources: one 'system' prompt and the four file/shell tools,
 * all at version 1.0.0.
 */
private initializeSubstrates() {
  const baselineVersion = '1.0.0';

  this.substrate.prompts.set('system', {
    id: 'system',
    template: 'Standard coding agent prompt...',
    version: baselineVersion
  });

  // [name, description] pairs, registered in this order under their own name.
  const baselineTools: Array<[string, string]> = [
    ['readFile', 'Reads files'],
    ['writeFile', 'Writes files'],
    ['editFile', 'Edits files'],
    ['runCommand', 'Runs terminal commands']
  ];
  for (const [name, description] of baselineTools) {
    this.substrate.tools.set(name, { name, description, version: baselineVersion });
  }
}
|
|
85
|
-
|
|
86
|
-
/**
 * SEPL Phase 1: Reflect (R)
 * Runs `git status --porcelain` and then `npm test`, and reports whether the workspace is stable.
 *
 * @returns A "passing" summary when `npm test` exits with 0; otherwise a "FAILING" summary
 *          followed by the test run's stderr (or stdout when stderr is empty).
 */
async reflect(): Promise<string> {
  const progress = new Spinner('AGP - Reflect: Checking codebase compile & test status...');
  progress.start();

  // The porcelain status is requested first; its result is not consulted below.
  await runCommand('git status --porcelain');

  // The test run is the diagnostic baseline: only its exit code decides the verdict.
  const tests = await runCommand('npm test');

  if (tests.exitCode === 0) {
    progress.stop(true, 'AGP - Reflect: Codebase compiles and tests pass. Workspace is stable.');
    return 'Reflect Analysis: Core compilation and test suite are passing. Workspace is stable.';
  }

  progress.stop(false, 'AGP - Reflect: Diagnostic checks failed. Core compilation or tests are currently FAILING.');
  const logs = tests.stderr || tests.stdout;
  return `Reflect Analysis: Core compilation or test suite is currently FAILING.\nLogs:\n${logs}`;
}
|
|
108
|
-
|
|
109
|
-
/**
 * SEPL Phase 2: Select (S)
 * Builds the textual proposal for the upcoming mutation and logs that a path was chosen.
 *
 * @param analysis   Output of the reflect phase; only its first 100 characters are quoted.
 * @param targetTask The task the mutation should implement.
 * @returns The proposal sentence combining the task and the truncated analysis.
 */
select(analysis: string, targetTask: string): string {
  const analysisPreview = analysis.substring(0, 100);
  console.log('\x1b[32m✔\x1b[0m AGP - Select: Evolutionary path formulated.');
  return `Proposal: Mutate the codebase to implement the target task: "${targetTask}". Analysis confirms workspace status: ${analysisPreview}...`;
}
|
|
117
|
-
|
|
118
|
-
/**
 * SEPL Phase 3: Improve (I)
 * Hands the task to the Meta-Agent, which performs the actual code mutation.
 *
 * @param proposal   Proposal text from the select phase; accepted but not forwarded.
 * @param targetTask The task passed to runSelfImprovingTask together with the current
 *                   effort level and codeact mode.
 * @returns Whatever runSelfImprovingTask resolves to.
 */
async improve(proposal: string, targetTask: string): Promise<string> {
  console.log('\x1b[36m[AGP - Improve]\x1b[0m Triggering Self-Modification Loop...');

  const outcome = await runSelfImprovingTask(targetTask, this.effort, this.codeactMode);
  return outcome;
}
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* SEPL Phase 4: Evaluate (E)
|
|
131
|
-
* Evaluates the mutated candidate codebase against compilation, tests, and SAHOO.
|
|
132
|
-
* Note: The Meta-Agent's internally integrated SAHOO Gateway handles file-specific checks,
|
|
133
|
-
* while AGP evaluates overall workspace stability here.
|
|
134
|
-
*/
|
|
135
|
-
async evaluate(): Promise<{ success: boolean; message: string }> {
|
|
136
|
-
const result = await this.verifyBuildAndTest();
|
|
137
|
-
return {
|
|
138
|
-
success: result.success,
|
|
139
|
-
message: result.success
|
|
140
|
-
? 'Evaluate Passed: Codebase compiles and passes all unit tests.'
|
|
141
|
-
: `Evaluate Failed: ${result.message}`
|
|
142
|
-
};
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
/**
 * SEPL Phase 5: Commit (C)
 * On success: optionally extracts a reusable skill from the current `git diff`, commits all
 * changes, and archives the resulting commit hash. On failure: hard-resets and cleans the
 * workspace, then prints the failure reason.
 *
 * @param evaluationSuccess Whether the evaluate phase passed.
 * @param rollbackMessage   Reason printed after a rollback.
 * @param targetTask        Task description; skill extraction and archiving only happen when set.
 */
async commit(evaluationSuccess: boolean, rollbackMessage?: string, targetTask?: string): Promise<void> {
  // Failure path first: revert everything and report why.
  if (!evaluationSuccess) {
    const rollbackSpinner = new Spinner('AGP - Rollback: Verification failed. Reverting mutations...');
    rollbackSpinner.start();
    await this.gitResetClean();
    rollbackSpinner.stop(false, `AGP - Rollback: Codebase reverted to prior HEAD checkpoint.`);
    console.log(`\x1b[33mFailure Reason: ${rollbackMessage}\x1b[0m`);
    return;
  }

  const commitSpinner = new Spinner('AGP - Commit: Evolution successful. Committing snapshot...');
  commitSpinner.start();

  // Skill extraction is best-effort: any error is logged and the commit still proceeds.
  if (targetTask) {
    try {
      const workingDiff = await this.gitGetDiff();
      const hasChanges = workingDiff.exitCode === 0 && Boolean(workingDiff.stdout.trim());
      if (hasChanges) {
        commitSpinner.stop(true, 'AGP - Commit: Code healthy. Extracting reusable skill from changes...');
        const { extractAndSaveSkill } = require('../tools/skills');
        await extractAndSaveSkill(targetTask, workingDiff.stdout);
      }
    } catch (e) {
      console.error('[Skill Registry] Failed to register skill:', e);
    }
  }

  commitSpinner.stop(true);
  const committed = await this.commitAll();

  // Archiving needs both a successful commit and a task to attribute it to.
  if (committed.exitCode !== 0 || !targetTask) {
    return;
  }

  const head = await runCommand('git rev-parse HEAD');
  if (head.exitCode !== 0) {
    return;
  }

  const commitHash = head.stdout.trim();
  const lastRefactor = getEvolutionState().lastRefactorResult;
  const { appendArchive } = require('./state-manager');
  appendArchive({
    commitHash,
    task: targetTask,
    cycle: getEvolutionState().currentCycle,
    metrics: lastRefactor?.metrics
  });
}
|
|
193
|
-
|
|
194
|
-
/**
 * Computes a prompt utility score; higher is better.
 *
 * The score is 100000 divided by the weighted cost (40% time, 60% tokens).
 *
 * @param timeSeconds Duration of the benchmark run, in seconds.
 * @param totalTokens Total number of tokens consumed by the run.
 * @returns 0 when either input is exactly 0, otherwise the inverse weighted cost.
 */
public calculatePromptScore(timeSeconds: number, totalTokens: number): number {
  // A run with no recorded time or no recorded tokens is not scored.
  if (timeSeconds === 0 || totalTokens === 0) {
    return 0;
  }

  const weightedTime = timeSeconds * 0.4;
  const weightedTokens = totalTokens * 0.6;
  return 100000 / (weightedTime + weightedTokens);
}
|
|
203
|
-
|
|
204
|
-
/**
 * Rolls the workspace back: `git reset --hard HEAD` followed by `git clean -fd`.
 * Exit codes of the two commands are not inspected.
 */
private async gitResetClean(): Promise<void> {
  const rollbackSteps = ['git reset --hard HEAD', 'git clean -fd'];
  for (const step of rollbackSteps) {
    await runCommand(step);
  }
}
|
|
211
|
-
|
|
212
|
-
/**
 * Runs `npm run build` and then `npm test` under one spinner, stopping at the first failure.
 *
 * @returns `success: true` when both commands exit with 0; otherwise `success: false` with a
 *          message naming the failed stage followed by that command's stdout and stderr.
 */
private async verifyBuildAndTest(): Promise<{ success: boolean; message: string }> {
  const progress = new Spinner('Verifying: Running build & test suites...');
  progress.start();

  // Stages run in order; `label` prefixes the returned failure message.
  const stages = [
    { command: 'npm run build', stopText: 'Verification failed (Compilation error).', label: 'Compilation error' },
    { command: 'npm test', stopText: 'Verification failed (Tests failed).', label: 'Test suite failed' }
  ];

  for (const stage of stages) {
    const outcome = await runCommand(stage.command);
    if (outcome.exitCode !== 0) {
      progress.stop(false, stage.stopText);
      return { success: false, message: `${stage.label}:\n${outcome.stdout}\n${outcome.stderr}` };
    }
  }

  progress.stop(true, 'Verification passed: Build & tests succeeded.');
  return { success: true, message: 'Build & tests succeeded.' };
}
|
|
232
|
-
|
|
233
|
-
/**
 * Stages every change (`git add -A`) and commits with the fixed auto-evolution message.
 *
 * @returns The `git add` result when staging fails, otherwise the `git commit` result
 *          (which may itself carry a non-zero exit code).
 */
private async commitAll(): Promise<CommandResult> {
  const progress = new Spinner('Committing: Staging all changes and committing...');
  progress.start();

  const staged = await runCommand('git add -A');
  if (staged.exitCode !== 0) {
    progress.stop(false, 'Commit failed: git add -A failed.');
    return staged;
  }

  const committed = await runCommand('git commit -m "Auto-evolution step: code modification successfully passed SAHOO & AGP gates."');
  const ok = committed.exitCode === 0;
  progress.stop(ok, ok ? 'Evolution transaction committed successfully.' : 'Commit failed: git commit failed.');
  return committed;
}
|
|
252
|
-
|
|
253
|
-
/**
 * Runs a plain `git diff` and returns its command result.
 */
private async gitGetDiff(): Promise<CommandResult> {
  const diff = await runCommand('git diff');
  return diff;
}
|
|
259
|
-
|
|
260
|
-
/**
 * Stages a single path with `git add`.
 *
 * The path is wrapped in double quotes, with `\`, `"`, `$` and backticks backslash-escaped,
 * so that spaces or shell metacharacters in `target` cannot split or alter the command.
 * NOTE(review): the escaping assumes runCommand hands the string to a POSIX-style shell — confirm.
 *
 * @param target Path (or pathspec) to stage.
 * @returns The result reported by runCommand for the `git add` invocation.
 */
private async gitAdd(target: string): Promise<CommandResult> {
  const escapedTarget = target.replace(/[\\"$`]/g, '\\$&');
  return await runCommand(`git add "${escapedTarget}"`);
}
|
|
266
|
-
|
|
267
|
-
/**
 * Commits the staged changes with a custom message.
 *
 * The message is placed inside double quotes, so `\`, `"`, `$` and backticks in it are
 * backslash-escaped first; otherwise a quote would end the argument early and `$`/backticks
 * would be expanded by the shell.
 * NOTE(review): the escaping assumes runCommand hands the string to a POSIX-style shell — confirm.
 *
 * @param message Commit message, used verbatim.
 * @returns The result reported by runCommand for the `git commit` invocation.
 */
private async gitCommit(message: string): Promise<CommandResult> {
  const escapedMessage = message.replace(/[\\"$`]/g, '\\$&');
  return await runCommand(`git commit -m "${escapedMessage}"`);
}
|
|
273
|
-
|
|
274
|
-
/**
 * Benchmarks a system prompt by letting the Task Agent solve a fixed small task with it.
 *
 * WARNING: the workspace is reset with `git reset --hard HEAD` + `git clean -fd` both before
 * and after the run, so uncommitted changes are discarded.
 *
 * @param systemPrompt The system prompt passed to the Task Agent as an override.
 * @returns `success` (build and tests passed after the run), `time` in seconds (the agent's
 *          own figure, or wall-clock time when that is missing or 0), `tokens` (input +
 *          output) and `score` (0 unless the run succeeded).
 */
private async benchmarkPrompt(systemPrompt: string): Promise<{ success: boolean; time: number; tokens: number; score: number }> {
  const benchmarkTask = "Create a typescript helper file src/tools/math-helper.ts that exports a sum(a: number, b: number) function. Ensure it compiles. Do not change any other files.";
  const helperPath = path.join(process.cwd(), 'src/tools/math-helper.ts');

  // Removes the benchmark artefact if a previous or current run left it behind.
  const removeHelperFile = (): void => {
    if (fs.existsSync(helperPath)) {
      fs.unlinkSync(helperPath);
    }
  };

  // Start from a clean baseline.
  removeHelperFile();
  await this.gitResetClean();

  const startTime = Date.now();
  let success = false;
  let inputTokens = 0;
  let outputTokens = 0;
  let timeSeconds = 0;

  try {
    // Delegate to the Task Agent directly with the specified system prompt override.
    const report = await runTaskAgent(benchmarkTask, { systemPrompt });
    timeSeconds = report.timeSeconds;
    inputTokens = report.inputTokens;
    outputTokens = report.outputTokens;

    const verification = await this.verifyBuildAndTest();
    success = verification.success;
  } catch (err: unknown) {
    // A failed run still yields a (zero-score) report, but the reason is no longer hidden.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[Prompt Benchmark] Benchmark run failed: ${reason}`);
  } finally {
    // Always leave the workspace as it was found.
    removeHelperFile();
    await this.gitResetClean();
  }

  // Fall back to wall-clock time when the agent reported none.
  if (!timeSeconds) {
    timeSeconds = (Date.now() - startTime) / 1000;
  }
  const totalTokens = inputTokens + outputTokens;
  const score = success ? this.calculatePromptScore(timeSeconds, totalTokens) : 0;

  return { success, time: timeSeconds, tokens: totalTokens, score };
}
|
|
326
|
-
|
|
327
|
-
/**
 * Runs the prompt optimization and benchmarking pipeline.
 *
 * Steps: benchmark the baseline system prompt, ask the model for a leaner rewrite, benchmark
 * that candidate, and adopt it only when it succeeds with a strictly higher score. The
 * baseline is read from src/orchestrator/system-prompt.txt under the current working
 * directory, or a built-in default when that file is missing. An approved candidate is
 * written to that path, staged and committed; the git outcome is reported truthfully. The
 * result is recorded via updateEvolutionState and appendHistory either way.
 *
 * @throws Error prefixed with "Prompt Optimization Failed:" when no candidate can be generated.
 */
async runPromptOptimization(): Promise<void> {
  const workspaceRoot = process.cwd();
  const promptPath = path.join(workspaceRoot, 'src/orchestrator/system-prompt.txt');

  let baselinePrompt = '';
  if (fs.existsSync(promptPath)) {
    baselinePrompt = fs.readFileSync(promptPath, 'utf-8');
  } else {
    baselinePrompt = `You are an elite coding agent designed to operate inside a codebase workspace.
Your primary objective is to fulfill the user's coding tasks accurately and cleanly.

You have access to a set of local tools to read, write, edit files and execute terminal commands.
Always prefer editing precise parts of files using editFile instead of overwriting the whole file with writeFile unless it is a new file.
When running commands, verify compilation and test outcomes. If a test fails, you must attempt to fix the issues (Self-Healing).

Be concise and professional. Formulate plans before making changes.`;
  }

  // Prints one benchmark report; `label` is 'Baseline' or 'Candidate'.
  const printReport = (label: string, report: { success: boolean; time: number; tokens: number; score: number }): void => {
    console.log(`${label} Prompt Benchmark Result:`);
    console.log(`- Success: ${report.success ? '\x1b[32mPASS\x1b[0m' : '\x1b[31mFAIL\x1b[0m'}`);
    console.log(`- Time: ${report.time.toFixed(1)}s`);
    console.log(`- Tokens: ${report.tokens}`);
    console.log(`- Score: ${report.score.toFixed(2)}`);
  };

  console.log(`\n\x1b[35m=== STEP 1: BENCHMARKING BASELINE SYSTEM PROMPT ===\x1b[0m`);
  const baselineReport = await this.benchmarkPrompt(baselinePrompt);
  printReport('Baseline', baselineReport);

  console.log(`\n\x1b[35m=== STEP 2: GENERATING OPTIMIZED CANDIDATE SYSTEM PROMPT ===\x1b[0m`);
  const optimizerSpinner = new Spinner('AGP - Optimizer: Refinement model generating optimized system prompt...');
  optimizerSpinner.start();

  const anthropic = new Anthropic({
    apiKey: apiKey,
    baseURL: baseURL,
  });

  const optimizerPrompt = `You are an expert system prompt engineer. Your goal is to optimize the following system prompt for an autonomous coding agent.
You need to make the prompt more concise, remove redundancy, and explicitly instruct the agent to reduce token waste and thinking overhead, while ensuring it retains all functionality (using file-tools, rulez, self-healing, compiling, running tests).

Here is the Current System Prompt:
"""
${baselinePrompt}
"""

Provide your output strictly as the new optimized system prompt. Do not write any markdown wrappers (like \`\`\` or \`\`\`txt), explanations, greetings, or conversational headers/footers. Output ONLY the raw optimized system prompt text.`;

  let candidatePrompt = '';
  try {
    const response = await anthropic.messages.create({
      model: defaultModel,
      max_tokens: 1500,
      messages: [{ role: 'user', content: optimizerPrompt }]
    });

    const block = response.content.find(b => b.type === 'text') as Anthropic.TextBlock | undefined;
    if (!block || !block.text) {
      throw new Error('Failed to retrieve system prompt candidate from LLM.');
    }
    candidatePrompt = block.text.trim();
    optimizerSpinner.stop(true, 'AGP - Optimizer: Prompt candidate generated.');
  } catch (err: unknown) {
    optimizerSpinner.stop(false, 'AGP - Optimizer: Prompt candidate generation failed.');
    // Non-Error throwables are stringified instead of producing "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Prompt Optimization Failed: ${reason}`);
  }

  console.log('\n\x1b[33m┌── Candidate System Prompt ──────────────────────────────────────────────────────┐\x1b[0m');
  candidatePrompt.split('\n').forEach(line => console.log(`\x1b[33m│\x1b[0m ${line}`));
  console.log('\x1b[33m└─────────────────────────────────────────────────────────────────────────────────┘\x1b[0m\n');

  console.log(`\x1b[35m=== STEP 3: BENCHMARKING CANDIDATE SYSTEM PROMPT ===\x1b[0m`);
  const candidateReport = await this.benchmarkPrompt(candidatePrompt);
  printReport('Candidate', candidateReport);

  console.log(`\n\x1b[35m=== STEP 4: COMPARING PERFORMANCE METRICS ===\x1b[0m`);
  console.log(`Baseline Score: \x1b[1m${baselineReport.score.toFixed(2)}\x1b[0m`);
  console.log(`Candidate Score: \x1b[1m${candidateReport.score.toFixed(2)}\x1b[0m`);

  // Single source of truth for the decision; reused for the state and history records below.
  const approved = candidateReport.success && candidateReport.score > baselineReport.score;

  if (approved) {
    console.log(`\n\x1b[32;1m[Optimization Approved] Candidate prompt is more efficient. Overwriting system-prompt.txt...\x1b[0m`);
    fs.writeFileSync(promptPath, candidatePrompt, 'utf-8');

    // Commit only what was staged successfully, and only claim success when git agrees.
    const staged = await this.gitAdd('src/orchestrator/system-prompt.txt');
    const committed = staged.exitCode === 0
      ? await this.gitCommit('chore: optimize Task Agent system prompt via automated benchmarking loop')
      : staged;
    if (committed.exitCode === 0) {
      console.log(`\x1b[32m[Git Status] System prompt snapshot committed to repository HEAD.\x1b[0m`);
    } else {
      console.log(`\x1b[31m[Git Status] Could not commit the optimized system prompt: ${committed.stderr || committed.stdout}\x1b[0m`);
    }
  } else {
    console.log(`\n\x1b[31;1m[Optimization Rejected] Candidate prompt did not improve performance. Retaining baseline.\x1b[0m`);
  }

  updateEvolutionState({
    lastPromptOptimization: {
      success: candidateReport.success,
      baselineScore: baselineReport.score,
      candidateScore: candidateReport.score,
      approved,
      time: new Date().toISOString()
    }
  });

  appendHistory({
    cycle: getEvolutionState().currentCycle,
    type: 'prompt_opt',
    description: `Optimized system prompt. Baseline: ${baselineReport.score.toFixed(2)}, Candidate: ${candidateReport.score.toFixed(2)}`,
    success: approved
  });
}
|
|
439
|
-
|
|
440
|
-
/**
 * Generates a refactoring proposal by scanning the codebase repository map.
 *
 * Two model calls are made: the first picks candidate files from the repo map, the last 40
 * lines of evolution.log and Learnings.md; the second receives the candidates' contents and
 * picks one winning file together with a concrete refactor goal.
 *
 * Candidate paths come from model output, so they are validated before use: entries without
 * a non-empty string `targetFile` are dropped, and paths that resolve outside the workspace
 * are never read.
 *
 * @returns The winning target file and its refactor goal.
 * @throws Error prefixed with "Refactoring Analysis Failed:" when a model call fails, a reply
 *         is not valid JSON, or the reply lacks the expected fields.
 */
async getAutonomousRefactorProposal(): Promise<{ targetFile: string; refactorGoal: string }> {
  const workspaceRoot = process.cwd();
  const repoMap = generateRepoMap(workspaceRoot);

  const scanSpinner = new Spinner('AGP - Scan: Auditing codebase structure for optimizations...');
  scanSpinner.start();

  // Best-effort read for optional context files: missing or unreadable yields ''.
  const readOptional = (filePath: string): string => {
    if (!fs.existsSync(filePath)) {
      return '';
    }
    try {
      return fs.readFileSync(filePath, 'utf-8');
    } catch {
      return '';
    }
  };

  // Removes a leading ``` or ```json fence and a trailing ``` fence from a model reply.
  const stripCodeFence = (reply: string): string =>
    reply.trim().replace(/^```(?:json)?/i, '').replace(/```$/, '').trim();

  // True when `candidatePath` stays inside the workspace after normalisation.
  const isInsideWorkspace = (candidatePath: string): boolean => {
    const relative = path.relative(workspaceRoot, candidatePath);
    const escapes = relative === '..' || relative.startsWith('..' + path.sep) || path.isAbsolute(relative);
    return relative !== '' && !escapes;
  };

  // 1. Context gathering: logs & learnings. Only the last 40 log lines are ingested.
  const evolutionLog = readOptional(path.join(workspaceRoot, 'evolution.log'))
    .split('\n')
    .slice(-40)
    .join('\n');
  const learningsMd = readOptional(path.join(workspaceRoot, 'Learnings.md'));

  const anthropic = new Anthropic({
    apiKey: apiKey,
    baseURL: baseURL,
  });

  try {
    // Step 1: Select 2 target candidate files based on Repo Map, Logs, and Learnings
    const candidatePrompt = `You are an expert software architect. Your goal is to select the top 2 files in this codebase that would benefit most from refactoring (improving type-safety, code duplication, helper functions cleanup, or performance).
Review the codebase map, past evolution logs, and lessons learned.

Codebase Repository Map:
${repoMap}

Last 40 lines of evolution logs (shows recent runs/warnings/failures):
${evolutionLog || 'No log history available.'}

Learnings Registry (shows past insights):
${learningsMd || 'No past learnings recorded.'}

Output your response strictly as a JSON object containing a list of 2 candidate target files (relative paths). Do not include any other text, markdown blocks, or formatting, just the raw JSON.

Format:
{
"candidates": [
{ "targetFile": "relative/path/to/file1.ts" },
{ "targetFile": "relative/path/to/file2.ts" }
]
}`;

    const candidateResponse = await anthropic.messages.create({
      model: defaultModel,
      max_tokens: 4000,
      messages: [{ role: 'user', content: candidatePrompt }]
    });

    const candBlock = candidateResponse.content.find(b => b.type === 'text') as Anthropic.TextBlock | undefined;
    if (!candBlock || !candBlock.text) {
      console.error('DEBUG - candidateResponse content:', JSON.stringify(candidateResponse.content));
      throw new Error('Failed to retrieve candidates from LLM.');
    }

    const candJson = JSON.parse(stripCodeFence(candBlock.text));
    // Keep only well-formed entries; the reply's shape is not guaranteed.
    const rawCandidates: unknown = candJson?.candidates;
    const candidates: Array<{ targetFile: string }> = Array.isArray(rawCandidates)
      ? rawCandidates.filter(
          (entry): entry is { targetFile: string } =>
            typeof entry?.targetFile === 'string' && entry.targetFile.length > 0
        )
      : [];

    if (candidates.length === 0) {
      throw new Error('No candidates identified by the LLM.');
    }

    // Step 2: Read candidate file contents
    let fileContext = '';
    for (const cand of candidates) {
      const fullPath = path.join(workspaceRoot, cand.targetFile);
      if (!isInsideWorkspace(fullPath)) {
        // Never read (and forward to the model) files outside the workspace.
        fileContext += `\n--- FILE: ${cand.targetFile} (Path is outside the workspace; skipped) ---\n`;
        continue;
      }
      if (!fs.existsSync(fullPath)) {
        fileContext += `\n--- FILE: ${cand.targetFile} (File not found on disk) ---\n`;
        continue;
      }
      try {
        const fileContent = fs.readFileSync(fullPath, 'utf-8');
        fileContext += `\n--- FILE: ${cand.targetFile} ---\n${fileContent}\n`;
      } catch (e) {
        fileContext += `\n--- FILE: ${cand.targetFile} (Could not read file contents) ---\n`;
      }
    }

    // Step 3: Evaluate and Select Winner based on actual code inspection
    const evaluationPrompt = `You are an expert software architect. Analyze the actual code content of the candidate files below and select the single best refactoring proposal.
Evaluate each file and design a specific refactor goal.
Score each proposal out of 10 based on:
1. Impact (readability, duplication reduction, safety, performance).
2. Feasibility (compilation safety, low risk of breaking tests).

Candidate File Contents:
${fileContext}

Output your response strictly as a JSON object containing the winning proposal's target file and the specific refactoring goal. Do not include any other text, markdown blocks, or formatting, just the raw JSON.

Format:
{
"winningProposal": {
"targetFile": "relative/path/to/file.ts",
"refactorGoal": "A concise, specific description of the refactoring goal (e.g., 'Refactor search-tools to use strong interfaces instead of any[]')",
"impactScore": 8.5,
"feasibilityScore": 9.0
}
}`;

    const evaluationResponse = await anthropic.messages.create({
      model: defaultModel,
      max_tokens: 4000,
      messages: [{ role: 'user', content: evaluationPrompt }]
    });

    const evalBlock = evaluationResponse.content.find(b => b.type === 'text') as Anthropic.TextBlock | undefined;
    if (!evalBlock || !evalBlock.text) {
      throw new Error('Failed to retrieve evaluation from LLM.');
    }

    const evalJson = JSON.parse(stripCodeFence(evalBlock.text));
    const winningProposal = evalJson?.winningProposal;

    // Both fields must be non-empty strings to satisfy the declared return type.
    const wellFormed =
      winningProposal &&
      typeof winningProposal.targetFile === 'string' && winningProposal.targetFile &&
      typeof winningProposal.refactorGoal === 'string' && winningProposal.refactorGoal;
    if (!wellFormed) {
      throw new Error('Invalid JSON structure returned for winning proposal.');
    }

    console.log(`\n\x1b[32m✔\x1b[0m AGP - Scan: Selected best proposal: ${winningProposal.targetFile} (Impact: ${winningProposal.impactScore || 'N/A'}, Feasibility: ${winningProposal.feasibilityScore || 'N/A'})`);

    scanSpinner.stop(true, 'AGP - Scan: Codebase audit complete.');
    return {
      targetFile: winningProposal.targetFile,
      refactorGoal: winningProposal.refactorGoal
    };
  } catch (err: unknown) {
    scanSpinner.stop(false, 'AGP - Scan: Codebase audit failed.');
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Refactoring Analysis Failed: ${reason}`);
  }
}
|
|
587
|
-
|
|
588
|
-
/**
 * Obtains an autonomous refactoring proposal, prints and records it, then runs a full
 * evolution step whose task is to carry out that refactor.
 */
async runAutonomousRefactor(): Promise<void> {
  const { targetFile, refactorGoal } = await this.getAutonomousRefactorProposal();

  // Boxed summary of the proposal, printed row by row.
  const banner = [
    '\n\x1b[35m┌── Autonomous Refactoring Proposal ──────────────────────────────────────────────┐\x1b[0m',
    `\x1b[35m│\x1b[0m Target File: \x1b[1m\x1b[32m${targetFile}\x1b[0m`,
    `\x1b[35m│\x1b[0m Refactor Goal: \x1b[33m${refactorGoal}\x1b[0m`,
    '\x1b[35m└─────────────────────────────────────────────────────────────────────────────────┘\x1b[0m\n'
  ];
  banner.forEach(row => console.log(row));

  updateEvolutionState({
    lastRefactorProposal: {
      targetFile,
      refactorGoal,
      time: new Date().toISOString()
    }
  });

  const taskText = `Refactor the file "${targetFile}" to achieve the following goal: ${refactorGoal}. Do not modify any other file unless required for compilation. Compile and verify tests pass.`;

  // The evolution loop performs reflect/select/improve/evaluate/commit for this task.
  await this.runEvolutionStep(taskText);
}
|
|
612
|
-
|
|
613
|
-
/**
 * High-Level Autogenesis Evolution Run.
 *
 * Runs one reflect → select → improve → evaluate → commit/rollback pass for
 * `targetTask`, publishing each phase through `updateEvolutionState` and
 * recording the outcome with `appendHistory`.
 *
 * Failures raised by `improve()` or reported by `evaluate()` are not rethrown:
 * they are turned into a failure reason that is passed to `commit()` and
 * stored in the evolution state. Errors from `reflect()` / `select()` still
 * propagate to the caller, but the research session is closed and the status
 * is returned to `idle` first.
 *
 * @param targetTask Natural-language description of the task to execute.
 */
async runEvolutionStep(targetTask: string): Promise<void> {
  console.log('\x1b[35m=== STARTING AUTOGENESIS PROTOCOL EVOLUTION STEP ===\x1b[0m');

  // Start active research session
  ResearchManager.startSession(targetTask, this.effort);

  updateEvolutionState({
    status: 'reflecting',
    currentTask: `Executing task: "${targetTask}"`
  });

  let proposal;
  try {
    // 1. Reflect
    const analysis = await this.reflect();

    updateEvolutionState({
      status: 'scanning',
      currentTask: 'Selecting mutation plan'
    });

    // 2. Select
    proposal = this.select(analysis, targetTask);
  } catch (err: unknown) {
    // Do not leave a dangling research session or a stale 'reflecting' /
    // 'scanning' status behind when planning itself fails.
    ResearchManager.endSession('failed');
    updateEvolutionState({ status: 'idle', currentTask: null });
    throw err;
  }

  updateEvolutionState({
    status: 'refactoring',
    currentTask: `Mutating codebase for task: "${targetTask}"`
  });

  // 3. Improve & 4. Evaluate (The Meta-Agent execution is self-safeguarded by SAHOO)
  let executionSuccess = false;
  let failReason = '';

  try {
    await this.improve(proposal, targetTask);

    updateEvolutionState({
      status: 'evaluating',
      currentTask: 'Running verification test suite'
    });

    // Double check overall workspace via AGP evaluate
    const evalResult = await this.evaluate();
    if (evalResult.success) {
      executionSuccess = true;
    } else {
      failReason = evalResult.message;
    }
  } catch (err: unknown) {
    // Non-Error throwables (strings, plain objects) have no `.message`.
    failReason = err instanceof Error ? err.message : String(err);
  }

  // 5. Commit or Rollback
  updateEvolutionState({
    status: executionSuccess ? 'committing' : 'rolling_back',
    currentTask: executionSuccess ? 'Committing changes to Git' : 'Rolling back workspace modifications'
  });

  await this.commit(executionSuccess, failReason, targetTask);

  // End active research session
  ResearchManager.endSession(executionSuccess ? 'success' : 'failed');

  // Take the quoted name that directly follows `file "`, not merely the first
  // quoted substring in the task; fall back to a generic label otherwise.
  const quotedFile = /file "([^"]*)/.exec(targetTask);
  const targetFile = quotedFile ? quotedFile[1] : 'Workspace';

  // Carry over metrics already stored on the previous result.
  const currentRefactorResult = getEvolutionState().lastRefactorResult;

  updateEvolutionState({
    status: 'idle',
    currentTask: null,
    lastRefactorResult: {
      success: executionSuccess,
      targetFile: targetFile,
      message: executionSuccess ? 'Evolution succeeded.' : `Evolution failed: ${failReason}`,
      time: new Date().toISOString(),
      reason: executionSuccess ? undefined : failReason,
      metrics: currentRefactorResult?.metrics
    }
  });

  appendHistory({
    cycle: getEvolutionState().currentCycle,
    type: 'refactor',
    // Keep history entries short: truncate long task descriptions.
    description: targetTask.length > 80 ? targetTask.substring(0, 80) + '...' : targetTask,
    success: executionSuccess,
    details: executionSuccess ? undefined : failReason
  });

  console.log('\x1b[35m=== AUTOGENESIS STEP COMPLETED ===\x1b[0m\n');
}
|
|
706
|
-
|
|
707
|
-
/**
 * Runs `node <args>` as a child process that shares this process's stdio and
 * resolves with its exit code.
 *
 * The child inherits the current environment plus `CLAUDE_CODE_EFFORT_LEVEL`
 * set to `this.effort`. The returned promise never rejects: a spawn failure
 * or a termination without an exit code (e.g. by signal) resolves to `1`,
 * and the cause is written to stderr so the failure can be diagnosed.
 *
 * @param args Arguments passed to `node` (script path first).
 * @returns The child's exit code, or `1` when none is available.
 */
private runChildProcess(args: string[]): Promise<number> {
  return new Promise((resolve) => {
    const child = spawn('node', args, {
      stdio: 'inherit',
      env: { ...process.env, CLAUDE_CODE_EFFORT_LEVEL: this.effort }
    });
    child.on('close', (code, signal) => {
      if (code === null) {
        console.error(`\x1b[31m[Daemon Worker] Child process ended without an exit code (signal: ${signal ?? 'none'}).\x1b[0m`);
      }
      resolve(code ?? 1);
    });
    child.on('error', (err) => {
      // Typically a spawn failure (e.g. `node` not found on PATH).
      console.error(`\x1b[31m[Daemon Worker] Child process error: ${err.message}\x1b[0m`);
      // Resolving twice is harmless if 'close' also fires afterwards.
      resolve(1);
    });
  });
}
|
|
721
|
-
|
|
722
|
-
/**
 * Continuous Self-Evolution Daemon Loop.
 *
 * Each cycle:
 *   1. reads cumulative token usage from `session-tokens.json` and halts when
 *      it exceeds `tokenBudget`;
 *   2. fetches a refactor proposal, skipping one whose key is already listed
 *      in `evolution-history.json`;
 *   3. spawns a `--refactor` child worker, and on failure resets the
 *      workspace (backtracking to the last archived commit after two
 *      consecutive failures);
 *   4. spawns an `--optimize-prompt` child worker;
 *   5. sleeps for 60 seconds, unless this was the last permitted cycle.
 *
 * @param maxCycles   Maximum number of cycles; values <= 0 disable the limit.
 * @param tokenBudget Cumulative token count above which the daemon halts.
 */
async runContinuousEvolution(maxCycles: number, tokenBudget: number): Promise<void> {
  const historyPath = path.join(process.cwd(), 'evolution-history.json');
  const tokenPath = path.join(process.cwd(), 'session-tokens.json');

  // Reads the list of recently executed proposal keys. Anything that is not a
  // JSON array (missing, corrupt or hand-edited file) is treated as empty so
  // that `.includes` / `.push` below can never throw.
  const readHistory = (): string[] => {
    if (!fs.existsSync(historyPath)) {
      return [];
    }
    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(historyPath, 'utf-8'));
      return Array.isArray(parsed) ? parsed : [];
    } catch (e) {
      console.warn(`\x1b[33m[Continuous Evolution] Ignoring unreadable history file: ${e instanceof Error ? e.message : String(e)}\x1b[0m`);
      return [];
    }
  };

  // Clear previous token file on start
  if (fs.existsSync(tokenPath)) {
    try {
      fs.unlinkSync(tokenPath);
    } catch (e) {
      // Best effort, but a stale file means old usage counts against this session.
      console.warn(`\x1b[33m[Continuous Evolution] Could not remove previous token file: ${e instanceof Error ? e.message : String(e)}\x1b[0m`);
    }
  }

  updateEvolutionState({
    status: 'idle',
    currentCycle: 0,
    maxCycles,
    tokenBudget,
    tokensUsed: { input: 0, output: 0, total: 0 }
  });

  let cycle = 0;
  let consecutiveFailures = 0;
  while (true) {
    cycle++;
    if (maxCycles > 0 && cycle > maxCycles) {
      console.log(`\n\x1b[32m[Continuous Evolution] Completed maximum cycles (${maxCycles}). Stopping.\x1b[0m`);
      updateEvolutionState({ status: 'idle', currentTask: null });
      break;
    }

    console.log(`\n\x1b[35m==================================================\x1b[0m`);
    console.log(`\x1b[35m STARTING CONTINUOUS EVOLUTION CYCLE ${cycle} \x1b[0m`);
    console.log(`\x1b[35m==================================================\x1b[0m\n`);

    updateEvolutionState({
      status: 'reflecting',
      currentCycle: cycle,
      currentTask: 'Checking workspace baseline status'
    });

    // 1. Check token budget
    let totalInput = 0;
    let totalOutput = 0;
    if (fs.existsSync(tokenPath)) {
      try {
        const usage = JSON.parse(fs.readFileSync(tokenPath, 'utf-8'));
        // Coerce so a stringified count cannot turn the sum into a concatenation.
        totalInput = Number(usage.inputTokens) || 0;
        totalOutput = Number(usage.outputTokens) || 0;
      } catch (e) {
        // An unreadable file is counted as zero usage, which weakens the
        // budget check, so make that visible.
        console.warn(`\x1b[33m[Continuous Evolution] Could not read token usage, assuming 0: ${e instanceof Error ? e.message : String(e)}\x1b[0m`);
      }
    }

    const totalTokens = totalInput + totalOutput;
    console.log(`Current Session Cumulative Token Usage: ${totalTokens} / ${tokenBudget}`);

    updateEvolutionState({
      tokensUsed: {
        input: totalInput,
        output: totalOutput,
        total: totalTokens
      }
    });

    if (totalTokens > tokenBudget) {
      console.log(`\n\x1b[31;1m[Budget Exceeded] Token usage (${totalTokens}) has exceeded the budget cap (${tokenBudget}). Halting daemon.\x1b[0m`);
      updateEvolutionState({ status: 'idle', currentTask: 'Token budget exceeded' });
      break;
    }

    updateEvolutionState({
      status: 'scanning',
      currentTask: 'Generating next mutation proposal'
    });

    // 2. Fetch Refactor Proposal and check history to avoid ping-pong loops
    let proposal;
    try {
      proposal = await this.getAutonomousRefactorProposal();

      const proposalKey = `${proposal.targetFile}::${proposal.refactorGoal}`;
      if (readHistory().includes(proposalKey)) {
        console.log(`\n\x1b[33m[Divergence Prevention] Proposal "${proposalKey}" has been successfully executed recently. Skipping to prevent cycles.\x1b[0m`);
        // Drop the duplicate first so it is not executed if the retry throws.
        proposal = undefined;
        // Try a second time to get a different proposal
        proposal = await this.getAutonomousRefactorProposal();
      }
    } catch (err: unknown) {
      console.error(`\n\x1b[31m[Proposal Error] Failed to scan proposal: ${err instanceof Error ? err.message : String(err)}\x1b[0m`);
    }

    if (proposal) {
      const proposalKey = `${proposal.targetFile}::${proposal.refactorGoal}`;
      console.log(`\n\x1b[36m[Daemon Worker] Spawning refactor child worker for target: ${proposal.targetFile}...\x1b[0m`);

      updateEvolutionState({
        status: 'refactoring',
        currentTask: `Refactoring: ${proposal.refactorGoal}`,
        lastRefactorProposal: {
          targetFile: proposal.targetFile,
          refactorGoal: proposal.refactorGoal,
          time: new Date().toISOString()
        }
      });

      // Spawn --refactor worker
      // NOTE(review): the proposal is not passed to the worker; presumably the
      // worker derives its own, so confirm the key recorded below matches
      // what the worker actually executed.
      const refactorExit = await this.runChildProcess(['dist/index.js', '--refactor']);

      if (refactorExit === 0) {
        console.log(`\n\x1b[32m✔ [Daemon Worker] Refactor cycle completed successfully.\x1b[0m`);
        consecutiveFailures = 0; // Reset failures on success

        // Append to history cache, keeping only the 20 most recent keys.
        const historyList = readHistory();
        historyList.push(proposalKey);
        if (historyList.length > 20) {
          historyList.shift();
        }
        fs.writeFileSync(historyPath, JSON.stringify(historyList, null, 2), 'utf-8');
      } else {
        console.log(`\n\x1b[31m❌ [Daemon Worker] Refactor cycle failed or was rolled back. (Exit Code: ${refactorExit})\x1b[0m`);
        consecutiveFailures++;
        // Ensure git rollback just in case the child process crashed mid-way or didn't roll back
        await this.gitResetClean();

        // DGM-inspired Evolutionary Backtracking
        if (consecutiveFailures >= 2) {
          const state = getEvolutionState();
          const archive = state.archive || [];
          if (archive.length > 0) {
            const parentNode = archive[archive.length - 1];
            console.log(`\n\x1b[31m[DGM Backtracking]\x1b[0m Consecutive failures (${consecutiveFailures}) exceeded limit. Backtracking to parent node cycle ${parentNode.cycle} (commit: ${parentNode.commitHash.substring(0, 7)})...`);

            const spinner = new Spinner('DGM - Backtracking: Resetting workspace to stable parent node...');
            spinner.start();
            const resetRes = await runCommand(`git reset --hard ${parentNode.commitHash}`);
            const cleanRes = await runCommand('git clean -fd');

            if (resetRes.exitCode === 0 && cleanRes.exitCode === 0) {
              spinner.stop(true, 'DGM - Backtracking: Workspace successfully restored to stable parent node.');
              consecutiveFailures = 0; // Reset consecutive failures after backtracking
            } else {
              // Do not claim success (or reset the counter) when git refused
              // the reset, e.g. because the archived hash is not a valid commit.
              // NOTE(review): assumes the first argument of Spinner.stop is a success flag; confirm.
              spinner.stop(false, 'DGM - Backtracking: Failed to restore workspace to stable parent node.');
            }
          }
        }
      }
    }

    // 3. Spawns Prompt Optimizer
    console.log(`\n\x1b[36m[Daemon Worker] Spawning prompt optimization child worker...\x1b[0m`);

    updateEvolutionState({
      status: 'optimizing_prompt',
      currentTask: 'Optimizing System Prompt Benchmarks'
    });

    const optimizeExit = await this.runChildProcess(['dist/index.js', '--optimize-prompt']);
    if (optimizeExit === 0) {
      console.log(`\n\x1b[32m✔ [Daemon Worker] Prompt optimization benchmark completed.\x1b[0m`);
    } else {
      console.log(`\n\x1b[31m❌ [Daemon Worker] Prompt optimization benchmark failed. (Exit Code: ${optimizeExit})\x1b[0m`);
      await this.gitResetClean();
    }

    // No point sleeping for a minute when the next iteration would only hit
    // the max-cycle check at the top of the loop and stop.
    if (maxCycles > 0 && cycle + 1 > maxCycles) {
      continue;
    }

    console.log(`\n\x1b[33m[Daemon Cooldown] Sleeping for 60 seconds before next cycle...\x1b[0m`);

    updateEvolutionState({
      status: 'cooldown',
      currentTask: 'Cooldown period (60s)'
    });

    await new Promise(resolve => setTimeout(resolve, 60000));
  }
}
|
|
904
|
-
|
|
905
|
-
/**
 * DGM Evolution Loop
 * Implements Darwin Gödel Machine open-ended evolution:
 * 1. Measure baseline fitness
 * 2. Select parent from archive (fitness-weighted)
 * 3. Select mutation strategy (adaptive)
 * 4. Apply mutation via Meta-Agent
 * 5. Evaluate candidate fitness empirically
 * 6. Commit and archive the candidate when it compiles and its score is at
 *    least the best score seen in this run
 * 7. Otherwise roll the workspace back
 *
 * A generation whose mutation throws is skipped, and the workspace is reset
 * so partial edits cannot leak into later generations.
 *
 * @param task           Task description handed to the mutation prompt generator.
 * @param maxGenerations Number of generations to attempt (default 5).
 */
async runDGMEvolution(task: string, maxGenerations: number = 5): Promise<void> {
  console.log('\x1b[35m=== STARTING DGM (Darwin Gödel Machine) EVOLUTION ===\x1b[0m');
  console.log(`Task: "${task}"`);
  console.log(`Max Generations: ${maxGenerations}\n`);

  const archive = new DGMArchive(process.cwd());
  ResearchManager.startSession(`DGM Evolution: ${task}`, this.effort);

  // Step 1: Measure baseline fitness
  updateEvolutionState({ status: 'reflecting', currentTask: 'DGM: Measuring baseline fitness' });
  console.log('\x1b[34m[DGM]\x1b[0m Measuring baseline fitness...');
  const baselineFitness = await evaluateFitness();
  console.log(`\x1b[32m[DGM]\x1b[0m Baseline: ${baselineFitness.summary}`);

  // Register initial baseline into archive if empty
  const archiveStats = archive.getStats();
  let baselineEntry = archiveStats.bestEntryId ? archive.findById(archiveStats.bestEntryId) : null;
  if (!baselineEntry) {
    const gitHeadRes = await runCommand('git rev-parse HEAD');
    const commitHash = gitHeadRes.exitCode === 0 ? gitHeadRes.stdout.trim() : 'unknown';
    baselineEntry = archive.addEntry({
      commitHash,
      parentId: null,
      fitness: baselineFitness.score,
      mutationStrategy: 'baseline',
      task: 'Initial baseline snapshot',
      metadata: {
        passCount: baselineFitness.passCount,
        failCount: baselineFitness.failCount,
        totalTests: baselineFitness.totalTests,
        passRate: baselineFitness.passRate
      }
    });
    console.log(`\x1b[34m[DGM]\x1b[0m Registered baseline snapshot: ${baselineEntry.id} (fitness: ${(baselineFitness.score * 100).toFixed(1)}%)`);
  }

  updateEvolutionState({
    currentFitness: baselineFitness.score,
    // `?? 0` keeps Math.max from returning NaN when no best fitness is stored yet.
    bestFitness: Math.max(getEvolutionState().bestFitness ?? 0, baselineFitness.score),
    dgmPopulationSize: archive.getStats().totalEntries,
    dgmBestEntryId: archive.getStats().bestEntryId
  });

  let currentBestFitness = baselineFitness.score;

  // Step 2-7: Evolution loop
  for (let gen = 1; gen <= maxGenerations; gen++) {
    console.log(`\n\x1b[35m─── DGM Generation ${gen}/${maxGenerations} ───\x1b[0m`);

    // 2a. Select parent from archive (fitness-weighted)
    // NOTE(review): the parent's commit is never checked out in this method,
    // so the mutation appears to be applied to the current workspace rather
    // than to the selected parent; confirm this is intended.
    const parent = archive.getBestParent('fitness_weighted');
    if (parent) {
      console.log(`\x1b[34m[DGM]\x1b[0m Selected parent: ${parent.id} (fitness: ${(parent.fitness * 100).toFixed(1)}%, strategy: ${parent.mutationStrategy})`);
    }

    // 2b. Select mutation strategy (adaptive)
    updateEvolutionState({ status: 'scanning', currentTask: `DGM Gen ${gen}: Selecting mutation strategy` });
    const mutationSel = selectMutationStrategy(archive, task, currentBestFitness);
    console.log(`\x1b[34m[DGM]\x1b[0m Mutation: \x1b[33m${mutationSel.strategy}\x1b[0m — ${mutationSel.rationale}`);

    // 2c. Generate mutation prompt
    const mutationTask = generateMutationPrompt(mutationSel.strategy, task, {
      parentEntry: parent,
      currentFitness: currentBestFitness
    });

    // 3. Apply mutation via Meta-Agent
    updateEvolutionState({ status: 'refactoring', currentTask: `DGM Gen ${gen}: Applying ${mutationSel.strategy} mutation` });
    ResearchManager.logDecision('DGM Mutation', `Gen ${gen}: Applying strategy ${mutationSel.strategy}`, 'info');

    let mutationSucceeded = false;
    try {
      await runSelfImprovingTask(mutationTask, this.effort);
      mutationSucceeded = true;
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`\x1b[31m[DGM Gen ${gen}]\x1b[0m Mutation failed: ${reason}`);
      ResearchManager.logDecision('DGM Mutation', `Gen ${gen}: Mutation error — ${reason}`, 'failure');
    }

    if (!mutationSucceeded) {
      console.log(`\x1b[33m[DGM Gen ${gen}]\x1b[0m Skipping fitness evaluation due to mutation failure.`);
      // A mutation that threw may have left partial edits behind; revert them
      // so they neither skew the next generation nor end up in a later commit.
      await this.gitResetClean();
      continue;
    }

    // 4. Evaluate candidate fitness empirically
    updateEvolutionState({ status: 'evaluating', currentTask: `DGM Gen ${gen}: Evaluating candidate fitness` });
    console.log(`\x1b[34m[DGM Gen ${gen}]\x1b[0m Evaluating candidate fitness...`);
    const candidateFitness = await evaluateFitness();
    // The comparison (used for reporting) is against the ORIGINAL baseline;
    // the accept/reject decision below is against the best score of this run.
    const comparison = compareToBaseline(candidateFitness, { ...baselineFitness });
    console.log(`\x1b[${comparison.improved ? '32' : '33'}m[DGM Gen ${gen}]\x1b[0m ${comparison.message}`);
    ResearchManager.logDecision('DGM Fitness', `Gen ${gen}: ${comparison.message}`, comparison.improved ? 'success' : 'info');

    // 5. Add to archive + commit/rollback decision
    // Decide once so the published status always matches the action taken.
    const accepted = candidateFitness.compileSuccess && candidateFitness.score >= currentBestFitness;
    updateEvolutionState({ status: accepted ? 'committing' : 'rolling_back', currentTask: `DGM Gen ${gen}: ${accepted ? 'Committing' : 'Rolling back'}` });

    if (accepted) {
      // Commit the mutation
      const commitRes = await this.commitAll();
      let newCommitHash = 'unknown';
      if (commitRes.exitCode === 0) {
        const revRes = await runCommand('git rev-parse HEAD');
        newCommitHash = revRes.exitCode === 0 ? revRes.stdout.trim() : 'unknown';
      } else {
        // Surface the failure: the archive entry below will carry an
        // 'unknown' hash and cannot be used as a reset target.
        console.log(`\x1b[33m[DGM Gen ${gen}]\x1b[0m Warning: git commit exited with code ${commitRes.exitCode}; archiving without a commit hash.`);
      }

      // Add to DGM archive
      const newEntry = archive.addEntry({
        commitHash: newCommitHash,
        parentId: parent?.id ?? baselineEntry.id,
        fitness: candidateFitness.score,
        mutationStrategy: mutationSel.strategy,
        task,
        metadata: {
          passCount: candidateFitness.passCount,
          failCount: candidateFitness.failCount,
          totalTests: candidateFitness.totalTests,
          passRate: candidateFitness.passRate
        }
      });

      archive.pruneOldEntries();
      currentBestFitness = Math.max(currentBestFitness, candidateFitness.score);

      console.log(`\x1b[32m[DGM Gen ${gen}]\x1b[0m Committed & archived: ${newEntry.id} (fitness: ${(candidateFitness.score * 100).toFixed(1)}%)`);

      appendHistory({
        cycle: gen,
        type: 'dgm_evolution',
        description: `DGM Gen ${gen}: ${mutationSel.strategy} → fitness ${(candidateFitness.score * 100).toFixed(1)}%`,
        success: true
      });
    } else {
      // Rollback — candidate did not improve fitness
      console.log(`\x1b[31m[DGM Gen ${gen}]\x1b[0m Rolling back — candidate did not improve fitness.`);
      await this.gitResetClean();
      ResearchManager.logDecision('DGM Rollback', `Gen ${gen}: Reverted — fitness did not improve`, 'info');

      appendHistory({
        cycle: gen,
        type: 'dgm_evolution',
        description: `DGM Gen ${gen}: ${mutationSel.strategy} → rolled back (fitness unchanged or regressed)`,
        success: false
      });
    }

    // Update state
    const updatedStats = archive.getStats();
    updateEvolutionState({
      // After a rollback the workspace is back at the last accepted state,
      // whose score is the running best, not the rejected candidate's.
      currentFitness: accepted ? candidateFitness.score : currentBestFitness,
      bestFitness: currentBestFitness,
      dgmPopulationSize: updatedStats.totalEntries,
      dgmBestEntryId: updatedStats.bestEntryId
    });
  }

  ResearchManager.endSession(currentBestFitness > baselineFitness.score ? 'success' : 'failed');
  const finalStats = archive.getStats();
  console.log(`\n\x1b[35m=== DGM EVOLUTION COMPLETE ===\x1b[0m`);
  console.log(`Final best fitness: ${(currentBestFitness * 100).toFixed(1)}% (started: ${(baselineFitness.score * 100).toFixed(1)}%)`);
  console.log(`Archive size: ${finalStats.totalEntries} snapshots`);
  console.log(`Generation depth: ${finalStats.generationDepth}`);
  updateEvolutionState({ status: 'idle', currentTask: null });
}
|
|
1078
|
-
}
|