closed-loop-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of closed-loop-cli might be problematic. Click here for more details.

Files changed (86) hide show
  1. package/dist/dashboard/server.js +237 -0
  2. package/dist/index.js +272 -0
  3. package/dist/orchestrator/agent-prompts.js +42 -0
  4. package/dist/orchestrator/autogenesis.js +973 -0
  5. package/dist/orchestrator/dgm-archive.js +223 -0
  6. package/dist/orchestrator/event-stream.js +103 -0
  7. package/dist/orchestrator/fitness-evaluator.js +99 -0
  8. package/dist/orchestrator/meta-agent.js +421 -0
  9. package/dist/orchestrator/microagent-registry.js +134 -0
  10. package/dist/orchestrator/mutation-strategies.js +174 -0
  11. package/dist/orchestrator/prompt-benchmark.js +102 -0
  12. package/dist/orchestrator/prompt-optimizer.js +169 -0
  13. package/dist/orchestrator/refactor-scanner.js +222 -0
  14. package/dist/orchestrator/research-manager.js +104 -0
  15. package/dist/orchestrator/rulez.js +135 -0
  16. package/dist/orchestrator/sahoo-gateway.js +261 -0
  17. package/dist/orchestrator/state-manager.js +121 -0
  18. package/dist/orchestrator/task-agent.js +444 -0
  19. package/dist/orchestrator/telegram-bot.js +374 -0
  20. package/dist/orchestrator/types.js +2 -0
  21. package/dist/tests/dynamic/dependencies.test.js +37 -0
  22. package/dist/tests/dynamic/dummy.test.js +7 -0
  23. package/dist/tests/dynamic/fuzzy-patch.test.js +68 -0
  24. package/dist/tests/dynamic/indexer.test.js +60 -0
  25. package/dist/tests/dynamic/openhands.test.js +83 -0
  26. package/dist/tests/dynamic/skills.test.js +88 -0
  27. package/dist/tests/run-tests.js +294 -0
  28. package/dist/tools/diff-tools.js +24 -0
  29. package/dist/tools/file-tools.js +191 -0
  30. package/dist/tools/indexer.js +301 -0
  31. package/dist/tools/math-helper.js +6 -0
  32. package/dist/tools/repo-map.js +122 -0
  33. package/dist/tools/search-tools.js +271 -0
  34. package/dist/tools/shell-tools.js +75 -0
  35. package/dist/tools/skills.js +122 -0
  36. package/dist/tools/tui-tools.js +82 -0
  37. package/docs/AI_Arch_Opt_Anti_Gaming.md +227 -0
  38. package/docs/AI_Self_Improvement_Safety.md +457 -0
  39. package/docs/Anthropic AI Agents_ Capabilities and Concerns.md +134 -0
  40. package/docs/Auto_ClosedLoop_AI_Agent.md +415 -0
  41. package/docs/Autonomous AI Agents_ Closing the Loop.docx +0 -0
  42. package/docs/Secure_AI_Sandbox_Framework.md +358 -0
  43. package/docs/skills/add-file-existence-check-utility.json +9 -0
  44. package/docs/skills/add-utility-function-for-file-existence-check.json +9 -0
  45. package/docs/skills/add-utility-function-to-module.json +9 -0
  46. package/docs/skills/extract-command-runner-utility.json +9 -0
  47. package/docs/skills/file-existence-check-utility.json +9 -0
  48. package/package.json +36 -0
  49. package/src/dashboard/public/index.css +1334 -0
  50. package/src/dashboard/public/index.html +385 -0
  51. package/src/dashboard/public/index.js +1059 -0
  52. package/src/dashboard/server.ts +209 -0
  53. package/src/index.ts +256 -0
  54. package/src/orchestrator/agent-prompts.ts +43 -0
  55. package/src/orchestrator/autogenesis.ts +1078 -0
  56. package/src/orchestrator/dgm-archive.ts +257 -0
  57. package/src/orchestrator/event-stream.ts +90 -0
  58. package/src/orchestrator/fitness-evaluator.ts +154 -0
  59. package/src/orchestrator/meta-agent.ts +434 -0
  60. package/src/orchestrator/microagent-registry.ts +115 -0
  61. package/src/orchestrator/microagents/git-helper.md +11 -0
  62. package/src/orchestrator/microagents/test-fixer.md +10 -0
  63. package/src/orchestrator/microagents/typescript-expert.md +11 -0
  64. package/src/orchestrator/mutation-strategies.ts +214 -0
  65. package/src/orchestrator/research-manager.ts +88 -0
  66. package/src/orchestrator/rulez.ts +118 -0
  67. package/src/orchestrator/sahoo-gateway.ts +300 -0
  68. package/src/orchestrator/state-manager.ts +161 -0
  69. package/src/orchestrator/system-prompt.txt +1 -0
  70. package/src/orchestrator/task-agent.ts +461 -0
  71. package/src/orchestrator/telegram-bot.ts +358 -0
  72. package/src/tests/dynamic/dependencies.test.ts +48 -0
  73. package/src/tests/dynamic/dummy.test.ts +4 -0
  74. package/src/tests/dynamic/fuzzy-patch.test.ts +42 -0
  75. package/src/tests/dynamic/indexer.test.ts +31 -0
  76. package/src/tests/dynamic/openhands.test.ts +59 -0
  77. package/src/tests/dynamic/skills.test.ts +63 -0
  78. package/src/tests/run-tests.ts +296 -0
  79. package/src/tools/diff-tools.ts +27 -0
  80. package/src/tools/file-tools.ts +187 -0
  81. package/src/tools/indexer.ts +325 -0
  82. package/src/tools/repo-map.ts +96 -0
  83. package/src/tools/search-tools.ts +258 -0
  84. package/src/tools/shell-tools.ts +90 -0
  85. package/src/tools/skills.ts +101 -0
  86. package/src/tools/tui-tools.ts +87 -0
@@ -0,0 +1,174 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MutationStrategy = void 0;
4
+ exports.selectMutationStrategy = selectMutationStrategy;
5
+ exports.generateMutationPrompt = generateMutationPrompt;
6
/**
 * DGM Mutation Strategies
 *
 * From the DGM paper: open-ended evolution needs a diverse set of mutation
 * types, not refactoring alone, so that a wider search space is explored.
 *
 * The property order below is significant: code that iterates the values
 * (e.g. to break ties between equally-used strategies) sees them in this order.
 */
const MutationStrategy = {
    /** Add a new feature to the system. */
    ADD_FEATURE: "add_feature",
    /** Refactor existing code for readability / less duplication. */
    REFACTOR: "refactor",
    /** Optimize performance or token efficiency. */
    OPTIMIZE: "optimize",
    /** Add unit tests to increase coverage. */
    ADD_TESTS: "add_tests",
    /** Fix a bug detected through test failures. */
    FIX_BUG: "fix_bug",
    /** Improve the system prompt / agent prompt. */
    IMPROVE_PROMPT: "improve_prompt",
};
exports.MutationStrategy = MutationStrategy;
27
/**
 * Chooses a mutation strategy from the task text, the current fitness and the
 * recent archive history.
 *
 * Selection order (first match wins):
 *   1. Task keywords (case-insensitive substring match, rules checked in the
 *      order listed below).
 *   2. Fitness below 0.7 -> FIX_BUG.
 *   3. Diversity: the least-used strategy among the last 8 archive entries,
 *      provided it was used fewer than 2 times. Ties go to the strategy that
 *      appears first in MutationStrategy.
 *   4. Default: ADD_FEATURE.
 *
 * @param {{ getRecentHistory?: (n: number) => Array<{ mutationStrategy?: string }> }} archive
 *        Archive to query for recent history. A missing archive (or one without
 *        getRecentHistory) is treated as having no history.
 * @param {string} task - Task description. A non-string value is treated as
 *        an empty description instead of throwing.
 * @param {number} [currentFitness=0] - Fitness in the 0..1 range.
 * @returns {{ strategy: string, rationale: string }}
 */
function selectMutationStrategy(archive, task, currentFitness = 0) {
    // Tolerate a missing task: fall through to the fitness/diversity heuristics.
    const taskLower = typeof task === 'string' ? task.toLowerCase() : '';
    // 1. Task-based override when the description clearly points at a strategy.
    // NOTE(review): this is plain substring matching, so e.g. "latest" matches
    // "test" and "address" matches "add" - kept as-is for compatibility.
    const keywordRules = [
        {
            keywords: ['fix', 'bug', 'error'],
            strategy: MutationStrategy.FIX_BUG,
            rationale: 'Task description indicates a bug fix is needed',
        },
        {
            keywords: ['test', 'coverage'],
            strategy: MutationStrategy.ADD_TESTS,
            rationale: 'Task description requests test improvements',
        },
        {
            keywords: ['prompt', 'instruction'],
            strategy: MutationStrategy.IMPROVE_PROMPT,
            rationale: 'Task description targets prompt engineering',
        },
        {
            keywords: ['refactor', 'clean', 'reformat'],
            strategy: MutationStrategy.REFACTOR,
            rationale: 'Task description requests code refactoring',
        },
        {
            keywords: ['optim', 'speed', 'performance', 'token'],
            strategy: MutationStrategy.OPTIMIZE,
            rationale: 'Task description requests optimization',
        },
        {
            keywords: ['add', 'implement', 'create', 'new feature'],
            strategy: MutationStrategy.ADD_FEATURE,
            rationale: 'Task description requests adding a new feature',
        },
    ];
    const matchedRule = keywordRules.find((rule) => rule.keywords.some((keyword) => taskLower.includes(keyword)));
    if (matchedRule) {
        return { strategy: matchedRule.strategy, rationale: matchedRule.rationale };
    }
    // 2. Fitness-based heuristic: low fitness means stabilise first.
    if (currentFitness < 0.7) {
        return {
            strategy: MutationStrategy.FIX_BUG,
            rationale: `Low fitness (${(currentFitness * 100).toFixed(1)}%) — prioritizing stability fixes`
        };
    }
    // 3. Archive diversity: find the strategy used least in recent history.
    const history = typeof archive?.getRecentHistory === 'function'
        ? archive.getRecentHistory(8)
        : [];
    const recentHistory = Array.isArray(history) ? history : [];
    if (recentHistory.length > 0) {
        // A Map keeps lookups limited to real strategy values, so inherited
        // object keys such as "toString" are never counted.
        const strategyCounts = new Map(Object.values(MutationStrategy).map((value) => [value, 0]));
        for (const entry of recentHistory) {
            const used = entry?.mutationStrategy;
            if (used && strategyCounts.has(used)) {
                strategyCounts.set(used, strategyCounts.get(used) + 1);
            }
        }
        // First strictly-smaller count wins, i.e. ties resolve in enum order.
        let leastUsed;
        for (const [strategy, count] of strategyCounts) {
            if (leastUsed === undefined || count < leastUsed.count) {
                leastUsed = { strategy, count };
            }
        }
        if (leastUsed && leastUsed.count < 2) {
            return {
                strategy: leastUsed.strategy,
                rationale: `Diversity promotion: "${leastUsed.strategy}" has been used least (${leastUsed.count}x in recent history)`
            };
        }
    }
    // 4. Default: ADD_FEATURE (expansive mutation).
    return {
        strategy: MutationStrategy.ADD_FEATURE,
        rationale: 'Default open-ended evolution strategy: expanding capabilities'
    };
}
109
/**
 * Builds the task prompt for a specific mutation strategy so the agent knows
 * which kind of mutation it is carrying out.
 *
 * The prompt starts with a "[DGM Mutation: <STRATEGY>]" header, followed by
 * optional "[DGM Context]" lines (parent snapshot, current fitness, target
 * file) and then the strategy-specific goal, the original task and guidelines.
 * A strategy with no template yields just the header plus the original task.
 *
 * @param {string} strategy - A MutationStrategy value (upper-cased for the header).
 * @param {string} originalTask - The task text to embed in the prompt.
 * @param {{ parentEntry?: { id: string, fitness: number }, currentFitness?: number, targetFile?: string }} [context]
 * @returns {string} The assembled prompt.
 */
function generateMutationPrompt(strategy, originalTask, context) {
    // Optional context lines; each one carries its own leading newline.
    const contextLines = [];
    if (context?.parentEntry) {
        contextLines.push(`\n[DGM Context] Building upon parent snapshot: ${context.parentEntry.id} (fitness: ${(context.parentEntry.fitness * 100).toFixed(1)}%)`);
    }
    if (context?.currentFitness !== undefined) {
        contextLines.push(`\n[DGM Context] Current system fitness: ${(context.currentFitness * 100).toFixed(1)}% (test pass rate)`);
    }
    if (context?.targetFile) {
        contextLines.push(`\n[DGM Context] Primary target file: ${context.targetFile}`);
    }
    const dgmHeader = `[DGM Mutation: ${strategy.toUpperCase()}]${contextLines.join('')}\n\n`;
    // Goal sentence + guideline bullets per strategy.
    const templates = new Map([
        [MutationStrategy.ADD_FEATURE, {
            goal: `Your mutation goal is to ADD A NEW FEATURE. Implement the following capability:`,
            guidelines: [
                `Create new files or add new exported functions/classes as needed`,
                `Do not break existing functionality`,
                `Ensure all new code compiles and existing tests still pass`,
                `The feature should integrate cleanly with the existing codebase`,
            ],
        }],
        [MutationStrategy.REFACTOR, {
            goal: `Your mutation goal is to REFACTOR existing code for better quality:`,
            guidelines: [
                `Improve readability, reduce duplication, or strengthen type safety`,
                `Preserve all existing behavior exactly (zero functional change)`,
                `All existing tests must still pass after refactoring`,
            ],
        }],
        [MutationStrategy.OPTIMIZE, {
            goal: `Your mutation goal is to OPTIMIZE for performance or efficiency:`,
            guidelines: [
                `Focus on reducing token usage, execution time, or memory`,
                `Preserve correctness — all tests must still pass`,
                `Measure and report the optimization impact if possible`,
            ],
        }],
        [MutationStrategy.ADD_TESTS, {
            goal: `Your mutation goal is to ADD UNIT TESTS to improve coverage:`,
            guidelines: [
                `Add tests to src/tests/dynamic/ directory as .ts files`,
                `Tests must export a default function or a run() function`,
                `Prioritize testing edge cases and untested public functions`,
                `Do not modify existing test files (protected by Campbell Regime)`,
            ],
        }],
        [MutationStrategy.FIX_BUG, {
            goal: `Your mutation goal is to FIX A BUG or stability issue:`,
            guidelines: [
                `Diagnose the root cause carefully before making changes`,
                `Make the minimal change required to fix the issue`,
                `Add a regression test if appropriate`,
                `All tests must pass after the fix`,
            ],
        }],
        [MutationStrategy.IMPROVE_PROMPT, {
            goal: `Your mutation goal is to IMPROVE AGENT PROMPTS for better performance:`,
            guidelines: [
                `Edit src/orchestrator/system-prompt.txt or src/orchestrator/agent-prompts.ts`,
                `Make prompts more concise, clear, and effective`,
                `Preserve all existing agent capabilities`,
                `Build compiles and tests pass after changes`,
            ],
        }],
    ]);
    const template = templates.get(strategy);
    if (template === undefined) {
        // Unknown strategy: header plus the untouched task.
        return dgmHeader + originalTask;
    }
    const bulletList = template.guidelines.map((line) => `- ${line}`).join('\n');
    return `${dgmHeader}${template.goal}\n${originalTask}\n\nGuidelines:\n${bulletList}`;
}
@@ -0,0 +1,102 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.calculatePromptScore = calculatePromptScore;
37
+ exports.benchmarkPrompt = benchmarkPrompt;
38
+ const fs = __importStar(require("fs"));
39
+ const path = __importStar(require("path"));
40
+ const shell_tools_1 = require("../tools/shell-tools");
41
+ const task_agent_1 = require("./task-agent");
42
/**
 * Calculates a prompt utility score where higher is better.
 *
 * The score is 100000 divided by the weighted cost
 * (timeSeconds * timeWeight + totalTokens * tokenWeight).
 *
 * @param {number} timeSeconds - Wall-clock duration of the run, in seconds.
 * @param {number} totalTokens - Input + output tokens consumed by the run.
 * @param {{ timeWeight?: number, tokenWeight?: number }} [weights] - Optional
 *        cost weights; defaults are 0.4 for time and 0.6 for tokens.
 * @returns {number} The score, or 0 when either measurement is missing,
 *        non-finite, zero or negative (so the result is never NaN/negative).
 */
function calculatePromptScore(timeSeconds, totalTokens, { timeWeight = 0.4, tokenWeight = 0.6 } = {}) {
    // A measurement is only usable when it is a finite, strictly positive number.
    const isUsable = (value) => Number.isFinite(value) && value > 0;
    if (!isUsable(timeSeconds) || !isUsable(totalTokens)) {
        return 0;
    }
    const weightedCost = timeSeconds * timeWeight + totalTokens * tokenWeight;
    // Guards against custom weights that zero out (or invert) the cost.
    if (!isUsable(weightedCost)) {
        return 0;
    }
    return 100000 / weightedCost;
}
53
/**
 * Benchmarks a system prompt on a fixed task (creating src/tools/math-helper.ts)
 * and reports token usage, execution time and whether `npm run build` succeeded.
 *
 * WARNING: the workspace is reset before and after the run with
 * `git reset --hard HEAD` and `git clean -fd`, which discards every
 * uncommitted change and untracked file in the current working directory.
 *
 * @param {string} systemPrompt - System prompt override passed to the Task Agent.
 * @returns {Promise<{ success: boolean, time: number, tokens: number, score: number }>}
 *          `score` is 0 whenever the run or the build failed.
 */
async function benchmarkPrompt(systemPrompt) {
    const benchmarkTask = "Create a typescript helper file src/tools/math-helper.ts that exports a sum(a: number, b: number) function. Ensure it compiles. Do not change any other files.";
    const helperPath = path.join(process.cwd(), 'src/tools/math-helper.ts');
    // Removes the benchmark artifact and restores the git baseline.
    const resetWorkspace = async () => {
        if (fs.existsSync(helperPath)) {
            fs.unlinkSync(helperPath);
        }
        await (0, shell_tools_1.runCommand)('git reset --hard HEAD');
        await (0, shell_tools_1.runCommand)('git clean -fd');
    };
    // Missing or malformed metrics count as 0 rather than poisoning totals with NaN.
    const toMetric = (value) => (Number.isFinite(value) ? value : 0);
    await resetWorkspace();
    const startTime = Date.now();
    let success = false;
    let inputTokens = 0;
    let outputTokens = 0;
    let timeSeconds = 0;
    let elapsedSeconds = 0;
    try {
        // Delegate to the Task Agent directly with the specified system prompt override.
        const report = await (0, task_agent_1.runTaskAgent)(benchmarkTask, { systemPrompt });
        timeSeconds = toMetric(report.timeSeconds);
        inputTokens = toMetric(report.inputTokens);
        outputTokens = toMetric(report.outputTokens);
        // The benchmark only passes when the project still compiles.
        const buildRes = await (0, shell_tools_1.runCommand)('npm run build');
        if (buildRes.exitCode === 0) {
            success = true;
        }
    }
    catch (err) {
        // Best-effort: a failed run is reported as success=false, but the
        // reason is surfaced instead of being dropped silently.
        const message = err instanceof Error ? err.message : String(err);
        console.warn(`[benchmarkPrompt] Benchmark run failed: ${message}`);
    }
    finally {
        // Take the fallback timing before cleanup so git housekeeping is not
        // counted as prompt execution time.
        elapsedSeconds = (Date.now() - startTime) / 1000;
        await resetWorkspace();
    }
    if (!timeSeconds) {
        timeSeconds = elapsedSeconds;
    }
    const totalTokens = inputTokens + outputTokens;
    const score = success ? calculatePromptScore(timeSeconds, totalTokens) : 0;
    return { success, time: timeSeconds, tokens: totalTokens, score };
}
@@ -0,0 +1,169 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.runPromptOptimization = runPromptOptimization;
40
+ const fs = __importStar(require("fs"));
41
+ const path = __importStar(require("path"));
42
+ const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
43
+ const dotenv = __importStar(require("dotenv"));
44
+ const shell_tools_1 = require("../tools/shell-tools");
45
+ const tui_tools_1 = require("../tools/tui-tools");
46
+ const state_manager_1 = require("./state-manager");
47
+ const prompt_benchmark_1 = require("./prompt-benchmark");
48
+ dotenv.config();
49
+ const apiKey = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN || '';
50
+ const baseURL = process.env.ANTHROPIC_BASE_URL || undefined;
51
+ const defaultModel = process.env.ANTHROPIC_MODEL || 'mimo-v2.5-pro[1m]';
52
+ const FALLBACK_PROMPT = `You are an elite coding agent designed to operate inside a codebase workspace.
53
+ Your primary objective is to fulfill the user's coding tasks accurately and cleanly.
54
+
55
+ You have access to a set of local tools to read, write, edit files and execute terminal commands.
56
+ Always prefer editing precise parts of files using editFile instead of overwriting the whole file with writeFile unless it is a new file.
57
+ When running commands, verify compilation and test outcomes. If a test fails, you must attempt to fix the issues (Self-Healing).
58
+
59
+ Be concise and professional. Formulate plans before making changes.`;
60
/**
 * Returns the baseline system prompt.
 *
 * @param {string} promptPath - Path of the prompt file to read.
 * @returns {string} The UTF-8 contents of `promptPath` when it exists,
 *          otherwise the hardcoded FALLBACK_PROMPT.
 */
function loadBaselinePrompt(promptPath) {
    const promptFileExists = fs.existsSync(promptPath);
    return promptFileExists
        ? fs.readFileSync(promptPath, 'utf-8')
        : FALLBACK_PROMPT;
}
69
/**
 * Asks the LLM to rewrite the given system prompt into a more concise
 * candidate and returns the trimmed text of the first text block.
 *
 * @param {string} baselinePrompt - The current system prompt to optimize.
 * @returns {Promise<string>} The non-empty candidate prompt.
 * @throws {Error} When the response has no text block or the text is empty /
 *         whitespace-only. An empty candidate must never be returned, because
 *         the caller may benchmark it and write it over the prompt file.
 */
async function generateCandidatePrompt(baselinePrompt) {
    const anthropic = new sdk_1.default({ apiKey, baseURL });
    const optimizerPrompt = `You are an expert system prompt engineer. Your goal is to optimize the following system prompt for an autonomous coding agent.
You need to make the prompt more concise, remove redundancy, and explicitly instruct the agent to reduce token waste and thinking overhead, while ensuring it retains all functionality (using file-tools, rulez, self-healing, compiling, running tests).

Here is the Current System Prompt:
"""
${baselinePrompt}
"""

Provide your output strictly as the new optimized system prompt. Do not write any markdown wrappers (like \`\`\` or \`\`\`txt), explanations, greetings, or conversational headers/footers. Output ONLY the raw optimized system prompt text.`;
    const response = await anthropic.messages.create({
        model: defaultModel,
        max_tokens: 1500,
        messages: [{ role: 'user', content: optimizerPrompt }]
    });
    const contentBlocks = Array.isArray(response?.content) ? response.content : [];
    const textBlock = contentBlocks.find((block) => block?.type === 'text');
    // Validate AFTER trimming so a whitespace-only reply is rejected too.
    const candidate = typeof textBlock?.text === 'string' ? textBlock.text.trim() : '';
    if (!candidate) {
        throw new Error('Failed to retrieve system prompt candidate from LLM.');
    }
    return candidate;
}
94
/**
 * Prints the standard four-line summary of one benchmark report.
 */
function printBenchmarkReport(label, report) {
    console.log(`${label} Prompt Benchmark Result:`);
    console.log(`- Success: ${report.success ? '\x1b[32mPASS\x1b[0m' : '\x1b[31mFAIL\x1b[0m'}`);
    console.log(`- Time: ${report.time.toFixed(1)}s`);
    console.log(`- Tokens: ${report.tokens}`);
    console.log(`- Score: ${report.score.toFixed(2)}`);
}
/**
 * Stages and commits system-prompt.txt; resolves to false when either git
 * command reports a non-zero exit code.
 */
async function commitSystemPrompt() {
    const addResult = await (0, shell_tools_1.runCommand)('git add src/orchestrator/system-prompt.txt');
    const commitResult = await (0, shell_tools_1.runCommand)('git commit -m "chore: optimize Task Agent system prompt via automated benchmarking loop"');
    // Only an explicit non-zero exit code counts as a failure.
    const failed = (result) => typeof result?.exitCode === 'number' && result.exitCode !== 0;
    return !failed(addResult) && !failed(commitResult);
}
/**
 * Runs the full prompt optimization and benchmarking pipeline:
 *   1. Benchmark the baseline prompt
 *   2. Generate an optimized candidate via LLM
 *   3. Benchmark the candidate
 *   4. Compare scores; when the candidate succeeded and scored strictly
 *      higher, overwrite src/orchestrator/system-prompt.txt and commit it
 *
 * The outcome is recorded through updateEvolutionState and appendHistory.
 *
 * @returns {Promise<void>}
 * @throws {Error} "Prompt Optimization Failed: ..." when candidate generation fails.
 */
async function runPromptOptimization() {
    const workspaceRoot = process.cwd();
    const promptPath = path.join(workspaceRoot, 'src/orchestrator/system-prompt.txt');
    const baselinePrompt = loadBaselinePrompt(promptPath);
    // --- Step 1: Benchmark baseline ---
    console.log(`\n\x1b[35m=== STEP 1: BENCHMARKING BASELINE SYSTEM PROMPT ===\x1b[0m`);
    const baselineReport = await (0, prompt_benchmark_1.benchmarkPrompt)(baselinePrompt);
    printBenchmarkReport('Baseline', baselineReport);
    // --- Step 2: Generate candidate ---
    console.log(`\n\x1b[35m=== STEP 2: GENERATING OPTIMIZED CANDIDATE SYSTEM PROMPT ===\x1b[0m`);
    const optimizerSpinner = new tui_tools_1.Spinner('AGP - Optimizer: Refinement model generating optimized system prompt...');
    optimizerSpinner.start();
    let candidatePrompt = '';
    try {
        candidatePrompt = await generateCandidatePrompt(baselinePrompt);
        optimizerSpinner.stop(true, 'AGP - Optimizer: Prompt candidate generated.');
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        optimizerSpinner.stop(false, 'AGP - Optimizer: Prompt candidate generation failed.');
        throw new Error(`Prompt Optimization Failed: ${message}`);
    }
    console.log('\n\x1b[33m┌── Candidate System Prompt ──────────────────────────────────────────────────────┐\x1b[0m');
    candidatePrompt.split('\n').forEach(line => console.log(`\x1b[33m│\x1b[0m ${line}`));
    console.log('\x1b[33m└─────────────────────────────────────────────────────────────────────────────────┘\x1b[0m\n');
    // --- Step 3: Benchmark candidate ---
    console.log(`\x1b[35m=== STEP 3: BENCHMARKING CANDIDATE SYSTEM PROMPT ===\x1b[0m`);
    const candidateReport = await (0, prompt_benchmark_1.benchmarkPrompt)(candidatePrompt);
    printBenchmarkReport('Candidate', candidateReport);
    // --- Step 4: Compare and commit ---
    console.log(`\x1b[35m=== STEP 4: COMPARING PERFORMANCE METRICS ===\x1b[0m`);
    console.log(`Baseline Score: \x1b[1m${baselineReport.score.toFixed(2)}\x1b[0m`);
    console.log(`Candidate Score: \x1b[1m${candidateReport.score.toFixed(2)}\x1b[0m`);
    const approved = candidateReport.success && candidateReport.score > baselineReport.score;
    if (approved) {
        console.log(`\n\x1b[32;1m[Optimization Approved] Candidate prompt is more efficient. Overwriting system-prompt.txt...\x1b[0m`);
        fs.writeFileSync(promptPath, candidatePrompt, 'utf-8');
        // Commit the change, and only claim success when git actually succeeded.
        const committed = await commitSystemPrompt();
        if (committed) {
            console.log(`\x1b[32m[Git Status] System prompt snapshot committed to repository HEAD.\x1b[0m`);
        }
        else {
            console.warn(`\x1b[33m[Git Status] system-prompt.txt was updated on disk but the git commit failed.\x1b[0m`);
        }
    }
    else {
        console.log(`\n\x1b[31;1m[Optimization Rejected] Candidate prompt did not improve performance. Retaining baseline.\x1b[0m`);
    }
    (0, state_manager_1.updateEvolutionState)({
        lastPromptOptimization: {
            success: candidateReport.success,
            baselineScore: baselineReport.score,
            candidateScore: candidateReport.score,
            approved,
            time: new Date().toISOString()
        }
    });
    (0, state_manager_1.appendHistory)({
        cycle: (0, state_manager_1.getEvolutionState)().currentCycle,
        type: 'prompt_opt',
        description: `Optimized system prompt. Baseline: ${baselineReport.score.toFixed(2)}, Candidate: ${candidateReport.score.toFixed(2)}`,
        success: approved
    });
}
@@ -0,0 +1,222 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.getAutonomousRefactorProposal = getAutonomousRefactorProposal;
40
+ const fs = __importStar(require("fs"));
41
+ const path = __importStar(require("path"));
42
+ const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
43
+ const dotenv = __importStar(require("dotenv"));
44
+ const tui_tools_1 = require("../tools/tui-tools");
45
+ const repo_map_1 = require("../tools/repo-map");
46
+ dotenv.config();
47
+ const apiKey = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN || '';
48
+ const baseURL = process.env.ANTHROPIC_BASE_URL || undefined;
49
+ const defaultModel = process.env.ANTHROPIC_MODEL || 'mimo-v2.5-pro[1m]';
50
/**
 * Reads the last N lines of `<workspaceRoot>/evolution.log`.
 *
 * @param {string} workspaceRoot - Directory that contains evolution.log.
 * @param {number} maxLines - Maximum number of trailing lines to return.
 * @returns {string} The trailing lines joined with "\n"; an empty string when
 *          the log is missing or unreadable, or when `maxLines` is not a
 *          positive number.
 */
function readRecentEvolutionLog(workspaceRoot, maxLines) {
    // slice(-0) and slice(-NaN) both mean slice(0), i.e. the WHOLE log, so a
    // non-positive or invalid limit has to be rejected up front.
    const lineLimit = Math.trunc(Number(maxLines));
    if (!(lineLimit > 0)) {
        return '';
    }
    const logPath = path.join(workspaceRoot, 'evolution.log');
    if (!fs.existsSync(logPath)) {
        return '';
    }
    try {
        const fullLog = fs.readFileSync(logPath, 'utf-8');
        return fullLog.split('\n').slice(-lineLimit).join('\n');
    }
    catch (e) {
        // Best-effort: the log is optional context, so read errors yield ''.
        return '';
    }
}
66
/**
 * Returns the UTF-8 contents of `<workspaceRoot>/Learnings.md`.
 *
 * @param {string} workspaceRoot - Directory that may contain Learnings.md.
 * @returns {string} The file contents, or an empty string when the file does
 *          not exist or cannot be read.
 */
function readLearnings(workspaceRoot) {
    const learningsPath = path.join(workspaceRoot, 'Learnings.md');
    let learnings = '';
    if (fs.existsSync(learningsPath)) {
        try {
            learnings = fs.readFileSync(learningsPath, 'utf-8');
        }
        catch (readError) {
            // Unreadable file is treated the same as a missing one.
            learnings = '';
        }
    }
    return learnings;
}
80
/**
 * Step 1: Ask the LLM to select 2 candidate files for refactoring based on
 * the repo map, recent evolution logs and recorded learnings.
 *
 * @param {object} anthropic - Client exposing `messages.create(...)`.
 * @param {string} repoMap - Textual map of the repository.
 * @param {string} evolutionLog - Recent log lines ('' when unavailable).
 * @param {string} learningsMd - Learnings text ('' when unavailable).
 * @returns {Promise<Array<{ targetFile: string }>>} Non-empty list of
 *          candidates, each with a non-blank string `targetFile`.
 * @throws {Error} When the response has no text block or no usable candidate.
 * @throws {SyntaxError} When the response text is not valid JSON.
 */
async function selectCandidateFiles(anthropic, repoMap, evolutionLog, learningsMd) {
    const candidatePrompt = `You are an expert software architect. Your goal is to select the top 2 files in this codebase that would benefit most from refactoring (improving type-safety, code duplication, helper functions cleanup, or performance).
Review the codebase map, past evolution logs, and lessons learned.

Codebase Repository Map:
${repoMap}

Last 40 lines of evolution logs (shows recent runs/warnings/failures):
${evolutionLog || 'No log history available.'}

Learnings Registry (shows past insights):
${learningsMd || 'No past learnings recorded.'}

Output your response strictly as a JSON object containing a list of 2 candidate target files (relative paths). Do not include any other text, markdown blocks, or formatting, just the raw JSON.

Format:
{
"candidates": [
{ "targetFile": "relative/path/to/file1.ts" },
{ "targetFile": "relative/path/to/file2.ts" }
]
}`;
    const response = await anthropic.messages.create({
        model: defaultModel,
        max_tokens: 4000,
        messages: [{ role: 'user', content: candidatePrompt }]
    });
    const block = response.content.find(b => b.type === 'text');
    if (!block || !block.text) {
        console.error('DEBUG - candidateResponse content:', JSON.stringify(response.content));
        throw new Error('Failed to retrieve candidates from LLM.');
    }
    // Models sometimes wrap JSON in a fence despite the instructions; accept
    // both ```json and a bare ``` fence.
    const text = block.text
        .trim()
        .replace(/^```(?:json)?\s*/i, '')
        .replace(/\s*```$/, '')
        .trim();
    let json;
    try {
        json = JSON.parse(text);
    }
    catch (err) {
        // Same error type as a raw JSON.parse failure, with clearer context.
        const reason = err instanceof Error ? err.message : String(err);
        throw new SyntaxError(`Failed to parse candidate list from LLM response: ${reason}`, { cause: err });
    }
    // Keep only well-formed entries so later path handling never sees a
    // non-string targetFile.
    const rawCandidates = Array.isArray(json?.candidates) ? json.candidates : [];
    const candidates = rawCandidates.filter((candidate) => typeof candidate?.targetFile === 'string' && candidate.targetFile.trim() !== '');
    if (candidates.length === 0) {
        throw new Error('No candidates identified by the LLM.');
    }
    return candidates;
}
124
/**
 * Reads the contents of candidate files from disk and concatenates them into
 * one context string, one "--- FILE: <path> ---" section per candidate.
 *
 * Candidate paths originate from LLM output, so each one is resolved against
 * the workspace root and skipped (with a note in the output) if it would
 * escape the workspace, e.g. via "..".
 *
 * @param {string} workspaceRoot - Root directory the candidates are relative to.
 * @param {Array<{ targetFile: string }>} candidates - Files to read.
 * @returns {string} Concatenated sections; unreadable, missing, invalid or
 *          out-of-workspace entries get a parenthesised note instead of content.
 */
function readCandidateFileContents(workspaceRoot, candidates) {
    const root = path.resolve(workspaceRoot);
    let fileContext = '';
    for (const cand of Array.isArray(candidates) ? candidates : []) {
        const targetFile = cand?.targetFile;
        if (typeof targetFile !== 'string' || targetFile === '') {
            fileContext += `\n--- FILE: ${String(targetFile)} (Invalid candidate path) ---\n`;
            continue;
        }
        const fullPath = path.join(root, targetFile);
        // A relative path that starts with ".." (or is absolute, e.g. another
        // drive on Windows) points outside the workspace.
        const relativeToRoot = path.relative(root, fullPath);
        const escapesWorkspace = relativeToRoot === '..'
            || relativeToRoot.startsWith(`..${path.sep}`)
            || path.isAbsolute(relativeToRoot);
        if (escapesWorkspace) {
            fileContext += `\n--- FILE: ${targetFile} (Skipped: path is outside the workspace) ---\n`;
            continue;
        }
        if (!fs.existsSync(fullPath)) {
            fileContext += `\n--- FILE: ${targetFile} (File not found on disk) ---\n`;
            continue;
        }
        try {
            const fileContent = fs.readFileSync(fullPath, 'utf-8');
            fileContext += `\n--- FILE: ${targetFile} ---\n${fileContent}\n`;
        }
        catch (e) {
            fileContext += `\n--- FILE: ${targetFile} (Could not read file contents) ---\n`;
        }
    }
    return fileContext;
}
146
/**
 * Step 2: Ask the LLM to evaluate the candidate file contents and select the
 * single best refactoring proposal.
 *
 * @param {object} anthropic - Client exposing `messages.create(...)`.
 * @param {string} fileContext - Concatenated candidate file contents that are
 *   embedded verbatim into the prompt.
 * @returns {Promise<{targetFile: string, refactorGoal: string}>} The winning
 *   proposal; the scores the model is asked for are not returned.
 * @throws {Error} When the reply has no text block, is not valid JSON, or does
 *   not contain a `winningProposal` with string `targetFile` and `refactorGoal`.
 */
async function evaluateAndSelectWinner(anthropic, fileContext) {
    const evaluationPrompt = `You are an expert software architect. Analyze the actual code content of the candidate files below and select the single best refactoring proposal.
Evaluate each file and design a specific refactor goal.
Score each proposal out of 10 based on:
1. Impact (readability, duplication reduction, safety, performance).
2. Feasibility (compilation safety, low risk of breaking tests).

Candidate File Contents:
${fileContext}

Output your response strictly as a JSON object containing the winning proposal's target file and the specific refactoring goal. Do not include any other text, markdown blocks, or formatting, just the raw JSON.

Format:
{
  "winningProposal": {
    "targetFile": "relative/path/to/file.ts",
    "refactorGoal": "A concise, specific description of the refactoring goal (e.g., 'Refactor search-tools to use strong interfaces instead of any[]')",
    "impactScore": 8.5,
    "feasibilityScore": 9.0
  }
}`;
    const response = await anthropic.messages.create({
        model: defaultModel,
        max_tokens: 4000,
        messages: [{ role: 'user', content: evaluationPrompt }]
    });
    const block = response.content.find(b => b.type === 'text');
    if (!block || !block.text) {
        throw new Error('Failed to retrieve evaluation from LLM.');
    }
    // Strip an optional Markdown fence with any (or no) language tag; the
    // model does not always honour the "raw JSON only" instruction.
    const text = block.text
        .trim()
        .replace(/^```[\w-]*\s*/, '')
        .replace(/\s*```$/, '')
        .trim();
    let json;
    try {
        json = JSON.parse(text);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Evaluation response was not valid JSON: ${reason}`);
    }
    const winningProposal = json !== null && typeof json === 'object' ? json.winningProposal : undefined;
    const isNonEmptyString = value => typeof value === 'string' && value.trim() !== '';
    if (winningProposal === null ||
        typeof winningProposal !== 'object' ||
        !isNonEmptyString(winningProposal.targetFile) ||
        !isNonEmptyString(winningProposal.refactorGoal)) {
        throw new Error('Invalid JSON structure returned for winning proposal.');
    }
    return {
        targetFile: winningProposal.targetFile,
        refactorGoal: winningProposal.refactorGoal
    };
}
191
/**
 * Generates a refactoring proposal by scanning the codebase repository map,
 * evolution logs, and learnings. Uses a two-pass LLM strategy:
 *   1. Select candidate files
 *   2. Evaluate actual code and pick the winner
 *
 * The workspace is the current working directory. A spinner is shown while the
 * scan runs and is always stopped, on success or on failure.
 *
 * @returns {Promise<object>} The proposal returned by `evaluateAndSelectWinner`.
 * @throws {Error} `Refactoring Analysis Failed: <reason>` when any step after
 *   the spinner starts fails; the original error is attached as `cause`.
 */
async function getAutonomousRefactorProposal() {
    const workspaceRoot = process.cwd();
    const repoMap = (0, repo_map_1.generateRepoMap)(workspaceRoot);
    const scanSpinner = new tui_tools_1.Spinner('AGP - Scan: Auditing codebase structure for optimizations...');
    scanSpinner.start();
    try {
        // Context gathering happens inside the try block so that a failure
        // here still stops the spinner instead of leaving it running.
        const evolutionLog = readRecentEvolutionLog(workspaceRoot, 40);
        const learningsMd = readLearnings(workspaceRoot);
        const anthropic = new sdk_1.default({ apiKey, baseURL });
        // Step 1: Select 2 target candidate files
        const candidates = await selectCandidateFiles(anthropic, repoMap, evolutionLog, learningsMd);
        // Step 2: Read candidate file contents
        const fileContext = readCandidateFileContents(workspaceRoot, candidates);
        // Step 3: Evaluate and select winner
        const proposal = await evaluateAndSelectWinner(anthropic, fileContext);
        console.log(`\n\x1b[32m✔\x1b[0m AGP - Scan: Selected best proposal: ${proposal.targetFile}`);
        scanSpinner.stop(true, 'AGP - Scan: Codebase audit complete.');
        return proposal;
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        scanSpinner.stop(false, 'AGP - Scan: Codebase audit failed.');
        // Keep the original error reachable for debugging via `cause`.
        throw new Error(`Refactoring Analysis Failed: ${message}`, { cause: err });
    }
}