closed-loop-cli 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of closed-loop-cli might be problematic. Click here for more details.

@@ -1,169 +0,0 @@
1
- "use strict";
2
// NOTE(review): these look like the module-interop helpers a TypeScript build
// emits for CommonJS output — confirm before hand-editing generated code.
// Each helper reuses an implementation already present on `this` and only falls
// back to the local definition below when none is found.

/**
 * Exposes `source[key]` on `target` under `alias` (defaults to `key`).
 * Where property descriptors are available the binding is a live getter unless
 * the source already provides a suitable accessor / frozen data property.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    var exposedName = alias === undefined ? key : alias;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var needsLiveGetter;
    if (!descriptor) {
        needsLiveGetter = true;
    }
    else if ("get" in descriptor) {
        // Accessor property: only trusted as-is when the source is an ES module.
        needsLiveGetter = !source.__esModule;
    }
    else {
        // Data property: anything that can still change must be read lazily.
        needsLiveGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsLiveGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, exposedName, descriptor);
}) : (function (target, source, key, alias) {
    var exposedName = alias === undefined ? key : alias;
    target[exposedName] = source[key];
}));

/** Installs `value` as the `default` member of the namespace object `target`. */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});

/**
 * Wraps a required module in a namespace object: every own property except
 * `default` is re-bound, and the module itself becomes `default`.
 * Modules flagged `__esModule` are returned untouched.
 */
var __importStar = (this && this.__importStar) || (function () {
    var listOwnKeys = function (subject) {
        // Resolved on first use, then cached by overwriting this variable.
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var collected = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    collected[collected.length] = name;
                }
            }
            return collected;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();

/** Returns `mod` when it is flagged `__esModule`, otherwise `{ default: mod }`. */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.runPromptOptimization = runPromptOptimization;
40
- const fs = __importStar(require("fs"));
41
- const path = __importStar(require("path"));
42
- const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
43
- const dotenv = __importStar(require("dotenv"));
44
- const shell_tools_1 = require("../tools/shell-tools");
45
- const tui_tools_1 = require("../tools/tui-tools");
46
- const state_manager_1 = require("./state-manager");
47
- const prompt_benchmark_1 = require("./prompt-benchmark");
48
// Run dotenv first so the environment lookups below can see whatever it loads.
dotenv.config();
const { env } = process;
// Credential handed to the SDK client: API key first, auth token second, else ''.
const apiKey = env.ANTHROPIC_API_KEY || env.ANTHROPIC_AUTH_TOKEN || '';
// Optional endpoint override; left undefined when the variable is unset or empty.
const baseURL = env.ANTHROPIC_BASE_URL ? env.ANTHROPIC_BASE_URL : undefined;
// Model id sent with every request unless ANTHROPIC_MODEL overrides it.
const defaultModel = env.ANTHROPIC_MODEL || 'mimo-v2.5-pro[1m]';
// Built-in baseline system prompt, returned by loadBaselinePrompt when the prompt file is absent.
const FALLBACK_PROMPT = [
    'You are an elite coding agent designed to operate inside a codebase workspace.',
    "Your primary objective is to fulfill the user's coding tasks accurately and cleanly.",
    '',
    'You have access to a set of local tools to read, write, edit files and execute terminal commands.',
    'Always prefer editing precise parts of files using editFile instead of overwriting the whole file with writeFile unless it is a new file.',
    'When running commands, verify compilation and test outcomes. If a test fails, you must attempt to fix the issues (Self-Healing).',
    '',
    'Be concise and professional. Formulate plans before making changes.',
].join('\n');
60
/**
 * Loads the current baseline system prompt from disk, falling back to a hardcoded default.
 *
 * The file is read directly rather than probed with `existsSync` first, so it
 * cannot vanish between the check and the read.
 *
 * @param {string} promptPath - Path of the prompt file to read.
 * @returns {string} UTF-8 contents of the file, or `FALLBACK_PROMPT` when nothing
 *   exists at that path.
 * @throws {Error} Read failures other than "not found" (for example the path is
 *   a directory) are rethrown instead of silently using the fallback.
 */
function loadBaselinePrompt(promptPath) {
    try {
        return fs.readFileSync(promptPath, 'utf-8');
    }
    catch (err) {
        // ENOENT: no such file. ENOTDIR: a parent segment of the path is a regular file.
        const missing = err != null && (err.code === 'ENOENT' || err.code === 'ENOTDIR');
        if (missing) {
            return FALLBACK_PROMPT;
        }
        throw err;
    }
}
69
/**
 * Generates an optimized candidate system prompt using an LLM refinement call.
 *
 * @param {string} baselinePrompt - Current system prompt, embedded verbatim in the request.
 * @returns {Promise<string>} The trimmed candidate prompt text.
 * @throws {Error} When the reply was cut off at the token limit, or contains no
 *   usable text.
 */
async function generateCandidatePrompt(baselinePrompt) {
    const anthropic = new sdk_1.default({ apiKey, baseURL });
    const optimizerPrompt = `You are an expert system prompt engineer. Your goal is to optimize the following system prompt for an autonomous coding agent.
You need to make the prompt more concise, remove redundancy, and explicitly instruct the agent to reduce token waste and thinking overhead, while ensuring it retains all functionality (using file-tools, rulez, self-healing, compiling, running tests).

Here is the Current System Prompt:
"""
${baselinePrompt}
"""

Provide your output strictly as the new optimized system prompt. Do not write any markdown wrappers (like \`\`\` or \`\`\`txt), explanations, greetings, or conversational headers/footers. Output ONLY the raw optimized system prompt text.`;
    const response = await anthropic.messages.create({
        model: defaultModel,
        max_tokens: 1500,
        messages: [{ role: 'user', content: optimizerPrompt }]
    });
    // A reply that stopped at the token limit is only a prefix of the intended
    // prompt; benchmarking (and possibly committing) it would be misleading.
    if (response.stop_reason === 'max_tokens') {
        throw new Error('System prompt candidate was truncated at the max_tokens limit.');
    }
    const block = response.content.find(b => b.type === 'text');
    const raw = block && typeof block.text === 'string' ? block.text.trim() : '';
    // The model may wrap the whole answer in one code fence despite the
    // instruction above. Unwrap only that case; fences inside the prompt stay.
    const fenced = /^```[^\n]*\n([\s\S]*)\n```$/.exec(raw);
    const unwrapped = fenced && !fenced[1].includes('```') ? fenced[1] : raw;
    const candidate = unwrapped.trim();
    if (!candidate) {
        throw new Error('Failed to retrieve system prompt candidate from LLM.');
    }
    return candidate;
}
94
/**
 * Prints the four metric lines of one benchmark report.
 *
 * @param {string} label - Heading prefix, e.g. "Baseline" or "Candidate".
 * @param {{success: boolean, time: number, tokens: *, score: number}} report - Benchmark result.
 */
function logBenchmarkReport(label, report) {
    console.log(`${label} Prompt Benchmark Result:`);
    console.log(`- Success: ${report.success ? '\x1b[32mPASS\x1b[0m' : '\x1b[31mFAIL\x1b[0m'}`);
    console.log(`- Time: ${report.time.toFixed(1)}s`);
    console.log(`- Tokens: ${report.tokens}`);
    console.log(`- Score: ${report.score.toFixed(2)}`);
}
/**
 * Runs the full prompt optimization and benchmarking pipeline:
 * 1. Benchmark the baseline prompt
 * 2. Generate an optimized candidate via LLM
 * 3. Benchmark the candidate
 * 4. Compare scores and commit if improved
 *
 * The candidate is accepted only when its benchmark succeeded AND its score is
 * strictly higher than the baseline score. The outcome is then recorded through
 * the state manager whether or not the candidate was accepted.
 *
 * @returns {Promise<void>}
 * @throws {Error} "Prompt Optimization Failed: ..." when candidate generation
 *   fails; the underlying error is attached as `cause`. Errors from the
 *   benchmark, file write or git steps propagate unchanged.
 */
async function runPromptOptimization() {
    const workspaceRoot = process.cwd();
    const promptRelPath = 'src/orchestrator/system-prompt.txt';
    const promptPath = path.join(workspaceRoot, promptRelPath);
    const baselinePrompt = loadBaselinePrompt(promptPath);
    // --- Step 1: Benchmark baseline ---
    console.log(`\n\x1b[35m=== STEP 1: BENCHMARKING BASELINE SYSTEM PROMPT ===\x1b[0m`);
    const baselineReport = await (0, prompt_benchmark_1.benchmarkPrompt)(baselinePrompt);
    logBenchmarkReport('Baseline', baselineReport);
    // --- Step 2: Generate candidate ---
    console.log(`\n\x1b[35m=== STEP 2: GENERATING OPTIMIZED CANDIDATE SYSTEM PROMPT ===\x1b[0m`);
    const optimizerSpinner = new tui_tools_1.Spinner('AGP - Optimizer: Refinement model generating optimized system prompt...');
    optimizerSpinner.start();
    let candidatePrompt = '';
    try {
        candidatePrompt = await generateCandidatePrompt(baselinePrompt);
        optimizerSpinner.stop(true, 'AGP - Optimizer: Prompt candidate generated.');
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        optimizerSpinner.stop(false, 'AGP - Optimizer: Prompt candidate generation failed.');
        // Keep the original error (and its stack) reachable via `cause`.
        throw new Error(`Prompt Optimization Failed: ${message}`, { cause: err });
    }
    console.log('\n\x1b[33m┌── Candidate System Prompt ──────────────────────────────────────────────────────┐\x1b[0m');
    candidatePrompt.split('\n').forEach(line => console.log(`\x1b[33m│\x1b[0m ${line}`));
    console.log('\x1b[33m└─────────────────────────────────────────────────────────────────────────────────┘\x1b[0m\n');
    // --- Step 3: Benchmark candidate ---
    console.log(`\x1b[35m=== STEP 3: BENCHMARKING CANDIDATE SYSTEM PROMPT ===\x1b[0m`);
    const candidateReport = await (0, prompt_benchmark_1.benchmarkPrompt)(candidatePrompt);
    logBenchmarkReport('Candidate', candidateReport);
    // --- Step 4: Compare and commit ---
    console.log(`\x1b[35m=== STEP 4: COMPARING PERFORMANCE METRICS ===\x1b[0m`);
    console.log(`Baseline Score: \x1b[1m${baselineReport.score.toFixed(2)}\x1b[0m`);
    console.log(`Candidate Score: \x1b[1m${candidateReport.score.toFixed(2)}\x1b[0m`);
    const approved = candidateReport.success && candidateReport.score > baselineReport.score;
    if (approved) {
        console.log(`\n\x1b[32;1m[Optimization Approved] Candidate prompt is more efficient. Overwriting system-prompt.txt...\x1b[0m`);
        // The baseline may have come from the built-in fallback, in which case
        // the prompt directory might not exist yet.
        fs.mkdirSync(path.dirname(promptPath), { recursive: true });
        fs.writeFileSync(promptPath, candidatePrompt, 'utf-8');
        // Commit the changes to Git. The trailing pathspec limits the commit to
        // the prompt file so unrelated staged changes are not swept into it.
        // NOTE(review): runCommand's failure contract is not visible here — confirm
        // it rejects on a non-zero exit, otherwise the status line below can be wrong.
        await (0, shell_tools_1.runCommand)(`git add ${promptRelPath}`);
        await (0, shell_tools_1.runCommand)(`git commit -m "chore: optimize Task Agent system prompt via automated benchmarking loop" -- ${promptRelPath}`);
        console.log(`\x1b[32m[Git Status] System prompt snapshot committed to repository HEAD.\x1b[0m`);
    }
    else {
        console.log(`\n\x1b[31;1m[Optimization Rejected] Candidate prompt did not improve performance. Retaining baseline.\x1b[0m`);
    }
    (0, state_manager_1.updateEvolutionState)({
        lastPromptOptimization: {
            success: candidateReport.success,
            baselineScore: baselineReport.score,
            candidateScore: candidateReport.score,
            approved,
            time: new Date().toISOString()
        }
    });
    (0, state_manager_1.appendHistory)({
        cycle: (0, state_manager_1.getEvolutionState)().currentCycle,
        type: 'prompt_opt',
        description: `Optimized system prompt. Baseline: ${baselineReport.score.toFixed(2)}, Candidate: ${candidateReport.score.toFixed(2)}`,
        success: approved
    });
}
@@ -1,222 +0,0 @@
1
- "use strict";
2
// NOTE(review): these look like the module-interop helpers a TypeScript build
// emits for CommonJS output — confirm before hand-editing generated code.
// Each helper reuses an implementation already present on `this` and only falls
// back to the local definition below when none is found.

/**
 * Exposes `source[key]` on `target` under `alias` (defaults to `key`).
 * Where property descriptors are available the binding is a live getter unless
 * the source already provides a suitable accessor / frozen data property.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    var exposedName = alias === undefined ? key : alias;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var needsLiveGetter;
    if (!descriptor) {
        needsLiveGetter = true;
    }
    else if ("get" in descriptor) {
        // Accessor property: only trusted as-is when the source is an ES module.
        needsLiveGetter = !source.__esModule;
    }
    else {
        // Data property: anything that can still change must be read lazily.
        needsLiveGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsLiveGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, exposedName, descriptor);
}) : (function (target, source, key, alias) {
    var exposedName = alias === undefined ? key : alias;
    target[exposedName] = source[key];
}));

/** Installs `value` as the `default` member of the namespace object `target`. */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});

/**
 * Wraps a required module in a namespace object: every own property except
 * `default` is re-bound, and the module itself becomes `default`.
 * Modules flagged `__esModule` are returned untouched.
 */
var __importStar = (this && this.__importStar) || (function () {
    var listOwnKeys = function (subject) {
        // Resolved on first use, then cached by overwriting this variable.
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var collected = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    collected[collected.length] = name;
                }
            }
            return collected;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();

/** Returns `mod` when it is flagged `__esModule`, otherwise `{ default: mod }`. */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.getAutonomousRefactorProposal = getAutonomousRefactorProposal;
40
- const fs = __importStar(require("fs"));
41
- const path = __importStar(require("path"));
42
- const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
43
- const dotenv = __importStar(require("dotenv"));
44
- const tui_tools_1 = require("../tools/tui-tools");
45
- const repo_map_1 = require("../tools/repo-map");
46
// Run dotenv first so the environment lookups below can see whatever it loads.
dotenv.config();
const { env } = process;
// Credential handed to the SDK client: API key first, auth token second, else ''.
const apiKey = env.ANTHROPIC_API_KEY || env.ANTHROPIC_AUTH_TOKEN || '';
// Optional endpoint override; left undefined when the variable is unset or empty.
const baseURL = env.ANTHROPIC_BASE_URL ? env.ANTHROPIC_BASE_URL : undefined;
// Model id sent with every request unless ANTHROPIC_MODEL overrides it.
const defaultModel = env.ANTHROPIC_MODEL || 'mimo-v2.5-pro[1m]';
50
/**
 * Reads the last N lines of the evolution log file.
 *
 * @param {string} workspaceRoot - Directory expected to contain `evolution.log`.
 * @param {number} maxLines - Maximum number of trailing lines to return.
 * @returns {string} Up to `maxLines` final lines joined with "\n" (without the
 *   file's terminating newline), or '' when the limit is not positive or the
 *   log is missing or unreadable.
 */
function readRecentEvolutionLog(workspaceRoot, maxLines) {
    const limit = Math.floor(Number(maxLines));
    // slice(-0) is slice(0): without this guard a limit of 0 (or NaN) would
    // return the entire log instead of nothing.
    if (!(limit > 0)) {
        return '';
    }
    const logPath = path.join(workspaceRoot, 'evolution.log');
    let fullLog;
    try {
        fullLog = fs.readFileSync(logPath, 'utf-8');
    }
    catch (e) {
        // Best-effort context gathering: a missing or unreadable log is not an error.
        return '';
    }
    const lines = fullLog.split('\n');
    // A newline-terminated file yields an empty final element that would
    // otherwise occupy one of the requested line slots.
    if (lines.length > 0 && lines[lines.length - 1] === '') {
        lines.pop();
    }
    return lines.slice(-limit).join('\n');
}
66
/**
 * Returns the text of `Learnings.md` from the workspace root.
 *
 * @param {string} workspaceRoot - Directory expected to contain `Learnings.md`.
 * @returns {string} UTF-8 file contents, or '' when the file is absent or cannot be read.
 */
function readLearnings(workspaceRoot) {
    const learningsPath = path.join(workspaceRoot, 'Learnings.md');
    let contents = '';
    if (fs.existsSync(learningsPath)) {
        try {
            contents = fs.readFileSync(learningsPath, 'utf-8');
        }
        catch (readError) {
            // Unreadable file is treated the same as a missing one.
            contents = '';
        }
    }
    return contents;
}
80
/**
 * Step 1: Ask the LLM to select 2 candidate files for refactoring
 * based on the repo map, logs, and learnings.
 *
 * @param {object} anthropic - Client exposing `messages.create(request)`.
 * @param {string} repoMap - Repository map text embedded in the prompt.
 * @param {string} evolutionLog - Recent log lines; a placeholder is sent when empty.
 * @param {string} learningsMd - Learnings text; a placeholder is sent when empty.
 * @returns {Promise<Array<{targetFile: string}>>} Entries from the reply's
 *   `candidates` list that carry a non-empty string `targetFile`.
 * @throws {Error} When the reply has no text, is not valid JSON, or lists no
 *   usable candidate.
 */
async function selectCandidateFiles(anthropic, repoMap, evolutionLog, learningsMd) {
    const candidatePrompt = `You are an expert software architect. Your goal is to select the top 2 files in this codebase that would benefit most from refactoring (improving type-safety, code duplication, helper functions cleanup, or performance).
Review the codebase map, past evolution logs, and lessons learned.

Codebase Repository Map:
${repoMap}

Last 40 lines of evolution logs (shows recent runs/warnings/failures):
${evolutionLog || 'No log history available.'}

Learnings Registry (shows past insights):
${learningsMd || 'No past learnings recorded.'}

Output your response strictly as a JSON object containing a list of 2 candidate target files (relative paths). Do not include any other text, markdown blocks, or formatting, just the raw JSON.

Format:
{
"candidates": [
{ "targetFile": "relative/path/to/file1.ts" },
{ "targetFile": "relative/path/to/file2.ts" }
]
}`;
    const response = await anthropic.messages.create({
        model: defaultModel,
        max_tokens: 4000,
        messages: [{ role: 'user', content: candidatePrompt }]
    });
    const block = response.content.find(b => b.type === 'text');
    if (!block || !block.text) {
        console.error('DEBUG - candidateResponse content:', JSON.stringify(response.content));
        throw new Error('Failed to retrieve candidates from LLM.');
    }
    // Tolerate a surrounding code fence with any (or no) language tag.
    const text = block.text.trim().replace(/^```[a-zA-Z]*\s*/, '').replace(/\s*```$/, '').trim();
    let json;
    try {
        json = JSON.parse(text);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`LLM returned malformed JSON for candidates: ${reason}`, { cause: err });
    }
    const listed = json && Array.isArray(json.candidates) ? json.candidates : [];
    // Downstream code joins targetFile onto the workspace path, so anything that
    // is not a non-empty string is dropped here instead of crashing later.
    const candidates = listed.filter(entry => entry != null && typeof entry.targetFile === 'string' && entry.targetFile.trim() !== '');
    if (candidates.length === 0) {
        throw new Error('No candidates identified by the LLM.');
    }
    return candidates;
}
124
/**
 * Reads the contents of candidate files from disk for context.
 *
 * Candidate paths are supplied by the model, so each one is confined to the
 * workspace before being read: a path that climbs out of `workspaceRoot` is
 * reported as skipped and its contents are never loaded.
 *
 * @param {string} workspaceRoot - Directory the candidate paths are relative to.
 * @param {Array<{targetFile: string}>} candidates - Candidate entries to load.
 * @returns {string} One "--- FILE: ... ---" section per candidate, carrying the
 *   file contents or a short note explaining why they are missing. Entries
 *   without a non-empty string `targetFile` are ignored.
 */
function readCandidateFileContents(workspaceRoot, candidates) {
    let fileContext = '';
    for (const cand of candidates) {
        const targetFile = cand != null ? cand.targetFile : undefined;
        if (typeof targetFile !== 'string' || targetFile === '') {
            continue;
        }
        const fullPath = path.join(workspaceRoot, targetFile);
        // path.join normalises ".." segments; a relative path that still starts
        // by climbing upward points outside the workspace.
        const relative = path.relative(workspaceRoot, fullPath);
        const escapesWorkspace = relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
        if (escapesWorkspace) {
            fileContext += `\n--- FILE: ${targetFile} (Skipped: path is outside the workspace) ---\n`;
            continue;
        }
        if (!fs.existsSync(fullPath)) {
            fileContext += `\n--- FILE: ${targetFile} (File not found on disk) ---\n`;
            continue;
        }
        try {
            const fileContent = fs.readFileSync(fullPath, 'utf-8');
            fileContext += `\n--- FILE: ${targetFile} ---\n${fileContent}\n`;
        }
        catch (e) {
            // Exists but cannot be read as a file (e.g. it is a directory).
            fileContext += `\n--- FILE: ${targetFile} (Could not read file contents) ---\n`;
        }
    }
    return fileContext;
}
146
/**
 * Step 2: Ask the LLM to evaluate the candidate file contents and select the single
 * best refactoring proposal.
 *
 * @param {object} anthropic - Client exposing `messages.create(request)`.
 * @param {string} fileContext - Candidate file sections embedded in the prompt.
 * @returns {Promise<{targetFile: string, refactorGoal: string}>} The winning
 *   proposal; any score fields in the reply are not returned.
 * @throws {Error} When the reply has no text, is not valid JSON, or lacks a
 *   string `targetFile` / `refactorGoal`.
 */
async function evaluateAndSelectWinner(anthropic, fileContext) {
    const evaluationPrompt = `You are an expert software architect. Analyze the actual code content of the candidate files below and select the single best refactoring proposal.
Evaluate each file and design a specific refactor goal.
Score each proposal out of 10 based on:
1. Impact (readability, duplication reduction, safety, performance).
2. Feasibility (compilation safety, low risk of breaking tests).

Candidate File Contents:
${fileContext}

Output your response strictly as a JSON object containing the winning proposal's target file and the specific refactoring goal. Do not include any other text, markdown blocks, or formatting, just the raw JSON.

Format:
{
"winningProposal": {
"targetFile": "relative/path/to/file.ts",
"refactorGoal": "A concise, specific description of the refactoring goal (e.g., 'Refactor search-tools to use strong interfaces instead of any[]')",
"impactScore": 8.5,
"feasibilityScore": 9.0
}
}`;
    const response = await anthropic.messages.create({
        model: defaultModel,
        max_tokens: 4000,
        messages: [{ role: 'user', content: evaluationPrompt }]
    });
    const block = response.content.find(b => b.type === 'text');
    if (!block || !block.text) {
        throw new Error('Failed to retrieve evaluation from LLM.');
    }
    // Tolerate a surrounding code fence with any (or no) language tag.
    const text = block.text.trim().replace(/^```[a-zA-Z]*\s*/, '').replace(/\s*```$/, '').trim();
    let json;
    try {
        json = JSON.parse(text);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`LLM returned malformed JSON for winning proposal: ${reason}`, { cause: err });
    }
    // `json` itself may be null or a primitive when the model replies with bare JSON.
    const winningProposal = json != null ? json.winningProposal : undefined;
    const isFilledString = (value) => typeof value === 'string' && value.trim() !== '';
    if (!winningProposal || !isFilledString(winningProposal.targetFile) || !isFilledString(winningProposal.refactorGoal)) {
        throw new Error('Invalid JSON structure returned for winning proposal.');
    }
    return {
        targetFile: winningProposal.targetFile,
        refactorGoal: winningProposal.refactorGoal
    };
}
191
/**
 * Generates a refactoring proposal by scanning the codebase repository map,
 * evolution logs, and learnings. Uses a two-pass LLM strategy:
 * 1. Select candidate files
 * 2. Evaluate actual code and pick the winner
 *
 * @returns {Promise<{targetFile: string, refactorGoal: string}>} The proposal
 *   returned by the evaluation pass.
 * @throws {Error} "Refactoring Analysis Failed: ..." when any step inside the
 *   scan fails; the underlying error is attached as `cause`.
 */
async function getAutonomousRefactorProposal() {
    const workspaceRoot = process.cwd();
    const repoMap = (0, repo_map_1.generateRepoMap)(workspaceRoot);
    const scanSpinner = new tui_tools_1.Spinner('AGP - Scan: Auditing codebase structure for optimizations...');
    scanSpinner.start();
    // Context gathering
    const evolutionLog = readRecentEvolutionLog(workspaceRoot, 40);
    const learningsMd = readLearnings(workspaceRoot);
    const anthropic = new sdk_1.default({ apiKey, baseURL });
    try {
        // Step 1: Select 2 target candidate files
        const candidates = await selectCandidateFiles(anthropic, repoMap, evolutionLog, learningsMd);
        // Step 2: Read candidate file contents
        const fileContext = readCandidateFileContents(workspaceRoot, candidates);
        // Step 3: Evaluate and select winner
        const proposal = await evaluateAndSelectWinner(anthropic, fileContext);
        console.log(`\n\x1b[32m✔\x1b[0m AGP - Scan: Selected best proposal: ${proposal.targetFile}`);
        scanSpinner.stop(true, 'AGP - Scan: Codebase audit complete.');
        return proposal;
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        scanSpinner.stop(false, 'AGP - Scan: Codebase audit failed.');
        // Keep the original error (and its stack) reachable via `cause`.
        throw new Error(`Refactoring Analysis Failed: ${message}`, { cause: err });
    }
}