closed-loop-cli 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of closed-loop-cli might be problematic. Click here for more details.

@@ -1,223 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.DGMArchive = void 0;
37
- const fs = __importStar(require("fs"));
38
- const path = __importStar(require("path"));
39
const ARCHIVE_FILENAME = 'dgm-archive.json';
const DEFAULT_MAX_SIZE = 20;
/**
 * DGM Archive Manager
 *
 * Keeps the population of agent snapshots and handles parent selection for the
 * Darwin Gödel Machine evolution loop, following the paper
 * "Darwin Gödel Machine: Open-Ended Evolution of Self-Improving Agents".
 *
 * Entries are held in memory and persisted as JSON to `dgm-archive.json`
 * inside the workspace root after every mutating operation.
 */
class DGMArchive {
    /** Path of the JSON file backing this archive. */
    archivePath;
    /** In-memory list of archive entries. */
    entries;
    /**
     * @param {string} [workspaceRoot] Directory that holds the archive file;
     *   falls back to `process.cwd()` when omitted or empty.
     */
    constructor(workspaceRoot) {
        const root = workspaceRoot || process.cwd();
        this.archivePath = path.join(root, ARCHIVE_FILENAME);
        this.entries = this.load();
    }
    // ─────────────────────────────────────────
    // Persistence
    // ─────────────────────────────────────────
    /**
     * Reads the archive file.
     * @returns {Array<object>} the stored entries, or an empty array when the
     *   file is missing, unreadable, or not in the expected shape.
     */
    load() {
        if (!fs.existsSync(this.archivePath)) {
            return [];
        }
        try {
            const parsed = JSON.parse(fs.readFileSync(this.archivePath, 'utf-8'));
            if (!Array.isArray(parsed?.entries)) {
                return [];
            }
            // Drop non-object items so later lookups (`e.id`) cannot crash on a
            // hand-edited or partially corrupted file.
            return parsed.entries.filter((e) => e !== null && typeof e === 'object');
        }
        catch {
            // Best effort: a corrupt archive is treated as an empty one.
            return [];
        }
    }
    /**
     * Writes the current entries to disk. Failures are logged, not thrown, so
     * an unwritable workspace does not abort the evolution loop.
     */
    save() {
        const data = {
            version: '1.0.0',
            lastUpdated: new Date().toISOString(),
            entries: this.entries
        };
        try {
            fs.writeFileSync(this.archivePath, JSON.stringify(data, null, 2), 'utf-8');
        }
        catch (err) {
            console.error('[DGMArchive] Failed to save archive:', err);
        }
    }
    // ─────────────────────────────────────────
    // Core Operations
    // ─────────────────────────────────────────
    /**
     * Adds a new snapshot to the archive and persists it.
     * `id` and `timestamp` are always generated here and override any values
     * present on the input.
     * @param {object} entry snapshot data without id/timestamp
     * @returns {object} the entry that was just added
     */
    addEntry(entry) {
        const newEntry = {
            ...entry,
            id: this.generateId(),
            timestamp: new Date().toISOString()
        };
        this.entries.push(newEntry);
        this.save();
        return newEntry;
    }
    /**
     * Picks the parent entry for the next mutation round.
     *
     * @param {'fitness_weighted'|'best'|'random'} [strategy='fitness_weighted']
     *   Any value other than 'best' or 'random' uses fitness-weighted sampling.
     * @returns {object|null} the chosen entry, or null when the archive is empty
     */
    getBestParent(strategy = 'fitness_weighted') {
        if (this.entries.length === 0)
            return null;
        if (strategy === 'best') {
            return this.getBestEntry();
        }
        if (strategy === 'random') {
            return this.entries[Math.floor(Math.random() * this.entries.length)];
        }
        // fitness_weighted: sample with probability proportional to fitness (default)
        return this.fitnessWeightedSample();
    }
    /**
     * Traces the lineage of an entry back towards its origin.
     * The walk stops at an entry without `parentId`, at a parent that is no
     * longer in the archive, or when a parent was already visited (a cyclic
     * chain would otherwise loop forever).
     * @param {string} entryId
     * @returns {Array<object>} entries ordered from the oldest ancestor found to the given entry
     */
    getLineage(entryId) {
        const lineage = [];
        const visited = new Set();
        let current = this.findById(entryId);
        while (current && !visited.has(current.id)) {
            visited.add(current.id);
            lineage.unshift(current);
            if (!current.parentId)
                break;
            current = this.findById(current.parentId);
        }
        return lineage;
    }
    /**
     * Trims the archive so it does not grow without bound, keeping the entries
     * with the highest fitness. After pruning, `entries` is ordered by
     * descending fitness.
     * @param {number} [maxSize=DEFAULT_MAX_SIZE] maximum number of entries to
     *   keep; fractional values are floored, negative values are treated as 0,
     *   and NaN falls back to the default.
     * @returns {number} how many entries were removed
     */
    pruneOldEntries(maxSize = DEFAULT_MAX_SIZE) {
        const requested = Number(maxSize);
        const limit = Number.isNaN(requested)
            ? DEFAULT_MAX_SIZE
            : Math.max(0, Math.floor(requested));
        if (this.entries.length <= limit)
            return 0;
        // Highest fitness first, then cut off everything past the limit.
        const sorted = [...this.entries].sort((a, b) => b.fitness - a.fitness);
        const pruned = this.entries.length - limit;
        this.entries = sorted.slice(0, limit);
        this.save();
        return pruned;
    }
    // ─────────────────────────────────────────
    // Query / Stats
    // ─────────────────────────────────────────
    /** @returns {Array<object>} a shallow copy of all entries */
    getAll() {
        return [...this.entries];
    }
    /**
     * @param {string} id
     * @returns {object|undefined} the entry with that id, if any
     */
    findById(id) {
        return this.entries.find(e => e.id === id);
    }
    /** @returns {object|null} the entry with the highest fitness, or null when empty */
    getBestEntry() {
        if (this.entries.length === 0)
            return null;
        return [...this.entries].sort((a, b) => b.fitness - a.fitness)[0];
    }
    /**
     * Summarises the archive.
     * @returns {{totalEntries: number, bestFitness: number, averageFitness: number,
     *   bestEntryId: (string|null), latestEntryId: (string|null), generationDepth: number}}
     *   `averageFitness` is rounded to three decimals; `generationDepth` is the
     *   lineage length of the most recent entry.
     */
    getStats() {
        if (this.entries.length === 0) {
            return {
                totalEntries: 0,
                bestFitness: 0,
                averageFitness: 0,
                bestEntryId: null,
                latestEntryId: null,
                generationDepth: 0
            };
        }
        const best = this.getBestEntry();
        const latest = this.getRecentHistory(1)[0];
        const avgFitness = this.entries.reduce((sum, e) => sum + e.fitness, 0) / this.entries.length;
        const depth = latest ? this.getLineage(latest.id).length : 0;
        return {
            totalEntries: this.entries.length,
            bestFitness: best.fitness,
            averageFitness: Math.round(avgFitness * 1000) / 1000,
            bestEntryId: best.id,
            latestEntryId: latest?.id ?? null,
            generationDepth: depth
        };
    }
    /**
     * Returns the most recent N entries, newest first (ordered by timestamp).
     * @param {number} [n=10]
     * @returns {Array<object>}
     */
    getRecentHistory(n = 10) {
        return [...this.entries]
            .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())
            .slice(0, n);
    }
    // ─────────────────────────────────────────
    // Private Helpers
    // ─────────────────────────────────────────
    /**
     * Fitness-weighted random sampling (roulette wheel selection): the chance
     * of picking an entry is proportional to its fitness.
     * Every entry gets a weight of at least 0.01 so zero-fitness entries stay
     * selectable; a missing or non-numeric fitness also gets that minimum
     * weight instead of turning the whole total into NaN.
     * @returns {object} the sampled entry
     */
    fitnessWeightedSample() {
        const MIN_WEIGHT = 0.01;
        const weightOf = (entry) => {
            const fitness = Number(entry.fitness);
            return Number.isFinite(fitness) ? Math.max(fitness, MIN_WEIGHT) : MIN_WEIGHT;
        };
        const totalFitness = this.entries.reduce((sum, e) => sum + weightOf(e), 0);
        let r = Math.random() * totalFitness;
        for (const entry of this.entries) {
            r -= weightOf(entry);
            if (r <= 0)
                return entry;
        }
        // Fallback for floating-point leftovers: return the last entry.
        return this.entries[this.entries.length - 1];
    }
    /**
     * Builds an id of the form `dgm_<base36 timestamp>_<random suffix>`.
     * Retries while the candidate already exists in the archive, since two
     * entries added in the same millisecond could otherwise collide.
     * @returns {string}
     */
    generateId() {
        let id;
        do {
            const ts = Date.now().toString(36);
            const rand = Math.random().toString(36).substring(2, 6);
            id = `dgm_${ts}_${rand}`;
        } while (this.findById(id));
        return id;
    }
}
223
- exports.DGMArchive = DGMArchive;
@@ -1,99 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.evaluateFitness = evaluateFitness;
4
- exports.compareToBaseline = compareToBaseline;
5
- exports.parseTestOutput = parseTestOutput;
6
- const shell_tools_1 = require("../tools/shell-tools");
7
/**
 * Measures empirical fitness from the project's current test suite.
 *
 * DGM approach: fitness comes from real unit-test results rather than from a
 * heuristic alone ("Each new agent version is evaluated empirically using
 * coding benchmarks").
 *
 * Steps: run `npm run build`; if it fails the score is 0 and tests are not
 * run. Otherwise run `npm test`, parse its combined stdout/stderr with
 * `parseTestOutput`, and use the pass rate as the score.
 *
 * If `npm test` reports a non-zero numeric exit code while the parser found no
 * failing tests, the run is treated as failed (score 0): the output was either
 * unparseable or the runner crashed, so a perfect pass rate cannot be trusted.
 *
 * @returns {Promise<{passRate: number, passCount: number, failCount: number,
 *   totalTests: number, score: number, compileSuccess: boolean,
 *   summary: string, rawOutput: string}>}
 */
async function evaluateFitness() {
    // Step 1: check that the project compiles.
    const compileRes = await (0, shell_tools_1.runCommand)('npm run build');
    if (compileRes.exitCode !== 0) {
        return {
            passRate: 0,
            passCount: 0,
            failCount: 0,
            totalTests: 0,
            score: 0,
            compileSuccess: false,
            summary: `Compilation failed — fitness = 0`,
            // Guard both streams, same as the test path below.
            rawOutput: (compileRes.stdout || '') + (compileRes.stderr || '')
        };
    }
    // Step 2: run the tests.
    const testRes = await (0, shell_tools_1.runCommand)('npm test');
    const rawOutput = (testRes.stdout || '') + (testRes.stderr || '');
    // Step 3: parse the test output.
    const parsed = parseTestOutput(rawOutput);
    // Step 4: compute the score. Compilation is known to have passed here, so
    // the score is the pass rate unless the runner failed without explanation.
    const runnerFailed = typeof testRes.exitCode === 'number' && testRes.exitCode !== 0;
    const unexplainedFailure = runnerFailed && parsed.failCount === 0;
    const score = unexplainedFailure ? 0 : parsed.passRate;
    let summary = `Compile: PASS | ` +
        `Tests: ${parsed.passCount}/${parsed.totalTests} passed ` +
        `(${(parsed.passRate * 100).toFixed(1)}%) | Score: ${score.toFixed(3)}`;
    if (unexplainedFailure) {
        summary += ` | Test command exited with code ${testRes.exitCode} without reported failures`;
    }
    return {
        ...parsed,
        score,
        compileSuccess: true,
        summary,
        rawOutput
    };
}
48
/**
 * Compares the fitness of a candidate snapshot against a baseline snapshot.
 *
 * @param {{score: number}} candidate fitness result of the new snapshot
 * @param {{score: number}} baseline fitness result to compare against
 * @returns {{improved: boolean, delta: number, baseline: object, candidate: object, message: string}}
 *   `delta` is candidate score minus baseline score; `improved` is true only
 *   for a strictly positive delta; `message` is a one-line human-readable verdict.
 */
function compareToBaseline(candidate, baseline) {
    const delta = candidate.score - baseline.score;
    const improved = delta > 0;
    let message;
    if (improved) {
        message = `✔ Fitness improved: ${baseline.score.toFixed(3)} → ${candidate.score.toFixed(3)} (+${delta.toFixed(3)})`;
    }
    else if (delta === 0) {
        message = `= Fitness unchanged: ${candidate.score.toFixed(3)}`;
    }
    else {
        // Negative delta already carries its own minus sign.
        message = `✘ Fitness regressed: ${baseline.score.toFixed(3)} → ${candidate.score.toFixed(3)} (${delta.toFixed(3)})`;
    }
    return { improved, delta, baseline, candidate, message };
}
61
/**
 * Parses test-runner output and counts passed/failed tests.
 *
 * Recognised formats, applied in order (a later match overrides counts from
 * an earlier one):
 *   1. "[Pass] <name>" / "[Fail] <name>" tags (the project's own runner)
 *   2. Mocha: "N passing" / "N failing"
 *   3. Jest summary line: "Tests: N failed, N passed, N total" — the counts
 *      may appear in any order; a count missing from the line keeps the value
 *      found by the earlier patterns
 *   4. The banner "ALL BINARY ASSERTION TESTS PASSED", used only when nothing
 *      else produced a count
 *
 * @param {string} output combined stdout/stderr of the test command;
 *   null/undefined is treated as empty output
 * @returns {{passRate: number, passCount: number, failCount: number, totalTests: number}}
 *   `passRate` is passCount / totalTests, or 1 when no tests were detected.
 */
function parseTestOutput(output) {
    // Number of tests in the project's run-tests.ts (Test 1–11), assumed when
    // only the success banner is present.
    const BANNER_TEST_COUNT = 11;
    const text = typeof output === 'string' ? output : String(output ?? '');
    let passCount = 0;
    let failCount = 0;
    // Pattern 1: "[Pass] <test name>" tags.
    const passTags = text.match(/\[Pass\]/g);
    const failTags = text.match(/\[Fail\]/g);
    if (passTags || failTags) {
        passCount = passTags ? passTags.length : 0;
        failCount = failTags ? failTags.length : 0;
    }
    // Pattern 2: Mocha format — "X passing" / "X failing".
    const mochaPassing = text.match(/(\d+)\s+passing/i);
    const mochaFailing = text.match(/(\d+)\s+failing/i);
    if (mochaPassing || mochaFailing) {
        passCount = mochaPassing ? parseInt(mochaPassing[1], 10) : passCount;
        failCount = mochaFailing ? parseInt(mochaFailing[1], 10) : failCount;
    }
    // Pattern 3: Jest summary line. Jest prints the failed count BEFORE the
    // passed count, so each count is looked up independently on the first
    // "Tests:" line that carries one.
    for (const line of text.matchAll(/Tests:\s+([^\r\n]*)/gi)) {
        const passed = line[1].match(/(\d+)\s+passed/i);
        const failed = line[1].match(/(\d+)\s+failed/i);
        if (!passed && !failed) {
            continue;
        }
        // An explicit "0 passed" is a real count and must not fall back.
        passCount = passed ? parseInt(passed[1], 10) : passCount;
        failCount = failed ? parseInt(failed[1], 10) : failCount;
        break;
    }
    // Pattern 4: success banner — means 100% passed. Only used as a fallback
    // when no pass/fail count could be detected.
    if (text.includes('ALL BINARY ASSERTION TESTS PASSED') && passCount === 0 && failCount === 0) {
        passCount = BANNER_TEST_COUNT;
        failCount = 0;
    }
    const totalTests = passCount + failCount;
    // With no detected tests there are no failures either, so the rate is 1.
    const passRate = totalTests > 0 ? passCount / totalTests : 1;
    return { passRate, passCount, failCount, totalTests };
}
@@ -1,174 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.MutationStrategy = void 0;
4
- exports.selectMutationStrategy = selectMutationStrategy;
5
- exports.generateMutationPrompt = generateMutationPrompt;
6
/**
 * DGM Mutation Strategies
 *
 * From the DGM paper: open-ended evolution needs a diversity of mutation
 * types, not refactoring alone, so that a wider search space is explored.
 * Each member maps to the string stored in archive entries.
 */
const MutationStrategy = {
    /** Add a new feature to the system. */
    ADD_FEATURE: "add_feature",
    /** Refactor existing code for readability / less duplication. */
    REFACTOR: "refactor",
    /** Optimize performance or token efficiency. */
    OPTIMIZE: "optimize",
    /** Add unit tests to increase coverage. */
    ADD_TESTS: "add_tests",
    /** Fix a bug detected from test failures. */
    FIX_BUG: "fix_bug",
    /** Improve the system prompt / agent prompt. */
    IMPROVE_PROMPT: "improve_prompt"
};
exports.MutationStrategy = MutationStrategy;
27
/**
 * Chooses a mutation strategy from the task text, the current fitness, and
 * the archive's recent history.
 *
 * Decision order:
 *   1. Task keywords — the first rule whose keyword occurs anywhere in the
 *      lower-cased task wins (plain substring match, so "optim" also covers
 *      "optimize"/"optimization").
 *   2. Low fitness (< 0.7) — prioritise FIX_BUG.
 *   3. Diversity — among the 8 most recent archive entries, pick the strategy
 *      used least often (first in enum order on ties) if it was used fewer
 *      than 2 times.
 *   4. Default — ADD_FEATURE.
 *
 * @param {{getRecentHistory: function(number): Array<object>}} archive
 *   source of recent entries; a missing archive is treated as empty history
 * @param {string} task task description; a missing task is treated as empty text
 * @param {number} [currentFitness=0] current test pass rate in the 0..1 range
 * @returns {{strategy: string, rationale: string}}
 */
function selectMutationStrategy(archive, task, currentFitness = 0) {
    const taskLower = String(task ?? '').toLowerCase();
    // 1. Task-based override: the task text clearly points at a strategy.
    const keywordRules = [
        {
            keywords: ['fix', 'bug', 'error'],
            strategy: MutationStrategy.FIX_BUG,
            rationale: 'Task description indicates a bug fix is needed'
        },
        {
            keywords: ['test', 'coverage'],
            strategy: MutationStrategy.ADD_TESTS,
            rationale: 'Task description requests test improvements'
        },
        {
            keywords: ['prompt', 'instruction'],
            strategy: MutationStrategy.IMPROVE_PROMPT,
            rationale: 'Task description targets prompt engineering'
        },
        {
            keywords: ['refactor', 'clean', 'reformat'],
            strategy: MutationStrategy.REFACTOR,
            rationale: 'Task description requests code refactoring'
        },
        {
            keywords: ['optim', 'speed', 'performance', 'token'],
            strategy: MutationStrategy.OPTIMIZE,
            rationale: 'Task description requests optimization'
        },
        {
            keywords: ['add', 'implement', 'create', 'new feature'],
            strategy: MutationStrategy.ADD_FEATURE,
            rationale: 'Task description requests adding a new feature'
        }
    ];
    for (const rule of keywordRules) {
        if (rule.keywords.some((keyword) => taskLower.includes(keyword))) {
            return { strategy: rule.strategy, rationale: rule.rationale };
        }
    }
    // 2. Fitness-based heuristic: low fitness -> focus on fixing.
    if (currentFitness < 0.7) {
        return {
            strategy: MutationStrategy.FIX_BUG,
            rationale: `Low fitness (${(currentFitness * 100).toFixed(1)}%) — prioritizing stability fixes`
        };
    }
    // 3. Archive diversity: find the strategy used least in recent history.
    const recentHistory = archive?.getRecentHistory?.(8) ?? [];
    if (recentHistory.length > 0) {
        // A Map keyed by the known strategy values: unknown strings from the
        // archive (including names like "constructor") are simply ignored.
        const strategyCounts = new Map(Object.values(MutationStrategy).map((s) => [s, 0]));
        for (const entry of recentHistory) {
            const used = entry?.mutationStrategy;
            if (strategyCounts.has(used)) {
                strategyCounts.set(used, strategyCounts.get(used) + 1);
            }
        }
        // First minimum in enum order (diversity promotion).
        let leastUsed = null;
        for (const candidate of strategyCounts) {
            if (leastUsed === null || candidate[1] < leastUsed[1]) {
                leastUsed = candidate;
            }
        }
        if (leastUsed && leastUsed[1] < 2) {
            return {
                strategy: leastUsed[0],
                rationale: `Diversity promotion: "${leastUsed[0]}" has been used least (${leastUsed[1]}x in recent history)`
            };
        }
    }
    // 4. Default: ADD_FEATURE (expansive mutation).
    return {
        strategy: MutationStrategy.ADD_FEATURE,
        rationale: 'Default open-ended evolution strategy: expanding capabilities'
    };
}
109
/**
 * Builds the task prompt for a specific mutation strategy so the agent knows
 * which kind of mutation it is performing.
 *
 * The prompt starts with a "[DGM Mutation: <STRATEGY>]" header followed by
 * optional "[DGM Context]" lines (parent snapshot, current fitness, target
 * file), then a strategy-specific goal sentence, the original task, and a
 * list of guidelines. An unrecognised strategy yields just header + task.
 *
 * @param {string} strategy one of the MutationStrategy values
 * @param {string} originalTask the task text to embed
 * @param {{parentEntry?: {id: string, fitness: number}, currentFitness?: number, targetFile?: string}} [context]
 * @returns {string} the complete prompt
 */
function generateMutationPrompt(strategy, originalTask, context) {
    // Optional context lines, each prefixed with its own newline.
    const contextLines = [];
    if (context?.parentEntry) {
        contextLines.push(`\n[DGM Context] Building upon parent snapshot: ${context.parentEntry.id} (fitness: ${(context.parentEntry.fitness * 100).toFixed(1)}%)`);
    }
    if (context?.currentFitness !== undefined) {
        contextLines.push(`\n[DGM Context] Current system fitness: ${(context.currentFitness * 100).toFixed(1)}% (test pass rate)`);
    }
    if (context?.targetFile) {
        contextLines.push(`\n[DGM Context] Primary target file: ${context.targetFile}`);
    }
    const dgmHeader = `[DGM Mutation: ${strategy.toUpperCase()}]${contextLines.join('')}\n\n`;
    // Goal sentence and guideline bullets per strategy.
    const templates = new Map([
        [MutationStrategy.ADD_FEATURE, {
                goal: `Your mutation goal is to ADD A NEW FEATURE. Implement the following capability:`,
                guidelines: [
                    `- Create new files or add new exported functions/classes as needed`,
                    `- Do not break existing functionality`,
                    `- Ensure all new code compiles and existing tests still pass`,
                    `- The feature should integrate cleanly with the existing codebase`
                ]
            }],
        [MutationStrategy.REFACTOR, {
                goal: `Your mutation goal is to REFACTOR existing code for better quality:`,
                guidelines: [
                    `- Improve readability, reduce duplication, or strengthen type safety`,
                    `- Preserve all existing behavior exactly (zero functional change)`,
                    `- All existing tests must still pass after refactoring`
                ]
            }],
        [MutationStrategy.OPTIMIZE, {
                goal: `Your mutation goal is to OPTIMIZE for performance or efficiency:`,
                guidelines: [
                    `- Focus on reducing token usage, execution time, or memory`,
                    `- Preserve correctness — all tests must still pass`,
                    `- Measure and report the optimization impact if possible`
                ]
            }],
        [MutationStrategy.ADD_TESTS, {
                goal: `Your mutation goal is to ADD UNIT TESTS to improve coverage:`,
                guidelines: [
                    `- Add tests to src/tests/dynamic/ directory as .ts files`,
                    `- Tests must export a default function or a run() function`,
                    `- Prioritize testing edge cases and untested public functions`,
                    `- Do not modify existing test files (protected by Campbell Regime)`
                ]
            }],
        [MutationStrategy.FIX_BUG, {
                goal: `Your mutation goal is to FIX A BUG or stability issue:`,
                guidelines: [
                    `- Diagnose the root cause carefully before making changes`,
                    `- Make the minimal change required to fix the issue`,
                    `- Add a regression test if appropriate`,
                    `- All tests must pass after the fix`
                ]
            }],
        [MutationStrategy.IMPROVE_PROMPT, {
                goal: `Your mutation goal is to IMPROVE AGENT PROMPTS for better performance:`,
                guidelines: [
                    `- Edit src/orchestrator/system-prompt.txt or src/orchestrator/agent-prompts.ts`,
                    `- Make prompts more concise, clear, and effective`,
                    `- Preserve all existing agent capabilities`,
                    `- Build compiles and tests pass after changes`
                ]
            }]
    ]);
    const template = templates.get(strategy);
    if (!template) {
        // Unknown strategy: pass the task through under the header.
        return dgmHeader + originalTask;
    }
    return dgmHeader +
        `${template.goal}\n${originalTask}\n\n` +
        `Guidelines:\n` +
        template.guidelines.join('\n');
}
@@ -1,102 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.calculatePromptScore = calculatePromptScore;
37
- exports.benchmarkPrompt = benchmarkPrompt;
38
- const fs = __importStar(require("fs"));
39
- const path = __importStar(require("path"));
40
- const shell_tools_1 = require("../tools/shell-tools");
41
- const task_agent_1 = require("./task-agent");
42
/**
 * Calculates a prompt utility score where higher is better.
 *
 * The score is 100000 divided by a weighted cost:
 * `timeSeconds * timeWeight + totalTokens * tokenWeight`.
 *
 * @param {number} timeSeconds execution time in seconds
 * @param {number} totalTokens input + output tokens consumed
 * @param {{timeWeight?: number, tokenWeight?: number}} [weights]
 *   optional weight overrides; defaults are 0.4 for time and 0.6 for tokens
 * @returns {number} the score, or 0 when either measurement is 0 or when the
 *   weighted cost is not a positive number (NaN or non-positive inputs)
 */
function calculatePromptScore(timeSeconds, totalTokens, weights = {}) {
    const { timeWeight = 0.4, tokenWeight = 0.6 } = weights ?? {};
    // A zero measurement means the run produced no usable data.
    if (totalTokens === 0 || timeSeconds === 0)
        return 0;
    const cost = timeSeconds * timeWeight + totalTokens * tokenWeight;
    // Written as a negated comparison so NaN is rejected as well.
    if (!(cost > 0))
        return 0;
    return 100000 / cost;
}
53
/**
 * Benchmarks a system prompt on a fixed standard task.
 *
 * Runs the task agent with the given prompt, then verifies the result with
 * `npm run build`. The workspace is reset before and after the benchmark to
 * avoid side-effects.
 *
 * NOTE(review): the reset runs `git reset --hard HEAD` and `git clean -fd` in
 * the current working directory, which discards uncommitted changes and
 * untracked files. Callers must only run this in a disposable or fully
 * committed workspace.
 *
 * @param {string} systemPrompt system prompt override passed to the task agent
 * @returns {Promise<{success: boolean, time: number, tokens: number, score: number}>}
 *   `success` is true only when the agent ran and the build exited with 0;
 *   `time` is the agent-reported duration, or wall-clock seconds when none was
 *   reported; `score` is 0 on failure.
 */
async function benchmarkPrompt(systemPrompt) {
    const benchmarkTask = "Create a typescript helper file src/tools/math-helper.ts that exports a sum(a: number, b: number) function. Ensure it compiles. Do not change any other files.";
    const helperPath = path.join(process.cwd(), 'src/tools/math-helper.ts');
    // Removes the benchmark artefact and restores the git baseline.
    const resetWorkspace = async () => {
        if (fs.existsSync(helperPath)) {
            fs.unlinkSync(helperPath);
        }
        await (0, shell_tools_1.runCommand)('git reset --hard HEAD');
        await (0, shell_tools_1.runCommand)('git clean -fd');
    };
    // Start from a clean baseline.
    await resetWorkspace();
    const startTime = Date.now();
    let success = false;
    let inputTokens = 0;
    let outputTokens = 0;
    let timeSeconds = 0;
    try {
        // Delegate to the task agent directly with the system prompt override.
        const report = await (0, task_agent_1.runTaskAgent)(benchmarkTask, { systemPrompt });
        // Missing or non-numeric fields count as 0 so totals never become NaN.
        timeSeconds = Number(report.timeSeconds) || 0;
        inputTokens = Number(report.inputTokens) || 0;
        outputTokens = Number(report.outputTokens) || 0;
        // Verify that the project still compiles.
        const buildRes = await (0, shell_tools_1.runCommand)('npm run build');
        success = buildRes.exitCode === 0;
    }
    catch (err) {
        // A failed run is an expected benchmark outcome (success stays false),
        // but the cause is logged so it can be diagnosed.
        console.error('[benchmarkPrompt] Benchmark run failed:', err);
    }
    finally {
        // Clean up whatever the agent changed.
        await resetWorkspace();
    }
    if (!timeSeconds) {
        // No agent-reported duration: fall back to wall-clock time.
        timeSeconds = (Date.now() - startTime) / 1000;
    }
    const totalTokens = inputTokens + outputTokens;
    const score = success ? calculatePromptScore(timeSeconds, totalTokens) : 0;
    return { success, time: timeSeconds, tokens: totalTokens, score };
}