closed-loop-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of closed-loop-cli might be problematic. Click here for more details.
- package/dist/dashboard/server.js +237 -0
- package/dist/index.js +272 -0
- package/dist/orchestrator/agent-prompts.js +42 -0
- package/dist/orchestrator/autogenesis.js +973 -0
- package/dist/orchestrator/dgm-archive.js +223 -0
- package/dist/orchestrator/event-stream.js +103 -0
- package/dist/orchestrator/fitness-evaluator.js +99 -0
- package/dist/orchestrator/meta-agent.js +421 -0
- package/dist/orchestrator/microagent-registry.js +134 -0
- package/dist/orchestrator/mutation-strategies.js +174 -0
- package/dist/orchestrator/prompt-benchmark.js +102 -0
- package/dist/orchestrator/prompt-optimizer.js +169 -0
- package/dist/orchestrator/refactor-scanner.js +222 -0
- package/dist/orchestrator/research-manager.js +104 -0
- package/dist/orchestrator/rulez.js +135 -0
- package/dist/orchestrator/sahoo-gateway.js +261 -0
- package/dist/orchestrator/state-manager.js +121 -0
- package/dist/orchestrator/task-agent.js +444 -0
- package/dist/orchestrator/telegram-bot.js +374 -0
- package/dist/orchestrator/types.js +2 -0
- package/dist/tests/dynamic/dependencies.test.js +37 -0
- package/dist/tests/dynamic/dummy.test.js +7 -0
- package/dist/tests/dynamic/fuzzy-patch.test.js +68 -0
- package/dist/tests/dynamic/indexer.test.js +60 -0
- package/dist/tests/dynamic/openhands.test.js +83 -0
- package/dist/tests/dynamic/skills.test.js +88 -0
- package/dist/tests/run-tests.js +294 -0
- package/dist/tools/diff-tools.js +24 -0
- package/dist/tools/file-tools.js +191 -0
- package/dist/tools/indexer.js +301 -0
- package/dist/tools/math-helper.js +6 -0
- package/dist/tools/repo-map.js +122 -0
- package/dist/tools/search-tools.js +271 -0
- package/dist/tools/shell-tools.js +75 -0
- package/dist/tools/skills.js +122 -0
- package/dist/tools/tui-tools.js +82 -0
- package/docs/AI_Arch_Opt_Anti_Gaming.md +227 -0
- package/docs/AI_Self_Improvement_Safety.md +457 -0
- package/docs/Anthropic AI Agents_ Capabilities and Concerns.md +134 -0
- package/docs/Auto_ClosedLoop_AI_Agent.md +415 -0
- package/docs/Autonomous AI Agents_ Closing the Loop.docx +0 -0
- package/docs/Secure_AI_Sandbox_Framework.md +358 -0
- package/docs/skills/add-file-existence-check-utility.json +9 -0
- package/docs/skills/add-utility-function-for-file-existence-check.json +9 -0
- package/docs/skills/add-utility-function-to-module.json +9 -0
- package/docs/skills/extract-command-runner-utility.json +9 -0
- package/docs/skills/file-existence-check-utility.json +9 -0
- package/package.json +36 -0
- package/src/dashboard/public/index.css +1334 -0
- package/src/dashboard/public/index.html +385 -0
- package/src/dashboard/public/index.js +1059 -0
- package/src/dashboard/server.ts +209 -0
- package/src/index.ts +256 -0
- package/src/orchestrator/agent-prompts.ts +43 -0
- package/src/orchestrator/autogenesis.ts +1078 -0
- package/src/orchestrator/dgm-archive.ts +257 -0
- package/src/orchestrator/event-stream.ts +90 -0
- package/src/orchestrator/fitness-evaluator.ts +154 -0
- package/src/orchestrator/meta-agent.ts +434 -0
- package/src/orchestrator/microagent-registry.ts +115 -0
- package/src/orchestrator/microagents/git-helper.md +11 -0
- package/src/orchestrator/microagents/test-fixer.md +10 -0
- package/src/orchestrator/microagents/typescript-expert.md +11 -0
- package/src/orchestrator/mutation-strategies.ts +214 -0
- package/src/orchestrator/research-manager.ts +88 -0
- package/src/orchestrator/rulez.ts +118 -0
- package/src/orchestrator/sahoo-gateway.ts +300 -0
- package/src/orchestrator/state-manager.ts +161 -0
- package/src/orchestrator/system-prompt.txt +1 -0
- package/src/orchestrator/task-agent.ts +461 -0
- package/src/orchestrator/telegram-bot.ts +358 -0
- package/src/tests/dynamic/dependencies.test.ts +48 -0
- package/src/tests/dynamic/dummy.test.ts +4 -0
- package/src/tests/dynamic/fuzzy-patch.test.ts +42 -0
- package/src/tests/dynamic/indexer.test.ts +31 -0
- package/src/tests/dynamic/openhands.test.ts +59 -0
- package/src/tests/dynamic/skills.test.ts +63 -0
- package/src/tests/run-tests.ts +296 -0
- package/src/tools/diff-tools.ts +27 -0
- package/src/tools/file-tools.ts +187 -0
- package/src/tools/indexer.ts +325 -0
- package/src/tools/repo-map.ts +96 -0
- package/src/tools/search-tools.ts +258 -0
- package/src/tools/shell-tools.ts +90 -0
- package/src/tools/skills.ts +101 -0
- package/src/tools/tui-tools.ts +87 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
|
|
4
|
+
/**
 * Darwin Gödel Machine (DGM) — archive entry.
 *
 * Each entry is a snapshot of the agent at one point in its evolutionary
 * lineage.
 */
export interface ArchiveEntry {
  /** Unique identifier of this snapshot. */
  id: string;

  /** Git commit hash under which this snapshot was committed. */
  commitHash: string;

  /** ID of the parent entry this snapshot was derived from (`null` = origin). */
  parentId: string | null;

  /** Empirical fitness score (0–1), measured from the test pass rate. */
  fitness: number;

  /** Mutation strategy that was used to produce this snapshot. */
  mutationStrategy: string;

  /** Description of the task the mutation was applied for. */
  task: string;

  /** Timestamp of when the snapshot was created. */
  timestamp: string;

  /** Additional metadata (test counts, metrics, etc.); arbitrary extra keys are allowed. */
  metadata: {
    passCount?: number;
    failCount?: number;
    totalTests?: number;
    passRate?: number;
    goalDriftIndex?: number;
    constraintPreservationScore?: number;
    [key: string]: unknown;
  };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Summary statistics describing the current state of the archive.
 */
export interface ArchiveStats {
  /** Number of entries currently held in the archive. */
  totalEntries: number;

  /** Highest fitness among all entries (0 when the archive is empty). */
  bestFitness: number;

  /** Mean fitness across all entries. */
  averageFitness: number;

  /** ID of the entry with the highest fitness, or `null` when the archive is empty. */
  bestEntryId: string | null;

  /** ID of the entry with the most recent timestamp, or `null` when the archive is empty. */
  latestEntryId: string | null;

  /** Length of the lineage chain that ends at the latest entry. */
  generationDepth: number;
}
|
|
46
|
+
|
|
47
|
+
const ARCHIVE_FILENAME = 'dgm-archive.json';
const DEFAULT_MAX_SIZE = 20;

/**
 * DGM Archive Manager.
 *
 * Keeps the population of agent snapshots and handles parent selection for the
 * Darwin Gödel Machine evolution loop, after the paper
 * "Darwin Gödel Machine: Open-Ended Evolution of Self-Improving Agents".
 *
 * The population is persisted as JSON in `dgm-archive.json` inside the
 * workspace root and is rewritten after every mutation of the archive.
 */
export class DGMArchive {
  private archivePath: string;
  private entries: ArchiveEntry[];

  /**
   * @param workspaceRoot Directory holding the archive file; falls back to
   *   `process.cwd()` when omitted or empty.
   */
  constructor(workspaceRoot?: string) {
    const root = workspaceRoot || process.cwd();
    this.archivePath = path.join(root, ARCHIVE_FILENAME);
    this.entries = this.load();
  }

  // ─────────────────────────────────────────
  // Persistence
  // ─────────────────────────────────────────

  /**
   * Read the archive file from disk.
   *
   * A missing, unreadable or malformed file yields an empty archive. Individual
   * records that are not objects with a string `id` and a finite numeric
   * `fitness` are skipped, so one corrupt record cannot break sorting,
   * sampling or lineage tracing later on.
   */
  private load(): ArchiveEntry[] {
    if (!fs.existsSync(this.archivePath)) {
      return [];
    }
    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(this.archivePath, 'utf-8'));
      const rawEntries = (parsed as { entries?: unknown } | null)?.entries;
      if (!Array.isArray(rawEntries)) {
        return [];
      }
      const usable = rawEntries.filter(
        (candidate): candidate is ArchiveEntry => DGMArchive.isUsableEntry(candidate)
      );
      if (usable.length !== rawEntries.length) {
        console.warn(
          `[DGMArchive] Skipped ${rawEntries.length - usable.length} malformed archive entries`
        );
      }
      return usable;
    } catch (err) {
      // Best effort: a corrupt archive must not crash the agent, but say so.
      console.error('[DGMArchive] Failed to load archive:', err);
      return [];
    }
  }

  /** Write the whole archive to disk; failures are logged, never thrown. */
  private save(): void {
    const data = {
      version: '1.0.0',
      lastUpdated: new Date().toISOString(),
      entries: this.entries
    };
    try {
      fs.writeFileSync(this.archivePath, JSON.stringify(data, null, 2), 'utf-8');
    } catch (err) {
      console.error('[DGMArchive] Failed to save archive:', err);
    }
  }

  // ─────────────────────────────────────────
  // Core Operations
  // ─────────────────────────────────────────

  /**
   * Add a new snapshot to the archive and persist it.
   *
   * @param entry Snapshot data; `id` and `timestamp` are generated here.
   * @returns The entry that was just added.
   */
  addEntry(entry: Omit<ArchiveEntry, 'id' | 'timestamp'>): ArchiveEntry {
    const newEntry: ArchiveEntry = {
      ...entry,
      id: this.generateId(),
      timestamp: new Date().toISOString()
    };
    this.entries.push(newEntry);
    this.save();
    return newEntry;
  }

  /**
   * Choose the parent entry for the next mutation round.
   *
   * @param strategy
   *   - `'fitness_weighted'` (default): random pick weighted by fitness.
   *   - `'best'`: the entry with the highest fitness.
   *   - `'random'`: uniform random pick.
   * @returns The selected entry, or `null` when the archive is empty.
   */
  getBestParent(strategy: 'fitness_weighted' | 'best' | 'random' = 'fitness_weighted'): ArchiveEntry | null {
    if (this.entries.length === 0) return null;

    if (strategy === 'best') {
      return this.fittest();
    }

    if (strategy === 'random') {
      return this.entries[Math.floor(Math.random() * this.entries.length)];
    }

    return this.fitnessWeightedSample();
  }

  /**
   * Trace the lineage of an entry back towards its origin.
   *
   * The walk stops at an entry without `parentId`, at a parent that is no
   * longer in the archive (e.g. pruned), or when a parent link points back to
   * an entry already visited — a cyclic chain would otherwise never terminate.
   *
   * @returns Entries ordered from the oldest reachable ancestor to the
   *   requested entry; empty when `entryId` is unknown.
   */
  getLineage(entryId: string): ArchiveEntry[] {
    const chain: ArchiveEntry[] = [];
    const visited = new Set<string>();
    let current = this.findById(entryId);

    while (current && !visited.has(current.id)) {
      visited.add(current.id);
      chain.push(current);
      if (!current.parentId) break;
      current = this.findById(current.parentId);
    }

    // Collected child → ancestor; callers expect ancestor → child.
    return chain.reverse();
  }

  /**
   * Drop entries so the archive does not grow without bound, keeping the
   * entries with the highest fitness. After pruning, the remaining entries are
   * ordered by descending fitness. Pruned parents leave dangling `parentId`
   * references, which `getLineage` tolerates.
   *
   * @param maxSize Maximum number of entries to keep. Fractions are truncated,
   *   negative values are treated as 0, and `NaN` is ignored (nothing pruned).
   * @returns Number of entries removed.
   */
  pruneOldEntries(maxSize: number = DEFAULT_MAX_SIZE): number {
    if (Number.isNaN(maxSize)) return 0;
    const limit = Math.max(0, Math.trunc(maxSize));
    if (this.entries.length <= limit) return 0;

    const byFitness = [...this.entries].sort((a, b) => b.fitness - a.fitness);
    const removed = this.entries.length - limit;
    this.entries = byFitness.slice(0, limit);
    this.save();
    return removed;
  }

  // ─────────────────────────────────────────
  // Query / Stats
  // ─────────────────────────────────────────

  /** @returns A shallow copy of all entries in their internal order. */
  getAll(): ArchiveEntry[] {
    return [...this.entries];
  }

  /** @returns The entry with the given id, or `undefined` when absent. */
  findById(id: string): ArchiveEntry | undefined {
    return this.entries.find(e => e.id === id);
  }

  /** @returns The entry with the highest fitness, or `null` when empty. */
  getBestEntry(): ArchiveEntry | null {
    return this.entries.length === 0 ? null : this.fittest();
  }

  /** Compute summary statistics over the current population. */
  getStats(): ArchiveStats {
    if (this.entries.length === 0) {
      return {
        totalEntries: 0,
        bestFitness: 0,
        averageFitness: 0,
        bestEntryId: null,
        latestEntryId: null,
        generationDepth: 0
      };
    }

    const best = this.fittest();
    const latest = this.newest();
    const avgFitness =
      this.entries.reduce((sum, e) => sum + e.fitness, 0) / this.entries.length;

    return {
      totalEntries: this.entries.length,
      bestFitness: best.fitness,
      // Rounded to three decimals for display.
      averageFitness: Math.round(avgFitness * 1000) / 1000,
      bestEntryId: best.id,
      latestEntryId: latest.id,
      generationDepth: this.getLineage(latest.id).length
    };
  }

  /**
   * Fetch the most recent entries, newest first.
   *
   * @param n Maximum number of entries to return; negative or `NaN` yields none.
   */
  getRecentHistory(n: number = 10): ArchiveEntry[] {
    const limit = Number.isNaN(n) ? 0 : Math.max(0, n);
    return [...this.entries]
      .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())
      .slice(0, limit);
  }

  // ─────────────────────────────────────────
  // Private Helpers
  // ─────────────────────────────────────────

  /** Minimal shape check for records read from disk. */
  private static isUsableEntry(value: unknown): value is ArchiveEntry {
    if (typeof value !== 'object' || value === null) return false;
    const candidate = value as Partial<ArchiveEntry>;
    return typeof candidate.id === 'string'
      && typeof candidate.fitness === 'number'
      && Number.isFinite(candidate.fitness);
  }

  /** Sampling weight of an entry: fitness floored at 0.01 so weak entries stay selectable. */
  private static weightOf(entry: ArchiveEntry): number {
    return Number.isFinite(entry.fitness) ? Math.max(entry.fitness, 0.01) : 0.01;
  }

  /** Highest-fitness entry; the earliest one wins ties. Requires a non-empty archive. */
  private fittest(): ArchiveEntry {
    return this.entries.reduce((best, e) => (e.fitness > best.fitness ? e : best));
  }

  /** Entry with the latest timestamp; the earliest one wins ties. Requires a non-empty archive. */
  private newest(): ArchiveEntry {
    return this.entries.reduce((latest, e) =>
      new Date(e.timestamp).getTime() > new Date(latest.timestamp).getTime() ? e : latest
    );
  }

  /**
   * Fitness-weighted random sampling (roulette-wheel selection): each entry is
   * picked with probability proportional to `max(fitness, 0.01)`.
   */
  private fitnessWeightedSample(): ArchiveEntry {
    const totalWeight = this.entries.reduce((sum, e) => sum + DGMArchive.weightOf(e), 0);
    let remaining = Math.random() * totalWeight;

    for (const entry of this.entries) {
      remaining -= DGMArchive.weightOf(entry);
      if (remaining <= 0) return entry;
    }

    // Floating-point rounding can leave a tiny positive remainder: use the last entry.
    return this.entries[this.entries.length - 1];
  }

  /**
   * Build an id of the form `dgm_<base36 time>_<4 base36 chars>` that is not
   * already used by an entry in the archive.
   */
  private generateId(): string {
    let id: string;
    do {
      const ts = Date.now().toString(36);
      // The base-36 expansion can be shorter than four digits; pad to a fixed width.
      const rand = Math.random().toString(36).substring(2, 6).padEnd(4, '0');
      id = `dgm_${ts}_${rand}`;
    } while (this.entries.some(e => e.id === id));
    return id;
  }
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
|
|
4
|
+
/**
 * A single record in the event stream history.
 */
export interface EventStreamMessage {
  /** Short random identifier assigned when the event is published. */
  id: string;

  /** ISO-8601 creation time of the event. */
  timestamp: string;

  /** Origin of the event. */
  source: 'user' | 'agent' | 'environment' | 'system';

  /** Category of the event. */
  type: 'message' | 'action' | 'observation' | 'log';

  /** Caller-supplied name of the event. */
  name: string;

  /** Caller-supplied textual payload. */
  content: string;

  /** Optional free-form extra data attached by the publisher. */
  metadata?: any;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Process-wide, file-backed log of events.
 *
 * The history lives in `event-stream-history.json` in the current working
 * directory. It is read once when the singleton is created and rewritten in
 * full after every `publish` and `clear`.
 */
export class EventStream {
  /** Maximum number of events kept in memory and on disk. */
  private static readonly MAX_EVENTS = 1000;

  private static instance: EventStream;
  private filePath: string;
  private events: EventStreamMessage[] = [];

  private constructor() {
    const workspaceRoot = process.cwd();
    this.filePath = path.join(workspaceRoot, 'event-stream-history.json');
    this.loadEvents();
  }

  /** @returns The shared instance, creating it (and loading history) on first use. */
  public static getInstance(): EventStream {
    if (!EventStream.instance) {
      EventStream.instance = new EventStream();
    }
    return EventStream.instance;
  }

  /**
   * Load the persisted history. A missing file, a read/parse failure, or JSON
   * that is not an array all result in an empty history, so that `publish`
   * can always append.
   */
  private loadEvents() {
    try {
      if (!fs.existsSync(this.filePath)) {
        this.events = [];
        return;
      }
      const parsed: unknown = JSON.parse(fs.readFileSync(this.filePath, 'utf-8'));
      if (Array.isArray(parsed)) {
        this.events = parsed;
      } else {
        console.error('[EventStream] Failed to load event stream history:', new Error('history file does not contain a JSON array'));
        this.events = [];
      }
    } catch (e) {
      console.error('[EventStream] Failed to load event stream history:', e);
      this.events = [];
    }
  }

  /** Write the full history to disk; failures are logged, never thrown. */
  private saveEvents() {
    try {
      fs.writeFileSync(this.filePath, JSON.stringify(this.events, null, 2), 'utf-8');
    } catch (e) {
      console.error('[EventStream] Failed to save event stream history:', e);
    }
  }

  /**
   * Append an event to the history and persist it.
   *
   * @param source Origin of the event.
   * @param type Category of the event.
   * @param name Name of the event.
   * @param content Textual payload.
   * @param metadata Optional extra data stored alongside the event.
   * @returns The stored event, including its generated `id` and `timestamp`.
   */
  public publish(
    source: 'user' | 'agent' | 'environment' | 'system',
    type: 'message' | 'action' | 'observation' | 'log',
    name: string,
    content: string,
    metadata?: any
  ): EventStreamMessage {
    const event: EventStreamMessage = {
      id: Math.random().toString(36).substring(2, 11),
      timestamp: new Date().toISOString(),
      source,
      type,
      name,
      content,
      metadata
    };
    this.events.push(event);

    // Enforce the cap by dropping ALL excess oldest events, so a history file
    // that was already over the limit is brought back down to it.
    const overflow = this.events.length - EventStream.MAX_EVENTS;
    if (overflow > 0) {
      this.events.splice(0, overflow);
    }

    this.saveEvents();

    return event;
  }

  /** @returns The live internal event array (not a copy), oldest first. */
  public getEvents(): EventStreamMessage[] {
    return this.events;
  }

  /** Remove all events and persist the empty history. */
  public clear() {
    this.events = [];
    this.saveEvents();
  }
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { runCommand } from '../tools/shell-tools';
|
|
2
|
+
|
|
3
|
+
/**
 * Fitness report produced by evaluating the project with its unit tests.
 *
 * Fitness is the empirical test pass rate (0–1), following the DGM paper's
 * concept of empirical evaluation.
 */
export interface FitnessReport {
  /** Pass rate (0–1) = passCount / totalTests. */
  passRate: number;

  /** Number of tests that passed. */
  passCount: number;

  /** Number of tests that failed. */
  failCount: number;

  /** Total number of tests. */
  totalTests: number;

  /** Composite fitness score (0–1) that also accounts for compile status. */
  score: number;

  /** Whether compilation succeeded. */
  compileSuccess: boolean;

  /** Human-readable one-line summary of the result. */
  summary: string;

  /** Raw output captured from the build or test command. */
  rawOutput?: string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Result of comparing the fitness of a candidate against a baseline.
 */
export interface FitnessComparison {
  /** True when the candidate's score is strictly higher than the baseline's. */
  improved: boolean;

  /** Candidate score minus baseline score. */
  delta: number;

  /** Report the candidate was compared against. */
  baseline: FitnessReport;

  /** Report of the candidate being judged. */
  candidate: FitnessReport;

  /** Human-readable description of the outcome. */
  message: string;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Measure the empirical fitness of the current working tree.
 *
 * DGM approach: fitness comes from real unit-test results rather than from a
 * heuristic alone ("Each new agent version is evaluated empirically using
 * coding benchmarks").
 *
 * Steps:
 *  1. Run `npm run build`; a non-zero exit code yields a report with score 0.
 *  2. Run `npm test` and parse its combined stdout/stderr.
 *  3. Score = parsed pass rate, except that a test command which exits
 *     non-zero WITHOUT reporting any recognizable test result (crash, missing
 *     script, runner error) scores 0 instead of being treated as "no failures".
 *
 * @returns The fitness report for the current state of the project.
 */
export async function evaluateFitness(): Promise<FitnessReport> {
  // Step 1: compile check
  const compileRes = await runCommand('npm run build');
  const compileSuccess = compileRes.exitCode === 0;

  if (!compileSuccess) {
    return {
      passRate: 0,
      passCount: 0,
      failCount: 0,
      totalTests: 0,
      score: 0,
      compileSuccess: false,
      summary: `Compilation failed — fitness = 0`,
      // Guard against missing streams, same as for the test output below.
      rawOutput: (compileRes.stdout || '') + (compileRes.stderr || '')
    };
  }

  // Step 2: run tests
  const testRes = await runCommand('npm test');
  const rawOutput = (testRes.stdout || '') + (testRes.stderr || '');

  // Step 3: parse test output
  const parsed = parseTestOutput(rawOutput);

  // Step 4: compute the score.
  // parseTestOutput reports passRate 1 when it finds no results at all; that is
  // only trustworthy if the test command itself succeeded.
  const runnerFailedSilently = parsed.totalTests === 0 && testRes.exitCode !== 0;
  const passRate = runnerFailedSilently ? 0 : parsed.passRate;
  const score = passRate;

  let summary =
    `Compile: PASS | ` +
    `Tests: ${parsed.passCount}/${parsed.totalTests} passed ` +
    `(${(passRate * 100).toFixed(1)}%) | Score: ${score.toFixed(3)}`;
  if (runnerFailedSilently) {
    summary += ` | Test command exited with code ${testRes.exitCode} without reporting results`;
  }

  return {
    ...parsed,
    passRate,
    score,
    compileSuccess,
    summary,
    rawOutput
  };
}
|
|
85
|
+
|
|
86
|
+
/**
 * Compare the fitness of a candidate snapshot against a baseline snapshot.
 *
 * @param candidate Report of the snapshot being judged.
 * @param baseline Report of the snapshot it is measured against.
 * @returns The score difference (candidate − baseline), whether it is a strict
 *   improvement, both reports, and a human-readable message.
 */
export function compareToBaseline(
  candidate: FitnessReport,
  baseline: FitnessReport
): FitnessComparison {
  const before = baseline.score.toFixed(3);
  const after = candidate.score.toFixed(3);
  const delta = candidate.score - baseline.score;
  const improved = delta > 0;

  let message: string;
  if (improved) {
    message = `✔ Fitness improved: ${before} → ${after} (+${delta.toFixed(3)})`;
  } else if (delta === 0) {
    message = `= Fitness unchanged: ${after}`;
  } else {
    message = `✘ Fitness regressed: ${before} → ${after} (${delta.toFixed(3)})`;
  }

  return { improved, delta, baseline, candidate, message };
}
|
|
104
|
+
|
|
105
|
+
/**
 * Parse test-runner output and count passed / failed tests.
 *
 * Recognized formats, applied in order (a later match overrides an earlier one):
 *  1. `[Pass]` / `[Fail]` markers — the format of the project's own run-tests.ts.
 *  2. Mocha: `X passing` / `X failing`.
 *  3. Jest summary line: `Tests: 1 failed, 2 passed, 3 total`. The categories
 *     are read independently of their order (Jest prints `failed` first).
 *  4. The banner `ALL BINARY ASSERTION TESTS PASSED`, used only when nothing
 *     else produced a count.
 *
 * @param output Combined stdout/stderr of the test command.
 * @returns The counts plus `passRate = passCount / totalTests`. When no test
 *   result is recognized at all, `totalTests` is 0 and `passRate` is 1.
 */
export function parseTestOutput(output: string): Pick<
  FitnessReport,
  'passRate' | 'passCount' | 'failCount' | 'totalTests'
> {
  // Number of tests in run-tests.ts (Test 1–11), assumed when only the
  // success banner is present.
  const BANNER_TEST_COUNT = 11;

  let passCount = 0;
  let failCount = 0;

  // Pattern 1: "[Pass] <test name>" — current format of run-tests.ts
  const passMatches = output.match(/\[Pass\]/g);
  const failMatches = output.match(/\[Fail\]/g);

  if (passMatches || failMatches) {
    passCount = passMatches ? passMatches.length : 0;
    failCount = failMatches ? failMatches.length : 0;
  }

  // Pattern 2: Mocha format — "X passing" / "X failing"
  const mochaPassing = output.match(/(\d+)\s+passing/i);
  const mochaFailing = output.match(/(\d+)\s+failing/i);

  if (mochaPassing || mochaFailing) {
    passCount = mochaPassing ? parseInt(mochaPassing[1], 10) : passCount;
    failCount = mochaFailing ? parseInt(mochaFailing[1], 10) : failCount;
  }

  // Pattern 3: Jest summary line — "Tests: <n> failed, <n> passed, <n> total".
  // Each count must start the list or follow a comma, so text such as
  // "Tests: 3/4 passed" is not mistaken for a Jest summary.
  const jestSummary = output.match(/Tests:\s+([^\r\n]*)/i);
  if (jestSummary) {
    const jestPassed = jestSummary[1].match(/(?:^|,\s*)(\d+)\s+passed\b/i);
    const jestFailed = jestSummary[1].match(/(?:^|,\s*)(\d+)\s+failed\b/i);
    if (jestPassed) {
      passCount = parseInt(jestPassed[1], 10) || passCount;
    }
    if (jestFailed) {
      failCount = parseInt(jestFailed[1], 10);
    }
  }

  // Pattern 4: "ALL BINARY ASSERTION TESTS PASSED" — means 100% passed.
  // Only a fallback for when no explicit counts were detected.
  if (output.includes('ALL BINARY ASSERTION TESTS PASSED')) {
    if (passCount === 0 && failCount === 0) {
      passCount = BANNER_TEST_COUNT;
      failCount = 0;
    }
  }

  const totalTests = passCount + failCount;
  const passRate = totalTests > 0 ? passCount / totalTests : (failCount === 0 ? 1 : 0);

  return { passRate, passCount, failCount, totalTests };
}
|