@bolloon/bolloon-agent 0.1.34 → 0.1.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.auto-evolve-calls +1 -0
- package/.last-auto-evolve-baseline +1 -0
- package/Bolloon.md +103 -0
- package/dist/agents/pi-sdk.js +264 -12
- package/dist/bootstrap/bootstrap.js +114 -0
- package/dist/bootstrap/context-collector.js +296 -0
- package/dist/bootstrap/lifecycle-hooks.js +109 -0
- package/dist/bootstrap/project-context.js +151 -0
- package/dist/index.js +11 -0
- package/dist/llm/pi-ai.js +31 -21
- package/dist/pi-ecosystem-judgment/adaptive-scan.js +231 -0
- package/dist/pi-ecosystem-judgment/causal-judge.js +449 -0
- package/dist/pi-ecosystem-judgment/detect-hook.js +168 -0
- package/dist/pi-ecosystem-judgment/distill-prompt.js +226 -0
- package/dist/pi-ecosystem-judgment/evolve-judgment.js +170 -0
- package/dist/pi-ecosystem-judgment/human-value-pipeline.js +21 -0
- package/dist/pi-ecosystem-judgment/human-value-store.js +283 -22
- package/dist/pi-ecosystem-judgment/injection-gate.js +166 -0
- package/dist/pi-ecosystem-judgment/monitor-gate.js +188 -0
- package/dist/security/builtin-guards.js +124 -0
- package/dist/security/context-router-tool.js +106 -0
- package/dist/security/react-harness.js +143 -0
- package/dist/security/tool-gate.js +235 -0
- package/dist/utils/auto-evolve-policy.js +117 -0
- package/dist/utils/clamp.js +7 -0
- package/dist/utils/double.js +6 -0
- package/dist/web/client.js +668 -204
- package/dist/web/index.html +24 -4
- package/dist/web/server.js +531 -10
- package/lefthook.yml +29 -0
- package/package.json +3 -2
- package/scripts/auto-evolve-loop.ts +376 -0
- package/scripts/auto-evolve-oneshot.sh +155 -0
- package/scripts/auto-evolve-snapshot.sh +136 -0
- package/scripts/detect-schema-changes.sh +48 -0
- package/scripts/diff-reviewer.ts +159 -0
- package/scripts/weekly-report.ts +364 -0
- package/src/agents/pi-sdk.ts +293 -15
- package/src/bootstrap/bootstrap.ts +132 -0
- package/src/bootstrap/context-collector.ts +342 -0
- package/src/bootstrap/lifecycle-hooks.ts +176 -0
- package/src/bootstrap/project-context.ts +163 -0
- package/src/index.ts +11 -0
- package/src/llm/pi-ai.ts +33 -22
- package/src/security/builtin-guards.ts +162 -0
- package/src/security/context-router-tool.ts +122 -0
- package/src/security/react-harness.ts +177 -0
- package/src/security/tool-gate.ts +294 -0
- package/src/utils/auto-evolve-policy.ts +138 -0
- package/src/utils/clamp.ts +5 -0
- package/src/web/client.js +668 -204
- package/src/web/index.html +24 -4
- package/src/web/server.ts +596 -10
- package/staging/auto-evolve/clean-001/.review-verdict +9 -0
- package/staging/auto-evolve/clean-001/clean-001.patch +14 -0
- package/staging/auto-evolve/e2e-001/.patch-id +1 -0
- package/staging/auto-evolve/e2e-001/.review-verdict +12 -0
- package/staging/auto-evolve/e2e-001/e2e-001.patch +11 -0
- package/staging/auto-evolve/test-bad/.review-verdict +12 -0
- package/staging/auto-evolve/test-bad/test-bad.patch +11 -0
package/lefthook.yml
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# lefthook.yml — Bolloon source guardrail (stage C, guardrail 1)
#
# Installed with user authorization (2026-06-14 decision: full stage C guardrail set)
# Triggers: git commit / git push
# Purpose: code changed automatically by the LLM must pass vitest + tsc before
#          it enters the repository; a failure blocks the commit/push.

# Runs on `git commit`; both commands are started in parallel.
pre-commit:
  parallel: true
  commands:
    # Stop at the first failing test (--bail=1).
    vitest-bail:
      glob: "src/**/*.{ts,tsx,js}"
      run: npx vitest run --bail=1 --reporter=dot
    # Type-check only, no emitted output.
    tsc-check:
      glob: "src/**/*.{ts,tsx}"
      run: npx tsc --noEmit

# Runs on `git push`; all three commands are started in parallel.
pre-push:
  parallel: true
  commands:
    vitest-full:
      run: npx vitest run --reporter=dot
    build-check:
      run: npm run build:main
    # Refuse to push when no auto-evolve-baseline-* tag exists.
    tag-baseline:
      run: |
        if ! git tag -l "auto-evolve-baseline-*" | head -1 | grep -q .; then
          echo "❌ 没有 auto-evolve-baseline-* tag. 先跑: bash scripts/auto-evolve-snapshot.sh"
          exit 1
        fi
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bolloon/bolloon-agent",
|
|
3
|
-
"version": "0.1.34",
|
|
3
|
+
"version": "0.1.35",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "P2P AI Document Agent - 全局安装后执行 `bolloon` 启动产品",
|
|
6
6
|
"main": "dist/cli.js",
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"src/constraint-runtime"
|
|
33
33
|
],
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"@bolloon/bolloon-agent": "^0.1.34",
|
|
35
|
+
"@bolloon/bolloon-agent": "^0.1.35",
|
|
36
36
|
"@bolloon/constraint-runtime": "0.1.0",
|
|
37
37
|
"@chainsafe/libp2p-noise": "^17.0.0",
|
|
38
38
|
"@chainsafe/libp2p-yamux": "^8.0.1",
|
|
@@ -69,6 +69,7 @@
|
|
|
69
69
|
"concurrently": "^9.2.1",
|
|
70
70
|
"electron": "^42.3.0",
|
|
71
71
|
"electron-builder": "^26.8.1",
|
|
72
|
+
"lefthook": "^2.1.9",
|
|
72
73
|
"playwright": "^1.60.0",
|
|
73
74
|
"tsx": "^4.0.0",
|
|
74
75
|
"typescript": "^5.0.0",
|
|
package/scripts/auto-evolve-loop.ts
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* auto-evolve-loop.ts — 阶段 D 主循环 (重写版)
|
|
4
|
+
*
|
|
5
|
+
* 流程 (per iteration):
|
|
6
|
+
* 1. 跑 vitest, 拿 fail 信息
|
|
7
|
+
* 2. 把 fail + 当前 src/test 源码喂给 LLM (走 PiAIModel / MiniMax)
|
|
8
|
+
* 3. 写 diff 到 staging/auto-evolve/iter-<N>/
|
|
9
|
+
* 4. 跑 detect-schema-changes + diff-reviewer
|
|
10
|
+
* 5a. reviewer PASS → git apply --recount --whitespace=fix + commit
|
|
11
|
+
* 5b. reviewer FAIL → log, 累计连续失败
|
|
12
|
+
* 6. 连续 3 次 FAIL → 自动回滚 baseline + 通知
|
|
13
|
+
*
|
|
14
|
+
* 退出条件 (任一即停):
|
|
15
|
+
* - 全部测试 PASS (loop 成功)
|
|
16
|
+
* - 连续 3 次 FAIL (loop 认输)
|
|
17
|
+
* - max-iter 达到 (默认 10)
|
|
18
|
+
* - 人类 SIGINT (Ctrl-C)
|
|
19
|
+
*
|
|
20
|
+
* 用法:
|
|
21
|
+
* tsx scripts/auto-evolve-loop.ts
|
|
22
|
+
* tsx scripts/auto-evolve-loop.ts --max-iter 5
|
|
23
|
+
* tsx scripts/auto-evolve-loop.ts --target src/utils/foo.ts
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { execFile } from 'child_process';
|
|
27
|
+
import * as fs from 'fs/promises';
|
|
28
|
+
import { promisify } from 'util';
|
|
29
|
+
import * as path from 'path';
|
|
30
|
+
|
|
31
|
+
// Promise-returning form of child_process.execFile (arguments are passed as an array).
const pExec = promisify(execFile);
// Directory the script was launched from; used as cwd for child processes and as the base of repo-relative paths.
const REPO = process.cwd();
|
|
33
|
+
|
|
34
|
+
/** Aggregated outcome of one vitest run, as built by parseVitestJson / runVitest. */
interface VitestResult {
  /** Count of failed assertions (runVitest reports 1 when the output could not be parsed). */
  failed: number;
  /** Count of passed assertions. */
  passed: number;
  /** Names of the test files that were counted (after target filtering, if any). */
  totalFiles: string[];
  /** One entry per failed assertion: owning file, test name and truncated failure text. */
  failingTests: { file: string; name: string; message: string }[];
}
|
|
40
|
+
|
|
41
|
+
/**
 * Run the whole vitest suite with the JSON reporter and summarise the outcome.
 *
 * The suite is always executed in full; `targetFile` is only forwarded to
 * parseVitestJson, which does the filtering.
 *
 * @param targetFile optional path used by parseVitestJson to narrow the result
 * @returns the parsed result, or a synthetic single-failure result
 *          ("vitest crashed") when no parseable JSON could be obtained
 */
async function runVitest(targetFile?: string): Promise<VitestResult> {
  const summarise = (raw: string): VitestResult => parseVitestJson(JSON.parse(raw), targetFile);

  try {
    const run = await pExec('npx', ['vitest', 'run', '--reporter=json', '--no-color'], {
      cwd: REPO,
      maxBuffer: 50 * 1024 * 1024,
    });
    return summarise(run.stdout);
  } catch (err: any) {
    // The rejected error may still carry the reporter's JSON on err.stdout.
    if (err.stdout) {
      try {
        return summarise(err.stdout);
      } catch {
        // stdout was not usable JSON; fall through to the crash result
      }
    }
    const crash = { file: 'unknown', name: 'vitest crashed', message: err.message?.slice(0, 500) || '' };
    return { failed: 1, passed: 0, totalFiles: [], failingTests: [crash] };
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Convert the object produced by vitest's JSON reporter into a VitestResult.
 *
 * When `targetFile` is given, only test files related to it are counted. A test
 * file is related when its name contains the target's basename (the original
 * rule) OR when both share the same module stem, so that a source target such
 * as `src/utils/foo.ts` selects `foo.test.ts` / `foo.spec.ts`. Without the stem
 * rule a source-file target matched nothing and the run looked fully green.
 *
 * @param j          parsed reporter output; only `testResults[].name` and
 *                   `testResults[].assertionResults[]` are read
 * @param targetFile optional path of a source or test file to narrow to
 * @returns counts of passed/failed assertions, the files counted, and one
 *          entry per failed assertion (message truncated to 1500 chars)
 */
function parseVitestJson(j: any, targetFile?: string): VitestResult {
  // "a/b/foo.test.ts" -> "foo": drop directories, the script extension, then a .test/.spec suffix.
  const stemOf = (p: string): string =>
    (p.split(/[\\/]/).pop() || '')
      .replace(/\.[cm]?[jt]sx?$/, '')
      .replace(/\.(test|spec)$/, '');

  const targetBase = targetFile ? targetFile.split('/').pop() || '' : '';
  const targetStem = targetFile ? stemOf(targetFile) : '';

  const failingTests: VitestResult['failingTests'] = [];
  const totalFiles: string[] = [];
  let failed = 0;
  let passed = 0;

  for (const f of j.testResults || []) {
    if (targetFile) {
      const byBasename = Boolean(f.name?.includes(targetBase));
      const byStem = typeof f.name === 'string' && targetStem !== '' && stemOf(f.name) === targetStem;
      if (!byBasename && !byStem) continue;
    }

    totalFiles.push(f.name || '');
    // Single pass: count both outcomes and collect failure details together.
    for (const a of f.assertionResults || []) {
      if (a.status === 'passed') {
        passed++;
      } else if (a.status === 'failed') {
        failed++;
        failingTests.push({
          file: f.name,
          name: a.fullName || a.title,
          message: (a.failureMessages || []).join('\n').slice(0, 1500),
        });
      }
    }
  }
  return { failed, passed, totalFiles, failingTests };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Send one user prompt to the model exposed by ../src/llm/pi-ai.js
 * (`initMinimax().generateText`) and return its text.
 *
 * Any failure to load the module or to generate text is logged and reported as
 * an empty string, which the caller treats as "no diff".
 *
 * The call counter is updated only after a successful generation, and a
 * failure to update it is logged separately: bookkeeping must never cause a
 * valid model reply to be thrown away.
 *
 * @param prompt full prompt text, sent as a single user message
 * @returns the generated text, or '' when the model could not be used
 */
async function callLLM(prompt: string): Promise<string> {
  let text: string;
  try {
    // `as any` keeps the type checker from resolving the module.
    // (The original retried the identical specifier on failure, which cannot succeed.)
    const pi = await import('../src/llm/pi-ai.js' as any);
    const client = pi.initMinimax();
    text =
      (await client.generateText({
        messages: [{ role: 'user', content: prompt }],
        maxTokens: 4096,
        temperature: 0.2,
      })) || '';
  } catch (e: any) {
    console.warn(`[loop] PiAIModel 不可用: ${e.message?.slice(0, 100)}`);
    return '';
  }

  try {
    await bumpCallCount();
  } catch (e: any) {
    console.warn(`[loop] call counter update failed: ${e?.message?.slice(0, 100)}`);
  }
  return text;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Increment today's LLM call counter stored in `<REPO>/.auto-evolve-calls`.
 *
 * The file is rewritten with a single `YYYY-MM-DD:<count>` line. The previous
 * count is taken from the file's last non-empty line when that line belongs to
 * today (UTC date from toISOString); otherwise counting restarts at 1.
 * A missing/unreadable file or a malformed count is treated as 0, so a
 * corrupted line can no longer be propagated as `NaN`.
 */
async function bumpCallCount(): Promise<void> {
  const f = path.join(REPO, '.auto-evolve-calls');
  const today = new Date().toISOString().slice(0, 10);
  let count = 0;
  try {
    const lines = (await fs.readFile(f, 'utf-8')).trim().split('\n').filter(Boolean);
    const lastLine = lines[lines.length - 1];
    if (lastLine?.startsWith(today)) {
      const parsed = Number.parseInt(lastLine.split(':')[1] || '0', 10);
      // Ignore garbage such as "2026-01-01:abc" instead of writing back NaN.
      if (Number.isFinite(parsed) && parsed >= 0) count = parsed;
    }
  } catch { /* no file yet */ }
  await fs.writeFile(f, `${today}:${count + 1}\n`, 'utf-8');
}
|
|
122
|
+
|
|
123
|
+
// Prompt template sent to the LLM (text is in Chinese). It lists the repair
// constraints and the required output format (a single fenced diff block), and
// contains two placeholders, {{FAIL}} and {{SOURCE}}, that main() fills in with
// the failure summary and the source context.
const FIX_PROMPT = `你是一个谨慎的代码修复助手. 你的工作是修复失败的测试, **不**做无关改动.

约束 (违反任一即重做):
1. 改动最小: 只动让测试通过必需的部分
2. 不改测试本身 (除非 test 自身有 bug)
3. 不引入新依赖
4. 不删注释 / 不改注释
5. 不动 schema (interface/type) 除非 fail 信息明确要求
6. 不用 any / unknown / @ts-ignore 偷懒
7. 不动 package.json / tsconfig.json

输出格式 (严格):
- 第一个字符必须是 \`\`\`diff
- 中间是 unified diff (git format-patch 风格, --- a/path +++ b/path)
- 最后一个字符必须是 \`\`\`
- 不要在 diff 块外输出任何文字
- 不要使用 thinking 块 (下游会丢失)
- **改动必须严格匹配当前源码 (行数, 缩进)**

FAIL 信息:
{{FAIL}}

相关源码 (请严格匹配行数和缩进):
{{SOURCE}}

请**只**输出 diff 块:`;
|
|
149
|
+
|
|
150
|
+
/**
 * Read a file as UTF-8 and return at most its first 8000 characters.
 *
 * @param file path of the file to read
 * @returns the (possibly truncated) content, or '' when the file cannot be read
 */
async function getSourceContext(file: string): Promise<string> {
  const limit = 8000;
  return fs.readFile(file, 'utf-8').then(
    (text) => text.slice(0, limit),
    () => '',
  );
}
|
|
158
|
+
|
|
159
|
+
/**
 * Collect the source of the relative modules a test file imports, so the LLM
 * sees the real files instead of guessing line numbers (a guessed diff fails
 * to apply).
 *
 * Only `from '<relative path>'` specifiers are considered. A `.js` specifier
 * is mapped to the sibling `.ts` file when that exists; a specifier without a
 * `.js`/`.ts` suffix gets `.ts` appended when that file exists. Each resolved
 * file is included once even if it is imported several times (e.g. a value
 * import plus a type import), which previously duplicated it in the prompt.
 *
 * @param testFile path of the test file whose imports are followed
 * @returns sections of the form `--- <path> ---\n<first 4000 chars>` joined by
 *          blank lines; '' when the test file cannot be read
 */
async function getImportedSource(testFile: string): Promise<string> {
  const exists = (p: string): Promise<boolean> =>
    fs.access(p).then(
      () => true,
      () => false,
    );

  try {
    const testContent = await fs.readFile(testFile, 'utf-8');
    const dir = path.dirname(testFile);
    const seen = new Set<string>();
    const out: string[] = [];

    for (const m of testContent.matchAll(/from\s+['"]([^'"]+)['"]/g)) {
      const imp = m[1];
      if (!imp.startsWith('.')) continue; // packages / absolute specifiers are skipped

      let resolved = path.resolve(dir, imp);
      if (resolved.endsWith('.js')) {
        // ESM-style specifier: prefer the TypeScript source next to it
        const tsTry = resolved.replace(/\.js$/, '.ts');
        if (await exists(tsTry)) resolved = tsTry;
      } else if (!resolved.endsWith('.ts')) {
        if (await exists(resolved + '.ts')) resolved += '.ts';
      }

      if (seen.has(resolved)) continue;
      seen.add(resolved);

      try {
        const content = await fs.readFile(resolved, 'utf-8');
        out.push(`--- ${resolved} ---\n${content.slice(0, 4000)}`);
      } catch { /* unreadable import: skip */ }
    }
    return out.join('\n\n');
  } catch {
    return '';
  }
}
|
|
193
|
+
|
|
194
|
+
/**
 * Pull the body of the first ```diff fenced block out of the model's reply.
 *
 * @param llmOutput raw text returned by the LLM
 * @returns the trimmed block content (possibly ''), or null when no complete
 *          ```diff ... ``` fence is present
 */
function extractDiff(llmOutput: string): string | null {
  const fenced = llmOutput.match(/```diff\s*([\s\S]*?)```/);
  return fenced === null ? null : fenced[1].trim();
}
|
|
200
|
+
|
|
201
|
+
/**
 * Store a patch under `<REPO>/staging/auto-evolve/iter-NNN/`.
 *
 * Writes `iter-NNN.patch` with the patch text and `.patch-id` with the id,
 * creating the directory if needed.
 *
 * @param iter         iteration number; zero-padded to three digits in the id
 * @param patchContent unified diff text to store
 * @returns the patch id (`iter-NNN`)
 */
async function writePatch(iter: number, patchContent: string): Promise<string> {
  const id = 'iter-' + String(iter).padStart(3, '0');
  const stagingDir = path.join(REPO, 'staging', 'auto-evolve', id);
  const patchPath = path.join(stagingDir, `${id}.patch`);
  const idPath = path.join(stagingDir, '.patch-id');

  await fs.mkdir(stagingDir, { recursive: true });
  await fs.writeFile(patchPath, patchContent, 'utf-8');
  await fs.writeFile(idPath, id, 'utf-8');
  return id;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Run the two review steps for a staged patch and read back the verdict.
 *
 * Executes scripts/detect-schema-changes.sh and scripts/diff-reviewer.ts for
 * the patch id, then parses `<REPO>/staging/auto-evolve/<id>/.review-verdict`.
 *
 * The gate fails closed: only an explicit `"PASS"` verdict is reported as
 * PASS. A missing or unrecognised verdict, a malformed file, or any script
 * error yields FAIL. (Previously the raw value was returned and the caller
 * only compared against 'FAIL', so e.g. `undefined` slipped through.)
 *
 * @param patchId id of the patch directory under staging/auto-evolve
 * @returns the verdict plus the reviewer's concerns (always an array)
 */
async function runReviewer(patchId: string): Promise<{ verdict: 'PASS' | 'FAIL'; concerns: string[] }> {
  try {
    await pExec('bash', [path.join(REPO, 'scripts/detect-schema-changes.sh'), patchId], { cwd: REPO });
    await pExec('npx', ['tsx', 'scripts/diff-reviewer.ts', patchId, '--model', 'claude-sonnet-4-6'], { cwd: REPO });
    const verdictFile = path.join(REPO, 'staging', 'auto-evolve', patchId, '.review-verdict');
    const json = JSON.parse(await fs.readFile(verdictFile, 'utf-8'));

    // The caller reads .length and iterates, so always hand back a string array.
    const concerns: string[] = Array.isArray(json?.concerns) ? json.concerns.map(String) : [];
    if (json?.verdict === 'PASS') return { verdict: 'PASS', concerns };
    if (json?.verdict !== 'FAIL') {
      concerns.push(`unrecognized reviewer verdict: ${JSON.stringify(json?.verdict)}`);
    }
    return { verdict: 'FAIL', concerns };
  } catch (err: any) {
    return { verdict: 'FAIL', concerns: [err.message?.slice(0, 200) || 'reviewer crashed'] };
  }
}
|
|
221
|
+
|
|
222
|
+
/**
 * Apply a staged patch to the working tree and commit the result.
 *
 * Steps: make sure the patch file ends with a newline, `git apply --recount
 * --whitespace=fix`, stage every path reported by `git status`, then commit.
 *
 * Paths are read from `git status --porcelain -z` (NUL-separated, unquoted).
 * The previous whitespace-based parsing of the line format turned every
 * " M path" line after the first into "M path", whose `git add` failed
 * silently, so multi-file patches were committed only in part.
 *
 * @param patchId id of the patch directory under staging/auto-evolve
 * @returns true when the commit succeeded; false on any failure (apply,
 *          hooks, commit), which is logged
 */
async function commitPatch(patchId: string): Promise<boolean> {
  try {
    const patchFile = path.join(REPO, 'staging', 'auto-evolve', patchId, `${patchId}.patch`);

    // The LLM output may lack the final newline.
    let patchContent = await fs.readFile(patchFile, 'utf-8');
    if (!patchContent.endsWith('\n')) patchContent += '\n';
    await fs.writeFile(patchFile, patchContent, 'utf-8');

    // --whitespace=fix tolerates whitespace damage, --recount recomputes hunk line counts
    await pExec('git', ['apply', '--recount', '--whitespace=fix', patchFile], { cwd: REPO });

    const { stdout } = await pExec('git', ['status', '--porcelain', '-z'], { cwd: REPO });
    const entries = stdout.split('\0');
    const files: string[] = [];
    for (let i = 0; i < entries.length; i++) {
      const entry = entries[i];
      if (entry.length < 4) continue; // "XY " prefix plus at least one path character
      files.push(entry.slice(3));
      // Rename/copy entries are followed by a separate entry holding the source path.
      if (/[RC]/.test(entry.slice(0, 2))) i++;
    }

    for (const f of files) {
      try {
        await pExec('git', ['add', '--', f], { cwd: REPO });
      } catch (addErr: any) {
        // Best effort, as before, but no longer silent.
        console.warn(`[loop] git add skipped for ${f}: ${addErr?.message?.slice(0, 120)}`);
      }
    }
    await pExec('git', ['commit', '-m', `auto-evolve: ${patchId} (LLM 修复)`], { cwd: REPO });
    return true;
  } catch (err: any) {
    console.error(`[loop] commit 失败: ${err.message?.slice(0, 200)}`);
    return false;
  }
}
|
|
247
|
+
|
|
248
|
+
/**
 * Hard-reset the repository to the given baseline ref, after logging it.
 *
 * @param baseline ref passed to `git reset --hard`
 */
async function rollback(baseline: string): Promise<void> {
  const resetArgs = ['reset', '--hard', baseline];
  console.log(`[loop] ⚠️ 自动回滚到 ${baseline}`);
  await pExec('git', resetArgs, { cwd: REPO });
}
|
|
252
|
+
|
|
253
|
+
/**
 * Record a notification: append a timestamped line to
 * `<REPO>/.auto-evolve-notify` and echo the message to the console.
 *
 * @param msg message text
 */
async function notify(msg: string): Promise<void> {
  const logPath = path.join(REPO, '.auto-evolve-notify');
  const stamp = new Date().toISOString();
  await fs.appendFile(logPath, `[${stamp}] ${msg}\n`, 'utf-8');
  console.log(`[notify] ${msg}`);
}
|
|
259
|
+
|
|
260
|
+
/**
 * Entry point of the repair loop.
 *
 * CLI: `--max-iter <n>` (default 10) and `--target <path>`.
 *
 * Per iteration: run vitest; stop when nothing fails; otherwise build a prompt
 * from the first failures plus source context, ask the LLM for a diff, stage
 * it, run the reviewer, commit on PASS, and re-run vitest. A commit that does
 * not reduce the failure count is reverted.
 *
 * Every unsuccessful iteration (no diff, reviewer FAIL, commit failure, no
 * progress) increases a consecutive-failure counter; a progressing commit
 * resets it. At three consecutive failures the loop notifies, hard-resets to
 * the baseline and stops.
 *
 * Fixes relative to the previous version:
 *  - placeholders are filled through a replacer function, so `$&`, `$'`, `$$`
 *    etc. occurring in source code or failure text are inserted literally;
 *  - a non-numeric --max-iter is rejected instead of silently skipping the loop;
 *  - an empty baseline file is rejected before any work is done;
 *  - the three-strikes rule is also checked after a commit failure.
 */
async function main() {
  const args = process.argv.slice(2);
  let maxIter = 10;
  let targetFile: string | undefined;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--max-iter' && args[i + 1]) {
      const parsed = Number.parseInt(args[++i], 10);
      if (Number.isNaN(parsed)) {
        throw new Error(`--max-iter expects an integer, got "${args[i]}"`);
      }
      maxIter = parsed;
    } else if (args[i] === '--target' && args[i + 1]) {
      targetFile = args[++i];
    }
  }

  // A baseline must exist before anything is changed.
  if (!(await fs.stat('.last-auto-evolve-baseline').catch(() => null))) {
    console.log('[loop] 没 baseline, 先打一个');
    await pExec('bash', ['scripts/auto-evolve-snapshot.sh', 'snapshot'], { cwd: REPO });
  }
  const baseline = (await fs.readFile('.last-auto-evolve-baseline', 'utf-8')).trim();
  if (!baseline) {
    throw new Error('.last-auto-evolve-baseline is empty; cannot determine rollback target');
  }
  console.log(`[loop] baseline: ${baseline}, max-iter: ${maxIter}`);

  let consecutiveFails = 0;

  // Three strikes: notify, roll back to the baseline and tell the caller to stop.
  const giveUpIfStuck = async (what: string): Promise<boolean> => {
    if (consecutiveFails < 3) return false;
    await notify(`${what}, 自动回滚 ${baseline}`);
    await rollback(baseline);
    return true;
  };

  for (let iter = 1; iter <= maxIter; iter++) {
    console.log(`\n========== iter ${iter}/${maxIter} ==========`);

    // 1. run vitest
    const result = await runVitest(targetFile);
    console.log(`[vitest] passed=${result.passed} failed=${result.failed}`);

    if (result.failed === 0) {
      console.log('[loop] ✅ 全部测试通过, 退出');
      return;
    }

    // 2. ask the LLM for a fix
    const failSummary = result.failingTests
      .slice(0, 3)
      .map((f) => `FILE: ${f.file}\nTEST: ${f.name}\nERROR: ${f.message}`)
      .join('\n---\n');
    const firstTest = result.failingTests[0]?.file || '';
    const testCtx = await getSourceContext(firstTest);
    const srcCtx = await getImportedSource(firstTest);
    const sourceCtx = `=== TEST FILE ===\n${testCtx}\n\n=== SRC FILE (imported) ===\n${srcCtx}`;
    // Single pass with a replacer function: inserted text is neither rescanned
    // nor interpreted for `$`-patterns.
    const slots: Record<string, string> = { FAIL: failSummary, SOURCE: sourceCtx };
    const prompt = FIX_PROMPT.replace(/\{\{(FAIL|SOURCE)\}\}/g, (_m: string, key: string) => slots[key]);

    console.log('[loop] 调 LLM 修...');
    const llmOut = await callLLM(prompt);
    const diff = extractDiff(llmOut);
    if (!diff) {
      console.log('[loop] LLM 没返回有效 diff');
      console.log('--- LLM 原始输出 (前 800) ---');
      console.log(llmOut.slice(0, 800));
      console.log('---');
      consecutiveFails++;
      if (await giveUpIfStuck('连续 3 次 LLM 无 diff')) return;
      continue;
    }

    // 3. write to staging
    const patchId = await writePatch(iter, diff);
    console.log(`[loop] 写到 ${patchId}`);

    // 4. run the reviewer
    const review = await runReviewer(patchId);
    console.log(`[reviewer] verdict=${review.verdict} concerns=${review.concerns.length}`);
    for (const c of review.concerns) console.log(` - ${c}`);

    if (review.verdict === 'FAIL') {
      consecutiveFails++;
      if (await giveUpIfStuck('连续 3 次 reviewer FAIL')) return;
      continue;
    }

    // 5. commit
    const committed = await commitPatch(patchId);
    if (!committed) {
      console.log('[loop] commit 失败 (apply 或 lefthook 拦), 算 fail');
      consecutiveFails++;
      // Unstage and restore tracked files only; untracked new files are left in place.
      try {
        await pExec('git', ['reset'], { cwd: REPO });
        await pExec('git', ['checkout', '--', '.'], { cwd: REPO });
      } catch { /* ignore */ }
      if (await giveUpIfStuck('连续 3 次 commit 失败')) return;
      continue;
    }

    // 6. re-run vitest to see whether the commit really helped
    const after = await runVitest(targetFile);
    console.log(`[vitest after] passed=${after.passed} failed=${after.failed}`);
    if (after.failed < result.failed) {
      console.log(`[loop] 进步: ${result.failed} → ${after.failed} fail`);
      consecutiveFails = 0;
    } else {
      console.log(`[loop] 没进步, revert 这次 commit`);
      await pExec('git', ['reset', '--hard', 'HEAD~1'], { cwd: REPO });
      consecutiveFails++;
    }

    if (await giveUpIfStuck('连续 3 次无进步')) return;
  }

  console.log(`[loop] 达到 max-iter=${maxIter}, 退出`);
}

main().catch((e) => { console.error('[loop] fatal:', e); process.exit(1); });
|
|
package/scripts/auto-evolve-oneshot.sh
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/bin/bash
# auto-evolve-oneshot.sh — stage D single-shot repair (shell version)
#
# Flow (all local; per the original header the LLM is called with MINIMAX_API_KEY):
#   1. run vitest and collect the failures
#   2. ask the LLM for a fix
#   3. extract the diff from the LLM output and write it to staging
#   4. run the reviewer (guardrail 4)
#   5. PASS → git apply + commit (guardrail 1 may still block the commit)
#
# Exit codes:
#   0 nothing to fix, or fix committed      1 vitest output not parseable
#   2 empty LLM reply                       3 no ```diff block in the reply
#   4 git apply failed                      5 reviewer verdict was not PASS
#   6 git commit failed

set -uo pipefail  # deliberately no -e: a failing vitest run must not abort the script

REPO="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO"

echo "[oneshot] REPO=$REPO"

# 1. run vitest
npx vitest run --reporter=json --no-color 2>/dev/null > /tmp/vt-out.json
TOTAL_FAIL=$(python3 -c "
import json
d = json.load(open('/tmp/vt-out.json'))
print(sum(1 for f in d.get('testResults',[]) for a in f.get('assertionResults',[]) if a.get('status')=='failed'))
" 2>/dev/null) || TOTAL_FAIL=""

# Without this guard a crashed vitest (empty / non-JSON output) left TOTAL_FAIL
# empty and the script went on to ask the LLM to fix "nothing".
if ! [[ "$TOTAL_FAIL" =~ ^[0-9]+$ ]]; then
  echo "[oneshot] ❌ vitest output is not parseable JSON (see /tmp/vt-out.json)"
  exit 1
fi
echo "[oneshot] vitest failed=$TOTAL_FAIL"

if [ "$TOTAL_FAIL" = "0" ]; then
  echo "[oneshot] ✅ 全部通过, 不需要修"
  exit 0
fi

# 2. summarise the failures (at most 60 lines; 2 messages of 600 chars per test)
FAIL_SUMMARY=$(python3 -c "
import json
d = json.load(open('/tmp/vt-out.json'))
for f in d.get('testResults', []):
    for a in f.get('assertionResults', []):
        if a.get('status') == 'failed':
            print('FILE:', f.get('name'))
            print('TEST:', a.get('fullName') or a.get('title'))
            print('ERROR:')
            for m in a.get('failureMessages', [])[:2]:
                print(m[:600])
            print('---')
" | head -60)
echo "[oneshot] 2. 调 LLM 修..."

# 3. write the prompt to a file (avoids backtick clashes on the command line).
#    The failure summary computed above is reused; expanded variable content is
#    not re-evaluated by the shell inside the here-document.
PROMPT_FILE="/tmp/oneshot-prompt.txt"
cat > "$PROMPT_FILE" <<PROMPT_EOF
你是一个谨慎的代码修复助手. 你的工作是修复失败的测试.

约束: 改动最小, 不改测试, 不引入 any/unknown/@ts-ignore.

输出格式: 严格只输出一个 \`\`\`diff ... \`\`\` 块, 第一个字符 \`\`\`diff, 最后一个字符 \`\`\`. 中间是 unified diff (--- a/path +++ b/path 风格). 不要在 diff 块外输出任何文字.

FAIL 信息:
$FAIL_SUMMARY

请**只**输出 diff 块:
PROMPT_EOF

LLM_OUTPUT=$(npx tsx -r dotenv/config -e "
import { initMinimax } from './src/llm/pi-ai.js';
import * as fs from 'fs';
const prompt = fs.readFileSync('$PROMPT_FILE', 'utf-8');
const client = initMinimax();
const text = await client.generateText({ messages: [{ role: 'user', content: prompt }], maxTokens: 4096, temperature: 0.2 });
process.stdout.write(text || '');
" 2>/tmp/llm-err.log)

if [ -z "$LLM_OUTPUT" ]; then
  echo "[oneshot] ❌ LLM 没返回"
  exit 2
fi

# 4. extract the first \`\`\`diff block
DIFF=$(echo "$LLM_OUTPUT" | python3 -c "
import sys, re
text = sys.stdin.read()
m = re.search(r'\`\`\`diff\s*([\s\S]*?)\`\`\`', text)
if m:
    print(m.group(1).strip())
else:
    sys.exit(1)
" 2>/dev/null) || {
  echo "[oneshot] ❌ LLM 输出没 diff 块"
  echo "--- LLM 原始输出 (前 800) ---"
  echo "$LLM_OUTPUT" | head -c 800
  echo ""
  exit 3
}

echo "[oneshot] 拿到 diff: $(echo "$DIFF" | wc -l) lines"

# 5. write to staging
ID="oneshot-$(date +%s)"
mkdir -p "staging/auto-evolve/$ID"
echo "$DIFF" > "staging/auto-evolve/$ID/$ID.patch"
echo "$ID" > "staging/auto-evolve/$ID/.patch-id"
echo "[oneshot] patch 写到 staging/auto-evolve/$ID/"

# 6. run the reviewer (its exit status is ignored; only the verdict file counts)
echo "[oneshot] 3. 跑 reviewer..."
npx tsx -r dotenv/config scripts/diff-reviewer.ts "$ID" > /tmp/reviewer.log 2>&1 || true
VERDICT=$(python3 -c "
import json
try:
    print(json.load(open('staging/auto-evolve/$ID/.review-verdict')).get('verdict','UNKNOWN'))
except:
    print('UNKNOWN')
")
echo "[oneshot] reviewer verdict: $VERDICT"

# 7. apply + commit
if [ "$VERDICT" = "PASS" ]; then
  echo "[oneshot] 4. apply + commit"
  if git apply --recount --whitespace=fix "staging/auto-evolve/$ID/$ID.patch" 2>/tmp/apply.err; then
    # NOTE(review): `git add -A` stages everything in the tree, including the
    # staging/ artifacts written above — confirm that this is intended.
    git add -A
    # Success used to be printed even when the commit was rejected (e.g. by a hook).
    if ! git commit -m "auto-evolve: $ID (LLM 修复)"; then
      echo "[oneshot] ❌ git commit 失败"
      exit 6
    fi
    echo "[oneshot] ✅ 提交成功"
    # verify
    npx vitest run --reporter=json --no-color 2>/dev/null > /tmp/vt-after.json
    AFTER_FAIL=$(python3 -c "
import json
print(sum(1 for f in json.load(open('/tmp/vt-after.json')).get('testResults',[]) for a in f.get('assertionResults',[]) if a.get('status')=='failed'))
")
    echo "[oneshot] 修复后 fail: $AFTER_FAIL (之前 $TOTAL_FAIL)"
  else
    echo "[oneshot] ❌ git apply 失败:"
    cat /tmp/apply.err
    exit 4
  fi
else
  echo "[oneshot] ❌ reviewer verdict=$VERDICT, 不 apply"
  cat /tmp/reviewer.log | head -10
  exit 5
fi
|