@bolloon/bolloon-agent 0.1.33 → 0.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/.auto-evolve-calls +1 -0
  2. package/.last-auto-evolve-baseline +1 -0
  3. package/Bolloon.md +103 -0
  4. package/README.md +7 -2
  5. package/dist/agents/pi-sdk.js +264 -12
  6. package/dist/bollharness-integration/index.js +8 -1
  7. package/dist/bootstrap/bootstrap.js +114 -0
  8. package/dist/bootstrap/context-collector.js +296 -0
  9. package/dist/bootstrap/lifecycle-hooks.js +109 -0
  10. package/dist/bootstrap/project-context.js +151 -0
  11. package/dist/heartbeat/Watchdog.js +9 -1
  12. package/dist/index.js +11 -0
  13. package/dist/llm/pi-ai.js +31 -21
  14. package/dist/network/p2p-direct.js +59 -2
  15. package/dist/pi-ecosystem/index.js +9 -6
  16. package/dist/pi-ecosystem-judgment/adaptive-scan.js +231 -0
  17. package/dist/pi-ecosystem-judgment/causal-judge.js +449 -0
  18. package/dist/pi-ecosystem-judgment/decision.js +5 -2
  19. package/dist/pi-ecosystem-judgment/detect-hook.js +168 -0
  20. package/dist/pi-ecosystem-judgment/distill-prompt.js +226 -0
  21. package/dist/pi-ecosystem-judgment/evolve-judgment.js +170 -0
  22. package/dist/pi-ecosystem-judgment/human-value-pipeline.js +21 -0
  23. package/dist/pi-ecosystem-judgment/human-value-store.js +283 -22
  24. package/dist/pi-ecosystem-judgment/injection-gate.js +166 -0
  25. package/dist/pi-ecosystem-judgment/monitor-gate.js +188 -0
  26. package/dist/security/builtin-guards.js +124 -0
  27. package/dist/security/context-router-tool.js +106 -0
  28. package/dist/security/react-harness.js +143 -0
  29. package/dist/security/tool-gate.js +235 -0
  30. package/dist/social/heartbeat.js +19 -2
  31. package/dist/utils/auto-evolve-policy.js +117 -0
  32. package/dist/utils/clamp.js +7 -0
  33. package/dist/utils/double.js +6 -0
  34. package/dist/web/api-config.html +3 -3
  35. package/dist/web/client.js +1328 -351
  36. package/dist/web/index.html +34 -31
  37. package/dist/web/server.js +1128 -58
  38. package/dist/web/style.css +370 -0
  39. package/lefthook.yml +29 -0
  40. package/package.json +4 -2
  41. package/scripts/auto-evolve-loop.ts +376 -0
  42. package/scripts/auto-evolve-oneshot.sh +155 -0
  43. package/scripts/auto-evolve-snapshot.sh +136 -0
  44. package/scripts/detect-schema-changes.sh +48 -0
  45. package/scripts/diff-reviewer.ts +159 -0
  46. package/scripts/weekly-report.ts +364 -0
  47. package/src/agents/pi-sdk.ts +293 -15
  48. package/src/bollharness-integration/index.ts +8 -32
  49. package/src/bootstrap/bootstrap.ts +132 -0
  50. package/src/bootstrap/context-collector.ts +342 -0
  51. package/src/bootstrap/lifecycle-hooks.ts +176 -0
  52. package/src/bootstrap/project-context.ts +163 -0
  53. package/src/heartbeat/Watchdog.ts +9 -1
  54. package/src/index.ts +11 -0
  55. package/src/llm/pi-ai.ts +33 -22
  56. package/src/network/p2p-direct.ts +59 -3
  57. package/src/security/builtin-guards.ts +162 -0
  58. package/src/security/context-router-tool.ts +122 -0
  59. package/src/security/react-harness.ts +177 -0
  60. package/src/security/tool-gate.ts +294 -0
  61. package/src/social/ant-colony/index.js +19 -0
  62. package/src/social/heartbeat.ts +18 -2
  63. package/src/utils/auto-evolve-policy.ts +138 -0
  64. package/src/utils/clamp.ts +5 -0
  65. package/src/web/api-config.html +3 -3
  66. package/src/web/client.js +1328 -351
  67. package/src/web/index.html +34 -31
  68. package/src/web/server.ts +1179 -53
  69. package/src/web/style.css +370 -0
  70. package/staging/auto-evolve/clean-001/.review-verdict +9 -0
  71. package/staging/auto-evolve/clean-001/clean-001.patch +14 -0
  72. package/staging/auto-evolve/e2e-001/.patch-id +1 -0
  73. package/staging/auto-evolve/e2e-001/.review-verdict +12 -0
  74. package/staging/auto-evolve/e2e-001/e2e-001.patch +11 -0
  75. package/staging/auto-evolve/test-bad/.review-verdict +12 -0
  76. package/staging/auto-evolve/test-bad/test-bad.patch +11 -0
  77. package/src/social/ant-colony/AdaptiveHeartbeat.ts +0 -131
  78. package/src/social/ant-colony/PheromoneEngine.ts +0 -302
  79. package/src/social/ant-colony/index.ts +0 -18
  80. package/src/social/ant-colony/types.ts +0 -94
@@ -0,0 +1,376 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * auto-evolve-loop.ts — 阶段 D 主循环 (重写版)
4
+ *
5
+ * 流程 (per iteration):
6
+ * 1. 跑 vitest, 拿 fail 信息
7
+ * 2. 把 fail + 当前 src/test 源码喂给 LLM (走 PiAIModel / MiniMax)
8
+ * 3. 写 diff 到 staging/auto-evolve/iter-<N>/
9
+ * 4. 跑 detect-schema-changes + diff-reviewer
10
+ * 5a. reviewer PASS → git apply --recount --whitespace=fix + commit
11
+ * 5b. reviewer FAIL → log, 累计连续失败
12
+ * 6. 连续 3 次 FAIL → 自动回滚 baseline + 通知
13
+ *
14
+ * 退出条件 (任一即停):
15
+ * - 全部测试 PASS (loop 成功)
16
+ * - 连续 3 次 FAIL (loop 认输)
17
+ * - max-iter 达到 (默认 10)
18
+ * - 人类 SIGINT (Ctrl-C)
19
+ *
20
+ * 用法:
21
+ * tsx scripts/auto-evolve-loop.ts
22
+ * tsx scripts/auto-evolve-loop.ts --max-iter 5
23
+ * tsx scripts/auto-evolve-loop.ts --target src/utils/foo.ts
24
+ */
25
+
26
+ import { execFile } from 'child_process';
27
+ import * as fs from 'fs/promises';
28
+ import { promisify } from 'util';
29
+ import * as path from 'path';
30
+
31
// Promise-returning wrapper around child_process.execFile, used for every external command below.
const pExec = promisify(execFile);
// Working directory of the process; used as `cwd` for child processes and as the base for repo paths.
const REPO = process.cwd();
33
+
34
/** Summary of one vitest run, as built by parseVitestJson (or the crash fallback in runVitest). */
interface VitestResult {
  /** Number of assertions whose status is 'failed'. */
  failed: number;
  /** Number of assertions whose status is 'passed'. */
  passed: number;
  /** Names of the test files that were counted (after any target filtering). */
  totalFiles: string[];
  /** One entry per failed assertion: owning file, test name, and failure message text. */
  failingTests: { file: string; name: string; message: string }[];
}
40
+
41
/**
 * Runs the full vitest suite with the JSON reporter and summarises the outcome.
 *
 * No path filter is handed to vitest itself; `targetFile` is applied afterwards inside
 * parseVitestJson. When the child process rejects, the JSON report is recovered from the
 * stdout captured on the error object, if there is one.
 *
 * @param targetFile Optional file used to narrow the report to matching test files.
 * @returns The parsed summary, or a synthetic single-failure result ("vitest crashed") when
 *          no JSON report could be obtained.
 */
async function runVitest(targetFile?: string): Promise<VitestResult> {
  const args = ['vitest', 'run', '--reporter=json', '--no-color'];

  let stdout: unknown;
  let failure: unknown;
  try {
    ({ stdout } = await pExec('npx', args, { cwd: REPO, maxBuffer: 50 * 1024 * 1024 }));
  } catch (err: unknown) {
    failure = err;
    // A rejected exec may still carry the reporter output on the error object.
    if (typeof err === 'object' && err !== null && 'stdout' in err) {
      stdout = (err as { stdout?: unknown }).stdout;
    }
  }

  if (typeof stdout === 'string' && stdout.length > 0) {
    try {
      return parseVitestJson(JSON.parse(stdout), targetFile);
    } catch (parseErr: unknown) {
      // Keep the exec error (if any) as the reported cause; otherwise report the parse error.
      if (failure === undefined) failure = parseErr;
    }
  }

  const message = failure instanceof Error ? failure.message.slice(0, 500) : '';
  return {
    failed: 1,
    passed: 0,
    totalFiles: [],
    failingTests: [{ file: 'unknown', name: 'vitest crashed', message }],
  };
}
63
+
64
/** Shape of the fields read from one assertion entry of the vitest JSON report. */
interface RawVitestAssertion {
  status?: string;
  fullName?: string;
  title?: string;
  failureMessages?: string[];
}

/** Shape of the fields read from one per-file entry of the vitest JSON report. */
interface RawVitestFile {
  name?: string;
  assertionResults?: RawVitestAssertion[];
}

/** Strips the script extension and a trailing `.test` / `.spec` marker from a file name. */
function testStem(baseName: string): string {
  return baseName.replace(/\.[cm]?[jt]sx?$/, '').replace(/\.(test|spec)$/, '');
}

/**
 * Decides whether a reported test file belongs to the requested target.
 *
 * A test file matches when its path contains the target's base name (the original rule), or
 * when both share the same stem, so that `--target src/utils/foo.ts` selects `foo.test.ts`.
 */
function matchesTargetFile(testPath: string | undefined, targetFile: string): boolean {
  if (testPath === undefined) return false;
  const targetBase = targetFile.split('/').pop() || '';
  if (testPath.includes(targetBase)) return true;
  const targetStem = testStem(targetBase);
  if (!targetStem) return false;
  const testBase = testPath.split(/[\\/]/).pop() || '';
  return testStem(testBase) === targetStem;
}

/**
 * Converts a vitest JSON report into a VitestResult.
 *
 * @param j          Parsed JSON report; only `testResults[].name` and
 *                   `testResults[].assertionResults[]` are read.
 * @param targetFile When given, only test files matching it (see matchesTargetFile) are counted.
 * @returns Pass/fail counters, the counted file names, and one entry per failed assertion
 *          (failure messages joined with newlines and capped at 1500 characters).
 */
function parseVitestJson(j: unknown, targetFile?: string): VitestResult {
  const report = (j ?? {}) as { testResults?: RawVitestFile[] };
  const failingTests: VitestResult['failingTests'] = [];
  const totalFiles: string[] = [];
  let failed = 0;
  let passed = 0;

  for (const file of report.testResults || []) {
    if (targetFile && !matchesTargetFile(file.name, targetFile)) continue;

    totalFiles.push(file.name || '');
    for (const assertion of file.assertionResults || []) {
      if (assertion.status === 'passed') {
        passed++;
      } else if (assertion.status === 'failed') {
        failed++;
        failingTests.push({
          file: file.name || '',
          name: assertion.fullName || assertion.title || '',
          message: (assertion.failureMessages || []).join('\n').slice(0, 1500),
        });
      }
    }
  }

  return { failed, passed, totalFiles, failingTests };
}
89
+
90
/**
 * Sends one prompt to the LLM through the project's `initMinimax()` client from src/llm/pi-ai.
 *
 * Any failure while loading the client or generating text is logged and turned into an empty
 * string, which the caller treats as "no diff". A failure while updating the call counter is
 * logged separately and never discards a reply that was already received.
 *
 * @param prompt Full prompt text, sent as a single user message.
 * @returns The generated text, or '' when the model is unavailable or returned nothing.
 */
async function callLLM(prompt: string): Promise<string> {
  let reply: string;
  try {
    const pi = await import('../src/llm/pi-ai.js');
    const client = pi.initMinimax();
    const text = await client.generateText({
      messages: [{ role: 'user', content: prompt }],
      maxTokens: 4096,
      temperature: 0.2,
    });
    reply = text || '';
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message.slice(0, 100) : String(e).slice(0, 100);
    console.warn(`[loop] PiAIModel 不可用: ${reason}`);
    return '';
  }

  try {
    await bumpCallCount();
  } catch (e: unknown) {
    // Bookkeeping only: losing a counter update must not cost us the LLM answer.
    const reason = e instanceof Error ? e.message.slice(0, 100) : String(e).slice(0, 100);
    console.warn(`[loop] 调用计数写入失败: ${reason}`);
  }
  return reply;
}
110
+
111
/**
 * Increments today's LLM call counter stored in `<REPO>/.auto-evolve-calls`.
 *
 * The file is rewritten with a single `YYYY-MM-DD:<count>` line (UTC date). The previous count
 * is reused only when the last non-empty line is for today and holds a valid non-negative
 * integer; a missing, unreadable, or malformed file restarts the count at 1 instead of
 * persisting `NaN`.
 */
async function bumpCallCount(): Promise<void> {
  const counterFile = path.join(REPO, '.auto-evolve-calls');
  const today = new Date().toISOString().slice(0, 10);
  const prefix = `${today}:`;

  let count = 0;
  try {
    const lines = (await fs.readFile(counterFile, 'utf-8'))
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean);
    const lastLine = lines[lines.length - 1];
    if (lastLine?.startsWith(prefix)) {
      const parsed = Number.parseInt(lastLine.slice(prefix.length), 10);
      if (Number.isInteger(parsed) && parsed >= 0) count = parsed;
    }
  } catch { /* no file yet */ }

  await fs.writeFile(counterFile, `${today}:${count + 1}\n`, 'utf-8');
}
122
+
123
/**
 * Prompt template for one repair attempt. It lists the editing constraints, demands a single
 * fenced diff block as the only output, and contains two placeholders, `{{FAIL}}` and
 * `{{SOURCE}}`, which main() replaces with the failure summary and the source context.
 */
const FIX_PROMPT = `你是一个谨慎的代码修复助手. 你的工作是修复失败的测试, **不**做无关改动.

约束 (违反任一即重做):
1. 改动最小: 只动让测试通过必需的部分
2. 不改测试本身 (除非 test 自身有 bug)
3. 不引入新依赖
4. 不删注释 / 不改注释
5. 不动 schema (interface/type) 除非 fail 信息明确要求
6. 不用 any / unknown / @ts-ignore 偷懒
7. 不动 package.json / tsconfig.json

输出格式 (严格):
- 第一个字符必须是 \`\`\`diff
- 中间是 unified diff (git format-patch 风格, --- a/path +++ b/path)
- 最后一个字符必须是 \`\`\`
- 不要在 diff 块外输出任何文字
- 不要使用 thinking 块 (下游会丢失)
- **改动必须严格匹配当前源码 (行数, 缩进)**

FAIL 信息:
{{FAIL}}

相关源码 (请严格匹配行数和缩进):
{{SOURCE}}

请**只**输出 diff 块:`;
149
+
150
/**
 * Reads the beginning of a file for use as LLM context.
 *
 * @param file     Path of the file to read; an empty path yields ''.
 * @param maxChars Maximum number of characters returned (default 8000, the previous fixed cap).
 * @returns The first `maxChars` characters of the file, or '' when it cannot be read.
 */
async function getSourceContext(file: string, maxChars = 8000): Promise<string> {
  if (!file) return '';
  try {
    const content = await fs.readFile(file, 'utf-8');
    // Clamp at 0 so a negative cap cannot turn into "slice from the end".
    return content.slice(0, Math.max(0, maxChars));
  } catch {
    return '';
  }
}
158
+
159
/**
 * Collects the source of every relative module imported by a test file, so the LLM sees the
 * real code it has to patch instead of guessing line numbers.
 *
 * Import specifiers are taken from `from '...'` clauses. Only specifiers starting with `.` are
 * followed. Resolution tries, in order: the `.ts` twin of a `.js` specifier, `<path>.ts` for an
 * extension-less specifier, then `<path>/index.ts` for a directory import. Each resolved file
 * is emitted once even if it is imported several times.
 *
 * @param testFile        Path of the test file whose imports are inspected.
 * @param maxCharsPerFile Maximum characters kept per imported file (default 4000).
 * @returns Sections of the form `--- <path> ---\n<content>` joined by blank lines, or '' when
 *          the test file cannot be read or nothing could be resolved.
 */
async function getImportedSource(testFile: string, maxCharsPerFile = 4000): Promise<string> {
  let testContent: string;
  try {
    testContent = await fs.readFile(testFile, 'utf-8');
  } catch {
    return '';
  }

  const exists = (candidate: string): Promise<boolean> =>
    fs.access(candidate).then(() => true, () => false);

  const dir = path.dirname(testFile);
  const cap = Math.max(0, maxCharsPerFile);
  const seen = new Set<string>();
  const sections: string[] = [];

  for (const match of testContent.matchAll(/from\s+['"]([^'"]+)['"]/g)) {
    const specifier = match[1];
    if (!specifier || !specifier.startsWith('.')) continue;

    let resolved = path.resolve(dir, specifier);
    if (resolved.endsWith('.js')) {
      // ESM-style TypeScript imports name the emitted .js file; prefer the .ts source if present.
      const tsTwin = resolved.replace(/\.js$/, '.ts');
      if (await exists(tsTwin)) resolved = tsTwin;
    } else if (!resolved.endsWith('.ts')) {
      if (await exists(`${resolved}.ts`)) {
        resolved += '.ts';
      } else if (await exists(path.join(resolved, 'index.ts'))) {
        resolved = path.join(resolved, 'index.ts');
      }
    }

    if (seen.has(resolved)) continue;
    seen.add(resolved);

    try {
      const content = await fs.readFile(resolved, 'utf-8');
      sections.push(`--- ${resolved} ---\n${content.slice(0, cap)}`);
    } catch { /* unreadable or unresolved import: skip */ }
  }

  return sections.join('\n\n');
}
193
+
194
/**
 * Extracts the body of the first fenced diff block from raw LLM output.
 *
 * The closing fence is looked for at the start of a line first: every content line of a unified
 * diff begins with ' ', '+', '-', '@' or a header keyword, so a line-initial fence cannot be
 * part of the patch, while backticks inside a changed line can. If no such block exists, the
 * looser "first fence wins" match is used as a fallback. CRLF line endings are normalised.
 *
 * @param llmOutput Raw model output.
 * @returns The diff text without the fences, or null when there is no non-empty diff block.
 */
function extractDiff(llmOutput: string): string | null {
  const text = llmOutput.replace(/\r\n/g, '\n');

  const anchored = /```(?:diff|patch)[ \t]*\n([\s\S]*?)\n```[ \t]*(?=\n|$)/.exec(text);
  if (anchored) {
    // Strip only blank lines at the edges so a trailing context line keeps its leading space.
    const body = (anchored[1] ?? '').replace(/^\n+|\n+$/g, '');
    if (body.trim()) return body;
  }

  const loose = /```diff\s*([\s\S]*?)```/.exec(text);
  const fallback = loose ? (loose[1] ?? '').trim() : '';
  return fallback ? fallback : null;
}
200
+
201
/**
 * Persists one LLM-produced patch under `<REPO>/staging/auto-evolve/iter-NNN/`.
 *
 * Two files are written: `iter-NNN.patch` with the patch text and `.patch-id` holding the id.
 *
 * @param iter         Iteration number; zero-padded to three digits to form the id.
 * @param patchContent Patch text to store.
 * @returns The patch id (`iter-NNN`).
 */
async function writePatch(iter: number, patchContent: string): Promise<string> {
  const patchId = 'iter-' + String(iter).padStart(3, '0');
  const stagingDir = path.join(REPO, 'staging', 'auto-evolve', patchId);
  const patchPath = path.join(stagingDir, patchId + '.patch');
  const markerPath = path.join(stagingDir, '.patch-id');

  await fs.mkdir(stagingDir, { recursive: true });
  await fs.writeFile(patchPath, patchContent, 'utf-8');
  await fs.writeFile(markerPath, patchId, 'utf-8');

  return patchId;
}
209
+
210
/**
 * Runs the schema-change detector and the diff reviewer on a staged patch, then reads the
 * verdict file the reviewer leaves in the staging directory.
 *
 * The result fails closed: only an explicit `"PASS"` verdict is reported as PASS. A missing,
 * unknown, or malformed verdict, or any crash of the two scripts, is reported as FAIL, so an
 * unexpected reviewer output can never let a patch through.
 *
 * @param patchId Id of the patch directory under staging/auto-evolve.
 * @returns The verdict and the reviewer's list of concerns (or the failure reason).
 */
async function runReviewer(patchId: string): Promise<{ verdict: 'PASS' | 'FAIL'; concerns: string[] }> {
  try {
    await pExec('bash', [path.join(REPO, 'scripts/detect-schema-changes.sh'), patchId], { cwd: REPO });
    await pExec('npx', ['tsx', 'scripts/diff-reviewer.ts', patchId, '--model', 'claude-sonnet-4-6'], { cwd: REPO });

    const verdictFile = path.join(REPO, 'staging', 'auto-evolve', patchId, '.review-verdict');
    const parsed: unknown = JSON.parse(await fs.readFile(verdictFile, 'utf-8'));
    const record: { verdict?: unknown; concerns?: unknown } =
      typeof parsed === 'object' && parsed !== null ? parsed : {};

    const concerns: string[] = Array.isArray(record.concerns)
      ? record.concerns.map((c: unknown) => (typeof c === 'string' ? c : JSON.stringify(c)))
      : [];

    if (record.verdict === 'PASS') return { verdict: 'PASS', concerns };
    if (record.verdict !== 'FAIL') {
      concerns.push(`unrecognised reviewer verdict: ${JSON.stringify(record.verdict)}`);
    }
    return { verdict: 'FAIL', concerns };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message.slice(0, 200) : '';
    return { verdict: 'FAIL', concerns: [reason || 'reviewer crashed'] };
  }
}
221
+
222
/**
 * Applies a staged patch to the working tree and commits the result.
 *
 * Steps: make sure the patch file ends with a newline (LLM output may lack it), apply it with
 * `--recount --whitespace=fix`, stage every change with `git add --all`, then commit.
 *
 * Staging uses `git add --all` rather than paths parsed from `git status --porcelain`: the
 * porcelain format is `XY <path>` with a possibly blank status column, so whitespace-splitting
 * it produced wrong paths for ` M file` entries and silently left files out of the commit.
 *
 * @param patchId Id of the patch directory under staging/auto-evolve.
 * @returns true when the commit was created; false when apply, staging, or commit failed (the
 *          working tree may then contain the applied patch; the caller cleans it up).
 */
async function commitPatch(patchId: string): Promise<boolean> {
  try {
    const patchFile = path.join(REPO, 'staging', 'auto-evolve', patchId, `${patchId}.patch`);

    const patchContent = await fs.readFile(patchFile, 'utf-8');
    if (!patchContent.endsWith('\n')) {
      await fs.writeFile(patchFile, `${patchContent}\n`, 'utf-8');
    }

    // --whitespace=fix tolerates whitespace damage, --recount recomputes hunk line counts.
    await pExec('git', ['apply', '--recount', '--whitespace=fix', patchFile], { cwd: REPO });
    await pExec('git', ['add', '--all'], { cwd: REPO });
    await pExec('git', ['commit', '-m', `auto-evolve: ${patchId} (LLM 修复)`], { cwd: REPO });
    return true;
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message.slice(0, 200) : String(err).slice(0, 200);
    console.error(`[loop] commit 失败: ${reason}`);
    return false;
  }
}
247
+
248
/**
 * Hard-resets the repository to the given baseline ref after logging the rollback.
 *
 * @param baseline Git ref (the recorded baseline tag) to reset to.
 */
async function rollback(baseline: string): Promise<void> {
  const resetArgs = ['reset', '--hard', baseline];
  console.log(`[loop] ⚠️ 自动回滚到 ${baseline}`);
  await pExec('git', resetArgs, { cwd: REPO });
}
252
+
253
/**
 * Records a notification: appends a timestamped line to `<REPO>/.auto-evolve-notify` and echoes
 * the message to the console.
 *
 * @param msg Message text.
 */
async function notify(msg: string): Promise<void> {
  const logPath = path.join(REPO, '.auto-evolve-notify');
  const stamp = new Date().toISOString();
  await fs.appendFile(logPath, `[${stamp}] ${msg}\n`, 'utf-8');
  console.log(`[notify] ${msg}`);
}
259
+
260
/** Consecutive failed iterations after which the loop gives up and rolls back to the baseline. */
const MAX_CONSECUTIVE_FAILS = 3;

/**
 * Fills the `{{FAIL}}` and `{{SOURCE}}` placeholders of FIX_PROMPT in a single pass.
 *
 * A replacer function is used so that `$&`, `$1`, `$$`, `` $` `` and `$'` sequences in test
 * output or source code are inserted literally; a replacement string would have them expanded
 * by String.prototype.replace. The single pass also keeps placeholder-looking text inside the
 * failure summary from being substituted.
 */
function buildFixPrompt(failSummary: string, sourceCtx: string): string {
  return FIX_PROMPT.replace(/\{\{(FAIL|SOURCE)\}\}/g, (_match: string, key: string) =>
    key === 'FAIL' ? failSummary : sourceCtx,
  );
}

/**
 * Notifies and rolls back once the consecutive-failure limit is reached.
 *
 * @returns true when the rollback was performed and the loop must stop.
 */
async function giveUpIfExhausted(consecutiveFails: number, message: string, baseline: string): Promise<boolean> {
  if (consecutiveFails < MAX_CONSECUTIVE_FAILS) return false;
  await notify(message);
  await rollback(baseline);
  return true;
}

/**
 * Entry point of the auto-evolve loop.
 *
 * Per iteration: run vitest, ask the LLM for a diff, stage it, run the reviewer, commit on PASS,
 * then re-run vitest and keep the commit only if the number of failures went down. The loop
 * stops when all tests pass, when `--max-iter` is reached, or after three consecutive failed
 * iterations of any kind (no diff, reviewer FAIL, commit failure, no progress), in which case
 * the repository is reset to the recorded baseline.
 *
 * CLI flags: `--max-iter <n>` (positive integer, default 10) and `--target <file>`.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  let maxIter = 10;
  let targetFile: string | undefined;
  for (let i = 0; i < args.length; i++) {
    const value = args[i + 1];
    if (args[i] === '--max-iter' && value) {
      maxIter = Number.parseInt(value, 10);
      if (!Number.isInteger(maxIter) || maxIter < 1) {
        throw new Error(`--max-iter 需要正整数, 收到: ${value}`);
      }
      i++;
    } else if (args[i] === '--target' && value) {
      targetFile = value;
      i++;
    }
  }

  // A baseline must exist before anything is modified; create one if it is missing.
  const baselineFile = path.join(REPO, '.last-auto-evolve-baseline');
  if (!(await fs.stat(baselineFile).catch(() => null))) {
    console.log('[loop] 没 baseline, 先打一个');
    await pExec('bash', ['scripts/auto-evolve-snapshot.sh', 'snapshot'], { cwd: REPO });
  }
  const baseline = (await fs.readFile(baselineFile, 'utf-8')).trim();
  if (!baseline) {
    // An empty ref would make every later `git reset --hard` meaningless.
    throw new Error('.last-auto-evolve-baseline 为空, 无法确定回滚点');
  }
  console.log(`[loop] baseline: ${baseline}, max-iter: ${maxIter}`);

  let consecutiveFails = 0;
  for (let iter = 1; iter <= maxIter; iter++) {
    console.log(`\n========== iter ${iter}/${maxIter} ==========`);

    // 1. Run vitest.
    const result = await runVitest(targetFile);
    console.log(`[vitest] passed=${result.passed} failed=${result.failed}`);

    if (result.failed === 0) {
      if (targetFile && result.totalFiles.length === 0) {
        // Nothing was actually checked: do not report this as a green run.
        console.warn(`[loop] ⚠️ --target ${targetFile} 没匹配到任何测试文件, 退出`);
        process.exitCode = 1;
        return;
      }
      console.log('[loop] ✅ 全部测试通过, 退出');
      return;
    }

    // 2. Ask the LLM for a fix, showing it the first failing test file and what it imports.
    const failSummary = result.failingTests
      .slice(0, 3)
      .map((f) => `FILE: ${f.file}\nTEST: ${f.name}\nERROR: ${f.message}`)
      .join('\n---\n');
    const firstTest = result.failingTests[0]?.file || '';
    const testCtx = await getSourceContext(firstTest);
    const srcCtx = await getImportedSource(firstTest);
    const sourceCtx = `=== TEST FILE ===\n${testCtx}\n\n=== SRC FILE (imported) ===\n${srcCtx}`;
    const prompt = buildFixPrompt(failSummary, sourceCtx);

    console.log('[loop] 调 LLM 修...');
    const llmOut = await callLLM(prompt);
    const diff = extractDiff(llmOut);
    if (!diff) {
      console.log('[loop] LLM 没返回有效 diff');
      console.log('--- LLM 原始输出 (前 800) ---');
      console.log(llmOut.slice(0, 800));
      console.log('---');
      consecutiveFails++;
      if (await giveUpIfExhausted(consecutiveFails, `连续 3 次 LLM 无 diff, 自动回滚 ${baseline}`, baseline)) return;
      continue;
    }

    // 3. Write the patch to staging.
    const patchId = await writePatch(iter, diff);
    console.log(`[loop] 写到 ${patchId}`);

    // 4. Run the reviewer.
    const review = await runReviewer(patchId);
    console.log(`[reviewer] verdict=${review.verdict} concerns=${review.concerns.length}`);
    for (const concern of review.concerns) console.log(` - ${concern}`);

    if (review.verdict === 'FAIL') {
      consecutiveFails++;
      if (await giveUpIfExhausted(consecutiveFails, `连续 3 次 reviewer FAIL, 自动回滚 ${baseline}`, baseline)) return;
      continue;
    }

    // 5. Apply and commit.
    const committed = await commitPatch(patchId);
    if (!committed) {
      console.log('[loop] commit 失败 (apply 或 lefthook 拦), 算 fail');
      consecutiveFails++;
      // Unstage and restore tracked files only; untracked new files are left in place.
      try {
        await pExec('git', ['reset'], { cwd: REPO });
        await pExec('git', ['checkout', '--', '.'], { cwd: REPO });
      } catch { /* best-effort cleanup */ }
      if (await giveUpIfExhausted(consecutiveFails, `连续 3 次 commit 失败, 自动回滚 ${baseline}`, baseline)) return;
      continue;
    }

    // 6. Re-run vitest: keep the commit only if the failure count actually dropped.
    const after = await runVitest(targetFile);
    console.log(`[vitest after] passed=${after.passed} failed=${after.failed}`);
    if (after.failed < result.failed) {
      console.log(`[loop] 进步: ${result.failed} → ${after.failed} fail`);
      consecutiveFails = 0;
    } else {
      console.log(`[loop] 没进步, revert 这次 commit`);
      await pExec('git', ['reset', '--hard', 'HEAD~1'], { cwd: REPO });
      consecutiveFails++;
    }

    if (await giveUpIfExhausted(consecutiveFails, `连续 3 次无进步, 自动回滚 ${baseline}`, baseline)) return;
  }

  console.log(`[loop] 达到 max-iter=${maxIter}, 退出`);
}

main().catch((e: unknown) => { console.error('[loop] fatal:', e); process.exit(1); });
@@ -0,0 +1,155 @@
1
#!/bin/bash
# auto-evolve-oneshot.sh — stage D single-shot repair (shell version)
#
# Flow (all local; the LLM is called through the project's MiniMax client):
#   1. run vitest and collect the failures
#   2. ask the LLM for a fix
#   3. extract the diff from the LLM output and write it to staging
#   4. run the reviewer (guardrail 4)
#   5. PASS → git apply + commit (the commit hooks, guardrail 1, may still reject it)
#
# Exit codes:
#   0  nothing to fix, or the patch was committed
#   1  setup failure or unreadable vitest report
#   2  empty LLM reply
#   3  no diff block in the LLM reply
#   4  git apply failed
#   5  reviewer verdict was not PASS
#   6  git commit failed

set -uo pipefail  # no -e: a failing vitest run must not abort the script

REPO="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO" || exit 1

echo "[oneshot] REPO=$REPO"

# Private scratch directory instead of fixed, predictable /tmp file names: concurrent runs no
# longer overwrite each other's reports. It is removed on success and kept (with its logs) on
# failure so the cause can be inspected.
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/auto-evolve-oneshot.XXXXXX")" || exit 1
cleanup() {
  local status=$?
  if [ "$status" -eq 0 ]; then
    rm -rf "$WORK_DIR"
  else
    echo "[oneshot] 日志保留在 $WORK_DIR" >&2
  fi
}
trap cleanup EXIT

VT_OUT="$WORK_DIR/vt-out.json"
VT_AFTER="$WORK_DIR/vt-after.json"
PROMPT_FILE="$WORK_DIR/oneshot-prompt.txt"

# Prints the number of failed assertions in a vitest JSON report ($1).
# Returns non-zero when the report cannot be parsed.
count_failed() {
  python3 - "$1" <<'PY'
import json, sys
report = json.load(open(sys.argv[1]))
print(sum(1 for f in report.get('testResults', [])
          for a in f.get('assertionResults', []) if a.get('status') == 'failed'))
PY
}

# Prints FILE/TEST/ERROR records for every failed assertion in a vitest JSON report ($1).
summarize_failures() {
  python3 - "$1" <<'PY'
import json, sys
report = json.load(open(sys.argv[1]))
for f in report.get('testResults', []):
    for a in f.get('assertionResults', []):
        if a.get('status') == 'failed':
            print('FILE:', f.get('name'))
            print('TEST:', a.get('fullName') or a.get('title'))
            print('ERROR:')
            for m in a.get('failureMessages', [])[:2]:
                print(m[:600])
            print('---')
PY
}

# 1. Run vitest.
npx vitest run --reporter=json --no-color 2>/dev/null > "$VT_OUT"
TOTAL_FAIL=$(count_failed "$VT_OUT" 2>/dev/null) || {
  # Without a readable report there is nothing meaningful to send to the LLM.
  echo "[oneshot] ❌ vitest 报告无法解析"
  exit 1
}
echo "[oneshot] vitest failed=$TOTAL_FAIL"

if [ "$TOTAL_FAIL" = "0" ]; then
  echo "[oneshot] ✅ 全部通过, 不需要修"
  exit 0
fi

# 2. Collect the failure details (at most 60 lines).
FAIL_SUMMARY=$(summarize_failures "$VT_OUT" | head -60)
echo "[oneshot] 2. 调 LLM 修..."

# 3. Build the prompt in a file (avoids quoting the backticks on a command line).
cat > "$PROMPT_FILE" <<PROMPT_EOF
你是一个谨慎的代码修复助手. 你的工作是修复失败的测试.

约束: 改动最小, 不改测试, 不引入 any/unknown/@ts-ignore.

输出格式: 严格只输出一个 \`\`\`diff ... \`\`\` 块, 第一个字符 \`\`\`diff, 最后一个字符 \`\`\`. 中间是 unified diff (--- a/path +++ b/path 风格). 不要在 diff 块外输出任何文字.

FAIL 信息:
${FAIL_SUMMARY}

请**只**输出 diff 块:
PROMPT_EOF

# The prompt path travels through the environment rather than being spliced into the JS source.
LLM_OUTPUT=$(PROMPT_FILE="$PROMPT_FILE" npx tsx -r dotenv/config -e "
import { initMinimax } from './src/llm/pi-ai.js';
import * as fs from 'fs';
const prompt = fs.readFileSync(process.env.PROMPT_FILE ?? '', 'utf-8');
const client = initMinimax();
const text = await client.generateText({ messages: [{ role: 'user', content: prompt }], maxTokens: 4096, temperature: 0.2 });
process.stdout.write(text || '');
" 2>"$WORK_DIR/llm-err.log")

if [ -z "$LLM_OUTPUT" ]; then
  echo "[oneshot] ❌ LLM 没返回"
  exit 2
fi

# 4. Extract the diff block. printf is used because echo may interpret a leading -n / -e.
DIFF=$(printf '%s\n' "$LLM_OUTPUT" | python3 -c '
import re, sys
m = re.search(r"```diff\s*([\s\S]*?)```", sys.stdin.read())
if not m or not m.group(1).strip():
    sys.exit(1)
print(m.group(1).strip())
' 2>/dev/null) || {
  echo "[oneshot] ❌ LLM 输出没 diff 块"
  echo "--- LLM 原始输出 (前 800) ---"
  printf '%s\n' "$LLM_OUTPUT" | head -c 800
  echo ""
  exit 3
}

echo "[oneshot] 拿到 diff: $(printf '%s\n' "$DIFF" | wc -l) lines"

# 5. Write the patch to staging.
ID="oneshot-$(date +%s)"
PATCH_DIR="staging/auto-evolve/$ID"
PATCH_FILE="$PATCH_DIR/$ID.patch"
mkdir -p "$PATCH_DIR"
printf '%s\n' "$DIFF" > "$PATCH_FILE"
echo "$ID" > "$PATCH_DIR/.patch-id"
echo "[oneshot] patch 写到 $PATCH_DIR/"

# 6. Run the reviewer; its verdict file decides, so a non-zero exit is tolerated here.
echo "[oneshot] 3. 跑 reviewer..."
npx tsx -r dotenv/config scripts/diff-reviewer.ts "$ID" > "$WORK_DIR/reviewer.log" 2>&1 || true
VERDICT=$(python3 - "$PATCH_DIR/.review-verdict" <<'PY'
import json, sys
try:
    print(json.load(open(sys.argv[1])).get('verdict', 'UNKNOWN'))
except Exception:
    print('UNKNOWN')
PY
)
echo "[oneshot] reviewer verdict: $VERDICT"

# 7. Apply + commit, only on an explicit PASS.
if [ "$VERDICT" != "PASS" ]; then
  echo "[oneshot] ❌ reviewer verdict=$VERDICT, 不 apply"
  head -10 "$WORK_DIR/reviewer.log"
  exit 5
fi

echo "[oneshot] 4. apply + commit"
if ! git apply --recount --whitespace=fix "$PATCH_FILE" 2>"$WORK_DIR/apply.err"; then
  echo "[oneshot] ❌ git apply 失败:"
  cat "$WORK_DIR/apply.err"
  exit 4
fi

git add -A
if ! git commit -m "auto-evolve: $ID (LLM 修复)"; then
  # Do not report success when the commit (e.g. a pre-commit hook) rejected the change.
  echo "[oneshot] ❌ git commit 失败"
  exit 6
fi
echo "[oneshot] ✅ 提交成功"

# Verify: re-run vitest and report the new failure count next to the old one.
npx vitest run --reporter=json --no-color 2>/dev/null > "$VT_AFTER"
AFTER_FAIL=$(count_failed "$VT_AFTER" 2>/dev/null) || AFTER_FAIL="?"
echo "[oneshot] 修复后 fail: $AFTER_FAIL (之前 $TOTAL_FAIL)"
@@ -0,0 +1,136 @@
1
#!/bin/bash
# auto-evolve-snapshot.sh — stage C guardrails 2 + 3
#
# Usage (`snapshot` must be called before the LLM changes any source):
#   bash scripts/auto-evolve-snapshot.sh [snapshot]          # tag HEAD as a baseline and record the tag
#   bash scripts/auto-evolve-snapshot.sh prepare <patch-id>  # create the staging directory for a patch
#   bash scripts/auto-evolve-snapshot.sh apply <patch-id>    # after human approval: apply staged patches
#   bash scripts/auto-evolve-snapshot.sh list                # list the most recent baselines
#   bash scripts/auto-evolve-snapshot.sh rollback <tag>      # hard-reset to the given baseline tag
#
# Flow:
#   1. before the LLM edits anything, `snapshot` tags HEAD as auto-evolve-baseline-<ts>
#   2. the LLM writes its patch to staging/auto-evolve/<patch-id>/ (never directly into src/)
#   3. guardrail 1 (lefthook) runs vitest + tsc and aborts on failure
#   4. guardrail 4 (reviewer hook) reviews the diff; only a pass allows apply
#   5. a human runs `apply`: git apply staging/auto-evolve/<patch-id>/*.patch
#   6. if something goes wrong: `rollback` → git reset --hard <tag>

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO_ROOT"

STAGING_DIR="staging/auto-evolve"
TAG_PREFIX="auto-evolve-baseline-"

cmd="${1:-snapshot}"
# Second argument: a patch id for prepare/apply, a tag name for rollback.
patch_id="${2:-}"

case "$cmd" in
  snapshot)
    # Guardrail 2: tag the baseline. Refuse when tracked files differ from HEAD.
    if ! git diff --quiet HEAD 2>/dev/null; then
      echo "❌ 有未提交改动. 先 git commit 或 git stash"
      exit 1
    fi
    ts="$(date -u +%Y%m%dT%H%M%SZ)"
    tag="${TAG_PREFIX}${ts}"
    git tag -a "$tag" -m "auto-evolve baseline @ ${ts}" HEAD
    echo "✅ baseline tag: $tag"
    echo "$tag" > .last-auto-evolve-baseline
    echo " 回滚命令: bash $0 rollback $tag"
    ;;

  prepare)
    # Called before the LLM edits: create the staging directory for this patch.
    if [ -z "$patch_id" ]; then
      echo "用法: $0 prepare <patch-id>"
      exit 1
    fi
    mkdir -p "$STAGING_DIR/$patch_id"
    echo "$patch_id" > "$STAGING_DIR/$patch_id/.patch-id"
    echo "✅ staging 创建: $STAGING_DIR/$patch_id/"
    echo " LLM 改完后把 patch 放这里: $STAGING_DIR/$patch_id/*.patch"
    ;;

  apply)
    # Human-approved step: apply the staged patches to the working tree.
    if [ -z "$patch_id" ]; then
      echo "用法: $0 apply <patch-id>"
      exit 1
    fi
    patch_dir="$STAGING_DIR/$patch_id"
    if [ ! -d "$patch_dir" ]; then
      echo "❌ staging 不存在: $patch_dir"
      exit 1
    fi
    # A baseline must have been recorded first.
    if [ ! -f .last-auto-evolve-baseline ]; then
      echo "❌ 没有 baseline. 先跑: $0 snapshot"
      exit 1
    fi
    baseline="$(cat .last-auto-evolve-baseline)"

    # Apply every patch in order; each one is dry-run checked right before it is applied.
    applied=0
    for p in "$patch_dir"/*.patch; do
      [ -e "$p" ] || continue  # the glob stays literal when nothing matches
      echo " applying: $p"
      if ! git apply --check "$p"; then
        echo "❌ patch 不可用: $p (可能已应用过)"
        exit 1
      fi
      git apply "$p"
      applied=$((applied + 1))
    done

    if [ "$applied" -eq 0 ]; then
      echo "❌ staging 里没有 .patch 文件"
      exit 1
    fi

    echo "✅ 应用 $applied 个 patch"
    echo " 建议现在跑: npm test + git commit -m 'auto-evolve: $patch_id'"
    echo " 出问题回滚: $0 rollback $baseline"
    ;;

  rollback)
    # Hard-reset to the given baseline tag (passed as the second argument).
    if [ -z "$patch_id" ]; then
      echo "用法: $0 rollback <tag>"
      echo "可用的 baseline:"
      git tag -l "${TAG_PREFIX}*" | sort -r | head -10
      exit 1
    fi
    # Exact ref lookup. The previous `git tag -l | grep -qx` treated the name as a regex and,
    # under pipefail, could report an existing tag as missing when grep exited early.
    if ! git rev-parse --quiet --verify "refs/tags/$patch_id" >/dev/null; then
      echo "❌ tag 不存在: $patch_id"
      exit 1
    fi
    echo "⚠️ 将回滚到 $patch_id (hard reset, 丢弃之后所有改动)"
    read -p "确认? [y/N] " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      git reset --hard "$patch_id"
      echo "✅ 回滚到 $patch_id"
    else
      echo "取消"
    fi
    ;;

  list)
    echo "auto-evolve baselines (最近 10):"
    git tag -l "${TAG_PREFIX}*" | sort -r | head -10 | while IFS= read -r tag; do
      msg="$(git tag -l --format='%(contents)' "$tag" | head -1)"
      echo " $tag — $msg"
    done
    if [ -f .last-auto-evolve-baseline ]; then
      echo ""
      echo "当前 baseline: $(cat .last-auto-evolve-baseline)"
    fi
    ;;

  *)
    echo "用法: $0 {snapshot|prepare <id>|apply <id>|rollback <tag>|list}"
    exit 1
    ;;
esac