agent-gauntlet 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +106 -0
- package/package.json +51 -0
- package/src/cli-adapters/claude.ts +114 -0
- package/src/cli-adapters/codex.ts +123 -0
- package/src/cli-adapters/gemini.ts +149 -0
- package/src/cli-adapters/index.ts +79 -0
- package/src/commands/check.test.ts +25 -0
- package/src/commands/check.ts +67 -0
- package/src/commands/detect.test.ts +37 -0
- package/src/commands/detect.ts +69 -0
- package/src/commands/health.test.ts +79 -0
- package/src/commands/health.ts +148 -0
- package/src/commands/help.test.ts +44 -0
- package/src/commands/help.ts +24 -0
- package/src/commands/index.ts +9 -0
- package/src/commands/init.test.ts +105 -0
- package/src/commands/init.ts +330 -0
- package/src/commands/list.test.ts +104 -0
- package/src/commands/list.ts +29 -0
- package/src/commands/rerun.ts +118 -0
- package/src/commands/review.test.ts +25 -0
- package/src/commands/review.ts +67 -0
- package/src/commands/run.test.ts +25 -0
- package/src/commands/run.ts +64 -0
- package/src/commands/shared.ts +10 -0
- package/src/config/loader.test.ts +129 -0
- package/src/config/loader.ts +130 -0
- package/src/config/schema.ts +63 -0
- package/src/config/types.ts +23 -0
- package/src/config/validator.ts +493 -0
- package/src/core/change-detector.ts +112 -0
- package/src/core/entry-point.test.ts +63 -0
- package/src/core/entry-point.ts +80 -0
- package/src/core/job.ts +74 -0
- package/src/core/runner.ts +226 -0
- package/src/gates/check.ts +82 -0
- package/src/gates/result.ts +9 -0
- package/src/gates/review.ts +501 -0
- package/src/index.ts +38 -0
- package/src/output/console.ts +201 -0
- package/src/output/logger.ts +66 -0
- package/src/utils/log-parser.ts +228 -0
- package/src/utils/sanitizer.ts +3 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
import { exec } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
3
|
+
import { ReviewGateConfig, ReviewPromptFrontmatter } from '../config/types.js';
|
|
4
|
+
import { GateResult } from './result.js';
|
|
5
|
+
import { CLIAdapter, getAdapter } from '../cli-adapters/index.js';
|
|
6
|
+
import { PreviousViolation } from '../utils/log-parser.js';
|
|
7
|
+
|
|
8
|
+
/** Promise-returning form of `child_process.exec`, used for every git invocation in this module. */
const execAsync = promisify(exec);

/** Upper bound (10 MiB) on captured stdout/stderr for a single git command. */
const MAX_BUFFER_BYTES = 10 * 1024 ** 2;

/**
 * Instruction block appended to every review prompt. It tells the reviewing CLI
 * to stay read-only and to answer with a single JSON object in one of the two
 * shapes shown below ("fail" with violations, or "pass" with a message).
 */
const JSON_SYSTEM_INSTRUCTION = `
You are in a read-only mode. You may read files in the repository to gather context.
Do NOT attempt to modify files or run shell commands that change system state.
Do NOT access files outside the repository root.
Use your available file-reading and search tools to find information.
If the diff is insufficient or ambiguous, use your tools to read the full file content or related files.

IMPORTANT: You must output ONLY a valid JSON object. Do not output any markdown text, explanations, or code blocks outside of the JSON.
Each violation MUST include a "priority" field with one of: "critical", "high", "medium", "low".

If violations are found:
{
"status": "fail",
"violations": [
{
"file": "path/to/file.rb",
"line": 10,
"issue": "Description of the violation",
"fix": "Suggestion on how to fix it",
"priority": "high"
}
]
}

If NO violations are found:
{
"status": "pass",
"message": "No architecture violations found."
}
`;

/** Gate configuration merged with the prompt's frontmatter and (optionally) the prompt body itself. */
type ReviewConfig =
  & ReviewGateConfig
  & ReviewPromptFrontmatter
  & { promptContent?: string };
|
|
44
|
+
|
|
45
|
+
/** Verdict derived from one reviewer's raw output; `json` carries the parsed payload when one was found. */
type ReviewEvaluation = { status: 'pass' | 'fail' | 'error'; message: string; json?: any };

/** Async sink that appends text to a log. */
type ReviewLogWriter = (output: string) => Promise<void>;

/**
 * Runs an AI review gate: collects the relevant git diff, sends it together
 * with the review prompt to one or more CLI adapters, parses each adapter's
 * JSON verdict and folds the verdicts into a single gate result.
 */
export class ReviewGateExecutor {
  /**
   * Assembles the prompt sent to an adapter: the configured prompt body,
   * an optional "previous failures" section, then the JSON output instruction.
   *
   * @param config review configuration (only `promptContent` is read here)
   * @param previousViolations violations from the last run to re-verify
   */
  private constructPrompt(config: ReviewConfig, previousViolations: PreviousViolation[] = []): string {
    const sections: string[] = [config.promptContent || ''];

    if (previousViolations.length > 0) {
      // Leading '\n' yields the blank line between prompt body and the section.
      sections.push('\n' + this.buildPreviousFailuresSection(previousViolations));
    }

    sections.push(JSON_SYSTEM_INSTRUCTION);
    return sections.join('\n');
  }

  /**
   * Executes the review gate for one entry point.
   *
   * @param jobId identifier echoed back in the result
   * @param config review configuration (name, adapter preferences, parallelism, ...)
   * @param entryPointPath path the diff is restricted to
   * @param loggerFactory creates a log writer; called without a name for the main log
   *        and with the adapter name for per-adapter logs
   * @param baseBranch branch the changes are compared against
   * @param previousFailures violations from the previous run, keyed by adapter name
   * @param changeOptions restricts the diff to one commit or to uncommitted changes
   * @param checkUsageLimit forwarded to the adapters' health checks
   * @returns a gate result with status `pass`, `fail` or `error`; failures inside the
   *          gate are reported as an `error` result rather than thrown
   */
  async execute(
    jobId: string,
    config: ReviewConfig,
    entryPointPath: string,
    loggerFactory: (adapterName?: string) => Promise<(output: string) => Promise<void>>,
    baseBranch: string,
    previousFailures?: Map<string, PreviousViolation[]>,
    changeOptions?: { commit?: string; uncommitted?: boolean },
    checkUsageLimit: boolean = false
  ): Promise<GateResult> {
    const startTime = Date.now();
    const mainLogger = await loggerFactory();

    // Logs and builds an 'error' result with the elapsed time.
    const errorResult = async (msg: string): Promise<GateResult> => {
      await mainLogger(`Result: error - ${msg}\n`);
      return {
        jobId,
        status: 'error',
        duration: Date.now() - startTime,
        message: msg
      };
    };

    try {
      await mainLogger(`Starting review: ${config.name}\n`);
      await mainLogger(`Entry point: ${entryPointPath}\n`);
      await mainLogger(`Base branch: ${baseBranch}\n`);

      const diff = await this.getDiff(entryPointPath, baseBranch, changeOptions);
      if (!diff.trim()) {
        await mainLogger('No changes found in entry point, skipping review.\n');
        await mainLogger('Result: pass - No changes to review\n');
        return {
          jobId,
          status: 'pass',
          duration: Date.now() - startTime,
          message: 'No changes to review'
        };
      }

      const required = config.num_reviews ?? 1;
      const outputs: Array<{ adapter: string; status: 'pass' | 'fail' | 'error'; message: string }> = [];
      const usedAdapters = new Set<string>();

      const preferences = config.cli_preference || [];
      const parallel = config.parallel ?? false;

      if (parallel && required > 1) {
        // Parallel mode: probe adapter health in batches, checking only as many
        // adapters as are still needed to reach `required`.
        const healthyAdapters: string[] = [];
        let prefIndex = 0;

        while (healthyAdapters.length < required && prefIndex < preferences.length) {
          const batchSize = required - healthyAdapters.length;
          const batch = preferences.slice(prefIndex, prefIndex + batchSize);
          prefIndex += batchSize;

          const batchResults = await Promise.all(
            batch.map(async (toolName) => {
              const adapter = getAdapter(toolName);
              if (!adapter) return { toolName, status: 'missing' as const };
              const health = await adapter.checkHealth({ checkUsageLimit });
              return { toolName, ...health };
            })
          );

          for (const res of batchResults) {
            if (res.status === 'healthy') {
              healthyAdapters.push(res.toolName);
            } else if (res.status === 'unhealthy') {
              await mainLogger(`Skipping ${res.toolName}: ${res.message || 'Unhealthy'}\n`);
            }
          }
        }

        if (healthyAdapters.length < required) {
          return await errorResult(
            `Not enough healthy adapters. Need ${required}, found ${healthyAdapters.length}.`
          );
        }

        // Launch exactly `required` reviews at once; health was already checked above.
        const selectedAdapters = healthyAdapters.slice(0, required);
        await mainLogger(`Starting parallel reviews with: ${selectedAdapters.join(', ')}\n`);

        const results = await Promise.all(
          selectedAdapters.map((toolName) =>
            this.runSingleReview(toolName, config, diff, loggerFactory, mainLogger, previousFailures, true, checkUsageLimit)
          )
        );

        for (const res of results) {
          if (res) {
            outputs.push({ adapter: res.adapter, ...res.evaluation });
            usedAdapters.add(res.adapter);
          }
        }
      } else {
        // Sequential mode: walk the preference list until enough reviews completed.
        for (const toolName of preferences) {
          if (usedAdapters.size >= required) break;
          const res = await this.runSingleReview(toolName, config, diff, loggerFactory, mainLogger, previousFailures, false, checkUsageLimit);
          if (res) {
            outputs.push({ adapter: res.adapter, ...res.evaluation });
            usedAdapters.add(res.adapter);
          }
        }
      }

      if (usedAdapters.size < required) {
        return await errorResult(
          `Failed to complete ${required} reviews. Completed: ${usedAdapters.size}. See logs for details.`
        );
      }

      // An adapter error outranks a review failure when summarising.
      const failed = outputs.find(result => result.status === 'fail');
      const errored = outputs.find(result => result.status === 'error');

      let status: 'pass' | 'fail' | 'error' = 'pass';
      let message = 'Passed';

      if (errored) {
        status = 'error';
        message = `Error (${errored.adapter}): ${errored.message}`;
      } else if (failed) {
        status = 'fail';
        message = `Failed (${failed.adapter}): ${failed.message}`;
      }

      await mainLogger(`Result: ${status} - ${message}\n`);

      return {
        jobId,
        status,
        duration: Date.now() - startTime,
        message
      };
    } catch (error: unknown) {
      const message = this.errorMessage(error);
      await mainLogger(`Critical Error: ${message}\n`);
      await mainLogger('Result: error\n');
      return {
        jobId,
        status: 'error',
        duration: Date.now() - startTime,
        message
      };
    }
  }

  /**
   * Runs one review with the named adapter and evaluates its output.
   *
   * @param skipHealthCheck set when the caller already verified the adapter's health
   * @returns the adapter name with its evaluation, or `null` when the adapter is
   *          unknown, missing, unhealthy, or threw while executing
   */
  private async runSingleReview(
    toolName: string,
    config: ReviewConfig,
    diff: string,
    loggerFactory: (adapterName?: string) => Promise<(output: string) => Promise<void>>,
    mainLogger: (output: string) => Promise<void>,
    previousFailures?: Map<string, PreviousViolation[]>,
    skipHealthCheck: boolean = false,
    checkUsageLimit: boolean = false
  ): Promise<{ adapter: string; evaluation: { status: 'pass' | 'fail' | 'error'; message: string; json?: any } } | null> {
    const adapter = getAdapter(toolName);
    if (!adapter) return null;

    if (!skipHealthCheck) {
      const health = await adapter.checkHealth({ checkUsageLimit });
      if (health.status === 'missing') return null;
      if (health.status === 'unhealthy') {
        await mainLogger(`Skipping ${adapter.name}: ${health.message || 'Unhealthy'}\n`);
        return null;
      }
    }

    // Each adapter writes to its own log in addition to the main log.
    const adapterLogger: ReviewLogWriter = await loggerFactory(adapter.name);

    try {
      const startMsg = `[START] review:.:${config.name} (${adapter.name})`;
      await adapterLogger(`${startMsg}\n`);

      const adapterPreviousViolations = previousFailures?.get(adapter.name) || [];
      const finalPrompt = this.constructPrompt(config, adapterPreviousViolations);

      const output = await adapter.execute({
        prompt: finalPrompt,
        diff,
        model: config.model,
        // config.timeout is multiplied by 1000 to obtain milliseconds.
        timeoutMs: config.timeout ? config.timeout * 1000 : undefined
      });

      await adapterLogger(`\n--- Review Output (${adapter.name}) ---\n${output}\n`);

      const evaluation = this.evaluateOutput(output);

      // Human-readable summary of the parsed payload.
      if (evaluation.json) {
        await adapterLogger(`\n--- Parsed Result (${adapter.name}) ---\n`);
        if (evaluation.json.status === 'fail' && Array.isArray(evaluation.json.violations)) {
          await adapterLogger(`Status: FAIL\n`);
          await adapterLogger(`Violations:\n`);
          for (const [i, v] of evaluation.json.violations.entries()) {
            await adapterLogger(`${i + 1}. ${v.file}:${v.line || '?'} - ${v.issue}\n`);
            if (v.fix) await adapterLogger(` Fix: ${v.fix}\n`);
          }
        } else if (evaluation.json.status === 'pass') {
          await adapterLogger(`Status: PASS\n`);
          if (evaluation.json.message) await adapterLogger(`Message: ${evaluation.json.message}\n`);
        } else {
          await adapterLogger(`Status: ${evaluation.json.status}\n`);
          await adapterLogger(`Raw: ${JSON.stringify(evaluation.json, null, 2)}\n`);
        }
        await adapterLogger(`---------------------\n`);
      }

      const resultMsg = `Review result (${adapter.name}): ${evaluation.status} - ${evaluation.message}`;
      await adapterLogger(`${resultMsg}\n`);
      await mainLogger(`${resultMsg}\n`);

      return { adapter: adapter.name, evaluation };
    } catch (error: unknown) {
      const errorMsg = `Error running ${adapter.name}: ${this.errorMessage(error)}`;
      await adapterLogger(`${errorMsg}\n`);
      await mainLogger(`${errorMsg}\n`);
      return null;
    }
  }

  /**
   * Produces the diff text to review, restricted to `entryPointPath`.
   *
   * Precedence: explicit uncommitted mode, then an explicit commit, then the
   * CI or local default depending on the `CI` / `GITHUB_ACTIONS` environment variables.
   * Revision arguments are shell-quoted because they are interpolated into a
   * command line executed through a shell.
   */
  private async getDiff(
    entryPointPath: string,
    baseBranch: string,
    options?: { commit?: string; uncommitted?: boolean }
  ): Promise<string> {
    if (options?.uncommitted) {
      const pathArg = this.pathArg(entryPointPath);
      // Match ChangeDetector.getUncommittedChangedFiles() behavior
      const staged = await this.execDiff(`git diff --cached${pathArg}`);
      const unstaged = await this.execDiff(`git diff${pathArg}`);
      const untracked = await this.untrackedDiff(entryPointPath);
      return [staged, unstaged, untracked].filter(Boolean).join('\n');
    }

    if (options?.commit) {
      const pathArg = this.pathArg(entryPointPath);
      const commit = options.commit;
      // Match ChangeDetector.getCommitChangedFiles() behavior
      try {
        return await this.execDiff(`git diff ${this.quoteArg(`${commit}^..${commit}`)}${pathArg}`);
      } catch (error: unknown) {
        // Initial commit has no parent, so `<commit>^` is an unknown revision.
        if (this.errorText(error).includes('unknown revision')) {
          return await this.execDiff(`git diff --root ${this.quoteArg(commit)}${pathArg}`);
        }
        throw error;
      }
    }

    const isCI = process.env.CI === 'true' || process.env.GITHUB_ACTIONS === 'true';
    return isCI
      ? this.getCIDiff(entryPointPath, baseBranch)
      : this.getLocalDiff(entryPointPath, baseBranch);
  }

  /**
   * Diff for CI runs: `GITHUB_BASE_REF` (or `baseBranch`) against `GITHUB_SHA`
   * (or `HEAD`). If that command fails, falls back to the last commit only.
   */
  private async getCIDiff(entryPointPath: string, baseBranch: string): Promise<string> {
    const baseRef = process.env.GITHUB_BASE_REF || baseBranch;
    const headRef = process.env.GITHUB_SHA || 'HEAD';
    const pathArg = this.pathArg(entryPointPath);

    try {
      return await this.execDiff(`git diff ${this.quoteArg(`${baseRef}...${headRef}`)}${pathArg}`);
    } catch {
      // Quoted so that '^' is not consumed as an escape character by cmd.exe.
      return await this.execDiff(`git diff ${this.quoteArg('HEAD^...HEAD')}${pathArg}`);
    }
  }

  /**
   * Diff for local runs: commits since `baseBranch`, plus uncommitted changes
   * relative to HEAD, plus untracked files.
   */
  private async getLocalDiff(entryPointPath: string, baseBranch: string): Promise<string> {
    const pathArg = this.pathArg(entryPointPath);
    const committed = await this.execDiff(`git diff ${this.quoteArg(`${baseBranch}...HEAD`)}${pathArg}`);
    const uncommitted = await this.execDiff(`git diff HEAD${pathArg}`);
    const untracked = await this.untrackedDiff(entryPointPath);

    return [committed, uncommitted, untracked].filter(Boolean).join('\n');
  }

  /**
   * Renders every untracked, non-ignored file under `entryPointPath` as an
   * "added file" diff by comparing it against /dev/null.
   *
   * Paths are listed with `-z` (NUL-separated, unquoted) so that names with
   * non-ASCII or special characters are passed on verbatim instead of in git's
   * C-quoted form, which the follow-up diff could not open.
   */
  private async untrackedDiff(entryPointPath: string): Promise<string> {
    const pathArg = this.pathArg(entryPointPath);
    const { stdout } = await execAsync(`git ls-files -z --others --exclude-standard${pathArg}`, {
      maxBuffer: MAX_BUFFER_BYTES
    });
    const files = this.parseLines(stdout, '\0');
    const diffs: string[] = [];

    for (const file of files) {
      try {
        const diff = await this.execDiff(`git diff --no-index -- /dev/null ${this.quoteArg(file)}`);
        if (diff.trim()) diffs.push(diff);
      } catch (error: unknown) {
        // Only suppress errors for missing/deleted files (ENOENT or "Could not access");
        // re-throw everything else (permissions, git issues) so it surfaces properly.
        const msg = this.errorText(error);
        if (msg.includes('Could not access') || msg.includes('ENOENT') || msg.includes('No such file')) {
          // File was deleted/moved between listing and diff; skip it
          continue;
        }
        throw error;
      }
    }

    return diffs.join('\n');
  }

  /**
   * Runs a diff command and returns its stdout.
   *
   * A failure that carries a numeric exit code and non-empty stdout is treated
   * as success (e.g. `git diff --no-index` exits non-zero when files differ).
   * Any other failure is re-thrown.
   */
  private async execDiff(command: string): Promise<string> {
    try {
      const { stdout } = await execAsync(command, { maxBuffer: MAX_BUFFER_BYTES });
      return stdout;
    } catch (error: unknown) {
      if (typeof error === 'object' && error !== null) {
        const { code, stdout } = error as { code?: unknown; stdout?: unknown };
        if (typeof code === 'number' && typeof stdout === 'string' && stdout) {
          return stdout;
        }
      }
      throw error;
    }
  }

  /**
   * Formats the prompt section that lists violations from the previous run and
   * instructs the reviewer to verify whether each one has been fixed.
   */
  private buildPreviousFailuresSection(violations: PreviousViolation[]): string {
    const rule = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';
    const lines: string[] = [
      rule,
      'PREVIOUS FAILURES TO VERIFY (from last run)',
      rule,
      '',
      'The following violations were identified in the previous review. Your PRIMARY TASK is to verify whether these specific issues have been fixed in the current changes:',
      ''
    ];

    violations.forEach((v, i) => {
      lines.push(`${i + 1}. ${v.file}:${v.line} - ${v.issue}`);
      if (v.fix) {
        lines.push(` Suggested fix: ${v.fix}`);
      }
      lines.push('');
    });

    lines.push(
      'INSTRUCTIONS:',
      '- Check if each violation listed above has been addressed in the diff',
      '- For violations that are fixed, confirm they no longer appear',
      '- For violations that remain unfixed, include them in your violations array',
      '- Also check for any NEW violations in the changed code',
      '- Return status "pass" only if ALL previous violations are fixed AND no new violations exist',
      '',
      rule
    );

    return lines.join('\n');
  }

  /**
   * Extracts and validates the JSON verdict from a reviewer's raw output.
   *
   * Strategies, in order: a fenced json code block; the last parseable
   * `{...}` span that has a `status` field; the span from the first `{` to
   * the last `}`.
   *
   * @returns the validated verdict, or an `error` evaluation when no usable JSON is found
   */
  public evaluateOutput(output: string): { status: 'pass' | 'fail' | 'error'; message: string; json?: any } {
    try {
      // 1. Fenced code block (most reliable).
      const jsonBlockMatch = output.match(/```json\s*([\s\S]*?)\s*```/);
      if (jsonBlockMatch) {
        try {
          const json = JSON.parse(jsonBlockMatch[1]);
          return this.validateAndReturn(json);
        } catch {
          // Code block did not contain usable JSON; fall through to the other strategies.
        }
      }

      // 2. Anchor on the last '}' and walk candidate '{' positions backwards.
      //    This copes with braces in explanatory text that precedes the real JSON.
      const end = output.lastIndexOf('}');
      if (end !== -1) {
        let start = output.lastIndexOf('{', end);
        while (start !== -1) {
          const candidate = output.substring(start, end + 1);
          try {
            const json = JSON.parse(candidate);
            // A parsed object with 'status' is taken to be the verdict.
            if (json.status) {
              return this.validateAndReturn(json);
            }
          } catch {
            // Not valid JSON, keep searching backwards
          }
          // lastIndexOf clamps a negative position to 0, so searching from
          // `start - 1` when start === 0 would find index 0 again and never end.
          start = start > 0 ? output.lastIndexOf('{', start - 1) : -1;
        }
      }

      // 3. Last resort: first '{' through last '}'.
      const firstStart = output.indexOf('{');
      if (firstStart !== -1 && end !== -1 && end > firstStart) {
        try {
          const candidate = output.substring(firstStart, end + 1);
          const json = JSON.parse(candidate);
          return this.validateAndReturn(json);
        } catch {
          // Ignore
        }
      }

      return { status: 'error', message: 'No valid JSON object found in output' };
    } catch (error: unknown) {
      return { status: 'error', message: `Failed to parse JSON output: ${this.errorMessage(error)}` };
    }
  }

  /**
   * Checks the parsed payload's `status` and converts it into an evaluation.
   * For a failing payload the message contains the violation count and, when
   * available, the first violation as an example.
   */
  private validateAndReturn(json: any): ReviewEvaluation {
    if (!json.status || (json.status !== 'pass' && json.status !== 'fail')) {
      return { status: 'error', message: 'Invalid JSON: missing or invalid "status" field', json };
    }

    if (json.status === 'pass') {
      return { status: 'pass', message: json.message || 'Passed', json };
    }

    // json.status === 'fail'
    const hasList = Array.isArray(json.violations);
    const violationCount = hasList ? json.violations.length : 'some';

    let msg = `Found ${violationCount} violations`;
    if (hasList && json.violations.length > 0) {
      const first = json.violations[0];
      msg += `. Example: ${first.issue} in ${first.file}`;
    }

    return { status: 'fail', message: msg, json };
  }

  /**
   * Splits command output into non-empty entries.
   *
   * @param stdout raw command output
   * @param separator entry delimiter; with the default newline each entry is
   *        also trimmed, with any other delimiter entries are kept verbatim
   */
  private parseLines(stdout: string, separator: string = '\n'): string[] {
    const entries = stdout.split(separator);
    const cleaned = separator === '\n' ? entries.map(line => line.trim()) : entries;
    return cleaned.filter(line => line.length > 0);
  }

  /** Returns the ` -- "<path>"` suffix that limits a git command to the entry point. */
  private pathArg(entryPointPath: string): string {
    return ` -- ${this.quoteArg(entryPointPath)}`;
  }

  /** Wraps a value in double quotes, backslash-escaping `"`, `\`, `$` and backticks. */
  private quoteArg(value: string): string {
    const escaped = value.replace(/(["\\$`])/g, '\\$1');
    return `"${escaped}"`;
  }

  /** Best-effort message for a caught value of unknown type. */
  private errorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Message plus `stderr` (when the caught value carries one), joined by a newline. */
  private errorText(error: unknown): string {
    const parts: string[] = [this.errorMessage(error)];
    if (typeof error === 'object' && error !== null) {
      const { stderr } = error as { stderr?: unknown };
      if (typeof stderr === 'string' && stderr) parts.push(stderr);
    }
    return parts.join('\n');
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
import { Command } from 'commander';
|
|
3
|
+
import {
|
|
4
|
+
registerRunCommand,
|
|
5
|
+
registerRerunCommand,
|
|
6
|
+
registerCheckCommand,
|
|
7
|
+
registerReviewCommand,
|
|
8
|
+
registerDetectCommand,
|
|
9
|
+
registerListCommand,
|
|
10
|
+
registerHealthCommand,
|
|
11
|
+
registerInitCommand,
|
|
12
|
+
registerHelpCommand,
|
|
13
|
+
} from './commands/index.js';
|
|
14
|
+
|
|
15
|
+
// CLI entry point: builds the commander program, registers every sub-command
// and dispatches on process.argv.
const program = new Command();

program
  .name('agent-gauntlet')
  .description('AI-assisted quality gates')
  // Must match the published package version (this release is 0.1.4);
  // the previous literal '0.1.0' was stale.
  .version('0.1.4');

// Register all commands
registerRunCommand(program);
registerRerunCommand(program);
registerCheckCommand(program);
registerReviewCommand(program);
registerDetectCommand(program);
registerListCommand(program);
registerHealthCommand(program);
registerInitCommand(program);
registerHelpCommand(program);

// Default action: when no sub-command or option is given, behave like `help`.
if (process.argv.length < 3) {
  process.argv.push('help');
}

program.parse(process.argv);
|