agent-gauntlet 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +195 -122
- package/dist/index.js.map +8 -8
- package/package.json +1 -1
- package/skills/gauntlet-commit/SKILL.md +7 -3
- package/skills/gauntlet-run/SKILL.md +15 -19
package/dist/index.js
CHANGED
|
@@ -299,7 +299,7 @@ import { Command } from "commander";
|
|
|
299
299
|
// package.json
|
|
300
300
|
var package_default = {
|
|
301
301
|
name: "agent-gauntlet",
|
|
302
|
-
version: "1.2.
|
|
302
|
+
version: "1.2.2",
|
|
303
303
|
description: "A CLI tool for testing AI coding agents",
|
|
304
304
|
license: "MIT",
|
|
305
305
|
author: "Paul Caplan",
|
|
@@ -4105,6 +4105,10 @@ async function gitExec(args) {
|
|
|
4105
4105
|
});
|
|
4106
4106
|
return stdout;
|
|
4107
4107
|
}
|
|
4108
|
+
function splitLines(output) {
|
|
4109
|
+
return output.split(`
|
|
4110
|
+
`).map((f) => f.replace(/\r$/, "")).filter((f) => f.length > 0);
|
|
4111
|
+
}
|
|
4108
4112
|
async function computeDiffStats(baseBranch, options = {}) {
|
|
4109
4113
|
if (options.commit) {
|
|
4110
4114
|
return computeCommitDiffStats(options.commit);
|
|
@@ -4175,8 +4179,7 @@ async function computeUncommittedDiffStats() {
|
|
|
4175
4179
|
"--others",
|
|
4176
4180
|
"--exclude-standard"
|
|
4177
4181
|
]);
|
|
4178
|
-
const untrackedFiles = untrackedList
|
|
4179
|
-
`).map((f) => f.trim()).filter((f) => f.length > 0);
|
|
4182
|
+
const untrackedFiles = splitLines(untrackedList);
|
|
4180
4183
|
return {
|
|
4181
4184
|
baseRef: "uncommitted",
|
|
4182
4185
|
total: stagedFiles.total + unstagedFiles.total + untrackedFiles.length - countOverlap(stagedStatus, unstagedStatus),
|
|
@@ -4194,62 +4197,71 @@ async function getStashUntrackedFiles(fixBase, pathFilter) {
|
|
|
4194
4197
|
args.push("--", pathFilter);
|
|
4195
4198
|
}
|
|
4196
4199
|
const treeFiles = await gitExec(args);
|
|
4197
|
-
return new Set(treeFiles
|
|
4198
|
-
`).map((f) => f.trim()).filter((f) => f.length > 0));
|
|
4200
|
+
return new Set(splitLines(treeFiles));
|
|
4199
4201
|
} catch {
|
|
4200
4202
|
return new Set;
|
|
4201
4203
|
}
|
|
4202
4204
|
}
|
|
4203
|
-
async function
|
|
4204
|
-
|
|
4205
|
-
|
|
4205
|
+
async function getUnchangedCommittedFromStash(fixBaseUntrackedFiles, currentUntrackedSet, fixBase) {
|
|
4206
|
+
const committed = [...fixBaseUntrackedFiles].filter((f) => !currentUntrackedSet.has(f));
|
|
4207
|
+
if (committed.length === 0)
|
|
4208
|
+
return new Set;
|
|
4209
|
+
const unchanged = new Set;
|
|
4210
|
+
for (const file of committed) {
|
|
4206
4211
|
try {
|
|
4207
4212
|
const [oldHash, newHash] = await Promise.all([
|
|
4208
4213
|
gitExec(["rev-parse", `${fixBase}^3:${file}`]),
|
|
4209
4214
|
gitExec(["hash-object", "--", file])
|
|
4210
4215
|
]);
|
|
4211
|
-
if (oldHash.trim()
|
|
4212
|
-
|
|
4213
|
-
|
|
4214
|
-
} catch {
|
|
4215
|
-
changed++;
|
|
4216
|
-
}
|
|
4216
|
+
if (oldHash.trim() === newHash.trim())
|
|
4217
|
+
unchanged.add(file);
|
|
4218
|
+
} catch {}
|
|
4217
4219
|
}
|
|
4218
|
-
return
|
|
4220
|
+
return unchanged;
|
|
4219
4221
|
}
|
|
4220
4222
|
async function computeFixBaseDiffStats(fixBase) {
|
|
4221
4223
|
try {
|
|
4222
|
-
const
|
|
4223
|
-
|
|
4224
|
-
|
|
4225
|
-
|
|
4226
|
-
const
|
|
4227
|
-
|
|
4224
|
+
const [numstatRaw, nameStatusRaw] = await Promise.all([
|
|
4225
|
+
gitExec(["diff", "--numstat", fixBase]),
|
|
4226
|
+
gitExec(["diff", "--name-status", fixBase])
|
|
4227
|
+
]);
|
|
4228
|
+
const raw = await gitExec(["ls-files", "--others", "--exclude-standard"]);
|
|
4229
|
+
const currentUntrackedList = splitLines(raw);
|
|
4230
|
+
const currentUntrackedSet = new Set(currentUntrackedList);
|
|
4228
4231
|
let fixBaseTrackedFiles;
|
|
4229
4232
|
try {
|
|
4230
|
-
const
|
|
4231
|
-
|
|
4232
|
-
"-r",
|
|
4233
|
-
"--name-only",
|
|
4234
|
-
fixBase
|
|
4235
|
-
]);
|
|
4236
|
-
fixBaseTrackedFiles = new Set(treeFiles.split(`
|
|
4237
|
-
`).map((f) => f.trim()).filter((f) => f.length > 0));
|
|
4233
|
+
const t = await gitExec(["ls-tree", "-r", "--name-only", fixBase]);
|
|
4234
|
+
fixBaseTrackedFiles = new Set(splitLines(t));
|
|
4238
4235
|
} catch {
|
|
4239
4236
|
fixBaseTrackedFiles = new Set;
|
|
4240
4237
|
}
|
|
4241
4238
|
const fixBaseUntrackedFiles = await getStashUntrackedFiles(fixBase);
|
|
4239
|
+
const unchangedCommittedFromStash = await getUnchangedCommittedFromStash(fixBaseUntrackedFiles, currentUntrackedSet, fixBase);
|
|
4240
|
+
const fileStats = parseNameStatus(nameStatusRaw, unchangedCommittedFromStash);
|
|
4241
|
+
const lineStats = parseNumstat(numstatRaw, unchangedCommittedFromStash);
|
|
4242
4242
|
const allSnapshotFiles = new Set([
|
|
4243
4243
|
...fixBaseTrackedFiles,
|
|
4244
4244
|
...fixBaseUntrackedFiles
|
|
4245
4245
|
]);
|
|
4246
|
-
const
|
|
4247
|
-
const
|
|
4248
|
-
|
|
4246
|
+
const newUntrackedCount = currentUntrackedList.filter((f) => !allSnapshotFiles.has(f)).length;
|
|
4247
|
+
const knownUntracked = currentUntrackedList.filter((f) => fixBaseUntrackedFiles.has(f));
|
|
4248
|
+
let changedKnownUntracked = 0;
|
|
4249
|
+
for (const file of knownUntracked) {
|
|
4250
|
+
try {
|
|
4251
|
+
const [o, n] = await Promise.all([
|
|
4252
|
+
gitExec(["rev-parse", `${fixBase}^3:${file}`]),
|
|
4253
|
+
gitExec(["hash-object", "--", file])
|
|
4254
|
+
]);
|
|
4255
|
+
if (o.trim() !== n.trim())
|
|
4256
|
+
changedKnownUntracked++;
|
|
4257
|
+
} catch {
|
|
4258
|
+
changedKnownUntracked++;
|
|
4259
|
+
}
|
|
4260
|
+
}
|
|
4249
4261
|
return {
|
|
4250
4262
|
baseRef: fixBase,
|
|
4251
|
-
total: fileStats.total +
|
|
4252
|
-
newFiles: fileStats.newFiles +
|
|
4263
|
+
total: fileStats.total + newUntrackedCount + changedKnownUntracked,
|
|
4264
|
+
newFiles: fileStats.newFiles + newUntrackedCount,
|
|
4253
4265
|
modifiedFiles: fileStats.modifiedFiles + changedKnownUntracked,
|
|
4254
4266
|
deletedFiles: fileStats.deletedFiles,
|
|
4255
4267
|
linesAdded: lineStats.linesAdded,
|
|
@@ -4321,8 +4333,7 @@ async function computeLocalDiffStats(baseBranch) {
|
|
|
4321
4333
|
"--others",
|
|
4322
4334
|
"--exclude-standard"
|
|
4323
4335
|
]);
|
|
4324
|
-
const untrackedFiles = untrackedList
|
|
4325
|
-
`).map((f) => f.trim()).filter((f) => f.length > 0);
|
|
4336
|
+
const untrackedFiles = splitLines(untrackedList);
|
|
4326
4337
|
const totalNew = committedFiles.newFiles + uncommittedFiles.newFiles + untrackedFiles.length;
|
|
4327
4338
|
const totalModified = committedFiles.modifiedFiles + uncommittedFiles.modifiedFiles;
|
|
4328
4339
|
const totalDeleted = committedFiles.deletedFiles + uncommittedFiles.deletedFiles;
|
|
@@ -4336,7 +4347,12 @@ async function computeLocalDiffStats(baseBranch) {
|
|
|
4336
4347
|
linesRemoved: committedLines.linesRemoved + uncommittedLines.linesRemoved
|
|
4337
4348
|
};
|
|
4338
4349
|
}
|
|
4339
|
-
function
|
|
4350
|
+
function parseNumstatValue(val) {
|
|
4351
|
+
if (!val || val === "-")
|
|
4352
|
+
return 0;
|
|
4353
|
+
return parseInt(val, 10) || 0;
|
|
4354
|
+
}
|
|
4355
|
+
function parseNumstat(output, excludeFiles) {
|
|
4340
4356
|
let linesAdded = 0;
|
|
4341
4357
|
let linesRemoved = 0;
|
|
4342
4358
|
for (const line of output.split(`
|
|
@@ -4346,18 +4362,15 @@ function parseNumstat(output) {
|
|
|
4346
4362
|
const parts = line.split("\t");
|
|
4347
4363
|
if (parts.length < 3)
|
|
4348
4364
|
continue;
|
|
4349
|
-
const
|
|
4350
|
-
|
|
4351
|
-
|
|
4352
|
-
|
|
4353
|
-
|
|
4354
|
-
if (removed && removed !== "-") {
|
|
4355
|
-
linesRemoved += parseInt(removed, 10) || 0;
|
|
4356
|
-
}
|
|
4365
|
+
const file = parts[2];
|
|
4366
|
+
if (excludeFiles && file && excludeFiles.has(file.trim()))
|
|
4367
|
+
continue;
|
|
4368
|
+
linesAdded += parseNumstatValue(parts[0]);
|
|
4369
|
+
linesRemoved += parseNumstatValue(parts[1]);
|
|
4357
4370
|
}
|
|
4358
4371
|
return { linesAdded, linesRemoved };
|
|
4359
4372
|
}
|
|
4360
|
-
function parseNameStatus(output) {
|
|
4373
|
+
function parseNameStatus(output, excludeFiles) {
|
|
4361
4374
|
let newFiles = 0;
|
|
4362
4375
|
let modifiedFiles = 0;
|
|
4363
4376
|
let deletedFiles = 0;
|
|
@@ -4365,7 +4378,11 @@ function parseNameStatus(output) {
|
|
|
4365
4378
|
`)) {
|
|
4366
4379
|
if (!line.trim())
|
|
4367
4380
|
continue;
|
|
4368
|
-
const
|
|
4381
|
+
const parts = line.split("\t");
|
|
4382
|
+
const status = parts[0]?.[0];
|
|
4383
|
+
const file = parts[1]?.trim();
|
|
4384
|
+
if (excludeFiles && file && excludeFiles.has(file))
|
|
4385
|
+
continue;
|
|
4369
4386
|
switch (status) {
|
|
4370
4387
|
case "A":
|
|
4371
4388
|
newFiles++;
|
|
@@ -4554,18 +4571,59 @@ async function diffFallbackToDevNull(file) {
|
|
|
4554
4571
|
return placeholderDiff(file);
|
|
4555
4572
|
}
|
|
4556
4573
|
}
|
|
4574
|
+
function filterDiffByExcludeSet(diff, excludeFiles) {
|
|
4575
|
+
if (excludeFiles.size === 0)
|
|
4576
|
+
return diff;
|
|
4577
|
+
const lines = diff.split(`
|
|
4578
|
+
`);
|
|
4579
|
+
const kept = [];
|
|
4580
|
+
let skip = false;
|
|
4581
|
+
for (const line of lines) {
|
|
4582
|
+
if (line.startsWith("diff --git ")) {
|
|
4583
|
+
const match = /^diff --git (?:"a\/(.+?)"|a\/(\S+)) (?:"b\/.*?"|b\/\S+)$/.exec(line);
|
|
4584
|
+
const file = match?.[1] ?? match?.[2];
|
|
4585
|
+
skip = file !== undefined && excludeFiles.has(file);
|
|
4586
|
+
}
|
|
4587
|
+
if (!skip) {
|
|
4588
|
+
kept.push(line);
|
|
4589
|
+
}
|
|
4590
|
+
}
|
|
4591
|
+
while (kept.length > 0 && kept[kept.length - 1] === "") {
|
|
4592
|
+
kept.pop();
|
|
4593
|
+
}
|
|
4594
|
+
return kept.join(`
|
|
4595
|
+
`);
|
|
4596
|
+
}
|
|
4597
|
+
async function getUnchangedCommittedFromStashForDiff(snapshotUntrackedFiles, currentUntracked, fixBase) {
|
|
4598
|
+
const committedFromStash = [...snapshotUntrackedFiles].filter((f) => !currentUntracked.has(f));
|
|
4599
|
+
if (committedFromStash.length === 0) {
|
|
4600
|
+
return new Set;
|
|
4601
|
+
}
|
|
4602
|
+
const unchanged = new Set;
|
|
4603
|
+
for (const file of committedFromStash) {
|
|
4604
|
+
try {
|
|
4605
|
+
const unchanged_ = !await hasFileChangedSinceStash(file, fixBase);
|
|
4606
|
+
if (unchanged_) {
|
|
4607
|
+
unchanged.add(file);
|
|
4608
|
+
}
|
|
4609
|
+
} catch {}
|
|
4610
|
+
}
|
|
4611
|
+
return unchanged;
|
|
4612
|
+
}
|
|
4557
4613
|
async function getFixBaseDiff(entryPointPath, fixBase) {
|
|
4558
4614
|
if (!/^[a-f0-9]+$/.test(fixBase)) {
|
|
4559
4615
|
throw new Error(`Invalid session ref: ${fixBase}`);
|
|
4560
4616
|
}
|
|
4561
4617
|
const pArg = pathArg(entryPointPath);
|
|
4562
4618
|
try {
|
|
4563
|
-
const
|
|
4619
|
+
const rawDiff = await execDiff(`git diff ${fixBase}${pArg}`);
|
|
4564
4620
|
const { stdout: untrackedStdout } = await execAsync8(`git ls-files --others --exclude-standard${pArg}`, { maxBuffer: MAX_BUFFER_BYTES });
|
|
4565
4621
|
const currentUntracked = new Set(parseLines(untrackedStdout));
|
|
4566
4622
|
const { stdout: snapshotFilesStdout } = await execAsync8(`git ls-tree -r --name-only ${fixBase}${pArg}`, { maxBuffer: MAX_BUFFER_BYTES });
|
|
4567
4623
|
const snapshotTrackedFiles = new Set(parseLines(snapshotFilesStdout));
|
|
4568
4624
|
const snapshotUntrackedFiles = await getStashUntrackedFiles(fixBase, entryPointPath);
|
|
4625
|
+
const unchangedCommittedFromStash = await getUnchangedCommittedFromStashForDiff(snapshotUntrackedFiles, currentUntracked, fixBase);
|
|
4626
|
+
const diff = filterDiffByExcludeSet(rawDiff, unchangedCommittedFromStash);
|
|
4569
4627
|
const allSnapshotFiles = new Set([
|
|
4570
4628
|
...snapshotTrackedFiles,
|
|
4571
4629
|
...snapshotUntrackedFiles
|
|
@@ -4577,7 +4635,7 @@ async function getFixBaseDiff(entryPointPath, fixBase) {
|
|
|
4577
4635
|
const scopedDiff = [diff, ...newUntrackedDiffs, ...knownUntrackedDiffs].filter(Boolean).join(`
|
|
4578
4636
|
`);
|
|
4579
4637
|
log4.debug(`Scoped diff via fixBase: ${scopedDiff.split(`
|
|
4580
|
-
`).length} lines (${newUntracked.length} new, ${knownUntracked.length} known untracked)`);
|
|
4638
|
+
`).length} lines (${newUntracked.length} new, ${knownUntracked.length} known untracked, ${unchangedCommittedFromStash.size} committed-from-stash excluded)`);
|
|
4581
4639
|
return scopedDiff;
|
|
4582
4640
|
} catch (error) {
|
|
4583
4641
|
log4.warn(`Failed to compute diff against fixBase ${fixBase}, falling back to full uncommitted diff. ${error instanceof Error ? error.message : error}`);
|
|
@@ -7407,7 +7465,7 @@ function registerDetectCommand(program) {
|
|
|
7407
7465
|
const changes = await changeDetector.getChangedFiles();
|
|
7408
7466
|
if (changes.length === 0) {
|
|
7409
7467
|
console.log(chalk6.green("No changes detected."));
|
|
7410
|
-
|
|
7468
|
+
process.exit(2);
|
|
7411
7469
|
}
|
|
7412
7470
|
console.log(chalk6.dim(`Found ${changes.length} changed files:`));
|
|
7413
7471
|
for (const file of changes) {
|
|
@@ -7418,7 +7476,7 @@ function registerDetectCommand(program) {
|
|
|
7418
7476
|
const jobs = jobGen.generateJobs(entryPoints);
|
|
7419
7477
|
if (jobs.length === 0) {
|
|
7420
7478
|
console.log(chalk6.yellow("No applicable gates for these changes."));
|
|
7421
|
-
|
|
7479
|
+
process.exit(2);
|
|
7422
7480
|
}
|
|
7423
7481
|
console.log(chalk6.bold(`Would run ${jobs.length} gate(s):
|
|
7424
7482
|
`));
|
|
@@ -8121,16 +8179,22 @@ function registerHelpCommand(program) {
|
|
|
8121
8179
|
`);
|
|
8122
8180
|
console.log(chalk8.bold(`Commands:
|
|
8123
8181
|
`));
|
|
8124
|
-
console.log(" run
|
|
8125
|
-
console.log(" check
|
|
8126
|
-
console.log(" review
|
|
8127
|
-
console.log(" clean
|
|
8128
|
-
console.log(" detect
|
|
8129
|
-
console.log(" list
|
|
8130
|
-
console.log(" health
|
|
8131
|
-
console.log(" init
|
|
8132
|
-
console.log("
|
|
8133
|
-
console.log(
|
|
8182
|
+
console.log(" run Run gates for detected changes");
|
|
8183
|
+
console.log(" check Run only applicable checks");
|
|
8184
|
+
console.log(" review Run only applicable reviews");
|
|
8185
|
+
console.log(" clean Archive logs (move current logs into previous/)");
|
|
8186
|
+
console.log(" detect Show what gates would run (without executing them)");
|
|
8187
|
+
console.log(" list List configured gates");
|
|
8188
|
+
console.log(" health Check CLI tool availability");
|
|
8189
|
+
console.log(" init Initialize .gauntlet configuration");
|
|
8190
|
+
console.log(" validate Validate .gauntlet/ config files against schemas");
|
|
8191
|
+
console.log(" skip Advance execution state baseline without running gates");
|
|
8192
|
+
console.log(" status Show a summary of the most recent gauntlet session");
|
|
8193
|
+
console.log(" review-audit Audit review execution from the debug log (--date or --since)");
|
|
8194
|
+
console.log(" ci CI integration commands (init, list-jobs)");
|
|
8195
|
+
console.log(" start-hook Session start hook (primes agent with verification instructions)");
|
|
8196
|
+
console.log(" stop-hook Claude Code stop hook (validates gauntlet completion)");
|
|
8197
|
+
console.log(` help Show this help message
|
|
8134
8198
|
`);
|
|
8135
8199
|
console.log("For more information, see: https://github.com/your-repo/agent-gauntlet");
|
|
8136
8200
|
console.log("Or run: agent-gauntlet <command> --help");
|
|
@@ -8798,12 +8862,11 @@ var parseDuration = (d) => {
|
|
|
8798
8862
|
return val;
|
|
8799
8863
|
};
|
|
8800
8864
|
function getLogDir3(cwd) {
|
|
8801
|
-
const configPath = path31.join(cwd, ".gauntlet", "config.yml");
|
|
8802
8865
|
try {
|
|
8803
|
-
const
|
|
8804
|
-
const
|
|
8805
|
-
if (
|
|
8806
|
-
return
|
|
8866
|
+
const cfg = path31.join(cwd, ".gauntlet", "config.yml");
|
|
8867
|
+
const m = fs33.readFileSync(cfg, "utf-8").match(/^log_dir:\s*(.+)$/m);
|
|
8868
|
+
if (m?.[1])
|
|
8869
|
+
return m[1].trim().replace(/^["']|["']$/g, "");
|
|
8807
8870
|
} catch {}
|
|
8808
8871
|
return "gauntlet_logs";
|
|
8809
8872
|
}
|
|
@@ -8820,8 +8883,7 @@ function handleRunStart(ts, body) {
|
|
|
8820
8883
|
};
|
|
8821
8884
|
}
|
|
8822
8885
|
function handleGateResult(current, body) {
|
|
8823
|
-
const
|
|
8824
|
-
const gateId = gateIdMatch?.[1] ?? "";
|
|
8886
|
+
const gateId = body.match(/^(\S+)/)?.[1] ?? "";
|
|
8825
8887
|
if (!gateId.startsWith("review:"))
|
|
8826
8888
|
return;
|
|
8827
8889
|
const kv = parseKeyValue2(body);
|
|
@@ -8830,7 +8892,8 @@ function handleGateResult(current, body) {
|
|
|
8830
8892
|
reviewType: gateId.split(":").at(-1) ?? "other",
|
|
8831
8893
|
cli: kv.cli,
|
|
8832
8894
|
durationS: parseDuration(kv.duration ?? "0s"),
|
|
8833
|
-
violations: safeNum(kv.violations)
|
|
8895
|
+
violations: safeNum(kv.violations),
|
|
8896
|
+
violationsFixed: 0
|
|
8834
8897
|
});
|
|
8835
8898
|
} else {
|
|
8836
8899
|
current.priorPassSkips++;
|
|
@@ -8854,22 +8917,19 @@ function handleTelemetry(current, body) {
|
|
|
8854
8917
|
}
|
|
8855
8918
|
function handleRunEnd(current, body) {
|
|
8856
8919
|
const kv = parseKeyValue2(body);
|
|
8857
|
-
current.end = {
|
|
8858
|
-
status: kv.status ?? "unknown",
|
|
8859
|
-
fixed: safeNum(kv.fixed),
|
|
8860
|
-
skipped: safeNum(kv.skipped),
|
|
8861
|
-
failed: safeNum(kv.failed)
|
|
8862
|
-
};
|
|
8920
|
+
current.end = { status: kv.status ?? "unknown", failed: safeNum(kv.failed) };
|
|
8863
8921
|
}
|
|
8864
8922
|
var emptyStat = () => ({
|
|
8865
8923
|
count: 0,
|
|
8866
8924
|
totalDuration: 0,
|
|
8867
|
-
totalViolations: 0
|
|
8925
|
+
totalViolations: 0,
|
|
8926
|
+
totalViolationsFixed: 0
|
|
8868
8927
|
});
|
|
8869
8928
|
function addGate(s, g) {
|
|
8870
8929
|
s.count++;
|
|
8871
8930
|
s.totalDuration += g.durationS;
|
|
8872
8931
|
s.totalViolations += g.violations;
|
|
8932
|
+
s.totalViolationsFixed += g.violationsFixed;
|
|
8873
8933
|
}
|
|
8874
8934
|
function getOrCreate(map, key, init) {
|
|
8875
8935
|
if (!map.has(key))
|
|
@@ -8879,7 +8939,8 @@ function getOrCreate(map, key, init) {
|
|
|
8879
8939
|
var REVIEW_TYPES = ["code-quality", "task-compliance", "artifact-review"];
|
|
8880
8940
|
function accumulateBlock(block, a) {
|
|
8881
8941
|
for (const g of block.reviewGates) {
|
|
8882
|
-
const inner =
|
|
8942
|
+
const inner = a.cells.get(g.reviewType) ?? new Map;
|
|
8943
|
+
a.cells.set(g.reviewType, inner);
|
|
8883
8944
|
addGate(getOrCreate(inner, g.cli, emptyStat), g);
|
|
8884
8945
|
addGate(getOrCreate(a.cliTotals, g.cli, emptyStat), g);
|
|
8885
8946
|
addGate(getOrCreate(a.typeTotals, g.reviewType, emptyStat), g);
|
|
@@ -8956,10 +9017,9 @@ var capitalize = (s) => s.charAt(0).toUpperCase() + s.slice(1);
|
|
|
8956
9017
|
var fmtType = (t) => t.split("-").map(capitalize).join("-");
|
|
8957
9018
|
function formatCrossTable(title, rowLabels, colLabels, cell, rowTotal, colTotal, grandTotal) {
|
|
8958
9019
|
const rlW = Math.max(17, ...rowLabels.map((r) => fmtType(r).length)) + 2;
|
|
8959
|
-
const
|
|
8960
|
-
const
|
|
8961
|
-
const
|
|
8962
|
-
const totalRow = padRight("Total", rlW) + colLabels.map((c) => padRight(colTotal(c), cW)).join("") + grandTotal;
|
|
9020
|
+
const hdr = padRight("", rlW) + colLabels.map((c) => padRight(capitalize(c), 12)).join("") + "Total";
|
|
9021
|
+
const rows = rowLabels.map((r) => padRight(fmtType(r), rlW) + colLabels.map((c) => padRight(cell(r, c), 12)).join("") + rowTotal(r));
|
|
9022
|
+
const totalRow = padRight("Total", rlW) + colLabels.map((c) => padRight(colTotal(c), 12)).join("") + grandTotal;
|
|
8963
9023
|
return [title, hdr, ...rows, totalRow, ""];
|
|
8964
9024
|
}
|
|
8965
9025
|
function formatRunCounts(ct) {
|
|
@@ -8980,6 +9040,10 @@ function formatViolations(ct) {
|
|
|
8980
9040
|
const avg = (s) => s && s.count > 0 ? (s.totalViolations / s.count).toFixed(2) : "n/a";
|
|
8981
9041
|
return formatCrossTable("=== Violations (avg per run) ===", ct.allTypes, ct.allClis, (t, c) => avg(ct.cells.get(t)?.get(c)), (t) => avg(ct.typeTotals.get(t)), (c) => avg(ct.cliTotals.get(c)), avg(ct.grandTotal));
|
|
8982
9042
|
}
|
|
9043
|
+
function formatViolationsFixed(ct) {
|
|
9044
|
+
const avg = (s) => s && s.count > 0 ? (s.totalViolationsFixed / s.count).toFixed(2) : "n/a";
|
|
9045
|
+
return formatCrossTable("=== Violations Fixed (avg per run) ===", ct.allTypes, ct.allClis, (t, c) => avg(ct.cells.get(t)?.get(c)), (t) => avg(ct.typeTotals.get(t)), (c) => avg(ct.cliTotals.get(c)), avg(ct.grandTotal));
|
|
9046
|
+
}
|
|
8983
9047
|
function formatTokenEntry(t, totalRuns) {
|
|
8984
9048
|
const out = [
|
|
8985
9049
|
`${capitalize(t.adapter)} (${t.runsWithTelemetry} of ${totalRuns} runs had telemetry):`
|
|
@@ -9013,35 +9077,30 @@ function formatTokenUsage(ts, m) {
|
|
|
9013
9077
|
...ts.flatMap((t) => formatTokenEntry(t, m.get(t.adapter) ?? t.runsWithTelemetry))
|
|
9014
9078
|
];
|
|
9015
9079
|
}
|
|
9016
|
-
function
|
|
9017
|
-
const
|
|
9018
|
-
|
|
9019
|
-
|
|
9020
|
-
|
|
9021
|
-
|
|
9022
|
-
|
|
9023
|
-
const priorPass = blocks.reduce((s, b) => s + b.priorPassSkips, 0);
|
|
9024
|
-
const lines = [
|
|
9025
|
-
"=== Fix / Skip ===",
|
|
9026
|
-
`Gauntlet runs: ${total} total (${passed} passed, ${total - passed} failed)`,
|
|
9027
|
-
` Violations fixed: ${fixed}`,
|
|
9028
|
-
` Violations skipped: ${skipped}`,
|
|
9029
|
-
` Gates failed: ${failed}`,
|
|
9030
|
-
` Review gates skipped (prior pass): ${priorPass}`
|
|
9080
|
+
function formatSummary(blocks, ct, totalFixed) {
|
|
9081
|
+
const total = blocks.filter((b) => b.end).length;
|
|
9082
|
+
return [
|
|
9083
|
+
"=== Summary ===",
|
|
9084
|
+
`Gauntlet rounds: ${total}`,
|
|
9085
|
+
` Total gate executions: ${ct.grandTotal.count}`,
|
|
9086
|
+
` Total review issues fixed: ${totalFixed}`
|
|
9031
9087
|
];
|
|
9032
|
-
|
|
9033
|
-
|
|
9034
|
-
|
|
9035
|
-
|
|
9036
|
-
|
|
9037
|
-
|
|
9038
|
-
|
|
9088
|
+
}
|
|
9089
|
+
function computeViolationsFixed(blocks) {
|
|
9090
|
+
blocks.slice(1).forEach((cur, j) => {
|
|
9091
|
+
for (const g of cur.reviewGates) {
|
|
9092
|
+
const p = blocks[j]?.reviewGates.find((pg) => pg.reviewType === g.reviewType && pg.cli === g.cli);
|
|
9093
|
+
if (p && p.violations > g.violations)
|
|
9094
|
+
g.violationsFixed = p.violations - g.violations;
|
|
9095
|
+
}
|
|
9096
|
+
});
|
|
9039
9097
|
}
|
|
9040
9098
|
function formatAuditReport(blocks, date) {
|
|
9041
9099
|
if (blocks.length === 0)
|
|
9042
9100
|
return `Review Execution Audit — ${date}
|
|
9043
9101
|
|
|
9044
9102
|
No gauntlet runs found for this date.`;
|
|
9103
|
+
computeViolationsFixed(blocks);
|
|
9045
9104
|
const ct = buildCrossTab(blocks);
|
|
9046
9105
|
const tokenStats = aggregateTokenStats(blocks);
|
|
9047
9106
|
const cliBlockCount = new Map;
|
|
@@ -9055,29 +9114,27 @@ No gauntlet runs found for this date.`;
|
|
|
9055
9114
|
...formatRunCounts(ct),
|
|
9056
9115
|
...formatTiming(ct),
|
|
9057
9116
|
...formatViolations(ct),
|
|
9117
|
+
...formatViolationsFixed(ct),
|
|
9058
9118
|
...formatTokenUsage(tokenStats, cliBlockCount),
|
|
9059
|
-
...
|
|
9119
|
+
...formatSummary(blocks, ct, ct.grandTotal.totalViolationsFixed)
|
|
9060
9120
|
].join(`
|
|
9061
9121
|
`);
|
|
9062
9122
|
}
|
|
9063
9123
|
function todayLocalDate() {
|
|
9064
9124
|
const now = new Date;
|
|
9065
|
-
const y = now.getFullYear();
|
|
9066
9125
|
const mo = String(now.getMonth() + 1).padStart(2, "0");
|
|
9067
|
-
const
|
|
9068
|
-
return `${
|
|
9126
|
+
const dy = String(now.getDate()).padStart(2, "0");
|
|
9127
|
+
return `${now.getFullYear()}-${mo}-${dy}`;
|
|
9069
9128
|
}
|
|
9070
|
-
async function readBlocks(filePath,
|
|
9071
|
-
const rl = readline.createInterface({
|
|
9072
|
-
input: fs33.createReadStream(filePath)
|
|
9073
|
-
});
|
|
9129
|
+
async function readBlocks(filePath, matchDate) {
|
|
9130
|
+
const rl = readline.createInterface({ input: fs33.createReadStream(filePath) });
|
|
9074
9131
|
const blocks = [];
|
|
9075
9132
|
let current = null;
|
|
9076
9133
|
for await (const line of rl) {
|
|
9077
9134
|
if (!line.trim())
|
|
9078
9135
|
continue;
|
|
9079
9136
|
const ts = parseTimestamp2(line);
|
|
9080
|
-
if (!ts.
|
|
9137
|
+
if (!matchDate(ts.slice(0, 10)))
|
|
9081
9138
|
continue;
|
|
9082
9139
|
const event = parseEventType2(line);
|
|
9083
9140
|
const body = parseEventBody2(line);
|
|
@@ -9097,20 +9154,32 @@ async function readBlocks(filePath, date) {
|
|
|
9097
9154
|
}
|
|
9098
9155
|
return blocks;
|
|
9099
9156
|
}
|
|
9100
|
-
async function main2(date) {
|
|
9157
|
+
async function main2(date, since) {
|
|
9101
9158
|
const cwd = process.cwd();
|
|
9102
|
-
if (date &&
|
|
9159
|
+
if (date && since) {
|
|
9160
|
+
console.error("Use either --date or --since, not both.");
|
|
9161
|
+
process.exit(1);
|
|
9162
|
+
}
|
|
9163
|
+
const dateRe = /^\d{4}-\d{2}-\d{2}$/;
|
|
9164
|
+
if (date && !dateRe.test(date)) {
|
|
9103
9165
|
console.error("Invalid --date. Expected YYYY-MM-DD");
|
|
9104
9166
|
process.exit(1);
|
|
9105
9167
|
}
|
|
9106
|
-
|
|
9107
|
-
|
|
9168
|
+
if (since && !dateRe.test(since)) {
|
|
9169
|
+
console.error("Invalid --since. Expected YYYY-MM-DD");
|
|
9170
|
+
process.exit(1);
|
|
9171
|
+
}
|
|
9172
|
+
const logDir = getLogDir3(cwd);
|
|
9173
|
+
const debugLogPath = path31.join(cwd, logDir, ".debug.log");
|
|
9108
9174
|
if (!fs33.existsSync(debugLogPath)) {
|
|
9109
|
-
console.log(`No debug log found. (looked in ${
|
|
9175
|
+
console.log(`No debug log found. (looked in ${logDir}/.debug.log)`);
|
|
9110
9176
|
process.exit(0);
|
|
9111
9177
|
}
|
|
9112
|
-
const
|
|
9113
|
-
|
|
9178
|
+
const targetDate = since ?? date ?? todayLocalDate();
|
|
9179
|
+
const matchDate = since ? (d) => d >= since : (d) => d === targetDate;
|
|
9180
|
+
const label = since ? `${since} – ${todayLocalDate()}` : targetDate;
|
|
9181
|
+
const blocks = await readBlocks(debugLogPath, matchDate);
|
|
9182
|
+
console.log(formatAuditReport(blocks, label));
|
|
9114
9183
|
}
|
|
9115
9184
|
var isDirectRun2 = (import.meta.url === `file://${process.argv[1]}` || typeof Bun !== "undefined" && import.meta.url === `file://${Bun.main}`) && (process.argv[1]?.endsWith("review-audit.ts") || process.argv[1]?.endsWith("review-audit.js"));
|
|
9116
9185
|
if (isDirectRun2)
|
|
@@ -9118,8 +9187,12 @@ if (isDirectRun2)
|
|
|
9118
9187
|
|
|
9119
9188
|
// src/commands/review-audit.ts
|
|
9120
9189
|
function registerReviewAuditCommand(program) {
|
|
9121
|
-
program.command("review-audit").description("Audit review execution
|
|
9122
|
-
|
|
9190
|
+
program.command("review-audit").description("Audit review execution from the debug log (--date or --since)").option("--date <YYYY-MM-DD>", "Date to filter (default: today)").option("--since <YYYY-MM-DD>", "Include all runs from this date onwards").action(async (opts) => {
|
|
9191
|
+
if (opts.date && opts.since) {
|
|
9192
|
+
console.error("Use either --date or --since, not both.");
|
|
9193
|
+
process.exit(1);
|
|
9194
|
+
}
|
|
9195
|
+
await main2(opts.date, opts.since);
|
|
9123
9196
|
});
|
|
9124
9197
|
}
|
|
9125
9198
|
// src/core/run-executor-lock.ts
|
|
@@ -9717,4 +9790,4 @@ if (process.argv.length < 3) {
|
|
|
9717
9790
|
}
|
|
9718
9791
|
program.parse(process.argv);
|
|
9719
9792
|
|
|
9720
|
-
//# debugId=
|
|
9793
|
+
//# debugId=CF37A587784E2DFE64756E2164756E21
|