agent-gauntlet 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -299,7 +299,7 @@ import { Command } from "commander";
299
299
  // package.json
300
300
  var package_default = {
301
301
  name: "agent-gauntlet",
302
- version: "1.1.0",
302
+ version: "1.2.1",
303
303
  description: "A CLI tool for testing AI coding agents",
304
304
  license: "MIT",
305
305
  author: "Paul Caplan",
@@ -4105,6 +4105,10 @@ async function gitExec(args) {
4105
4105
  });
4106
4106
  return stdout;
4107
4107
  }
/**
 * Break command output into its non-empty lines.
 *
 * The text is split on "\n"; a single trailing carriage return is removed
 * from each piece and empty pieces are dropped. Other whitespace is kept
 * as-is, so entries with leading or trailing spaces survive unchanged.
 *
 * @param {string} output - Raw text to split.
 * @returns {string[]} The non-empty lines, in their original order.
 */
function splitLines(output) {
  const result = [];
  for (const piece of output.split("\n")) {
    const line = piece.endsWith("\r") ? piece.slice(0, -1) : piece;
    if (line !== "") {
      result.push(line);
    }
  }
  return result;
}
4108
4112
  async function computeDiffStats(baseBranch, options = {}) {
4109
4113
  if (options.commit) {
4110
4114
  return computeCommitDiffStats(options.commit);
@@ -4175,8 +4179,7 @@ async function computeUncommittedDiffStats() {
4175
4179
  "--others",
4176
4180
  "--exclude-standard"
4177
4181
  ]);
4178
- const untrackedFiles = untrackedList.split(`
4179
- `).map((f) => f.trim()).filter((f) => f.length > 0);
4182
+ const untrackedFiles = splitLines(untrackedList);
4180
4183
  return {
4181
4184
  baseRef: "uncommitted",
4182
4185
  total: stagedFiles.total + unstagedFiles.total + untrackedFiles.length - countOverlap(stagedStatus, unstagedStatus),
@@ -4194,62 +4197,71 @@ async function getStashUntrackedFiles(fixBase, pathFilter) {
4194
4197
  args.push("--", pathFilter);
4195
4198
  }
4196
4199
  const treeFiles = await gitExec(args);
4197
- return new Set(treeFiles.split(`
4198
- `).map((f) => f.trim()).filter((f) => f.length > 0));
4200
+ return new Set(splitLines(treeFiles));
4199
4201
  } catch {
4200
4202
  return new Set;
4201
4203
  }
4202
4204
  }
/**
 * Collect the files that were listed as untracked in the fixBase snapshot,
 * are no longer untracked now, and whose current on-disk content hashes to
 * the same blob as the snapshot copy.
 *
 * @param {Iterable<string>} fixBaseUntrackedFiles - Paths untracked in the snapshot.
 * @param {Set<string>} currentUntrackedSet - Paths that are untracked right now.
 * @param {string} fixBase - Ref of the snapshot; the snapshot copy is read from
 *   `<fixBase>^3:<file>` (presumably the stash's untracked-files parent — confirm).
 * @returns {Promise<Set<string>>} Paths whose content is unchanged.
 */
async function getUnchangedCommittedFromStash(fixBaseUntrackedFiles, currentUntrackedSet, fixBase) {
  const unchanged = new Set();
  const candidates = Array.from(fixBaseUntrackedFiles).filter((name) => !currentUntrackedSet.has(name));
  for (const name of candidates) {
    try {
      const [snapshotHash, worktreeHash] = await Promise.all([
        gitExec(["rev-parse", `${fixBase}^3:${name}`]),
        gitExec(["hash-object", "--", name])
      ]);
      if (snapshotHash.trim() === worktreeHash.trim()) {
        unchanged.add(name);
      }
    } catch {
      // Either git call failed: the file cannot be shown to be unchanged,
      // so it is simply left out of the result.
    }
  }
  return unchanged;
}
4220
4222
  async function computeFixBaseDiffStats(fixBase) {
4221
4223
  try {
4222
- const numstat = await gitExec(["diff", "--numstat", fixBase]);
4223
- const lineStats = parseNumstat(numstat);
4224
- const nameStatus = await gitExec(["diff", "--name-status", fixBase]);
4225
- const fileStats = parseNameStatus(nameStatus);
4226
- const currentUntracked = (await gitExec(["ls-files", "--others", "--exclude-standard"])).split(`
4227
- `).map((f) => f.trim()).filter((f) => f.length > 0);
4224
+ const [numstatRaw, nameStatusRaw] = await Promise.all([
4225
+ gitExec(["diff", "--numstat", fixBase]),
4226
+ gitExec(["diff", "--name-status", fixBase])
4227
+ ]);
4228
+ const raw = await gitExec(["ls-files", "--others", "--exclude-standard"]);
4229
+ const currentUntrackedList = splitLines(raw);
4230
+ const currentUntrackedSet = new Set(currentUntrackedList);
4228
4231
  let fixBaseTrackedFiles;
4229
4232
  try {
4230
- const treeFiles = await gitExec([
4231
- "ls-tree",
4232
- "-r",
4233
- "--name-only",
4234
- fixBase
4235
- ]);
4236
- fixBaseTrackedFiles = new Set(treeFiles.split(`
4237
- `).map((f) => f.trim()).filter((f) => f.length > 0));
4233
+ const t = await gitExec(["ls-tree", "-r", "--name-only", fixBase]);
4234
+ fixBaseTrackedFiles = new Set(splitLines(t));
4238
4235
  } catch {
4239
4236
  fixBaseTrackedFiles = new Set;
4240
4237
  }
4241
4238
  const fixBaseUntrackedFiles = await getStashUntrackedFiles(fixBase);
4239
+ const unchangedCommittedFromStash = await getUnchangedCommittedFromStash(fixBaseUntrackedFiles, currentUntrackedSet, fixBase);
4240
+ const fileStats = parseNameStatus(nameStatusRaw, unchangedCommittedFromStash);
4241
+ const lineStats = parseNumstat(numstatRaw, unchangedCommittedFromStash);
4242
4242
  const allSnapshotFiles = new Set([
4243
4243
  ...fixBaseTrackedFiles,
4244
4244
  ...fixBaseUntrackedFiles
4245
4245
  ]);
4246
- const newUntrackedFiles = currentUntracked.filter((f) => !allSnapshotFiles.has(f));
4247
- const knownUntrackedFiles = currentUntracked.filter((f) => fixBaseUntrackedFiles.has(f));
4248
- const changedKnownUntracked = await countChangedStashUntracked(knownUntrackedFiles, fixBase);
4246
+ const newUntrackedCount = currentUntrackedList.filter((f) => !allSnapshotFiles.has(f)).length;
4247
+ const knownUntracked = currentUntrackedList.filter((f) => fixBaseUntrackedFiles.has(f));
4248
+ let changedKnownUntracked = 0;
4249
+ for (const file of knownUntracked) {
4250
+ try {
4251
+ const [o, n] = await Promise.all([
4252
+ gitExec(["rev-parse", `${fixBase}^3:${file}`]),
4253
+ gitExec(["hash-object", "--", file])
4254
+ ]);
4255
+ if (o.trim() !== n.trim())
4256
+ changedKnownUntracked++;
4257
+ } catch {
4258
+ changedKnownUntracked++;
4259
+ }
4260
+ }
4249
4261
  return {
4250
4262
  baseRef: fixBase,
4251
- total: fileStats.total + newUntrackedFiles.length + changedKnownUntracked,
4252
- newFiles: fileStats.newFiles + newUntrackedFiles.length,
4263
+ total: fileStats.total + newUntrackedCount + changedKnownUntracked,
4264
+ newFiles: fileStats.newFiles + newUntrackedCount,
4253
4265
  modifiedFiles: fileStats.modifiedFiles + changedKnownUntracked,
4254
4266
  deletedFiles: fileStats.deletedFiles,
4255
4267
  linesAdded: lineStats.linesAdded,
@@ -4321,8 +4333,7 @@ async function computeLocalDiffStats(baseBranch) {
4321
4333
  "--others",
4322
4334
  "--exclude-standard"
4323
4335
  ]);
4324
- const untrackedFiles = untrackedList.split(`
4325
- `).map((f) => f.trim()).filter((f) => f.length > 0);
4336
+ const untrackedFiles = splitLines(untrackedList);
4326
4337
  const totalNew = committedFiles.newFiles + uncommittedFiles.newFiles + untrackedFiles.length;
4327
4338
  const totalModified = committedFiles.modifiedFiles + uncommittedFiles.modifiedFiles;
4328
4339
  const totalDeleted = committedFiles.deletedFiles + uncommittedFiles.deletedFiles;
@@ -4336,7 +4347,12 @@ async function computeLocalDiffStats(baseBranch) {
4336
4347
  linesRemoved: committedLines.linesRemoved + uncommittedLines.linesRemoved
4337
4348
  };
4338
4349
  }
4339
- function parseNumstat(output) {
/**
 * Convert one count column of numstat output to a number.
 *
 * @param {string | undefined} val - Column text; may be missing or "-".
 * @returns {number} The parsed base-10 integer, or 0 when the value is
 *   missing, is "-", or does not parse to a non-zero number.
 */
function parseNumstatValue(val) {
  const isAbsent = !val || val === "-";
  if (isAbsent) {
    return 0;
  }
  const parsed = Number.parseInt(val, 10);
  // NaN and zero both collapse to 0.
  return parsed ? parsed : 0;
}
4355
+ function parseNumstat(output, excludeFiles) {
4340
4356
  let linesAdded = 0;
4341
4357
  let linesRemoved = 0;
4342
4358
  for (const line of output.split(`
@@ -4346,18 +4362,15 @@ function parseNumstat(output) {
4346
4362
  const parts = line.split("\t");
4347
4363
  if (parts.length < 3)
4348
4364
  continue;
4349
- const added = parts[0];
4350
- const removed = parts[1];
4351
- if (added && added !== "-") {
4352
- linesAdded += parseInt(added, 10) || 0;
4353
- }
4354
- if (removed && removed !== "-") {
4355
- linesRemoved += parseInt(removed, 10) || 0;
4356
- }
4365
+ const file = parts[2];
4366
+ if (excludeFiles && file && excludeFiles.has(file.trim()))
4367
+ continue;
4368
+ linesAdded += parseNumstatValue(parts[0]);
4369
+ linesRemoved += parseNumstatValue(parts[1]);
4357
4370
  }
4358
4371
  return { linesAdded, linesRemoved };
4359
4372
  }
4360
- function parseNameStatus(output) {
4373
+ function parseNameStatus(output, excludeFiles) {
4361
4374
  let newFiles = 0;
4362
4375
  let modifiedFiles = 0;
4363
4376
  let deletedFiles = 0;
@@ -4365,7 +4378,11 @@ function parseNameStatus(output) {
4365
4378
  `)) {
4366
4379
  if (!line.trim())
4367
4380
  continue;
4368
- const status = line[0];
4381
+ const parts = line.split("\t");
4382
+ const status = parts[0]?.[0];
4383
+ const file = parts[1]?.trim();
4384
+ if (excludeFiles && file && excludeFiles.has(file))
4385
+ continue;
4369
4386
  switch (status) {
4370
4387
  case "A":
4371
4388
  newFiles++;
@@ -4554,18 +4571,59 @@ async function diffFallbackToDevNull(file) {
4554
4571
  return placeholderDiff(file);
4555
4572
  }
4556
4573
  }
/**
 * Remove the per-file sections of a git diff whose path is in `excludeFiles`.
 *
 * A section starts at a `diff --git` header line and runs until the next
 * header. The path is taken from the "a/" side of the header. Headers that
 * cannot be parsed are never excluded.
 *
 * @param {string} diff - Diff text as produced by `git diff`.
 * @param {Set<string>} excludeFiles - Paths whose sections are dropped.
 * @returns {string} The input itself when `excludeFiles` is empty; otherwise
 *   the remaining lines joined with "\n", with trailing empty lines removed.
 */
function filterDiffByExcludeSet(diff, excludeFiles) {
  if (excludeFiles.size === 0) {
    return diff;
  }
  const headerPrefix = "diff --git ";
  const headerPattern = /^diff --git (?:"a\/(.+?)"|a\/(\S+)) (?:"b\/.*?"|b\/\S+)$/;

  // Resolve the "a/" side path of a header line, or undefined if unparseable.
  const pathFromHeader = (line) => {
    const rest = line.slice(headerPrefix.length);
    // Unquoted header with the same path on both sides: "a/<path> b/<path>".
    // The path may contain spaces (git does not quote them), so it is split
    // by length instead of by whitespace. "a/" plus " b/" is 5 characters.
    const pathLength = (rest.length - 5) / 2;
    if (Number.isInteger(pathLength) && pathLength > 0) {
      const candidate = rest.slice(2, 2 + pathLength);
      if (rest === `a/${candidate} b/${candidate}`) {
        return candidate;
      }
    }
    // Quoted paths and headers whose two sides differ.
    const match = headerPattern.exec(line);
    return match?.[1] ?? match?.[2];
  };

  const kept = [];
  let skip = false;
  for (const line of diff.split("\n")) {
    if (line.startsWith(headerPrefix)) {
      const file = pathFromHeader(line);
      skip = file !== undefined && excludeFiles.has(file);
    }
    if (!skip) {
      kept.push(line);
    }
  }
  // Dropping a trailing section can leave blank lines at the end.
  while (kept.length > 0 && kept[kept.length - 1] === "") {
    kept.pop();
  }
  return kept.join("\n");
}
/**
 * Find snapshot-untracked files that are no longer untracked and that
 * `hasFileChangedSinceStash` reports as not changed.
 *
 * @param {Iterable<string>} snapshotUntrackedFiles - Paths untracked in the snapshot.
 * @param {Set<string>} currentUntracked - Paths that are untracked right now.
 * @param {string} fixBase - Snapshot ref passed through to the change check.
 * @returns {Promise<Set<string>>} Paths reported as unchanged; a path whose
 *   check throws is left out.
 */
async function getUnchangedCommittedFromStashForDiff(snapshotUntrackedFiles, currentUntracked, fixBase) {
  const unchanged = new Set();
  const candidates = Array.from(snapshotUntrackedFiles).filter((name) => !currentUntracked.has(name));
  for (const name of candidates) {
    let changed;
    try {
      changed = await hasFileChangedSinceStash(name, fixBase);
    } catch {
      // The check failed, so the file cannot be shown to be unchanged.
      continue;
    }
    if (!changed) {
      unchanged.add(name);
    }
  }
  return unchanged;
}
4557
4613
  async function getFixBaseDiff(entryPointPath, fixBase) {
4558
4614
  if (!/^[a-f0-9]+$/.test(fixBase)) {
4559
4615
  throw new Error(`Invalid session ref: ${fixBase}`);
4560
4616
  }
4561
4617
  const pArg = pathArg(entryPointPath);
4562
4618
  try {
4563
- const diff = await execDiff(`git diff ${fixBase}${pArg}`);
4619
+ const rawDiff = await execDiff(`git diff ${fixBase}${pArg}`);
4564
4620
  const { stdout: untrackedStdout } = await execAsync8(`git ls-files --others --exclude-standard${pArg}`, { maxBuffer: MAX_BUFFER_BYTES });
4565
4621
  const currentUntracked = new Set(parseLines(untrackedStdout));
4566
4622
  const { stdout: snapshotFilesStdout } = await execAsync8(`git ls-tree -r --name-only ${fixBase}${pArg}`, { maxBuffer: MAX_BUFFER_BYTES });
4567
4623
  const snapshotTrackedFiles = new Set(parseLines(snapshotFilesStdout));
4568
4624
  const snapshotUntrackedFiles = await getStashUntrackedFiles(fixBase, entryPointPath);
4625
+ const unchangedCommittedFromStash = await getUnchangedCommittedFromStashForDiff(snapshotUntrackedFiles, currentUntracked, fixBase);
4626
+ const diff = filterDiffByExcludeSet(rawDiff, unchangedCommittedFromStash);
4569
4627
  const allSnapshotFiles = new Set([
4570
4628
  ...snapshotTrackedFiles,
4571
4629
  ...snapshotUntrackedFiles
@@ -4577,7 +4635,7 @@ async function getFixBaseDiff(entryPointPath, fixBase) {
4577
4635
  const scopedDiff = [diff, ...newUntrackedDiffs, ...knownUntrackedDiffs].filter(Boolean).join(`
4578
4636
  `);
4579
4637
  log4.debug(`Scoped diff via fixBase: ${scopedDiff.split(`
4580
- `).length} lines (${newUntracked.length} new, ${knownUntracked.length} known untracked)`);
4638
+ `).length} lines (${newUntracked.length} new, ${knownUntracked.length} known untracked, ${unchangedCommittedFromStash.size} committed-from-stash excluded)`);
4581
4639
  return scopedDiff;
4582
4640
  } catch (error) {
4583
4641
  log4.warn(`Failed to compute diff against fixBase ${fixBase}, falling back to full uncommitted diff. ${error instanceof Error ? error.message : error}`);
@@ -8121,16 +8179,22 @@ function registerHelpCommand(program) {
8121
8179
  `);
8122
8180
  console.log(chalk8.bold(`Commands:
8123
8181
  `));
8124
- console.log(" run Run gates for detected changes");
8125
- console.log(" check Run only applicable checks");
8126
- console.log(" review Run only applicable reviews");
8127
- console.log(" clean Archive logs (move current logs into previous/)");
8128
- console.log(" detect Show what gates would run (without executing them)");
8129
- console.log(" list List configured gates");
8130
- console.log(" health Check CLI tool availability");
8131
- console.log(" init Initialize .gauntlet configuration");
8132
- console.log(" ci CI integration commands (init, list-jobs)");
8133
- console.log(` help Show this help message
8182
+ console.log(" run Run gates for detected changes");
8183
+ console.log(" check Run only applicable checks");
8184
+ console.log(" review Run only applicable reviews");
8185
+ console.log(" clean Archive logs (move current logs into previous/)");
8186
+ console.log(" detect Show what gates would run (without executing them)");
8187
+ console.log(" list List configured gates");
8188
+ console.log(" health Check CLI tool availability");
8189
+ console.log(" init Initialize .gauntlet configuration");
8190
+ console.log(" validate Validate .gauntlet/ config files against schemas");
8191
+ console.log(" skip Advance execution state baseline without running gates");
8192
+ console.log(" status Show a summary of the most recent gauntlet session");
8193
+ console.log(" review-audit Audit review execution from the debug log (--date or --since)");
8194
+ console.log(" ci CI integration commands (init, list-jobs)");
8195
+ console.log(" start-hook Session start hook (primes agent with verification instructions)");
8196
+ console.log(" stop-hook Claude Code stop hook (validates gauntlet completion)");
8197
+ console.log(` help Show this help message
8134
8198
  `);
8135
8199
  console.log("For more information, see: https://github.com/your-repo/agent-gauntlet");
8136
8200
  console.log("Or run: agent-gauntlet <command> --help");
@@ -8454,14 +8518,10 @@ var SKILLS_SOURCE_DIR = (() => {
8454
8518
  throw err;
8455
8519
  }
8456
8520
  })();
8457
- var SKILL_ACTIONS = ["run", "check", "status", "help", "setup"];
8458
- var SKILL_DESCRIPTIONS = {
8459
- run: "Run the verification suite",
8460
- check: "Run checks only (no reviews)",
8461
- status: "Show gauntlet status",
8462
- help: "Diagnose and explain gauntlet behavior",
8463
- setup: "Configure checks and reviews interactively"
8464
- };
/**
 * List the names of the directories directly inside SKILLS_SOURCE_DIR.
 *
 * @returns {Promise<string[]>} Directory names in default sort order.
 */
async function getSkillDirNames() {
  const names = [];
  for (const entry of await fs32.readdir(SKILLS_SOURCE_DIR, { withFileTypes: true })) {
    if (entry.isDirectory()) {
      names.push(entry.name);
    }
  }
  names.sort();
  return names;
}
8465
8525
  var CLI_PREFERENCE_ORDER = [
8466
8526
  "codex",
8467
8527
  "claude",
@@ -8520,7 +8580,7 @@ async function runInit(options) {
8520
8580
  }
8521
8581
  await installExternalFiles(projectRoot, hookAdapters, skipPrompts);
8522
8582
  await addToGitignore(projectRoot, "gauntlet_logs");
8523
- printPostInitInstructions(instructionCLINames);
8583
+ await printPostInitInstructions(instructionCLINames);
8524
8584
  }
8525
8585
  function printNoCLIsMessage() {
8526
8586
  console.log();
@@ -8559,8 +8619,7 @@ async function copyDirRecursive(opts) {
8559
8619
  }
8560
8620
  async function installSkillsWithChecksums(projectRoot, skipPrompts) {
8561
8621
  const skillsDir = path30.join(projectRoot, ".claude", "skills");
8562
- for (const action of SKILL_ACTIONS) {
8563
- const dirName = `gauntlet-${action}`;
8622
+ for (const dirName of await getSkillDirNames()) {
8564
8623
  const sourceDir = path30.join(SKILLS_SOURCE_DIR, dirName);
8565
8624
  const targetDir = path30.join(skillsDir, dirName);
8566
8625
  const relativeDir = `${path30.relative(projectRoot, targetDir)}/`;
@@ -8585,7 +8644,7 @@ async function installExternalFiles(projectRoot, devAdapters, skipPrompts) {
8585
8644
  await installSkillsWithChecksums(projectRoot, skipPrompts);
8586
8645
  await installHooksForAdapters(projectRoot, devAdapters, skipPrompts);
8587
8646
  }
8588
- function printPostInitInstructions(devCLINames) {
8647
+ async function printPostInitInstructions(devCLINames) {
8589
8648
  const hasNative = devCLINames.some((name) => NATIVE_CLIS.has(name));
8590
8649
  const nonNativeNames = devCLINames.filter((name) => !NATIVE_CLIS.has(name));
8591
8650
  const hasNonNative = nonNativeNames.length > 0;
@@ -8597,8 +8656,8 @@ function printPostInitInstructions(devCLINames) {
8597
8656
  console.log(chalk11.bold("To complete setup, reference the setup skill in your CLI: @.claude/skills/gauntlet-setup/SKILL.md. This will guide you through configuring the static checks (unit tests, linters, etc.) that Agent Gauntlet will run."));
8598
8657
  console.log();
8599
8658
  console.log("Available skills:");
8600
- for (const action of SKILL_ACTIONS) {
8601
- console.log(` @.claude/skills/gauntlet-${action}/SKILL.md — ${SKILL_DESCRIPTIONS[action]}`);
8659
+ for (const dirName of await getSkillDirNames()) {
8660
+ console.log(` @.claude/skills/${dirName}/SKILL.md`);
8602
8661
  }
8603
8662
  }
8604
8663
  }
@@ -8775,9 +8834,370 @@ function registerReviewCommand(program) {
8775
8834
  enableReviews: new Set(options.enableReview ?? [])
8776
8835
  }));
8777
8836
  }
8778
- // src/core/run-executor-lock.ts
8779
- import fs33 from "node:fs/promises";
8837
+ // src/scripts/review-audit.ts
8838
+ import fs33 from "node:fs";
8780
8839
  import path31 from "node:path";
8840
+ import readline from "node:readline";
/**
 * Extract every `key=value` token from a text.
 *
 * A key is a run of word characters and a value is a run of non-whitespace
 * characters. When a key occurs more than once the last value wins.
 *
 * @param {string} text - Text to scan.
 * @returns {Record<string, string>} Map of key to raw string value.
 */
function parseKeyValue2(text) {
  const pairs = {};
  for (const token of text.matchAll(/(\w+)=(\S+)/g)) {
    const name = token[1];
    const raw = token[2];
    if (name && raw) {
      pairs[name] = raw;
    }
  }
  return pairs;
}
/** Text inside the leading `[...]` of a log line, or "" when there is none. */
var parseTimestamp2 = (line) => {
  const found = line.match(/^\[([^\]]+)\]/);
  return found ? found[1] : "";
};
/** First whitespace-delimited word after the leading `[...]`, or "". */
var parseEventType2 = (line) => {
  const found = line.match(/^\[[^\]]+\]\s+(\S+)/);
  return found ? found[1] : "";
};
/** Everything after the event word (leading whitespace skipped), or "". */
var parseEventBody2 = (line) => {
  const found = line.match(/^\[[^\]]+\]\s+\S+\s*(.*)/);
  return found ? found[1] : "";
};
/**
 * Convert a value to a number, mapping null, undefined and anything that
 * converts to NaN to 0.
 *
 * @param {unknown} v - Value to convert.
 * @returns {number} The numeric value, or 0.
 */
var safeNum = (v) => {
  const converted = Number(v == null ? 0 : v);
  if (Number.isNaN(converted)) {
    return 0;
  }
  return converted;
};
/**
 * Convert a duration string to seconds.
 *
 * Accepts digits and dots followed by an optional unit: "ms" (divided by
 * 1000), "m" (multiplied by 60), or "s" / no unit (taken as seconds). Input
 * that does not match, or whose number does not parse, yields 0.
 *
 * @param {string} d - Duration text such as "250ms", "1.5s" or "2m".
 * @returns {number} Duration in seconds.
 */
var parseDuration = (d) => {
  const [, digits, unit] = d.match(/^([\d.]+)(ms|s|m)?$/) ?? [];
  const numeric = Number(digits ?? 0);
  const amount = Number.isNaN(numeric) ? 0 : numeric;
  switch (unit) {
    case "ms":
      return amount / 1000;
    case "m":
      return amount * 60;
    default:
      return amount;
  }
};
/**
 * Read the `log_dir` setting from `<cwd>/.gauntlet/config.yml`.
 *
 * The file is scanned with a regular expression for a line starting with
 * `log_dir:`. The value is trimmed and one leading and one trailing quote
 * character are removed.
 *
 * @param {string} cwd - Directory that contains the `.gauntlet` folder.
 * @returns {string} The configured directory, or "gauntlet_logs" when the
 *   file cannot be read or has no `log_dir` line.
 */
function getLogDir3(cwd) {
  const fallback = "gauntlet_logs";
  let contents;
  try {
    contents = fs33.readFileSync(path31.join(cwd, ".gauntlet", "config.yml"), "utf-8");
  } catch {
    // Missing or unreadable config: use the default directory.
    return fallback;
  }
  const configured = /^log_dir:\s*(.+)$/m.exec(contents)?.[1];
  if (!configured) {
    return fallback;
  }
  return configured.trim().replace(/^["']|["']$/g, "");
}
/**
 * Create a fresh run record from a RUN_START event.
 *
 * @param {string} ts - Timestamp of the event.
 * @param {string} body - Event body holding `key=value` tokens; `mode`,
 *   `lines_added` and `lines_removed` are read.
 * @returns {object} Run record with empty gate and telemetry lists.
 */
function handleRunStart(ts, body) {
  const fields = parseKeyValue2(body);
  const run = {
    timestamp: ts,
    mode: fields.mode ?? "unknown",
    linesAdded: safeNum(fields.lines_added),
    linesRemoved: safeNum(fields.lines_removed),
    reviewGates: [],
    priorPassSkips: 0,
    telemetry: []
  };
  return run;
}
/**
 * Record a GATE_RESULT event on the current run.
 *
 * Only gates whose id starts with "review:" are considered. A result that
 * carries a `cli` field is appended to `current.reviewGates`; one without
 * it increments `current.priorPassSkips`.
 *
 * @param {object} current - Run record being filled in (mutated).
 * @param {string} body - Event body: gate id followed by `key=value` tokens.
 * @returns {void}
 */
function handleGateResult(current, body) {
  const idMatch = body.match(/^(\S+)/);
  const gateId = idMatch ? idMatch[1] : "";
  if (!gateId.startsWith("review:")) {
    return;
  }
  const fields = parseKeyValue2(body);
  if (!fields.cli) {
    current.priorPassSkips += 1;
    return;
  }
  const gate = {
    // The review type is the last ":"-separated segment of the gate id.
    reviewType: gateId.split(":").at(-1) ?? "other",
    cli: fields.cli,
    durationS: parseDuration(fields.duration ?? "0s"),
    violations: safeNum(fields.violations),
    violationsFixed: 0
  };
  current.reviewGates.push(gate);
}
/**
 * Record a TELEMETRY event on the current run.
 *
 * Events without an `adapter` field are ignored. Every counter that is
 * missing or not numeric is stored as 0.
 *
 * @param {object} current - Run record being filled in (mutated).
 * @param {string} body - Event body holding `key=value` tokens.
 * @returns {void}
 */
function handleTelemetry(current, body) {
  const fields = parseKeyValue2(body);
  if (!fields.adapter) {
    return;
  }
  // [record property, key in the log line]
  const counterSources = [
    ["inTokens", "in"],
    ["cacheTokens", "cache"],
    ["outTokens", "out"],
    ["thoughtTokens", "thought"],
    ["toolTokens", "tool"],
    ["apiRequests", "api_requests"],
    ["cacheRead", "cacheRead"],
    ["cacheWrite", "cacheWrite"]
  ];
  const entry = { adapter: fields.adapter };
  for (const [property, logKey] of counterSources) {
    entry[property] = safeNum(fields[logKey]);
  }
  current.telemetry.push(entry);
}
/**
 * Record a RUN_END event by setting `current.end`.
 *
 * @param {object} current - Run record being filled in (mutated).
 * @param {string} body - Event body; `status` and `failed` are read.
 * @returns {void}
 */
function handleRunEnd(current, body) {
  const fields = parseKeyValue2(body);
  const status = fields.status ?? "unknown";
  const failed = safeNum(fields.failed);
  current.end = { status, failed };
}
/**
 * Create a new, zeroed statistics accumulator.
 *
 * @returns {{count: number, totalDuration: number, totalViolations: number, totalViolationsFixed: number}}
 *   A fresh object on every call.
 */
var emptyStat = () => {
  const stat = {
    count: 0,
    totalDuration: 0,
    totalViolations: 0,
    totalViolationsFixed: 0
  };
  return stat;
};
/**
 * Fold one review gate result into a statistics accumulator.
 *
 * @param {object} s - Accumulator to update in place.
 * @param {object} g - Gate result with `durationS`, `violations` and
 *   `violationsFixed`.
 * @returns {void}
 */
function addGate(s, g) {
  const { durationS, violations, violationsFixed } = g;
  s.count += 1;
  s.totalDuration += durationS;
  s.totalViolations += violations;
  s.totalViolationsFixed += violationsFixed;
}
/**
 * Return the value stored under `key`, creating it with `init()` first when
 * the map has no such key.
 *
 * @param {Map} map - Map to read from and, if needed, insert into.
 * @param {*} key - Key to look up.
 * @param {() => *} init - Factory called only when the key is absent.
 * @returns {*} The stored value.
 */
function getOrCreate(map, key, init) {
  if (map.has(key)) {
    return map.get(key);
  }
  map.set(key, init());
  return map.get(key);
}
8939
+ var REVIEW_TYPES = ["code-quality", "task-compliance", "artifact-review"];
/**
 * Fold one run's review gates into the cross-tab accumulator.
 *
 * Each gate is added to its (review type, cli) cell, to the per-cli and
 * per-type totals, and to the grand total. When the run changed at least
 * one line, each cli's summed gate duration and the run's diff size are
 * also added to `a.per100`.
 *
 * @param {object} block - Run record with `reviewGates`, `linesAdded` and
 *   `linesRemoved`.
 * @param {object} a - Accumulator with `cells`, `cliTotals`, `typeTotals`,
 *   `per100` and `grandTotal` (mutated).
 * @returns {void}
 */
function accumulateBlock(block, a) {
  for (const gate of block.reviewGates) {
    let perCli = a.cells.get(gate.reviewType);
    if (perCli == null) {
      perCli = new Map();
    }
    a.cells.set(gate.reviewType, perCli);
    const targets = [
      getOrCreate(perCli, gate.cli, emptyStat),
      getOrCreate(a.cliTotals, gate.cli, emptyStat),
      getOrCreate(a.typeTotals, gate.reviewType, emptyStat),
      a.grandTotal
    ];
    for (const target of targets) {
      addGate(target, gate);
    }
  }
  const diff = block.linesAdded + block.linesRemoved;
  if (diff <= 0) {
    return;
  }
  // Sum durations per cli in one pass; Map keeps first-seen order.
  const durationByCli = new Map();
  for (const gate of block.reviewGates) {
    durationByCli.set(gate.cli, (durationByCli.get(gate.cli) ?? 0) + gate.durationS);
  }
  for (const [cli, duration] of durationByCli) {
    const rate = getOrCreate(a.per100, cli, () => ({ dur: 0, diff: 0 }));
    rate.dur += duration;
    rate.diff += diff;
  }
}
/**
 * Aggregate all runs into a cross-tabulation of review type by cli.
 *
 * @param {object[]} blocks - Run records to aggregate.
 * @returns {object} The accumulator maps plus `allTypes` (types listed in
 *   REVIEW_TYPES first, in that order, then any other types in first-seen
 *   order) and `allClis` (clis in first-seen order).
 */
function buildCrossTab(blocks) {
  const acc = {
    cells: new Map(),
    cliTotals: new Map(),
    typeTotals: new Map(),
    per100: new Map(),
    grandTotal: emptyStat()
  };
  for (const block of blocks) {
    accumulateBlock(block, acc);
  }
  const knownTypes = REVIEW_TYPES.filter((type) => acc.typeTotals.has(type));
  const otherTypes = Array.from(acc.typeTotals.keys()).filter((type) => !REVIEW_TYPES.includes(type));
  const allTypes = [...knownTypes, ...otherTypes];
  const allClis = Array.from(acc.cliTotals.keys());
  return { ...acc, allTypes, allClis };
}
/**
 * Create a zeroed token-usage record for one adapter.
 *
 * @param {string} adapter - Adapter name stored on the record.
 * @returns {object} A fresh record with every counter set to 0.
 */
var emptyTokenStats = (adapter) => {
  const stats = {
    adapter,
    inTokens: 0,
    cacheTokens: 0,
    outTokens: 0,
    thoughtTokens: 0,
    toolTokens: 0,
    apiRequests: 0,
    cacheRead: 0,
    cacheWrite: 0,
    runsWithTelemetry: 0
  };
  return stats;
};
/**
 * Add one telemetry entry to the running totals of its adapter, creating
 * the adapter's totals record when it does not exist yet.
 *
 * @param {object} t - Telemetry entry with `adapter` and the token counters.
 * @param {Map<string, object>} statsMap - Totals keyed by adapter (mutated).
 * @returns {void}
 */
function accumulateTelemetryEntry(t, statsMap) {
  const counters = [
    "inTokens",
    "cacheTokens",
    "outTokens",
    "thoughtTokens",
    "toolTokens",
    "apiRequests",
    "cacheRead",
    "cacheWrite"
  ];
  let totals = statsMap.get(t.adapter);
  if (totals == null) {
    totals = emptyTokenStats(t.adapter);
  }
  statsMap.set(t.adapter, totals);
  for (const counter of counters) {
    totals[counter] += t[counter];
  }
}
/**
 * Sum telemetry across all runs, per adapter.
 *
 * `runsWithTelemetry` is incremented once per run for every adapter that
 * has at least one telemetry entry in that run.
 *
 * @param {object[]} blocks - Run records, each with a `telemetry` list.
 * @returns {object[]} One totals record per adapter, in first-seen order.
 */
function aggregateTokenStats(blocks) {
  const totalsByAdapter = new Map();
  for (const block of blocks) {
    const adaptersSeen = new Set();
    for (const entry of block.telemetry) {
      adaptersSeen.add(entry.adapter);
    }
    for (const entry of block.telemetry) {
      accumulateTelemetryEntry(entry, totalsByAdapter);
    }
    for (const adapter of adaptersSeen) {
      const totals = totalsByAdapter.get(adapter);
      if (totals) {
        totals.runsWithTelemetry += 1;
      }
    }
  }
  return [...totalsByAdapter.values()];
}
/** Format a number using en-US digit grouping. */
var formatNum = (n) => {
  return n.toLocaleString("en-US");
};
/** Pad string `s` with spaces on the right up to width `w`; never truncates. */
var padRight = (s, w) => s.padEnd(w);
/** Pad string `s` with spaces on the left up to width `w`; never truncates. */
var padLeft = (s, w) => s.padStart(w);
/** Upper-case the first character of `s`. */
var capitalize = (s) => (s === "" ? s : s[0].toUpperCase() + s.substring(1));
/** Capitalize every "-"-separated part of a review type name. */
var fmtType = (t) => {
  const parts = t.split("-");
  return parts.map((part) => capitalize(part)).join("-");
};
/**
 * Render a titled table with one row per row label, one column per column
 * label, a trailing "Total" column and a final "Total" row.
 *
 * @param {string} title - Heading line.
 * @param {string[]} rowLabels - Row keys; shown via `fmtType`.
 * @param {string[]} colLabels - Column keys; shown capitalized.
 * @param {(row: string, col: string) => string} cell - Text of one cell.
 * @param {(row: string) => string} rowTotal - Text of a row's total.
 * @param {(col: string) => string} colTotal - Text of a column's total.
 * @param {string} grandTotal - Text of the bottom-right total.
 * @returns {string[]} Title, header, rows, total row and a closing "".
 */
function formatCrossTable(title, rowLabels, colLabels, cell, rowTotal, colTotal, grandTotal) {
  const columnWidth = 12;
  const labelLengths = rowLabels.map((label) => fmtType(label).length);
  const labelWidth = Math.max(17, ...labelLengths) + 2;
  // One table line: padded label, padded cells, then the unpadded total.
  const renderLine = (label, cells, total) =>
    padRight(label, labelWidth) + cells.map((text) => padRight(text, columnWidth)).join("") + total;
  const header = renderLine("", colLabels.map((col) => capitalize(col)), "Total");
  const body = rowLabels.map((row) =>
    renderLine(fmtType(row), colLabels.map((col) => cell(row, col)), rowTotal(row))
  );
  const footer = renderLine("Total", colLabels.map((col) => colTotal(col)), grandTotal);
  return [title, header, ...body, footer, ""];
}
/**
 * Render the "Run Counts" table: number of gate executions per review type
 * and cli, with 0 for combinations that never ran.
 *
 * @param {object} ct - Cross-tab produced by `buildCrossTab`.
 * @returns {string[]} Table lines.
 */
function formatRunCounts(ct) {
  const countOf = (stat) => String(stat?.count ?? 0);
  const cellText = (type, cli) => countOf(ct.cells.get(type)?.get(cli));
  const typeText = (type) => countOf(ct.typeTotals.get(type));
  const cliText = (cli) => countOf(ct.cliTotals.get(cli));
  return formatCrossTable(
    "=== Run Counts ===",
    ct.allTypes,
    ct.allClis,
    cellText,
    typeText,
    cliText,
    String(ct.grandTotal.count)
  );
}
/**
 * Render the "Timing" table of average gate duration, followed (when data
 * exists) by a line giving each cli's duration per 100 changed lines.
 *
 * @param {object} ct - Cross-tab produced by `buildCrossTab`.
 * @returns {string[]} Table lines; the per-100 line sits just before the
 *   closing empty line.
 */
function formatTiming(ct) {
  const averageSeconds = (stat) => {
    if (!stat || !(stat.count > 0)) {
      return "n/a";
    }
    return `${(stat.totalDuration / stat.count).toFixed(1)}s`;
  };
  const lines = formatCrossTable(
    "=== Timing ===",
    ct.allTypes,
    ct.allClis,
    (type, cli) => averageSeconds(ct.cells.get(type)?.get(cli)),
    (type) => averageSeconds(ct.typeTotals.get(type)),
    (cli) => averageSeconds(ct.cliTotals.get(cli)),
    averageSeconds(ct.grandTotal)
  );
  const perHundred = [];
  for (const cli of ct.allClis) {
    const rate = ct.per100.get(cli);
    if (rate && rate.diff > 0) {
      perHundred.push(`${cli}=${(rate.dur / rate.diff * 100).toFixed(1)}s`);
    }
  }
  if (perHundred.length > 0) {
    // Insert ahead of the trailing "" that ends the table.
    const closing = lines.pop();
    lines.push(`Per 100 diff lines (excl. zero-diff): ${perHundred.join(" ")}`, closing);
  }
  return lines;
}
/**
 * Render the table of average violations per gate execution.
 *
 * @param {object} ct - Cross-tab produced by `buildCrossTab`.
 * @returns {string[]} Table lines; "n/a" marks combinations with no runs.
 */
function formatViolations(ct) {
  const averageViolations = (stat) => {
    if (!stat || !(stat.count > 0)) {
      return "n/a";
    }
    return (stat.totalViolations / stat.count).toFixed(2);
  };
  return formatCrossTable(
    "=== Violations (avg per run) ===",
    ct.allTypes,
    ct.allClis,
    (type, cli) => averageViolations(ct.cells.get(type)?.get(cli)),
    (type) => averageViolations(ct.typeTotals.get(type)),
    (cli) => averageViolations(ct.cliTotals.get(cli)),
    averageViolations(ct.grandTotal)
  );
}
/**
 * Render the table of average fixed violations per gate execution.
 *
 * @param {object} ct - Cross-tab produced by `buildCrossTab`.
 * @returns {string[]} Table lines; "n/a" marks combinations with no runs.
 */
function formatViolationsFixed(ct) {
  const averageFixed = (stat) => {
    if (!stat || !(stat.count > 0)) {
      return "n/a";
    }
    return (stat.totalViolationsFixed / stat.count).toFixed(2);
  };
  return formatCrossTable(
    "=== Violations Fixed (avg per run) ===",
    ct.allTypes,
    ct.allClis,
    (type, cli) => averageFixed(ct.cells.get(type)?.get(cli)),
    (type) => averageFixed(ct.typeTotals.get(type)),
    (cli) => averageFixed(ct.cliTotals.get(cli)),
    averageFixed(ct.grandTotal)
  );
}
/**
 * Render the token-usage section for one adapter.
 *
 * Counters that are zero are left out. Cache reads and writes are shown
 * together when either is non-zero.
 *
 * @param {object} t - Totals record for the adapter.
 * @param {number} totalRuns - Number of runs shown in the heading.
 * @returns {string[]} Section lines ending with an empty line.
 */
function formatTokenEntry(t, totalRuns) {
  const column = (value) => padLeft(formatNum(value), 12);
  const lines = [
    `${capitalize(t.adapter)} (${t.runsWithTelemetry} of ${totalRuns} runs had telemetry):`
  ];
  const hasInput = t.inTokens > 0 || t.cacheTokens > 0;
  if (hasInput) {
    const totalInput = t.inTokens + t.cacheTokens;
    lines.push(` Input: ${column(totalInput)} (non-cached: ${formatNum(t.inTokens)} | cached: ${formatNum(t.cacheTokens)})`);
  }
  if (t.outTokens > 0) {
    lines.push(` Output: ${column(t.outTokens)}`);
  }
  if (t.thoughtTokens > 0) {
    lines.push(` Thinking: ${column(t.thoughtTokens)}`);
  }
  if (t.toolTokens > 0) {
    lines.push(` Tool tokens: ${column(t.toolTokens)}`);
  }
  const hasCacheTraffic = t.cacheRead > 0 || t.cacheWrite > 0;
  if (hasCacheTraffic) {
    lines.push(
      ` Cache reads: ${column(t.cacheRead)}`,
      ` Cache writes: ${column(t.cacheWrite)}`
    );
  }
  if (t.apiRequests > 0) {
    let perRun = "?";
    if (t.runsWithTelemetry > 0) {
      perRun = (t.apiRequests / t.runsWithTelemetry).toFixed(1);
    }
    lines.push(` API requests: ${column(t.apiRequests)} (avg ${perRun}/run)`);
  }
  lines.push("");
  return lines;
}
/**
 * Render the "Token Usage" section for all adapters.
 *
 * @param {object[]} ts - Per-adapter totals records.
 * @param {Map<string, number>} m - Run count per adapter name; when an
 *   adapter is missing, its own `runsWithTelemetry` is used instead.
 * @returns {string[]} Section lines.
 */
function formatTokenUsage(ts, m) {
  const heading = "=== Token Usage ===";
  if (ts.length === 0) {
    return [heading, "No telemetry data found.", ""];
  }
  const lines = [heading];
  for (const totals of ts) {
    const runCount = m.get(totals.adapter) ?? totals.runsWithTelemetry;
    lines.push(...formatTokenEntry(totals, runCount));
  }
  return lines;
}
/**
 * Render the closing "Summary" section.
 *
 * @param {object[]} blocks - Run records; only those with a truthy `end`
 *   are counted as rounds.
 * @param {object} ct - Cross-tab; `ct.grandTotal.count` is reported.
 * @param {number} totalFixed - Total number of fixed review issues.
 * @returns {string[]} Four summary lines.
 */
function formatSummary(blocks, ct, totalFixed) {
  let finishedRounds = 0;
  for (const block of blocks) {
    if (block.end) {
      finishedRounds += 1;
    }
  }
  const lines = ["=== Summary ==="];
  lines.push(`Gauntlet rounds: ${finishedRounds}`);
  lines.push(` Total gate executions: ${ct.grandTotal.count}`);
  lines.push(` Total review issues fixed: ${totalFixed}`);
  return lines;
}
/**
 * Fill in `violationsFixed` on each gate by comparing it with the matching
 * gate (same review type and cli) of the run immediately before it.
 *
 * A gate's `violationsFixed` is set only when the earlier gate reported
 * more violations; otherwise it is left untouched.
 *
 * @param {object[]} blocks - Run records in order (gates are mutated).
 * @returns {void}
 */
function computeViolationsFixed(blocks) {
  for (let index = 1; index < blocks.length; index += 1) {
    const previous = blocks[index - 1];
    for (const gate of blocks[index].reviewGates) {
      const earlier = previous?.reviewGates.find(
        (candidate) => candidate.reviewType === gate.reviewType && candidate.cli === gate.cli
      );
      if (earlier && earlier.violations > gate.violations) {
        gate.violationsFixed = earlier.violations - gate.violations;
      }
    }
  }
}
/**
 * Build the full review-audit report text.
 *
 * @param {object[]} blocks - Run records; their gates get `violationsFixed`
 *   filled in as a side effect.
 * @param {string} date - Label shown in the report heading.
 * @returns {string} The report, or a short "no runs" message when `blocks`
 *   is empty.
 */
function formatAuditReport(blocks, date) {
  const heading = `Review Execution Audit — ${date}`;
  if (blocks.length === 0) {
    return `${heading}\n\nNo gauntlet runs found for this date.`;
  }
  computeViolationsFixed(blocks);
  const ct = buildCrossTab(blocks);
  const tokenStats = aggregateTokenStats(blocks);
  // Number of runs in which each cli executed at least one review gate.
  const runsPerCli = new Map();
  for (const block of blocks) {
    const clisInRun = new Set(block.reviewGates.map((gate) => gate.cli));
    for (const cli of clisInRun) {
      runsPerCli.set(cli, (runsPerCli.get(cli) ?? 0) + 1);
    }
  }
  const sections = [
    formatRunCounts(ct),
    formatTiming(ct),
    formatViolations(ct),
    formatViolationsFixed(ct),
    formatTokenUsage(tokenStats, runsPerCli),
    formatSummary(blocks, ct, ct.grandTotal.totalViolationsFixed)
  ];
  return [heading, "", ...sections.flat()].join("\n");
}
/**
 * Format the current date, in the local time zone, as YYYY-MM-DD.
 *
 * @returns {string} Today's local date with zero-padded month and day.
 */
function todayLocalDate() {
  const now = new Date();
  const twoDigits = (value) => String(value).padStart(2, "0");
  return [now.getFullYear(), twoDigits(now.getMonth() + 1), twoDigits(now.getDate())].join("-");
}
/**
 * Stream a debug log and group its events into run records.
 *
 * Blank lines and lines whose date (first 10 characters of the timestamp)
 * is rejected by `matchDate` are skipped. A RUN_START event opens a new
 * record; GATE_RESULT, TELEMETRY and RUN_END events are applied to the most
 * recently opened record and ignored when none is open.
 *
 * @param {string} filePath - Path of the log file to read.
 * @param {(date: string) => boolean} matchDate - Date filter.
 * @returns {Promise<object[]>} Run records in file order.
 */
async function readBlocks(filePath, matchDate) {
  const reader = readline.createInterface({ input: fs33.createReadStream(filePath) });
  const runs = [];
  let openRun = null;
  for await (const line of reader) {
    if (!line.trim()) {
      continue;
    }
    const timestamp = parseTimestamp2(line);
    if (!matchDate(timestamp.slice(0, 10))) {
      continue;
    }
    const eventType = parseEventType2(line);
    const eventBody = parseEventBody2(line);
    if (eventType === "RUN_START") {
      openRun = handleRunStart(timestamp, eventBody);
      runs.push(openRun);
      continue;
    }
    if (!openRun) {
      continue;
    }
    switch (eventType) {
      case "GATE_RESULT":
        handleGateResult(openRun, eventBody);
        break;
      case "TELEMETRY":
        handleTelemetry(openRun, eventBody);
        break;
      case "RUN_END":
        handleRunEnd(openRun, eventBody);
        break;
      default:
        break;
    }
  }
  return runs;
}
/**
 * Entry point of the review audit: validate the date arguments, locate the
 * debug log under the configured log directory, and print the report.
 *
 * Exits the process with code 1 on invalid arguments and with code 0 when
 * no debug log exists.
 *
 * @param {string} [date] - Single day to report, as YYYY-MM-DD. Defaults to
 *   today's local date when neither argument is given.
 * @param {string} [since] - Report every run from this day onwards.
 * @returns {Promise<void>}
 */
async function main2(date, since) {
  const cwd = process.cwd();
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };
  if (date && since) {
    fail("Use either --date or --since, not both.");
  }
  const datePattern = /^\d{4}-\d{2}-\d{2}$/;
  if (date && !datePattern.test(date)) {
    fail("Invalid --date. Expected YYYY-MM-DD");
  }
  if (since && !datePattern.test(since)) {
    fail("Invalid --since. Expected YYYY-MM-DD");
  }
  const logDir = getLogDir3(cwd);
  const debugLogPath = path31.join(cwd, logDir, ".debug.log");
  if (!fs33.existsSync(debugLogPath)) {
    console.log(`No debug log found. (looked in ${logDir}/.debug.log)`);
    process.exit(0);
  }
  const targetDate = since ?? date ?? todayLocalDate();
  let matchDate;
  let label;
  if (since) {
    // YYYY-MM-DD strings order correctly under plain string comparison.
    matchDate = (day) => day >= since;
    label = `${since} – ${todayLocalDate()}`;
  } else {
    matchDate = (day) => day === targetDate;
    label = targetDate;
  }
  const blocks = await readBlocks(debugLogPath, matchDate);
  console.log(formatAuditReport(blocks, label));
}
9184
+ var isDirectRun2 = (import.meta.url === `file://${process.argv[1]}` || typeof Bun !== "undefined" && import.meta.url === `file://${Bun.main}`) && (process.argv[1]?.endsWith("review-audit.ts") || process.argv[1]?.endsWith("review-audit.js"));
9185
+ if (isDirectRun2)
9186
+ main2();
9187
+
9188
+ // src/commands/review-audit.ts
/**
 * Register the `review-audit` subcommand with its `--date` and `--since`
 * options on the given program.
 *
 * @param {object} program - Command builder exposing chainable `command`,
 *   `description`, `option` and `action` methods.
 * @returns {void}
 */
function registerReviewAuditCommand(program) {
  const runAudit = async (opts) => {
    if (opts.date && opts.since) {
      console.error("Use either --date or --since, not both.");
      process.exit(1);
    }
    await main2(opts.date, opts.since);
  };
  const command = program.command("review-audit");
  const described = command.description("Audit review execution from the debug log (--date or --since)");
  const withDate = described.option("--date <YYYY-MM-DD>", "Date to filter (default: today)");
  const withSince = withDate.option("--since <YYYY-MM-DD>", "Include all runs from this date onwards");
  withSince.action(runAudit);
}
9198
+ // src/core/run-executor-lock.ts
9199
+ import fs34 from "node:fs/promises";
9200
+ import path32 from "node:path";
8781
9201
  var LOCK_FILENAME2 = ".gauntlet-run.lock";
8782
9202
  var STALE_LOCK_MS = 10 * 60 * 1000;
8783
9203
  function isProcessAlive(pid) {
@@ -8793,9 +9213,9 @@ function isProcessAlive(pid) {
8793
9213
  }
8794
9214
  async function isLockStale(lockPath) {
8795
9215
  try {
8796
- const lockContent = await fs33.readFile(lockPath, "utf-8");
9216
+ const lockContent = await fs34.readFile(lockPath, "utf-8");
8797
9217
  const lockPid = Number.parseInt(lockContent.trim(), 10);
8798
- const lockStat = await fs33.stat(lockPath);
9218
+ const lockStat = await fs34.stat(lockPath);
8799
9219
  const lockAgeMs = Date.now() - lockStat.mtimeMs;
8800
9220
  const pidValid = !Number.isNaN(lockPid);
8801
9221
  if (pidValid && !isProcessAlive(lockPid)) {
@@ -8810,10 +9230,10 @@ async function isLockStale(lockPath) {
8810
9230
  }
8811
9231
  }
8812
9232
  async function tryAcquireLock(logDir) {
8813
- await fs33.mkdir(logDir, { recursive: true });
8814
- const lockPath = path31.resolve(logDir, LOCK_FILENAME2);
9233
+ await fs34.mkdir(logDir, { recursive: true });
9234
+ const lockPath = path32.resolve(logDir, LOCK_FILENAME2);
8815
9235
  try {
8816
- await fs33.writeFile(lockPath, String(process.pid), { flag: "wx" });
9236
+ await fs34.writeFile(lockPath, String(process.pid), { flag: "wx" });
8817
9237
  return true;
8818
9238
  } catch (err) {
8819
9239
  const isExist = typeof err === "object" && err !== null && "code" in err && err.code === "EEXIST";
@@ -8824,9 +9244,9 @@ async function tryAcquireLock(logDir) {
8824
9244
  if (!stale) {
8825
9245
  return false;
8826
9246
  }
8827
- await fs33.rm(lockPath, { force: true });
9247
+ await fs34.rm(lockPath, { force: true });
8828
9248
  try {
8829
- await fs33.writeFile(lockPath, String(process.pid), { flag: "wx" });
9249
+ await fs34.writeFile(lockPath, String(process.pid), { flag: "wx" });
8830
9250
  return true;
8831
9251
  } catch {
8832
9252
  return false;
@@ -8835,7 +9255,7 @@ async function tryAcquireLock(logDir) {
8835
9255
  }
8836
9256
  async function findLatestConsoleLog(logDir) {
8837
9257
  try {
8838
- const files = await fs33.readdir(logDir);
9258
+ const files = await fs34.readdir(logDir);
8839
9259
  let maxNum = -1;
8840
9260
  let latestFile = null;
8841
9261
  for (const file of files) {
@@ -8851,7 +9271,7 @@ async function findLatestConsoleLog(logDir) {
8851
9271
  }
8852
9272
  }
8853
9273
  }
8854
- return latestFile ? path31.join(logDir, latestFile) : null;
9274
+ return latestFile ? path32.join(logDir, latestFile) : null;
8855
9275
  } catch {
8856
9276
  return null;
8857
9277
  }
@@ -9253,8 +9673,8 @@ function registerSkipCommand(program) {
9253
9673
  });
9254
9674
  }
9255
9675
  // src/commands/start-hook.ts
9256
- import fs34 from "node:fs/promises";
9257
- import path32 from "node:path";
9676
+ import fs35 from "node:fs/promises";
9677
+ import path33 from "node:path";
9258
9678
  import YAML8 from "yaml";
9259
9679
  var START_HOOK_MESSAGE = `<IMPORTANT>
9260
9680
  This project uses Agent Gauntlet for automated quality verification.
@@ -9302,9 +9722,9 @@ function isValidConfig(content) {
9302
9722
  }
9303
9723
  function registerStartHookCommand(program) {
9304
9724
  program.command("start-hook").description("Session start hook - primes agent with gauntlet verification instructions").option("--adapter <adapter>", "Output format: claude or cursor", "claude").action(async (options) => {
9305
- const configPath = path32.join(process.cwd(), ".gauntlet", "config.yml");
9725
+ const configPath = path33.join(process.cwd(), ".gauntlet", "config.yml");
9306
9726
  try {
9307
- const content = await fs34.readFile(configPath, "utf-8");
9727
+ const content = await fs35.readFile(configPath, "utf-8");
9308
9728
  if (!isValidConfig(content)) {
9309
9729
  return;
9310
9730
  }
@@ -9314,7 +9734,7 @@ function registerStartHookCommand(program) {
9314
9734
  const adapter = options.adapter;
9315
9735
  try {
9316
9736
  const cwd = process.cwd();
9317
- const logDir = path32.join(cwd, await getLogDir2(cwd));
9737
+ const logDir = path33.join(cwd, await getLogDir2(cwd));
9318
9738
  const globalConfig = await loadGlobalConfig();
9319
9739
  const projectDebugLogConfig = await getDebugLogConfig(cwd);
9320
9740
  const debugLogConfig = mergeDebugLogConfig(projectDebugLogConfig, globalConfig.debug_log);
@@ -9353,6 +9773,7 @@ registerRunCommand(program);
9353
9773
  registerCheckCommand(program);
9354
9774
  registerCICommand(program);
9355
9775
  registerCleanCommand(program);
9776
+ registerReviewAuditCommand(program);
9356
9777
  registerReviewCommand(program);
9357
9778
  registerDetectCommand(program);
9358
9779
  registerListCommand(program);
@@ -9369,4 +9790,4 @@ if (process.argv.length < 3) {
9369
9790
  }
9370
9791
  program.parse(process.argv);
9371
9792
 
9372
- //# debugId=9CAFDAA7414B382764756E2164756E21
9793
+ //# debugId=BA297B6C5F2D9C1264756E2164756E21