agent-gauntlet 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +380 -32
- package/dist/index.js.map +7 -5
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -299,7 +299,7 @@ import { Command } from "commander";
|
|
|
299
299
|
// package.json
|
|
300
300
|
var package_default = {
|
|
301
301
|
name: "agent-gauntlet",
|
|
302
|
-
version: "1.
|
|
302
|
+
version: "1.2.0",
|
|
303
303
|
description: "A CLI tool for testing AI coding agents",
|
|
304
304
|
license: "MIT",
|
|
305
305
|
author: "Paul Caplan",
|
|
@@ -8454,14 +8454,10 @@ var SKILLS_SOURCE_DIR = (() => {
|
|
|
8454
8454
|
throw err;
|
|
8455
8455
|
}
|
|
8456
8456
|
})();
|
|
8457
|
-
|
|
8458
|
-
|
|
8459
|
-
|
|
8460
|
-
|
|
8461
|
-
status: "Show gauntlet status",
|
|
8462
|
-
help: "Diagnose and explain gauntlet behavior",
|
|
8463
|
-
setup: "Configure checks and reviews interactively"
|
|
8464
|
-
};
|
|
8457
|
+
/**
 * Lists the names of the directories located directly inside SKILLS_SOURCE_DIR.
 *
 * @returns {Promise<string[]>} Directory names, ordered by the default Array.prototype.sort().
 */
async function getSkillDirNames() {
  const dirents = await fs32.readdir(SKILLS_SOURCE_DIR, { withFileTypes: true });
  const names = [];
  for (const dirent of dirents) {
    // Plain files (and anything else that is not a directory) are ignored.
    if (dirent.isDirectory()) {
      names.push(dirent.name);
    }
  }
  names.sort();
  return names;
}
|
|
8465
8461
|
var CLI_PREFERENCE_ORDER = [
|
|
8466
8462
|
"codex",
|
|
8467
8463
|
"claude",
|
|
@@ -8520,7 +8516,7 @@ async function runInit(options) {
|
|
|
8520
8516
|
}
|
|
8521
8517
|
await installExternalFiles(projectRoot, hookAdapters, skipPrompts);
|
|
8522
8518
|
await addToGitignore(projectRoot, "gauntlet_logs");
|
|
8523
|
-
printPostInitInstructions(instructionCLINames);
|
|
8519
|
+
await printPostInitInstructions(instructionCLINames);
|
|
8524
8520
|
}
|
|
8525
8521
|
function printNoCLIsMessage() {
|
|
8526
8522
|
console.log();
|
|
@@ -8559,8 +8555,7 @@ async function copyDirRecursive(opts) {
|
|
|
8559
8555
|
}
|
|
8560
8556
|
async function installSkillsWithChecksums(projectRoot, skipPrompts) {
|
|
8561
8557
|
const skillsDir = path30.join(projectRoot, ".claude", "skills");
|
|
8562
|
-
for (const
|
|
8563
|
-
const dirName = `gauntlet-${action}`;
|
|
8558
|
+
for (const dirName of await getSkillDirNames()) {
|
|
8564
8559
|
const sourceDir = path30.join(SKILLS_SOURCE_DIR, dirName);
|
|
8565
8560
|
const targetDir = path30.join(skillsDir, dirName);
|
|
8566
8561
|
const relativeDir = `${path30.relative(projectRoot, targetDir)}/`;
|
|
@@ -8585,7 +8580,7 @@ async function installExternalFiles(projectRoot, devAdapters, skipPrompts) {
|
|
|
8585
8580
|
await installSkillsWithChecksums(projectRoot, skipPrompts);
|
|
8586
8581
|
await installHooksForAdapters(projectRoot, devAdapters, skipPrompts);
|
|
8587
8582
|
}
|
|
8588
|
-
function printPostInitInstructions(devCLINames) {
|
|
8583
|
+
async function printPostInitInstructions(devCLINames) {
|
|
8589
8584
|
const hasNative = devCLINames.some((name) => NATIVE_CLIS.has(name));
|
|
8590
8585
|
const nonNativeNames = devCLINames.filter((name) => !NATIVE_CLIS.has(name));
|
|
8591
8586
|
const hasNonNative = nonNativeNames.length > 0;
|
|
@@ -8597,8 +8592,8 @@ function printPostInitInstructions(devCLINames) {
|
|
|
8597
8592
|
console.log(chalk11.bold("To complete setup, reference the setup skill in your CLI: @.claude/skills/gauntlet-setup/SKILL.md. This will guide you through configuring the static checks (unit tests, linters, etc.) that Agent Gauntlet will run."));
|
|
8598
8593
|
console.log();
|
|
8599
8594
|
console.log("Available skills:");
|
|
8600
|
-
for (const
|
|
8601
|
-
console.log(` @.claude/skills
|
|
8595
|
+
for (const dirName of await getSkillDirNames()) {
|
|
8596
|
+
console.log(` @.claude/skills/${dirName}/SKILL.md`);
|
|
8602
8597
|
}
|
|
8603
8598
|
}
|
|
8604
8599
|
}
|
|
@@ -8775,9 +8770,361 @@ function registerReviewCommand(program) {
|
|
|
8775
8770
|
enableReviews: new Set(options.enableReview ?? [])
|
|
8776
8771
|
}));
|
|
8777
8772
|
}
|
|
8778
|
-
// src/
|
|
8779
|
-
import fs33 from "node:fs
|
|
8773
|
+
// src/scripts/review-audit.ts
|
|
8774
|
+
import fs33 from "node:fs";
|
|
8780
8775
|
import path31 from "node:path";
|
|
8776
|
+
import readline from "node:readline";
|
|
8777
|
+
/**
 * Collects every `key=value` token found in a piece of text.
 *
 * Keys are runs of word characters and values are runs of non-whitespace
 * characters; when a key occurs more than once the last occurrence wins.
 *
 * @param {string} text - Text to scan.
 * @returns {Record<string, string>} Map of key to raw (string) value.
 */
function parseKeyValue2(text) {
  const pairs = {};
  for (const match of text.matchAll(/(\w+)=(\S+)/g)) {
    const key = match[1];
    const value = match[2];
    if (!key || !value) {
      continue;
    }
    pairs[key] = value;
  }
  return pairs;
}
|
|
8784
|
+
/**
 * Returns the text inside the leading `[...]` of a log line, or "" when the
 * line does not start with a bracketed prefix.
 */
var parseTimestamp2 = (line) => {
  const found = line.match(/^\[([^\]]+)\]/);
  return found?.[1] ?? "";
};
/**
 * Returns the first whitespace-delimited token that follows the bracketed
 * prefix (the event name), or "" when the line does not have that shape.
 */
var parseEventType2 = (line) => {
  const found = line.match(/^\[[^\]]+\]\s+(\S+)/);
  return found?.[1] ?? "";
};
/**
 * Returns everything after the event name on a log line (up to the end of the
 * line), or "" when the line does not have that shape.
 */
var parseEventBody2 = (line) => {
  const found = line.match(/^\[[^\]]+\]\s+\S+\s*(.*)/);
  return found?.[1] ?? "";
};
|
|
8787
|
+
/**
 * Converts a value to a number, treating null/undefined and anything that
 * converts to NaN as 0.
 */
var safeNum = (v) => {
  const parsed = Number(v ?? 0);
  if (Number.isNaN(parsed)) {
    return 0;
  }
  return parsed;
};
/**
 * Converts a duration string such as "250ms", "3.5s", "2m" or a bare number
 * into seconds. A bare number is taken as seconds; input that does not match
 * the expected pattern yields 0.
 */
var parseDuration = (d) => {
  const parts = d.match(/^([\d.]+)(ms|s|m)?$/);
  const amount = safeNum(parts?.[1]);
  switch (parts?.[2]) {
    case "ms":
      return amount / 1000;
    case "m":
      return amount * 60;
    default:
      // "s", no unit, or no match at all.
      return amount;
  }
};
|
|
8800
|
+
/**
 * Resolves the configured log directory name for a project.
 *
 * Reads `<cwd>/.gauntlet/config.yml` and looks for a top-level `log_dir:` line
 * using a lightweight regex (no YAML parser). Surrounding single or double
 * quotes and a trailing ` # comment` are stripped from the value. Falls back
 * to "gauntlet_logs" when the file cannot be read, the key is absent, or the
 * value is empty.
 *
 * @param {string} cwd - Project root directory.
 * @returns {string} The log directory as written in the config, or the default.
 */
function getLogDir3(cwd) {
  const DEFAULT_LOG_DIR = "gauntlet_logs";
  const configPath = path31.join(cwd, ".gauntlet", "config.yml");
  let content;
  try {
    content = fs33.readFileSync(configPath, "utf-8");
  } catch {
    // Best effort: a missing or unreadable config simply means "use the default".
    return DEFAULT_LOG_DIR;
  }
  // [ \t]* (not \s*) so an empty `log_dir:` cannot swallow the newline and
  // capture the following config line as its value.
  const raw = content.match(/^log_dir:[ \t]*(.+)$/m)?.[1]?.trim();
  if (!raw) {
    return DEFAULT_LOG_DIR;
  }
  // Quoted scalar: take the text between the quotes, ignoring a trailing comment.
  const quoted = raw.match(/^(["'])(.*?)\1\s*(?:#.*)?$/);
  // Plain scalar: a `#` starts a comment only at the start or after whitespace.
  const value = quoted ? quoted[2] : raw.replace(/(^|\s)#.*$/, "").trim();
  return value || DEFAULT_LOG_DIR;
}
|
|
8810
|
+
/**
 * Creates a fresh run block from a RUN_START event.
 *
 * @param {string} ts - Timestamp text of the event line.
 * @param {string} body - Event body containing `key=value` fields.
 * @returns {object} Block with the start metadata and empty gate/telemetry lists.
 */
function handleRunStart(ts, body) {
  const fields = parseKeyValue2(body);
  const block = {
    timestamp: ts,
    mode: fields.mode ?? "unknown",
    linesAdded: safeNum(fields.lines_added),
    linesRemoved: safeNum(fields.lines_removed),
    reviewGates: [],
    priorPassSkips: 0,
    telemetry: []
  };
  return block;
}
|
|
8822
|
+
/**
 * Records a GATE_RESULT event on the current run block.
 *
 * Only gates whose id (first token of the body) starts with "review:" are
 * considered. A result with a `cli` field is stored as an executed review
 * gate; one without it increments the block's priorPassSkips counter.
 *
 * @param {object} current - Run block being populated (mutated in place).
 * @param {string} body - Event body.
 */
function handleGateResult(current, body) {
  const gateId = body.match(/^(\S+)/)?.[1] ?? "";
  if (!gateId.startsWith("review:")) {
    return;
  }
  const fields = parseKeyValue2(body);
  if (!fields.cli) {
    current.priorPassSkips++;
    return;
  }
  // The review type is the last ":"-separated segment of the gate id.
  const segments = gateId.split(":");
  current.reviewGates.push({
    reviewType: segments.at(-1) ?? "other",
    cli: fields.cli,
    durationS: parseDuration(fields.duration ?? "0s"),
    violations: safeNum(fields.violations)
  });
}
|
|
8839
|
+
/**
 * Records a TELEMETRY event on the current run block. Events without an
 * `adapter` field are ignored; missing or non-numeric counters become 0.
 *
 * @param {object} current - Run block being populated (mutated in place).
 * @param {string} body - Event body containing `key=value` fields.
 */
function handleTelemetry(current, body) {
  const fields = parseKeyValue2(body);
  const adapter = fields.adapter;
  if (!adapter) {
    return;
  }
  // [property on the stored entry, key used in the log line]
  const counters = [
    ["inTokens", "in"],
    ["cacheTokens", "cache"],
    ["outTokens", "out"],
    ["thoughtTokens", "thought"],
    ["toolTokens", "tool"],
    ["apiRequests", "api_requests"],
    ["cacheRead", "cacheRead"],
    ["cacheWrite", "cacheWrite"]
  ];
  const entry = { adapter };
  for (const [prop, logKey] of counters) {
    entry[prop] = safeNum(fields[logKey]);
  }
  current.telemetry.push(entry);
}
|
|
8855
|
+
/**
 * Stores the outcome of a RUN_END event on the current run block.
 *
 * @param {object} current - Run block being populated; its `end` is (re)assigned.
 * @param {string} body - Event body containing `key=value` fields.
 */
function handleRunEnd(current, body) {
  const { status, fixed, skipped, failed } = parseKeyValue2(body);
  current.end = {
    status: status ?? "unknown",
    fixed: safeNum(fixed),
    skipped: safeNum(skipped),
    failed: safeNum(failed)
  };
}
|
|
8864
|
+
/** Creates a new zeroed gate statistic (count, total duration, total violations). */
var emptyStat = () => {
  const stat = { count: 0, totalDuration: 0, totalViolations: 0 };
  return stat;
};
|
|
8869
|
+
/**
 * Folds one review gate into a running statistic.
 *
 * @param {{count: number, totalDuration: number, totalViolations: number}} s - Statistic, mutated in place.
 * @param {{durationS: number, violations: number}} g - Gate to add.
 */
function addGate(s, g) {
  const { durationS, violations } = g;
  s.count++;
  s.totalDuration += durationS;
  s.totalViolations += violations;
}
|
|
8874
|
+
/**
 * Returns the value stored under `key`, first inserting `init()` when the map
 * has no entry for that key.
 *
 * @param {Map} map - Map to read from and possibly extend.
 * @param {*} key - Lookup key.
 * @param {Function} init - Factory invoked only when the key is absent.
 * @returns {*} The existing or newly created value.
 */
function getOrCreate(map, key, init) {
  if (map.has(key)) {
    return map.get(key);
  }
  map.set(key, init());
  return map.get(key);
}
|
|
8879
|
+
// Known review types, in the order buildCrossTab lists them first; any other
// type seen in the data is appended after these.
var REVIEW_TYPES = ["code-quality", "task-compliance", "artifact-review"];
|
|
8880
|
+
/**
 * Adds one run block's review gates to the cross-tab accumulator.
 *
 * Every gate is counted in four places: the (review type, cli) cell, the cli
 * total, the review-type total and the grand total. When the block changed at
 * least one line, each cli's summed duration and the block's diff size are
 * also added to the per-cli "per 100 lines" totals.
 *
 * @param {object} block - Run block (reviewGates, linesAdded, linesRemoved).
 * @param {object} a - Accumulator (cells, cliTotals, typeTotals, per100, grandTotal); mutated.
 */
function accumulateBlock(block, a) {
  const gates = block.reviewGates;
  for (const gate of gates) {
    const byCli = getOrCreate(a.cells, gate.reviewType, () => new Map);
    addGate(getOrCreate(byCli, gate.cli, emptyStat), gate);
    addGate(getOrCreate(a.cliTotals, gate.cli, emptyStat), gate);
    addGate(getOrCreate(a.typeTotals, gate.reviewType, emptyStat), gate);
    addGate(a.grandTotal, gate);
  }
  const diff = block.linesAdded + block.linesRemoved;
  if (diff <= 0) {
    return;
  }
  // Sum durations per cli, keeping clis in order of first appearance.
  const durationByCli = new Map();
  for (const gate of gates) {
    durationByCli.set(gate.cli, (durationByCli.get(gate.cli) ?? 0) + gate.durationS);
  }
  for (const [cli, dur] of durationByCli) {
    const totals = getOrCreate(a.per100, cli, () => ({ dur: 0, diff: 0 }));
    totals.dur += dur;
    totals.diff += diff;
  }
}
|
|
8898
|
+
/**
 * Builds the review-type by cli cross tabulation for a list of run blocks.
 *
 * @param {object[]} blocks - Run blocks to aggregate.
 * @returns {object} The accumulator fields plus `allTypes` (known review types
 *   that occurred, in REVIEW_TYPES order, followed by any other types seen)
 *   and `allClis` (clis in order of first appearance).
 */
function buildCrossTab(blocks) {
  const acc = {
    cells: new Map,
    cliTotals: new Map,
    typeTotals: new Map,
    per100: new Map,
    grandTotal: emptyStat()
  };
  for (const block of blocks) {
    accumulateBlock(block, acc);
  }
  const knownSeen = REVIEW_TYPES.filter((type) => acc.typeTotals.has(type));
  const otherSeen = [];
  for (const type of acc.typeTotals.keys()) {
    if (!REVIEW_TYPES.includes(type)) {
      otherSeen.push(type);
    }
  }
  const allTypes = [...knownSeen, ...otherSeen];
  const allClis = [...acc.cliTotals.keys()];
  return { ...acc, allTypes, allClis };
}
|
|
8914
|
+
/**
 * Creates a zeroed token-usage record for an adapter.
 *
 * @param {string} adapter - Adapter name stored on the record.
 * @returns {object} Record with `adapter` and every counter set to 0.
 */
var emptyTokenStats = (adapter) => {
  const stats = { adapter };
  const counters = [
    "inTokens",
    "cacheTokens",
    "outTokens",
    "thoughtTokens",
    "toolTokens",
    "apiRequests",
    "cacheRead",
    "cacheWrite",
    "runsWithTelemetry"
  ];
  for (const counter of counters) {
    stats[counter] = 0;
  }
  return stats;
};
|
|
8926
|
+
/**
 * Adds one telemetry entry's counters to the running totals of its adapter,
 * creating the totals record on first use.
 *
 * @param {object} t - Telemetry entry (adapter plus token/request counters).
 * @param {Map<string, object>} statsMap - Totals keyed by adapter; mutated.
 */
function accumulateTelemetryEntry(t, statsMap) {
  let totals = statsMap.get(t.adapter);
  if (totals === undefined || totals === null) {
    totals = emptyTokenStats(t.adapter);
  }
  statsMap.set(t.adapter, totals);
  const counters = [
    "inTokens",
    "cacheTokens",
    "outTokens",
    "thoughtTokens",
    "toolTokens",
    "apiRequests",
    "cacheRead",
    "cacheWrite"
  ];
  for (const counter of counters) {
    totals[counter] += t[counter];
  }
}
|
|
8938
|
+
/**
 * Sums telemetry across run blocks, per adapter.
 *
 * `runsWithTelemetry` goes up by one per block in which the adapter reported
 * at least one entry, regardless of how many entries that block holds.
 *
 * @param {object[]} blocks - Run blocks, each with a `telemetry` array.
 * @returns {object[]} One totals record per adapter, in order of first appearance.
 */
function aggregateTokenStats(blocks) {
  const totalsByAdapter = new Map;
  for (const { telemetry } of blocks) {
    const adaptersSeen = new Set(telemetry.map((entry) => entry.adapter));
    telemetry.forEach((entry) => {
      accumulateTelemetryEntry(entry, totalsByAdapter);
    });
    adaptersSeen.forEach((adapter) => {
      const totals = totalsByAdapter.get(adapter);
      if (totals) {
        totals.runsWithTelemetry++;
      }
    });
  }
  return [...totalsByAdapter.values()];
}
|
|
8952
|
+
/** Formats a number using the "en-US" locale. */
var formatNum = (n) => {
  return n.toLocaleString("en-US");
};
/** Appends spaces to `s` until it is at least `w` characters long. */
var padRight = (s, w) => {
  const missing = Math.max(0, w - s.length);
  return s + " ".repeat(missing);
};
/** Prepends spaces to `s` until it is at least `w` characters long. */
var padLeft = (s, w) => {
  const missing = Math.max(0, w - s.length);
  return " ".repeat(missing) + s;
};
/** Upper-cases the first character of `s`, leaving the rest untouched. */
var capitalize = (s) => {
  const head = s.charAt(0).toUpperCase();
  const tail = s.slice(1);
  return head + tail;
};
/** Capitalizes each "-"-separated part of a review type, e.g. "code-quality" becomes "Code-Quality". */
var fmtType = (t) => {
  const parts = t.split("-");
  return parts.map(capitalize).join("-");
};
|
|
8957
|
+
/**
 * Renders a row-by-column text table with a trailing "Total" column and row.
 *
 * Columns are at least 12 characters wide and grow to fit the widest column
 * header, cell or column total plus two spaces, so long cli names no longer
 * run into the neighbouring column. For short labels the output is the same
 * as with the previous fixed width.
 *
 * @param {string} title - First output line.
 * @param {string[]} rowLabels - Row keys (review types), displayed via fmtType.
 * @param {string[]} colLabels - Column keys (clis), displayed capitalized.
 * @param {(row: string, col: string) => string} cell - Text for one cell.
 * @param {(row: string) => string} rowTotal - Text for a row's total.
 * @param {(col: string) => string} colTotal - Text for a column's total.
 * @param {string} grandTotal - Text for the bottom-right total.
 * @returns {string[]} Title, header, one line per row, the total row, and a trailing "".
 */
function formatCrossTable(title, rowLabels, colLabels, cell, rowTotal, colTotal, grandTotal) {
  const rlW = Math.max(17, ...rowLabels.map((r) => fmtType(r).length)) + 2;
  // Compute every piece of column text up front so the width can fit all of it.
  const headers = colLabels.map((c) => capitalize(c));
  const grid = rowLabels.map((r) => colLabels.map((c) => cell(r, c)));
  const totals = colLabels.map((c) => colTotal(c));
  const widest = [...headers, ...grid.flat(), ...totals].reduce((max, text) => Math.max(max, text.length), 0);
  const cW = Math.max(12, widest + 2);
  const joinCols = (texts) => texts.map((text) => padRight(text, cW)).join("");
  const hdr = padRight("", rlW) + joinCols(headers) + "Total";
  const rows = rowLabels.map((r, i) => padRight(fmtType(r), rlW) + joinCols(grid[i]) + rowTotal(r));
  const totalRow = padRight("Total", rlW) + joinCols(totals) + grandTotal;
  return [title, hdr, ...rows, totalRow, ""];
}
|
|
8965
|
+
/**
 * Renders the "Run Counts" table: how many review gates ran per review type
 * and cli. Missing cells and totals are shown as 0.
 *
 * @param {object} ct - Cross tab produced by buildCrossTab.
 * @returns {string[]} Table lines.
 */
function formatRunCounts(ct) {
  const countOf = (stat) => String(stat?.count ?? 0);
  const cellText = (type, cli) => countOf(ct.cells.get(type)?.get(cli));
  const typeText = (type) => countOf(ct.typeTotals.get(type));
  const cliText = (cli) => countOf(ct.cliTotals.get(cli));
  return formatCrossTable("=== Run Counts ===", ct.allTypes, ct.allClis, cellText, typeText, cliText, String(ct.grandTotal.count));
}
|
|
8968
|
+
/**
 * Renders the "Timing" table (average gate duration in seconds per review
 * type and cli) and, when available, a line with each cli's duration per 100
 * changed lines, inserted just before the table's trailing blank line.
 *
 * @param {object} ct - Cross tab produced by buildCrossTab.
 * @returns {string[]} Table lines.
 */
function formatTiming(ct) {
  const avg = (s) => {
    if (!s || !(s.count > 0)) {
      return "n/a";
    }
    return `${(s.totalDuration / s.count).toFixed(1)}s`;
  };
  const lines = formatCrossTable("=== Timing ===", ct.allTypes, ct.allClis, (t, c) => avg(ct.cells.get(t)?.get(c)), (t) => avg(ct.typeTotals.get(t)), (c) => avg(ct.cliTotals.get(c)), avg(ct.grandTotal));
  const p100parts = [];
  for (const cli of ct.allClis) {
    const totals = ct.per100.get(cli);
    if (totals && totals.diff > 0) {
      p100parts.push(`${cli}=${(totals.dur / totals.diff * 100).toFixed(1)}s`);
    }
  }
  if (p100parts.length > 0) {
    lines.splice(lines.length - 1, 0, `Per 100 diff lines (excl. zero-diff): ${p100parts.join(" ")}`);
  }
  return lines;
}
|
|
8979
|
+
/**
 * Renders the "Violations" table: average violations per executed gate, to
 * two decimals, per review type and cli ("n/a" where nothing ran).
 *
 * @param {object} ct - Cross tab produced by buildCrossTab.
 * @returns {string[]} Table lines.
 */
function formatViolations(ct) {
  const avg = (s) => {
    if (!s || !(s.count > 0)) {
      return "n/a";
    }
    return (s.totalViolations / s.count).toFixed(2);
  };
  const cellText = (t, c) => avg(ct.cells.get(t)?.get(c));
  const typeText = (t) => avg(ct.typeTotals.get(t));
  const cliText = (c) => avg(ct.cliTotals.get(c));
  return formatCrossTable("=== Violations (avg per run) ===", ct.allTypes, ct.allClis, cellText, typeText, cliText, avg(ct.grandTotal));
}
|
|
8983
|
+
/**
 * Renders the token-usage section for one adapter. Rows whose counters are
 * all zero are omitted; the section always ends with a blank line.
 *
 * NOTE(review): label spacing inside the literals is reproduced exactly as it
 * appears in the reviewed text — confirm against the original source if
 * column alignment matters.
 *
 * @param {object} t - Aggregated token totals for the adapter.
 * @param {number} totalRuns - Number of runs shown in the heading.
 * @returns {string[]} Section lines.
 */
function formatTokenEntry(t, totalRuns) {
  const lines = [
    `${capitalize(t.adapter)} (${t.runsWithTelemetry} of ${totalRuns} runs had telemetry):`
  ];
  const column = (value) => padLeft(formatNum(value), 12);
  const hasInput = t.inTokens > 0 || t.cacheTokens > 0;
  if (hasInput) {
    lines.push(` Input: ${column(t.inTokens + t.cacheTokens)} (non-cached: ${formatNum(t.inTokens)} | cached: ${formatNum(t.cacheTokens)})`);
  }
  if (t.outTokens > 0) {
    lines.push(` Output: ${column(t.outTokens)}`);
  }
  if (t.thoughtTokens > 0) {
    lines.push(` Thinking: ${column(t.thoughtTokens)}`);
  }
  if (t.toolTokens > 0) {
    lines.push(` Tool tokens: ${column(t.toolTokens)}`);
  }
  const hasCacheIo = t.cacheRead > 0 || t.cacheWrite > 0;
  if (hasCacheIo) {
    lines.push(` Cache reads: ${column(t.cacheRead)}`, ` Cache writes: ${column(t.cacheWrite)}`);
  }
  if (t.apiRequests > 0) {
    let avg = "?";
    if (t.runsWithTelemetry > 0) {
      avg = (t.apiRequests / t.runsWithTelemetry).toFixed(1);
    }
    lines.push(` API requests: ${column(t.apiRequests)} (avg ${avg}/run)`);
  }
  lines.push("");
  return lines;
}
|
|
9008
|
+
/**
 * Renders the "Token Usage" section for all adapters.
 *
 * @param {object[]} ts - Aggregated token totals, one per adapter.
 * @param {Map<string, number>} m - Run counts keyed by name; when an adapter
 *   has no entry its own runsWithTelemetry is used as the run count.
 * @returns {string[]} Section lines.
 */
function formatTokenUsage(ts, m) {
  const heading = "=== Token Usage ===";
  if (ts.length === 0) {
    return [heading, "No telemetry data found.", ""];
  }
  const lines = [heading];
  for (const t of ts) {
    const totalRuns = m.get(t.adapter) ?? t.runsWithTelemetry;
    lines.push(...formatTokenEntry(t, totalRuns));
  }
  return lines;
}
|
|
9016
|
+
/**
 * Renders the "Fix / Skip" summary.
 *
 * Only blocks that have an `end` record count as gauntlet runs; prior-pass
 * skips are summed over every block. The percentage line is added only when
 * at least one violation was fixed or skipped.
 *
 * @param {object[]} blocks - Run blocks.
 * @returns {string[]} Summary lines.
 */
function formatFixSkip(blocks) {
  let total = 0;
  let passed = 0;
  let fixed = 0;
  let skipped = 0;
  let failed = 0;
  let priorPass = 0;
  for (const block of blocks) {
    priorPass += block.priorPassSkips;
    const end = block.end;
    if (!end) {
      continue;
    }
    total++;
    if (end.status === "pass") {
      passed++;
    }
    fixed += end.fixed ?? 0;
    skipped += end.skipped ?? 0;
    failed += end.failed ?? 0;
  }
  const lines = [];
  lines.push("=== Fix / Skip ===");
  lines.push(`Gauntlet runs: ${total} total (${passed} passed, ${total - passed} failed)`);
  lines.push(` Violations fixed: ${fixed}`);
  lines.push(` Violations skipped: ${skipped}`);
  lines.push(` Gates failed: ${failed}`);
  lines.push(` Review gates skipped (prior pass): ${priorPass}`);
  const totalFixedSkipped = fixed + skipped;
  if (totalFixedSkipped > 0) {
    const fixedPct = (fixed / totalFixedSkipped * 100).toFixed(1);
    const skippedPct = (skipped / totalFixedSkipped * 100).toFixed(1);
    lines.push(` (fixed: ${fixedPct}% | skipped: ${skippedPct}% of fixed+skipped)`);
  }
  return lines;
}
|
|
9040
|
+
/**
 * Builds the complete review-audit report text for one date.
 *
 * @param {object[]} blocks - Run blocks parsed from the debug log.
 * @param {string} date - Date shown in the report title.
 * @returns {string} Newline-joined report, or a short "no runs" message when
 *   `blocks` is empty.
 */
function formatAuditReport(blocks, date) {
  const title = `Review Execution Audit — ${date}`;
  if (blocks.length === 0) {
    return [title, "", "No gauntlet runs found for this date."].join("\n");
  }
  const ct = buildCrossTab(blocks);
  const tokenStats = aggregateTokenStats(blocks);
  // Number of blocks in which each cli ran at least one review gate.
  const cliBlockCount = new Map;
  for (const block of blocks) {
    const clisInBlock = new Set(block.reviewGates.map((gate) => gate.cli));
    clisInBlock.forEach((cli) => {
      cliBlockCount.set(cli, (cliBlockCount.get(cli) ?? 0) + 1);
    });
  }
  const sections = [
    title,
    "",
    ...formatRunCounts(ct),
    ...formatTiming(ct),
    ...formatViolations(ct),
    ...formatTokenUsage(tokenStats, cliBlockCount),
    ...formatFixSkip(blocks)
  ];
  return sections.join("\n");
}
|
|
9063
|
+
/**
 * Returns today's date in the local time zone, formatted as YYYY-MM-DD.
 *
 * @returns {string} Zero-padded local date.
 */
function todayLocalDate() {
  const now = new Date;
  const twoDigits = (value) => String(value).padStart(2, "0");
  // getMonth() is zero-based, hence the + 1.
  return [String(now.getFullYear()), twoDigits(now.getMonth() + 1), twoDigits(now.getDate())].join("-");
}
|
|
9070
|
+
/**
 * Streams a debug log line by line and groups the events of one date into
 * run blocks.
 *
 * Blank lines and lines whose bracketed timestamp does not start with `date`
 * are ignored. A RUN_START event opens a new block; GATE_RESULT, TELEMETRY
 * and RUN_END events are applied to the most recently opened block and are
 * dropped when no block has been opened yet.
 *
 * @param {string} filePath - Path of the debug log.
 * @param {string} date - Date prefix to match against each line's timestamp.
 * @returns {Promise<object[]>} Run blocks in file order.
 */
async function readBlocks(filePath, date) {
  const lineReader = readline.createInterface({
    input: fs33.createReadStream(filePath)
  });
  const blocks = [];
  let current = null;
  for await (const line of lineReader) {
    if (!line.trim()) {
      continue;
    }
    const ts = parseTimestamp2(line);
    if (!ts.startsWith(date)) {
      continue;
    }
    const event = parseEventType2(line);
    const body = parseEventBody2(line);
    if (event === "RUN_START") {
      current = handleRunStart(ts, body);
      blocks.push(current);
      continue;
    }
    if (!current) {
      continue;
    }
    switch (event) {
      case "GATE_RESULT":
        handleGateResult(current, body);
        break;
      case "TELEMETRY":
        handleTelemetry(current, body);
        break;
      case "RUN_END":
        handleRunEnd(current, body);
        break;
      default:
        // Other event types carry nothing the audit needs.
        break;
    }
  }
  return blocks;
}
|
|
9100
|
+
/**
 * Entry point of the review audit: prints the audit report for one date.
 *
 * Exits the process with code 1 when a date is supplied that is not in
 * YYYY-MM-DD form (an empty string included), and with code 0 after printing
 * a notice when the debug log does not exist.
 *
 * @param {string} [date] - Date to report on; defaults to today's local date
 *   when omitted.
 * @returns {Promise<void>}
 */
async function main2(date) {
  const cwd = process.cwd();
  // Validate every supplied value. A truthiness check would let "" through,
  // and "" as the target date matches the timestamp of every log line.
  const dateSupplied = date !== undefined && date !== null;
  if (dateSupplied && !/^\d{4}-\d{2}-\d{2}$/.test(date)) {
    console.error("Invalid --date. Expected YYYY-MM-DD");
    process.exit(1);
  }
  const targetDate = date ?? todayLocalDate();
  // Resolve the log directory once; each lookup re-reads the config file.
  const logDir = getLogDir3(cwd);
  const debugLogPath = path31.join(cwd, logDir, ".debug.log");
  if (!fs33.existsSync(debugLogPath)) {
    console.log(`No debug log found. (looked in ${logDir}/.debug.log)`);
    process.exit(0);
  }
  const blocks = await readBlocks(debugLogPath, targetDate);
  console.log(formatAuditReport(blocks, targetDate));
}
|
|
9115
|
+
// True only when this module is itself the entry script (its URL equals the
// file URL built from process.argv[1], or from Bun.main when running under
// Bun) and the entry script is named review-audit.ts or review-audit.js.
var isDirectRun2 = (() => {
  const entryScript = process.argv[1];
  const isNodeEntry = import.meta.url === `file://${entryScript}`;
  const isBunEntry = typeof Bun !== "undefined" && import.meta.url === `file://${Bun.main}`;
  if (!(isNodeEntry || isBunEntry)) {
    return false;
  }
  return entryScript?.endsWith("review-audit.ts") || entryScript?.endsWith("review-audit.js");
})();
if (isDirectRun2) {
  main2();
}
|
|
9118
|
+
|
|
9119
|
+
// src/commands/review-audit.ts
|
|
9120
|
+
/**
 * Registers the `review-audit` sub-command, which runs the review audit for
 * the date given via `--date` (or for today when the option is omitted).
 *
 * @param {object} program - Command registry exposing a chainable
 *   command/description/option/action API.
 */
function registerReviewAuditCommand(program) {
  const runAudit = async (opts) => {
    await main2(opts.date);
  };
  const command = program.command("review-audit");
  const described = command.description("Audit review execution for a given date from the debug log");
  const withDateOption = described.option("--date <YYYY-MM-DD>", "Date to filter (default: today)");
  withDateOption.action(runAudit);
}
|
|
9125
|
+
// src/core/run-executor-lock.ts
|
|
9126
|
+
import fs34 from "node:fs/promises";
|
|
9127
|
+
import path32 from "node:path";
|
|
8781
9128
|
var LOCK_FILENAME2 = ".gauntlet-run.lock";
|
|
8782
9129
|
var STALE_LOCK_MS = 10 * 60 * 1000;
|
|
8783
9130
|
function isProcessAlive(pid) {
|
|
@@ -8793,9 +9140,9 @@ function isProcessAlive(pid) {
|
|
|
8793
9140
|
}
|
|
8794
9141
|
async function isLockStale(lockPath) {
|
|
8795
9142
|
try {
|
|
8796
|
-
const lockContent = await
|
|
9143
|
+
const lockContent = await fs34.readFile(lockPath, "utf-8");
|
|
8797
9144
|
const lockPid = Number.parseInt(lockContent.trim(), 10);
|
|
8798
|
-
const lockStat = await
|
|
9145
|
+
const lockStat = await fs34.stat(lockPath);
|
|
8799
9146
|
const lockAgeMs = Date.now() - lockStat.mtimeMs;
|
|
8800
9147
|
const pidValid = !Number.isNaN(lockPid);
|
|
8801
9148
|
if (pidValid && !isProcessAlive(lockPid)) {
|
|
@@ -8810,10 +9157,10 @@ async function isLockStale(lockPath) {
|
|
|
8810
9157
|
}
|
|
8811
9158
|
}
|
|
8812
9159
|
async function tryAcquireLock(logDir) {
|
|
8813
|
-
await
|
|
8814
|
-
const lockPath =
|
|
9160
|
+
await fs34.mkdir(logDir, { recursive: true });
|
|
9161
|
+
const lockPath = path32.resolve(logDir, LOCK_FILENAME2);
|
|
8815
9162
|
try {
|
|
8816
|
-
await
|
|
9163
|
+
await fs34.writeFile(lockPath, String(process.pid), { flag: "wx" });
|
|
8817
9164
|
return true;
|
|
8818
9165
|
} catch (err) {
|
|
8819
9166
|
const isExist = typeof err === "object" && err !== null && "code" in err && err.code === "EEXIST";
|
|
@@ -8824,9 +9171,9 @@ async function tryAcquireLock(logDir) {
|
|
|
8824
9171
|
if (!stale) {
|
|
8825
9172
|
return false;
|
|
8826
9173
|
}
|
|
8827
|
-
await
|
|
9174
|
+
await fs34.rm(lockPath, { force: true });
|
|
8828
9175
|
try {
|
|
8829
|
-
await
|
|
9176
|
+
await fs34.writeFile(lockPath, String(process.pid), { flag: "wx" });
|
|
8830
9177
|
return true;
|
|
8831
9178
|
} catch {
|
|
8832
9179
|
return false;
|
|
@@ -8835,7 +9182,7 @@ async function tryAcquireLock(logDir) {
|
|
|
8835
9182
|
}
|
|
8836
9183
|
async function findLatestConsoleLog(logDir) {
|
|
8837
9184
|
try {
|
|
8838
|
-
const files = await
|
|
9185
|
+
const files = await fs34.readdir(logDir);
|
|
8839
9186
|
let maxNum = -1;
|
|
8840
9187
|
let latestFile = null;
|
|
8841
9188
|
for (const file of files) {
|
|
@@ -8851,7 +9198,7 @@ async function findLatestConsoleLog(logDir) {
|
|
|
8851
9198
|
}
|
|
8852
9199
|
}
|
|
8853
9200
|
}
|
|
8854
|
-
return latestFile ?
|
|
9201
|
+
return latestFile ? path32.join(logDir, latestFile) : null;
|
|
8855
9202
|
} catch {
|
|
8856
9203
|
return null;
|
|
8857
9204
|
}
|
|
@@ -9253,8 +9600,8 @@ function registerSkipCommand(program) {
|
|
|
9253
9600
|
});
|
|
9254
9601
|
}
|
|
9255
9602
|
// src/commands/start-hook.ts
|
|
9256
|
-
import
|
|
9257
|
-
import
|
|
9603
|
+
import fs35 from "node:fs/promises";
|
|
9604
|
+
import path33 from "node:path";
|
|
9258
9605
|
import YAML8 from "yaml";
|
|
9259
9606
|
var START_HOOK_MESSAGE = `<IMPORTANT>
|
|
9260
9607
|
This project uses Agent Gauntlet for automated quality verification.
|
|
@@ -9302,9 +9649,9 @@ function isValidConfig(content) {
|
|
|
9302
9649
|
}
|
|
9303
9650
|
function registerStartHookCommand(program) {
|
|
9304
9651
|
program.command("start-hook").description("Session start hook - primes agent with gauntlet verification instructions").option("--adapter <adapter>", "Output format: claude or cursor", "claude").action(async (options) => {
|
|
9305
|
-
const configPath =
|
|
9652
|
+
const configPath = path33.join(process.cwd(), ".gauntlet", "config.yml");
|
|
9306
9653
|
try {
|
|
9307
|
-
const content = await
|
|
9654
|
+
const content = await fs35.readFile(configPath, "utf-8");
|
|
9308
9655
|
if (!isValidConfig(content)) {
|
|
9309
9656
|
return;
|
|
9310
9657
|
}
|
|
@@ -9314,7 +9661,7 @@ function registerStartHookCommand(program) {
|
|
|
9314
9661
|
const adapter = options.adapter;
|
|
9315
9662
|
try {
|
|
9316
9663
|
const cwd = process.cwd();
|
|
9317
|
-
const logDir =
|
|
9664
|
+
const logDir = path33.join(cwd, await getLogDir2(cwd));
|
|
9318
9665
|
const globalConfig = await loadGlobalConfig();
|
|
9319
9666
|
const projectDebugLogConfig = await getDebugLogConfig(cwd);
|
|
9320
9667
|
const debugLogConfig = mergeDebugLogConfig(projectDebugLogConfig, globalConfig.debug_log);
|
|
@@ -9353,6 +9700,7 @@ registerRunCommand(program);
|
|
|
9353
9700
|
registerCheckCommand(program);
|
|
9354
9701
|
registerCICommand(program);
|
|
9355
9702
|
registerCleanCommand(program);
|
|
9703
|
+
registerReviewAuditCommand(program);
|
|
9356
9704
|
registerReviewCommand(program);
|
|
9357
9705
|
registerDetectCommand(program);
|
|
9358
9706
|
registerListCommand(program);
|
|
@@ -9369,4 +9717,4 @@ if (process.argv.length < 3) {
|
|
|
9369
9717
|
}
|
|
9370
9718
|
program.parse(process.argv);
|
|
9371
9719
|
|
|
9372
|
-
//# debugId=
|
|
9720
|
+
//# debugId=CAFA132ED75F0B9564756E2164756E21
|