agent-gauntlet 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -297,4 +297,4 @@ export {
297
297
  main
298
298
  };
299
299
 
300
- //# debugId=FF4F0EE650C6850464756E2164756E21
300
+ //# debugId=38D823BEF2803BA364756E2164756E21
@@ -5,6 +5,6 @@
5
5
  "#!/usr/bin/env node\n/**\n * Gauntlet Status Script\n *\n * Parses the configured log_dir (default: gauntlet_logs/) to produce a structured\n * summary of the most recent gauntlet session from the .debug.log, plus a file\n * inventory of all log/JSON files for further inspection.\n *\n * This script handles structured data only (debug log events). Detailed failure\n * analysis (reading individual check logs, review JSONs) is left to the caller\n * (the /gauntlet-status skill) since log formats vary by check type.\n */\n\nimport fs from 'node:fs';\nimport path from 'node:path';\n\n// --- Types ---\n\ninterface RunStart {\n timestamp: string;\n mode: string;\n baseRef?: string;\n filesChanged: number;\n linesAdded: number;\n linesRemoved: number;\n gates: number;\n}\n\ninterface GateResult {\n timestamp: string;\n gateId: string;\n cli?: string;\n status: string;\n duration: string;\n violations?: number;\n}\n\ninterface RunEnd {\n timestamp: string;\n status: string;\n fixed: number;\n skipped: number;\n failed: number;\n iterations: number;\n duration: string;\n}\n\ninterface StopHookEntry {\n timestamp: string;\n decision: string;\n reason: string;\n}\n\ninterface SessionRun {\n start: RunStart;\n gates: GateResult[];\n end?: RunEnd;\n stopHook?: StopHookEntry;\n}\n\n// --- Parsing helpers ---\n\nfunction parseKeyValue(text: string): Record<string, string> {\n const result: Record<string, string> = {};\n for (const match of text.matchAll(/(\\w+)=(\\S+)/g)) {\n const key = match[1];\n const value = match[2];\n if (key && value) result[key] = value;\n }\n return result;\n}\n\nfunction parseTimestamp(line: string): string {\n const m = line.match(/^\\[([^\\]]+)\\]/);\n return m?.[1] ?? '';\n}\n\nfunction parseEventType(line: string): string {\n const m = line.match(/^\\[[^\\]]+\\]\\s+(\\S+)/);\n return m?.[1] ?? '';\n}\n\nfunction parseEventBody(line: string): string {\n const m = line.match(/^\\[[^\\]]+\\]\\s+\\S+\\s*(.*)/);\n return m?.[1] ?? 
'';\n}\n\n// --- Debug log parsing ---\n\nfunction parseRunStart(ts: string, body: string): SessionRun {\n const kv = parseKeyValue(body);\n return {\n start: {\n timestamp: ts,\n mode: kv.mode ?? 'unknown',\n baseRef: kv.base_ref,\n filesChanged: Number(kv.files_changed ?? kv.changes ?? 0),\n linesAdded: Number(kv.lines_added ?? 0),\n linesRemoved: Number(kv.lines_removed ?? 0),\n gates: Number(kv.gates ?? 0),\n },\n gates: [],\n };\n}\n\nfunction parseGateResult(ts: string, body: string): GateResult {\n const gateIdMatch = body.match(/^(\\S+)/);\n const kv = parseKeyValue(body);\n return {\n timestamp: ts,\n gateId: gateIdMatch?.[1] ?? 'unknown',\n cli: kv.cli,\n status: kv.status ?? 'unknown',\n duration: kv.duration ?? '?',\n violations: kv.violations !== undefined ? Number(kv.violations) : undefined,\n };\n}\n\nfunction parseRunEnd(ts: string, body: string): RunEnd {\n const kv = parseKeyValue(body);\n return {\n timestamp: ts,\n status: kv.status ?? 'unknown',\n fixed: Number(kv.fixed ?? 0),\n skipped: Number(kv.skipped ?? 0),\n failed: Number(kv.failed ?? 0),\n iterations: Number(kv.iterations ?? 0),\n duration: kv.duration ?? '?',\n };\n}\n\nfunction parseStopHookEntry(ts: string, body: string): StopHookEntry {\n const kv = parseKeyValue(body);\n return {\n timestamp: ts,\n decision: kv.decision ?? 'unknown',\n reason: kv.reason ?? 
'unknown',\n };\n}\n\nfunction isBeforeSession(\n ts: string,\n sessionStartTime: Date | undefined,\n): boolean {\n return sessionStartTime !== undefined && new Date(ts) < sessionStartTime;\n}\n\nfunction parseDebugLog(content: string, sessionStartTime?: Date): SessionRun[] {\n const lines = content.split('\\n').filter((l) => l.trim());\n const sessions: SessionRun[] = [];\n let current: SessionRun | null = null;\n\n for (const line of lines) {\n const event = parseEventType(line);\n const body = parseEventBody(line);\n const ts = parseTimestamp(line);\n\n switch (event) {\n case 'RUN_START':\n if (isBeforeSession(ts, sessionStartTime)) {\n current = null;\n } else {\n current = parseRunStart(ts, body);\n sessions.push(current);\n }\n break;\n case 'GATE_RESULT':\n if (current) current.gates.push(parseGateResult(ts, body));\n break;\n case 'RUN_END':\n if (current) current.end = parseRunEnd(ts, body);\n break;\n case 'STOP_HOOK':\n if (current) current.stopHook = parseStopHookEntry(ts, body);\n break;\n }\n }\n\n return sessions;\n}\n\n/**\n * Find the earliest mtime of non-hidden log files in the directory.\n * This marks the start of the current session.\n */\nfunction getSessionStartTime(logDir: string): Date | undefined {\n const entries = fs\n .readdirSync(logDir)\n .filter((f) => !f.startsWith('.') && f !== 'previous');\n let earliest: number | undefined;\n for (const entry of entries) {\n const mtime = fs.statSync(path.join(logDir, entry)).mtimeMs;\n if (earliest === undefined || mtime < earliest) {\n earliest = mtime;\n }\n }\n return earliest !== undefined ? 
new Date(earliest) : undefined;\n}\n\n// --- File inventory ---\n\nfunction formatFileInventory(logDir: string): string[] {\n const lines: string[] = [];\n const entries = fs\n .readdirSync(logDir)\n .filter((f) => !f.startsWith('.') && f !== 'previous');\n if (entries.length === 0) return lines;\n\n const checks: string[] = [];\n const reviews: string[] = [];\n const other: string[] = [];\n\n for (const entry of entries.sort()) {\n const fullPath = path.join(logDir, entry);\n const stat = fs.statSync(fullPath);\n const sizeKB = (stat.size / 1024).toFixed(1);\n const line = `- ${fullPath} (${sizeKB} KB)`;\n\n if (entry.startsWith('review_')) {\n reviews.push(line);\n } else if (entry.startsWith('check_')) {\n checks.push(line);\n } else {\n other.push(line);\n }\n }\n\n lines.push('### Log Files');\n lines.push('');\n if (checks.length > 0) {\n lines.push('**Check logs:**');\n lines.push(...checks);\n }\n if (reviews.length > 0) {\n lines.push('**Review logs/JSON:**');\n lines.push(...reviews);\n }\n if (other.length > 0) {\n lines.push('**Other:**');\n lines.push(...other);\n }\n lines.push('');\n\n return lines;\n}\n\n// --- Summary output ---\n\nfunction formatStatusLine(end: RunEnd): string {\n if (end.status === 'pass') return 'PASSED';\n if (end.status === 'fail') return 'FAILED';\n return end.status.toUpperCase();\n}\n\nfunction formatAllRuns(sessions: SessionRun[]): string[] {\n const lines: string[] = [];\n lines.push('### All Runs in Session');\n lines.push('');\n for (let i = 0; i < sessions.length; i++) {\n const s = sessions[i];\n if (!s) continue;\n const status = s.end ? s.end.status : 'in-progress';\n const duration = s.end ? s.end.duration : '?';\n lines.push(\n `${i + 1}. 
[${s.start.timestamp}] mode=${s.start.mode} status=${status} duration=${duration}`,\n );\n }\n lines.push('');\n return lines;\n}\n\nfunction formatSession(sessions: SessionRun[], logDir: string): string {\n if (sessions.length === 0) {\n return 'No gauntlet runs found in logs.';\n }\n\n const lastComplete = [...sessions].reverse().find((s) => s.end);\n const session = lastComplete ?? sessions[sessions.length - 1];\n if (!session) return 'No gauntlet runs found in logs.';\n\n const lines: string[] = [];\n\n // Header\n lines.push('## Gauntlet Session Summary');\n lines.push('');\n\n // Overall status\n if (session.end) {\n lines.push(`**Status:** ${formatStatusLine(session.end)}`);\n lines.push(`**Iterations:** ${session.end.iterations}`);\n lines.push(`**Duration:** ${session.end.duration}`);\n lines.push(\n `**Fixed:** ${session.end.fixed} | **Skipped:** ${session.end.skipped} | **Failed:** ${session.end.failed}`,\n );\n } else {\n lines.push('**Status:** In Progress (no RUN_END found)');\n }\n lines.push('');\n\n // Diff stats\n lines.push('### Diff Stats');\n lines.push(`- Mode: ${session.start.mode}`);\n if (session.start.baseRef) {\n lines.push(`- Base ref: ${session.start.baseRef}`);\n }\n lines.push(`- Files changed: ${session.start.filesChanged}`);\n lines.push(\n `- Lines: +${session.start.linesAdded} / -${session.start.linesRemoved}`,\n );\n lines.push(`- Gates: ${session.start.gates}`);\n lines.push('');\n\n // Gate results\n lines.push('### Gate Results');\n lines.push('');\n lines.push('| Gate | CLI | Status | Duration | Violations |');\n lines.push('|------|-----|--------|----------|------------|');\n for (const gate of session.gates) {\n const violations =\n gate.violations !== undefined ? String(gate.violations) : '-';\n const statusIcon = gate.status === 'pass' ? 'pass' : 'FAIL';\n lines.push(\n `| ${gate.gateId} | ${gate.cli ?? 
'-'} | ${statusIcon} | ${gate.duration} | ${violations} |`,\n );\n }\n lines.push('');\n\n // Stop hook\n if (session.stopHook) {\n lines.push('### Stop Hook');\n lines.push(`- Decision: ${session.stopHook.decision}`);\n lines.push(`- Reason: ${session.stopHook.reason}`);\n lines.push('');\n }\n\n // File inventory\n lines.push(...formatFileInventory(logDir));\n\n // All sessions summary (if multiple runs)\n if (sessions.length > 1) {\n lines.push(...formatAllRuns(sessions));\n }\n\n return lines.join('\\n');\n}\n\n// --- Main ---\n\n/**\n * Read the configured log_dir from .gauntlet/config.yml.\n * Falls back to \"gauntlet_logs\" if not found.\n */\nfunction getLogDir(cwd: string): string {\n const configPath = path.join(cwd, '.gauntlet', 'config.yml');\n try {\n const content = fs.readFileSync(configPath, 'utf-8');\n const match = content.match(/^log_dir:\\s*(.+)$/m);\n if (match?.[1]) return match[1].trim();\n } catch {\n // Config not found — use default\n }\n return 'gauntlet_logs';\n}\n\n/**\n * Resolve the log directory and debug log path.\n * Returns null if no logs are found (after printing a message).\n */\nfunction resolveLogPaths(\n activeDir: string,\n): { logDir: string; debugLogPath: string } | null {\n const previousDir = path.join(activeDir, 'previous');\n const debugLogPath = path.join(activeDir, '.debug.log');\n\n // Check active directory first for non-debug log files\n const activeHasLogs =\n fs.existsSync(activeDir) &&\n fs\n .readdirSync(activeDir)\n .some((f) => !f.startsWith('.') && f !== 'previous');\n\n if (activeHasLogs) {\n return { logDir: activeDir, debugLogPath };\n }\n\n if (!fs.existsSync(previousDir)) {\n console.log('No gauntlet_logs directory found.');\n return null;\n }\n\n // Fall back to previous directory — cleanLogs archives files directly here\n const logDir = resolvePreviousLogDir(previousDir);\n if (!logDir) return null;\n\n // Debug log stays in the main gauntlet_logs dir, not in previous/\n return { logDir, 
debugLogPath };\n}\n\nfunction resolvePreviousLogDir(previousDir: string): string | null {\n const prevEntries = fs.readdirSync(previousDir);\n const hasDirectFiles = prevEntries.some(\n (f) => f.endsWith('.log') || f.endsWith('.json'),\n );\n\n if (hasDirectFiles) return previousDir;\n\n // Legacy: check for timestamped subdirectories\n const prevDirs = prevEntries\n .map((d) => path.join(previousDir, d))\n .filter((d) => fs.statSync(d).isDirectory())\n .sort()\n .reverse();\n\n if (prevDirs.length === 0) {\n console.log('No gauntlet logs found.');\n return null;\n }\n\n return prevDirs[0] as string;\n}\n\nexport function main(): void {\n const cwd = process.cwd();\n const logDirName = getLogDir(cwd);\n const activeDir = path.join(cwd, logDirName);\n\n const paths = resolveLogPaths(activeDir);\n if (!paths) {\n process.exit(0);\n }\n\n // Parse debug log, filtering to current session based on log file timestamps\n let sessions: SessionRun[] = [];\n if (fs.existsSync(paths.debugLogPath)) {\n const debugContent = fs.readFileSync(paths.debugLogPath, 'utf-8');\n const sessionStart = getSessionStartTime(paths.logDir);\n sessions = parseDebugLog(debugContent, sessionStart);\n }\n\n // Format and output\n const output = formatSession(sessions, paths.logDir);\n console.log(output);\n}\n\n// Auto-execute when run directly (e.g., `bun src/scripts/status.ts`\n// or `node dist/scripts/status.js`). The filename check prevents\n// this from triggering when the module is bundled into dist/index.js.\nconst isDirectRun =\n (import.meta.url === `file://${process.argv[1]}` ||\n (typeof Bun !== 'undefined' && import.meta.url === `file://${Bun.main}`)) &&\n (process.argv[1]?.endsWith('status.ts') ||\n process.argv[1]?.endsWith('status.js'));\nif (isDirectRun) {\n main();\n}\n"
6
6
  ],
7
7
  "mappings": ";;;;;AAaA;AACA;AAgDA,SAAS,aAAa,CAAC,MAAsC;AAAA,EAC3D,MAAM,SAAiC,CAAC;AAAA,EACxC,WAAW,SAAS,KAAK,SAAS,cAAc,GAAG;AAAA,IACjD,MAAM,MAAM,MAAM;AAAA,IAClB,MAAM,QAAQ,MAAM;AAAA,IACpB,IAAI,OAAO;AAAA,MAAO,OAAO,OAAO;AAAA,EAClC;AAAA,EACA,OAAO;AAAA;AAGT,SAAS,cAAc,CAAC,MAAsB;AAAA,EAC5C,MAAM,IAAI,KAAK,MAAM,eAAe;AAAA,EACpC,OAAO,IAAI,MAAM;AAAA;AAGnB,SAAS,cAAc,CAAC,MAAsB;AAAA,EAC5C,MAAM,IAAI,KAAK,MAAM,qBAAqB;AAAA,EAC1C,OAAO,IAAI,MAAM;AAAA;AAGnB,SAAS,cAAc,CAAC,MAAsB;AAAA,EAC5C,MAAM,IAAI,KAAK,MAAM,0BAA0B;AAAA,EAC/C,OAAO,IAAI,MAAM;AAAA;AAKnB,SAAS,aAAa,CAAC,IAAY,MAA0B;AAAA,EAC3D,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,OAAO;AAAA,MACL,WAAW;AAAA,MACX,MAAM,GAAG,QAAQ;AAAA,MACjB,SAAS,GAAG;AAAA,MACZ,cAAc,OAAO,GAAG,iBAAiB,GAAG,WAAW,CAAC;AAAA,MACxD,YAAY,OAAO,GAAG,eAAe,CAAC;AAAA,MACtC,cAAc,OAAO,GAAG,iBAAiB,CAAC;AAAA,MAC1C,OAAO,OAAO,GAAG,SAAS,CAAC;AAAA,IAC7B;AAAA,IACA,OAAO,CAAC;AAAA,EACV;AAAA;AAGF,SAAS,eAAe,CAAC,IAAY,MAA0B;AAAA,EAC7D,MAAM,cAAc,KAAK,MAAM,QAAQ;AAAA,EACvC,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,WAAW;AAAA,IACX,QAAQ,cAAc,MAAM;AAAA,IAC5B,KAAK,GAAG;AAAA,IACR,QAAQ,GAAG,UAAU;AAAA,IACrB,UAAU,GAAG,YAAY;AAAA,IACzB,YAAY,GAAG,eAAe,YAAY,OAAO,GAAG,UAAU,IAAI;AAAA,EACpE;AAAA;AAGF,SAAS,WAAW,CAAC,IAAY,MAAsB;AAAA,EACrD,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,WAAW;AAAA,IACX,QAAQ,GAAG,UAAU;AAAA,IACrB,OAAO,OAAO,GAAG,SAAS,CAAC;AAAA,IAC3B,SAAS,OAAO,GAAG,WAAW,CAAC;AAAA,IAC/B,QAAQ,OAAO,GAAG,UAAU,CAAC;AAAA,IAC7B,YAAY,OAAO,GAAG,cAAc,CAAC;AAAA,IACrC,UAAU,GAAG,YAAY;AAAA,EAC3B;AAAA;AAGF,SAAS,kBAAkB,CAAC,IAAY,MAA6B;AAAA,EACnE,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,WAAW;AAAA,IACX,UAAU,GAAG,YAAY;AAAA,IACzB,QAAQ,GAAG,UAAU;AAAA,EACvB;AAAA;AAGF,SAAS,eAAe,CACtB,IACA,kBACS;AAAA,EACT,OAAO,qBAAqB,aAAa,IAAI,KAAK,EAAE,IAAI;AAAA;AAG1D,SAAS,aAAa,CAAC,SAAiB,kBAAuC;AAAA,EAC7E,MAAM,QAAQ,QAAQ,MAAM;AAAA,CAAI,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC;AAAA,EACxD,MAAM,WAAyB,CAAC;AAAA,EAChC,IAAI,UAA6B;AAAA,EAEjC,WAAW,QAAQ,OAAO;AAAA,IACxB,MAAM,QAAQ,eAAe,IAAI;AAAA,IACjC,MAAM,OAAO,eAAe,IAAI;AAAA,IAChC,MAAM,KA
AK,eAAe,IAAI;AAAA,IAE9B,QAAQ;AAAA,WACD;AAAA,QACH,IAAI,gBAAgB,IAAI,gBAAgB,GAAG;AAAA,UACzC,UAAU;AAAA,QACZ,EAAO;AAAA,UACL,UAAU,cAAc,IAAI,IAAI;AAAA,UAChC,SAAS,KAAK,OAAO;AAAA;AAAA,QAEvB;AAAA,WACG;AAAA,QACH,IAAI;AAAA,UAAS,QAAQ,MAAM,KAAK,gBAAgB,IAAI,IAAI,CAAC;AAAA,QACzD;AAAA,WACG;AAAA,QACH,IAAI;AAAA,UAAS,QAAQ,MAAM,YAAY,IAAI,IAAI;AAAA,QAC/C;AAAA,WACG;AAAA,QACH,IAAI;AAAA,UAAS,QAAQ,WAAW,mBAAmB,IAAI,IAAI;AAAA,QAC3D;AAAA;AAAA,EAEN;AAAA,EAEA,OAAO;AAAA;AAOT,SAAS,mBAAmB,CAAC,QAAkC;AAAA,EAC7D,MAAM,UAAU,GACb,YAAY,MAAM,EAClB,OAAO,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,KAAK,MAAM,UAAU;AAAA,EACvD,IAAI;AAAA,EACJ,WAAW,SAAS,SAAS;AAAA,IAC3B,MAAM,QAAQ,GAAG,SAAS,KAAK,KAAK,QAAQ,KAAK,CAAC,EAAE;AAAA,IACpD,IAAI,aAAa,aAAa,QAAQ,UAAU;AAAA,MAC9C,WAAW;AAAA,IACb;AAAA,EACF;AAAA,EACA,OAAO,aAAa,YAAY,IAAI,KAAK,QAAQ,IAAI;AAAA;AAKvD,SAAS,mBAAmB,CAAC,QAA0B;AAAA,EACrD,MAAM,QAAkB,CAAC;AAAA,EACzB,MAAM,UAAU,GACb,YAAY,MAAM,EAClB,OAAO,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,KAAK,MAAM,UAAU;AAAA,EACvD,IAAI,QAAQ,WAAW;AAAA,IAAG,OAAO;AAAA,EAEjC,MAAM,SAAmB,CAAC;AAAA,EAC1B,MAAM,UAAoB,CAAC;AAAA,EAC3B,MAAM,QAAkB,CAAC;AAAA,EAEzB,WAAW,SAAS,QAAQ,KAAK,GAAG;AAAA,IAClC,MAAM,WAAW,KAAK,KAAK,QAAQ,KAAK;AAAA,IACxC,MAAM,OAAO,GAAG,SAAS,QAAQ;AAAA,IACjC,MAAM,UAAU,KAAK,OAAO,MAAM,QAAQ,CAAC;AAAA,IAC3C,MAAM,OAAO,KAAK,aAAa;AAAA,IAE/B,IAAI,MAAM,WAAW,SAAS,GAAG;AAAA,MAC/B,QAAQ,KAAK,IAAI;AAAA,IACnB,EAAO,SAAI,MAAM,WAAW,QAAQ,GAAG;AAAA,MACrC,OAAO,KAAK,IAAI;AAAA,IAClB,EAAO;AAAA,MACL,MAAM,KAAK,IAAI;AAAA;AAAA,EAEnB;AAAA,EAEA,MAAM,KAAK,eAAe;AAAA,EAC1B,MAAM,KAAK,EAAE;AAAA,EACb,IAAI,OAAO,SAAS,GAAG;AAAA,IACrB,MAAM,KAAK,iBAAiB;AAAA,IAC5B,MAAM,KAAK,GAAG,MAAM;AAAA,EACtB;AAAA,EACA,IAAI,QAAQ,SAAS,GAAG;AAAA,IACtB,MAAM,KAAK,uBAAuB;AAAA,IAClC,MAAM,KAAK,GAAG,OAAO;AAAA,EACvB;AAAA,EACA,IAAI,MAAM,SAAS,GAAG;AAAA,IACpB,MAAM,KAAK,YAAY;AAAA,IACvB,MAAM,KAAK,GAAG,KAAK;AAAA,EACrB;AAAA,EACA,MAAM,KAAK,EAAE;AAAA,EAEb,OAAO;AAAA;AAKT,SAAS,gBAAgB,CAAC,KAAqB;AAAA,EAC7C,IAAI,IAAI,WAAW;AAAA,IAAQ,OAAO;AAAA,EAClC,IAAI,IAAI,WAAW;AAAA,IAAQ,OAAO;AAAA,EAClC,OAAO,IAAI,OAAO,YAAY;AAAA;AAGhC,SAAS,aAAa,CAAC,UAAkC;
AAAA,EACvD,MAAM,QAAkB,CAAC;AAAA,EACzB,MAAM,KAAK,yBAAyB;AAAA,EACpC,MAAM,KAAK,EAAE;AAAA,EACb,SAAS,IAAI,EAAG,IAAI,SAAS,QAAQ,KAAK;AAAA,IACxC,MAAM,IAAI,SAAS;AAAA,IACnB,IAAI,CAAC;AAAA,MAAG;AAAA,IACR,MAAM,SAAS,EAAE,MAAM,EAAE,IAAI,SAAS;AAAA,IACtC,MAAM,WAAW,EAAE,MAAM,EAAE,IAAI,WAAW;AAAA,IAC1C,MAAM,KACJ,GAAG,IAAI,OAAO,EAAE,MAAM,mBAAmB,EAAE,MAAM,eAAe,mBAAmB,UACrF;AAAA,EACF;AAAA,EACA,MAAM,KAAK,EAAE;AAAA,EACb,OAAO;AAAA;AAGT,SAAS,aAAa,CAAC,UAAwB,QAAwB;AAAA,EACrE,IAAI,SAAS,WAAW,GAAG;AAAA,IACzB,OAAO;AAAA,EACT;AAAA,EAEA,MAAM,eAAe,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,GAAG;AAAA,EAC9D,MAAM,UAAU,gBAAgB,SAAS,SAAS,SAAS;AAAA,EAC3D,IAAI,CAAC;AAAA,IAAS,OAAO;AAAA,EAErB,MAAM,QAAkB,CAAC;AAAA,EAGzB,MAAM,KAAK,6BAA6B;AAAA,EACxC,MAAM,KAAK,EAAE;AAAA,EAGb,IAAI,QAAQ,KAAK;AAAA,IACf,MAAM,KAAK,eAAe,iBAAiB,QAAQ,GAAG,GAAG;AAAA,IACzD,MAAM,KAAK,mBAAmB,QAAQ,IAAI,YAAY;AAAA,IACtD,MAAM,KAAK,iBAAiB,QAAQ,IAAI,UAAU;AAAA,IAClD,MAAM,KACJ,cAAc,QAAQ,IAAI,wBAAwB,QAAQ,IAAI,yBAAyB,QAAQ,IAAI,QACrG;AAAA,EACF,EAAO;AAAA,IACL,MAAM,KAAK,4CAA4C;AAAA;AAAA,EAEzD,MAAM,KAAK,EAAE;AAAA,EAGb,MAAM,KAAK,gBAAgB;AAAA,EAC3B,MAAM,KAAK,WAAW,QAAQ,MAAM,MAAM;AAAA,EAC1C,IAAI,QAAQ,MAAM,SAAS;AAAA,IACzB,MAAM,KAAK,eAAe,QAAQ,MAAM,SAAS;AAAA,EACnD;AAAA,EACA,MAAM,KAAK,oBAAoB,QAAQ,MAAM,cAAc;AAAA,EAC3D,MAAM,KACJ,aAAa,QAAQ,MAAM,iBAAiB,QAAQ,MAAM,cAC5D;AAAA,EACA,MAAM,KAAK,YAAY,QAAQ,MAAM,OAAO;AAAA,EAC5C,MAAM,KAAK,EAAE;AAAA,EAGb,MAAM,KAAK,kBAAkB;AAAA,EAC7B,MAAM,KAAK,EAAE;AAAA,EACb,MAAM,KAAK,iDAAiD;AAAA,EAC5D,MAAM,KAAK,iDAAiD;AAAA,EAC5D,WAAW,QAAQ,QAAQ,OAAO;AAAA,IAChC,MAAM,aACJ,KAAK,eAAe,YAAY,OAAO,KAAK,UAAU,IAAI;AAAA,IAC5D,MAAM,aAAa,KAAK,WAAW,SAAS,SAAS;AAAA,IACrD,MAAM,KACJ,KAAK,KAAK,YAAY,KAAK,OAAO,SAAS,gBAAgB,KAAK,cAAc,cAChF;AAAA,EACF;AAAA,EACA,MAAM,KAAK,EAAE;AAAA,EAGb,IAAI,QAAQ,UAAU;AAAA,IACpB,MAAM,KAAK,eAAe;AAAA,IAC1B,MAAM,KAAK,eAAe,QAAQ,SAAS,UAAU;AAAA,IACrD,MAAM,KAAK,aAAa,QAAQ,SAAS,QAAQ;AAAA,IACjD,MAAM,KAAK,EAAE;AAAA,EACf;AAAA,EAGA,MAAM,KAAK,GAAG,oBAAoB,MAAM,CAAC;AAAA,EAGzC,IAAI,SAAS,SAAS,GAAG;AAAA,IACvB,MAAM,KAAK,GAAG,cAAc,QAAQ,CAAC;AAAA,EACvC;AA
AA,EAEA,OAAO,MAAM,KAAK;AAAA,CAAI;AAAA;AASxB,SAAS,SAAS,CAAC,KAAqB;AAAA,EACtC,MAAM,aAAa,KAAK,KAAK,KAAK,aAAa,YAAY;AAAA,EAC3D,IAAI;AAAA,IACF,MAAM,UAAU,GAAG,aAAa,YAAY,OAAO;AAAA,IACnD,MAAM,QAAQ,QAAQ,MAAM,oBAAoB;AAAA,IAChD,IAAI,QAAQ;AAAA,MAAI,OAAO,MAAM,GAAG,KAAK;AAAA,IACrC,MAAM;AAAA,EAGR,OAAO;AAAA;AAOT,SAAS,eAAe,CACtB,WACiD;AAAA,EACjD,MAAM,cAAc,KAAK,KAAK,WAAW,UAAU;AAAA,EACnD,MAAM,eAAe,KAAK,KAAK,WAAW,YAAY;AAAA,EAGtD,MAAM,gBACJ,GAAG,WAAW,SAAS,KACvB,GACG,YAAY,SAAS,EACrB,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,KAAK,MAAM,UAAU;AAAA,EAEvD,IAAI,eAAe;AAAA,IACjB,OAAO,EAAE,QAAQ,WAAW,aAAa;AAAA,EAC3C;AAAA,EAEA,IAAI,CAAC,GAAG,WAAW,WAAW,GAAG;AAAA,IAC/B,QAAQ,IAAI,mCAAmC;AAAA,IAC/C,OAAO;AAAA,EACT;AAAA,EAGA,MAAM,SAAS,sBAAsB,WAAW;AAAA,EAChD,IAAI,CAAC;AAAA,IAAQ,OAAO;AAAA,EAGpB,OAAO,EAAE,QAAQ,aAAa;AAAA;AAGhC,SAAS,qBAAqB,CAAC,aAAoC;AAAA,EACjE,MAAM,cAAc,GAAG,YAAY,WAAW;AAAA,EAC9C,MAAM,iBAAiB,YAAY,KACjC,CAAC,MAAM,EAAE,SAAS,MAAM,KAAK,EAAE,SAAS,OAAO,CACjD;AAAA,EAEA,IAAI;AAAA,IAAgB,OAAO;AAAA,EAG3B,MAAM,WAAW,YACd,IAAI,CAAC,MAAM,KAAK,KAAK,aAAa,CAAC,CAAC,EACpC,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,YAAY,CAAC,EAC1C,KAAK,EACL,QAAQ;AAAA,EAEX,IAAI,SAAS,WAAW,GAAG;AAAA,IACzB,QAAQ,IAAI,yBAAyB;AAAA,IACrC,OAAO;AAAA,EACT;AAAA,EAEA,OAAO,SAAS;AAAA;AAGX,SAAS,IAAI,GAAS;AAAA,EAC3B,MAAM,MAAM,QAAQ,IAAI;AAAA,EACxB,MAAM,aAAa,UAAU,GAAG;AAAA,EAChC,MAAM,YAAY,KAAK,KAAK,KAAK,UAAU;AAAA,EAE3C,MAAM,QAAQ,gBAAgB,SAAS;AAAA,EACvC,IAAI,CAAC,OAAO;AAAA,IACV,QAAQ,KAAK,CAAC;AAAA,EAChB;AAAA,EAGA,IAAI,WAAyB,CAAC;AAAA,EAC9B,IAAI,GAAG,WAAW,MAAM,YAAY,GAAG;AAAA,IACrC,MAAM,eAAe,GAAG,aAAa,MAAM,cAAc,OAAO;AAAA,IAChE,MAAM,eAAe,oBAAoB,MAAM,MAAM;AAAA,IACrD,WAAW,cAAc,cAAc,YAAY;AAAA,EACrD;AAAA,EAGA,MAAM,SAAS,cAAc,UAAU,MAAM,MAAM;AAAA,EACnD,QAAQ,IAAI,MAAM;AAAA;AAMpB,IAAM,eACH,YAAY,QAAQ,UAAU,QAAQ,KAAK,QACzC,OAAO,QAAQ,eAAe,YAAY,QAAQ,UAAU,IAAI,YAClE,QAAQ,KAAK,IAAI,SAAS,WAAW,KACpC,QAAQ,KAAK,IAAI,SAAS,WAAW;AACzC,IAAI,aAAa;AAAA,EACf,KAAK;AACP;",
8
- "debugId": "FF4F0EE650C6850464756E2164756E21",
8
+ "debugId": "38D823BEF2803BA364756E2164756E21",
9
9
  "names": []
10
10
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-gauntlet",
3
- "version": "1.0.0",
3
+ "version": "1.2.0",
4
4
  "description": "A CLI tool for testing AI coding agents",
5
5
  "license": "MIT",
6
6
  "author": "Paul Caplan",
@@ -3,20 +3,55 @@ name: gauntlet-check
3
3
  description: >-
4
4
  Run checks only (no reviews)
5
5
  disable-model-invocation: true
6
- allowed-tools: Bash
6
+ allowed-tools: Bash, Task
7
7
  ---
8
8
 
9
9
  # /gauntlet-check
10
10
  Run the gauntlet checks only — no AI reviews.
11
11
 
12
- 1. Run `agent-gauntlet clean` to archive any previous log files
13
- 2. Run `agent-gauntlet check`
14
- 3. If any checks fail:
15
- - Read the `.log` file path provided in the output for each failed check. If the log contains a `--- Fix Instructions ---` section, follow those instructions. If it contains a `--- Fix Skill: <name> ---` section, invoke that skill.
16
- - Fix the issues found.
17
- 4. Run `agent-gauntlet check` again to verify your fixes. Do NOT run `agent-gauntlet clean` between retries.
18
- 5. Repeat steps 3-4 until all checks pass or you've made 3 attempts.
19
- 6. Provide a summary of the session:
20
- - Checks Passed: (list)
21
- - Checks Failed: (list with brief reason)
22
- - Fixes Applied: (list key fixes)
12
+
13
+ ## Procedure
14
+
15
+ ### Step 1 - Clean Logs
16
+
17
+ Run `agent-gauntlet clean` to archive any previous log files.
18
+
19
+ ### Step 2 - Run Checks
20
+
21
+ Run `agent-gauntlet check` using `Bash` with `timeout: 300000`. **ALWAYS wait for and read the full command output** before proceeding. **Verify you can see a `Status:` line in the output before continuing.**
22
+
23
+ ### Step 3 - Check Status
24
+
25
+ **NEVER assume success** — you must see an explicit `Status:` line before continuing. Check it and route accordingly:
26
+ - `Status: Passed` → Go to Step 7.
27
+ - `Status: Passed with warnings` → Go to Step 7.
28
+ - `Status: Failed` → Continue to Step 4. **You MUST continue — do not stop here.**
29
+ - `Status: Retry limit exceeded` → Run `agent-gauntlet clean` to archive logs. Go to Step 7.
30
+ - No status line visible → **Known issue:** Bun can drop all stdout/stderr. Read the console log file to get the status: find the latest `console.*.log` in the gauntlet log directory (e.g., `gauntlet_logs/console.1.log`) and look for the `Status:` line there. If no console log is found there, also check the `previous/` subdirectory of that log directory (e.g., `gauntlet_logs/previous/`) for logs from the most recent archived run. If no console log exists in either location, the command may have timed out or failed to run — re-run with a longer timeout or investigate the error. Do NOT proceed as if it passed.
31
+
32
+ ### Step 4 - Extract Failures
33
+
34
+ Required when status is Failed:
35
+ - Infer the log directory from the file paths in the console output (e.g., if output references `gauntlet_logs/check_._lint.1.log`, the log directory is `gauntlet_logs/`)
36
+ - Read `extract-prompt.md` from this skill's directory
37
+ - **Extract log failures** using the first available strategy:
38
+ a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` extract-prompt content + `"\n\nLog directory: <inferred path>"`. **Task calls MUST be synchronous** — NEVER use `run_in_background: true`.
39
+ b. **Subagent delegation**: If your environment supports delegating work to a subagent but not the Task tool, delegate the extract-prompt instructions with the log directory to a subagent for processing.
40
+ c. **Inline fallback**: If no subagent capability is available, follow the extract-prompt instructions yourself to read the log files and produce the compact failure summary.
41
+
42
+ ### Step 5 - Fix
43
+
44
+ Execute the fixes for all failed checks (if a failure has neither a Fix Skill nor Fix Instructions, fix the reported issue directly):
45
+ - CHECK failures with Fix Skill: invoke the named skill
46
+ - CHECK failures with Fix Instructions: follow the instructions
47
+
48
+ ### Step 6 - Re-run Verification
49
+
50
+ **NEVER skip this step** — if the run failed, you MUST fix and re-run. Run `agent-gauntlet check` again with `Bash` and `timeout: 300000`. Do NOT run `agent-gauntlet clean` between retries. The tool detects existing logs and automatically switches to verification mode. **Go back to Step 3** to check the status line and repeat.
51
+
52
+ ### Step 7 - Summarize Session
53
+
54
+ Provide a summary of the session:
55
+ - Final Status: (Passed / Passed with warnings / Retry limit exceeded)
56
+ - Checks Fixed: (list key fixes)
57
+ - Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)
@@ -0,0 +1,73 @@
1
+ ---
2
+ name: gauntlet-commit
3
+ description: >-
4
+ Gates commits behind optional gauntlet validation by detecting changes, running selected validations, handling failures, and completing the commit flow.
5
+ Activates when requests include "commit with gauntlet", "run checks before commit", "run gauntlet then commit", or "skip gauntlet and commit".
6
+ disable-model-invocation: false
7
+ allowed-tools: Bash, Task
8
+ ---
9
+
10
+ # /gauntlet-commit $ARGUMENTS
11
+
12
+ Commit with optional gauntlet validation. Runs `agent-gauntlet detect` first, validates based on intent (full run, checks only, or skip), handles failures, then commits.
13
+
14
+ ## Step 1 - Detect Changes
15
+
16
+ Run `agent-gauntlet detect` using `Bash`:
17
+
18
+ ```bash
19
+ agent-gauntlet detect 2>&1
20
+ ```
21
+
22
+ - If no changed files are reported → **skip to Step 4** (commit directly, no validation needed)
23
+ - If changed files are reported → continue to Step 2
24
+
25
+ ## Step 2 - Determine Validation Intent
26
+
27
+ Parse `$ARGUMENTS` for a validation intent. Do not prompt the user if a clear intent is found.
28
+
29
+ | ARGUMENTS pattern | Action |
30
+ |-------------------|--------|
31
+ | Contains "full" or "all gates", or "run" without "check" | Invoke `/gauntlet-run` (Step 3a) |
32
+ | Contains "check" or "checks" | Invoke `/gauntlet-check` (Step 3b) |
33
+ | Contains "skip" | Run `agent-gauntlet skip 2>&1` (Step 3c), then go to Step 4 |
34
+ | Empty or no clear intent | Present the three choices below to the user, wait for selection |
35
+
36
+ **When prompting the user**, present these choices:
37
+
38
+ 1. **Run all gates** — full validation (checks + reviews)
39
+ 2. **Run checks only** — checks without AI reviews
40
+ 3. **Skip gauntlet** — advance baseline without running any gates
41
+
42
+ Then proceed to the step matching the user's selection.
43
+
44
+ ## Step 3a - Full Validation (gauntlet-run)
45
+
46
+ Invoke `/gauntlet-run`.
47
+
48
+ - If it passes → go to Step 4
49
+ - If it fails → the `/gauntlet-run` skill handles fixing and re-running. After that skill completes, ask the user: **"Ready to commit?"** Proceed to Step 4 only on confirmation.
50
+
51
+ ## Step 3b - Checks-Only Validation (gauntlet-check)
52
+
53
+ Invoke `/gauntlet-check`.
54
+
55
+ - If it passes → go to Step 4
56
+ - If it fails → the `/gauntlet-check` skill handles fixing and re-running. After that skill completes, ask the user: **"Ready to commit?"** Proceed to Step 4 only on confirmation.
57
+
58
+ ## Step 3c - Skip Validation
59
+
60
+ Run:
61
+
62
+ ```bash
63
+ agent-gauntlet skip 2>&1
64
+ ```
65
+
66
+ Report the command output to the user, then go to Step 4.
67
+
68
+ ## Step 4 - Commit
69
+
70
+ Check whether you have a skill for committing git changes available (excluding `gauntlet-commit` itself to avoid self-invocation).
71
+
72
+ - **If a commit skill is found** → invoke that skill to perform the commit
73
+ - **If no commit skill is found** → stage all tracked changes, propose a commit message following the conventional commits format (`<type>: <description>`), then run `git commit -m "<message>"`
@@ -87,3 +87,12 @@ Downgrade confidence when:
87
87
 
88
88
  ### Next Steps
89
89
  Actionable recommendations for the user. If confidence is not high, suggest what additional evidence would confirm the diagnosis.
90
+
91
+ ## Bug Filing
92
+
93
+ After completing your diagnosis, apply the following routing logic to determine whether to file a GitHub issue:
94
+
95
+ - **High confidence + bug indicated** (evidence points to a defect in agent-gauntlet, not a configuration issue, user error, or expected behavior): Automatically invoke `gauntlet-issue` with `--auto-file <diagnosis summary>` as arguments. This passes the diagnosis summary as the bug description and skips the interactive confirmation — the issue is filed immediately after showing the draft.
96
+ - **High confidence + not a bug** (diagnosis concludes configuration issue, user error, or expected behavior): Do nothing. Do not invoke `gauntlet-issue`.
97
+ - **Medium confidence + possible bug** (evidence suggests a possible gauntlet defect but is not conclusive): Ask the user: "This may be a gauntlet bug. Want me to file a GitHub issue?" If the user confirms, invoke `gauntlet-issue` with the diagnosis summary as the bug description. If the user declines, exit without filing.
98
+ - **Low confidence**: Do nothing. Do not prompt the user and do not invoke `gauntlet-issue`.
@@ -0,0 +1,106 @@
1
+ ---
2
+ name: gauntlet-issue
3
+ description: Files structured GitHub bug reports for agent-gauntlet when users ask to file, report, or open an issue for a suspected defect
4
+ disable-model-invocation: false
5
+ allowed-tools: Bash, Read, Glob
6
+ ---
7
+
8
+ # /gauntlet-issue
9
+
10
+ Collect runtime evidence, draft a structured GitHub issue for a suspected agent-gauntlet bug, present a full preview, and file only after confirmation — unless invoked in auto-file mode.
11
+
12
+ ## Step 1: Get the Bug Description and Mode
13
+
14
+ **Check for auto-file mode**: If `$ARGUMENTS` begins with `--auto-file` (followed by a space and the description, or by nothing at all), set auto-file mode to **on** and strip the prefix to get the remaining text as the bug description. When auto-file mode is on, Step 4 still shows the draft preview but skips the confirmation prompt.
15
+
16
+ **Get the description**:
17
+ - If a non-empty description remains after stripping any `--auto-file` prefix, use it as the bug description and proceed to Step 2.
18
+ - If `$ARGUMENTS` is empty (or becomes empty after stripping), ask the user:
19
+
20
+ > "Please describe the bug you encountered with agent-gauntlet. What happened, what did you expect, and what were you trying to do?"
21
+
22
+ Wait for their response before continuing.
23
+
24
+ ## Step 2: Collect Evidence
25
+
26
+ Read `.gauntlet/config.yml` first to resolve `log_dir` (default: `gauntlet_logs` if the field is absent or the file doesn't exist).
27
+
28
+ Collect the following evidence. For each item, note if it is absent — do not fail if files are missing:
29
+
30
+ 1. **Config file**: Read `.gauntlet/config.yml` in full.
31
+ 2. **Debug log (last 50 lines)**: Read the last 50 lines of `<log_dir>/.debug.log`.
32
+ ```bash
33
+ tail -n 50 <log_dir>/.debug.log
34
+ ```
35
+ 3. **Execution state**: Read the full contents of `<log_dir>/.execution_state`.
36
+
37
+ Record which files were found and which were absent.
38
+
39
+ ## Step 3: Draft the Issue
40
+
41
+ Draft a GitHub issue with the following structure:
42
+
43
+ ```
44
+ ## Problem
45
+
46
+ <A clear, concise description of the bug. Based on the user's description.>
47
+
48
+ ## Steps to Reproduce
49
+
50
+ <Step-by-step instructions to reproduce the issue. Infer from the description and evidence, or note "Not yet determined" if unclear.>
51
+
52
+ ## Expected vs Actual
53
+
54
+ **Expected:** <What should have happened>
55
+
56
+ **Actual:** <What actually happened>
57
+
58
+ ## Evidence
59
+
60
+ > **Before including evidence, redact sensitive values**: remove or replace tokens, API keys, email addresses, and absolute local paths that may appear in config, logs, or state. Replace them with `[REDACTED]` or a generic placeholder.
61
+
62
+ **Config (`.gauntlet/config.yml`):**
63
+ <Paste only relevant, non-sensitive config values. Redact tokens, emails, and absolute paths. Note "File not found" if absent.>
64
+
65
+ **Debug log (last 50 lines of `<log_dir>/.debug.log`):**
66
+ <Paste minimal relevant excerpt with sensitive values redacted. Note "File not found" if absent.>
67
+
68
+ **Execution state (`<log_dir>/.execution_state`):**
69
+ <Paste only fields needed to diagnose the bug; redact sensitive values. Note "File not found" if absent.>
70
+
71
+ **Absent files:** <List any files that were not found, or "None">
72
+ ```
73
+
74
+ Choose a concise, descriptive title: `Bug: <short summary of the problem>`.
75
+
76
+ ## Step 4: Show Preview and Confirm
77
+
78
+ Present the full draft to the user — both title and body.
79
+
80
+ **If auto-file mode is on**: Inform the user that the issue will be filed automatically (show the title and body), then proceed directly to Step 5 without asking.
81
+
82
+ **Otherwise**, ask:
83
+
84
+ > "Here is the draft issue. Shall I file it? (yes/no)"
85
+
86
+ - If the user confirms: proceed to Step 5.
87
+ - If the user declines: exit without creating an issue. Inform the user that no issue was filed.
88
+
89
+ ## Step 5: File the Issue
90
+
91
+ Write the issue body to a temporary file and pass it via `--body-file` to avoid shell interpolation issues with special characters in the body text:
92
+
93
+ ```bash
94
+ ISSUE_TITLE=$(cat <<'TITLE_EOF'
95
+ Bug: <short summary>
96
+ TITLE_EOF
97
+ )
98
+ BODY_FILE=$(mktemp)
99
+ cat > "$BODY_FILE" << 'ISSUE_EOF'
100
+ <paste the full issue body here>
101
+ ISSUE_EOF
102
+ gh issue create --repo pacaplan/agent-gauntlet --title "$ISSUE_TITLE" --body-file "$BODY_FILE"
103
+ rm -f "$BODY_FILE"
104
+ ```
105
+
106
+ Report the created issue URL to the user.
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: gauntlet-merge
3
+ description: >-
4
+ Merges a named branch into the current worktree and propagates the validated execution state from that branch's worktree, eliminating redundant re-validation. Activates for requests such as "merge with gauntlet state", "merge and carry execution state", "reuse validated state after merge", or "merge branch without re-running gauntlet".
5
+ disable-model-invocation: false
6
+ allowed-tools: Bash
7
+ ---
8
+
9
+ # /gauntlet-merge $ARGUMENTS
10
+
11
+ Merge a branch and copy its validated execution state so the current directory inherits the already-verified results.
12
+
13
+ ## Step 1 - Run the merge script
14
+
15
+ ```bash
16
+ bash skills/gauntlet-merge/merge-state.sh "$ARGUMENTS"
17
+ ```
18
+
19
+ Run this command using `Bash` and capture both stdout and the exit code.
20
+
21
+ ## Step 2 - Report the result
22
+
23
+ - **If exit code is 0**: Report success. Include the script's output confirming the merge and state copy.
24
+ - **If exit code is non-zero**: Report the error. The script will have printed an error message — relay it clearly to the user. Do not proceed further.
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ # Merges <branch> into the current directory, then copies the .execution_state file from the worktree that has <branch> checked out.
4
+ BRANCH="${1:-}"
5
+
6
+ if [[ -z "$BRANCH" ]]; then
7
+ echo "Usage: merge-state.sh <branch>" >&2
8
+ exit 1
9
+ fi
10
+
11
+ # Scan `git worktree list --porcelain`: remember the path from each "worktree <path>" line and stop at the first "branch refs/heads/$BRANCH" line, which identifies the worktree with this branch checked out
12
+ SOURCE_DIR=""
13
+ while IFS= read -r line; do
14
+ if [[ "$line" =~ ^worktree\ (.+)$ ]]; then
15
+ current_wt="${BASH_REMATCH[1]}"
16
+ elif [[ "$line" == "branch refs/heads/$BRANCH" ]]; then
17
+ SOURCE_DIR="$current_wt"
18
+ break
19
+ fi
20
+ done < <(git worktree list --porcelain)
21
+ # An empty SOURCE_DIR means no worktree has the branch checked out, so there is no execution state to copy; exit 1 without merging.
22
+ if [[ -z "$SOURCE_DIR" ]]; then
23
+ echo "Error: No worktree found with branch '$BRANCH' checked out — cannot copy execution state." >&2
24
+ exit 1
25
+ fi
26
+
27
+ # Read the top-level log_dir value from a YAML config file ($1), dropping inline "# ..." comments, trailing whitespace/CR, and one pair of surrounding quotes; prints "gauntlet_logs" if the file or key is absent or the value is empty
28
+ read_log_dir() {
29
+ local config_path="$1"
30
+ local parsed=""
31
+ if [[ -f "$config_path" ]]; then
32
+ parsed="$(grep '^log_dir:' "$config_path" | head -n1 | sed -e 's/^log_dir:[[:space:]]*//' -e 's/^#.*$//' -e 's/[[:space:]]\{1,\}#.*$//' -e 's/[[:space:]]*$//' -e 's/^"\(.*\)"$/\1/' -e "s/^'\(.*\)'\$/\1/")" || true
33
+ fi
34
+ printf '%s' "${parsed:-gauntlet_logs}"
35
+ }
36
+
37
+ # Resolve the source log directory from the source worktree's .gauntlet/config.yml (read_log_dir falls back to gauntlet_logs)
38
+ SOURCE_CONFIG="$SOURCE_DIR/.gauntlet/config.yml"
39
+ SOURCE_LOG_DIR="$(read_log_dir "$SOURCE_CONFIG")"
40
+
41
+ # Resolve the destination log directory from the current directory's .gauntlet/config.yml (same gauntlet_logs fallback)
42
+ DEST_CONFIG=".gauntlet/config.yml"
43
+ DEST_LOG_DIR="$(read_log_dir "$DEST_CONFIG")"
44
+
45
+ # Verify the source execution state exists before merging, so a missing file aborts the script before `git merge` runs (fail fast, no partial state)
46
+ SOURCE_STATE="$SOURCE_DIR/$SOURCE_LOG_DIR/.execution_state"
47
+ if [[ ! -f "$SOURCE_STATE" ]]; then
48
+ echo "Error: Missing source execution state: $SOURCE_STATE — cannot copy execution state." >&2
49
+ exit 1
50
+ fi
51
+
52
+ # Run the merge; because of `set -e`, a non-zero exit from git merge stops the script here, before any state is copied
53
+ git merge "$BRANCH"
54
+
55
+ # Create destination log directory if it doesn't exist
56
+ mkdir -p "$DEST_LOG_DIR"
57
+
58
+ DEST_STATE="$DEST_LOG_DIR/.execution_state"
59
+
60
+ # Skip the copy when both paths refer to the same file (-ef compares file identity), which covers symlinks and path form variations
61
+ if [[ -e "$DEST_STATE" ]] && [[ "$SOURCE_STATE" -ef "$DEST_STATE" ]]; then
62
+ echo "Merged '$BRANCH'; execution state already current (source and destination are the same)."
63
+ exit 0
64
+ fi
65
+ # Overwrite any existing destination state file with the source worktree's copy
66
+ cp -f "$SOURCE_STATE" "$DEST_STATE"
67
+
68
+ echo "Merged '$BRANCH' and copied execution state from '$SOURCE_DIR/$SOURCE_LOG_DIR' to '$DEST_LOG_DIR'."
@@ -5,69 +5,71 @@ description: >-
5
5
  disable-model-invocation: false
6
6
  allowed-tools: Bash, Task
7
7
  ---
8
- <!--
9
- REVIEW TRUST LEVEL
10
- Controls how aggressively the agent acts on AI reviewer feedback.
11
- Change the trust_level value below to one of: high, medium, low
12
-
13
- - high: Fix all issues unless you strongly disagree or have low confidence the human wants the change.
14
- - medium: Fix issues you reasonably agree with or believe the human wants fixed. (DEFAULT)
15
- - low: Fix only issues you strongly agree with or are confident the human wants fixed.
16
- -->
17
- <!-- trust_level: medium -->
18
-
19
8
  # /gauntlet-run
20
9
  Execute the autonomous verification suite.
21
10
 
22
- **Review trust level: medium** — Fix issues you reasonably agree with or believe the human wants to be fixed. Skip issues that are purely stylistic, subjective, or that you believe the human would not want changed. When you skip an issue, briefly state what was skipped and why.
11
+ Fix issues you reasonably agree with or believe the human wants to be fixed. Skip issues that are purely stylistic, subjective, or that you believe the human would not want changed. When you skip an issue, briefly state what was skipped and why.
23
12
 
24
- ## Critical rules — read before proceeding
25
13
 
26
- **This is a BLOCKING GATE, not an optional check.** You may NOT declare your coding task complete, commit, push, or create a PR until the gauntlet reaches a terminal status. Treat every gauntlet run as the single most important step of your workflow.
14
+ ## Procedure
27
15
 
28
- **MANDATORY BEHAVIORS:**
29
- - **ALL Bash commands in this skill MUST be synchronous.** NEVER use `run_in_background: true` for any Bash call. NEVER use `&` to background any command.
30
- - **ALL Task tool calls MUST be synchronous.** NEVER use `run_in_background: true`.
31
- - **ALWAYS wait for and read the full command output** before proceeding. The command typically takes 1-2 minutes. Set `timeout: 300000` (5 minutes) on Bash calls to allow headroom.
32
- - **NEVER assume success.** You must see an explicit `Status:` line in the output. If you do not see `Status: Passed`, `Status: Passed with warnings`, or `Status: Retry limit exceeded` in the output, the run is not complete — wait for it or investigate.
33
- - **NEVER skip the fix-retry loop.** If the run fails, you MUST extract failures, fix code, and re-run. This is not optional.
16
+ ### Step 1 - Clean Logs
34
17
 
35
- ## Procedure
18
+ Run `agent-gauntlet clean` to archive any previous log files.
19
+
20
+ ### Step 2 - Run Gauntlet
21
+
22
+ If the caller requests a specific review to be enabled, append `--enable-review <name>` to the run command for each requested review.
23
+
24
+ Run `agent-gauntlet run` using `Bash` with `timeout: 300000`. **ALWAYS wait for and read the full command output** before proceeding — the command typically takes 1-2 minutes. **Verify you can see a `Status:` line in the output before continuing.**
25
+
26
+ ### Step 3 - Check Status
27
+
28
+ **NEVER assume success** — you must see an explicit `Status:` line before continuing. Check it and route accordingly:
29
+ - `Status: Passed` → Go to Step 9.
30
+ - `Status: Passed with warnings` → Go to Step 9.
31
+ - `Status: Failed` → Continue to Step 4. **You MUST continue — do not stop here.**
32
+ - `Status: Retry limit exceeded` → Run `agent-gauntlet clean` to archive logs. Go to Step 9.
33
+ - No status line visible → **Known issue:** Bun can drop all stdout/stderr when LLM review subprocesses run. Read the console log file to get the status: find the latest `console.*.log` in the gauntlet log directory (e.g., `gauntlet_logs/console.1.log`) and look for the `Status:` line there. If no console log is found there, also check `gauntlet_logs/previous/` for logs from the most recent archived run. If no console log exists in either location, the command may have timed out or failed to run — re-run with a longer timeout or investigate the error. Do NOT proceed as if it passed.
34
+
35
+ ### Step 4 - Extract Failures
36
+
37
+ Required when status is Failed:
38
+ - Infer the log directory from the file paths in the console output (e.g., if output references `gauntlet_logs/check_._lint.1.log`, the log directory is `gauntlet_logs/`)
39
+ - Read `extract-prompt.md` from this skill's directory
40
+ - **Extract log failures** using the first available strategy:
41
+ a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` extract-prompt content + `"\n\nLog directory: <inferred path>"`. **Task calls MUST be synchronous** — NEVER use `run_in_background: true`.
42
+ b. **Subagent delegation**: If your environment supports delegating work to a subagent but not the Task tool, delegate the extract-prompt instructions with the log directory to a subagent for processing.
43
+ c. **Inline fallback**: If no subagent capability is available, follow the extract-prompt instructions yourself to read the log files and produce the compact failure summary.
44
+
45
+ ### Step 5 - Report Failures
46
+
47
+ Print the compact failure summary returned from Step 4.
48
+
49
+ ### Step 6 - Fix
50
+
51
+ Apply the review guidance above to each failure and fix accordingly:
52
+ - CHECK failures with Fix Skill: invoke the named skill
53
+ - CHECK failures with Fix Instructions: follow the instructions
54
+ - REVIEW violations: fix or skip per the review guidance above
55
+
56
+ ### Step 7 - Update Review Decisions
57
+
58
+ For REVIEW violations you addressed:
59
+ - Read `update-prompt.md` from this skill's directory
60
+ - **Update review decisions** using the first available strategy (same as Step 4):
61
+ a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` update-prompt content + log directory + decisions list. **Task calls MUST be synchronous** — NEVER use `run_in_background: true`.
62
+ b. **Subagent delegation**: Delegate the update-prompt instructions with the log directory and decisions to a subagent.
63
+ c. **Inline fallback**: Follow the update-prompt instructions yourself to update the review JSON files.
64
+
65
+ ### Step 8 - Re-run Verification
66
+
67
+ **NEVER skip this step** — if the run failed, you MUST fix and re-run. Run the same command from Step 2 (including any `--enable-review` flags) again with `Bash` and `timeout: 300000`. Do NOT run `agent-gauntlet clean` between retries. The tool detects existing logs and automatically switches to verification mode. **Go back to Step 3** to check the status line and repeat.
68
+
69
+ ### Step 9 - Summarize Session
36
70
 
37
- 1. Run `agent-gauntlet clean` to archive any previous log files.
38
- 2. Run `agent-gauntlet run` using `Bash` with `timeout: 300000`. Wait for the complete output. **Verify you can see a `Status:` line in the output before continuing.**
39
- 3. **Check the status line:**
40
- - `Status: Passed` Go to step 8.
41
- - `Status: Passed with warnings` Go to step 8.
42
- - `Status: Failed` → Continue to step 4. **You MUST continue — do not stop here.**
43
- - `Status: Retry limit exceeded` → Run `agent-gauntlet clean` to archive logs. Go to step 8.
44
- - No status line visible → **Known issue:** Bun can drop all stdout/stderr when LLM review subprocesses run. Read the console log file to get the status: find the latest `console.*.log` in the gauntlet log directory (e.g., `gauntlet_logs/console.1.log`) and look for the `Status:` line there. If no console log is found there, also check `gauntlet_logs/previous/` for logs from the most recent archived run. If no console log exists in either location, the command may have timed out or failed to run — re-run with a longer timeout or investigate the error. Do NOT proceed as if it passed.
45
- 4. **Extract failures** (required when status is Failed):
46
- - Infer the log directory from the file paths in the console output (e.g., if output references `gauntlet_logs/check_._lint.1.log`, the log directory is `gauntlet_logs/`)
47
- - Read `extract-prompt.md` from this skill's directory
48
- - **Extract log failures** using the first available strategy:
49
- a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` extract-prompt content + `"\n\nLog directory: <inferred path>"`. NEVER use `run_in_background: true`.
50
- b. **Subagent delegation**: If your environment supports delegating work to a subagent but not the Task tool, delegate the extract-prompt instructions with the log directory to a subagent for processing.
51
- c. **Inline fallback**: If no subagent capability is available, follow the extract-prompt instructions yourself to read the log files and produce the compact failure summary.
52
- 5. **Fix code** based on the compact summary. You MUST address every actionable item:
53
- - CHECK failures with Fix Skill: invoke the named skill
54
- - CHECK failures with Fix Instructions: follow the instructions
55
- - REVIEW violations: apply the trust level above, fix or skip
56
- 5b. **Capture noteworthy violations for eval inventory** (if any REVIEW violations were found):
57
- - Collect the JSON file paths from the REVIEW failures identified in step 4 (the `.json` file paths)
58
- - Read `SKILL.md` from the `capture-eval-issues` skill directory (sibling of this skill's directory)
59
- - Follow the capture skill's procedure, passing the JSON file paths
60
- - Note the `CAPTURED:` summary line for inclusion in step 8
61
- 6. For REVIEW violations you addressed:
62
- - Read `update-prompt.md` from this skill's directory
63
- - **Update review decisions** using the first available strategy (same as step 4):
64
- a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` update-prompt content + log directory + decisions list. NEVER use `run_in_background: true`.
65
- b. **Subagent delegation**: Delegate the update-prompt instructions with the log directory and decisions to a subagent.
66
- c. **Inline fallback**: Follow the update-prompt instructions yourself to update the review JSON files.
67
- 7. **Re-run verification:** Run `agent-gauntlet run` again with `Bash` and `timeout: 300000`. Do NOT run `agent-gauntlet clean` between retries. The tool detects existing logs and automatically switches to verification mode. **Go back to step 3** to check the status line and repeat.
68
- 8. **Provide a summary** of the session:
69
- - Final Status: (Passed / Passed with warnings / Retry limit exceeded)
70
- - Issues Fixed: (list key fixes)
71
- - Issues Skipped: (list skipped items and reasons)
72
- - Eval Captures: (list captured issue IDs from step 5b, or "none")
73
- - Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)
71
+ Provide a summary of the session:
72
+ - Final Status: (Passed / Passed with warnings / Retry limit exceeded)
73
+ - Issues Fixed: (list key fixes)
74
+ - Issues Skipped: (list skipped items and reasons)
75
+ - Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)
@@ -21,17 +21,18 @@ Return a plain-text summary using EXACTLY this format:
21
21
  For check failures:
22
22
  ```text
23
23
  CHECKS:
24
- - <gate_label> | FAIL | <log_file_path>
25
- Errors: <concise error description>
26
- Fix Instructions: <extracted text if present, otherwise omit this line>
27
- Fix Skill: <skill name if present, otherwise omit this line>
24
+ [fail] <gate_label>
25
+ <concise error description>
26
+ Fix Instructions: <extracted text if present, otherwise omit this line>
27
+ Fix Skill: <skill name if present, otherwise omit this line>
28
28
  ```
29
29
 
30
30
  For review failures:
31
31
  ```text
32
32
  REVIEWS:
33
- - <gate_label> | FAIL | <json_file_path>
34
- [<priority>] <file>:<line> <issue summary> (fix: <fix suggestion>)
33
+ [<priority>] <gate_label>
34
+ <file>:<line> - <issue summary>
35
+ Fix: <fix suggestion>
35
36
  ```
36
37
 
37
38
  If there are no failures of a type, omit that section entirely.
@@ -102,13 +103,14 @@ Replace all `var` declarations with `const` or `let`.
102
103
 
103
104
  ```text
104
105
  CHECKS:
105
- - check:src:lint | FAIL | gauntlet_logs/check_src_lint.2.log
106
- Errors: src/helpers.ts:3:5 - error: Unexpected var, use let or const instead
107
- Fix Instructions: Replace all `var` declarations with `const` or `let`.
106
+ [fail] check:src:lint
107
+ src/helpers.ts:3:5 - error: Unexpected var, use let or const instead
108
+ Fix Instructions: Replace all `var` declarations with `const` or `let`.
108
109
 
109
110
  REVIEWS:
110
- - review:src:code-quality (claude@1) | FAIL | gauntlet_logs/review_src_code-quality_claude@1.2.json
111
- [high] src/main.ts:45 Missing error handling for async database call (fix: Wrap in try-catch block)
111
+ [high] review:src:code-quality (claude@1)
112
+ src/main.ts:45 - Missing error handling for async database call
113
+ Fix: Wrap in try-catch block
112
114
  ```
113
115
 
114
116
  Note: The `src/utils.ts:10` violation was omitted because its status is `"fixed"`, not `"new"`.
@@ -117,5 +119,5 @@ Note: The `src/utils.ts:10` violation was omitted because its status is `"fixed"
117
119
 
118
120
  - Do NOT summarize or editorialize — copy error details verbatim where possible
119
121
  - Do NOT skip any `[FAIL]` entries
120
- - Keep the output compact — one line per check error, one line per review violation
122
+ - Keep the output compact — one entry per check failure (2–4 lines, depending on which optional Fix lines are present) and one entry per review violation (3 lines)
121
123
  - For review violations, only include those with `status: "new"` — skip `"fixed"` and `"skipped"`