agent-gauntlet 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/index.js +1190 -703
- package/dist/index.js.map +22 -20
- package/dist/scripts/status.js +1 -1
- package/dist/scripts/status.js.map +1 -1
- package/package.json +1 -1
- package/skills/gauntlet-check/SKILL.md +47 -12
- package/skills/gauntlet-commit/SKILL.md +73 -0
- package/skills/gauntlet-help/SKILL.md +9 -0
- package/skills/gauntlet-issue/SKILL.md +106 -0
- package/skills/gauntlet-merge/SKILL.md +24 -0
- package/skills/gauntlet-merge/merge-state.sh +68 -0
- package/skills/gauntlet-run/SKILL.md +60 -58
- package/skills/gauntlet-run/extract-prompt.md +14 -12
package/dist/scripts/status.js
CHANGED
|
@@ -5,6 +5,6 @@
|
|
|
5
5
|
"#!/usr/bin/env node\n/**\n * Gauntlet Status Script\n *\n * Parses the configured log_dir (default: gauntlet_logs/) to produce a structured\n * summary of the most recent gauntlet session from the .debug.log, plus a file\n * inventory of all log/JSON files for further inspection.\n *\n * This script handles structured data only (debug log events). Detailed failure\n * analysis (reading individual check logs, review JSONs) is left to the caller\n * (the /gauntlet-status skill) since log formats vary by check type.\n */\n\nimport fs from 'node:fs';\nimport path from 'node:path';\n\n// --- Types ---\n\ninterface RunStart {\n timestamp: string;\n mode: string;\n baseRef?: string;\n filesChanged: number;\n linesAdded: number;\n linesRemoved: number;\n gates: number;\n}\n\ninterface GateResult {\n timestamp: string;\n gateId: string;\n cli?: string;\n status: string;\n duration: string;\n violations?: number;\n}\n\ninterface RunEnd {\n timestamp: string;\n status: string;\n fixed: number;\n skipped: number;\n failed: number;\n iterations: number;\n duration: string;\n}\n\ninterface StopHookEntry {\n timestamp: string;\n decision: string;\n reason: string;\n}\n\ninterface SessionRun {\n start: RunStart;\n gates: GateResult[];\n end?: RunEnd;\n stopHook?: StopHookEntry;\n}\n\n// --- Parsing helpers ---\n\nfunction parseKeyValue(text: string): Record<string, string> {\n const result: Record<string, string> = {};\n for (const match of text.matchAll(/(\\w+)=(\\S+)/g)) {\n const key = match[1];\n const value = match[2];\n if (key && value) result[key] = value;\n }\n return result;\n}\n\nfunction parseTimestamp(line: string): string {\n const m = line.match(/^\\[([^\\]]+)\\]/);\n return m?.[1] ?? '';\n}\n\nfunction parseEventType(line: string): string {\n const m = line.match(/^\\[[^\\]]+\\]\\s+(\\S+)/);\n return m?.[1] ?? '';\n}\n\nfunction parseEventBody(line: string): string {\n const m = line.match(/^\\[[^\\]]+\\]\\s+\\S+\\s*(.*)/);\n return m?.[1] ?? 
'';\n}\n\n// --- Debug log parsing ---\n\nfunction parseRunStart(ts: string, body: string): SessionRun {\n const kv = parseKeyValue(body);\n return {\n start: {\n timestamp: ts,\n mode: kv.mode ?? 'unknown',\n baseRef: kv.base_ref,\n filesChanged: Number(kv.files_changed ?? kv.changes ?? 0),\n linesAdded: Number(kv.lines_added ?? 0),\n linesRemoved: Number(kv.lines_removed ?? 0),\n gates: Number(kv.gates ?? 0),\n },\n gates: [],\n };\n}\n\nfunction parseGateResult(ts: string, body: string): GateResult {\n const gateIdMatch = body.match(/^(\\S+)/);\n const kv = parseKeyValue(body);\n return {\n timestamp: ts,\n gateId: gateIdMatch?.[1] ?? 'unknown',\n cli: kv.cli,\n status: kv.status ?? 'unknown',\n duration: kv.duration ?? '?',\n violations: kv.violations !== undefined ? Number(kv.violations) : undefined,\n };\n}\n\nfunction parseRunEnd(ts: string, body: string): RunEnd {\n const kv = parseKeyValue(body);\n return {\n timestamp: ts,\n status: kv.status ?? 'unknown',\n fixed: Number(kv.fixed ?? 0),\n skipped: Number(kv.skipped ?? 0),\n failed: Number(kv.failed ?? 0),\n iterations: Number(kv.iterations ?? 0),\n duration: kv.duration ?? '?',\n };\n}\n\nfunction parseStopHookEntry(ts: string, body: string): StopHookEntry {\n const kv = parseKeyValue(body);\n return {\n timestamp: ts,\n decision: kv.decision ?? 'unknown',\n reason: kv.reason ?? 
'unknown',\n };\n}\n\nfunction isBeforeSession(\n ts: string,\n sessionStartTime: Date | undefined,\n): boolean {\n return sessionStartTime !== undefined && new Date(ts) < sessionStartTime;\n}\n\nfunction parseDebugLog(content: string, sessionStartTime?: Date): SessionRun[] {\n const lines = content.split('\\n').filter((l) => l.trim());\n const sessions: SessionRun[] = [];\n let current: SessionRun | null = null;\n\n for (const line of lines) {\n const event = parseEventType(line);\n const body = parseEventBody(line);\n const ts = parseTimestamp(line);\n\n switch (event) {\n case 'RUN_START':\n if (isBeforeSession(ts, sessionStartTime)) {\n current = null;\n } else {\n current = parseRunStart(ts, body);\n sessions.push(current);\n }\n break;\n case 'GATE_RESULT':\n if (current) current.gates.push(parseGateResult(ts, body));\n break;\n case 'RUN_END':\n if (current) current.end = parseRunEnd(ts, body);\n break;\n case 'STOP_HOOK':\n if (current) current.stopHook = parseStopHookEntry(ts, body);\n break;\n }\n }\n\n return sessions;\n}\n\n/**\n * Find the earliest mtime of non-hidden log files in the directory.\n * This marks the start of the current session.\n */\nfunction getSessionStartTime(logDir: string): Date | undefined {\n const entries = fs\n .readdirSync(logDir)\n .filter((f) => !f.startsWith('.') && f !== 'previous');\n let earliest: number | undefined;\n for (const entry of entries) {\n const mtime = fs.statSync(path.join(logDir, entry)).mtimeMs;\n if (earliest === undefined || mtime < earliest) {\n earliest = mtime;\n }\n }\n return earliest !== undefined ? 
new Date(earliest) : undefined;\n}\n\n// --- File inventory ---\n\nfunction formatFileInventory(logDir: string): string[] {\n const lines: string[] = [];\n const entries = fs\n .readdirSync(logDir)\n .filter((f) => !f.startsWith('.') && f !== 'previous');\n if (entries.length === 0) return lines;\n\n const checks: string[] = [];\n const reviews: string[] = [];\n const other: string[] = [];\n\n for (const entry of entries.sort()) {\n const fullPath = path.join(logDir, entry);\n const stat = fs.statSync(fullPath);\n const sizeKB = (stat.size / 1024).toFixed(1);\n const line = `- ${fullPath} (${sizeKB} KB)`;\n\n if (entry.startsWith('review_')) {\n reviews.push(line);\n } else if (entry.startsWith('check_')) {\n checks.push(line);\n } else {\n other.push(line);\n }\n }\n\n lines.push('### Log Files');\n lines.push('');\n if (checks.length > 0) {\n lines.push('**Check logs:**');\n lines.push(...checks);\n }\n if (reviews.length > 0) {\n lines.push('**Review logs/JSON:**');\n lines.push(...reviews);\n }\n if (other.length > 0) {\n lines.push('**Other:**');\n lines.push(...other);\n }\n lines.push('');\n\n return lines;\n}\n\n// --- Summary output ---\n\nfunction formatStatusLine(end: RunEnd): string {\n if (end.status === 'pass') return 'PASSED';\n if (end.status === 'fail') return 'FAILED';\n return end.status.toUpperCase();\n}\n\nfunction formatAllRuns(sessions: SessionRun[]): string[] {\n const lines: string[] = [];\n lines.push('### All Runs in Session');\n lines.push('');\n for (let i = 0; i < sessions.length; i++) {\n const s = sessions[i];\n if (!s) continue;\n const status = s.end ? s.end.status : 'in-progress';\n const duration = s.end ? s.end.duration : '?';\n lines.push(\n `${i + 1}. 
[${s.start.timestamp}] mode=${s.start.mode} status=${status} duration=${duration}`,\n );\n }\n lines.push('');\n return lines;\n}\n\nfunction formatSession(sessions: SessionRun[], logDir: string): string {\n if (sessions.length === 0) {\n return 'No gauntlet runs found in logs.';\n }\n\n const lastComplete = [...sessions].reverse().find((s) => s.end);\n const session = lastComplete ?? sessions[sessions.length - 1];\n if (!session) return 'No gauntlet runs found in logs.';\n\n const lines: string[] = [];\n\n // Header\n lines.push('## Gauntlet Session Summary');\n lines.push('');\n\n // Overall status\n if (session.end) {\n lines.push(`**Status:** ${formatStatusLine(session.end)}`);\n lines.push(`**Iterations:** ${session.end.iterations}`);\n lines.push(`**Duration:** ${session.end.duration}`);\n lines.push(\n `**Fixed:** ${session.end.fixed} | **Skipped:** ${session.end.skipped} | **Failed:** ${session.end.failed}`,\n );\n } else {\n lines.push('**Status:** In Progress (no RUN_END found)');\n }\n lines.push('');\n\n // Diff stats\n lines.push('### Diff Stats');\n lines.push(`- Mode: ${session.start.mode}`);\n if (session.start.baseRef) {\n lines.push(`- Base ref: ${session.start.baseRef}`);\n }\n lines.push(`- Files changed: ${session.start.filesChanged}`);\n lines.push(\n `- Lines: +${session.start.linesAdded} / -${session.start.linesRemoved}`,\n );\n lines.push(`- Gates: ${session.start.gates}`);\n lines.push('');\n\n // Gate results\n lines.push('### Gate Results');\n lines.push('');\n lines.push('| Gate | CLI | Status | Duration | Violations |');\n lines.push('|------|-----|--------|----------|------------|');\n for (const gate of session.gates) {\n const violations =\n gate.violations !== undefined ? String(gate.violations) : '-';\n const statusIcon = gate.status === 'pass' ? 'pass' : 'FAIL';\n lines.push(\n `| ${gate.gateId} | ${gate.cli ?? 
'-'} | ${statusIcon} | ${gate.duration} | ${violations} |`,\n );\n }\n lines.push('');\n\n // Stop hook\n if (session.stopHook) {\n lines.push('### Stop Hook');\n lines.push(`- Decision: ${session.stopHook.decision}`);\n lines.push(`- Reason: ${session.stopHook.reason}`);\n lines.push('');\n }\n\n // File inventory\n lines.push(...formatFileInventory(logDir));\n\n // All sessions summary (if multiple runs)\n if (sessions.length > 1) {\n lines.push(...formatAllRuns(sessions));\n }\n\n return lines.join('\\n');\n}\n\n// --- Main ---\n\n/**\n * Read the configured log_dir from .gauntlet/config.yml.\n * Falls back to \"gauntlet_logs\" if not found.\n */\nfunction getLogDir(cwd: string): string {\n const configPath = path.join(cwd, '.gauntlet', 'config.yml');\n try {\n const content = fs.readFileSync(configPath, 'utf-8');\n const match = content.match(/^log_dir:\\s*(.+)$/m);\n if (match?.[1]) return match[1].trim();\n } catch {\n // Config not found — use default\n }\n return 'gauntlet_logs';\n}\n\n/**\n * Resolve the log directory and debug log path.\n * Returns null if no logs are found (after printing a message).\n */\nfunction resolveLogPaths(\n activeDir: string,\n): { logDir: string; debugLogPath: string } | null {\n const previousDir = path.join(activeDir, 'previous');\n const debugLogPath = path.join(activeDir, '.debug.log');\n\n // Check active directory first for non-debug log files\n const activeHasLogs =\n fs.existsSync(activeDir) &&\n fs\n .readdirSync(activeDir)\n .some((f) => !f.startsWith('.') && f !== 'previous');\n\n if (activeHasLogs) {\n return { logDir: activeDir, debugLogPath };\n }\n\n if (!fs.existsSync(previousDir)) {\n console.log('No gauntlet_logs directory found.');\n return null;\n }\n\n // Fall back to previous directory — cleanLogs archives files directly here\n const logDir = resolvePreviousLogDir(previousDir);\n if (!logDir) return null;\n\n // Debug log stays in the main gauntlet_logs dir, not in previous/\n return { logDir, 
debugLogPath };\n}\n\nfunction resolvePreviousLogDir(previousDir: string): string | null {\n const prevEntries = fs.readdirSync(previousDir);\n const hasDirectFiles = prevEntries.some(\n (f) => f.endsWith('.log') || f.endsWith('.json'),\n );\n\n if (hasDirectFiles) return previousDir;\n\n // Legacy: check for timestamped subdirectories\n const prevDirs = prevEntries\n .map((d) => path.join(previousDir, d))\n .filter((d) => fs.statSync(d).isDirectory())\n .sort()\n .reverse();\n\n if (prevDirs.length === 0) {\n console.log('No gauntlet logs found.');\n return null;\n }\n\n return prevDirs[0] as string;\n}\n\nexport function main(): void {\n const cwd = process.cwd();\n const logDirName = getLogDir(cwd);\n const activeDir = path.join(cwd, logDirName);\n\n const paths = resolveLogPaths(activeDir);\n if (!paths) {\n process.exit(0);\n }\n\n // Parse debug log, filtering to current session based on log file timestamps\n let sessions: SessionRun[] = [];\n if (fs.existsSync(paths.debugLogPath)) {\n const debugContent = fs.readFileSync(paths.debugLogPath, 'utf-8');\n const sessionStart = getSessionStartTime(paths.logDir);\n sessions = parseDebugLog(debugContent, sessionStart);\n }\n\n // Format and output\n const output = formatSession(sessions, paths.logDir);\n console.log(output);\n}\n\n// Auto-execute when run directly (e.g., `bun src/scripts/status.ts`\n// or `node dist/scripts/status.js`). The filename check prevents\n// this from triggering when the module is bundled into dist/index.js.\nconst isDirectRun =\n (import.meta.url === `file://${process.argv[1]}` ||\n (typeof Bun !== 'undefined' && import.meta.url === `file://${Bun.main}`)) &&\n (process.argv[1]?.endsWith('status.ts') ||\n process.argv[1]?.endsWith('status.js'));\nif (isDirectRun) {\n main();\n}\n"
|
|
6
6
|
],
|
|
7
7
|
"mappings": ";;;;;AAaA;AACA;AAgDA,SAAS,aAAa,CAAC,MAAsC;AAAA,EAC3D,MAAM,SAAiC,CAAC;AAAA,EACxC,WAAW,SAAS,KAAK,SAAS,cAAc,GAAG;AAAA,IACjD,MAAM,MAAM,MAAM;AAAA,IAClB,MAAM,QAAQ,MAAM;AAAA,IACpB,IAAI,OAAO;AAAA,MAAO,OAAO,OAAO;AAAA,EAClC;AAAA,EACA,OAAO;AAAA;AAGT,SAAS,cAAc,CAAC,MAAsB;AAAA,EAC5C,MAAM,IAAI,KAAK,MAAM,eAAe;AAAA,EACpC,OAAO,IAAI,MAAM;AAAA;AAGnB,SAAS,cAAc,CAAC,MAAsB;AAAA,EAC5C,MAAM,IAAI,KAAK,MAAM,qBAAqB;AAAA,EAC1C,OAAO,IAAI,MAAM;AAAA;AAGnB,SAAS,cAAc,CAAC,MAAsB;AAAA,EAC5C,MAAM,IAAI,KAAK,MAAM,0BAA0B;AAAA,EAC/C,OAAO,IAAI,MAAM;AAAA;AAKnB,SAAS,aAAa,CAAC,IAAY,MAA0B;AAAA,EAC3D,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,OAAO;AAAA,MACL,WAAW;AAAA,MACX,MAAM,GAAG,QAAQ;AAAA,MACjB,SAAS,GAAG;AAAA,MACZ,cAAc,OAAO,GAAG,iBAAiB,GAAG,WAAW,CAAC;AAAA,MACxD,YAAY,OAAO,GAAG,eAAe,CAAC;AAAA,MACtC,cAAc,OAAO,GAAG,iBAAiB,CAAC;AAAA,MAC1C,OAAO,OAAO,GAAG,SAAS,CAAC;AAAA,IAC7B;AAAA,IACA,OAAO,CAAC;AAAA,EACV;AAAA;AAGF,SAAS,eAAe,CAAC,IAAY,MAA0B;AAAA,EAC7D,MAAM,cAAc,KAAK,MAAM,QAAQ;AAAA,EACvC,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,WAAW;AAAA,IACX,QAAQ,cAAc,MAAM;AAAA,IAC5B,KAAK,GAAG;AAAA,IACR,QAAQ,GAAG,UAAU;AAAA,IACrB,UAAU,GAAG,YAAY;AAAA,IACzB,YAAY,GAAG,eAAe,YAAY,OAAO,GAAG,UAAU,IAAI;AAAA,EACpE;AAAA;AAGF,SAAS,WAAW,CAAC,IAAY,MAAsB;AAAA,EACrD,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,WAAW;AAAA,IACX,QAAQ,GAAG,UAAU;AAAA,IACrB,OAAO,OAAO,GAAG,SAAS,CAAC;AAAA,IAC3B,SAAS,OAAO,GAAG,WAAW,CAAC;AAAA,IAC/B,QAAQ,OAAO,GAAG,UAAU,CAAC;AAAA,IAC7B,YAAY,OAAO,GAAG,cAAc,CAAC;AAAA,IACrC,UAAU,GAAG,YAAY;AAAA,EAC3B;AAAA;AAGF,SAAS,kBAAkB,CAAC,IAAY,MAA6B;AAAA,EACnE,MAAM,KAAK,cAAc,IAAI;AAAA,EAC7B,OAAO;AAAA,IACL,WAAW;AAAA,IACX,UAAU,GAAG,YAAY;AAAA,IACzB,QAAQ,GAAG,UAAU;AAAA,EACvB;AAAA;AAGF,SAAS,eAAe,CACtB,IACA,kBACS;AAAA,EACT,OAAO,qBAAqB,aAAa,IAAI,KAAK,EAAE,IAAI;AAAA;AAG1D,SAAS,aAAa,CAAC,SAAiB,kBAAuC;AAAA,EAC7E,MAAM,QAAQ,QAAQ,MAAM;AAAA,CAAI,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC;AAAA,EACxD,MAAM,WAAyB,CAAC;AAAA,EAChC,IAAI,UAA6B;AAAA,EAEjC,WAAW,QAAQ,OAAO;AAAA,IACxB,MAAM,QAAQ,eAAe,IAAI;AAAA,IACjC,MAAM,OAAO,eAAe,IAAI;AAAA,IAChC,MAAM,KAAK
,eAAe,IAAI;AAAA,IAE9B,QAAQ;AAAA,WACD;AAAA,QACH,IAAI,gBAAgB,IAAI,gBAAgB,GAAG;AAAA,UACzC,UAAU;AAAA,QACZ,EAAO;AAAA,UACL,UAAU,cAAc,IAAI,IAAI;AAAA,UAChC,SAAS,KAAK,OAAO;AAAA;AAAA,QAEvB;AAAA,WACG;AAAA,QACH,IAAI;AAAA,UAAS,QAAQ,MAAM,KAAK,gBAAgB,IAAI,IAAI,CAAC;AAAA,QACzD;AAAA,WACG;AAAA,QACH,IAAI;AAAA,UAAS,QAAQ,MAAM,YAAY,IAAI,IAAI;AAAA,QAC/C;AAAA,WACG;AAAA,QACH,IAAI;AAAA,UAAS,QAAQ,WAAW,mBAAmB,IAAI,IAAI;AAAA,QAC3D;AAAA;AAAA,EAEN;AAAA,EAEA,OAAO;AAAA;AAOT,SAAS,mBAAmB,CAAC,QAAkC;AAAA,EAC7D,MAAM,UAAU,GACb,YAAY,MAAM,EAClB,OAAO,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,KAAK,MAAM,UAAU;AAAA,EACvD,IAAI;AAAA,EACJ,WAAW,SAAS,SAAS;AAAA,IAC3B,MAAM,QAAQ,GAAG,SAAS,KAAK,KAAK,QAAQ,KAAK,CAAC,EAAE;AAAA,IACpD,IAAI,aAAa,aAAa,QAAQ,UAAU;AAAA,MAC9C,WAAW;AAAA,IACb;AAAA,EACF;AAAA,EACA,OAAO,aAAa,YAAY,IAAI,KAAK,QAAQ,IAAI;AAAA;AAKvD,SAAS,mBAAmB,CAAC,QAA0B;AAAA,EACrD,MAAM,QAAkB,CAAC;AAAA,EACzB,MAAM,UAAU,GACb,YAAY,MAAM,EAClB,OAAO,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,KAAK,MAAM,UAAU;AAAA,EACvD,IAAI,QAAQ,WAAW;AAAA,IAAG,OAAO;AAAA,EAEjC,MAAM,SAAmB,CAAC;AAAA,EAC1B,MAAM,UAAoB,CAAC;AAAA,EAC3B,MAAM,QAAkB,CAAC;AAAA,EAEzB,WAAW,SAAS,QAAQ,KAAK,GAAG;AAAA,IAClC,MAAM,WAAW,KAAK,KAAK,QAAQ,KAAK;AAAA,IACxC,MAAM,OAAO,GAAG,SAAS,QAAQ;AAAA,IACjC,MAAM,UAAU,KAAK,OAAO,MAAM,QAAQ,CAAC;AAAA,IAC3C,MAAM,OAAO,KAAK,aAAa;AAAA,IAE/B,IAAI,MAAM,WAAW,SAAS,GAAG;AAAA,MAC/B,QAAQ,KAAK,IAAI;AAAA,IACnB,EAAO,SAAI,MAAM,WAAW,QAAQ,GAAG;AAAA,MACrC,OAAO,KAAK,IAAI;AAAA,IAClB,EAAO;AAAA,MACL,MAAM,KAAK,IAAI;AAAA;AAAA,EAEnB;AAAA,EAEA,MAAM,KAAK,eAAe;AAAA,EAC1B,MAAM,KAAK,EAAE;AAAA,EACb,IAAI,OAAO,SAAS,GAAG;AAAA,IACrB,MAAM,KAAK,iBAAiB;AAAA,IAC5B,MAAM,KAAK,GAAG,MAAM;AAAA,EACtB;AAAA,EACA,IAAI,QAAQ,SAAS,GAAG;AAAA,IACtB,MAAM,KAAK,uBAAuB;AAAA,IAClC,MAAM,KAAK,GAAG,OAAO;AAAA,EACvB;AAAA,EACA,IAAI,MAAM,SAAS,GAAG;AAAA,IACpB,MAAM,KAAK,YAAY;AAAA,IACvB,MAAM,KAAK,GAAG,KAAK;AAAA,EACrB;AAAA,EACA,MAAM,KAAK,EAAE;AAAA,EAEb,OAAO;AAAA;AAKT,SAAS,gBAAgB,CAAC,KAAqB;AAAA,EAC7C,IAAI,IAAI,WAAW;AAAA,IAAQ,OAAO;AAAA,EAClC,IAAI,IAAI,WAAW;AAAA,IAAQ,OAAO;AAAA,EAClC,OAAO,IAAI,OAAO,YAAY;AAAA;AAGhC,SAAS,aAAa,CAAC,UAAkC;AA
AA,EACvD,MAAM,QAAkB,CAAC;AAAA,EACzB,MAAM,KAAK,yBAAyB;AAAA,EACpC,MAAM,KAAK,EAAE;AAAA,EACb,SAAS,IAAI,EAAG,IAAI,SAAS,QAAQ,KAAK;AAAA,IACxC,MAAM,IAAI,SAAS;AAAA,IACnB,IAAI,CAAC;AAAA,MAAG;AAAA,IACR,MAAM,SAAS,EAAE,MAAM,EAAE,IAAI,SAAS;AAAA,IACtC,MAAM,WAAW,EAAE,MAAM,EAAE,IAAI,WAAW;AAAA,IAC1C,MAAM,KACJ,GAAG,IAAI,OAAO,EAAE,MAAM,mBAAmB,EAAE,MAAM,eAAe,mBAAmB,UACrF;AAAA,EACF;AAAA,EACA,MAAM,KAAK,EAAE;AAAA,EACb,OAAO;AAAA;AAGT,SAAS,aAAa,CAAC,UAAwB,QAAwB;AAAA,EACrE,IAAI,SAAS,WAAW,GAAG;AAAA,IACzB,OAAO;AAAA,EACT;AAAA,EAEA,MAAM,eAAe,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,GAAG;AAAA,EAC9D,MAAM,UAAU,gBAAgB,SAAS,SAAS,SAAS;AAAA,EAC3D,IAAI,CAAC;AAAA,IAAS,OAAO;AAAA,EAErB,MAAM,QAAkB,CAAC;AAAA,EAGzB,MAAM,KAAK,6BAA6B;AAAA,EACxC,MAAM,KAAK,EAAE;AAAA,EAGb,IAAI,QAAQ,KAAK;AAAA,IACf,MAAM,KAAK,eAAe,iBAAiB,QAAQ,GAAG,GAAG;AAAA,IACzD,MAAM,KAAK,mBAAmB,QAAQ,IAAI,YAAY;AAAA,IACtD,MAAM,KAAK,iBAAiB,QAAQ,IAAI,UAAU;AAAA,IAClD,MAAM,KACJ,cAAc,QAAQ,IAAI,wBAAwB,QAAQ,IAAI,yBAAyB,QAAQ,IAAI,QACrG;AAAA,EACF,EAAO;AAAA,IACL,MAAM,KAAK,4CAA4C;AAAA;AAAA,EAEzD,MAAM,KAAK,EAAE;AAAA,EAGb,MAAM,KAAK,gBAAgB;AAAA,EAC3B,MAAM,KAAK,WAAW,QAAQ,MAAM,MAAM;AAAA,EAC1C,IAAI,QAAQ,MAAM,SAAS;AAAA,IACzB,MAAM,KAAK,eAAe,QAAQ,MAAM,SAAS;AAAA,EACnD;AAAA,EACA,MAAM,KAAK,oBAAoB,QAAQ,MAAM,cAAc;AAAA,EAC3D,MAAM,KACJ,aAAa,QAAQ,MAAM,iBAAiB,QAAQ,MAAM,cAC5D;AAAA,EACA,MAAM,KAAK,YAAY,QAAQ,MAAM,OAAO;AAAA,EAC5C,MAAM,KAAK,EAAE;AAAA,EAGb,MAAM,KAAK,kBAAkB;AAAA,EAC7B,MAAM,KAAK,EAAE;AAAA,EACb,MAAM,KAAK,iDAAiD;AAAA,EAC5D,MAAM,KAAK,iDAAiD;AAAA,EAC5D,WAAW,QAAQ,QAAQ,OAAO;AAAA,IAChC,MAAM,aACJ,KAAK,eAAe,YAAY,OAAO,KAAK,UAAU,IAAI;AAAA,IAC5D,MAAM,aAAa,KAAK,WAAW,SAAS,SAAS;AAAA,IACrD,MAAM,KACJ,KAAK,KAAK,YAAY,KAAK,OAAO,SAAS,gBAAgB,KAAK,cAAc,cAChF;AAAA,EACF;AAAA,EACA,MAAM,KAAK,EAAE;AAAA,EAGb,IAAI,QAAQ,UAAU;AAAA,IACpB,MAAM,KAAK,eAAe;AAAA,IAC1B,MAAM,KAAK,eAAe,QAAQ,SAAS,UAAU;AAAA,IACrD,MAAM,KAAK,aAAa,QAAQ,SAAS,QAAQ;AAAA,IACjD,MAAM,KAAK,EAAE;AAAA,EACf;AAAA,EAGA,MAAM,KAAK,GAAG,oBAAoB,MAAM,CAAC;AAAA,EAGzC,IAAI,SAAS,SAAS,GAAG;AAAA,IACvB,MAAM,KAAK,GAAG,cAAc,QAAQ,CAAC;AAAA,EACvC;AAAA
,EAEA,OAAO,MAAM,KAAK;AAAA,CAAI;AAAA;AASxB,SAAS,SAAS,CAAC,KAAqB;AAAA,EACtC,MAAM,aAAa,KAAK,KAAK,KAAK,aAAa,YAAY;AAAA,EAC3D,IAAI;AAAA,IACF,MAAM,UAAU,GAAG,aAAa,YAAY,OAAO;AAAA,IACnD,MAAM,QAAQ,QAAQ,MAAM,oBAAoB;AAAA,IAChD,IAAI,QAAQ;AAAA,MAAI,OAAO,MAAM,GAAG,KAAK;AAAA,IACrC,MAAM;AAAA,EAGR,OAAO;AAAA;AAOT,SAAS,eAAe,CACtB,WACiD;AAAA,EACjD,MAAM,cAAc,KAAK,KAAK,WAAW,UAAU;AAAA,EACnD,MAAM,eAAe,KAAK,KAAK,WAAW,YAAY;AAAA,EAGtD,MAAM,gBACJ,GAAG,WAAW,SAAS,KACvB,GACG,YAAY,SAAS,EACrB,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,KAAK,MAAM,UAAU;AAAA,EAEvD,IAAI,eAAe;AAAA,IACjB,OAAO,EAAE,QAAQ,WAAW,aAAa;AAAA,EAC3C;AAAA,EAEA,IAAI,CAAC,GAAG,WAAW,WAAW,GAAG;AAAA,IAC/B,QAAQ,IAAI,mCAAmC;AAAA,IAC/C,OAAO;AAAA,EACT;AAAA,EAGA,MAAM,SAAS,sBAAsB,WAAW;AAAA,EAChD,IAAI,CAAC;AAAA,IAAQ,OAAO;AAAA,EAGpB,OAAO,EAAE,QAAQ,aAAa;AAAA;AAGhC,SAAS,qBAAqB,CAAC,aAAoC;AAAA,EACjE,MAAM,cAAc,GAAG,YAAY,WAAW;AAAA,EAC9C,MAAM,iBAAiB,YAAY,KACjC,CAAC,MAAM,EAAE,SAAS,MAAM,KAAK,EAAE,SAAS,OAAO,CACjD;AAAA,EAEA,IAAI;AAAA,IAAgB,OAAO;AAAA,EAG3B,MAAM,WAAW,YACd,IAAI,CAAC,MAAM,KAAK,KAAK,aAAa,CAAC,CAAC,EACpC,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,YAAY,CAAC,EAC1C,KAAK,EACL,QAAQ;AAAA,EAEX,IAAI,SAAS,WAAW,GAAG;AAAA,IACzB,QAAQ,IAAI,yBAAyB;AAAA,IACrC,OAAO;AAAA,EACT;AAAA,EAEA,OAAO,SAAS;AAAA;AAGX,SAAS,IAAI,GAAS;AAAA,EAC3B,MAAM,MAAM,QAAQ,IAAI;AAAA,EACxB,MAAM,aAAa,UAAU,GAAG;AAAA,EAChC,MAAM,YAAY,KAAK,KAAK,KAAK,UAAU;AAAA,EAE3C,MAAM,QAAQ,gBAAgB,SAAS;AAAA,EACvC,IAAI,CAAC,OAAO;AAAA,IACV,QAAQ,KAAK,CAAC;AAAA,EAChB;AAAA,EAGA,IAAI,WAAyB,CAAC;AAAA,EAC9B,IAAI,GAAG,WAAW,MAAM,YAAY,GAAG;AAAA,IACrC,MAAM,eAAe,GAAG,aAAa,MAAM,cAAc,OAAO;AAAA,IAChE,MAAM,eAAe,oBAAoB,MAAM,MAAM;AAAA,IACrD,WAAW,cAAc,cAAc,YAAY;AAAA,EACrD;AAAA,EAGA,MAAM,SAAS,cAAc,UAAU,MAAM,MAAM;AAAA,EACnD,QAAQ,IAAI,MAAM;AAAA;AAMpB,IAAM,eACH,YAAY,QAAQ,UAAU,QAAQ,KAAK,QACzC,OAAO,QAAQ,eAAe,YAAY,QAAQ,UAAU,IAAI,YAClE,QAAQ,KAAK,IAAI,SAAS,WAAW,KACpC,QAAQ,KAAK,IAAI,SAAS,WAAW;AACzC,IAAI,aAAa;AAAA,EACf,KAAK;AACP;",
|
|
8
|
-
"debugId": "
|
|
8
|
+
"debugId": "38D823BEF2803BA364756E2164756E21",
|
|
9
9
|
"names": []
|
|
10
10
|
}
|
package/package.json
CHANGED
|
@@ -3,20 +3,55 @@ name: gauntlet-check
|
|
|
3
3
|
description: >-
|
|
4
4
|
Run checks only (no reviews)
|
|
5
5
|
disable-model-invocation: true
|
|
6
|
-
allowed-tools: Bash
|
|
6
|
+
allowed-tools: Bash, Task
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# /gauntlet-check
|
|
10
10
|
Run the gauntlet checks only — no AI reviews.
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
12
|
+
|
|
13
|
+
## Procedure
|
|
14
|
+
|
|
15
|
+
### Step 1 - Clean Logs
|
|
16
|
+
|
|
17
|
+
Run `agent-gauntlet clean` to archive any previous log files.
|
|
18
|
+
|
|
19
|
+
### Step 2 - Run Checks
|
|
20
|
+
|
|
21
|
+
Run `agent-gauntlet check` using `Bash` with `timeout: 300000`. **ALWAYS wait for and read the full command output** before proceeding. **Verify you can see a `Status:` line in the output before continuing.**
|
|
22
|
+
|
|
23
|
+
### Step 3 - Check Status
|
|
24
|
+
|
|
25
|
+
**NEVER assume success** — you must see an explicit `Status:` line before continuing. Check it and route accordingly:
|
|
26
|
+
- `Status: Passed` → Go to Step 7.
|
|
27
|
+
- `Status: Passed with warnings` → Go to Step 7.
|
|
28
|
+
- `Status: Failed` → Continue to Step 4. **You MUST continue — do not stop here.**
|
|
29
|
+
- `Status: Retry limit exceeded` → Run `agent-gauntlet clean` to archive logs. Go to Step 7.
|
|
30
|
+
- No status line visible → **Known issue:** Bun can drop all stdout/stderr. Read the console log file to get the status: find the latest `console.*.log` in the gauntlet log directory (e.g., `gauntlet_logs/console.1.log`) and look for the `Status:` line there. If no console log is found there, also check `gauntlet_logs/previous/` for logs from the most recent archived run. If no console log exists in either location, the command may have timed out or failed to run — re-run with a longer timeout or investigate the error. Do NOT proceed as if it passed.
|
|
31
|
+
|
|
32
|
+
### Step 4 - Extract Failures
|
|
33
|
+
|
|
34
|
+
Required when status is Failed:
|
|
35
|
+
- Infer the log directory from the file paths in the console output (e.g., if output references `gauntlet_logs/check_._lint.1.log`, the log directory is `gauntlet_logs/`)
|
|
36
|
+
- Read `extract-prompt.md` from this skill's directory
|
|
37
|
+
- **Extract log failures** using the first available strategy:
|
|
38
|
+
a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` extract-prompt content + `"\n\nLog directory: <inferred path>"`. **Task calls MUST be synchronous** — NEVER use `run_in_background: true`.
|
|
39
|
+
b. **Subagent delegation**: If your environment supports delegating work to a subagent but not the Task tool, delegate the extract-prompt instructions with the log directory to a subagent for processing.
|
|
40
|
+
c. **Inline fallback**: If no subagent capability is available, follow the extract-prompt instructions yourself to read the log files and produce the compact failure summary.
|
|
41
|
+
|
|
42
|
+
### Step 5 - Fix
|
|
43
|
+
|
|
44
|
+
Execute the fixes for all failed checks:
|
|
45
|
+
- CHECK failures with Fix Skill: invoke the named skill
|
|
46
|
+
- CHECK failures with Fix Instructions: follow the instructions
|
|
47
|
+
|
|
48
|
+
### Step 6 - Re-run Verification
|
|
49
|
+
|
|
50
|
+
**NEVER skip this step** — if the run failed, you MUST fix and re-run. Run `agent-gauntlet check` again with `Bash` and `timeout: 300000`. Do NOT run `agent-gauntlet clean` between retries. The tool detects existing logs and automatically switches to verification mode. **Go back to Step 3** to check the status line and repeat.
|
|
51
|
+
|
|
52
|
+
### Step 7 - Summarize Session
|
|
53
|
+
|
|
54
|
+
Provide a summary of the session:
|
|
55
|
+
- Final Status: (Passed / Passed with warnings / Retry limit exceeded)
|
|
56
|
+
- Checks Fixed: (list key fixes)
|
|
57
|
+
- Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: gauntlet-commit
|
|
3
|
+
description: >-
|
|
4
|
+
Gates commits behind optional gauntlet validation by detecting changes, running selected validations, handling failures, and completing the commit flow.
|
|
5
|
+
Activates when requests include "commit with gauntlet", "run checks before commit", "run gauntlet then commit", or "skip gauntlet and commit".
|
|
6
|
+
disable-model-invocation: false
|
|
7
|
+
allowed-tools: Bash, Task
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# /gauntlet-commit $ARGUMENTS
|
|
11
|
+
|
|
12
|
+
Commit with optional gauntlet validation. Runs `agent-gauntlet detect` first, validates based on intent (full run, checks only, or skip), handles failures, then commits.
|
|
13
|
+
|
|
14
|
+
## Step 1 - Detect Changes
|
|
15
|
+
|
|
16
|
+
Run `agent-gauntlet detect` using `Bash`:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
agent-gauntlet detect 2>&1
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
- If no changed files are reported → **skip to Step 4** (commit directly, no validation needed)
|
|
23
|
+
- If changed files are reported → continue to Step 2
|
|
24
|
+
|
|
25
|
+
## Step 2 - Determine Validation Intent
|
|
26
|
+
|
|
27
|
+
Parse `$ARGUMENTS` for a validation intent. Do not prompt the user if a clear intent is found.
|
|
28
|
+
|
|
29
|
+
| ARGUMENTS pattern | Action |
|
|
30
|
+
|-------------------|--------|
|
|
31
|
+
| Contains "full" or "all gates", or contains "run" without "check"/"checks" | Invoke `/gauntlet-run` (Step 3a) |
|
|
32
|
+
| Contains "check" or "checks" | Invoke `/gauntlet-check` (Step 3b) |
|
|
33
|
+
| Contains "skip" | Run `agent-gauntlet skip 2>&1` (Step 3c), then go to Step 4 |
|
|
34
|
+
| Empty or no clear intent | Present the three choices below to the user, wait for selection |
|
|
35
|
+
|
|
36
|
+
**When prompting the user**, present these choices:
|
|
37
|
+
|
|
38
|
+
1. **Run all gates** — full validation (checks + reviews)
|
|
39
|
+
2. **Run checks only** — checks without AI reviews
|
|
40
|
+
3. **Skip gauntlet** — advance baseline without running any gates
|
|
41
|
+
|
|
42
|
+
Then proceed to the step matching the user's selection.
|
|
43
|
+
|
|
44
|
+
## Step 3a - Full Validation (gauntlet-run)
|
|
45
|
+
|
|
46
|
+
Invoke `/gauntlet-run`.
|
|
47
|
+
|
|
48
|
+
- If it passes → go to Step 4
|
|
49
|
+
- If it fails → the `/gauntlet-run` skill handles fixing and re-running. After that skill completes, ask the user **"Ready to commit?"** and proceed to Step 4 only on confirmation.
|
|
50
|
+
|
|
51
|
+
## Step 3b - Checks-Only Validation (gauntlet-check)
|
|
52
|
+
|
|
53
|
+
Invoke `/gauntlet-check`.
|
|
54
|
+
|
|
55
|
+
- If it passes → go to Step 4
|
|
56
|
+
- If it fails → the `/gauntlet-check` skill handles fixing and re-running. After that skill completes, ask the user **"Ready to commit?"** and proceed to Step 4 only on confirmation.
|
|
57
|
+
|
|
58
|
+
## Step 3c - Skip Validation
|
|
59
|
+
|
|
60
|
+
Run:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
agent-gauntlet skip 2>&1
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Report the command output to the user, then go to Step 4.
|
|
67
|
+
|
|
68
|
+
## Step 4 - Commit
|
|
69
|
+
|
|
70
|
+
Check whether you have a skill for committing git changes available (excluding `gauntlet-commit` itself to avoid self-invocation).
|
|
71
|
+
|
|
72
|
+
- **If a commit skill is found** → invoke that skill to perform the commit
|
|
73
|
+
- **If no commit skill is found** → stage all tracked changes, propose a commit message following the conventional commits format (`<type>: <description>`), then run `git commit -m "<message>"`
|
|
@@ -87,3 +87,12 @@ Downgrade confidence when:
|
|
|
87
87
|
|
|
88
88
|
### Next Steps
|
|
89
89
|
Actionable recommendations for the user. If confidence is not high, suggest what additional evidence would confirm the diagnosis.
|
|
90
|
+
|
|
91
|
+
## Bug Filing
|
|
92
|
+
|
|
93
|
+
After completing your diagnosis, apply the following routing logic to determine whether to file a GitHub issue:
|
|
94
|
+
|
|
95
|
+
- **High confidence + bug indicated** (evidence points to a defect in agent-gauntlet, not a configuration issue, user error, or expected behavior): Automatically invoke `gauntlet-issue` with `--auto-file <diagnosis summary>` as arguments. This passes the diagnosis summary as the bug description and skips the interactive confirmation — the issue is filed immediately after showing the draft.
|
|
96
|
+
- **High confidence + not a bug** (diagnosis concludes configuration issue, user error, or expected behavior): Do nothing. Do not invoke `gauntlet-issue`.
|
|
97
|
+
- **Medium confidence + possible bug** (evidence suggests a possible gauntlet defect but is not conclusive): Ask the user: "This may be a gauntlet bug. Want me to file a GitHub issue?" If the user confirms, invoke `gauntlet-issue` with the diagnosis summary as the bug description. If the user declines, exit without filing.
|
|
98
|
+
- **Low confidence**: Do nothing. Do not prompt the user and do not invoke `gauntlet-issue`.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: gauntlet-issue
|
|
3
|
+
description: Files structured GitHub bug reports for agent-gauntlet when users ask to file, report, or open an issue for a suspected defect
|
|
4
|
+
disable-model-invocation: false
|
|
5
|
+
allowed-tools: Bash, Read, Glob
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# /gauntlet-issue
|
|
9
|
+
|
|
10
|
+
Collect runtime evidence, draft a structured GitHub issue for a suspected agent-gauntlet bug, present a full preview, and file only after confirmation — unless invoked in auto-file mode.
|
|
11
|
+
|
|
12
|
+
## Step 1: Get the Bug Description and Mode
|
|
13
|
+
|
|
14
|
+
**Check for auto-file mode**: If `$ARGUMENTS` begins with `--auto-file` (followed by a space or the end of the input), set auto-file mode to **on** and strip the prefix to get the remaining text as the bug description. When auto-file mode is on, Step 4 still shows the draft preview but skips the confirmation prompt.
|
|
15
|
+
|
|
16
|
+
**Get the description**:
|
|
17
|
+
- If a non-empty description remains after stripping any `--auto-file` prefix, use it as the bug description and proceed to Step 2.
|
|
18
|
+
- If `$ARGUMENTS` is empty (or becomes empty after stripping), ask the user:
|
|
19
|
+
|
|
20
|
+
> "Please describe the bug you encountered with agent-gauntlet. What happened, what did you expect, and what were you trying to do?"
|
|
21
|
+
|
|
22
|
+
Wait for their response before continuing.
|
|
23
|
+
|
|
24
|
+
## Step 2: Collect Evidence
|
|
25
|
+
|
|
26
|
+
Read `.gauntlet/config.yml` first to resolve `log_dir` (default: `gauntlet_logs` if the field is absent or the file doesn't exist).
|
|
27
|
+
|
|
28
|
+
Collect the following evidence. For each item, note if it is absent — do not fail if files are missing:
|
|
29
|
+
|
|
30
|
+
1. **Config file**: Read `.gauntlet/config.yml` in full.
|
|
31
|
+
2. **Debug log (last 50 lines)**: Read the last 50 lines of `<log_dir>/.debug.log`.
|
|
32
|
+
```bash
|
|
33
|
+
tail -n 50 <log_dir>/.debug.log
|
|
34
|
+
```
|
|
35
|
+
3. **Execution state**: Read the full contents of `<log_dir>/.execution_state`.
|
|
36
|
+
|
|
37
|
+
Record which files were found and which were absent.
|
|
38
|
+
|
|
39
|
+
## Step 3: Draft the Issue
|
|
40
|
+
|
|
41
|
+
Draft a GitHub issue with the following structure:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
## Problem
|
|
45
|
+
|
|
46
|
+
<A clear, concise description of the bug. Based on the user's description.>
|
|
47
|
+
|
|
48
|
+
## Steps to Reproduce
|
|
49
|
+
|
|
50
|
+
<Step-by-step instructions to reproduce the issue. Infer from the description and evidence, or note "Not yet determined" if unclear.>
|
|
51
|
+
|
|
52
|
+
## Expected vs Actual
|
|
53
|
+
|
|
54
|
+
**Expected:** <What should have happened>
|
|
55
|
+
|
|
56
|
+
**Actual:** <What actually happened>
|
|
57
|
+
|
|
58
|
+
## Evidence
|
|
59
|
+
|
|
60
|
+
> **Before including evidence, redact sensitive values**: remove or replace tokens, API keys, email addresses, and absolute local paths that may appear in config, logs, or state. Replace them with `[REDACTED]` or a generic placeholder.
|
|
61
|
+
|
|
62
|
+
**Config (`.gauntlet/config.yml`):**
|
|
63
|
+
<Paste only relevant, non-sensitive config values. Redact tokens, emails, and absolute paths. Note "File not found" if absent.>
|
|
64
|
+
|
|
65
|
+
**Debug log (last 50 lines of `<log_dir>/.debug.log`):**
|
|
66
|
+
<Paste minimal relevant excerpt with sensitive values redacted. Note "File not found" if absent.>
|
|
67
|
+
|
|
68
|
+
**Execution state (`<log_dir>/.execution_state`):**
|
|
69
|
+
<Paste only fields needed to diagnose the bug; redact sensitive values. Note "File not found" if absent.>
|
|
70
|
+
|
|
71
|
+
**Absent files:** <List any files that were not found, or "None">
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Choose a concise, descriptive title: `Bug: <short summary of the problem>`.
|
|
75
|
+
|
|
76
|
+
## Step 4: Show Preview and Confirm
|
|
77
|
+
|
|
78
|
+
Present the full draft to the user — both title and body.
|
|
79
|
+
|
|
80
|
+
**If auto-file mode is on**: Inform the user that the issue will be filed automatically (show the title and body), then proceed directly to Step 5 without asking.
|
|
81
|
+
|
|
82
|
+
**Otherwise**, ask:
|
|
83
|
+
|
|
84
|
+
> "Here is the draft issue. Shall I file it? (yes/no)"
|
|
85
|
+
|
|
86
|
+
- If the user confirms: proceed to Step 5.
|
|
87
|
+
- If the user declines: exit without creating an issue. Inform the user that no issue was filed.
|
|
88
|
+
|
|
89
|
+
## Step 5: File the Issue
|
|
90
|
+
|
|
91
|
+
Write the issue body to a temporary file and pass it via `--body-file` to avoid shell interpolation issues with special characters in the body text:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
ISSUE_TITLE=$(cat <<'TITLE_EOF'
|
|
95
|
+
Bug: <short summary>
|
|
96
|
+
TITLE_EOF
|
|
97
|
+
)
|
|
98
|
+
BODY_FILE=$(mktemp)
|
|
99
|
+
cat > "$BODY_FILE" << 'ISSUE_EOF'
|
|
100
|
+
<paste the full issue body here>
|
|
101
|
+
ISSUE_EOF
|
|
102
|
+
gh issue create --repo pacaplan/agent-gauntlet --title "$ISSUE_TITLE" --body-file "$BODY_FILE"
|
|
103
|
+
rm -f "$BODY_FILE"
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Report the created issue URL to the user.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: gauntlet-merge
|
|
3
|
+
description: >-
|
|
4
|
+
Merges a named branch into the current worktree and propagates the validated execution state from that branch's worktree, eliminating redundant re-validation. Activates for requests such as "merge with gauntlet state", "merge and carry execution state", "reuse validated state after merge", or "merge branch without re-running gauntlet".
|
|
5
|
+
disable-model-invocation: false
|
|
6
|
+
allowed-tools: Bash
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# /gauntlet-merge $ARGUMENTS
|
|
10
|
+
|
|
11
|
+
Merge a branch and copy its validated execution state so the current directory inherits the already-verified results.
|
|
12
|
+
|
|
13
|
+
## Step 1 - Run the merge script
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bash skills/gauntlet-merge/merge-state.sh "$ARGUMENTS"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Run this command using `Bash` and capture stdout, stderr, and the exit code (the script prints its error messages to stderr).
|
|
20
|
+
|
|
21
|
+
## Step 2 - Report the result
|
|
22
|
+
|
|
23
|
+
- **If exit code is 0**: Report success. Include the script's output confirming the merge and state copy.
|
|
24
|
+
- **If exit code is non-zero**: Report the error. The script will have printed an error message — relay it clearly to the user. Do not proceed further.
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#!/usr/bin/env bash
set -euo pipefail

# merge-state.sh <branch>
#
# First stage of the script: validate the argument and locate the worktree
# that currently has <branch> checked out. The result is left in SOURCE_DIR.

BRANCH="${1:-}"

# Guard clause: a branch name is mandatory.
[[ -n "$BRANCH" ]] || {
  echo "Usage: merge-state.sh <branch>" >&2
  exit 1
}

# Walk the porcelain listing. Each record starts with a "worktree <path>"
# line, followed by attribute lines such as "branch refs/heads/<name>".
# Remember the most recent path and stop at the first record whose branch
# line names the requested branch.
SOURCE_DIR=""
while IFS= read -r entry; do
  case "$entry" in
    "worktree "?*)
      wt_path="${entry#worktree }"
      ;;
    "branch refs/heads/$BRANCH")
      SOURCE_DIR="$wt_path"
      break
      ;;
  esac
done < <(git worktree list --porcelain)

# No worktree has the branch checked out: nothing to copy state from.
[[ -n "$SOURCE_DIR" ]] || {
  echo "Error: No worktree found with branch '$BRANCH' checked out — cannot copy execution state." >&2
  exit 1
}
|
|
26
|
+
|
|
27
|
+
# Read log_dir from a config file; prints "gauntlet_logs" if the file is
# absent, the key is not found, or its value is empty.
#
# Arguments:
#   $1 - path to a gauntlet config.yml
# Output:
#   the log directory on stdout, without a trailing newline
#
# Only the first top-level `log_dir:` line (starting at column 0) is used.
# The value is parsed as a simple YAML scalar:
#   - a double- or single-quoted value yields the text between the quotes
#   - otherwise a trailing " # comment" is dropped and surrounding whitespace
#     (including a CR from CRLF files) is trimmed; interior spaces are kept
read_log_dir() {
  local config_path="$1"
  local parsed="" line=""
  # Regexes are kept in variables so the quote characters are not subject to
  # shell quoting rules inside [[ =~ ]].
  local dq_re='^"([^"]*)"'
  local sq_re="^'([^']*)'"

  if [[ -f "$config_path" ]]; then
    # `|| [[ -n "$line" ]]` also processes a final line without a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      if [[ "$line" == log_dir:* ]]; then
        parsed="${line#log_dir:}"
        break
      fi
    done < "$config_path"

    # Trim leading whitespace.
    parsed="${parsed#"${parsed%%[![:space:]]*}"}"

    if [[ "$parsed" =~ $dq_re || "$parsed" =~ $sq_re ]]; then
      # Quoted scalar: keep exactly what is between the quotes.
      parsed="${BASH_REMATCH[1]}"
    elif [[ "$parsed" == \#* ]]; then
      # The value is nothing but a comment.
      parsed=""
    else
      # Plain scalar: drop a trailing comment, then trailing whitespace.
      parsed="${parsed%%[[:space:]]#*}"
      parsed="${parsed%"${parsed##*[![:space:]]}"}"
    fi
  fi

  printf '%s' "${parsed:-gauntlet_logs}"
}
|
|
36
|
+
|
|
37
|
+
# Resolve the log directory on each side from its own config. The source is
# read from the source worktree, the destination from the current directory;
# read_log_dir supplies the "gauntlet_logs" default for both.
SOURCE_LOG_DIR="$(read_log_dir "$SOURCE_DIR/.gauntlet/config.yml")"
DEST_LOG_DIR="$(read_log_dir ".gauntlet/config.yml")"

SOURCE_STATE="$SOURCE_DIR/$SOURCE_LOG_DIR/.execution_state"
DEST_STATE="$DEST_LOG_DIR/.execution_state"

# Fail fast: without a source state file there is nothing to carry over, so
# bail out before the merge touches the working tree.
[[ -f "$SOURCE_STATE" ]] || {
  echo "Error: Missing source execution state: $SOURCE_STATE — cannot copy execution state." >&2
  exit 1
}

# Merge first; with `set -e` a failed merge aborts before any state is copied.
git merge "$BRANCH"

# Make sure the destination log directory exists.
mkdir -p "$DEST_LOG_DIR"

# -ef compares file identity (device + inode), so symlinks and different
# spellings of the same path are recognised as one file and not copied onto
# themselves.
if [[ -e "$DEST_STATE" && "$SOURCE_STATE" -ef "$DEST_STATE" ]]; then
  echo "Merged '$BRANCH'; execution state already current (source and destination are the same)."
else
  cp -f "$SOURCE_STATE" "$DEST_STATE"
  echo "Merged '$BRANCH' and copied execution state from '$SOURCE_DIR/$SOURCE_LOG_DIR' to '$DEST_LOG_DIR'."
fi
|
|
@@ -5,69 +5,71 @@ description: >-
|
|
|
5
5
|
disable-model-invocation: false
|
|
6
6
|
allowed-tools: Bash, Task
|
|
7
7
|
---
|
|
8
|
-
<!--
|
|
9
|
-
REVIEW TRUST LEVEL
|
|
10
|
-
Controls how aggressively the agent acts on AI reviewer feedback.
|
|
11
|
-
Change the trust_level value below to one of: high, medium, low
|
|
12
|
-
|
|
13
|
-
- high: Fix all issues unless you strongly disagree or have low confidence the human wants the change.
|
|
14
|
-
- medium: Fix issues you reasonably agree with or believe the human wants fixed. (DEFAULT)
|
|
15
|
-
- low: Fix only issues you strongly agree with or are confident the human wants fixed.
|
|
16
|
-
-->
|
|
17
|
-
<!-- trust_level: medium -->
|
|
18
|
-
|
|
19
8
|
# /gauntlet-run
|
|
20
9
|
Execute the autonomous verification suite.
|
|
21
10
|
|
|
22
|
-
|
|
11
|
+
Fix issues you reasonably agree with or believe the human wants to be fixed. Skip issues that are purely stylistic, subjective, or that you believe the human would not want changed. When you skip an issue, briefly state what was skipped and why.
|
|
23
12
|
|
|
24
|
-
## Critical rules — read before proceeding
|
|
25
13
|
|
|
26
|
-
|
|
14
|
+
## Procedure
|
|
27
15
|
|
|
28
|
-
|
|
29
|
-
- **ALL Bash commands in this skill MUST be synchronous.** NEVER use `run_in_background: true` for any Bash call. NEVER use `&` to background any command.
|
|
30
|
-
- **ALL Task tool calls MUST be synchronous.** NEVER use `run_in_background: true`.
|
|
31
|
-
- **ALWAYS wait for and read the full command output** before proceeding. The command typically takes 1-2 minutes. Set `timeout: 300000` (5 minutes) on Bash calls to allow headroom.
|
|
32
|
-
- **NEVER assume success.** You must see an explicit `Status:` line in the output. If you do not see `Status: Passed`, `Status: Passed with warnings`, or `Status: Retry limit exceeded` in the output, the run is not complete — wait for it or investigate.
|
|
33
|
-
- **NEVER skip the fix-retry loop.** If the run fails, you MUST extract failures, fix code, and re-run. This is not optional.
|
|
16
|
+
### Step 1 - Clean Logs
|
|
34
17
|
|
|
35
|
-
|
|
18
|
+
Run `agent-gauntlet clean` to archive any previous log files.
|
|
19
|
+
|
|
20
|
+
### Step 2 - Run Gauntlet
|
|
21
|
+
|
|
22
|
+
If the caller requests a specific review to be enabled, append `--enable-review <name>` to the run command for each requested review.
|
|
23
|
+
|
|
24
|
+
Run `agent-gauntlet run` using `Bash` with `timeout: 300000`. **ALWAYS wait for and read the full command output** before proceeding — the command typically takes 1-2 minutes. **Look for a `Status:` line in the output; if none is visible, do not assume success — follow the "No status line visible" route in Step 3.**
|
|
25
|
+
|
|
26
|
+
### Step 3 - Check Status
|
|
27
|
+
|
|
28
|
+
**NEVER assume success** — you must see an explicit `Status:` line before continuing. Check it and route accordingly:
|
|
29
|
+
- `Status: Passed` → Go to Step 9.
|
|
30
|
+
- `Status: Passed with warnings` → Go to Step 9.
|
|
31
|
+
- `Status: Failed` → Continue to Step 4. **You MUST continue — do not stop here.**
|
|
32
|
+
- `Status: Retry limit exceeded` → Run `agent-gauntlet clean` to archive logs. Go to Step 9.
|
|
33
|
+
- No status line visible → **Known issue:** Bun can drop all stdout/stderr when LLM review subprocesses run. Read the console log file to get the status: find the latest `console.*.log` in the gauntlet log directory (e.g., `gauntlet_logs/console.1.log`) and look for the `Status:` line there. If no console log is found there, also check `gauntlet_logs/previous/` for logs from the most recent archived run. If no console log exists in either location, the command may have timed out or failed to run — re-run with a longer timeout or investigate the error. Do NOT proceed as if it passed.
|
|
34
|
+
|
|
35
|
+
### Step 4 - Extract Failures
|
|
36
|
+
|
|
37
|
+
Required when status is Failed:
|
|
38
|
+
- Infer the log directory from the file paths in the console output (e.g., if output references `gauntlet_logs/check_._lint.1.log`, the log directory is `gauntlet_logs/`)
|
|
39
|
+
- Read `extract-prompt.md` from this skill's directory
|
|
40
|
+
- **Extract log failures** using the first available strategy:
|
|
41
|
+
a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` extract-prompt content + `"\n\nLog directory: <inferred path>"`. **Task calls MUST be synchronous** — NEVER use `run_in_background: true`.
|
|
42
|
+
b. **Subagent delegation**: If your environment supports delegating work to a subagent but not the Task tool, delegate the extract-prompt instructions with the log directory to a subagent for processing.
|
|
43
|
+
c. **Inline fallback**: If no subagent capability is available, follow the extract-prompt instructions yourself to read the log files and produce the compact failure summary.
|
|
44
|
+
|
|
45
|
+
### Step 5 - Report Failures
|
|
46
|
+
|
|
47
|
+
Print the compact failure summary returned from Step 4.
|
|
48
|
+
|
|
49
|
+
### Step 6 - Fix
|
|
50
|
+
|
|
51
|
+
Apply the review guidance above to each failure and fix accordingly:
|
|
52
|
+
- CHECK failures with Fix Skill: invoke the named skill
|
|
53
|
+
- CHECK failures with Fix Instructions: follow the instructions
|
|
54
|
+
- REVIEW violations: fix or skip per the review guidance above
|
|
55
|
+
|
|
56
|
+
### Step 7 - Update Review Decisions
|
|
57
|
+
|
|
58
|
+
For REVIEW violations you addressed:
|
|
59
|
+
- Read `update-prompt.md` from this skill's directory
|
|
60
|
+
- **Update review decisions** using the first available strategy (same as Step 4):
|
|
61
|
+
a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` update-prompt content + log directory + decisions list. **Task calls MUST be synchronous** — NEVER use `run_in_background: true`.
|
|
62
|
+
b. **Subagent delegation**: Delegate the update-prompt instructions with the log directory and decisions to a subagent.
|
|
63
|
+
c. **Inline fallback**: Follow the update-prompt instructions yourself to update the review JSON files.
|
|
64
|
+
|
|
65
|
+
### Step 8 - Re-run Verification
|
|
66
|
+
|
|
67
|
+
**NEVER skip this step** — if the run failed, you MUST fix and re-run. Run the same command from Step 2 (including any `--enable-review` flags) again with `Bash` and `timeout: 300000`. Do NOT run `agent-gauntlet clean` between retries. The tool detects existing logs and automatically switches to verification mode. **Go back to Step 3** to check the status line and repeat.
|
|
68
|
+
|
|
69
|
+
### Step 9 - Summarize Session
|
|
36
70
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
- `Status: Failed` → Continue to step 4. **You MUST continue — do not stop here.**
|
|
43
|
-
- `Status: Retry limit exceeded` → Run `agent-gauntlet clean` to archive logs. Go to step 8.
|
|
44
|
-
- No status line visible → **Known issue:** Bun can drop all stdout/stderr when LLM review subprocesses run. Read the console log file to get the status: find the latest `console.*.log` in the gauntlet log directory (e.g., `gauntlet_logs/console.1.log`) and look for the `Status:` line there. If no console log is found there, also check `gauntlet_logs/previous/` for logs from the most recent archived run. If no console log exists in either location, the command may have timed out or failed to run — re-run with a longer timeout or investigate the error. Do NOT proceed as if it passed.
|
|
45
|
-
4. **Extract failures** (required when status is Failed):
|
|
46
|
-
- Infer the log directory from the file paths in the console output (e.g., if output references `gauntlet_logs/check_._lint.1.log`, the log directory is `gauntlet_logs/`)
|
|
47
|
-
- Read `extract-prompt.md` from this skill's directory
|
|
48
|
-
- **Extract log failures** using the first available strategy:
|
|
49
|
-
a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` extract-prompt content + `"\n\nLog directory: <inferred path>"`. NEVER use `run_in_background: true`.
|
|
50
|
-
b. **Subagent delegation**: If your environment supports delegating work to a subagent but not the Task tool, delegate the extract-prompt instructions with the log directory to a subagent for processing.
|
|
51
|
-
c. **Inline fallback**: If no subagent capability is available, follow the extract-prompt instructions yourself to read the log files and produce the compact failure summary.
|
|
52
|
-
5. **Fix code** based on the compact summary. You MUST address every actionable item:
|
|
53
|
-
- CHECK failures with Fix Skill: invoke the named skill
|
|
54
|
-
- CHECK failures with Fix Instructions: follow the instructions
|
|
55
|
-
- REVIEW violations: apply the trust level above, fix or skip
|
|
56
|
-
5b. **Capture noteworthy violations for eval inventory** (if any REVIEW violations were found):
|
|
57
|
-
- Collect the JSON file paths from the REVIEW failures identified in step 4 (the `.json` file paths)
|
|
58
|
-
- Read `SKILL.md` from the `capture-eval-issues` skill directory (sibling of this skill's directory)
|
|
59
|
-
- Follow the capture skill's procedure, passing the JSON file paths
|
|
60
|
-
- Note the `CAPTURED:` summary line for inclusion in step 8
|
|
61
|
-
6. For REVIEW violations you addressed:
|
|
62
|
-
- Read `update-prompt.md` from this skill's directory
|
|
63
|
-
- **Update review decisions** using the first available strategy (same as step 4):
|
|
64
|
-
a. **Task tool** (Claude Code): `Task` with `subagent_type="general-purpose"`, `model="haiku"`, `prompt=` update-prompt content + log directory + decisions list. NEVER use `run_in_background: true`.
|
|
65
|
-
b. **Subagent delegation**: Delegate the update-prompt instructions with the log directory and decisions to a subagent.
|
|
66
|
-
c. **Inline fallback**: Follow the update-prompt instructions yourself to update the review JSON files.
|
|
67
|
-
7. **Re-run verification:** Run `agent-gauntlet run` again with `Bash` and `timeout: 300000`. Do NOT run `agent-gauntlet clean` between retries. The tool detects existing logs and automatically switches to verification mode. **Go back to step 3** to check the status line and repeat.
|
|
68
|
-
8. **Provide a summary** of the session:
|
|
69
|
-
- Final Status: (Passed / Passed with warnings / Retry limit exceeded)
|
|
70
|
-
- Issues Fixed: (list key fixes)
|
|
71
|
-
- Issues Skipped: (list skipped items and reasons)
|
|
72
|
-
- Eval Captures: (list captured issue IDs from step 5b, or "none")
|
|
73
|
-
- Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)
|
|
71
|
+
Provide a summary of the session:
|
|
72
|
+
- Final Status: (Passed / Passed with warnings / Retry limit exceeded)
|
|
73
|
+
- Issues Fixed: (list key fixes)
|
|
74
|
+
- Issues Skipped: (list skipped items and reasons)
|
|
75
|
+
- Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)
|
|
@@ -21,17 +21,18 @@ Return a plain-text summary using EXACTLY this format:
|
|
|
21
21
|
For check failures:
|
|
22
22
|
```text
|
|
23
23
|
CHECKS:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
[fail] <gate_label>
|
|
25
|
+
<concise error description>
|
|
26
|
+
Fix Instructions: <extracted text if present, otherwise omit this line>
|
|
27
|
+
Fix Skill: <skill name if present, otherwise omit this line>
|
|
28
28
|
```
|
|
29
29
|
|
|
30
30
|
For review failures:
|
|
31
31
|
```text
|
|
32
32
|
REVIEWS:
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
[<priority>] <gate_label>
|
|
34
|
+
<file>:<line> - <issue summary>
|
|
35
|
+
Fix: <fix suggestion>
|
|
35
36
|
```
|
|
36
37
|
|
|
37
38
|
If there are no failures of a type, omit that section entirely.
|
|
@@ -102,13 +103,14 @@ Replace all `var` declarations with `const` or `let`.
|
|
|
102
103
|
|
|
103
104
|
```text
|
|
104
105
|
CHECKS:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
[fail] check:src:lint
|
|
107
|
+
src/helpers.ts:3:5 - error: Unexpected var, use let or const instead
|
|
108
|
+
Fix Instructions: Replace all `var` declarations with `const` or `let`.
|
|
108
109
|
|
|
109
110
|
REVIEWS:
|
|
110
|
-
|
|
111
|
-
|
|
111
|
+
[high] review:src:code-quality (claude@1)
|
|
112
|
+
src/main.ts:45 - Missing error handling for async database call
|
|
113
|
+
Fix: Wrap in try-catch block
|
|
112
114
|
```
|
|
113
115
|
|
|
114
116
|
Note: The `src/utils.ts:10` violation was omitted because its status is `"fixed"`, not `"new"`.
|
|
@@ -117,5 +119,5 @@ Note: The `src/utils.ts:10` violation was omitted because its status is `"fixed"
|
|
|
117
119
|
|
|
118
120
|
- Do NOT summarize or editorialize — copy error details verbatim where possible
|
|
119
121
|
- Do NOT skip any `[FAIL]` entries
|
|
120
|
-
- Keep the output compact — one
|
|
122
|
+
- Keep the output compact — one entry per check failure (2–4 lines: gate label, error description, and any `Fix Instructions` / `Fix Skill` lines), one entry per review violation (3 lines)
|
|
121
123
|
- For review violations, only include those with `status: "new"` — skip `"fixed"` and `"skipped"`
|