agent-gauntlet 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +25 -23
  2. package/dist/index.js +9226 -0
  3. package/dist/index.js.map +65 -0
  4. package/dist/scripts/status.js +280 -0
  5. package/dist/scripts/status.js.map +10 -0
  6. package/package.json +22 -8
  7. package/src/built-in-reviews/code-quality.md +0 -25
  8. package/src/built-in-reviews/index.ts +0 -28
  9. package/src/bun-plugins.d.ts +0 -4
  10. package/src/cli-adapters/claude.ts +0 -327
  11. package/src/cli-adapters/codex.ts +0 -290
  12. package/src/cli-adapters/cursor.ts +0 -128
  13. package/src/cli-adapters/gemini.ts +0 -510
  14. package/src/cli-adapters/github-copilot.ts +0 -141
  15. package/src/cli-adapters/index.ts +0 -250
  16. package/src/cli-adapters/thinking-budget.ts +0 -23
  17. package/src/commands/check.ts +0 -311
  18. package/src/commands/ci/index.ts +0 -15
  19. package/src/commands/ci/init.ts +0 -96
  20. package/src/commands/ci/list-jobs.ts +0 -90
  21. package/src/commands/clean.ts +0 -54
  22. package/src/commands/detect.ts +0 -173
  23. package/src/commands/health.ts +0 -169
  24. package/src/commands/help.ts +0 -34
  25. package/src/commands/index.ts +0 -13
  26. package/src/commands/init.ts +0 -1878
  27. package/src/commands/list.ts +0 -33
  28. package/src/commands/review.ts +0 -311
  29. package/src/commands/run.ts +0 -29
  30. package/src/commands/shared.ts +0 -267
  31. package/src/commands/stop-hook.ts +0 -567
  32. package/src/commands/validate.ts +0 -20
  33. package/src/commands/wait-ci.ts +0 -518
  34. package/src/config/ci-loader.ts +0 -33
  35. package/src/config/ci-schema.ts +0 -28
  36. package/src/config/global.ts +0 -87
  37. package/src/config/loader.ts +0 -301
  38. package/src/config/schema.ts +0 -165
  39. package/src/config/stop-hook-config.ts +0 -130
  40. package/src/config/types.ts +0 -65
  41. package/src/config/validator.ts +0 -592
  42. package/src/core/change-detector.ts +0 -137
  43. package/src/core/diff-stats.ts +0 -442
  44. package/src/core/entry-point.ts +0 -190
  45. package/src/core/job.ts +0 -96
  46. package/src/core/run-executor.ts +0 -621
  47. package/src/core/runner.ts +0 -290
  48. package/src/gates/check.ts +0 -118
  49. package/src/gates/resolve-check-command.ts +0 -21
  50. package/src/gates/result.ts +0 -54
  51. package/src/gates/review.ts +0 -1333
  52. package/src/hooks/adapters/claude-stop-hook.ts +0 -99
  53. package/src/hooks/adapters/cursor-stop-hook.ts +0 -122
  54. package/src/hooks/adapters/types.ts +0 -94
  55. package/src/hooks/stop-hook-handler.ts +0 -748
  56. package/src/index.ts +0 -47
  57. package/src/output/app-logger.ts +0 -214
  58. package/src/output/console-log.ts +0 -168
  59. package/src/output/console.ts +0 -359
  60. package/src/output/logger.ts +0 -126
  61. package/src/output/sinks/console-sink.ts +0 -59
  62. package/src/output/sinks/file-sink.ts +0 -110
  63. package/src/scripts/status.ts +0 -433
  64. package/src/templates/workflow.yml +0 -79
  65. package/src/types/gauntlet-status.ts +0 -79
  66. package/src/utils/debug-log.ts +0 -392
  67. package/src/utils/diff-parser.ts +0 -103
  68. package/src/utils/execution-state.ts +0 -472
  69. package/src/utils/log-parser.ts +0 -696
  70. package/src/utils/sanitizer.ts +0 -3
  71. package/src/utils/session-ref.ts +0 -91
@@ -1,1878 +0,0 @@
1
- import fs from "node:fs/promises";
2
- import path from "node:path";
3
- import readline from "node:readline";
4
- import chalk from "chalk";
5
- import type { Command } from "commander";
6
- import { type CLIAdapter, getAllAdapters } from "../cli-adapters/index.js";
7
- import { exists } from "./shared.js";
8
-
9
- const MAX_PROMPT_ATTEMPTS = 10; // NOTE(review): presumably caps how many times an interactive prompt is retried; the use site is outside this chunk, so confirm
10
-
11
- // --- Skill content templates ---
12
- // These are used for both skills (Claude) and flat commands (other agents).
13
- // The frontmatter fields (name, disable-model-invocation) are only meaningful
14
- // for skills but are harmless in flat command files.
15
-
16
- /**
17
- * Build the skill file content (YAML frontmatter followed by markdown instructions) for the gauntlet "run" or "check" skill. One builder serves both modes so the shared frontmatter layout and the first two steps are not duplicated.
18
- * @param mode - "run" adds the review trust-level preamble and the fix/verify/summarize loop covering checks and reviews; "check" emits the shorter checks-only loop. @returns the complete skill content as a single string.
19
- */
20
- function buildGauntletSkillContent(mode: "run" | "check"): string {
21
- const isRun = mode === "run";
22
- const name = isRun ? "run" : "check"; // always equal to mode; used in the frontmatter name and the heading
23
- const description = isRun
24
- ? "Run the full verification gauntlet"
25
- : "Run checks only (no reviews)";
26
- const command = isRun ? "agent-gauntlet run" : "agent-gauntlet check";
27
- const heading = isRun
28
- ? "Execute the autonomous verification suite."
29
- : "Run the gauntlet checks only \u2014 no AI reviews.";
30
-
31
- const frontmatter = `---
32
- name: gauntlet-${name}
33
- description: ${description}
34
- disable-model-invocation: true
35
- allowed-tools: Bash
36
- ---`;
37
-
38
- // Steps 1-2 are shared by both modes: archive old logs, then run the command
39
- const steps = [
40
- `1. Run \`agent-gauntlet clean\` to archive any previous log files`,
41
- `2. Run \`${command}\``,
42
- ];
43
-
44
- if (isRun) { // run mode: steps 3-7 are pushed as ONE multi-line string (fix loop for checks and reviews, then summary)
45
- steps.push(
46
- `3. If it fails:
47
- - Identify the failed gates from the console output.
48
- - For CHECK failures: Read the \`.log\` file path provided in the output.
49
- - For REVIEW failures: Read the \`.json\` file path provided in the "Review: <path>" output.
50
- 4. Address the violations:
51
- - For REVIEW violations: You MUST update the \`"status"\` and \`"result"\` fields in the provided \`.json\` file for EACH violation.
52
- - Set \`"status": "fixed"\` and add a brief description to \`"result"\` for issues you fix.
53
- - Set \`"status": "skipped"\` and add a brief reason to \`"result"\` for issues you skip (based on the trust level).
54
- - Do NOT modify any other attributes (file, line, issue, priority) in the JSON file.
55
- - Apply the trust level above when deciding whether to act on AI reviewer feedback.
56
- 5. Run \`${command}\` again to verify your fixes. Do NOT run \`agent-gauntlet clean\` between retries. The tool detects existing logs and automatically switches to verification mode.
57
- 6. Repeat steps 3-5 until one of the following termination conditions is met:
58
- - "Status: Passed" appears in the output (logs are automatically archived)
59
- - "Status: Passed with warnings" appears in the output (remaining issues were skipped)
60
- - "Status: Retry limit exceeded" appears in the output -> Run \`agent-gauntlet clean\` to archive logs for the session record. Do NOT retry after cleaning.
61
- 7. Provide a summary of the session:
62
- - Issues Fixed: (list key fixes)
63
- - Issues Skipped: (list skipped items and reasons)
64
- - Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)`,
65
- );
66
- } else { // check mode: steps 3-6 are pushed as ONE multi-line string (fix loop capped at 3 attempts, then summary)
67
- steps.push(
68
- `3. If any checks fail:
69
- - Read the \`.log\` file path provided in the output for each failed check.
70
- - Fix the issues found.
71
- 4. Run \`${command}\` again to verify your fixes. Do NOT run \`agent-gauntlet clean\` between retries.
72
- 5. Repeat steps 3-4 until all checks pass or you've made 3 attempts.
73
- 6. Provide a summary of the session:
74
- - Checks Passed: (list)
75
- - Checks Failed: (list with brief reason)
76
- - Fixes Applied: (list key fixes)`,
77
- );
78
- }
79
-
80
- if (isRun) { // only the run skill carries the trust-level HTML comment block and the trust-level paragraph; the level is hard-coded to medium
81
- return `${frontmatter}
82
- <!--
83
- REVIEW TRUST LEVEL
84
- Controls how aggressively the agent acts on AI reviewer feedback.
85
- Change the trust_level value below to one of: high, medium, low
86
-
87
- - high: Fix all issues unless you strongly disagree or have low confidence the human wants the change.
88
- - medium: Fix issues you reasonably agree with or believe the human wants fixed. (DEFAULT)
89
- - low: Fix only issues you strongly agree with or are confident the human wants fixed.
90
- -->
91
- <!-- trust_level: medium -->
92
-
93
- # /gauntlet-${name}
94
- ${heading}
95
-
96
- **Review trust level: medium** \u2014 Fix issues you reasonably agree with or believe the human wants to be fixed. Skip issues that are purely stylistic, subjective, or that you believe the human would not want changed. When you skip an issue, briefly state what was skipped and why.
97
-
98
- ${steps.join("\n")}
99
- `;
100
- } // check mode falls through to the plain layout: frontmatter, heading, steps
101
-
102
- return `${frontmatter}
103
-
104
- # /gauntlet-${name}
105
- ${heading}
106
-
107
- ${steps.join("\n")}
108
- `;
109
- }
110
-
111
- const GAUNTLET_RUN_SKILL_CONTENT = buildGauntletSkillContent("run"); // computed once at module evaluation: full run skill, including the trust-level preamble
112
- const GAUNTLET_CHECK_SKILL_CONTENT = buildGauntletSkillContent("check"); // computed once at module evaluation: checks-only skill, no review instructions
113
-
114
- /** Static skill content for /gauntlet-push-pr: tells the agent to commit, push, and create or update the pull request for the current branch, then confirm the PR exists with gh pr view. Bash is the only allowed tool. */ const PUSH_PR_SKILL_CONTENT = `---
115
- name: gauntlet-push-pr
116
- description: Commit changes, push to remote, and create or update a pull request
117
- disable-model-invocation: true
118
- allowed-tools: Bash
119
- ---
120
-
121
- # /gauntlet-push-pr
122
- Commit all changes, push to remote, and create or update a pull request for the current branch.
123
-
124
- After the PR is created or updated, verify it exists by running \`gh pr view\`.
125
- `;
126
-
127
- /** Static skill content for /gauntlet-fix-pr: a four-step loop (inspect CI checks and review comments via gh, fix, commit and push, re-verify the PR). Bash is the only allowed tool. */ const FIX_PR_SKILL_CONTENT = `---
128
- name: gauntlet-fix-pr
129
- description: Fix CI failures or address review comments on a pull request
130
- disable-model-invocation: true
131
- allowed-tools: Bash
132
- ---
133
-
134
- # /gauntlet-fix-pr
135
- Fix CI failures or address review comments on the current pull request.
136
-
137
- 1. Check CI status and review comments: \`gh pr checks\` and \`gh pr view --comments\`
138
- 2. Fix any failing checks or address reviewer feedback
139
- 3. Commit and push your changes
140
- 4. After pushing, verify the PR is updated: \`gh pr view\`
141
- `;
142
-
143
- /** Static skill content for /gauntlet-status: run the bundled status script with bun, read the log files of any gate marked FAIL, then present a combined session report. Allows the Read tool in addition to Bash because step 2 reads log and JSON files. */ const GAUNTLET_STATUS_SKILL_CONTENT = `---
144
- name: gauntlet-status
145
- description: Show a summary of the most recent gauntlet session
146
- disable-model-invocation: true
147
- allowed-tools: Bash, Read
148
- ---
149
-
150
- # /gauntlet-status
151
- Show a detailed summary of the most recent gauntlet session.
152
-
153
- ## Step 1: Run the status script
154
-
155
- \`\`\`bash
156
- bun .gauntlet/skills/gauntlet/status/scripts/status.ts 2>&1
157
- \`\`\`
158
-
159
- The script parses the \`.debug.log\` for session-level data (run count, gate results, pass/fail status) and lists all log files with their paths and sizes.
160
-
161
- ## Step 2: Read failed gate details
162
-
163
- For each gate marked **FAIL** in the Gate Results table, read the corresponding log files to extract failure details:
164
-
165
- - **Check failures** (e.g., \`check:src:code-health\`): Read the matching \`check_*.log\` file. Check log formats vary by tool (linters, test runners, code health analyzers) — read the file and extract the relevant error/warning output.
166
- - **Review failures** (e.g., \`review:.:code-quality\`): Read the matching \`review_*.json\` file(s). These contain structured violation data with \`file\`, \`line\`, \`issue\`, \`priority\`, and \`status\` fields.
167
-
168
- Use the file paths from the "Log Files" section of the script output. Match gate IDs to file names: \`check:.:lint\` corresponds to \`check_._lint.*.log\`, \`review:.:code-quality\` corresponds to \`review_._code-quality_*.{log,json}\`.
169
-
170
- ## Step 3: Present the results
171
-
172
- Combine the script's session summary with the detailed failure information into a comprehensive report:
173
-
174
- 1. Session overview (status, iterations, duration, fixed/skipped/failed counts)
175
- 2. Gate results table
176
- 3. For any failed gates: the specific errors, violations, or test failures from the log files
177
- 4. For reviews with violations: list each violation with file, line, issue, priority, and current status (fixed/skipped/outstanding)
178
- `;
179
-
180
- /**
181
- * Build the gauntlet-help skill bundle content.
182
- * Returns { content, references } for the multi-file skill.
183
- */
184
- function buildHelpSkillBundle(): {
185
- content: string;
186
- references: Record<string, string>;
187
- } {
188
- const content = `---
189
- name: gauntlet-help
190
- description: Diagnose and explain gauntlet behavior using runtime evidence
191
- allowed-tools: Bash, Read, Glob, Grep
192
- ---
193
-
194
- # /gauntlet-help
195
-
196
- Evidence-based diagnosis of gauntlet behavior. This skill is **diagnosis-only** — it explains what happened and why, but does not auto-fix issues. It operates from **runtime artifacts and CLI outputs**, not source code.
197
-
198
- ## Diagnostic Workflow
199
-
200
- Follow this order for every diagnostic question:
201
-
202
- 1. **Resolve \`log_dir\`**: Read \`.gauntlet/config.yml\` and extract the \`log_dir\` field (default: \`gauntlet_logs\`). All log paths below are relative to \`<log_dir>/\`.
203
- 2. **Passive evidence first**: Read files before running commands.
204
- - \`<log_dir>/.debug.log\` — timestamped event log (commands, gate results, state changes, errors)
205
- - \`<log_dir>/.execution_state\` — JSON with \`last_run_completed_at\`, \`branch\`, \`commit\`, \`working_tree_ref\`, and \`unhealthy_adapters\` (adapter name → \`{marked_at, reason}\`)
206
- - \`<log_dir>/console.*.log\` — console output per run (highest number = latest)
207
- - \`<log_dir>/check_*.log\` — check gate output
208
- - \`<log_dir>/review_*.json\` — review gate results with violations (\`file\`, \`line\`, \`issue\`, \`fix\`, \`priority\`, \`status\`)
209
- - \`.gauntlet/config.yml\` — project configuration
210
- 3. **Active evidence when needed**: Run CLI commands only when passive evidence is insufficient for a confident diagnosis.
211
- 4. **Explain with evidence**: Clearly distinguish confirmed findings from inference.
212
-
213
- ## Evidence Sources
214
-
215
- | Source | What It Confirms |
216
- |--------|-----------------|
217
- | \`.gauntlet/config.yml\` | \`log_dir\`, \`base_branch\`, \`entry_points\`, \`cli.default_preference\`, \`stop_hook\` settings, \`max_retries\`, \`rerun_new_issue_threshold\` |
218
- | \`<log_dir>/.debug.log\` | Timestamped event history: commands executed, gate results, state transitions, errors |
219
- | \`<log_dir>/.execution_state\` | Last successful run timestamp, branch/commit at that time, working tree stash ref, unhealthy adapter cooldowns |
220
- | \`<log_dir>/console.*.log\` | Human-readable output from each run iteration |
221
- | \`<log_dir>/check_*.log\` | Raw output from check gate commands (linters, test runners, etc.) |
222
- | \`<log_dir>/review_*.json\` | Structured review violations with file, line, issue, priority, and resolution status |
223
- | \`<log_dir>/.gauntlet-run.lock\` | Lock file (contains PID) — present only during active execution |
224
- | \`<log_dir>/.stop-hook-active\` | Marker file (contains PID) — present only during active stop-hook execution |
225
- | \`<log_dir>/.ci-wait-attempts\` | CI wait attempt counter |
226
-
227
- ## CLI Command Quick-Reference
228
-
229
- Use these only when passive evidence is insufficient:
230
-
231
- | Command | When to Use |
232
- |---------|-------------|
233
- | \`agent-gauntlet list\` | See configured gates and entry points |
234
- | \`agent-gauntlet health\` | Check adapter availability and health status |
235
- | \`agent-gauntlet detect\` | See which files changed and which gates would apply |
236
- | \`agent-gauntlet validate\` | Validate config.yml syntax and schema |
237
- | \`agent-gauntlet clean\` | Archive current logs and reset state (destructive — confirm with user first) |
238
-
239
- ## Routing Logic
240
-
241
- Based on the user's question, load the appropriate reference file for detailed guidance:
242
-
243
- | Question Domain | Reference File |
244
- |----------------|---------------|
245
- | Stop hook blocked/allowed, hook statuses, recursion, timing | \`references/stop-hook-troubleshooting.md\` |
246
- | Missing config, YAML errors, misconfiguration, init problems | \`references/config-troubleshooting.md\` |
247
- | Check failures, review failures, no_changes, no_applicable_gates, rerun mode | \`references/gate-troubleshooting.md\` |
248
- | Lock conflict, stale locks, parallel runs, cleanup | \`references/lock-troubleshooting.md\` |
249
- | Adapter health, missing tools, usage limits, cooldown | \`references/adapter-troubleshooting.md\` |
250
- | PR push, CI status, auto_push_pr, auto_fix_pr, CI wait | \`references/ci-pr-troubleshooting.md\` |
251
-
252
- If the question spans multiple domains, load each relevant reference.
253
-
254
- ## Output Contract
255
-
256
- Every diagnostic response MUST include these sections:
257
-
258
- ### Diagnosis
259
- What happened and why, stated clearly.
260
-
261
- ### Evidence
262
- Specific files read, field values observed, and command outputs that support the diagnosis. Quote relevant log lines or config values.
263
-
264
- ### Confidence
265
- One of:
266
- - **High** — diagnosis is fully supported by direct evidence
267
- - **Medium** — diagnosis is likely but some evidence is missing or ambiguous
268
- - **Low** — diagnosis is inferred; key evidence is unavailable
269
-
270
- Downgrade confidence when:
271
- - \`.debug.log\` or \`.execution_state\` is missing or empty
272
- - Log files referenced in output don't exist
273
- - Config values can't be verified
274
- - CLI commands fail or return unexpected output
275
-
276
- ### Next Steps
277
- Actionable recommendations for the user. If confidence is not high, suggest what additional evidence would confirm the diagnosis.
278
- `;
279
-
280
- const references: Record<string, string> = {
281
- "stop-hook-troubleshooting.md": `# Stop Hook Troubleshooting
282
-
283
- ## All Stop-Hook Statuses
284
-
285
- ### Allowing Statuses (stop is permitted)
286
-
287
- | Status | Message | Meaning |
288
- |--------|---------|---------|
289
- | \`passed\` | All gates completed successfully | Every configured check and review gate passed |
290
- | \`passed_with_warnings\` | Passed with warnings (some issues were skipped) | Gates ran but some review violations were skipped rather than fixed |
291
- | \`no_applicable_gates\` | No applicable gates matched current changes | Changed files didn't match any configured entry point |
292
- | \`no_changes\` | No changes detected | No files changed relative to \`base_branch\` |
293
- | \`ci_passed\` | CI passed — all checks completed and no blocking reviews | GitHub CI checks succeeded and no \`CHANGES_REQUESTED\` reviews |
294
- | \`ci_timeout\` | CI wait exhausted — max attempts reached | CI polling hit 3 attempts; allows stop for manual review |
295
- | \`no_config\` | Not a gauntlet project — no \`.gauntlet/config.yml\` found | No gauntlet configuration in this repo |
296
- | \`stop_hook_active\` | Stop hook cycle detected — allowing stop to prevent infinite loop | Recursion prevention triggered |
297
- | \`stop_hook_disabled\` | Stop hook is disabled via configuration | \`stop_hook.enabled: false\` in config or \`GAUNTLET_STOP_HOOK_ENABLED=false\` |
298
- | \`interval_not_elapsed\` | Run interval not elapsed | \`stop_hook.run_interval_minutes\` hasn't elapsed since last run |
299
- | \`invalid_input\` | Invalid hook input — could not parse JSON | Stop-hook couldn't parse stdin JSON from the IDE |
300
- | \`lock_conflict\` | Another gauntlet run is already in progress | Lock file exists with a live PID |
301
- | \`error\` | Stop hook error | Unexpected error during execution |
302
- | \`retry_limit_exceeded\` | Retry limit exceeded | Max retries (default 3) exhausted; requires \`agent-gauntlet clean\` |
303
-
304
- ### Blocking Statuses (stop is prevented)
305
-
306
- | Status | Message | Meaning |
307
- |--------|---------|---------|
308
- | \`failed\` | Issues must be fixed before stopping | One or more gates failed; agent must fix and re-run |
309
- | \`pr_push_required\` | PR needs to be created or updated before stopping | Gates passed but \`auto_push_pr\` is enabled and PR hasn't been pushed |
310
- | \`ci_pending\` | CI checks still running — waiting for completion | Waiting for GitHub CI to finish |
311
- | \`ci_failed\` | CI failed or review changes requested | GitHub CI checks failed or a reviewer requested changes |
312
-
313
- ## Common Scenarios
314
-
315
- ### "The hook blocked my stop"
316
- 1. Check the status in \`.debug.log\` — search for \`status:\` entries
317
- 2. If \`failed\`: Read the gate output files listed in \`.debug.log\` or the latest \`console.*.log\`
318
- 3. If \`pr_push_required\`: The agent needs to commit, push, and create a PR
319
- 4. If \`ci_pending\`: CI is still running; the hook will re-check on next stop attempt
320
- 5. If \`ci_failed\`: Read CI failure details — run \`agent-gauntlet wait-ci\` or check \`gh pr checks\`
321
-
322
- ### "The hook allowed but shouldn't have"
323
- 1. Check if the status was \`no_changes\` — verify \`base_branch\` is correct in \`config.yml\`
324
- 2. Check if \`no_applicable_gates\` — run \`agent-gauntlet detect\` to see which files changed and which gates match
325
- 3. Check if \`interval_not_elapsed\` — the run was skipped because \`run_interval_minutes\` hadn't elapsed
326
- 4. Check if \`stop_hook_disabled\` — verify \`stop_hook.enabled\` in config and \`GAUNTLET_STOP_HOOK_ENABLED\` env var
327
-
328
- ### "The gauntlet isn't running gates / keeps allowing stops immediately"
329
- This happens when the iteration counter is inherited from a previous session's failures. Symptoms:
330
- 1. \`.debug.log\` shows \`RUN_START\` followed immediately by \`RUN_END\` with \`duration=0.0s\`
331
- 2. \`iterations\` value is high (e.g., 7, 8, 9) even though the current session hasn't run that many times
332
- 3. Stop-hook returns \`retry_limit_exceeded\` without executing any gates
333
- 4. \`failed=0\` in \`RUN_END\` (no gates ran, so none failed — but status is still \`fail\`)
334
-
335
- **Root cause**: The iteration counter persists in \`.execution_state\` across sessions. If a previous session ended with unresolved failures and hit the retry limit, the counter carries over. The next session enters verification mode and immediately exceeds the limit.
336
-
337
- **Fix**: Run \`agent-gauntlet clean\` to reset the state and iteration counter, then re-run.
338
-
339
- **Prevention**: Before starting a new task, check if the previous session left failures behind. If \`.debug.log\` shows a recent \`STOP_HOOK decision=block reason=failed\` or \`retry_limit_exceeded\`, clean state first.
340
-
341
- ### "The hook seems stuck"
342
- 1. Check for \`.stop-hook-active\` marker in \`<log_dir>/\` — if present, a stop-hook may be running
343
- 2. Check PID in the marker file — is that process alive?
344
- 3. The stop-hook has a **5-minute hard timeout** (\`STOP_HOOK_TIMEOUT_MS\`) and will self-terminate
345
- 4. Stale marker files older than **10 minutes** are automatically cleaned up on next invocation
346
-
347
- ## Recursion Prevention
348
-
349
- The stop-hook uses three layers to prevent infinite loops:
350
-
351
- ### Layer 1: Environment Variable
352
- - Variable: \`GAUNTLET_STOP_HOOK_ACTIVE\`
353
- - Set by the parent gauntlet when spawning child CLI processes for reviews
354
- - If \`GAUNTLET_STOP_HOOK_ACTIVE=1\`, the stop-hook exits immediately with \`stop_hook_active\`
355
- - Prevents child review processes from triggering nested gauntlets
356
-
357
- ### Layer 2: Marker File
358
- - File: \`<log_dir>/.stop-hook-active\` (contains the PID)
359
- - Created before execution, removed after completion (in \`finally\` block)
360
- - If another stop-hook fires during execution and finds a fresh marker (< 10 min old), it exits with \`stop_hook_active\`
361
- - Stale markers (> 10 min) are deleted and execution proceeds
362
- - Needed because Claude Code does NOT pass env vars to hooks
363
-
364
- ### Layer 3: IDE Input Field
365
- - Claude Code: \`stop_hook_active\` boolean in the stdin JSON
366
- - Cursor: \`loop_count\` field; threshold is 10 (returns \`retry_limit_exceeded\` if exceeded)
367
- - Additional safety net from the IDE itself
368
-
369
- ## Timing Values
370
-
371
- | Timer | Value | Purpose |
372
- |-------|-------|---------|
373
- | Stdin timeout | 5 seconds | Safety net for delayed stdin from IDE |
374
- | Hard timeout | 5 minutes | Self-timeout to prevent zombie processes |
375
- | Stale marker | 10 minutes | Marker files older than this are cleaned up |
376
- | \`run_interval_minutes\` | Configurable (default 0 = always run) | Minimum time between stop-hook runs |
377
-
378
- ## Environment Variable Overrides
379
-
380
- These override project config values (env > project config > global config):
381
-
382
- | Variable | Type | Effect |
383
- |----------|------|--------|
384
- | \`GAUNTLET_STOP_HOOK_ENABLED\` | \`true\`/\`1\`/\`false\`/\`0\` | Enable or disable the stop hook entirely |
385
- | \`GAUNTLET_STOP_HOOK_INTERVAL_MINUTES\` | Integer >= 0 | Minutes between runs (0 = always run) |
386
- | \`GAUNTLET_AUTO_PUSH_PR\` | \`true\`/\`1\`/\`false\`/\`0\` | Check PR status after gates pass |
387
- | \`GAUNTLET_AUTO_FIX_PR\` | \`true\`/\`1\`/\`false\`/\`0\` | Enable CI wait workflow after PR created |
388
-
389
- ## Diagnosing \`stop_hook_disabled\`
390
-
391
- This status means the stop hook has been explicitly disabled. Check in order:
392
-
393
- 1. \`GAUNTLET_STOP_HOOK_ENABLED\` environment variable (highest precedence)
394
- 2. \`.gauntlet/config.yml\` → \`stop_hook.enabled\`
395
- 3. \`~/.config/agent-gauntlet/config.yml\` → \`stop_hook.enabled\` (global)
396
-
397
- To re-enable: remove the env var or set \`stop_hook.enabled: true\` in config.
398
- `,
399
- "config-troubleshooting.md": `# Config Troubleshooting
400
-
401
- ## \`no_config\` — Missing Configuration
402
-
403
- The stop hook returns \`no_config\` when \`.gauntlet/config.yml\` doesn't exist. This is normal for non-gauntlet projects.
404
-
405
- **If it should exist:**
406
- 1. Run \`agent-gauntlet init\` to create the configuration
407
- 2. Or manually create \`.gauntlet/config.yml\`
408
-
409
- ## YAML Syntax and Schema Errors
410
-
411
- Run \`agent-gauntlet validate\` to check config syntax and schema.
412
-
413
- **Common YAML issues:**
414
- - Indentation errors (YAML requires consistent indentation)
415
- - Missing colons after keys
416
- - Unquoted special characters in values
417
-
418
- **Schema validation catches:**
419
- - Missing required fields (\`cli.default_preference\`, \`entry_points\`)
420
- - Wrong types (e.g., string where array expected)
421
- - Invalid enum values (e.g., invalid \`rerun_new_issue_threshold\`)
422
-
423
- ## Common Misconfigurations
424
-
425
- ### Missing or Empty \`cli.default_preference\`
426
- \`\`\`yaml
427
- # WRONG — missing
428
- cli: {}
429
-
430
- # WRONG — empty
431
- cli:
432
- default_preference: []
433
-
434
- # CORRECT
435
- cli:
436
- default_preference:
437
- - claude
438
- \`\`\`
439
-
440
- ### Empty \`entry_points\`
441
- \`\`\`yaml
442
- # WRONG
443
- entry_points: []
444
-
445
- # CORRECT
446
- entry_points:
447
- - path: "."
448
- reviews:
449
- - code-quality
450
- \`\`\`
451
-
452
- ### \`fail_fast\` with \`parallel\`
453
- These are mutually exclusive for check gates. Schema validation rejects this:
454
- \`\`\`yaml
455
- # WRONG — in a check YAML file
456
- parallel: true
457
- fail_fast: true
458
-
459
- # CORRECT — fail_fast only works with sequential
460
- parallel: false
461
- fail_fast: true
462
- \`\`\`
463
-
464
- ### Conflicting Fix Instruction Fields
465
- Check gates support only one fix method. These are mutually exclusive:
466
- - \`fix_instructions\` (inline string)
467
- - \`fix_instructions_file\` (path to file)
468
- - \`fix_with_skill\` (skill name)
469
-
470
- ### Entry Point References Non-Existent Gate
471
- If an entry point lists a check or review name that doesn't exist in \`.gauntlet/checks/\` or \`.gauntlet/reviews/\`, validation fails.
472
-
473
- ### Review Gate Uses Tool Not in \`default_preference\`
474
- Review gates can specify \`cli_preference\` but the tools must also appear in \`cli.default_preference\`.
475
-
476
- ## \`log_dir\` Issues
477
-
478
- The \`log_dir\` field (default: \`gauntlet_logs\`) determines where all logs are written.
479
-
480
- **Can't find logs:**
481
- 1. Check \`config.yml\` for the \`log_dir\` value
482
- 2. Verify the directory exists (it's created automatically on first run)
483
- 3. Check if a previous \`agent-gauntlet clean\` archived everything to \`previous/\`
484
-
485
- **Permissions:**
486
- - The gauntlet needs write access to \`log_dir\`
487
- - On some setups, the directory may not be writable
488
-
489
- ## \`base_branch\` Misconfiguration
490
-
491
- The \`base_branch\` (default: \`origin/main\`) is used for diff calculation. Wrong values cause:
492
- - \`no_changes\` when there are actually changes (wrong base)
493
- - Diff includes too many files (base too far back)
494
-
495
- **Verify:**
496
- \`\`\`bash
497
- git log --oneline origin/main..HEAD # Should show your commits
498
- \`\`\`
499
-
500
- If using a different default branch:
501
- \`\`\`yaml
502
- base_branch: origin/develop
503
- \`\`\`
504
-
505
- ## Config Precedence
506
-
507
- Configuration is loaded with this precedence (highest first):
508
- 1. **Environment variables** (e.g., \`GAUNTLET_STOP_HOOK_ENABLED\`)
509
- 2. **Project config** (\`.gauntlet/config.yml\`)
510
- 3. **Global config** (\`~/.config/agent-gauntlet/config.yml\`)
511
- 4. **Defaults** (built-in)
512
-
513
- ## Init Setup Problems
514
-
515
- ### "\`.gauntlet\` directory already exists"
516
- \`agent-gauntlet init\` won't overwrite an existing \`.gauntlet/\` directory. Delete it first or manually edit.
517
-
518
- ### Git Not Initialized
519
- Some features require a git repository. Run \`git init\` first.
520
-
521
- ### No Remote Configured
522
- The \`base_branch\` (e.g., \`origin/main\`) requires a remote. Run \`git remote add origin <url>\`.
523
-
524
- ## Adapter Configuration
525
-
526
- Per-adapter settings are configured under \`cli.adapters\`:
527
- \`\`\`yaml
528
- cli:
529
- default_preference:
530
- - claude
531
- adapters:
532
- claude:
533
- allow_tool_use: true
534
- thinking_budget: medium # off, low, medium, high
535
- \`\`\`
536
-
537
- **\`thinking_budget\` mapping:**
538
- - Claude: off=0, low=8000, medium=16000, high=31999 tokens
539
- - Codex: off=minimal, low=low, medium=medium, high=high
540
- - Gemini: off=0, low=4096, medium=8192, high=24576 tokens
541
-
542
- ## Debug Logging
543
-
544
- Enable detailed logging in config:
545
- \`\`\`yaml
546
- debug_log:
547
- enabled: true
548
- max_size_mb: 10
549
- \`\`\`
550
-
551
- This creates \`<log_dir>/.debug.log\` with timestamped events.
552
- `,
553
- "gate-troubleshooting.md": `# Gate Troubleshooting
554
-
555
- ## Check Gate Failures
556
-
557
- Check gates run shell commands (linters, test runners, etc.) and report pass/fail based on exit code.
558
-
559
- ### Common Failure Modes
560
-
561
- | Failure | Cause | Evidence |
562
- |---------|-------|----------|
563
- | Command not found | Binary not installed or not in PATH | Check gate log for "command not found" error |
564
- | Non-zero exit code | Linter/test failures | Read the \`check_*.log\` file for specific errors |
565
- | Timeout | Command exceeded configured timeout | Log shows SIGTERM; check \`timeout\` in check YAML |
566
- | Output truncation | Command output exceeded 10MB buffer | Log may be cut off; increase timeout or reduce output |
567
-
568
- ### Reading Check Logs
569
- - File pattern: \`<log_dir>/check_<CHECK_NAME>.log\`
570
- - Contains raw stdout/stderr from the check command
571
- - Format depends on the tool (linter output, test runner output, etc.)
572
-
573
- ### Rerun Commands
574
- Check gates can define a \`rerun_command\` for verification runs. If set, the rerun uses this command instead of the original \`command\`.
575
-
576
- ## Review Gate Failures
577
-
578
- Review gates use AI CLI tools to review code changes.
579
-
580
- ### Common Failure Modes
581
-
582
- | Failure | Cause | Evidence |
583
- |---------|-------|----------|
584
- | No healthy adapters | All configured CLI tools are missing, unhealthy, or in cooldown | Run \`agent-gauntlet health\` |
585
- | JSON parsing error | Adapter returned non-JSON output | Review log shows raw output instead of violations |
586
- | Violations outside diff scope | Reviewer flagged code not in the current diff | Check violation \`file\` and \`line\` against changed files |
587
- | Usage limit | API quota exceeded for the adapter | Look for "usage limit" in review log; adapter enters 1-hour cooldown |
588
-
589
- ### Reading Review JSON
590
- - File pattern: \`<log_dir>/review_<REVIEW_NAME>_<ADAPTER>@<INDEX>.json\`
591
- - Fields per violation:
592
- - \`file\`: Source file path
593
- - \`line\`: Line number
594
- - \`issue\`: Description of the problem
595
- - \`fix\`: Suggested fix
596
- - \`priority\`: \`critical\`, \`high\`, \`medium\`, or \`low\`
597
- - \`status\`: \`new\`, \`fixed\`, \`skipped\`
598
- - Status \`skipped_prior_pass\` means this review slot passed on a previous run and was skipped for efficiency
599
-
600
- ### Diff Calculation
601
- - **Local mode**: committed changes (base...HEAD) + uncommitted changes (HEAD) + untracked files
602
- - **CI mode**: \`git diff GITHUB_BASE_REF...GITHUB_SHA\` (falls back to HEAD^...HEAD)
603
- - **Rerun mode**: scoped to changes since last pass using \`working_tree_ref\` from \`.execution_state\`
604
-
605
- ## \`no_applicable_gates\`
606
-
607
- All configured gates were skipped because no changed files matched any entry point path.
608
-
609
- **Diagnosis:**
610
- 1. Run \`agent-gauntlet detect\` to see which files changed and which gates match
611
- 2. Check \`entry_points\` in \`config.yml\` — do the paths cover your changed files?
612
- 3. Verify \`base_branch\` — if wrong, the diff may not include your changes
613
-
614
- ## \`no_changes\`
615
-
616
- No files changed relative to \`base_branch\`.
617
-
618
- **Diagnosis:**
619
- 1. Check \`base_branch\` in \`config.yml\` (default: \`origin/main\`)
620
- 2. Run \`git diff origin/main...HEAD --stat\` to verify
621
- 3. If working on uncommitted changes, they are included in local mode but may not be in CI mode
622
- 4. Check if a recent \`agent-gauntlet clean\` reset the execution state
623
-
624
- ## Parallel vs Sequential Execution
625
-
626
- ### Check Gates
627
- - Each check gate has a \`parallel\` setting (default: \`false\`)
628
- - Parallel checks run concurrently; sequential checks run one at a time
629
- - \`allow_parallel\` in \`config.yml\` (default: \`true\`) is the global switch
630
-
631
- ### \`fail_fast\` Behavior
632
- - Only applies to sequential check gates (\`parallel: false\`)
633
- - When enabled, stops running remaining sequential gates after the first failure
634
- - Cannot be combined with \`parallel: true\` (schema validation rejects this)
635
-
636
- ### Review Gates
637
- - Each review gate independently controls parallelism for its own adapter dispatch
638
- - When \`parallel: true\` (default) and \`num_reviews > 1\`, reviews run concurrently across adapters
639
- - When \`parallel: false\`, reviews run sequentially
640
-
641
- ## Rerun / Verification Mode
642
-
643
- When the gauntlet detects existing logs in \`<log_dir>/\`, it enters **rerun mode** instead of a fresh run.
644
-
645
- ### How It Works
646
- 1. Previous violations are loaded from existing \`review_*.json\` files
647
- 2. Only violations at the configured threshold priority or higher are re-evaluated
648
- 3. Check gates re-run their commands (or \`rerun_command\` if configured)
649
- 4. Review gates scope their diff to changes since the last pass using \`working_tree_ref\` from \`.execution_state\`
650
-
651
- ### \`rerun_new_issue_threshold\`
652
- - Config field: \`rerun_new_issue_threshold\` (default: \`medium\`)
653
- - Controls which priority levels are re-evaluated: \`critical\` > \`high\` > \`medium\` > \`low\`
654
- - Violations below the threshold are ignored in reruns
655
-
656
- ### Passed Slot Optimization
657
- When \`num_reviews > 1\` in rerun mode:
658
- - If all review slots passed previously: only slot 1 re-runs (safety latch)
659
- - If some slots failed: only failed slots re-run; passed slots get \`skipped_prior_pass\`
660
-
661
- ### Why Violations Aren't Detected on Rerun
662
- - The diff is scoped to changes since the last pass — if the violation is in unchanged code, it won't appear
663
- - The threshold may filter out lower-priority violations
664
- - Passed slots may be skipped entirely
665
-
666
- ## How to Read Gate Logs
667
-
668
- ### Console Logs
669
- - Pattern: \`<log_dir>/console.*.log\` (highest number = latest run)
670
- - Contains unified output from all gates for that run iteration
671
- - Shows gate names, pass/fail status, and output file paths
672
-
673
- ### Debug Log
674
- - File: \`<log_dir>/.debug.log\`
675
- - Timestamped entries for every significant event
676
- - Search for \`gate\`, \`check\`, \`review\`, or specific gate names
677
-
678
- ### Gate Result Status Values
679
- - Check gates: \`pass\`, \`fail\`, \`error\`
680
- - Review gates: \`pass\`, \`fail\`, \`error\`, \`skipped_prior_pass\`
681
- `,
682
- "lock-troubleshooting.md": `# Lock Troubleshooting
683
-
684
- ## \`lock_conflict\` — Another Run in Progress
685
-
686
- The gauntlet uses a lock file to prevent concurrent runs from interfering with each other.
687
-
688
- ### Lock File Details
689
- - **File**: \`<log_dir>/.gauntlet-run.lock\`
690
- - **Content**: PID of the process holding the lock
691
- - **Created**: At the start of a gauntlet run (exclusive write — fails if file exists)
692
- - **Released**: Always in a \`finally\` block (guaranteed cleanup on success, failure, or error)
693
-
694
- ### Diagnosing Lock Conflicts
695
-
696
- 1. Check if the lock file exists: \`<log_dir>/.gauntlet-run.lock\`
697
- 2. Read the PID from the file
698
- 3. Check if that process is alive:
699
- - If alive: a gauntlet run is genuinely in progress — wait for it to finish
700
- - If dead: the lock is stale (see below)
701
-
702
- ## Stale Lock Detection
703
-
704
- The gauntlet automatically detects and cleans stale locks:
705
-
706
- | Condition | Detection | Action |
707
- |-----------|-----------|--------|
708
- | PID is dead | \`kill(pid, 0)\` fails with ESRCH | Lock removed, retry once |
709
- | PID unparseable, lock > 10 min old | File age check | Lock removed, retry once |
710
- | PID alive | Process exists | Lock kept (genuine conflict) |
711
-
712
- **The gauntlet never steals a lock from a live process**, regardless of lock age.
713
-
714
- ## \`allow_parallel\` Config
715
-
716
- The \`allow_parallel\` config setting (default: \`true\`) controls whether gates can run in parallel **within** a single gauntlet run. It does **not** control concurrent gauntlet runs — that's what the lock file prevents.
717
-
718
- ## Marker Files
719
-
720
- ### \`.gauntlet-run.lock\`
721
- - **Location**: \`<log_dir>/.gauntlet-run.lock\`
722
- - **Purpose**: Prevent concurrent gauntlet runs
723
- - **Lifecycle**: Created at run start, removed at run end (always in \`finally\`)
724
-
725
- ### \`.stop-hook-active\`
726
- - **Location**: \`<log_dir>/.stop-hook-active\`
727
- - **Purpose**: Prevent stop-hook recursion (see stop-hook-troubleshooting.md)
728
- - **Content**: PID of the stop-hook process
729
- - **Stale threshold**: 10 minutes
730
- - **Lifecycle**: Created before stop-hook execution, removed after (always in \`finally\`)
731
-
732
- ## Manual Cleanup
733
-
734
- If a lock is stuck and the process is dead:
735
-
736
- \`\`\`bash
737
- agent-gauntlet clean
738
- \`\`\`
739
-
740
- This command:
741
- 1. Archives current logs to \`<log_dir>/previous/\`
742
- 2. Removes the lock file
743
- 3. Removes the stop-hook marker file
744
- 4. Resets execution state
745
-
746
- **Confirm with the user before running \`clean\`** — it archives all current logs and resets state, which means the next run starts fresh (no rerun mode).
747
-
748
- ## Troubleshooting Checklist
749
-
750
- 1. **Is another run actually in progress?** Check the PID in the lock file.
751
- 2. **Is the process alive?** The gauntlet should auto-clean stale locks on retry.
752
- 3. **Did a crash leave a stale lock?** Run \`agent-gauntlet clean\` to reset.
753
- 4. **Is this happening repeatedly?** Check for processes spawning concurrent gauntlet runs (e.g., multiple IDE hooks firing simultaneously).
754
- `,
755
- "adapter-troubleshooting.md": `# Adapter Troubleshooting
756
-
757
- ## \`agent-gauntlet health\` Output
758
-
759
- Run \`agent-gauntlet health\` to check adapter status. Each adapter reports one of:
760
-
761
- | Status | Meaning |
762
- |--------|---------|
763
- | \`healthy\` | Binary found and available |
764
- | \`missing\` | Binary not found in PATH |
765
- | \`unhealthy\` | Binary found but not functional (auth issue, etc.) |
766
-
767
- ## Missing CLI Tools
768
-
769
- If an adapter reports \`missing\`:
770
- 1. Verify the tool is installed
771
- 2. Check that it's in your PATH: \`which claude\`, \`which gemini\`, \`which codex\`
772
- 3. If installed but not in PATH, add the installation directory to your PATH
773
-
774
- Missing adapters are skipped during review gate dispatch with a "Skipping X: Missing" message.
775
-
776
- ## Authentication Issues
777
-
778
- If an adapter reports \`unhealthy\`:
779
- 1. Check the tool's authentication: try running the CLI tool directly
780
- 2. For Claude: \`claude --version\` (may need \`claude login\`)
781
- 3. For Gemini: check Google Cloud authentication
782
- 4. For Codex: check OpenAI authentication
783
-
784
- ## Usage Limits and 1-Hour Cooldown
785
-
786
- ### How Usage Limits Are Detected
787
- The gauntlet checks adapter output for these keywords:
788
- - "usage limit"
789
- - "quota exceeded"
790
- - "quota will reset"
791
- - "credit balance is too low"
792
- - "out of extra usage"
793
- - "out of usage"
794
-
795
- ### Cooldown Mechanism
796
- When a usage limit is detected:
797
- 1. The adapter is marked **unhealthy** in \`.execution_state\`
798
- 2. A **1-hour cooldown** starts (60 minutes)
799
- 3. During cooldown, the adapter is skipped for review dispatch
800
- 4. After cooldown expires, the adapter is re-probed and cleared if available
801
-
802
- ### Checking Cooldown Status
803
- Read \`<log_dir>/.execution_state\` and look at the \`unhealthy_adapters\` field:
804
-
805
- \`\`\`json
806
- {
807
- "unhealthy_adapters": {
808
- "claude": {
809
- "marked_at": "2025-01-15T10:30:00.000Z",
810
- "reason": "Usage limit exceeded"
811
- }
812
- }
813
- }
814
- \`\`\`
815
-
816
- - \`marked_at\`: When the cooldown started (ISO 8601)
817
- - Cooldown expires 60 minutes after \`marked_at\`
818
-
819
- ### All Adapters in Cooldown
820
- If every configured adapter is in cooldown, review gates will fail with "no healthy adapters". Wait for the cooldown to expire or resolve the usage limit.
821
-
822
- ## \`cli.default_preference\` and Adapter Selection
823
-
824
- The \`cli.default_preference\` array in \`config.yml\` determines:
825
- 1. **Which adapters are available** for review dispatch
826
- 2. **Selection order** for round-robin assignment
827
-
828
- Review gates can override with \`cli_preference\` but those tools must also be in \`default_preference\`.
829
-
830
- \`\`\`yaml
831
- cli:
832
- default_preference:
833
- - claude
834
- - gemini
835
- \`\`\`
836
-
837
- ## \`allow_tool_use\` and \`thinking_budget\` Settings
838
-
839
- Per-adapter settings in \`config.yml\`:
840
-
841
- \`\`\`yaml
842
- cli:
843
- adapters:
844
- claude:
845
- allow_tool_use: true # Whether the adapter can use tools during review
846
- thinking_budget: medium # off, low, medium, high
847
- \`\`\`
848
-
849
- ### \`thinking_budget\` Token Mapping
850
-
851
- | Level | Claude | Codex | Gemini |
852
- |-------|--------|-------|--------|
853
- | \`off\` | 0 | minimal | 0 |
854
- | \`low\` | 8,000 | low | 4,096 |
855
- | \`medium\` | 16,000 | medium | 8,192 |
856
- | \`high\` | 31,999 | high | 24,576 |
857
-
858
- ## \`.execution_state\` File
859
-
860
- The \`.execution_state\` file in \`<log_dir>/\` tracks run context:
861
-
862
- \`\`\`json
863
- {
864
- "last_run_completed_at": "2025-01-15T10:30:00.000Z",
865
- "branch": "feature/my-branch",
866
- "commit": "abc123",
867
- "working_tree_ref": "def456",
868
- "unhealthy_adapters": {}
869
- }
870
- \`\`\`
871
-
872
- | Field | Purpose |
873
- |-------|---------|
874
- | \`last_run_completed_at\` | When the last successful run finished |
875
- | \`branch\` | Git branch at last completion |
876
- | \`commit\` | HEAD SHA at last completion |
877
- | \`working_tree_ref\` | Stash SHA of working tree (used for rerun diff scoping) |
878
- | \`unhealthy_adapters\` | Map of adapter name to cooldown info |
879
-
880
- This file is:
881
- - Written after successful execution
882
- - Preserved across runs
883
- - Auto-cleaned when the branch changes or commit is merged
884
- - Deleted by \`agent-gauntlet clean\`
885
-
886
- ## Troubleshooting Checklist
887
-
888
- 1. **Run \`agent-gauntlet health\`** to see overall adapter status
889
- 2. **Check \`.execution_state\`** for cooldown entries
890
- 3. **Verify \`cli.default_preference\`** includes the adapters you expect
891
- 4. **Try the CLI tool directly** (e.g., \`claude --version\`) to isolate the issue
892
- 5. **Check for usage limit messages** in review logs (\`review_*.log\`)
893
- `,
894
- "ci-pr-troubleshooting.md": `# CI/PR Troubleshooting
895
-
896
- ## \`pr_push_required\`
897
-
898
- Gates passed but the stop hook detected that a PR needs to be created or updated.
899
-
900
- **When this happens:**
901
- - \`auto_push_pr: true\` is set in \`stop_hook\` config
902
- - Gates have passed
903
- - No PR exists for the current branch, or the PR is out of date
904
-
905
- **Resolution:**
906
- 1. Commit and push your changes
907
- 2. Create a PR: \`gh pr create\` or use \`/gauntlet-push-pr\`
908
- 3. The next stop-hook invocation will check PR/CI status instead of re-running gates
909
-
910
- ## CI Status Values
911
-
912
- | Status | Message | Blocking? |
913
- |--------|---------|-----------|
914
- | \`ci_pending\` | CI checks still running | Yes — agent waits |
915
- | \`ci_failed\` | CI failed or review changes requested | Yes — must fix |
916
- | \`ci_passed\` | All checks completed, no blocking reviews | No — stop allowed |
917
- | \`ci_timeout\` | Max wait attempts reached | No — stop allowed for manual review |
918
-
919
- ## \`auto_push_pr\` and \`auto_fix_pr\` Configuration
920
-
921
- \`\`\`yaml
922
- stop_hook:
923
- auto_push_pr: true # Check PR status after gates pass
924
- auto_fix_pr: true # Wait for CI and enable fix workflow
925
- \`\`\`
926
-
927
- **Dependency:** \`auto_fix_pr\` requires \`auto_push_pr\`. If \`auto_fix_pr: true\` but \`auto_push_pr: false\`, \`auto_fix_pr\` is forced to \`false\` with a warning.
928
-
929
- **Environment variable overrides:**
930
- - \`GAUNTLET_AUTO_PUSH_PR=true/false\`
931
- - \`GAUNTLET_AUTO_FIX_PR=true/false\`
932
-
933
- ## CI Wait Mechanism (\`wait-ci\`)
934
-
935
- ### How It Works
936
- 1. After gates pass and PR is pushed, the stop hook enters CI wait mode
937
- 2. It polls GitHub CI status using \`gh pr checks\`
938
- 3. Polls every **15 seconds** (default)
939
- 4. Times out after **270 seconds** (4.5 minutes, default)
940
- 5. Up to **3 attempts** total across stop-hook invocations
941
-
942
- ### Attempt Tracking
943
- - File: \`<log_dir>/.ci-wait-attempts\`
944
- - Incremented on each CI wait invocation
945
- - When attempts >= 3: returns \`ci_timeout\` and allows the stop
946
-
947
- ### What \`wait-ci\` Checks
948
-
949
- **CI Checks:**
950
- - Runs \`gh pr checks --json name,state,link\`
951
- - Check states: \`PENDING\`, \`QUEUED\`, \`IN_PROGRESS\`, \`SUCCESS\`, \`FAILURE\`
952
- - All checks must reach \`SUCCESS\` for \`ci_passed\`
953
-
954
- **Blocking Reviews:**
955
- - Queries \`gh api repos/OWNER/REPO/pulls/PR_NUM/reviews\`
956
- - \`CHANGES_REQUESTED\` state is blocking
957
- - Latest review per author takes precedence (later reviews override earlier)
958
- - If any author's latest review is \`CHANGES_REQUESTED\`: \`ci_failed\`
959
-
960
- ### Failed Check Logs
961
- - For GitHub Actions: retrieves error output via \`gh run view RUN_ID --log-failed\`
962
- - For external checks (no run ID): no logs available
963
- - Output limited to last 100 lines
964
-
965
- ## CI Detection Environment Variables
966
-
967
- The gauntlet detects CI environments using:
968
-
969
- | Variable | Detection |
970
- |----------|-----------|
971
- | \`CI=true\` | Generic CI environment |
972
- | \`GITHUB_ACTIONS=true\` | GitHub Actions specifically |
973
- | \`GITHUB_BASE_REF\` | PR base branch in GitHub Actions (overrides \`base_branch\` for diff) |
974
- | \`GITHUB_SHA\` | Commit SHA in GitHub Actions (used for diff calculation) |
975
-
976
- **CI mode differences:**
977
- - Diff uses \`GITHUB_BASE_REF...GITHUB_SHA\` instead of local branch comparison
978
- - Falls back to \`HEAD^...HEAD\` if CI variables are incomplete
979
-
980
- ## Troubleshooting Checklist
981
-
982
- ### \`ci_pending\` — CI Still Running
983
- 1. Check \`gh pr checks\` to see which checks are still pending
984
- 2. Wait and try again — the stop hook will re-poll on next attempt
985
- 3. After 3 attempts, it will timeout and allow the stop
986
-
987
- ### \`ci_failed\` — CI Failed
988
- 1. Run \`gh pr checks\` to see failed checks
989
- 2. Run \`gh pr view --comments\` to see review feedback
990
- 3. Check for \`CHANGES_REQUESTED\` reviews: \`gh api repos/OWNER/REPO/pulls/PR_NUM/reviews\`
991
- 4. Fix the issues, commit, and push
992
- 5. The stop hook will re-check on next invocation
993
-
994
- ### \`ci_timeout\` — Wait Exhausted
995
- - The CI wait hit 3 attempts without all checks passing
996
- - The stop is allowed for manual review
997
- - Check \`<log_dir>/.ci-wait-attempts\` for the attempt count
998
- - After fixing issues and pushing, run \`agent-gauntlet clean\` to reset the attempt counter
999
-
1000
- ### PR-Related Issues
1001
- - **No PR for branch**: \`gh pr view\` returns an error — create a PR first
1002
- - **PR out of date**: Push latest changes before CI can pass
1003
- - **\`gh\` CLI not installed**: CI features require the GitHub CLI (\`gh\`)
1004
- `,
1005
- };
1006
-
1007
- return { content, references };
1008
- }
1009
-
1010
// Built once at module load; its `content` and `references` feed the "help"
// entry of SKILL_DEFINITIONS.
const HELP_SKILL_BUNDLE = buildHelpSkillBundle();
1011
-
1012
/**
 * Skill definitions used by installCommands.
 * Each entry maps a skill action name to its content and metadata.
 *
 * Only the "help" entry carries the optional `references` and `skillsOnly`
 * fields; every other entry is just an action name plus its content.
 */
const SKILL_DEFINITIONS = [
  { action: "run", content: GAUNTLET_RUN_SKILL_CONTENT },
  { action: "check", content: GAUNTLET_CHECK_SKILL_CONTENT },
  { action: "push-pr", content: PUSH_PR_SKILL_CONTENT },
  { action: "fix-pr", content: FIX_PR_SKILL_CONTENT },
  { action: "status", content: GAUNTLET_STATUS_SKILL_CONTENT },
  {
    action: "help",
    content: HELP_SKILL_BUNDLE.content,
    references: HELP_SKILL_BUNDLE.references,
    skillsOnly: true,
  },
] as const;
1029
-
1030
/**
 * Where the gauntlet commands get installed: not at all ("none"), inside the
 * current repository ("project"), or under the user's home directory ("user").
 */
type InstallLevel = "none" | "project" | "user";
1031
-
1032
/** Command-line flags accepted by the `init` command. */
interface InitOptions {
  /** Set by `-y, --yes`: skip all prompts and use the built-in defaults. */
  yes?: boolean;
}
1035
-
1036
/** Answers gathered during `init` (or defaulted by `--yes`) that drive file generation. */
interface InitConfig {
  /** Written verbatim as `base_branch` in config.yml (default "origin/main"). */
  baseBranch: string;
  /** Entry-point path that the optional lint/test checks are attached to (default "."). */
  sourceDir: string;
  lintCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
  testCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
  /** Adapters listed under `cli.default_preference` in config.yml. */
  selectedAdapters: CLIAdapter[];
}
1043
-
1044
/**
 * Write `.gauntlet/checks/<name>.yml` for a check the user opted into and
 * report the created file.
 *
 * When `command` is blank the `placeholder` is written instead; the
 * placeholder is a `#` comment, so the file is left for the user to complete.
 */
async function writeInitCheckFile(
  targetDir: string,
  name: string,
  command: string,
  placeholder: string,
): Promise<void> {
  const yaml = [
    `name: ${name}`,
    `command: ${command || placeholder}`,
    "# parallel: false",
    "# run_in_ci: true",
    "# run_locally: true",
    "# timeout: 300",
    "",
  ].join("\n");
  await fs.writeFile(path.join(targetDir, "checks", `${name}.yml`), yaml);
  console.log(chalk.green(`Created .gauntlet/checks/${name}.yml`));
}

/**
 * Register the `init` subcommand on the given commander program.
 *
 * The command scaffolds a `.gauntlet/` directory in the current working
 * directory: config.yml, optional lint / unit-test check files, the default
 * code-quality review, the status script, and finally the agent commands.
 * It does nothing if `.gauntlet/` already exists or if no CLI agent is found.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerInitCommand(program: Command): void {
  const runInit = async (options: InitOptions): Promise<void> => {
    const projectRoot = process.cwd();
    const targetDir = path.join(projectRoot, ".gauntlet");

    // Never overwrite an existing setup.
    if (await exists(targetDir)) {
      console.log(chalk.yellow(".gauntlet directory already exists."));
      return;
    }

    // 1. CLI detection — bail out with install hints when nothing is usable.
    console.log("Detecting available CLI agents...");
    const availableAdapters = await detectAvailableCLIs();
    if (availableAdapters.length === 0) {
      console.log();
      console.log(
        chalk.red("Error: No CLI agents found. Install at least one:"),
      );
      console.log(" - Claude: https://docs.anthropic.com/en/docs/claude-code");
      console.log(" - Gemini: https://github.com/google-gemini/gemini-cli");
      console.log(" - Codex: https://github.com/openai/codex");
      console.log();
      return;
    }

    // 2. Either take the non-interactive defaults or ask the user.
    const config: InitConfig = options.yes
      ? {
          baseBranch: "origin/main",
          sourceDir: ".",
          lintCmd: null,
          testCmd: null,
          selectedAdapters: availableAdapters,
        }
      : await promptForConfig(availableAdapters);

    // 3. Directory skeleton (created only after the prompts have succeeded).
    const skeleton = [
      targetDir,
      path.join(targetDir, "checks"),
      path.join(targetDir, "reviews"),
    ];
    for (const dir of skeleton) {
      await fs.mkdir(dir);
    }

    // 4. config.yml
    await fs.writeFile(
      path.join(targetDir, "config.yml"),
      generateConfigYml(config),
    );
    console.log(chalk.green("Created .gauntlet/config.yml"));

    // Check files are only generated for checks the user opted into.
    if (config.lintCmd !== null) {
      await writeInitCheckFile(
        targetDir,
        "lint",
        config.lintCmd,
        "# command: TODO - add your lint command (e.g., npm run lint)",
      );
    }
    if (config.testCmd !== null) {
      await writeInitCheckFile(
        targetDir,
        "unit-tests",
        config.testCmd,
        "# command: TODO - add your test command (e.g., npm test)",
      );
    }

    // 5. Default code review (YAML config referencing the built-in prompt).
    await fs.writeFile(
      path.join(targetDir, "reviews", "code-quality.yml"),
      `builtin: code-quality\nnum_reviews: 2\n`,
    );
    console.log(chalk.green("Created .gauntlet/reviews/code-quality.yml"));

    // Status script bundle goes under .gauntlet/ as well.
    await copyStatusScript(targetDir);

    // Optional fields are copied through only for entries that declare them.
    const commands: SkillCommand[] = SKILL_DEFINITIONS.map((skill) => ({
      action: skill.action,
      content: skill.content,
      ...("references" in skill ? { references: skill.references } : {}),
      ...("skillsOnly" in skill ? { skillsOnly: skill.skillsOnly } : {}),
    }));

    if (!options.yes) {
      // Interactive mode: ask where to install commands, then offer the stop hook.
      await promptAndInstallCommands({
        projectRoot,
        commands,
        availableAdapters,
      });
      await promptAndInstallStopHook(projectRoot);
      return;
    }

    // --yes: install at project level for every selected agent that exposes a
    // project command or skill directory; the stop hook is not installed.
    const installable = config.selectedAdapters.filter(
      (adapter) =>
        adapter.getProjectCommandDir() !== null ||
        adapter.getProjectSkillDir() !== null,
    );
    if (installable.length > 0) {
      await installCommands({
        level: "project",
        agentNames: installable.map((adapter) => adapter.name),
        projectRoot,
        commands,
      });
    }
  };

  program
    .command("init")
    .description("Initialize .gauntlet configuration")
    .option(
      "-y, --yes",
      "Skip prompts and use defaults (all available CLIs, source: ., no extra checks)",
    )
    .action(runInit);
}
1185
-
1186
/**
 * Probe every registered adapter, one at a time and in registry order,
 * printing a tick or cross line per adapter.
 *
 * @returns The adapters whose `isAvailable()` check resolved truthy.
 */
async function detectAvailableCLIs(): Promise<CLIAdapter[]> {
  const found: CLIAdapter[] = [];

  for (const candidate of getAllAdapters()) {
    if (!(await candidate.isAvailable())) {
      console.log(chalk.dim(` \u2717 ${candidate.name} (not installed)`));
      continue;
    }
    console.log(chalk.green(` \u2713 ${candidate.name}`));
    found.push(candidate);
  }

  return found;
}
1201
-
1202
/**
 * Interactively collect the init configuration on stdin/stdout.
 *
 * Asks, in order, for: the CLIs to use (blank answer = all available), the
 * base branch, the source directory, and whether to add lint / unit-test
 * checks. The readline interface is closed on both success and failure.
 *
 * @param availableAdapters - Adapters offered in the CLI selection menu.
 * @returns The collected configuration.
 * @throws Error when the CLI selection is invalid more than
 *   MAX_PROMPT_ATTEMPTS times.
 */
async function promptForConfig(
  availableAdapters: CLIAdapter[],
): Promise<InitConfig> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  // Promise wrapper around rl.question that always resolves to a trimmed string.
  const question = (prompt: string): Promise<string> =>
    new Promise((resolve) => {
      rl.question(prompt, (answer) => {
        resolve(answer?.trim() ?? "");
      });
    });

  // Ask a y/N question; on "y…" ask for the command itself.
  // null means the check was declined, "" means accepted but left blank.
  const askOptionalCommand = async (
    confirmPrompt: string,
    commandPrompt: string,
  ): Promise<string | null> => {
    console.log();
    const reply = await question(confirmPrompt);
    if (!reply.toLowerCase().startsWith("y")) return null;
    return await question(commandPrompt);
  };

  try {
    // CLI selection menu; the extra last option selects every adapter.
    console.log();
    console.log("Which CLIs would you like to use?");
    for (const [index, adapter] of availableAdapters.entries()) {
      console.log(` ${index + 1}) ${adapter.name}`);
    }
    console.log(` ${availableAdapters.length + 1}) All`);

    let selectedAdapters: CLIAdapter[] | null = null;
    for (let attempt = 1; selectedAdapters === null; attempt++) {
      if (attempt > MAX_PROMPT_ATTEMPTS) {
        throw new Error("Too many invalid attempts");
      }
      const answer = await question(`(comma-separated, e.g., 1,2): `);
      const picks = answer
        .split(",")
        .map((piece) => piece.trim())
        .filter((piece) => piece.length > 0);

      // Blank answer means "use everything"; an invalid pick yields null and re-asks.
      selectedAdapters =
        picks.length === 0
          ? availableAdapters
          : parseSelections(picks, availableAdapters);
    }

    // Base branch
    console.log();
    const baseBranch =
      (await question(
        "Enter your base branch (e.g., origin/main, origin/develop) [default: origin/main]: ",
      )) || "origin/main";

    // Source directory
    console.log();
    const sourceDir =
      (await question(
        "Enter your source directory (e.g., src, lib, .) [default: .]: ",
      )) || ".";

    // Optional lint and unit-test checks
    const lintCmd = await askOptionalCommand(
      "Would you like to add a linting check? [y/N]: ",
      "Enter lint command (blank to fill later): ",
    );
    const testCmd = await askOptionalCommand(
      "Would you like to add a unit test check? [y/N]: ",
      "Enter test command (blank to fill later): ",
    );

    return { baseBranch, sourceDir, lintCmd, testCmd, selectedAdapters };
  } finally {
    rl.close();
  }
}
1298
-
1299
/**
 * Parse numeric menu selections into an adapter list.
 * Used by both CLI selection (returns adapters) and agent selection (caller maps to names).
 *
 * Valid tokens are the 1-based positions `1..adapters.length`, plus
 * `adapters.length + 1`, which stands for "all adapters".
 *
 * A token must consist of digits only. Bare `parseInt` would accept input
 * such as "1abc" or "1.5" as 1 and silently pick the wrong adapter, so such
 * tokens are rejected like any other invalid selection.
 *
 * @param selections - Raw tokens typed by the user (already comma-split).
 * @param adapters - Adapters in the order they were shown in the menu.
 * @returns The chosen adapters, de-duplicated in first-seen order, or null
 *   (after printing a warning) if any token is invalid.
 */
function parseSelections(
  selections: string[],
  adapters: CLIAdapter[],
): CLIAdapter[] | null {
  const allOption = adapters.length + 1;
  // A Set keeps first-insertion order and drops repeats such as "1,1" or "1,<all>".
  const chosen = new Set<CLIAdapter>();

  for (const sel of selections) {
    const token = sel.trim();
    const num = /^\d+$/.test(token) ? Number.parseInt(token, 10) : Number.NaN;
    if (Number.isNaN(num) || num < 1 || num > allOption) {
      console.log(chalk.yellow(`Invalid selection: ${sel}`));
      return null;
    }

    const picked = num === allOption ? adapters : adapters.slice(num - 1, num);
    for (const adapter of picked) chosen.add(adapter);
  }

  return [...chosen];
}
1323
-
1324
/**
 * Render the initial `.gauntlet/config.yml`.
 *
 * The output lists the selected adapters under `cli.default_preference` and
 * declares the entry points: an optional one for `sourceDir` carrying the
 * lint / unit-test checks the user opted into, followed by a root (".")
 * entry point that always runs the `code-quality` review.
 *
 * Lines are assembled with explicit indentation so the YAML nesting does not
 * depend on whitespace inside multi-line template literals. The source
 * directory is emitted via JSON.stringify, which produces a valid YAML
 * double-quoted scalar even when the path contains quotes or backslashes.
 *
 * @param config - Answers collected by `init`.
 * @returns The file contents, terminated by a single newline.
 */
function generateConfigYml(config: InitConfig): string {
  const cliLines = config.selectedAdapters.map(
    (adapter) => `    - ${adapter.name}`,
  );

  const entryPointLines: string[] = [];

  // Checks, when any were selected, hang off a dedicated source-directory entry point.
  if (config.lintCmd !== null || config.testCmd !== null) {
    entryPointLines.push(
      `  - path: ${JSON.stringify(config.sourceDir)}`,
      "    checks:",
    );
    if (config.lintCmd !== null) entryPointLines.push("      - lint");
    if (config.testCmd !== null) entryPointLines.push("      - unit-tests");
  }

  // Always include the root entry point for reviews.
  entryPointLines.push(
    '  - path: "."',
    "    reviews:",
    "      - code-quality",
  );

  return [
    `base_branch: ${config.baseBranch}`,
    "log_dir: gauntlet_logs",
    "",
    "# Run gates in parallel when possible (default: true)",
    "# allow_parallel: true",
    "",
    "cli:",
    "  default_preference:",
    ...cliLines,
    "",
    "entry_points:",
    ...entryPointLines,
    "",
  ].join("\n");
}
1358
-
1359
/**
 * Copy the status script bundle into .gauntlet/skills/gauntlet/status/scripts/.
 * The script is sourced from the package's src/scripts/status.ts.
 *
 * The destination directory is always created. An existing destination file
 * is left untouched, and a missing bundled script only produces a warning.
 *
 * @param targetDir - The project's `.gauntlet` directory.
 */
async function copyStatusScript(targetDir: string): Promise<void> {
  const statusScriptDir = path.join(
    targetDir,
    "skills",
    "gauntlet",
    "status",
    "scripts",
  );
  const statusScriptPath = path.join(statusScriptDir, "status.ts");
  await fs.mkdir(statusScriptDir, { recursive: true });

  // Never overwrite a script that is already in place.
  if (await exists(statusScriptPath)) return;

  // Convert the module URL with fileURLToPath rather than reading
  // `URL.pathname`: pathname stays percent-encoded (a space becomes "%20")
  // and on Windows looks like "/C:/...", neither of which is a usable path.
  const { fileURLToPath } = await import("node:url");
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const bundledScript = path.join(moduleDir, "..", "scripts", "status.ts");

  if (await exists(bundledScript)) {
    await fs.copyFile(bundledScript, statusScriptPath);
    console.log(
      chalk.green("Created .gauntlet/skills/gauntlet/status/scripts/status.ts"),
    );
  } else {
    console.log(
      chalk.yellow(
        "Warning: bundled status script not found; /gauntlet-status may fail.",
      ),
    );
  }
}
1395
-
1396
/** Arguments for promptAndInstallCommands. */
interface PromptAndInstallOptions {
  /** Root directory of the project being initialized. */
  projectRoot: string;
  /** Skill commands offered for installation. */
  commands: SkillCommand[];
  /** Adapters that CLI detection reported as available. */
  availableAdapters: CLIAdapter[];
}
1401
-
1402
/**
 * Ask where the /gauntlet command should be installed.
 *
 * Prints the three-option menu once, then keeps asking until the answer is
 * exactly "1", "2" or "3".
 *
 * @param questionFn - Prompts the user and resolves to their answer.
 * @returns "none", "project" or "user" for options 1, 2 and 3 respectively.
 * @throws Error after more than MAX_PROMPT_ATTEMPTS invalid answers.
 */
async function promptInstallLevel(
  questionFn: (prompt: string) => Promise<string>,
): Promise<InstallLevel> {
  const menu = [
    "Where would you like to install the /gauntlet command?",
    " 1) Don't install commands",
    " 2) Project level (in this repo's .claude/skills, .gemini/commands, etc.)",
    " 3) User level (in ~/.claude/skills, ~/.gemini/commands, etc.)",
  ];
  for (const line of menu) console.log(line);
  console.log();

  // A Map (rather than an object literal) so answers like "toString" cannot match.
  const levelByAnswer = new Map<string, InstallLevel>([
    ["1", "none"],
    ["2", "project"],
    ["3", "user"],
  ]);
  const promptText = "Select option [1-3]: ";

  let answer = await questionFn(promptText);
  for (let attempt = 1; attempt <= MAX_PROMPT_ATTEMPTS; attempt++) {
    const level = levelByAnswer.get(answer);
    if (level !== undefined) return level;

    console.log(chalk.yellow("Please enter 1, 2, or 3"));
    answer = await questionFn(promptText);
  }

  throw new Error("Too many invalid attempts");
}
1434
-
1435
/**
 * Ask which CLI agents the command should be installed for.
 *
 * Lists every adapter with a 1-based number plus a final "All of the above"
 * entry, then reads comma-separated answers through `questionFn`. Each answer
 * is split on commas, trimmed, and stripped of empty items before being handed
 * to parseSelections; a falsy result from parseSelections triggers another
 * prompt.
 *
 * NOTE(review): validation messages and de-duplication are presumably handled
 * inside parseSelections, which is not visible here — confirm.
 *
 * @param questionFn - Shows a prompt and resolves with the user's answer.
 * @param installableAdapters - Adapters offered for selection.
 * @returns The `name` of every adapter parseSelections returned.
 * @throws Error "Too many invalid attempts" once MAX_PROMPT_ATTEMPTS answers
 *   have been rejected.
 */
async function promptAgentSelection(
  questionFn: (prompt: string) => Promise<string>,
  installableAdapters: CLIAdapter[],
): Promise<string[]> {
  const allOption = installableAdapters.length + 1;

  console.log();
  console.log("Which CLI agents would you like to install the command for?");
  installableAdapters.forEach((adapter, i) => {
    console.log(` ${i + 1}) ${adapter.name}`);
  });
  console.log(` ${allOption}) All of the above`);
  console.log();

  const promptText = `Select options (comma-separated, e.g., 1,2 or ${allOption} for all): `;

  // Ask at most MAX_PROMPT_ATTEMPTS times and evaluate every answer that was
  // asked for. Checking the limit only after re-prompting would ask one extra
  // question and then throw without ever looking at its answer.
  for (let attempt = 1; attempt <= MAX_PROMPT_ATTEMPTS; attempt++) {
    const answer = await questionFn(promptText);
    const selections = answer
      .split(",")
      .map((item) => item.trim())
      .filter((item) => item.length > 0);

    if (selections.length === 0) {
      console.log(chalk.yellow("Please select at least one option"));
      continue;
    }

    const chosen = parseSelections(selections, installableAdapters);
    if (chosen) return chosen.map((adapter) => adapter.name);
  }

  throw new Error("Too many invalid attempts");
}
1477
-
1478
/**
 * Interactively choose an install level and a set of agents, then install the
 * given commands for them.
 *
 * Returns without prompting when `availableAdapters` is empty. The readline
 * interface is always closed before installation starts and on every
 * early-exit or error path.
 */
async function promptAndInstallCommands(
  options: PromptAndInstallOptions,
): Promise<void> {
  const { projectRoot, commands, availableAdapters } = options;
  if (availableAdapters.length === 0) return;

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  // Promise wrapper around rl.question that resolves with the trimmed answer.
  const ask = (prompt: string): Promise<string> =>
    new Promise((resolve) => {
      rl.question(prompt, (answer) => {
        resolve(answer?.trim() ?? "");
      });
    });

  // Runs the prompts; resolves to undefined when there is nothing to install.
  const gatherPlan = async (): Promise<InstallCommandsOptions | undefined> => {
    console.log();
    console.log(chalk.bold("CLI Agent Command Setup"));
    console.log(
      chalk.dim(
        "The gauntlet command can be installed for CLI agents so you can run /gauntlet directly.",
      ),
    );
    console.log();

    const level = await promptInstallLevel(ask);
    if (level === "none") {
      console.log(chalk.dim("\nSkipping command installation."));
      return undefined;
    }

    // Keep only adapters that expose a command or skill directory at this level.
    const supportsLevel =
      level === "project"
        ? (a: CLIAdapter) =>
            a.getProjectCommandDir() !== null ||
            a.getProjectSkillDir() !== null
        : (a: CLIAdapter) =>
            a.getUserCommandDir() !== null || a.getUserSkillDir() !== null;
    const installableAdapters = availableAdapters.filter(supportsLevel);

    if (installableAdapters.length === 0) {
      console.log(
        chalk.yellow(`No available agents support ${level}-level commands.`),
      );
      return undefined;
    }

    const agentNames = await promptAgentSelection(ask, installableAdapters);
    return { level, agentNames, projectRoot, commands };
  };

  let plan: InstallCommandsOptions | undefined;
  try {
    plan = await gatherPlan();
  } finally {
    rl.close();
  }

  if (plan !== undefined) {
    await installCommands(plan);
  }
}
1555
-
1556
/**
 * One installable gauntlet skill/command.
 */
interface SkillCommand {
  /** Action name, e.g. "run", "check" or "push-pr". */
  action: string;
  /** Markdown body, including its YAML frontmatter. */
  content: string;
  /**
   * Extra reference files (file name → file content) written next to SKILL.md.
   * Only used by skill installs.
   */
  references?: Record<string, string>;
  /** When true, the entry is installed as a skill only, never as a flat command. */
  skillsOnly?: boolean;
}
1569
-
1570
/**
 * Settings shared by every install helper during one installCommands run.
 */
interface InstallContext {
  /**
   * True for user-level installs: target directories are used as given and
   * paths are printed in full. False for project-level installs: directories
   * are joined onto `projectRoot` and paths are printed relative to it.
   */
  isUserLevel: boolean;
  /** Project root used to resolve and display project-level paths. */
  projectRoot: string;
}
1574
-
1575
/**
 * Arguments accepted by installCommands.
 */
interface InstallCommandsOptions {
  /** Where to install; "none" makes installCommands a no-op. */
  level: InstallLevel;
  /** Adapter names to install for; names with no matching adapter are skipped. */
  agentNames: string[];
  /** Project root used to resolve project-level target directories. */
  projectRoot: string;
  /** Skills/commands to install for each selected adapter. */
  commands: SkillCommand[];
}
1581
-
1582
/**
 * Write one skill as `<skillDir>/gauntlet-<action>/SKILL.md`, plus any
 * reference files under a sibling `references/` directory.
 *
 * Existing files are never overwritten. When SKILL.md is already present the
 * function logs a skip notice and returns immediately, so reference files are
 * not written in that case either.
 *
 * @param skillDir - Directory that holds the per-action skill folders.
 * @param ctx - Controls whether logged paths are absolute or project-relative.
 * @param command - The skill to install.
 */
async function installSkill(
  skillDir: string,
  ctx: InstallContext,
  command: SkillCommand,
): Promise<void> {
  // User-level paths are shown as given; project-level paths relative to the root.
  const display = (target: string): string =>
    ctx.isUserLevel ? target : path.relative(ctx.projectRoot, target);

  const actionDir = path.join(skillDir, `gauntlet-${command.action}`);
  const skillPath = path.join(actionDir, "SKILL.md");

  await fs.mkdir(actionDir, { recursive: true });

  if (await exists(skillPath)) {
    console.log(
      chalk.dim(` claude: ${display(skillPath)} already exists, skipping`),
    );
    return;
  }

  await fs.writeFile(skillPath, command.content);
  console.log(chalk.green(`Created ${display(skillPath)}`));

  if (!command.references) return;

  // Reference files live in <actionDir>/references; existing ones are kept.
  const refsDir = path.join(actionDir, "references");
  await fs.mkdir(refsDir, { recursive: true });
  for (const [fileName, fileContent] of Object.entries(command.references)) {
    const refPath = path.join(refsDir, fileName);
    if (await exists(refPath)) continue;

    await fs.writeFile(refPath, fileContent);
    console.log(chalk.green(`Created ${display(refPath)}`));
  }
}
1624
-
1625
/**
 * Write one command as a single flat file inside `commandDir`.
 *
 * The file is named "gauntlet" for the "run" action and after the action
 * itself otherwise, with the adapter's command extension appended. The content
 * is passed through `adapter.transformCommand` before writing. An existing
 * file is left untouched and a skip notice is logged instead.
 *
 * @param adapter - Supplies the file extension, content transform and log label.
 * @param commandDir - Directory the file is written to.
 * @param ctx - Controls whether logged paths are absolute or project-relative.
 * @param command - The command to install.
 */
async function installFlatCommand(
  adapter: CLIAdapter,
  commandDir: string,
  ctx: InstallContext,
  command: SkillCommand,
): Promise<void> {
  const baseName = command.action === "run" ? "gauntlet" : command.action;
  const filePath = path.join(
    commandDir,
    `${baseName}${adapter.getCommandExtension()}`,
  );
  // User-level paths are shown as given; project-level paths relative to the root.
  const shownPath = ctx.isUserLevel
    ? filePath
    : path.relative(ctx.projectRoot, filePath);

  if (await exists(filePath)) {
    console.log(
      chalk.dim(` ${adapter.name}: ${shownPath} already exists, skipping`),
    );
    return;
  }

  await fs.writeFile(filePath, adapter.transformCommand(command.content));
  console.log(chalk.green(`Created ${shownPath}`));
}
1657
-
1658
/**
 * Install every command as a skill for one skills-capable adapter.
 *
 * Failures are reported as a yellow warning and not rethrown. The try block
 * wraps the whole loop, so the first failure also skips the remaining
 * commands for this adapter.
 *
 * @param adapter - Used only for its name in the warning message.
 * @param skillDir - Skill directory; joined onto the project root unless the
 *   install is user-level.
 * @param ctx - Install level and project root.
 * @param commands - Skills to install.
 */
async function installSkillsForAdapter(
  adapter: CLIAdapter,
  skillDir: string,
  ctx: InstallContext,
  commands: SkillCommand[],
): Promise<void> {
  let targetDir = skillDir;
  if (!ctx.isUserLevel) {
    targetDir = path.join(ctx.projectRoot, skillDir);
  }

  try {
    for (const command of commands) {
      await installSkill(targetDir, ctx, command);
    }
  } catch (error: unknown) {
    const reason = (error as { message?: string }).message;
    console.log(
      chalk.yellow(` ${adapter.name}: Could not create skill - ${reason}`),
    );
  }
}
1683
-
1684
/**
 * Install flat command files for one adapter that has no skill directory.
 *
 * The "check" and "status" actions and any entry marked `skillsOnly` are
 * skipped. Failures, including failure to create the directory, are reported
 * as a yellow warning and not rethrown; the first failure also skips the
 * remaining commands for this adapter.
 *
 * @param adapter - Adapter the files are written for.
 * @param commandDir - Command directory; joined onto the project root unless
 *   the install is user-level.
 * @param ctx - Install level and project root.
 * @param commands - Candidate commands.
 */
async function installFlatCommandsForAdapter(
  adapter: CLIAdapter,
  commandDir: string,
  ctx: InstallContext,
  commands: SkillCommand[],
): Promise<void> {
  let targetDir = commandDir;
  if (!ctx.isUserLevel) {
    targetDir = path.join(ctx.projectRoot, commandDir);
  }

  try {
    await fs.mkdir(targetDir, { recursive: true });
    for (const command of commands) {
      const excluded =
        command.action === "check" ||
        command.action === "status" ||
        command.skillsOnly;
      if (excluded) continue;

      await installFlatCommand(adapter, targetDir, ctx, command);
    }
  } catch (error: unknown) {
    const reason = (error as { message?: string }).message;
    console.log(
      chalk.yellow(` ${adapter.name}: Could not create command - ${reason}`),
    );
  }
}
1714
-
1715
/**
 * Install the given commands for each named agent at the requested level.
 *
 * Does nothing when the level is "none" or no agents were named. For each
 * agent, a skill directory takes precedence: an adapter that reports one for
 * the chosen level gets skills and no flat commands. Agent names with no
 * matching adapter, and adapters with neither directory, are skipped silently.
 */
async function installCommands(options: InstallCommandsOptions): Promise<void> {
  const { level, agentNames, projectRoot, commands } = options;
  if (level === "none" || agentNames.length === 0) return;

  console.log();
  const adapters = getAllAdapters();

  const isUserLevel = level === "user";
  const ctx: InstallContext = { isUserLevel, projectRoot };

  for (const agentName of agentNames) {
    const adapter = adapters.find((candidate) => candidate.name === agentName);
    if (!adapter) continue;

    const skillDir = isUserLevel
      ? adapter.getUserSkillDir()
      : adapter.getProjectSkillDir();

    if (skillDir) {
      await installSkillsForAdapter(adapter, skillDir, ctx, commands);
    } else {
      const commandDir = isUserLevel
        ? adapter.getUserCommandDir()
        : adapter.getProjectCommandDir();
      if (commandDir) {
        await installFlatCommandsForAdapter(adapter, commandDir, ctx, commands);
      }
    }
  }
}
1746
-
1747
/**
 * Settings fragment that registers the gauntlet stop hook for Claude Code.
 * Its `hooks.Stop` entries are what installStopHook appends to
 * `.claude/settings.local.json`.
 */
const STOP_HOOK_CONFIG = {
  hooks: {
    Stop: [
      {
        hooks: [
          // NOTE(review): timeout is presumably in seconds — confirm against
          // the Claude Code hooks documentation.
          { type: "command", command: "agent-gauntlet stop-hook", timeout: 300 },
        ],
      },
    ],
  },
};
1765
-
1766
/**
 * Report whether standard input is attached to a TTY.
 *
 * @returns true when `process.stdin.isTTY` is truthy, false otherwise
 *   (including when the property is undefined).
 */
function isInteractive(): boolean {
  return !!process.stdin.isTTY;
}
1772
-
1773
/**
 * Ask whether to install the Claude Code stop hook and install it on "y"/"yes".
 *
 * Does nothing when stdin is not interactive. The answer is trimmed and
 * compared case-insensitively; anything other than "y" or "yes" declines. The
 * readline interface is closed before installation starts and on error.
 */
async function promptAndInstallStopHook(projectRoot: string): Promise<void> {
  if (!isInteractive()) return;

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  // Promise wrapper around rl.question that resolves with the trimmed answer.
  const ask = (prompt: string): Promise<string> =>
    new Promise((resolve) => {
      rl.question(prompt, (answer) => {
        resolve(answer?.trim() ?? "");
      });
    });

  let shouldInstall = false;
  try {
    console.log();
    const reply = (
      await ask("Install Claude Code stop hook? (y/n): ")
    ).toLowerCase();
    shouldInstall = reply === "y" || reply === "yes";
  } finally {
    rl.close();
  }

  if (shouldInstall) {
    await installStopHook(projectRoot);
  }
}
1814
-
1815
/**
 * Add the gauntlet stop hook to `<projectRoot>/.claude/settings.local.json`.
 *
 * Creates the `.claude` directory if needed and merges STOP_HOOK_CONFIG's
 * `hooks.Stop` entries into any existing settings, keeping other keys and
 * other Stop hooks. If a hook whose command is "agent-gauntlet stop-hook" is
 * already present, nothing is written.
 *
 * Existing content that cannot be used as a settings object — unreadable,
 * invalid JSON, or valid JSON that is not a plain object (null, array,
 * string, ...) — is treated as empty and replaced. A `hooks` value that is
 * not a plain object is likewise replaced.
 *
 * @param projectRoot - Directory that contains (or will contain) `.claude`.
 */
export async function installStopHook(projectRoot: string): Promise<void> {
  // Guard for values that can safely be spread and indexed as settings.
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const claudeDir = path.join(projectRoot, ".claude");
  const settingsPath = path.join(claudeDir, "settings.local.json");

  await fs.mkdir(claudeDir, { recursive: true });

  let existingSettings: Record<string, unknown> = {};
  if (await exists(settingsPath)) {
    try {
      const parsed: unknown = JSON.parse(
        await fs.readFile(settingsPath, "utf-8"),
      );
      // JSON.parse also succeeds for null, arrays and primitives; reading
      // `.hooks` from null would throw, so only plain objects are accepted.
      if (isRecord(parsed)) existingSettings = parsed;
    } catch {
      // Unreadable or invalid JSON: start from empty settings.
      existingSettings = {};
    }
  }

  // A non-object `hooks` (e.g. a string) must not be spread into the result.
  const existingHooks = isRecord(existingSettings.hooks)
    ? existingSettings.hooks
    : {};
  const existingStopHooks: unknown[] = Array.isArray(existingHooks.Stop)
    ? existingHooks.Stop
    : [];

  // Avoid duplicates: look for our command in any existing Stop entry.
  const hookExists = existingStopHooks.some(
    (entry) =>
      isRecord(entry) &&
      Array.isArray(entry.hooks) &&
      entry.hooks.some(
        (hook: unknown) =>
          isRecord(hook) && hook.command === "agent-gauntlet stop-hook",
      ),
  );
  if (hookExists) {
    console.log(chalk.dim("Stop hook already installed"));
    return;
  }

  const mergedSettings = {
    ...existingSettings,
    hooks: {
      ...existingHooks,
      Stop: [...existingStopHooks, ...STOP_HOOK_CONFIG.hooks.Stop],
    },
  };

  // Pretty-printed with a trailing newline.
  await fs.writeFile(
    settingsPath,
    `${JSON.stringify(mergedSettings, null, 2)}\n`,
  );

  console.log(
    chalk.green(
      "Stop hook installed - gauntlet will run automatically when agent stops",
    ),
  );
}