agent-gauntlet 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import { Command } from "commander";
7
7
  // package.json
8
8
  var package_default = {
9
9
  name: "agent-gauntlet",
10
- version: "0.11.0",
10
+ version: "0.12.0",
11
11
  description: "A CLI tool for testing AI coding agents",
12
12
  license: "Apache-2.0",
13
13
  author: "Paul Caplan",
@@ -5383,12 +5383,14 @@ async function shouldAutoClean(logDir, baseBranch) {
5383
5383
  } catch {
5384
5384
  return { clean: false };
5385
5385
  }
5386
- try {
5387
- const isMerged = await isCommitInBranch(state.commit, baseBranch);
5388
- if (isMerged) {
5389
- return { clean: true, reason: "commit merged", resetState: true };
5390
- }
5391
- } catch {}
5386
+ if (!state.working_tree_ref || state.working_tree_ref === state.commit) {
5387
+ try {
5388
+ const isMerged = await isCommitInBranch(state.commit, baseBranch);
5389
+ if (isMerged) {
5390
+ return { clean: true, reason: "commit merged", resetState: true };
5391
+ }
5392
+ } catch {}
5393
+ }
5392
5394
  return { clean: false };
5393
5395
  }
5394
5396
  async function performAutoClean(logDir, result, maxPreviousLogs = 3) {
@@ -6632,10 +6634,17 @@ function registerHelpCommand(program) {
6632
6634
  });
6633
6635
  }
6634
6636
  // src/commands/init.ts
6637
+ import { readFileSync } from "node:fs";
6635
6638
  import fs25 from "node:fs/promises";
6636
6639
  import path23 from "node:path";
6637
6640
  import readline from "node:readline";
6641
+ import { fileURLToPath } from "node:url";
6638
6642
  import chalk9 from "chalk";
6643
+ var __dirname2 = path23.dirname(fileURLToPath(import.meta.url));
6644
+ function readSkillTemplate(filename) {
6645
+ const templatePath = path23.join(__dirname2, "skill-templates", filename);
6646
+ return readFileSync(templatePath, "utf-8");
6647
+ }
6639
6648
  var MAX_PROMPT_ATTEMPTS = 10;
6640
6649
  function makeQuestion(rl) {
6641
6650
  return (prompt) => new Promise((resolve) => rl.question(prompt, (a) => resolve(a?.trim() ?? "")));
@@ -6725,883 +6734,22 @@ ${steps.join(`
6725
6734
  }
6726
6735
  var GAUNTLET_RUN_SKILL_CONTENT = buildGauntletSkillContent("run");
6727
6736
  var GAUNTLET_CHECK_SKILL_CONTENT = buildGauntletSkillContent("check");
6728
- var PUSH_PR_SKILL_CONTENT = `---
6729
- name: gauntlet-push-pr
6730
- description: Commit changes, push to remote, and create or update a pull request
6731
- disable-model-invocation: true
6732
- allowed-tools: Bash
6733
- ---
6734
-
6735
- # /gauntlet-push-pr
6736
- Commit all changes, push to remote, and create or update a pull request for the current branch.
6737
-
6738
- After the PR is created or updated, verify it exists by running \`gh pr view\`.
6739
- `;
6740
- var FIX_PR_SKILL_CONTENT = `---
6741
- name: gauntlet-fix-pr
6742
- description: Fix CI failures or address review comments on a pull request
6743
- disable-model-invocation: true
6744
- allowed-tools: Bash
6745
- ---
6746
-
6747
- # /gauntlet-fix-pr
6748
- Fix CI failures or address review comments on the current pull request.
6749
-
6750
- 1. Check CI status and review comments: \`gh pr checks\` and \`gh pr view --comments\`
6751
- 2. Fix any failing checks or address reviewer feedback
6752
- 3. Commit and push your changes
6753
- 4. After pushing, verify the PR is updated: \`gh pr view\`
6754
- `;
6755
- var GAUNTLET_STATUS_SKILL_CONTENT = `---
6756
- name: gauntlet-status
6757
- description: Show a summary of the most recent gauntlet session
6758
- disable-model-invocation: true
6759
- allowed-tools: Bash, Read
6760
- ---
6761
-
6762
- # /gauntlet-status
6763
- Show a detailed summary of the most recent gauntlet session.
6764
-
6765
- ## Step 1: Run the status script
6766
-
6767
- \`\`\`bash
6768
- bun .gauntlet/skills/gauntlet/status/scripts/status.ts 2>&1
6769
- \`\`\`
6770
-
6771
- The script parses the \`.debug.log\` for session-level data (run count, gate results, pass/fail status) and lists all log files with their paths and sizes.
6772
-
6773
- ## Step 2: Read failed gate details
6774
-
6775
- For each gate marked **FAIL** in the Gate Results table, read the corresponding log files to extract failure details:
6776
-
6777
- - **Check failures** (e.g., \`check:src:code-health\`): Read the matching \`check_*.log\` file. Check log formats vary by tool (linters, test runners, code health analyzers) — read the file and extract the relevant error/warning output.
6778
- - **Review failures** (e.g., \`review:.:code-quality\`): Read the matching \`review_*.json\` file(s). These contain structured violation data with \`file\`, \`line\`, \`issue\`, \`priority\`, and \`status\` fields.
6779
-
6780
- Use the file paths from the "Log Files" section of the script output. Match gate IDs to file names: \`check:.:lint\` corresponds to \`check_._lint.*.log\`, \`review:.:code-quality\` corresponds to \`review_._code-quality_*.{log,json}\`.
6781
-
6782
- ## Step 3: Present the results
6783
-
6784
- Combine the script's session summary with the detailed failure information into a comprehensive report:
6785
-
6786
- 1. Session overview (status, iterations, duration, fixed/skipped/failed counts)
6787
- 2. Gate results table
6788
- 3. For any failed gates: the specific errors, violations, or test failures from the log files
6789
- 4. For reviews with violations: list each violation with file, line, issue, priority, and current status (fixed/skipped/outstanding)
6790
- `;
6791
- function buildHelpSkillBundle() {
6792
- const content = `---
6793
- name: gauntlet-help
6794
- description: Diagnose and explain gauntlet behavior using runtime evidence
6795
- allowed-tools: Bash, Read, Glob, Grep
6796
- ---
6797
-
6798
- # /gauntlet-help
6799
-
6800
- Evidence-based diagnosis of gauntlet behavior. This skill is **diagnosis-only** — it explains what happened and why, but does not auto-fix issues. It operates from **runtime artifacts and CLI outputs**, not source code.
6801
-
6802
- ## Diagnostic Workflow
6803
-
6804
- Follow this order for every diagnostic question:
6805
-
6806
- 1. **Resolve \`log_dir\`**: Read \`.gauntlet/config.yml\` and extract the \`log_dir\` field (default: \`gauntlet_logs\`). All log paths below are relative to \`<log_dir>/\`.
6807
- 2. **Passive evidence first**: Read files before running commands.
6808
- - \`<log_dir>/.debug.log\` — timestamped event log (commands, gate results, state changes, errors)
6809
- - \`<log_dir>/.execution_state\` — JSON with \`last_run_completed_at\`, \`branch\`, \`commit\`, \`working_tree_ref\`, and \`unhealthy_adapters\` (adapter name → \`{marked_at, reason}\`)
6810
- - \`<log_dir>/console.*.log\` — console output per run (highest number = latest)
6811
- - \`<log_dir>/check_*.log\` — check gate output
6812
- - \`<log_dir>/review_*.json\` — review gate results with violations (\`file\`, \`line\`, \`issue\`, \`fix\`, \`priority\`, \`status\`)
6813
- - \`.gauntlet/config.yml\` — project configuration
6814
- 3. **Active evidence when needed**: Run CLI commands only when passive evidence is insufficient for a confident diagnosis.
6815
- 4. **Explain with evidence**: Clearly distinguish confirmed findings from inference.
6816
-
6817
- ## Evidence Sources
6818
-
6819
- | Source | What It Confirms |
6820
- |--------|-----------------|
6821
- | \`.gauntlet/config.yml\` | \`log_dir\`, \`base_branch\`, \`entry_points\`, \`cli.default_preference\`, \`stop_hook\` settings, \`max_retries\`, \`rerun_new_issue_threshold\` |
6822
- | \`<log_dir>/.debug.log\` | Timestamped event history: commands executed, gate results, state transitions, errors |
6823
- | \`<log_dir>/.execution_state\` | Last successful run timestamp, branch/commit at that time, working tree stash ref, unhealthy adapter cooldowns |
6824
- | \`<log_dir>/console.*.log\` | Human-readable output from each run iteration |
6825
- | \`<log_dir>/check_*.log\` | Raw output from check gate commands (linters, test runners, etc.) |
6826
- | \`<log_dir>/review_*.json\` | Structured review violations with file, line, issue, priority, and resolution status |
6827
- | \`<log_dir>/.gauntlet-run.lock\` | Lock file (contains PID) — present only during active execution |
6828
- | \`<log_dir>/.stop-hook-active\` | Marker file (contains PID) — present only during active stop-hook execution |
6829
- | \`<log_dir>/.ci-wait-attempts\` | CI wait attempt counter |
6830
-
6831
- ## CLI Command Quick-Reference
6832
-
6833
- Use these only when passive evidence is insufficient:
6834
-
6835
- | Command | When to Use |
6836
- |---------|-------------|
6837
- | \`agent-gauntlet list\` | See configured gates and entry points |
6838
- | \`agent-gauntlet health\` | Check adapter availability and health status |
6839
- | \`agent-gauntlet detect\` | See which files changed and which gates would apply |
6840
- | \`agent-gauntlet validate\` | Validate config.yml syntax and schema |
6841
- | \`agent-gauntlet clean\` | Archive current logs and reset state (destructive — confirm with user first) |
6842
-
6843
- ## Routing Logic
6844
-
6845
- Based on the user's question, load the appropriate reference file for detailed guidance:
6846
-
6847
- | Question Domain | Reference File |
6848
- |----------------|---------------|
6849
- | Stop hook blocked/allowed, hook statuses, recursion, timing | \`references/stop-hook-troubleshooting.md\` |
6850
- | Missing config, YAML errors, misconfiguration, init problems | \`references/config-troubleshooting.md\` |
6851
- | Check failures, review failures, no_changes, no_applicable_gates, rerun mode | \`references/gate-troubleshooting.md\` |
6852
- | Lock conflict, stale locks, parallel runs, cleanup | \`references/lock-troubleshooting.md\` |
6853
- | Adapter health, missing tools, usage limits, cooldown | \`references/adapter-troubleshooting.md\` |
6854
- | PR push, CI status, auto_push_pr, auto_fix_pr, CI wait | \`references/ci-pr-troubleshooting.md\` |
6855
-
6856
- If the question spans multiple domains, load each relevant reference.
6857
-
6858
- ## Output Contract
6859
-
6860
- Every diagnostic response MUST include these sections:
6861
-
6862
- ### Diagnosis
6863
- What happened and why, stated clearly.
6864
-
6865
- ### Evidence
6866
- Specific files read, field values observed, and command outputs that support the diagnosis. Quote relevant log lines or config values.
6867
-
6868
- ### Confidence
6869
- One of:
6870
- - **High** — diagnosis is fully supported by direct evidence
6871
- - **Medium** — diagnosis is likely but some evidence is missing or ambiguous
6872
- - **Low** — diagnosis is inferred; key evidence is unavailable
6873
-
6874
- Downgrade confidence when:
6875
- - \`.debug.log\` or \`.execution_state\` is missing or empty
6876
- - Log files referenced in output don't exist
6877
- - Config values can't be verified
6878
- - CLI commands fail or return unexpected output
6879
-
6880
- ### Next Steps
6881
- Actionable recommendations for the user. If confidence is not high, suggest what additional evidence would confirm the diagnosis.
6882
- `;
6883
- const references = {
6884
- "stop-hook-troubleshooting.md": `# Stop Hook Troubleshooting
6885
-
6886
- ## All Stop-Hook Statuses
6887
-
6888
- ### Allowing Statuses (stop is permitted)
6889
-
6890
- | Status | Message | Meaning |
6891
- |--------|---------|---------|
6892
- | \`passed\` | All gates completed successfully | Every configured check and review gate passed |
6893
- | \`passed_with_warnings\` | Passed with warnings (some issues were skipped) | Gates ran but some review violations were skipped rather than fixed |
6894
- | \`no_applicable_gates\` | No applicable gates matched current changes | Changed files didn't match any configured entry point |
6895
- | \`no_changes\` | No changes detected | No files changed relative to \`base_branch\` |
6896
- | \`ci_passed\` | CI passed — all checks completed and no blocking reviews | GitHub CI checks succeeded and no \`CHANGES_REQUESTED\` reviews |
6897
- | \`no_config\` | Not a gauntlet project — no \`.gauntlet/config.yml\` found | No gauntlet configuration in this repo |
6898
- | \`stop_hook_active\` | Stop hook cycle detected — allowing stop to prevent infinite loop | Recursion prevention triggered |
6899
- | \`stop_hook_disabled\` | Stop hook is disabled via configuration | \`stop_hook.enabled: false\` in config or \`GAUNTLET_STOP_HOOK_ENABLED=false\` |
6900
- | \`interval_not_elapsed\` | Run interval not elapsed | \`stop_hook.run_interval_minutes\` hasn't elapsed since last run |
6901
- | \`invalid_input\` | Invalid hook input — could not parse JSON | Stop-hook couldn't parse stdin JSON from the IDE |
6902
- | \`lock_conflict\` | Another gauntlet run is already in progress | Lock file exists with a live PID |
6903
- | \`error\` | Stop hook error | Unexpected error during execution |
6904
- | \`retry_limit_exceeded\` | Retry limit exceeded | Max retries (default 3) exhausted; requires \`agent-gauntlet clean\` |
6905
-
6906
- ### Blocking Statuses (stop is prevented)
6907
-
6908
- | Status | Message | Meaning |
6909
- |--------|---------|---------|
6910
- | \`failed\` | Issues must be fixed before stopping | One or more gates failed; agent must fix and re-run |
6911
- | \`pr_push_required\` | PR needs to be created or updated before stopping | Gates passed but \`auto_push_pr\` is enabled and PR hasn't been pushed |
6912
- | \`ci_pending\` | CI checks still running — waiting for completion | Waiting for GitHub CI to finish |
6913
- | \`ci_failed\` | CI failed or review changes requested | GitHub CI checks failed or a reviewer requested changes |
6914
-
6915
- ## Common Scenarios
6916
-
6917
- ### "The hook blocked my stop"
6918
- 1. Check the status in \`.debug.log\` — search for \`status:\` entries
6919
- 2. If \`failed\`: Read the gate output files listed in \`.debug.log\` or the latest \`console.*.log\`
6920
- 3. If \`pr_push_required\`: The agent needs to commit, push, and create a PR
6921
- 4. If \`ci_pending\`: CI is still running; the hook will re-check on next stop attempt
6922
- 5. If \`ci_failed\`: Read CI failure details — run \`agent-gauntlet wait-ci\` or check \`gh pr checks\`
6923
-
6924
- ### "The hook allowed but shouldn't have"
6925
- 1. Check if the status was \`no_changes\` — verify \`base_branch\` is correct in \`config.yml\`
6926
- 2. Check if \`no_applicable_gates\` — run \`agent-gauntlet detect\` to see which files changed and which gates match
6927
- 3. Check if \`interval_not_elapsed\` — the run was skipped because \`run_interval_minutes\` hadn't elapsed
6928
- 4. Check if \`stop_hook_disabled\` — verify \`stop_hook.enabled\` in config and \`GAUNTLET_STOP_HOOK_ENABLED\` env var
6929
-
6930
- ### "The gauntlet isn't running gates / keeps allowing stops immediately"
6931
- This happens when the iteration counter is inherited from a previous session's failures. Symptoms:
6932
- 1. \`.debug.log\` shows \`RUN_START\` followed immediately by \`RUN_END\` with \`duration=0.0s\`
6933
- 2. \`iterations\` value is high (e.g., 7, 8, 9) even though the current session hasn't run that many times
6934
- 3. Stop-hook returns \`retry_limit_exceeded\` without executing any gates
6935
- 4. \`failed=0\` in \`RUN_END\` (no gates ran, so none failed — but status is still \`fail\`)
6936
-
6937
- **Root cause**: The iteration counter persists in \`.execution_state\` across sessions. If a previous session ended with unresolved failures and hit the retry limit, the counter carries over. The next session enters verification mode and immediately exceeds the limit.
6938
-
6939
- **Fix**: Run \`agent-gauntlet clean\` to reset the state and iteration counter, then re-run.
6940
-
6941
- **Prevention**: Before starting a new task, check if the previous session left failures behind. If \`.debug.log\` shows a recent \`STOP_HOOK decision=block reason=failed\` or \`retry_limit_exceeded\`, clean state first.
6942
-
6943
- ### "The hook seems stuck"
6944
- 1. Check for \`.stop-hook-active\` marker in \`<log_dir>/\` — if present, a stop-hook may be running
6945
- 2. Check PID in the marker file — is that process alive?
6946
- 3. The stop-hook has a **5-minute hard timeout** (\`STOP_HOOK_TIMEOUT_MS\`) and will self-terminate
6947
- 4. Stale marker files older than **10 minutes** are automatically cleaned up on next invocation
6948
-
6949
- ## Recursion Prevention
6950
-
6951
- The stop-hook uses three layers to prevent infinite loops:
6952
-
6953
- ### Layer 1: Environment Variable
6954
- - Variable: \`GAUNTLET_STOP_HOOK_ACTIVE\`
6955
- - Set by the parent gauntlet when spawning child CLI processes for reviews
6956
- - If \`GAUNTLET_STOP_HOOK_ACTIVE=1\`, the stop-hook exits immediately with \`stop_hook_active\`
6957
- - Prevents child review processes from triggering nested gauntlets
6958
-
6959
- ### Layer 2: Marker File
6960
- - File: \`<log_dir>/.stop-hook-active\` (contains the PID)
6961
- - Created before execution, removed after completion (in \`finally\` block)
6962
- - If another stop-hook fires during execution and finds a fresh marker (< 10 min old), it exits with \`stop_hook_active\`
6963
- - Stale markers (> 10 min) are deleted and execution proceeds
6964
- - Needed because Claude Code does NOT pass env vars to hooks
6965
-
6966
- ### Layer 3: IDE Input Field
6967
- - Claude Code: \`stop_hook_active\` boolean in the stdin JSON
6968
- - Cursor: \`loop_count\` field; threshold is 10 (returns \`retry_limit_exceeded\` if exceeded)
6969
- - Additional safety net from the IDE itself
6970
-
6971
- ## Timing Values
6972
-
6973
- | Timer | Value | Purpose |
6974
- |-------|-------|---------|
6975
- | Stdin timeout | 5 seconds | Safety net for delayed stdin from IDE |
6976
- | Hard timeout | 5 minutes | Self-timeout to prevent zombie processes |
6977
- | Stale marker | 10 minutes | Marker files older than this are cleaned up |
6978
- | \`run_interval_minutes\` | Configurable (default 0 = always run) | Minimum time between stop-hook runs |
6979
-
6980
- ## Environment Variable Overrides
6981
-
6982
- These override project config values (env > project config > global config):
6983
-
6984
- | Variable | Type | Effect |
6985
- |----------|------|--------|
6986
- | \`GAUNTLET_STOP_HOOK_ENABLED\` | \`true\`/\`1\`/\`false\`/\`0\` | Enable or disable the stop hook entirely |
6987
- | \`GAUNTLET_STOP_HOOK_INTERVAL_MINUTES\` | Integer >= 0 | Minutes between runs (0 = always run) |
6988
- | \`GAUNTLET_AUTO_PUSH_PR\` | \`true\`/\`1\`/\`false\`/\`0\` | Check PR status after gates pass |
6989
- | \`GAUNTLET_AUTO_FIX_PR\` | \`true\`/\`1\`/\`false\`/\`0\` | Enable CI wait workflow after PR created |
6990
-
6991
- ## Diagnosing \`stop_hook_disabled\`
6992
-
6993
- This status means the stop hook has been explicitly disabled. Check in order:
6994
-
6995
- 1. \`GAUNTLET_STOP_HOOK_ENABLED\` environment variable (highest precedence)
6996
- 2. \`.gauntlet/config.yml\` → \`stop_hook.enabled\`
6997
- 3. \`~/.config/agent-gauntlet/config.yml\` → \`stop_hook.enabled\` (global)
6998
-
6999
- To re-enable: remove the env var or set \`stop_hook.enabled: true\` in config.
7000
- `,
7001
- "config-troubleshooting.md": `# Config Troubleshooting
7002
-
7003
- ## \`no_config\` — Missing Configuration
7004
-
7005
- The stop hook returns \`no_config\` when \`.gauntlet/config.yml\` doesn't exist. This is normal for non-gauntlet projects.
7006
-
7007
- **If it should exist:**
7008
- 1. Run \`agent-gauntlet init\` to create the configuration
7009
- 2. Or manually create \`.gauntlet/config.yml\`
7010
-
7011
- ## YAML Syntax and Schema Errors
7012
-
7013
- Run \`agent-gauntlet validate\` to check config syntax and schema.
7014
-
7015
- **Common YAML issues:**
7016
- - Indentation errors (YAML requires consistent indentation)
7017
- - Missing colons after keys
7018
- - Unquoted special characters in values
7019
-
7020
- **Schema validation catches:**
7021
- - Missing required fields (\`cli.default_preference\`, \`entry_points\`)
7022
- - Wrong types (e.g., string where array expected)
7023
- - Invalid enum values (e.g., invalid \`rerun_new_issue_threshold\`)
7024
-
7025
- ## Common Misconfigurations
7026
-
7027
- ### Missing or Empty \`cli.default_preference\`
7028
- \`\`\`yaml
7029
- # WRONG — missing
7030
- cli: {}
7031
-
7032
- # WRONG — empty
7033
- cli:
7034
- default_preference: []
7035
-
7036
- # CORRECT
7037
- cli:
7038
- default_preference:
7039
- - claude
7040
- \`\`\`
7041
-
7042
- ### Empty \`entry_points\`
7043
- \`\`\`yaml
7044
- # WRONG
7045
- entry_points: []
7046
-
7047
- # CORRECT
7048
- entry_points:
7049
- - path: "."
7050
- reviews:
7051
- - code-quality
7052
- \`\`\`
7053
-
7054
- ### \`fail_fast\` with \`parallel\`
7055
- These are mutually exclusive for check gates. Schema validation rejects this:
7056
- \`\`\`yaml
7057
- # WRONG — in a check YAML file
7058
- parallel: true
7059
- fail_fast: true
7060
-
7061
- # CORRECT — fail_fast only works with sequential
7062
- parallel: false
7063
- fail_fast: true
7064
- \`\`\`
7065
-
7066
- ### Conflicting Fix Instruction Fields
7067
- Check gates support only one fix method. These are mutually exclusive:
7068
- - \`fix_instructions\` (inline string)
7069
- - \`fix_instructions_file\` (path to file)
7070
- - \`fix_with_skill\` (skill name)
7071
-
7072
- ### Entry Point References Non-Existent Gate
7073
- If an entry point lists a check or review name that doesn't exist in \`.gauntlet/checks/\` or \`.gauntlet/reviews/\`, validation fails.
7074
-
7075
- ### Review Gate Uses Tool Not in \`default_preference\`
7076
- Review gates can specify \`cli_preference\` but the tools must also appear in \`cli.default_preference\`.
7077
-
7078
- ## \`log_dir\` Issues
7079
-
7080
- The \`log_dir\` field (default: \`gauntlet_logs\`) determines where all logs are written.
7081
-
7082
- **Can't find logs:**
7083
- 1. Check \`config.yml\` for the \`log_dir\` value
7084
- 2. Verify the directory exists (it's created automatically on first run)
7085
- 3. Check if a previous \`agent-gauntlet clean\` archived everything to \`previous/\`
7086
-
7087
- **Permissions:**
7088
- - The gauntlet needs write access to \`log_dir\`
7089
- - On some setups, the directory may not be writable
7090
-
7091
- ## \`base_branch\` Misconfiguration
7092
-
7093
- The \`base_branch\` (default: \`origin/main\`) is used for diff calculation. Wrong values cause:
7094
- - \`no_changes\` when there are actually changes (wrong base)
7095
- - Diff includes too many files (base too far back)
7096
-
7097
- **Verify:**
7098
- \`\`\`bash
7099
- git log --oneline origin/main..HEAD # Should show your commits
7100
- \`\`\`
7101
-
7102
- If using a different default branch:
7103
- \`\`\`yaml
7104
- base_branch: origin/develop
7105
- \`\`\`
7106
-
7107
- ## Config Precedence
7108
-
7109
- Configuration is loaded with this precedence (highest first):
7110
- 1. **Environment variables** (e.g., \`GAUNTLET_STOP_HOOK_ENABLED\`)
7111
- 2. **Project config** (\`.gauntlet/config.yml\`)
7112
- 3. **Global config** (\`~/.config/agent-gauntlet/config.yml\`)
7113
- 4. **Defaults** (built-in)
7114
-
7115
- ## Init Setup Problems
7116
-
7117
- ### "\`.gauntlet\` directory already exists"
7118
- \`agent-gauntlet init\` won't overwrite an existing \`.gauntlet/\` directory. Delete it first or manually edit.
7119
-
7120
- ### Git Not Initialized
7121
- Some features require a git repository. Run \`git init\` first.
7122
-
7123
- ### No Remote Configured
7124
- The \`base_branch\` (e.g., \`origin/main\`) requires a remote. Run \`git remote add origin <url>\`.
7125
-
7126
- ## Adapter Configuration
7127
-
7128
- Per-adapter settings are configured under \`cli.adapters\`:
7129
- \`\`\`yaml
7130
- cli:
7131
- default_preference:
7132
- - claude
7133
- adapters:
7134
- claude:
7135
- allow_tool_use: true
7136
- thinking_budget: medium # off, low, medium, high
7137
- \`\`\`
7138
-
7139
- **\`thinking_budget\` mapping:**
7140
- - Claude: off=0, low=8000, medium=16000, high=31999 tokens
7141
- - Codex: off=minimal, low=low, medium=medium, high=high
7142
- - Gemini: off=0, low=4096, medium=8192, high=24576 tokens
7143
-
7144
- ## Debug Logging
7145
-
7146
- Enable detailed logging in config:
7147
- \`\`\`yaml
7148
- debug_log:
7149
- enabled: true
7150
- max_size_mb: 10
7151
- \`\`\`
7152
-
7153
- This creates \`<log_dir>/.debug.log\` with timestamped events.
7154
- `,
7155
- "gate-troubleshooting.md": `# Gate Troubleshooting
7156
-
7157
- ## Check Gate Failures
7158
-
7159
- Check gates run shell commands (linters, test runners, etc.) and report pass/fail based on exit code.
7160
-
7161
- ### Common Failure Modes
7162
-
7163
- | Failure | Cause | Evidence |
7164
- |---------|-------|----------|
7165
- | Command not found | Binary not installed or not in PATH | Check gate log for "command not found" error |
7166
- | Non-zero exit code | Linter/test failures | Read the \`check_*.log\` file for specific errors |
7167
- | Timeout | Command exceeded configured timeout | Log shows SIGTERM; check \`timeout\` in check YAML |
7168
- | Output truncation | Command output exceeded 10MB buffer | Log may be cut off; increase timeout or reduce output |
7169
-
7170
- ### Reading Check Logs
7171
- - File pattern: \`<log_dir>/check_<CHECK_NAME>.log\`
7172
- - Contains raw stdout/stderr from the check command
7173
- - Format depends on the tool (linter output, test runner output, etc.)
7174
-
7175
- ### Rerun Commands
7176
- Check gates can define a \`rerun_command\` for verification runs. If set, the rerun uses this command instead of the original \`command\`.
7177
-
7178
- ## Review Gate Failures
7179
-
7180
- Review gates use AI CLI tools to review code changes.
7181
-
7182
- ### Common Failure Modes
7183
-
7184
- | Failure | Cause | Evidence |
7185
- |---------|-------|----------|
7186
- | No healthy adapters | All configured CLI tools are missing, unhealthy, or in cooldown | Run \`agent-gauntlet health\` |
7187
- | JSON parsing error | Adapter returned non-JSON output | Review log shows raw output instead of violations |
7188
- | Violations outside diff scope | Reviewer flagged code not in the current diff | Check violation \`file\` and \`line\` against changed files |
7189
- | Usage limit | API quota exceeded for the adapter | Look for "usage limit" in review log; adapter enters 1-hour cooldown |
7190
-
7191
- ### Reading Review JSON
7192
- - File pattern: \`<log_dir>/review_<REVIEW_NAME>_<ADAPTER>@<INDEX>.json\`
7193
- - Fields per violation:
7194
- - \`file\`: Source file path
7195
- - \`line\`: Line number
7196
- - \`issue\`: Description of the problem
7197
- - \`fix\`: Suggested fix
7198
- - \`priority\`: \`critical\`, \`high\`, \`medium\`, or \`low\`
7199
- - \`status\`: \`new\`, \`fixed\`, \`skipped\`
7200
- - Status \`skipped_prior_pass\` means this review slot passed on a previous run and was skipped for efficiency
7201
-
7202
- ### Diff Calculation
7203
- - **Local mode**: committed changes (base...HEAD) + uncommitted changes (HEAD) + untracked files
7204
- - **CI mode**: \`git diff GITHUB_BASE_REF...GITHUB_SHA\` (falls back to HEAD^...HEAD)
7205
- - **Rerun mode**: scoped to changes since last pass using \`working_tree_ref\` from \`.execution_state\`
7206
-
7207
- ## \`no_applicable_gates\`
7208
-
7209
- All configured gates were skipped because no changed files matched any entry point path.
7210
-
7211
- **Diagnosis:**
7212
- 1. Run \`agent-gauntlet detect\` to see which files changed and which gates match
7213
- 2. Check \`entry_points\` in \`config.yml\` — do the paths cover your changed files?
7214
- 3. Verify \`base_branch\` — if wrong, the diff may not include your changes
7215
-
7216
- ## \`no_changes\`
7217
-
7218
- No files changed relative to \`base_branch\`.
7219
-
7220
- **Diagnosis:**
7221
- 1. Check \`base_branch\` in \`config.yml\` (default: \`origin/main\`)
7222
- 2. Run \`git diff origin/main...HEAD --stat\` to verify
7223
- 3. If working on uncommitted changes, they are included in local mode but may not be in CI mode
7224
- 4. Check if a recent \`agent-gauntlet clean\` reset the execution state
7225
-
7226
- ## Parallel vs Sequential Execution
7227
-
7228
- ### Check Gates
7229
- - Each check gate has a \`parallel\` setting (default: \`false\`)
7230
- - Parallel checks run concurrently; sequential checks run one at a time
7231
- - \`allow_parallel\` in \`config.yml\` (default: \`true\`) is the global switch
7232
-
7233
- ### \`fail_fast\` Behavior
7234
- - Only applies to sequential check gates (\`parallel: false\`)
7235
- - When enabled, stops running remaining sequential gates after the first failure
7236
- - Cannot be combined with \`parallel: true\` (schema validation rejects this)
7237
-
7238
- ### Review Gates
7239
- - Each review gate independently controls parallelism for its own adapter dispatch
7240
- - When \`parallel: true\` (default) and \`num_reviews > 1\`, reviews run concurrently across adapters
7241
- - When \`parallel: false\`, reviews run sequentially
7242
-
7243
- ## Rerun / Verification Mode
7244
-
7245
- When the gauntlet detects existing logs in \`<log_dir>/\`, it enters **rerun mode** instead of a fresh run.
7246
-
7247
- ### How It Works
7248
- 1. Previous violations are loaded from existing \`review_*.json\` files
7249
- 2. Only violations at the configured threshold priority or higher are re-evaluated
7250
- 3. Check gates re-run their commands (or \`rerun_command\` if configured)
7251
- 4. Review gates scope their diff to changes since the last pass using \`working_tree_ref\` from \`.execution_state\`
7252
-
7253
- ### \`rerun_new_issue_threshold\`
7254
- - Config field: \`rerun_new_issue_threshold\` (default: \`medium\`)
7255
- - Controls which priority levels are re-evaluated: \`critical\` > \`high\` > \`medium\` > \`low\`
7256
- - Violations below the threshold are ignored in reruns
7257
-
7258
- ### Passed Slot Optimization
7259
- When \`num_reviews > 1\` in rerun mode:
7260
- - If all review slots passed previously: only slot 1 re-runs (safety latch)
7261
- - If some slots failed: only failed slots re-run; passed slots get \`skipped_prior_pass\`
7262
-
7263
- ### Why Violations Aren't Detected on Rerun
7264
- - The diff is scoped to changes since the last pass — if the violation is in unchanged code, it won't appear
7265
- - The threshold may filter out lower-priority violations
7266
- - Passed slots may be skipped entirely
7267
-
7268
- ## How to Read Gate Logs
7269
-
7270
- ### Console Logs
7271
- - Pattern: \`<log_dir>/console.*.log\` (highest number = latest run)
7272
- - Contains unified output from all gates for that run iteration
7273
- - Shows gate names, pass/fail status, and output file paths
7274
-
7275
- ### Debug Log
7276
- - File: \`<log_dir>/.debug.log\`
7277
- - Timestamped entries for every significant event
7278
- - Search for \`gate\`, \`check\`, \`review\`, or specific gate names
7279
-
7280
- ### Gate Result Status Values
7281
- - Check gates: \`pass\`, \`fail\`, \`error\`
7282
- - Review gates: \`pass\`, \`fail\`, \`error\`, \`skipped_prior_pass\`
7283
- `,
7284
- "lock-troubleshooting.md": `# Lock Troubleshooting
7285
-
7286
- ## \`lock_conflict\` — Another Run in Progress
7287
-
7288
- The gauntlet uses a lock file to prevent concurrent runs from interfering with each other.
7289
-
7290
- ### Lock File Details
7291
- - **File**: \`<log_dir>/.gauntlet-run.lock\`
7292
- - **Content**: PID of the process holding the lock
7293
- - **Created**: At the start of a gauntlet run (exclusive write — fails if file exists)
7294
- - **Released**: Always in a \`finally\` block (guaranteed cleanup on success, failure, or error)
7295
-
7296
- ### Diagnosing Lock Conflicts
7297
-
7298
- 1. Check if the lock file exists: \`<log_dir>/.gauntlet-run.lock\`
7299
- 2. Read the PID from the file
7300
- 3. Check if that process is alive:
7301
- - If alive: a gauntlet run is genuinely in progress — wait for it to finish
7302
- - If dead: the lock is stale (see below)
7303
-
7304
- ## Stale Lock Detection
7305
-
7306
- The gauntlet automatically detects and cleans stale locks:
7307
-
7308
- | Condition | Detection | Action |
7309
- |-----------|-----------|--------|
7310
- | PID is dead | \`kill(pid, 0)\` fails with ESRCH | Lock removed, retry once |
7311
- | PID unparseable, lock > 10 min old | File age check | Lock removed, retry once |
7312
- | PID alive | Process exists | Lock kept (genuine conflict) |
7313
-
7314
- **The gauntlet never steals a lock from a live process**, regardless of lock age.
7315
-
7316
- ## \`allow_parallel\` Config
7317
-
7318
- The \`allow_parallel\` config setting (default: \`true\`) controls whether gates can run in parallel **within** a single gauntlet run. It does **not** control concurrent gauntlet runs — that's what the lock file prevents.
7319
-
7320
- ## Marker Files
7321
-
7322
- ### \`.gauntlet-run.lock\`
7323
- - **Location**: \`<log_dir>/.gauntlet-run.lock\`
7324
- - **Purpose**: Prevent concurrent gauntlet runs
7325
- - **Lifecycle**: Created at run start, removed at run end (always in \`finally\`)
7326
-
7327
- ### \`.stop-hook-active\`
7328
- - **Location**: \`<log_dir>/.stop-hook-active\`
7329
- - **Purpose**: Prevent stop-hook recursion (see stop-hook-troubleshooting.md)
7330
- - **Content**: PID of the stop-hook process
7331
- - **Stale threshold**: 10 minutes
7332
- - **Lifecycle**: Created before stop-hook execution, removed after (always in \`finally\`)
7333
-
7334
- ## Manual Cleanup
7335
-
7336
- If a lock is stuck and the process is dead:
7337
-
7338
- \`\`\`bash
7339
- agent-gauntlet clean
7340
- \`\`\`
7341
-
7342
- This command:
7343
- 1. Archives current logs to \`<log_dir>/previous/\`
7344
- 2. Removes the lock file
7345
- 3. Removes the stop-hook marker file
7346
- 4. Resets execution state
7347
-
7348
- **Confirm with the user before running \`clean\`** — it archives all current logs and resets state, which means the next run starts fresh (no rerun mode).
7349
-
7350
- ## Troubleshooting Checklist
7351
-
7352
- 1. **Is another run actually in progress?** Check the PID in the lock file.
7353
- 2. **Is the process alive?** The gauntlet should auto-clean stale locks on retry.
7354
- 3. **Did a crash leave a stale lock?** Run \`agent-gauntlet clean\` to reset.
7355
- 4. **Is this happening repeatedly?** Check for processes spawning concurrent gauntlet runs (e.g., multiple IDE hooks firing simultaneously).
7356
- `,
7357
- "adapter-troubleshooting.md": `# Adapter Troubleshooting
7358
-
7359
- ## \`agent-gauntlet health\` Output
7360
-
7361
- Run \`agent-gauntlet health\` to check adapter status. Each adapter reports one of:
7362
-
7363
- | Status | Meaning |
7364
- |--------|---------|
7365
- | \`healthy\` | Binary found and available |
7366
- | \`missing\` | Binary not found in PATH |
7367
- | \`unhealthy\` | Binary found but not functional (auth issue, etc.) |
7368
-
7369
- ## Missing CLI Tools
7370
-
7371
- If an adapter reports \`missing\`:
7372
- 1. Verify the tool is installed
7373
- 2. Check that it's in your PATH: \`which claude\`, \`which gemini\`, \`which codex\`
7374
- 3. If installed but not in PATH, add the installation directory to your PATH
7375
-
7376
- Missing adapters are skipped during review gate dispatch with a "Skipping X: Missing" message.
7377
-
7378
- ## Authentication Issues
7379
-
7380
- If an adapter reports \`unhealthy\`:
7381
- 1. Check the tool's authentication: try running the CLI tool directly
7382
- 2. For Claude: \`claude --version\` (may need \`claude login\`)
7383
- 3. For Gemini: check Google Cloud authentication
7384
- 4. For Codex: check OpenAI authentication
7385
-
7386
- ## Usage Limits and 1-Hour Cooldown
7387
-
7388
- ### How Usage Limits Are Detected
7389
- The gauntlet checks adapter output for these keywords:
7390
- - "usage limit"
7391
- - "quota exceeded"
7392
- - "quota will reset"
7393
- - "credit balance is too low"
7394
- - "out of extra usage"
7395
- - "out of usage"
7396
-
7397
- ### Cooldown Mechanism
7398
- When a usage limit is detected:
7399
- 1. The adapter is marked **unhealthy** in \`.execution_state\`
7400
- 2. A **1-hour cooldown** starts (60 minutes)
7401
- 3. During cooldown, the adapter is skipped for review dispatch
7402
- 4. After cooldown expires, the adapter is re-probed and cleared if available
7403
-
7404
- ### Checking Cooldown Status
7405
- Read \`<log_dir>/.execution_state\` and look at the \`unhealthy_adapters\` field:
7406
-
7407
- \`\`\`json
7408
- {
7409
- "unhealthy_adapters": {
7410
- "claude": {
7411
- "marked_at": "2025-01-15T10:30:00.000Z",
7412
- "reason": "Usage limit exceeded"
7413
- }
6737
+ var PUSH_PR_SKILL_CONTENT = readSkillTemplate("push-pr.md");
6738
+ var FIX_PR_SKILL_CONTENT = readSkillTemplate("fix-pr.md");
6739
+ var GAUNTLET_STATUS_SKILL_CONTENT = readSkillTemplate("status.md");
6740
+ var HELP_SKILL_BUNDLE = {
6741
+ content: readSkillTemplate("help-skill.md"),
6742
+ references: {
6743
+ "stop-hook-troubleshooting.md": readSkillTemplate("help-ref-stop-hook-troubleshooting.md"),
6744
+ "config-troubleshooting.md": readSkillTemplate("help-ref-config-troubleshooting.md"),
6745
+ "gate-troubleshooting.md": readSkillTemplate("help-ref-gate-troubleshooting.md"),
6746
+ "lock-troubleshooting.md": readSkillTemplate("help-ref-lock-troubleshooting.md"),
6747
+ "adapter-troubleshooting.md": readSkillTemplate("help-ref-adapter-troubleshooting.md"),
6748
+ "ci-pr-troubleshooting.md": readSkillTemplate("help-ref-ci-pr-troubleshooting.md")
7414
6749
  }
7415
- }
7416
- \`\`\`
7417
-
7418
- - \`marked_at\`: When the cooldown started (ISO 8601)
7419
- - Cooldown expires 60 minutes after \`marked_at\`
7420
-
7421
- ### All Adapters in Cooldown
7422
- If every configured adapter is in cooldown, review gates will fail with "no healthy adapters". Wait for the cooldown to expire or resolve the usage limit.
7423
-
7424
- ## \`cli.default_preference\` and Adapter Selection
7425
-
7426
- The \`cli.default_preference\` array in \`config.yml\` determines:
7427
- 1. **Which adapters are available** for review dispatch
7428
- 2. **Selection order** for round-robin assignment
7429
-
7430
- Review gates can override with \`cli_preference\` but those tools must also be in \`default_preference\`.
7431
-
7432
- \`\`\`yaml
7433
- cli:
7434
- default_preference:
7435
- - claude
7436
- - gemini
7437
- \`\`\`
7438
-
7439
- ## \`allow_tool_use\` and \`thinking_budget\` Settings
7440
-
7441
- Per-adapter settings in \`config.yml\`:
7442
-
7443
- \`\`\`yaml
7444
- cli:
7445
- adapters:
7446
- claude:
7447
- allow_tool_use: true # Whether the adapter can use tools during review
7448
- thinking_budget: medium # off, low, medium, high
7449
- \`\`\`
7450
-
7451
- ### \`thinking_budget\` Token Mapping
7452
-
7453
- | Level | Claude | Codex | Gemini |
7454
- |-------|--------|-------|--------|
7455
- | \`off\` | 0 | minimal | 0 |
7456
- | \`low\` | 8,000 | low | 4,096 |
7457
- | \`medium\` | 16,000 | medium | 8,192 |
7458
- | \`high\` | 31,999 | high | 24,576 |
7459
-
7460
- ## \`.execution_state\` File
7461
-
7462
- The \`.execution_state\` file in \`<log_dir>/\` tracks run context:
7463
-
7464
- \`\`\`json
7465
- {
7466
- "last_run_completed_at": "2025-01-15T10:30:00.000Z",
7467
- "branch": "feature/my-branch",
7468
- "commit": "abc123",
7469
- "working_tree_ref": "def456",
7470
- "unhealthy_adapters": {}
7471
- }
7472
- \`\`\`
7473
-
7474
- | Field | Purpose |
7475
- |-------|---------|
7476
- | \`last_run_completed_at\` | When the last successful run finished |
7477
- | \`branch\` | Git branch at last completion |
7478
- | \`commit\` | HEAD SHA at last completion |
7479
- | \`working_tree_ref\` | Stash SHA of working tree (used for rerun diff scoping) |
7480
- | \`unhealthy_adapters\` | Map of adapter name to cooldown info |
7481
-
7482
- This file is:
7483
- - Written after successful execution
7484
- - Preserved across runs
7485
- - Auto-cleaned when the branch changes or commit is merged
7486
- - Deleted by \`agent-gauntlet clean\`
7487
-
7488
- ## Troubleshooting Checklist
7489
-
7490
- 1. **Run \`agent-gauntlet health\`** to see overall adapter status
7491
- 2. **Check \`.execution_state\`** for cooldown entries
7492
- 3. **Verify \`cli.default_preference\`** includes the adapters you expect
7493
- 4. **Try the CLI tool directly** (e.g., \`claude --version\`) to isolate the issue
7494
- 5. **Check for usage limit messages** in review logs (\`review_*.log\`)
7495
- `,
7496
- "ci-pr-troubleshooting.md": `# CI/PR Troubleshooting
7497
-
7498
- ## \`pr_push_required\`
7499
-
7500
- Gates passed but the stop hook detected that a PR needs to be created or updated.
7501
-
7502
- **When this happens:**
7503
- - \`auto_push_pr: true\` is set in \`stop_hook\` config
7504
- - Gates have passed
7505
- - No PR exists for the current branch, or the PR is out of date
7506
-
7507
- **Resolution:**
7508
- 1. Commit and push your changes
7509
- 2. Create a PR: \`gh pr create\` or use \`/gauntlet-push-pr\`
7510
- 3. The next stop-hook invocation will check PR/CI status instead of re-running gates
7511
-
7512
- ## CI Status Values
7513
-
7514
- | Status | Message | Blocking? |
7515
- |--------|---------|-----------|
7516
- | \`ci_pending\` | CI checks still running | Yes — agent waits |
7517
- | \`ci_failed\` | CI failed or review changes requested | Yes — must fix |
7518
- | \`ci_passed\` | All checks completed, no blocking reviews | No — stop allowed |
7519
- | \`validation_required\` | Changes need validation | Yes — must validate |
7520
-
7521
- ## \`auto_push_pr\` and \`auto_fix_pr\` Configuration
7522
-
7523
- \`\`\`yaml
7524
- stop_hook:
7525
- auto_push_pr: true # Check PR status after gates pass
7526
- auto_fix_pr: true # Wait for CI and enable fix workflow
7527
- \`\`\`
7528
-
7529
- **Dependency:** \`auto_fix_pr\` requires \`auto_push_pr\`. If \`auto_fix_pr: true\` but \`auto_push_pr: false\`, \`auto_fix_pr\` is forced to \`false\` with a warning.
7530
-
7531
- **Environment variable overrides:**
7532
- - \`GAUNTLET_AUTO_PUSH_PR=true/false\`
7533
- - \`GAUNTLET_AUTO_FIX_PR=true/false\`
7534
-
7535
- ## CI Wait Mechanism (\`wait-ci\`)
7536
-
7537
- ### How It Works
7538
- 1. After gates pass and PR is pushed, the stop hook enters CI wait mode
7539
- 2. It polls GitHub CI status using \`gh pr checks\`
7540
- 3. Polls every **15 seconds** (default)
7541
- 4. Times out after **270 seconds** (4.5 minutes, default)
7542
- 5. Up to **3 attempts** total across stop-hook invocations
7543
-
7544
- ### Attempt Tracking
7545
- - File: \`<log_dir>/.ci-wait-attempts\`
7546
- - Incremented on each CI wait invocation
7547
- - When attempts >= 3: returns an error and allows the stop
7548
-
7549
- ### What \`wait-ci\` Checks
7550
-
7551
- **CI Checks:**
7552
- - Runs \`gh pr checks --json name,state,link\`
7553
- - Check states: \`PENDING\`, \`QUEUED\`, \`IN_PROGRESS\`, \`SUCCESS\`, \`FAILURE\`
7554
- - All checks must reach \`SUCCESS\` for \`ci_passed\`
7555
-
7556
- **Blocking Reviews:**
7557
- - Queries \`gh api repos/OWNER/REPO/pulls/PR_NUM/reviews\`
7558
- - \`CHANGES_REQUESTED\` state is blocking
7559
- - Latest review per author takes precedence (later reviews override earlier)
7560
- - If any author's latest review is \`CHANGES_REQUESTED\`: \`ci_failed\`
7561
-
7562
- ### Failed Check Logs
7563
- - For GitHub Actions: retrieves error output via \`gh run view RUN_ID --log-failed\`
7564
- - For external checks (no run ID): no logs available
7565
- - Output limited to last 100 lines
7566
-
7567
- ## CI Detection Environment Variables
7568
-
7569
- The gauntlet detects CI environments using:
7570
-
7571
- | Variable | Detection |
7572
- |----------|-----------|
7573
- | \`CI=true\` | Generic CI environment |
7574
- | \`GITHUB_ACTIONS=true\` | GitHub Actions specifically |
7575
- | \`GITHUB_BASE_REF\` | PR base branch in GitHub Actions (overrides \`base_branch\` for diff) |
7576
- | \`GITHUB_SHA\` | Commit SHA in GitHub Actions (used for diff calculation) |
7577
-
7578
- **CI mode differences:**
7579
- - Diff uses \`GITHUB_BASE_REF...GITHUB_SHA\` instead of local branch comparison
7580
- - Falls back to \`HEAD^...HEAD\` if CI variables are incomplete
7581
-
7582
- ## Troubleshooting Checklist
7583
-
7584
- ### \`ci_pending\` — CI Still Running
7585
- 1. Check \`gh pr checks\` to see which checks are still pending
7586
- 2. Wait and try again — the stop hook will re-poll on next attempt
7587
- 3. After 3 attempts, it will time out and allow the stop
7588
-
7589
- ### \`ci_failed\` — CI Failed
7590
- 1. Run \`gh pr checks\` to see failed checks
7591
- 2. Run \`gh pr view --comments\` to see review feedback
7592
- 3. Check for \`CHANGES_REQUESTED\` reviews: \`gh api repos/OWNER/REPO/pulls/PR_NUM/reviews\`
7593
- 4. Fix the issues, commit, and push
7594
- 5. The stop hook will re-check on next invocation
7595
-
7596
- ### PR-Related Issues
7597
- - **No PR for branch**: \`gh pr view\` returns an error — create a PR first
7598
- - **PR out of date**: Push latest changes before CI can pass
7599
- - **\`gh\` CLI not installed**: CI features require the GitHub CLI (\`gh\`)
7600
- `
7601
- };
7602
- return { content, references };
7603
- }
7604
- var HELP_SKILL_BUNDLE = buildHelpSkillBundle();
6750
+ };
6751
+ var SETUP_SKILL_CONTENT = readSkillTemplate("setup-skill.md");
6752
+ var CHECK_CATALOG_REFERENCE = readSkillTemplate("check-catalog.md");
7605
6753
  var SKILL_DEFINITIONS = [
7606
6754
  { action: "run", content: GAUNTLET_RUN_SKILL_CONTENT },
7607
6755
  { action: "check", content: GAUNTLET_CHECK_SKILL_CONTENT },
@@ -7613,10 +6761,16 @@ var SKILL_DEFINITIONS = [
7613
6761
  content: HELP_SKILL_BUNDLE.content,
7614
6762
  references: HELP_SKILL_BUNDLE.references,
7615
6763
  skillsOnly: true
6764
+ },
6765
+ {
6766
+ action: "setup",
6767
+ content: SETUP_SKILL_CONTENT,
6768
+ references: { "check-catalog.md": CHECK_CATALOG_REFERENCE },
6769
+ skillsOnly: true
7616
6770
  }
7617
6771
  ];
7618
6772
  function registerInitCommand(program) {
7619
- program.command("init").description("Initialize .gauntlet configuration").option("-y, --yes", "Skip prompts and use defaults (all available CLIs, source: ., no extra checks)").action(async (options) => {
6773
+ program.command("init").description("Initialize .gauntlet configuration").option("-y, --yes", "Skip prompts and use defaults (all available CLIs)").action(async (options) => {
7620
6774
  const projectRoot = process.cwd();
7621
6775
  const targetDir = path23.join(projectRoot, ".gauntlet");
7622
6776
  if (await exists(targetDir)) {
@@ -7634,60 +6788,20 @@ function registerInitCommand(program) {
7634
6788
  console.log();
7635
6789
  return;
7636
6790
  }
7637
- let config;
7638
- if (options.yes) {
7639
- config = {
7640
- baseBranch: "origin/main",
7641
- sourceDir: ".",
7642
- lintCmd: null,
7643
- testCmd: null,
7644
- selectedAdapters: availableAdapters
7645
- };
7646
- } else {
7647
- config = await promptForConfig(availableAdapters);
7648
- }
6791
+ const baseBranch = await detectBaseBranch();
7649
6792
  await fs25.mkdir(targetDir);
7650
6793
  await fs25.mkdir(path23.join(targetDir, "checks"));
7651
6794
  await fs25.mkdir(path23.join(targetDir, "reviews"));
7652
- const configContent = generateConfigYml(config);
7653
- await fs25.writeFile(path23.join(targetDir, "config.yml"), configContent);
7654
- console.log(chalk9.green("Created .gauntlet/config.yml"));
7655
- if (config.lintCmd !== null) {
7656
- const lintContent = `name: lint
7657
- command: ${config.lintCmd || "# command: TODO - add your lint command (e.g., npm run lint)"}
7658
- # parallel: false
7659
- # run_in_ci: true
7660
- # run_locally: true
7661
- # timeout: 300
7662
- `;
7663
- await fs25.writeFile(path23.join(targetDir, "checks", "lint.yml"), lintContent);
7664
- console.log(chalk9.green("Created .gauntlet/checks/lint.yml"));
7665
- }
7666
- if (config.testCmd !== null) {
7667
- const testContent = `name: unit-tests
7668
- command: ${config.testCmd || "# command: TODO - add your test command (e.g., npm test)"}
7669
- # parallel: false
7670
- # run_in_ci: true
7671
- # run_locally: true
7672
- # timeout: 300
7673
- `;
7674
- await fs25.writeFile(path23.join(targetDir, "checks", "unit-tests.yml"), testContent);
7675
- console.log(chalk9.green("Created .gauntlet/checks/unit-tests.yml"));
7676
- }
7677
- const reviewYamlContent = `builtin: code-quality
7678
- num_reviews: 2
7679
- `;
7680
- await fs25.writeFile(path23.join(targetDir, "reviews", "code-quality.yml"), reviewYamlContent);
7681
- console.log(chalk9.green("Created .gauntlet/reviews/code-quality.yml"));
7682
- await copyStatusScript(targetDir);
7683
6795
  const commands = SKILL_DEFINITIONS.map((skill) => ({
7684
6796
  action: skill.action,
7685
6797
  content: skill.content,
7686
6798
  ..."references" in skill ? { references: skill.references } : {},
7687
6799
  ..."skillsOnly" in skill ? { skillsOnly: skill.skillsOnly } : {}
7688
6800
  }));
6801
+ let installedNames;
7689
6802
  if (options.yes) {
7690
- const adaptersToInstall = config.selectedAdapters.filter((a) => a.getProjectCommandDir() !== null || a.getProjectSkillDir() !== null);
6803
+ installedNames = availableAdapters.map((a) => a.name);
6804
+ const adaptersToInstall = availableAdapters.filter((a) => a.getProjectCommandDir() !== null || a.getProjectSkillDir() !== null);
7691
6805
  if (adaptersToInstall.length > 0) {
7692
6806
  await installCommands({
7693
6807
  level: "project",
@@ -7697,16 +6811,71 @@ num_reviews: 2
7697
6811
  });
7698
6812
  }
7699
6813
  } else {
7700
- await promptAndInstallCommands({
6814
+ installedNames = await promptAndInstallCommands({
7701
6815
  projectRoot,
7702
6816
  commands,
7703
6817
  availableAdapters
7704
6818
  });
7705
6819
  }
7706
- if (!options.yes) {
7707
- await promptAndInstallStopHook(projectRoot);
6820
+ const cliList = availableAdapters.map((a) => ` - ${a.name}`).join(`
6821
+ `);
6822
+ const adapterSettings = buildAdapterSettingsBlock(availableAdapters);
6823
+ const configContent = `base_branch: ${baseBranch}
6824
+ log_dir: gauntlet_logs
6825
+
6826
+ # Run gates in parallel when possible (default: true)
6827
+ # allow_parallel: true
6828
+
6829
+ cli:
6830
+ default_preference:
6831
+ ${cliList}
6832
+ ${adapterSettings}
6833
+ # entry_points configured by /gauntlet-setup
6834
+ entry_points: []
6835
+ `;
6836
+ await fs25.writeFile(path23.join(targetDir, "config.yml"), configContent);
6837
+ console.log(chalk9.green("Created .gauntlet/config.yml"));
6838
+ const reviewYamlContent = `builtin: code-quality
6839
+ num_reviews: 1
6840
+ `;
6841
+ await fs25.writeFile(path23.join(targetDir, "reviews", "code-quality.yml"), reviewYamlContent);
6842
+ console.log(chalk9.green("Created .gauntlet/reviews/code-quality.yml"));
6843
+ await copyStatusScript(targetDir);
6844
+ if (installedNames.includes("claude")) {
6845
+ await installStopHook(projectRoot);
6846
+ }
6847
+ if (installedNames.includes("cursor")) {
6848
+ await installCursorStopHook(projectRoot);
7708
6849
  }
6850
+ console.log();
6851
+ console.log(chalk9.bold("Run /gauntlet-setup to configure your checks and reviews"));
6852
+ });
6853
+ }
6854
/**
 * Detect the base branch to write into a freshly generated config.
 *
 * Asks git which ref `refs/remotes/origin/HEAD` points at and returns it with
 * the leading `refs/remotes/` stripped (e.g. `origin/main`). Any failure
 * (git missing, not a repository, `origin/HEAD` not set, empty output) is
 * treated as "unknown" and yields the `origin/main` default.
 *
 * @returns {Promise<string>} The detected `<remote>/<branch>` ref, or
 *   `"origin/main"` when detection is not possible.
 */
async function detectBaseBranch() {
  const fallbackBranch = "origin/main";
  const remotePrefix = "refs/remotes/";
  try {
    const { execFileSync } = await import("node:child_process");
    // Invoke git directly instead of through a shell: the previous
    // `2>/dev/null` redirect only exists in POSIX shells, so on Windows the
    // command itself failed and detection always fell back to the default.
    // stderr is silenced portably through the `stdio` option instead.
    const ref = execFileSync("git", ["symbolic-ref", "refs/remotes/origin/HEAD"], {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"]
    }).trim();
    if (ref) {
      // Strip the prefix only when it is actually a prefix.
      return ref.startsWith(remotePrefix) ? ref.slice(remotePrefix.length) : ref;
    }
  } catch {
    // Best effort: fall through to the default branch below.
  }
  return fallbackBranch;
}
6864
+ function buildAdapterSettingsBlock(adapters3) {
6865
+ const items = adapters3.filter((a) => ADAPTER_CONFIG[a.name]);
6866
+ if (items.length === 0)
6867
+ return "";
6868
+ const lines = items.map((a) => {
6869
+ const c = ADAPTER_CONFIG[a.name];
6870
+ return ` ${a.name}:
6871
+ allow_tool_use: ${c?.allow_tool_use}
6872
+ thinking_budget: ${c?.thinking_budget}`;
7709
6873
  });
6874
+ return ` # Recommended settings (see docs/eval-results.md)
6875
+ adapters:
6876
+ ${lines.join(`
6877
+ `)}
6878
+ `;
7710
6879
  }
7711
6880
  async function detectAvailableCLIs() {
7712
6881
  const allAdapters = getAllAdapters();
@@ -7722,68 +6891,6 @@ async function detectAvailableCLIs() {
7722
6891
  }
7723
6892
  return available;
7724
6893
  }
7725
- async function promptForConfig(availableAdapters) {
7726
- const rl = readline.createInterface({
7727
- input: process.stdin,
7728
- output: process.stdout
7729
- });
7730
- const question = makeQuestion(rl);
7731
- try {
7732
- console.log();
7733
- console.log("Which CLIs would you like to use?");
7734
- availableAdapters.forEach((adapter, i) => {
7735
- console.log(` ${i + 1}) ${adapter.name}`);
7736
- });
7737
- console.log(` ${availableAdapters.length + 1}) All`);
7738
- let selectedAdapters = [];
7739
- let attempts = 0;
7740
- while (true) {
7741
- attempts++;
7742
- if (attempts > MAX_PROMPT_ATTEMPTS)
7743
- throw new Error("Too many invalid attempts");
7744
- const answer = await question(`(comma-separated, e.g., 1,2): `);
7745
- const selections = answer.split(",").map((s) => s.trim()).filter((s) => s);
7746
- if (selections.length === 0) {
7747
- selectedAdapters = availableAdapters;
7748
- break;
7749
- }
7750
- const chosen = parseSelections(selections, availableAdapters);
7751
- if (chosen) {
7752
- selectedAdapters = chosen;
7753
- break;
7754
- }
7755
- }
7756
- console.log();
7757
- const baseBranchInput = await question("Enter your base branch (e.g., origin/main, origin/develop) [default: origin/main]: ");
7758
- const baseBranch = baseBranchInput || "origin/main";
7759
- console.log();
7760
- const sourceDirInput = await question("Enter your source directory (e.g., src, lib, .) [default: .]: ");
7761
- const sourceDir = sourceDirInput || ".";
7762
- console.log();
7763
- const addLint = await question("Would you like to add a linting check? [y/N]: ");
7764
- let lintCmd = null;
7765
- if (addLint.toLowerCase().startsWith("y")) {
7766
- lintCmd = await question("Enter lint command (blank to fill later): ");
7767
- }
7768
- console.log();
7769
- const addTest = await question("Would you like to add a unit test check? [y/N]: ");
7770
- let testCmd = null;
7771
- if (addTest.toLowerCase().startsWith("y")) {
7772
- testCmd = await question("Enter test command (blank to fill later): ");
7773
- }
7774
- rl.close();
7775
- return {
7776
- baseBranch,
7777
- sourceDir,
7778
- lintCmd,
7779
- testCmd,
7780
- selectedAdapters
7781
- };
7782
- } catch (error) {
7783
- rl.close();
7784
- throw error;
7785
- }
7786
- }
7787
6894
  function parseSelections(selections, adapters3) {
7788
6895
  const chosen = [];
7789
6896
  for (const sel of selections) {
@@ -7802,56 +6909,6 @@ function parseSelections(selections, adapters3) {
7802
6909
  }
7803
6910
  return [...new Set(chosen)];
7804
6911
  }
7805
- function buildAdapterSettings(adapters3) {
7806
- const items = adapters3.filter((a) => ADAPTER_CONFIG[a.name]);
7807
- if (items.length === 0)
7808
- return "";
7809
- const lines = items.map((a) => {
7810
- const c = ADAPTER_CONFIG[a.name];
7811
- return ` ${a.name}:
7812
- allow_tool_use: ${c?.allow_tool_use}
7813
- thinking_budget: ${c?.thinking_budget}`;
7814
- });
7815
- return `
7816
- # Recommended settings (see docs/eval-results.md)
7817
- adapters:
7818
- ${lines.join(`
7819
- `)}
7820
- `;
7821
- }
7822
- function generateConfigYml(config) {
7823
- const cliList = config.selectedAdapters.map((a) => ` - ${a.name}`).join(`
7824
- `);
7825
- const adapterSettings = buildAdapterSettings(config.selectedAdapters);
7826
- let entryPoints = "";
7827
- if (config.lintCmd !== null || config.testCmd !== null) {
7828
- entryPoints += ` - path: "${config.sourceDir}"
7829
- checks:
7830
- `;
7831
- if (config.lintCmd !== null)
7832
- entryPoints += ` - lint
7833
- `;
7834
- if (config.testCmd !== null)
7835
- entryPoints += ` - unit-tests
7836
- `;
7837
- }
7838
- entryPoints += ` - path: "."
7839
- reviews:
7840
- - code-quality`;
7841
- return `base_branch: ${config.baseBranch}
7842
- log_dir: gauntlet_logs
7843
-
7844
- # Run gates in parallel when possible (default: true)
7845
- # allow_parallel: true
7846
-
7847
- cli:
7848
- default_preference:
7849
- ${cliList}
7850
- ${adapterSettings}
7851
- entry_points:
7852
- ${entryPoints}
7853
- `;
7854
- }
7855
6912
  async function copyStatusScript(targetDir) {
7856
6913
  const statusScriptDir = path23.join(targetDir, "skills", "gauntlet", "status", "scripts");
7857
6914
  const statusScriptPath = path23.join(statusScriptDir, "status.ts");
@@ -7918,7 +6975,7 @@ async function promptAgentSelection(questionFn, installableAdapters) {
7918
6975
  async function promptAndInstallCommands(options) {
7919
6976
  const { projectRoot, commands, availableAdapters } = options;
7920
6977
  if (availableAdapters.length === 0)
7921
- return;
6978
+ return [];
7922
6979
  const rl = readline.createInterface({
7923
6980
  input: process.stdin,
7924
6981
  output: process.stdout
@@ -7934,13 +6991,13 @@ async function promptAndInstallCommands(options) {
7934
6991
  console.log(chalk9.dim(`
7935
6992
  Skipping command installation.`));
7936
6993
  rl.close();
7937
- return;
6994
+ return [];
7938
6995
  }
7939
6996
  const installableAdapters = installLevel === "project" ? availableAdapters.filter((a) => a.getProjectCommandDir() !== null || a.getProjectSkillDir() !== null) : availableAdapters.filter((a) => a.getUserCommandDir() !== null || a.getUserSkillDir() !== null);
7940
6997
  if (installableAdapters.length === 0) {
7941
6998
  console.log(chalk9.yellow(`No available agents support ${installLevel}-level commands.`));
7942
6999
  rl.close();
7943
- return;
7000
+ return [];
7944
7001
  }
7945
7002
  const selectedAgents = await promptAgentSelection(question, installableAdapters);
7946
7003
  rl.close();
@@ -7950,6 +7007,7 @@ Skipping command installation.`));
7950
7007
  projectRoot,
7951
7008
  commands
7952
7009
  });
7010
+ return selectedAgents;
7953
7011
  } catch (error) {
7954
7012
  rl.close();
7955
7013
  throw error;
@@ -8056,33 +7114,17 @@ var STOP_HOOK_CONFIG = {
8056
7114
  ]
8057
7115
  }
8058
7116
  };
8059
- function isInteractive() {
8060
- return Boolean(process.stdin.isTTY);
8061
- }
8062
- async function promptAndInstallStopHook(projectRoot) {
8063
- if (!isInteractive()) {
8064
- return;
8065
- }
8066
- const rl = readline.createInterface({
8067
- input: process.stdin,
8068
- output: process.stdout
8069
- });
8070
- const question = makeQuestion(rl);
8071
- try {
8072
- console.log();
8073
- const answer = await question("Install Claude Code stop hook? (y/n): ");
8074
- const shouldInstall = answer.toLowerCase() === "y" || answer.toLowerCase() === "yes";
8075
- if (!shouldInstall) {
8076
- rl.close();
8077
- return;
8078
- }
8079
- rl.close();
8080
- await installStopHook(projectRoot);
8081
- } catch (error) {
8082
- rl.close();
8083
- throw error;
7117
+ var CURSOR_STOP_HOOK_CONFIG = {
7118
+ version: 1,
7119
+ hooks: {
7120
+ stop: [
7121
+ {
7122
+ command: "agent-gauntlet stop-hook",
7123
+ loop_limit: 10
7124
+ }
7125
+ ]
8084
7126
  }
8085
- }
7127
+ };
8086
7128
  async function installStopHook(projectRoot) {
8087
7129
  const claudeDir = path23.join(projectRoot, ".claude");
8088
7130
  const settingsPath = path23.join(claudeDir, "settings.local.json");
@@ -8115,6 +7157,42 @@ async function installStopHook(projectRoot) {
8115
7157
  `);
8116
7158
  console.log(chalk9.green("Stop hook installed - gauntlet will run automatically when agent stops"));
8117
7159
  }
7160
/**
 * Register the gauntlet stop hook in the project's Cursor hook config.
 *
 * Creates `<projectRoot>/.cursor/` if needed, then merges the entries of
 * `CURSOR_STOP_HOOK_CONFIG.hooks.stop` into `<projectRoot>/.cursor/hooks.json`:
 * existing top-level keys, an existing `version`, other hook types and other
 * stop hooks are preserved. If a stop hook whose `command` is
 * `agent-gauntlet stop-hook` is already present, the file is left untouched.
 *
 * An existing file that cannot be parsed, or that parses to something other
 * than a JSON object, is treated as an empty config and overwritten.
 *
 * @param {string} projectRoot - Directory that contains (or will contain) `.cursor/`.
 * @returns {Promise<void>}
 */
async function installCursorStopHook(projectRoot) {
  const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
  const cursorDir = path23.join(projectRoot, ".cursor");
  const hooksPath = path23.join(cursorDir, "hooks.json");
  await fs25.mkdir(cursorDir, { recursive: true });
  let existingConfig = {};
  if (await exists(hooksPath)) {
    try {
      const parsed = JSON.parse(await fs25.readFile(hooksPath, "utf-8"));
      // Valid JSON is not necessarily an object: `null` would crash on
      // property access below, and arrays/strings would be spread into a
      // corrupted config. Only accept plain objects.
      if (isPlainObject(parsed)) {
        existingConfig = parsed;
      }
    } catch {
      // Unreadable or malformed file: start from an empty config (best effort).
      existingConfig = {};
    }
  }
  const existingHooks = isPlainObject(existingConfig.hooks) ? existingConfig.hooks : {};
  const existingStopHooks = Array.isArray(existingHooks.stop) ? existingHooks.stop : [];
  const hookExists = existingStopHooks.some((hook) => hook?.command === "agent-gauntlet stop-hook");
  if (hookExists) {
    console.log(chalk9.dim("Cursor stop hook already installed"));
    return;
  }
  const mergedConfig = {
    ...existingConfig,
    // Keep a version the user already declared; otherwise use ours.
    version: existingConfig.version ?? CURSOR_STOP_HOOK_CONFIG.version,
    hooks: {
      ...existingHooks,
      stop: [...existingStopHooks, ...CURSOR_STOP_HOOK_CONFIG.hooks.stop]
    }
  };
  await fs25.writeFile(hooksPath, `${JSON.stringify(mergedConfig, null, 2)}
`);
  console.log(chalk9.green("Cursor stop hook installed - gauntlet will run automatically when agent stops"));
}
8118
7196
  // src/commands/list.ts
8119
7197
  import chalk10 from "chalk";
8120
7198
  function registerListCommand(program) {
@@ -9223,4 +8301,4 @@ if (process.argv.length < 3) {
9223
8301
  }
9224
8302
  program.parse(process.argv);
9225
8303
 
9226
- //# debugId=9E5D337E8B644EE964756E2164756E21
8304
+ //# debugId=5D4AF6110EC21D0564756E2164756E21