agent-gauntlet 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +151 -1073
- package/dist/index.js.map +4 -4
- package/dist/skill-templates/check-catalog.md +356 -0
- package/dist/skill-templates/fix-pr.md +14 -0
- package/dist/skill-templates/help-ref-adapter-troubleshooting.md +138 -0
- package/dist/skill-templates/help-ref-ci-pr-troubleshooting.md +104 -0
- package/dist/skill-templates/help-ref-config-troubleshooting.md +153 -0
- package/dist/skill-templates/help-ref-gate-troubleshooting.md +128 -0
- package/dist/skill-templates/help-ref-lock-troubleshooting.md +72 -0
- package/dist/skill-templates/help-ref-stop-hook-troubleshooting.md +116 -0
- package/dist/skill-templates/help-skill.md +90 -0
- package/dist/skill-templates/push-pr.md +11 -0
- package/dist/skill-templates/setup-skill.md +209 -0
- package/dist/skill-templates/status.md +35 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import { Command } from "commander";
|
|
|
7
7
|
// package.json
|
|
8
8
|
var package_default = {
|
|
9
9
|
name: "agent-gauntlet",
|
|
10
|
-
version: "0.11.0",
|
|
10
|
+
version: "0.12.0",
|
|
11
11
|
description: "A CLI tool for testing AI coding agents",
|
|
12
12
|
license: "Apache-2.0",
|
|
13
13
|
author: "Paul Caplan",
|
|
@@ -5383,12 +5383,14 @@ async function shouldAutoClean(logDir, baseBranch) {
|
|
|
5383
5383
|
} catch {
|
|
5384
5384
|
return { clean: false };
|
|
5385
5385
|
}
|
|
5386
|
-
|
|
5387
|
-
|
|
5388
|
-
|
|
5389
|
-
|
|
5390
|
-
|
|
5391
|
-
|
|
5386
|
+
if (!state.working_tree_ref || state.working_tree_ref === state.commit) {
|
|
5387
|
+
try {
|
|
5388
|
+
const isMerged = await isCommitInBranch(state.commit, baseBranch);
|
|
5389
|
+
if (isMerged) {
|
|
5390
|
+
return { clean: true, reason: "commit merged", resetState: true };
|
|
5391
|
+
}
|
|
5392
|
+
} catch {}
|
|
5393
|
+
}
|
|
5392
5394
|
return { clean: false };
|
|
5393
5395
|
}
|
|
5394
5396
|
async function performAutoClean(logDir, result, maxPreviousLogs = 3) {
|
|
@@ -6632,10 +6634,17 @@ function registerHelpCommand(program) {
|
|
|
6632
6634
|
});
|
|
6633
6635
|
}
|
|
6634
6636
|
// src/commands/init.ts
|
|
6637
|
+
import { readFileSync } from "node:fs";
|
|
6635
6638
|
import fs25 from "node:fs/promises";
|
|
6636
6639
|
import path23 from "node:path";
|
|
6637
6640
|
import readline from "node:readline";
|
|
6641
|
+
import { fileURLToPath } from "node:url";
|
|
6638
6642
|
import chalk9 from "chalk";
|
|
6643
|
+
var __dirname2 = path23.dirname(fileURLToPath(import.meta.url));
|
|
6644
|
+
function readSkillTemplate(filename) {
|
|
6645
|
+
const templatePath = path23.join(__dirname2, "skill-templates", filename);
|
|
6646
|
+
return readFileSync(templatePath, "utf-8");
|
|
6647
|
+
}
|
|
6639
6648
|
var MAX_PROMPT_ATTEMPTS = 10;
|
|
6640
6649
|
function makeQuestion(rl) {
|
|
6641
6650
|
return (prompt) => new Promise((resolve) => rl.question(prompt, (a) => resolve(a?.trim() ?? "")));
|
|
@@ -6725,883 +6734,22 @@ ${steps.join(`
|
|
|
6725
6734
|
}
|
|
6726
6735
|
var GAUNTLET_RUN_SKILL_CONTENT = buildGauntletSkillContent("run");
|
|
6727
6736
|
var GAUNTLET_CHECK_SKILL_CONTENT = buildGauntletSkillContent("check");
|
|
6728
|
-
var PUSH_PR_SKILL_CONTENT =
|
|
6729
|
-
|
|
6730
|
-
|
|
6731
|
-
|
|
6732
|
-
|
|
6733
|
-
|
|
6734
|
-
|
|
6735
|
-
|
|
6736
|
-
|
|
6737
|
-
|
|
6738
|
-
|
|
6739
|
-
|
|
6740
|
-
var FIX_PR_SKILL_CONTENT = `---
|
|
6741
|
-
name: gauntlet-fix-pr
|
|
6742
|
-
description: Fix CI failures or address review comments on a pull request
|
|
6743
|
-
disable-model-invocation: true
|
|
6744
|
-
allowed-tools: Bash
|
|
6745
|
-
---
|
|
6746
|
-
|
|
6747
|
-
# /gauntlet-fix-pr
|
|
6748
|
-
Fix CI failures or address review comments on the current pull request.
|
|
6749
|
-
|
|
6750
|
-
1. Check CI status and review comments: \`gh pr checks\` and \`gh pr view --comments\`
|
|
6751
|
-
2. Fix any failing checks or address reviewer feedback
|
|
6752
|
-
3. Commit and push your changes
|
|
6753
|
-
4. After pushing, verify the PR is updated: \`gh pr view\`
|
|
6754
|
-
`;
|
|
6755
|
-
var GAUNTLET_STATUS_SKILL_CONTENT = `---
|
|
6756
|
-
name: gauntlet-status
|
|
6757
|
-
description: Show a summary of the most recent gauntlet session
|
|
6758
|
-
disable-model-invocation: true
|
|
6759
|
-
allowed-tools: Bash, Read
|
|
6760
|
-
---
|
|
6761
|
-
|
|
6762
|
-
# /gauntlet-status
|
|
6763
|
-
Show a detailed summary of the most recent gauntlet session.
|
|
6764
|
-
|
|
6765
|
-
## Step 1: Run the status script
|
|
6766
|
-
|
|
6767
|
-
\`\`\`bash
|
|
6768
|
-
bun .gauntlet/skills/gauntlet/status/scripts/status.ts 2>&1
|
|
6769
|
-
\`\`\`
|
|
6770
|
-
|
|
6771
|
-
The script parses the \`.debug.log\` for session-level data (run count, gate results, pass/fail status) and lists all log files with their paths and sizes.
|
|
6772
|
-
|
|
6773
|
-
## Step 2: Read failed gate details
|
|
6774
|
-
|
|
6775
|
-
For each gate marked **FAIL** in the Gate Results table, read the corresponding log files to extract failure details:
|
|
6776
|
-
|
|
6777
|
-
- **Check failures** (e.g., \`check:src:code-health\`): Read the matching \`check_*.log\` file. Check log formats vary by tool (linters, test runners, code health analyzers) — read the file and extract the relevant error/warning output.
|
|
6778
|
-
- **Review failures** (e.g., \`review:.:code-quality\`): Read the matching \`review_*.json\` file(s). These contain structured violation data with \`file\`, \`line\`, \`issue\`, \`priority\`, and \`status\` fields.
|
|
6779
|
-
|
|
6780
|
-
Use the file paths from the "Log Files" section of the script output. Match gate IDs to file names: \`check:.:lint\` corresponds to \`check_._lint.*.log\`, \`review:.:code-quality\` corresponds to \`review_._code-quality_*.{log,json}\`.
|
|
6781
|
-
|
|
6782
|
-
## Step 3: Present the results
|
|
6783
|
-
|
|
6784
|
-
Combine the script's session summary with the detailed failure information into a comprehensive report:
|
|
6785
|
-
|
|
6786
|
-
1. Session overview (status, iterations, duration, fixed/skipped/failed counts)
|
|
6787
|
-
2. Gate results table
|
|
6788
|
-
3. For any failed gates: the specific errors, violations, or test failures from the log files
|
|
6789
|
-
4. For reviews with violations: list each violation with file, line, issue, priority, and current status (fixed/skipped/outstanding)
|
|
6790
|
-
`;
|
|
6791
|
-
function buildHelpSkillBundle() {
|
|
6792
|
-
const content = `---
|
|
6793
|
-
name: gauntlet-help
|
|
6794
|
-
description: Diagnose and explain gauntlet behavior using runtime evidence
|
|
6795
|
-
allowed-tools: Bash, Read, Glob, Grep
|
|
6796
|
-
---
|
|
6797
|
-
|
|
6798
|
-
# /gauntlet-help
|
|
6799
|
-
|
|
6800
|
-
Evidence-based diagnosis of gauntlet behavior. This skill is **diagnosis-only** — it explains what happened and why, but does not auto-fix issues. It operates from **runtime artifacts and CLI outputs**, not source code.
|
|
6801
|
-
|
|
6802
|
-
## Diagnostic Workflow
|
|
6803
|
-
|
|
6804
|
-
Follow this order for every diagnostic question:
|
|
6805
|
-
|
|
6806
|
-
1. **Resolve \`log_dir\`**: Read \`.gauntlet/config.yml\` and extract the \`log_dir\` field (default: \`gauntlet_logs\`). All log paths below are relative to \`<log_dir>/\`.
|
|
6807
|
-
2. **Passive evidence first**: Read files before running commands.
|
|
6808
|
-
- \`<log_dir>/.debug.log\` — timestamped event log (commands, gate results, state changes, errors)
|
|
6809
|
-
- \`<log_dir>/.execution_state\` — JSON with \`last_run_completed_at\`, \`branch\`, \`commit\`, \`working_tree_ref\`, and \`unhealthy_adapters\` (adapter name → \`{marked_at, reason}\`)
|
|
6810
|
-
- \`<log_dir>/console.*.log\` — console output per run (highest number = latest)
|
|
6811
|
-
- \`<log_dir>/check_*.log\` — check gate output
|
|
6812
|
-
- \`<log_dir>/review_*.json\` — review gate results with violations (\`file\`, \`line\`, \`issue\`, \`fix\`, \`priority\`, \`status\`)
|
|
6813
|
-
- \`.gauntlet/config.yml\` — project configuration
|
|
6814
|
-
3. **Active evidence when needed**: Run CLI commands only when passive evidence is insufficient for a confident diagnosis.
|
|
6815
|
-
4. **Explain with evidence**: Clearly distinguish confirmed findings from inference.
|
|
6816
|
-
|
|
6817
|
-
## Evidence Sources
|
|
6818
|
-
|
|
6819
|
-
| Source | What It Confirms |
|
|
6820
|
-
|--------|-----------------|
|
|
6821
|
-
| \`.gauntlet/config.yml\` | \`log_dir\`, \`base_branch\`, \`entry_points\`, \`cli.default_preference\`, \`stop_hook\` settings, \`max_retries\`, \`rerun_new_issue_threshold\` |
|
|
6822
|
-
| \`<log_dir>/.debug.log\` | Timestamped event history: commands executed, gate results, state transitions, errors |
|
|
6823
|
-
| \`<log_dir>/.execution_state\` | Last successful run timestamp, branch/commit at that time, working tree stash ref, unhealthy adapter cooldowns |
|
|
6824
|
-
| \`<log_dir>/console.*.log\` | Human-readable output from each run iteration |
|
|
6825
|
-
| \`<log_dir>/check_*.log\` | Raw output from check gate commands (linters, test runners, etc.) |
|
|
6826
|
-
| \`<log_dir>/review_*.json\` | Structured review violations with file, line, issue, priority, and resolution status |
|
|
6827
|
-
| \`<log_dir>/.gauntlet-run.lock\` | Lock file (contains PID) — present only during active execution |
|
|
6828
|
-
| \`<log_dir>/.stop-hook-active\` | Marker file (contains PID) — present only during active stop-hook execution |
|
|
6829
|
-
| \`<log_dir>/.ci-wait-attempts\` | CI wait attempt counter |
|
|
6830
|
-
|
|
6831
|
-
## CLI Command Quick-Reference
|
|
6832
|
-
|
|
6833
|
-
Use these only when passive evidence is insufficient:
|
|
6834
|
-
|
|
6835
|
-
| Command | When to Use |
|
|
6836
|
-
|---------|-------------|
|
|
6837
|
-
| \`agent-gauntlet list\` | See configured gates and entry points |
|
|
6838
|
-
| \`agent-gauntlet health\` | Check adapter availability and health status |
|
|
6839
|
-
| \`agent-gauntlet detect\` | See which files changed and which gates would apply |
|
|
6840
|
-
| \`agent-gauntlet validate\` | Validate config.yml syntax and schema |
|
|
6841
|
-
| \`agent-gauntlet clean\` | Archive current logs and reset state (destructive — confirm with user first) |
|
|
6842
|
-
|
|
6843
|
-
## Routing Logic
|
|
6844
|
-
|
|
6845
|
-
Based on the user's question, load the appropriate reference file for detailed guidance:
|
|
6846
|
-
|
|
6847
|
-
| Question Domain | Reference File |
|
|
6848
|
-
|----------------|---------------|
|
|
6849
|
-
| Stop hook blocked/allowed, hook statuses, recursion, timing | \`references/stop-hook-troubleshooting.md\` |
|
|
6850
|
-
| Missing config, YAML errors, misconfiguration, init problems | \`references/config-troubleshooting.md\` |
|
|
6851
|
-
| Check failures, review failures, no_changes, no_applicable_gates, rerun mode | \`references/gate-troubleshooting.md\` |
|
|
6852
|
-
| Lock conflict, stale locks, parallel runs, cleanup | \`references/lock-troubleshooting.md\` |
|
|
6853
|
-
| Adapter health, missing tools, usage limits, cooldown | \`references/adapter-troubleshooting.md\` |
|
|
6854
|
-
| PR push, CI status, auto_push_pr, auto_fix_pr, CI wait | \`references/ci-pr-troubleshooting.md\` |
|
|
6855
|
-
|
|
6856
|
-
If the question spans multiple domains, load each relevant reference.
|
|
6857
|
-
|
|
6858
|
-
## Output Contract
|
|
6859
|
-
|
|
6860
|
-
Every diagnostic response MUST include these sections:
|
|
6861
|
-
|
|
6862
|
-
### Diagnosis
|
|
6863
|
-
What happened and why, stated clearly.
|
|
6864
|
-
|
|
6865
|
-
### Evidence
|
|
6866
|
-
Specific files read, field values observed, and command outputs that support the diagnosis. Quote relevant log lines or config values.
|
|
6867
|
-
|
|
6868
|
-
### Confidence
|
|
6869
|
-
One of:
|
|
6870
|
-
- **High** — diagnosis is fully supported by direct evidence
|
|
6871
|
-
- **Medium** — diagnosis is likely but some evidence is missing or ambiguous
|
|
6872
|
-
- **Low** — diagnosis is inferred; key evidence is unavailable
|
|
6873
|
-
|
|
6874
|
-
Downgrade confidence when:
|
|
6875
|
-
- \`.debug.log\` or \`.execution_state\` is missing or empty
|
|
6876
|
-
- Log files referenced in output don't exist
|
|
6877
|
-
- Config values can't be verified
|
|
6878
|
-
- CLI commands fail or return unexpected output
|
|
6879
|
-
|
|
6880
|
-
### Next Steps
|
|
6881
|
-
Actionable recommendations for the user. If confidence is not high, suggest what additional evidence would confirm the diagnosis.
|
|
6882
|
-
`;
|
|
6883
|
-
const references = {
|
|
6884
|
-
"stop-hook-troubleshooting.md": `# Stop Hook Troubleshooting
|
|
6885
|
-
|
|
6886
|
-
## All Stop-Hook Statuses
|
|
6887
|
-
|
|
6888
|
-
### Allowing Statuses (stop is permitted)
|
|
6889
|
-
|
|
6890
|
-
| Status | Message | Meaning |
|
|
6891
|
-
|--------|---------|---------|
|
|
6892
|
-
| \`passed\` | All gates completed successfully | Every configured check and review gate passed |
|
|
6893
|
-
| \`passed_with_warnings\` | Passed with warnings (some issues were skipped) | Gates ran but some review violations were skipped rather than fixed |
|
|
6894
|
-
| \`no_applicable_gates\` | No applicable gates matched current changes | Changed files didn't match any configured entry point |
|
|
6895
|
-
| \`no_changes\` | No changes detected | No files changed relative to \`base_branch\` |
|
|
6896
|
-
| \`ci_passed\` | CI passed — all checks completed and no blocking reviews | GitHub CI checks succeeded and no \`CHANGES_REQUESTED\` reviews |
|
|
6897
|
-
| \`no_config\` | Not a gauntlet project — no \`.gauntlet/config.yml\` found | No gauntlet configuration in this repo |
|
|
6898
|
-
| \`stop_hook_active\` | Stop hook cycle detected — allowing stop to prevent infinite loop | Recursion prevention triggered |
|
|
6899
|
-
| \`stop_hook_disabled\` | Stop hook is disabled via configuration | \`stop_hook.enabled: false\` in config or \`GAUNTLET_STOP_HOOK_ENABLED=false\` |
|
|
6900
|
-
| \`interval_not_elapsed\` | Run interval not elapsed | \`stop_hook.run_interval_minutes\` hasn't elapsed since last run |
|
|
6901
|
-
| \`invalid_input\` | Invalid hook input — could not parse JSON | Stop-hook couldn't parse stdin JSON from the IDE |
|
|
6902
|
-
| \`lock_conflict\` | Another gauntlet run is already in progress | Lock file exists with a live PID |
|
|
6903
|
-
| \`error\` | Stop hook error | Unexpected error during execution |
|
|
6904
|
-
| \`retry_limit_exceeded\` | Retry limit exceeded | Max retries (default 3) exhausted; requires \`agent-gauntlet clean\` |
|
|
6905
|
-
|
|
6906
|
-
### Blocking Statuses (stop is prevented)
|
|
6907
|
-
|
|
6908
|
-
| Status | Message | Meaning |
|
|
6909
|
-
|--------|---------|---------|
|
|
6910
|
-
| \`failed\` | Issues must be fixed before stopping | One or more gates failed; agent must fix and re-run |
|
|
6911
|
-
| \`pr_push_required\` | PR needs to be created or updated before stopping | Gates passed but \`auto_push_pr\` is enabled and PR hasn't been pushed |
|
|
6912
|
-
| \`ci_pending\` | CI checks still running — waiting for completion | Waiting for GitHub CI to finish |
|
|
6913
|
-
| \`ci_failed\` | CI failed or review changes requested | GitHub CI checks failed or a reviewer requested changes |
|
|
6914
|
-
|
|
6915
|
-
## Common Scenarios
|
|
6916
|
-
|
|
6917
|
-
### "The hook blocked my stop"
|
|
6918
|
-
1. Check the status in \`.debug.log\` — search for \`status:\` entries
|
|
6919
|
-
2. If \`failed\`: Read the gate output files listed in \`.debug.log\` or the latest \`console.*.log\`
|
|
6920
|
-
3. If \`pr_push_required\`: The agent needs to commit, push, and create a PR
|
|
6921
|
-
4. If \`ci_pending\`: CI is still running; the hook will re-check on next stop attempt
|
|
6922
|
-
5. If \`ci_failed\`: Read CI failure details — run \`agent-gauntlet wait-ci\` or check \`gh pr checks\`
|
|
6923
|
-
|
|
6924
|
-
### "The hook allowed but shouldn't have"
|
|
6925
|
-
1. Check if the status was \`no_changes\` — verify \`base_branch\` is correct in \`config.yml\`
|
|
6926
|
-
2. Check if \`no_applicable_gates\` — run \`agent-gauntlet detect\` to see which files changed and which gates match
|
|
6927
|
-
3. Check if \`interval_not_elapsed\` — the run was skipped because \`run_interval_minutes\` hadn't elapsed
|
|
6928
|
-
4. Check if \`stop_hook_disabled\` — verify \`stop_hook.enabled\` in config and \`GAUNTLET_STOP_HOOK_ENABLED\` env var
|
|
6929
|
-
|
|
6930
|
-
### "The gauntlet isn't running gates / keeps allowing stops immediately"
|
|
6931
|
-
This happens when the iteration counter is inherited from a previous session's failures. Symptoms:
|
|
6932
|
-
1. \`.debug.log\` shows \`RUN_START\` followed immediately by \`RUN_END\` with \`duration=0.0s\`
|
|
6933
|
-
2. \`iterations\` value is high (e.g., 7, 8, 9) even though the current session hasn't run that many times
|
|
6934
|
-
3. Stop-hook returns \`retry_limit_exceeded\` without executing any gates
|
|
6935
|
-
4. \`failed=0\` in \`RUN_END\` (no gates ran, so none failed — but status is still \`fail\`)
|
|
6936
|
-
|
|
6937
|
-
**Root cause**: The iteration counter persists in \`.execution_state\` across sessions. If a previous session ended with unresolved failures and hit the retry limit, the counter carries over. The next session enters verification mode and immediately exceeds the limit.
|
|
6938
|
-
|
|
6939
|
-
**Fix**: Run \`agent-gauntlet clean\` to reset the state and iteration counter, then re-run.
|
|
6940
|
-
|
|
6941
|
-
**Prevention**: Before starting a new task, check if the previous session left failures behind. If \`.debug.log\` shows a recent \`STOP_HOOK decision=block reason=failed\` or \`retry_limit_exceeded\`, clean state first.
|
|
6942
|
-
|
|
6943
|
-
### "The hook seems stuck"
|
|
6944
|
-
1. Check for \`.stop-hook-active\` marker in \`<log_dir>/\` — if present, a stop-hook may be running
|
|
6945
|
-
2. Check PID in the marker file — is that process alive?
|
|
6946
|
-
3. The stop-hook has a **5-minute hard timeout** (\`STOP_HOOK_TIMEOUT_MS\`) and will self-terminate
|
|
6947
|
-
4. Stale marker files older than **10 minutes** are automatically cleaned up on next invocation
|
|
6948
|
-
|
|
6949
|
-
## Recursion Prevention
|
|
6950
|
-
|
|
6951
|
-
The stop-hook uses three layers to prevent infinite loops:
|
|
6952
|
-
|
|
6953
|
-
### Layer 1: Environment Variable
|
|
6954
|
-
- Variable: \`GAUNTLET_STOP_HOOK_ACTIVE\`
|
|
6955
|
-
- Set by the parent gauntlet when spawning child CLI processes for reviews
|
|
6956
|
-
- If \`GAUNTLET_STOP_HOOK_ACTIVE=1\`, the stop-hook exits immediately with \`stop_hook_active\`
|
|
6957
|
-
- Prevents child review processes from triggering nested gauntlets
|
|
6958
|
-
|
|
6959
|
-
### Layer 2: Marker File
|
|
6960
|
-
- File: \`<log_dir>/.stop-hook-active\` (contains the PID)
|
|
6961
|
-
- Created before execution, removed after completion (in \`finally\` block)
|
|
6962
|
-
- If another stop-hook fires during execution and finds a fresh marker (< 10 min old), it exits with \`stop_hook_active\`
|
|
6963
|
-
- Stale markers (> 10 min) are deleted and execution proceeds
|
|
6964
|
-
- Needed because Claude Code does NOT pass env vars to hooks
|
|
6965
|
-
|
|
6966
|
-
### Layer 3: IDE Input Field
|
|
6967
|
-
- Claude Code: \`stop_hook_active\` boolean in the stdin JSON
|
|
6968
|
-
- Cursor: \`loop_count\` field; threshold is 10 (returns \`retry_limit_exceeded\` if exceeded)
|
|
6969
|
-
- Additional safety net from the IDE itself
|
|
6970
|
-
|
|
6971
|
-
## Timing Values
|
|
6972
|
-
|
|
6973
|
-
| Timer | Value | Purpose |
|
|
6974
|
-
|-------|-------|---------|
|
|
6975
|
-
| Stdin timeout | 5 seconds | Safety net for delayed stdin from IDE |
|
|
6976
|
-
| Hard timeout | 5 minutes | Self-timeout to prevent zombie processes |
|
|
6977
|
-
| Stale marker | 10 minutes | Marker files older than this are cleaned up |
|
|
6978
|
-
| \`run_interval_minutes\` | Configurable (default 0 = always run) | Minimum time between stop-hook runs |
|
|
6979
|
-
|
|
6980
|
-
## Environment Variable Overrides
|
|
6981
|
-
|
|
6982
|
-
These override project config values (env > project config > global config):
|
|
6983
|
-
|
|
6984
|
-
| Variable | Type | Effect |
|
|
6985
|
-
|----------|------|--------|
|
|
6986
|
-
| \`GAUNTLET_STOP_HOOK_ENABLED\` | \`true\`/\`1\`/\`false\`/\`0\` | Enable or disable the stop hook entirely |
|
|
6987
|
-
| \`GAUNTLET_STOP_HOOK_INTERVAL_MINUTES\` | Integer >= 0 | Minutes between runs (0 = always run) |
|
|
6988
|
-
| \`GAUNTLET_AUTO_PUSH_PR\` | \`true\`/\`1\`/\`false\`/\`0\` | Check PR status after gates pass |
|
|
6989
|
-
| \`GAUNTLET_AUTO_FIX_PR\` | \`true\`/\`1\`/\`false\`/\`0\` | Enable CI wait workflow after PR created |
|
|
6990
|
-
|
|
6991
|
-
## Diagnosing \`stop_hook_disabled\`
|
|
6992
|
-
|
|
6993
|
-
This status means the stop hook has been explicitly disabled. Check in order:
|
|
6994
|
-
|
|
6995
|
-
1. \`GAUNTLET_STOP_HOOK_ENABLED\` environment variable (highest precedence)
|
|
6996
|
-
2. \`.gauntlet/config.yml\` → \`stop_hook.enabled\`
|
|
6997
|
-
3. \`~/.config/agent-gauntlet/config.yml\` → \`stop_hook.enabled\` (global)
|
|
6998
|
-
|
|
6999
|
-
To re-enable: remove the env var or set \`stop_hook.enabled: true\` in config.
|
|
7000
|
-
`,
|
|
7001
|
-
"config-troubleshooting.md": `# Config Troubleshooting
|
|
7002
|
-
|
|
7003
|
-
## \`no_config\` — Missing Configuration
|
|
7004
|
-
|
|
7005
|
-
The stop hook returns \`no_config\` when \`.gauntlet/config.yml\` doesn't exist. This is normal for non-gauntlet projects.
|
|
7006
|
-
|
|
7007
|
-
**If it should exist:**
|
|
7008
|
-
1. Run \`agent-gauntlet init\` to create the configuration
|
|
7009
|
-
2. Or manually create \`.gauntlet/config.yml\`
|
|
7010
|
-
|
|
7011
|
-
## YAML Syntax and Schema Errors
|
|
7012
|
-
|
|
7013
|
-
Run \`agent-gauntlet validate\` to check config syntax and schema.
|
|
7014
|
-
|
|
7015
|
-
**Common YAML issues:**
|
|
7016
|
-
- Indentation errors (YAML requires consistent indentation)
|
|
7017
|
-
- Missing colons after keys
|
|
7018
|
-
- Unquoted special characters in values
|
|
7019
|
-
|
|
7020
|
-
**Schema validation catches:**
|
|
7021
|
-
- Missing required fields (\`cli.default_preference\`, \`entry_points\`)
|
|
7022
|
-
- Wrong types (e.g., string where array expected)
|
|
7023
|
-
- Invalid enum values (e.g., invalid \`rerun_new_issue_threshold\`)
|
|
7024
|
-
|
|
7025
|
-
## Common Misconfigurations
|
|
7026
|
-
|
|
7027
|
-
### Missing or Empty \`cli.default_preference\`
|
|
7028
|
-
\`\`\`yaml
|
|
7029
|
-
# WRONG — missing
|
|
7030
|
-
cli: {}
|
|
7031
|
-
|
|
7032
|
-
# WRONG — empty
|
|
7033
|
-
cli:
|
|
7034
|
-
default_preference: []
|
|
7035
|
-
|
|
7036
|
-
# CORRECT
|
|
7037
|
-
cli:
|
|
7038
|
-
default_preference:
|
|
7039
|
-
- claude
|
|
7040
|
-
\`\`\`
|
|
7041
|
-
|
|
7042
|
-
### Empty \`entry_points\`
|
|
7043
|
-
\`\`\`yaml
|
|
7044
|
-
# WRONG
|
|
7045
|
-
entry_points: []
|
|
7046
|
-
|
|
7047
|
-
# CORRECT
|
|
7048
|
-
entry_points:
|
|
7049
|
-
- path: "."
|
|
7050
|
-
reviews:
|
|
7051
|
-
- code-quality
|
|
7052
|
-
\`\`\`
|
|
7053
|
-
|
|
7054
|
-
### \`fail_fast\` with \`parallel\`
|
|
7055
|
-
These are mutually exclusive for check gates. Schema validation rejects this:
|
|
7056
|
-
\`\`\`yaml
|
|
7057
|
-
# WRONG — in a check YAML file
|
|
7058
|
-
parallel: true
|
|
7059
|
-
fail_fast: true
|
|
7060
|
-
|
|
7061
|
-
# CORRECT — fail_fast only works with sequential
|
|
7062
|
-
parallel: false
|
|
7063
|
-
fail_fast: true
|
|
7064
|
-
\`\`\`
|
|
7065
|
-
|
|
7066
|
-
### Conflicting Fix Instruction Fields
|
|
7067
|
-
Check gates support only one fix method. These are mutually exclusive:
|
|
7068
|
-
- \`fix_instructions\` (inline string)
|
|
7069
|
-
- \`fix_instructions_file\` (path to file)
|
|
7070
|
-
- \`fix_with_skill\` (skill name)
|
|
7071
|
-
|
|
7072
|
-
### Entry Point References Non-Existent Gate
|
|
7073
|
-
If an entry point lists a check or review name that doesn't exist in \`.gauntlet/checks/\` or \`.gauntlet/reviews/\`, validation fails.
|
|
7074
|
-
|
|
7075
|
-
### Review Gate Uses Tool Not in \`default_preference\`
|
|
7076
|
-
Review gates can specify \`cli_preference\` but the tools must also appear in \`cli.default_preference\`.
|
|
7077
|
-
|
|
7078
|
-
## \`log_dir\` Issues
|
|
7079
|
-
|
|
7080
|
-
The \`log_dir\` field (default: \`gauntlet_logs\`) determines where all logs are written.
|
|
7081
|
-
|
|
7082
|
-
**Can't find logs:**
|
|
7083
|
-
1. Check \`config.yml\` for the \`log_dir\` value
|
|
7084
|
-
2. Verify the directory exists (it's created automatically on first run)
|
|
7085
|
-
3. Check if a previous \`agent-gauntlet clean\` archived everything to \`previous/\`
|
|
7086
|
-
|
|
7087
|
-
**Permissions:**
|
|
7088
|
-
- The gauntlet needs write access to \`log_dir\`
|
|
7089
|
-
- On some setups, the directory may not be writable
|
|
7090
|
-
|
|
7091
|
-
## \`base_branch\` Misconfiguration
|
|
7092
|
-
|
|
7093
|
-
The \`base_branch\` (default: \`origin/main\`) is used for diff calculation. Wrong values cause:
|
|
7094
|
-
- \`no_changes\` when there are actually changes (wrong base)
|
|
7095
|
-
- Diff includes too many files (base too far back)
|
|
7096
|
-
|
|
7097
|
-
**Verify:**
|
|
7098
|
-
\`\`\`bash
|
|
7099
|
-
git log --oneline origin/main..HEAD # Should show your commits
|
|
7100
|
-
\`\`\`
|
|
7101
|
-
|
|
7102
|
-
If using a different default branch:
|
|
7103
|
-
\`\`\`yaml
|
|
7104
|
-
base_branch: origin/develop
|
|
7105
|
-
\`\`\`
|
|
7106
|
-
|
|
7107
|
-
## Config Precedence
|
|
7108
|
-
|
|
7109
|
-
Configuration is loaded with this precedence (highest first):
|
|
7110
|
-
1. **Environment variables** (e.g., \`GAUNTLET_STOP_HOOK_ENABLED\`)
|
|
7111
|
-
2. **Project config** (\`.gauntlet/config.yml\`)
|
|
7112
|
-
3. **Global config** (\`~/.config/agent-gauntlet/config.yml\`)
|
|
7113
|
-
4. **Defaults** (built-in)
|
|
7114
|
-
|
|
7115
|
-
## Init Setup Problems
|
|
7116
|
-
|
|
7117
|
-
### "\`.gauntlet\` directory already exists"
|
|
7118
|
-
\`agent-gauntlet init\` won't overwrite an existing \`.gauntlet/\` directory. Delete it first or manually edit.
|
|
7119
|
-
|
|
7120
|
-
### Git Not Initialized
|
|
7121
|
-
Some features require a git repository. Run \`git init\` first.
|
|
7122
|
-
|
|
7123
|
-
### No Remote Configured
|
|
7124
|
-
The \`base_branch\` (e.g., \`origin/main\`) requires a remote. Run \`git remote add origin <url>\`.
|
|
7125
|
-
|
|
7126
|
-
## Adapter Configuration
|
|
7127
|
-
|
|
7128
|
-
Per-adapter settings are configured under \`cli.adapters\`:
|
|
7129
|
-
\`\`\`yaml
|
|
7130
|
-
cli:
|
|
7131
|
-
default_preference:
|
|
7132
|
-
- claude
|
|
7133
|
-
adapters:
|
|
7134
|
-
claude:
|
|
7135
|
-
allow_tool_use: true
|
|
7136
|
-
thinking_budget: medium # off, low, medium, high
|
|
7137
|
-
\`\`\`
|
|
7138
|
-
|
|
7139
|
-
**\`thinking_budget\` mapping:**
|
|
7140
|
-
- Claude: off=0, low=8000, medium=16000, high=31999 tokens
|
|
7141
|
-
- Codex: off=minimal, low=low, medium=medium, high=high
|
|
7142
|
-
- Gemini: off=0, low=4096, medium=8192, high=24576 tokens
|
|
7143
|
-
|
|
7144
|
-
## Debug Logging
|
|
7145
|
-
|
|
7146
|
-
Enable detailed logging in config:
|
|
7147
|
-
\`\`\`yaml
|
|
7148
|
-
debug_log:
|
|
7149
|
-
enabled: true
|
|
7150
|
-
max_size_mb: 10
|
|
7151
|
-
\`\`\`
|
|
7152
|
-
|
|
7153
|
-
This creates \`<log_dir>/.debug.log\` with timestamped events.
|
|
7154
|
-
`,
|
|
7155
|
-
"gate-troubleshooting.md": `# Gate Troubleshooting
|
|
7156
|
-
|
|
7157
|
-
## Check Gate Failures
|
|
7158
|
-
|
|
7159
|
-
Check gates run shell commands (linters, test runners, etc.) and report pass/fail based on exit code.
|
|
7160
|
-
|
|
7161
|
-
### Common Failure Modes
|
|
7162
|
-
|
|
7163
|
-
| Failure | Cause | Evidence |
|
|
7164
|
-
|---------|-------|----------|
|
|
7165
|
-
| Command not found | Binary not installed or not in PATH | Check gate log for "command not found" error |
|
|
7166
|
-
| Non-zero exit code | Linter/test failures | Read the \`check_*.log\` file for specific errors |
|
|
7167
|
-
| Timeout | Command exceeded configured timeout | Log shows SIGTERM; check \`timeout\` in check YAML |
|
|
7168
|
-
| Output truncation | Command output exceeded 10MB buffer | Log may be cut off; increase timeout or reduce output |
|
|
7169
|
-
|
|
7170
|
-
### Reading Check Logs
|
|
7171
|
-
- File pattern: \`<log_dir>/check_<CHECK_NAME>.log\`
|
|
7172
|
-
- Contains raw stdout/stderr from the check command
|
|
7173
|
-
- Format depends on the tool (linter output, test runner output, etc.)
|
|
7174
|
-
|
|
7175
|
-
### Rerun Commands
|
|
7176
|
-
Check gates can define a \`rerun_command\` for verification runs. If set, the rerun uses this command instead of the original \`command\`.
|
|
7177
|
-
|
|
7178
|
-
## Review Gate Failures
|
|
7179
|
-
|
|
7180
|
-
Review gates use AI CLI tools to review code changes.
|
|
7181
|
-
|
|
7182
|
-
### Common Failure Modes
|
|
7183
|
-
|
|
7184
|
-
| Failure | Cause | Evidence |
|
|
7185
|
-
|---------|-------|----------|
|
|
7186
|
-
| No healthy adapters | All configured CLI tools are missing, unhealthy, or in cooldown | Run \`agent-gauntlet health\` |
|
|
7187
|
-
| JSON parsing error | Adapter returned non-JSON output | Review log shows raw output instead of violations |
|
|
7188
|
-
| Violations outside diff scope | Reviewer flagged code not in the current diff | Check violation \`file\` and \`line\` against changed files |
|
|
7189
|
-
| Usage limit | API quota exceeded for the adapter | Look for "usage limit" in review log; adapter enters 1-hour cooldown |
|
|
7190
|
-
|
|
7191
|
-
### Reading Review JSON
|
|
7192
|
-
- File pattern: \`<log_dir>/review_<REVIEW_NAME>_<ADAPTER>@<INDEX>.json\`
|
|
7193
|
-
- Fields per violation:
|
|
7194
|
-
- \`file\`: Source file path
|
|
7195
|
-
- \`line\`: Line number
|
|
7196
|
-
- \`issue\`: Description of the problem
|
|
7197
|
-
- \`fix\`: Suggested fix
|
|
7198
|
-
- \`priority\`: \`critical\`, \`high\`, \`medium\`, or \`low\`
|
|
7199
|
-
- \`status\`: \`new\`, \`fixed\`, \`skipped\`
|
|
7200
|
-
- Status \`skipped_prior_pass\` means this review slot passed on a previous run and was skipped for efficiency
|
|
7201
|
-
|
|
7202
|
-
### Diff Calculation
|
|
7203
|
-
- **Local mode**: committed changes (base...HEAD) + uncommitted changes (HEAD) + untracked files
|
|
7204
|
-
- **CI mode**: \`git diff GITHUB_BASE_REF...GITHUB_SHA\` (falls back to HEAD^...HEAD)
|
|
7205
|
-
- **Rerun mode**: scoped to changes since last pass using \`working_tree_ref\` from \`.execution_state\`
|
|
7206
|
-
|
|
7207
|
-
## \`no_applicable_gates\`
|
|
7208
|
-
|
|
7209
|
-
All configured gates were skipped because no changed files matched any entry point path.
|
|
7210
|
-
|
|
7211
|
-
**Diagnosis:**
|
|
7212
|
-
1. Run \`agent-gauntlet detect\` to see which files changed and which gates match
|
|
7213
|
-
2. Check \`entry_points\` in \`config.yml\` — do the paths cover your changed files?
|
|
7214
|
-
3. Verify \`base_branch\` — if wrong, the diff may not include your changes
|
|
7215
|
-
|
|
7216
|
-
## \`no_changes\`
|
|
7217
|
-
|
|
7218
|
-
No files changed relative to \`base_branch\`.
|
|
7219
|
-
|
|
7220
|
-
**Diagnosis:**
|
|
7221
|
-
1. Check \`base_branch\` in \`config.yml\` (default: \`origin/main\`)
|
|
7222
|
-
2. Run \`git diff origin/main...HEAD --stat\` to verify
|
|
7223
|
-
3. If working on uncommitted changes, they are included in local mode but may not be in CI mode
|
|
7224
|
-
4. Check if a recent \`agent-gauntlet clean\` reset the execution state
|
|
7225
|
-
|
|
7226
|
-
## Parallel vs Sequential Execution
|
|
7227
|
-
|
|
7228
|
-
### Check Gates
|
|
7229
|
-
- Each check gate has a \`parallel\` setting (default: \`false\`)
|
|
7230
|
-
- Parallel checks run concurrently; sequential checks run one at a time
|
|
7231
|
-
- \`allow_parallel\` in \`config.yml\` (default: \`true\`) is the global switch
|
|
7232
|
-
|
|
7233
|
-
### \`fail_fast\` Behavior
|
|
7234
|
-
- Only applies to sequential check gates (\`parallel: false\`)
|
|
7235
|
-
- When enabled, stops running remaining sequential gates after the first failure
|
|
7236
|
-
- Cannot be combined with \`parallel: true\` (schema validation rejects this)
|
|
7237
|
-
|
|
7238
|
-
### Review Gates
|
|
7239
|
-
- Each review gate independently controls parallelism for its own adapter dispatch
|
|
7240
|
-
- When \`parallel: true\` (default) and \`num_reviews > 1\`, reviews run concurrently across adapters
|
|
7241
|
-
- When \`parallel: false\`, reviews run sequentially
|
|
7242
|
-
|
|
7243
|
-
## Rerun / Verification Mode
|
|
7244
|
-
|
|
7245
|
-
When the gauntlet detects existing logs in \`<log_dir>/\`, it enters **rerun mode** instead of a fresh run.
|
|
7246
|
-
|
|
7247
|
-
### How It Works
|
|
7248
|
-
1. Previous violations are loaded from existing \`review_*.json\` files
|
|
7249
|
-
2. Only violations at the configured threshold priority or higher are re-evaluated
|
|
7250
|
-
3. Check gates re-run their commands (or \`rerun_command\` if configured)
|
|
7251
|
-
4. Review gates scope their diff to changes since the last pass using \`working_tree_ref\` from \`.execution_state\`
|
|
7252
|
-
|
|
7253
|
-
### \`rerun_new_issue_threshold\`
|
|
7254
|
-
- Config field: \`rerun_new_issue_threshold\` (default: \`medium\`)
|
|
7255
|
-
- Controls which priority levels are re-evaluated: \`critical\` > \`high\` > \`medium\` > \`low\`
|
|
7256
|
-
- Violations below the threshold are ignored in reruns
|
|
7257
|
-
|
|
7258
|
-
### Passed Slot Optimization
|
|
7259
|
-
When \`num_reviews > 1\` in rerun mode:
|
|
7260
|
-
- If all review slots passed previously: only slot 1 re-runs (safety latch)
|
|
7261
|
-
- If some slots failed: only failed slots re-run; passed slots get \`skipped_prior_pass\`
|
|
7262
|
-
|
|
7263
|
-
### Why Violations Aren't Detected on Rerun
|
|
7264
|
-
- The diff is scoped to changes since the last pass — if the violation is in unchanged code, it won't appear
|
|
7265
|
-
- The threshold may filter out lower-priority violations
|
|
7266
|
-
- Passed slots may be skipped entirely
|
|
7267
|
-
|
|
7268
|
-
## How to Read Gate Logs
|
|
7269
|
-
|
|
7270
|
-
### Console Logs
|
|
7271
|
-
- Pattern: \`<log_dir>/console.*.log\` (highest number = latest run)
|
|
7272
|
-
- Contains unified output from all gates for that run iteration
|
|
7273
|
-
- Shows gate names, pass/fail status, and output file paths
|
|
7274
|
-
|
|
7275
|
-
### Debug Log
|
|
7276
|
-
- File: \`<log_dir>/.debug.log\`
|
|
7277
|
-
- Timestamped entries for every significant event
|
|
7278
|
-
- Search for \`gate\`, \`check\`, \`review\`, or specific gate names
|
|
7279
|
-
|
|
7280
|
-
### Gate Result Status Values
|
|
7281
|
-
- Check gates: \`pass\`, \`fail\`, \`error\`
|
|
7282
|
-
- Review gates: \`pass\`, \`fail\`, \`error\`, \`skipped_prior_pass\`
|
|
7283
|
-
`,
|
|
7284
|
-
"lock-troubleshooting.md": `# Lock Troubleshooting
|
|
7285
|
-
|
|
7286
|
-
## \`lock_conflict\` — Another Run in Progress
|
|
7287
|
-
|
|
7288
|
-
The gauntlet uses a lock file to prevent concurrent runs from interfering with each other.
|
|
7289
|
-
|
|
7290
|
-
### Lock File Details
|
|
7291
|
-
- **File**: \`<log_dir>/.gauntlet-run.lock\`
|
|
7292
|
-
- **Content**: PID of the process holding the lock
|
|
7293
|
-
- **Created**: At the start of a gauntlet run (exclusive write — fails if file exists)
|
|
7294
|
-
- **Released**: Always in a \`finally\` block (guaranteed cleanup on success, failure, or error)
|
|
7295
|
-
|
|
7296
|
-
### Diagnosing Lock Conflicts
|
|
7297
|
-
|
|
7298
|
-
1. Check if the lock file exists: \`<log_dir>/.gauntlet-run.lock\`
|
|
7299
|
-
2. Read the PID from the file
|
|
7300
|
-
3. Check if that process is alive:
|
|
7301
|
-
- If alive: a gauntlet run is genuinely in progress — wait for it to finish
|
|
7302
|
-
- If dead: the lock is stale (see below)
|
|
7303
|
-
|
|
7304
|
-
## Stale Lock Detection
|
|
7305
|
-
|
|
7306
|
-
The gauntlet automatically detects and cleans stale locks:
|
|
7307
|
-
|
|
7308
|
-
| Condition | Detection | Action |
|
|
7309
|
-
|-----------|-----------|--------|
|
|
7310
|
-
| PID is dead | \`kill(pid, 0)\` fails with ESRCH | Lock removed, retry once |
|
|
7311
|
-
| PID unparseable, lock > 10 min old | File age check | Lock removed, retry once |
|
|
7312
|
-
| PID alive | Process exists | Lock kept (genuine conflict) |
|
|
7313
|
-
|
|
7314
|
-
**The gauntlet never steals a lock from a live process**, regardless of lock age.
|
|
7315
|
-
|
|
7316
|
-
## \`allow_parallel\` Config
|
|
7317
|
-
|
|
7318
|
-
The \`allow_parallel\` config setting (default: \`true\`) controls whether gates can run in parallel **within** a single gauntlet run. It does **not** control concurrent gauntlet runs — that's what the lock file prevents.
|
|
7319
|
-
|
|
7320
|
-
## Marker Files
|
|
7321
|
-
|
|
7322
|
-
### \`.gauntlet-run.lock\`
|
|
7323
|
-
- **Location**: \`<log_dir>/.gauntlet-run.lock\`
|
|
7324
|
-
- **Purpose**: Prevent concurrent gauntlet runs
|
|
7325
|
-
- **Lifecycle**: Created at run start, removed at run end (always in \`finally\`)
|
|
7326
|
-
|
|
7327
|
-
### \`.stop-hook-active\`
|
|
7328
|
-
- **Location**: \`<log_dir>/.stop-hook-active\`
|
|
7329
|
-
- **Purpose**: Prevent stop-hook recursion (see stop-hook-troubleshooting.md)
|
|
7330
|
-
- **Content**: PID of the stop-hook process
|
|
7331
|
-
- **Stale threshold**: 10 minutes
|
|
7332
|
-
- **Lifecycle**: Created before stop-hook execution, removed after (always in \`finally\`)
|
|
7333
|
-
|
|
7334
|
-
## Manual Cleanup
|
|
7335
|
-
|
|
7336
|
-
If a lock is stuck and the process is dead:
|
|
7337
|
-
|
|
7338
|
-
\`\`\`bash
|
|
7339
|
-
agent-gauntlet clean
|
|
7340
|
-
\`\`\`
|
|
7341
|
-
|
|
7342
|
-
This command:
|
|
7343
|
-
1. Archives current logs to \`<log_dir>/previous/\`
|
|
7344
|
-
2. Removes the lock file
|
|
7345
|
-
3. Removes the stop-hook marker file
|
|
7346
|
-
4. Resets execution state
|
|
7347
|
-
|
|
7348
|
-
**Confirm with the user before running \`clean\`** — it archives all current logs and resets state, which means the next run starts fresh (no rerun mode).
|
|
7349
|
-
|
|
7350
|
-
## Troubleshooting Checklist
|
|
7351
|
-
|
|
7352
|
-
1. **Is another run actually in progress?** Check the PID in the lock file.
|
|
7353
|
-
2. **Is the process alive?** If it is not, the lock is stale and the gauntlet should auto-clean it on retry.
|
|
7354
|
-
3. **Did a crash leave a stale lock?** Run \`agent-gauntlet clean\` to reset.
|
|
7355
|
-
4. **Is this happening repeatedly?** Check for processes spawning concurrent gauntlet runs (e.g., multiple IDE hooks firing simultaneously).
|
|
7356
|
-
`,
|
|
7357
|
-
"adapter-troubleshooting.md": `# Adapter Troubleshooting
|
|
7358
|
-
|
|
7359
|
-
## \`agent-gauntlet health\` Output
|
|
7360
|
-
|
|
7361
|
-
Run \`agent-gauntlet health\` to check adapter status. Each adapter reports one of:
|
|
7362
|
-
|
|
7363
|
-
| Status | Meaning |
|
|
7364
|
-
|--------|---------|
|
|
7365
|
-
| \`healthy\` | Binary found and available |
|
|
7366
|
-
| \`missing\` | Binary not found in PATH |
|
|
7367
|
-
| \`unhealthy\` | Binary found but not functional (auth issue, etc.) |
|
|
7368
|
-
|
|
7369
|
-
## Missing CLI Tools
|
|
7370
|
-
|
|
7371
|
-
If an adapter reports \`missing\`:
|
|
7372
|
-
1. Verify the tool is installed
|
|
7373
|
-
2. Check that it's in your PATH: \`which claude\`, \`which gemini\`, \`which codex\`
|
|
7374
|
-
3. If installed but not in PATH, add the installation directory to your PATH
|
|
7375
|
-
|
|
7376
|
-
Missing adapters are skipped during review gate dispatch with a "Skipping X: Missing" message.
|
|
7377
|
-
|
|
7378
|
-
## Authentication Issues
|
|
7379
|
-
|
|
7380
|
-
If an adapter reports \`unhealthy\`:
|
|
7381
|
-
1. Check the tool's authentication: try running the CLI tool directly
|
|
7382
|
-
2. For Claude: \`claude --version\` (may need \`claude login\`)
|
|
7383
|
-
3. For Gemini: check Google Cloud authentication
|
|
7384
|
-
4. For Codex: check OpenAI authentication
|
|
7385
|
-
|
|
7386
|
-
## Usage Limits and 1-Hour Cooldown
|
|
7387
|
-
|
|
7388
|
-
### How Usage Limits Are Detected
|
|
7389
|
-
The gauntlet checks adapter output for these keywords:
|
|
7390
|
-
- "usage limit"
|
|
7391
|
-
- "quota exceeded"
|
|
7392
|
-
- "quota will reset"
|
|
7393
|
-
- "credit balance is too low"
|
|
7394
|
-
- "out of extra usage"
|
|
7395
|
-
- "out of usage"
|
|
7396
|
-
|
|
7397
|
-
### Cooldown Mechanism
|
|
7398
|
-
When a usage limit is detected:
|
|
7399
|
-
1. The adapter is marked **unhealthy** in \`.execution_state\`
|
|
7400
|
-
2. A **1-hour cooldown** starts (60 minutes)
|
|
7401
|
-
3. During cooldown, the adapter is skipped for review dispatch
|
|
7402
|
-
4. After cooldown expires, the adapter is re-probed and cleared if available
|
|
7403
|
-
|
|
7404
|
-
### Checking Cooldown Status
|
|
7405
|
-
Read \`<log_dir>/.execution_state\` and look at the \`unhealthy_adapters\` field:
|
|
7406
|
-
|
|
7407
|
-
\`\`\`json
|
|
7408
|
-
{
|
|
7409
|
-
"unhealthy_adapters": {
|
|
7410
|
-
"claude": {
|
|
7411
|
-
"marked_at": "2025-01-15T10:30:00.000Z",
|
|
7412
|
-
"reason": "Usage limit exceeded"
|
|
7413
|
-
}
|
|
6737
|
+
var PUSH_PR_SKILL_CONTENT = readSkillTemplate("push-pr.md");
|
|
6738
|
+
var FIX_PR_SKILL_CONTENT = readSkillTemplate("fix-pr.md");
|
|
6739
|
+
var GAUNTLET_STATUS_SKILL_CONTENT = readSkillTemplate("status.md");
|
|
6740
|
+
var HELP_SKILL_BUNDLE = {
|
|
6741
|
+
content: readSkillTemplate("help-skill.md"),
|
|
6742
|
+
references: {
|
|
6743
|
+
"stop-hook-troubleshooting.md": readSkillTemplate("help-ref-stop-hook-troubleshooting.md"),
|
|
6744
|
+
"config-troubleshooting.md": readSkillTemplate("help-ref-config-troubleshooting.md"),
|
|
6745
|
+
"gate-troubleshooting.md": readSkillTemplate("help-ref-gate-troubleshooting.md"),
|
|
6746
|
+
"lock-troubleshooting.md": readSkillTemplate("help-ref-lock-troubleshooting.md"),
|
|
6747
|
+
"adapter-troubleshooting.md": readSkillTemplate("help-ref-adapter-troubleshooting.md"),
|
|
6748
|
+
"ci-pr-troubleshooting.md": readSkillTemplate("help-ref-ci-pr-troubleshooting.md")
|
|
7414
6749
|
}
|
|
7415
|
-
}
|
|
7416
|
-
|
|
7417
|
-
|
|
7418
|
-
- \`marked_at\`: When the cooldown started (ISO 8601)
|
|
7419
|
-
- Cooldown expires 60 minutes after \`marked_at\`
|
|
7420
|
-
|
|
7421
|
-
### All Adapters in Cooldown
|
|
7422
|
-
If every configured adapter is in cooldown, review gates will fail with "no healthy adapters". Wait for the cooldown to expire or resolve the usage limit.
|
|
7423
|
-
|
|
7424
|
-
## \`cli.default_preference\` and Adapter Selection
|
|
7425
|
-
|
|
7426
|
-
The \`cli.default_preference\` array in \`config.yml\` determines:
|
|
7427
|
-
1. **Which adapters are available** for review dispatch
|
|
7428
|
-
2. **Selection order** for round-robin assignment
|
|
7429
|
-
|
|
7430
|
-
Review gates can override with \`cli_preference\` but those tools must also be in \`default_preference\`.
|
|
7431
|
-
|
|
7432
|
-
\`\`\`yaml
|
|
7433
|
-
cli:
|
|
7434
|
-
default_preference:
|
|
7435
|
-
- claude
|
|
7436
|
-
- gemini
|
|
7437
|
-
\`\`\`
|
|
7438
|
-
|
|
7439
|
-
## \`allow_tool_use\` and \`thinking_budget\` Settings
|
|
7440
|
-
|
|
7441
|
-
Per-adapter settings in \`config.yml\`:
|
|
7442
|
-
|
|
7443
|
-
\`\`\`yaml
|
|
7444
|
-
cli:
|
|
7445
|
-
adapters:
|
|
7446
|
-
claude:
|
|
7447
|
-
allow_tool_use: true # Whether the adapter can use tools during review
|
|
7448
|
-
thinking_budget: medium # off, low, medium, high
|
|
7449
|
-
\`\`\`
|
|
7450
|
-
|
|
7451
|
-
### \`thinking_budget\` Token Mapping
|
|
7452
|
-
|
|
7453
|
-
| Level | Claude | Codex | Gemini |
|
|
7454
|
-
|-------|--------|-------|--------|
|
|
7455
|
-
| \`off\` | 0 | minimal | 0 |
|
|
7456
|
-
| \`low\` | 8,000 | low | 4,096 |
|
|
7457
|
-
| \`medium\` | 16,000 | medium | 8,192 |
|
|
7458
|
-
| \`high\` | 31,999 | high | 24,576 |
|
|
7459
|
-
|
|
7460
|
-
## \`.execution_state\` File
|
|
7461
|
-
|
|
7462
|
-
The \`.execution_state\` file in \`<log_dir>/\` tracks run context:
|
|
7463
|
-
|
|
7464
|
-
\`\`\`json
|
|
7465
|
-
{
|
|
7466
|
-
"last_run_completed_at": "2025-01-15T10:30:00.000Z",
|
|
7467
|
-
"branch": "feature/my-branch",
|
|
7468
|
-
"commit": "abc123",
|
|
7469
|
-
"working_tree_ref": "def456",
|
|
7470
|
-
"unhealthy_adapters": {}
|
|
7471
|
-
}
|
|
7472
|
-
\`\`\`
|
|
7473
|
-
|
|
7474
|
-
| Field | Purpose |
|
|
7475
|
-
|-------|---------|
|
|
7476
|
-
| \`last_run_completed_at\` | When the last successful run finished |
|
|
7477
|
-
| \`branch\` | Git branch at last completion |
|
|
7478
|
-
| \`commit\` | HEAD SHA at last completion |
|
|
7479
|
-
| \`working_tree_ref\` | Stash SHA of working tree (used for rerun diff scoping) |
|
|
7480
|
-
| \`unhealthy_adapters\` | Map of adapter name to cooldown info |
|
|
7481
|
-
|
|
7482
|
-
This file is:
|
|
7483
|
-
- Written after successful execution
|
|
7484
|
-
- Preserved across runs
|
|
7485
|
-
- Auto-cleaned when the branch changes or commit is merged
|
|
7486
|
-
- Deleted by \`agent-gauntlet clean\`
|
|
7487
|
-
|
|
7488
|
-
## Troubleshooting Checklist
|
|
7489
|
-
|
|
7490
|
-
1. **Run \`agent-gauntlet health\`** to see overall adapter status
|
|
7491
|
-
2. **Check \`.execution_state\`** for cooldown entries
|
|
7492
|
-
3. **Verify \`cli.default_preference\`** includes the adapters you expect
|
|
7493
|
-
4. **Try the CLI tool directly** (e.g., \`claude --version\`) to isolate the issue
|
|
7494
|
-
5. **Check for usage limit messages** in review logs (\`review_*.log\`)
|
|
7495
|
-
`,
|
|
7496
|
-
"ci-pr-troubleshooting.md": `# CI/PR Troubleshooting
|
|
7497
|
-
|
|
7498
|
-
## \`pr_push_required\`
|
|
7499
|
-
|
|
7500
|
-
Gates passed but the stop hook detected that a PR needs to be created or updated.
|
|
7501
|
-
|
|
7502
|
-
**When this happens:**
|
|
7503
|
-
- \`auto_push_pr: true\` is set in \`stop_hook\` config
|
|
7504
|
-
- Gates have passed
|
|
7505
|
-
- No PR exists for the current branch, or the PR is out of date
|
|
7506
|
-
|
|
7507
|
-
**Resolution:**
|
|
7508
|
-
1. Commit and push your changes
|
|
7509
|
-
2. Create a PR: \`gh pr create\` or use \`/gauntlet-push-pr\`
|
|
7510
|
-
3. The next stop-hook invocation will check PR/CI status instead of re-running gates
|
|
7511
|
-
|
|
7512
|
-
## CI Status Values
|
|
7513
|
-
|
|
7514
|
-
| Status | Message | Blocking? |
|
|
7515
|
-
|--------|---------|-----------|
|
|
7516
|
-
| \`ci_pending\` | CI checks still running | Yes — agent waits |
|
|
7517
|
-
| \`ci_failed\` | CI failed or review changes requested | Yes — must fix |
|
|
7518
|
-
| \`ci_passed\` | All checks completed, no blocking reviews | No — stop allowed |
|
|
7519
|
-
| \`validation_required\` | Changes need validation | Yes — must validate |
|
|
7520
|
-
|
|
7521
|
-
## \`auto_push_pr\` and \`auto_fix_pr\` Configuration
|
|
7522
|
-
|
|
7523
|
-
\`\`\`yaml
|
|
7524
|
-
stop_hook:
|
|
7525
|
-
auto_push_pr: true # Check PR status after gates pass
|
|
7526
|
-
auto_fix_pr: true # Wait for CI and enable fix workflow
|
|
7527
|
-
\`\`\`
|
|
7528
|
-
|
|
7529
|
-
**Dependency:** \`auto_fix_pr\` requires \`auto_push_pr\`. If \`auto_fix_pr: true\` but \`auto_push_pr: false\`, \`auto_fix_pr\` is forced to \`false\` with a warning.
|
|
7530
|
-
|
|
7531
|
-
**Environment variable overrides:**
|
|
7532
|
-
- \`GAUNTLET_AUTO_PUSH_PR=true/false\`
|
|
7533
|
-
- \`GAUNTLET_AUTO_FIX_PR=true/false\`
|
|
7534
|
-
|
|
7535
|
-
## CI Wait Mechanism (\`wait-ci\`)
|
|
7536
|
-
|
|
7537
|
-
### How It Works
|
|
7538
|
-
1. After gates pass and PR is pushed, the stop hook enters CI wait mode
|
|
7539
|
-
2. It polls GitHub CI status using \`gh pr checks\`
|
|
7540
|
-
3. Polls every **15 seconds** (default)
|
|
7541
|
-
4. Times out after **270 seconds** (4.5 minutes, default)
|
|
7542
|
-
5. Up to **3 attempts** total across stop-hook invocations
|
|
7543
|
-
|
|
7544
|
-
### Attempt Tracking
|
|
7545
|
-
- File: \`<log_dir>/.ci-wait-attempts\`
|
|
7546
|
-
- Incremented on each CI wait invocation
|
|
7547
|
-
- When attempts >= 3: returns an error and allows the stop
|
|
7548
|
-
|
|
7549
|
-
### What \`wait-ci\` Checks
|
|
7550
|
-
|
|
7551
|
-
**CI Checks:**
|
|
7552
|
-
- Runs \`gh pr checks --json name,state,link\`
|
|
7553
|
-
- Check states: \`PENDING\`, \`QUEUED\`, \`IN_PROGRESS\`, \`SUCCESS\`, \`FAILURE\`
|
|
7554
|
-
- All checks must reach \`SUCCESS\` for \`ci_passed\`
|
|
7555
|
-
|
|
7556
|
-
**Blocking Reviews:**
|
|
7557
|
-
- Queries \`gh api repos/OWNER/REPO/pulls/PR_NUM/reviews\`
|
|
7558
|
-
- \`CHANGES_REQUESTED\` state is blocking
|
|
7559
|
-
- Latest review per author takes precedence (later reviews override earlier)
|
|
7560
|
-
- If any author's latest review is \`CHANGES_REQUESTED\`: \`ci_failed\`
|
|
7561
|
-
|
|
7562
|
-
### Failed Check Logs
|
|
7563
|
-
- For GitHub Actions: retrieves error output via \`gh run view RUN_ID --log-failed\`
|
|
7564
|
-
- For external checks (no run ID): no logs available
|
|
7565
|
-
- Output limited to last 100 lines
|
|
7566
|
-
|
|
7567
|
-
## CI Detection Environment Variables
|
|
7568
|
-
|
|
7569
|
-
The gauntlet detects CI environments using:
|
|
7570
|
-
|
|
7571
|
-
| Variable | Detection |
|
|
7572
|
-
|----------|-----------|
|
|
7573
|
-
| \`CI=true\` | Generic CI environment |
|
|
7574
|
-
| \`GITHUB_ACTIONS=true\` | GitHub Actions specifically |
|
|
7575
|
-
| \`GITHUB_BASE_REF\` | PR base branch in GitHub Actions (overrides \`base_branch\` for diff) |
|
|
7576
|
-
| \`GITHUB_SHA\` | Commit SHA in GitHub Actions (used for diff calculation) |
|
|
7577
|
-
|
|
7578
|
-
**CI mode differences:**
|
|
7579
|
-
- Diff uses \`GITHUB_BASE_REF...GITHUB_SHA\` instead of local branch comparison
|
|
7580
|
-
- Falls back to \`HEAD^...HEAD\` if CI variables are incomplete
|
|
7581
|
-
|
|
7582
|
-
## Troubleshooting Checklist
|
|
7583
|
-
|
|
7584
|
-
### \`ci_pending\` — CI Still Running
|
|
7585
|
-
1. Check \`gh pr checks\` to see which checks are still pending
|
|
7586
|
-
2. Wait and try again — the stop hook will re-poll on next attempt
|
|
7587
|
-
3. After 3 attempts, it will time out and allow the stop
|
|
7588
|
-
|
|
7589
|
-
### \`ci_failed\` — CI Failed
|
|
7590
|
-
1. Run \`gh pr checks\` to see failed checks
|
|
7591
|
-
2. Run \`gh pr view --comments\` to see review feedback
|
|
7592
|
-
3. Check for \`CHANGES_REQUESTED\` reviews: \`gh api repos/OWNER/REPO/pulls/PR_NUM/reviews\`
|
|
7593
|
-
4. Fix the issues, commit, and push
|
|
7594
|
-
5. The stop hook will re-check on next invocation
|
|
7595
|
-
|
|
7596
|
-
### PR-Related Issues
|
|
7597
|
-
- **No PR for branch**: \`gh pr view\` returns an error — create a PR first
|
|
7598
|
-
- **PR out of date**: Push latest changes before CI can pass
|
|
7599
|
-
- **\`gh\` CLI not installed**: CI features require the GitHub CLI (\`gh\`)
|
|
7600
|
-
`
|
|
7601
|
-
};
|
|
7602
|
-
return { content, references };
|
|
7603
|
-
}
|
|
7604
|
-
var HELP_SKILL_BUNDLE = buildHelpSkillBundle();
|
|
6750
|
+
};
|
|
6751
|
+
var SETUP_SKILL_CONTENT = readSkillTemplate("setup-skill.md");
|
|
6752
|
+
var CHECK_CATALOG_REFERENCE = readSkillTemplate("check-catalog.md");
|
|
7605
6753
|
var SKILL_DEFINITIONS = [
|
|
7606
6754
|
{ action: "run", content: GAUNTLET_RUN_SKILL_CONTENT },
|
|
7607
6755
|
{ action: "check", content: GAUNTLET_CHECK_SKILL_CONTENT },
|
|
@@ -7613,10 +6761,16 @@ var SKILL_DEFINITIONS = [
|
|
|
7613
6761
|
content: HELP_SKILL_BUNDLE.content,
|
|
7614
6762
|
references: HELP_SKILL_BUNDLE.references,
|
|
7615
6763
|
skillsOnly: true
|
|
6764
|
+
},
|
|
6765
|
+
{
|
|
6766
|
+
action: "setup",
|
|
6767
|
+
content: SETUP_SKILL_CONTENT,
|
|
6768
|
+
references: { "check-catalog.md": CHECK_CATALOG_REFERENCE },
|
|
6769
|
+
skillsOnly: true
|
|
7616
6770
|
}
|
|
7617
6771
|
];
|
|
7618
6772
|
function registerInitCommand(program) {
|
|
7619
|
-
program.command("init").description("Initialize .gauntlet configuration").option("-y, --yes", "Skip prompts and use defaults (all available CLIs
|
|
6773
|
+
program.command("init").description("Initialize .gauntlet configuration").option("-y, --yes", "Skip prompts and use defaults (all available CLIs)").action(async (options) => {
|
|
7620
6774
|
const projectRoot = process.cwd();
|
|
7621
6775
|
const targetDir = path23.join(projectRoot, ".gauntlet");
|
|
7622
6776
|
if (await exists(targetDir)) {
|
|
@@ -7634,60 +6788,20 @@ function registerInitCommand(program) {
|
|
|
7634
6788
|
console.log();
|
|
7635
6789
|
return;
|
|
7636
6790
|
}
|
|
7637
|
-
|
|
7638
|
-
if (options.yes) {
|
|
7639
|
-
config = {
|
|
7640
|
-
baseBranch: "origin/main",
|
|
7641
|
-
sourceDir: ".",
|
|
7642
|
-
lintCmd: null,
|
|
7643
|
-
testCmd: null,
|
|
7644
|
-
selectedAdapters: availableAdapters
|
|
7645
|
-
};
|
|
7646
|
-
} else {
|
|
7647
|
-
config = await promptForConfig(availableAdapters);
|
|
7648
|
-
}
|
|
6791
|
+
const baseBranch = await detectBaseBranch();
|
|
7649
6792
|
await fs25.mkdir(targetDir);
|
|
7650
6793
|
await fs25.mkdir(path23.join(targetDir, "checks"));
|
|
7651
6794
|
await fs25.mkdir(path23.join(targetDir, "reviews"));
|
|
7652
|
-
const configContent = generateConfigYml(config);
|
|
7653
|
-
await fs25.writeFile(path23.join(targetDir, "config.yml"), configContent);
|
|
7654
|
-
console.log(chalk9.green("Created .gauntlet/config.yml"));
|
|
7655
|
-
if (config.lintCmd !== null) {
|
|
7656
|
-
const lintContent = `name: lint
|
|
7657
|
-
command: ${config.lintCmd || "# command: TODO - add your lint command (e.g., npm run lint)"}
|
|
7658
|
-
# parallel: false
|
|
7659
|
-
# run_in_ci: true
|
|
7660
|
-
# run_locally: true
|
|
7661
|
-
# timeout: 300
|
|
7662
|
-
`;
|
|
7663
|
-
await fs25.writeFile(path23.join(targetDir, "checks", "lint.yml"), lintContent);
|
|
7664
|
-
console.log(chalk9.green("Created .gauntlet/checks/lint.yml"));
|
|
7665
|
-
}
|
|
7666
|
-
if (config.testCmd !== null) {
|
|
7667
|
-
const testContent = `name: unit-tests
|
|
7668
|
-
command: ${config.testCmd || "# command: TODO - add your test command (e.g., npm test)"}
|
|
7669
|
-
# parallel: false
|
|
7670
|
-
# run_in_ci: true
|
|
7671
|
-
# run_locally: true
|
|
7672
|
-
# timeout: 300
|
|
7673
|
-
`;
|
|
7674
|
-
await fs25.writeFile(path23.join(targetDir, "checks", "unit-tests.yml"), testContent);
|
|
7675
|
-
console.log(chalk9.green("Created .gauntlet/checks/unit-tests.yml"));
|
|
7676
|
-
}
|
|
7677
|
-
const reviewYamlContent = `builtin: code-quality
|
|
7678
|
-
num_reviews: 2
|
|
7679
|
-
`;
|
|
7680
|
-
await fs25.writeFile(path23.join(targetDir, "reviews", "code-quality.yml"), reviewYamlContent);
|
|
7681
|
-
console.log(chalk9.green("Created .gauntlet/reviews/code-quality.yml"));
|
|
7682
|
-
await copyStatusScript(targetDir);
|
|
7683
6795
|
const commands = SKILL_DEFINITIONS.map((skill) => ({
|
|
7684
6796
|
action: skill.action,
|
|
7685
6797
|
content: skill.content,
|
|
7686
6798
|
..."references" in skill ? { references: skill.references } : {},
|
|
7687
6799
|
..."skillsOnly" in skill ? { skillsOnly: skill.skillsOnly } : {}
|
|
7688
6800
|
}));
|
|
6801
|
+
let installedNames;
|
|
7689
6802
|
if (options.yes) {
|
|
7690
|
-
|
|
6803
|
+
installedNames = availableAdapters.map((a) => a.name);
|
|
6804
|
+
const adaptersToInstall = availableAdapters.filter((a) => a.getProjectCommandDir() !== null || a.getProjectSkillDir() !== null);
|
|
7691
6805
|
if (adaptersToInstall.length > 0) {
|
|
7692
6806
|
await installCommands({
|
|
7693
6807
|
level: "project",
|
|
@@ -7697,16 +6811,71 @@ num_reviews: 2
|
|
|
7697
6811
|
});
|
|
7698
6812
|
}
|
|
7699
6813
|
} else {
|
|
7700
|
-
await promptAndInstallCommands({
|
|
6814
|
+
installedNames = await promptAndInstallCommands({
|
|
7701
6815
|
projectRoot,
|
|
7702
6816
|
commands,
|
|
7703
6817
|
availableAdapters
|
|
7704
6818
|
});
|
|
7705
6819
|
}
|
|
7706
|
-
|
|
7707
|
-
|
|
6820
|
+
const cliList = availableAdapters.map((a) => ` - ${a.name}`).join(`
|
|
6821
|
+
`);
|
|
6822
|
+
const adapterSettings = buildAdapterSettingsBlock(availableAdapters);
|
|
6823
|
+
const configContent = `base_branch: ${baseBranch}
|
|
6824
|
+
log_dir: gauntlet_logs
|
|
6825
|
+
|
|
6826
|
+
# Run gates in parallel when possible (default: true)
|
|
6827
|
+
# allow_parallel: true
|
|
6828
|
+
|
|
6829
|
+
cli:
|
|
6830
|
+
default_preference:
|
|
6831
|
+
${cliList}
|
|
6832
|
+
${adapterSettings}
|
|
6833
|
+
# entry_points configured by /gauntlet-setup
|
|
6834
|
+
entry_points: []
|
|
6835
|
+
`;
|
|
6836
|
+
await fs25.writeFile(path23.join(targetDir, "config.yml"), configContent);
|
|
6837
|
+
console.log(chalk9.green("Created .gauntlet/config.yml"));
|
|
6838
|
+
const reviewYamlContent = `builtin: code-quality
|
|
6839
|
+
num_reviews: 1
|
|
6840
|
+
`;
|
|
6841
|
+
await fs25.writeFile(path23.join(targetDir, "reviews", "code-quality.yml"), reviewYamlContent);
|
|
6842
|
+
console.log(chalk9.green("Created .gauntlet/reviews/code-quality.yml"));
|
|
6843
|
+
await copyStatusScript(targetDir);
|
|
6844
|
+
if (installedNames.includes("claude")) {
|
|
6845
|
+
await installStopHook(projectRoot);
|
|
6846
|
+
}
|
|
6847
|
+
if (installedNames.includes("cursor")) {
|
|
6848
|
+
await installCursorStopHook(projectRoot);
|
|
7708
6849
|
}
|
|
6850
|
+
console.log();
|
|
6851
|
+
console.log(chalk9.bold("Run /gauntlet-setup to configure your checks and reviews"));
|
|
6852
|
+
});
|
|
6853
|
+
}
|
|
6854
|
+
/**
 * Detect the base branch to write into the generated config.
 *
 * Asks git which ref `refs/remotes/origin/HEAD` points at and returns it
 * without the `refs/remotes/` prefix (e.g. `origin/main`, `origin/develop`).
 *
 * git is invoked directly (no shell) and its stderr is discarded through the
 * `stdio` option, so the lookup behaves the same on every platform instead of
 * depending on a POSIX-only `2>/dev/null` redirect.
 *
 * @returns {Promise<string>} The detected remote branch, or `"origin/main"`
 *   when git yields no ref or the command fails for any reason.
 */
async function detectBaseBranch() {
  const fallbackBranch = "origin/main";
  const remotePrefix = "refs/remotes/";
  try {
    const { execFileSync } = await import("node:child_process");
    const ref = execFileSync("git", ["symbolic-ref", "refs/remotes/origin/HEAD"], {
      encoding: "utf-8",
      // stdin ignored, stdout captured, stderr silenced.
      stdio: ["ignore", "pipe", "ignore"]
    }).trim();
    if (ref) {
      // Strip the prefix only where it actually is a prefix.
      return ref.startsWith(remotePrefix) ? ref.slice(remotePrefix.length) : ref;
    }
  } catch {
    // Best-effort detection: any failure of the git lookup falls through
    // to the default below.
  }
  return fallbackBranch;
}
|
|
6864
|
+
function buildAdapterSettingsBlock(adapters3) {
|
|
6865
|
+
const items = adapters3.filter((a) => ADAPTER_CONFIG[a.name]);
|
|
6866
|
+
if (items.length === 0)
|
|
6867
|
+
return "";
|
|
6868
|
+
const lines = items.map((a) => {
|
|
6869
|
+
const c = ADAPTER_CONFIG[a.name];
|
|
6870
|
+
return ` ${a.name}:
|
|
6871
|
+
allow_tool_use: ${c?.allow_tool_use}
|
|
6872
|
+
thinking_budget: ${c?.thinking_budget}`;
|
|
7709
6873
|
});
|
|
6874
|
+
return ` # Recommended settings (see docs/eval-results.md)
|
|
6875
|
+
adapters:
|
|
6876
|
+
${lines.join(`
|
|
6877
|
+
`)}
|
|
6878
|
+
`;
|
|
7710
6879
|
}
|
|
7711
6880
|
async function detectAvailableCLIs() {
|
|
7712
6881
|
const allAdapters = getAllAdapters();
|
|
@@ -7722,68 +6891,6 @@ async function detectAvailableCLIs() {
|
|
|
7722
6891
|
}
|
|
7723
6892
|
return available;
|
|
7724
6893
|
}
|
|
7725
|
-
async function promptForConfig(availableAdapters) {
|
|
7726
|
-
const rl = readline.createInterface({
|
|
7727
|
-
input: process.stdin,
|
|
7728
|
-
output: process.stdout
|
|
7729
|
-
});
|
|
7730
|
-
const question = makeQuestion(rl);
|
|
7731
|
-
try {
|
|
7732
|
-
console.log();
|
|
7733
|
-
console.log("Which CLIs would you like to use?");
|
|
7734
|
-
availableAdapters.forEach((adapter, i) => {
|
|
7735
|
-
console.log(` ${i + 1}) ${adapter.name}`);
|
|
7736
|
-
});
|
|
7737
|
-
console.log(` ${availableAdapters.length + 1}) All`);
|
|
7738
|
-
let selectedAdapters = [];
|
|
7739
|
-
let attempts = 0;
|
|
7740
|
-
while (true) {
|
|
7741
|
-
attempts++;
|
|
7742
|
-
if (attempts > MAX_PROMPT_ATTEMPTS)
|
|
7743
|
-
throw new Error("Too many invalid attempts");
|
|
7744
|
-
const answer = await question(`(comma-separated, e.g., 1,2): `);
|
|
7745
|
-
const selections = answer.split(",").map((s) => s.trim()).filter((s) => s);
|
|
7746
|
-
if (selections.length === 0) {
|
|
7747
|
-
selectedAdapters = availableAdapters;
|
|
7748
|
-
break;
|
|
7749
|
-
}
|
|
7750
|
-
const chosen = parseSelections(selections, availableAdapters);
|
|
7751
|
-
if (chosen) {
|
|
7752
|
-
selectedAdapters = chosen;
|
|
7753
|
-
break;
|
|
7754
|
-
}
|
|
7755
|
-
}
|
|
7756
|
-
console.log();
|
|
7757
|
-
const baseBranchInput = await question("Enter your base branch (e.g., origin/main, origin/develop) [default: origin/main]: ");
|
|
7758
|
-
const baseBranch = baseBranchInput || "origin/main";
|
|
7759
|
-
console.log();
|
|
7760
|
-
const sourceDirInput = await question("Enter your source directory (e.g., src, lib, .) [default: .]: ");
|
|
7761
|
-
const sourceDir = sourceDirInput || ".";
|
|
7762
|
-
console.log();
|
|
7763
|
-
const addLint = await question("Would you like to add a linting check? [y/N]: ");
|
|
7764
|
-
let lintCmd = null;
|
|
7765
|
-
if (addLint.toLowerCase().startsWith("y")) {
|
|
7766
|
-
lintCmd = await question("Enter lint command (blank to fill later): ");
|
|
7767
|
-
}
|
|
7768
|
-
console.log();
|
|
7769
|
-
const addTest = await question("Would you like to add a unit test check? [y/N]: ");
|
|
7770
|
-
let testCmd = null;
|
|
7771
|
-
if (addTest.toLowerCase().startsWith("y")) {
|
|
7772
|
-
testCmd = await question("Enter test command (blank to fill later): ");
|
|
7773
|
-
}
|
|
7774
|
-
rl.close();
|
|
7775
|
-
return {
|
|
7776
|
-
baseBranch,
|
|
7777
|
-
sourceDir,
|
|
7778
|
-
lintCmd,
|
|
7779
|
-
testCmd,
|
|
7780
|
-
selectedAdapters
|
|
7781
|
-
};
|
|
7782
|
-
} catch (error) {
|
|
7783
|
-
rl.close();
|
|
7784
|
-
throw error;
|
|
7785
|
-
}
|
|
7786
|
-
}
|
|
7787
6894
|
function parseSelections(selections, adapters3) {
|
|
7788
6895
|
const chosen = [];
|
|
7789
6896
|
for (const sel of selections) {
|
|
@@ -7802,56 +6909,6 @@ function parseSelections(selections, adapters3) {
|
|
|
7802
6909
|
}
|
|
7803
6910
|
return [...new Set(chosen)];
|
|
7804
6911
|
}
|
|
7805
|
-
function buildAdapterSettings(adapters3) {
|
|
7806
|
-
const items = adapters3.filter((a) => ADAPTER_CONFIG[a.name]);
|
|
7807
|
-
if (items.length === 0)
|
|
7808
|
-
return "";
|
|
7809
|
-
const lines = items.map((a) => {
|
|
7810
|
-
const c = ADAPTER_CONFIG[a.name];
|
|
7811
|
-
return ` ${a.name}:
|
|
7812
|
-
allow_tool_use: ${c?.allow_tool_use}
|
|
7813
|
-
thinking_budget: ${c?.thinking_budget}`;
|
|
7814
|
-
});
|
|
7815
|
-
return `
|
|
7816
|
-
# Recommended settings (see docs/eval-results.md)
|
|
7817
|
-
adapters:
|
|
7818
|
-
${lines.join(`
|
|
7819
|
-
`)}
|
|
7820
|
-
`;
|
|
7821
|
-
}
|
|
7822
|
-
function generateConfigYml(config) {
|
|
7823
|
-
const cliList = config.selectedAdapters.map((a) => ` - ${a.name}`).join(`
|
|
7824
|
-
`);
|
|
7825
|
-
const adapterSettings = buildAdapterSettings(config.selectedAdapters);
|
|
7826
|
-
let entryPoints = "";
|
|
7827
|
-
if (config.lintCmd !== null || config.testCmd !== null) {
|
|
7828
|
-
entryPoints += ` - path: "${config.sourceDir}"
|
|
7829
|
-
checks:
|
|
7830
|
-
`;
|
|
7831
|
-
if (config.lintCmd !== null)
|
|
7832
|
-
entryPoints += ` - lint
|
|
7833
|
-
`;
|
|
7834
|
-
if (config.testCmd !== null)
|
|
7835
|
-
entryPoints += ` - unit-tests
|
|
7836
|
-
`;
|
|
7837
|
-
}
|
|
7838
|
-
entryPoints += ` - path: "."
|
|
7839
|
-
reviews:
|
|
7840
|
-
- code-quality`;
|
|
7841
|
-
return `base_branch: ${config.baseBranch}
|
|
7842
|
-
log_dir: gauntlet_logs
|
|
7843
|
-
|
|
7844
|
-
# Run gates in parallel when possible (default: true)
|
|
7845
|
-
# allow_parallel: true
|
|
7846
|
-
|
|
7847
|
-
cli:
|
|
7848
|
-
default_preference:
|
|
7849
|
-
${cliList}
|
|
7850
|
-
${adapterSettings}
|
|
7851
|
-
entry_points:
|
|
7852
|
-
${entryPoints}
|
|
7853
|
-
`;
|
|
7854
|
-
}
|
|
7855
6912
|
async function copyStatusScript(targetDir) {
|
|
7856
6913
|
const statusScriptDir = path23.join(targetDir, "skills", "gauntlet", "status", "scripts");
|
|
7857
6914
|
const statusScriptPath = path23.join(statusScriptDir, "status.ts");
|
|
@@ -7918,7 +6975,7 @@ async function promptAgentSelection(questionFn, installableAdapters) {
|
|
|
7918
6975
|
async function promptAndInstallCommands(options) {
|
|
7919
6976
|
const { projectRoot, commands, availableAdapters } = options;
|
|
7920
6977
|
if (availableAdapters.length === 0)
|
|
7921
|
-
return;
|
|
6978
|
+
return [];
|
|
7922
6979
|
const rl = readline.createInterface({
|
|
7923
6980
|
input: process.stdin,
|
|
7924
6981
|
output: process.stdout
|
|
@@ -7934,13 +6991,13 @@ async function promptAndInstallCommands(options) {
|
|
|
7934
6991
|
console.log(chalk9.dim(`
|
|
7935
6992
|
Skipping command installation.`));
|
|
7936
6993
|
rl.close();
|
|
7937
|
-
return;
|
|
6994
|
+
return [];
|
|
7938
6995
|
}
|
|
7939
6996
|
const installableAdapters = installLevel === "project" ? availableAdapters.filter((a) => a.getProjectCommandDir() !== null || a.getProjectSkillDir() !== null) : availableAdapters.filter((a) => a.getUserCommandDir() !== null || a.getUserSkillDir() !== null);
|
|
7940
6997
|
if (installableAdapters.length === 0) {
|
|
7941
6998
|
console.log(chalk9.yellow(`No available agents support ${installLevel}-level commands.`));
|
|
7942
6999
|
rl.close();
|
|
7943
|
-
return;
|
|
7000
|
+
return [];
|
|
7944
7001
|
}
|
|
7945
7002
|
const selectedAgents = await promptAgentSelection(question, installableAdapters);
|
|
7946
7003
|
rl.close();
|
|
@@ -7950,6 +7007,7 @@ Skipping command installation.`));
|
|
|
7950
7007
|
projectRoot,
|
|
7951
7008
|
commands
|
|
7952
7009
|
});
|
|
7010
|
+
return selectedAgents;
|
|
7953
7011
|
} catch (error) {
|
|
7954
7012
|
rl.close();
|
|
7955
7013
|
throw error;
|
|
@@ -8056,33 +7114,17 @@ var STOP_HOOK_CONFIG = {
|
|
|
8056
7114
|
]
|
|
8057
7115
|
}
|
|
8058
7116
|
};
|
|
8059
|
-
|
|
8060
|
-
|
|
8061
|
-
|
|
8062
|
-
|
|
8063
|
-
|
|
8064
|
-
|
|
8065
|
-
|
|
8066
|
-
|
|
8067
|
-
|
|
8068
|
-
output: process.stdout
|
|
8069
|
-
});
|
|
8070
|
-
const question = makeQuestion(rl);
|
|
8071
|
-
try {
|
|
8072
|
-
console.log();
|
|
8073
|
-
const answer = await question("Install Claude Code stop hook? (y/n): ");
|
|
8074
|
-
const shouldInstall = answer.toLowerCase() === "y" || answer.toLowerCase() === "yes";
|
|
8075
|
-
if (!shouldInstall) {
|
|
8076
|
-
rl.close();
|
|
8077
|
-
return;
|
|
8078
|
-
}
|
|
8079
|
-
rl.close();
|
|
8080
|
-
await installStopHook(projectRoot);
|
|
8081
|
-
} catch (error) {
|
|
8082
|
-
rl.close();
|
|
8083
|
-
throw error;
|
|
7117
|
+
/**
 * Template for the Cursor hooks file: a schema `version` plus a single `stop`
 * hook entry that runs `agent-gauntlet stop-hook` with a `loop_limit` of 10.
 * installCursorStopHook merges these entries into `.cursor/hooks.json`.
 */
var CURSOR_STOP_HOOK_CONFIG = {
  version: 1,
  hooks: {
    stop: [{ command: "agent-gauntlet stop-hook", loop_limit: 10 }],
  },
};
|
|
8086
7128
|
async function installStopHook(projectRoot) {
|
|
8087
7129
|
const claudeDir = path23.join(projectRoot, ".claude");
|
|
8088
7130
|
const settingsPath = path23.join(claudeDir, "settings.local.json");
|
|
@@ -8115,6 +7157,42 @@ async function installStopHook(projectRoot) {
|
|
|
8115
7157
|
`);
|
|
8116
7158
|
console.log(chalk9.green("Stop hook installed - gauntlet will run automatically when agent stops"));
|
|
8117
7159
|
}
|
|
7160
|
+
/**
 * Add the agent-gauntlet stop hook to `<projectRoot>/.cursor/hooks.json`.
 *
 * Creates the `.cursor` directory if needed, merges the hook entries from
 * CURSOR_STOP_HOOK_CONFIG into any existing configuration (other top-level
 * keys, other hook kinds and other stop hooks are kept), and writes the result
 * back as pretty-printed JSON. If a stop hook with the same command is already
 * present, the file is left untouched.
 *
 * An existing file that cannot be read or parsed, or whose content is not a
 * JSON object, is treated as empty and gets replaced.
 *
 * @param {string} projectRoot - Directory that contains (or will contain) `.cursor`.
 * @returns {Promise<void>}
 */
async function installCursorStopHook(projectRoot) {
  const cursorDir = path23.join(projectRoot, ".cursor");
  const hooksPath = path23.join(cursorDir, "hooks.json");
  // JSON.parse can legitimately return null, arrays or primitives; only a
  // plain object is safe to read properties from and to spread into the result.
  const isPlainObject = (value) =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  await fs25.mkdir(cursorDir, { recursive: true });

  let existingConfig = {};
  if (await exists(hooksPath)) {
    try {
      const parsed = JSON.parse(await fs25.readFile(hooksPath, "utf-8"));
      if (isPlainObject(parsed)) {
        existingConfig = parsed;
      }
    } catch {
      // Best effort: an unreadable or malformed file is handled like a missing one.
    }
  }

  const existingHooks = isPlainObject(existingConfig.hooks) ? existingConfig.hooks : {};
  const existingStopHooks = Array.isArray(existingHooks.stop) ? existingHooks.stop : [];

  // Compare against the commands we are about to install, not a separate literal.
  const gauntletStopHooks = CURSOR_STOP_HOOK_CONFIG.hooks.stop;
  const hookExists = existingStopHooks.some((hook) =>
    gauntletStopHooks.some((ours) => ours.command === hook?.command),
  );
  if (hookExists) {
    console.log(chalk9.dim("Cursor stop hook already installed"));
    return;
  }

  const mergedConfig = {
    ...existingConfig,
    // Keep a version the user already declared; only fill it in when absent.
    version: existingConfig.version ?? CURSOR_STOP_HOOK_CONFIG.version,
    hooks: {
      ...existingHooks,
      stop: [...existingStopHooks, ...gauntletStopHooks],
    },
  };
  await fs25.writeFile(hooksPath, `${JSON.stringify(mergedConfig, null, 2)}\n`);
  console.log(chalk9.green("Cursor stop hook installed - gauntlet will run automatically when agent stops"));
}
|
|
8118
7196
|
// src/commands/list.ts
|
|
8119
7197
|
import chalk10 from "chalk";
|
|
8120
7198
|
function registerListCommand(program) {
|
|
@@ -9223,4 +8301,4 @@ if (process.argv.length < 3) {
|
|
|
9223
8301
|
}
|
|
9224
8302
|
program.parse(process.argv);
|
|
9225
8303
|
|
|
9226
|
-
//# debugId=
|
|
8304
|
+
//# debugId=5D4AF6110EC21D0564756E2164756E21
|