@sentry/warden 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.lock +7 -0
- package/dist/cli/args.d.ts +15 -12
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +61 -3
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands/add.d.ts.map +1 -1
- package/dist/cli/commands/add.js +25 -33
- package/dist/cli/commands/add.js.map +1 -1
- package/dist/cli/commands/init.d.ts +0 -3
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +206 -19
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/logs.d.ts +19 -0
- package/dist/cli/commands/logs.d.ts.map +1 -0
- package/dist/cli/commands/logs.js +412 -0
- package/dist/cli/commands/logs.js.map +1 -0
- package/dist/cli/commands/setup-app.d.ts.map +1 -1
- package/dist/cli/commands/setup-app.js +19 -15
- package/dist/cli/commands/setup-app.js.map +1 -1
- package/dist/cli/context.d.ts +2 -0
- package/dist/cli/context.d.ts.map +1 -1
- package/dist/cli/context.js +8 -2
- package/dist/cli/context.js.map +1 -1
- package/dist/cli/files.d.ts.map +1 -1
- package/dist/cli/files.js +27 -30
- package/dist/cli/files.js.map +1 -1
- package/dist/cli/git.d.ts +8 -3
- package/dist/cli/git.d.ts.map +1 -1
- package/dist/cli/git.js +24 -13
- package/dist/cli/git.js.map +1 -1
- package/dist/cli/index.js +10 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/input.d.ts +7 -0
- package/dist/cli/input.d.ts.map +1 -1
- package/dist/cli/input.js +13 -2
- package/dist/cli/input.js.map +1 -1
- package/dist/cli/main.d.ts.map +1 -1
- package/dist/cli/main.js +108 -32
- package/dist/cli/main.js.map +1 -1
- package/dist/cli/output/formatters.d.ts +2 -1
- package/dist/cli/output/formatters.d.ts.map +1 -1
- package/dist/cli/output/formatters.js +22 -19
- package/dist/cli/output/formatters.js.map +1 -1
- package/dist/cli/output/index.d.ts +1 -1
- package/dist/cli/output/index.d.ts.map +1 -1
- package/dist/cli/output/index.js +1 -1
- package/dist/cli/output/index.js.map +1 -1
- package/dist/cli/output/ink-runner.js +1 -1
- package/dist/cli/output/ink-runner.js.map +1 -1
- package/dist/cli/output/jsonl.d.ts +49 -13
- package/dist/cli/output/jsonl.d.ts.map +1 -1
- package/dist/cli/output/jsonl.js +137 -4
- package/dist/cli/output/jsonl.js.map +1 -1
- package/dist/cli/output/tasks.d.ts.map +1 -1
- package/dist/cli/output/tasks.js +1 -22
- package/dist/cli/output/tasks.js.map +1 -1
- package/dist/cli/terminal.d.ts.map +1 -1
- package/dist/cli/terminal.js +0 -2
- package/dist/cli/terminal.js.map +1 -1
- package/dist/config/schema.d.ts +49 -98
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +0 -12
- package/dist/config/schema.js.map +1 -1
- package/dist/config/writer.d.ts.map +1 -1
- package/dist/config/writer.js +18 -0
- package/dist/config/writer.js.map +1 -1
- package/dist/evals/index.js +1 -1
- package/dist/evals/index.js.map +1 -1
- package/dist/evals/runner.d.ts.map +1 -1
- package/dist/evals/runner.js +0 -1
- package/dist/evals/runner.js.map +1 -1
- package/dist/evals/types.d.ts +9 -15
- package/dist/evals/types.d.ts.map +1 -1
- package/dist/output/github-checks.d.ts +1 -1
- package/dist/output/github-checks.d.ts.map +1 -1
- package/dist/output/github-checks.js +2 -6
- package/dist/output/github-checks.js.map +1 -1
- package/dist/output/github-issues.d.ts.map +1 -1
- package/dist/output/github-issues.js +14 -8
- package/dist/output/github-issues.js.map +1 -1
- package/dist/output/issue-renderer.js +1 -1
- package/dist/output/issue-renderer.js.map +1 -1
- package/dist/sdk/analyze.d.ts.map +1 -1
- package/dist/sdk/analyze.js +14 -27
- package/dist/sdk/analyze.js.map +1 -1
- package/dist/sdk/auth.d.ts +16 -0
- package/dist/sdk/auth.d.ts.map +1 -0
- package/dist/sdk/auth.js +37 -0
- package/dist/sdk/auth.js.map +1 -0
- package/dist/sdk/errors.d.ts +8 -1
- package/dist/sdk/errors.d.ts.map +1 -1
- package/dist/sdk/errors.js +22 -2
- package/dist/sdk/errors.js.map +1 -1
- package/dist/sdk/prompt.js +1 -1
- package/dist/sdk/runner.d.ts +2 -1
- package/dist/sdk/runner.d.ts.map +1 -1
- package/dist/sdk/runner.js +3 -1
- package/dist/sdk/runner.js.map +1 -1
- package/dist/sdk/types.d.ts +0 -3
- package/dist/sdk/types.d.ts.map +1 -1
- package/dist/sdk/types.js.map +1 -1
- package/dist/skills/remote.js +1 -1
- package/dist/skills/remote.js.map +1 -1
- package/dist/types/index.d.ts +23 -24
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +19 -7
- package/dist/types/index.js.map +1 -1
- package/dist/utils/exec.d.ts +4 -1
- package/dist/utils/exec.d.ts.map +1 -1
- package/dist/utils/exec.js +6 -4
- package/dist/utils/exec.js.map +1 -1
- package/package.json +1 -1
- package/skills/warden/SKILL.md +76 -0
- package/skills/warden/references/cli-reference.md +142 -0
- package/skills/warden/references/config-schema.md +111 -0
- package/skills/warden/references/configuration.md +110 -0
- package/skills/warden/references/creating-skills.md +84 -0
- package/skills/warden-sweep/SKILL.md +400 -0
- package/skills/warden-sweep/references/patch-prompt.md +72 -0
- package/skills/warden-sweep/references/verify-prompt.md +25 -0
- package/skills/warden-sweep/scripts/_utils.py +99 -0
- package/skills/warden-sweep/scripts/create_issue.py +189 -0
- package/skills/warden-sweep/scripts/extract_findings.py +219 -0
- package/skills/warden-sweep/scripts/find_reviewers.py +114 -0
- package/skills/warden-sweep/scripts/generate_report.py +266 -0
- package/skills/warden-sweep/scripts/index_prs.py +187 -0
- package/skills/warden-sweep/scripts/organize.py +422 -0
- package/skills/warden-sweep/scripts/scan.py +584 -0
- package/dist/sdk/session.d.ts +0 -43
- package/dist/sdk/session.d.ts.map +0 -1
- package/dist/sdk/session.js +0 -105
- package/dist/sdk/session.js.map +0 -1
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: warden-sweep
|
|
3
|
+
description: Full-repository code sweep. Scans every file with warden, verifies findings via deep tracing, creates draft PRs for validated issues. Use when asked to "sweep the repo", "scan everything", "find all bugs", "full codebase review", "batch code analysis", or run warden across the entire repository.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Warden Sweep
|
|
8
|
+
|
|
9
|
+
Full-repository code sweep: scan every file, verify findings with deep tracing, create draft PRs for validated issues.
|
|
10
|
+
|
|
11
|
+
**Requires**: `warden`, `gh`, `git`, `jq`, `uv`
|
|
12
|
+
|
|
13
|
+
**Important**: Run all scripts from the repository root using `${CLAUDE_SKILL_ROOT}`. Output goes to `.warden/sweeps/<run-id>/`.
|
|
14
|
+
|
|
15
|
+
## Bundled Scripts
|
|
16
|
+
|
|
17
|
+
### `scripts/scan.py`
|
|
18
|
+
|
|
19
|
+
Runs setup and scan in one call: generates run ID, creates sweep dir, checks deps, creates `warden` label, enumerates files, runs warden per file, extracts findings.
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/scan.py [file ...]
|
|
23
|
+
--sweep-dir DIR # Resume into existing sweep dir
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### `scripts/index_prs.py`
|
|
27
|
+
|
|
28
|
+
Fetches open warden-labeled PRs, builds file-to-PR dedup index, caches diffs for overlapping PRs.
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/index_prs.py <sweep-dir>
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### `scripts/create_issue.py`
|
|
35
|
+
|
|
36
|
+
Creates a GitHub tracking issue summarizing sweep results. Run after verification, before patching.
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/create_issue.py <sweep-dir>
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### `scripts/organize.py`
|
|
43
|
+
|
|
44
|
+
Tags security findings, labels security PRs, updates finding reports with PR links, posts final results to tracking issue, generates summary report, finalizes manifest.
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/organize.py <sweep-dir>
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### `scripts/extract_findings.py`
|
|
51
|
+
|
|
52
|
+
Parses warden JSONL log files and extracts normalized findings. Called automatically by `scan.py`.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/extract_findings.py <log-path-or-directory> -o <output.jsonl>
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### `scripts/generate_report.py`
|
|
59
|
+
|
|
60
|
+
Builds `summary.md` and `report.json` from sweep data. Called automatically by `organize.py`.
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/generate_report.py <sweep-dir>
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### `scripts/find_reviewers.py`
|
|
67
|
+
|
|
68
|
+
Finds top 2 git contributors for a file (last 12 months).
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/find_reviewers.py <file-path>
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Returns JSON: `{"reviewers": ["user1", "user2"]}`
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Phase 1: Scan
|
|
79
|
+
|
|
80
|
+
**Run** (1 tool call):
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/scan.py
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
To resume a partial scan:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/scan.py --sweep-dir .warden/sweeps/<run-id>
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Parse the JSON stdout. Save `runId` and `sweepDir` for subsequent phases.
|
|
93
|
+
|
|
94
|
+
**Report** to user:
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
## Scan Complete
|
|
98
|
+
|
|
99
|
+
Scanned **{filesScanned}** files, **{filesTimedOut}** timed out, **{filesErrored}** errors.
|
|
100
|
+
|
|
101
|
+
### Findings ({totalFindings} total)
|
|
102
|
+
|
|
103
|
+
| # | Severity | Skill | File | Title |
|
|
104
|
+
|---|----------|-------|------|-------|
|
|
105
|
+
| 1 | **HIGH** | security-review | `src/db/query.ts:42` | SQL injection in query builder |
|
|
106
|
+
...
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Render every finding from the `findings` array. Bold severity for high and above.
|
|
110
|
+
|
|
111
|
+
**On failure**: If exit code 1, show the error JSON and stop. If exit code 2, show the partial results. List timed-out files separately from errored files so users know which can be retried.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Phase 2: Verify
|
|
116
|
+
|
|
117
|
+
Deep-trace each finding using Task subagents to qualify or disqualify.
|
|
118
|
+
|
|
119
|
+
**For each finding in `data/all-findings.jsonl`:**
|
|
120
|
+
|
|
121
|
+
Check if `data/verify/<finding-id>.json` already exists (incrementality). If it does, skip.
|
|
122
|
+
|
|
123
|
+
Launch a Task subagent (`subagent_type: "general-purpose"`) for each finding. Process findings in parallel batches of up to 8 to improve throughput.
|
|
124
|
+
|
|
125
|
+
**Task prompt for each finding:**
|
|
126
|
+
|
|
127
|
+
Read `${CLAUDE_SKILL_ROOT}/references/verify-prompt.md` for the prompt template. Substitute the finding's values into the `${...}` placeholders.
|
|
128
|
+
|
|
129
|
+
**Process results:**
|
|
130
|
+
|
|
131
|
+
Parse the JSON from the subagent response and:
|
|
132
|
+
- Write result to `data/verify/<finding-id>.json`
|
|
133
|
+
- Append to `data/verified.jsonl` or `data/rejected.jsonl`
|
|
134
|
+
- For verified findings, generate `findings/<finding-id>.md`:
|
|
135
|
+
|
|
136
|
+
```markdown
|
|
137
|
+
# ${TITLE}
|
|
138
|
+
|
|
139
|
+
**ID**: ${FINDING_ID} | **Severity**: ${SEVERITY} | **Confidence**: ${CONFIDENCE}
|
|
140
|
+
**Skill**: ${SKILL} | **File**: ${FILE_PATH}:${START_LINE}
|
|
141
|
+
|
|
142
|
+
## Description
|
|
143
|
+
${DESCRIPTION}
|
|
144
|
+
|
|
145
|
+
## Verification
|
|
146
|
+
**Verdict**: Verified (${VERIFICATION_CONFIDENCE})
|
|
147
|
+
**Reasoning**: ${REASONING}
|
|
148
|
+
**Code trace**: ${TRACE_NOTES}
|
|
149
|
+
|
|
150
|
+
## Suggested Fix
|
|
151
|
+
${FIX_DESCRIPTION}
|
|
152
|
+
```diff
|
|
153
|
+
${FIX_DIFF}
|
|
154
|
+
```
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Update manifest: set `phases.verify` to `"complete"`.
|
|
158
|
+
|
|
159
|
+
**Report** to user after all verifications:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
## Verification Complete
|
|
163
|
+
|
|
164
|
+
**{verified}** verified, **{rejected}** rejected.
|
|
165
|
+
|
|
166
|
+
### Verified Findings
|
|
167
|
+
|
|
168
|
+
| # | Severity | Confidence | File | Title | Reasoning |
|
|
169
|
+
|---|----------|------------|------|-------|-----------|
|
|
170
|
+
| 1 | **HIGH** | high | `src/db/query.ts:42` | SQL injection in query builder | User input flows directly into... |
|
|
171
|
+
...
|
|
172
|
+
|
|
173
|
+
### Rejected ({rejected_count})
|
|
174
|
+
|
|
175
|
+
- `{findingId}` {file}: {reasoning}
|
|
176
|
+
...
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Phase 3: Issue
|
|
182
|
+
|
|
183
|
+
Create a tracking issue that ties all PRs together and gives reviewers a single overview.
|
|
184
|
+
|
|
185
|
+
**Run** (1 tool call):
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/create_issue.py ${SWEEP_DIR}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Parse the JSON stdout. Save `issueUrl` and `issueNumber` for Phase 4.
|
|
192
|
+
|
|
193
|
+
**Report** to user:
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
## Tracking Issue Created
|
|
197
|
+
|
|
198
|
+
{issueUrl}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**On failure**: Show the error. Continue to Phase 4 (PRs can still be created without a tracking issue).
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Phase 4: Patch
|
|
206
|
+
|
|
207
|
+
For each verified finding, create a worktree, fix the code, and open a draft PR. Process findings **sequentially** (one at a time) since parallel subagents cross-contaminate worktrees.
|
|
208
|
+
|
|
209
|
+
**Severity triage**: Patch HIGH and above. For MEDIUM, only patch findings from bug-detection skills (e.g., `code-review`, `security-review`). Skip LOW and INFO findings.
|
|
210
|
+
|
|
211
|
+
**Step 0: Setup** (run once before the loop):
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/index_prs.py ${SWEEP_DIR}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Parse the JSON stdout. Use `fileIndex` for dedup checks.
|
|
218
|
+
|
|
219
|
+
Determine the default branch and fetch latest so worktrees branch from current upstream:
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
DEFAULT_BRANCH=$(gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name')
|
|
223
|
+
git fetch origin "${DEFAULT_BRANCH}"
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**For each finding in `data/verified.jsonl`:**
|
|
227
|
+
|
|
228
|
+
Check if finding ID already exists in `data/patches.jsonl` (incrementality). If it does, skip.
|
|
229
|
+
|
|
230
|
+
**Dedup check**: Use the file index from `index_prs.py` output to determine if an existing open PR already addresses the same issue.
|
|
231
|
+
|
|
232
|
+
1. **File match**: Look up the finding's file path in the `fileIndex`. If no PR touches that file, no conflict; proceed to Step 1.
|
|
233
|
+
2. **Chunk overlap**: If a PR does touch the same file, read its cached diff from `data/pr-diffs/<number>.diff` and check whether the PR's changed hunks overlap with the finding's line range (startLine-endLine). Overlapping or adjacent hunks (within ~10 lines) indicate the same code region.
|
|
234
|
+
3. **Same concern**: If the hunks overlap, compare the PR title and the finding title/description. Are they fixing the same kind of defect? A PR fixing an off-by-one error and a finding about a null check in the same function are different issues; both should proceed.
|
|
235
|
+
|
|
236
|
+
Skip the finding only when there is both chunk overlap AND the PR addresses the same concern. Record it in `data/patches.jsonl` with `"status": "existing"` and `"prUrl"` pointing to the matching PR, then continue to the next finding.
|
|
237
|
+
|
|
238
|
+
**Step 1: Create worktree**
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
BRANCH="warden-sweep/${RUN_ID}/${FINDING_ID}"
|
|
242
|
+
WORKTREE="${SWEEP_DIR}/worktrees/${FINDING_ID}"
|
|
243
|
+
git worktree add "${WORKTREE}" -b "${BRANCH}" "origin/${DEFAULT_BRANCH}"
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Each finding branches from the repo's default branch so PRs contain only the fix commit.
|
|
247
|
+
|
|
248
|
+
**Step 2: Generate fix**
|
|
249
|
+
|
|
250
|
+
Launch a Task subagent (`subagent_type: "general-purpose"`) to apply the fix in the worktree. Read `${CLAUDE_SKILL_ROOT}/references/patch-prompt.md` for the prompt template. Substitute the finding's values and worktree path into the `${...}` placeholders.
|
|
251
|
+
|
|
252
|
+
**Step 2b: Handle skipped findings**
|
|
253
|
+
|
|
254
|
+
If the subagent returned `"status": "skipped"` (not `"applied"`), do NOT proceed to Steps 3-4. Instead:
|
|
255
|
+
1. Record the finding in `data/patches.jsonl` with `"status": "error"` and `"error": "Subagent skipped: ${skipReason}"`
|
|
256
|
+
2. Clean up the worktree
|
|
257
|
+
3. Continue to the next finding
|
|
258
|
+
|
|
259
|
+
**Step 3: Find reviewers**
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/find_reviewers.py "${FILE_PATH}"
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
**Step 4: Create draft PR**
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
cd "${WORKTREE}" && git push -u origin HEAD:"${BRANCH}"
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Create the PR with a 1-2 sentence "What" summary based on the finding and fix, followed by the finding description and verification reasoning:
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
REVIEWERS=""
|
|
275
|
+
# If find_reviewers.py returned reviewers, build the flags
|
|
276
|
+
# e.g., REVIEWERS="--reviewer user1 --reviewer user2"
|
|
277
|
+
|
|
278
|
+
gh pr create --draft \
|
|
279
|
+
--label "warden" \
|
|
280
|
+
--title "fix: ${TITLE}" \
|
|
281
|
+
--body "$(cat <<'EOF'
|
|
282
|
+
${FIX_WHAT_DESCRIPTION}
|
|
283
|
+
|
|
284
|
+
${DESCRIPTION}
|
|
285
|
+
|
|
286
|
+
${REASONING}
|
|
287
|
+
|
|
288
|
+
Automated fix for Warden finding ${FINDING_ID} (${SEVERITY}, detected by ${SKILL}).
|
|
289
|
+
|
|
290
|
+
<!-- Only include the next line if Phase 3 succeeded and ISSUE_NUMBER is available -->
|
|
291
|
+
Ref #${ISSUE_NUMBER}
|
|
292
|
+
|
|
293
|
+
> This PR was auto-generated by a Warden Sweep (run ${RUN_ID}).
|
|
294
|
+
> The finding has been validated through automated deep tracing,
|
|
295
|
+
> but human confirmation is requested as this is batch work.
|
|
296
|
+
EOF
|
|
297
|
+
)" ${REVIEWERS}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Save the PR URL.
|
|
301
|
+
|
|
302
|
+
**Step 5: Record and cleanup**
|
|
303
|
+
|
|
304
|
+
Append to `data/patches.jsonl` (use `"created"` as status for successful PRs, not the subagent's `"applied"`):
|
|
305
|
+
```json
|
|
306
|
+
{"findingId": "...", "prUrl": "https://...", "branch": "...", "reviewers": ["user1", "user2"], "filesChanged": ["..."], "status": "created|existing|error"}
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
Remove the worktree:
|
|
310
|
+
```bash
|
|
311
|
+
cd "$(git rev-parse --show-toplevel)"
|
|
312
|
+
git worktree remove "${WORKTREE}" --force
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
**Error handling**: On failure at any step, write to `data/patches.jsonl` with `"status": "error"` and `"error": "..."`, clean up the worktree, and continue to the next finding.
|
|
316
|
+
|
|
317
|
+
Update manifest: set `phases.patch` to `"complete"`.
|
|
318
|
+
|
|
319
|
+
**Report** to user after all patches:
|
|
320
|
+
|
|
321
|
+
```
|
|
322
|
+
## PRs Created
|
|
323
|
+
|
|
324
|
+
**{created}** created, **{skipped}** skipped (existing), **{failed}** failed.
|
|
325
|
+
|
|
326
|
+
| # | Finding | PR | Status |
|
|
327
|
+
|---|---------|-----|--------|
|
|
328
|
+
| 1 | `security-review-a1b2c3d4` SQL injection in query builder | #142 | created |
|
|
329
|
+
| 2 | `code-review-e5f6g7h8` Null pointer in handler | - | existing (#138) |
|
|
330
|
+
...
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
335
|
+
## Phase 5: Organize
|
|
336
|
+
|
|
337
|
+
**Run** (1 tool call):
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
uv run ${CLAUDE_SKILL_ROOT}/scripts/organize.py ${SWEEP_DIR}
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
Parse the JSON stdout.
|
|
344
|
+
|
|
345
|
+
**Report** to user:
|
|
346
|
+
|
|
347
|
+
```
|
|
348
|
+
## Sweep Complete
|
|
349
|
+
|
|
350
|
+
| Metric | Count |
|
|
351
|
+
|--------|-------|
|
|
352
|
+
| Files scanned | {filesScanned} |
|
|
353
|
+
| Findings verified | {verified} |
|
|
354
|
+
| PRs created | {prsCreated} |
|
|
355
|
+
| Security findings | {securityFindings} |
|
|
356
|
+
|
|
357
|
+
Full report: `{summaryPath}`
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
**On failure**: Show the error and note which steps completed.
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
364
|
+
## Resuming a Sweep
|
|
365
|
+
|
|
366
|
+
Each phase is incremental. To resume from where you left off:
|
|
367
|
+
|
|
368
|
+
1. Check `data/manifest.json` to see which phases are complete
|
|
369
|
+
2. For scan: pass `--sweep-dir` to `scan.py`
|
|
370
|
+
3. For verify: existing `data/verify/<id>.json` files are skipped
|
|
371
|
+
4. For issue: `create_issue.py` is idempotent (skips if `issueUrl` in manifest)
|
|
372
|
+
5. For patch: existing entries in `data/patches.jsonl` are skipped
|
|
373
|
+
6. For organize: safe to re-run (idempotent)
|
|
374
|
+
|
|
375
|
+
## Output Directory Structure
|
|
376
|
+
|
|
377
|
+
```
|
|
378
|
+
.warden/sweeps/<run-id>/
|
|
379
|
+
summary.md # Stats, key findings, PR links
|
|
380
|
+
findings/ # One markdown per verified finding
|
|
381
|
+
<finding-id>.md
|
|
382
|
+
security/ # Security-specific view
|
|
383
|
+
index.jsonl # Security findings index
|
|
384
|
+
<finding-id>.md # Copies of security findings
|
|
385
|
+
data/ # Structured data for tooling
|
|
386
|
+
manifest.json # Run metadata, phase state
|
|
387
|
+
scan-index.jsonl # Per-file scan tracking
|
|
388
|
+
all-findings.jsonl # Every finding from scan
|
|
389
|
+
verified.jsonl # Findings that passed verification
|
|
390
|
+
rejected.jsonl # Findings that failed verification
|
|
391
|
+
patches.jsonl # Finding -> PR URL -> reviewers
|
|
392
|
+
existing-prs.json # Cached open warden PRs
|
|
393
|
+
report.json # Machine-readable summary
|
|
394
|
+
verify/ # Individual verification results
|
|
395
|
+
<finding-id>.json
|
|
396
|
+
logs/ # Warden JSONL logs per file
|
|
397
|
+
<hash>.jsonl
|
|
398
|
+
pr-diffs/ # Cached PR diffs for dedup
|
|
399
|
+
<number>.diff
|
|
400
|
+
```
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Fix a verified code issue. You are working in a git worktree at: ${WORKTREE}
|
|
2
|
+
|
|
3
|
+
## Finding
|
|
4
|
+
- Title: ${TITLE}
|
|
5
|
+
- File: ${FILE_PATH}:${START_LINE}
|
|
6
|
+
- Description: ${DESCRIPTION}
|
|
7
|
+
- Verification: ${REASONING}
|
|
8
|
+
- Suggested Fix: ${FIX_DESCRIPTION}
|
|
9
|
+
```diff
|
|
10
|
+
${FIX_DIFF}
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Instructions
|
|
14
|
+
|
|
15
|
+
### Step 1: Understand the code
|
|
16
|
+
Read the file at ${WORKTREE}/${FILE_PATH}. Read at least 50 lines above and below the reported location. Trace callers and callees of the affected code using Grep/Glob to understand how it is used. Do NOT skip this step.
|
|
17
|
+
|
|
18
|
+
### Step 2: Apply a minimal fix
|
|
19
|
+
Apply the smallest change that addresses the finding. If the suggested diff doesn't apply cleanly, adapt it while preserving intent. Do NOT refactor surrounding code, rename variables, add comments, or make any change beyond what the finding requires.
|
|
20
|
+
|
|
21
|
+
### Step 3: Write tests
|
|
22
|
+
Write or update tests that verify the fix:
|
|
23
|
+
- Follow existing test patterns (co-located files, same framework)
|
|
24
|
+
- At minimum, write a test that would have caught the original bug
|
|
25
|
+
- Test the specific edge case, not just the happy path
|
|
26
|
+
|
|
27
|
+
Only modify the fix target and its test file.
|
|
28
|
+
|
|
29
|
+
### Step 4: Self-review
|
|
30
|
+
Before staging, run `git diff` in the worktree and review every changed line. Verify:
|
|
31
|
+
1. The change addresses the specific finding described, not something else
|
|
32
|
+
2. No unrelated code was modified (no drive-by cleanups, no formatting changes)
|
|
33
|
+
3. Trace through changed code paths: does the fix introduce any new bug, null reference, type error, or broken import?
|
|
34
|
+
4. Tests exercise the fix (the failure case), not just that the code runs
|
|
35
|
+
|
|
36
|
+
If ANY check fails, fix the problem before proceeding. If the suggested fix is wrong or would introduce a regression you cannot resolve, do NOT commit. Instead, skip to the output step and report why.
|
|
37
|
+
|
|
38
|
+
### Step 5: Commit
|
|
39
|
+
Do NOT run tests locally. CI will validate the changes.
|
|
40
|
+
|
|
41
|
+
Stage and commit with this exact message:
|
|
42
|
+
|
|
43
|
+
fix: ${TITLE}
|
|
44
|
+
|
|
45
|
+
Warden finding ${FINDING_ID}
|
|
46
|
+
Severity: ${SEVERITY}
|
|
47
|
+
|
|
48
|
+
Co-Authored-By: Warden <noreply@getsentry.com>
|
|
49
|
+
|
|
50
|
+
### Step 6: Output
|
|
51
|
+
Return ONLY valid JSON (no surrounding text). Use `"status": "applied"` if you committed a fix, or `"status": "skipped"` if you did not.
|
|
52
|
+
|
|
53
|
+
```json
|
|
54
|
+
{
|
|
55
|
+
"status": "applied",
|
|
56
|
+
"filesChanged": ["src/example.ts"],
|
|
57
|
+
"testFilesChanged": ["src/example.test.ts"],
|
|
58
|
+
"selfReview": "Verified the fix addresses the null check and test covers the failure case",
|
|
59
|
+
"skipReason": null
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
When skipping:
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"status": "skipped",
|
|
67
|
+
"filesChanged": [],
|
|
68
|
+
"testFilesChanged": [],
|
|
69
|
+
"selfReview": null,
|
|
70
|
+
"skipReason": "The suggested fix would introduce a regression in the error handling path"
|
|
71
|
+
}
|
|
72
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Verify a code analysis finding. Determine if this is a TRUE issue or a FALSE POSITIVE.
|
|
2
|
+
Do NOT write or edit any files. Research only.
|
|
3
|
+
|
|
4
|
+
## Finding
|
|
5
|
+
- Title: ${TITLE}
|
|
6
|
+
- Severity: ${SEVERITY} | Confidence: ${CONFIDENCE}
|
|
7
|
+
- Skill: ${SKILL}
|
|
8
|
+
- Location: ${FILE_PATH}:${START_LINE}-${END_LINE}
|
|
9
|
+
- Description: ${DESCRIPTION}
|
|
10
|
+
- Verification hint: ${VERIFICATION}
|
|
11
|
+
|
|
12
|
+
## Instructions
|
|
13
|
+
1. Read the file at the reported location. Examine at least 50 lines of surrounding context.
|
|
14
|
+
2. Trace data flow to/from the flagged code using Grep/Glob.
|
|
15
|
+
3. Check if the issue is mitigated elsewhere (guards, validation, try/catch upstream).
|
|
16
|
+
4. Check if the issue is actually reachable in practice.
|
|
17
|
+
|
|
18
|
+
Return your verdict as JSON:
|
|
19
|
+
{
|
|
20
|
+
"findingId": "${FINDING_ID}",
|
|
21
|
+
"verdict": "verified" or "rejected",
|
|
22
|
+
"confidence": "high" or "medium" or "low",
|
|
23
|
+
"reasoning": "2-3 sentence explanation",
|
|
24
|
+
"traceNotes": "What code paths you examined"
|
|
25
|
+
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Shared utilities for warden-sweep scripts."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def run_cmd(
|
|
11
|
+
args: list[str], timeout: int = 30, cwd: str | None = None
|
|
12
|
+
) -> subprocess.CompletedProcess[str]:
|
|
13
|
+
"""Run a command and return the result."""
|
|
14
|
+
return subprocess.run(
|
|
15
|
+
args,
|
|
16
|
+
capture_output=True,
|
|
17
|
+
text=True,
|
|
18
|
+
timeout=timeout,
|
|
19
|
+
cwd=cwd,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def run_cmd_stdout(
    args: list[str], timeout: int = 30, cwd: str | None = None
) -> str | None:
    """Run a command and return its stripped stdout, or None on any failure.

    Failure covers a non-zero exit status, a missing executable, and a
    command that exceeds *timeout* seconds.
    """
    try:
        proc = run_cmd(args, timeout=timeout, cwd=cwd)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        # A hung or absent binary both count as "no output".
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.strip()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def read_json(path: str) -> dict[str, Any] | None:
|
|
35
|
+
"""Read a JSON file and return parsed object, or None on failure."""
|
|
36
|
+
if not os.path.exists(path):
|
|
37
|
+
return None
|
|
38
|
+
try:
|
|
39
|
+
with open(path) as f:
|
|
40
|
+
return json.load(f)
|
|
41
|
+
except (json.JSONDecodeError, OSError):
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def write_json(path: str, data: dict[str, Any]) -> None:
    """Serialize *data* to *path* as 2-space-indented JSON with a trailing newline."""
    serialized = json.dumps(data, indent=2)
    with open(path, "w") as fh:
        fh.write(serialized + "\n")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def read_jsonl(path: str) -> list[dict[str, Any]]:
    """Parse a JSONL file into a list of objects.

    A missing file yields an empty list; blank and malformed lines are
    silently skipped (best-effort parsing).
    """
    if not os.path.exists(path):
        return []
    parsed: list[dict[str, Any]] = []
    with open(path) as fh:
        for raw in fh:
            text = raw.strip()
            if not text:
                continue
            try:
                obj = json.loads(text)
            except json.JSONDecodeError:
                continue
            parsed.append(obj)
    return parsed
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def severity_badge(severity: str) -> str:
    """Map a severity string to its markdown badge; unknown values pass through."""
    if severity == "critical":
        return "**CRITICAL**"
    if severity == "high":
        return "**HIGH**"
    if severity == "medium":
        return "MEDIUM"
    if severity == "low":
        return "LOW"
    if severity == "info":
        return "info"
    return severity
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def pr_number_from_url(pr_url: str) -> str:
    """Return the last path segment (the PR or issue number) of a GitHub URL."""
    trimmed = pr_url.rstrip("/")
    # rpartition yields ('', '', trimmed) when there is no slash, so a bare
    # number passes through unchanged, matching split("/")[-1] behavior.
    _, _, tail = trimmed.rpartition("/")
    return tail
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def ensure_github_label(name: str, color: str, description: str) -> None:
    """Best-effort creation of a GitHub label (idempotent).

    `gh label create` failing because the label already exists is fine; a
    missing or hung `gh` binary is also swallowed so label creation never
    breaks a sweep.
    """
    cmd = [
        "gh", "label", "create", name,
        "--color", color,
        "--description", description,
    ]
    try:
        subprocess.run(cmd, capture_output=True, timeout=15)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        pass
|