@sentry/warden 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/agents.lock +7 -0
  2. package/dist/cli/args.d.ts +15 -12
  3. package/dist/cli/args.d.ts.map +1 -1
  4. package/dist/cli/args.js +61 -3
  5. package/dist/cli/args.js.map +1 -1
  6. package/dist/cli/commands/add.d.ts.map +1 -1
  7. package/dist/cli/commands/add.js +25 -33
  8. package/dist/cli/commands/add.js.map +1 -1
  9. package/dist/cli/commands/init.d.ts +0 -3
  10. package/dist/cli/commands/init.d.ts.map +1 -1
  11. package/dist/cli/commands/init.js +206 -19
  12. package/dist/cli/commands/init.js.map +1 -1
  13. package/dist/cli/commands/logs.d.ts +19 -0
  14. package/dist/cli/commands/logs.d.ts.map +1 -0
  15. package/dist/cli/commands/logs.js +412 -0
  16. package/dist/cli/commands/logs.js.map +1 -0
  17. package/dist/cli/commands/setup-app.d.ts.map +1 -1
  18. package/dist/cli/commands/setup-app.js +19 -15
  19. package/dist/cli/commands/setup-app.js.map +1 -1
  20. package/dist/cli/context.d.ts +2 -0
  21. package/dist/cli/context.d.ts.map +1 -1
  22. package/dist/cli/context.js +8 -2
  23. package/dist/cli/context.js.map +1 -1
  24. package/dist/cli/files.d.ts.map +1 -1
  25. package/dist/cli/files.js +27 -30
  26. package/dist/cli/files.js.map +1 -1
  27. package/dist/cli/git.d.ts +8 -3
  28. package/dist/cli/git.d.ts.map +1 -1
  29. package/dist/cli/git.js +24 -13
  30. package/dist/cli/git.js.map +1 -1
  31. package/dist/cli/index.js +10 -0
  32. package/dist/cli/index.js.map +1 -1
  33. package/dist/cli/input.d.ts +7 -0
  34. package/dist/cli/input.d.ts.map +1 -1
  35. package/dist/cli/input.js +13 -2
  36. package/dist/cli/input.js.map +1 -1
  37. package/dist/cli/main.d.ts.map +1 -1
  38. package/dist/cli/main.js +108 -32
  39. package/dist/cli/main.js.map +1 -1
  40. package/dist/cli/output/formatters.d.ts +2 -1
  41. package/dist/cli/output/formatters.d.ts.map +1 -1
  42. package/dist/cli/output/formatters.js +22 -19
  43. package/dist/cli/output/formatters.js.map +1 -1
  44. package/dist/cli/output/index.d.ts +1 -1
  45. package/dist/cli/output/index.d.ts.map +1 -1
  46. package/dist/cli/output/index.js +1 -1
  47. package/dist/cli/output/index.js.map +1 -1
  48. package/dist/cli/output/ink-runner.js +1 -1
  49. package/dist/cli/output/ink-runner.js.map +1 -1
  50. package/dist/cli/output/jsonl.d.ts +49 -13
  51. package/dist/cli/output/jsonl.d.ts.map +1 -1
  52. package/dist/cli/output/jsonl.js +137 -4
  53. package/dist/cli/output/jsonl.js.map +1 -1
  54. package/dist/cli/output/tasks.d.ts.map +1 -1
  55. package/dist/cli/output/tasks.js +1 -22
  56. package/dist/cli/output/tasks.js.map +1 -1
  57. package/dist/cli/terminal.d.ts.map +1 -1
  58. package/dist/cli/terminal.js +0 -2
  59. package/dist/cli/terminal.js.map +1 -1
  60. package/dist/config/schema.d.ts +49 -98
  61. package/dist/config/schema.d.ts.map +1 -1
  62. package/dist/config/schema.js +0 -12
  63. package/dist/config/schema.js.map +1 -1
  64. package/dist/config/writer.d.ts.map +1 -1
  65. package/dist/config/writer.js +18 -0
  66. package/dist/config/writer.js.map +1 -1
  67. package/dist/evals/index.js +1 -1
  68. package/dist/evals/index.js.map +1 -1
  69. package/dist/evals/runner.d.ts.map +1 -1
  70. package/dist/evals/runner.js +0 -1
  71. package/dist/evals/runner.js.map +1 -1
  72. package/dist/evals/types.d.ts +9 -15
  73. package/dist/evals/types.d.ts.map +1 -1
  74. package/dist/output/github-checks.d.ts +1 -1
  75. package/dist/output/github-checks.d.ts.map +1 -1
  76. package/dist/output/github-checks.js +2 -6
  77. package/dist/output/github-checks.js.map +1 -1
  78. package/dist/output/github-issues.d.ts.map +1 -1
  79. package/dist/output/github-issues.js +14 -8
  80. package/dist/output/github-issues.js.map +1 -1
  81. package/dist/output/issue-renderer.js +1 -1
  82. package/dist/output/issue-renderer.js.map +1 -1
  83. package/dist/sdk/analyze.d.ts.map +1 -1
  84. package/dist/sdk/analyze.js +14 -27
  85. package/dist/sdk/analyze.js.map +1 -1
  86. package/dist/sdk/auth.d.ts +16 -0
  87. package/dist/sdk/auth.d.ts.map +1 -0
  88. package/dist/sdk/auth.js +37 -0
  89. package/dist/sdk/auth.js.map +1 -0
  90. package/dist/sdk/errors.d.ts +8 -1
  91. package/dist/sdk/errors.d.ts.map +1 -1
  92. package/dist/sdk/errors.js +22 -2
  93. package/dist/sdk/errors.js.map +1 -1
  94. package/dist/sdk/prompt.js +1 -1
  95. package/dist/sdk/runner.d.ts +2 -1
  96. package/dist/sdk/runner.d.ts.map +1 -1
  97. package/dist/sdk/runner.js +3 -1
  98. package/dist/sdk/runner.js.map +1 -1
  99. package/dist/sdk/types.d.ts +0 -3
  100. package/dist/sdk/types.d.ts.map +1 -1
  101. package/dist/sdk/types.js.map +1 -1
  102. package/dist/skills/remote.js +1 -1
  103. package/dist/skills/remote.js.map +1 -1
  104. package/dist/types/index.d.ts +23 -24
  105. package/dist/types/index.d.ts.map +1 -1
  106. package/dist/types/index.js +19 -7
  107. package/dist/types/index.js.map +1 -1
  108. package/dist/utils/exec.d.ts +4 -1
  109. package/dist/utils/exec.d.ts.map +1 -1
  110. package/dist/utils/exec.js +6 -4
  111. package/dist/utils/exec.js.map +1 -1
  112. package/package.json +1 -1
  113. package/skills/warden/SKILL.md +76 -0
  114. package/skills/warden/references/cli-reference.md +142 -0
  115. package/skills/warden/references/config-schema.md +111 -0
  116. package/skills/warden/references/configuration.md +110 -0
  117. package/skills/warden/references/creating-skills.md +84 -0
  118. package/skills/warden-sweep/SKILL.md +400 -0
  119. package/skills/warden-sweep/references/patch-prompt.md +72 -0
  120. package/skills/warden-sweep/references/verify-prompt.md +25 -0
  121. package/skills/warden-sweep/scripts/_utils.py +99 -0
  122. package/skills/warden-sweep/scripts/create_issue.py +189 -0
  123. package/skills/warden-sweep/scripts/extract_findings.py +219 -0
  124. package/skills/warden-sweep/scripts/find_reviewers.py +114 -0
  125. package/skills/warden-sweep/scripts/generate_report.py +266 -0
  126. package/skills/warden-sweep/scripts/index_prs.py +187 -0
  127. package/skills/warden-sweep/scripts/organize.py +422 -0
  128. package/skills/warden-sweep/scripts/scan.py +584 -0
  129. package/dist/sdk/session.d.ts +0 -43
  130. package/dist/sdk/session.d.ts.map +0 -1
  131. package/dist/sdk/session.js +0 -105
  132. package/dist/sdk/session.js.map +0 -1
@@ -0,0 +1,400 @@
1
+ ---
2
+ name: warden-sweep
3
+ description: Full-repository code sweep. Scans every file with warden, verifies findings via deep tracing, creates draft PRs for validated issues. Use when asked to "sweep the repo", "scan everything", "find all bugs", "full codebase review", "batch code analysis", or run warden across the entire repository.
4
+ disable-model-invocation: true
5
+ ---
6
+
7
+ # Warden Sweep
8
+
9
+ Full-repository code sweep: scan every file, verify findings with deep tracing, create draft PRs for validated issues.
10
+
11
+ **Requires**: `warden`, `gh`, `git`, `jq`, `uv`
12
+
13
+ **Important**: Run all scripts from the repository root using `${CLAUDE_SKILL_ROOT}`. Output goes to `.warden/sweeps/<run-id>/`.
14
+
15
+ ## Bundled Scripts
16
+
17
+ ### `scripts/scan.py`
18
+
19
+ Runs setup and scan in one call: generates run ID, creates sweep dir, checks deps, creates `warden` label, enumerates files, runs warden per file, extracts findings.
20
+
21
+ ```bash
22
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/scan.py [file ...]
23
+ --sweep-dir DIR # Resume into existing sweep dir
24
+ ```
25
+
26
+ ### `scripts/index_prs.py`
27
+
28
+ Fetches open warden-labeled PRs, builds file-to-PR dedup index, caches diffs for overlapping PRs.
29
+
30
+ ```bash
31
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/index_prs.py <sweep-dir>
32
+ ```
33
+
34
+ ### `scripts/create_issue.py`
35
+
36
+ Creates a GitHub tracking issue summarizing sweep results. Run after verification, before patching.
37
+
38
+ ```bash
39
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/create_issue.py <sweep-dir>
40
+ ```
41
+
42
+ ### `scripts/organize.py`
43
+
44
+ Tags security findings, labels security PRs, updates finding reports with PR links, posts final results to tracking issue, generates summary report, finalizes manifest.
45
+
46
+ ```bash
47
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/organize.py <sweep-dir>
48
+ ```
49
+
50
+ ### `scripts/extract_findings.py`
51
+
52
+ Parses warden JSONL log files and extracts normalized findings. Called automatically by `scan.py`.
53
+
54
+ ```bash
55
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/extract_findings.py <log-path-or-directory> -o <output.jsonl>
56
+ ```
57
+
58
+ ### `scripts/generate_report.py`
59
+
60
+ Builds `summary.md` and `report.json` from sweep data. Called automatically by `organize.py`.
61
+
62
+ ```bash
63
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/generate_report.py <sweep-dir>
64
+ ```
65
+
66
+ ### `scripts/find_reviewers.py`
67
+
68
+ Finds top 2 git contributors for a file (last 12 months).
69
+
70
+ ```bash
71
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/find_reviewers.py <file-path>
72
+ ```
73
+
74
+ Returns JSON: `{"reviewers": ["user1", "user2"]}`
75
+
76
+ ---
77
+
78
+ ## Phase 1: Scan
79
+
80
+ **Run** (1 tool call):
81
+
82
+ ```bash
83
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/scan.py
84
+ ```
85
+
86
+ To resume a partial scan:
87
+
88
+ ```bash
89
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/scan.py --sweep-dir .warden/sweeps/<run-id>
90
+ ```
91
+
92
+ Parse the JSON stdout. Save `runId` and `sweepDir` for subsequent phases.
93
+
94
+ **Report** to user:
95
+
96
+ ```
97
+ ## Scan Complete
98
+
99
+ Scanned **{filesScanned}** files, **{filesTimedOut}** timed out, **{filesErrored}** errors.
100
+
101
+ ### Findings ({totalFindings} total)
102
+
103
+ | # | Severity | Skill | File | Title |
104
+ |---|----------|-------|------|-------|
105
+ | 1 | **HIGH** | security-review | `src/db/query.ts:42` | SQL injection in query builder |
106
+ ...
107
+ ```
108
+
109
+ Render every finding from the `findings` array. Bold severity for high and above.
110
+
111
+ **On failure**: If exit code 1, show the error JSON and stop. If exit code 2, show the partial results. List timed-out files separately from errored files so users know which can be retried.
112
+
113
+ ---
114
+
115
+ ## Phase 2: Verify
116
+
117
+ Deep-trace each finding using Task subagents to qualify or disqualify.
118
+
119
+ **For each finding in `data/all-findings.jsonl`:**
120
+
121
+ Check if `data/verify/<finding-id>.json` already exists (incrementality). If it does, skip.
122
+
123
+ Launch a Task subagent (`subagent_type: "general-purpose"`) for each finding. Process findings in parallel batches of up to 8 to improve throughput.
124
+
125
+ **Task prompt for each finding:**
126
+
127
+ Read `${CLAUDE_SKILL_ROOT}/references/verify-prompt.md` for the prompt template. Substitute the finding's values into the `${...}` placeholders.
128
+
129
+ **Process results:**
130
+
131
+ Parse the JSON from the subagent response and:
132
+ - Write result to `data/verify/<finding-id>.json`
133
+ - Append to `data/verified.jsonl` or `data/rejected.jsonl`
134
+ - For verified findings, generate `findings/<finding-id>.md`:
135
+
136
+ ```markdown
137
+ # ${TITLE}
138
+
139
+ **ID**: ${FINDING_ID} | **Severity**: ${SEVERITY} | **Confidence**: ${CONFIDENCE}
140
+ **Skill**: ${SKILL} | **File**: ${FILE_PATH}:${START_LINE}
141
+
142
+ ## Description
143
+ ${DESCRIPTION}
144
+
145
+ ## Verification
146
+ **Verdict**: Verified (${VERIFICATION_CONFIDENCE})
147
+ **Reasoning**: ${REASONING}
148
+ **Code trace**: ${TRACE_NOTES}
149
+
150
+ ## Suggested Fix
151
+ ${FIX_DESCRIPTION}
152
+ ```diff
153
+ ${FIX_DIFF}
154
+ ```
155
+ ```
156
+
157
+ Update manifest: set `phases.verify` to `"complete"`.
158
+
159
+ **Report** to user after all verifications:
160
+
161
+ ```
162
+ ## Verification Complete
163
+
164
+ **{verified}** verified, **{rejected}** rejected.
165
+
166
+ ### Verified Findings
167
+
168
+ | # | Severity | Confidence | File | Title | Reasoning |
169
+ |---|----------|------------|------|-------|-----------|
170
+ | 1 | **HIGH** | high | `src/db/query.ts:42` | SQL injection in query builder | User input flows directly into... |
171
+ ...
172
+
173
+ ### Rejected ({rejected_count})
174
+
175
+ - `{findingId}` {file}: {reasoning}
176
+ ...
177
+ ```
178
+
179
+ ---
180
+
181
+ ## Phase 3: Issue
182
+
183
+ Create a tracking issue that ties all PRs together and gives reviewers a single overview.
184
+
185
+ **Run** (1 tool call):
186
+
187
+ ```bash
188
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/create_issue.py ${SWEEP_DIR}
189
+ ```
190
+
191
+ Parse the JSON stdout. Save `issueUrl` and `issueNumber` for Phase 4.
192
+
193
+ **Report** to user:
194
+
195
+ ```
196
+ ## Tracking Issue Created
197
+
198
+ {issueUrl}
199
+ ```
200
+
201
+ **On failure**: Show the error. Continue to Phase 4 (PRs can still be created without a tracking issue).
202
+
203
+ ---
204
+
205
+ ## Phase 4: Patch
206
+
207
+ For each verified finding, create a worktree, fix the code, and open a draft PR. Process findings **sequentially** (one at a time) since parallel subagents cross-contaminate worktrees.
208
+
209
+ **Severity triage**: Patch HIGH and above. For MEDIUM, only patch findings from bug-detection skills (e.g., `code-review`, `security-review`). Skip LOW and INFO findings.
210
+
211
+ **Step 0: Setup** (run once before the loop):
212
+
213
+ ```bash
214
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/index_prs.py ${SWEEP_DIR}
215
+ ```
216
+
217
+ Parse the JSON stdout. Use `fileIndex` for dedup checks.
218
+
219
+ Determine the default branch and fetch latest so worktrees branch from current upstream:
220
+
221
+ ```bash
222
+ DEFAULT_BRANCH=$(gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name')
223
+ git fetch origin "${DEFAULT_BRANCH}"
224
+ ```
225
+
226
+ **For each finding in `data/verified.jsonl`:**
227
+
228
+ Check if finding ID already exists in `data/patches.jsonl` (incrementality). If it does, skip.
229
+
230
+ **Dedup check**: Use the file index from `index_prs.py` output to determine if an existing open PR already addresses the same issue.
231
+
232
+ 1. **File match**: Look up the finding's file path in the `fileIndex`. If no PR touches that file, no conflict; proceed to Step 1.
233
+ 2. **Chunk overlap**: If a PR does touch the same file, read its cached diff from `data/pr-diffs/<number>.diff` and check whether the PR's changed hunks overlap with the finding's line range (startLine-endLine). Overlapping or adjacent hunks (within ~10 lines) indicate the same code region.
234
+ 3. **Same concern**: If the hunks overlap, compare the PR title and the finding title/description. Are they fixing the same kind of defect? A PR fixing an off-by-one error and a finding about a null check in the same function are different issues; in that case the finding should still proceed to patching.
235
+
236
+ Skip the finding only when there is both chunk overlap AND the PR addresses the same concern. Record it in `data/patches.jsonl` with `"status": "existing"` and `"prUrl"` pointing to the matching PR, then continue to the next finding.
237
+
238
+ **Step 1: Create worktree**
239
+
240
+ ```bash
241
+ BRANCH="warden-sweep/${RUN_ID}/${FINDING_ID}"
242
+ WORKTREE="${SWEEP_DIR}/worktrees/${FINDING_ID}"
243
+ git worktree add "${WORKTREE}" -b "${BRANCH}" "origin/${DEFAULT_BRANCH}"
244
+ ```
245
+
246
+ Each finding branches from the repo's default branch so PRs contain only the fix commit.
247
+
248
+ **Step 2: Generate fix**
249
+
250
+ Launch a Task subagent (`subagent_type: "general-purpose"`) to apply the fix in the worktree. Read `${CLAUDE_SKILL_ROOT}/references/patch-prompt.md` for the prompt template. Substitute the finding's values and worktree path into the `${...}` placeholders.
251
+
252
+ **Step 2b: Handle skipped findings**
253
+
254
+ If the subagent returned `"status": "skipped"` (not `"applied"`), do NOT proceed to Steps 3-4. Instead:
255
+ 1. Record the finding in `data/patches.jsonl` with `"status": "error"` and `"error": "Subagent skipped: ${skipReason}"`
256
+ 2. Clean up the worktree
257
+ 3. Continue to the next finding
258
+
259
+ **Step 3: Find reviewers**
260
+
261
+ ```bash
262
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/find_reviewers.py "${FILE_PATH}"
263
+ ```
264
+
265
+ **Step 4: Create draft PR**
266
+
267
+ ```bash
268
+ cd "${WORKTREE}" && git push -u origin HEAD:"${BRANCH}"
269
+ ```
270
+
271
+ Create the PR with a 1-2 sentence "What" summary based on the finding and fix, followed by the finding description and verification reasoning:
272
+
273
+ ```bash
274
+ REVIEWERS=""
275
+ # If find_reviewers.py returned reviewers, build the flags
276
+ # e.g., REVIEWERS="--reviewer user1 --reviewer user2"
277
+
278
+ gh pr create --draft \
279
+ --label "warden" \
280
+ --title "fix: ${TITLE}" \
281
+ --body "$(cat <<'EOF'
282
+ ${FIX_WHAT_DESCRIPTION}
283
+
284
+ ${DESCRIPTION}
285
+
286
+ ${REASONING}
287
+
288
+ Automated fix for Warden finding ${FINDING_ID} (${SEVERITY}, detected by ${SKILL}).
289
+
290
+ <!-- Include the "Ref #..." line below only if Phase 3 succeeded and ISSUE_NUMBER is available; remove this comment itself from the final PR body -->
291
+ Ref #${ISSUE_NUMBER}
292
+
293
+ > This PR was auto-generated by a Warden Sweep (run ${RUN_ID}).
294
+ > The finding has been validated through automated deep tracing,
295
+ > but human confirmation is requested as this is batch work.
296
+ EOF
297
+ )" ${REVIEWERS}
298
+ ```
299
+
300
+ Save the PR URL.
301
+
302
+ **Step 5: Record and cleanup**
303
+
304
+ Append to `data/patches.jsonl` (use `"created"` as status for successful PRs, not the subagent's `"applied"`):
305
+ ```json
306
+ {"findingId": "...", "prUrl": "https://...", "branch": "...", "reviewers": ["user1", "user2"], "filesChanged": ["..."], "status": "created|existing|error"}
307
+ ```
308
+
309
+ Remove the worktree:
310
+ ```bash
311
+ cd "$(git rev-parse --show-toplevel)"
312
+ git worktree remove "${WORKTREE}" --force
313
+ ```
314
+
315
+ **Error handling**: On failure at any step, write to `data/patches.jsonl` with `"status": "error"` and `"error": "..."`, clean up the worktree, and continue to the next finding.
316
+
317
+ Update manifest: set `phases.patch` to `"complete"`.
318
+
319
+ **Report** to user after all patches:
320
+
321
+ ```
322
+ ## PRs Created
323
+
324
+ **{created}** created, **{skipped}** skipped (existing), **{failed}** failed.
325
+
326
+ | # | Finding | PR | Status |
327
+ |---|---------|-----|--------|
328
+ | 1 | `security-review-a1b2c3d4` SQL injection in query builder | #142 | created |
329
+ | 2 | `code-review-e5f6g7h8` Null pointer in handler | - | existing (#138) |
330
+ ...
331
+ ```
332
+
333
+ ---
334
+
335
+ ## Phase 5: Organize
336
+
337
+ **Run** (1 tool call):
338
+
339
+ ```bash
340
+ uv run ${CLAUDE_SKILL_ROOT}/scripts/organize.py ${SWEEP_DIR}
341
+ ```
342
+
343
+ Parse the JSON stdout.
344
+
345
+ **Report** to user:
346
+
347
+ ```
348
+ ## Sweep Complete
349
+
350
+ | Metric | Count |
351
+ |--------|-------|
352
+ | Files scanned | {filesScanned} |
353
+ | Findings verified | {verified} |
354
+ | PRs created | {prsCreated} |
355
+ | Security findings | {securityFindings} |
356
+
357
+ Full report: `{summaryPath}`
358
+ ```
359
+
360
+ **On failure**: Show the error and note which steps completed.
361
+
362
+ ---
363
+
364
+ ## Resuming a Sweep
365
+
366
+ Each phase is incremental. To resume from where you left off:
367
+
368
+ 1. Check `data/manifest.json` to see which phases are complete
369
+ 2. For scan: pass `--sweep-dir` to `scan.py`
370
+ 3. For verify: existing `data/verify/<id>.json` files are skipped
371
+ 4. For issue: `create_issue.py` is idempotent (skips if `issueUrl` in manifest)
372
+ 5. For patch: existing entries in `data/patches.jsonl` are skipped
373
+ 6. For organize: safe to re-run (idempotent)
374
+
375
+ ## Output Directory Structure
376
+
377
+ ```
378
+ .warden/sweeps/<run-id>/
379
+ summary.md # Stats, key findings, PR links
380
+ findings/ # One markdown per verified finding
381
+ <finding-id>.md
382
+ security/ # Security-specific view
383
+ index.jsonl # Security findings index
384
+ <finding-id>.md # Copies of security findings
385
+ data/ # Structured data for tooling
386
+ manifest.json # Run metadata, phase state
387
+ scan-index.jsonl # Per-file scan tracking
388
+ all-findings.jsonl # Every finding from scan
389
+ verified.jsonl # Findings that passed verification
390
+ rejected.jsonl # Findings that failed verification
391
+ patches.jsonl # Finding -> PR URL -> reviewers
392
+ existing-prs.json # Cached open warden PRs
393
+ report.json # Machine-readable summary
394
+ verify/ # Individual verification results
395
+ <finding-id>.json
396
+ logs/ # Warden JSONL logs per file
397
+ <hash>.jsonl
398
+ pr-diffs/ # Cached PR diffs for dedup
399
+ <number>.diff
400
+ ```
@@ -0,0 +1,72 @@
1
+ Fix a verified code issue. You are working in a git worktree at: ${WORKTREE}
2
+
3
+ ## Finding
4
+ - Title: ${TITLE}
5
+ - File: ${FILE_PATH}:${START_LINE}
6
+ - Description: ${DESCRIPTION}
7
+ - Verification: ${REASONING}
8
+ - Suggested Fix: ${FIX_DESCRIPTION}
9
+ ```diff
10
+ ${FIX_DIFF}
11
+ ```
12
+
13
+ ## Instructions
14
+
15
+ ### Step 1: Understand the code
16
+ Read the file at ${WORKTREE}/${FILE_PATH}. Read at least 50 lines above and below the reported location. Trace callers and callees of the affected code using Grep/Glob to understand how it is used. Do NOT skip this step.
17
+
18
+ ### Step 2: Apply a minimal fix
19
+ Apply the smallest change that addresses the finding. If the suggested diff doesn't apply cleanly, adapt it while preserving intent. Do NOT refactor surrounding code, rename variables, add comments, or make any change beyond what the finding requires.
20
+
21
+ ### Step 3: Write tests
22
+ Write or update tests that verify the fix:
23
+ - Follow existing test patterns (co-located files, same framework)
24
+ - At minimum, write a test that would have caught the original bug
25
+ - Test the specific edge case, not just the happy path
26
+
27
+ Only modify the fix target and its test file.
28
+
29
+ ### Step 4: Self-review
30
+ Before staging, run `git diff` in the worktree and review every changed line. Verify:
31
+ 1. The change addresses the specific finding described, not something else
32
+ 2. No unrelated code was modified (no drive-by cleanups, no formatting changes)
33
+ 3. Trace through changed code paths: does the fix introduce any new bug, null reference, type error, or broken import?
34
+ 4. Tests exercise the fix (the failure case), not just that the code runs
35
+
36
+ If ANY check fails, fix the problem before proceeding. If the suggested fix is wrong or would introduce a regression you cannot resolve, do NOT commit. Instead, skip to the output step and report why.
37
+
38
+ ### Step 5: Commit
39
+ Do NOT run tests locally. CI will validate the changes.
40
+
41
+ Stage and commit with this exact message:
42
+
43
+ fix: ${TITLE}
44
+
45
+ Warden finding ${FINDING_ID}
46
+ Severity: ${SEVERITY}
47
+
48
+ Co-Authored-By: Warden <noreply@getsentry.com>
49
+
50
+ ### Step 6: Output
51
+ Return ONLY valid JSON (no surrounding text). Use `"status": "applied"` if you committed a fix, or `"status": "skipped"` if you did not.
52
+
53
+ ```json
54
+ {
55
+ "status": "applied",
56
+ "filesChanged": ["src/example.ts"],
57
+ "testFilesChanged": ["src/example.test.ts"],
58
+ "selfReview": "Verified the fix addresses the null check and test covers the failure case",
59
+ "skipReason": null
60
+ }
61
+ ```
62
+
63
+ When skipping:
64
+ ```json
65
+ {
66
+ "status": "skipped",
67
+ "filesChanged": [],
68
+ "testFilesChanged": [],
69
+ "selfReview": null,
70
+ "skipReason": "The suggested fix would introduce a regression in the error handling path"
71
+ }
72
+ ```
@@ -0,0 +1,25 @@
1
+ Verify a code analysis finding. Determine if this is a TRUE issue or a FALSE POSITIVE.
2
+ Do NOT write or edit any files. Research only.
3
+
4
+ ## Finding
5
+ - Title: ${TITLE}
6
+ - Severity: ${SEVERITY} | Confidence: ${CONFIDENCE}
7
+ - Skill: ${SKILL}
8
+ - Location: ${FILE_PATH}:${START_LINE}-${END_LINE}
9
+ - Description: ${DESCRIPTION}
10
+ - Verification hint: ${VERIFICATION}
11
+
12
+ ## Instructions
13
+ 1. Read the file at the reported location. Examine at least 50 lines of surrounding context.
14
+ 2. Trace data flow to/from the flagged code using Grep/Glob.
15
+ 3. Check if the issue is mitigated elsewhere (guards, validation, try/catch upstream).
16
+ 4. Check if the issue is actually reachable in practice.
17
+
18
+ Return your verdict as JSON:
19
+ {
20
+ "findingId": "${FINDING_ID}",
21
+ "verdict": "verified" or "rejected",
22
+ "confidence": "high" or "medium" or "low",
23
+ "reasoning": "2-3 sentence explanation",
24
+ "traceNotes": "What code paths you examined"
25
+ }
@@ -0,0 +1,99 @@
1
+ """Shared utilities for warden-sweep scripts."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ import subprocess
7
+ from typing import Any
8
+
9
+
10
def run_cmd(
    args: list[str], timeout: int = 30, cwd: str | None = None
) -> subprocess.CompletedProcess[str]:
    """Execute *args* as a subprocess with captured text output.

    Args:
        args: Command and arguments as a list (no shell involved).
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is raised.
        cwd: Working directory for the command, or None for the current one.

    Returns:
        The completed process; ``stdout``/``stderr`` are decoded strings.
    """
    return subprocess.run(args, capture_output=True, text=True, timeout=timeout, cwd=cwd)
21
+
22
+
23
def run_cmd_stdout(
    args: list[str], timeout: int = 30, cwd: str | None = None
) -> str | None:
    """Run *args* and return its stripped stdout, or None on any failure.

    Failure covers a nonzero exit code, a timeout, or a missing executable.
    """
    try:
        proc = run_cmd(args, timeout=timeout, cwd=cwd)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.strip()
32
+
33
+
34
def read_json(path: str) -> dict[str, Any] | None:
    """Parse the JSON file at *path*; return None if missing or unreadable.

    Malformed JSON and OS-level read errors are treated the same as a
    missing file, so callers only need a single None check.
    """
    if not os.path.exists(path):
        return None
    try:
        with open(path) as fh:
            return json.load(fh)
    except (OSError, json.JSONDecodeError):
        return None
43
+
44
+
45
def write_json(path: str, data: dict[str, Any]) -> None:
    """Serialize *data* to *path* as 2-space-indented JSON plus a trailing newline."""
    serialized = json.dumps(data, indent=2) + "\n"
    with open(path, "w") as fh:
        fh.write(serialized)
50
+
51
+
52
def read_jsonl(path: str) -> list[dict[str, Any]]:
    """Parse a JSONL file into a list of objects.

    Blank lines and lines that fail to parse as JSON are skipped silently;
    a nonexistent file yields an empty list.
    """
    if not os.path.exists(path):
        return []
    parsed: list[dict[str, Any]] = []
    with open(path) as fh:
        for raw in fh:
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                parsed.append(json.loads(stripped))
            except json.JSONDecodeError:
                pass
    return parsed
67
+
68
+
69
def severity_badge(severity: str) -> str:
    """Return the markdown badge for *severity*; unknown values pass through.

    critical/high render bolded uppercase, medium/low plain uppercase,
    and info stays lowercase. Lookup is case-sensitive.
    """
    for name, badge in (
        ("critical", "**CRITICAL**"),
        ("high", "**HIGH**"),
        ("medium", "MEDIUM"),
        ("low", "LOW"),
        ("info", "info"),
    ):
        if severity == name:
            return badge
    return severity
79
+
80
+
81
def pr_number_from_url(pr_url: str) -> str:
    """Return the last path segment (the PR or issue number) of a GitHub URL."""
    trimmed = pr_url.rstrip("/")
    _, _, tail = trimmed.rpartition("/")
    # rpartition yields ("", "", trimmed) when there is no slash, so a bare
    # number passes through unchanged, matching split("/")[-1] behavior.
    return tail or trimmed
84
+
85
+
86
def ensure_github_label(name: str, color: str, description: str) -> None:
    """Best-effort creation of a GitHub label via `gh` (idempotent).

    `gh label create` exits nonzero when the label already exists, which is
    harmless here since the result is ignored; a timeout or a missing `gh`
    binary is swallowed so callers never crash over labeling.
    """
    cmd = [
        "gh", "label", "create", name,
        "--color", color,
        "--description", description,
    ]
    try:
        subprocess.run(cmd, capture_output=True, timeout=15)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        pass