@sentry/warden 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/agents.lock +7 -0
  2. package/dist/cli/args.d.ts +14 -12
  3. package/dist/cli/args.d.ts.map +1 -1
  4. package/dist/cli/args.js +44 -1
  5. package/dist/cli/args.js.map +1 -1
  6. package/dist/cli/commands/init.d.ts +0 -3
  7. package/dist/cli/commands/init.d.ts.map +1 -1
  8. package/dist/cli/commands/init.js +206 -19
  9. package/dist/cli/commands/init.js.map +1 -1
  10. package/dist/cli/commands/logs.d.ts +19 -0
  11. package/dist/cli/commands/logs.d.ts.map +1 -0
  12. package/dist/cli/commands/logs.js +419 -0
  13. package/dist/cli/commands/logs.js.map +1 -0
  14. package/dist/cli/main.d.ts.map +1 -1
  15. package/dist/cli/main.js +54 -21
  16. package/dist/cli/main.js.map +1 -1
  17. package/dist/cli/output/formatters.d.ts +2 -1
  18. package/dist/cli/output/formatters.d.ts.map +1 -1
  19. package/dist/cli/output/formatters.js +22 -19
  20. package/dist/cli/output/formatters.js.map +1 -1
  21. package/dist/cli/output/index.d.ts +1 -1
  22. package/dist/cli/output/index.d.ts.map +1 -1
  23. package/dist/cli/output/index.js +1 -1
  24. package/dist/cli/output/index.js.map +1 -1
  25. package/dist/cli/output/ink-runner.js +1 -1
  26. package/dist/cli/output/ink-runner.js.map +1 -1
  27. package/dist/cli/output/jsonl.d.ts +49 -13
  28. package/dist/cli/output/jsonl.d.ts.map +1 -1
  29. package/dist/cli/output/jsonl.js +137 -4
  30. package/dist/cli/output/jsonl.js.map +1 -1
  31. package/dist/cli/output/tasks.d.ts.map +1 -1
  32. package/dist/cli/output/tasks.js +1 -22
  33. package/dist/cli/output/tasks.js.map +1 -1
  34. package/dist/cli/terminal.d.ts.map +1 -1
  35. package/dist/cli/terminal.js +0 -2
  36. package/dist/cli/terminal.js.map +1 -1
  37. package/dist/config/schema.d.ts +49 -98
  38. package/dist/config/schema.d.ts.map +1 -1
  39. package/dist/config/schema.js +0 -12
  40. package/dist/config/schema.js.map +1 -1
  41. package/dist/evals/runner.d.ts.map +1 -1
  42. package/dist/evals/runner.js +0 -1
  43. package/dist/evals/runner.js.map +1 -1
  44. package/dist/evals/types.d.ts +9 -15
  45. package/dist/evals/types.d.ts.map +1 -1
  46. package/dist/output/github-checks.d.ts +1 -1
  47. package/dist/output/github-checks.d.ts.map +1 -1
  48. package/dist/output/github-checks.js +2 -6
  49. package/dist/output/github-checks.js.map +1 -1
  50. package/dist/output/issue-renderer.js +1 -1
  51. package/dist/output/issue-renderer.js.map +1 -1
  52. package/dist/sdk/analyze.d.ts.map +1 -1
  53. package/dist/sdk/analyze.js +13 -26
  54. package/dist/sdk/analyze.js.map +1 -1
  55. package/dist/sdk/auth.d.ts +16 -0
  56. package/dist/sdk/auth.d.ts.map +1 -0
  57. package/dist/sdk/auth.js +37 -0
  58. package/dist/sdk/auth.js.map +1 -0
  59. package/dist/sdk/errors.d.ts +5 -0
  60. package/dist/sdk/errors.d.ts.map +1 -1
  61. package/dist/sdk/errors.js +20 -0
  62. package/dist/sdk/errors.js.map +1 -1
  63. package/dist/sdk/prompt.js +1 -1
  64. package/dist/sdk/runner.d.ts +2 -1
  65. package/dist/sdk/runner.d.ts.map +1 -1
  66. package/dist/sdk/runner.js +3 -1
  67. package/dist/sdk/runner.js.map +1 -1
  68. package/dist/sdk/types.d.ts +0 -3
  69. package/dist/sdk/types.d.ts.map +1 -1
  70. package/dist/sdk/types.js.map +1 -1
  71. package/dist/types/index.d.ts +23 -24
  72. package/dist/types/index.d.ts.map +1 -1
  73. package/dist/types/index.js +19 -7
  74. package/dist/types/index.js.map +1 -1
  75. package/package.json +1 -1
  76. package/skills/warden/SKILL.md +76 -0
  77. package/skills/warden/references/cli-reference.md +142 -0
  78. package/skills/warden/references/config-schema.md +111 -0
  79. package/skills/warden/references/configuration.md +110 -0
  80. package/skills/warden/references/creating-skills.md +84 -0
  81. package/skills/warden-sweep/SKILL.md +407 -0
  82. package/skills/warden-sweep/scripts/_utils.py +37 -0
  83. package/skills/warden-sweep/scripts/extract_findings.py +219 -0
  84. package/skills/warden-sweep/scripts/find_reviewers.py +115 -0
  85. package/skills/warden-sweep/scripts/generate_report.py +271 -0
  86. package/skills/warden-sweep/scripts/index_prs.py +187 -0
  87. package/skills/warden-sweep/scripts/organize.py +315 -0
  88. package/skills/warden-sweep/scripts/scan.py +632 -0
  89. package/dist/sdk/session.d.ts +0 -43
  90. package/dist/sdk/session.d.ts.map +0 -1
  91. package/dist/sdk/session.js +0 -105
  92. package/dist/sdk/session.js.map +0 -1
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.9"
4
+ # ///
5
+ """
6
+ Find top git contributors for a file to use as PR reviewers.
7
+
8
+ Usage:
9
+ python find_reviewers.py <file-path>
10
+ python find_reviewers.py src/foo.ts
11
+
12
+ Output: JSON to stdout with GitHub usernames of top 2 contributors
13
+ from the last 12 months.
14
+
15
+ {"reviewers": ["user1", "user2"]}
16
+
17
+ If no contributors found or mapping fails, returns empty list.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import json
23
+ import subprocess
24
+ import sys
25
+
26
+
27
+ def run_cmd(args: list[str], timeout: int = 30) -> str | None:
28
+ """Run a command and return stdout, or None on failure."""
29
+ try:
30
+ result = subprocess.run(
31
+ args,
32
+ capture_output=True,
33
+ text=True,
34
+ timeout=timeout,
35
+ )
36
+ return result.stdout.strip() if result.returncode == 0 else None
37
+ except (subprocess.TimeoutExpired, FileNotFoundError):
38
+ return None
39
+
40
+
41
def get_top_authors(file_path: str, count: int = 2) -> list[str]:
    """Return the *count* most frequent author emails for *file_path*.

    Only commits from the last 12 months are considered. Returns an
    empty list when the file has no recent history or git is
    unavailable (run_cmd returns None on any failure).
    """
    # Stdlib Counter replaces the hand-rolled tally + sort; ties keep
    # first-seen order, matching the stable descending sort it replaces.
    from collections import Counter

    output = run_cmd([
        "git", "log",
        "--format=%ae",
        "--since=12 months ago",
        "--", file_path,
    ])

    if not output:
        return []

    emails = (line.strip() for line in output.splitlines())
    counts = Counter(e for e in emails if e)
    return [email for email, _ in counts.most_common(count)]
64
+
65
+
66
+ def email_to_github_username(email: str) -> str | None:
67
+ """Try to map a git email to a GitHub username.
68
+
69
+ Extracts from noreply emails directly. For other emails,
70
+ uses the GitHub search-by-email API via gh CLI.
71
+ """
72
+ # Handle GitHub noreply emails directly
73
+ if email.endswith("@users.noreply.github.com"):
74
+ # Format: 12345+username@users.noreply.github.com
75
+ # or: username@users.noreply.github.com
76
+ local = email.split("@")[0]
77
+ if "+" in local:
78
+ return local.split("+", 1)[1]
79
+ return local
80
+
81
+ # gh api handles URL encoding; pass email directly in the query
82
+ output = run_cmd([
83
+ "gh", "api", f"search/users?q={email}+in:email",
84
+ "--jq", ".items[0].login",
85
+ ])
86
+ return output if output else None
87
+
88
+
89
def main():
    """CLI entry point: print JSON reviewer usernames for one file."""
    parser = argparse.ArgumentParser(
        description="Find top git contributors for PR reviewer assignment"
    )
    parser.add_argument("file_path", help="Path to the file to find reviewers for")
    parser.add_argument(
        "--count", type=int, default=2,
        help="Number of reviewers to find (default: 2)",
    )
    cli_args = parser.parse_args()

    author_emails = get_top_authors(cli_args.file_path, cli_args.count)
    if not author_emails:
        print(json.dumps({"reviewers": [], "note": "No recent authors found"}))
        return

    # Keep only emails that resolve to a GitHub account.
    usernames = [
        name
        for name in (email_to_github_username(e) for e in author_emails)
        if name
    ]

    print(json.dumps({"reviewers": usernames}))


if __name__ == "__main__":
    main()
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.9"
4
+ # ///
5
+ """
6
+ Generate summary.md and report.json from a completed sweep.
7
+
8
+ Usage:
9
+ python generate_report.py <sweep-dir>
10
+
11
+ Reads the data/ subdirectory for all-findings.jsonl, verified.jsonl,
12
+ rejected.jsonl, patches.jsonl, and security/index.jsonl, then produces:
13
+ - <sweep-dir>/summary.md
14
+ - <sweep-dir>/data/report.json
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import json
20
+ import os
21
+ import sys
22
+ from datetime import datetime, timezone
23
+ from typing import Any
24
+
25
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
26
+ from _utils import read_jsonl # noqa: E402
27
+
28
+
29
+ def read_json(path: str) -> dict[str, Any] | None:
30
+ """Read a JSON file and return parsed object."""
31
+ if not os.path.exists(path):
32
+ return None
33
+ try:
34
+ with open(path) as f:
35
+ return json.load(f)
36
+ except (json.JSONDecodeError, OSError):
37
+ return None
38
+
39
+
40
def severity_badge(severity: str) -> str:
    """Map a severity name to its markdown display form.

    Critical and high are bolded and uppercased, medium/low are
    uppercased, "info" stays lowercase, and unknown severities pass
    through unchanged.
    """
    if severity in ("critical", "high"):
        return f"**{severity.upper()}**"
    if severity in ("medium", "low"):
        return severity.upper()
    if severity == "info":
        return "info"
    return severity
50
+
51
+
52
def generate_summary_md(
    manifest: dict[str, Any],
    scan_index: list[dict[str, Any]],
    all_findings: list[dict[str, Any]],
    verified: list[dict[str, Any]],
    rejected: list[dict[str, Any]],
    patches: list[dict[str, Any]],
    security_index: list[dict[str, Any]],
) -> str:
    """Render the markdown body of summary.md for a completed sweep.

    Builds, in order: a header with run metadata, an aggregate stats
    table, a per-severity breakdown of verified findings, a security
    findings table, a verified findings table (with PR links resolved
    from `patches`), and a short rejected-findings note. Returns the
    full document as a single newline-terminated string.
    """
    run_id = manifest.get("runId", "unknown")
    started_at = manifest.get("startedAt", "unknown")
    repo = manifest.get("repo", "unknown")
    # Completion time is "now" in UTC; manifest only records the start.
    completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
    files_errored = sum(1 for e in scan_index if e.get("status") == "error")

    prs_created = sum(1 for p in patches if p.get("status") == "created")
    prs_failed = sum(1 for p in patches if p.get("status") == "error")

    # Severity breakdown of verified findings
    by_severity: dict[str, int] = {}
    for f in verified:
        sev = f.get("severity", "info")
        by_severity[sev] = by_severity.get(sev, 0) + 1

    lines = [
        f"# Warden Sweep: `{run_id}`",
        "",
        f"**Repo**: {repo}",
        f"**Started**: {started_at}",
        f"**Completed**: {completed_at}",
        "",
        "## Stats",
        "",
        f"| Metric | Count |",
        f"|--------|-------|",
        f"| Files scanned | {files_scanned} |",
        f"| Files errored | {files_errored} |",
        f"| Total findings | {len(all_findings)} |",
        f"| Verified | {len(verified)} |",
        f"| Rejected | {len(rejected)} |",
        f"| PRs created | {prs_created} |",
        f"| PRs failed | {prs_failed} |",
        f"| Security findings | {len(security_index)} |",
        "",
    ]

    # Only severities that actually occurred are listed, highest first.
    if by_severity:
        lines.append("### By Severity")
        lines.append("")
        for sev in ["critical", "high", "medium", "low", "info"]:
            count = by_severity.get(sev, 0)
            if count > 0:
                lines.append(f"- {severity_badge(sev)}: {count}")
        lines.append("")

    # Security callout
    if security_index:
        lines.append("## Security Findings")
        lines.append("")
        lines.append("The following findings are security-related and may need priority review:")
        lines.append("")
        lines.append("| ID | Severity | Skill | File | Title |")
        lines.append("|----|----------|-------|------|-------|")
        for sf in security_index:
            fid = sf.get("findingId", "")
            sev = severity_badge(sf.get("severity", "info"))
            skill = sf.get("skill", "")
            filepath = sf.get("file", "")
            title = sf.get("title", "")
            lines.append(f"| `{fid}` | {sev} | {skill} | `{filepath}` | {title} |")
        lines.append("")

    # Verified findings table
    if verified:
        lines.append("## Verified Findings")
        lines.append("")
        lines.append("| ID | Severity | Skill | File | Title | PR |")
        lines.append("|----|----------|-------|------|-------|-----|")

        # Build patches lookup: findingId -> PR URL for successful patches.
        pr_lookup: dict[str, str] = {}
        for p in patches:
            if p.get("status") == "created" and p.get("findingId"):
                pr_lookup[p["findingId"]] = p.get("prUrl", "")

        for f in verified:
            fid = f.get("findingId", "")
            sev = severity_badge(f.get("severity", "info"))
            skill = f.get("skill", "")
            filepath = f.get("file", "")
            title = f.get("title", "")
            pr_url = pr_lookup.get(fid, "")
            # "-" marks verified findings that produced no PR.
            pr_link = f"[PR]({pr_url})" if pr_url else "-"
            lines.append(f"| `{fid}` | {sev} | {skill} | `{filepath}` | {title} | {pr_link} |")
        lines.append("")

    # Rejected findings summary: count only; details live in the JSONL file.
    if rejected:
        lines.append(f"## Rejected Findings ({len(rejected)})")
        lines.append("")
        lines.append("These findings were evaluated and determined to be false positives.")
        lines.append("See `data/rejected.jsonl` for details.")
        lines.append("")

    lines.append("---")
    lines.append(f"*Generated by Warden Sweep `{run_id}`*")

    return "\n".join(lines) + "\n"
163
+
164
+
165
def generate_report_json(
    manifest: dict[str, Any],
    scan_index: list[dict[str, Any]],
    all_findings: list[dict[str, Any]],
    verified: list[dict[str, Any]],
    rejected: list[dict[str, Any]],
    patches: list[dict[str, Any]],
    security_index: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build the machine-readable report.json payload for a sweep.

    Aggregates scan/verify/patch counters from the sweep artifacts and
    lists every created PR together with the severity of the verified
    finding it fixes. "errors" counts findings that were neither
    verified nor rejected (i.e. the verify step failed on them).
    """
    run_id = manifest.get("runId", "unknown")
    completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
    prs_created = sum(1 for p in patches if p.get("status") == "created")
    prs_failed = sum(1 for p in patches if p.get("status") == "error")

    # Count verify errors (findings in all but not in verified or rejected)
    verified_ids = {f["findingId"] for f in verified if "findingId" in f}
    rejected_ids = {f["findingId"] for f in rejected if "findingId" in f}
    all_ids = {f["findingId"] for f in all_findings if "findingId" in f}
    verify_errors = len(all_ids - verified_ids - rejected_ids)

    # One severity lookup table instead of a linear scan per created PR
    # (was O(patches * verified)). First occurrence wins, matching the
    # previous next(...) semantics.
    severity_by_id: dict[Any, str] = {}
    for f in verified:
        fid = f.get("findingId")
        if fid not in severity_by_id:
            severity_by_id[fid] = f.get("severity", "")

    return {
        "runId": run_id,
        "completedAt": completed_at,
        "scan": {
            "filesScanned": files_scanned,
            "totalFindings": len(all_findings),
        },
        "verify": {
            "verified": len(verified),
            "rejected": len(rejected),
            "errors": verify_errors,
        },
        "patch": {
            "prsCreated": prs_created,
            "prsFailed": prs_failed,
        },
        "security": {
            "count": len(security_index),
        },
        "prs": [
            {
                "findingId": p.get("findingId", ""),
                "url": p.get("prUrl", ""),
                "severity": severity_by_id.get(p.get("findingId"), ""),
            }
            for p in patches
            if p.get("status") == "created"
        ],
    }
220
+
221
+
222
def main():
    """CLI entry point: write summary.md and report.json for a sweep.

    Reads the sweep artifacts from <sweep-dir>/data (and the security
    index from <sweep-dir>/security), then writes the markdown summary
    to <sweep-dir>/summary.md and the JSON report to
    <sweep-dir>/data/report.json, printing a small JSON status line.
    """
    parser = argparse.ArgumentParser(
        description="Generate sweep summary and report"
    )
    parser.add_argument("sweep_dir", help="Path to the sweep output directory")
    args = parser.parse_args()

    sweep_dir = args.sweep_dir
    data_dir = os.path.join(sweep_dir, "data")

    # Read inputs; missing/invalid files degrade to {} / [] rather than fail.
    manifest = read_json(os.path.join(data_dir, "manifest.json")) or {}
    scan_index = read_jsonl(os.path.join(data_dir, "scan-index.jsonl"))
    all_findings = read_jsonl(os.path.join(data_dir, "all-findings.jsonl"))
    verified = read_jsonl(os.path.join(data_dir, "verified.jsonl"))
    rejected = read_jsonl(os.path.join(data_dir, "rejected.jsonl"))
    patches = read_jsonl(os.path.join(data_dir, "patches.jsonl"))
    security_index = read_jsonl(os.path.join(sweep_dir, "security", "index.jsonl"))

    # Generate summary.md
    summary_md = generate_summary_md(
        manifest, scan_index,
        all_findings, verified, rejected, patches, security_index,
    )
    summary_path = os.path.join(sweep_dir, "summary.md")
    with open(summary_path, "w") as f:
        f.write(summary_md)

    # Generate report.json (trailing newline for POSIX-friendly files)
    report = generate_report_json(
        manifest, scan_index, all_findings,
        verified, rejected, patches, security_index,
    )
    report_path = os.path.join(data_dir, "report.json")
    with open(report_path, "w") as f:
        json.dump(report, f, indent=2)
        f.write("\n")

    # Machine-readable status for the caller (paths + headline counts).
    print(json.dumps({
        "summaryPath": summary_path,
        "reportPath": report_path,
        "verified": len(verified),
        "rejected": len(rejected),
        "prsCreated": report["patch"]["prsCreated"],
        "securityFindings": len(security_index),
    }))


if __name__ == "__main__":
    main()
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.9"
4
+ # ///
5
+ """
6
+ Warden Sweep: Index existing PRs for deduplication.
7
+
8
+ Fetches open warden-labeled PRs via gh, identifies file overlap with
9
+ verified findings, and caches diffs for overlapping PRs.
10
+
11
+ Usage:
12
+ uv run index_prs.py <sweep-dir>
13
+
14
+ Stdout: JSON summary (for LLM consumption)
15
+ Stderr: Progress lines
16
+
17
+ Side effects:
18
+ - Creates data/existing-prs.json
19
+ - Creates data/pr-diffs/<number>.diff for overlapping PRs
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import argparse
24
+ import json
25
+ import os
26
+ import subprocess
27
+ import sys
28
+ from typing import Any
29
+
30
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
31
+ from _utils import read_jsonl, run_cmd # noqa: E402
32
+
33
+
34
def fetch_warden_prs(sweep_dir: str) -> list[dict[str, Any]]:
    """Fetch open warden-labeled PRs via `gh` and cache them to disk.

    Writes the raw PR list to <sweep-dir>/data/existing-prs.json as a
    side effect. Returns an empty list (after a stderr warning) when
    the gh call fails or its output is not valid JSON.
    """
    cmd = [
        "gh", "pr", "list",
        "--label", "warden",
        "--state", "open",
        "--json", "number,title,url,files",
        "--limit", "100",
    ]
    result = run_cmd(cmd, timeout=30)

    if result.returncode != 0:
        print(f"Warning: gh pr list failed: {result.stderr}", file=sys.stderr)
        return []

    try:
        pr_list = json.loads(result.stdout)
    except json.JSONDecodeError:
        print("Warning: Failed to parse gh pr list output", file=sys.stderr)
        return []

    # Save raw PR data
    cache_path = os.path.join(sweep_dir, "data", "existing-prs.json")
    with open(cache_path, "w") as out:
        json.dump(pr_list, out, indent=2)
        out.write("\n")

    return pr_list
64
+
65
+
66
def build_file_index(
    prs: list[dict[str, Any]],
) -> dict[str, list[dict[str, Any]]]:
    """Invert the PR list into a mapping of file path -> PRs touching it.

    Each value entry carries only the PR's number, title, and url.
    Entries with an empty path are skipped.
    """
    index: dict[str, list[dict[str, Any]]] = {}

    for pr in prs:
        summary = {
            "number": pr.get("number"),
            "title": pr.get("title", ""),
            "url": pr.get("url", ""),
        }
        for file_entry in pr.get("files") or []:
            # gh returns files as objects with "path" key
            path = (
                file_entry.get("path", "")
                if isinstance(file_entry, dict)
                else str(file_entry)
            )
            if path:
                index.setdefault(path, []).append(summary)

    return index
89
+
90
+
91
def get_verified_files(sweep_dir: str) -> set[str]:
    """Return the set of file paths that have at least one verified finding."""
    path = os.path.join(sweep_dir, "data", "verified.jsonl")
    files: set[str] = set()
    for entry in read_jsonl(path):
        name = entry.get("file", "")
        if name:
            files.add(name)
    return files
96
+
97
+
98
def fetch_pr_diff(pr_number: int, sweep_dir: str) -> bool:
    """Fetch a PR's diff via `gh` and cache it under data/pr-diffs/.

    Returns True when the diff is on disk afterwards (including the
    already-cached case), False when the gh call fails.
    """
    cache_path = os.path.join(
        sweep_dir, "data", "pr-diffs", f"{pr_number}.diff"
    )

    # Skip if already cached
    if os.path.exists(cache_path):
        return True

    result = run_cmd(["gh", "pr", "diff", str(pr_number)], timeout=30)

    if result.returncode != 0:
        print(
            f"Warning: Failed to fetch diff for PR #{pr_number}: {result.stderr}",
            file=sys.stderr,
        )
        return False

    with open(cache_path, "w") as out:
        out.write(result.stdout)

    return True
124
+
125
+
126
def main() -> None:
    """CLI entry point: index existing warden PRs for deduplication.

    Fetches open warden-labeled PRs, intersects their touched files
    with files that have verified findings, caches diffs for the
    overlapping PRs, and prints a JSON summary to stdout (progress
    goes to stderr). Exits 1 if the sweep directory does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Warden Sweep: Index existing PRs for dedup"
    )
    parser.add_argument("sweep_dir", help="Path to the sweep directory")
    args = parser.parse_args()

    sweep_dir = args.sweep_dir

    # Error goes to stdout as JSON so the LLM caller can parse it.
    if not os.path.isdir(sweep_dir):
        print(
            json.dumps({"error": f"Sweep directory not found: {sweep_dir}"}),
            file=sys.stdout,
        )
        sys.exit(1)

    # Ensure pr-diffs directory exists
    os.makedirs(os.path.join(sweep_dir, "data", "pr-diffs"), exist_ok=True)

    # Fetch open warden PRs
    print("Fetching open warden-labeled PRs...", file=sys.stderr)
    prs = fetch_warden_prs(sweep_dir)
    print(f"Found {len(prs)} open warden PR(s)", file=sys.stderr)

    # Build file index
    file_index = build_file_index(prs)

    # Find overlap with verified findings
    verified_files = get_verified_files(sweep_dir)
    overlapping_prs: set[int] = set()

    for vfile in verified_files:
        if vfile in file_index:
            for pr_info in file_index[vfile]:
                overlapping_prs.add(pr_info["number"])

    # Fetch diffs for overlapping PRs (sorted for deterministic order)
    diffs_cached = 0
    for pr_number in sorted(overlapping_prs):
        print(f"Caching diff for PR #{pr_number}...", file=sys.stderr)
        if fetch_pr_diff(pr_number, sweep_dir):
            diffs_cached += 1

    # Build output file index (only for files that have verified findings)
    output_file_index: dict[str, list[dict[str, Any]]] = {}
    for vfile in verified_files:
        if vfile in file_index:
            output_file_index[vfile] = file_index[vfile]

    # Output summary
    output = {
        "totalPRs": len(prs),
        "overlappingPRs": len(overlapping_prs),
        "fileIndex": output_file_index,
        "diffsCached": diffs_cached,
    }

    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()