@sentry/warden 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.lock +7 -0
- package/dist/cli/args.d.ts +14 -12
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +44 -1
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands/init.d.ts +0 -3
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +206 -19
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/logs.d.ts +19 -0
- package/dist/cli/commands/logs.d.ts.map +1 -0
- package/dist/cli/commands/logs.js +419 -0
- package/dist/cli/commands/logs.js.map +1 -0
- package/dist/cli/main.d.ts.map +1 -1
- package/dist/cli/main.js +54 -21
- package/dist/cli/main.js.map +1 -1
- package/dist/cli/output/formatters.d.ts +2 -1
- package/dist/cli/output/formatters.d.ts.map +1 -1
- package/dist/cli/output/formatters.js +22 -19
- package/dist/cli/output/formatters.js.map +1 -1
- package/dist/cli/output/index.d.ts +1 -1
- package/dist/cli/output/index.d.ts.map +1 -1
- package/dist/cli/output/index.js +1 -1
- package/dist/cli/output/index.js.map +1 -1
- package/dist/cli/output/ink-runner.js +1 -1
- package/dist/cli/output/ink-runner.js.map +1 -1
- package/dist/cli/output/jsonl.d.ts +49 -13
- package/dist/cli/output/jsonl.d.ts.map +1 -1
- package/dist/cli/output/jsonl.js +137 -4
- package/dist/cli/output/jsonl.js.map +1 -1
- package/dist/cli/output/tasks.d.ts.map +1 -1
- package/dist/cli/output/tasks.js +1 -22
- package/dist/cli/output/tasks.js.map +1 -1
- package/dist/cli/terminal.d.ts.map +1 -1
- package/dist/cli/terminal.js +0 -2
- package/dist/cli/terminal.js.map +1 -1
- package/dist/config/schema.d.ts +49 -98
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +0 -12
- package/dist/config/schema.js.map +1 -1
- package/dist/evals/runner.d.ts.map +1 -1
- package/dist/evals/runner.js +0 -1
- package/dist/evals/runner.js.map +1 -1
- package/dist/evals/types.d.ts +9 -15
- package/dist/evals/types.d.ts.map +1 -1
- package/dist/output/github-checks.d.ts +1 -1
- package/dist/output/github-checks.d.ts.map +1 -1
- package/dist/output/github-checks.js +2 -6
- package/dist/output/github-checks.js.map +1 -1
- package/dist/output/issue-renderer.js +1 -1
- package/dist/output/issue-renderer.js.map +1 -1
- package/dist/sdk/analyze.d.ts.map +1 -1
- package/dist/sdk/analyze.js +13 -26
- package/dist/sdk/analyze.js.map +1 -1
- package/dist/sdk/auth.d.ts +16 -0
- package/dist/sdk/auth.d.ts.map +1 -0
- package/dist/sdk/auth.js +37 -0
- package/dist/sdk/auth.js.map +1 -0
- package/dist/sdk/errors.d.ts +5 -0
- package/dist/sdk/errors.d.ts.map +1 -1
- package/dist/sdk/errors.js +20 -0
- package/dist/sdk/errors.js.map +1 -1
- package/dist/sdk/prompt.js +1 -1
- package/dist/sdk/runner.d.ts +2 -1
- package/dist/sdk/runner.d.ts.map +1 -1
- package/dist/sdk/runner.js +3 -1
- package/dist/sdk/runner.js.map +1 -1
- package/dist/sdk/types.d.ts +0 -3
- package/dist/sdk/types.d.ts.map +1 -1
- package/dist/sdk/types.js.map +1 -1
- package/dist/types/index.d.ts +23 -24
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +19 -7
- package/dist/types/index.js.map +1 -1
- package/package.json +1 -1
- package/skills/warden/SKILL.md +76 -0
- package/skills/warden/references/cli-reference.md +142 -0
- package/skills/warden/references/config-schema.md +111 -0
- package/skills/warden/references/configuration.md +110 -0
- package/skills/warden/references/creating-skills.md +84 -0
- package/skills/warden-sweep/SKILL.md +407 -0
- package/skills/warden-sweep/scripts/_utils.py +37 -0
- package/skills/warden-sweep/scripts/extract_findings.py +219 -0
- package/skills/warden-sweep/scripts/find_reviewers.py +115 -0
- package/skills/warden-sweep/scripts/generate_report.py +271 -0
- package/skills/warden-sweep/scripts/index_prs.py +187 -0
- package/skills/warden-sweep/scripts/organize.py +315 -0
- package/skills/warden-sweep/scripts/scan.py +632 -0
- package/dist/sdk/session.d.ts +0 -43
- package/dist/sdk/session.d.ts.map +0 -1
- package/dist/sdk/session.js +0 -105
- package/dist/sdk/session.js.map +0 -1
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Find top git contributors for a file to use as PR reviewers.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python find_reviewers.py <file-path>
|
|
10
|
+
python find_reviewers.py src/foo.ts
|
|
11
|
+
|
|
12
|
+
Output: JSON to stdout with GitHub usernames of top 2 contributors
|
|
13
|
+
from the last 12 months.
|
|
14
|
+
|
|
15
|
+
{"reviewers": ["user1", "user2"]}
|
|
16
|
+
|
|
17
|
+
If no contributors found or mapping fails, returns empty list.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import subprocess
|
|
24
|
+
import sys
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run_cmd(args: list[str], timeout: int = 30) -> str | None:
|
|
28
|
+
"""Run a command and return stdout, or None on failure."""
|
|
29
|
+
try:
|
|
30
|
+
result = subprocess.run(
|
|
31
|
+
args,
|
|
32
|
+
capture_output=True,
|
|
33
|
+
text=True,
|
|
34
|
+
timeout=timeout,
|
|
35
|
+
)
|
|
36
|
+
return result.stdout.strip() if result.returncode == 0 else None
|
|
37
|
+
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_top_authors(file_path: str, count: int = 2) -> list[str]:
    """Return the *count* most frequent author emails for *file_path*.

    Counts commits touching the file over the last 12 months.  Ties keep
    first-seen order (``Counter.most_common`` is stable, matching the
    previous stable-sort behavior).

    Returns an empty list when the file has no recent history or git is
    unavailable.
    """
    # Local import: keeps the module's top-level dependencies minimal.
    from collections import Counter

    output = run_cmd([
        "git", "log",
        "--format=%ae",
        "--since=12 months ago",
        "--", file_path,
    ])

    if not output:
        return []

    # Tally non-empty author emails; Counter replaces the manual dict count.
    counts = Counter(
        email.strip() for email in output.splitlines() if email.strip()
    )

    return [email for email, _ in counts.most_common(count)]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def email_to_github_username(email: str) -> str | None:
|
|
67
|
+
"""Try to map a git email to a GitHub username.
|
|
68
|
+
|
|
69
|
+
Extracts from noreply emails directly. For other emails,
|
|
70
|
+
uses the GitHub search-by-email API via gh CLI.
|
|
71
|
+
"""
|
|
72
|
+
# Handle GitHub noreply emails directly
|
|
73
|
+
if email.endswith("@users.noreply.github.com"):
|
|
74
|
+
# Format: 12345+username@users.noreply.github.com
|
|
75
|
+
# or: username@users.noreply.github.com
|
|
76
|
+
local = email.split("@")[0]
|
|
77
|
+
if "+" in local:
|
|
78
|
+
return local.split("+", 1)[1]
|
|
79
|
+
return local
|
|
80
|
+
|
|
81
|
+
# gh api handles URL encoding; pass email directly in the query
|
|
82
|
+
output = run_cmd([
|
|
83
|
+
"gh", "api", f"search/users?q={email}+in:email",
|
|
84
|
+
"--jq", ".items[0].login",
|
|
85
|
+
])
|
|
86
|
+
return output if output else None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def main():
    """CLI entry point: print reviewer JSON for the requested file."""
    parser = argparse.ArgumentParser(
        description="Find top git contributors for PR reviewer assignment"
    )
    parser.add_argument("file_path", help="Path to the file to find reviewers for")
    parser.add_argument(
        "--count", type=int, default=2,
        help="Number of reviewers to find (default: 2)",
    )
    opts = parser.parse_args()

    author_emails = get_top_authors(opts.file_path, opts.count)
    if not author_emails:
        print(json.dumps({"reviewers": [], "note": "No recent authors found"}))
        return

    # Keep only emails that resolve to a GitHub username.
    reviewers = [
        name
        for addr in author_emails
        if (name := email_to_github_username(addr))
    ]

    print(json.dumps({"reviewers": reviewers}))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Generate summary.md and report.json from a completed sweep.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python generate_report.py <sweep-dir>
|
|
10
|
+
|
|
11
|
+
Reads the data/ subdirectory for all-findings.jsonl, verified.jsonl,
|
|
12
|
+
rejected.jsonl, patches.jsonl, and security/index.jsonl, then produces:
|
|
13
|
+
- <sweep-dir>/summary.md
|
|
14
|
+
- <sweep-dir>/data/report.json
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
26
|
+
from _utils import read_jsonl # noqa: E402
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def read_json(path: str) -> dict[str, Any] | None:
|
|
30
|
+
"""Read a JSON file and return parsed object."""
|
|
31
|
+
if not os.path.exists(path):
|
|
32
|
+
return None
|
|
33
|
+
try:
|
|
34
|
+
with open(path) as f:
|
|
35
|
+
return json.load(f)
|
|
36
|
+
except (json.JSONDecodeError, OSError):
|
|
37
|
+
return None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def severity_badge(severity: str) -> str:
    """Map a severity name to its markdown label; unknown values pass through."""
    if severity == "critical":
        return "**CRITICAL**"
    if severity == "high":
        return "**HIGH**"
    if severity == "medium":
        return "MEDIUM"
    if severity == "low":
        return "LOW"
    if severity == "info":
        return "info"
    # Unrecognized severities are rendered verbatim.
    return severity
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def generate_summary_md(
    manifest: dict[str, Any],
    scan_index: list[dict[str, Any]],
    all_findings: list[dict[str, Any]],
    verified: list[dict[str, Any]],
    rejected: list[dict[str, Any]],
    patches: list[dict[str, Any]],
    security_index: list[dict[str, Any]],
) -> str:
    """Generate the summary.md content.

    Args mirror the sweep data files: the run manifest, per-file scan
    index, raw/verified/rejected findings, patch results, and the
    security findings index.  Returns the full markdown document as a
    single string with a trailing newline.
    """
    run_id = manifest.get("runId", "unknown")
    started_at = manifest.get("startedAt", "unknown")
    repo = manifest.get("repo", "unknown")
    completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
    files_errored = sum(1 for e in scan_index if e.get("status") == "error")

    prs_created = sum(1 for p in patches if p.get("status") == "created")
    prs_failed = sum(1 for p in patches if p.get("status") == "error")

    # Severity breakdown of verified findings
    by_severity: dict[str, int] = {}
    for f in verified:
        sev = f.get("severity", "info")
        by_severity[sev] = by_severity.get(sev, 0) + 1

    lines = [
        f"# Warden Sweep: `{run_id}`",
        "",
        f"**Repo**: {repo}",
        f"**Started**: {started_at}",
        f"**Completed**: {completed_at}",
        "",
        "## Stats",
        "",
        # Plain literals: these two rows have no placeholders, so no f-prefix.
        "| Metric | Count |",
        "|--------|-------|",
        f"| Files scanned | {files_scanned} |",
        f"| Files errored | {files_errored} |",
        f"| Total findings | {len(all_findings)} |",
        f"| Verified | {len(verified)} |",
        f"| Rejected | {len(rejected)} |",
        f"| PRs created | {prs_created} |",
        f"| PRs failed | {prs_failed} |",
        f"| Security findings | {len(security_index)} |",
        "",
    ]

    if by_severity:
        lines.append("### By Severity")
        lines.append("")
        for sev in ["critical", "high", "medium", "low", "info"]:
            count = by_severity.get(sev, 0)
            if count > 0:
                lines.append(f"- {severity_badge(sev)}: {count}")
        lines.append("")

    # Security callout
    if security_index:
        lines.append("## Security Findings")
        lines.append("")
        lines.append("The following findings are security-related and may need priority review:")
        lines.append("")
        lines.append("| ID | Severity | Skill | File | Title |")
        lines.append("|----|----------|-------|------|-------|")
        for sf in security_index:
            fid = sf.get("findingId", "")
            sev = severity_badge(sf.get("severity", "info"))
            skill = sf.get("skill", "")
            filepath = sf.get("file", "")
            title = sf.get("title", "")
            lines.append(f"| `{fid}` | {sev} | {skill} | `{filepath}` | {title} |")
        lines.append("")

    # Verified findings table
    if verified:
        lines.append("## Verified Findings")
        lines.append("")
        lines.append("| ID | Severity | Skill | File | Title | PR |")
        lines.append("|----|----------|-------|------|-------|-----|")

        # Build patches lookup (findingId -> PR URL) for the PR column.
        pr_lookup: dict[str, str] = {}
        for p in patches:
            if p.get("status") == "created" and p.get("findingId"):
                pr_lookup[p["findingId"]] = p.get("prUrl", "")

        for f in verified:
            fid = f.get("findingId", "")
            sev = severity_badge(f.get("severity", "info"))
            skill = f.get("skill", "")
            filepath = f.get("file", "")
            title = f.get("title", "")
            pr_url = pr_lookup.get(fid, "")
            pr_link = f"[PR]({pr_url})" if pr_url else "-"
            lines.append(f"| `{fid}` | {sev} | {skill} | `{filepath}` | {title} | {pr_link} |")
        lines.append("")

    # Rejected findings summary
    if rejected:
        lines.append(f"## Rejected Findings ({len(rejected)})")
        lines.append("")
        lines.append("These findings were evaluated and determined to be false positives.")
        lines.append("See `data/rejected.jsonl` for details.")
        lines.append("")

    lines.append("---")
    lines.append(f"*Generated by Warden Sweep `{run_id}`*")

    return "\n".join(lines) + "\n"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def generate_report_json(
    manifest: dict[str, Any],
    scan_index: list[dict[str, Any]],
    all_findings: list[dict[str, Any]],
    verified: list[dict[str, Any]],
    rejected: list[dict[str, Any]],
    patches: list[dict[str, Any]],
    security_index: list[dict[str, Any]],
) -> dict[str, Any]:
    """Generate the report.json data.

    Aggregates scan/verify/patch counts and lists the created PRs with
    the severity of the finding each one fixes.
    """
    run_id = manifest.get("runId", "unknown")
    completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
    prs_created = sum(1 for p in patches if p.get("status") == "created")
    prs_failed = sum(1 for p in patches if p.get("status") == "error")

    # Count verify errors (findings in all but not in verified or rejected)
    verified_ids = {f["findingId"] for f in verified if "findingId" in f}
    rejected_ids = {f["findingId"] for f in rejected if "findingId" in f}
    all_ids = {f["findingId"] for f in all_findings if "findingId" in f}
    verify_errors = len(all_ids - verified_ids - rejected_ids)

    # O(1) severity lookup instead of re-scanning `verified` for every patch.
    # First occurrence wins, matching the previous next()-based behavior.
    severity_by_id: dict[Any, str] = {}
    for f in verified:
        fid = f.get("findingId")
        if fid not in severity_by_id:
            severity_by_id[fid] = f.get("severity", "")

    return {
        "runId": run_id,
        "completedAt": completed_at,
        "scan": {
            "filesScanned": files_scanned,
            "totalFindings": len(all_findings),
        },
        "verify": {
            "verified": len(verified),
            "rejected": len(rejected),
            "errors": verify_errors,
        },
        "patch": {
            "prsCreated": prs_created,
            "prsFailed": prs_failed,
        },
        "security": {
            "count": len(security_index),
        },
        "prs": [
            {
                "findingId": p.get("findingId", ""),
                "url": p.get("prUrl", ""),
                "severity": severity_by_id.get(p.get("findingId"), ""),
            }
            for p in patches
            if p.get("status") == "created"
        ],
    }
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def main():
    """CLI entry point: read sweep data files, emit summary.md + report.json."""
    parser = argparse.ArgumentParser(
        description="Generate sweep summary and report"
    )
    parser.add_argument("sweep_dir", help="Path to the sweep output directory")
    args = parser.parse_args()

    sweep_dir = args.sweep_dir
    data_dir = os.path.join(sweep_dir, "data")

    def load(name: str) -> list:
        # Small helper: read one JSONL dataset from the data directory.
        return read_jsonl(os.path.join(data_dir, name))

    # Read inputs
    manifest = read_json(os.path.join(data_dir, "manifest.json")) or {}
    scan_index = load("scan-index.jsonl")
    all_findings = load("all-findings.jsonl")
    verified = load("verified.jsonl")
    rejected = load("rejected.jsonl")
    patches = load("patches.jsonl")
    security_index = read_jsonl(os.path.join(sweep_dir, "security", "index.jsonl"))

    # Write summary.md
    summary_path = os.path.join(sweep_dir, "summary.md")
    with open(summary_path, "w") as fh:
        fh.write(generate_summary_md(
            manifest, scan_index,
            all_findings, verified, rejected, patches, security_index,
        ))

    # Write report.json
    report = generate_report_json(
        manifest, scan_index, all_findings,
        verified, rejected, patches, security_index,
    )
    report_path = os.path.join(data_dir, "report.json")
    with open(report_path, "w") as fh:
        json.dump(report, fh, indent=2)
        fh.write("\n")

    print(json.dumps({
        "summaryPath": summary_path,
        "reportPath": report_path,
        "verified": len(verified),
        "rejected": len(rejected),
        "prsCreated": report["patch"]["prsCreated"],
        "securityFindings": len(security_index),
    }))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Warden Sweep: Index existing PRs for deduplication.
|
|
7
|
+
|
|
8
|
+
Fetches open warden-labeled PRs via gh, identifies file overlap with
|
|
9
|
+
verified findings, and caches diffs for overlapping PRs.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
uv run index_prs.py <sweep-dir>
|
|
13
|
+
|
|
14
|
+
Stdout: JSON summary (for LLM consumption)
|
|
15
|
+
Stderr: Progress lines
|
|
16
|
+
|
|
17
|
+
Side effects:
|
|
18
|
+
- Creates data/existing-prs.json
|
|
19
|
+
- Creates data/pr-diffs/<number>.diff for overlapping PRs
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import json
|
|
25
|
+
import os
|
|
26
|
+
import subprocess
|
|
27
|
+
import sys
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
31
|
+
from _utils import read_jsonl, run_cmd # noqa: E402
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def fetch_warden_prs(sweep_dir: str) -> list[dict[str, Any]]:
    """Fetch open PRs with the warden label.

    Also writes the raw gh output to data/existing-prs.json as a side
    effect.  Returns [] when gh fails or its output cannot be parsed.
    """
    cmd = [
        "gh", "pr", "list",
        "--label", "warden",
        "--state", "open",
        "--json", "number,title,url,files",
        "--limit", "100",
    ]
    result = run_cmd(cmd, timeout=30)

    if result.returncode != 0:
        print(f"Warning: gh pr list failed: {result.stderr}", file=sys.stderr)
        return []

    try:
        prs = json.loads(result.stdout)
    except json.JSONDecodeError:
        print("Warning: Failed to parse gh pr list output", file=sys.stderr)
        return []

    # Persist the raw PR data for later pipeline stages.
    prs_path = os.path.join(sweep_dir, "data", "existing-prs.json")
    with open(prs_path, "w") as fh:
        json.dump(prs, fh, indent=2)
        fh.write("\n")

    return prs
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def build_file_index(
    prs: list[dict[str, Any]],
) -> dict[str, list[dict[str, Any]]]:
    """Build a file-to-PR lookup from the PR list."""
    index: dict[str, list[dict[str, Any]]] = {}

    for pr in prs:
        summary = {
            "number": pr.get("number"),
            "title": pr.get("title", ""),
            "url": pr.get("url", ""),
        }
        for entry in pr.get("files") or []:
            # gh returns file entries as objects with a "path" key;
            # tolerate bare strings as well.
            path = entry.get("path", "") if isinstance(entry, dict) else str(entry)
            if path:
                index.setdefault(path, []).append(summary)

    return index
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_verified_files(sweep_dir: str) -> set[str]:
    """Get the set of files that have verified findings."""
    entries = read_jsonl(os.path.join(sweep_dir, "data", "verified.jsonl"))
    # Only entries with a non-empty "file" value contribute.
    return {path for e in entries if (path := e.get("file"))}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def fetch_pr_diff(pr_number: int, sweep_dir: str) -> bool:
    """Fetch and cache a PR diff. Returns True on success."""
    diff_path = os.path.join(sweep_dir, "data", "pr-diffs", f"{pr_number}.diff")

    # Reuse a previously cached diff.
    if os.path.exists(diff_path):
        return True

    result = run_cmd(["gh", "pr", "diff", str(pr_number)], timeout=30)
    if result.returncode != 0:
        print(
            f"Warning: Failed to fetch diff for PR #{pr_number}: {result.stderr}",
            file=sys.stderr,
        )
        return False

    with open(diff_path, "w") as fh:
        fh.write(result.stdout)
    return True
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def main() -> None:
    """CLI entry point: index warden PRs and cache diffs that overlap findings."""
    parser = argparse.ArgumentParser(
        description="Warden Sweep: Index existing PRs for dedup"
    )
    parser.add_argument("sweep_dir", help="Path to the sweep directory")
    args = parser.parse_args()

    sweep_dir = args.sweep_dir
    if not os.path.isdir(sweep_dir):
        print(
            json.dumps({"error": f"Sweep directory not found: {sweep_dir}"}),
            file=sys.stdout,
        )
        sys.exit(1)

    # Ensure pr-diffs directory exists
    os.makedirs(os.path.join(sweep_dir, "data", "pr-diffs"), exist_ok=True)

    print("Fetching open warden-labeled PRs...", file=sys.stderr)
    prs = fetch_warden_prs(sweep_dir)
    print(f"Found {len(prs)} open warden PR(s)", file=sys.stderr)

    file_index = build_file_index(prs)
    verified_files = get_verified_files(sweep_dir)

    # PRs that touch any file with a verified finding.
    overlapping = {
        info["number"]
        for path in verified_files
        for info in file_index.get(path, [])
    }

    # Cache diffs for the overlapping PRs.
    diffs_cached = 0
    for number in sorted(overlapping):
        print(f"Caching diff for PR #{number}...", file=sys.stderr)
        if fetch_pr_diff(number, sweep_dir):
            diffs_cached += 1

    # Restrict the emitted index to files that have verified findings.
    relevant_index = {
        path: file_index[path] for path in verified_files if path in file_index
    }

    print(json.dumps({
        "totalPRs": len(prs),
        "overlappingPRs": len(overlapping),
        "fileIndex": relevant_index,
        "diffsCached": diffs_cached,
    }, indent=2))


if __name__ == "__main__":
    main()
|