@sentry/warden 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/args.d.ts +1 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +17 -2
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands/add.d.ts.map +1 -1
- package/dist/cli/commands/add.js +25 -33
- package/dist/cli/commands/add.js.map +1 -1
- package/dist/cli/commands/logs.d.ts.map +1 -1
- package/dist/cli/commands/logs.js +4 -11
- package/dist/cli/commands/logs.js.map +1 -1
- package/dist/cli/commands/setup-app.d.ts.map +1 -1
- package/dist/cli/commands/setup-app.js +19 -15
- package/dist/cli/commands/setup-app.js.map +1 -1
- package/dist/cli/context.d.ts +2 -0
- package/dist/cli/context.d.ts.map +1 -1
- package/dist/cli/context.js +8 -2
- package/dist/cli/context.js.map +1 -1
- package/dist/cli/files.d.ts.map +1 -1
- package/dist/cli/files.js +27 -30
- package/dist/cli/files.js.map +1 -1
- package/dist/cli/git.d.ts +8 -3
- package/dist/cli/git.d.ts.map +1 -1
- package/dist/cli/git.js +24 -13
- package/dist/cli/git.js.map +1 -1
- package/dist/cli/index.js +10 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/input.d.ts +7 -0
- package/dist/cli/input.d.ts.map +1 -1
- package/dist/cli/input.js +13 -2
- package/dist/cli/input.js.map +1 -1
- package/dist/cli/main.d.ts.map +1 -1
- package/dist/cli/main.js +62 -19
- package/dist/cli/main.js.map +1 -1
- package/dist/cli/output/tasks.d.ts.map +1 -1
- package/dist/cli/output/tasks.js +22 -3
- package/dist/cli/output/tasks.js.map +1 -1
- package/dist/config/writer.d.ts.map +1 -1
- package/dist/config/writer.js +18 -0
- package/dist/config/writer.js.map +1 -1
- package/dist/diff/apply.d.ts +6 -0
- package/dist/diff/apply.d.ts.map +1 -0
- package/dist/diff/apply.js +44 -0
- package/dist/diff/apply.js.map +1 -0
- package/dist/diff/index.d.ts +1 -0
- package/dist/diff/index.d.ts.map +1 -1
- package/dist/diff/index.js +1 -0
- package/dist/diff/index.js.map +1 -1
- package/dist/evals/index.js +1 -1
- package/dist/evals/index.js.map +1 -1
- package/dist/output/github-issues.d.ts.map +1 -1
- package/dist/output/github-issues.js +15 -57
- package/dist/output/github-issues.js.map +1 -1
- package/dist/sdk/analyze.d.ts.map +1 -1
- package/dist/sdk/analyze.js +24 -5
- package/dist/sdk/analyze.js.map +1 -1
- package/dist/sdk/auth.d.ts.map +1 -1
- package/dist/sdk/auth.js +2 -2
- package/dist/sdk/auth.js.map +1 -1
- package/dist/sdk/errors.d.ts +3 -1
- package/dist/sdk/errors.d.ts.map +1 -1
- package/dist/sdk/errors.js +2 -2
- package/dist/sdk/errors.js.map +1 -1
- package/dist/sdk/fix-quality.d.ts +20 -0
- package/dist/sdk/fix-quality.d.ts.map +1 -0
- package/dist/sdk/fix-quality.js +167 -0
- package/dist/sdk/fix-quality.js.map +1 -0
- package/dist/sdk/prepare.d.ts.map +1 -1
- package/dist/sdk/prepare.js +5 -0
- package/dist/sdk/prepare.js.map +1 -1
- package/dist/sentry.d.ts +5 -3
- package/dist/sentry.d.ts.map +1 -1
- package/dist/sentry.js +37 -11
- package/dist/sentry.js.map +1 -1
- package/dist/skills/remote.js +1 -1
- package/dist/skills/remote.js.map +1 -1
- package/dist/utils/exec.d.ts +4 -1
- package/dist/utils/exec.d.ts.map +1 -1
- package/dist/utils/exec.js +6 -4
- package/dist/utils/exec.js.map +1 -1
- package/package.json +1 -1
- package/skills/warden-sweep/SKILL.md +67 -74
- package/skills/warden-sweep/references/patch-prompt.md +72 -0
- package/skills/warden-sweep/references/verify-prompt.md +25 -0
- package/skills/warden-sweep/scripts/_utils.py +62 -0
- package/skills/warden-sweep/scripts/create_issue.py +189 -0
- package/skills/warden-sweep/scripts/find_reviewers.py +16 -17
- package/skills/warden-sweep/scripts/generate_report.py +20 -25
- package/skills/warden-sweep/scripts/organize.py +128 -21
- package/skills/warden-sweep/scripts/scan.py +82 -130
|
@@ -23,30 +23,7 @@ from datetime import datetime, timezone
|
|
|
23
23
|
from typing import Any
|
|
24
24
|
|
|
25
25
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
26
|
-
from _utils import read_jsonl # noqa: E402
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def read_json(path: str) -> dict[str, Any] | None:
|
|
30
|
-
"""Read a JSON file and return parsed object."""
|
|
31
|
-
if not os.path.exists(path):
|
|
32
|
-
return None
|
|
33
|
-
try:
|
|
34
|
-
with open(path) as f:
|
|
35
|
-
return json.load(f)
|
|
36
|
-
except (json.JSONDecodeError, OSError):
|
|
37
|
-
return None
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def severity_badge(severity: str) -> str:
|
|
41
|
-
"""Return a markdown-friendly severity indicator."""
|
|
42
|
-
badges = {
|
|
43
|
-
"critical": "**CRITICAL**",
|
|
44
|
-
"high": "**HIGH**",
|
|
45
|
-
"medium": "MEDIUM",
|
|
46
|
-
"low": "LOW",
|
|
47
|
-
"info": "info",
|
|
48
|
-
}
|
|
49
|
-
return badges.get(severity, severity)
|
|
26
|
+
from _utils import read_json, read_jsonl, severity_badge # noqa: E402
|
|
50
27
|
|
|
51
28
|
|
|
52
29
|
def generate_summary_md(
|
|
@@ -65,7 +42,14 @@ def generate_summary_md(
|
|
|
65
42
|
completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
66
43
|
|
|
67
44
|
files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
|
|
68
|
-
|
|
45
|
+
files_timed_out = sum(
|
|
46
|
+
1 for e in scan_index
|
|
47
|
+
if e.get("status") == "error" and e.get("error") == "timeout"
|
|
48
|
+
)
|
|
49
|
+
files_errored = sum(
|
|
50
|
+
1 for e in scan_index
|
|
51
|
+
if e.get("status") == "error" and e.get("error") != "timeout"
|
|
52
|
+
)
|
|
69
53
|
|
|
70
54
|
prs_created = sum(1 for p in patches if p.get("status") == "created")
|
|
71
55
|
prs_failed = sum(1 for p in patches if p.get("status") == "error")
|
|
@@ -88,6 +72,7 @@ def generate_summary_md(
|
|
|
88
72
|
f"| Metric | Count |",
|
|
89
73
|
f"|--------|-------|",
|
|
90
74
|
f"| Files scanned | {files_scanned} |",
|
|
75
|
+
f"| Files timed out | {files_timed_out} |",
|
|
91
76
|
f"| Files errored | {files_errored} |",
|
|
92
77
|
f"| Total findings | {len(all_findings)} |",
|
|
93
78
|
f"| Verified | {len(verified)} |",
|
|
@@ -176,6 +161,14 @@ def generate_report_json(
|
|
|
176
161
|
completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
177
162
|
|
|
178
163
|
files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
|
|
164
|
+
files_timed_out = sum(
|
|
165
|
+
1 for e in scan_index
|
|
166
|
+
if e.get("status") == "error" and e.get("error") == "timeout"
|
|
167
|
+
)
|
|
168
|
+
files_errored = sum(
|
|
169
|
+
1 for e in scan_index
|
|
170
|
+
if e.get("status") == "error" and e.get("error") != "timeout"
|
|
171
|
+
)
|
|
179
172
|
prs_created = sum(1 for p in patches if p.get("status") == "created")
|
|
180
173
|
prs_failed = sum(1 for p in patches if p.get("status") == "error")
|
|
181
174
|
|
|
@@ -190,6 +183,8 @@ def generate_report_json(
|
|
|
190
183
|
"completedAt": completed_at,
|
|
191
184
|
"scan": {
|
|
192
185
|
"filesScanned": files_scanned,
|
|
186
|
+
"filesTimedOut": files_timed_out,
|
|
187
|
+
"filesErrored": files_errored,
|
|
193
188
|
"totalFindings": len(all_findings),
|
|
194
189
|
},
|
|
195
190
|
"verify": {
|
|
@@ -36,7 +36,7 @@ from datetime import datetime, timezone
|
|
|
36
36
|
from typing import Any
|
|
37
37
|
|
|
38
38
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
39
|
-
from _utils import read_jsonl # noqa: E402
|
|
39
|
+
from _utils import ensure_github_label, pr_number_from_url, read_json, read_jsonl, write_json # noqa: E402
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
SECURITY_SKILL_PATTERNS = [
|
|
@@ -54,6 +54,15 @@ def is_security_skill(skill_name: str) -> bool:
|
|
|
54
54
|
return name_lower in SECURITY_SKILL_PATTERNS
|
|
55
55
|
|
|
56
56
|
|
|
57
|
+
def severity_label(severity: str) -> str:
|
|
58
|
+
"""Format a severity string for inline display in issue comments."""
|
|
59
|
+
if not severity:
|
|
60
|
+
return ""
|
|
61
|
+
if severity in ("critical", "high"):
|
|
62
|
+
return f" (**{severity.upper()}**)"
|
|
63
|
+
return f" ({severity.upper()})"
|
|
64
|
+
|
|
65
|
+
|
|
57
66
|
def identify_security_findings(
|
|
58
67
|
sweep_dir: str,
|
|
59
68
|
) -> list[dict[str, Any]]:
|
|
@@ -101,18 +110,7 @@ def copy_security_findings(
|
|
|
101
110
|
|
|
102
111
|
def create_security_label() -> None:
|
|
103
112
|
"""Create the security label on GitHub (idempotent)."""
|
|
104
|
-
|
|
105
|
-
subprocess.run(
|
|
106
|
-
[
|
|
107
|
-
"gh", "label", "create", "security",
|
|
108
|
-
"--color", "D93F0B",
|
|
109
|
-
"--description", "Security-related changes",
|
|
110
|
-
],
|
|
111
|
-
capture_output=True,
|
|
112
|
-
timeout=15,
|
|
113
|
-
)
|
|
114
|
-
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
115
|
-
pass
|
|
113
|
+
ensure_github_label("security", "D93F0B", "Security-related changes")
|
|
116
114
|
|
|
117
115
|
|
|
118
116
|
def label_security_prs(
|
|
@@ -156,6 +154,115 @@ def label_security_prs(
|
|
|
156
154
|
return labeled
|
|
157
155
|
|
|
158
156
|
|
|
157
|
+
def _has_sweep_complete_comment(issue_url: str) -> bool:
|
|
158
|
+
"""Check if the tracking issue already has a 'Sweep Complete' comment."""
|
|
159
|
+
try:
|
|
160
|
+
result = subprocess.run(
|
|
161
|
+
["gh", "issue", "view", issue_url, "--json", "comments", "--jq",
|
|
162
|
+
'.comments[].body | select(startswith("## Sweep Complete"))'],
|
|
163
|
+
capture_output=True,
|
|
164
|
+
text=True,
|
|
165
|
+
timeout=15,
|
|
166
|
+
)
|
|
167
|
+
return result.returncode == 0 and result.stdout.strip() != ""
|
|
168
|
+
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def update_tracking_issue(sweep_dir: str) -> None:
|
|
173
|
+
"""Post a comment on the tracking issue with final PR results. Idempotent."""
|
|
174
|
+
manifest = read_json(os.path.join(sweep_dir, "data", "manifest.json"))
|
|
175
|
+
if not manifest:
|
|
176
|
+
return
|
|
177
|
+
|
|
178
|
+
issue_url = manifest.get("issueUrl")
|
|
179
|
+
if not issue_url:
|
|
180
|
+
return
|
|
181
|
+
|
|
182
|
+
if _has_sweep_complete_comment(issue_url):
|
|
183
|
+
print("Tracking issue already has completion comment, skipping.", file=sys.stderr)
|
|
184
|
+
return
|
|
185
|
+
|
|
186
|
+
patches = read_jsonl(os.path.join(sweep_dir, "data", "patches.jsonl"))
|
|
187
|
+
verified = read_jsonl(os.path.join(sweep_dir, "data", "verified.jsonl"))
|
|
188
|
+
security_index = read_jsonl(os.path.join(sweep_dir, "security", "index.jsonl"))
|
|
189
|
+
|
|
190
|
+
# Build lookup from findingId to verified finding
|
|
191
|
+
verified_lookup: dict[str, dict[str, Any]] = {}
|
|
192
|
+
for f in verified:
|
|
193
|
+
fid = f.get("findingId", "")
|
|
194
|
+
if fid:
|
|
195
|
+
verified_lookup[fid] = f
|
|
196
|
+
|
|
197
|
+
security_ids = {f.get("findingId", "") for f in security_index}
|
|
198
|
+
|
|
199
|
+
created = sum(1 for p in patches if p.get("status") == "created")
|
|
200
|
+
existing = sum(1 for p in patches if p.get("status") == "existing")
|
|
201
|
+
failed = sum(1 for p in patches if p.get("status") == "error")
|
|
202
|
+
|
|
203
|
+
lines = [
|
|
204
|
+
"## Sweep Complete",
|
|
205
|
+
"",
|
|
206
|
+
"| PRs Created | PRs Skipped (existing) | PRs Failed | Security Findings |",
|
|
207
|
+
"|-------------|------------------------|------------|-------------------|",
|
|
208
|
+
f"| {created} | {existing} | {failed} | {len(security_index)} |",
|
|
209
|
+
"",
|
|
210
|
+
]
|
|
211
|
+
|
|
212
|
+
# PR task list
|
|
213
|
+
pr_entries = [p for p in patches if p.get("status") == "created" and p.get("prUrl")]
|
|
214
|
+
if pr_entries:
|
|
215
|
+
lines.append("### PRs")
|
|
216
|
+
lines.append("")
|
|
217
|
+
for p in pr_entries:
|
|
218
|
+
fid = p.get("findingId", "")
|
|
219
|
+
pr_number = pr_number_from_url(p.get("prUrl", ""))
|
|
220
|
+
finding = verified_lookup.get(fid, {})
|
|
221
|
+
title = finding.get("title", fid)
|
|
222
|
+
sev = severity_label(finding.get("severity", ""))
|
|
223
|
+
lines.append(f"- [ ] #{pr_number} - fix: {title}{sev}")
|
|
224
|
+
lines.append("")
|
|
225
|
+
|
|
226
|
+
# Security findings section
|
|
227
|
+
security_prs = [
|
|
228
|
+
p for p in patches
|
|
229
|
+
if p.get("status") == "created"
|
|
230
|
+
and p.get("findingId", "") in security_ids
|
|
231
|
+
and p.get("prUrl")
|
|
232
|
+
]
|
|
233
|
+
if security_prs:
|
|
234
|
+
lines.append("### Security Findings")
|
|
235
|
+
lines.append("")
|
|
236
|
+
for p in security_prs:
|
|
237
|
+
fid = p.get("findingId", "")
|
|
238
|
+
pr_number = pr_number_from_url(p.get("prUrl", ""))
|
|
239
|
+
finding = verified_lookup.get(fid, {})
|
|
240
|
+
title = finding.get("title", fid)
|
|
241
|
+
sev = severity_label(finding.get("severity", ""))
|
|
242
|
+
lines.append(f"- #{pr_number} - {title}{sev}")
|
|
243
|
+
lines.append("")
|
|
244
|
+
|
|
245
|
+
body = "\n".join(lines)
|
|
246
|
+
|
|
247
|
+
try:
|
|
248
|
+
result = subprocess.run(
|
|
249
|
+
["gh", "issue", "comment", issue_url, "--body", body],
|
|
250
|
+
capture_output=True,
|
|
251
|
+
text=True,
|
|
252
|
+
timeout=30,
|
|
253
|
+
)
|
|
254
|
+
if result.returncode != 0:
|
|
255
|
+
print(
|
|
256
|
+
f"Warning: Failed to comment on tracking issue: {result.stderr.strip()}",
|
|
257
|
+
file=sys.stderr,
|
|
258
|
+
)
|
|
259
|
+
except (subprocess.TimeoutExpired, FileNotFoundError) as e:
|
|
260
|
+
print(
|
|
261
|
+
f"Warning: Failed to comment on tracking issue: {e}",
|
|
262
|
+
file=sys.stderr,
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
|
|
159
266
|
def update_findings_with_pr_links(sweep_dir: str) -> None:
|
|
160
267
|
"""Append PR links to findings/*.md for created PRs."""
|
|
161
268
|
patches = read_jsonl(os.path.join(sweep_dir, "data", "patches.jsonl"))
|
|
@@ -218,20 +325,16 @@ def run_generate_report(sweep_dir: str, script_dir: str) -> None:
|
|
|
218
325
|
def update_manifest(sweep_dir: str) -> None:
|
|
219
326
|
"""Mark organize phase complete and add completedAt timestamp."""
|
|
220
327
|
manifest_path = os.path.join(sweep_dir, "data", "manifest.json")
|
|
221
|
-
|
|
328
|
+
manifest = read_json(manifest_path)
|
|
329
|
+
if not manifest:
|
|
222
330
|
return
|
|
223
331
|
|
|
224
|
-
with open(manifest_path) as f:
|
|
225
|
-
manifest = json.load(f)
|
|
226
|
-
|
|
227
332
|
manifest.setdefault("phases", {})["organize"] = "complete"
|
|
228
333
|
manifest["completedAt"] = datetime.now(timezone.utc).strftime(
|
|
229
334
|
"%Y-%m-%dT%H:%M:%SZ"
|
|
230
335
|
)
|
|
231
336
|
|
|
232
|
-
|
|
233
|
-
json.dump(manifest, f, indent=2)
|
|
234
|
-
f.write("\n")
|
|
337
|
+
write_json(manifest_path, manifest)
|
|
235
338
|
|
|
236
339
|
|
|
237
340
|
def main() -> None:
|
|
@@ -279,7 +382,11 @@ def main() -> None:
|
|
|
279
382
|
print("Generating summary and report...", file=sys.stderr)
|
|
280
383
|
run_generate_report(sweep_dir, script_dir)
|
|
281
384
|
|
|
282
|
-
# Step 6: Update
|
|
385
|
+
# Step 6: Update tracking issue with PR results
|
|
386
|
+
print("Updating tracking issue...", file=sys.stderr)
|
|
387
|
+
update_tracking_issue(sweep_dir)
|
|
388
|
+
|
|
389
|
+
# Step 7: Update manifest
|
|
283
390
|
update_manifest(sweep_dir)
|
|
284
391
|
|
|
285
392
|
# Gather stats for output
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# /// script
|
|
3
3
|
# requires-python = ">=3.9"
|
|
4
|
+
# dependencies = ["tomli; python_version < '3.11'"]
|
|
4
5
|
# ///
|
|
5
6
|
"""
|
|
6
7
|
Warden Sweep: Scan phase.
|
|
@@ -28,12 +29,19 @@ import os
|
|
|
28
29
|
import secrets
|
|
29
30
|
import subprocess
|
|
30
31
|
import sys
|
|
32
|
+
import threading
|
|
33
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
31
34
|
from datetime import datetime, timezone
|
|
32
35
|
from pathlib import Path
|
|
33
36
|
from typing import Any
|
|
34
37
|
|
|
38
|
+
try:
|
|
39
|
+
import tomllib
|
|
40
|
+
except ModuleNotFoundError:
|
|
41
|
+
import tomli as tomllib # type: ignore[no-redefine]
|
|
42
|
+
|
|
35
43
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
36
|
-
from _utils import run_cmd # noqa: E402
|
|
44
|
+
from _utils import ensure_github_label, run_cmd # noqa: E402
|
|
37
45
|
|
|
38
46
|
|
|
39
47
|
SUPPORTED_EXTENSIONS = {
|
|
@@ -66,22 +74,6 @@ def create_sweep_dir(sweep_dir: str) -> None:
|
|
|
66
74
|
os.makedirs(os.path.join(sweep_dir, subdir), exist_ok=True)
|
|
67
75
|
|
|
68
76
|
|
|
69
|
-
def create_warden_label() -> None:
|
|
70
|
-
"""Create the warden label on GitHub (idempotent)."""
|
|
71
|
-
try:
|
|
72
|
-
subprocess.run(
|
|
73
|
-
[
|
|
74
|
-
"gh", "label", "create", "warden",
|
|
75
|
-
"--color", "5319E7",
|
|
76
|
-
"--description", "Automated fix from Warden Sweep",
|
|
77
|
-
],
|
|
78
|
-
capture_output=True,
|
|
79
|
-
timeout=15,
|
|
80
|
-
)
|
|
81
|
-
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
82
|
-
pass
|
|
83
|
-
|
|
84
|
-
|
|
85
77
|
def write_manifest(sweep_dir: str, run_id: str) -> None:
|
|
86
78
|
"""Write the initial manifest.json."""
|
|
87
79
|
repo = "unknown"
|
|
@@ -101,6 +93,7 @@ def write_manifest(sweep_dir: str, run_id: str) -> None:
|
|
|
101
93
|
"phases": {
|
|
102
94
|
"scan": "pending",
|
|
103
95
|
"verify": "pending",
|
|
96
|
+
"issue": "pending",
|
|
104
97
|
"patch": "pending",
|
|
105
98
|
"organize": "pending",
|
|
106
99
|
},
|
|
@@ -112,84 +105,16 @@ def write_manifest(sweep_dir: str, run_id: str) -> None:
|
|
|
112
105
|
f.write("\n")
|
|
113
106
|
|
|
114
107
|
|
|
115
|
-
def _strip_toml_inline_comment(line: str) -> str:
|
|
116
|
-
"""Strip inline TOML comments (# outside of quoted strings)."""
|
|
117
|
-
in_quote = False
|
|
118
|
-
quote_char = ""
|
|
119
|
-
for i, ch in enumerate(line):
|
|
120
|
-
if in_quote:
|
|
121
|
-
if ch == quote_char:
|
|
122
|
-
in_quote = False
|
|
123
|
-
elif ch in ('"', "'"):
|
|
124
|
-
in_quote = True
|
|
125
|
-
quote_char = ch
|
|
126
|
-
elif ch == "#":
|
|
127
|
-
return line[:i].rstrip()
|
|
128
|
-
return line
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def _toml_array_to_json(value: str) -> str:
|
|
132
|
-
"""Convert a TOML array string to JSON-compatible format.
|
|
133
|
-
|
|
134
|
-
Handles TOML single-quoted strings and trailing commas.
|
|
135
|
-
Inline comments should be stripped before calling this function.
|
|
136
|
-
"""
|
|
137
|
-
import re
|
|
138
|
-
# Replace single-quoted strings with double-quoted (TOML literal strings)
|
|
139
|
-
value = re.sub(r"'([^']*)'", r'"\1"', value)
|
|
140
|
-
# Strip trailing comma before closing bracket
|
|
141
|
-
value = re.sub(r",\s*]", "]", value)
|
|
142
|
-
return value
|
|
143
|
-
|
|
144
|
-
|
|
145
108
|
def load_ignore_paths() -> list[str]:
|
|
146
109
|
"""Load ignorePaths from warden.toml defaults if present."""
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
toml_path = "warden.toml"
|
|
150
|
-
if not os.path.exists(toml_path):
|
|
151
|
-
return []
|
|
152
|
-
|
|
153
|
-
with open(toml_path) as f:
|
|
154
|
-
content = f.read()
|
|
155
|
-
|
|
156
|
-
# Simple TOML parsing for ignorePaths in [defaults] section
|
|
157
|
-
in_defaults = False
|
|
158
|
-
collecting_value = False
|
|
159
|
-
value_parts: list[str] = []
|
|
160
|
-
for line in content.splitlines():
|
|
161
|
-
stripped = line.strip()
|
|
162
|
-
if collecting_value:
|
|
163
|
-
# Skip TOML comment lines inside multiline arrays
|
|
164
|
-
if stripped.startswith("#"):
|
|
165
|
-
continue
|
|
166
|
-
# Strip inline comments before accumulating
|
|
167
|
-
stripped = _strip_toml_inline_comment(stripped)
|
|
168
|
-
value_parts.append(stripped)
|
|
169
|
-
combined = "".join(value_parts)
|
|
170
|
-
if combined.count("[") <= combined.count("]"):
|
|
171
|
-
try:
|
|
172
|
-
return json.loads(_toml_array_to_json(combined))
|
|
173
|
-
except json.JSONDecodeError:
|
|
174
|
-
return []
|
|
175
|
-
continue
|
|
176
|
-
if stripped == "[defaults]":
|
|
177
|
-
in_defaults = True
|
|
178
|
-
continue
|
|
179
|
-
if stripped.startswith("[") and stripped != "[defaults]":
|
|
180
|
-
in_defaults = False
|
|
181
|
-
continue
|
|
182
|
-
if in_defaults and stripped.startswith("ignorePaths"):
|
|
183
|
-
_, _, value = stripped.partition("=")
|
|
184
|
-
value = _strip_toml_inline_comment(value.strip())
|
|
185
|
-
if not value:
|
|
186
|
-
continue
|
|
187
|
-
try:
|
|
188
|
-
return json.loads(_toml_array_to_json(value))
|
|
189
|
-
except json.JSONDecodeError:
|
|
190
|
-
value_parts = [value]
|
|
191
|
-
collecting_value = True
|
|
110
|
+
toml_path = "warden.toml"
|
|
111
|
+
if not os.path.exists(toml_path):
|
|
192
112
|
return []
|
|
113
|
+
try:
|
|
114
|
+
with open(toml_path, "rb") as f:
|
|
115
|
+
config = tomllib.load(f)
|
|
116
|
+
paths = config.get("defaults", {}).get("ignorePaths", [])
|
|
117
|
+
return paths if isinstance(paths, list) else []
|
|
193
118
|
except Exception:
|
|
194
119
|
return []
|
|
195
120
|
|
|
@@ -246,7 +171,7 @@ def enumerate_files(
|
|
|
246
171
|
) -> list[str]:
|
|
247
172
|
"""Enumerate files to scan using git ls-files, filtered by extension."""
|
|
248
173
|
if specific_files:
|
|
249
|
-
return specific_files
|
|
174
|
+
return [f for f in specific_files if not should_ignore(f, ignore_patterns)]
|
|
250
175
|
|
|
251
176
|
result = run_cmd(["git", "ls-files"])
|
|
252
177
|
if result.returncode != 0:
|
|
@@ -301,19 +226,22 @@ def log_path_for_file(sweep_dir: str, file_path: str) -> str:
|
|
|
301
226
|
|
|
302
227
|
|
|
303
228
|
def scan_file(
|
|
304
|
-
file_path: str, log_file: str, timeout: int =
|
|
229
|
+
file_path: str, log_file: str, timeout: int = 600, skill: str | None = None
|
|
305
230
|
) -> dict[str, Any]:
|
|
306
231
|
"""Run warden on a single file. Returns scan-index entry."""
|
|
307
232
|
try:
|
|
233
|
+
cmd = [
|
|
234
|
+
"warden", file_path,
|
|
235
|
+
"--json", "--log",
|
|
236
|
+
"--min-confidence", "off",
|
|
237
|
+
"--fail-on", "off",
|
|
238
|
+
"--quiet",
|
|
239
|
+
"--output", log_file,
|
|
240
|
+
]
|
|
241
|
+
if skill:
|
|
242
|
+
cmd.extend(["--skill", skill])
|
|
308
243
|
result = subprocess.run(
|
|
309
|
-
|
|
310
|
-
"warden", file_path,
|
|
311
|
-
"--json", "--log",
|
|
312
|
-
"--min-confidence", "off",
|
|
313
|
-
"--fail-on", "off",
|
|
314
|
-
"--quiet",
|
|
315
|
-
"--output", log_file,
|
|
316
|
-
],
|
|
244
|
+
cmd,
|
|
317
245
|
capture_output=True,
|
|
318
246
|
text=True,
|
|
319
247
|
timeout=timeout,
|
|
@@ -350,9 +278,9 @@ def scan_file(
|
|
|
350
278
|
record = json.loads(line)
|
|
351
279
|
if record.get("type") == "summary":
|
|
352
280
|
continue
|
|
353
|
-
|
|
354
|
-
if
|
|
355
|
-
skills.add(
|
|
281
|
+
record_skill = record.get("skill", "")
|
|
282
|
+
if record_skill:
|
|
283
|
+
skills.add(record_skill)
|
|
356
284
|
findings = record.get("findings", [])
|
|
357
285
|
finding_count += len(findings)
|
|
358
286
|
except json.JSONDecodeError:
|
|
@@ -482,6 +410,10 @@ def main() -> None:
|
|
|
482
410
|
"--sweep-dir",
|
|
483
411
|
help="Resume into an existing sweep directory",
|
|
484
412
|
)
|
|
413
|
+
parser.add_argument(
|
|
414
|
+
"--skill",
|
|
415
|
+
help="Run only this skill (passed through to warden --skill)",
|
|
416
|
+
)
|
|
485
417
|
args = parser.parse_args()
|
|
486
418
|
|
|
487
419
|
# Check dependencies
|
|
@@ -510,7 +442,7 @@ def main() -> None:
|
|
|
510
442
|
if not os.path.exists(manifest_path):
|
|
511
443
|
write_manifest(sweep_dir, run_id)
|
|
512
444
|
|
|
513
|
-
|
|
445
|
+
ensure_github_label("warden", "5319E7", "Automated fix from Warden Sweep")
|
|
514
446
|
|
|
515
447
|
# Enumerate files
|
|
516
448
|
ignore_patterns = load_ignore_paths()
|
|
@@ -542,30 +474,41 @@ def main() -> None:
|
|
|
542
474
|
file=sys.stderr,
|
|
543
475
|
)
|
|
544
476
|
|
|
545
|
-
# Scan remaining files
|
|
477
|
+
# Scan remaining files concurrently
|
|
546
478
|
scanned = already_done
|
|
479
|
+
index_lock = threading.Lock()
|
|
547
480
|
|
|
548
|
-
|
|
481
|
+
def _scan_and_record(file_path: str) -> dict[str, Any]:
|
|
549
482
|
log_file = log_path_for_file(sweep_dir, file_path)
|
|
550
|
-
entry = scan_file(file_path, log_file)
|
|
483
|
+
entry = scan_file(file_path, log_file, skill=args.skill)
|
|
551
484
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
485
|
+
with index_lock:
|
|
486
|
+
with open(scan_index_path, "a") as f:
|
|
487
|
+
f.write(json.dumps(entry) + "\n")
|
|
555
488
|
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
489
|
+
return entry
|
|
490
|
+
|
|
491
|
+
with ThreadPoolExecutor(max_workers=4) as pool:
|
|
492
|
+
futures = {
|
|
493
|
+
pool.submit(_scan_and_record, fp): fp for fp in remaining
|
|
494
|
+
}
|
|
495
|
+
for future in as_completed(futures):
|
|
496
|
+
entry = future.result()
|
|
497
|
+
scanned += 1
|
|
498
|
+
file_path = entry.get("file", futures[future])
|
|
499
|
+
if entry["status"] == "error":
|
|
500
|
+
label = "TIMEOUT" if entry.get("error") == "timeout" else "ERROR"
|
|
501
|
+
print(
|
|
502
|
+
f"[{scanned}/{total}] {file_path} ({label}: {entry.get('error', 'unknown')})",
|
|
503
|
+
file=sys.stderr,
|
|
504
|
+
)
|
|
505
|
+
else:
|
|
506
|
+
count = entry.get("findingCount", 0)
|
|
507
|
+
suffix = f"({count} finding{'s' if count != 1 else ''})" if count > 0 else ""
|
|
508
|
+
print(
|
|
509
|
+
f"[{scanned}/{total}] {file_path} {suffix}".rstrip(),
|
|
510
|
+
file=sys.stderr,
|
|
511
|
+
)
|
|
569
512
|
|
|
570
513
|
# Extract findings
|
|
571
514
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -578,6 +521,7 @@ def main() -> None:
|
|
|
578
521
|
# so that resumed scans don't include stale errors for files that later succeeded.
|
|
579
522
|
# Scope to current file list so counts stay consistent with `scanned`.
|
|
580
523
|
files_set = set(files)
|
|
524
|
+
timeouts: list[dict[str, Any]] = []
|
|
581
525
|
errors: list[dict[str, Any]] = []
|
|
582
526
|
if os.path.exists(scan_index_path):
|
|
583
527
|
last_status: dict[str, dict[str, Any]] = {}
|
|
@@ -595,36 +539,44 @@ def main() -> None:
|
|
|
595
539
|
continue
|
|
596
540
|
for entry in last_status.values():
|
|
597
541
|
if entry.get("status") == "error":
|
|
598
|
-
|
|
542
|
+
item = {
|
|
599
543
|
"file": entry.get("file", ""),
|
|
600
544
|
"error": entry.get("error", "unknown"),
|
|
601
545
|
"exitCode": entry.get("exitCode", -1),
|
|
602
|
-
}
|
|
546
|
+
}
|
|
547
|
+
if entry.get("error") == "timeout":
|
|
548
|
+
timeouts.append(item)
|
|
549
|
+
else:
|
|
550
|
+
errors.append(item)
|
|
551
|
+
|
|
552
|
+
total_failed = len(timeouts) + len(errors)
|
|
603
553
|
|
|
604
554
|
# Output JSON summary
|
|
605
555
|
output = {
|
|
606
556
|
"runId": run_id,
|
|
607
557
|
"sweepDir": sweep_dir,
|
|
608
|
-
"filesScanned": scanned -
|
|
558
|
+
"filesScanned": scanned - total_failed,
|
|
559
|
+
"filesTimedOut": len(timeouts),
|
|
609
560
|
"filesErrored": len(errors),
|
|
610
561
|
"totalFindings": len(findings),
|
|
611
562
|
"bySeverity": by_severity,
|
|
612
563
|
"findingsPath": os.path.join(sweep_dir, "data", "all-findings.jsonl"),
|
|
613
564
|
"findings": findings,
|
|
565
|
+
"timeouts": timeouts,
|
|
614
566
|
"errors": errors,
|
|
615
567
|
}
|
|
616
568
|
|
|
617
569
|
print(json.dumps(output, indent=2))
|
|
618
570
|
|
|
619
571
|
# Fatal only if every file across all runs errored (no successful scans at all)
|
|
620
|
-
successful = scanned -
|
|
572
|
+
successful = scanned - total_failed
|
|
621
573
|
if successful == 0 and scanned > 0:
|
|
622
574
|
update_manifest_phase(sweep_dir, "scan", "error")
|
|
623
575
|
sys.exit(1)
|
|
624
576
|
|
|
625
577
|
update_manifest_phase(sweep_dir, "scan", "complete")
|
|
626
578
|
|
|
627
|
-
if
|
|
579
|
+
if total_failed > 0:
|
|
628
580
|
sys.exit(2)
|
|
629
581
|
|
|
630
582
|
|