@sentry/warden 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/cli/args.d.ts +1 -0
  2. package/dist/cli/args.d.ts.map +1 -1
  3. package/dist/cli/args.js +17 -2
  4. package/dist/cli/args.js.map +1 -1
  5. package/dist/cli/commands/add.d.ts.map +1 -1
  6. package/dist/cli/commands/add.js +25 -33
  7. package/dist/cli/commands/add.js.map +1 -1
  8. package/dist/cli/commands/logs.d.ts.map +1 -1
  9. package/dist/cli/commands/logs.js +4 -11
  10. package/dist/cli/commands/logs.js.map +1 -1
  11. package/dist/cli/commands/setup-app.d.ts.map +1 -1
  12. package/dist/cli/commands/setup-app.js +19 -15
  13. package/dist/cli/commands/setup-app.js.map +1 -1
  14. package/dist/cli/context.d.ts +2 -0
  15. package/dist/cli/context.d.ts.map +1 -1
  16. package/dist/cli/context.js +8 -2
  17. package/dist/cli/context.js.map +1 -1
  18. package/dist/cli/files.d.ts.map +1 -1
  19. package/dist/cli/files.js +27 -30
  20. package/dist/cli/files.js.map +1 -1
  21. package/dist/cli/git.d.ts +8 -3
  22. package/dist/cli/git.d.ts.map +1 -1
  23. package/dist/cli/git.js +24 -13
  24. package/dist/cli/git.js.map +1 -1
  25. package/dist/cli/index.js +10 -0
  26. package/dist/cli/index.js.map +1 -1
  27. package/dist/cli/input.d.ts +7 -0
  28. package/dist/cli/input.d.ts.map +1 -1
  29. package/dist/cli/input.js +13 -2
  30. package/dist/cli/input.js.map +1 -1
  31. package/dist/cli/main.d.ts.map +1 -1
  32. package/dist/cli/main.js +62 -19
  33. package/dist/cli/main.js.map +1 -1
  34. package/dist/cli/output/tasks.d.ts.map +1 -1
  35. package/dist/cli/output/tasks.js +22 -3
  36. package/dist/cli/output/tasks.js.map +1 -1
  37. package/dist/config/writer.d.ts.map +1 -1
  38. package/dist/config/writer.js +18 -0
  39. package/dist/config/writer.js.map +1 -1
  40. package/dist/diff/apply.d.ts +6 -0
  41. package/dist/diff/apply.d.ts.map +1 -0
  42. package/dist/diff/apply.js +44 -0
  43. package/dist/diff/apply.js.map +1 -0
  44. package/dist/diff/index.d.ts +1 -0
  45. package/dist/diff/index.d.ts.map +1 -1
  46. package/dist/diff/index.js +1 -0
  47. package/dist/diff/index.js.map +1 -1
  48. package/dist/evals/index.js +1 -1
  49. package/dist/evals/index.js.map +1 -1
  50. package/dist/output/github-issues.d.ts.map +1 -1
  51. package/dist/output/github-issues.js +15 -57
  52. package/dist/output/github-issues.js.map +1 -1
  53. package/dist/sdk/analyze.d.ts.map +1 -1
  54. package/dist/sdk/analyze.js +24 -5
  55. package/dist/sdk/analyze.js.map +1 -1
  56. package/dist/sdk/auth.d.ts.map +1 -1
  57. package/dist/sdk/auth.js +2 -2
  58. package/dist/sdk/auth.js.map +1 -1
  59. package/dist/sdk/errors.d.ts +3 -1
  60. package/dist/sdk/errors.d.ts.map +1 -1
  61. package/dist/sdk/errors.js +2 -2
  62. package/dist/sdk/errors.js.map +1 -1
  63. package/dist/sdk/fix-quality.d.ts +20 -0
  64. package/dist/sdk/fix-quality.d.ts.map +1 -0
  65. package/dist/sdk/fix-quality.js +167 -0
  66. package/dist/sdk/fix-quality.js.map +1 -0
  67. package/dist/sdk/prepare.d.ts.map +1 -1
  68. package/dist/sdk/prepare.js +5 -0
  69. package/dist/sdk/prepare.js.map +1 -1
  70. package/dist/sentry.d.ts +5 -3
  71. package/dist/sentry.d.ts.map +1 -1
  72. package/dist/sentry.js +37 -11
  73. package/dist/sentry.js.map +1 -1
  74. package/dist/skills/remote.js +1 -1
  75. package/dist/skills/remote.js.map +1 -1
  76. package/dist/utils/exec.d.ts +4 -1
  77. package/dist/utils/exec.d.ts.map +1 -1
  78. package/dist/utils/exec.js +6 -4
  79. package/dist/utils/exec.js.map +1 -1
  80. package/package.json +1 -1
  81. package/skills/warden-sweep/SKILL.md +67 -74
  82. package/skills/warden-sweep/references/patch-prompt.md +72 -0
  83. package/skills/warden-sweep/references/verify-prompt.md +25 -0
  84. package/skills/warden-sweep/scripts/_utils.py +62 -0
  85. package/skills/warden-sweep/scripts/create_issue.py +189 -0
  86. package/skills/warden-sweep/scripts/find_reviewers.py +16 -17
  87. package/skills/warden-sweep/scripts/generate_report.py +20 -25
  88. package/skills/warden-sweep/scripts/organize.py +128 -21
  89. package/skills/warden-sweep/scripts/scan.py +82 -130
@@ -23,30 +23,7 @@ from datetime import datetime, timezone
23
23
  from typing import Any
24
24
 
25
25
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
26
- from _utils import read_jsonl # noqa: E402
27
-
28
-
29
- def read_json(path: str) -> dict[str, Any] | None:
30
- """Read a JSON file and return parsed object."""
31
- if not os.path.exists(path):
32
- return None
33
- try:
34
- with open(path) as f:
35
- return json.load(f)
36
- except (json.JSONDecodeError, OSError):
37
- return None
38
-
39
-
40
- def severity_badge(severity: str) -> str:
41
- """Return a markdown-friendly severity indicator."""
42
- badges = {
43
- "critical": "**CRITICAL**",
44
- "high": "**HIGH**",
45
- "medium": "MEDIUM",
46
- "low": "LOW",
47
- "info": "info",
48
- }
49
- return badges.get(severity, severity)
26
+ from _utils import read_json, read_jsonl, severity_badge # noqa: E402
50
27
 
51
28
 
52
29
  def generate_summary_md(
@@ -65,7 +42,14 @@ def generate_summary_md(
65
42
  completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
66
43
 
67
44
  files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
68
- files_errored = sum(1 for e in scan_index if e.get("status") == "error")
45
+ files_timed_out = sum(
46
+ 1 for e in scan_index
47
+ if e.get("status") == "error" and e.get("error") == "timeout"
48
+ )
49
+ files_errored = sum(
50
+ 1 for e in scan_index
51
+ if e.get("status") == "error" and e.get("error") != "timeout"
52
+ )
69
53
 
70
54
  prs_created = sum(1 for p in patches if p.get("status") == "created")
71
55
  prs_failed = sum(1 for p in patches if p.get("status") == "error")
@@ -88,6 +72,7 @@ def generate_summary_md(
88
72
  f"| Metric | Count |",
89
73
  f"|--------|-------|",
90
74
  f"| Files scanned | {files_scanned} |",
75
+ f"| Files timed out | {files_timed_out} |",
91
76
  f"| Files errored | {files_errored} |",
92
77
  f"| Total findings | {len(all_findings)} |",
93
78
  f"| Verified | {len(verified)} |",
@@ -176,6 +161,14 @@ def generate_report_json(
176
161
  completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
177
162
 
178
163
  files_scanned = sum(1 for e in scan_index if e.get("status") == "complete")
164
+ files_timed_out = sum(
165
+ 1 for e in scan_index
166
+ if e.get("status") == "error" and e.get("error") == "timeout"
167
+ )
168
+ files_errored = sum(
169
+ 1 for e in scan_index
170
+ if e.get("status") == "error" and e.get("error") != "timeout"
171
+ )
179
172
  prs_created = sum(1 for p in patches if p.get("status") == "created")
180
173
  prs_failed = sum(1 for p in patches if p.get("status") == "error")
181
174
 
@@ -190,6 +183,8 @@ def generate_report_json(
190
183
  "completedAt": completed_at,
191
184
  "scan": {
192
185
  "filesScanned": files_scanned,
186
+ "filesTimedOut": files_timed_out,
187
+ "filesErrored": files_errored,
193
188
  "totalFindings": len(all_findings),
194
189
  },
195
190
  "verify": {
@@ -36,7 +36,7 @@ from datetime import datetime, timezone
36
36
  from typing import Any
37
37
 
38
38
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
39
- from _utils import read_jsonl # noqa: E402
39
+ from _utils import ensure_github_label, pr_number_from_url, read_json, read_jsonl, write_json # noqa: E402
40
40
 
41
41
 
42
42
  SECURITY_SKILL_PATTERNS = [
@@ -54,6 +54,15 @@ def is_security_skill(skill_name: str) -> bool:
54
54
  return name_lower in SECURITY_SKILL_PATTERNS
55
55
 
56
56
 
57
+ def severity_label(severity: str) -> str:
58
+ """Format a severity string for inline display in issue comments."""
59
+ if not severity:
60
+ return ""
61
+ if severity in ("critical", "high"):
62
+ return f" (**{severity.upper()}**)"
63
+ return f" ({severity.upper()})"
64
+
65
+
57
66
  def identify_security_findings(
58
67
  sweep_dir: str,
59
68
  ) -> list[dict[str, Any]]:
@@ -101,18 +110,7 @@ def copy_security_findings(
101
110
 
102
111
  def create_security_label() -> None:
103
112
  """Create the security label on GitHub (idempotent)."""
104
- try:
105
- subprocess.run(
106
- [
107
- "gh", "label", "create", "security",
108
- "--color", "D93F0B",
109
- "--description", "Security-related changes",
110
- ],
111
- capture_output=True,
112
- timeout=15,
113
- )
114
- except (subprocess.TimeoutExpired, FileNotFoundError):
115
- pass
113
+ ensure_github_label("security", "D93F0B", "Security-related changes")
116
114
 
117
115
 
118
116
  def label_security_prs(
@@ -156,6 +154,115 @@ def label_security_prs(
156
154
  return labeled
157
155
 
158
156
 
157
+ def _has_sweep_complete_comment(issue_url: str) -> bool:
158
+ """Check if the tracking issue already has a 'Sweep Complete' comment."""
159
+ try:
160
+ result = subprocess.run(
161
+ ["gh", "issue", "view", issue_url, "--json", "comments", "--jq",
162
+ '.comments[].body | select(startswith("## Sweep Complete"))'],
163
+ capture_output=True,
164
+ text=True,
165
+ timeout=15,
166
+ )
167
+ return result.returncode == 0 and result.stdout.strip() != ""
168
+ except (subprocess.TimeoutExpired, FileNotFoundError):
169
+ return False
170
+
171
+
172
+ def update_tracking_issue(sweep_dir: str) -> None:
173
+ """Post a comment on the tracking issue with final PR results. Idempotent."""
174
+ manifest = read_json(os.path.join(sweep_dir, "data", "manifest.json"))
175
+ if not manifest:
176
+ return
177
+
178
+ issue_url = manifest.get("issueUrl")
179
+ if not issue_url:
180
+ return
181
+
182
+ if _has_sweep_complete_comment(issue_url):
183
+ print("Tracking issue already has completion comment, skipping.", file=sys.stderr)
184
+ return
185
+
186
+ patches = read_jsonl(os.path.join(sweep_dir, "data", "patches.jsonl"))
187
+ verified = read_jsonl(os.path.join(sweep_dir, "data", "verified.jsonl"))
188
+ security_index = read_jsonl(os.path.join(sweep_dir, "security", "index.jsonl"))
189
+
190
+ # Build lookup from findingId to verified finding
191
+ verified_lookup: dict[str, dict[str, Any]] = {}
192
+ for f in verified:
193
+ fid = f.get("findingId", "")
194
+ if fid:
195
+ verified_lookup[fid] = f
196
+
197
+ security_ids = {f.get("findingId", "") for f in security_index}
198
+
199
+ created = sum(1 for p in patches if p.get("status") == "created")
200
+ existing = sum(1 for p in patches if p.get("status") == "existing")
201
+ failed = sum(1 for p in patches if p.get("status") == "error")
202
+
203
+ lines = [
204
+ "## Sweep Complete",
205
+ "",
206
+ "| PRs Created | PRs Skipped (existing) | PRs Failed | Security Findings |",
207
+ "|-------------|------------------------|------------|-------------------|",
208
+ f"| {created} | {existing} | {failed} | {len(security_index)} |",
209
+ "",
210
+ ]
211
+
212
+ # PR task list
213
+ pr_entries = [p for p in patches if p.get("status") == "created" and p.get("prUrl")]
214
+ if pr_entries:
215
+ lines.append("### PRs")
216
+ lines.append("")
217
+ for p in pr_entries:
218
+ fid = p.get("findingId", "")
219
+ pr_number = pr_number_from_url(p.get("prUrl", ""))
220
+ finding = verified_lookup.get(fid, {})
221
+ title = finding.get("title", fid)
222
+ sev = severity_label(finding.get("severity", ""))
223
+ lines.append(f"- [ ] #{pr_number} - fix: {title}{sev}")
224
+ lines.append("")
225
+
226
+ # Security findings section
227
+ security_prs = [
228
+ p for p in patches
229
+ if p.get("status") == "created"
230
+ and p.get("findingId", "") in security_ids
231
+ and p.get("prUrl")
232
+ ]
233
+ if security_prs:
234
+ lines.append("### Security Findings")
235
+ lines.append("")
236
+ for p in security_prs:
237
+ fid = p.get("findingId", "")
238
+ pr_number = pr_number_from_url(p.get("prUrl", ""))
239
+ finding = verified_lookup.get(fid, {})
240
+ title = finding.get("title", fid)
241
+ sev = severity_label(finding.get("severity", ""))
242
+ lines.append(f"- #{pr_number} - {title}{sev}")
243
+ lines.append("")
244
+
245
+ body = "\n".join(lines)
246
+
247
+ try:
248
+ result = subprocess.run(
249
+ ["gh", "issue", "comment", issue_url, "--body", body],
250
+ capture_output=True,
251
+ text=True,
252
+ timeout=30,
253
+ )
254
+ if result.returncode != 0:
255
+ print(
256
+ f"Warning: Failed to comment on tracking issue: {result.stderr.strip()}",
257
+ file=sys.stderr,
258
+ )
259
+ except (subprocess.TimeoutExpired, FileNotFoundError) as e:
260
+ print(
261
+ f"Warning: Failed to comment on tracking issue: {e}",
262
+ file=sys.stderr,
263
+ )
264
+
265
+
159
266
  def update_findings_with_pr_links(sweep_dir: str) -> None:
160
267
  """Append PR links to findings/*.md for created PRs."""
161
268
  patches = read_jsonl(os.path.join(sweep_dir, "data", "patches.jsonl"))
@@ -218,20 +325,16 @@ def run_generate_report(sweep_dir: str, script_dir: str) -> None:
218
325
  def update_manifest(sweep_dir: str) -> None:
219
326
  """Mark organize phase complete and add completedAt timestamp."""
220
327
  manifest_path = os.path.join(sweep_dir, "data", "manifest.json")
221
- if not os.path.exists(manifest_path):
328
+ manifest = read_json(manifest_path)
329
+ if not manifest:
222
330
  return
223
331
 
224
- with open(manifest_path) as f:
225
- manifest = json.load(f)
226
-
227
332
  manifest.setdefault("phases", {})["organize"] = "complete"
228
333
  manifest["completedAt"] = datetime.now(timezone.utc).strftime(
229
334
  "%Y-%m-%dT%H:%M:%SZ"
230
335
  )
231
336
 
232
- with open(manifest_path, "w") as f:
233
- json.dump(manifest, f, indent=2)
234
- f.write("\n")
337
+ write_json(manifest_path, manifest)
235
338
 
236
339
 
237
340
  def main() -> None:
@@ -279,7 +382,11 @@ def main() -> None:
279
382
  print("Generating summary and report...", file=sys.stderr)
280
383
  run_generate_report(sweep_dir, script_dir)
281
384
 
282
- # Step 6: Update manifest
385
+ # Step 6: Update tracking issue with PR results
386
+ print("Updating tracking issue...", file=sys.stderr)
387
+ update_tracking_issue(sweep_dir)
388
+
389
+ # Step 7: Update manifest
283
390
  update_manifest(sweep_dir)
284
391
 
285
392
  # Gather stats for output
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # /// script
3
3
  # requires-python = ">=3.9"
4
+ # dependencies = ["tomli; python_version < '3.11'"]
4
5
  # ///
5
6
  """
6
7
  Warden Sweep: Scan phase.
@@ -28,12 +29,19 @@ import os
28
29
  import secrets
29
30
  import subprocess
30
31
  import sys
32
+ import threading
33
+ from concurrent.futures import ThreadPoolExecutor, as_completed
31
34
  from datetime import datetime, timezone
32
35
  from pathlib import Path
33
36
  from typing import Any
34
37
 
38
+ try:
39
+ import tomllib
40
+ except ModuleNotFoundError:
41
+ import tomli as tomllib # type: ignore[no-redef]
42
+
35
43
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
36
- from _utils import run_cmd # noqa: E402
44
+ from _utils import ensure_github_label, run_cmd # noqa: E402
37
45
 
38
46
 
39
47
  SUPPORTED_EXTENSIONS = {
@@ -66,22 +74,6 @@ def create_sweep_dir(sweep_dir: str) -> None:
66
74
  os.makedirs(os.path.join(sweep_dir, subdir), exist_ok=True)
67
75
 
68
76
 
69
- def create_warden_label() -> None:
70
- """Create the warden label on GitHub (idempotent)."""
71
- try:
72
- subprocess.run(
73
- [
74
- "gh", "label", "create", "warden",
75
- "--color", "5319E7",
76
- "--description", "Automated fix from Warden Sweep",
77
- ],
78
- capture_output=True,
79
- timeout=15,
80
- )
81
- except (subprocess.TimeoutExpired, FileNotFoundError):
82
- pass
83
-
84
-
85
77
  def write_manifest(sweep_dir: str, run_id: str) -> None:
86
78
  """Write the initial manifest.json."""
87
79
  repo = "unknown"
@@ -101,6 +93,7 @@ def write_manifest(sweep_dir: str, run_id: str) -> None:
101
93
  "phases": {
102
94
  "scan": "pending",
103
95
  "verify": "pending",
96
+ "issue": "pending",
104
97
  "patch": "pending",
105
98
  "organize": "pending",
106
99
  },
@@ -112,84 +105,16 @@ def write_manifest(sweep_dir: str, run_id: str) -> None:
112
105
  f.write("\n")
113
106
 
114
107
 
115
- def _strip_toml_inline_comment(line: str) -> str:
116
- """Strip inline TOML comments (# outside of quoted strings)."""
117
- in_quote = False
118
- quote_char = ""
119
- for i, ch in enumerate(line):
120
- if in_quote:
121
- if ch == quote_char:
122
- in_quote = False
123
- elif ch in ('"', "'"):
124
- in_quote = True
125
- quote_char = ch
126
- elif ch == "#":
127
- return line[:i].rstrip()
128
- return line
129
-
130
-
131
- def _toml_array_to_json(value: str) -> str:
132
- """Convert a TOML array string to JSON-compatible format.
133
-
134
- Handles TOML single-quoted strings and trailing commas.
135
- Inline comments should be stripped before calling this function.
136
- """
137
- import re
138
- # Replace single-quoted strings with double-quoted (TOML literal strings)
139
- value = re.sub(r"'([^']*)'", r'"\1"', value)
140
- # Strip trailing comma before closing bracket
141
- value = re.sub(r",\s*]", "]", value)
142
- return value
143
-
144
-
145
108
  def load_ignore_paths() -> list[str]:
146
109
  """Load ignorePaths from warden.toml defaults if present."""
147
- try:
148
- # Try to parse warden.toml for defaults.ignorePaths
149
- toml_path = "warden.toml"
150
- if not os.path.exists(toml_path):
151
- return []
152
-
153
- with open(toml_path) as f:
154
- content = f.read()
155
-
156
- # Simple TOML parsing for ignorePaths in [defaults] section
157
- in_defaults = False
158
- collecting_value = False
159
- value_parts: list[str] = []
160
- for line in content.splitlines():
161
- stripped = line.strip()
162
- if collecting_value:
163
- # Skip TOML comment lines inside multiline arrays
164
- if stripped.startswith("#"):
165
- continue
166
- # Strip inline comments before accumulating
167
- stripped = _strip_toml_inline_comment(stripped)
168
- value_parts.append(stripped)
169
- combined = "".join(value_parts)
170
- if combined.count("[") <= combined.count("]"):
171
- try:
172
- return json.loads(_toml_array_to_json(combined))
173
- except json.JSONDecodeError:
174
- return []
175
- continue
176
- if stripped == "[defaults]":
177
- in_defaults = True
178
- continue
179
- if stripped.startswith("[") and stripped != "[defaults]":
180
- in_defaults = False
181
- continue
182
- if in_defaults and stripped.startswith("ignorePaths"):
183
- _, _, value = stripped.partition("=")
184
- value = _strip_toml_inline_comment(value.strip())
185
- if not value:
186
- continue
187
- try:
188
- return json.loads(_toml_array_to_json(value))
189
- except json.JSONDecodeError:
190
- value_parts = [value]
191
- collecting_value = True
110
+ toml_path = "warden.toml"
111
+ if not os.path.exists(toml_path):
192
112
  return []
113
+ try:
114
+ with open(toml_path, "rb") as f:
115
+ config = tomllib.load(f)
116
+ paths = config.get("defaults", {}).get("ignorePaths", [])
117
+ return paths if isinstance(paths, list) else []
193
118
  except Exception:
194
119
  return []
195
120
 
@@ -246,7 +171,7 @@ def enumerate_files(
246
171
  ) -> list[str]:
247
172
  """Enumerate files to scan using git ls-files, filtered by extension."""
248
173
  if specific_files:
249
- return specific_files
174
+ return [f for f in specific_files if not should_ignore(f, ignore_patterns)]
250
175
 
251
176
  result = run_cmd(["git", "ls-files"])
252
177
  if result.returncode != 0:
@@ -301,19 +226,22 @@ def log_path_for_file(sweep_dir: str, file_path: str) -> str:
301
226
 
302
227
 
303
228
  def scan_file(
304
- file_path: str, log_file: str, timeout: int = 300
229
+ file_path: str, log_file: str, timeout: int = 600, skill: str | None = None
305
230
  ) -> dict[str, Any]:
306
231
  """Run warden on a single file. Returns scan-index entry."""
307
232
  try:
233
+ cmd = [
234
+ "warden", file_path,
235
+ "--json", "--log",
236
+ "--min-confidence", "off",
237
+ "--fail-on", "off",
238
+ "--quiet",
239
+ "--output", log_file,
240
+ ]
241
+ if skill:
242
+ cmd.extend(["--skill", skill])
308
243
  result = subprocess.run(
309
- [
310
- "warden", file_path,
311
- "--json", "--log",
312
- "--min-confidence", "off",
313
- "--fail-on", "off",
314
- "--quiet",
315
- "--output", log_file,
316
- ],
244
+ cmd,
317
245
  capture_output=True,
318
246
  text=True,
319
247
  timeout=timeout,
@@ -350,9 +278,9 @@ def scan_file(
350
278
  record = json.loads(line)
351
279
  if record.get("type") == "summary":
352
280
  continue
353
- skill = record.get("skill", "")
354
- if skill:
355
- skills.add(skill)
281
+ record_skill = record.get("skill", "")
282
+ if record_skill:
283
+ skills.add(record_skill)
356
284
  findings = record.get("findings", [])
357
285
  finding_count += len(findings)
358
286
  except json.JSONDecodeError:
@@ -482,6 +410,10 @@ def main() -> None:
482
410
  "--sweep-dir",
483
411
  help="Resume into an existing sweep directory",
484
412
  )
413
+ parser.add_argument(
414
+ "--skill",
415
+ help="Run only this skill (passed through to warden --skill)",
416
+ )
485
417
  args = parser.parse_args()
486
418
 
487
419
  # Check dependencies
@@ -510,7 +442,7 @@ def main() -> None:
510
442
  if not os.path.exists(manifest_path):
511
443
  write_manifest(sweep_dir, run_id)
512
444
 
513
- create_warden_label()
445
+ ensure_github_label("warden", "5319E7", "Automated fix from Warden Sweep")
514
446
 
515
447
  # Enumerate files
516
448
  ignore_patterns = load_ignore_paths()
@@ -542,30 +474,41 @@ def main() -> None:
542
474
  file=sys.stderr,
543
475
  )
544
476
 
545
- # Scan remaining files
477
+ # Scan remaining files concurrently
546
478
  scanned = already_done
479
+ index_lock = threading.Lock()
547
480
 
548
- for i, file_path in enumerate(remaining, start=1):
481
+ def _scan_and_record(file_path: str) -> dict[str, Any]:
549
482
  log_file = log_path_for_file(sweep_dir, file_path)
550
- entry = scan_file(file_path, log_file)
483
+ entry = scan_file(file_path, log_file, skill=args.skill)
551
484
 
552
- # Append to scan-index.jsonl
553
- with open(scan_index_path, "a") as f:
554
- f.write(json.dumps(entry) + "\n")
485
+ with index_lock:
486
+ with open(scan_index_path, "a") as f:
487
+ f.write(json.dumps(entry) + "\n")
555
488
 
556
- scanned += 1
557
- if entry["status"] == "error":
558
- print(
559
- f"[{scanned}/{total}] {file_path} (ERROR: {entry.get('error', 'unknown')})",
560
- file=sys.stderr,
561
- )
562
- else:
563
- count = entry.get("findingCount", 0)
564
- suffix = f"({count} finding{'s' if count != 1 else ''})" if count > 0 else ""
565
- print(
566
- f"[{scanned}/{total}] {file_path} {suffix}".rstrip(),
567
- file=sys.stderr,
568
- )
489
+ return entry
490
+
491
+ with ThreadPoolExecutor(max_workers=4) as pool:
492
+ futures = {
493
+ pool.submit(_scan_and_record, fp): fp for fp in remaining
494
+ }
495
+ for future in as_completed(futures):
496
+ entry = future.result()
497
+ scanned += 1
498
+ file_path = entry.get("file", futures[future])
499
+ if entry["status"] == "error":
500
+ label = "TIMEOUT" if entry.get("error") == "timeout" else "ERROR"
501
+ print(
502
+ f"[{scanned}/{total}] {file_path} ({label}: {entry.get('error', 'unknown')})",
503
+ file=sys.stderr,
504
+ )
505
+ else:
506
+ count = entry.get("findingCount", 0)
507
+ suffix = f"({count} finding{'s' if count != 1 else ''})" if count > 0 else ""
508
+ print(
509
+ f"[{scanned}/{total}] {file_path} {suffix}".rstrip(),
510
+ file=sys.stderr,
511
+ )
569
512
 
570
513
  # Extract findings
571
514
  script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -578,6 +521,7 @@ def main() -> None:
578
521
  # so that resumed scans don't include stale errors for files that later succeeded.
579
522
  # Scope to current file list so counts stay consistent with `scanned`.
580
523
  files_set = set(files)
524
+ timeouts: list[dict[str, Any]] = []
581
525
  errors: list[dict[str, Any]] = []
582
526
  if os.path.exists(scan_index_path):
583
527
  last_status: dict[str, dict[str, Any]] = {}
@@ -595,36 +539,44 @@ def main() -> None:
595
539
  continue
596
540
  for entry in last_status.values():
597
541
  if entry.get("status") == "error":
598
- errors.append({
542
+ item = {
599
543
  "file": entry.get("file", ""),
600
544
  "error": entry.get("error", "unknown"),
601
545
  "exitCode": entry.get("exitCode", -1),
602
- })
546
+ }
547
+ if entry.get("error") == "timeout":
548
+ timeouts.append(item)
549
+ else:
550
+ errors.append(item)
551
+
552
+ total_failed = len(timeouts) + len(errors)
603
553
 
604
554
  # Output JSON summary
605
555
  output = {
606
556
  "runId": run_id,
607
557
  "sweepDir": sweep_dir,
608
- "filesScanned": scanned - len(errors),
558
+ "filesScanned": scanned - total_failed,
559
+ "filesTimedOut": len(timeouts),
609
560
  "filesErrored": len(errors),
610
561
  "totalFindings": len(findings),
611
562
  "bySeverity": by_severity,
612
563
  "findingsPath": os.path.join(sweep_dir, "data", "all-findings.jsonl"),
613
564
  "findings": findings,
565
+ "timeouts": timeouts,
614
566
  "errors": errors,
615
567
  }
616
568
 
617
569
  print(json.dumps(output, indent=2))
618
570
 
619
571
  # Fatal only if every file across all runs errored (no successful scans at all)
620
- successful = scanned - len(errors)
572
+ successful = scanned - total_failed
621
573
  if successful == 0 and scanned > 0:
622
574
  update_manifest_phase(sweep_dir, "scan", "error")
623
575
  sys.exit(1)
624
576
 
625
577
  update_manifest_phase(sweep_dir, "scan", "complete")
626
578
 
627
- if len(errors) > 0:
579
+ if total_failed > 0:
628
580
  sys.exit(2)
629
581
 
630
582