@sentry/warden 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.lock +7 -0
- package/dist/cli/args.d.ts +14 -12
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +44 -1
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands/init.d.ts +0 -3
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +206 -19
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/logs.d.ts +19 -0
- package/dist/cli/commands/logs.d.ts.map +1 -0
- package/dist/cli/commands/logs.js +419 -0
- package/dist/cli/commands/logs.js.map +1 -0
- package/dist/cli/main.d.ts.map +1 -1
- package/dist/cli/main.js +54 -21
- package/dist/cli/main.js.map +1 -1
- package/dist/cli/output/formatters.d.ts +2 -1
- package/dist/cli/output/formatters.d.ts.map +1 -1
- package/dist/cli/output/formatters.js +22 -19
- package/dist/cli/output/formatters.js.map +1 -1
- package/dist/cli/output/index.d.ts +1 -1
- package/dist/cli/output/index.d.ts.map +1 -1
- package/dist/cli/output/index.js +1 -1
- package/dist/cli/output/index.js.map +1 -1
- package/dist/cli/output/ink-runner.js +1 -1
- package/dist/cli/output/ink-runner.js.map +1 -1
- package/dist/cli/output/jsonl.d.ts +49 -13
- package/dist/cli/output/jsonl.d.ts.map +1 -1
- package/dist/cli/output/jsonl.js +137 -4
- package/dist/cli/output/jsonl.js.map +1 -1
- package/dist/cli/output/tasks.d.ts.map +1 -1
- package/dist/cli/output/tasks.js +1 -22
- package/dist/cli/output/tasks.js.map +1 -1
- package/dist/cli/terminal.d.ts.map +1 -1
- package/dist/cli/terminal.js +0 -2
- package/dist/cli/terminal.js.map +1 -1
- package/dist/config/schema.d.ts +49 -98
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +0 -12
- package/dist/config/schema.js.map +1 -1
- package/dist/evals/runner.d.ts.map +1 -1
- package/dist/evals/runner.js +0 -1
- package/dist/evals/runner.js.map +1 -1
- package/dist/evals/types.d.ts +9 -15
- package/dist/evals/types.d.ts.map +1 -1
- package/dist/output/github-checks.d.ts +1 -1
- package/dist/output/github-checks.d.ts.map +1 -1
- package/dist/output/github-checks.js +2 -6
- package/dist/output/github-checks.js.map +1 -1
- package/dist/output/issue-renderer.js +1 -1
- package/dist/output/issue-renderer.js.map +1 -1
- package/dist/sdk/analyze.d.ts.map +1 -1
- package/dist/sdk/analyze.js +13 -26
- package/dist/sdk/analyze.js.map +1 -1
- package/dist/sdk/auth.d.ts +16 -0
- package/dist/sdk/auth.d.ts.map +1 -0
- package/dist/sdk/auth.js +37 -0
- package/dist/sdk/auth.js.map +1 -0
- package/dist/sdk/errors.d.ts +5 -0
- package/dist/sdk/errors.d.ts.map +1 -1
- package/dist/sdk/errors.js +20 -0
- package/dist/sdk/errors.js.map +1 -1
- package/dist/sdk/prompt.js +1 -1
- package/dist/sdk/runner.d.ts +2 -1
- package/dist/sdk/runner.d.ts.map +1 -1
- package/dist/sdk/runner.js +3 -1
- package/dist/sdk/runner.js.map +1 -1
- package/dist/sdk/types.d.ts +0 -3
- package/dist/sdk/types.d.ts.map +1 -1
- package/dist/sdk/types.js.map +1 -1
- package/dist/types/index.d.ts +23 -24
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +19 -7
- package/dist/types/index.js.map +1 -1
- package/package.json +1 -1
- package/skills/warden/SKILL.md +76 -0
- package/skills/warden/references/cli-reference.md +142 -0
- package/skills/warden/references/config-schema.md +111 -0
- package/skills/warden/references/configuration.md +110 -0
- package/skills/warden/references/creating-skills.md +84 -0
- package/skills/warden-sweep/SKILL.md +407 -0
- package/skills/warden-sweep/scripts/_utils.py +37 -0
- package/skills/warden-sweep/scripts/extract_findings.py +219 -0
- package/skills/warden-sweep/scripts/find_reviewers.py +115 -0
- package/skills/warden-sweep/scripts/generate_report.py +271 -0
- package/skills/warden-sweep/scripts/index_prs.py +187 -0
- package/skills/warden-sweep/scripts/organize.py +315 -0
- package/skills/warden-sweep/scripts/scan.py +632 -0
- package/dist/sdk/session.d.ts +0 -43
- package/dist/sdk/session.d.ts.map +0 -1
- package/dist/sdk/session.js +0 -105
- package/dist/sdk/session.js.map +0 -1
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Warden Sweep: Scan phase.
|
|
7
|
+
|
|
8
|
+
Replaces Phase 0 (setup) and Phase 1 (scan) with a single script.
|
|
9
|
+
Generates a run ID, creates the sweep directory, checks dependencies,
|
|
10
|
+
creates the warden label, enumerates files, runs warden on each file,
|
|
11
|
+
writes scan-index.jsonl, and calls extract_findings.py.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
uv run scan.py [file ...]
|
|
15
|
+
uv run scan.py --sweep-dir .warden/sweeps/abc123
|
|
16
|
+
uv run scan.py src/foo.ts src/bar.ts
|
|
17
|
+
|
|
18
|
+
Stdout: JSON summary (for LLM consumption)
|
|
19
|
+
Stderr: Progress lines as files complete
|
|
20
|
+
Exit codes: 0 = success, 1 = fatal, 2 = partial (some files errored)
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import hashlib
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import secrets
|
|
29
|
+
import subprocess
|
|
30
|
+
import sys
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
36
|
+
from _utils import run_cmd # noqa: E402
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# File extensions eligible for scanning. Files tracked by git whose extension
# is not in this set are skipped by enumerate_files(); explicitly listed files
# bypass this filter entirely.
SUPPORTED_EXTENSIONS = {
    ".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rs", ".java",
    ".rb", ".php", ".c", ".cpp", ".h", ".hpp", ".cs", ".swift",
    ".kt", ".scala", ".sh", ".bash", ".zsh",
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def generate_run_id() -> str:
    """Return a short random run identifier (8 lowercase hex characters)."""
    # 32 random bits rendered as zero-padded hex; equivalent to token_hex(4).
    return f"{secrets.randbits(32):08x}"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def check_dependencies() -> list[str]:
    """Return the names of required executables that are missing from PATH."""
    from shutil import which

    required = ("warden", "gh", "git")
    return [name for name in required if which(name) is None]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def create_sweep_dir(sweep_dir: str) -> None:
    """Create the sweep directory tree; safe to call on an existing sweep."""
    subdirs = (
        "findings",
        "security",
        "data/verify",
        "data/logs",
        "data/pr-diffs",
    )
    for name in subdirs:
        # parents=True also creates sweep_dir itself on the first call.
        Path(sweep_dir, name).mkdir(parents=True, exist_ok=True)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def create_warden_label() -> None:
    """Best-effort creation of the "warden" GitHub label (idempotent).

    Failures are deliberately ignored: a missing `gh` binary or a slow
    network must not abort the sweep, and `gh label create` on an existing
    label simply exits non-zero, which we do not check.
    """
    cmd = [
        "gh", "label", "create", "warden",
        "--color", "5319E7",
        "--description", "Automated fix from Warden Sweep",
    ]
    try:
        subprocess.run(cmd, capture_output=True, timeout=15)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        pass
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def write_manifest(sweep_dir: str, run_id: str) -> None:
    """Write the initial manifest.json for a new sweep run.

    The repo name is taken from the git origin URL when available and falls
    back to the current directory's basename on any failure. All phases
    start as "pending" and are advanced later by update_manifest_phase().
    """
    try:
        result = run_cmd(["git", "remote", "get-url", "origin"])
        origin = result.stdout.strip()
        repo = origin if result.returncode == 0 and origin else os.path.basename(os.getcwd())
    except Exception:
        # Any git/run_cmd failure degrades to the directory name.
        repo = os.path.basename(os.getcwd())

    manifest = {
        "runId": run_id,
        "startedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "repo": repo,
        "phases": {phase: "pending" for phase in ("scan", "verify", "patch", "organize")},
    }

    manifest_path = os.path.join(sweep_dir, "data", "manifest.json")
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)
        f.write("\n")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _strip_toml_inline_comment(line: str) -> str:
|
|
116
|
+
"""Strip inline TOML comments (# outside of quoted strings)."""
|
|
117
|
+
in_quote = False
|
|
118
|
+
quote_char = ""
|
|
119
|
+
for i, ch in enumerate(line):
|
|
120
|
+
if in_quote:
|
|
121
|
+
if ch == quote_char:
|
|
122
|
+
in_quote = False
|
|
123
|
+
elif ch in ('"', "'"):
|
|
124
|
+
in_quote = True
|
|
125
|
+
quote_char = ch
|
|
126
|
+
elif ch == "#":
|
|
127
|
+
return line[:i].rstrip()
|
|
128
|
+
return line
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _toml_array_to_json(value: str) -> str:
|
|
132
|
+
"""Convert a TOML array string to JSON-compatible format.
|
|
133
|
+
|
|
134
|
+
Handles TOML single-quoted strings and trailing commas.
|
|
135
|
+
Inline comments should be stripped before calling this function.
|
|
136
|
+
"""
|
|
137
|
+
import re
|
|
138
|
+
# Replace single-quoted strings with double-quoted (TOML literal strings)
|
|
139
|
+
value = re.sub(r"'([^']*)'", r'"\1"', value)
|
|
140
|
+
# Strip trailing comma before closing bracket
|
|
141
|
+
value = re.sub(r",\s*]", "]", value)
|
|
142
|
+
return value
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def load_ignore_paths() -> list[str]:
    """Load ignorePaths from warden.toml's [defaults] section, if present.

    Uses a small hand-rolled line scanner rather than a TOML parser so the
    script stays dependency-free. Supports single-line and multiline arrays,
    skipping comment lines and inline comments. Returns [] on any failure
    (missing file, malformed TOML, unreadable content) — ignore patterns are
    best-effort.
    """
    try:
        # Try to parse warden.toml for defaults.ignorePaths
        toml_path = "warden.toml"
        if not os.path.exists(toml_path):
            return []

        with open(toml_path) as f:
            content = f.read()

        # State machine: in_defaults tracks whether we are inside the
        # [defaults] table; collecting_value accumulates the lines of a
        # multiline array until brackets balance.
        in_defaults = False
        collecting_value = False
        value_parts: list[str] = []
        for line in content.splitlines():
            stripped = line.strip()
            if collecting_value:
                # Skip TOML comment lines inside multiline arrays
                if stripped.startswith("#"):
                    continue
                # Strip inline comments before accumulating
                stripped = _strip_toml_inline_comment(stripped)
                value_parts.append(stripped)
                combined = "".join(value_parts)
                # Array is complete once closing brackets catch up with
                # opening ones; then parse or give up.
                if combined.count("[") <= combined.count("]"):
                    try:
                        return json.loads(_toml_array_to_json(combined))
                    except json.JSONDecodeError:
                        return []
                continue
            if stripped == "[defaults]":
                in_defaults = True
                continue
            # Any other table header ends the [defaults] section.
            if stripped.startswith("[") and stripped != "[defaults]":
                in_defaults = False
                continue
            if in_defaults and stripped.startswith("ignorePaths"):
                _, _, value = stripped.partition("=")
                value = _strip_toml_inline_comment(value.strip())
                if not value:
                    continue
                try:
                    return json.loads(_toml_array_to_json(value))
                except json.JSONDecodeError:
                    # Not a complete array on one line — assume a multiline
                    # array and start accumulating continuation lines.
                    value_parts = [value]
                    collecting_value = True
        return []
    except Exception:
        # Best-effort: any unexpected error means "no ignore patterns".
        return []
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def should_ignore(path: str, ignore_patterns: list[str]) -> bool:
    """Check whether a repo-relative path matches any ignore pattern.

    fnmatch does not understand gitignore-style '**', so several heuristic
    expansions are tried in order; the first match wins. This is an
    approximation of full glob semantics, not an exact implementation —
    patterns that mix '**' with other wildcards in unusual ways may not
    match exactly as gitignore would.
    """
    if not ignore_patterns:
        return False

    from fnmatch import fnmatch

    for pattern in ignore_patterns:
        # 1) Literal fnmatch (note: '*' in fnmatch crosses '/' boundaries).
        if fnmatch(path, pattern):
            return True
        # Handle ** patterns
        if "**" in pattern:
            # 2) '**/'' treated as a single directory level.
            simple = pattern.replace("**/", "*/")
            if fnmatch(path, simple):
                return True
            # 3) Zero-directory match ('**/' can match nothing at all).
            collapsed = pattern.replace("**/", "")
            if fnmatch(path, collapsed):
                return True
            # 4) Structural matching on '/'-separated components.
            parts = path.split("/")
            glob_parts = pattern.split("/")
            if glob_parts[0] == "**":
                # Leading '**': try matching the pattern tail starting at
                # every suffix of the path (any directory depth).
                rest = "/".join(glob_parts[1:])
                for i in range(len(parts)):
                    if fnmatch("/".join(parts[i:]), rest):
                        return True
            elif glob_parts[-1].startswith("*"):
                # e.g., dist/** matches dist/anything, src/**/*.py matches src/x/y.py
                prefix = pattern.split("**")[0].rstrip("/")
                if path.startswith(prefix + "/") or path == prefix:
                    suffix = pattern.split("**")[-1]
                    if not suffix or suffix == "/":
                        # Pure prefix pattern like dist/** - any subpath matches
                        return True
                    # Has suffix like **/*.py - check with fnmatch on the remaining path
                    remaining = path[len(prefix) :].lstrip("/")
                    suffix_pattern = suffix.lstrip("/")
                    # Try both the whole remainder and just the basename so
                    # '*.py'-style suffixes match at any depth under prefix.
                    if fnmatch(remaining, suffix_pattern) or fnmatch(
                        remaining.split("/")[-1], suffix_pattern
                    ):
                        return True
    return False
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def enumerate_files(
|
|
245
|
+
specific_files: list[str] | None, ignore_patterns: list[str]
|
|
246
|
+
) -> list[str]:
|
|
247
|
+
"""Enumerate files to scan using git ls-files, filtered by extension."""
|
|
248
|
+
if specific_files:
|
|
249
|
+
return specific_files
|
|
250
|
+
|
|
251
|
+
result = run_cmd(["git", "ls-files"])
|
|
252
|
+
if result.returncode != 0:
|
|
253
|
+
print(f"git ls-files failed: {result.stderr}", file=sys.stderr)
|
|
254
|
+
return []
|
|
255
|
+
|
|
256
|
+
files = []
|
|
257
|
+
for line in result.stdout.splitlines():
|
|
258
|
+
path = line.strip()
|
|
259
|
+
if not path:
|
|
260
|
+
continue
|
|
261
|
+
|
|
262
|
+
# Filter by extension
|
|
263
|
+
ext = os.path.splitext(path)[1].lower()
|
|
264
|
+
if ext not in SUPPORTED_EXTENSIONS:
|
|
265
|
+
continue
|
|
266
|
+
|
|
267
|
+
# Filter by ignore patterns
|
|
268
|
+
if should_ignore(path, ignore_patterns):
|
|
269
|
+
continue
|
|
270
|
+
|
|
271
|
+
files.append(path)
|
|
272
|
+
|
|
273
|
+
return files
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def load_completed_files(sweep_dir: str) -> set[str]:
    """Return files already marked "complete" in scan-index.jsonl.

    Used for incremental resume: files in the returned set are skipped on
    re-runs. Missing index files and malformed lines are tolerated.
    """
    index_path = os.path.join(sweep_dir, "data", "scan-index.jsonl")
    done: set[str] = set()
    if not os.path.exists(index_path):
        return done

    with open(index_path) as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if record.get("status") == "complete":
                done.add(record.get("file", ""))
    return done
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def log_path_for_file(sweep_dir: str, file_path: str) -> str:
    """Map a source file to its stable per-file log path under data/logs.

    The name is the first 16 hex chars of the SHA-256 of the file path, so
    the same file always writes to the same log across resumed runs.
    """
    log_name = hashlib.sha256(file_path.encode()).hexdigest()[:16] + ".jsonl"
    return os.path.join(sweep_dir, "data", "logs", log_name)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def scan_file(
    file_path: str, log_file: str, timeout: int = 300
) -> dict[str, Any]:
    """Run warden on a single file and return a scan-index entry.

    Success entries have status "complete" plus logPath, the sorted set of
    skills seen in the log, and the total finding count. Any failure
    (non-zero exit, missing log, timeout, missing binary, or unexpected
    exception) is converted into a status "error" entry — this function
    never raises, so the caller's scan loop always advances.
    """
    try:
        result = subprocess.run(
            [
                "warden", file_path,
                "--json", "--log",
                # Record everything; filtering happens in later phases.
                "--min-confidence", "off",
                "--fail-on", "off",
                "--quiet",
                "--output", log_file,
            ],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        # Check for warden failure
        if result.returncode != 0:
            error_msg = result.stderr.strip() if result.stderr else "non-zero exit"
            return {
                "file": file_path,
                "status": "error",
                "error": f"warden failed: {error_msg}",
                "exitCode": result.returncode,
            }

        # Check that log file was created
        if not os.path.exists(log_file):
            return {
                "file": file_path,
                "status": "error",
                "error": "log file not created",
                "exitCode": result.returncode,
            }

        # Count findings from the log file (JSONL: one record per line;
        # malformed lines and the trailing summary record are skipped).
        finding_count = 0
        skills: set[str] = set()
        with open(log_file) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                    if record.get("type") == "summary":
                        continue
                    skill = record.get("skill", "")
                    if skill:
                        skills.add(skill)
                    findings = record.get("findings", [])
                    finding_count += len(findings)
                except json.JSONDecodeError:
                    continue

        return {
            "file": file_path,
            "logPath": log_file,
            "skills": sorted(skills),
            "findingCount": finding_count,
            "status": "complete",
            "exitCode": result.returncode,
        }

    except subprocess.TimeoutExpired:
        return {
            "file": file_path,
            "status": "error",
            "error": "timeout",
            "exitCode": -1,
        }
    except FileNotFoundError:
        # The warden binary itself is missing from PATH.
        return {
            "file": file_path,
            "status": "error",
            "error": "warden not found",
            "exitCode": -1,
        }
    except Exception as e:
        return {
            "file": file_path,
            "status": "error",
            "error": str(e),
            "exitCode": -1,
        }
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def run_extract_findings(sweep_dir: str, script_dir: str) -> None:
    """Invoke extract_findings.py to merge per-file logs into all-findings.jsonl.

    Runs the sibling script with the current interpreter. Extraction is
    non-fatal: any failure (non-zero exit, timeout, spawn error) is reported
    as a warning on stderr and the sweep continues.
    """
    data_dir = os.path.join(sweep_dir, "data")
    cmd = [
        sys.executable,
        os.path.join(script_dir, "extract_findings.py"),
        os.path.join(data_dir, "logs"),
        "--scan-index", os.path.join(data_dir, "scan-index.jsonl"),
        "-o", os.path.join(data_dir, "all-findings.jsonl"),
    ]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except Exception as e:
        print(f"Warning: extract_findings.py failed: {e}", file=sys.stderr)
        return
    if result.returncode != 0:
        print(
            f"Warning: extract_findings.py failed: {result.stderr}",
            file=sys.stderr,
        )
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def load_findings_compact(sweep_dir: str) -> tuple[list[dict[str, Any]], dict[str, int]]:
    """Read all-findings.jsonl into a compact finding list plus a severity tally.

    Each compact entry keeps only the fields the CLI summary needs
    (findingId, title, file, startLine, severity, confidence, skill).
    Missing severity defaults to "info". Returns ([], {}) when the findings
    file does not exist; malformed JSON lines are skipped.
    """
    findings_path = os.path.join(sweep_dir, "data", "all-findings.jsonl")
    compact: list[dict[str, Any]] = []
    severity_counts: dict[str, int] = {}

    if not os.path.exists(findings_path):
        return compact, severity_counts

    with open(findings_path) as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            severity = record.get("severity", "info")
            severity_counts[severity] = severity_counts.get(severity, 0) + 1

            location = record.get("location", {})
            compact.append({
                "findingId": record.get("findingId", ""),
                "title": record.get("title", ""),
                "file": record.get("file", ""),
                "startLine": location.get("startLine"),
                "severity": severity,
                "confidence": record.get("confidence"),
                "skill": record.get("skill", ""),
            })

    return compact, severity_counts
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def update_manifest_phase(sweep_dir: str, phase: str, status: str) -> None:
    """Set one phase's status in manifest.json; no-op when no manifest exists."""
    manifest_path = os.path.join(sweep_dir, "data", "manifest.json")
    if not os.path.exists(manifest_path):
        return

    with open(manifest_path) as handle:
        manifest = json.load(handle)

    # Tolerate manifests written without a "phases" key.
    manifest.setdefault("phases", {})[phase] = status

    with open(manifest_path, "w") as handle:
        json.dump(manifest, handle, indent=2)
        handle.write("\n")
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def main() -> None:
    """Entry point: set up the sweep directory, scan files, and emit a summary.

    Flow: parse args -> verify dependencies -> resolve/create the sweep dir
    and manifest -> enumerate files -> scan each un-scanned file with warden
    (appending results to scan-index.jsonl for resumability) -> run finding
    extraction -> print a JSON summary to stdout.

    Exit codes: 0 = success, 1 = fatal (missing deps, no files, or every
    scan errored), 2 = partial (some files errored).
    """
    parser = argparse.ArgumentParser(
        description="Warden Sweep: Scan phase (setup + scan)"
    )
    parser.add_argument(
        "files",
        nargs="*",
        help="Specific files to scan (default: all tracked files)",
    )
    parser.add_argument(
        "--sweep-dir",
        help="Resume into an existing sweep directory",
    )
    args = parser.parse_args()

    # Check dependencies before doing any filesystem work.
    missing = check_dependencies()
    if missing:
        print(
            json.dumps({"error": f"Missing dependencies: {', '.join(missing)}"}),
            file=sys.stdout,
        )
        sys.exit(1)

    # Determine sweep dir and run ID
    if args.sweep_dir:
        sweep_dir = args.sweep_dir
        # Extract run ID from path (normalize to handle trailing slashes)
        run_id = os.path.basename(os.path.normpath(sweep_dir))
    else:
        run_id = generate_run_id()
        sweep_dir = os.path.join(".warden", "sweeps", run_id)

    # Setup
    create_sweep_dir(sweep_dir)

    # Only write manifest if it doesn't exist (for resume support)
    manifest_path = os.path.join(sweep_dir, "data", "manifest.json")
    if not os.path.exists(manifest_path):
        write_manifest(sweep_dir, run_id)

    create_warden_label()

    # Enumerate files (explicit CLI files bypass filtering inside
    # enumerate_files).
    ignore_patterns = load_ignore_paths()
    specific_files = args.files if args.files else None
    files = enumerate_files(specific_files, ignore_patterns)

    if not files:
        print(
            json.dumps({
                "error": "No files to scan",
                "runId": run_id,
                "sweepDir": sweep_dir,
            }),
            file=sys.stdout,
        )
        sys.exit(1)

    # Load completed files for incrementality
    completed = load_completed_files(sweep_dir)
    remaining = [f for f in files if f not in completed]

    total = len(files)
    already_done = len(completed & set(files))
    scan_index_path = os.path.join(sweep_dir, "data", "scan-index.jsonl")

    if already_done > 0:
        print(
            f"Resuming: {already_done}/{total} files already scanned",
            file=sys.stderr,
        )

    # Scan remaining files; progress counter starts after resumed files.
    scanned = already_done

    for i, file_path in enumerate(remaining, start=1):
        log_file = log_path_for_file(sweep_dir, file_path)
        entry = scan_file(file_path, log_file)

        # Append to scan-index.jsonl immediately so an interrupted run can
        # resume without rescanning this file.
        with open(scan_index_path, "a") as f:
            f.write(json.dumps(entry) + "\n")

        scanned += 1
        if entry["status"] == "error":
            print(
                f"[{scanned}/{total}] {file_path} (ERROR: {entry.get('error', 'unknown')})",
                file=sys.stderr,
            )
        else:
            count = entry.get("findingCount", 0)
            suffix = f"({count} finding{'s' if count != 1 else ''})" if count > 0 else ""
            print(
                f"[{scanned}/{total}] {file_path} {suffix}".rstrip(),
                file=sys.stderr,
            )

    # Extract findings
    script_dir = os.path.dirname(os.path.abspath(__file__))
    run_extract_findings(sweep_dir, script_dir)

    # Load findings for output
    findings, by_severity = load_findings_compact(sweep_dir)

    # Collect errors for output, deduplicating by file (last entry wins)
    # so that resumed scans don't include stale errors for files that later succeeded.
    # Scope to current file list so counts stay consistent with `scanned`.
    files_set = set(files)
    errors: list[dict[str, Any]] = []
    if os.path.exists(scan_index_path):
        last_status: dict[str, dict[str, Any]] = {}
        with open(scan_index_path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                    file_path_key = entry.get("file", "")
                    if file_path_key in files_set:
                        last_status[file_path_key] = entry
                except json.JSONDecodeError:
                    continue
        for entry in last_status.values():
            if entry.get("status") == "error":
                errors.append({
                    "file": entry.get("file", ""),
                    "error": entry.get("error", "unknown"),
                    "exitCode": entry.get("exitCode", -1),
                })

    # Output JSON summary (machine-readable, stdout only).
    output = {
        "runId": run_id,
        "sweepDir": sweep_dir,
        "filesScanned": scanned - len(errors),
        "filesErrored": len(errors),
        "totalFindings": len(findings),
        "bySeverity": by_severity,
        "findingsPath": os.path.join(sweep_dir, "data", "all-findings.jsonl"),
        "findings": findings,
        "errors": errors,
    }

    print(json.dumps(output, indent=2))

    # Fatal only if every file across all runs errored (no successful scans at all)
    successful = scanned - len(errors)
    if successful == 0 and scanned > 0:
        update_manifest_phase(sweep_dir, "scan", "error")
        sys.exit(1)

    update_manifest_phase(sweep_dir, "scan", "complete")

    if len(errors) > 0:
        sys.exit(2)
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
# Script entry point: run the scan when executed directly (not on import).
if __name__ == "__main__":
    main()
|
package/dist/sdk/session.d.ts
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
/** Default directory for session storage relative to repo root */
|
|
2
|
-
export declare const DEFAULT_SESSIONS_DIR = ".warden/sessions";
|
|
3
|
-
/** Options for session storage */
|
|
4
|
-
export interface SessionStorageOptions {
|
|
5
|
-
/** Enable session storage (default: true) */
|
|
6
|
-
enabled?: boolean;
|
|
7
|
-
/** Directory to store sessions (default: .warden/sessions) */
|
|
8
|
-
directory?: string;
|
|
9
|
-
}
|
|
10
|
-
/**
|
|
11
|
-
* Derive the directory key Claude Code uses for a given project path.
|
|
12
|
-
* Claude Code maps /abs/path/to/project → -abs-path-to-project
|
|
13
|
-
*/
|
|
14
|
-
export declare function getClaudeProjectHash(projectPath: string): string;
|
|
15
|
-
/**
|
|
16
|
-
* Return the directory where Claude Code stores session files for a given repo path.
|
|
17
|
-
* Sessions are stored as <uuid>.jsonl files inside this directory.
|
|
18
|
-
*/
|
|
19
|
-
export declare function getClaudeProjectDir(repoPath: string): string;
|
|
20
|
-
/**
|
|
21
|
-
* Ensure the sessions directory exists.
|
|
22
|
-
* Creates the directory and any parent directories if they don't exist.
|
|
23
|
-
*/
|
|
24
|
-
export declare function ensureSessionsDir(dir: string): void;
|
|
25
|
-
/**
|
|
26
|
-
* Snapshot the set of .jsonl files in Claude's project directory for a given repo.
|
|
27
|
-
* Call before analysis, then use moveNewSessions after to capture any new files.
|
|
28
|
-
*/
|
|
29
|
-
export declare function snapshotSessionFiles(repoPath: string): Set<string>;
|
|
30
|
-
/**
|
|
31
|
-
* Move any new session files that appeared since the snapshot.
|
|
32
|
-
* Files are named <prefix>-<uuid>.jsonl where prefix identifies the warden run
|
|
33
|
-
* (e.g. "notseer-a049e7f7") and uuid is the Claude session ID.
|
|
34
|
-
*
|
|
35
|
-
* Safe to call concurrently -- skips files already moved by another caller.
|
|
36
|
-
* Returns paths of moved files.
|
|
37
|
-
*/
|
|
38
|
-
export declare function moveNewSessions(repoPath: string, before: Set<string>, targetDir: string, prefix?: string): string[];
|
|
39
|
-
/**
|
|
40
|
-
* Resolve the absolute sessions directory from options and repo path.
|
|
41
|
-
*/
|
|
42
|
-
export declare function resolveSessionsDir(repoPath: string, directory?: string): string;
|
|
43
|
-
//# sourceMappingURL=session.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/sdk/session.ts"],"names":[],"mappings":"AAIA,kEAAkE;AAClE,eAAO,MAAM,oBAAoB,qBAAqB,CAAC;AAEvD,kCAAkC;AAClC,MAAM,WAAW,qBAAqB;IACpC,6CAA6C;IAC7C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8DAA8D;IAC9D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAEhE;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAI5D;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAInD;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,CASlE;AAED;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,EACnB,SAAS,EAAE,MAAM,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,MAAM,EAAE,CA6CV;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,CAG/E"}
|