@sentry/warden 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.lock +7 -0
- package/dist/cli/args.d.ts +15 -12
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +61 -3
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands/add.d.ts.map +1 -1
- package/dist/cli/commands/add.js +25 -33
- package/dist/cli/commands/add.js.map +1 -1
- package/dist/cli/commands/init.d.ts +0 -3
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +206 -19
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/logs.d.ts +19 -0
- package/dist/cli/commands/logs.d.ts.map +1 -0
- package/dist/cli/commands/logs.js +412 -0
- package/dist/cli/commands/logs.js.map +1 -0
- package/dist/cli/commands/setup-app.d.ts.map +1 -1
- package/dist/cli/commands/setup-app.js +19 -15
- package/dist/cli/commands/setup-app.js.map +1 -1
- package/dist/cli/context.d.ts +2 -0
- package/dist/cli/context.d.ts.map +1 -1
- package/dist/cli/context.js +8 -2
- package/dist/cli/context.js.map +1 -1
- package/dist/cli/files.d.ts.map +1 -1
- package/dist/cli/files.js +27 -30
- package/dist/cli/files.js.map +1 -1
- package/dist/cli/git.d.ts +8 -3
- package/dist/cli/git.d.ts.map +1 -1
- package/dist/cli/git.js +24 -13
- package/dist/cli/git.js.map +1 -1
- package/dist/cli/index.js +10 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/input.d.ts +7 -0
- package/dist/cli/input.d.ts.map +1 -1
- package/dist/cli/input.js +13 -2
- package/dist/cli/input.js.map +1 -1
- package/dist/cli/main.d.ts.map +1 -1
- package/dist/cli/main.js +108 -32
- package/dist/cli/main.js.map +1 -1
- package/dist/cli/output/formatters.d.ts +2 -1
- package/dist/cli/output/formatters.d.ts.map +1 -1
- package/dist/cli/output/formatters.js +22 -19
- package/dist/cli/output/formatters.js.map +1 -1
- package/dist/cli/output/index.d.ts +1 -1
- package/dist/cli/output/index.d.ts.map +1 -1
- package/dist/cli/output/index.js +1 -1
- package/dist/cli/output/index.js.map +1 -1
- package/dist/cli/output/ink-runner.js +1 -1
- package/dist/cli/output/ink-runner.js.map +1 -1
- package/dist/cli/output/jsonl.d.ts +49 -13
- package/dist/cli/output/jsonl.d.ts.map +1 -1
- package/dist/cli/output/jsonl.js +137 -4
- package/dist/cli/output/jsonl.js.map +1 -1
- package/dist/cli/output/tasks.d.ts.map +1 -1
- package/dist/cli/output/tasks.js +1 -22
- package/dist/cli/output/tasks.js.map +1 -1
- package/dist/cli/terminal.d.ts.map +1 -1
- package/dist/cli/terminal.js +0 -2
- package/dist/cli/terminal.js.map +1 -1
- package/dist/config/schema.d.ts +49 -98
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +0 -12
- package/dist/config/schema.js.map +1 -1
- package/dist/config/writer.d.ts.map +1 -1
- package/dist/config/writer.js +18 -0
- package/dist/config/writer.js.map +1 -1
- package/dist/evals/index.js +1 -1
- package/dist/evals/index.js.map +1 -1
- package/dist/evals/runner.d.ts.map +1 -1
- package/dist/evals/runner.js +0 -1
- package/dist/evals/runner.js.map +1 -1
- package/dist/evals/types.d.ts +9 -15
- package/dist/evals/types.d.ts.map +1 -1
- package/dist/output/github-checks.d.ts +1 -1
- package/dist/output/github-checks.d.ts.map +1 -1
- package/dist/output/github-checks.js +2 -6
- package/dist/output/github-checks.js.map +1 -1
- package/dist/output/github-issues.d.ts.map +1 -1
- package/dist/output/github-issues.js +14 -8
- package/dist/output/github-issues.js.map +1 -1
- package/dist/output/issue-renderer.js +1 -1
- package/dist/output/issue-renderer.js.map +1 -1
- package/dist/sdk/analyze.d.ts.map +1 -1
- package/dist/sdk/analyze.js +14 -27
- package/dist/sdk/analyze.js.map +1 -1
- package/dist/sdk/auth.d.ts +16 -0
- package/dist/sdk/auth.d.ts.map +1 -0
- package/dist/sdk/auth.js +37 -0
- package/dist/sdk/auth.js.map +1 -0
- package/dist/sdk/errors.d.ts +8 -1
- package/dist/sdk/errors.d.ts.map +1 -1
- package/dist/sdk/errors.js +22 -2
- package/dist/sdk/errors.js.map +1 -1
- package/dist/sdk/prompt.js +1 -1
- package/dist/sdk/runner.d.ts +2 -1
- package/dist/sdk/runner.d.ts.map +1 -1
- package/dist/sdk/runner.js +3 -1
- package/dist/sdk/runner.js.map +1 -1
- package/dist/sdk/types.d.ts +0 -3
- package/dist/sdk/types.d.ts.map +1 -1
- package/dist/sdk/types.js.map +1 -1
- package/dist/skills/remote.js +1 -1
- package/dist/skills/remote.js.map +1 -1
- package/dist/types/index.d.ts +23 -24
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +19 -7
- package/dist/types/index.js.map +1 -1
- package/dist/utils/exec.d.ts +4 -1
- package/dist/utils/exec.d.ts.map +1 -1
- package/dist/utils/exec.js +6 -4
- package/dist/utils/exec.js.map +1 -1
- package/package.json +1 -1
- package/skills/warden/SKILL.md +76 -0
- package/skills/warden/references/cli-reference.md +142 -0
- package/skills/warden/references/config-schema.md +111 -0
- package/skills/warden/references/configuration.md +110 -0
- package/skills/warden/references/creating-skills.md +84 -0
- package/skills/warden-sweep/SKILL.md +400 -0
- package/skills/warden-sweep/references/patch-prompt.md +72 -0
- package/skills/warden-sweep/references/verify-prompt.md +25 -0
- package/skills/warden-sweep/scripts/_utils.py +99 -0
- package/skills/warden-sweep/scripts/create_issue.py +189 -0
- package/skills/warden-sweep/scripts/extract_findings.py +219 -0
- package/skills/warden-sweep/scripts/find_reviewers.py +114 -0
- package/skills/warden-sweep/scripts/generate_report.py +266 -0
- package/skills/warden-sweep/scripts/index_prs.py +187 -0
- package/skills/warden-sweep/scripts/organize.py +422 -0
- package/skills/warden-sweep/scripts/scan.py +584 -0
- package/dist/sdk/session.d.ts +0 -43
- package/dist/sdk/session.d.ts.map +0 -1
- package/dist/sdk/session.js +0 -105
- package/dist/sdk/session.js.map +0 -1
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Warden Sweep: Create tracking issue.
|
|
7
|
+
|
|
8
|
+
Creates a GitHub issue summarizing the sweep results after verification
|
|
9
|
+
but before patching. Gives every PR a parent to reference and gives
|
|
10
|
+
reviewers a single place to see the full picture.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
uv run create_issue.py <sweep-dir>
|
|
14
|
+
|
|
15
|
+
Stdout: JSON with issueUrl and issueNumber
|
|
16
|
+
Stderr: Progress lines
|
|
17
|
+
|
|
18
|
+
Idempotent: if issueUrl already exists in manifest, skips creation.
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
import subprocess
|
|
26
|
+
import sys
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
30
|
+
from _utils import ( # noqa: E402
|
|
31
|
+
ensure_github_label,
|
|
32
|
+
pr_number_from_url,
|
|
33
|
+
read_json,
|
|
34
|
+
read_jsonl,
|
|
35
|
+
severity_badge,
|
|
36
|
+
write_json,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def build_issue_body(
    run_id: str,
    scan_index: list[dict[str, Any]],
    all_findings: list[dict[str, Any]],
    verified: list[dict[str, Any]],
    rejected: list[dict[str, Any]],
) -> str:
    """Render the markdown body for the sweep tracking issue."""
    statuses = [(e.get("status"), e.get("error")) for e in scan_index]
    files_scanned = sum(1 for status, _ in statuses if status == "complete")
    files_timed_out = sum(
        1 for status, err in statuses if status == "error" and err == "timeout"
    )
    files_errored = sum(
        1 for status, err in statuses if status == "error" and err != "timeout"
    )

    # Union of every skill reported by any scan entry.
    skills = {skill for entry in scan_index for skill in entry.get("skills", [])}

    lines = [
        f"## Warden Sweep `{run_id}`",
        "",
        "| Metric | Count |",
        "|--------|-------|",
        f"| Files scanned | {files_scanned} |",
        f"| Files timed out | {files_timed_out} |",
        f"| Files errored | {files_errored} |",
        f"| Total findings | {len(all_findings)} |",
        f"| Verified | {len(verified)} |",
        f"| Rejected | {len(rejected)} |",
        "",
    ]

    if verified:
        lines += [
            "### Verified Findings",
            "",
            "| Severity | Skill | File | Title |",
            "|----------|-------|------|-------|",
        ]
        for finding in verified:
            badge = severity_badge(finding.get("severity", "info"))
            path = finding.get("file", "")
            start = finding.get("startLine")
            # Fall back to the bare path when no start line is recorded.
            where = f"{path}:{start}" if start else path
            lines.append(
                f"| {badge} | {finding.get('skill', '')} | `{where}` | {finding.get('title', '')} |"
            )
        lines.append("")

    if skills:
        lines += ["### Skills Run", "", ", ".join(sorted(skills)), ""]

    lines.append("> Generated by Warden Sweep. PRs referencing this issue will appear below.")

    return "\n".join(lines) + "\n"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def create_github_issue(title: str, body: str) -> dict[str, Any]:
    """Create a GitHub issue with the warden label.

    Parameters:
        title: Issue title.
        body: Markdown issue body.

    Returns:
        Dict with "issueUrl" (str) and "issueNumber" (int).

    Raises:
        RuntimeError: If `gh issue create` fails, or its stdout cannot be
            parsed into an issue number.
    """
    # The label must exist before `gh issue create --label` can apply it.
    ensure_github_label("warden", "5319E7", "Automated fix from Warden Sweep")

    result = subprocess.run(
        [
            "gh", "issue", "create",
            "--label", "warden",
            "--title", title,
            "--body", body,
        ],
        capture_output=True,
        text=True,
        timeout=30,
    )

    if result.returncode != 0:
        raise RuntimeError(f"gh issue create failed: {result.stderr.strip()}")

    # gh prints the new issue URL on stdout; the trailing path segment is the
    # issue number (same URL shape as a PR, so the shared helper applies).
    issue_url = result.stdout.strip()
    try:
        issue_number = int(pr_number_from_url(issue_url))
    except (ValueError, IndexError) as exc:
        # Chain the original parse failure so debugging keeps the root cause
        # (previously re-raised without `from`, losing the context — B904).
        raise RuntimeError(
            f"Could not parse issue number from gh output: {issue_url}"
        ) from exc

    return {"issueUrl": issue_url, "issueNumber": issue_number}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def main() -> None:
    """CLI entry point: create (or reuse) the sweep tracking issue."""
    parser = argparse.ArgumentParser(
        description="Warden Sweep: Create tracking issue"
    )
    parser.add_argument("sweep_dir", help="Path to the sweep directory")
    opts = parser.parse_args()

    sweep_dir = opts.sweep_dir
    data_dir = os.path.join(sweep_dir, "data")
    manifest_path = os.path.join(data_dir, "manifest.json")

    if not os.path.isdir(sweep_dir):
        # Machine-readable error on stdout so callers can parse it.
        print(
            json.dumps({"error": f"Sweep directory not found: {sweep_dir}"}),
            file=sys.stdout,
        )
        sys.exit(1)

    manifest = read_json(manifest_path) or {}

    # Idempotency: a previous run already created the issue — just re-emit it.
    existing_url = manifest.get("issueUrl")
    if existing_url:
        print(json.dumps({
            "issueUrl": existing_url,
            "issueNumber": manifest.get("issueNumber", 0),
        }))
        return

    run_id = manifest.get("runId", "unknown")

    # Load the per-phase sweep artifacts.
    scan_index = read_jsonl(os.path.join(data_dir, "scan-index.jsonl"))
    all_findings = read_jsonl(os.path.join(data_dir, "all-findings.jsonl"))
    verified = read_jsonl(os.path.join(data_dir, "verified.jsonl"))
    rejected = read_jsonl(os.path.join(data_dir, "rejected.jsonl"))

    scanned = sum(1 for entry in scan_index if entry.get("status") == "complete")

    title = f"Warden Sweep {run_id}: {len(verified)} findings across {scanned} files"
    body = build_issue_body(run_id, scan_index, all_findings, verified, rejected)

    print("Creating tracking issue...", file=sys.stderr)
    created = create_github_issue(title, body)
    print(f"Created issue: {created['issueUrl']}", file=sys.stderr)

    # Persist the issue reference so reruns hit the idempotency path above.
    manifest["issueUrl"] = created["issueUrl"]
    manifest["issueNumber"] = created["issueNumber"]
    manifest.setdefault("phases", {})["issue"] = "complete"
    write_json(manifest_path, manifest)

    print(json.dumps(created))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Extract individual findings from warden JSONL log files.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python extract_findings.py <log-path-or-directory> -o <output.jsonl>
|
|
10
|
+
python extract_findings.py .warden/logs/ --scan-index data/scan-index.jsonl -o findings.jsonl
|
|
11
|
+
|
|
12
|
+
Reads warden JSONL logs (one skill record per line, summary as last line),
|
|
13
|
+
extracts each finding as a standalone record with a stable ID, and writes
|
|
14
|
+
one finding per line to the output file.
|
|
15
|
+
|
|
16
|
+
Finding ID format: <skill>-<sha256(title+path+line)[:8]>
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def generate_finding_id(skill: str, title: str, path: str, line: int | None) -> str:
|
|
30
|
+
"""Generate a stable, deterministic finding ID."""
|
|
31
|
+
raw = f"{title}:{path}:{line or 0}"
|
|
32
|
+
digest = hashlib.sha256(raw.encode()).hexdigest()[:8]
|
|
33
|
+
# Sanitize skill name for use in ID
|
|
34
|
+
safe_skill = skill.replace("/", "-").replace(" ", "-").lower()
|
|
35
|
+
return f"{safe_skill}-{digest}"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def parse_jsonl_log(log_path: str) -> list[dict[str, Any]]:
    """Parse a warden JSONL log file and extract individual findings.

    Each non-summary line has the shape:
        {
          "run": {...},
          "skill": "...",
          "findings": [{...}, ...],
          ...
        }

    The last line is a summary record with "type": "summary" which we skip.

    Parameters:
        log_path: Path to a warden JSONL log file.

    Returns:
        Normalized finding records, one per finding. Returns an empty (or
        partial) list when the file cannot be read; malformed JSON lines
        are skipped silently.
    """
    findings = []
    try:
        with open(log_path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    # Tolerate corrupt lines — a partial log is still useful.
                    continue

                # Skip summary records
                if record.get("type") == "summary":
                    continue

                skill = record.get("skill", "unknown")
                run_meta = record.get("run", {})
                record_findings = record.get("findings", [])

                for finding in record_findings:
                    location = finding.get("location", {})
                    file_path = location.get("path", "")
                    start_line = location.get("startLine")
                    end_line = location.get("endLine")

                    # Stable ID lets downstream phases dedupe across logs.
                    finding_id = generate_finding_id(
                        skill=skill,
                        title=finding.get("title", ""),
                        path=file_path,
                        line=start_line,
                    )

                    normalized = {
                        "findingId": finding_id,
                        "file": file_path,
                        "skill": skill,
                        "severity": finding.get("severity", "info"),
                        "confidence": finding.get("confidence"),
                        "title": finding.get("title", ""),
                        "description": finding.get("description", ""),
                        "verification": finding.get("verification"),
                        "location": {
                            "path": file_path,
                            "startLine": start_line,
                            "endLine": end_line,
                        },
                        "suggestedFix": finding.get("suggestedFix"),
                        "logPath": log_path,
                        "runId": run_meta.get("runId", ""),
                    }

                    findings.append(normalized)

    # IOError has been an alias of OSError since Python 3.3, so catching
    # (OSError, IOError) was redundant — OSError alone covers both.
    except OSError as e:
        print(f"Error reading {log_path}: {e}", file=sys.stderr)

    return findings
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def collect_log_paths(source: str, scan_index: str | None = None) -> list[str]:
|
|
112
|
+
"""Collect log file paths from a directory or scan index."""
|
|
113
|
+
paths: list[str] = []
|
|
114
|
+
|
|
115
|
+
if scan_index and os.path.exists(scan_index):
|
|
116
|
+
# Read log paths from scan-index.jsonl
|
|
117
|
+
seen = set()
|
|
118
|
+
total_entries = 0
|
|
119
|
+
missing = 0
|
|
120
|
+
with open(scan_index) as f:
|
|
121
|
+
for line in f:
|
|
122
|
+
line = line.strip()
|
|
123
|
+
if not line:
|
|
124
|
+
continue
|
|
125
|
+
try:
|
|
126
|
+
entry = json.loads(line)
|
|
127
|
+
except json.JSONDecodeError:
|
|
128
|
+
continue
|
|
129
|
+
if entry.get("status") != "complete":
|
|
130
|
+
continue
|
|
131
|
+
total_entries += 1
|
|
132
|
+
log_path = entry.get("logPath", "")
|
|
133
|
+
if log_path and log_path not in seen:
|
|
134
|
+
seen.add(log_path)
|
|
135
|
+
if os.path.isfile(log_path):
|
|
136
|
+
paths.append(log_path)
|
|
137
|
+
else:
|
|
138
|
+
missing += 1
|
|
139
|
+
if missing > 0:
|
|
140
|
+
print(
|
|
141
|
+
f"Warning: {missing} log path(s) from scan-index not found on disk",
|
|
142
|
+
file=sys.stderr,
|
|
143
|
+
)
|
|
144
|
+
# Only use scan-index results if we actually found logs;
|
|
145
|
+
# fall through to source directory otherwise
|
|
146
|
+
if paths:
|
|
147
|
+
return paths
|
|
148
|
+
if total_entries > 0:
|
|
149
|
+
print(
|
|
150
|
+
"Warning: scan-index had entries but no valid log paths; "
|
|
151
|
+
"falling back to source directory",
|
|
152
|
+
file=sys.stderr,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
source_path = Path(source)
|
|
156
|
+
if source_path.is_file():
|
|
157
|
+
return [str(source_path)]
|
|
158
|
+
|
|
159
|
+
if source_path.is_dir():
|
|
160
|
+
for f in sorted(source_path.glob("*.jsonl")):
|
|
161
|
+
paths.append(str(f))
|
|
162
|
+
return paths
|
|
163
|
+
|
|
164
|
+
print(f"Source not found: {source}", file=sys.stderr)
|
|
165
|
+
return paths
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def main():
    """CLI entry point: normalize findings from warden logs into one JSONL."""
    parser = argparse.ArgumentParser(
        description="Extract findings from warden JSONL logs"
    )
    parser.add_argument(
        "source",
        help="Path to a JSONL log file or directory of log files",
    )
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="Output path for normalized findings JSONL",
    )
    parser.add_argument(
        "--scan-index",
        help="Path to scan-index.jsonl (uses log paths from completed scans)",
    )
    args = parser.parse_args()

    log_paths = collect_log_paths(args.source, args.scan_index)
    if not log_paths:
        print("No log files found.", file=sys.stderr)
        sys.exit(1)

    # Deduplicate by stable finding ID across all logs; dict insertion order
    # preserves first-seen order, and setdefault keeps the first occurrence.
    unique: dict[str, dict[str, Any]] = {}
    for log_path in log_paths:
        for finding in parse_jsonl_log(log_path):
            unique.setdefault(finding["findingId"], finding)

    # Write output
    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
    with open(args.output, "w") as out:
        out.writelines(json.dumps(finding) + "\n" for finding in unique.values())

    print(
        json.dumps({
            "logsProcessed": len(log_paths),
            "findingsExtracted": len(unique),
            "outputPath": args.output,
        })
    )


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.9"
|
|
4
|
+
# ///
|
|
5
|
+
"""
|
|
6
|
+
Find top git contributors for a file to use as PR reviewers.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python find_reviewers.py <file-path>
|
|
10
|
+
python find_reviewers.py src/foo.ts
|
|
11
|
+
|
|
12
|
+
Output: JSON to stdout with GitHub usernames of top 2 contributors
|
|
13
|
+
from the last 12 months.
|
|
14
|
+
|
|
15
|
+
{"reviewers": ["user1", "user2"]}
|
|
16
|
+
|
|
17
|
+
If no contributors found or mapping fails, returns empty list.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
|
|
26
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
27
|
+
from _utils import run_cmd_stdout as run_cmd # noqa: E402
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_top_authors(file_path: str, count: int = 2) -> list[str]:
    """Get top N author emails for a file from git log (last 12 months).

    Parameters:
        file_path: Repository-relative path to inspect.
        count: Maximum number of author emails to return.

    Returns:
        Author emails ordered by commit count (descending); empty when the
        file has no commits in the window.
    """
    from collections import Counter

    output = run_cmd([
        "git", "log",
        "--format=%ae",
        "--since=12 months ago",
        "--", file_path,
    ])

    if not output:
        return []

    # Counter replaces the hand-rolled dict counting + stable reverse sort;
    # most_common() also sorts by count descending and preserves first-seen
    # order among ties, so the result is identical.
    counts = Counter(
        email.strip() for email in output.splitlines() if email.strip()
    )
    return [email for email, _ in counts.most_common(count)]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def email_to_github_username(email: str) -> str | None:
|
|
56
|
+
"""Try to map a git email to a GitHub username.
|
|
57
|
+
|
|
58
|
+
Extracts from noreply emails directly. For other emails,
|
|
59
|
+
uses the GitHub search-by-email API via gh CLI.
|
|
60
|
+
"""
|
|
61
|
+
# Handle GitHub noreply emails directly
|
|
62
|
+
if email.endswith("@users.noreply.github.com"):
|
|
63
|
+
# Format: 12345+username@users.noreply.github.com
|
|
64
|
+
# or: username@users.noreply.github.com
|
|
65
|
+
local = email.split("@")[0]
|
|
66
|
+
if "+" in local:
|
|
67
|
+
return local.split("+", 1)[1]
|
|
68
|
+
return local
|
|
69
|
+
|
|
70
|
+
# gh api handles URL encoding; pass email directly in the query
|
|
71
|
+
output = run_cmd([
|
|
72
|
+
"gh", "api", f"search/users?q={email}+in:email",
|
|
73
|
+
"--jq", ".items[0].login",
|
|
74
|
+
])
|
|
75
|
+
return output if output else None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_current_github_user() -> str | None:
    """Get the currently authenticated GitHub username."""
    # Empty output (e.g. gh not authenticated) maps to None.
    login = run_cmd(["gh", "api", "/user", "--jq", ".login"])
    return login or None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def main():
    """CLI entry point: print JSON reviewer suggestions for a file."""
    parser = argparse.ArgumentParser(
        description="Find top git contributors for PR reviewer assignment"
    )
    parser.add_argument("file_path", help="Path to the file to find reviewers for")
    parser.add_argument(
        "--count", type=int, default=2,
        help="Number of reviewers to find (default: 2)",
    )
    args = parser.parse_args()

    current_user = get_current_github_user()

    # Request extra candidates to compensate for self-exclusion
    fetch_count = args.count + (1 if current_user else 0)
    emails = get_top_authors(args.file_path, fetch_count)
    if not emails:
        print(json.dumps({"reviewers": [], "note": "No recent authors found"}))
        return

    # Map emails to usernames, dropping unmapped authors and ourselves.
    candidates = (email_to_github_username(email) for email in emails)
    reviewers = [user for user in candidates if user and user != current_user]

    print(json.dumps({"reviewers": reviewers[:args.count]}))


if __name__ == "__main__":
    main()
|