devguard 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devguard/INTEGRATION_SUMMARY.md +121 -0
- devguard/__init__.py +3 -0
- devguard/__main__.py +6 -0
- devguard/checkers/__init__.py +41 -0
- devguard/checkers/api_usage.py +523 -0
- devguard/checkers/aws_cost.py +331 -0
- devguard/checkers/aws_iam.py +284 -0
- devguard/checkers/base.py +25 -0
- devguard/checkers/container.py +137 -0
- devguard/checkers/domain.py +189 -0
- devguard/checkers/firecrawl.py +117 -0
- devguard/checkers/fly.py +225 -0
- devguard/checkers/github.py +210 -0
- devguard/checkers/npm.py +327 -0
- devguard/checkers/npm_security.py +244 -0
- devguard/checkers/redteam.py +290 -0
- devguard/checkers/secret.py +279 -0
- devguard/checkers/swarm.py +376 -0
- devguard/checkers/tailscale.py +143 -0
- devguard/checkers/tailsnitch.py +303 -0
- devguard/checkers/tavily.py +179 -0
- devguard/checkers/vercel.py +192 -0
- devguard/cli.py +1510 -0
- devguard/cli_helpers.py +189 -0
- devguard/config.py +249 -0
- devguard/core.py +293 -0
- devguard/dashboard.py +715 -0
- devguard/discovery.py +363 -0
- devguard/http_client.py +142 -0
- devguard/llm_service.py +481 -0
- devguard/mcp_server.py +259 -0
- devguard/metrics.py +144 -0
- devguard/models.py +208 -0
- devguard/reporting.py +1571 -0
- devguard/sarif.py +295 -0
- devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
- devguard/scripts/README.md +221 -0
- devguard/scripts/auto_fix_recommendations.py +145 -0
- devguard/scripts/generate_npmignore.py +175 -0
- devguard/scripts/generate_security_report.py +324 -0
- devguard/scripts/prepublish_check.sh +29 -0
- devguard/scripts/redteam_npm_packages.py +1262 -0
- devguard/scripts/review_all_repos.py +300 -0
- devguard/spec.py +617 -0
- devguard/sweeps/__init__.py +23 -0
- devguard/sweeps/ai_editor_config_audit.py +697 -0
- devguard/sweeps/cargo_publish_audit.py +655 -0
- devguard/sweeps/dependency_audit.py +419 -0
- devguard/sweeps/gitignore_audit.py +336 -0
- devguard/sweeps/local_dev.py +260 -0
- devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
- devguard/sweeps/project_flaudit.py +636 -0
- devguard/sweeps/public_github_secrets.py +680 -0
- devguard/sweeps/publish_audit.py +478 -0
- devguard/sweeps/ssh_key_audit.py +327 -0
- devguard/utils.py +174 -0
- devguard-0.2.0.dist-info/METADATA +225 -0
- devguard-0.2.0.dist-info/RECORD +60 -0
- devguard-0.2.0.dist-info/WHEEL +4 -0
- devguard-0.2.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
"""Project flaudit sweep: files-to-prompt per project + OpenRouter/Gemini analysis.
|
|
2
|
+
|
|
3
|
+
For each project (or k most recently edited), aggregates README, implementation,
|
|
4
|
+
and tests into a prompt, then uses OpenRouter + Gemini to find flaws:
|
|
5
|
+
- README vs implementation drift
|
|
6
|
+
- README vs tests mismatch
|
|
7
|
+
- Disobedience of project/workspace rules (e.g. .cursor/rules)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import fnmatch
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import subprocess
|
|
17
|
+
import time
|
|
18
|
+
from dataclasses import asdict, dataclass, field
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)

# File patterns for files-to-prompt aggregation
README_GLOBS = ["README*", "readme*", "Readme*"]
# Source-file extensions treated as "implementation" code
IMPL_EXTENSIONS = {".py", ".rs", ".ts", ".tsx", ".js", ".jsx", ".go", ".java", ".kt"}
# Directory names excluded when collecting implementation files (vendored deps / build output)
IMPL_EXCLUDE_DIRS = {
    ".git", "node_modules", "target", ".venv", "venv", "__pycache__",
    ".pytest_cache", ".ruff_cache", "dist", "build", ".next",
}
# fnmatch-style globs that identify test files across ecosystems (Python + TS/JS)
TEST_PATTERNS = [
    "**/test_*.py", "**/tests/**/*.py", "**/*_test.py", "**/*.test.ts",
    "**/*.spec.ts", "**/__tests__/**/*", "**/test/**/*",
]
# Repo-local Cursor rules files picked up for the "Project Rules" prompt section
RULES_GLOBS = [".cursor/rules/**/*.mdc", ".cursor/rules/**/*.md"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class FlauditFinding:
    """A single flaw finding from the LLM analysis."""

    severity: str  # critical, high, medium, low
    category: str  # readme_impl_drift, readme_tests_mismatch, rules_violation, other
    description: str  # Human-readable description of the flaw
    file_ref: str | None = None  # File the finding points at, when the LLM named one
    suggestion: str | None = None  # Suggested fix, when the LLM provided one
    rule_ref: str | None = None  # For rules_violation: which rule file (e.g. user-core.mdc)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class ProjectFlauditResult:
    """Result of flaudit for one project."""

    repo_path: str  # Path of the audited repo (stringified)
    prompt_char_count: int  # Size of the aggregated prompt that was built for the LLM
    findings: list[FlauditFinding] = field(default_factory=list)  # Parsed findings (empty when errored/skipped)
    error: str | None = None  # Set when the repo was skipped or the LLM call failed
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _discover_git_repos(
|
|
60
|
+
dev_root: Path,
|
|
61
|
+
max_depth: int = 2,
|
|
62
|
+
depth_0_skip_prefixes: list[str] | None = None,
|
|
63
|
+
depth_0_allow_names: list[str] | None = None,
|
|
64
|
+
) -> list[Path]:
|
|
65
|
+
"""Discover git repos under dev_root (bounded by max_depth).
|
|
66
|
+
|
|
67
|
+
depth_0_skip_prefixes: at depth 0, skip dirs whose names start with these.
|
|
68
|
+
depth_0_allow_names: dir names to allow despite skip_prefixes (e.g. _infra).
|
|
69
|
+
"""
|
|
70
|
+
repos: list[Path] = []
|
|
71
|
+
dev_root = dev_root.expanduser().resolve()
|
|
72
|
+
if not dev_root.exists():
|
|
73
|
+
return repos
|
|
74
|
+
|
|
75
|
+
skip_prefixes = depth_0_skip_prefixes if depth_0_skip_prefixes is not None else ["_", "."]
|
|
76
|
+
allow_names = set(depth_0_allow_names if depth_0_allow_names is not None else ["_infra"])
|
|
77
|
+
|
|
78
|
+
if (dev_root / ".git").exists():
|
|
79
|
+
repos.append(dev_root)
|
|
80
|
+
|
|
81
|
+
frontier: list[tuple[Path, int]] = [(dev_root, 0)]
|
|
82
|
+
while frontier:
|
|
83
|
+
cur, depth = frontier.pop()
|
|
84
|
+
if depth >= max_depth:
|
|
85
|
+
continue
|
|
86
|
+
try:
|
|
87
|
+
children = list(cur.iterdir())
|
|
88
|
+
except (OSError, PermissionError):
|
|
89
|
+
continue
|
|
90
|
+
for child in children:
|
|
91
|
+
if not child.is_dir():
|
|
92
|
+
continue
|
|
93
|
+
name = child.name
|
|
94
|
+
if name in {".git", ".venv", "venv", "node_modules", "target", ".cache", ".pytest_cache", ".ruff_cache"}:
|
|
95
|
+
continue
|
|
96
|
+
if depth == 0 and skip_prefixes:
|
|
97
|
+
if any(name.startswith(p) for p in skip_prefixes) and name not in allow_names:
|
|
98
|
+
continue
|
|
99
|
+
if (child / ".git").exists():
|
|
100
|
+
repos.append(child)
|
|
101
|
+
continue
|
|
102
|
+
frontier.append((child, depth + 1))
|
|
103
|
+
|
|
104
|
+
seen: set[Path] = set()
|
|
105
|
+
out: list[Path] = []
|
|
106
|
+
for r in repos:
|
|
107
|
+
rr = r.resolve()
|
|
108
|
+
if rr in seen:
|
|
109
|
+
continue
|
|
110
|
+
seen.add(rr)
|
|
111
|
+
out.append(rr)
|
|
112
|
+
return out
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _git_ls_files(repo: Path) -> list[str]:
    """Return git-tracked file paths for repo; empty list when git fails."""
    result = subprocess.run(
        ["git", "-C", str(repo), "ls-files", "-z"],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode:
        return []
    # -z output is NUL-separated; drop the trailing empty entry.
    decoded = result.stdout.decode("utf-8", errors="replace")
    return list(filter(None, decoded.split("\0")))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _git_files_changed_last_n(repo: Path, n: int) -> set[str]:
    """Return set of file paths changed in last n commits (relative to repo root)."""
    result = subprocess.run(
        ["git", "-C", str(repo), "log", "-n", str(n), "--name-only", "--format="],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode:
        return set()
    # --format= suppresses commit headers, leaving one path per line.
    lines = result.stdout.decode("utf-8", errors="replace").splitlines()
    return {line.strip() for line in lines if line.strip()}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _repo_last_commit_time(repo: Path) -> float:
    """Return Unix timestamp of last commit (for sorting by recency); 0.0 on failure."""
    result = subprocess.run(
        ["git", "-C", str(repo), "log", "-1", "--format=%ct"],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode:
        return 0.0
    stamp = result.stdout.decode().strip()
    try:
        return float(stamp or "0")
    except ValueError:
        return 0.0
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _is_test_file(rel_path: str) -> bool:
    """True when rel_path matches any of the TEST_PATTERNS globs."""
    return any(fnmatch.fnmatch(rel_path, pattern) for pattern in TEST_PATTERNS)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _is_impl_file(rel_path: str) -> bool:
    """True for implementation sources: known extension, not vendored, not a test."""
    path = Path(rel_path)
    if path.suffix.lower() not in IMPL_EXTENSIONS:
        return False
    # Any excluded directory component (node_modules, dist, ...) disqualifies.
    if not IMPL_EXCLUDE_DIRS.isdisjoint(path.parts):
        return False
    return not _is_test_file(rel_path)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _is_readme(rel_path: str) -> bool:
    """True when the basename matches a README glob (README*/readme*/Readme*)."""
    basename = Path(rel_path).name
    return any(fnmatch.fnmatch(basename, glob) for glob in README_GLOBS)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _is_rules_file(rel_path: str) -> bool:
    """True when rel_path matches a repo-local .cursor/rules glob."""
    return any(fnmatch.fnmatch(rel_path, glob) for glob in RULES_GLOBS)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _read_file_safe(path: Path, max_chars: int = 50_000) -> str | None:
|
|
193
|
+
try:
|
|
194
|
+
text = path.read_text(encoding="utf-8", errors="replace")
|
|
195
|
+
if len(text) > max_chars:
|
|
196
|
+
text = text[:max_chars] + "\n\n[... truncated ...]"
|
|
197
|
+
return text
|
|
198
|
+
except (OSError, UnicodeDecodeError):
|
|
199
|
+
return None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Manifest files to always include (entry points, features, deps)
MANIFEST_FILES = ["pyproject.toml", "Cargo.toml", "package.json"]
# Per-manifest character cap when embedding into the prompt
MANIFEST_MAX_CHARS = 4_000

# Default workspace rule files when workspace_rules_include is empty
DEFAULT_WORKSPACE_RULES = [
    "user-core.mdc",
    "user-output-structure.mdc",
    "hygiene.mdc",
    "docs.mdc",
]
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def files_to_prompt(
    repo: Path,
    tracked: list[str],
    max_readme_chars: int = 15_000,
    max_impl_files: int = 20,
    max_impl_chars_per_file: int = 8_000,
    max_test_files: int = 15,
    max_test_chars_per_file: int = 5_000,
    max_rules_chars: int = 10_000,
    include_rules: bool = True,
    workspace_rules_path: Path | None = None,
    workspace_rules_include: list[str] | None = None,
    max_workspace_rules_chars: int = 15_000,
    scope_files: set[str] | None = None,
    max_total_chars: int | None = None,
) -> tuple[str, int]:
    """Aggregate README, impl, tests, and optional rules into a prompt string.

    workspace_rules_path: optional path to parent/workspace .cursor/rules.
    workspace_rules_include: filenames to include; if None/empty, use DEFAULT_WORKSPACE_RULES.
    max_workspace_rules_chars: cap for workspace rules section.
    scope_files: when set, only include these paths (manifests + README always included).
    max_total_chars: when set, stop adding sections once total exceeds this (evict tests first, then impl).

    Returns (prompt_text, total_char_count).

    Note: `total` counts file contents (and the header) only, not section
    headings or separators, so the returned count slightly under-reports the
    actual prompt length.
    """
    def in_scope(rel: str) -> bool:
        # Unrestricted when no scope was requested.
        if scope_files is None:
            return True
        return rel in scope_files

    def would_exceed(add: int) -> bool:
        # Budget check against the running `total` (closed over from below).
        if max_total_chars is None:
            return False
        return total + add > max_total_chars

    parts: list[str] = []
    total = 0

    # 0. Manifests (entry points, features, deps — reduces false positives)
    # NOTE: manifests bypass both in_scope() and would_exceed() — they are
    # always included and may push total past the budget by a small amount.
    for rel in MANIFEST_FILES:
        if rel not in tracked:
            continue
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, MANIFEST_MAX_CHARS)
            if text:
                parts.append(f"## Manifest: {rel}\n\n{text}")
                total += len(text)

    # 1. README (always include when in scope)
    readme_paths = [p for p in tracked if _is_readme(p) and in_scope(p)]
    for rel in readme_paths[:3]:  # At most 3 readme-like files
        if would_exceed(max_readme_chars):
            break
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, max_readme_chars)
            if text:
                parts.append(f"## README: {rel}\n\n{text}")
                total += len(text)

    # 2. Implementation files
    impl_paths = sorted([p for p in tracked if _is_impl_file(p) and in_scope(p)])[:max_impl_files]
    for rel in impl_paths:
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, max_impl_chars_per_file)
            # Skip files that would blow the budget; stop entirely once the
            # budget is already exhausted (would_exceed(0)).
            if text and not would_exceed(len(text)):
                parts.append(f"## Implementation: {rel}\n\n{text}")
                total += len(text)
            elif would_exceed(0):
                break

    # 3. Test files (evicted first when near limit)
    test_paths = sorted([p for p in tracked if _is_test_file(p) and in_scope(p)])[:max_test_files]
    for rel in test_paths:
        if would_exceed(max_test_chars_per_file):
            break
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, max_test_chars_per_file)
            if text:
                parts.append(f"## Test: {rel}\n\n{text}")
                total += len(text)

    # 4. Per-repo rules (from tracked files)
    if include_rules and not would_exceed(max_rules_chars):
        rules_paths = [p for p in tracked if _is_rules_file(p) and in_scope(p)]
        rules_text: list[str] = []
        rules_chars = 0
        for rel in rules_paths:
            if rules_chars >= max_rules_chars:
                break
            fp = repo / rel
            if fp.is_file():
                # Shrinking per-file cap keeps the whole section under max_rules_chars.
                text = _read_file_safe(fp, max_rules_chars - rules_chars)
                if text:
                    rules_text.append(f"### {rel}\n\n{text}")
                    rules_chars += len(text)
        if rules_text:
            parts.append("## Project Rules (repo-local)\n\n" + "\n\n".join(rules_text))
            total += rules_chars

    # 5. Workspace rules (opportunistic: when path exists and repo is under it)
    if workspace_rules_path and not would_exceed(max_workspace_rules_chars):
        wr_path = Path(workspace_rules_path).expanduser().resolve()
        if wr_path.is_dir():
            include = workspace_rules_include or DEFAULT_WORKSPACE_RULES
            ws_rules_text: list[str] = []
            ws_chars = 0
            for fname in include:
                if ws_chars >= max_workspace_rules_chars:
                    break
                fp = wr_path / fname
                if fp.is_file():
                    text = _read_file_safe(fp, max_workspace_rules_chars - ws_chars)
                    if text:
                        ws_rules_text.append(f"### {fname}\n\n{text}")
                        ws_chars += len(text)
            if ws_rules_text:
                parts.append(
                    "## Workspace Rules (shared)\n\n" + "\n\n".join(ws_rules_text)
                )
                total += ws_chars

    header = f"# Project: {repo.name}\n\nPath: {repo}\n\n"
    prompt = header + "\n\n---\n\n".join(parts)
    return prompt, total + len(header)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _try_parse_json(text: str) -> list[FlauditFinding] | None:
|
|
347
|
+
"""Attempt to parse text as findings JSON. Returns None on failure."""
|
|
348
|
+
try:
|
|
349
|
+
# Handle markdown code block (```json or ``` json or bare ```)
|
|
350
|
+
if "```" in text:
|
|
351
|
+
parts = text.split("```")
|
|
352
|
+
# Find the first non-empty block after a fence opener
|
|
353
|
+
for i in range(1, len(parts)):
|
|
354
|
+
candidate = parts[i].strip()
|
|
355
|
+
# Strip optional language tag (json, JSON, etc.)
|
|
356
|
+
if candidate.lower().startswith("json"):
|
|
357
|
+
candidate = candidate[4:].strip()
|
|
358
|
+
if candidate:
|
|
359
|
+
text = candidate
|
|
360
|
+
break
|
|
361
|
+
data = json.loads(text)
|
|
362
|
+
if isinstance(data, list):
|
|
363
|
+
items = data
|
|
364
|
+
else:
|
|
365
|
+
items = data.get("findings", data.get("findings_list", []))
|
|
366
|
+
out: list[FlauditFinding] = []
|
|
367
|
+
for item in items:
|
|
368
|
+
if isinstance(item, dict):
|
|
369
|
+
out.append(
|
|
370
|
+
FlauditFinding(
|
|
371
|
+
severity=str(item.get("severity", "medium")).lower(),
|
|
372
|
+
category=str(item.get("category", "other")),
|
|
373
|
+
description=str(item.get("description", "")),
|
|
374
|
+
file_ref=item.get("file_ref") or item.get("file"),
|
|
375
|
+
suggestion=item.get("suggestion"),
|
|
376
|
+
rule_ref=item.get("rule_ref") or item.get("rule"),
|
|
377
|
+
)
|
|
378
|
+
)
|
|
379
|
+
return out
|
|
380
|
+
except (json.JSONDecodeError, KeyError, TypeError):
|
|
381
|
+
return None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _parse_llm_findings(content: str) -> list[FlauditFinding]:
    """Parse LLM JSON response into FlauditFinding list. Retries once on parse failure."""
    # Attempt 1: parse as-is.
    parsed = _try_parse_json(content)
    if parsed is not None:
        return parsed

    # Attempt 2: strip trailing commas before closing brackets (common LLM slip).
    parsed = _try_parse_json(content.replace(", ]", "]").replace(", }", "}"))
    if parsed is not None:
        return parsed

    # Attempt 3: truncation repair — find last complete {...} object in the
    # array and close the JSON. Handles both {"findings": [...]} and bare [...].
    parsed = _try_truncation_repair(content)
    if parsed is not None:
        logger.info("flaudit parse recovered %d findings from truncated JSON", len(parsed))
        return parsed

    snippet = content[:500] + "..." if len(content) > 500 else content
    logger.warning(
        "flaudit parse failed; raw response (truncated): %s",
        snippet,
    )
    return []
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _find_array_start(raw: str) -> tuple[int, str] | None:
|
|
411
|
+
"""Find the start of the findings array and the suffix needed to close the JSON.
|
|
412
|
+
|
|
413
|
+
Returns (array_start_index, closing_suffix) or None.
|
|
414
|
+
"""
|
|
415
|
+
# {"findings": [... => suffix = "]}"
|
|
416
|
+
if '"findings"' in raw:
|
|
417
|
+
start = raw.find("[", raw.find('"findings"'))
|
|
418
|
+
if start >= 0:
|
|
419
|
+
return start, "]}"
|
|
420
|
+
if '"findings_list"' in raw:
|
|
421
|
+
start = raw.find("[", raw.find('"findings_list"'))
|
|
422
|
+
if start >= 0:
|
|
423
|
+
return start, "]}"
|
|
424
|
+
# Bare list: [... => suffix = "]"
|
|
425
|
+
stripped = raw.lstrip()
|
|
426
|
+
if stripped.startswith("["):
|
|
427
|
+
return raw.index("["), "]"
|
|
428
|
+
return None
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _try_truncation_repair(raw: str) -> list[FlauditFinding] | None:
    """Attempt to recover findings from truncated JSON by closing at the last complete object.

    Scans character-by-character from the array start, tracking string state
    (including backslash escapes) and brace depth, to find the index of the
    last fully-closed {...} object. The text is then cut there and the
    closing suffix from _find_array_start appended before re-parsing.
    """
    loc = _find_array_start(raw)
    if loc is None:
        return None
    start, suffix = loc
    try:
        depth = 0          # current {...} nesting depth inside the array
        last_close = -1    # index of the most recent top-level closing brace
        i = start + 1      # scan begins just past the opening '['
        in_string = False
        escape = False
        quote = None       # which quote char opened the current string
        while i < len(raw):
            c = raw[i]
            if in_string:
                # Inside a string: only escapes and the matching quote matter.
                if escape:
                    escape = False
                elif c == "\\":
                    escape = True
                elif c == quote:
                    in_string = False
                i += 1
                continue
            # NOTE: single quotes are treated as string delimiters too,
            # although strict JSON only uses double quotes (defensive).
            if c in ('"', "'"):
                in_string = True
                quote = c
            elif c == "{":
                depth += 1
            elif c == "}":
                depth -= 1
                if depth == 0:
                    # A complete finding object just closed here.
                    last_close = i
        i += 1
        if last_close > 0:
            repaired = raw[: last_close + 1] + suffix
            return _try_parse_json(repaired)
    except Exception:
        # Best-effort repair: any scanner failure falls through to None.
        pass
    return None
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def scan_project_flaudit(
    dev_root: Path,
    k_recent: int = 5,
    max_depth: int = 2,
    model_id: str = "google/gemini-2.5-flash",
    settings=None,
    max_prompt_chars: int = 120_000,
    include_rules: bool = True,
    exclude_repo_globs: list[str] | None = None,
    workspace_rules_path: str | Path | None = None,
    workspace_rules_include: list[str] | None = None,
    max_workspace_rules_chars: int = 15_000,
    severity_guidance: str | None = None,
    depth_0_skip_prefixes: list[str] | None = None,
    depth_0_allow_names: list[str] | None = None,
    scope_recent_commits: int | None = None,
    public_repo_names: list[str] | None = None,
    stricter_public_prompt: bool = True,
) -> tuple[list[ProjectFlauditResult], dict]:
    """Run flaudit on k most recently edited projects, or only on named public repos.

    When public_repo_names is non-empty, only those repos (by dir name) are analyzed
    and stricter_public_prompt is used. Otherwise k_recent applies. Returns (results, metadata).

    Phases: (1) discover + filter repos and build one prompt per repo
    synchronously; (2) fire all LLM calls concurrently in a single asyncio
    event loop. Repos without tracked files, or when no LLM is configured,
    get an error-only ProjectFlauditResult instead.
    """
    exclude_globs = exclude_repo_globs or [
        "*/_trash/*", "*/_scratch/*", "*/_external/*", "*/_archive/*", "*/_forks/*",
    ]
    repos = _discover_git_repos(
        dev_root,
        max_depth=max_depth,
        depth_0_skip_prefixes=depth_0_skip_prefixes,
        depth_0_allow_names=depth_0_allow_names,
    )

    # Filter by exclude globs
    def excluded(r: Path) -> bool:
        # Match both the dev_root-relative path and the absolute path
        # (absolute matching uses a "*<glob>" suffix pattern).
        rel = str(r.relative_to(dev_root)) if r.is_relative_to(dev_root) else str(r)
        for g in exclude_globs:
            if fnmatch.fnmatch(rel, g) or fnmatch.fnmatch(str(r), f"*{g}"):
                return True
        return False

    repos = [r for r in repos if not excluded(r)]

    if public_repo_names:
        # Public-repo mode: select by directory name (case-insensitive), capped at 30.
        name_set = {n.strip().lower() for n in public_repo_names if n.strip()}
        selected = [r for r in repos if r.name.lower() in name_set][:30]
        public_repo_mode = stricter_public_prompt
    else:
        # Recency mode: sort by last-commit timestamp, keep the k newest.
        with_times = [(r, _repo_last_commit_time(r)) for r in repos]
        with_times.sort(key=lambda x: x[1], reverse=True)
        selected = [r for r, _ in with_times[:k_recent]]
        public_repo_mode = False

    results: list[ProjectFlauditResult] = []
    llm_service = None
    if settings and getattr(settings, "openrouter_api_key", None):
        from devguard.llm_service import LLMService
        llm_service = LLMService(settings)

    # Phase 1: build prompts (CPU/IO-bound git work, no async needed).
    pending: list[tuple[Path, str, int]] = []  # (repo, prompt, char_count)
    for repo in selected:
        tracked = _git_ls_files(repo)
        if not tracked:
            results.append(
                ProjectFlauditResult(repo_path=str(repo), prompt_char_count=0, error="no tracked files")
            )
            continue

        # Optional scoping: restrict to files touched in the last N commits,
        # but always keep manifests and READMEs for context.
        scope_files: set[str] | None = None
        if scope_recent_commits and scope_recent_commits > 0:
            recent = _git_files_changed_last_n(repo, scope_recent_commits)
            always = {p for p in tracked if p in MANIFEST_FILES or _is_readme(p)}
            scope_files = recent | always

        wr_path: Path | None = None
        if workspace_rules_path:
            wr_path = Path(workspace_rules_path).expanduser().resolve()
            if not wr_path.is_dir():
                wr_path = None

        prompt, char_count = files_to_prompt(
            repo,
            tracked,
            include_rules=include_rules,
            workspace_rules_path=wr_path,
            workspace_rules_include=workspace_rules_include,
            max_workspace_rules_chars=max_workspace_rules_chars,
            scope_files=scope_files,
            max_total_chars=max_prompt_chars,
        )
        # Hard cap as a belt-and-braces guard (files_to_prompt's budget is soft).
        if char_count > max_prompt_chars:
            prompt = prompt[:max_prompt_chars] + "\n\n[... prompt truncated ...]"
            char_count = max_prompt_chars

        if not llm_service:
            results.append(
                ProjectFlauditResult(
                    repo_path=str(repo),
                    prompt_char_count=char_count,
                    error="OPENROUTER_API_KEY not set; skipping LLM analysis",
                )
            )
            continue

        pending.append((repo, prompt, char_count))

    # Phase 2: send all LLM calls concurrently in a single event loop.
    if pending and llm_service:
        async def _run_all() -> list[tuple[Path, int, str | Exception]]:
            """Fire all LLM calls concurrently; return (repo, char_count, raw_response | Exception)."""
            async def _one(repo: Path, prompt: str, cc: int) -> tuple[Path, int, str | Exception]:
                try:
                    raw = await llm_service.analyze_project_flaudit(
                        prompt,
                        model_id=model_id,
                        severity_guidance=severity_guidance,
                        public_repo_mode=public_repo_mode,
                    )
                    return repo, cc, raw
                except Exception as e:
                    # Exceptions are returned as values so one failed repo
                    # doesn't cancel the whole gather.
                    return repo, cc, e

            return await asyncio.gather(*[_one(r, p, c) for r, p, c in pending])

        llm_results = asyncio.run(_run_all())
        for repo, char_count, raw_or_err in llm_results:
            if isinstance(raw_or_err, Exception):
                results.append(
                    ProjectFlauditResult(repo_path=str(repo), prompt_char_count=char_count, error=str(raw_or_err))
                )
            else:
                findings = _parse_llm_findings(raw_or_err)
                results.append(
                    ProjectFlauditResult(repo_path=str(repo), prompt_char_count=char_count, findings=findings)
                )

    meta = {
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "dev_root": str(dev_root.expanduser()),
        "repos_scanned": len(selected),
        "k_recent": k_recent,
        "model_id": model_id,
    }
    return results, meta
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def write_report(path: Path, results: list[ProjectFlauditResult], meta: dict) -> None:
    """Write flaudit report to JSON (meta keys at top level, results nested)."""
    serialized = [
        {
            "repo_path": result.repo_path,
            "prompt_char_count": result.prompt_char_count,
            "findings": [asdict(finding) for finding in result.findings],
            "error": result.error,
        }
        for result in results
    ]
    payload = dict(meta)
    payload["results"] = serialized
    path.parent.mkdir(parents=True, exist_ok=True)
    body = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    path.write_text(body, encoding="utf-8")