devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,636 @@
1
+ """Project flaudit sweep: files-to-prompt per project + OpenRouter/Gemini analysis.
2
+
3
+ For each project (or k most recently edited), aggregates README, implementation,
4
+ and tests into a prompt, then uses OpenRouter + Gemini to find flaws:
5
+ - README vs implementation drift
6
+ - README vs tests mismatch
7
+ - Disobedience of project/workspace rules (e.g. .cursor/rules)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import fnmatch
14
+ import json
15
+ import logging
16
+ import subprocess
17
+ import time
18
+ from dataclasses import asdict, dataclass, field
19
+ from pathlib import Path
20
+
21
# Module-level logger; handlers/level are configured by the host application.
logger = logging.getLogger(__name__)

# File patterns for files-to-prompt aggregation
# README_GLOBS are matched against the basename only (see _is_readme).
README_GLOBS = ["README*", "readme*", "Readme*"]
# Source-file extensions that count as "implementation" (see _is_impl_file).
IMPL_EXTENSIONS = {".py", ".rs", ".ts", ".tsx", ".js", ".jsx", ".go", ".java", ".kt"}
# Directory names that disqualify a path from the implementation set when they
# appear as any path component.
IMPL_EXCLUDE_DIRS = {
    ".git", "node_modules", "target", ".venv", "venv", "__pycache__",
    ".pytest_cache", ".ruff_cache", "dist", "build", ".next",
}
# fnmatch globs (applied to the full repo-relative path) identifying test files.
TEST_PATTERNS = [
    "**/test_*.py", "**/tests/**/*.py", "**/*_test.py", "**/*.test.ts",
    "**/*.spec.ts", "**/__tests__/**/*", "**/test/**/*",
]
# Repo-local Cursor rule files (matched against the repo-relative path).
RULES_GLOBS = [".cursor/rules/**/*.mdc", ".cursor/rules/**/*.md"]
35
+
36
+
37
@dataclass
class FlauditFinding:
    """A single flaw finding from the LLM analysis.

    Instances are built by _try_parse_json from the model's JSON output;
    missing fields fall back to the defaults below.
    """

    severity: str  # critical, high, medium, low (lowercased during parsing)
    category: str  # readme_impl_drift, readme_tests_mismatch, rules_violation, other
    description: str  # human-readable description of the flaw
    file_ref: str | None = None  # repo-relative file the finding points at, when provided
    suggestion: str | None = None  # optional suggested fix from the model
    rule_ref: str | None = None  # For rules_violation: which rule file (e.g. user-core.mdc)
47
+
48
+
49
@dataclass
class ProjectFlauditResult:
    """Result of flaudit for one project.

    Exactly one of `findings` (possibly empty) or `error` is meaningful:
    when `error` is set the analysis was skipped or failed for this repo.
    """

    repo_path: str  # absolute path of the analyzed repository
    prompt_char_count: int  # approximate size of the prompt sent to the LLM
    findings: list[FlauditFinding] = field(default_factory=list)
    error: str | None = None  # reason the repo was skipped / analysis failed
57
+
58
+
59
+ def _discover_git_repos(
60
+ dev_root: Path,
61
+ max_depth: int = 2,
62
+ depth_0_skip_prefixes: list[str] | None = None,
63
+ depth_0_allow_names: list[str] | None = None,
64
+ ) -> list[Path]:
65
+ """Discover git repos under dev_root (bounded by max_depth).
66
+
67
+ depth_0_skip_prefixes: at depth 0, skip dirs whose names start with these.
68
+ depth_0_allow_names: dir names to allow despite skip_prefixes (e.g. _infra).
69
+ """
70
+ repos: list[Path] = []
71
+ dev_root = dev_root.expanduser().resolve()
72
+ if not dev_root.exists():
73
+ return repos
74
+
75
+ skip_prefixes = depth_0_skip_prefixes if depth_0_skip_prefixes is not None else ["_", "."]
76
+ allow_names = set(depth_0_allow_names if depth_0_allow_names is not None else ["_infra"])
77
+
78
+ if (dev_root / ".git").exists():
79
+ repos.append(dev_root)
80
+
81
+ frontier: list[tuple[Path, int]] = [(dev_root, 0)]
82
+ while frontier:
83
+ cur, depth = frontier.pop()
84
+ if depth >= max_depth:
85
+ continue
86
+ try:
87
+ children = list(cur.iterdir())
88
+ except (OSError, PermissionError):
89
+ continue
90
+ for child in children:
91
+ if not child.is_dir():
92
+ continue
93
+ name = child.name
94
+ if name in {".git", ".venv", "venv", "node_modules", "target", ".cache", ".pytest_cache", ".ruff_cache"}:
95
+ continue
96
+ if depth == 0 and skip_prefixes:
97
+ if any(name.startswith(p) for p in skip_prefixes) and name not in allow_names:
98
+ continue
99
+ if (child / ".git").exists():
100
+ repos.append(child)
101
+ continue
102
+ frontier.append((child, depth + 1))
103
+
104
+ seen: set[Path] = set()
105
+ out: list[Path] = []
106
+ for r in repos:
107
+ rr = r.resolve()
108
+ if rr in seen:
109
+ continue
110
+ seen.add(rr)
111
+ out.append(rr)
112
+ return out
113
+
114
+
115
def _git_ls_files(repo: Path) -> list[str]:
    """Return the repo's tracked paths from NUL-separated `git ls-files -z` output.

    Returns an empty list when git exits non-zero (e.g. not a repository).
    """
    completed = subprocess.run(
        ["git", "-C", str(repo), "ls-files", "-z"],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if completed.returncode != 0:
        return []
    listing = completed.stdout.decode("utf-8", errors="replace")
    # Drop the empty entry produced by the trailing NUL.
    return list(filter(None, listing.split("\0")))
126
+
127
+
128
def _git_files_changed_last_n(repo: Path, n: int) -> set[str]:
    """Return set of file paths changed in last n commits (relative to repo root).

    Returns an empty set when git exits non-zero.
    """
    completed = subprocess.run(
        ["git", "-C", str(repo), "log", "-n", str(n), "--name-only", "--format="],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if completed.returncode != 0:
        return set()
    lines = completed.stdout.decode("utf-8", errors="replace").splitlines()
    return {line.strip() for line in lines if line.strip()}
140
+
141
+
142
def _repo_last_commit_time(repo: Path) -> float:
    """Return Unix timestamp of last commit (for sorting by recency).

    Returns 0.0 when git fails or produces no/unparseable output, which
    naturally sorts such repos last under reverse-chronological ordering.
    """
    completed = subprocess.run(
        ["git", "-C", str(repo), "log", "-1", "--format=%ct"],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if completed.returncode != 0:
        return 0.0
    stamp = completed.stdout.decode().strip()
    if not stamp:
        return 0.0
    try:
        return float(stamp)
    except ValueError:
        return 0.0
156
+
157
+
158
def _is_test_file(rel_path: str) -> bool:
    """Return True when rel_path (repo-relative) looks like a test file.

    TEST_PATTERNS are fnmatch globs. fnmatch has no special '**' handling:
    a leading '**/' translates to '.*.*/' and therefore REQUIRES at least one
    path separator, so top-level files like 'test_foo.py' or a root 'tests/'
    directory never matched. To fix that, each pattern is additionally tried
    with its '**/' prefix stripped (backward-compatible: only adds matches).
    """
    for pat in TEST_PATTERNS:
        if fnmatch.fnmatch(rel_path, pat):
            return True
        # Also match the pattern anchored at the repo root.
        if pat.startswith("**/") and fnmatch.fnmatch(rel_path, pat[3:]):
            return True
    return False
163
+
164
+
165
def _is_impl_file(rel_path: str) -> bool:
    """Classify rel_path (repo-relative) as an implementation source file.

    A file qualifies when its extension is in IMPL_EXTENSIONS, no path
    component is an excluded directory, and it is not a test file.
    """
    path = Path(rel_path)
    if path.suffix.lower() not in IMPL_EXTENSIONS:
        return False
    if IMPL_EXCLUDE_DIRS.intersection(path.parts):
        return False
    return not _is_test_file(rel_path)
175
+
176
+
177
def _is_readme(rel_path: str) -> bool:
    """Return True when the file's basename matches any README glob."""
    base = Path(rel_path).name
    return any(fnmatch.fnmatch(base, pattern) for pattern in README_GLOBS)
183
+
184
+
185
def _is_rules_file(rel_path: str) -> bool:
    """Return True when rel_path matches a repo-local Cursor rules glob."""
    return any(fnmatch.fnmatch(rel_path, pattern) for pattern in RULES_GLOBS)
190
+
191
+
192
+ def _read_file_safe(path: Path, max_chars: int = 50_000) -> str | None:
193
+ try:
194
+ text = path.read_text(encoding="utf-8", errors="replace")
195
+ if len(text) > max_chars:
196
+ text = text[:max_chars] + "\n\n[... truncated ...]"
197
+ return text
198
+ except (OSError, UnicodeDecodeError):
199
+ return None
200
+
201
+
202
# Manifest files to always include (entry points, features, deps)
MANIFEST_FILES = ["pyproject.toml", "Cargo.toml", "package.json"]
# Per-manifest character cap passed to _read_file_safe.
MANIFEST_MAX_CHARS = 4_000

# Default workspace rule files when workspace_rules_include is empty.
# These are filenames looked up under workspace_rules_path in files_to_prompt.
DEFAULT_WORKSPACE_RULES = [
    "user-core.mdc",
    "user-output-structure.mdc",
    "hygiene.mdc",
    "docs.mdc",
]
213
+
214
+
215
def files_to_prompt(
    repo: Path,
    tracked: list[str],
    max_readme_chars: int = 15_000,
    max_impl_files: int = 20,
    max_impl_chars_per_file: int = 8_000,
    max_test_files: int = 15,
    max_test_chars_per_file: int = 5_000,
    max_rules_chars: int = 10_000,
    include_rules: bool = True,
    workspace_rules_path: Path | None = None,
    workspace_rules_include: list[str] | None = None,
    max_workspace_rules_chars: int = 15_000,
    scope_files: set[str] | None = None,
    max_total_chars: int | None = None,
) -> tuple[str, int]:
    """Aggregate README, impl, tests, and optional rules into a prompt string.

    repo: repository root; tracked: repo-relative paths (from `git ls-files`).
    workspace_rules_path: optional path to parent/workspace .cursor/rules.
    workspace_rules_include: filenames to include; if None/empty, use DEFAULT_WORKSPACE_RULES.
    max_workspace_rules_chars: cap for workspace rules section.
    scope_files: when set, only include these paths (manifests + README always included).
    max_total_chars: when set, stop adding sections once total exceeds this (evict tests first, then impl).

    Sections are appended in fixed priority order — manifests, README,
    implementation, tests, repo rules, workspace rules — so later sections
    are the ones dropped when the character budget runs out.

    Returns (prompt_text, total_char_count).
    """
    def in_scope(rel: str) -> bool:
        # No scope restriction means every tracked path is in scope.
        if scope_files is None:
            return True
        return rel in scope_files

    def would_exceed(add: int) -> bool:
        # True when adding `add` chars would push `total` past the budget.
        if max_total_chars is None:
            return False
        return total + add > max_total_chars

    parts: list[str] = []
    total = 0

    # 0. Manifests (entry points, features, deps — reduces false positives)
    # NOTE: manifests bypass scope_files and the total budget by design.
    for rel in MANIFEST_FILES:
        if rel not in tracked:
            continue
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, MANIFEST_MAX_CHARS)
            if text:
                parts.append(f"## Manifest: {rel}\n\n{text}")
                total += len(text)

    # 1. README (always include when in scope)
    readme_paths = [p for p in tracked if _is_readme(p) and in_scope(p)]
    for rel in readme_paths[:3]:  # At most 3 readme-like files
        # Conservative: budget check assumes the worst-case README size.
        if would_exceed(max_readme_chars):
            break
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, max_readme_chars)
            if text:
                parts.append(f"## README: {rel}\n\n{text}")
                total += len(text)

    # 2. Implementation files (sorted for deterministic prompt content)
    impl_paths = sorted([p for p in tracked if _is_impl_file(p) and in_scope(p)])[:max_impl_files]
    for rel in impl_paths:
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, max_impl_chars_per_file)
            if text and not would_exceed(len(text)):
                parts.append(f"## Implementation: {rel}\n\n{text}")
                total += len(text)
            elif would_exceed(0):
                # Budget already exhausted — stop scanning impl files entirely.
                break

    # 3. Test files (evicted first when near limit)
    test_paths = sorted([p for p in tracked if _is_test_file(p) and in_scope(p)])[:max_test_files]
    for rel in test_paths:
        if would_exceed(max_test_chars_per_file):
            break
        fp = repo / rel
        if fp.is_file():
            text = _read_file_safe(fp, max_test_chars_per_file)
            if text:
                parts.append(f"## Test: {rel}\n\n{text}")
                total += len(text)

    # 4. Per-repo rules (from tracked files)
    if include_rules and not would_exceed(max_rules_chars):
        rules_paths = [p for p in tracked if _is_rules_file(p) and in_scope(p)]
        rules_text: list[str] = []
        rules_chars = 0
        for rel in rules_paths:
            if rules_chars >= max_rules_chars:
                break
            fp = repo / rel
            if fp.is_file():
                # Each read is capped by the remaining rules budget.
                text = _read_file_safe(fp, max_rules_chars - rules_chars)
                if text:
                    rules_text.append(f"### {rel}\n\n{text}")
                    rules_chars += len(text)
        if rules_text:
            parts.append("## Project Rules (repo-local)\n\n" + "\n\n".join(rules_text))
            total += rules_chars

    # 5. Workspace rules (opportunistic: when path exists and repo is under it)
    if workspace_rules_path and not would_exceed(max_workspace_rules_chars):
        wr_path = Path(workspace_rules_path).expanduser().resolve()
        if wr_path.is_dir():
            include = workspace_rules_include or DEFAULT_WORKSPACE_RULES
            ws_rules_text: list[str] = []
            ws_chars = 0
            for fname in include:
                if ws_chars >= max_workspace_rules_chars:
                    break
                fp = wr_path / fname
                if fp.is_file():
                    text = _read_file_safe(fp, max_workspace_rules_chars - ws_chars)
                    if text:
                        ws_rules_text.append(f"### {fname}\n\n{text}")
                        ws_chars += len(text)
            if ws_rules_text:
                parts.append(
                    "## Workspace Rules (shared)\n\n" + "\n\n".join(ws_rules_text)
                )
                total += ws_chars

    header = f"# Project: {repo.name}\n\nPath: {repo}\n\n"
    prompt = header + "\n\n---\n\n".join(parts)
    # NOTE: `total` counts file contents plus the header only — the "## ..."
    # section titles and "---" separators are not counted, so the returned
    # count can undershoot len(prompt).
    return prompt, total + len(header)
344
+
345
+
346
+ def _try_parse_json(text: str) -> list[FlauditFinding] | None:
347
+ """Attempt to parse text as findings JSON. Returns None on failure."""
348
+ try:
349
+ # Handle markdown code block (```json or ``` json or bare ```)
350
+ if "```" in text:
351
+ parts = text.split("```")
352
+ # Find the first non-empty block after a fence opener
353
+ for i in range(1, len(parts)):
354
+ candidate = parts[i].strip()
355
+ # Strip optional language tag (json, JSON, etc.)
356
+ if candidate.lower().startswith("json"):
357
+ candidate = candidate[4:].strip()
358
+ if candidate:
359
+ text = candidate
360
+ break
361
+ data = json.loads(text)
362
+ if isinstance(data, list):
363
+ items = data
364
+ else:
365
+ items = data.get("findings", data.get("findings_list", []))
366
+ out: list[FlauditFinding] = []
367
+ for item in items:
368
+ if isinstance(item, dict):
369
+ out.append(
370
+ FlauditFinding(
371
+ severity=str(item.get("severity", "medium")).lower(),
372
+ category=str(item.get("category", "other")),
373
+ description=str(item.get("description", "")),
374
+ file_ref=item.get("file_ref") or item.get("file"),
375
+ suggestion=item.get("suggestion"),
376
+ rule_ref=item.get("rule_ref") or item.get("rule"),
377
+ )
378
+ )
379
+ return out
380
+ except (json.JSONDecodeError, KeyError, TypeError):
381
+ return None
382
+
383
+
384
def _parse_llm_findings(content: str) -> list[FlauditFinding]:
    """Parse LLM JSON response into a FlauditFinding list; [] when unparseable.

    Attempts, in order: direct parse, trailing-comma repair, and truncation
    repair (closing the JSON after the last complete object).

    Fix: the original repair `.replace(", ]", "]")` only handled a trailing
    comma followed by exactly one space, missing the common ",]" and ",\\n]"
    forms; a regex now removes any comma + whitespace before a closer.
    """
    import re  # local import: only the repair path needs it

    raw = content
    result = _try_parse_json(raw)
    if result is not None:
        return result
    # Retry: strip trailing commas before a closing bracket/brace. NOTE: like
    # the original replace, this is best-effort and could in principle touch
    # such sequences inside string values.
    repaired = re.sub(r",\s*([\]}])", r"\1", raw)
    result = _try_parse_json(repaired)
    if result is not None:
        return result
    # Retry: truncation repair — find last complete {...} object in the array, close JSON.
    # Handles both {"findings": [...]} and bare [...] formats.
    result = _try_truncation_repair(raw)
    if result is not None:
        logger.info("flaudit parse recovered %d findings from truncated JSON", len(result))
        return result
    logger.warning(
        "flaudit parse failed; raw response (truncated): %s",
        (raw[:500] + "..." if len(raw) > 500 else raw),
    )
    return []
408
+
409
+
410
+ def _find_array_start(raw: str) -> tuple[int, str] | None:
411
+ """Find the start of the findings array and the suffix needed to close the JSON.
412
+
413
+ Returns (array_start_index, closing_suffix) or None.
414
+ """
415
+ # {"findings": [... => suffix = "]}"
416
+ if '"findings"' in raw:
417
+ start = raw.find("[", raw.find('"findings"'))
418
+ if start >= 0:
419
+ return start, "]}"
420
+ if '"findings_list"' in raw:
421
+ start = raw.find("[", raw.find('"findings_list"'))
422
+ if start >= 0:
423
+ return start, "]}"
424
+ # Bare list: [... => suffix = "]"
425
+ stripped = raw.lstrip()
426
+ if stripped.startswith("["):
427
+ return raw.index("["), "]"
428
+ return None
429
+
430
+
431
def _try_truncation_repair(raw: str) -> list[FlauditFinding] | None:
    """Attempt to recover findings from truncated JSON by closing at the last complete object.

    Scans the findings array character by character, tracking string state
    (so braces inside quoted values are ignored) and {...} nesting depth;
    the input is then cut just after the last complete top-level object and
    the closing suffix from _find_array_start is appended before re-parsing.
    Returns the recovered findings, or None when nothing could be salvaged.
    """
    loc = _find_array_start(raw)
    if loc is None:
        return None
    start, suffix = loc
    try:
        depth = 0  # current {...} nesting depth inside the array
        last_close = -1  # index of the most recent brace that returned to depth 0
        i = start + 1  # start scanning just past the opening "["
        in_string = False
        escape = False  # previous char was a backslash inside a string
        quote = None  # which quote character opened the current string
        while i < len(raw):
            c = raw[i]
            if in_string:
                # Inside a string only an unescaped matching quote ends it;
                # everything else (including braces) is ignored.
                if escape:
                    escape = False
                elif c == "\\":
                    escape = True
                elif c == quote:
                    in_string = False
                i += 1
                continue
            # NOTE: single quotes are accepted as delimiters too — lenient
            # beyond strict JSON, since LLM output is often messy.
            if c in ('"', "'"):
                in_string = True
                quote = c
            elif c == "{":
                depth += 1
            elif c == "}":
                depth -= 1
                if depth == 0:
                    last_close = i
            i += 1
        if last_close > 0:
            # Keep everything through the last complete object, then close the JSON.
            repaired = raw[: last_close + 1] + suffix
            return _try_parse_json(repaired)
    except Exception:
        # Best-effort repair: any unexpected failure means "not recoverable".
        pass
    return None
471
+
472
+
473
def scan_project_flaudit(
    dev_root: Path,
    k_recent: int = 5,
    max_depth: int = 2,
    model_id: str = "google/gemini-2.5-flash",
    settings=None,
    max_prompt_chars: int = 120_000,
    include_rules: bool = True,
    exclude_repo_globs: list[str] | None = None,
    workspace_rules_path: str | Path | None = None,
    workspace_rules_include: list[str] | None = None,
    max_workspace_rules_chars: int = 15_000,
    severity_guidance: str | None = None,
    depth_0_skip_prefixes: list[str] | None = None,
    depth_0_allow_names: list[str] | None = None,
    scope_recent_commits: int | None = None,
    public_repo_names: list[str] | None = None,
    stricter_public_prompt: bool = True,
) -> tuple[list[ProjectFlauditResult], dict]:
    """Run flaudit on k most recently edited projects, or only on named public repos.

    When public_repo_names is non-empty, only those repos (by dir name) are analyzed
    and stricter_public_prompt is used. Otherwise k_recent applies. Returns (results, metadata).

    settings: object with an `openrouter_api_key` attribute; when absent or
    falsy, prompts are still built but each repo gets an error result instead
    of LLM findings. Work runs in two phases: prompt building (sync git/IO),
    then all LLM calls gathered concurrently in one asyncio event loop.
    """
    exclude_globs = exclude_repo_globs or [
        "*/_trash/*", "*/_scratch/*", "*/_external/*", "*/_archive/*", "*/_forks/*",
    ]
    repos = _discover_git_repos(
        dev_root,
        max_depth=max_depth,
        depth_0_skip_prefixes=depth_0_skip_prefixes,
        depth_0_allow_names=depth_0_allow_names,
    )

    # Filter by exclude globs
    def excluded(r: Path) -> bool:
        # NOTE(review): discovered repos are resolved paths, but dev_root is
        # used as-is here; if dev_root contains "~" or symlinks,
        # is_relative_to() can be False and matching silently falls back to
        # the absolute-path glob — confirm callers pass a resolved dev_root.
        rel = str(r.relative_to(dev_root)) if r.is_relative_to(dev_root) else str(r)
        for g in exclude_globs:
            if fnmatch.fnmatch(rel, g) or fnmatch.fnmatch(str(r), f"*{g}"):
                return True
        return False

    repos = [r for r in repos if not excluded(r)]

    if public_repo_names:
        # Public mode: select by directory name (case-insensitive), capped at 30.
        name_set = {n.strip().lower() for n in public_repo_names if n.strip()}
        selected = [r for r in repos if r.name.lower() in name_set][:30]
        public_repo_mode = stricter_public_prompt
    else:
        # Default mode: the k most recently committed repos.
        with_times = [(r, _repo_last_commit_time(r)) for r in repos]
        with_times.sort(key=lambda x: x[1], reverse=True)
        selected = [r for r, _ in with_times[:k_recent]]
        public_repo_mode = False

    results: list[ProjectFlauditResult] = []
    llm_service = None
    if settings and getattr(settings, "openrouter_api_key", None):
        # Imported lazily so the sweep can run without LLM deps when no key is set.
        from devguard.llm_service import LLMService
        llm_service = LLMService(settings)

    # Phase 1: build prompts (CPU/IO-bound git work, no async needed).
    pending: list[tuple[Path, str, int]] = []  # (repo, prompt, char_count)
    for repo in selected:
        tracked = _git_ls_files(repo)
        if not tracked:
            results.append(
                ProjectFlauditResult(repo_path=str(repo), prompt_char_count=0, error="no tracked files")
            )
            continue

        # Optionally restrict the prompt to files touched in recent commits;
        # manifests and READMEs are always kept for context.
        scope_files: set[str] | None = None
        if scope_recent_commits and scope_recent_commits > 0:
            recent = _git_files_changed_last_n(repo, scope_recent_commits)
            always = {p for p in tracked if p in MANIFEST_FILES or _is_readme(p)}
            scope_files = recent | always

        wr_path: Path | None = None
        if workspace_rules_path:
            wr_path = Path(workspace_rules_path).expanduser().resolve()
            if not wr_path.is_dir():
                wr_path = None

        prompt, char_count = files_to_prompt(
            repo,
            tracked,
            include_rules=include_rules,
            workspace_rules_path=wr_path,
            workspace_rules_include=workspace_rules_include,
            max_workspace_rules_chars=max_workspace_rules_chars,
            scope_files=scope_files,
            max_total_chars=max_prompt_chars,
        )
        # Hard backstop: files_to_prompt's internal budget is approximate, so
        # clamp here to guarantee the prompt never exceeds max_prompt_chars.
        if char_count > max_prompt_chars:
            prompt = prompt[:max_prompt_chars] + "\n\n[... prompt truncated ...]"
            char_count = max_prompt_chars

        if not llm_service:
            results.append(
                ProjectFlauditResult(
                    repo_path=str(repo),
                    prompt_char_count=char_count,
                    error="OPENROUTER_API_KEY not set; skipping LLM analysis",
                )
            )
            continue

        pending.append((repo, prompt, char_count))

    # Phase 2: send all LLM calls concurrently in a single event loop.
    if pending and llm_service:
        async def _run_all() -> list[tuple[Path, int, str | Exception]]:
            """Fire all LLM calls concurrently; return (repo, char_count, raw_response | Exception)."""
            async def _one(repo: Path, prompt: str, cc: int) -> tuple[Path, int, str | Exception]:
                try:
                    raw = await llm_service.analyze_project_flaudit(
                        prompt,
                        model_id=model_id,
                        severity_guidance=severity_guidance,
                        public_repo_mode=public_repo_mode,
                    )
                    return repo, cc, raw
                except Exception as e:
                    # Return (not raise) the exception so one failed repo does
                    # not abort the whole gather.
                    return repo, cc, e

            return await asyncio.gather(*[_one(r, p, c) for r, p, c in pending])

        llm_results = asyncio.run(_run_all())
        for repo, char_count, raw_or_err in llm_results:
            if isinstance(raw_or_err, Exception):
                results.append(
                    ProjectFlauditResult(repo_path=str(repo), prompt_char_count=char_count, error=str(raw_or_err))
                )
            else:
                findings = _parse_llm_findings(raw_or_err)
                results.append(
                    ProjectFlauditResult(repo_path=str(repo), prompt_char_count=char_count, findings=findings)
                )

    meta = {
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "dev_root": str(dev_root.expanduser()),
        "repos_scanned": len(selected),
        # NOTE: k_recent is reported even in public_repo_names mode, where it
        # does not drive selection.
        "k_recent": k_recent,
        "model_id": model_id,
    }
    return results, meta
619
+
620
+
621
def write_report(path: Path, results: list[ProjectFlauditResult], meta: dict) -> None:
    """Write the flaudit report as JSON, creating parent directories as needed.

    meta keys are merged at the top level; a "results" key in meta would be
    overwritten by the serialized results list.
    """
    serialized = []
    for result in results:
        serialized.append(
            {
                "repo_path": result.repo_path,
                "prompt_char_count": result.prompt_char_count,
                "findings": [asdict(finding) for finding in result.findings],
                "error": result.error,
            }
        )
    payload = dict(meta)
    payload["results"] = serialized
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")