gwc-pybundle 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwc-pybundle might be problematic. Click here for more details.

Files changed (82) hide show
  1. gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
  2. gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
  3. gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
  4. gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
  5. gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
  6. gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
  7. pybundle/__init__.py +0 -0
  8. pybundle/__main__.py +4 -0
  9. pybundle/cli.py +546 -0
  10. pybundle/context.py +404 -0
  11. pybundle/doctor.py +148 -0
  12. pybundle/filters.py +228 -0
  13. pybundle/manifest.py +77 -0
  14. pybundle/packaging.py +45 -0
  15. pybundle/policy.py +132 -0
  16. pybundle/profiles.py +454 -0
  17. pybundle/roadmap_model.py +42 -0
  18. pybundle/roadmap_scan.py +328 -0
  19. pybundle/root_detect.py +14 -0
  20. pybundle/runner.py +180 -0
  21. pybundle/steps/__init__.py +26 -0
  22. pybundle/steps/ai_context.py +791 -0
  23. pybundle/steps/api_docs.py +219 -0
  24. pybundle/steps/asyncio_analysis.py +358 -0
  25. pybundle/steps/bandit.py +72 -0
  26. pybundle/steps/base.py +20 -0
  27. pybundle/steps/blocking_call_detection.py +291 -0
  28. pybundle/steps/call_graph.py +219 -0
  29. pybundle/steps/compileall.py +76 -0
  30. pybundle/steps/config_docs.py +319 -0
  31. pybundle/steps/config_validation.py +302 -0
  32. pybundle/steps/container_image.py +294 -0
  33. pybundle/steps/context_expand.py +272 -0
  34. pybundle/steps/copy_pack.py +293 -0
  35. pybundle/steps/coverage.py +101 -0
  36. pybundle/steps/cprofile_step.py +166 -0
  37. pybundle/steps/dependency_sizes.py +136 -0
  38. pybundle/steps/django_checks.py +214 -0
  39. pybundle/steps/dockerfile_lint.py +282 -0
  40. pybundle/steps/dockerignore.py +311 -0
  41. pybundle/steps/duplication.py +103 -0
  42. pybundle/steps/env_completeness.py +269 -0
  43. pybundle/steps/env_var_usage.py +253 -0
  44. pybundle/steps/error_refs.py +204 -0
  45. pybundle/steps/event_loop_patterns.py +280 -0
  46. pybundle/steps/exception_patterns.py +190 -0
  47. pybundle/steps/fastapi_integration.py +250 -0
  48. pybundle/steps/flask_debugging.py +312 -0
  49. pybundle/steps/git_analytics.py +315 -0
  50. pybundle/steps/handoff_md.py +176 -0
  51. pybundle/steps/import_time.py +175 -0
  52. pybundle/steps/interrogate.py +106 -0
  53. pybundle/steps/license_scan.py +96 -0
  54. pybundle/steps/line_profiler.py +117 -0
  55. pybundle/steps/link_validation.py +287 -0
  56. pybundle/steps/logging_analysis.py +233 -0
  57. pybundle/steps/memory_profile.py +176 -0
  58. pybundle/steps/migration_history.py +336 -0
  59. pybundle/steps/mutation_testing.py +141 -0
  60. pybundle/steps/mypy.py +103 -0
  61. pybundle/steps/orm_optimization.py +316 -0
  62. pybundle/steps/pip_audit.py +45 -0
  63. pybundle/steps/pipdeptree.py +62 -0
  64. pybundle/steps/pylance.py +562 -0
  65. pybundle/steps/pytest.py +66 -0
  66. pybundle/steps/query_pattern_analysis.py +334 -0
  67. pybundle/steps/radon.py +161 -0
  68. pybundle/steps/repro_md.py +161 -0
  69. pybundle/steps/rg_scans.py +78 -0
  70. pybundle/steps/roadmap.py +153 -0
  71. pybundle/steps/ruff.py +117 -0
  72. pybundle/steps/secrets_detection.py +235 -0
  73. pybundle/steps/security_headers.py +309 -0
  74. pybundle/steps/shell.py +74 -0
  75. pybundle/steps/slow_tests.py +178 -0
  76. pybundle/steps/sqlalchemy_validation.py +269 -0
  77. pybundle/steps/test_flakiness.py +184 -0
  78. pybundle/steps/tree.py +116 -0
  79. pybundle/steps/type_coverage.py +277 -0
  80. pybundle/steps/unused_deps.py +211 -0
  81. pybundle/steps/vulture.py +167 -0
  82. pybundle/tools.py +63 -0
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess # nosec B404 - Required for tool execution, paths validated
4
+ import time
5
+ from dataclasses import dataclass
6
+
7
+ from .base import StepResult
8
+ from ..context import BundleContext
9
+ from ..tools import which
10
+
11
+
12
@dataclass
class RipgrepScanStep:
    """Run a single ripgrep search and capture its output into the bundle.

    The step never fails the bundle: missing ``rg`` yields SKIP, and any
    search outcome (matches, no matches, or an rg error) is recorded in the
    log file and reported as PASS.
    """

    name: str
    pattern: str
    outfile: str
    target: str = "."  # directory or file to search
    extra_args: list[str] | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        t0 = time.time()
        log_path = ctx.workdir / self.outfile
        log_path.parent.mkdir(parents=True, exist_ok=True)

        rg_bin = which("rg")
        if not rg_bin:
            log_path.write_text(
                "rg (ripgrep) not found; skipping (install ripgrep)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing rg")

        # -n line numbers, --no-heading keeps it grep-like, -S smart case can be handy
        cmd = [rg_bin, "-n", "--no-heading", "-S", *(self.extra_args or []), self.pattern, self.target]
        header = f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n"

        proc = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )
        body = (proc.stdout or "") + ("\n" + proc.stderr if proc.stderr else "")
        log_path.write_text(ctx.redact_text(header + body), encoding="utf-8")

        # rg exit codes: 0 = matches, 1 = no matches (not an error!), 2 = real error.
        note = {
            2: "rg error (exit=2) recorded",
            1: "no matches",
        }.get(proc.returncode, "")

        # Always PASS; we’re collecting info, not enforcing policy (yet).
        return StepResult(self.name, "PASS", int(time.time() - t0), note)


def default_rg_steps(target: str = ".") -> list[RipgrepScanStep]:
    """Return the standard trio of ripgrep scans (TODOs, prints, bare excepts)."""
    specs = [
        ("rg TODO/FIXME/HACK", r"TODO|FIXME|HACK", "logs/40_rg_todos.txt"),
        ("rg print(", r"^\s*print\(", "logs/41_rg_prints.txt"),
        ("rg except patterns", r"except\s+Exception|except\s*:", "logs/42_rg_bare_excepts.txt"),
    ]
    return [
        RipgrepScanStep(name=step_name, pattern=pattern, outfile=outfile, target=target)
        for step_name, pattern, outfile in specs
    ]
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import time
5
+ from dataclasses import dataclass
6
+ from typing import Any, Protocol
7
+
8
+ from .base import StepResult
9
+ from ..context import BundleContext
10
+ from ..policy import AIContextPolicy
11
+ from ..roadmap_scan import build_roadmap # keep only if you actually use it
12
+
13
+
14
class RoadmapGraph(Protocol):
    """Structural interface for the graph object returned by ``build_roadmap``.

    Only the attributes that :class:`RoadmapStep` actually reads are declared;
    the concrete element types live in the roadmap scanner module.
    """

    # Detected entrypoint records; consumers read .node, .reason, .confidence.
    entrypoints: list[Any]
    # File/module nodes; consumers read .lang when present (via getattr).
    nodes: list[Any]
    # Directed dependency edges; consumers read .src and .dst.
    edges: list[Any]
    # Free-form counters/metrics gathered during the scan.
    stats: dict[str, Any]
19
+
20
+
21
@dataclass
class RoadmapStep:
    """Produce a deterministic project roadmap (entrypoints + dependency map).

    Writes three artifacts under the bundle workdir: the full graph as JSON,
    a Markdown rendering with a Mermaid diagram, and a compact JSON summary.
    """

    name: str = "roadmap (project map)"
    out_md: str = "meta/70_roadmap.md"
    out_json: str = "meta/70_roadmap.json"
    include: list[str] | None = None
    policy: AIContextPolicy | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        t0 = time.time()
        policy = self.policy or AIContextPolicy()

        # Directory selection: an explicit include list wins; otherwise the
        # policy supplies candidates (which already fall back to [root]).
        if self.include:
            chosen = [ctx.root / rel for rel in self.include if (ctx.root / rel).exists()]
            include_dirs = chosen if chosen else [ctx.root]
        else:
            include_dirs = policy.include_dir_candidates(ctx.root)

        graph = build_roadmap(
            root=ctx.root,
            include_dirs=include_dirs,
            exclude_dirs=set(policy.exclude_dirs),
            max_files=policy.roadmap_max_files,
        )

        # Machine-readable graph.
        json_path = ctx.workdir / self.out_json
        json_path.parent.mkdir(parents=True, exist_ok=True)
        json_path.write_text(json.dumps(graph.to_dict(), indent=2), encoding="utf-8")

        # Human-readable Markdown; Mermaid sizing knobs come from the policy.
        md_path = ctx.workdir / self.out_md
        md_path.parent.mkdir(parents=True, exist_ok=True)
        md_path.write_text(self._render_md(graph, policy), encoding="utf-8")

        # Compact side-car summary for quick programmatic consumption.
        langs = sorted({n.lang for n in graph.nodes if getattr(n, "lang", None)})
        summary = {
            "languages": langs,
            "entrypoints": [ep.node for ep in graph.entrypoints[:50]],
            "stats": graph.stats,
        }
        (ctx.workdir / "meta" / "71_roadmap_summary.json").write_text(
            json.dumps(summary, indent=2), encoding="utf-8"
        )

        note = f"nodes={len(graph.nodes)} edges={len(graph.edges)} entrypoints={len(graph.entrypoints)}"
        return StepResult(self.name, "PASS", int(time.time() - t0), note)

    def _render_md(self, graph: RoadmapGraph, policy: AIContextPolicy) -> str:
        """Render the roadmap as Markdown: entrypoints, Mermaid map, stats, notes."""
        parts: list[str] = ["# Project Roadmap", "", "## Entrypoints"]

        if graph.entrypoints:
            parts.extend(
                f"- `{ep.node}` — {ep.reason} (confidence {ep.confidence}/3)"
                for ep in graph.entrypoints[:50]
            )
        else:
            parts.append("- (none detected)")

        parts += ["", "## High-level map", "```mermaid", "flowchart LR"]
        parts += self._render_mermaid_bfs(
            graph,
            max_depth=policy.roadmap_mermaid_depth,
            max_edges=policy.roadmap_mermaid_max_edges,
        )
        parts += ["```", "", "## Stats"]
        parts += [f"- **{k}**: {graph.stats[k]}" for k in sorted(graph.stats.keys())]
        parts += ["", "## Notes"]
        parts.append(
            "- Destinations like `py:...`, `js:...`, `rs:...` are dependency specs (not resolved to paths yet)."
        )
        parts.append(
            "- This is designed to be deterministic and readable, not a perfect compiler-grade call graph."
        )
        parts.append("")
        return "\n".join(parts)

    def _render_mermaid_bfs(
        self, graph: RoadmapGraph, max_depth: int = 2, max_edges: int = 180
    ) -> list[str]:
        """BFS from the entrypoints, emitting at most *max_edges* Mermaid edges."""
        from collections import deque

        # Adjacency list built once from the edge set.
        children: dict[str, list[str]] = {}
        for edge in graph.edges:
            children.setdefault(edge.src, []).append(edge.dst)

        roots = [ep.node for ep in graph.entrypoints]
        if not roots:
            return [' A["(no entrypoints)"]']

        queue = deque((r, 0) for r in roots)
        emitted: set[tuple[str, str]] = set()
        visited: set[str] = set(roots)
        rendered: list[str] = []

        while queue and len(rendered) < max_edges:
            src, level = queue.popleft()
            if level >= max_depth:
                continue
            for dst in children.get(src, []):
                if (src, dst) in emitted:
                    continue
                emitted.add((src, dst))
                rendered.append(f' "{src}" --> "{dst}"')
                if dst not in visited:
                    visited.add(dst)
                    queue.append((dst, level + 1))
                if len(rendered) >= max_edges:
                    break

        return rendered or [' A["(no edges rendered)"]']
pybundle/steps/ruff.py ADDED
@@ -0,0 +1,117 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess # nosec B404 - Required for tool execution, paths validated
4
+ import time
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from .base import StepResult
9
+ from ..context import BundleContext
10
+ from ..tools import which
11
+
12
+
13
+ def _repo_has_py_files(root: Path) -> bool:
14
+ # Fast-ish heuristic: look for any .py file in top couple levels
15
+ # (Avoid walking deep trees; ruff itself can handle it.)
16
+ for p in root.rglob("*.py"):
17
+ # ignore common junk dirs
18
+ parts = set(p.parts)
19
+ if (
20
+ ".venv" in parts
21
+ or "__pycache__" in parts
22
+ or ".mypy_cache" in parts
23
+ or ".ruff_cache" in parts
24
+ ):
25
+ continue
26
+ if (
27
+ "node_modules" in parts
28
+ or "dist" in parts
29
+ or "build" in parts
30
+ or "artifacts" in parts
31
+ ):
32
+ continue
33
+ return True
34
+ return False
35
+
36
+
37
@dataclass
class RuffCheckStep:
    """Run ``ruff check`` over the target and capture the output to a log.

    SKIPs when ruff is unavailable or the repo has no Python files; lint
    findings are recorded (not enforced), so the step itself always PASSes.
    """

    name: str = "ruff check"
    target: str = "."
    outfile: str = "logs/31_ruff_check.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        t0 = time.time()
        log_path = ctx.workdir / self.outfile
        log_path.parent.mkdir(parents=True, exist_ok=True)

        ruff_bin = which("ruff")
        if not ruff_bin:
            log_path.write_text(
                "ruff not found; skipping (pip install ruff)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing ruff")

        if not _repo_has_py_files(ctx.root):
            log_path.write_text(
                "no .py files detected; skipping ruff check\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "no python files")

        cmd = [ruff_bin, "check", self.target]
        proc = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )
        body = (proc.stdout or "") + ("\n" + proc.stderr if proc.stderr else "")
        log_path.write_text(
            ctx.redact_text(f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n" + body),
            encoding="utf-8",
        )

        # ruff nonzero = lint failures; that’s *valuable*, but for bundling we record it.
        note = "" if proc.returncode == 0 else f"exit={proc.returncode} (lint findings)"
        return StepResult(self.name, "PASS", int(time.time() - t0), note)
74
+
75
+
76
@dataclass
class RuffFormatCheckStep:
    """Run ``ruff format --check`` and report format drift as a WARN.

    Unlike the lint step, drift here is actionable, so a nonzero exit maps
    to WARN rather than PASS.  Missing ruff or no Python files yields SKIP.
    """

    name: str = "ruff format --check"
    target: str = "."
    outfile: str = "logs/32_ruff_format_check.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        t0 = time.time()
        log_path = ctx.workdir / self.outfile
        log_path.parent.mkdir(parents=True, exist_ok=True)

        ruff_bin = which("ruff")
        if not ruff_bin:
            log_path.write_text(
                "ruff not found; skipping (pip install ruff)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing ruff")

        if not _repo_has_py_files(ctx.root):
            log_path.write_text(
                "no .py files detected; skipping ruff format check\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "no python files")

        cmd = [ruff_bin, "format", "--check", self.target]
        proc = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )
        body = (proc.stdout or "") + ("\n" + proc.stderr if proc.stderr else "")
        log_path.write_text(
            ctx.redact_text(f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n" + body),
            encoding="utf-8",
        )

        elapsed = int(time.time() - t0)

        # Exit 0 = already formatted; anything else = files need formatting.
        if proc.returncode != 0:
            return StepResult(
                self.name, "WARN", elapsed, f"exit={proc.returncode} (format drift detected)"
            )
        return StepResult(self.name, "PASS", elapsed, "")
@@ -0,0 +1,235 @@
1
+ """
2
+ Step: Enhanced Secrets Detection
3
+ Advanced secrets detection using entropy analysis and patterns.
4
+ """
5
+
6
+ import re
7
+ import math
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Dict, List, Set, Tuple, Optional
11
+
12
+ from .base import Step, StepResult
13
+
14
+
15
class SecretsDetectionStep(Step):
    """Detect secrets in the codebase using entropy and regex patterns.

    Two detectors run over project text files (``.py``, ``.json``, ``.yaml``,
    ``.yml``):

    * pattern matching against well-known credential formats, and
    * Shannon-entropy analysis of long quoted strings (noisy; reported
      separately and truncated to the top findings).

    The combined report is passed through the bundle's redaction before being
    written to ``logs/121_secrets_advanced.txt`` — this report quotes the very
    lines that may contain credentials.
    """

    name = "secrets detection"

    # Common secret patterns (regex source strings; compiled once per scan).
    SECRET_PATTERNS = {
        "AWS_KEY_ID": r"AKIA[0-9A-Z]{16}",
        "AWS_SECRET": r"aws_secret_access_key[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "GITHUB_TOKEN": r"gh[pousr]_[A-Za-z0-9_]{36,255}",
        "PRIVATE_KEY": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
        "API_KEY": r"api[_-]?key[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "DATABASE_PASSWORD": r"(?:password|passwd)[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "JWT_SECRET": r"jwt[_-]?secret[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "SLACK_TOKEN": r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24,32}",
        "STRIPE_KEY": r"sk_live_[0-9a-zA-Z]{24}",
    }

    # Fixed-format tokens are case-sensitive by specification; matching them
    # with IGNORECASE (as the key=value patterns legitimately are) produced
    # false positives on lowercased look-alikes such as "akia...".
    _CASE_SENSITIVE = frozenset(
        {"AWS_KEY_ID", "GITHUB_TOKEN", "PRIVATE_KEY", "SLACK_TOKEN", "STRIPE_KEY"}
    )

    # Dependency/cache directories that are never project code (PROJECT SCOPE ONLY).
    _SKIP_DIRS = frozenset(
        {
            "venv", ".venv", "env", "__pycache__", "site-packages",
            ".mypy_cache", ".pytest_cache", ".ruff_cache", ".freeze-venv",
            "node_modules", "dist", "build", "target",
        }
    )

    # Long quoted tokens worth entropy-checking (20+ chars of base64-ish alphabet).
    _STRING_RE = re.compile(r'["\']([A-Za-z0-9_\-\.]{20,})["\']')

    def run(self, ctx: "BundleContext") -> StepResult:  # type: ignore[name-defined]
        """Scan the project, write the redacted report, and return the result."""
        import time

        start = time.time()

        secrets_found = self._scan_for_secrets(ctx.root)
        output = self._render_report(secrets_found)

        dest = ctx.workdir / "logs" / "121_secrets_advanced.txt"
        dest.parent.mkdir(parents=True, exist_ok=True)
        # Redact before writing — every other step redacts its log, and this
        # one quotes candidate secret lines verbatim.
        dest.write_text(ctx.redact_text(output), encoding="utf-8")

        elapsed = int(time.time() - start)
        # "PASS" (not "OK") keeps the status vocabulary consistent with the
        # other steps (PASS/SKIP/WARN).
        return StepResult(self.name, "PASS", elapsed, "")

    def _scan_for_secrets(self, root: Path) -> Dict:
        """Scan project text files; return pattern matches and entropy hits.

        Returns a dict with keys ``matches`` (relpath -> list of issue dicts),
        ``total_matches`` (int), and ``high_entropy`` (list of hit dicts
        sorted by descending entropy).
        """
        # Compile each pattern once; only the key=value style patterns are
        # matched case-insensitively (see _CASE_SENSITIVE).
        compiled = {
            secret_type: re.compile(
                pattern, 0 if secret_type in self._CASE_SENSITIVE else re.IGNORECASE
            )
            for secret_type, pattern in self.SECRET_PATTERNS.items()
        }

        matches: Dict[str, List[Dict]] = {}
        high_entropy: List[Dict] = []
        total_matches = 0

        candidate_files = (
            list(root.rglob("*.py"))
            + list(root.rglob("*.json"))
            + list(root.rglob("*.yaml"))
            + list(root.rglob("*.yml"))
        )

        for path in candidate_files:
            # Skip venvs, caches, and vendored dependency directories.
            if self._SKIP_DIRS.intersection(path.parts):
                continue

            try:
                source = path.read_text(encoding="utf-8", errors="ignore")
            except (OSError, UnicodeDecodeError):
                continue

            rel_path = str(path.relative_to(root))
            file_matches: List[Dict] = []

            # Single pass per line: pattern match and entropy check together
            # (the original split the file twice and rescanned it).
            for line_num, line in enumerate(source.split("\n"), 1):
                for secret_type, regex in compiled.items():
                    if regex.search(line):
                        file_matches.append(
                            {"line": line_num, "type": secret_type, "context": line}
                        )
                        total_matches += 1

                for token_match in self._STRING_RE.finditer(line):
                    token = token_match.group(1)
                    entropy = self._calculate_entropy(token)
                    # > 4 bits/char usually indicates random data (keys, hashes).
                    if entropy > 4.0:
                        high_entropy.append(
                            {
                                "file": rel_path,
                                "line": line_num,
                                "entropy": entropy,
                                "context": token,
                            }
                        )

            if file_matches:
                matches[rel_path] = file_matches

        # Most suspicious strings first.
        high_entropy.sort(key=lambda item: item["entropy"], reverse=True)

        return {
            "matches": matches,
            "total_matches": total_matches,
            "high_entropy": high_entropy,
        }

    def _render_report(self, secrets_found: Dict) -> str:
        """Format scan results into the human-readable report text."""
        lines = [
            "=" * 80,
            "SECRETS DETECTION REPORT",
            "=" * 80,
            "",
        ]

        if secrets_found["matches"]:
            lines.append("⚠ POTENTIAL SECRETS DETECTED (pattern-based)")
            lines.append("")
            for file_path, details in secrets_found["matches"].items():
                lines.append(f"File: {file_path}")
                for issue in details:
                    lines.append(f"  Line {issue['line']}: {issue['type']}")
                    if issue.get("context"):
                        context_line = issue["context"].strip()
                        if len(context_line) > 70:
                            context_line = context_line[:67] + "..."
                        lines.append(f"    Context: {context_line}")
                    if issue.get("entropy"):
                        lines.append(f"    Entropy: {issue['entropy']:.2f}")
                lines.append("")
            lines.append(f"Total files with potential secrets: {len(secrets_found['matches'])}")
            lines.append(f"Total potential secrets: {secrets_found['total_matches']}")
        else:
            lines.append("✓ No pattern-based secrets detected")
            lines.append("  (API keys, AWS keys, GitHub tokens, private keys, etc.)")

        lines.extend(["", "=" * 80, "ENTROPY ANALYSIS", "=" * 80, ""])

        # High-entropy strings are noisy by nature; cap the listing.
        high_entropy = secrets_found.get("high_entropy", [])
        if high_entropy:
            lines.append(
                f"Found {len(high_entropy)} high-entropy strings (may include hashes, tokens, UUIDs):"
            )
            lines.append("")
            lines.append("NOTE: High-entropy detection produces many false positives.")
            lines.append("      Focus on pattern-based findings above for actual secrets.")
            lines.append("")

            display_count = min(10, len(high_entropy))
            for item in high_entropy[:display_count]:
                lines.append(f"  File: {item['file']}")
                lines.append(f"  Line: {item['line']}")
                lines.append(f"  Entropy: {item['entropy']:.3f}")
                if item.get("context"):
                    preview = item["context"].strip()
                    if len(preview) > 60:
                        preview = preview[:57] + "..."
                    lines.append(f"  Value preview: {preview}")
                lines.append("")

            if len(high_entropy) > display_count:
                lines.append(
                    f"  ... and {len(high_entropy) - display_count} more (suppressed for readability)"
                )
                lines.append("  Run with --deep-scan to see full entropy analysis")
                lines.append("")
        else:
            lines.append("✓ No high-entropy strings detected")
            lines.append("")

        lines.extend(["", "=" * 80, "RECOMMENDATIONS", "=" * 80, ""])

        if secrets_found["matches"] or high_entropy:
            lines.append("  - Review and rotate any exposed secrets immediately")
            lines.append("  - Use a secrets manager (AWS Secrets Manager, HashiCorp Vault)")
            lines.append("  - Configure git hooks to prevent committing secrets")
            lines.append("  - Use .gitignore to exclude .env and secrets files")
            lines.append("  - Consider using detect-secrets or similar tools in CI/CD")
        else:
            lines.append("  - ✓ Good security practice: no obvious secrets detected")
            lines.append("  - Continue to use secrets management best practices")
            lines.append("  - Store sensitive data in .env or secrets manager")

        lines.append("")
        return "\n".join(lines)

    def _calculate_entropy(self, s: str) -> float:
        """Return the Shannon entropy (bits per character) of *s*."""
        if not s:
            return 0.0

        length = len(s)
        entropy = 0.0
        for ch in set(s):
            p = s.count(ch) / length
            entropy -= p * math.log2(p)
        return entropy