gwc-pybundle 2.1.2 (gwc_pybundle-2.1.2-py3-none-any.whl)
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwc-pybundle might be problematic.
- gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
- gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
- gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
- gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
- gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +546 -0
- pybundle/context.py +404 -0
- pybundle/doctor.py +148 -0
- pybundle/filters.py +228 -0
- pybundle/manifest.py +77 -0
- pybundle/packaging.py +45 -0
- pybundle/policy.py +132 -0
- pybundle/profiles.py +454 -0
- pybundle/roadmap_model.py +42 -0
- pybundle/roadmap_scan.py +328 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +180 -0
- pybundle/steps/__init__.py +26 -0
- pybundle/steps/ai_context.py +791 -0
- pybundle/steps/api_docs.py +219 -0
- pybundle/steps/asyncio_analysis.py +358 -0
- pybundle/steps/bandit.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/blocking_call_detection.py +291 -0
- pybundle/steps/call_graph.py +219 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/config_docs.py +319 -0
- pybundle/steps/config_validation.py +302 -0
- pybundle/steps/container_image.py +294 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +293 -0
- pybundle/steps/coverage.py +101 -0
- pybundle/steps/cprofile_step.py +166 -0
- pybundle/steps/dependency_sizes.py +136 -0
- pybundle/steps/django_checks.py +214 -0
- pybundle/steps/dockerfile_lint.py +282 -0
- pybundle/steps/dockerignore.py +311 -0
- pybundle/steps/duplication.py +103 -0
- pybundle/steps/env_completeness.py +269 -0
- pybundle/steps/env_var_usage.py +253 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/event_loop_patterns.py +280 -0
- pybundle/steps/exception_patterns.py +190 -0
- pybundle/steps/fastapi_integration.py +250 -0
- pybundle/steps/flask_debugging.py +312 -0
- pybundle/steps/git_analytics.py +315 -0
- pybundle/steps/handoff_md.py +176 -0
- pybundle/steps/import_time.py +175 -0
- pybundle/steps/interrogate.py +106 -0
- pybundle/steps/license_scan.py +96 -0
- pybundle/steps/line_profiler.py +117 -0
- pybundle/steps/link_validation.py +287 -0
- pybundle/steps/logging_analysis.py +233 -0
- pybundle/steps/memory_profile.py +176 -0
- pybundle/steps/migration_history.py +336 -0
- pybundle/steps/mutation_testing.py +141 -0
- pybundle/steps/mypy.py +103 -0
- pybundle/steps/orm_optimization.py +316 -0
- pybundle/steps/pip_audit.py +45 -0
- pybundle/steps/pipdeptree.py +62 -0
- pybundle/steps/pylance.py +562 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/query_pattern_analysis.py +334 -0
- pybundle/steps/radon.py +161 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +153 -0
- pybundle/steps/ruff.py +117 -0
- pybundle/steps/secrets_detection.py +235 -0
- pybundle/steps/security_headers.py +309 -0
- pybundle/steps/shell.py +74 -0
- pybundle/steps/slow_tests.py +178 -0
- pybundle/steps/sqlalchemy_validation.py +269 -0
- pybundle/steps/test_flakiness.py +184 -0
- pybundle/steps/tree.py +116 -0
- pybundle/steps/type_coverage.py +277 -0
- pybundle/steps/unused_deps.py +211 -0
- pybundle/steps/vulture.py +167 -0
- pybundle/tools.py +63 -0

pybundle/steps/rg_scans.py
ADDED
@@ -0,0 +1,78 @@
from __future__ import annotations

import subprocess  # nosec B404 - Required for tool execution, paths validated
import time
from dataclasses import dataclass

from .base import StepResult
from ..context import BundleContext
from ..tools import which


@dataclass
class RipgrepScanStep:
    name: str
    pattern: str
    outfile: str
    target: str = "."  # directory or file
    extra_args: list[str] | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        rg = which("rg")
        if not rg:
            out.write_text(
                "rg (ripgrep) not found; skipping (install ripgrep)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing rg")

        args = self.extra_args or []
        # -n line numbers, --no-heading keeps it grep-like, -S smart case can be handy
        cmd = [rg, "-n", "--no-heading", "-S", *args, self.pattern, self.target]
        header = f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n"

        cp = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )
        # rg exit codes:
        # 0 = matches found
        # 1 = no matches found (not an error!)
        # 2 = actual error
        text = header + (cp.stdout or "") + ("\n" + cp.stderr if cp.stderr else "")
        out.write_text(ctx.redact_text(text), encoding="utf-8")

        dur = int(time.time() - start)
        note = ""
        if cp.returncode == 2:
            note = "rg error (exit=2) recorded"
        elif cp.returncode == 1:
            note = "no matches"

        # Always PASS; we’re collecting info, not enforcing policy (yet).
        return StepResult(self.name, "PASS", dur, note)


def default_rg_steps(target: str = ".") -> list[RipgrepScanStep]:
    return [
        RipgrepScanStep(
            name="rg TODO/FIXME/HACK",
            pattern=r"TODO|FIXME|HACK",
            outfile="logs/40_rg_todos.txt",
            target=target,
        ),
        RipgrepScanStep(
            name="rg print(",
            pattern=r"^\s*print\(",
            outfile="logs/41_rg_prints.txt",
            target=target,
        ),
        RipgrepScanStep(
            name="rg except patterns",
            pattern=r"except\s+Exception|except\s*:",
            outfile="logs/42_rg_bare_excepts.txt",
            target=target,
        ),
    ]
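
A minimal driving sketch, for orientation. The stand-in context below is hypothetical (the real BundleContext lives in pybundle/context.py, outside this diff) and supplies only the three members RipgrepScanStep.run() actually touches: root, workdir, and redact_text.

# Illustrative sketch only; FakeContext is hypothetical, not part of the package.
from pathlib import Path

class FakeContext:
    """Stand-in exposing just the attributes the step uses."""

    def __init__(self, root: Path, workdir: Path) -> None:
        self.root = root        # repository being scanned
        self.workdir = workdir  # destination for logs/*.txt

    def redact_text(self, text: str) -> str:
        return text  # the real context presumably scrubs secrets here

ctx = FakeContext(root=Path("."), workdir=Path("/tmp/bundle"))
for step in default_rg_steps():
    print(step.run(ctx))  # StepResult with status "PASS" or "SKIP"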

pybundle/steps/roadmap.py
ADDED
@@ -0,0 +1,153 @@
from __future__ import annotations

import json
import time
from dataclasses import dataclass
from typing import Any, Protocol

from .base import StepResult
from ..context import BundleContext
from ..policy import AIContextPolicy
from ..roadmap_scan import build_roadmap  # keep only if you actually use it


class RoadmapGraph(Protocol):
    entrypoints: list[Any]
    nodes: list[Any]
    edges: list[Any]
    stats: dict[str, Any]


@dataclass
class RoadmapStep:
    name: str = "roadmap (project map)"
    out_md: str = "meta/70_roadmap.md"
    out_json: str = "meta/70_roadmap.json"
    include: list[str] | None = None
    policy: AIContextPolicy | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()

        policy = self.policy or AIContextPolicy()

        # Include dirs: explicit override wins; otherwise policy candidates (with fallback)
        if self.include:
            include_dirs = [
                ctx.root / p for p in self.include if (ctx.root / p).exists()
            ]
            if not include_dirs:
                include_dirs = [ctx.root]
        else:
            include_dirs = policy.include_dir_candidates(
                ctx.root
            )  # includes fallback to [root]

        exclude_dirs = set(policy.exclude_dirs)

        graph = build_roadmap(
            root=ctx.root,
            include_dirs=include_dirs,
            exclude_dirs=exclude_dirs,
            max_files=policy.roadmap_max_files,
        )

        # JSON
        out_json_path = ctx.workdir / self.out_json
        out_json_path.parent.mkdir(parents=True, exist_ok=True)
        out_json_path.write_text(
            json.dumps(graph.to_dict(), indent=2), encoding="utf-8"
        )

        # Markdown (policy-driven Mermaid knobs)
        out_md_path = ctx.workdir / self.out_md
        out_md_path.parent.mkdir(parents=True, exist_ok=True)
        out_md_path.write_text(self._render_md(graph, policy), encoding="utf-8")

        langs = sorted({n.lang for n in graph.nodes if getattr(n, "lang", None)})
        summary = {
            "languages": langs,
            "entrypoints": [ep.node for ep in graph.entrypoints[:50]],
            "stats": graph.stats,
        }
        (ctx.workdir / "meta" / "71_roadmap_summary.json").write_text(
            json.dumps(summary, indent=2), encoding="utf-8"
        )

        dur = int(time.time() - start)
        note = f"nodes={len(graph.nodes)} edges={len(graph.edges)} entrypoints={len(graph.entrypoints)}"
        return StepResult(self.name, "PASS", dur, note)

    def _render_md(self, graph: RoadmapGraph, policy: AIContextPolicy) -> str:
        depth = policy.roadmap_mermaid_depth
        max_edges = policy.roadmap_mermaid_max_edges

        lines: list[str] = []
        lines.append("# Project Roadmap")
        lines.append("")
        lines.append("## Entrypoints")
        if not graph.entrypoints:
            lines.append("- (none detected)")
        else:
            for ep in graph.entrypoints[:50]:
                lines.append(
                    f"- `{ep.node}` — {ep.reason} (confidence {ep.confidence}/3)"
                )
        lines.append("")
        lines.append("## High-level map")
        lines.append("```mermaid")
        lines.append("flowchart LR")
        lines.extend(
            self._render_mermaid_bfs(graph, max_depth=depth, max_edges=max_edges)
        )
        lines.append("```")
        lines.append("")
        lines.append("## Stats")
        for k in sorted(graph.stats.keys()):
            lines.append(f"- **{k}**: {graph.stats[k]}")
        lines.append("")
        lines.append("## Notes")
        lines.append(
            "- Destinations like `py:...`, `js:...`, `rs:...` are dependency specs (not resolved to paths yet)."
        )
        lines.append(
            "- This is designed to be deterministic and readable, not a perfect compiler-grade call graph."
        )
        lines.append("")
        return "\n".join(lines)

    def _render_mermaid_bfs(
        self, graph: RoadmapGraph, max_depth: int = 2, max_edges: int = 180
    ) -> list[str]:
        from collections import deque

        adj: dict[str, list[str]] = {}
        for e in graph.edges:
            adj.setdefault(e.src, []).append(e.dst)

        entry = [ep.node for ep in graph.entrypoints]
        if not entry:
            return [' A["(no entrypoints)"]']

        q = deque([(n, 0) for n in entry])
        seen_edges: set[tuple[str, str]] = set()
        shown: list[str] = []
        seen_nodes: set[str] = set(entry)

        while q and len(shown) < max_edges:
            node, depth = q.popleft()
            if depth >= max_depth:
                continue
            for dst in adj.get(node, []):
                key = (node, dst)
                if key in seen_edges:
                    continue
                seen_edges.add(key)
                shown.append(f' "{node}" --> "{dst}"')
                if dst not in seen_nodes:
                    seen_nodes.add(dst)
                    q.append((dst, depth + 1))
                if len(shown) >= max_edges:
                    break

        return shown or [' A["(no edges rendered)"]']
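
A quick way to sanity-check _render_mermaid_bfs is to feed it hand-built objects satisfying the RoadmapGraph protocol. The stand-in classes and module names below are invented for illustration.

# Illustrative sketch only; _Edge, _Entry, and _Graph are hypothetical stand-ins.
from dataclasses import dataclass

@dataclass
class _Edge:
    src: str
    dst: str

@dataclass
class _Entry:
    node: str
    reason: str = "script entrypoint"
    confidence: int = 2

class _Graph:
    entrypoints = [_Entry("cli.py")]
    nodes: list = []
    edges = [_Edge("cli.py", "runner.py"), _Edge("runner.py", "steps/ruff.py")]
    stats: dict = {}

for line in RoadmapStep()._render_mermaid_bfs(_Graph(), max_depth=2, max_edges=10):
    print(line)
# BFS from the entrypoint, depth-limited, one Mermaid edge per line:
#  "cli.py" --> "runner.py"
#  "runner.py" --> "steps/ruff.py"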
pybundle/steps/ruff.py
ADDED
@@ -0,0 +1,117 @@
from __future__ import annotations

import subprocess  # nosec B404 - Required for tool execution, paths validated
import time
from dataclasses import dataclass
from pathlib import Path

from .base import StepResult
from ..context import BundleContext
from ..tools import which


def _repo_has_py_files(root: Path) -> bool:
    # Fast-ish heuristic: look for any .py file in top couple levels
    # (Avoid walking deep trees; ruff itself can handle it.)
    for p in root.rglob("*.py"):
        # ignore common junk dirs
        parts = set(p.parts)
        if (
            ".venv" in parts
            or "__pycache__" in parts
            or ".mypy_cache" in parts
            or ".ruff_cache" in parts
        ):
            continue
        if (
            "node_modules" in parts
            or "dist" in parts
            or "build" in parts
            or "artifacts" in parts
        ):
            continue
        return True
    return False


@dataclass
class RuffCheckStep:
    name: str = "ruff check"
    target: str = "."
    outfile: str = "logs/31_ruff_check.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        ruff = which("ruff")
        if not ruff:
            out.write_text(
                "ruff not found; skipping (pip install ruff)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing ruff")

        if not _repo_has_py_files(ctx.root):
            out.write_text(
                "no .py files detected; skipping ruff check\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "no python files")

        cmd = [ruff, "check", self.target]
        header = f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n"

        cp = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )
        text = header + (cp.stdout or "") + ("\n" + cp.stderr if cp.stderr else "")
        out.write_text(ctx.redact_text(text), encoding="utf-8")

        dur = int(time.time() - start)
        # ruff nonzero = lint failures; that’s *valuable*, but for bundling we record it.
        note = "" if cp.returncode == 0 else f"exit={cp.returncode} (lint findings)"
        return StepResult(self.name, "PASS", dur, note)


@dataclass
class RuffFormatCheckStep:
    name: str = "ruff format --check"
    target: str = "."
    outfile: str = "logs/32_ruff_format_check.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        ruff = which("ruff")
        if not ruff:
            out.write_text(
                "ruff not found; skipping (pip install ruff)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing ruff")

        if not _repo_has_py_files(ctx.root):
            out.write_text(
                "no .py files detected; skipping ruff format check\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "no python files")

        cmd = [ruff, "format", "--check", self.target]
        header = f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n"

        cp = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )
        text = header + (cp.stdout or "") + ("\n" + cp.stderr if cp.stderr else "")
        out.write_text(ctx.redact_text(text), encoding="utf-8")

        dur = int(time.time() - start)

        # Exit code 0 = formatted correctly, non-zero = needs formatting
        if cp.returncode == 0:
            return StepResult(self.name, "PASS", dur, "")
        else:
            # Format drift should be WARN, not PASS - it's actionable
            note = f"exit={cp.returncode} (format drift detected)"
            return StepResult(self.name, "WARN", dur, note)
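
Worth noting the asymmetry above: ruff check findings are recorded but still return PASS, while format drift returns WARN. A consumer might fold these statuses as in the sketch below; it assumes StepResult exposes its second positional field as .status, which is defined in pybundle/steps/base.py and not shown in this diff.

# Illustrative sketch only; the .status attribute name is an assumption.
from collections import Counter

def summarize(results: list) -> str:
    counts = Counter(getattr(r, "status", "?") for r in results)
    return ", ".join(f"{status}={n}" for status, n in sorted(counts.items()))

# e.g. summarize(results) -> "PASS=5, SKIP=2, WARN=1"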

pybundle/steps/secrets_detection.py
ADDED
@@ -0,0 +1,235 @@
"""
Step: Enhanced Secrets Detection

Advanced secrets detection using entropy analysis and patterns.
"""

import re
import math
import json
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional

from .base import Step, StepResult


class SecretsDetectionStep(Step):
    """Detect secrets in codebase using entropy and regex patterns."""

    name = "secrets detection"

    # Common secret patterns
    SECRET_PATTERNS = {
        "AWS_KEY_ID": r"AKIA[0-9A-Z]{16}",
        "AWS_SECRET": r"aws_secret_access_key[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "GITHUB_TOKEN": r"gh[pousr]_[A-Za-z0-9_]{36,255}",
        "PRIVATE_KEY": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
        "API_KEY": r"api[_-]?key[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "DATABASE_PASSWORD": r"(?:password|passwd)[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "JWT_SECRET": r"jwt[_-]?secret[\"']?\s*[:=]\s*[\"']([^\"'\n]+)[\"']",
        "SLACK_TOKEN": r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24,32}",
        "STRIPE_KEY": r"sk_live_[0-9a-zA-Z]{24}",
    }

    def run(self, ctx: "BundleContext") -> StepResult:  # type: ignore[name-defined]
        """Detect secrets in codebase."""
        import time

        start = time.time()

        root = ctx.root

        # Scan for secrets
        secrets_found = self._scan_for_secrets(root)

        # Generate report
        lines = [
            "=" * 80,
            "SECRETS DETECTION REPORT",
            "=" * 80,
            "",
        ]

        if secrets_found["matches"]:
            lines.append("⚠ POTENTIAL SECRETS DETECTED (pattern-based)")
            lines.append("")

            for file_path, details in secrets_found["matches"].items():
                lines.append(f"File: {file_path}")

                for issue in details:
                    lines.append(f" Line {issue['line']}: {issue['type']}")
                    if issue.get("context"):
                        context_line = issue["context"].strip()
                        if len(context_line) > 70:
                            context_line = context_line[:67] + "..."
                        lines.append(f" Context: {context_line}")
                    if issue.get("entropy"):
                        lines.append(f" Entropy: {issue['entropy']:.2f}")

                lines.append("")

            lines.append(f"Total files with potential secrets: {len(secrets_found['matches'])}")
            lines.append(f"Total potential secrets: {secrets_found['total_matches']}")
        else:
            lines.append("✓ No pattern-based secrets detected")
            lines.append(" (API keys, AWS keys, GitHub tokens, private keys, etc.)")

        lines.extend(
            [
                "",
                "=" * 80,
                "ENTROPY ANALYSIS",
                "=" * 80,
                "",
            ]
        )

        # High entropy strings analysis (LIMITED OUTPUT)
        high_entropy = secrets_found.get("high_entropy", [])
        if high_entropy:
            lines.append(f"Found {len(high_entropy)} high-entropy strings (may include hashes, tokens, UUIDs):")
            lines.append("")
            lines.append("NOTE: High-entropy detection produces many false positives.")
            lines.append(" Focus on pattern-based findings above for actual secrets.")
            lines.append("")

            # Show only top 10 highest-entropy, not all
            display_count = min(10, len(high_entropy))
            for item in high_entropy[:display_count]:
                lines.append(f" File: {item['file']}")
                lines.append(f" Line: {item['line']}")
                lines.append(f" Entropy: {item['entropy']:.3f}")
                if item.get("context"):
                    context = item["context"].strip()
                    if len(context) > 60:
                        context = context[:57] + "..."
                    lines.append(f" Value preview: {context}")
                lines.append("")

            if len(high_entropy) > display_count:
                lines.append(f" ... and {len(high_entropy) - display_count} more (suppressed for readability)")
                lines.append(f" Run with --deep-scan to see full entropy analysis")
                lines.append("")

        else:
            lines.append("✓ No high-entropy strings detected")
            lines.append("")

        # Recommendations
        lines.extend(
            [
                "=" * 80,
                "RECOMMENDATIONS",
                "=" * 80,
                "",
            ]
        )

        if secrets_found["matches"] or high_entropy:
            lines.append(" - Review and rotate any exposed secrets immediately")
            lines.append(" - Use a secrets manager (AWS Secrets Manager, HashiCorp Vault)")
            lines.append(" - Configure git hooks to prevent committing secrets")
            lines.append(" - Use .gitignore to exclude .env and secrets files")
            lines.append(" - Consider using detect-secrets or similar tools in CI/CD")

        else:
            lines.append(" - ✓ Good security practice: no obvious secrets detected")
            lines.append(" - Continue to use secrets management best practices")
            lines.append(" - Store sensitive data in .env or secrets manager")

        lines.append("")

        # Write report
        output = "\n".join(lines)
        dest = ctx.workdir / "logs" / "121_secrets_advanced.txt"
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(output, encoding="utf-8")

        elapsed = int(time.time() - start)
        return StepResult(self.name, "OK", elapsed, "")

    def _scan_for_secrets(self, root: Path) -> Dict:
        """Scan files for secrets using patterns and entropy analysis."""
        matches = {}
        high_entropy = []
        total_matches = 0

        python_files = list(root.rglob("*.py")) + list(root.rglob("*.json")) + list(
            root.rglob("*.yaml")
        ) + list(root.rglob("*.yml"))

        for py_file in python_files:
            # Skip venv, cache, and dependency directories (PROJECT SCOPE ONLY)
            if any(
                part in py_file.parts
                for part in [
                    "venv", ".venv", "env", "__pycache__", "site-packages",
                    ".mypy_cache", ".pytest_cache", ".ruff_cache", ".freeze-venv",
                    "node_modules", "dist", "build", "target",
                ]
            ):
                continue

            try:
                source = py_file.read_text(encoding="utf-8", errors="ignore")
                rel_path = str(py_file.relative_to(root))

                file_matches = []

                # Check against secret patterns
                for line_num, line in enumerate(source.split("\n"), 1):
                    for secret_type, pattern in self.SECRET_PATTERNS.items():
                        if re.search(pattern, line, re.IGNORECASE):
                            file_matches.append(
                                {
                                    "line": line_num,
                                    "type": secret_type,
                                    "context": line,
                                }
                            )
                            total_matches += 1

                # Check entropy of quoted strings
                string_pattern = r'["\']([A-Za-z0-9_\-\.]{20,})["\']'
                for line_num, line in enumerate(source.split("\n"), 1):
                    for match in re.finditer(string_pattern, line):
                        string_val = match.group(1)
                        entropy = self._calculate_entropy(string_val)

                        # Flag high entropy strings (likely encrypted or random)
                        if entropy > 4.0:
                            high_entropy.append(
                                {
                                    "file": rel_path,
                                    "line": line_num,
                                    "entropy": entropy,
                                    "context": string_val,
                                }
                            )

                if file_matches:
                    matches[rel_path] = file_matches

            except (OSError, UnicodeDecodeError):
                continue

        # Sort high entropy by entropy score
        high_entropy.sort(key=lambda x: x["entropy"], reverse=True)

        return {
            "matches": matches,
            "total_matches": total_matches,
            "high_entropy": high_entropy,
        }

    def _calculate_entropy(self, s: str) -> float:
        """Calculate Shannon entropy of a string."""
        if not s:
            return 0.0

        entropy = 0.0
        for byte in set(s):
            freq = s.count(byte) / len(s)
            entropy -= freq * math.log2(freq)

        return entropy
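
To make the `entropy > 4.0` gate in _scan_for_secrets concrete, here is _calculate_entropy on a few deterministic inputs (Shannon entropy in bits per character; the values are exact for these strings). Instantiating the step bare assumes the Step base class takes no constructor arguments, which this diff does not show.

# Worked example; values are exact for these inputs.
step = SecretsDetectionStep()  # assumes Step() needs no constructor args
step._calculate_entropy("aaaaaaaa")
# -> 0.0   one repeated symbol, nothing to guess
step._calculate_entropy("password")
# -> 2.75  seven distinct chars over eight, typical for English words
step._calculate_entropy("abcdefghijklmnopqrstuvwxyz012345")
# -> 5.0   32 distinct chars = log2(32); random-looking tokens land up here
# Only the last clears the 4.0 threshold, which is why ordinary words pass
# while long random tokens get flagged for manual review.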