gwc-pybundle 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwc-pybundle might be problematic. Click here for more details.
- gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
- gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
- gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
- gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
- gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +546 -0
- pybundle/context.py +404 -0
- pybundle/doctor.py +148 -0
- pybundle/filters.py +228 -0
- pybundle/manifest.py +77 -0
- pybundle/packaging.py +45 -0
- pybundle/policy.py +132 -0
- pybundle/profiles.py +454 -0
- pybundle/roadmap_model.py +42 -0
- pybundle/roadmap_scan.py +328 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +180 -0
- pybundle/steps/__init__.py +26 -0
- pybundle/steps/ai_context.py +791 -0
- pybundle/steps/api_docs.py +219 -0
- pybundle/steps/asyncio_analysis.py +358 -0
- pybundle/steps/bandit.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/blocking_call_detection.py +291 -0
- pybundle/steps/call_graph.py +219 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/config_docs.py +319 -0
- pybundle/steps/config_validation.py +302 -0
- pybundle/steps/container_image.py +294 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +293 -0
- pybundle/steps/coverage.py +101 -0
- pybundle/steps/cprofile_step.py +166 -0
- pybundle/steps/dependency_sizes.py +136 -0
- pybundle/steps/django_checks.py +214 -0
- pybundle/steps/dockerfile_lint.py +282 -0
- pybundle/steps/dockerignore.py +311 -0
- pybundle/steps/duplication.py +103 -0
- pybundle/steps/env_completeness.py +269 -0
- pybundle/steps/env_var_usage.py +253 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/event_loop_patterns.py +280 -0
- pybundle/steps/exception_patterns.py +190 -0
- pybundle/steps/fastapi_integration.py +250 -0
- pybundle/steps/flask_debugging.py +312 -0
- pybundle/steps/git_analytics.py +315 -0
- pybundle/steps/handoff_md.py +176 -0
- pybundle/steps/import_time.py +175 -0
- pybundle/steps/interrogate.py +106 -0
- pybundle/steps/license_scan.py +96 -0
- pybundle/steps/line_profiler.py +117 -0
- pybundle/steps/link_validation.py +287 -0
- pybundle/steps/logging_analysis.py +233 -0
- pybundle/steps/memory_profile.py +176 -0
- pybundle/steps/migration_history.py +336 -0
- pybundle/steps/mutation_testing.py +141 -0
- pybundle/steps/mypy.py +103 -0
- pybundle/steps/orm_optimization.py +316 -0
- pybundle/steps/pip_audit.py +45 -0
- pybundle/steps/pipdeptree.py +62 -0
- pybundle/steps/pylance.py +562 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/query_pattern_analysis.py +334 -0
- pybundle/steps/radon.py +161 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +153 -0
- pybundle/steps/ruff.py +117 -0
- pybundle/steps/secrets_detection.py +235 -0
- pybundle/steps/security_headers.py +309 -0
- pybundle/steps/shell.py +74 -0
- pybundle/steps/slow_tests.py +178 -0
- pybundle/steps/sqlalchemy_validation.py +269 -0
- pybundle/steps/test_flakiness.py +184 -0
- pybundle/steps/tree.py +116 -0
- pybundle/steps/type_coverage.py +277 -0
- pybundle/steps/unused_deps.py +211 -0
- pybundle/steps/vulture.py +167 -0
- pybundle/tools.py +63 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .base import StepResult
|
|
10
|
+
from pybundle.context import BundleContext
|
|
11
|
+
from pybundle.policy import AIContextPolicy, PathFilter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _is_venv_root(p: Path) -> bool:
|
|
15
|
+
if not p.is_dir():
|
|
16
|
+
return False
|
|
17
|
+
|
|
18
|
+
# Strong marker: standard venv metadata
|
|
19
|
+
if (p / "pyvenv.cfg").is_file():
|
|
20
|
+
return True
|
|
21
|
+
|
|
22
|
+
# Typical venv executables (Linux/macOS)
|
|
23
|
+
if (p / "bin").is_dir():
|
|
24
|
+
# venv/virtualenv always has python here
|
|
25
|
+
if (p / "bin" / "python").exists() or (p / "bin" / "python3").exists():
|
|
26
|
+
# activation script is common but not guaranteed; still strong signal
|
|
27
|
+
if (p / "bin" / "activate").is_file():
|
|
28
|
+
return True
|
|
29
|
+
# also accept presence of site-packages under lib
|
|
30
|
+
if any((p / "lib").glob("python*/site-packages")):
|
|
31
|
+
return True
|
|
32
|
+
|
|
33
|
+
# Windows venv layout
|
|
34
|
+
if (p / "Scripts").is_dir():
|
|
35
|
+
if (p / "Scripts" / "python.exe").is_file() or (
|
|
36
|
+
p / "Scripts" / "python"
|
|
37
|
+
).exists():
|
|
38
|
+
if (p / "Scripts" / "activate").is_file():
|
|
39
|
+
return True
|
|
40
|
+
if (p / "Lib" / "site-packages").is_dir():
|
|
41
|
+
return True
|
|
42
|
+
|
|
43
|
+
# Some virtualenvs keep a .Python marker (macOS, older tooling)
|
|
44
|
+
if (p / ".Python").exists():
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
return False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _is_under_venv(root: Path, rel_path: Path) -> bool:
    """Return True when any prefix of ``root / rel_path`` is a virtualenv root.

    For ``a/b/c.py`` this probes ``root/a``, ``root/a/b`` and ``root/a/b/c.py``.
    """
    parts = rel_path.parts
    return any(
        _is_venv_root(root.joinpath(*parts[: depth + 1]))
        for depth in range(len(parts))
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _safe_copy_file(src: Path, dst: Path) -> None:
|
|
61
|
+
dst.parent.mkdir(parents=True, exist_ok=True)
|
|
62
|
+
# preserve mode + timestamps where possible
|
|
63
|
+
shutil.copy2(src, dst)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _copy_tree_filtered(
|
|
67
|
+
root: Path,
|
|
68
|
+
src_dir: Path,
|
|
69
|
+
dst_dir: Path,
|
|
70
|
+
filt: "PathFilter",
|
|
71
|
+
) -> tuple[int, int, int]:
|
|
72
|
+
"""
|
|
73
|
+
Copy directory tree while pruning excluded directories and skipping excluded files.
|
|
74
|
+
|
|
75
|
+
Returns: (files_copied, dirs_pruned, files_excluded)
|
|
76
|
+
"""
|
|
77
|
+
seen_files = 0
|
|
78
|
+
files_copied = 0
|
|
79
|
+
pruned_dirs = 0
|
|
80
|
+
|
|
81
|
+
for dirpath, dirnames, filenames in os.walk(src_dir):
|
|
82
|
+
dp = Path(dirpath)
|
|
83
|
+
rel_dir = dp.relative_to(src_dir)
|
|
84
|
+
|
|
85
|
+
# prune dirs in-place so os.walk doesn't descend into them
|
|
86
|
+
kept: list[str] = []
|
|
87
|
+
for d in dirnames:
|
|
88
|
+
if filt.should_prune_dir(dp, d):
|
|
89
|
+
pruned_dirs += 1
|
|
90
|
+
continue
|
|
91
|
+
kept.append(d)
|
|
92
|
+
dirnames[:] = kept
|
|
93
|
+
|
|
94
|
+
for fn in filenames:
|
|
95
|
+
seen_files += 1
|
|
96
|
+
sp = dp / fn
|
|
97
|
+
rel_file = rel_dir / fn
|
|
98
|
+
|
|
99
|
+
# single source of truth: PathFilter handles excluded dirs, patterns, and extensions
|
|
100
|
+
if not filt.should_include_file(root, sp):
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
tp = dst_dir / rel_file
|
|
104
|
+
try:
|
|
105
|
+
_safe_copy_file(sp, tp)
|
|
106
|
+
except OSError:
|
|
107
|
+
continue
|
|
108
|
+
|
|
109
|
+
files_copied += 1
|
|
110
|
+
|
|
111
|
+
files_excluded = max(0, seen_files - files_copied)
|
|
112
|
+
return files_copied, pruned_dirs, files_excluded
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _guess_package_dirs(root: Path, filt: "PathFilter") -> list[Path]:
|
|
116
|
+
out: list[Path] = []
|
|
117
|
+
for p in sorted(root.iterdir()):
|
|
118
|
+
if not p.is_dir():
|
|
119
|
+
continue
|
|
120
|
+
if p.name.startswith("."):
|
|
121
|
+
continue
|
|
122
|
+
if filt.should_prune_dir(root, p.name):
|
|
123
|
+
continue
|
|
124
|
+
if (p / "__init__.py").is_file():
|
|
125
|
+
out.append(p)
|
|
126
|
+
return out
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclass
class CuratedCopyStep:
    """
    Copy a curated subset of the repository into the bundle's ``src`` directory.

    Candidate sources, applied in order until ``max_files`` is reached:
      1. well-known top-level files (``include_files``),
      2. common top-level directories (``include_dirs``),
      3. auto-detected top-level packages (directories with ``__init__.py``),
      4. optional glob patterns (``include_globs``).

    Every candidate passes through a ``PathFilter`` built from the policy
    (excluded dirs, name patterns, file extensions); virtualenv trees get an
    extra belt-and-braces check via ``_is_venv_root`` / ``_is_under_venv``.
    A short manifest with copy/prune counts is written to
    ``meta/50_copy_manifest.txt``.
    """

    name: str = "copy curated source pack"
    include_files: list[str] | None = None  # override for policy.include_files
    include_dirs: list[str] | None = None  # override for policy.include_dirs
    include_globs: list[str] | None = None  # override for policy.include_globs
    exclude_dirs: set[str] | None = None  # override for policy.exclude_dirs
    max_files: int = 20000  # best-effort cap on copied files
    policy: AIContextPolicy | None = None  # defaults to a fresh AIContextPolicy

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        dst_root = ctx.srcdir  # bundle/src
        dst_root.mkdir(parents=True, exist_ok=True)

        policy = self.policy or AIContextPolicy()

        exclude = (
            set(self.exclude_dirs) if self.exclude_dirs else set(policy.exclude_dirs)
        )
        exclude_patterns = set(policy.exclude_patterns)
        filt = PathFilter(
            exclude_dirs=exclude,
            exclude_patterns=exclude_patterns,
            exclude_file_exts=set(policy.exclude_file_exts),
        )
        include_files = self.include_files or list(policy.include_files)
        include_dirs = self.include_dirs or list(policy.include_dirs)
        include_globs = self.include_globs or list(policy.include_globs)

        copied = 0
        pruned = 0
        excluded_total = 0

        # 1) Include well-known top-level files if present
        for rel_file in include_files:
            if copied >= self.max_files:
                break

            sp = ctx.root / rel_file
            if not sp.is_file():
                continue
            if not filt.should_include_file(ctx.root, sp):
                continue

            try:
                _safe_copy_file(sp, dst_root / rel_file)
                copied += 1
                if copied >= self.max_files:
                    break
            except OSError:
                # best-effort: unreadable/uncopyable files are simply skipped
                pass

        # 2) Include common top-level dirs (src/tests/tools)
        for rel_dir in include_dirs:
            sp = ctx.root / rel_dir
            if not sp.is_dir():
                continue

            # policy prune (exact + patterns + venv detection inside PathFilter)
            if filt.should_prune_dir(ctx.root, rel_dir):
                pruned += 1
                continue

            # extra-strong venv detection for oddly-named envs
            if _is_venv_root(sp):
                pruned += 1
                continue

            files_copied, dirs_pruned, files_excluded = _copy_tree_filtered(
                ctx.root, sp, dst_root / rel_dir, filt
            )
            copied += files_copied
            pruned += dirs_pruned
            excluded_total += files_excluded

            if copied >= self.max_files:
                break

        # 3) Include detected package dirs at root (if not already copied)
        if copied < self.max_files:
            for pkg_dir in _guess_package_dirs(ctx.root, filt):
                rel_pkg_name = pkg_dir.name
                if (dst_root / rel_pkg_name).exists():
                    continue
                files_copied, dirs_pruned, files_excluded = _copy_tree_filtered(
                    ctx.root, pkg_dir, dst_root / rel_pkg_name, filt
                )
                copied += files_copied
                pruned += dirs_pruned
                excluded_total += files_excluded
                if copied >= self.max_files:
                    break

        # 4) Optional globs (best-effort; avoid deep explosion by pruning excluded dirs)
        # We'll apply globs but skip anything under excluded dirs.
        if copied < self.max_files:
            for g in include_globs:
                for sp in ctx.root.glob(g):
                    try:
                        if not sp.exists():
                            continue

                        rel_path = sp.relative_to(ctx.root)

                        if _is_under_venv(ctx.root, rel_path):
                            pruned += 1
                            continue

                        dst = dst_root / rel_path
                        if dst.exists():
                            continue

                        if sp.is_file():
                            if not filt.should_include_file(ctx.root, sp):
                                continue
                            _safe_copy_file(sp, dst)
                            copied += 1

                        elif sp.is_dir():
                            # prune dir itself before copying
                            parent = (
                                ctx.root
                                if rel_path.parent == Path(".")
                                else (ctx.root / rel_path.parent)
                            )
                            if filt.should_prune_dir(parent, rel_path.name):
                                pruned += 1
                                continue
                            if _is_venv_root(sp):
                                pruned += 1
                                continue

                            files_copied, dirs_pruned, files_excluded = (
                                _copy_tree_filtered(
                                    ctx.root, sp, dst_root / rel_path, filt
                                )
                            )
                            copied += files_copied
                            pruned += dirs_pruned
                            excluded_total += files_excluded

                            if copied >= self.max_files:
                                break
                    except Exception:
                        continue
                    if copied >= self.max_files:
                        break
                # BUGFIX: previously only the inner loop stopped at max_files,
                # so the next glob pattern kept copying past the cap.
                if copied >= self.max_files:
                    break

        # write a short manifest for sanity
        manifest = ctx.workdir / "meta" / "50_copy_manifest.txt"
        manifest.parent.mkdir(parents=True, exist_ok=True)
        manifest.write_text(
            f"copied_files={copied}\n"
            f"excluded_files={excluded_total}\n"
            f"pruned_dirs={pruned}\n"
            f"max_files={self.max_files}\n",
            encoding="utf-8",
        )

        dur = int(time.time() - start)
        note = f"copied={copied} pruned={pruned}"
        if copied >= self.max_files:
            note += " (HIT MAX)"
        return StepResult(self.name, "PASS", dur, note)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess # nosec B404 - Required for tool execution, paths validated
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .base import StepResult
|
|
9
|
+
from ..context import BundleContext
|
|
10
|
+
from ..tools import which
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _has_pytest(root: Path) -> bool:
|
|
14
|
+
"""Check if pytest is likely used (tests exist or pytest in deps)."""
|
|
15
|
+
# Look for common test directories (exclude venv)
|
|
16
|
+
for test_dir in ["tests", "test"]:
|
|
17
|
+
test_path = root / test_dir
|
|
18
|
+
if test_path.is_dir():
|
|
19
|
+
# Make sure it's not inside a venv
|
|
20
|
+
if not any(
|
|
21
|
+
p.name.endswith("venv") or p.name.startswith(".")
|
|
22
|
+
for p in test_path.parents
|
|
23
|
+
):
|
|
24
|
+
return True
|
|
25
|
+
|
|
26
|
+
# Look for test files in the project root and immediate subdirectories
|
|
27
|
+
# (not deep recursion to avoid finding venv tests)
|
|
28
|
+
for pattern in ["test_*.py", "*_test.py"]:
|
|
29
|
+
for p in root.glob(pattern):
|
|
30
|
+
return True
|
|
31
|
+
# Check one level deep
|
|
32
|
+
for subdir in root.iterdir():
|
|
33
|
+
if (
|
|
34
|
+
subdir.is_dir()
|
|
35
|
+
and not subdir.name.startswith(".")
|
|
36
|
+
and not subdir.name.endswith("venv")
|
|
37
|
+
):
|
|
38
|
+
for p in subdir.glob(pattern):
|
|
39
|
+
return True
|
|
40
|
+
|
|
41
|
+
return False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class CoverageStep:
    """Run the test suite under pytest-cov and capture the coverage report.

    Writes the (redacted) pytest/coverage output to ``logs/35_coverage.txt``.
    Skips when pytest is missing, no tests are detected, or pytest-cov is not
    installed. Test failures do not fail the step; a non-zero exit code is
    surfaced in the result note instead.
    """

    name: str = "coverage"
    outfile: str = "logs/35_coverage.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        started = time.time()
        report_path = ctx.workdir / self.outfile
        report_path.parent.mkdir(parents=True, exist_ok=True)

        # pytest must be on PATH, since coverage is collected via pytest-cov.
        pytest_bin = which("pytest")
        if not pytest_bin:
            report_path.write_text(
                "pytest not found; skipping coverage (pip install pytest pytest-cov)\n",
                encoding="utf-8",
            )
            return StepResult(self.name, "SKIP", 0, "missing pytest")

        # Nothing to measure if the project has no tests.
        if not _has_pytest(ctx.root):
            report_path.write_text("no tests detected; skipping coverage\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "no tests")

        # Branch coverage enabled (v1.4.1+); skip-covered keeps output short.
        cmd = [
            pytest_bin,
            "--cov",
            "--cov-branch",
            "--cov-report=term-missing:skip-covered",
            "--no-cov-on-fail",
            "-q",
        ]
        header = f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n"

        proc = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )

        stderr_part = "\n" + proc.stderr if proc.stderr else ""
        text = header + (proc.stdout or "") + stderr_part

        # pytest rejects --cov when pytest-cov is absent; surface an install hint.
        if "pytest: error: unrecognized arguments: --cov" in text:
            text = (
                header
                + "pytest-cov not found; install with: pip install pytest-cov\n\n"
                + text
            )
            report_path.write_text(ctx.redact_text(text), encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "missing pytest-cov")

        report_path.write_text(ctx.redact_text(text), encoding="utf-8")

        elapsed = int(time.time() - started)
        # Non-zero exit means test failures or coverage threshold not met.
        note = "" if proc.returncode == 0 else f"exit={proc.returncode}"
        return StepResult(self.name, "PASS", elapsed, note)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CPU profiling with cProfile - Milestone 3 (v1.4.0)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import subprocess
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .base import StepResult
|
|
13
|
+
from ..context import BundleContext
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class CProfileStep:
    """
    Run cProfile on the project entry point or test suite to identify CPU bottlenecks.

    Outputs:
    - logs/60_cprofile.txt: Top 50 slowest functions
    - meta/60_cprofile.stats: Binary stats file for further analysis
    """

    name: str = "cprofile"

    @staticmethod
    def _profile_cmd(ctx: BundleContext, stats_file: Path) -> list[str]:
        """Shared ``python -m cProfile -o <stats>`` command prefix.

        Previously this prefix was duplicated verbatim in all three target
        branches; building it once keeps the branches in sync.
        """
        return [
            str(ctx.tools.python),
            "-m",
            "cProfile",
            "-o",
            str(stats_file),
        ]

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()

        if ctx.options.no_profile:
            return StepResult(self.name, "SKIP", 0, "profiling disabled")

        # Determine default profiling target.
        entry_point = ctx.options.profile_entry_point
        if not entry_point:
            # Default: profile pytest if tests/ exists.
            if not (ctx.root / "tests").is_dir():
                return StepResult(self.name, "SKIP", 0, "no tests/ and no entry point")

        stats_file = ctx.workdir / "meta" / "60_cprofile.stats"

        # Determine what to profile.
        if entry_point:
            target_path = Path(entry_point)
            if not target_path.is_absolute():
                target_path = ctx.root / entry_point

            if not target_path.exists():
                return StepResult(
                    self.name, "SKIP", 0, f"entry point not found: {entry_point}"
                )

            if target_path.is_file():
                # Profile a specific script.
                cmd = self._profile_cmd(ctx, stats_file) + [str(target_path)]
                desc = f"Profiling {target_path.name}"
            else:
                # Assume it's a directory; profile pytest over it.
                cmd = self._profile_cmd(ctx, stats_file) + [
                    "-m",
                    "pytest",
                    str(target_path),
                    "-q",
                ]
                desc = f"Profiling pytest in {target_path.name}/"
        else:
            # Default: profile the whole pytest run.
            cmd = self._profile_cmd(ctx, stats_file) + ["-m", "pytest", "-q"]
            desc = "Profiling pytest"

        ctx.emit(f"  {desc}")

        try:
            # Run profiling with a hard time limit.
            result = subprocess.run(
                cmd,
                cwd=ctx.root,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout for profiling
            )

            # Generate human-readable report when the stats file was produced.
            if stats_file.exists():
                self._generate_report(stats_file, ctx.workdir)
                elapsed = int((time.time() - start) * 1000)
                return StepResult(self.name, "OK", elapsed)

            # No stats produced: still write stdout/stderr for debugging.
            output_file = ctx.workdir / "logs" / "60_cprofile.txt"
            output_file.parent.mkdir(parents=True, exist_ok=True)
            with output_file.open("w") as f:
                f.write("=" * 70 + "\n")
                f.write("CPU PROFILING FAILED\n")
                f.write("=" * 70 + "\n\n")
                f.write("STDOUT:\n")
                f.write(result.stdout)
                f.write("\n\nSTDERR:\n")
                f.write(result.stderr)

            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "stats file not created")

        except subprocess.TimeoutExpired:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "timeout")
        except Exception as e:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, str(e))

    def _generate_report(self, stats_file: Path, workdir: Path) -> None:
        """Write a human-readable top-50 report from *stats_file* to logs/60_cprofile.txt."""
        import pstats

        output_file = workdir / "logs" / "60_cprofile.txt"
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with output_file.open("w") as f:
            f.write("=" * 70 + "\n")
            f.write("TOP 50 SLOWEST FUNCTIONS (CPU PROFILING)\n")
            f.write("=" * 70 + "\n\n")

            # Load stats, stripping directory paths for cleaner output.
            stats = pstats.Stats(str(stats_file), stream=f)
            stats.strip_dirs()

            # Cumulative time surfaces the call-tree hot spots.
            f.write("Sorted by cumulative time:\n")
            f.write("-" * 70 + "\n")
            stats.sort_stats("cumulative")
            stats.print_stats(50)

            # Total time surfaces functions that are themselves expensive.
            f.write("\n" + "=" * 70 + "\n")
            f.write("Sorted by total time (time spent in function itself):\n")
            f.write("-" * 70 + "\n")
            stats.sort_stats("time")
            stats.print_stats(50)

            f.write("\n" + "=" * 70 + "\n")
            f.write("Full binary stats saved to: meta/60_cprofile.stats\n")
            f.write("Analyze with: python -m pstats meta/60_cprofile.stats\n")
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess # nosec B404 - Required for tool execution, paths validated
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from .base import StepResult
|
|
8
|
+
from ..context import BundleContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class DependencySizesStep:
    """Report the largest installed dependencies by on-disk size.

    Enumerates packages via ``pip list --format=json``, locates each one via
    ``pip show``, and sums file sizes under its install directory. Writes a
    table of the ``top_n`` largest packages to ``meta/33_dependency_sizes.txt``.
    """

    name: str = "dependency sizes"
    outfile: str = "meta/33_dependency_sizes.txt"
    top_n: int = 50  # Show top N largest packages

    def _package_size(self, python: str, pkg_name: str) -> int | None:
        """Return the on-disk size of *pkg_name* in bytes, or None if unknown.

        Raises on subprocess timeout/failure; the caller treats any exception
        as "skip this package" (best-effort analysis).
        """
        # Hoisted here once per call instead of the previous per-loop-iteration
        # `from pathlib import Path` inside run()'s package loop.
        from pathlib import Path

        show_result = subprocess.run(  # nosec B603
            [python, "-m", "pip", "show", pkg_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=5,
        )
        if show_result.returncode != 0:
            return None

        # Parse Location from pip show output.
        location = None
        for line in show_result.stdout.splitlines():
            if line.startswith("Location:"):
                location = line.split(":", 1)[1].strip()
                break
        if not location:
            return None

        # pip normalizes dashes to underscores in import dirs; try both forms.
        pkg_path = Path(location) / pkg_name.replace("-", "_")
        if not pkg_path.exists():
            pkg_path = Path(location) / pkg_name

        if pkg_path.exists() and pkg_path.is_dir():
            return sum(f.stat().st_size for f in pkg_path.rglob("*") if f.is_file())
        return None

    def run(self, ctx: BundleContext) -> StepResult:
        import json

        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        python = ctx.tools.python
        if not python:
            out.write_text("python not found; skipping\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "missing python")

        try:
            # Get list of installed packages.
            list_result = subprocess.run(  # nosec B603
                [python, "-m", "pip", "list", "--format=json"],
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if list_result.returncode != 0:
                out.write_text(
                    f"pip list failed: {list_result.stderr}\n", encoding="utf-8"
                )
                return StepResult(self.name, "FAIL", 0, "pip list failed")

            packages = json.loads(list_result.stdout)

            # Measure each package; failures (timeouts, odd layouts) are skipped.
            package_sizes = []
            for pkg in packages:
                try:
                    size = self._package_size(python, pkg["name"])
                except Exception:
                    continue
                if size is not None:
                    package_sizes.append((pkg["name"], pkg["version"], size))

            # Sort by size (descending).
            package_sizes.sort(key=lambda x: x[2], reverse=True)

            # Write results.
            with out.open("w", encoding="utf-8") as f:
                f.write("=" * 70 + "\n")
                f.write(
                    f"TOP {min(self.top_n, len(package_sizes))} LARGEST DEPENDENCIES\n"
                )
                f.write("=" * 70 + "\n\n")
                f.write(f"Total packages analyzed: {len(packages)}\n")
                f.write(f"Packages with size data: {len(package_sizes)}\n\n")

                if package_sizes:
                    total_size = sum(size for _, _, size in package_sizes)
                    f.write(f"Total size: {self._format_size(total_size)}\n\n")

                    f.write(f"{'Package':<40} {'Version':<15} {'Size':>15}\n")
                    f.write("-" * 70 + "\n")

                    for pkg_name, version, size in package_sizes[: self.top_n]:
                        f.write(
                            f"{pkg_name:<40} {version:<15} {self._format_size(size):>15}\n"
                        )
                else:
                    f.write("No package size data available.\n")

            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "OK", elapsed, "")

        except subprocess.TimeoutExpired:
            out.write_text("Analysis timed out\n", encoding="utf-8")
            return StepResult(
                self.name, "FAIL", int((time.time() - start) * 1000), "timeout"
            )
        except Exception as e:
            out.write_text(f"Error: {e}\n", encoding="utf-8")
            return StepResult(
                self.name, "FAIL", int((time.time() - start) * 1000), str(e)
            )

    def _format_size(self, size_bytes: float) -> str:
        """Format size in human-readable format (B/KB/MB/GB/TB, one decimal)."""
        for unit in ["B", "KB", "MB", "GB"]:
            if size_bytes < 1024.0:
                return f"{size_bytes:.1f} {unit}"
            size_bytes /= 1024.0
        return f"{size_bytes:.1f} TB"
|