gwc-pybundle 2.1.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwc-pybundle might be problematic. Click here for more details.

Files changed (82):
  1. gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
  2. gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
  3. gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
  4. gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
  5. gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
  6. gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
  7. pybundle/__init__.py +0 -0
  8. pybundle/__main__.py +4 -0
  9. pybundle/cli.py +546 -0
  10. pybundle/context.py +404 -0
  11. pybundle/doctor.py +148 -0
  12. pybundle/filters.py +228 -0
  13. pybundle/manifest.py +77 -0
  14. pybundle/packaging.py +45 -0
  15. pybundle/policy.py +132 -0
  16. pybundle/profiles.py +454 -0
  17. pybundle/roadmap_model.py +42 -0
  18. pybundle/roadmap_scan.py +328 -0
  19. pybundle/root_detect.py +14 -0
  20. pybundle/runner.py +180 -0
  21. pybundle/steps/__init__.py +26 -0
  22. pybundle/steps/ai_context.py +791 -0
  23. pybundle/steps/api_docs.py +219 -0
  24. pybundle/steps/asyncio_analysis.py +358 -0
  25. pybundle/steps/bandit.py +72 -0
  26. pybundle/steps/base.py +20 -0
  27. pybundle/steps/blocking_call_detection.py +291 -0
  28. pybundle/steps/call_graph.py +219 -0
  29. pybundle/steps/compileall.py +76 -0
  30. pybundle/steps/config_docs.py +319 -0
  31. pybundle/steps/config_validation.py +302 -0
  32. pybundle/steps/container_image.py +294 -0
  33. pybundle/steps/context_expand.py +272 -0
  34. pybundle/steps/copy_pack.py +293 -0
  35. pybundle/steps/coverage.py +101 -0
  36. pybundle/steps/cprofile_step.py +166 -0
  37. pybundle/steps/dependency_sizes.py +136 -0
  38. pybundle/steps/django_checks.py +214 -0
  39. pybundle/steps/dockerfile_lint.py +282 -0
  40. pybundle/steps/dockerignore.py +311 -0
  41. pybundle/steps/duplication.py +103 -0
  42. pybundle/steps/env_completeness.py +269 -0
  43. pybundle/steps/env_var_usage.py +253 -0
  44. pybundle/steps/error_refs.py +204 -0
  45. pybundle/steps/event_loop_patterns.py +280 -0
  46. pybundle/steps/exception_patterns.py +190 -0
  47. pybundle/steps/fastapi_integration.py +250 -0
  48. pybundle/steps/flask_debugging.py +312 -0
  49. pybundle/steps/git_analytics.py +315 -0
  50. pybundle/steps/handoff_md.py +176 -0
  51. pybundle/steps/import_time.py +175 -0
  52. pybundle/steps/interrogate.py +106 -0
  53. pybundle/steps/license_scan.py +96 -0
  54. pybundle/steps/line_profiler.py +117 -0
  55. pybundle/steps/link_validation.py +287 -0
  56. pybundle/steps/logging_analysis.py +233 -0
  57. pybundle/steps/memory_profile.py +176 -0
  58. pybundle/steps/migration_history.py +336 -0
  59. pybundle/steps/mutation_testing.py +141 -0
  60. pybundle/steps/mypy.py +103 -0
  61. pybundle/steps/orm_optimization.py +316 -0
  62. pybundle/steps/pip_audit.py +45 -0
  63. pybundle/steps/pipdeptree.py +62 -0
  64. pybundle/steps/pylance.py +562 -0
  65. pybundle/steps/pytest.py +66 -0
  66. pybundle/steps/query_pattern_analysis.py +334 -0
  67. pybundle/steps/radon.py +161 -0
  68. pybundle/steps/repro_md.py +161 -0
  69. pybundle/steps/rg_scans.py +78 -0
  70. pybundle/steps/roadmap.py +153 -0
  71. pybundle/steps/ruff.py +117 -0
  72. pybundle/steps/secrets_detection.py +235 -0
  73. pybundle/steps/security_headers.py +309 -0
  74. pybundle/steps/shell.py +74 -0
  75. pybundle/steps/slow_tests.py +178 -0
  76. pybundle/steps/sqlalchemy_validation.py +269 -0
  77. pybundle/steps/test_flakiness.py +184 -0
  78. pybundle/steps/tree.py +116 -0
  79. pybundle/steps/type_coverage.py +277 -0
  80. pybundle/steps/unused_deps.py +211 -0
  81. pybundle/steps/vulture.py +167 -0
  82. pybundle/tools.py +63 -0
@@ -0,0 +1,293 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ import time
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ from .base import StepResult
10
+ from pybundle.context import BundleContext
11
+ from pybundle.policy import AIContextPolicy, PathFilter
12
+
13
+
14
def _is_venv_root(p: Path) -> bool:
    """Return True when directory *p* looks like the root of a virtual environment.

    Detection is purely structural (no interpreter is executed). A directory
    qualifies when any of the following holds:

    * it contains a ``pyvenv.cfg`` file;
    * it has a POSIX layout: ``bin/python`` or ``bin/python3`` together with
      either ``bin/activate`` or a ``lib/python*/site-packages`` directory;
    * it has a Windows layout: ``Scripts/python.exe`` (or ``Scripts/python``)
      together with either ``Scripts/activate`` or ``Lib/site-packages``;
    * it contains a ``.Python`` marker.

    Anything that is not an existing directory yields False.
    """
    if not p.is_dir():
        return False

    # Standard venv metadata is conclusive on its own.
    if (p / "pyvenv.cfg").is_file():
        return True

    # POSIX layout: an interpreter in bin/ plus one corroborating signal.
    posix_bin = p / "bin"
    if posix_bin.is_dir():
        has_interpreter = (posix_bin / "python").exists() or (
            posix_bin / "python3"
        ).exists()
        if has_interpreter:
            if (posix_bin / "activate").is_file():
                return True
            if any((p / "lib").glob("python*/site-packages")):
                return True

    # Windows layout: an interpreter in Scripts/ plus one corroborating signal.
    win_scripts = p / "Scripts"
    if win_scripts.is_dir():
        has_interpreter = (win_scripts / "python.exe").is_file() or (
            win_scripts / "python"
        ).exists()
        if has_interpreter:
            if (win_scripts / "activate").is_file():
                return True
            if (p / "Lib" / "site-packages").is_dir():
                return True

    # Last resort: the .Python marker left behind by some virtualenv tooling.
    return (p / ".Python").exists()
48
+
49
+
50
def _is_under_venv(root: Path, rel_path: Path) -> bool:
    """Return True when *rel_path* is, or lies beneath, a virtual environment.

    Every prefix of *rel_path* (``a``, ``a/b``, ``a/b/c`` ...) is resolved
    against *root* and tested with ``_is_venv_root``; the first hit wins.
    An empty relative path has no prefixes and therefore yields False.
    """
    segments = rel_path.parts
    return any(
        _is_venv_root(root.joinpath(*segments[:depth]))
        for depth in range(1, len(segments) + 1)
    )
58
+
59
+
60
def _safe_copy_file(src: Path, dst: Path) -> None:
    """Copy *src* to *dst*, creating any missing destination directories.

    ``shutil.copy2`` is used so that file mode and timestamps are carried over
    where the platform allows it. Errors from the copy are not handled here;
    they propagate to the caller.
    """
    target_dir = dst.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)
64
+
65
+
66
def _copy_tree_filtered(
    root: Path,
    src_dir: Path,
    dst_dir: Path,
    filt: "PathFilter",
) -> tuple[int, int, int]:
    """Mirror *src_dir* into *dst_dir*, honouring the exclusion rules of *filt*.

    Directories rejected by ``filt.should_prune_dir`` are removed from the
    walk so they are never descended into; files rejected by
    ``filt.should_include_file`` are skipped. A file whose copy raises
    ``OSError`` is skipped as well.

    Returns:
        ``(files_copied, dirs_pruned, files_excluded)``, where
        ``files_excluded`` is every file encountered that did not end up
        copied (filtered out or failed to copy).
    """
    total_seen = 0
    total_copied = 0
    total_pruned = 0

    for current, subdirs, files in os.walk(src_dir):
        here = Path(current)
        rel_here = here.relative_to(src_dir)

        # Rewrite the directory list in place: os.walk only descends into
        # whatever is still listed after this assignment.
        survivors = [d for d in subdirs if not filt.should_prune_dir(here, d)]
        total_pruned += len(subdirs) - len(survivors)
        subdirs[:] = survivors

        for file_name in files:
            total_seen += 1
            source = here / file_name

            # PathFilter is the single authority on excluded dirs, patterns
            # and extensions.
            if not filt.should_include_file(root, source):
                continue

            try:
                _safe_copy_file(source, dst_dir / rel_here / file_name)
            except OSError:
                continue
            total_copied += 1

    return total_copied, total_pruned, max(0, total_seen - total_copied)
113
+
114
+
115
def _guess_package_dirs(root: Path, filt: "PathFilter") -> list[Path]:
    """List the top-level directories of *root* that look like Python packages.

    A directory is reported when it is not hidden (no leading dot), is not
    pruned by ``filt.should_prune_dir`` and contains an ``__init__.py`` file.
    The result is sorted by path.
    """
    return [
        entry
        for entry in sorted(root.iterdir())
        if entry.is_dir()
        and not entry.name.startswith(".")
        and not filt.should_prune_dir(root, entry.name)
        and (entry / "__init__.py").is_file()
    ]
127
+
128
+
129
@dataclass
class CuratedCopyStep:
    """Copy a curated subset of the project tree into the bundle's source dir.

    Sources are gathered in four passes: well-known top-level files, common
    top-level directories, auto-detected root packages, and finally optional
    glob patterns. Exclusion rules come from ``PathFilter``; virtual
    environments are additionally detected structurally.

    Fields left as ``None`` fall back to the corresponding ``AIContextPolicy``
    value.

    NOTE(review): the fallbacks use truthiness, so an explicitly empty
    list/set also falls back to the policy default - confirm that is intended.
    NOTE(review): ``max_files`` is only checked between copy operations; a
    single large directory tree can push the total past the cap.
    """

    name: str = "copy curated source pack"
    include_files: list[str] | None = None
    include_dirs: list[str] | None = None
    include_globs: list[str] | None = None
    exclude_dirs: set[str] | None = None
    max_files: int = 20000
    policy: AIContextPolicy | None = None

    @staticmethod
    def _iter_glob(root: Path, pattern: str):
        """Lazily yield ``root.glob(pattern)`` matches, tolerating bad patterns.

        ``Path.glob`` rejects empty patterns (``ValueError``) and absolute
        patterns (``NotImplementedError``), and directory scanning can raise
        ``OSError``. Globs are a best-effort extra, so any of these simply
        ends the iteration instead of aborting the whole step.
        """
        try:
            yield from root.glob(pattern)
        except (ValueError, NotImplementedError, OSError):
            return

    def run(self, ctx: BundleContext) -> StepResult:
        """Populate ``ctx.srcdir`` and write ``meta/50_copy_manifest.txt``.

        Returns a ``StepResult`` with status ``"PASS"``, the elapsed time in
        whole seconds and a note summarising copied/pruned counts (suffixed
        with ``(HIT MAX)`` when the file cap was reached).
        """
        start = time.time()
        dst_root = ctx.srcdir  # bundle/src
        dst_root.mkdir(parents=True, exist_ok=True)

        policy = self.policy or AIContextPolicy()

        exclude = (
            set(self.exclude_dirs) if self.exclude_dirs else set(policy.exclude_dirs)
        )
        exclude_patterns = set(policy.exclude_patterns)
        filt = PathFilter(
            exclude_dirs=exclude,
            exclude_patterns=exclude_patterns,
            exclude_file_exts=set(policy.exclude_file_exts),
        )
        include_files = self.include_files or list(policy.include_files)
        include_dirs = self.include_dirs or list(policy.include_dirs)
        include_globs = self.include_globs or list(policy.include_globs)

        copied = 0
        pruned = 0
        excluded_total = 0

        # 1) Include well-known top-level files if present
        for rel_file in include_files:
            if copied >= self.max_files:
                break

            sp = ctx.root / rel_file
            if not sp.is_file():
                continue
            if not filt.should_include_file(ctx.root, sp):
                continue

            try:
                _safe_copy_file(sp, dst_root / rel_file)
                copied += 1
                if copied >= self.max_files:
                    break
            except OSError:
                # best-effort: an unreadable file must not abort the bundle
                pass

        # 2) Include common top-level dirs (src/tests/tools)
        for rel_dir in include_dirs:
            sp = ctx.root / rel_dir
            if not sp.is_dir():
                continue

            # policy prune (exact + patterns + venv detection inside PathFilter)
            if filt.should_prune_dir(ctx.root, rel_dir):
                pruned += 1
                continue

            # extra-strong venv detection for oddly-named envs
            if _is_venv_root(sp):
                pruned += 1
                continue

            files_copied, dirs_pruned, files_excluded = _copy_tree_filtered(
                ctx.root, sp, dst_root / rel_dir, filt
            )
            copied += files_copied
            pruned += dirs_pruned
            excluded_total += files_excluded

            if copied >= self.max_files:
                break

        # 3) Include detected package dirs at root (if not already copied)
        if copied < self.max_files:
            for pkg_dir in _guess_package_dirs(ctx.root, filt):
                rel_pkg_name = pkg_dir.name
                if (dst_root / rel_pkg_name).exists():
                    continue
                files_copied, dirs_pruned, files_excluded = _copy_tree_filtered(
                    ctx.root, pkg_dir, dst_root / rel_pkg_name, filt
                )
                copied += files_copied
                pruned += dirs_pruned
                excluded_total += files_excluded
                if copied >= self.max_files:
                    break

        # 4) Optional globs (best-effort; avoid deep explosion by pruning
        #    excluded dirs). Anything under an excluded dir or venv is skipped.
        if copied < self.max_files:
            for g in include_globs:
                # _iter_glob keeps a malformed pattern from aborting the step
                for sp in self._iter_glob(ctx.root, g):
                    try:
                        if not sp.exists():
                            continue

                        rel_path = sp.relative_to(ctx.root)

                        if _is_under_venv(ctx.root, rel_path):
                            pruned += 1
                            continue

                        dst = dst_root / rel_path
                        if dst.exists():
                            # already copied by an earlier pass
                            continue

                        if sp.is_file():
                            if not filt.should_include_file(ctx.root, sp):
                                continue
                            _safe_copy_file(sp, dst)
                            copied += 1

                        elif sp.is_dir():
                            # prune dir itself before copying
                            parent = (
                                ctx.root
                                if rel_path.parent == Path(".")
                                else (ctx.root / rel_path.parent)
                            )
                            if filt.should_prune_dir(parent, rel_path.name):
                                pruned += 1
                                continue
                            if _is_venv_root(sp):
                                pruned += 1
                                continue

                            files_copied, dirs_pruned, files_excluded = (
                                _copy_tree_filtered(
                                    ctx.root, sp, dst_root / rel_path, filt
                                )
                            )
                            copied += files_copied
                            pruned += dirs_pruned
                            excluded_total += files_excluded

                        if copied >= self.max_files:
                            break
                    except Exception:
                        # deliberately broad: one bad match must not stop the
                        # remaining glob results from being processed
                        continue
                if copied >= self.max_files:
                    break

        # write a short manifest for sanity
        manifest = ctx.workdir / "meta" / "50_copy_manifest.txt"
        manifest.parent.mkdir(parents=True, exist_ok=True)
        manifest.write_text(
            f"copied_files={copied}\n"
            f"excluded_files={excluded_total}\n"
            f"pruned_dirs={pruned}\n"
            f"max_files={self.max_files}\n",
            encoding="utf-8",
        )

        dur = int(time.time() - start)
        note = f"copied={copied} pruned={pruned}"
        if copied >= self.max_files:
            note += " (HIT MAX)"
        return StepResult(self.name, "PASS", dur, note)
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess # nosec B404 - Required for tool execution, paths validated
4
+ import time
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from .base import StepResult
9
+ from ..context import BundleContext
10
+ from ..tools import which
11
+
12
+
13
def _has_pytest(root: Path) -> bool:
    """Return True when the project at *root* appears to contain tests.

    The check is purely file-system based and intentionally shallow so that
    test suites shipped inside virtual environments are not picked up:

    * a ``tests/`` or ``test/`` directory directly under *root*, or
    * a ``test_*.py`` / ``*_test.py`` file directly under *root* or one level
      down, ignoring hidden directories and directories whose name ends in
      ``venv``.

    A *root* that is not an existing directory yields False.
    """
    if not root.is_dir():
        return False

    # These are direct children of root, so they cannot be inside a venv that
    # belongs to the project. Directories *above* root are deliberately not
    # inspected: a checkout living under e.g. ~/.cache must still count.
    if any((root / name).is_dir() for name in ("tests", "test")):
        return True

    # Only one level of subdirectories is searched (no deep recursion) to
    # avoid discovering tests that ship inside site-packages.
    candidate_dirs = [
        sub
        for sub in root.iterdir()
        if sub.is_dir()
        and not sub.name.startswith(".")
        and not sub.name.endswith("venv")
    ]
    for pattern in ("test_*.py", "*_test.py"):
        if any(root.glob(pattern)):
            return True
        if any(any(sub.glob(pattern)) for sub in candidate_dirs):
            return True

    return False
42
+
43
+
44
@dataclass
class CoverageStep:
    """Run the project's test suite under pytest-cov and log the report.

    The combined stdout/stderr of pytest is written (after redaction through
    ``ctx.redact_text``) to ``outfile`` inside the bundle work directory.
    """

    name: str = "coverage"
    outfile: str = "logs/35_coverage.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        """Execute ``pytest --cov`` in ``ctx.root`` and record its output.

        Returns:
            ``SKIP`` when pytest is not on PATH, when no tests are detected,
            or when pytest rejects ``--cov`` (pytest-cov missing); otherwise
            ``PASS`` with the elapsed whole seconds. A non-zero pytest exit
            code is reported in the note rather than as a failure.
        """
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        # Check for pytest first (since we use pytest-cov)
        pytest_bin = which("pytest")
        if not pytest_bin:
            out.write_text(
                "pytest not found; skipping coverage (pip install pytest pytest-cov)\n",
                encoding="utf-8",
            )
            return StepResult(self.name, "SKIP", 0, "missing pytest")

        # Check if there are tests to run
        if not _has_pytest(ctx.root):
            out.write_text("no tests detected; skipping coverage\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "no tests")

        # Run pytest with coverage (including branch coverage for v1.4.1+)
        cmd = [
            pytest_bin,
            "--cov",
            "--cov-branch",  # Enable branch coverage (v1.4.1+)
            "--cov-report=term-missing:skip-covered",
            "--no-cov-on-fail",
            "-q",
        ]
        header = f"## PWD: {ctx.root}\n## CMD: {' '.join(cmd)}\n\n"

        cp = subprocess.run(  # nosec B603
            cmd, cwd=str(ctx.root), text=True, capture_output=True, check=False
        )

        # Combine stdout and stderr; the header is prepended exactly once below
        output = (cp.stdout or "") + ("\n" + cp.stderr if cp.stderr else "")

        # If pytest-cov is not installed, pytest rejects --cov; explain why
        if "pytest: error: unrecognized arguments: --cov" in output:
            text = (
                header
                + "pytest-cov not found; install with: pip install pytest-cov\n\n"
                + output
            )
            out.write_text(ctx.redact_text(text), encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "missing pytest-cov")

        out.write_text(ctx.redact_text(header + output), encoding="utf-8")

        dur = int(time.time() - start)
        # Non-zero exit means test failures or coverage threshold not met
        note = "" if cp.returncode == 0 else f"exit={cp.returncode}"
        return StepResult(self.name, "PASS", dur, note)
@@ -0,0 +1,166 @@
1
+ """
2
+ CPU profiling with cProfile - Milestone 3 (v1.4.0)
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import subprocess
8
+ import time
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+
12
+ from .base import StepResult
13
+ from ..context import BundleContext
14
+
15
+
16
@dataclass
class CProfileStep:
    """
    Run cProfile on the project entry point or test suite to identify CPU bottlenecks.

    Outputs:
    - logs/60_cprofile.txt: Top 50 slowest functions
    - meta/60_cprofile.stats: Binary stats file for further analysis
    """

    name: str = "cprofile"

    def run(self, ctx: BundleContext) -> StepResult:
        """Profile the configured target and write the stats and text report.

        Target selection:
        - ``ctx.options.profile_entry_point`` pointing at a file: that script;
        - pointing at a directory: ``pytest <dir> -q``;
        - unset: ``pytest -q``, provided ``<root>/tests`` exists.

        Returns ``SKIP`` when profiling is disabled, the interpreter is
        unknown or no target can be found; ``OK`` when a stats file was
        produced; ``FAIL`` on timeout, on any exception, or when no stats
        file was written. Durations for OK/FAIL are in milliseconds.
        """
        start = time.time()

        if ctx.options.no_profile:
            return StepResult(self.name, "SKIP", 0, "profiling disabled")

        # Without an interpreter the command would be the literal string
        # "None"; skip instead of failing with a confusing OS error.
        python = ctx.tools.python
        if not python:
            return StepResult(self.name, "SKIP", 0, "missing python")

        # Determine what to profile
        entry_point = ctx.options.profile_entry_point
        if entry_point:
            target_path = Path(entry_point)
            if not target_path.is_absolute():
                target_path = ctx.root / entry_point

            if not target_path.exists():
                return StepResult(
                    self.name, "SKIP", 0, f"entry point not found: {entry_point}"
                )

            if target_path.is_file():
                # Profile a specific script
                target_args = [str(target_path)]
                desc = f"Profiling {target_path.name}"
            else:
                # Assume it's a directory, profile pytest
                target_args = ["-m", "pytest", str(target_path), "-q"]
                desc = f"Profiling pytest in {target_path.name}/"
        else:
            # Default: profile pytest if tests/ exists
            if not (ctx.root / "tests").is_dir():
                return StepResult(self.name, "SKIP", 0, "no tests/ and no entry point")
            target_args = ["-m", "pytest", "-q"]
            desc = "Profiling pytest"

        stats_file = ctx.workdir / "meta" / "60_cprofile.stats"
        cmd = [
            str(python),
            "-m",
            "cProfile",
            "-o",
            str(stats_file),
            *target_args,
        ]

        ctx.emit(f" {desc}")

        try:
            # cProfile cannot create the output directory itself
            stats_file.parent.mkdir(parents=True, exist_ok=True)
            # Drop leftovers so a failed run is never reported from stale data
            stats_file.unlink(missing_ok=True)

            # Run profiling
            result = subprocess.run(
                cmd,
                cwd=ctx.root,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout for profiling
            )

            # Generate human-readable report
            if stats_file.exists():
                self._generate_report(stats_file, ctx.workdir)
                elapsed = int((time.time() - start) * 1000)
                return StepResult(self.name, "OK", elapsed)

            # Still write output for debugging
            output_file = ctx.workdir / "logs" / "60_cprofile.txt"
            output_file.parent.mkdir(parents=True, exist_ok=True)

            failure_report = (
                "=" * 70
                + "\n"
                + "CPU PROFILING FAILED\n"
                + "=" * 70
                + "\n\n"
                + "STDOUT:\n"
                + (result.stdout or "")
                + "\n\nSTDERR:\n"
                + (result.stderr or "")
            )
            # Captured tool output may contain secrets; redact it before it
            # is stored in the bundle.
            output_file.write_text(ctx.redact_text(failure_report), encoding="utf-8")

            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "stats file not created")

        except subprocess.TimeoutExpired:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "timeout")
        except Exception as e:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, str(e))

    def _generate_report(self, stats_file: Path, workdir: Path) -> None:
        """Write the top-50 cumulative and total-time tables to the log file.

        Reads the binary profile at *stats_file* with ``pstats`` and writes
        ``logs/60_cprofile.txt`` under *workdir* (directories are created as
        needed).
        """
        import pstats

        output_file = workdir / "logs" / "60_cprofile.txt"
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with output_file.open("w", encoding="utf-8") as f:
            f.write("=" * 70 + "\n")
            f.write("TOP 50 SLOWEST FUNCTIONS (CPU PROFILING)\n")
            f.write("=" * 70 + "\n\n")

            # Load stats; pstats prints straight into the open log file
            stats = pstats.Stats(str(stats_file), stream=f)

            # Remove directory paths for cleaner output
            stats.strip_dirs()

            # Sort by cumulative time and print top 50
            f.write("Sorted by cumulative time:\n")
            f.write("-" * 70 + "\n")
            stats.sort_stats("cumulative")
            stats.print_stats(50)

            f.write("\n" + "=" * 70 + "\n")
            f.write("Sorted by total time (time spent in function itself):\n")
            f.write("-" * 70 + "\n")
            stats.sort_stats("time")
            stats.print_stats(50)

            f.write("\n" + "=" * 70 + "\n")
            f.write("Full binary stats saved to: meta/60_cprofile.stats\n")
            f.write("Analyze with: python -m pstats meta/60_cprofile.stats\n")
@@ -0,0 +1,136 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess # nosec B404 - Required for tool execution, paths validated
4
+ import time
5
+ from dataclasses import dataclass
6
+
7
+ from .base import StepResult
8
+ from ..context import BundleContext
9
+
10
+
11
@dataclass
class DependencySizesStep:
    """Report the on-disk size of the packages installed for the project.

    Packages are enumerated with ``pip list`` and located with ``pip show``
    using the interpreter in ``ctx.tools.python``; the largest ``top_n`` are
    written to ``outfile`` inside the bundle work directory.
    """

    name: str = "dependency sizes"
    outfile: str = "meta/33_dependency_sizes.txt"
    top_n: int = 50  # Show top N largest packages

    def run(self, ctx: BundleContext) -> StepResult:
        """Measure installed packages and write the size table.

        Returns ``SKIP`` when no interpreter is configured, ``FAIL`` when
        ``pip list`` fails, times out or any other error occurs, and ``OK``
        (duration in milliseconds) otherwise. Packages whose size cannot be
        determined are left out of the table rather than failing the step.
        """
        import json

        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        python = ctx.tools.python
        if not python:
            out.write_text("python not found; skipping\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "missing python")

        try:
            # Get list of installed packages
            list_result = subprocess.run(  # nosec B603
                [python, "-m", "pip", "list", "--format=json"],
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if list_result.returncode != 0:
                out.write_text(
                    f"pip list failed: {list_result.stderr}\n", encoding="utf-8"
                )
                return StepResult(self.name, "FAIL", 0, "pip list failed")

            packages = json.loads(list_result.stdout)

            # Get size for each package
            package_sizes = []
            for pkg in packages:
                pkg_name = pkg["name"]
                try:
                    location = self._pip_show_location(python, pkg_name, ctx.root)
                    if not location:
                        continue
                    pkg_path = self._find_package_dir(location, pkg_name)
                    if pkg_path is None:
                        continue
                    size = sum(
                        f.stat().st_size for f in pkg_path.rglob("*") if f.is_file()
                    )
                    package_sizes.append((pkg_name, pkg["version"], size))
                except Exception:
                    # Skip packages that fail (timeout, unreadable files, ...)
                    continue

            # Sort by size (descending)
            package_sizes.sort(key=lambda x: x[2], reverse=True)

            # Write results
            with out.open("w", encoding="utf-8") as f:
                f.write("=" * 70 + "\n")
                f.write(
                    f"TOP {min(self.top_n, len(package_sizes))} LARGEST DEPENDENCIES\n"
                )
                f.write("=" * 70 + "\n\n")
                f.write(f"Total packages analyzed: {len(packages)}\n")
                f.write(f"Packages with size data: {len(package_sizes)}\n\n")

                if package_sizes:
                    # Calculate total size
                    total_size = sum(size for _, _, size in package_sizes)
                    f.write(f"Total size: {self._format_size(total_size)}\n\n")

                    f.write(f"{'Package':<40} {'Version':<15} {'Size':>15}\n")
                    f.write("-" * 70 + "\n")

                    for pkg_name, version, size in package_sizes[: self.top_n]:
                        f.write(
                            f"{pkg_name:<40} {version:<15} {self._format_size(size):>15}\n"
                        )
                else:
                    f.write("No package size data available.\n")

            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "OK", elapsed, "")

        except subprocess.TimeoutExpired:
            out.write_text("Analysis timed out\n", encoding="utf-8")
            return StepResult(
                self.name, "FAIL", int((time.time() - start) * 1000), "timeout"
            )
        except Exception as e:
            out.write_text(f"Error: {e}\n", encoding="utf-8")
            return StepResult(
                self.name, "FAIL", int((time.time() - start) * 1000), str(e)
            )

    @staticmethod
    def _pip_show_location(python, pkg_name: str, cwd):
        """Return the ``Location:`` reported by ``pip show``, or None.

        Runs in *cwd* so the lookup sees the same environment as the
        preceding ``pip list`` call. Raises ``subprocess.TimeoutExpired`` if
        pip does not answer within 5 seconds.
        """
        show_result = subprocess.run(  # nosec B603
            [python, "-m", "pip", "show", pkg_name],
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=5,
        )
        if show_result.returncode != 0:
            return None

        # Parse Location from pip show output
        for line in show_result.stdout.splitlines():
            if line.startswith("Location:"):
                return line.split(":", 1)[1].strip() or None
        return None

    @staticmethod
    def _find_package_dir(location: str, pkg_name: str):
        """Return the import directory of *pkg_name* under *location*, or None.

        The distribution name rarely matches the directory byte for byte, so
        several spellings are tried in order: dashes replaced by underscores,
        the name as given, and the lower-cased form of both (needed on
        case-sensitive filesystems for names such as ``Flask``).
        """
        from pathlib import Path

        base = Path(location)
        underscored = pkg_name.replace("-", "_")
        spellings = (underscored, pkg_name, underscored.lower(), pkg_name.lower())
        # dict.fromkeys de-duplicates while keeping the priority order
        for spelling in dict.fromkeys(spellings):
            candidate = base / spelling
            if candidate.is_dir():
                return candidate
        return None

    def _format_size(self, size_bytes: float) -> str:
        """Format size in human-readable format (B, KB, MB, GB, then TB)."""
        for unit in ["B", "KB", "MB", "GB"]:
            if size_bytes < 1024.0:
                return f"{size_bytes:.1f} {unit}"
            size_bytes /= 1024.0
        return f"{size_bytes:.1f} TB"