gwc-pybundle 1.4.5 (py3-none-any.whl)
This diff shows the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Potentially problematic release: this version of gwc-pybundle might be problematic (see the registry listing for details).
- gwc_pybundle-1.4.5.dist-info/METADATA +876 -0
- gwc_pybundle-1.4.5.dist-info/RECORD +55 -0
- gwc_pybundle-1.4.5.dist-info/WHEEL +5 -0
- gwc_pybundle-1.4.5.dist-info/entry_points.txt +2 -0
- gwc_pybundle-1.4.5.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-1.4.5.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +365 -0
- pybundle/context.py +362 -0
- pybundle/doctor.py +148 -0
- pybundle/filters.py +178 -0
- pybundle/manifest.py +77 -0
- pybundle/packaging.py +45 -0
- pybundle/policy.py +132 -0
- pybundle/profiles.py +340 -0
- pybundle/roadmap_model.py +42 -0
- pybundle/roadmap_scan.py +295 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +163 -0
- pybundle/steps/__init__.py +26 -0
- pybundle/steps/bandit.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +293 -0
- pybundle/steps/coverage.py +101 -0
- pybundle/steps/cprofile_step.py +155 -0
- pybundle/steps/dependency_sizes.py +120 -0
- pybundle/steps/duplication.py +94 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/handoff_md.py +167 -0
- pybundle/steps/import_time.py +165 -0
- pybundle/steps/interrogate.py +84 -0
- pybundle/steps/license_scan.py +96 -0
- pybundle/steps/line_profiler.py +108 -0
- pybundle/steps/memory_profile.py +173 -0
- pybundle/steps/mutation_testing.py +136 -0
- pybundle/steps/mypy.py +60 -0
- pybundle/steps/pip_audit.py +45 -0
- pybundle/steps/pipdeptree.py +61 -0
- pybundle/steps/pylance.py +562 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/radon.py +121 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +153 -0
- pybundle/steps/ruff.py +111 -0
- pybundle/steps/shell.py +74 -0
- pybundle/steps/slow_tests.py +170 -0
- pybundle/steps/test_flakiness.py +172 -0
- pybundle/steps/tree.py +116 -0
- pybundle/steps/unused_deps.py +112 -0
- pybundle/steps/vulture.py +83 -0
- pybundle/tools.py +63 -0
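
The step modules reconstructed below all follow the same small protocol: a dataclass with a name field and a run(ctx) method that returns a StepResult. Because pybundle/steps/base.py and pybundle/context.py are not reproduced in this section, the stand-in StepResult below is an assumption inferred from call sites such as StepResult(self.name, "SKIP", 0, "pytest not found"); a minimal sketch of a conforming step:

# Minimal sketch of the step protocol shared by the modules in this package.
# The real StepResult lives in pybundle/steps/base.py (not shown in this diff);
# its positional fields (name, status, duration, note) are inferred from call
# sites below and may not match the actual definition exactly.
from __future__ import annotations

import time
from dataclasses import dataclass


@dataclass
class StepResult:  # stand-in for pybundle.steps.base.StepResult
    name: str
    status: str            # observed values: "OK", "SKIP", "FAIL", "PASS"
    duration: int          # most steps record elapsed milliseconds
    note: str | None = None


@dataclass
class NoOpStep:
    """A trivial step conforming to the run(ctx) -> StepResult convention."""

    name: str = "noop"

    def run(self, ctx) -> StepResult:  # ctx is a BundleContext in pybundle
        start = time.time()
        # ... real steps do their work here and write logs under ctx.workdir ...
        elapsed = int((time.time() - start) * 1000)
        return StepResult(self.name, "OK", elapsed)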
pybundle/steps/slow_tests.py
ADDED
@@ -0,0 +1,170 @@
"""
Slow test identification - Milestone 4 (v1.4.1)
"""
from __future__ import annotations

import subprocess
import time
from dataclasses import dataclass
from pathlib import Path

from .base import StepResult
from ..context import BundleContext


@dataclass
class SlowTestsStep:
    """
    Identify slow tests by parsing pytest duration output.

    Outputs:
    - logs/71_slow_tests.txt: Ranked list of slowest tests
    """

    name: str = "slow_tests"

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()

        if not ctx.tools.pytest:
            return StepResult(self.name, "SKIP", 0, "pytest not found")

        tests_dir = ctx.root / "tests"
        if not tests_dir.is_dir():
            return StepResult(self.name, "SKIP", 0, "no tests/ directory")

        threshold = ctx.options.slow_test_threshold
        ctx.emit(f" Identifying tests slower than {threshold}s...")

        output_file = ctx.workdir / "logs" / "71_slow_tests.txt"
        output_file.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Run pytest with duration reporting
            result = subprocess.run(
                [
                    str(ctx.tools.pytest),
                    "-v",
                    "--durations=0",  # Show all durations
                    "--tb=no"  # No traceback to keep output clean
                ],
                cwd=ctx.root,
                capture_output=True,
                text=True,
                timeout=180  # 3 minute timeout
            )

            # Parse durations from output
            slow_tests = self._parse_durations(result.stdout, threshold)

            # Generate report
            with output_file.open("w") as f:
                f.write("=" * 70 + "\n")
                f.write(f"SLOW TEST IDENTIFICATION (threshold: {threshold}s)\n")
                f.write("=" * 70 + "\n\n")

                if slow_tests:
                    f.write(f"Found {len(slow_tests)} test(s) exceeding {threshold}s:\n\n")

                    # Sort by duration (descending)
                    slow_tests.sort(key=lambda x: x[1], reverse=True)

                    f.write(f"{'Duration (s)':<15} {'Test'}\n")
                    f.write("-" * 70 + "\n")

                    for test_name, duration in slow_tests:
                        f.write(f"{duration:>13.2f} {test_name}\n")

                    f.write("\n" + "=" * 70 + "\n")
                    f.write("STATISTICS:\n")
                    f.write("-" * 70 + "\n")
                    total_time = sum(d for _, d in slow_tests)
                    avg_time = total_time / len(slow_tests)
                    f.write(f"Total slow test time: {total_time:.2f}s\n")
                    f.write(f"Average slow test time: {avg_time:.2f}s\n")
                    f.write(f"Slowest test: {slow_tests[0][1]:.2f}s ({slow_tests[0][0]})\n")

                    f.write("\n" + "=" * 70 + "\n")
                    f.write("RECOMMENDATIONS:\n")
                    f.write("- Profile slow tests to identify bottlenecks\n")
                    f.write("- Consider using pytest fixtures to reduce setup time\n")
                    f.write("- Mock external dependencies (DB, API calls, file I/O)\n")
                    f.write("- Use pytest-xdist for parallel test execution\n")
                else:
                    f.write(f"✅ No tests exceed {threshold}s threshold!\n\n")

                    # Still show fastest tests for context
                    all_tests = self._parse_all_durations(result.stdout)
                    if all_tests:
                        all_tests.sort(key=lambda x: x[1], reverse=True)
                        f.write("Top 10 longest tests (all under threshold):\n\n")
                        f.write(f"{'Duration (s)':<15} {'Test'}\n")
                        f.write("-" * 70 + "\n")
                        for test_name, duration in all_tests[:10]:
                            f.write(f"{duration:>13.2f} {test_name}\n")

                # Append raw duration output for reference
                f.write("\n" + "=" * 70 + "\n")
                f.write("RAW PYTEST DURATION OUTPUT:\n")
                f.write("-" * 70 + "\n")
                # Find and include the duration section
                in_duration_section = False
                for line in result.stdout.splitlines():
                    if "slowest durations" in line.lower() or "=== " in line:
                        in_duration_section = True
                    if in_duration_section:
                        f.write(line + "\n")

            elapsed = int((time.time() - start) * 1000)

            if slow_tests:
                return StepResult(self.name, "OK", elapsed, f"{len(slow_tests)} slow tests")
            else:
                return StepResult(self.name, "OK", elapsed)

        except subprocess.TimeoutExpired:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "timeout")
        except Exception as e:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, str(e))

    def _parse_durations(self, output: str, threshold: float) -> list[tuple[str, float]]:
        """Parse pytest --durations output for tests exceeding threshold"""
        slow_tests = []

        # Look for duration lines like: "0.52s call test_file.py::test_name"
        for line in output.splitlines():
            if "s call" in line or "s setup" in line or "s teardown" in line:
                parts = line.split()
                if len(parts) >= 3:
                    try:
                        duration_str = parts[0].rstrip('s')
                        duration = float(duration_str)

                        if duration >= threshold:
                            # Extract test name (usually the last part)
                            test_name = parts[-1]
                            slow_tests.append((test_name, duration))
                    except (ValueError, IndexError):
                        continue

        return slow_tests

    def _parse_all_durations(self, output: str) -> list[tuple[str, float]]:
        """Parse all test durations"""
        all_tests = []

        for line in output.splitlines():
            if "s call" in line:
                parts = line.split()
                if len(parts) >= 3:
                    try:
                        duration_str = parts[0].rstrip('s')
                        duration = float(duration_str)
                        test_name = parts[-1]
                        all_tests.append((test_name, duration))
                    except (ValueError, IndexError):
                        continue

        return all_tests
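
SlowTestsStep._parse_durations keys off the line shape that pytest prints under --durations (duration, phase, test id). As a standalone illustration of the same heuristic, here is a sketch run against a hand-written sample rather than real output from this package:

# Standalone sketch of the duration-parsing heuristic used by SlowTestsStep.
# SAMPLE is hand-written in the usual "0.52s call path::test" shape that pytest
# prints under --durations=0; it is illustrative, not captured output.
SAMPLE = """\
============================= slowest durations =============================
2.31s call     tests/test_api.py::test_end_to_end
0.52s call     tests/test_core.py::test_parse
0.10s setup    tests/test_api.py::test_end_to_end
0.01s call     tests/test_core.py::test_tiny
"""


def parse_durations(output: str, threshold: float) -> list[tuple[str, float]]:
    slow: list[tuple[str, float]] = []
    for line in output.splitlines():
        if "s call" in line or "s setup" in line or "s teardown" in line:
            parts = line.split()
            try:
                duration = float(parts[0].rstrip("s"))
            except (ValueError, IndexError):
                continue
            if duration >= threshold:
                slow.append((parts[-1], duration))
    return slow


print(parse_durations(SAMPLE, threshold=0.5))
# [('tests/test_api.py::test_end_to_end', 2.31), ('tests/test_core.py::test_parse', 0.52)]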

pybundle/steps/test_flakiness.py
ADDED
@@ -0,0 +1,172 @@
"""
Test flakiness detection - Milestone 4 (v1.4.1)
"""
from __future__ import annotations

import subprocess
import time
from dataclasses import dataclass
from pathlib import Path

from .base import StepResult
from ..context import BundleContext


@dataclass
class TestFlakinessStep:
    """
    Run tests multiple times to detect non-deterministic failures (flaky tests).

    Outputs:
    - logs/70_test_flakiness.txt: Report of flaky tests with pass/fail patterns
    """

    name: str = "test_flakiness"

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()

        if not ctx.tools.pytest:
            return StepResult(self.name, "SKIP", 0, "pytest not found")

        tests_dir = ctx.root / "tests"
        if not tests_dir.is_dir():
            return StepResult(self.name, "SKIP", 0, "no tests/ directory")

        runs = ctx.options.test_flakiness_runs
        ctx.emit(f" Running tests {runs}x to detect flakiness...")

        output_file = ctx.workdir / "logs" / "70_test_flakiness.txt"
        output_file.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Run tests multiple times and collect results
            results = []
            test_outcomes = {}  # test_name -> [pass/fail/error]

            for i in range(runs):
                ctx.emit(f" Run {i+1}/{runs}...")
                result = subprocess.run(
                    [str(ctx.tools.pytest), "-v", "--tb=no"],
                    cwd=ctx.root,
                    capture_output=True,
                    text=True,
                    timeout=180  # 3 minute timeout per run
                )
                results.append(result)

                # Parse test results
                self._parse_test_outcomes(result.stdout, test_outcomes, i)

            # Analyze for flakiness
            flaky_tests = self._identify_flaky_tests(test_outcomes)

            # Generate report
            with output_file.open("w") as f:
                f.write("=" * 70 + "\n")
                f.write(f"TEST FLAKINESS DETECTION ({runs} runs)\n")
                f.write("=" * 70 + "\n\n")

                if not test_outcomes:
                    f.write("No test results collected.\n\n")
                    for i, result in enumerate(results):
                        f.write(f"Run {i+1} output:\n")
                        f.write(result.stdout[:500])
                        f.write("\n\n")
                else:
                    total_tests = len(test_outcomes)
                    f.write(f"Total tests analyzed: {total_tests}\n")
                    f.write(f"Flaky tests detected: {len(flaky_tests)}\n\n")

                    if flaky_tests:
                        f.write("=" * 70 + "\n")
                        f.write("FLAKY TESTS (non-deterministic results):\n")
                        f.write("=" * 70 + "\n\n")

                        for test_name, outcomes in flaky_tests.items():
                            pattern = " -> ".join(outcomes)
                            f.write(f"⚠️ {test_name}\n")
                            f.write(f" Pattern: {pattern}\n\n")
                    else:
                        f.write("✅ No flaky tests detected - all tests deterministic!\n\n")

                    # Summary of all tests
                    f.write("=" * 70 + "\n")
                    f.write("ALL TESTS SUMMARY:\n")
                    f.write("=" * 70 + "\n\n")

                    stable_pass = []
                    stable_fail = []
                    flaky = []

                    for test_name, outcomes in test_outcomes.items():
                        unique_outcomes = set(outcomes)
                        if len(unique_outcomes) == 1:
                            if "PASSED" in unique_outcomes:
                                stable_pass.append(test_name)
                            else:
                                stable_fail.append(test_name)
                        else:
                            flaky.append(test_name)

                    f.write(f"Stable passing: {len(stable_pass)}\n")
                    f.write(f"Stable failing: {len(stable_fail)}\n")
                    f.write(f"Flaky: {len(flaky)}\n\n")

                    if stable_fail:
                        f.write("Consistently failing tests:\n")
                        for test in stable_fail[:20]:  # Limit to 20
                            f.write(f" - {test}\n")
                        if len(stable_fail) > 20:
                            f.write(f" ... and {len(stable_fail) - 20} more\n")
                        f.write("\n")

                f.write("=" * 70 + "\n")
                f.write("RECOMMENDATIONS:\n")
                f.write("- Fix flaky tests by removing non-deterministic behavior\n")
                f.write("- Common causes: timing issues, random data, external dependencies\n")
                f.write("- Use pytest-randomly to test with different orderings\n")

            elapsed = int((time.time() - start) * 1000)

            if flaky_tests:
                return StepResult(self.name, "OK", elapsed, f"{len(flaky_tests)} flaky tests")
            else:
                return StepResult(self.name, "OK", elapsed)

        except subprocess.TimeoutExpired:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "timeout")
        except Exception as e:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, str(e))

    def _parse_test_outcomes(self, output: str, test_outcomes: dict, run_num: int) -> None:
        """Parse pytest -v output to extract test results"""
        for line in output.splitlines():
            # Look for pytest verbose output: "test_file.py::test_name PASSED"
            if "::" in line and any(status in line for status in ["PASSED", "FAILED", "ERROR", "SKIPPED"]):
                parts = line.split()
                if len(parts) >= 2:
                    test_name = parts[0]
                    # Find status
                    status = None
                    for s in ["PASSED", "FAILED", "ERROR", "SKIPPED"]:
                        if s in line:
                            status = s
                            break

                    if status:
                        if test_name not in test_outcomes:
                            test_outcomes[test_name] = []
                        test_outcomes[test_name].append(status)

    def _identify_flaky_tests(self, test_outcomes: dict) -> dict:
        """Identify tests with inconsistent results across runs"""
        flaky = {}
        for test_name, outcomes in test_outcomes.items():
            unique_outcomes = set(outcomes)
            # Flaky if not all the same outcome
            if len(unique_outcomes) > 1:
                flaky[test_name] = outcomes
        return flaky
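
The flakiness rule in TestFlakinessStep._identify_flaky_tests is simply "outcomes differ across runs". A self-contained sketch of that classification on an invented outcome map:

# Standalone sketch of the flakiness classification applied by TestFlakinessStep.
# The outcome map below is invented for illustration only.
outcomes = {
    "tests/test_io.py::test_retry":   ["PASSED", "FAILED", "PASSED"],
    "tests/test_io.py::test_read":    ["PASSED", "PASSED", "PASSED"],
    "tests/test_db.py::test_connect": ["FAILED", "FAILED", "FAILED"],
}


def classify(test_outcomes: dict[str, list[str]]) -> dict[str, list[str]]:
    """Return only the tests whose outcomes are not identical across runs."""
    return {name: runs for name, runs in test_outcomes.items() if len(set(runs)) > 1}


print(classify(outcomes))
# {'tests/test_io.py::test_retry': ['PASSED', 'FAILED', 'PASSED']}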
pybundle/steps/tree.py
ADDED
@@ -0,0 +1,116 @@
from __future__ import annotations

import os
import time
from dataclasses import dataclass
from pathlib import Path

from .base import StepResult
from pybundle.context import BundleContext
from pybundle.policy import AIContextPolicy, PathFilter


@dataclass
class TreeStep:
    name: str = "tree (filtered)"
    max_depth: int = 4
    excludes: list[str] | None = None
    policy: AIContextPolicy | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        policy = self.policy or AIContextPolicy()

        # allow overrides
        exclude_dirs = set(self.excludes) if self.excludes else set(policy.exclude_dirs)
        filt = PathFilter(
            exclude_dirs=exclude_dirs,
            exclude_patterns=set(policy.exclude_patterns),
            exclude_file_exts=set(policy.exclude_file_exts),
        )

        out = ctx.metadir / "10_tree.txt"
        out.parent.mkdir(parents=True, exist_ok=True)

        root = ctx.root
        lines: list[str] = []

        for dirpath, dirnames, filenames in os.walk(root):
            dp = Path(dirpath)
            rel_dp = dp.relative_to(root)
            depth = 0 if rel_dp == Path(".") else len(rel_dp.parts)

            if depth > self.max_depth:
                dirnames[:] = []
                continue

            # prune dirs (name + venv-structure)
            kept = []
            for d in dirnames:
                if filt.should_prune_dir(dp, d):
                    continue
                kept.append(d)
            dirnames[:] = kept

            for fn in filenames:
                p = dp / fn
                if not filt.should_include_file(root, p):
                    continue
                lines.append(str(p.relative_to(root)))

        lines.sort()
        out.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")
        dur = int(time.time() - start)
        return StepResult(self.name, "PASS", dur, "python-walk")


@dataclass
class LargestFilesStep:
    name: str = "largest files"
    limit: int = 80
    excludes: list[str] | None = None
    policy: AIContextPolicy | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        policy = self.policy or AIContextPolicy()

        exclude_dirs = set(self.excludes) if self.excludes else set(policy.exclude_dirs)
        filt = PathFilter(
            exclude_dirs=exclude_dirs,
            exclude_patterns=set(policy.exclude_patterns),
            exclude_file_exts=set(policy.exclude_file_exts),
        )

        out = ctx.metadir / "11_largest_files.txt"
        out.parent.mkdir(parents=True, exist_ok=True)

        files: list[tuple[int, str]] = []
        root = ctx.root

        for dirpath, dirnames, filenames in os.walk(root):
            dp = Path(dirpath)

            kept = []
            for d in dirnames:
                if filt.should_prune_dir(dp, d):
                    continue
                kept.append(d)
            dirnames[:] = kept

            for fn in filenames:
                p = dp / fn
                if not filt.should_include_file(root, p):
                    continue
                try:
                    size = p.stat().st_size
                except OSError:
                    continue
                files.append((size, str(p.relative_to(root))))

        files.sort(key=lambda x: x[0], reverse=True)
        lines = [f"{size}\t{path}" for size, path in files[: self.limit]]
        out.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")

        dur = int(time.time() - start)
        return StepResult(self.name, "PASS", dur, f"count={len(files)}")
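
TreeStep and LargestFilesStep both rely on PathFilter and AIContextPolicy from pybundle/policy.py, which this section does not reproduce. The walk itself is the standard os.walk pattern of pruning dirnames in place and cutting off below a maximum depth; a minimal self-contained sketch with a hard-coded exclusion set standing in for the policy:

# Minimal sketch of the os.walk pattern TreeStep uses: prune directories in place
# so os.walk never descends into them, and stop below a maximum depth.
# EXCLUDE is a hard-coded stand-in for the AIContextPolicy/PathFilter rules.
import os
from pathlib import Path

EXCLUDE = {".git", ".venv", "__pycache__", "node_modules", "dist", "build"}


def list_tree(root: Path, max_depth: int = 4) -> list[str]:
    lines: list[str] = []
    for dirpath, dirnames, filenames in os.walk(root):
        rel = Path(dirpath).relative_to(root)
        depth = 0 if rel == Path(".") else len(rel.parts)
        if depth > max_depth:
            dirnames[:] = []          # do not descend any further
            continue
        dirnames[:] = [d for d in dirnames if d not in EXCLUDE]  # in-place prune
        lines.extend(str(rel / fn) for fn in filenames)
    return sorted(lines)


print("\n".join(list_tree(Path("."), max_depth=2)))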

pybundle/steps/unused_deps.py
ADDED
@@ -0,0 +1,112 @@
from __future__ import annotations

import json
import subprocess  # nosec B404 - Required for tool execution, paths validated
import time
from dataclasses import dataclass
from pathlib import Path

from .base import StepResult
from ..context import BundleContext
from ..tools import which


@dataclass
class UnusedDependenciesStep:
    name: str = "unused dependencies"
    outfile: str = "meta/31_unused_packages.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        python = ctx.tools.python
        if not python:
            out.write_text("python not found; skipping\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "missing python")

        try:
            # Get all installed packages
            pip_freeze_result = subprocess.run(  # nosec B603
                [python, "-m", "pip", "freeze"],
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if pip_freeze_result.returncode != 0:
                out.write_text(f"pip freeze failed: {pip_freeze_result.stderr}\n", encoding="utf-8")
                return StepResult(self.name, "FAIL", 0, "pip freeze failed")

            # Parse installed packages (normalize names)
            installed_packages = set()
            for line in pip_freeze_result.stdout.splitlines():
                if line and not line.startswith("-") and "==" in line:
                    pkg_name = line.split("==")[0].strip().lower().replace("_", "-")
                    installed_packages.add(pkg_name)

            # Get imported modules from source code
            imported_modules = self._get_imported_modules(ctx.root)

            # Find unused packages (installed but not imported)
            # Note: This is a heuristic - some packages are used indirectly
            unused = sorted(installed_packages - imported_modules)

            # Write results
            with out.open("w", encoding="utf-8") as f:
                f.write("=" * 70 + "\n")
                f.write("UNUSED DEPENDENCIES ANALYSIS\n")
                f.write("=" * 70 + "\n\n")
                f.write(f"Total installed packages: {len(installed_packages)}\n")
                f.write(f"Total imported modules: {len(imported_modules)}\n")
                f.write(f"Potentially unused packages: {len(unused)}\n\n")

                if unused:
                    f.write("Packages installed but not directly imported:\n")
                    f.write("(Note: Some may be indirect dependencies or plugins)\n\n")
                    for pkg in unused:
                        f.write(f" - {pkg}\n")
                else:
                    f.write("No obviously unused packages detected.\n")

            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "OK", elapsed, None)

        except subprocess.TimeoutExpired:
            out.write_text("Analysis timed out\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", int((time.time() - start) * 1000), "timeout")
        except Exception as e:
            out.write_text(f"Error: {e}\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", int((time.time() - start) * 1000), str(e))

    def _get_imported_modules(self, root: Path) -> set[str]:
        """Extract top-level module names from import statements."""
        imported = set()

        for py_file in root.rglob("*.py"):
            # Skip venv and common excluded directories
            parts = set(py_file.parts)
            if any(x in parts for x in [".venv", "venv", "__pycache__", "node_modules",
                                        "dist", "build", "artifacts", ".git", ".tox"]):
                continue

            try:
                content = py_file.read_text(encoding="utf-8", errors="ignore")
                for line in content.splitlines():
                    line = line.strip()
                    # Match: import foo, from foo import bar
                    if line.startswith("import "):
                        module = line[7:].split()[0].split(".")[0].split(",")[0].strip()
                        imported.add(module.lower().replace("_", "-"))
                    elif line.startswith("from "):
                        parts = line.split()
                        if len(parts) >= 2:
                            module = parts[1].split(".")[0].strip()
                            imported.add(module.lower().replace("_", "-"))
            except Exception:
                continue

        return imported
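
As the step itself notes, comparing normalized "pip freeze" names against top-level import names is a heuristic: it misses distributions whose import name differs from the project name (the yaml module is shipped by the PyYAML distribution, for example). On Python 3.10+, importlib.metadata can map top-level modules to the distributions that provide them; the sketch below shows that alternative mapping and is not part of the packaged step:

# Alternative illustration (not what UnusedDependenciesStep does): map top-level
# import names to the distributions that provide them via importlib.metadata
# (Python 3.10+), which handles cases like "yaml" -> "PyYAML".
from importlib.metadata import packages_distributions


def distributions_for_imports(import_names: set[str]) -> set[str]:
    """Return the normalized distribution names backing the given top-level modules."""
    mapping = packages_distributions()  # e.g. {"yaml": ["PyYAML"], ...}
    used: set[str] = set()
    for mod in import_names:
        for dist in mapping.get(mod, []):
            used.add(dist.lower().replace("_", "-"))
    return used


print(distributions_for_imports({"yaml", "requests"}))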

pybundle/steps/vulture.py
ADDED
@@ -0,0 +1,83 @@
from __future__ import annotations

import subprocess  # nosec B404 - Required for tool execution, paths validated
import time
from dataclasses import dataclass
from pathlib import Path

from .base import StepResult
from ..context import BundleContext
from ..tools import which


def _repo_has_py_files(root: Path) -> bool:
    """Fast check if there are Python files to scan."""
    for p in root.rglob("*.py"):
        parts = set(p.parts)
        if (
            ".venv" not in parts
            and "__pycache__" not in parts
            and "node_modules" not in parts
            and "dist" not in parts
            and "build" not in parts
            and "artifacts" not in parts
        ):
            return True
    return False


@dataclass
class VultureStep:
    name: str = "vulture"
    target: str = "."
    outfile: str = "logs/50_vulture.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        vulture = which("vulture")
        if not vulture:
            out.write_text(
                "vulture not found; skipping (pip install vulture)\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing vulture")

        if not _repo_has_py_files(ctx.root):
            out.write_text("no .py files detected; skipping vulture\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "no python files")

        target_path = ctx.root / self.target
        cmd = [
            vulture,
            str(target_path),
            "--exclude", "*venv*,*.venv*,.pybundle-venv,venv,env,.env,__pycache__",
            "--min-confidence", "60",  # Configurable confidence threshold
            "--sort-by-size",
        ]

        try:
            result = subprocess.run(  # nosec B603 - Using full path from which()
                cmd,
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                timeout=120,
            )
            out.write_text(result.stdout, encoding="utf-8")
            elapsed = int((time.time() - start) * 1000)

            # Vulture returns 0 if no dead code found, 1 if dead code found
            # Both are "success" for our purposes
            if result.returncode in (0, 1):
                return StepResult(self.name, "OK", elapsed, None)
            else:
                return StepResult(self.name, "FAIL", elapsed, f"exit {result.returncode}")
        except subprocess.TimeoutExpired:
            out.write_text("vulture timed out after 120s\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", 120000, "timeout")
        except Exception as e:
            out.write_text(f"vulture error: {e}\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", 0, str(e))
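
The vulture report written to logs/50_vulture.txt is plain text. Assuming vulture's usual "path:line: message (NN% confidence ...)" line shape, a downstream consumer of the bundle could filter findings as sketched below; the sample lines are invented for illustration, not real output from this package:

# Standalone sketch of post-processing a vulture report such as logs/50_vulture.txt.
# Assumes the common "path:line: message (NN% confidence ...)" line shape; SAMPLE
# is hand-written for illustration.
import re

SAMPLE = """\
example/app.py:12: unused variable 'opts' (60% confidence)
example/util.py:3: unused import 'json' (90% confidence)
"""

LINE_RE = re.compile(r"^(?P<path>.+?):(?P<line>\d+): (?P<message>.+?) \((?P<conf>\d+)% confidence")


def findings_at_or_above(report: str, min_confidence: int) -> list[dict]:
    findings = []
    for line in report.splitlines():
        m = LINE_RE.match(line)
        if m and int(m.group("conf")) >= min_confidence:
            findings.append(m.groupdict())
    return findings


print(findings_at_or_above(SAMPLE, min_confidence=80))
# [{'path': 'example/util.py', 'line': '3', 'message': "unused import 'json'", 'conf': '90'}]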