gwc-pybundle 1.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwc-pybundle might be problematic. Click here for more details.
- gwc_pybundle-1.4.5.dist-info/METADATA +876 -0
- gwc_pybundle-1.4.5.dist-info/RECORD +55 -0
- gwc_pybundle-1.4.5.dist-info/WHEEL +5 -0
- gwc_pybundle-1.4.5.dist-info/entry_points.txt +2 -0
- gwc_pybundle-1.4.5.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-1.4.5.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +365 -0
- pybundle/context.py +362 -0
- pybundle/doctor.py +148 -0
- pybundle/filters.py +178 -0
- pybundle/manifest.py +77 -0
- pybundle/packaging.py +45 -0
- pybundle/policy.py +132 -0
- pybundle/profiles.py +340 -0
- pybundle/roadmap_model.py +42 -0
- pybundle/roadmap_scan.py +295 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +163 -0
- pybundle/steps/__init__.py +26 -0
- pybundle/steps/bandit.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +293 -0
- pybundle/steps/coverage.py +101 -0
- pybundle/steps/cprofile_step.py +155 -0
- pybundle/steps/dependency_sizes.py +120 -0
- pybundle/steps/duplication.py +94 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/handoff_md.py +167 -0
- pybundle/steps/import_time.py +165 -0
- pybundle/steps/interrogate.py +84 -0
- pybundle/steps/license_scan.py +96 -0
- pybundle/steps/line_profiler.py +108 -0
- pybundle/steps/memory_profile.py +173 -0
- pybundle/steps/mutation_testing.py +136 -0
- pybundle/steps/mypy.py +60 -0
- pybundle/steps/pip_audit.py +45 -0
- pybundle/steps/pipdeptree.py +61 -0
- pybundle/steps/pylance.py +562 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/radon.py +121 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +153 -0
- pybundle/steps/ruff.py +111 -0
- pybundle/steps/shell.py +74 -0
- pybundle/steps/slow_tests.py +170 -0
- pybundle/steps/test_flakiness.py +172 -0
- pybundle/steps/tree.py +116 -0
- pybundle/steps/unused_deps.py +112 -0
- pybundle/steps/vulture.py +83 -0
- pybundle/tools.py +63 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CPU profiling with cProfile - Milestone 3 (v1.4.0)
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import subprocess
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .base import StepResult
|
|
12
|
+
from ..context import BundleContext
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class CProfileStep:
    """
    Run cProfile on the project entry point or test suite to identify CPU bottlenecks.

    Outputs:
    - logs/60_cprofile.txt: Top 50 slowest functions
    - meta/60_cprofile.stats: Binary stats file for further analysis
    """

    name: str = "cprofile"

    def run(self, ctx: BundleContext) -> StepResult:
        """Profile the configured entry point (or pytest) and write reports.

        Returns a StepResult with status OK, SKIP (profiling disabled, no
        target, or missing entry point), or FAIL (timeout / no stats file).
        """
        start = time.time()

        if ctx.options.no_profile:
            return StepResult(self.name, "SKIP", 0, "profiling disabled")

        # Determine default profiling target
        entry_point = ctx.options.profile_entry_point
        if not entry_point:
            # Default: profile pytest, but only if tests/ exists
            tests_dir = ctx.root / "tests"
            if not tests_dir.is_dir():
                return StepResult(self.name, "SKIP", 0, "no tests/ and no entry point")

        stats_file = ctx.workdir / "meta" / "60_cprofile.stats"
        # BUGFIX: "cProfile -o" does not create parent directories; ensure
        # meta/ exists before running, or the stats file can never be written.
        stats_file.parent.mkdir(parents=True, exist_ok=True)
        # BUGFIX: remove a stale stats file from a previous run so a failed
        # profiling run is not misreported as OK just because an old file exists.
        try:
            stats_file.unlink()
        except FileNotFoundError:
            pass

        # Shared command prefix: interpreter + cProfile writing binary stats
        # (was duplicated three times below).
        base_cmd = [str(ctx.tools.python), "-m", "cProfile", "-o", str(stats_file)]

        # Determine what to profile
        if entry_point:
            target_path = Path(entry_point)
            if not target_path.is_absolute():
                target_path = ctx.root / entry_point

            if not target_path.exists():
                return StepResult(self.name, "SKIP", 0, f"entry point not found: {entry_point}")

            if target_path.is_file():
                # Profile a specific script
                cmd = base_cmd + [str(target_path)]
                desc = f"Profiling {target_path.name}"
            else:
                # Assume it's a directory, profile pytest over it
                cmd = base_cmd + ["-m", "pytest", str(target_path), "-q"]
                desc = f"Profiling pytest in {target_path.name}/"
        else:
            # Default: profile the whole test suite
            cmd = base_cmd + ["-m", "pytest", "-q"]
            desc = "Profiling pytest"

        ctx.emit(f" {desc}")

        try:
            # Run profiling
            result = subprocess.run(
                cmd,
                cwd=ctx.root,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout for profiling
            )

            # Generate human-readable report
            if stats_file.exists():
                self._generate_report(stats_file, ctx.workdir)
                elapsed = int((time.time() - start) * 1000)
                return StepResult(self.name, "OK", elapsed)
            else:
                # Still write output for debugging
                output_file = ctx.workdir / "logs" / "60_cprofile.txt"
                output_file.parent.mkdir(parents=True, exist_ok=True)

                with output_file.open("w") as f:
                    f.write("=" * 70 + "\n")
                    f.write("CPU PROFILING FAILED\n")
                    f.write("=" * 70 + "\n\n")
                    f.write("STDOUT:\n")
                    f.write(result.stdout)
                    f.write("\n\nSTDERR:\n")
                    f.write(result.stderr)

                elapsed = int((time.time() - start) * 1000)
                return StepResult(self.name, "FAIL", elapsed, "stats file not created")

        except subprocess.TimeoutExpired:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, "timeout")
        except Exception as e:
            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "FAIL", elapsed, str(e))

    def _generate_report(self, stats_file: Path, workdir: Path) -> None:
        """Write a human-readable top-50 report from the binary stats file.

        Produces logs/60_cprofile.txt under *workdir*, sorted both by
        cumulative time and by total (self) time.
        """
        import pstats

        output_file = workdir / "logs" / "60_cprofile.txt"
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with output_file.open("w") as f:
            f.write("=" * 70 + "\n")
            f.write("TOP 50 SLOWEST FUNCTIONS (CPU PROFILING)\n")
            f.write("=" * 70 + "\n\n")

            # Load stats; pstats prints to the provided stream
            stats = pstats.Stats(str(stats_file), stream=f)

            # Remove directory paths for cleaner output
            stats.strip_dirs()

            # Sort by cumulative time and print top 50
            f.write("Sorted by cumulative time:\n")
            f.write("-" * 70 + "\n")
            stats.sort_stats('cumulative')
            stats.print_stats(50)

            f.write("\n" + "=" * 70 + "\n")
            f.write("Sorted by total time (time spent in function itself):\n")
            f.write("-" * 70 + "\n")
            stats.sort_stats('time')
            stats.print_stats(50)

            f.write("\n" + "=" * 70 + "\n")
            f.write("Full binary stats saved to: meta/60_cprofile.stats\n")
            f.write("Analyze with: python -m pstats meta/60_cprofile.stats\n")
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess # nosec B404 - Required for tool execution, paths validated
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from .base import StepResult
|
|
8
|
+
from ..context import BundleContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class DependencySizesStep:
    """
    Report the largest installed dependencies by on-disk size.

    Uses ``pip list --format=json`` to enumerate packages, ``pip show`` to
    locate each one, sums file sizes under its import directory, and writes
    the ``top_n`` largest entries to ``outfile``.
    """

    name: str = "dependency sizes"
    outfile: str = "meta/33_dependency_sizes.txt"
    top_n: int = 50  # Show top N largest packages

    def run(self, ctx: BundleContext) -> StepResult:
        """Measure installed package sizes and write a ranked report."""
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        python = ctx.tools.python
        if not python:
            out.write_text("python not found; skipping\n", encoding="utf-8")
            return StepResult(self.name, "SKIP", 0, "missing python")

        # Hoisted to function scope: these were imported mid-function and
        # (for Path) re-imported on every loop iteration in the original.
        import json
        from pathlib import Path

        try:
            # Get list of installed packages
            list_result = subprocess.run(  # nosec B603
                [python, "-m", "pip", "list", "--format=json"],
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if list_result.returncode != 0:
                out.write_text(f"pip list failed: {list_result.stderr}\n", encoding="utf-8")
                return StepResult(self.name, "FAIL", 0, "pip list failed")

            packages = json.loads(list_result.stdout)

            # Get size for each package (one `pip show` per package)
            package_sizes = []
            for pkg in packages:
                pkg_name = pkg["name"]
                try:
                    show_result = subprocess.run(  # nosec B603
                        [python, "-m", "pip", "show", pkg_name],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True,
                        timeout=5,
                    )
                    if show_result.returncode != 0:
                        continue

                    # Parse Location from pip show output
                    location = None
                    for line in show_result.stdout.splitlines():
                        if line.startswith("Location:"):
                            location = line.split(":", 1)[1].strip()
                            break
                    if not location:
                        continue

                    # NOTE(review): assumes the import directory matches the
                    # distribution name (with "-" -> "_"); packages whose import
                    # name differs are silently skipped — confirm acceptable.
                    pkg_path = Path(location) / pkg_name.replace("-", "_")
                    if not pkg_path.exists():
                        pkg_path = Path(location) / pkg_name

                    if pkg_path.exists() and pkg_path.is_dir():
                        size = sum(f.stat().st_size for f in pkg_path.rglob("*") if f.is_file())
                        package_sizes.append((pkg_name, pkg["version"], size))
                except Exception:
                    # Skip packages that fail
                    continue

            # Sort by size (descending)
            package_sizes.sort(key=lambda x: x[2], reverse=True)

            # Write results
            with out.open("w", encoding="utf-8") as f:
                f.write("=" * 70 + "\n")
                f.write(f"TOP {min(self.top_n, len(package_sizes))} LARGEST DEPENDENCIES\n")
                f.write("=" * 70 + "\n\n")
                f.write(f"Total packages analyzed: {len(packages)}\n")
                f.write(f"Packages with size data: {len(package_sizes)}\n\n")

                if package_sizes:
                    # Calculate total size
                    total_size = sum(size for _, _, size in package_sizes)
                    f.write(f"Total size: {self._format_size(total_size)}\n\n")

                    f.write(f"{'Package':<40} {'Version':<15} {'Size':>15}\n")
                    f.write("-" * 70 + "\n")

                    for pkg_name, version, size in package_sizes[:self.top_n]:
                        f.write(f"{pkg_name:<40} {version:<15} {self._format_size(size):>15}\n")
                else:
                    f.write("No package size data available.\n")

            elapsed = int((time.time() - start) * 1000)
            return StepResult(self.name, "OK", elapsed, None)

        except subprocess.TimeoutExpired:
            out.write_text("Analysis timed out\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", int((time.time() - start) * 1000), "timeout")
        except Exception as e:
            out.write_text(f"Error: {e}\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", int((time.time() - start) * 1000), str(e))

    def _format_size(self, size_bytes: float) -> str:
        """Format a byte count in human-readable form (B/KB/MB/GB/TB)."""
        # Annotation corrected: the value becomes a float after division.
        for unit in ["B", "KB", "MB", "GB"]:
            if size_bytes < 1024.0:
                return f"{size_bytes:.1f} {unit}"
            size_bytes /= 1024.0
        return f"{size_bytes:.1f} TB"
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess # nosec B404 - Required for tool execution, paths validated
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .base import StepResult
|
|
9
|
+
from ..context import BundleContext
|
|
10
|
+
from ..tools import which
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _repo_has_py_files(root: Path) -> bool:
|
|
14
|
+
"""Fast check if there are Python files to scan."""
|
|
15
|
+
for p in root.rglob("*.py"):
|
|
16
|
+
parts = set(p.parts)
|
|
17
|
+
if (
|
|
18
|
+
".venv" not in parts
|
|
19
|
+
and "__pycache__" not in parts
|
|
20
|
+
and "node_modules" not in parts
|
|
21
|
+
and "dist" not in parts
|
|
22
|
+
and "build" not in parts
|
|
23
|
+
and "artifacts" not in parts
|
|
24
|
+
):
|
|
25
|
+
return True
|
|
26
|
+
return False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class DuplicationStep:
    """
    Detect duplicated code with pylint's duplicate-code checker only.

    Writes the raw pylint output to ``outfile``; skips when pylint is not
    installed or when the repo contains no Python files.
    """

    name: str = "duplication"
    target: str = "."
    outfile: str = "logs/53_duplication.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        """Run pylint restricted to duplicate-code and record its output."""
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        pylint = which("pylint")
        if not pylint:
            out.write_text(
                "pylint not found; skipping (pip install pylint)\n",
                encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "missing pylint")

        if not _repo_has_py_files(ctx.root):
            out.write_text(
                "no .py files detected; skipping duplication check\n",
                encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "no python files")

        target_path = ctx.root / self.target
        cmd = [
            pylint,
            str(target_path),
            "--disable=all",  # Disable all other checks
            "--enable=duplicate-code",  # Only check for duplication
            "--min-similarity-lines=6",  # Minimum 6 lines to be considered duplicate
        ]

        try:
            result = subprocess.run(  # nosec B603 - Using full path from which()
                cmd,
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                timeout=180,  # Duplication detection can be slower
            )
            out.write_text(result.stdout, encoding="utf-8")
            elapsed = int((time.time() - start) * 1000)

            # pylint's exit status is a bitmask: 1=fatal, 2=error, 4=warning,
            # 8=refactor, 16=convention, 32=usage error.  Any combination of
            # the message bits (0..31) still produced usable output; only a
            # usage error (bit 32) means pylint itself could not run.
            # BUGFIX: the previous allow-list (0,1,2,4,8,16,24,32) rejected
            # valid bit combinations such as 3 or 6, and accepted usage errors.
            if 0 <= result.returncode < 32:
                return StepResult(self.name, "OK", elapsed, None)
            return StepResult(
                self.name,
                "FAIL",
                elapsed,
                f"exit {result.returncode}"
            )
        except subprocess.TimeoutExpired:
            out.write_text("duplication check timed out after 180s\n", encoding="utf-8")
            # BUGFIX: report the measured elapsed time, not a hardcoded 180000.
            return StepResult(self.name, "FAIL", int((time.time() - start) * 1000), "timeout")
        except Exception as e:
            out.write_text(f"duplication check error: {e}\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", int((time.time() - start) * 1000), str(e))
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .base import StepResult
|
|
9
|
+
from ..context import BundleContext
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
DEFAULT_EXCLUDE_PREFIXES = (
|
|
13
|
+
".git/",
|
|
14
|
+
".venv/",
|
|
15
|
+
".mypy_cache/",
|
|
16
|
+
".ruff_cache/",
|
|
17
|
+
".pytest_cache/",
|
|
18
|
+
"__pycache__/",
|
|
19
|
+
"node_modules/",
|
|
20
|
+
"dist/",
|
|
21
|
+
"build/",
|
|
22
|
+
"artifacts/",
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Patterns based on your bash sed rules:
|
|
26
|
+
# 1) tool-style: path:line(:col)...
|
|
27
|
+
_RE_COLON_LINE = re.compile(r"^([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\d+(?::\d+)?\b.*$")
|
|
28
|
+
|
|
29
|
+
# 2) pytest traceback: File "path", line N
|
|
30
|
+
_RE_PYTEST_FILE = re.compile(r'^\s*File "([^"]+)", line \d+\b.*$')
|
|
31
|
+
|
|
32
|
+
# 3) mypy: (optional "mypy:") ./path:line: (error|note|warning):
|
|
33
|
+
_RE_MYPY_LINE = re.compile(
|
|
34
|
+
r"^(?:mypy:\s*)?(?:\./)?([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\d+:\s*(?:error|note|warning):.*$"
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# 4) mypy rare: path: (error|note|warning): ...
|
|
38
|
+
_RE_MYPY_NOLINE = re.compile(
|
|
39
|
+
r"^(?:mypy:\s*)?(?:\./)?([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\s*(?:error|note|warning):.*$"
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _normalize_to_repo_rel(root: Path, p: str) -> str | None:
|
|
44
|
+
p = p.strip()
|
|
45
|
+
if not p:
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
# remove leading ./ for consistency
|
|
49
|
+
if p.startswith("./"):
|
|
50
|
+
p = p[2:]
|
|
51
|
+
|
|
52
|
+
# absolute path -> must be under repo root
|
|
53
|
+
if p.startswith("/"):
|
|
54
|
+
try:
|
|
55
|
+
rp = Path(p).resolve()
|
|
56
|
+
rr = rp.relative_to(root.resolve())
|
|
57
|
+
return str(rr).replace("\\", "/")
|
|
58
|
+
except Exception:
|
|
59
|
+
return None
|
|
60
|
+
|
|
61
|
+
# relative path
|
|
62
|
+
return p.replace("\\", "/")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _is_allowed_repo_file(root: Path, rel: str) -> bool:
|
|
66
|
+
rel = rel.lstrip("./")
|
|
67
|
+
if not rel or rel.endswith("/"):
|
|
68
|
+
return False
|
|
69
|
+
|
|
70
|
+
# exclude common junk
|
|
71
|
+
for pref in DEFAULT_EXCLUDE_PREFIXES:
|
|
72
|
+
if rel.startswith(pref):
|
|
73
|
+
return False
|
|
74
|
+
if "/__pycache__/" in f"/{rel}/":
|
|
75
|
+
return False
|
|
76
|
+
|
|
77
|
+
# must exist and be a file inside repo
|
|
78
|
+
fp = (root / rel).resolve()
|
|
79
|
+
try:
|
|
80
|
+
fp.relative_to(root.resolve())
|
|
81
|
+
except Exception:
|
|
82
|
+
return False
|
|
83
|
+
|
|
84
|
+
return fp.is_file()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _extract_paths_from_text(text: str) -> list[str]:
|
|
88
|
+
out: list[str] = []
|
|
89
|
+
for line in text.splitlines():
|
|
90
|
+
m = _RE_COLON_LINE.match(line)
|
|
91
|
+
if m:
|
|
92
|
+
out.append(m.group(1))
|
|
93
|
+
continue
|
|
94
|
+
|
|
95
|
+
m = _RE_PYTEST_FILE.match(line)
|
|
96
|
+
if m:
|
|
97
|
+
out.append(m.group(1))
|
|
98
|
+
continue
|
|
99
|
+
|
|
100
|
+
m = _RE_MYPY_LINE.match(line)
|
|
101
|
+
if m:
|
|
102
|
+
out.append(m.group(1))
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
m = _RE_MYPY_NOLINE.match(line)
|
|
106
|
+
if m:
|
|
107
|
+
out.append(m.group(1))
|
|
108
|
+
continue
|
|
109
|
+
|
|
110
|
+
return out
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class ErrorReferencedFilesStep:
    """
    Copy repository files referenced by errors in earlier tool logs.

    Scans a set of log files (ruff, mypy, pytest by default) for
    path:line-style references, normalizes them to repo-relative paths, and
    copies up to ``max_files`` of the existing ones into ``src/_error_refs``
    inside the bundle, along with a list file and a summary report.
    """

    name: str = "collect error-referenced files"
    # Upper bound on how many referenced files get copied into the bundle.
    max_files: int = 250
    # Paths are relative to the bundle workdir; None -> built-in default set.
    log_files: list[str] | None = None

    def run(self, ctx: BundleContext) -> StepResult:
        """Scan logs, collect referenced repo files, and copy them over."""
        start = time.time()

        # Default set aligned to our step numbers
        log_files = self.log_files or [
            "logs/31_ruff_check.txt",
            "logs/32_ruff_format_check.txt",
            "logs/33_mypy.txt",
            "logs/34_pytest_q.txt",
        ]

        out_list = ctx.workdir / "error_files_from_logs.txt"
        out_count = ctx.workdir / "error_refs_count.txt"
        report = ctx.metadir / "60_error_refs_report.txt"

        dest_root = ctx.srcdir / "_error_refs"
        dest_root.mkdir(parents=True, exist_ok=True)

        # Collect candidate paths from every readable log
        candidates: set[str] = set()
        scanned = 0
        missing_logs = 0

        for lf in log_files:
            lp = ctx.workdir / lf
            if not lp.is_file():
                missing_logs += 1
                continue
            scanned += 1
            try:
                txt = lp.read_text(encoding="utf-8", errors="replace")
            except Exception:
                continue

            for raw in _extract_paths_from_text(txt):
                norm = _normalize_to_repo_rel(ctx.root, raw)
                if norm:
                    candidates.add(norm)

        # Normalize / filter to real repo files
        allowed = sorted([p for p in candidates if _is_allowed_repo_file(ctx.root, p)])

        # Write list file (even if empty)
        out_list.write_text(
            "\n".join(allowed) + ("\n" if allowed else ""), encoding="utf-8"
        )

        # Copy up to max_files
        copied = 0
        for rel in allowed:
            if copied >= self.max_files:
                break
            src = ctx.root / rel
            dst = dest_root / rel
            dst.parent.mkdir(parents=True, exist_ok=True)
            try:
                # NOTE: byte-for-byte copy only; unlike shutil.copy2 this does
                # NOT preserve mode or timestamps (the old comment was wrong).
                dst.write_bytes(src.read_bytes())
                copied += 1
            except Exception:
                continue

        out_count.write_text(f"{copied}\n", encoding="utf-8")

        report.write_text(
            "\n".join(
                [
                    f"scanned_logs={scanned}",
                    f"missing_logs={missing_logs}",
                    f"candidates_total={len(candidates)}",
                    f"allowed_repo_files={len(allowed)}",
                    f"copied={copied}",
                    f"max_files={self.max_files}",
                    "dest=src/_error_refs",
                ]
            )
            + "\n",
            encoding="utf-8",
        )

        # BUGFIX: every sibling step reports elapsed milliseconds and the
        # status vocabulary OK/SKIP/FAIL; this one reported whole seconds
        # and a one-off "PASS" status.  Align it.
        elapsed = int((time.time() - start) * 1000)
        note = f"allowed={len(allowed)} copied={copied}"
        if copied >= self.max_files:
            note += " (HIT MAX)"
        return StepResult(self.name, "OK", elapsed, note)
|