gwc_pybundle-2.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of gwc-pybundle has been flagged as a potentially problematic release.

Files changed (82)
  1. gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
  2. gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
  3. gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
  4. gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
  5. gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
  6. gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
  7. pybundle/__init__.py +0 -0
  8. pybundle/__main__.py +4 -0
  9. pybundle/cli.py +546 -0
  10. pybundle/context.py +404 -0
  11. pybundle/doctor.py +148 -0
  12. pybundle/filters.py +228 -0
  13. pybundle/manifest.py +77 -0
  14. pybundle/packaging.py +45 -0
  15. pybundle/policy.py +132 -0
  16. pybundle/profiles.py +454 -0
  17. pybundle/roadmap_model.py +42 -0
  18. pybundle/roadmap_scan.py +328 -0
  19. pybundle/root_detect.py +14 -0
  20. pybundle/runner.py +180 -0
  21. pybundle/steps/__init__.py +26 -0
  22. pybundle/steps/ai_context.py +791 -0
  23. pybundle/steps/api_docs.py +219 -0
  24. pybundle/steps/asyncio_analysis.py +358 -0
  25. pybundle/steps/bandit.py +72 -0
  26. pybundle/steps/base.py +20 -0
  27. pybundle/steps/blocking_call_detection.py +291 -0
  28. pybundle/steps/call_graph.py +219 -0
  29. pybundle/steps/compileall.py +76 -0
  30. pybundle/steps/config_docs.py +319 -0
  31. pybundle/steps/config_validation.py +302 -0
  32. pybundle/steps/container_image.py +294 -0
  33. pybundle/steps/context_expand.py +272 -0
  34. pybundle/steps/copy_pack.py +293 -0
  35. pybundle/steps/coverage.py +101 -0
  36. pybundle/steps/cprofile_step.py +166 -0
  37. pybundle/steps/dependency_sizes.py +136 -0
  38. pybundle/steps/django_checks.py +214 -0
  39. pybundle/steps/dockerfile_lint.py +282 -0
  40. pybundle/steps/dockerignore.py +311 -0
  41. pybundle/steps/duplication.py +103 -0
  42. pybundle/steps/env_completeness.py +269 -0
  43. pybundle/steps/env_var_usage.py +253 -0
  44. pybundle/steps/error_refs.py +204 -0
  45. pybundle/steps/event_loop_patterns.py +280 -0
  46. pybundle/steps/exception_patterns.py +190 -0
  47. pybundle/steps/fastapi_integration.py +250 -0
  48. pybundle/steps/flask_debugging.py +312 -0
  49. pybundle/steps/git_analytics.py +315 -0
  50. pybundle/steps/handoff_md.py +176 -0
  51. pybundle/steps/import_time.py +175 -0
  52. pybundle/steps/interrogate.py +106 -0
  53. pybundle/steps/license_scan.py +96 -0
  54. pybundle/steps/line_profiler.py +117 -0
  55. pybundle/steps/link_validation.py +287 -0
  56. pybundle/steps/logging_analysis.py +233 -0
  57. pybundle/steps/memory_profile.py +176 -0
  58. pybundle/steps/migration_history.py +336 -0
  59. pybundle/steps/mutation_testing.py +141 -0
  60. pybundle/steps/mypy.py +103 -0
  61. pybundle/steps/orm_optimization.py +316 -0
  62. pybundle/steps/pip_audit.py +45 -0
  63. pybundle/steps/pipdeptree.py +62 -0
  64. pybundle/steps/pylance.py +562 -0
  65. pybundle/steps/pytest.py +66 -0
  66. pybundle/steps/query_pattern_analysis.py +334 -0
  67. pybundle/steps/radon.py +161 -0
  68. pybundle/steps/repro_md.py +161 -0
  69. pybundle/steps/rg_scans.py +78 -0
  70. pybundle/steps/roadmap.py +153 -0
  71. pybundle/steps/ruff.py +117 -0
  72. pybundle/steps/secrets_detection.py +235 -0
  73. pybundle/steps/security_headers.py +309 -0
  74. pybundle/steps/shell.py +74 -0
  75. pybundle/steps/slow_tests.py +178 -0
  76. pybundle/steps/sqlalchemy_validation.py +269 -0
  77. pybundle/steps/test_flakiness.py +184 -0
  78. pybundle/steps/tree.py +116 -0
  79. pybundle/steps/type_coverage.py +277 -0
  80. pybundle/steps/unused_deps.py +211 -0
  81. pybundle/steps/vulture.py +167 -0
  82. pybundle/tools.py +63 -0
pybundle/steps/sqlalchemy_validation.py ADDED
@@ -0,0 +1,269 @@
+ """
+ Step: SQLAlchemy Validation
+ Validate SQLAlchemy models and relationships.
+ """
+
+ import re
+ from pathlib import Path
+ from typing import Dict, List, Any, Optional, Tuple
+
+ from .base import Step, StepResult
+
+
+ class SQLAlchemyValidationStep(Step):
+     """Validate SQLAlchemy model definitions and relationships."""
+
+     name = "sqlalchemy validation"
+
+     def run(self, ctx: "BundleContext") -> StepResult:  # type: ignore[name-defined]
+         """Validate SQLAlchemy models."""
+         import time
+
+         start = time.time()
+
+         root = ctx.root
+
+         # Analyze models
+         models = self._find_sqlalchemy_models(root)
+         if not models:
+             elapsed = int(time.time() - start)
+             return StepResult(
+                 self.name, "SKIP", elapsed, "No SQLAlchemy models found"
+             )
+
+         # Validate relationships
+         issues = self._validate_relationships(root, models)
+
+         # Generate report
+         lines = [
+             "=" * 80,
+             "SQLALCHEMY VALIDATION REPORT",
+             "=" * 80,
+             "",
+         ]
+
+         lines.extend(
+             [
+                 "SUMMARY",
+                 "=" * 80,
+                 f"Models found: {len(models)}",
+                 "",
+             ]
+         )
+
+         # Models list
+         lines.extend(
+             [
+                 "MODEL DEFINITIONS",
+                 "-" * 80,
+                 "",
+             ]
+         )
+
+         for model in sorted(models, key=lambda m: m["name"]):
+             lines.append(f"  {model['name']}")
+             lines.append(f"    File: {model['file']}")
+             lines.append(f"    Table: {model.get('table', '(auto-generated)')}")
+             if model.get("columns"):
+                 lines.append(f"    Columns: {', '.join(model['columns'][:5])}")
+                 if len(model["columns"]) > 5:
+                     lines.append(f"      ... and {len(model['columns']) - 5} more")
+             if model.get("relationships"):
+                 lines.append(f"    Relationships: {', '.join(model['relationships'])}")
+             lines.append("")
+
+         # Validation issues
+         lines.extend(
+             [
+                 "VALIDATION ISSUES",
+                 "=" * 80,
+                 "",
+             ]
+         )
+
+         if issues:
+             error_count = sum(1 for _, level, _, _ in issues if level == "ERROR")
+             warning_count = sum(1 for _, level, _, _ in issues if level == "WARNING")
+
+             lines.append(
+                 f"Found {len(issues)} issue(s): {error_count} error(s), {warning_count} warning(s)"
+             )
+             lines.append("")
+
+             for model_name, level, issue, detail in issues:
+                 icon = "✗" if level == "ERROR" else "⚠"
+                 lines.append(f"  {icon} [{model_name}] {issue}")
+                 if detail:
+                     lines.append(f"      {detail}")
+                 lines.append("")
+         else:
+             lines.append("✓ No validation issues detected")
+             lines.append("")
+
+         # Relationship analysis
+         lines.extend(
+             [
+                 "RELATIONSHIP ANALYSIS",
+                 "-" * 80,
+                 "",
+             ]
+         )
+
+         relationships = self._analyze_relationships(models)
+         if relationships:
+             for rel in relationships:
+                 lines.append(f"  {rel}")
+         else:
+             lines.append("  ℹ No relationships defined")
+
+         lines.append("")
+
+         # Recommendations
+         lines.extend(
+             [
+                 "=" * 80,
+                 "BEST PRACTICES & RECOMMENDATIONS",
+                 "=" * 80,
+                 "",
+                 "1. MODEL DESIGN",
+                 "   ✓ Use descriptive model and column names",
+                 "   ✓ Define primary keys explicitly",
+                 "   ✓ Use UUID or serial primary keys",
+                 "   ✓ Add created_at and updated_at timestamps",
+                 "",
+                 "2. RELATIONSHIPS",
+                 "   ✓ Use relationship() for ORM-level access",
+                 "   ✓ Define foreign keys explicitly",
+                 "   ✓ Set cascade rules (delete-orphan for children)",
+                 "   ✓ Use back_populates for bidirectional relationships",
+                 "",
+                 "3. CONSTRAINTS",
+                 "   ✓ Add CHECK constraints for valid data",
+                 "   ✓ Use nullable=False for required fields",
+                 "   ✓ Add unique=True for unique fields",
+                 "   ✓ Add indexes to frequently queried fields",
+                 "",
+                 "4. INHERITANCE",
+                 "   ✓ Consider single table inheritance for polymorphism",
+                 "   ✓ Use joined table inheritance for distinct tables",
+                 "   ✓ Document inheritance strategy clearly",
+                 "",
+                 "5. SERIALIZATION",
+                 "   ✓ Use Pydantic models for API responses",
+                 "   ✓ Define __repr__ for debugging",
+                 "   ✓ Define to_dict() for serialization",
+                 "   ✓ Exclude sensitive fields from serialization",
+                 "",
+                 "6. QUERIES",
+                 "   ✓ Use lazy='select' or 'selectin' for relationships",
+                 "   ✓ Use .only() or .defer() to limit columns",
+                 "   ✓ Use exists() for existence checks",
+                 "   ✓ Use bulk_insert_mappings() for bulk operations",
+                 "",
+                 "7. TESTING",
+                 "   ✓ Test model creation and validation",
+                 "   ✓ Test relationship cascades",
+                 "   ✓ Test constraint violations",
+                 "   ✓ Use pytest fixtures for model factories",
+                 "",
+             ]
+         )
+
+         # Write report
+         output = "\n".join(lines)
+         dest = ctx.workdir / "logs" / "152_sqlalchemy_validation.txt"
+         dest.parent.mkdir(parents=True, exist_ok=True)
+         dest.write_text(output, encoding="utf-8")
+
+         elapsed = int(time.time() - start)
+         return StepResult(self.name, "OK", elapsed, "")
+
+     def _find_sqlalchemy_models(self, root: Path) -> List[Dict[str, Any]]:
+         """Find SQLAlchemy model definitions."""
+         models = []
+         python_files = list(root.rglob("*.py"))
+
+         for py_file in python_files:
+             if any(
+                 part in py_file.parts
+                 for part in ["venv", ".venv", "env", "__pycache__", "site-packages"]
+             ):
+                 continue
+
+             try:
+                 source = py_file.read_text(encoding="utf-8", errors="ignore")
+
+                 if "from sqlalchemy" not in source:
+                     continue
+
+                 # Look for model class definitions
+                 # Pattern: class ModelName(Base): or class ModelName(declarative_base()):
+                 class_pattern = r"class\s+(\w+)\s*\((.*?(Base|DeclarativeMeta|declarative_base).*?)\):"
+                 for match in re.finditer(class_pattern, source):
+                     model_name = match.group(1)
+
+                     # Extract table name if specified
+                     table_match = re.search(
+                         rf"class\s+{model_name}.*?\n\s+__tablename__\s*=\s*['\"](\w+)['\"]",
+                         source,
+                     )
+                     table_name = table_match.group(1) if table_match else None
+
+                     # Extract columns
+                     columns = re.findall(r"(\w+)\s*=\s*Column\(", source)
+
+                     # Extract relationships
+                     relationships = re.findall(r"(\w+)\s*=\s*relationship\(", source)
+
+                     models.append(
+                         {
+                             "name": model_name,
+                             "file": str(py_file.relative_to(root)),
+                             "table": table_name,
+                             "columns": columns,
+                             "relationships": relationships,
+                         }
+                     )
+
+             except (OSError, UnicodeDecodeError):
+                 continue
+
+         return models
+
+     def _validate_relationships(
+         self, root: Path, models: List[Dict[str, Any]]
+     ) -> List[Tuple[str, str, str, str]]:
+         """Validate model relationships."""
+         issues = []
+         model_names = {m["name"] for m in models}
+
+         for model in models:
+             # Check if relationships reference existing models
+             for rel in model.get("relationships", []):
+                 # Very basic check - relationship should reference a model
+                 if not any(model_name in rel for model_name in model_names):
+                     # This is a heuristic - might be false positive
+                     pass
+
+             # Check for missing primary key
+             if not any(col.lower() == "id" for col in model.get("columns", [])):
+                 issues.append(
+                     (
+                         model["name"],
+                         "WARNING",
+                         "No obvious primary key found",
+                         "Ensure model has a primary key defined",
+                     )
+                 )
+
+         return issues
+
+     def _analyze_relationships(self, models: List[Dict[str, Any]]) -> List[str]:
+         """Analyze relationships between models."""
+         relationships = []
+
+         for model in models:
+             for rel in model.get("relationships", []):
+                 relationships.append(f" {model['name']}.{rel}")
+
+         return relationships
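
As orientation for the heuristics above: the scan is purely regex-based, the models are never imported. A minimal sketch of a module it would match (hypothetical model names, assuming a conventional declarative Base):

    # Hypothetical two-model module that the regex scan above would detect.
    from sqlalchemy import Column, ForeignKey, Integer, String
    from sqlalchemy.orm import declarative_base, relationship

    Base = declarative_base()


    class User(Base):
        __tablename__ = "users"  # picked up by the __tablename__ regex

        id = Column(Integer, primary_key=True)  # satisfies the primary-key check
        name = Column(String, nullable=False)
        posts = relationship("Post", back_populates="author")


    class Post(Base):
        __tablename__ = "posts"

        id = Column(Integer, primary_key=True)
        author_id = Column(Integer, ForeignKey("users.id"))
        author = relationship("User", back_populates="posts")

Note that because the column and relationship regexes run over the whole file rather than the matched class body, a multi-model module like this attributes every `Column(...)` and `relationship(...)` name to each detected model, so the report is best read as file-level hints.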
pybundle/steps/test_flakiness.py ADDED
@@ -0,0 +1,184 @@
+ """
+ Test flakiness detection - Milestone 4 (v1.4.1)
+ """
+
+ from __future__ import annotations
+
+ import subprocess
+ import time
+ from dataclasses import dataclass
+
+ from .base import StepResult
+ from ..context import BundleContext
+
+
+ @dataclass
+ class TestFlakinessStep:
+     """
+     Run tests multiple times to detect non-deterministic failures (flaky tests).
+
+     Outputs:
+     - logs/70_test_flakiness.txt: Report of flaky tests with pass/fail patterns
+     """
+
+     name: str = "test_flakiness"
+
+     def run(self, ctx: BundleContext) -> StepResult:
+         start = time.time()
+
+         if not ctx.tools.pytest:
+             return StepResult(self.name, "SKIP", 0, "pytest not found")
+
+         tests_dir = ctx.root / "tests"
+         if not tests_dir.is_dir():
+             return StepResult(self.name, "SKIP", 0, "no tests/ directory")
+
+         runs = ctx.options.test_flakiness_runs
+         ctx.emit(f"  Running tests {runs}x to detect flakiness...")
+
+         output_file = ctx.workdir / "logs" / "70_test_flakiness.txt"
+         output_file.parent.mkdir(parents=True, exist_ok=True)
+
+         try:
+             # Run tests multiple times and collect results
+             results = []
+             test_outcomes: dict[str, list[str]] = {}  # test_name -> [pass/fail/error]
+
+             for i in range(runs):
+                 ctx.emit(f"    Run {i + 1}/{runs}...")
+                 result = subprocess.run(
+                     [str(ctx.tools.pytest), "-v", "--tb=no"],
+                     cwd=ctx.root,
+                     capture_output=True,
+                     text=True,
+                     timeout=180,  # 3 minute timeout per run
+                 )
+                 results.append(result)
+
+                 # Parse test results
+                 self._parse_test_outcomes(result.stdout, test_outcomes, i)
+
+             # Analyze for flakiness
+             flaky_tests = self._identify_flaky_tests(test_outcomes)
+
+             # Generate report
+             with output_file.open("w") as f:
+                 f.write("=" * 70 + "\n")
+                 f.write(f"TEST FLAKINESS DETECTION ({runs} runs)\n")
+                 f.write("=" * 70 + "\n\n")
+
+                 if not test_outcomes:
+                     f.write("No test results collected.\n\n")
+                     for i, result in enumerate(results):
+                         f.write(f"Run {i + 1} output:\n")
+                         f.write(result.stdout[:500])
+                         f.write("\n\n")
+                 else:
+                     total_tests = len(test_outcomes)
+                     f.write(f"Total tests analyzed: {total_tests}\n")
+                     f.write(f"Flaky tests detected: {len(flaky_tests)}\n\n")
+
+                     if flaky_tests:
+                         f.write("=" * 70 + "\n")
+                         f.write("FLAKY TESTS (non-deterministic results):\n")
+                         f.write("=" * 70 + "\n\n")
+
+                         for test_name, outcomes in flaky_tests.items():
+                             pattern = " -> ".join(outcomes)
+                             f.write(f"⚠️ {test_name}\n")
+                             f.write(f"   Pattern: {pattern}\n\n")
+                     else:
+                         f.write(
+                             "✅ No flaky tests detected - all tests deterministic!\n\n"
+                         )
+
+                     # Summary of all tests
+                     f.write("=" * 70 + "\n")
+                     f.write("ALL TESTS SUMMARY:\n")
+                     f.write("=" * 70 + "\n\n")
+
+                     stable_pass = []
+                     stable_fail = []
+                     flaky = []
+
+                     for test_name, outcomes in test_outcomes.items():
+                         unique_outcomes = set(outcomes)
+                         if len(unique_outcomes) == 1:
+                             if "PASSED" in unique_outcomes:
+                                 stable_pass.append(test_name)
+                             else:
+                                 stable_fail.append(test_name)
+                         else:
+                             flaky.append(test_name)
+
+                     f.write(f"Stable passing: {len(stable_pass)}\n")
+                     f.write(f"Stable failing: {len(stable_fail)}\n")
+                     f.write(f"Flaky: {len(flaky)}\n\n")
+
+                     if stable_fail:
+                         f.write("Consistently failing tests:\n")
+                         for test in stable_fail[:20]:  # Limit to 20
+                             f.write(f"  - {test}\n")
+                         if len(stable_fail) > 20:
+                             f.write(f"  ... and {len(stable_fail) - 20} more\n")
+                         f.write("\n")
+
+                 f.write("=" * 70 + "\n")
+                 f.write("RECOMMENDATIONS:\n")
+                 f.write(
+                     "- Fix flaky tests by removing non-deterministic behavior\n"
+                 )
+                 f.write(
+                     "- Common causes: timing issues, random data, external dependencies\n"
+                 )
+                 f.write("- Use pytest-randomly to test with different orderings\n")
+
+             elapsed = int((time.time() - start) * 1000)
+
+             if flaky_tests:
+                 return StepResult(
+                     self.name, "OK", elapsed, f"{len(flaky_tests)} flaky tests"
+                 )
+             else:
+                 return StepResult(self.name, "OK", elapsed)
+
+         except subprocess.TimeoutExpired:
+             elapsed = int((time.time() - start) * 1000)
+             return StepResult(self.name, "FAIL", elapsed, "timeout")
+         except Exception as e:
+             elapsed = int((time.time() - start) * 1000)
+             return StepResult(self.name, "FAIL", elapsed, str(e))
+
+     def _parse_test_outcomes(
+         self, output: str, test_outcomes: dict, run_num: int
+     ) -> None:
+         """Parse pytest -v output to extract test results"""
+         for line in output.splitlines():
+             # Look for pytest verbose output: "test_file.py::test_name PASSED"
+             if "::" in line and any(
+                 status in line for status in ["PASSED", "FAILED", "ERROR", "SKIPPED"]
+             ):
+                 parts = line.split()
+                 if len(parts) >= 2:
+                     test_name = parts[0]
+                     # Find status
+                     status = None
+                     for s in ["PASSED", "FAILED", "ERROR", "SKIPPED"]:
+                         if s in line:
+                             status = s
+                             break
+
+                     if status:
+                         if test_name not in test_outcomes:
+                             test_outcomes[test_name] = []
+                         test_outcomes[test_name].append(status)
+
+     def _identify_flaky_tests(self, test_outcomes: dict) -> dict:
+         """Identify tests with inconsistent results across runs"""
+         flaky = {}
+         for test_name, outcomes in test_outcomes.items():
+             unique_outcomes = set(outcomes)
+             # Flaky if not all the same outcome
+             if len(unique_outcomes) > 1:
+                 flaky[test_name] = outcomes
+         return flaky
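
The flakiness logic above reduces to: collect one outcome per test per run from `pytest -v` lines of the form `path::test_name STATUS`, then flag any test whose outcome set is not a singleton. A minimal self-contained sketch mirroring that parsing and classification (the output lines here are invented for illustration):

    # Minimal sketch; the pytest -v lines below are invented.
    run_outputs = [
        "tests/test_api.py::test_ok PASSED\ntests/test_api.py::test_retry PASSED",
        "tests/test_api.py::test_ok PASSED\ntests/test_api.py::test_retry FAILED",
    ]

    test_outcomes: dict[str, list[str]] = {}
    for output in run_outputs:
        for line in output.splitlines():
            # Same shape of check as _parse_test_outcomes above.
            if "::" in line and any(s in line for s in ("PASSED", "FAILED", "ERROR", "SKIPPED")):
                name = line.split()[0]
                status = next(s for s in ("PASSED", "FAILED", "ERROR", "SKIPPED") if s in line)
                test_outcomes.setdefault(name, []).append(status)

    # A test is flaky when its outcomes differ across runs.
    flaky = {t: o for t, o in test_outcomes.items() if len(set(o)) > 1}
    print(flaky)  # {'tests/test_api.py::test_retry': ['PASSED', 'FAILED']}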
pybundle/steps/tree.py ADDED
@@ -0,0 +1,116 @@
+ from __future__ import annotations
+
+ import os
+ import time
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ from .base import StepResult
+ from pybundle.context import BundleContext
+ from pybundle.policy import AIContextPolicy, PathFilter
+
+
+ @dataclass
+ class TreeStep:
+     name: str = "tree (filtered)"
+     max_depth: int = 4
+     excludes: list[str] | None = None
+     policy: AIContextPolicy | None = None
+
+     def run(self, ctx: BundleContext) -> StepResult:
+         start = time.time()
+         policy = self.policy or AIContextPolicy()
+
+         # allow overrides
+         exclude_dirs = set(self.excludes) if self.excludes else set(policy.exclude_dirs)
+         filt = PathFilter(
+             exclude_dirs=exclude_dirs,
+             exclude_patterns=set(policy.exclude_patterns),
+             exclude_file_exts=set(policy.exclude_file_exts),
+         )
+
+         out = ctx.metadir / "10_tree.txt"
+         out.parent.mkdir(parents=True, exist_ok=True)
+
+         root = ctx.root
+         lines: list[str] = []
+
+         for dirpath, dirnames, filenames in os.walk(root):
+             dp = Path(dirpath)
+             rel_dp = dp.relative_to(root)
+             depth = 0 if rel_dp == Path(".") else len(rel_dp.parts)
+
+             if depth > self.max_depth:
+                 dirnames[:] = []
+                 continue
+
+             # prune dirs (name + venv-structure)
+             kept = []
+             for d in dirnames:
+                 if filt.should_prune_dir(dp, d):
+                     continue
+                 kept.append(d)
+             dirnames[:] = kept
+
+             for fn in filenames:
+                 p = dp / fn
+                 if not filt.should_include_file(root, p):
+                     continue
+                 lines.append(str(p.relative_to(root)))
+
+         lines.sort()
+         out.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")
+         dur = int(time.time() - start)
+         return StepResult(self.name, "PASS", dur, "python-walk")
+
+
+ @dataclass
+ class LargestFilesStep:
+     name: str = "largest files"
+     limit: int = 80
+     excludes: list[str] | None = None
+     policy: AIContextPolicy | None = None
+
+     def run(self, ctx: BundleContext) -> StepResult:
+         start = time.time()
+         policy = self.policy or AIContextPolicy()
+
+         exclude_dirs = set(self.excludes) if self.excludes else set(policy.exclude_dirs)
+         filt = PathFilter(
+             exclude_dirs=exclude_dirs,
+             exclude_patterns=set(policy.exclude_patterns),
+             exclude_file_exts=set(policy.exclude_file_exts),
+         )
+
+         out = ctx.metadir / "11_largest_files.txt"
+         out.parent.mkdir(parents=True, exist_ok=True)
+
+         files: list[tuple[int, str]] = []
+         root = ctx.root
+
+         for dirpath, dirnames, filenames in os.walk(root):
+             dp = Path(dirpath)
+
+             kept = []
+             for d in dirnames:
+                 if filt.should_prune_dir(dp, d):
+                     continue
+                 kept.append(d)
+             dirnames[:] = kept
+
+             for fn in filenames:
+                 p = dp / fn
+                 if not filt.should_include_file(root, p):
+                     continue
+                 try:
+                     size = p.stat().st_size
+                 except OSError:
+                     continue
+                 files.append((size, str(p.relative_to(root))))
+
+         files.sort(key=lambda x: x[0], reverse=True)
+         lines = [f"{size}\t{path}" for size, path in files[: self.limit]]
+         out.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")
+
+         dur = int(time.time() - start)
+         return StepResult(self.name, "PASS", dur, f"count={len(files)}")
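
Both steps rely on the same `os.walk` pruning idiom: mutating `dirnames` in place via slice assignment (`dirnames[:] = kept`) is what stops `os.walk` from descending into excluded directories; rebinding the name (`dirnames = kept`) would have no effect. A minimal standalone sketch, with a hypothetical exclude set:

    import os
    from pathlib import Path

    EXCLUDE = {".git", "__pycache__", ".venv"}  # hypothetical exclude set


    def walk_filtered(root: Path):
        for dirpath, dirnames, filenames in os.walk(root):
            # Slice assignment mutates the list os.walk holds a reference to,
            # so excluded directories are never descended into.
            dirnames[:] = [d for d in dirnames if d not in EXCLUDE]
            for fn in filenames:
                yield Path(dirpath) / fn


    for p in walk_filtered(Path(".")):
        print(p)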