gwc_pybundle-2.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwc-pybundle might be problematic.
- gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
- gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
- gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
- gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
- gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +546 -0
- pybundle/context.py +404 -0
- pybundle/doctor.py +148 -0
- pybundle/filters.py +228 -0
- pybundle/manifest.py +77 -0
- pybundle/packaging.py +45 -0
- pybundle/policy.py +132 -0
- pybundle/profiles.py +454 -0
- pybundle/roadmap_model.py +42 -0
- pybundle/roadmap_scan.py +328 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +180 -0
- pybundle/steps/__init__.py +26 -0
- pybundle/steps/ai_context.py +791 -0
- pybundle/steps/api_docs.py +219 -0
- pybundle/steps/asyncio_analysis.py +358 -0
- pybundle/steps/bandit.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/blocking_call_detection.py +291 -0
- pybundle/steps/call_graph.py +219 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/config_docs.py +319 -0
- pybundle/steps/config_validation.py +302 -0
- pybundle/steps/container_image.py +294 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +293 -0
- pybundle/steps/coverage.py +101 -0
- pybundle/steps/cprofile_step.py +166 -0
- pybundle/steps/dependency_sizes.py +136 -0
- pybundle/steps/django_checks.py +214 -0
- pybundle/steps/dockerfile_lint.py +282 -0
- pybundle/steps/dockerignore.py +311 -0
- pybundle/steps/duplication.py +103 -0
- pybundle/steps/env_completeness.py +269 -0
- pybundle/steps/env_var_usage.py +253 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/event_loop_patterns.py +280 -0
- pybundle/steps/exception_patterns.py +190 -0
- pybundle/steps/fastapi_integration.py +250 -0
- pybundle/steps/flask_debugging.py +312 -0
- pybundle/steps/git_analytics.py +315 -0
- pybundle/steps/handoff_md.py +176 -0
- pybundle/steps/import_time.py +175 -0
- pybundle/steps/interrogate.py +106 -0
- pybundle/steps/license_scan.py +96 -0
- pybundle/steps/line_profiler.py +117 -0
- pybundle/steps/link_validation.py +287 -0
- pybundle/steps/logging_analysis.py +233 -0
- pybundle/steps/memory_profile.py +176 -0
- pybundle/steps/migration_history.py +336 -0
- pybundle/steps/mutation_testing.py +141 -0
- pybundle/steps/mypy.py +103 -0
- pybundle/steps/orm_optimization.py +316 -0
- pybundle/steps/pip_audit.py +45 -0
- pybundle/steps/pipdeptree.py +62 -0
- pybundle/steps/pylance.py +562 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/query_pattern_analysis.py +334 -0
- pybundle/steps/radon.py +161 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +153 -0
- pybundle/steps/ruff.py +117 -0
- pybundle/steps/secrets_detection.py +235 -0
- pybundle/steps/security_headers.py +309 -0
- pybundle/steps/shell.py +74 -0
- pybundle/steps/slow_tests.py +178 -0
- pybundle/steps/sqlalchemy_validation.py +269 -0
- pybundle/steps/test_flakiness.py +184 -0
- pybundle/steps/tree.py +116 -0
- pybundle/steps/type_coverage.py +277 -0
- pybundle/steps/unused_deps.py +211 -0
- pybundle/steps/vulture.py +167 -0
- pybundle/tools.py +63 -0
pybundle/steps/env_var_usage.py
@@ -0,0 +1,253 @@
+"""
+Step: Environment Variable Usage
+
+Track usage of environment variables via os.getenv, os.environ patterns.
+"""
+
+import ast
+import re
+from pathlib import Path
+from typing import Dict, List, Set
+
+from .base import Step, StepResult
+
+
+class EnvVarUsageStep(Step):
+    """Analyze environment variable usage patterns in Python code."""
+
+    name = "env var usage"
+
+    def run(self, ctx: "BundleContext") -> StepResult:  # type: ignore[name-defined]
+        """Find all environment variable accesses."""
+        import time
+
+        start = time.time()
+
+        root = ctx.root
+        python_files = sorted(root.rglob("*.py"))
+        if not python_files:
+            return StepResult(self.name, "SKIP", int(time.time() - start), "No Python files found")
+
+        # Track environment variable patterns
+        env_vars: Dict[str, List[str]] = {}  # var_name -> [file:line, ...]
+        env_patterns = {
+            "os.getenv": [],
+            "os.environ.get": [],
+            "os.environ[]": [],
+            "dotenv": [],
+        }
+        analyzed_files = 0
+
+        # Regex patterns for detection
+        getenv_pattern = re.compile(r'os\.getenv\(["\']([^"\']+)["\']')
+        environ_get_pattern = re.compile(r'os\.environ\.get\(["\']([^"\']+)["\']')
+        environ_bracket_pattern = re.compile(r'os\.environ\[["\']([^"\']+)["\']\]')
+        dotenv_pattern = re.compile(r'load_dotenv|from\s+dotenv\s+import')
+
+        for py_file in python_files:
+            # Skip non-user code
+            if any(
+                part in py_file.parts
+                for part in [
+                    "venv",
+                    ".venv",
+                    "env",
+                    "site-packages",
+                    "__pycache__",
+                    ".git",
+                    "node_modules",
+                ]
+            ):
+                continue
+
+            analyzed_files += 1
+
+            try:
+                source = py_file.read_text(encoding="utf-8", errors="ignore")
+                rel_path = py_file.relative_to(root)
+
+                # Check for environment variable usage patterns
+                if "os.getenv" not in source and "os.environ" not in source and "dotenv" not in source:
+                    continue
+
+                # Check for dotenv usage
+                if dotenv_pattern.search(source):
+                    env_patterns["dotenv"].append(str(rel_path))
+
+                # Parse with regex (faster for simple patterns)
+                for line_num, line in enumerate(source.split("\n"), start=1):
+                    location = f"{rel_path}:{line_num}"
+
+                    # os.getenv()
+                    for match in getenv_pattern.finditer(line):
+                        var_name = match.group(1)
+                        if var_name not in env_vars:
+                            env_vars[var_name] = []
+                        env_vars[var_name].append(location)
+                        env_patterns["os.getenv"].append(location)
+
+                    # os.environ.get()
+                    for match in environ_get_pattern.finditer(line):
+                        var_name = match.group(1)
+                        if var_name not in env_vars:
+                            env_vars[var_name] = []
+                        env_vars[var_name].append(location)
+                        env_patterns["os.environ.get"].append(location)
+
+                    # os.environ[]
+                    for match in environ_bracket_pattern.finditer(line):
+                        var_name = match.group(1)
+                        if var_name not in env_vars:
+                            env_vars[var_name] = []
+                        env_vars[var_name].append(location)
+                        env_patterns["os.environ[]"].append(location)
+
+            except (UnicodeDecodeError, OSError):
+                continue
+
+        # Calculate statistics
+        total_vars = len(env_vars)
+        total_accesses = sum(len(locations) for locations in env_vars.values())
+
+        # Generate report
+        lines = [
+            "=" * 80,
+            "ENVIRONMENT VARIABLE USAGE ANALYSIS",
+            "=" * 80,
+            "",
+            f"Total Python files analyzed: {analyzed_files}",
+            f"Unique environment variables: {total_vars}",
+            f"Total environment variable accesses: {total_accesses}",
+            "",
+        ]
+
+        # Usage patterns
+        lines.extend(
+            [
+                "=" * 80,
+                "USAGE PATTERNS",
+                "=" * 80,
+                "",
+            ]
+        )
+
+        for pattern, locations in env_patterns.items():
+            lines.append(f"{pattern:20} {len(locations):5} occurrence(s)")
+        lines.append("")
+
+        # Dotenv detection
+        if env_patterns["dotenv"]:
+            lines.extend(
+                [
+                    "=" * 80,
+                    "DOTENV USAGE DETECTED",
+                    "=" * 80,
+                    "",
+                ]
+            )
+            for file in env_patterns["dotenv"]:
+                lines.append(f"  - {file}")
+            lines.append("")
+
+        # Environment variables
+        if env_vars:
+            lines.extend(
+                [
+                    "=" * 80,
+                    "ENVIRONMENT VARIABLES (sorted by frequency)",
+                    "=" * 80,
+                    "",
+                ]
+            )
+
+            sorted_vars = sorted(env_vars.items(), key=lambda x: len(x[1]), reverse=True)
+            for var_name, locations in sorted_vars:
+                lines.append(f"{var_name}: {len(locations)} access(es)")
+                for loc in locations[:3]:  # Show first 3 locations
+                    lines.append(f"  - {loc}")
+                if len(locations) > 3:
+                    lines.append(f"  ... and {len(locations) - 3} more")
+                lines.append("")
+
+        # Check for .env file
+        env_file = root / ".env"
+        env_example = root / ".env.example"
+
+        lines.extend(
+            [
+                "=" * 80,
+                "ENVIRONMENT FILES",
+                "=" * 80,
+                "",
+            ]
+        )
+
+        if env_file.exists():
+            lines.append("  ✓ .env file found")
+        else:
+            lines.append("  ✗ .env file not found")
+
+        if env_example.exists():
+            lines.append("  ✓ .env.example file found")
+            # Parse .env.example for documented variables
+            try:
+                example_content = env_example.read_text(encoding="utf-8")
+                example_vars = set()
+                for line_content in example_content.split("\n"):
+                    line_content = line_content.strip()
+                    if line_content and not line_content.startswith("#"):
+                        if "=" in line_content:
+                            var = line_content.split("=")[0].strip()
+                            example_vars.add(var)
+
+                if example_vars:
+                    lines.append(f"    - Documents {len(example_vars)} variable(s)")
+
+                    # Check for undocumented variables
+                    undocumented = set(env_vars.keys()) - example_vars
+                    if undocumented:
+                        lines.append(f"    - {len(undocumented)} variable(s) used but not documented:")
+                        for var in sorted(undocumented)[:10]:
+                            lines.append(f"        {var}")
+                        if len(undocumented) > 10:
+                            lines.append(f"        ... and {len(undocumented) - 10} more")
+            except (OSError, UnicodeDecodeError):
+                pass
+        else:
+            lines.append("  ✗ .env.example file not found")
+
+        lines.append("")
+
+        # Recommendations
+        lines.extend(
+            [
+                "=" * 80,
+                "RECOMMENDATIONS",
+                "=" * 80,
+                "",
+            ]
+        )
+
+        if total_vars == 0:
+            lines.append("  - No environment variables detected")
+        else:
+            if not env_example.exists():
+                lines.append("  - Create .env.example to document required environment variables")
+
+            # Check for hardcoded defaults
+            if len(env_patterns["os.environ[]"]) > 0:
+                lines.append("  - Consider using os.getenv() with defaults instead of os.environ[]")
+                lines.append("    to avoid KeyError exceptions")
+
+            if not env_patterns["dotenv"]:
+                lines.append("  - Consider using python-dotenv to load .env files")
+
+        lines.append("")
+
+        # Write report
+        output = "\n".join(lines)
+        dest = ctx.workdir / "meta" / "104_env_var_usage.txt"
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        dest.write_text(output, encoding="utf-8")
+
+        elapsed = int(time.time() - start)
+        return StepResult(self.name, "OK", elapsed, "")
pybundle/steps/error_refs.py
@@ -0,0 +1,204 @@
+from __future__ import annotations
+
+import re
+import time
+from dataclasses import dataclass
+from pathlib import Path
+
+from .base import StepResult
+from ..context import BundleContext
+
+
+DEFAULT_EXCLUDE_PREFIXES = (
+    ".git/",
+    ".venv/",
+    ".mypy_cache/",
+    ".ruff_cache/",
+    ".pytest_cache/",
+    "__pycache__/",
+    "node_modules/",
+    "dist/",
+    "build/",
+    "artifacts/",
+)
+
+# Patterns based on your bash sed rules:
+# 1) tool-style: path:line(:col)...
+_RE_COLON_LINE = re.compile(r"^([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\d+(?::\d+)?\b.*$")
+
+# 2) pytest traceback: File "path", line N
+_RE_PYTEST_FILE = re.compile(r'^\s*File "([^"]+)", line \d+\b.*$')
+
+# 3) mypy: (optional "mypy:") ./path:line: (error|note|warning):
+_RE_MYPY_LINE = re.compile(
+    r"^(?:mypy:\s*)?(?:\./)?([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\d+:\s*(?:error|note|warning):.*$"
+)
+
+# 4) mypy rare: path: (error|note|warning): ...
+_RE_MYPY_NOLINE = re.compile(
+    r"^(?:mypy:\s*)?(?:\./)?([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\s*(?:error|note|warning):.*$"
+)
+
+
+def _normalize_to_repo_rel(root: Path, p: str) -> str | None:
+    p = p.strip()
+    if not p:
+        return None
+
+    # remove leading ./ for consistency
+    if p.startswith("./"):
+        p = p[2:]
+
+    # absolute path -> must be under repo root
+    if p.startswith("/"):
+        try:
+            rp = Path(p).resolve()
+            rr = rp.relative_to(root.resolve())
+            return str(rr).replace("\\", "/")
+        except Exception:
+            return None
+
+    # relative path
+    return p.replace("\\", "/")
+
+
+def _is_allowed_repo_file(root: Path, rel: str) -> bool:
+    rel = rel.lstrip("./")
+    if not rel or rel.endswith("/"):
+        return False
+
+    # exclude common junk
+    for pref in DEFAULT_EXCLUDE_PREFIXES:
+        if rel.startswith(pref):
+            return False
+    if "/__pycache__/" in f"/{rel}/":
+        return False
+
+    # must exist and be a file inside repo
+    fp = (root / rel).resolve()
+    try:
+        fp.relative_to(root.resolve())
+    except Exception:
+        return False
+
+    return fp.is_file()
+
+
+def _extract_paths_from_text(text: str) -> list[str]:
+    out: list[str] = []
+    for line in text.splitlines():
+        m = _RE_COLON_LINE.match(line)
+        if m:
+            out.append(m.group(1))
+            continue
+
+        m = _RE_PYTEST_FILE.match(line)
+        if m:
+            out.append(m.group(1))
+            continue
+
+        m = _RE_MYPY_LINE.match(line)
+        if m:
+            out.append(m.group(1))
+            continue
+
+        m = _RE_MYPY_NOLINE.match(line)
+        if m:
+            out.append(m.group(1))
+            continue
+
+    return out
+
+
+@dataclass
+class ErrorReferencedFilesStep:
+    name: str = "collect error-referenced files"
+    max_files: int = 250
+    # Paths are relative to the bundle workdir
+    log_files: list[str] | None = None
+
+    def run(self, ctx: BundleContext) -> StepResult:
+        start = time.time()
+
+        # Default set aligned to our step numbers
+        log_files = self.log_files or [
+            "logs/31_ruff_check.txt",
+            "logs/32_ruff_format_check.txt",
+            "logs/33_mypy.txt",
+            "logs/34_pytest_q.txt",
+        ]
+
+        out_list = ctx.workdir / "error_files_from_logs.txt"
+        out_count = ctx.workdir / "error_refs_count.txt"
+        report = ctx.metadir / "60_error_refs_report.txt"
+
+        dest_root = ctx.srcdir / "_error_refs"
+        dest_root.mkdir(parents=True, exist_ok=True)
+
+        # Collect candidate paths
+        candidates: set[str] = set()
+        scanned = 0
+        missing_logs = 0
+
+        for lf in log_files:
+            lp = ctx.workdir / lf
+            if not lp.is_file():
+                missing_logs += 1
+                continue
+            scanned += 1
+            try:
+                txt = lp.read_text(encoding="utf-8", errors="replace")
+            except Exception:
+                continue
+
+            for raw in _extract_paths_from_text(txt):
+                norm = _normalize_to_repo_rel(ctx.root, raw)
+                if norm:
+                    candidates.add(norm)
+
+        # Normalize / filter to real repo files
+        allowed = sorted([p for p in candidates if _is_allowed_repo_file(ctx.root, p)])
+
+        # Write list file (even if empty)
+        out_list.write_text(
+            "\n".join(allowed) + ("\n" if allowed else ""), encoding="utf-8"
+        )
+
+        # Copy up to max_files
+        copied = 0
+        for rel in allowed:
+            if copied >= self.max_files:
+                break
+            src = ctx.root / rel
+            dst = dest_root / rel
+            dst.parent.mkdir(parents=True, exist_ok=True)
+            try:
+                # preserve mode/timestamps
+                dst.write_bytes(src.read_bytes())
+                copied += 1
+            except Exception:
+                continue
+
+        out_count.write_text(f"{copied}\n", encoding="utf-8")
+
+        report.write_text(
+            "\n".join(
+                [
+                    f"scanned_logs={scanned}",
+                    f"missing_logs={missing_logs}",
+                    f"candidates_total={len(candidates)}",
+                    f"allowed_repo_files={len(allowed)}",
+                    f"copied={copied}",
+                    f"max_files={self.max_files}",
+                    "dest=src/_error_refs",
+                ]
+            )
+            + "\n",
+            encoding="utf-8",
+        )
+
+        dur = int(time.time() - start)
+        note = f"allowed={len(allowed)} copied={copied}"
+        if copied >= self.max_files:
+            note += " (HIT MAX)"
+        return StepResult(self.name, "PASS", dur, note)
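
The log scrapers above work the same way: the first pattern that matches a line wins, and its first capture group is kept as a candidate repo path. A small sketch using three of the patterns from the step, run against invented log lines (the ruff/pytest/mypy messages are fabricated; the paths are module names from the file list above):

# Illustrative sketch only -- not shipped in the package. Regexes copied from
# error_refs.py above; the sample log lines are fabricated.
import re

RE_COLON_LINE = re.compile(r"^([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\d+(?::\d+)?\b.*$")
RE_PYTEST_FILE = re.compile(r'^\s*File "([^"]+)", line \d+\b.*$')
RE_MYPY_NOLINE = re.compile(
    r"^(?:mypy:\s*)?(?:\./)?([A-Za-z0-9_.\/-]+\.[A-Za-z0-9]+):\s*(?:error|note|warning):.*$"
)

samples = [
    "pybundle/cli.py:120:5: F401 'os' imported but unused",     # ruff-style -> RE_COLON_LINE
    '  File "pybundle/runner.py", line 42, in run',              # pytest traceback -> RE_PYTEST_FILE
    "pybundle/policy.py: error: Cannot determine type of 'x'",   # mypy without line -> RE_MYPY_NOLINE
]

for line in samples:
    for rx in (RE_COLON_LINE, RE_PYTEST_FILE, RE_MYPY_NOLINE):
        m = rx.match(line)
        if m:
            print(m.group(1))  # candidate path, one per line
            break
# prints pybundle/cli.py, pybundle/runner.py, pybundle/policy.py

In the step, each candidate is then normalized with _normalize_to_repo_rel, filtered through _is_allowed_repo_file, and up to max_files of the surviving files are copied into src/_error_refs.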