gwc-pybundle 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwc-pybundle might be problematic. Click here for more details.
- gwc_pybundle-2.1.2.dist-info/METADATA +903 -0
- gwc_pybundle-2.1.2.dist-info/RECORD +82 -0
- gwc_pybundle-2.1.2.dist-info/WHEEL +5 -0
- gwc_pybundle-2.1.2.dist-info/entry_points.txt +2 -0
- gwc_pybundle-2.1.2.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-2.1.2.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +546 -0
- pybundle/context.py +404 -0
- pybundle/doctor.py +148 -0
- pybundle/filters.py +228 -0
- pybundle/manifest.py +77 -0
- pybundle/packaging.py +45 -0
- pybundle/policy.py +132 -0
- pybundle/profiles.py +454 -0
- pybundle/roadmap_model.py +42 -0
- pybundle/roadmap_scan.py +328 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +180 -0
- pybundle/steps/__init__.py +26 -0
- pybundle/steps/ai_context.py +791 -0
- pybundle/steps/api_docs.py +219 -0
- pybundle/steps/asyncio_analysis.py +358 -0
- pybundle/steps/bandit.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/blocking_call_detection.py +291 -0
- pybundle/steps/call_graph.py +219 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/config_docs.py +319 -0
- pybundle/steps/config_validation.py +302 -0
- pybundle/steps/container_image.py +294 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +293 -0
- pybundle/steps/coverage.py +101 -0
- pybundle/steps/cprofile_step.py +166 -0
- pybundle/steps/dependency_sizes.py +136 -0
- pybundle/steps/django_checks.py +214 -0
- pybundle/steps/dockerfile_lint.py +282 -0
- pybundle/steps/dockerignore.py +311 -0
- pybundle/steps/duplication.py +103 -0
- pybundle/steps/env_completeness.py +269 -0
- pybundle/steps/env_var_usage.py +253 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/event_loop_patterns.py +280 -0
- pybundle/steps/exception_patterns.py +190 -0
- pybundle/steps/fastapi_integration.py +250 -0
- pybundle/steps/flask_debugging.py +312 -0
- pybundle/steps/git_analytics.py +315 -0
- pybundle/steps/handoff_md.py +176 -0
- pybundle/steps/import_time.py +175 -0
- pybundle/steps/interrogate.py +106 -0
- pybundle/steps/license_scan.py +96 -0
- pybundle/steps/line_profiler.py +117 -0
- pybundle/steps/link_validation.py +287 -0
- pybundle/steps/logging_analysis.py +233 -0
- pybundle/steps/memory_profile.py +176 -0
- pybundle/steps/migration_history.py +336 -0
- pybundle/steps/mutation_testing.py +141 -0
- pybundle/steps/mypy.py +103 -0
- pybundle/steps/orm_optimization.py +316 -0
- pybundle/steps/pip_audit.py +45 -0
- pybundle/steps/pipdeptree.py +62 -0
- pybundle/steps/pylance.py +562 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/query_pattern_analysis.py +334 -0
- pybundle/steps/radon.py +161 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +153 -0
- pybundle/steps/ruff.py +117 -0
- pybundle/steps/secrets_detection.py +235 -0
- pybundle/steps/security_headers.py +309 -0
- pybundle/steps/shell.py +74 -0
- pybundle/steps/slow_tests.py +178 -0
- pybundle/steps/sqlalchemy_validation.py +269 -0
- pybundle/steps/test_flakiness.py +184 -0
- pybundle/steps/tree.py +116 -0
- pybundle/steps/type_coverage.py +277 -0
- pybundle/steps/unused_deps.py +211 -0
- pybundle/steps/vulture.py +167 -0
- pybundle/tools.py +63 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess # nosec B404 - Required for tool execution, paths validated
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .base import StepResult
|
|
9
|
+
from ..context import BundleContext
|
|
10
|
+
from ..tools import which
|
|
11
|
+
from ..filters import should_exclude_from_analysis
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _repo_has_py_files(root: Path) -> bool:
|
|
15
|
+
"""Fast check if there are Python files to scan."""
|
|
16
|
+
for p in root.rglob("*.py"):
|
|
17
|
+
parts = set(p.parts)
|
|
18
|
+
if (
|
|
19
|
+
".venv" not in parts
|
|
20
|
+
and "__pycache__" not in parts
|
|
21
|
+
and "node_modules" not in parts
|
|
22
|
+
and "dist" not in parts
|
|
23
|
+
and "build" not in parts
|
|
24
|
+
and "artifacts" not in parts
|
|
25
|
+
):
|
|
26
|
+
return True
|
|
27
|
+
return False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class InterrogateStep:
    """Run ``interrogate`` to measure docstring coverage for the project.

    The step is best-effort: it is skipped when the tool is missing or the
    tree has no Python files, and the measured coverage percentage never
    fails the step (``--fail-under 0``).
    """

    # Step name shown in the runner summary.
    name: str = "interrogate"
    # Path, relative to the repo root, that interrogate scans.
    target: str = "."
    # Report location inside the bundle workdir.
    outfile: str = "logs/52_docstring_coverage.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        """Execute interrogate and write its report to ``self.outfile``.

        Returns SKIP when the tool or Python sources are absent, OK on any
        completed execution, and FAIL on timeout or unexpected error.
        """
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        interrogate = which("interrogate")
        if not interrogate:
            out.write_text(
                "interrogate not found; skipping (pip install interrogate)\n",
                encoding="utf-8",
            )
            return StepResult(self.name, "SKIP", 0, "missing interrogate")

        if not _repo_has_py_files(ctx.root):
            out.write_text(
                "no .py files detected; skipping interrogate\n", encoding="utf-8"
            )
            return StepResult(self.name, "SKIP", 0, "no python files")

        target_path = ctx.root / self.target

        # Build comprehensive exclusion list for interrogate:
        # exclude ALL venvs, caches, and dependency directories so coverage
        # reflects first-party code only.
        exclude_patterns = [
            "*venv*",  # Catches .venv, venv, .freeze-venv, .gaslog-venv, etc.
            "*site-packages*",
            "__pycache__",
            ".pytest_cache",
            ".mypy_cache",
            ".ruff_cache",
            ".tox",
            ".nox",
            "artifacts",
            "build",
            "dist",
            ".git",
            "node_modules",
        ]

        cmd = [
            interrogate,
            str(target_path),
            "-v",  # Verbose output
            "--fail-under",
            "0",  # Don't fail the step based on coverage percentage
            "--color",
            "--exclude",
            ",".join(exclude_patterns),
        ]

        try:
            result = subprocess.run(  # nosec B603 - Using full path from which()
                cmd,
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                timeout=120,
            )
            out.write_text(result.stdout, encoding="utf-8")
            elapsed = int((time.time() - start) * 1000)

            # interrogate returns 0 even with missing docstrings when
            # --fail-under=0, so any completed execution is a success.
            return StepResult(self.name, "OK", elapsed, "")
        except subprocess.TimeoutExpired:
            out.write_text("interrogate timed out after 120s\n", encoding="utf-8")
            # Report the measured duration instead of hardcoding 120000 ms.
            return StepResult(
                self.name, "FAIL", int((time.time() - start) * 1000), "timeout"
            )
        except Exception as e:
            out.write_text(f"interrogate error: {e}\n", encoding="utf-8")
            # Fixed: previously reported 0 ms here; report the real elapsed
            # time like the sibling steps do.
            return StepResult(
                self.name, "FAIL", int((time.time() - start) * 1000), str(e)
            )
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess # nosec B404 - Required for tool execution, paths validated
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from .base import StepResult
|
|
8
|
+
from ..context import BundleContext
|
|
9
|
+
from ..tools import which
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class LicenseScanStep:
    """Inventory dependency licenses via ``pip-licenses`` and flag risks.

    Writes a markdown license table to ``self.outfile`` followed by
    heuristic compatibility warnings (GPL-family mixing, UNKNOWN licenses).
    """

    # Step name shown in the runner summary.
    name: str = "license scan"
    # Report location inside the bundle workdir.
    outfile: str = "meta/32_licenses.txt"

    def run(self, ctx: BundleContext) -> StepResult:
        """Run pip-licenses and write the annotated report.

        Returns SKIP when the tool is missing, FAIL on timeout, a nonzero
        exit, or an unexpected error, and OK otherwise.
        """
        start = time.time()
        out = ctx.workdir / self.outfile
        out.parent.mkdir(parents=True, exist_ok=True)

        pip_licenses = which("pip-licenses")
        if not pip_licenses:
            out.write_text(
                "pip-licenses not found; skipping (pip install pip-licenses)\n",
                encoding="utf-8",
            )
            return StepResult(self.name, "SKIP", 0, "missing pip-licenses")

        # Run pip-licenses with detailed output
        cmd = [
            pip_licenses,
            "--format=markdown",  # Markdown table format
            "--with-urls",  # Include project URLs
            "--with-description",  # Include package descriptions
        ]

        try:
            result = subprocess.run(  # nosec B603 - Using full path from which()
                cmd,
                cwd=ctx.root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=60,
            )

            elapsed = int((time.time() - start) * 1000)

            # Fixed: a nonzero exit used to be reported as OK with stderr
            # silently discarded; surface the failure instead.
            if result.returncode != 0:
                out.write_text(
                    f"pip-licenses exited with {result.returncode}\n\n"
                    f"STDOUT:\n{result.stdout}\n\nSTDERR:\n{result.stderr}\n",
                    encoding="utf-8",
                )
                return StepResult(
                    self.name, "FAIL", elapsed, f"exit {result.returncode}"
                )

            output = result.stdout

            # Add license compatibility warnings
            warnings = self._check_license_compatibility(output)
            if warnings:
                output += "\n\n" + "=" * 70 + "\n"
                output += "LICENSE COMPATIBILITY WARNINGS\n"
                output += "=" * 70 + "\n\n"
                output += "\n".join(warnings)

            out.write_text(output, encoding="utf-8")
            return StepResult(self.name, "OK", elapsed, "")
        except subprocess.TimeoutExpired:
            out.write_text("pip-licenses timed out after 60s\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", 60000, "timeout")
        except Exception as e:
            out.write_text(f"pip-licenses error: {e}\n", encoding="utf-8")
            return StepResult(self.name, "FAIL", 0, str(e))

    def _check_license_compatibility(self, output: str) -> list[str]:
        """Check for common license compatibility issues.

        These are substring heuristics over the report text, not a real
        license analysis; they exist only to prompt manual review.
        """
        warnings = []

        # "GPL" is a substring of "AGPL" and "LGPL" too, so a single check
        # covers the whole GPL family.
        has_gpl = "GPL" in output
        has_permissive = any(lic in output for lic in ("MIT", "Apache", "BSD"))

        if has_gpl and has_permissive:
            warnings.append(
                "⚠️ Potential GPL compatibility issue detected:\n"
                " - GPL/LGPL/AGPL licenses found alongside permissive licenses (MIT/Apache/BSD)\n"
                " - Review GPL obligations if redistributing\n"
                " - LGPL is generally compatible with permissive licenses\n"
                " - Consult legal counsel for production use"
            )

        # Check for proprietary or unknown licenses
        if "UNKNOWN" in output:
            warnings.append(
                "⚠️ Packages with UNKNOWN licenses detected:\n"
                " - Review manually before distribution\n"
                " - May indicate missing license metadata"
            )

        return warnings
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Line-by-line profiling with line_profiler - Milestone 3 (v1.4.0)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import subprocess
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .base import StepResult
|
|
13
|
+
from ..context import BundleContext
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class LineProfilerStep:
    """
    Line-by-line profiling using line_profiler (optional, requires manual annotation).

    This step is disabled by default and requires:
    1. line_profiler installed
    2. Functions decorated with @profile or listed in config

    Outputs:
    - logs/63_line_profile.txt: Line-by-line execution times
    """

    name: str = "line_profiler"

    def run(self, ctx: BundleContext) -> StepResult:
        """Run kernprof against the configured entry point, when enabled."""
        t0 = time.time()

        # Every unmet precondition yields a SKIP: the step is strictly opt-in.
        if ctx.options.no_profile or not ctx.options.enable_line_profiler:
            return StepResult(self.name, "SKIP", 0, "line profiler not enabled")
        if not ctx.tools.line_profiler:
            return StepResult(self.name, "SKIP", 0, "line_profiler not installed")

        entry_point = ctx.options.profile_entry_point
        if not entry_point:
            return StepResult(self.name, "SKIP", 0, "no entry point specified")

        # Relative entry points are resolved against the repo root.
        script = Path(entry_point)
        if not script.is_absolute():
            script = ctx.root / entry_point
        if not script.exists():
            return StepResult(
                self.name, "SKIP", 0, f"entry point not found: {entry_point}"
            )

        ctx.emit(f" Running line profiler on {script.name}")
        ctx.emit(" Note: Functions must be decorated with @profile")

        try:
            # -l: line-by-line, -v: verbose output.
            proc = subprocess.run(
                [str(ctx.tools.line_profiler), "-l", "-v", str(script)],
                cwd=ctx.root,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
            )

            report = ctx.workdir / "logs" / "63_line_profile.txt"
            report.parent.mkdir(parents=True, exist_ok=True)
            self._write_report(report, proc)

            duration_ms = int((time.time() - t0) * 1000)
            if proc.returncode == 0:
                return StepResult(self.name, "OK", duration_ms)
            return StepResult(
                self.name, "FAIL", duration_ms, f"exit {proc.returncode}"
            )

        except subprocess.TimeoutExpired:
            return StepResult(
                self.name, "FAIL", int((time.time() - t0) * 1000), "timeout"
            )
        except Exception as e:
            return StepResult(
                self.name, "FAIL", int((time.time() - t0) * 1000), str(e)
            )

    @staticmethod
    def _write_report(report: Path, proc: subprocess.CompletedProcess) -> None:
        """Write kernprof output (or usage instructions on failure) to *report*."""
        with report.open("w") as f:
            f.write("=" * 70 + "\n")
            f.write("LINE-BY-LINE PROFILING (line_profiler)\n")
            f.write("=" * 70 + "\n\n")

            if proc.returncode == 0:
                f.write(proc.stdout)
                if proc.stderr:
                    f.write("\n\nWarnings/Errors:\n")
                    f.write(proc.stderr)
            else:
                f.write(
                    "Line profiling failed or no functions decorated with @profile\n\n"
                )
                f.write("To use line_profiler:\n")
                f.write("1. Install: pip install line_profiler\n")
                f.write("2. Decorate functions with @profile\n")
                f.write(
                    "3. Specify entry point: --profile-entry-point path/to/script.py\n"
                )
                f.write("4. Enable: --enable-line-profiler\n\n")
                f.write("STDOUT:\n")
                f.write(proc.stdout)
                f.write("\n\nSTDERR:\n")
                f.write(proc.stderr)
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"""README and documentation link validation step.
|
|
2
|
+
|
|
3
|
+
Checks all markdown files for broken links (HTTP/HTTPS URLs).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, List, Tuple
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
from .base import StepResult
|
|
13
|
+
from ..context import BundleContext
|
|
14
|
+
from ..filters import should_exclude_from_analysis
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class LinkValidationStep:
    """Step that validates HTTP(S) links found in markdown files.

    Uses curl when available, falling back to the ``requests`` package.
    Broken links produce WARN (never FAIL) so documentation issues do not
    block a bundle.
    """

    # Step name shown in the runner summary.
    name: str = "link-validation"
    # Report location inside the bundle workdir.
    outfile: str = "logs/81_link_validation.txt"

    def run(self, context: BundleContext) -> StepResult:
        """Validate links in markdown documentation and write a report."""
        start = time.time()

        # Find all markdown files
        md_files = self._find_markdown_files(context.root)

        if not md_files:
            # Fixed: durations are reported in milliseconds like every other
            # step (previously truncated whole seconds were passed).
            elapsed_ms = int((time.time() - start) * 1000)
            return StepResult(self.name, "SKIP", elapsed_ms, "No markdown files found")

        # Extract all links from markdown files
        all_links = self._extract_links(md_files, context.root)

        if not all_links:
            elapsed_ms = int((time.time() - start) * 1000)
            return StepResult(self.name, "SKIP", elapsed_ms, "No HTTP(S) links found")

        # Check links
        results = self._check_links(all_links)

        elapsed_ms = int((time.time() - start) * 1000)

        # Write report
        log_path = context.workdir / self.outfile
        log_path.parent.mkdir(parents=True, exist_ok=True)
        with open(log_path, "w") as f:
            f.write("=" * 80 + "\n")
            f.write("LINK VALIDATION REPORT\n")
            f.write("=" * 80 + "\n\n")

            # Summary.  Fixed: only FAIL counts as broken — SKIPped links
            # (tooling absent, link limit, time budget) were previously
            # miscounted as broken.
            total_links = len(results)
            broken_links = sum(1 for status, _ in results.values() if status == "FAIL")
            success_rate = (
                ((total_links - broken_links) / total_links * 100)
                if total_links > 0
                else 100
            )

            f.write("Summary:\n")
            f.write("-" * 80 + "\n")
            f.write(f"Total URLs checked: {total_links}\n")
            f.write(f"Broken links: {broken_links}\n")
            f.write(f"Success rate: {success_rate:.1f}%\n")
            f.write("\n")

            # Group results by file
            links_by_file: Dict[str, List[Tuple[str, str, str]]] = {}
            for (filepath, url), (status, message) in results.items():
                if filepath not in links_by_file:
                    links_by_file[filepath] = []
                links_by_file[filepath].append((url, status, message))

            # Write results
            for filepath in sorted(links_by_file.keys()):
                f.write(f"\n{filepath}:\n")
                f.write("-" * 80 + "\n")

                for url, status, message in links_by_file[filepath]:
                    status_icon = "✓" if status == "OK" else "✗"
                    f.write(f"{status_icon} [{status:6s}] {url}\n")
                    if message and status != "OK":
                        f.write(f" {message}\n")

            f.write("\n" + "=" * 80 + "\n")
            f.write(f"Validation complete - {len(md_files)} markdown files scanned\n")
            f.write("=" * 80 + "\n")

        # Determine overall status (WARN not FAIL - broken links shouldn't block shipping)
        if broken_links == 0:
            status = "OK"
            note = f"All {total_links} links valid"
        else:
            status = "WARN"  # Documentation links are quality issues, not blockers
            note = f"{broken_links}/{total_links} broken links"

        return StepResult(self.name, status, elapsed_ms, note)

    def _find_markdown_files(self, root: Path) -> List[Path]:
        """Find all markdown files, excluding dependencies/caches/build dirs."""
        md_files = []

        for path in root.rglob("*.md"):
            # Use comprehensive exclusion filter for PROJECT files only
            if should_exclude_from_analysis(path):
                continue
            md_files.append(path)

        return md_files

    def _extract_links(
        self, md_files: List[Path], root: Path
    ) -> Dict[Tuple[str, str], None]:
        """Extract HTTP/HTTPS links from markdown files.

        Returns dict with (filepath, url) as keys for deduplication.
        """
        links: Dict[Tuple[str, str], None] = {}
        # Regex for markdown links and bare URLs
        link_pattern = re.compile(
            r"\[([^\]]+)\]\(([^)]+)\)|(?:^|[^(])(https?://[^\s\)<>]+)", re.MULTILINE
        )

        for filepath in md_files:
            try:
                with open(filepath, "r", encoding="utf-8") as f:
                    content = f.read()

                rel_path = str(filepath.relative_to(root))

                for match in link_pattern.finditer(content):
                    # Group 2 is markdown link URL, group 3 is bare URL
                    url = match.group(2) or match.group(3)
                    if url and (
                        url.startswith("http://") or url.startswith("https://")
                    ):
                        # Clean up URL (remove trailing punctuation)
                        url = url.rstrip(".,;:!?")
                        links[(rel_path, url)] = None

            except Exception:
                # Skip files that can't be read
                continue

        return links

    def _check_links(
        self, links: Dict[Tuple[str, str], None]
    ) -> Dict[Tuple[str, str], Tuple[str, str]]:
        """Check if links are valid.

        Returns dict with (filepath, url) -> (status, message).
        """
        import subprocess

        results = {}

        # Check if we have curl available
        try:
            subprocess.run(
                ["curl", "--version"],
                capture_output=True,
                check=True,
                timeout=5,
            )
            has_curl = True
        except (subprocess.SubprocessError, FileNotFoundError):
            has_curl = False

        # If no curl, try Python requests
        has_requests = False
        if not has_curl:
            try:
                import requests  # type: ignore[import-untyped]

                has_requests = True
            except ImportError:
                pass

        if not has_curl and not has_requests:
            # Can't validate links without tools
            for key in links:
                results[key] = ("SKIP", "curl or requests not available")
            return results

        # Limit number of links to check (prevent hanging on large docs)
        max_links = 50
        links_to_check = list(links.keys())[:max_links]

        if len(links) > max_links:
            # Mark excess links as skipped
            for key in list(links.keys())[max_links:]:
                results[key] = ("SKIP", f"Exceeded {max_links} link limit")

        # Check each link with overall timeout protection
        check_start = time.time()
        max_check_time = 120  # 2 minutes max for all link checking

        for idx, (filepath, url) in enumerate(links_to_check):
            # Check if we've exceeded overall time budget; mark this link
            # and all remaining ones as skipped.  (enumerate replaces the
            # previous O(n) list.index lookup.)
            if time.time() - check_start > max_check_time:
                for key in links_to_check[idx:]:
                    results[key] = ("SKIP", "Overall timeout exceeded")
                break

            status, message = self._check_single_link(url, has_curl, has_requests)
            results[(filepath, url)] = (status, message)

        return results

    def _check_single_link(
        self, url: str, has_curl: bool, has_requests: bool
    ) -> Tuple[str, str]:
        """Check a single link.

        Returns (status, message) tuple.
        """
        import subprocess

        if has_curl:
            try:
                # Use curl with HEAD request, follow redirects, shorter timeout
                result = subprocess.run(
                    [
                        "curl",
                        "-I",  # HEAD request
                        "-L",  # Follow redirects
                        "-s",  # Silent
                        "-o",
                        "/dev/null",  # Discard output
                        "-w",
                        "%{http_code}",  # Write HTTP code
                        "--connect-timeout",
                        "3",  # 3 second connection timeout
                        "--max-time",
                        "5",  # 5 second total timeout
                        url,
                    ],
                    capture_output=True,
                    text=True,
                    timeout=6,  # Python timeout slightly higher than curl's
                )

                http_code = result.stdout.strip()
                if http_code.startswith("2") or http_code.startswith("3"):
                    return ("OK", "")
                elif http_code == "000":
                    # curl writes 000 when no HTTP response was received
                    return ("FAIL", "Connection failed")
                else:
                    return ("FAIL", f"HTTP {http_code}")

            except subprocess.TimeoutExpired:
                return ("FAIL", "Timeout")
            except Exception as e:
                return ("FAIL", f"Error: {str(e)[:50]}")

        elif has_requests:
            try:
                import requests

                response = requests.head(
                    url,
                    allow_redirects=True,
                    timeout=5,
                    headers={"User-Agent": "pybundle-link-checker"},
                )

                if response.status_code < 400:
                    return ("OK", "")
                else:
                    return ("FAIL", f"HTTP {response.status_code}")

            except requests.exceptions.Timeout:
                return ("FAIL", "Timeout")
            except requests.exceptions.RequestException as e:
                return ("FAIL", f"Error: {str(e)[:50]}")

        return ("SKIP", "No validation method available")
|