rolfedh-doc-utils 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doc_utils/unused_attributes.py +94 -6
- doc_utils/validate_links.py +576 -0
- find_unused_attributes.py +47 -6
- {rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/METADATA +3 -2
- {rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/RECORD +10 -8
- {rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/entry_points.txt +1 -0
- {rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/top_level.txt +1 -0
- validate_links.py +202 -0
- {rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/WHEEL +0 -0
- {rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/licenses/LICENSE +0 -0
doc_utils/unused_attributes.py
CHANGED
@@ -6,19 +6,36 @@ Functions:
 - find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
 - scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
 - find_unused_attributes: Main function to return unused attributes.
+- find_attributes_files: Find all potential attributes files in the repository.
 """
 
 import os
 import re
-from
+from pathlib import Path
+from typing import Set, List, Optional
 
 def parse_attributes_file(attr_file: str) -> Set[str]:
     attributes = set()
-
-
-
-
-
+
+    # Check if file exists
+    if not os.path.exists(attr_file):
+        raise FileNotFoundError(f"Attributes file not found: {attr_file}")
+
+    # Check if it's a file (not a directory)
+    if not os.path.isfile(attr_file):
+        raise ValueError(f"Path is not a file: {attr_file}")
+
+    try:
+        with open(attr_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                match = re.match(r'^:([\w-]+):', line.strip())
+                if match:
+                    attributes.add(match.group(1))
+    except PermissionError:
+        raise PermissionError(f"Permission denied reading file: {attr_file}")
+    except UnicodeDecodeError as e:
+        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}")
+
     return attributes
 
 def find_adoc_files(root_dir: str) -> List[str]:
@@ -42,6 +59,77 @@ def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set
             used.add(match)
     return used
 
+def find_attributes_files(root_dir: str = '.') -> List[str]:
+    """Find all attributes.adoc files in the repository."""
+    attributes_files = []
+    root_path = Path(root_dir)
+
+    # Common attribute file patterns
+    patterns = ['**/attributes.adoc', '**/attributes*.adoc', '**/*attributes.adoc', '**/*-attributes.adoc']
+
+    for pattern in patterns:
+        for path in root_path.glob(pattern):
+            # Skip hidden directories and common build directories
+            parts = path.parts
+            if any(part.startswith('.') or part in ['target', 'build', 'node_modules', '.archive'] for part in parts):
+                continue
+            # Convert to string and avoid duplicates
+            str_path = str(path)
+            if str_path not in attributes_files:
+                attributes_files.append(str_path)
+
+    # Sort for consistent ordering
+    attributes_files.sort()
+    return attributes_files
+
+
+def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
+    """Interactive selection of attributes file from a list."""
+    if not attributes_files:
+        return None
+
+    if len(attributes_files) == 1:
+        print(f"Found attributes file: {attributes_files[0]}")
+        response = input("Use this file? (y/n): ").strip().lower()
+        if response == 'y':
+            return attributes_files[0]
+        else:
+            response = input("Enter the path to your attributes file: ").strip()
+            if os.path.exists(response) and os.path.isfile(response):
+                return response
+            else:
+                print(f"Error: File not found: {response}")
+                return None
+
+    # Multiple files found
+    print("\nFound multiple attributes files:")
+    for i, file_path in enumerate(attributes_files, 1):
+        print(f"  {i}. {file_path}")
+    print(f"  {len(attributes_files) + 1}. Enter custom path")
+
+    while True:
+        response = input(f"\nSelect option (1-{len(attributes_files) + 1}) or 'q' to quit: ").strip()
+        if response.lower() == 'q':
+            return None
+
+        try:
+            choice = int(response)
+            if 1 <= choice <= len(attributes_files):
+                return attributes_files[choice - 1]
+            elif choice == len(attributes_files) + 1:
+                response = input("Enter the path to your attributes file: ").strip()
+                if os.path.exists(response) and os.path.isfile(response):
+                    return response
+                else:
+                    print(f"Error: File not found: {response}")
+            else:
+                print(f"Invalid choice. Please enter a number between 1 and {len(attributes_files) + 1}")
+        except ValueError:
+            print("Invalid input. Please enter a number.")
+
+    return None
+
+
 def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
     attributes = parse_attributes_file(attr_file)
     adoc_files = find_adoc_files(adoc_root)
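Note: the two discovery helpers added above are meant to be chained: find_attributes_files() collects candidate attributes files and select_attributes_file() asks the user to pick one, after which the existing find_unused_attributes() does the scan. A minimal sketch of a caller wiring them together (the directory and printed format are illustrative, not part of the package):

    # Illustrative use of the helpers added in 0.1.11; paths are hypothetical.
    from doc_utils.unused_attributes import (
        find_attributes_files,
        find_unused_attributes,
        select_attributes_file,
    )

    candidates = find_attributes_files('.')           # e.g. ['modules/attributes.adoc']
    attr_file = select_attributes_file(candidates)    # interactive prompt; may return None
    if attr_file:
        for attr in find_unused_attributes(attr_file, '.'):
            print(f':{attr}: NOT USED')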
doc_utils/validate_links.py
ADDED
@@ -0,0 +1,576 @@
+#!/usr/bin/env python3
+"""
+Validate links in AsciiDoc documentation, checking for broken URLs and missing references.
+"""
+
+import os
+import re
+import time
+import json
+import hashlib
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Set
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urlparse, urljoin
+import urllib.request
+import urllib.error
+import socket
+from datetime import datetime, timedelta
+
+
+class LinkValidator:
+    """Validates links in AsciiDoc documentation."""
+
+    def __init__(self,
+                 timeout: int = 10,
+                 retry: int = 3,
+                 parallel: int = 10,
+                 cache_duration: int = 3600,
+                 transpositions: List[Tuple[str, str]] = None):
+        """
+        Initialize the link validator.
+
+        Args:
+            timeout: Timeout in seconds for each URL check
+            retry: Number of retries for failed URLs
+            parallel: Number of parallel URL checks
+            cache_duration: Cache duration in seconds
+            transpositions: List of (from_url, to_url) tuples for URL replacement
+        """
+        self.timeout = timeout
+        self.retry = retry
+        self.parallel = parallel
+        self.cache_duration = cache_duration
+        self.transpositions = transpositions or []
+        self.cache = {}
+        self.cache_file = Path.home() / '.cache' / 'doc-utils' / 'link-validation.json'
+        self._load_cache()
+
+    def _load_cache(self):
+        """Load cached validation results."""
+        if self.cache_file.exists():
+            try:
+                with open(self.cache_file, 'r') as f:
+                    cached_data = json.load(f)
+                # Check cache expiry
+                now = datetime.now().timestamp()
+                self.cache = {
+                    url: result for url, result in cached_data.items()
+                    if now - result.get('timestamp', 0) < self.cache_duration
+                }
+            except (json.JSONDecodeError, IOError):
+                self.cache = {}
+
+    def _save_cache(self):
+        """Save validation results to cache."""
+        self.cache_file.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.cache_file, 'w') as f:
+            json.dump(self.cache, f, indent=2)
+
+    def transpose_url(self, url: str) -> str:
+        """
+        Apply transposition rules to URL.
+
+        Args:
+            url: Original URL
+
+        Returns:
+            Transposed URL if rules match, otherwise original URL
+        """
+        for from_pattern, to_pattern in self.transpositions:
+            if url.startswith(from_pattern):
+                return url.replace(from_pattern, to_pattern, 1)
+        return url
+
+    def extract_links(self, file_path: str, attributes: Dict[str, str] = None) -> List[Dict]:
+        """
+        Extract all links from an AsciiDoc file.
+
+        Args:
+            file_path: Path to the AsciiDoc file
+            attributes: Dictionary of attribute definitions
+
+        Returns:
+            List of link dictionaries with url, text, type, line_number
+        """
+        links = []
+        attributes = attributes or {}
+
+        with open(file_path, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f, 1):
+                # Find link: macros
+                link_matches = re.finditer(r'link:([^[\]]+)\[([^\]]*)\]', line)
+                for match in link_matches:
+                    url = match.group(1)
+                    text = match.group(2)
+                    # Resolve attributes in URL
+                    resolved_url = self._resolve_attributes(url, attributes)
+                    links.append({
+                        'url': resolved_url,
+                        'original_url': url,
+                        'text': text,
+                        'type': 'external',
+                        'file': file_path,
+                        'line': line_num
+                    })
+
+                # Find xref: macros
+                xref_matches = re.finditer(r'xref:([^[\]]+)\[([^\]]*)\]', line)
+                for match in xref_matches:
+                    target = match.group(1)
+                    text = match.group(2)
+                    # Resolve attributes in target
+                    resolved_target = self._resolve_attributes(target, attributes)
+                    links.append({
+                        'url': resolved_target,
+                        'original_url': target,
+                        'text': text,
+                        'type': 'internal',
+                        'file': file_path,
+                        'line': line_num
+                    })
+
+                # Find image:: directives
+                image_matches = re.finditer(r'image::([^[\]]+)\[', line)
+                for match in image_matches:
+                    path = match.group(1)
+                    resolved_path = self._resolve_attributes(path, attributes)
+                    links.append({
+                        'url': resolved_path,
+                        'original_url': path,
+                        'text': 'image',
+                        'type': 'image',
+                        'file': file_path,
+                        'line': line_num
+                    })
+
+        return links
+
+    def _resolve_attributes(self, text: str, attributes: Dict[str, str]) -> str:
+        """Resolve attributes in text."""
+        resolved = text
+        max_iterations = 10
+
+        for _ in range(max_iterations):
+            # Find all attribute references
+            refs = re.findall(r'\{([^}]+)\}', resolved)
+            if not refs:
+                break
+
+            changes_made = False
+            for ref in refs:
+                if ref in attributes:
+                    resolved = resolved.replace(f'{{{ref}}}', attributes[ref])
+                    changes_made = True
+
+            if not changes_made:
+                break
+
+        return resolved
+
+    def validate_url(self, url: str, original_url: str = None, use_cache: bool = True) -> Dict:
+        """
+        Validate a single URL.
+
+        Args:
+            url: URL to validate
+            original_url: Original URL before transposition
+            use_cache: Whether to use cached results
+
+        Returns:
+            Dictionary with validation results
+        """
+        # Check cache first
+        cache_key = f"{url}:{original_url}" if original_url else url
+        if use_cache and cache_key in self.cache:
+            cached = self.cache[cache_key]
+            if datetime.now().timestamp() - cached['timestamp'] < self.cache_duration:
+                return cached
+
+        result = {
+            'url': url,
+            'original_url': original_url or url,
+            'status': None,
+            'error': None,
+            'redirect': None,
+            'timestamp': datetime.now().timestamp()
+        }
+
+        # Apply transposition if needed
+        check_url = self.transpose_url(url)
+        if check_url != url:
+            result['transposed_url'] = check_url
+
+        # Validate the URL
+        for attempt in range(self.retry):
+            try:
+                req = urllib.request.Request(
+                    check_url,
+                    headers={
+                        'User-Agent': 'Mozilla/5.0 (doc-utils link validator)',
+                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
+                    }
+                )
+
+                with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                    result['status'] = response.status
+                    # Check for redirect
+                    if response.url != check_url:
+                        result['redirect'] = response.url
+                    break
+
+            except urllib.error.HTTPError as e:
+                result['status'] = e.code
+                result['error'] = str(e)
+                if e.code not in [500, 502, 503, 504]:  # Don't retry client errors
+                    break
+
+            except urllib.error.URLError as e:
+                result['error'] = str(e.reason)
+
+            except socket.timeout:
+                result['error'] = 'Timeout'
+
+            except Exception as e:
+                result['error'] = str(e)
+
+            # Wait before retry
+            if attempt < self.retry - 1:
+                time.sleep(2 ** attempt)  # Exponential backoff
+
+        # Cache the result
+        self.cache[cache_key] = result
+
+        return result
+
+    def validate_internal_reference(self, ref: str, base_dir: str) -> Dict:
+        """
+        Validate an internal reference (xref).
+
+        Args:
+            ref: Reference path
+            base_dir: Base directory for relative paths
+
+        Returns:
+            Dictionary with validation results
+        """
+        result = {
+            'url': ref,
+            'type': 'internal',
+            'status': None,
+            'error': None
+        }
+
+        # Handle anchor references
+        if ref.startswith('#'):
+            # TODO: Check if anchor exists in current file
+            result['status'] = 'anchor'
+            return result
+
+        # Parse file and anchor
+        parts = ref.split('#', 1)
+        file_ref = parts[0]
+        anchor = parts[1] if len(parts) > 1 else None
+
+        # Resolve file path
+        if os.path.isabs(file_ref):
+            file_path = file_ref
+        else:
+            file_path = os.path.normpath(os.path.join(base_dir, file_ref))
+
+        # Check if file exists
+        if os.path.exists(file_path):
+            result['status'] = 'ok'
+            # TODO: If anchor provided, check if it exists in the file
+        else:
+            result['status'] = 'missing'
+            result['error'] = f"File not found: {file_path}"
+
+        return result
+
+    def validate_image(self, path: str, base_dir: str) -> Dict:
+        """
+        Validate an image path.
+
+        Args:
+            path: Image path
+            base_dir: Base directory for relative paths
+
+        Returns:
+            Dictionary with validation results
+        """
+        result = {
+            'url': path,
+            'type': 'image',
+            'status': None,
+            'error': None
+        }
+
+        # Check if it's a URL
+        if path.startswith(('http://', 'https://')):
+            return self.validate_url(path)
+
+        # Resolve file path
+        if os.path.isabs(path):
+            file_path = path
+        else:
+            file_path = os.path.normpath(os.path.join(base_dir, path))
+
+        # Check if file exists
+        if os.path.exists(file_path):
+            result['status'] = 'ok'
+        else:
+            result['status'] = 'missing'
+            result['error'] = f"Image not found: {file_path}"
+
+        return result
+
+    def validate_links_in_file(self, file_path: str, attributes: Dict[str, str] = None) -> List[Dict]:
+        """
+        Validate all links in a single file.
+
+        Args:
+            file_path: Path to the AsciiDoc file
+            attributes: Dictionary of attribute definitions
+
+        Returns:
+            List of validation results
+        """
+        links = self.extract_links(file_path, attributes)
+        results = []
+        base_dir = os.path.dirname(file_path)
+
+        # Group links by type for efficient processing
+        external_links = [l for l in links if l['type'] == 'external']
+        internal_links = [l for l in links if l['type'] == 'internal']
+        image_links = [l for l in links if l['type'] == 'image']
+
+        # Validate external links in parallel
+        if external_links:
+            with ThreadPoolExecutor(max_workers=self.parallel) as executor:
+                futures = {
+                    executor.submit(self.validate_url, link['url'], link['original_url']): link
+                    for link in external_links
+                }
+
+                for future in as_completed(futures):
+                    link = futures[future]
+                    try:
+                        result = future.result()
+                        result.update(link)
+                        results.append(result)
+                    except Exception as e:
+                        result = link.copy()
+                        result['error'] = str(e)
+                        results.append(result)
+
+        # Validate internal references
+        for link in internal_links:
+            result = self.validate_internal_reference(link['url'], base_dir)
+            result.update(link)
+            results.append(result)
+
+        # Validate image paths
+        for link in image_links:
+            result = self.validate_image(link['url'], base_dir)
+            result.update(link)
+            results.append(result)
+
+        return results
+
+    def validate_all(self, scan_dirs: List[str] = None,
+                     attributes_file: str = None,
+                     exclude_domains: List[str] = None) -> Dict:
+        """
+        Validate all links in documentation.
+
+        Args:
+            scan_dirs: Directories to scan
+            attributes_file: Path to attributes file
+            exclude_domains: Domains to skip
+
+        Returns:
+            Dictionary with all validation results
+        """
+        if scan_dirs is None:
+            scan_dirs = ['.']
+
+        exclude_domains = exclude_domains or []
+
+        # Load attributes
+        attributes = {}
+        if attributes_file and os.path.exists(attributes_file):
+            attributes = self._load_attributes(attributes_file)
+
+        # Collect all .adoc files
+        adoc_files = []
+        for scan_dir in scan_dirs:
+            for root, _, files in os.walk(scan_dir):
+                # Skip hidden directories
+                if '/.' in root:
+                    continue
+                for file in files:
+                    if file.endswith('.adoc'):
+                        adoc_files.append(os.path.join(root, file))
+
+        # Validate links in all files
+        all_results = {
+            'files': {},
+            'summary': {
+                'total': 0,
+                'valid': 0,
+                'broken': 0,
+                'warnings': 0,
+                'skipped': 0
+            },
+            'broken_links': [],
+            'warnings': [],
+            'transpositions': [
+                {'from': t[0], 'to': t[1]} for t in self.transpositions
+            ]
+        }
+
+        for file_path in adoc_files:
+            results = self.validate_links_in_file(file_path, attributes)
+
+            # Filter out excluded domains
+            filtered_results = []
+            for result in results:
+                url = result.get('url', '')
+                parsed = urlparse(url)
+                if parsed.netloc in exclude_domains:
+                    result['status'] = 'skipped'
+                    result['reason'] = 'Domain excluded'
+                filtered_results.append(result)
+
+            all_results['files'][file_path] = filtered_results
+
+            # Update summary
+            for result in filtered_results:
+                all_results['summary']['total'] += 1
+
+                if result.get('status') == 'skipped':
+                    all_results['summary']['skipped'] += 1
+                elif result.get('status') in ['ok', 200, 'anchor']:
+                    all_results['summary']['valid'] += 1
+                elif result.get('status') in [301, 302, 303, 307, 308]:
+                    all_results['summary']['warnings'] += 1
+                    all_results['warnings'].append(result)
+                elif result.get('error') or result.get('status') in ['missing', 404]:
+                    all_results['summary']['broken'] += 1
+                    all_results['broken_links'].append(result)
+                else:
+                    # Treat other status codes as broken
+                    all_results['summary']['broken'] += 1
+                    all_results['broken_links'].append(result)
+
+        # Save cache
+        self._save_cache()
+
+        return all_results
+
+    def _load_attributes(self, attributes_file: str) -> Dict[str, str]:
+        """Load attributes from file."""
+        attributes = {}
+
+        with open(attributes_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                # Match attribute definitions
+                match = re.match(r'^:([^:]+):\s*(.*)$', line)
+                if match:
+                    attr_name = match.group(1).strip()
+                    attr_value = match.group(2).strip()
+                    attributes[attr_name] = attr_value
+
+        return attributes
+
+
+def parse_transpositions(transpose_args: List[str]) -> List[Tuple[str, str]]:
+    """
+    Parse transposition arguments.
+
+    Args:
+        transpose_args: List of transposition strings in format "from--to"
+
+    Returns:
+        List of (from_url, to_url) tuples
+    """
+    transpositions = []
+
+    for arg in transpose_args or []:
+        parts = arg.split('--')
+        if len(parts) == 2:
+            from_url = parts[0].strip()
+            to_url = parts[1].strip()
+            transpositions.append((from_url, to_url))
+        else:
+            print(f"Warning: Invalid transposition format: {arg}")
+            print("Expected format: from_url--to_url")
+
+    return transpositions
+
+
+def format_results(results: Dict, verbose: bool = False) -> str:
+    """
+    Format validation results for display.
+
+    Args:
+        results: Validation results dictionary
+        verbose: Whether to show verbose output
+
+    Returns:
+        Formatted string for display
+    """
+    output = []
+
+    # Show transpositions if any
+    if results.get('transpositions'):
+        output.append("URL Transposition Rules:")
+        for trans in results['transpositions']:
+            output.append(f"  {trans['from']} → {trans['to']}")
+        output.append("")
+
+    # Summary
+    summary = results['summary']
+    output.append("SUMMARY:")
+    output.append(f"✓ Valid: {summary['valid']} links")
+    if summary['broken'] > 0:
+        output.append(f"✗ Broken: {summary['broken']} links")
+    if summary['warnings'] > 0:
+        output.append(f"⚠ Warnings: {summary['warnings']} redirects")
+    if summary['skipped'] > 0:
+        output.append(f"⊘ Skipped: {summary['skipped']} links (excluded domains)")
+    output.append("")
+
+    # Broken links
+    if results['broken_links']:
+        output.append("BROKEN LINKS:")
+        for i, link in enumerate(results['broken_links'], 1):
+            output.append(f"\n{i}. {link['file']}:{link['line']}")
+            if link.get('original_url') and link.get('original_url') != link.get('url'):
+                output.append(f"   Original: {link['original_url']}")
+                output.append(f"   Resolved: {link['url']}")
+            else:
+                output.append(f"   URL: {link['url']}")
+
+            if link.get('transposed_url'):
+                output.append(f"   Checked: {link['transposed_url']}")
+
+            if link.get('status'):
+                output.append(f"   Status: {link['status']}")
+            if link.get('error'):
+                output.append(f"   Error: {link['error']}")
+        output.append("")
+
+    # Warnings (redirects)
+    if results['warnings'] and verbose:
+        output.append("WARNINGS (Redirects):")
+        for i, link in enumerate(results['warnings'], 1):
+            output.append(f"\n{i}. {link['file']}:{link['line']}")
+            output.append(f"   URL: {link['url']}")
+            if link.get('redirect'):
+                output.append(f"   Redirects to: {link['redirect']}")
+        output.append("")
+
+    return '\n'.join(output)
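Note: besides the validate-links command added below, the new module can be driven directly from Python. A minimal sketch using only names defined above (the scan directory, attributes file name, and excluded domain are illustrative):

    from doc_utils.validate_links import LinkValidator, format_results

    validator = LinkValidator(timeout=10, retry=3, parallel=10)
    results = validator.validate_all(
        scan_dirs=['modules'],                     # hypothetical docs directory
        attributes_file='common-attributes.adoc',  # hypothetical attributes file
        exclude_domains=['localhost'],
    )
    print(format_results(results, verbose=True))
    print(f"{results['summary']['broken']} broken link(s) found")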
find_unused_attributes.py
CHANGED
@@ -1,23 +1,61 @@
 """
 Find Unused AsciiDoc Attributes
 
-Scans
+Scans an attributes file for attribute definitions (e.g., :version: 1.1), then recursively scans all .adoc files in the current directory (ignoring symlinks) for usages of those attributes (e.g., {version}).
+
+If no attributes file is specified, the tool will auto-discover attributes files in the repository and let you choose one interactively.
 
 Any attribute defined but not used in any .adoc file is reported as NOT USED in both the command line output and a timestamped output file.
 """
 
 import argparse
 import os
+import sys
 from datetime import datetime
-from doc_utils.unused_attributes import find_unused_attributes
+from doc_utils.unused_attributes import find_unused_attributes, find_attributes_files, select_attributes_file
 
 def main():
     parser = argparse.ArgumentParser(description='Find unused AsciiDoc attributes.')
-    parser.add_argument(
+    parser.add_argument(
+        'attributes_file',
+        nargs='?',  # Make it optional
+        help='Path to the attributes file. If not specified, auto-discovers attributes files.'
+    )
     parser.add_argument('-o', '--output', action='store_true', help='Write results to a timestamped txt file in your home directory.')
     args = parser.parse_args()
 
-
+    # Determine which attributes file to use
+    if args.attributes_file:
+        # User specified a file
+        attr_file = args.attributes_file
+    else:
+        # Auto-discover attributes files
+        print("Searching for attributes files...")
+        attributes_files = find_attributes_files('.')
+
+        if not attributes_files:
+            print("No attributes files found in the repository.")
+            print("You can specify a file directly: find-unused-attributes <path-to-attributes-file>")
+            return 1
+
+        attr_file = select_attributes_file(attributes_files)
+        if not attr_file:
+            print("No attributes file selected.")
+            return 1
+
+    try:
+        unused = find_unused_attributes(attr_file, '.')
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        print(f"\nPlease ensure the file '{attr_file}' exists.")
+        print("Usage: find-unused-attributes [<path-to-attributes-file>]")
+        return 1
+    except (ValueError, PermissionError) as e:
+        print(f"Error: {e}")
+        return 1
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        return 1
 
     lines = [f":{attr}: NOT USED" for attr in unused]
     output = '\n'.join(lines)
@@ -33,9 +71,12 @@ def main():
     home_dir = os.path.expanduser('~')
     filename = os.path.join(home_dir, f'unused_attributes_{timestamp}.txt')
     with open(filename, 'w', encoding='utf-8') as f:
-        f.write('Unused attributes in ' +
+        f.write('Unused attributes in ' + attr_file + '\n')
         f.write(output + '\n')
     print(f'Results written to: {filename}')
 
+    return 0
+
 if __name__ == '__main__':
-
+    import sys
+    sys.exit(main())
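Note: the module docstring above describes the convention the tool depends on: an attributes file defines values as ':name: value' lines, and modules consume them as '{name}' references. A tiny illustration of that convention using the definition pattern visible in doc_utils/unused_attributes.py (the usage pattern shown here is a simplified stand-in; the exact pattern used by scan_for_attribute_usage is not part of this diff):

    import re

    definition_line = ':version: 1.1'                          # line from an attributes file
    usage_line = 'Install version {version} of the product.'   # line from a .adoc module

    print(re.match(r'^:([\w-]+):', definition_line).group(1))  # -> version
    print(re.findall(r'\{([\w-]+)\}', usage_line))             # -> ['version']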
{rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rolfedh-doc-utils
-Version: 0.1.
+Version: 0.1.11
 Summary: CLI tools for AsciiDoc documentation projects
 Author: Rolfe Dlugy-Hegwer
 License: MIT License
@@ -79,9 +79,10 @@ pip install -e .
 
 | Tool | Description | Usage |
 |------|-------------|-------|
+| **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
 | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
 | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
-| **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
+| **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
 | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
 | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
 | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |
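Note: the --transpose value advertised for validate-links in the table uses the 'from_url--to_url' form parsed by parse_transpositions() in doc_utils/validate_links.py; LinkValidator.transpose_url() then rewrites a matching URL prefix before the link is checked. A small sketch with the placeholder hosts from the table (not real sites):

    from doc_utils.validate_links import LinkValidator, parse_transpositions

    rules = parse_transpositions(['https://prod--https://preview'])
    validator = LinkValidator(transpositions=rules)
    # A production URL gets checked against the preview host instead.
    print(validator.transpose_url('https://prod/getting-started.html'))
    # -> https://preview/getting-started.html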
{rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/RECORD
CHANGED
@@ -2,9 +2,10 @@ archive_unused_files.py,sha256=KMC5a1WL3rZ5owoVnncvfpT1YeMKbVXq9giHvadDgbM,1936
 archive_unused_images.py,sha256=PG2o3haovYckgfhoPhl6KRG_a9czyZuqlLkzkupKTCY,1526
 check_scannability.py,sha256=gcM-vFXKHGP_yFBz7-V5xbXWhIMmtMzBYIGwP9CFbzI,5140
 extract_link_attributes.py,sha256=utDM1FE-VEr649HhIH5BreXvxDNLnnAJO9dB5rs5f9Q,2535
-find_unused_attributes.py,sha256=
+find_unused_attributes.py,sha256=V8qI7O0u18ExbSho-hLfyBeRVqowLKGrFugY55JxZN0,3023
 format_asciidoc_spacing.py,sha256=ROp-cdMs2_hk8H4z5ljT0iDgGtsiECZ8TVjjcN_oOWE,3874
 replace_link_attributes.py,sha256=vg_aufw7dKXvh_epCKRNq_hEBMU_9crZ_JyJPpxSMNk,6454
+validate_links.py,sha256=DoSB0h3mmjzTY2f0oN6ybTP6jCNkzN7T3qM6oXc2AwE,5585
 doc_utils/__init__.py,sha256=qqZR3lohzkP63soymrEZPBGzzk6-nFzi4_tSffjmu_0,74
 doc_utils/extract_link_attributes.py,sha256=qBpJuTXNrhy15klpqC0iELZzcSLztEzMSmhEnKyQZT0,15574
 doc_utils/file_utils.py,sha256=fpTh3xx759sF8sNocdn_arsP3KAv8XA6cTQTAVIZiZg,4247
@@ -13,11 +14,12 @@ doc_utils/replace_link_attributes.py,sha256=kBiePbxjQn3O2rzqmYY8Mqy_mJgZ6yw048vS
 doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
 doc_utils/topic_map_parser.py,sha256=tKcIO1m9r2K6dvPRGue58zqMr0O2zKU1gnZMzEE3U6o,4571
 doc_utils/unused_adoc.py,sha256=2cbqcYr1os2EhETUU928BlPRlsZVSdI00qaMhqjSIqQ,5263
-doc_utils/unused_attributes.py,sha256=
+doc_utils/unused_attributes.py,sha256=EjTtWIKW_aXsR1JOgw5RSDVAqitJ_NfRMVOXVGaiWTY,5282
 doc_utils/unused_images.py,sha256=nqn36Bbrmon2KlGlcaruNjJJvTQ8_9H0WU9GvCW7rW8,1456
-
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
+doc_utils/validate_links.py,sha256=iBGXnwdeLlgIT3fo3v01ApT5k0X2FtctsvkrE6E3VMk,19610
+rolfedh_doc_utils-0.1.11.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
+rolfedh_doc_utils-0.1.11.dist-info/METADATA,sha256=22seO4nEGTjlibUZ8tPRxTFyYpmLRsfY7sZssteQl1g,7386
+rolfedh_doc_utils-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rolfedh_doc_utils-0.1.11.dist-info/entry_points.txt,sha256=2J4Ojc3kkuArpe2xcUOPc0LxSWCmnctvw8hy8zpnbO4,418
+rolfedh_doc_utils-0.1.11.dist-info/top_level.txt,sha256=1w0JWD7w7gnM5Sga2K4fJieNZ7CHPTAf0ozYk5iIlmo,182
+rolfedh_doc_utils-0.1.11.dist-info/RECORD,,
validate_links.py
ADDED
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+Validate links in AsciiDoc documentation.
+
+This tool checks all links in AsciiDoc files for validity, including:
+- External HTTP/HTTPS links
+- Internal cross-references (xref)
+- Image paths
+"""
+
+import argparse
+import sys
+import json
+from doc_utils.validate_links import LinkValidator, parse_transpositions, format_results
+
+
+def main():
+    """Main entry point for the validate-links CLI tool."""
+    parser = argparse.ArgumentParser(
+        description='Validate links in AsciiDoc documentation',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic validation
+  validate-links
+
+  # Validate against preview environment
+  validate-links --transpose "https://docs.redhat.com--https://preview.docs.redhat.com"
+
+  # Multiple transpositions
+  validate-links \\
+    --transpose "https://docs.redhat.com--https://preview.docs.redhat.com" \\
+    --transpose "https://access.redhat.com--https://stage.access.redhat.com"
+
+  # With specific options
+  validate-links \\
+    --transpose "https://docs.example.com--https://preview.example.com" \\
+    --attributes-file common-attributes.adoc \\
+    --timeout 15 \\
+    --retry 3 \\
+    --parallel 20 \\
+    --exclude-domain localhost \\
+    --exclude-domain example.com
+
+  # Export results to JSON
+  validate-links --output report.json --format json
+"""
+    )
+
+    parser.add_argument(
+        '--transpose',
+        action='append',
+        help='Transpose URLs from production to preview/staging (format: from_url--to_url)'
+    )
+
+    parser.add_argument(
+        '--attributes-file',
+        help='Path to the AsciiDoc attributes file'
+    )
+
+    parser.add_argument(
+        '--scan-dir',
+        action='append',
+        help='Directory to scan for .adoc files (can be used multiple times, default: current directory)'
+    )
+
+    parser.add_argument(
+        '--timeout',
+        type=int,
+        default=10,
+        help='Timeout in seconds for each URL check (default: 10)'
+    )
+
+    parser.add_argument(
+        '--retry',
+        type=int,
+        default=3,
+        help='Number of retries for failed URLs (default: 3)'
+    )
+
+    parser.add_argument(
+        '--parallel',
+        type=int,
+        default=10,
+        help='Number of parallel URL checks (default: 10)'
+    )
+
+    parser.add_argument(
+        '--cache-duration',
+        type=int,
+        default=3600,
+        help='Cache duration in seconds (default: 3600)'
+    )
+
+    parser.add_argument(
+        '--exclude-domain',
+        action='append',
+        dest='exclude_domains',
+        help='Domain to exclude from validation (can be used multiple times)'
+    )
+
+    parser.add_argument(
+        '--no-cache',
+        action='store_true',
+        help='Disable caching of validation results'
+    )
+
+    parser.add_argument(
+        '--output',
+        help='Output file for results'
+    )
+
+    parser.add_argument(
+        '--format',
+        choices=['text', 'json', 'junit'],
+        default='text',
+        help='Output format (default: text)'
+    )
+
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Show verbose output including warnings'
+    )
+
+    parser.add_argument(
+        '--fail-on-broken',
+        action='store_true',
+        help='Exit with error code if broken links are found'
+    )
+
+    args = parser.parse_args()
+
+    # Parse transpositions
+    transpositions = parse_transpositions(args.transpose)
+
+    # Show configuration
+    print("Validating links in documentation...")
+    if args.attributes_file:
+        print(f"Loading attributes from {args.attributes_file}")
+    if transpositions:
+        print("\nURL Transposition Rules:")
+        for from_url, to_url in transpositions:
+            print(f"  {from_url} → {to_url}")
+        print()
+
+    # Create validator
+    validator = LinkValidator(
+        timeout=args.timeout,
+        retry=args.retry,
+        parallel=args.parallel,
+        cache_duration=args.cache_duration if not args.no_cache else 0,
+        transpositions=transpositions
+    )
+
+    try:
+        # Run validation
+        results = validator.validate_all(
+            scan_dirs=args.scan_dir,
+            attributes_file=args.attributes_file,
+            exclude_domains=args.exclude_domains
+        )
+
+        # Format output
+        if args.format == 'json':
+            output = json.dumps(results, indent=2)
+        elif args.format == 'junit':
+            # TODO: Implement JUnit XML format
+            output = format_results(results, verbose=args.verbose)
+        else:
+            output = format_results(results, verbose=args.verbose)
+
+        # Save or print output
+        if args.output:
+            with open(args.output, 'w', encoding='utf-8') as f:
+                f.write(output)
+            print(f"Results saved to {args.output}")
+            # Still print summary to console
+            if args.format != 'text':
+                summary = results['summary']
+                print(f"\nSummary: {summary['valid']} valid, {summary['broken']} broken, "
+                      f"{summary['warnings']} warnings")
+        else:
+            print(output)
+
+        # Exit code
+        if args.fail_on_broken and results['summary']['broken'] > 0:
+            sys.exit(1)
+
+    except KeyboardInterrupt:
+        print("\nValidation cancelled.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
{rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/WHEEL
File without changes

{rolfedh_doc_utils-0.1.10.dist-info → rolfedh_doc_utils-0.1.11.dist-info}/licenses/LICENSE
File without changes