rolfedh-doc-utils 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doc_utils/unused_attributes.py CHANGED
@@ -6,19 +6,36 @@ Functions:
 - find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
 - scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
 - find_unused_attributes: Main function to return unused attributes.
+- find_attributes_files: Find all potential attributes files in the repository.
 """
 
 import os
 import re
-from typing import Set, List
+from pathlib import Path
+from typing import Set, List, Optional
 
 def parse_attributes_file(attr_file: str) -> Set[str]:
     attributes = set()
-    with open(attr_file, 'r', encoding='utf-8') as f:
-        for line in f:
-            match = re.match(r'^:([\w-]+):', line.strip())
-            if match:
-                attributes.add(match.group(1))
+
+    # Check if file exists
+    if not os.path.exists(attr_file):
+        raise FileNotFoundError(f"Attributes file not found: {attr_file}")
+
+    # Check if it's a file (not a directory)
+    if not os.path.isfile(attr_file):
+        raise ValueError(f"Path is not a file: {attr_file}")
+
+    try:
+        with open(attr_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                match = re.match(r'^:([\w-]+):', line.strip())
+                if match:
+                    attributes.add(match.group(1))
+    except PermissionError:
+        raise PermissionError(f"Permission denied reading file: {attr_file}")
+    except UnicodeDecodeError as e:
+        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}")
+
     return attributes
 
 def find_adoc_files(root_dir: str) -> List[str]:
@@ -42,6 +59,77 @@ def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set
             used.add(match)
     return used
 
+def find_attributes_files(root_dir: str = '.') -> List[str]:
+    """Find all attributes.adoc files in the repository."""
+    attributes_files = []
+    root_path = Path(root_dir)
+
+    # Common attribute file patterns
+    patterns = ['**/attributes.adoc', '**/attributes*.adoc', '**/*attributes.adoc', '**/*-attributes.adoc']
+
+    for pattern in patterns:
+        for path in root_path.glob(pattern):
+            # Skip hidden directories and common build directories
+            parts = path.parts
+            if any(part.startswith('.') or part in ['target', 'build', 'node_modules', '.archive'] for part in parts):
+                continue
+            # Convert to string and avoid duplicates
+            str_path = str(path)
+            if str_path not in attributes_files:
+                attributes_files.append(str_path)
+
+    # Sort for consistent ordering
+    attributes_files.sort()
+    return attributes_files
+
+
+def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
+    """Interactive selection of attributes file from a list."""
+    if not attributes_files:
+        return None
+
+    if len(attributes_files) == 1:
+        print(f"Found attributes file: {attributes_files[0]}")
+        response = input("Use this file? (y/n): ").strip().lower()
+        if response == 'y':
+            return attributes_files[0]
+        else:
+            response = input("Enter the path to your attributes file: ").strip()
+            if os.path.exists(response) and os.path.isfile(response):
+                return response
+            else:
+                print(f"Error: File not found: {response}")
+                return None
+
+    # Multiple files found
+    print("\nFound multiple attributes files:")
+    for i, file_path in enumerate(attributes_files, 1):
+        print(f"  {i}. {file_path}")
+    print(f"  {len(attributes_files) + 1}. Enter custom path")
+
+    while True:
+        response = input(f"\nSelect option (1-{len(attributes_files) + 1}) or 'q' to quit: ").strip()
+        if response.lower() == 'q':
+            return None
+
+        try:
+            choice = int(response)
+            if 1 <= choice <= len(attributes_files):
+                return attributes_files[choice - 1]
+            elif choice == len(attributes_files) + 1:
+                response = input("Enter the path to your attributes file: ").strip()
+                if os.path.exists(response) and os.path.isfile(response):
+                    return response
+                else:
+                    print(f"Error: File not found: {response}")
+            else:
+                print(f"Invalid choice. Please enter a number between 1 and {len(attributes_files) + 1}")
+        except ValueError:
+            print("Invalid input. Please enter a number.")
+
+    return None
+
+
 def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
     attributes = parse_attributes_file(attr_file)
     adoc_files = find_adoc_files(adoc_root)
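
The two new helpers are designed to chain: discovery first, then interactive selection, then the existing scan. A minimal sketch of that sequence, mirroring what the `find-unused-attributes` CLI change further down does (the `repo_root` value and the printing loop are illustrative):

```python
# Sketch: chaining the new discovery helpers with the existing scan.
# repo_root is illustrative; select_attributes_file() prompts on stdin.
from doc_utils.unused_attributes import (
    find_attributes_files,
    find_unused_attributes,
    select_attributes_file,
)

repo_root = '.'
candidates = find_attributes_files(repo_root)   # sorted, de-duplicated paths
attr_file = select_attributes_file(candidates)  # returns None if the user quits
if attr_file:
    for attr in find_unused_attributes(attr_file, repo_root):
        print(f':{attr}: NOT USED')
```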
doc_utils/validate_links.py ADDED
@@ -0,0 +1,576 @@
+#!/usr/bin/env python3
+"""
+Validate links in AsciiDoc documentation, checking for broken URLs and missing references.
+"""
+
+import os
+import re
+import time
+import json
+import hashlib
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Set
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urlparse, urljoin
+import urllib.request
+import urllib.error
+import socket
+from datetime import datetime, timedelta
+
+
+class LinkValidator:
+    """Validates links in AsciiDoc documentation."""
+
+    def __init__(self,
+                 timeout: int = 10,
+                 retry: int = 3,
+                 parallel: int = 10,
+                 cache_duration: int = 3600,
+                 transpositions: List[Tuple[str, str]] = None):
+        """
+        Initialize the link validator.
+
+        Args:
+            timeout: Timeout in seconds for each URL check
+            retry: Number of retries for failed URLs
+            parallel: Number of parallel URL checks
+            cache_duration: Cache duration in seconds
+            transpositions: List of (from_url, to_url) tuples for URL replacement
+        """
+        self.timeout = timeout
+        self.retry = retry
+        self.parallel = parallel
+        self.cache_duration = cache_duration
+        self.transpositions = transpositions or []
+        self.cache = {}
+        self.cache_file = Path.home() / '.cache' / 'doc-utils' / 'link-validation.json'
+        self._load_cache()
+
+    def _load_cache(self):
+        """Load cached validation results."""
+        if self.cache_file.exists():
+            try:
+                with open(self.cache_file, 'r') as f:
+                    cached_data = json.load(f)
+                # Check cache expiry
+                now = datetime.now().timestamp()
+                self.cache = {
+                    url: result for url, result in cached_data.items()
+                    if now - result.get('timestamp', 0) < self.cache_duration
+                }
+            except (json.JSONDecodeError, IOError):
+                self.cache = {}
+
+    def _save_cache(self):
+        """Save validation results to cache."""
+        self.cache_file.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.cache_file, 'w') as f:
+            json.dump(self.cache, f, indent=2)
+
+    def transpose_url(self, url: str) -> str:
+        """
+        Apply transposition rules to URL.
+
+        Args:
+            url: Original URL
+
+        Returns:
+            Transposed URL if rules match, otherwise original URL
+        """
+        for from_pattern, to_pattern in self.transpositions:
+            if url.startswith(from_pattern):
+                return url.replace(from_pattern, to_pattern, 1)
+        return url
+
+    def extract_links(self, file_path: str, attributes: Dict[str, str] = None) -> List[Dict]:
+        """
+        Extract all links from an AsciiDoc file.
+
+        Args:
+            file_path: Path to the AsciiDoc file
+            attributes: Dictionary of attribute definitions
+
+        Returns:
+            List of link dictionaries with url, text, type, line_number
+        """
+        links = []
+        attributes = attributes or {}
+
+        with open(file_path, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f, 1):
+                # Find link: macros
+                link_matches = re.finditer(r'link:([^[\]]+)\[([^\]]*)\]', line)
+                for match in link_matches:
+                    url = match.group(1)
+                    text = match.group(2)
+                    # Resolve attributes in URL
+                    resolved_url = self._resolve_attributes(url, attributes)
+                    links.append({
+                        'url': resolved_url,
+                        'original_url': url,
+                        'text': text,
+                        'type': 'external',
+                        'file': file_path,
+                        'line': line_num
+                    })
+
+                # Find xref: macros
+                xref_matches = re.finditer(r'xref:([^[\]]+)\[([^\]]*)\]', line)
+                for match in xref_matches:
+                    target = match.group(1)
+                    text = match.group(2)
+                    # Resolve attributes in target
+                    resolved_target = self._resolve_attributes(target, attributes)
+                    links.append({
+                        'url': resolved_target,
+                        'original_url': target,
+                        'text': text,
+                        'type': 'internal',
+                        'file': file_path,
+                        'line': line_num
+                    })
+
+                # Find image:: directives
+                image_matches = re.finditer(r'image::([^[\]]+)\[', line)
+                for match in image_matches:
+                    path = match.group(1)
+                    resolved_path = self._resolve_attributes(path, attributes)
+                    links.append({
+                        'url': resolved_path,
+                        'original_url': path,
+                        'text': 'image',
+                        'type': 'image',
+                        'file': file_path,
+                        'line': line_num
+                    })
+
+        return links
+
+    def _resolve_attributes(self, text: str, attributes: Dict[str, str]) -> str:
+        """Resolve attributes in text."""
+        resolved = text
+        max_iterations = 10
+
+        for _ in range(max_iterations):
+            # Find all attribute references
+            refs = re.findall(r'\{([^}]+)\}', resolved)
+            if not refs:
+                break
+
+            changes_made = False
+            for ref in refs:
+                if ref in attributes:
+                    resolved = resolved.replace(f'{{{ref}}}', attributes[ref])
+                    changes_made = True
+
+            if not changes_made:
+                break
+
+        return resolved
+
+    def validate_url(self, url: str, original_url: str = None, use_cache: bool = True) -> Dict:
+        """
+        Validate a single URL.
+
+        Args:
+            url: URL to validate
+            original_url: Original URL before transposition
+            use_cache: Whether to use cached results
+
+        Returns:
+            Dictionary with validation results
+        """
+        # Check cache first
+        cache_key = f"{url}:{original_url}" if original_url else url
+        if use_cache and cache_key in self.cache:
+            cached = self.cache[cache_key]
+            if datetime.now().timestamp() - cached['timestamp'] < self.cache_duration:
+                return cached
+
+        result = {
+            'url': url,
+            'original_url': original_url or url,
+            'status': None,
+            'error': None,
+            'redirect': None,
+            'timestamp': datetime.now().timestamp()
+        }
+
+        # Apply transposition if needed
+        check_url = self.transpose_url(url)
+        if check_url != url:
+            result['transposed_url'] = check_url
+
+        # Validate the URL
+        for attempt in range(self.retry):
+            try:
+                req = urllib.request.Request(
+                    check_url,
+                    headers={
+                        'User-Agent': 'Mozilla/5.0 (doc-utils link validator)',
+                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
+                    }
+                )
+
+                with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                    result['status'] = response.status
+                    # Check for redirect
+                    if response.url != check_url:
+                        result['redirect'] = response.url
+                break
+
+            except urllib.error.HTTPError as e:
+                result['status'] = e.code
+                result['error'] = str(e)
+                if e.code not in [500, 502, 503, 504]:  # Don't retry client errors
+                    break
+
+            except urllib.error.URLError as e:
+                result['error'] = str(e.reason)
+
+            except socket.timeout:
+                result['error'] = 'Timeout'
+
+            except Exception as e:
+                result['error'] = str(e)
+
+            # Wait before retry
+            if attempt < self.retry - 1:
+                time.sleep(2 ** attempt)  # Exponential backoff
+
+        # Cache the result
+        self.cache[cache_key] = result
+
+        return result
+
+    def validate_internal_reference(self, ref: str, base_dir: str) -> Dict:
+        """
+        Validate an internal reference (xref).
+
+        Args:
+            ref: Reference path
+            base_dir: Base directory for relative paths
+
+        Returns:
+            Dictionary with validation results
+        """
+        result = {
+            'url': ref,
+            'type': 'internal',
+            'status': None,
+            'error': None
+        }
+
+        # Handle anchor references
+        if ref.startswith('#'):
+            # TODO: Check if anchor exists in current file
+            result['status'] = 'anchor'
+            return result
+
+        # Parse file and anchor
+        parts = ref.split('#', 1)
+        file_ref = parts[0]
+        anchor = parts[1] if len(parts) > 1 else None
+
+        # Resolve file path
+        if os.path.isabs(file_ref):
+            file_path = file_ref
+        else:
+            file_path = os.path.normpath(os.path.join(base_dir, file_ref))
+
+        # Check if file exists
+        if os.path.exists(file_path):
+            result['status'] = 'ok'
+            # TODO: If anchor provided, check if it exists in the file
+        else:
+            result['status'] = 'missing'
+            result['error'] = f"File not found: {file_path}"
+
+        return result
+
+    def validate_image(self, path: str, base_dir: str) -> Dict:
+        """
+        Validate an image path.
+
+        Args:
+            path: Image path
+            base_dir: Base directory for relative paths
+
+        Returns:
+            Dictionary with validation results
+        """
+        result = {
+            'url': path,
+            'type': 'image',
+            'status': None,
+            'error': None
+        }
+
+        # Check if it's a URL
+        if path.startswith(('http://', 'https://')):
+            return self.validate_url(path)
+
+        # Resolve file path
+        if os.path.isabs(path):
+            file_path = path
+        else:
+            file_path = os.path.normpath(os.path.join(base_dir, path))
+
+        # Check if file exists
+        if os.path.exists(file_path):
+            result['status'] = 'ok'
+        else:
+            result['status'] = 'missing'
+            result['error'] = f"Image not found: {file_path}"
+
+        return result
+
+    def validate_links_in_file(self, file_path: str, attributes: Dict[str, str] = None) -> List[Dict]:
+        """
+        Validate all links in a single file.
+
+        Args:
+            file_path: Path to the AsciiDoc file
+            attributes: Dictionary of attribute definitions
+
+        Returns:
+            List of validation results
+        """
+        links = self.extract_links(file_path, attributes)
+        results = []
+        base_dir = os.path.dirname(file_path)
+
+        # Group links by type for efficient processing
+        external_links = [l for l in links if l['type'] == 'external']
+        internal_links = [l for l in links if l['type'] == 'internal']
+        image_links = [l for l in links if l['type'] == 'image']
+
+        # Validate external links in parallel
+        if external_links:
+            with ThreadPoolExecutor(max_workers=self.parallel) as executor:
+                futures = {
+                    executor.submit(self.validate_url, link['url'], link['original_url']): link
+                    for link in external_links
+                }
+
+                for future in as_completed(futures):
+                    link = futures[future]
+                    try:
+                        result = future.result()
+                        result.update(link)
+                        results.append(result)
+                    except Exception as e:
+                        result = link.copy()
+                        result['error'] = str(e)
+                        results.append(result)
+
+        # Validate internal references
+        for link in internal_links:
+            result = self.validate_internal_reference(link['url'], base_dir)
+            result.update(link)
+            results.append(result)
+
+        # Validate image paths
+        for link in image_links:
+            result = self.validate_image(link['url'], base_dir)
+            result.update(link)
+            results.append(result)
+
+        return results
+
+    def validate_all(self, scan_dirs: List[str] = None,
+                     attributes_file: str = None,
+                     exclude_domains: List[str] = None) -> Dict:
+        """
+        Validate all links in documentation.
+
+        Args:
+            scan_dirs: Directories to scan
+            attributes_file: Path to attributes file
+            exclude_domains: Domains to skip
+
+        Returns:
+            Dictionary with all validation results
+        """
+        if scan_dirs is None:
+            scan_dirs = ['.']
+
+        exclude_domains = exclude_domains or []
+
+        # Load attributes
+        attributes = {}
+        if attributes_file and os.path.exists(attributes_file):
+            attributes = self._load_attributes(attributes_file)
+
+        # Collect all .adoc files
+        adoc_files = []
+        for scan_dir in scan_dirs:
+            for root, _, files in os.walk(scan_dir):
+                # Skip hidden directories
+                if '/.' in root:
+                    continue
+                for file in files:
+                    if file.endswith('.adoc'):
+                        adoc_files.append(os.path.join(root, file))
+
+        # Validate links in all files
+        all_results = {
+            'files': {},
+            'summary': {
+                'total': 0,
+                'valid': 0,
+                'broken': 0,
+                'warnings': 0,
+                'skipped': 0
+            },
+            'broken_links': [],
+            'warnings': [],
+            'transpositions': [
+                {'from': t[0], 'to': t[1]} for t in self.transpositions
+            ]
+        }
+
+        for file_path in adoc_files:
+            results = self.validate_links_in_file(file_path, attributes)
+
+            # Filter out excluded domains
+            filtered_results = []
+            for result in results:
+                url = result.get('url', '')
+                parsed = urlparse(url)
+                if parsed.netloc in exclude_domains:
+                    result['status'] = 'skipped'
+                    result['reason'] = 'Domain excluded'
+                filtered_results.append(result)
+
+            all_results['files'][file_path] = filtered_results
+
+            # Update summary
+            for result in filtered_results:
+                all_results['summary']['total'] += 1
+
+                if result.get('status') == 'skipped':
+                    all_results['summary']['skipped'] += 1
+                elif result.get('status') in ['ok', 200, 'anchor']:
+                    all_results['summary']['valid'] += 1
+                elif result.get('status') in [301, 302, 303, 307, 308]:
+                    all_results['summary']['warnings'] += 1
+                    all_results['warnings'].append(result)
+                elif result.get('error') or result.get('status') in ['missing', 404]:
+                    all_results['summary']['broken'] += 1
+                    all_results['broken_links'].append(result)
+                else:
+                    # Treat other status codes as broken
+                    all_results['summary']['broken'] += 1
+                    all_results['broken_links'].append(result)
+
+        # Save cache
+        self._save_cache()
+
+        return all_results
+
+    def _load_attributes(self, attributes_file: str) -> Dict[str, str]:
+        """Load attributes from file."""
+        attributes = {}
+
+        with open(attributes_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                # Match attribute definitions
+                match = re.match(r'^:([^:]+):\s*(.*)$', line)
+                if match:
+                    attr_name = match.group(1).strip()
+                    attr_value = match.group(2).strip()
+                    attributes[attr_name] = attr_value
+
+        return attributes
+
+
+def parse_transpositions(transpose_args: List[str]) -> List[Tuple[str, str]]:
+    """
+    Parse transposition arguments.
+
+    Args:
+        transpose_args: List of transposition strings in format "from--to"
+
+    Returns:
+        List of (from_url, to_url) tuples
+    """
+    transpositions = []
+
+    for arg in transpose_args or []:
+        parts = arg.split('--')
+        if len(parts) == 2:
+            from_url = parts[0].strip()
+            to_url = parts[1].strip()
+            transpositions.append((from_url, to_url))
+        else:
+            print(f"Warning: Invalid transposition format: {arg}")
+            print("Expected format: from_url--to_url")
+
+    return transpositions
+
+
+def format_results(results: Dict, verbose: bool = False) -> str:
+    """
+    Format validation results for display.
+
+    Args:
+        results: Validation results dictionary
+        verbose: Whether to show verbose output
+
+    Returns:
+        Formatted string for display
+    """
+    output = []
+
+    # Show transpositions if any
+    if results.get('transpositions'):
+        output.append("URL Transposition Rules:")
+        for trans in results['transpositions']:
+            output.append(f"  {trans['from']} → {trans['to']}")
+        output.append("")
+
+    # Summary
+    summary = results['summary']
+    output.append("SUMMARY:")
+    output.append(f"✓ Valid: {summary['valid']} links")
+    if summary['broken'] > 0:
+        output.append(f"✗ Broken: {summary['broken']} links")
+    if summary['warnings'] > 0:
+        output.append(f"⚠ Warnings: {summary['warnings']} redirects")
+    if summary['skipped'] > 0:
+        output.append(f"⊘ Skipped: {summary['skipped']} links (excluded domains)")
+    output.append("")
+
+    # Broken links
+    if results['broken_links']:
+        output.append("BROKEN LINKS:")
+        for i, link in enumerate(results['broken_links'], 1):
+            output.append(f"\n{i}. {link['file']}:{link['line']}")
+            if link.get('original_url') and link.get('original_url') != link.get('url'):
+                output.append(f"  Original: {link['original_url']}")
+                output.append(f"  Resolved: {link['url']}")
+            else:
+                output.append(f"  URL: {link['url']}")
+
+            if link.get('transposed_url'):
+                output.append(f"  Checked: {link['transposed_url']}")
+
+            if link.get('status'):
+                output.append(f"  Status: {link['status']}")
+            if link.get('error'):
+                output.append(f"  Error: {link['error']}")
+        output.append("")
+
+    # Warnings (redirects)
+    if results['warnings'] and verbose:
+        output.append("WARNINGS (Redirects):")
+        for i, link in enumerate(results['warnings'], 1):
+            output.append(f"\n{i}. {link['file']}:{link['line']}")
+            output.append(f"  URL: {link['url']}")
+            if link.get('redirect'):
+                output.append(f"  Redirects to: {link['redirect']}")
+        output.append("")
+
+    return '\n'.join(output)
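
Beyond the `validate-links` CLI added at the end of this diff, the class is importable on its own. A minimal sketch of programmatic use, assuming an `example.adoc` in the current directory (the file name, attribute, and URLs are illustrative):

```python
# Sketch: driving LinkValidator directly; names and URLs are illustrative.
from doc_utils.validate_links import LinkValidator

validator = LinkValidator(
    timeout=5,
    retry=1,
    transpositions=[('https://docs.example.com', 'https://preview.example.com')],
)

# Each result dict carries the link fields (file, line, url) merged with
# the validation fields (status, error, redirect).
for r in validator.validate_links_in_file('example.adoc', attributes={'ver': '1.1'}):
    if r.get('error') or r.get('status') in ('missing', 404):
        print(f"{r['file']}:{r['line']} {r['url']} -> {r.get('error') or r['status']}")
```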
find_unused_attributes.py CHANGED
@@ -1,23 +1,61 @@
 """
 Find Unused AsciiDoc Attributes
 
-Scans a user-specified attributes file (e.g., attributes.adoc) for attribute definitions (e.g., :version: 1.1), then recursively scans all .adoc files in the current directory (ignoring symlinks) for usages of those attributes (e.g., {version}).
+Scans an attributes file for attribute definitions (e.g., :version: 1.1), then recursively scans all .adoc files in the current directory (ignoring symlinks) for usages of those attributes (e.g., {version}).
+
+If no attributes file is specified, the tool will auto-discover attributes files in the repository and let you choose one interactively.
 
 Any attribute defined but not used in any .adoc file is reported as NOT USED in both the command line output and a timestamped output file.
 """
 
 import argparse
 import os
+import sys
 from datetime import datetime
-from doc_utils.unused_attributes import find_unused_attributes
+from doc_utils.unused_attributes import find_unused_attributes, find_attributes_files, select_attributes_file
 
 def main():
     parser = argparse.ArgumentParser(description='Find unused AsciiDoc attributes.')
-    parser.add_argument('attributes_file', help='Path to the attributes.adoc file to scan for attribute definitions.')
+    parser.add_argument(
+        'attributes_file',
+        nargs='?',  # Make it optional
+        help='Path to the attributes file. If not specified, auto-discovers attributes files.'
+    )
     parser.add_argument('-o', '--output', action='store_true', help='Write results to a timestamped txt file in your home directory.')
     args = parser.parse_args()
 
-    unused = find_unused_attributes(args.attributes_file, '.')
+    # Determine which attributes file to use
+    if args.attributes_file:
+        # User specified a file
+        attr_file = args.attributes_file
+    else:
+        # Auto-discover attributes files
+        print("Searching for attributes files...")
+        attributes_files = find_attributes_files('.')
+
+        if not attributes_files:
+            print("No attributes files found in the repository.")
+            print("You can specify a file directly: find-unused-attributes <path-to-attributes-file>")
+            return 1
+
+        attr_file = select_attributes_file(attributes_files)
+        if not attr_file:
+            print("No attributes file selected.")
+            return 1
+
+    try:
+        unused = find_unused_attributes(attr_file, '.')
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        print(f"\nPlease ensure the file '{attr_file}' exists.")
+        print("Usage: find-unused-attributes [<path-to-attributes-file>]")
+        return 1
+    except (ValueError, PermissionError) as e:
+        print(f"Error: {e}")
+        return 1
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        return 1
 
     lines = [f":{attr}: NOT USED" for attr in unused]
     output = '\n'.join(lines)
@@ -33,9 +71,12 @@ def main():
         home_dir = os.path.expanduser('~')
         filename = os.path.join(home_dir, f'unused_attributes_{timestamp}.txt')
         with open(filename, 'w', encoding='utf-8') as f:
-            f.write('Unused attributes in ' + args.attributes_file + '\n')
+            f.write('Unused attributes in ' + attr_file + '\n')
             f.write(output + '\n')
         print(f'Results written to: {filename}')
 
+    return 0
+
 if __name__ == '__main__':
-    main()
+    import sys
+    sys.exit(main())
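
Two small conventions carry this change: `nargs='?'` makes the positional argument optional, and `main()` now returns an exit status that `sys.exit()` hands to the shell. A stripped-down sketch of the same pattern (the tool name and messages are illustrative, not the shipped code):

```python
# Sketch of the optional-positional / exit-code pattern used above.
import argparse
import sys


def main() -> int:
    parser = argparse.ArgumentParser(description='Demo: optional positional argument.')
    parser.add_argument('attributes_file', nargs='?',
                        help='Optional path; auto-discovery runs if omitted.')
    args = parser.parse_args()

    if args.attributes_file is None:
        print('No file given; this is where auto-discovery would run.')
        return 1  # nonzero exit status signals failure to the shell
    print(f'Would scan: {args.attributes_file}')
    return 0


if __name__ == '__main__':
    sys.exit(main())
```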
rolfedh_doc_utils-0.1.11.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rolfedh-doc-utils
-Version: 0.1.10
+Version: 0.1.11
 Summary: CLI tools for AsciiDoc documentation projects
 Author: Rolfe Dlugy-Hegwer
 License: MIT License
@@ -79,9 +79,10 @@ pip install -e .
 
 | Tool | Description | Usage |
 |------|-------------|-------|
+| **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
 | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
 | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
-| **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
+| **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
 | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
 | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
 | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |
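
The `--transpose` value shown in the table packs both URLs into a single argument split on a literal `--`. A short sketch of how that format resolves, following `parse_transpositions()` and `transpose_url()` in `doc_utils/validate_links.py` above (the URLs are illustrative; a URL that itself contains `--` would be rejected as invalid):

```python
# Sketch: the "from--to" transposition format, per parse_transpositions() above.
from doc_utils.validate_links import parse_transpositions

rules = parse_transpositions(['https://prod.example.com--https://preview.example.com'])
# rules == [('https://prod.example.com', 'https://preview.example.com')]

url = 'https://prod.example.com/docs/install'
for src, dst in rules:
    if url.startswith(src):
        url = url.replace(src, dst, 1)  # same first-match semantics as transpose_url()
        break
print(url)  # https://preview.example.com/docs/install
```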
rolfedh_doc_utils-0.1.11.dist-info/RECORD CHANGED
@@ -2,9 +2,10 @@ archive_unused_files.py,sha256=KMC5a1WL3rZ5owoVnncvfpT1YeMKbVXq9giHvadDgbM,1936
 archive_unused_images.py,sha256=PG2o3haovYckgfhoPhl6KRG_a9czyZuqlLkzkupKTCY,1526
 check_scannability.py,sha256=gcM-vFXKHGP_yFBz7-V5xbXWhIMmtMzBYIGwP9CFbzI,5140
 extract_link_attributes.py,sha256=utDM1FE-VEr649HhIH5BreXvxDNLnnAJO9dB5rs5f9Q,2535
-find_unused_attributes.py,sha256=fk-K32eoCVHxoj7RiBNgSmX1arBLuwYfdSAOMc-wIx0,1677
+find_unused_attributes.py,sha256=V8qI7O0u18ExbSho-hLfyBeRVqowLKGrFugY55JxZN0,3023
 format_asciidoc_spacing.py,sha256=ROp-cdMs2_hk8H4z5ljT0iDgGtsiECZ8TVjjcN_oOWE,3874
 replace_link_attributes.py,sha256=vg_aufw7dKXvh_epCKRNq_hEBMU_9crZ_JyJPpxSMNk,6454
+validate_links.py,sha256=DoSB0h3mmjzTY2f0oN6ybTP6jCNkzN7T3qM6oXc2AwE,5585
 doc_utils/__init__.py,sha256=qqZR3lohzkP63soymrEZPBGzzk6-nFzi4_tSffjmu_0,74
 doc_utils/extract_link_attributes.py,sha256=qBpJuTXNrhy15klpqC0iELZzcSLztEzMSmhEnKyQZT0,15574
 doc_utils/file_utils.py,sha256=fpTh3xx759sF8sNocdn_arsP3KAv8XA6cTQTAVIZiZg,4247
@@ -13,11 +14,12 @@ doc_utils/replace_link_attributes.py,sha256=kBiePbxjQn3O2rzqmYY8Mqy_mJgZ6yw048vS
 doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
 doc_utils/topic_map_parser.py,sha256=tKcIO1m9r2K6dvPRGue58zqMr0O2zKU1gnZMzEE3U6o,4571
 doc_utils/unused_adoc.py,sha256=2cbqcYr1os2EhETUU928BlPRlsZVSdI00qaMhqjSIqQ,5263
-doc_utils/unused_attributes.py,sha256=HBgmHelqearfWl3TTC2bZGiJytjLADIgiGQUNKqXXPg,1847
+doc_utils/unused_attributes.py,sha256=EjTtWIKW_aXsR1JOgw5RSDVAqitJ_NfRMVOXVGaiWTY,5282
 doc_utils/unused_images.py,sha256=nqn36Bbrmon2KlGlcaruNjJJvTQ8_9H0WU9GvCW7rW8,1456
-rolfedh_doc_utils-0.1.10.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
-rolfedh_doc_utils-0.1.10.dist-info/METADATA,sha256=Kk1Ur-SbE2XIP55NJ7Y5oVB-KNScnlADwmZyFSthTXo,7180
-rolfedh_doc_utils-0.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rolfedh_doc_utils-0.1.10.dist-info/entry_points.txt,sha256=aQtQRDwcdDN-VLBCnQBfmoozzQiaCUZ9dqcLLv8fCkM,381
-rolfedh_doc_utils-0.1.10.dist-info/top_level.txt,sha256=ILTc2mA4sHdDp0GvKC8JXO1I_DBP7vvF5hn-PFkMcL8,167
-rolfedh_doc_utils-0.1.10.dist-info/RECORD,,
+doc_utils/validate_links.py,sha256=iBGXnwdeLlgIT3fo3v01ApT5k0X2FtctsvkrE6E3VMk,19610
+rolfedh_doc_utils-0.1.11.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
+rolfedh_doc_utils-0.1.11.dist-info/METADATA,sha256=22seO4nEGTjlibUZ8tPRxTFyYpmLRsfY7sZssteQl1g,7386
+rolfedh_doc_utils-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rolfedh_doc_utils-0.1.11.dist-info/entry_points.txt,sha256=2J4Ojc3kkuArpe2xcUOPc0LxSWCmnctvw8hy8zpnbO4,418
+rolfedh_doc_utils-0.1.11.dist-info/top_level.txt,sha256=1w0JWD7w7gnM5Sga2K4fJieNZ7CHPTAf0ozYk5iIlmo,182
+rolfedh_doc_utils-0.1.11.dist-info/RECORD,,
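
For reference, each RECORD row is `path,sha256=<digest>,<size-in-bytes>`, where the digest is an unpadded urlsafe base64 SHA-256 per the wheel spec. A sketch of recomputing one entry (the path is taken from the listing above and assumes an unpacked 0.1.11 wheel):

```python
# Sketch: recomputing a wheel RECORD hash (unpadded urlsafe-base64 SHA-256).
import base64
import hashlib

def record_hash(path: str) -> str:
    with open(path, 'rb') as f:
        digest = hashlib.sha256(f.read()).digest()
    return 'sha256=' + base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

# Should match the doc_utils/validate_links.py row in RECORD above.
print(record_hash('doc_utils/validate_links.py'))
```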
rolfedh_doc_utils-0.1.11.dist-info/entry_points.txt CHANGED
@@ -6,3 +6,4 @@ extract-link-attributes = extract_link_attributes:main
 find-unused-attributes = find_unused_attributes:main
 format-asciidoc-spacing = format_asciidoc_spacing:main
 replace-link-attributes = replace_link_attributes:main
+validate-links = validate_links:main
rolfedh_doc_utils-0.1.11.dist-info/top_level.txt CHANGED
@@ -6,3 +6,4 @@ extract_link_attributes
 find_unused_attributes
 format_asciidoc_spacing
 replace_link_attributes
+validate_links
validate_links.py ADDED
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+Validate links in AsciiDoc documentation.
+
+This tool checks all links in AsciiDoc files for validity, including:
+- External HTTP/HTTPS links
+- Internal cross-references (xref)
+- Image paths
+"""
+
+import argparse
+import sys
+import json
+from doc_utils.validate_links import LinkValidator, parse_transpositions, format_results
+
+
+def main():
+    """Main entry point for the validate-links CLI tool."""
+    parser = argparse.ArgumentParser(
+        description='Validate links in AsciiDoc documentation',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic validation
+  validate-links
+
+  # Validate against preview environment
+  validate-links --transpose "https://docs.redhat.com--https://preview.docs.redhat.com"
+
+  # Multiple transpositions
+  validate-links \\
+    --transpose "https://docs.redhat.com--https://preview.docs.redhat.com" \\
+    --transpose "https://access.redhat.com--https://stage.access.redhat.com"
+
+  # With specific options
+  validate-links \\
+    --transpose "https://docs.example.com--https://preview.example.com" \\
+    --attributes-file common-attributes.adoc \\
+    --timeout 15 \\
+    --retry 3 \\
+    --parallel 20 \\
+    --exclude-domain localhost \\
+    --exclude-domain example.com
+
+  # Export results to JSON
+  validate-links --output report.json --format json
+"""
+    )
+
+    parser.add_argument(
+        '--transpose',
+        action='append',
+        help='Transpose URLs from production to preview/staging (format: from_url--to_url)'
+    )
+
+    parser.add_argument(
+        '--attributes-file',
+        help='Path to the AsciiDoc attributes file'
+    )
+
+    parser.add_argument(
+        '--scan-dir',
+        action='append',
+        help='Directory to scan for .adoc files (can be used multiple times, default: current directory)'
+    )
+
+    parser.add_argument(
+        '--timeout',
+        type=int,
+        default=10,
+        help='Timeout in seconds for each URL check (default: 10)'
+    )
+
+    parser.add_argument(
+        '--retry',
+        type=int,
+        default=3,
+        help='Number of retries for failed URLs (default: 3)'
+    )
+
+    parser.add_argument(
+        '--parallel',
+        type=int,
+        default=10,
+        help='Number of parallel URL checks (default: 10)'
+    )
+
+    parser.add_argument(
+        '--cache-duration',
+        type=int,
+        default=3600,
+        help='Cache duration in seconds (default: 3600)'
+    )
+
+    parser.add_argument(
+        '--exclude-domain',
+        action='append',
+        dest='exclude_domains',
+        help='Domain to exclude from validation (can be used multiple times)'
+    )
+
+    parser.add_argument(
+        '--no-cache',
+        action='store_true',
+        help='Disable caching of validation results'
+    )
+
+    parser.add_argument(
+        '--output',
+        help='Output file for results'
+    )
+
+    parser.add_argument(
+        '--format',
+        choices=['text', 'json', 'junit'],
+        default='text',
+        help='Output format (default: text)'
+    )
+
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Show verbose output including warnings'
+    )
+
+    parser.add_argument(
+        '--fail-on-broken',
+        action='store_true',
+        help='Exit with error code if broken links are found'
+    )
+
+    args = parser.parse_args()
+
+    # Parse transpositions
+    transpositions = parse_transpositions(args.transpose)
+
+    # Show configuration
+    print("Validating links in documentation...")
+    if args.attributes_file:
+        print(f"Loading attributes from {args.attributes_file}")
+    if transpositions:
+        print("\nURL Transposition Rules:")
+        for from_url, to_url in transpositions:
+            print(f"  {from_url} → {to_url}")
+        print()
+
+    # Create validator
+    validator = LinkValidator(
+        timeout=args.timeout,
+        retry=args.retry,
+        parallel=args.parallel,
+        cache_duration=args.cache_duration if not args.no_cache else 0,
+        transpositions=transpositions
+    )
+
+    try:
+        # Run validation
+        results = validator.validate_all(
+            scan_dirs=args.scan_dir,
+            attributes_file=args.attributes_file,
+            exclude_domains=args.exclude_domains
+        )
+
+        # Format output
+        if args.format == 'json':
+            output = json.dumps(results, indent=2)
+        elif args.format == 'junit':
+            # TODO: Implement JUnit XML format
+            output = format_results(results, verbose=args.verbose)
+        else:
+            output = format_results(results, verbose=args.verbose)
+
+        # Save or print output
+        if args.output:
+            with open(args.output, 'w', encoding='utf-8') as f:
+                f.write(output)
+            print(f"Results saved to {args.output}")
+            # Still print summary to console
+            if args.format != 'text':
+                summary = results['summary']
+                print(f"\nSummary: {summary['valid']} valid, {summary['broken']} broken, "
+                      f"{summary['warnings']} warnings")
+        else:
+            print(output)
+
+        # Exit code
+        if args.fail_on_broken and results['summary']['broken'] > 0:
+            sys.exit(1)
+
+    except KeyboardInterrupt:
+        print("\nValidation cancelled.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
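
When run with `--format json --output report.json`, the saved file is the raw dictionary returned by `validate_all()`: top-level keys `files`, `summary`, `broken_links`, `warnings`, and `transpositions`. A minimal sketch of consuming such a report (the file name is illustrative):

```python
# Sketch: reading a report produced by `validate-links --format json --output report.json`.
import json

with open('report.json', 'r', encoding='utf-8') as f:
    report = json.load(f)

print('Broken links:', report['summary']['broken'])
for link in report['broken_links']:
    print(f"{link['file']}:{link['line']}  {link['url']}")
```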