rolfedh-doc-utils 0.1.10__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. {rolfedh_doc_utils-0.1.10/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.12}/PKG-INFO +3 -2
  2. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/README.md +2 -1
  3. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/archive_unused_files.py +1 -0
  4. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/archive_unused_images.py +1 -0
  5. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/check_scannability.py +1 -0
  6. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/extract_link_attributes.py +117 -5
  7. rolfedh_doc_utils-0.1.12/doc_utils/spinner.py +119 -0
  8. rolfedh_doc_utils-0.1.12/doc_utils/unused_attributes.py +138 -0
  9. rolfedh_doc_utils-0.1.12/doc_utils/validate_links.py +576 -0
  10. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/extract_link_attributes.py +15 -1
  11. rolfedh_doc_utils-0.1.12/find_unused_attributes.py +88 -0
  12. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/format_asciidoc_spacing.py +1 -0
  13. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/pyproject.toml +3 -2
  14. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/replace_link_attributes.py +16 -9
  15. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12/rolfedh_doc_utils.egg-info}/PKG-INFO +3 -2
  16. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/rolfedh_doc_utils.egg-info/SOURCES.txt +5 -1
  17. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/rolfedh_doc_utils.egg-info/entry_points.txt +1 -0
  18. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/rolfedh_doc_utils.egg-info/top_level.txt +1 -0
  19. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_cli_entry_points.py +13 -6
  20. rolfedh_doc_utils-0.1.12/tests/test_validate_links.py +385 -0
  21. rolfedh_doc_utils-0.1.12/validate_links.py +208 -0
  22. rolfedh_doc_utils-0.1.10/doc_utils/unused_attributes.py +0 -50
  23. rolfedh_doc_utils-0.1.10/find_unused_attributes.py +0 -41
  24. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/LICENSE +0 -0
  25. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/__init__.py +0 -0
  26. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/file_utils.py +0 -0
  27. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/format_asciidoc_spacing.py +0 -0
  28. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/replace_link_attributes.py +0 -0
  29. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/scannability.py +0 -0
  30. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/topic_map_parser.py +0 -0
  31. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/unused_adoc.py +0 -0
  32. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/unused_images.py +0 -0
  33. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  34. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
  35. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/setup.cfg +0 -0
  36. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/setup.py +0 -0
  37. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_archive_unused_files.py +0 -0
  38. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_archive_unused_images.py +0 -0
  39. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_auto_discovery.py +0 -0
  40. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_check_scannability.py +0 -0
  41. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_extract_link_attributes.py +0 -0
  42. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_file_utils.py +0 -0
  43. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_fixture_archive_unused_files.py +0 -0
  44. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_fixture_archive_unused_images.py +0 -0
  45. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_fixture_check_scannability.py +0 -0
  46. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_parse_exclude_list.py +0 -0
  47. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_symlink_handling.py +0 -0
  48. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_topic_map_parser.py +0 -0
  49. {rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/tests/test_unused_attributes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -79,9 +79,10 @@ pip install -e .
79
79
 
80
80
  | Tool | Description | Usage |
81
81
  |------|-------------|-------|
82
+ | **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
82
83
  | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
83
84
  | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
84
- | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
85
+ | **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
85
86
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
86
87
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
87
88
  | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |
@@ -46,9 +46,10 @@ pip install -e .
46
46
 
47
47
  | Tool | Description | Usage |
48
48
  |------|-------------|-------|
49
+ | **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
49
50
  | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
50
51
  | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
51
- | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
52
+ | **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
52
53
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
53
54
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
54
55
  | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |
@@ -11,6 +11,7 @@ import argparse
11
11
  from doc_utils.unused_adoc import find_unused_adoc
12
12
  from doc_utils.file_utils import parse_exclude_list_file
13
13
 
14
+ from doc_utils.spinner import Spinner
14
15
  def main():
15
16
  parser = argparse.ArgumentParser(
16
17
  description='Archive unused AsciiDoc files.',
@@ -10,6 +10,7 @@ import argparse
10
10
  from doc_utils.unused_images import find_unused_images
11
11
  from doc_utils.file_utils import parse_exclude_list_file
12
12
 
13
+ from doc_utils.spinner import Spinner
13
14
  def main():
14
15
  parser = argparse.ArgumentParser(description='Archive unused image files.')
15
16
  parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
@@ -19,6 +19,7 @@ from datetime import datetime
19
19
  from doc_utils.scannability import check_scannability
20
20
  from doc_utils.file_utils import collect_files, parse_exclude_list_file
21
21
 
22
+ from doc_utils.spinner import Spinner
22
23
  BASE_SENTENCE_WORD_LIMIT = 22
23
24
  BASE_PARAGRAPH_SENTENCE_LIMIT = 3
24
25
 
@@ -10,6 +10,9 @@ from typing import Dict, List, Set, Tuple, Optional
10
10
  from collections import defaultdict
11
11
  import unicodedata
12
12
 
13
+ from .spinner import Spinner
14
+ from .validate_links import LinkValidator
15
+
13
16
 
14
17
  def find_attribute_files(base_path: str = '.') -> List[str]:
15
18
  """Find potential attribute files in the repository."""
@@ -381,10 +384,73 @@ def prepare_file_updates(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
381
384
  return dict(file_updates)
382
385
 
383
386
 
387
def validate_link_attributes(attributes_file: str, fail_on_broken: bool = False) -> bool:
    """
    Validate the URLs assigned to ``:link-*:`` attributes in an attributes file.

    Every line of the form ``:link-<name>: http(s)://...`` is collected and its
    URL is checked with a ``LinkValidator``. Broken entries are printed together
    with the line number they were found on.

    Args:
        attributes_file: Path of the attributes file to scan.
        fail_on_broken: When True, broken links make the function return False.

    Returns:
        True when the file does not exist, contains no link attributes, has no
        broken links, or ``fail_on_broken`` is False; False otherwise.
    """
    if not os.path.exists(attributes_file):
        return True  # No file to validate yet

    print(f"\nValidating links in {attributes_file}...")

    # Compiled once instead of once per line of the attributes file.
    link_pattern = re.compile(r'^:(link-[a-zA-Z0-9_-]+):\s*(https?://[^\s]+)')

    link_attributes = {}
    broken_links = []
    summary = None

    spinner = Spinner("Validating link attributes")
    spinner.start()
    try:
        with open(attributes_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                match = link_pattern.match(line)
                if match:
                    link_attributes[match.group(1)] = (match.group(2).strip(), line_num)

        if not link_attributes:
            summary = "No link attributes to validate"
        else:
            validator = LinkValidator(timeout=10, retry=2, parallel=5)
            for attr_name, (url, line_num) in link_attributes.items():
                try:
                    is_valid = validator.validate_url(url)
                except Exception:
                    # Best effort: a validator failure is reported as a broken
                    # link rather than aborting the whole run.
                    is_valid = False
                if not is_valid:
                    broken_links.append((attr_name, url, line_num))

            total = len(link_attributes)
            broken = len(broken_links)
            valid = total - broken
            summary = f"Validated {total} link attributes: {valid} valid, {broken} broken"
    finally:
        # Always stop the animation thread, even if reading the file raised;
        # with summary=None the spinner line is only cleared.
        spinner.stop(summary)

    if broken_links:
        print("\n⚠️ Broken link attributes found:")
        for attr_name, url, line_num in broken_links:
            print(f" Line {line_num}: :{attr_name}: {url}")

        if fail_on_broken:
            print("\nStopping extraction due to broken links (--fail-on-broken)")
            return False
        print("\nContinuing with extraction despite broken links...")

    return True
446
+
447
+
384
448
  def extract_link_attributes(attributes_file: str = None,
385
449
  scan_dirs: List[str] = None,
386
450
  interactive: bool = True,
387
- dry_run: bool = False) -> bool:
451
+ dry_run: bool = False,
452
+ validate_links: bool = False,
453
+ fail_on_broken: bool = False) -> bool:
388
454
  """
389
455
  Main function to extract link attributes.
390
456
 
@@ -410,13 +476,22 @@ def extract_link_attributes(attributes_file: str = None,
410
476
  if not attributes_file:
411
477
  return False
412
478
 
479
+ # Validate existing link attributes if requested
480
+ if validate_links:
481
+ if not validate_link_attributes(attributes_file, fail_on_broken):
482
+ return False
483
+
413
484
  # Load existing attributes
485
+ spinner = Spinner("Loading existing attributes")
486
+ spinner.start()
414
487
  existing_attrs = load_existing_attributes(attributes_file)
415
- print(f"Loaded {len(existing_attrs)} existing attributes")
488
+ spinner.stop(f"Loaded {len(existing_attrs)} existing attributes")
416
489
 
417
490
  # Collect all macros
418
- print("\nScanning for link and xref macros with attributes...")
491
+ spinner = Spinner("Scanning for link and xref macros with attributes")
492
+ spinner.start()
419
493
  all_macros = collect_all_macros(scan_dirs)
494
+ spinner.stop()
420
495
 
421
496
  if not all_macros:
422
497
  print("No link or xref macros with attributes found.")
@@ -425,8 +500,10 @@ def extract_link_attributes(attributes_file: str = None,
425
500
  print(f"Found {len(all_macros)} link/xref macros with attributes")
426
501
 
427
502
  # Group by URL
503
+ spinner = Spinner("Grouping macros by URL")
504
+ spinner.start()
428
505
  url_groups = group_macros_by_url(all_macros)
429
- print(f"Grouped into {len(url_groups)} unique URLs")
506
+ spinner.stop(f"Grouped into {len(url_groups)} unique URLs")
430
507
 
431
508
  # Create new attributes
432
509
  new_attributes = create_attributes(url_groups, existing_attrs, interactive)
@@ -435,6 +512,37 @@ def extract_link_attributes(attributes_file: str = None,
435
512
  print("No new attributes to create.")
436
513
  return True
437
514
 
515
+ # Validate new attributes before writing if requested
516
+ if validate_links and not dry_run:
517
+ print("\nValidating new link attributes...")
518
+ spinner = Spinner("Validating new URLs")
519
+ spinner.start()
520
+
521
+ validator = LinkValidator(timeout=10, retry=2, parallel=5)
522
+ broken_new = []
523
+
524
+ for attr_name, attr_value in new_attributes.items():
525
+ # Extract URL from attribute value (could be link: or xref:)
526
+ url_match = re.search(r'(https?://[^\[]+)', attr_value)
527
+ if url_match:
528
+ url = url_match.group(1).strip()
529
+ try:
530
+ if not validator.validate_url(url):
531
+ broken_new.append((attr_name, url))
532
+ except Exception:
533
+ broken_new.append((attr_name, url))
534
+
535
+ spinner.stop(f"Validated {len(new_attributes)} new attributes")
536
+
537
+ if broken_new:
538
+ print("\n⚠️ Broken URLs in new attributes:")
539
+ for attr_name, url in broken_new:
540
+ print(f" :{attr_name}: {url}")
541
+
542
+ if fail_on_broken:
543
+ print("\nStopping due to broken URLs in new attributes (--fail-on-broken)")
544
+ return False
545
+
438
546
  # Update attribute file
439
547
  update_attribute_file(attributes_file, new_attributes, dry_run)
440
548
 
@@ -443,7 +551,11 @@ def extract_link_attributes(attributes_file: str = None,
443
551
  file_updates = prepare_file_updates(url_groups, all_attributes)
444
552
 
445
553
  # Replace macros
446
- replace_macros_with_attributes(file_updates, dry_run)
554
+ if file_updates:
555
+ spinner = Spinner(f"Updating {len(file_updates)} files")
556
+ spinner.start()
557
+ replace_macros_with_attributes(file_updates, dry_run)
558
+ spinner.stop(f"Updated {len(file_updates)} files")
447
559
 
448
560
  if dry_run:
449
561
  print("\n[DRY RUN] No files were modified. Run without --dry-run to apply changes.")
@@ -0,0 +1,119 @@
1
+ """
2
+ Spinner utility for showing progress during long-running operations.
3
+
4
+ This module provides a simple spinner that can be used by all doc-utils tools
5
+ to indicate that processing is in progress.
6
+ """
7
+
8
+ import sys
9
+ import time
10
+ import threading
11
+ from typing import Optional
12
+
13
+
14
class Spinner:
    """
    A simple terminal spinner to show progress during long operations.

    The animation runs on a daemon thread that repeatedly rewrites the current
    stdout line with the next frame followed by the message. The class can be
    driven with ``start()``/``stop()`` or used as a context manager.
    """

    FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']

    # Smallest number of columns blanked when the spinner line is cleared.
    _MIN_CLEAR_WIDTH = 80

    def __init__(self, message: str = "Processing"):
        """
        Initialize the spinner with a message.

        Args:
            message: The message to display alongside the spinner
        """
        self.message = message
        self.spinning = False
        self.thread: Optional[threading.Thread] = None
        self.frame_index = 0

    def _spin(self):
        """Animate the spinner; runs on the background thread until stopped."""
        while self.spinning:
            frame = self.FRAMES[self.frame_index % len(self.FRAMES)]
            sys.stdout.write(f'\r{frame} {self.message}...')
            sys.stdout.flush()
            self.frame_index += 1
            time.sleep(0.1)

    def start(self):
        """Start the spinner animation (no-op if it is already running)."""
        if not self.spinning:
            self.spinning = True
            self.thread = threading.Thread(target=self._spin)
            self.thread.daemon = True
            self.thread.start()

    def stop(self, final_message: Optional[str] = None, success: bool = True):
        """
        Stop the spinner animation and clear its line.

        Args:
            final_message: Optional message to display after stopping
            success: Whether the operation was successful (affects the symbol shown)
        """
        if self.spinning:
            self.spinning = False
            if self.thread:
                self.thread.join()

        # A rendered frame is "<frame> <message>..." = len(message) + 5 chars.
        # Blank at least that many columns so long messages leave no residue.
        clear_width = max(self._MIN_CLEAR_WIDTH, len(self.message) + 5)
        sys.stdout.write('\r' + ' ' * clear_width + '\r')

        # Write final message if provided
        if final_message:
            symbol = '✓' if success else '✗'
            sys.stdout.write(f'{symbol} {final_message}\n')

        sys.stdout.flush()

    def __enter__(self):
        """Context manager entry - start the spinner."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - stop the spinner; exceptions are not suppressed."""
        success = exc_type is None
        self.stop(success=success)
        return False
81
+
82
+
83
def with_spinner(message: str = "Processing"):
    """
    Decorator factory that shows a spinner while the decorated function runs.

    The spinner is started before the call and stopped when the call returns
    or raises; any exception propagates unchanged to the caller.

    Usage:
        @with_spinner("Loading data")
        def load_data():
            # ... long running operation
            return data

    Args:
        message: The message to display alongside the spinner

    Returns:
        A decorator that wraps a function with spinner start/stop handling.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            # The context manager stops the spinner on every exit path,
            # including KeyboardInterrupt, and never swallows the exception.
            with Spinner(message):
                return func(*args, **kwargs)
        return wrapper
    return decorator
106
+
107
+
108
+ # Convenience functions for common operations
109
def show_progress(message: str = "Processing", total: Optional[int] = None):
    """
    Build a Spinner whose label optionally includes an item count.

    Args:
        message: The base message to display
        total: Optional total number of items being processed; a falsy value
            (None or 0) leaves the message unchanged

    Returns:
        A new, not yet started, Spinner.
    """
    label = f"{message} ({total} items)" if total else message
    return Spinner(label)
@@ -0,0 +1,138 @@
1
+ """
2
+ Module for finding unused AsciiDoc attributes.
3
+
4
+ Functions:
5
+ - parse_attributes_file: Parse attribute names from an attributes.adoc file.
6
+ - find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
7
+ - scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
8
+ - find_unused_attributes: Main function to return unused attributes.
9
+ - find_attributes_files: Find all potential attributes files in the repository.
10
+ """
11
+
12
+ import os
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Set, List, Optional
16
+
17
def parse_attributes_file(attr_file: str) -> Set[str]:
    """
    Collect the attribute names defined in an AsciiDoc attributes file.

    A line defines an attribute when, after stripping surrounding whitespace,
    it starts with ``:name:`` where ``name`` consists of word characters and
    hyphens.

    Args:
        attr_file: Path to the attributes file.

    Returns:
        The set of attribute names found in the file.

    Raises:
        FileNotFoundError: If ``attr_file`` does not exist.
        ValueError: If ``attr_file`` is not a regular file, or cannot be
            decoded as UTF-8.
        PermissionError: If the file cannot be read.
    """
    # Check if file exists
    if not os.path.exists(attr_file):
        raise FileNotFoundError(f"Attributes file not found: {attr_file}")

    # Check if it's a file (not a directory)
    if not os.path.isfile(attr_file):
        raise ValueError(f"Path is not a file: {attr_file}")

    # Compiled once instead of once per line.
    definition = re.compile(r'^:([\w-]+):')
    attributes = set()

    try:
        with open(attr_file, 'r', encoding='utf-8') as f:
            for line in f:
                match = definition.match(line.strip())
                if match:
                    attributes.add(match.group(1))
    except PermissionError as e:
        # Chain explicitly so the original OS error stays attached as the cause.
        raise PermissionError(f"Permission denied reading file: {attr_file}") from e
    except UnicodeDecodeError as e:
        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}") from e

    return attributes
40
+
41
def find_adoc_files(root_dir: str) -> List[str]:
    """
    Recursively list the ``.adoc`` files below ``root_dir``.

    Directory symlinks are not followed during the walk, and files that are
    themselves symlinks are left out of the result.

    Args:
        root_dir: Directory to walk.

    Returns:
        Paths of the matching files, in the order the walk produced them.
    """
    candidates = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir, followlinks=False)
        for name in names
        if name.endswith('.adoc')
    )
    return [path for path in candidates if not os.path.islink(path)]
50
+
51
def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set[str]:
    """
    Determine which of the given attributes are referenced in the given files.

    A reference is any ``{name}`` occurrence where ``name`` consists of word
    characters and hyphens. Files are read line by line as UTF-8.

    Args:
        adoc_files: Paths of the files to scan.
        attributes: Attribute names to look for.

    Returns:
        The subset of ``attributes`` that is referenced at least once.
    """
    reference = re.compile(r'\{([\w-]+)\}')
    found = set()
    for path in adoc_files:
        with open(path, 'r', encoding='utf-8') as handle:
            for text in handle:
                found.update(
                    name for name in reference.findall(text) if name in attributes
                )
    return found
61
+
62
def find_attributes_files(root_dir: str = '.') -> List[str]:
    """
    Find candidate attributes files below ``root_dir``.

    A candidate is any path whose file name matches ``attributes*.adoc`` or
    ``*attributes.adoc``. Paths are skipped when any component *below*
    ``root_dir`` starts with a dot or is named ``target``, ``build`` or
    ``node_modules``.

    Args:
        root_dir: Directory to search recursively.

    Returns:
        The matching paths as strings, sorted and without duplicates.
    """
    skipped_dirs = {'target', 'build', 'node_modules'}
    root_path = Path(root_dir)

    # These two globs match everything the four original patterns matched
    # ('attributes.adoc' and '*-attributes.adoc' are special cases of them).
    patterns = ('**/attributes*.adoc', '**/*attributes.adoc')

    found = set()
    for pattern in patterns:
        for path in root_path.glob(pattern):
            # Filter on components below the search root only. Testing the
            # full path would reject every file when root_dir itself contains
            # '..', a hidden directory, or a directory named e.g. 'build'.
            relative_parts = path.relative_to(root_path).parts
            if any(part.startswith('.') or part in skipped_dirs for part in relative_parts):
                continue
            found.add(str(path))

    # Sort for consistent ordering
    return sorted(found)
84
+
85
+
86
def _ask_for_custom_attributes_path() -> Optional[str]:
    """Prompt for a path; return it if it is an existing file, else report it and return None."""
    entered = input("Enter the path to your attributes file: ").strip()
    if os.path.isfile(entered):
        return entered
    print(f"Error: File not found: {entered}")
    return None


def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
    """
    Interactively choose an attributes file from a list of candidates.

    With a single candidate the user confirms it with ``y`` or is asked for a
    custom path instead. With several candidates a numbered menu is shown,
    with one extra option for a custom path; the menu repeats until a valid
    choice is made or the user enters ``q``.

    Args:
        attributes_files: Candidate file paths.

    Returns:
        The chosen path, or None when the list is empty, the user quits, or
        (single-candidate case) the custom path is not an existing file.
    """
    if not attributes_files:
        return None

    if len(attributes_files) == 1:
        only = attributes_files[0]
        print(f"Found attributes file: {only}")
        answer = input("Use this file? (y/n): ").strip().lower()
        if answer == 'y':
            return only
        return _ask_for_custom_attributes_path()

    # Multiple files found
    custom_option = len(attributes_files) + 1
    print("\nFound multiple attributes files:")
    for number, file_path in enumerate(attributes_files, 1):
        print(f" {number}. {file_path}")
    print(f" {custom_option}. Enter custom path")

    while True:
        answer = input(f"\nSelect option (1-{custom_option}) or 'q' to quit: ").strip()
        if answer.lower() == 'q':
            return None

        try:
            choice = int(answer)
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue

        if 1 <= choice <= len(attributes_files):
            return attributes_files[choice - 1]
        if choice == custom_option:
            # A bad custom path was already reported; show the menu prompt again.
            chosen = _ask_for_custom_attributes_path()
            if chosen is not None:
                return chosen
        else:
            print(f"Invalid choice. Please enter a number between 1 and {custom_option}")
131
+
132
+
133
def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
    """
    Return the attributes defined in ``attr_file`` that no ``.adoc`` file uses.

    Args:
        attr_file: Path to the attributes file whose definitions are checked.
        adoc_root: Directory searched recursively for ``.adoc`` files.

    Returns:
        Sorted list of attribute names that are defined but never referenced.
    """
    defined = parse_attributes_file(attr_file)
    referenced = scan_for_attribute_usage(find_adoc_files(adoc_root), defined)
    return sorted(defined - referenced)