PyPI - rolfedh-doc-utils - Versions diffs - 0.1.10__tar.gz → 0.1.12__tar.gz - Mend

rolfedh-doc-utils 0.1.10__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

{rolfedh_doc_utils-0.1.10/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rolfedh-doc-utils
-Version: 0.1.10
+Version: 0.1.12
 Summary: CLI tools for AsciiDoc documentation projects
 Author: Rolfe Dlugy-Hegwer
 License: MIT License
@@ -79,9 +79,10 @@ pip install -e .
 | Tool | Description | Usage |
 |------|-------------|-------|
+| **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
 | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
 | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
-| **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
+| **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
 | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
 | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
 | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |

{rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/README.md RENAMED Viewed

@@ -46,9 +46,10 @@ pip install -e .
 | Tool | Description | Usage |
 |------|-------------|-------|
+| **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
 | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
 | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
-| **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
+| **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
 | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
 | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
 | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |

{rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/archive_unused_files.py RENAMED Viewed

@@ -11,6 +11,7 @@ import argparse
 from doc_utils.unused_adoc import find_unused_adoc
 from doc_utils.file_utils import parse_exclude_list_file
+from doc_utils.spinner import Spinner
 def main():
     parser = argparse.ArgumentParser(
         description='Archive unused AsciiDoc files.',

{rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/archive_unused_images.py RENAMED Viewed

@@ -10,6 +10,7 @@ import argparse
 from doc_utils.unused_images import find_unused_images
 from doc_utils.file_utils import parse_exclude_list_file
+from doc_utils.spinner import Spinner
 def main():
     parser = argparse.ArgumentParser(description='Archive unused image files.')
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')

{rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/check_scannability.py RENAMED Viewed

@@ -19,6 +19,7 @@ from datetime import datetime
 from doc_utils.scannability import check_scannability
 from doc_utils.file_utils import collect_files, parse_exclude_list_file
+from doc_utils.spinner import Spinner
 BASE_SENTENCE_WORD_LIMIT = 22
 BASE_PARAGRAPH_SENTENCE_LIMIT = 3

{rolfedh_doc_utils-0.1.10 → rolfedh_doc_utils-0.1.12}/doc_utils/extract_link_attributes.py RENAMED Viewed

@@ -10,6 +10,9 @@ from typing import Dict, List, Set, Tuple, Optional
 from collections import defaultdict
 import unicodedata
+from .spinner import Spinner
+from .validate_links import LinkValidator
 def find_attribute_files(base_path: str = '.') -> List[str]:
     """Find potential attribute files in the repository."""
@@ -381,10 +384,73 @@ def prepare_file_updates(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
     return dict(file_updates)
+def validate_link_attributes(attributes_file: str, fail_on_broken: bool = False) -> bool:
+    """
+    Validate URLs in link-* attributes.
+    Only lines of the form ``:link-<name>: http(s)://...`` are checked. A URL
+    counts as broken when LinkValidator.validate_url() returns a falsy value
+    or raises.
+    Args:
+        attributes_file: Path to the attributes file to scan.
+        fail_on_broken: When True, broken links make this function return False.
+    Returns: True if validation passes (no broken links or fail_on_broken is False), False otherwise
+    """
+    if not os.path.exists(attributes_file):
+        return True  # No file to validate yet
+    print(f"\nValidating links in {attributes_file}...")
+    link_pattern = re.compile(r'^:(link-[a-zA-Z0-9_-]+):\s*(https?://[^\s]+)')
+    spinner = Spinner("Validating link attributes")
+    spinner.start()
+    try:
+        # Extract link attributes from file
+        link_attributes = {}
+        with open(attributes_file, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f, 1):
+                # Match :link-*: URL patterns
+                match = link_pattern.match(line)
+                if match:
+                    attr_name = match.group(1)
+                    url = match.group(2).strip()
+                    link_attributes[attr_name] = (url, line_num)
+        if not link_attributes:
+            spinner.stop("No link attributes to validate")
+            return True
+        # Validate each URL
+        validator = LinkValidator(timeout=10, retry=2, parallel=5)
+        broken_links = []
+        for attr_name, (url, line_num) in link_attributes.items():
+            try:
+                is_valid = validator.validate_url(url)
+            except Exception:
+                # Best effort: a validator failure is reported as a broken link.
+                is_valid = False
+            if not is_valid:
+                broken_links.append((attr_name, url, line_num))
+        # Report results
+        total = len(link_attributes)
+        broken = len(broken_links)
+        valid = total - broken
+        spinner.stop(f"Validated {total} link attributes: {valid} valid, {broken} broken")
+    finally:
+        # Never leave the animation thread running if reading or validating
+        # raised; stop() does nothing when the spinner is already stopped.
+        spinner.stop()
+    if broken_links:
+        print("\n⚠️  Broken link attributes found:")
+        for attr_name, url, line_num in broken_links:
+            print(f"  Line {line_num}: :{attr_name}: {url}")
+        if fail_on_broken:
+            print("\nStopping extraction due to broken links (--fail-on-broken)")
+            return False
+        else:
+            print("\nContinuing with extraction despite broken links...")
+    return True
 def extract_link_attributes(attributes_file: str = None,
                            scan_dirs: List[str] = None,
                            interactive: bool = True,
-                           dry_run: bool = False) -> bool:
+                           dry_run: bool = False,
+                           validate_links: bool = False,
+                           fail_on_broken: bool = False) -> bool:
     """
     Main function to extract link attributes.
@@ -410,13 +476,22 @@ def extract_link_attributes(attributes_file: str = None,
             if not attributes_file:
                 return False
+    # Validate existing link attributes if requested
+    if validate_links:
+        if not validate_link_attributes(attributes_file, fail_on_broken):
+            return False
     # Load existing attributes
+    spinner = Spinner("Loading existing attributes")
+    spinner.start()
     existing_attrs = load_existing_attributes(attributes_file)
-    print(f"Loaded {len(existing_attrs)} existing attributes")
+    spinner.stop(f"Loaded {len(existing_attrs)} existing attributes")
     # Collect all macros
-    print("\nScanning for link and xref macros with attributes...")
+    spinner = Spinner("Scanning for link and xref macros with attributes")
+    spinner.start()
     all_macros = collect_all_macros(scan_dirs)
+    spinner.stop()
     if not all_macros:
         print("No link or xref macros with attributes found.")
@@ -425,8 +500,10 @@ def extract_link_attributes(attributes_file: str = None,
     print(f"Found {len(all_macros)} link/xref macros with attributes")
     # Group by URL
+    spinner = Spinner("Grouping macros by URL")
+    spinner.start()
     url_groups = group_macros_by_url(all_macros)
-    print(f"Grouped into {len(url_groups)} unique URLs")
+    spinner.stop(f"Grouped into {len(url_groups)} unique URLs")
     # Create new attributes
     new_attributes = create_attributes(url_groups, existing_attrs, interactive)
@@ -435,6 +512,37 @@ def extract_link_attributes(attributes_file: str = None,
         print("No new attributes to create.")
         return True
+    # Validate new attributes before writing if requested
+    if validate_links and not dry_run:
+        print("\nValidating new link attributes...")
+        spinner = Spinner("Validating new URLs")
+        spinner.start()
+        validator = LinkValidator(timeout=10, retry=2, parallel=5)
+        broken_new = []
+        for attr_name, attr_value in new_attributes.items():
+            # Extract URL from attribute value (could be link: or xref:)
+            url_match = re.search(r'(https?://[^\[]+)', attr_value)
+            if url_match:
+                url = url_match.group(1).strip()
+                try:
+                    if not validator.validate_url(url):
+                        broken_new.append((attr_name, url))
+                except Exception:
+                    broken_new.append((attr_name, url))
+        spinner.stop(f"Validated {len(new_attributes)} new attributes")
+        if broken_new:
+            print("\n⚠️  Broken URLs in new attributes:")
+            for attr_name, url in broken_new:
+                print(f"  :{attr_name}: {url}")
+            if fail_on_broken:
+                print("\nStopping due to broken URLs in new attributes (--fail-on-broken)")
+                return False
     # Update attribute file
     update_attribute_file(attributes_file, new_attributes, dry_run)
@@ -443,7 +551,11 @@ def extract_link_attributes(attributes_file: str = None,
     file_updates = prepare_file_updates(url_groups, all_attributes)
     # Replace macros
-    replace_macros_with_attributes(file_updates, dry_run)
+    if file_updates:
+        spinner = Spinner(f"Updating {len(file_updates)} files")
+        spinner.start()
+        replace_macros_with_attributes(file_updates, dry_run)
+        spinner.stop(f"Updated {len(file_updates)} files")
     if dry_run:
         print("\n[DRY RUN] No files were modified. Run without --dry-run to apply changes.")

rolfedh_doc_utils-0.1.12/doc_utils/spinner.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""
+Spinner utility for showing progress during long-running operations.
+This module provides a simple spinner that can be used by all doc-utils tools
+to indicate that processing is in progress.
+"""
+import functools
+import sys
+import threading
+import time
+from typing import Optional
+class Spinner:
+    """
+    A simple spinner to show progress during long operations.
+    A daemon thread redraws one stdout line (carriage return, frame, message,
+    "...") every INTERVAL seconds until stop() is called. Use start()/stop()
+    directly or use the instance as a context manager.
+    """
+    FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+    # Seconds between animation frames.
+    INTERVAL = 0.1
+    def __init__(self, message: str = "Processing"):
+        """
+        Initialize the spinner with a message.
+        Args:
+            message: The message to display alongside the spinner
+        """
+        self.message = message
+        self.spinning = False
+        self.thread: Optional[threading.Thread] = None
+        self.frame_index = 0
+        # Set by stop() so the animation thread wakes up immediately instead
+        # of finishing its current sleep.
+        self._stop_event = threading.Event()
+    def _spin(self):
+        """Internal method that runs in a separate thread to animate the spinner."""
+        while self.spinning:
+            frame = self.FRAMES[self.frame_index % len(self.FRAMES)]
+            sys.stdout.write(f'\r{frame} {self.message}...')
+            sys.stdout.flush()
+            self.frame_index += 1
+            # Interruptible sleep: returns True as soon as stop() sets the event.
+            if self._stop_event.wait(self.INTERVAL):
+                break
+    def start(self):
+        """Start the spinner animation; does nothing if it is already running."""
+        if not self.spinning:
+            self._stop_event.clear()
+            self.spinning = True
+            self.thread = threading.Thread(target=self._spin)
+            self.thread.daemon = True
+            self.thread.start()
+    def stop(self, final_message: Optional[str] = None, success: bool = True):
+        """
+        Stop the spinner animation; does nothing if it is not running.
+        Args:
+            final_message: Optional message to display after stopping
+            success: Whether the operation was successful (affects the symbol shown)
+        """
+        if self.spinning:
+            self.spinning = False
+            self._stop_event.set()
+            if self.thread:
+                self.thread.join()
+            # Clear the spinner line completely. The drawn line is
+            # frame + space + message + "...", which exceeds 80 columns for
+            # long messages, so size the blanking run from the message.
+            clear_width = max(80, len(self.message) + 5)
+            sys.stdout.write('\r' + ' ' * clear_width + '\r')
+            # Write final message if provided
+            if final_message:
+                symbol = '✓' if success else '✗'
+                sys.stdout.write(f'{symbol} {final_message}\n')
+            sys.stdout.flush()
+    def __enter__(self):
+        """Context manager entry - start the spinner."""
+        self.start()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit - stop the spinner; exceptions are not suppressed."""
+        success = exc_type is None
+        self.stop(success=success)
+        return False
+def with_spinner(message: str = "Processing"):
+    """
+    Decorator to add a spinner to a function.
+    The spinner starts before the wrapped function is called and is always
+    stopped afterwards, whether the function returns or raises (including
+    KeyboardInterrupt). Exceptions propagate unchanged.
+    Args:
+        message: The message to display alongside the spinner
+    Usage:
+        @with_spinner("Loading data")
+        def load_data():
+            # ... long running operation
+            return data
+    """
+    def decorator(func):
+        # Preserve the wrapped function's name, docstring and signature metadata.
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            spinner = Spinner(message)
+            spinner.start()
+            succeeded = False
+            try:
+                result = func(*args, **kwargs)
+                succeeded = True
+                return result
+            finally:
+                spinner.stop(success=succeeded)
+        return wrapper
+    return decorator
+# Convenience functions for common operations
+def show_progress(message: str = "Processing", total: Optional[int] = None):
+    """
+    Build (but do not start) a Spinner, optionally labelled with an item count.
+    Args:
+        message: Base text to show next to the spinner
+        total: Number of items being processed; when falsy (None or 0) the
+            message is used unchanged
+    Returns:
+        A new Spinner instance that has not been started
+    """
+    label = f"{message} ({total} items)" if total else message
+    return Spinner(label)

rolfedh_doc_utils-0.1.12/doc_utils/unused_attributes.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""
+Module for finding unused AsciiDoc attributes.
+Functions:
+- parse_attributes_file: Parse attribute names from an attributes.adoc file.
+- find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
+- scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
+- find_unused_attributes: Main function to return unused attributes.
+- find_attributes_files: Find all potential attributes files in the repository.
+"""
+import os
+import re
+from pathlib import Path
+from typing import Set, List, Optional
+def parse_attributes_file(attr_file: str) -> Set[str]:
+    """
+    Parse attribute names from an AsciiDoc attributes file.
+    A line defines an attribute when, after stripping surrounding whitespace,
+    it starts with ``:name:`` where name is made of word characters and hyphens.
+    Args:
+        attr_file: Path to the attributes file
+    Returns:
+        The set of attribute names defined in the file
+    Raises:
+        FileNotFoundError: attr_file does not exist
+        ValueError: attr_file is not a regular file, or is not valid UTF-8
+        PermissionError: attr_file cannot be read
+    """
+    # Check if file exists
+    if not os.path.exists(attr_file):
+        raise FileNotFoundError(f"Attributes file not found: {attr_file}")
+    # Check if it's a file (not a directory)
+    if not os.path.isfile(attr_file):
+        raise ValueError(f"Path is not a file: {attr_file}")
+    definition = re.compile(r'^:([\w-]+):')
+    attributes = set()
+    try:
+        with open(attr_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                match = definition.match(line.strip())
+                if match:
+                    attributes.add(match.group(1))
+    except PermissionError as e:
+        # Chain explicitly so the original OS error stays attached as the cause.
+        raise PermissionError(f"Permission denied reading file: {attr_file}") from e
+    except UnicodeDecodeError as e:
+        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}") from e
+    return attributes
+def find_adoc_files(root_dir: str) -> List[str]:
+    """
+    Recursively collect the .adoc files below root_dir.
+    The walk does not follow symlinked directories, and files that are
+    themselves symlinks are left out.
+    Args:
+        root_dir: Directory to search
+    Returns:
+        Paths (root_dir joined with the relative location) in walk order
+    """
+    collected = []
+    for current_dir, _subdirs, names in os.walk(root_dir, followlinks=False):
+        candidates = (os.path.join(current_dir, name) for name in names if name.endswith('.adoc'))
+        collected.extend(path for path in candidates if not os.path.islink(path))
+    return collected
+def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set[str]:
+    """
+    Find which of the given attributes are referenced in the given files.
+    A reference is ``{name}`` where name is made of word characters and hyphens.
+    Args:
+        adoc_files: Paths of the files to scan (read as UTF-8)
+        attributes: Attribute names to look for
+    Returns:
+        The subset of attributes referenced at least once
+    """
+    reference = re.compile(r'\{([\w-]+)\}')
+    found = set()
+    for path in adoc_files:
+        with open(path, 'r', encoding='utf-8') as handle:
+            for text in handle:
+                found.update(name for name in reference.findall(text) if name in attributes)
+    return found
+def find_attributes_files(root_dir: str = '.') -> List[str]:
+    """
+    Find all potential attributes files below root_dir.
+    A file is a candidate when its name starts or ends with "attributes" and
+    has the .adoc extension. Candidates located under a hidden directory or a
+    common build directory (target, build, node_modules) are skipped, as are
+    hidden files.
+    Args:
+        root_dir: Directory to search
+    Returns:
+        Sorted list of unique candidate paths, each prefixed with root_dir
+    """
+    root_path = Path(root_dir)
+    skipped_dirs = {'target', 'build', 'node_modules'}
+    # These two patterns also cover 'attributes.adoc' and '*-attributes.adoc'.
+    patterns = ['**/attributes*.adoc', '**/*attributes.adoc']
+    found = set()
+    for pattern in patterns:
+        for path in root_path.glob(pattern):
+            # Only look at components below root_dir. Otherwise a root such as
+            # '..', 'build/docs' or '/home/user/.cache/repo' would make every
+            # match look hidden or build-related and nothing would be returned.
+            parts = path.relative_to(root_path).parts
+            if any(part.startswith('.') or part in skipped_dirs for part in parts):
+                continue
+            found.add(str(path))
+    # Sort for consistent ordering
+    return sorted(found)
+def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
+    """
+    Ask the user on stdin which attributes file to use.
+    With one candidate the user confirms it or types another path. With
+    several, the user picks a number, chooses the extra "custom path" option,
+    or quits with 'q'; invalid answers are reported and the prompt repeats.
+    Args:
+        attributes_files: Candidate attributes file paths
+    Returns:
+        The chosen path, or None if there are no candidates, the user quits,
+        or (single-candidate case) the typed path is not an existing file
+    """
+    def ask_for_custom_path() -> Optional[str]:
+        # Prompt for a path; return it when it is an existing regular file,
+        # otherwise report the problem and return None.
+        entered = input("Enter the path to your attributes file: ").strip()
+        if os.path.exists(entered) and os.path.isfile(entered):
+            return entered
+        print(f"Error: File not found: {entered}")
+        return None
+    if not attributes_files:
+        return None
+    if len(attributes_files) == 1:
+        only_file = attributes_files[0]
+        print(f"Found attributes file: {only_file}")
+        answer = input("Use this file? (y/n): ").strip().lower()
+        return only_file if answer == 'y' else ask_for_custom_path()
+    # Multiple files found
+    custom_option = len(attributes_files) + 1
+    print("\nFound multiple attributes files:")
+    for number, file_path in enumerate(attributes_files, 1):
+        print(f"  {number}. {file_path}")
+    print(f"  {custom_option}. Enter custom path")
+    while True:
+        answer = input(f"\nSelect option (1-{custom_option}) or 'q' to quit: ").strip()
+        if answer.lower() == 'q':
+            return None
+        try:
+            choice = int(answer)
+        except ValueError:
+            print("Invalid input. Please enter a number.")
+            continue
+        if 1 <= choice <= len(attributes_files):
+            return attributes_files[choice - 1]
+        if choice == custom_option:
+            chosen = ask_for_custom_path()
+            if chosen is not None:
+                return chosen
+        else:
+            print(f"Invalid choice. Please enter a number between 1 and {custom_option}")
+def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
+    """
+    Return the attributes defined in attr_file that no .adoc file references.
+    Args:
+        attr_file: Path to the attributes file to parse
+        adoc_root: Directory searched recursively for .adoc files
+    Returns:
+        Sorted list of attribute names that are defined but never used
+    """
+    defined = parse_attributes_file(attr_file)
+    referenced = scan_for_attribute_usage(find_adoc_files(adoc_root), defined)
+    return sorted(defined - referenced)

rolfedh-doc-utils 0.1.10__tar.gz → 0.1.12__tar.gz

rolfedh-doc-utils 0.1.10__tar.gz → 0.1.12__tar.gz