rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (52)
  1. archive_unused_files.py +18 -5
  2. archive_unused_images.py +9 -2
  3. callout_lib/__init__.py +22 -0
  4. callout_lib/converter_bullets.py +103 -0
  5. callout_lib/converter_comments.py +295 -0
  6. callout_lib/converter_deflist.py +134 -0
  7. callout_lib/detector.py +364 -0
  8. callout_lib/table_parser.py +804 -0
  9. check_published_links.py +1083 -0
  10. check_scannability.py +6 -0
  11. check_source_directives.py +101 -0
  12. convert_callouts_interactive.py +567 -0
  13. convert_callouts_to_deflist.py +628 -0
  14. convert_freemarker_to_asciidoc.py +288 -0
  15. convert_tables_to_deflists.py +479 -0
  16. doc_utils/convert_freemarker_to_asciidoc.py +708 -0
  17. doc_utils/duplicate_content.py +409 -0
  18. doc_utils/duplicate_includes.py +347 -0
  19. doc_utils/extract_link_attributes.py +618 -0
  20. doc_utils/format_asciidoc_spacing.py +285 -0
  21. doc_utils/insert_abstract_role.py +220 -0
  22. doc_utils/inventory_conditionals.py +164 -0
  23. doc_utils/missing_source_directive.py +211 -0
  24. doc_utils/replace_link_attributes.py +187 -0
  25. doc_utils/spinner.py +119 -0
  26. doc_utils/unused_adoc.py +150 -22
  27. doc_utils/unused_attributes.py +218 -6
  28. doc_utils/unused_images.py +81 -9
  29. doc_utils/validate_links.py +576 -0
  30. doc_utils/version.py +8 -0
  31. doc_utils/version_check.py +243 -0
  32. doc_utils/warnings_report.py +237 -0
  33. doc_utils_cli.py +158 -0
  34. extract_link_attributes.py +120 -0
  35. find_duplicate_content.py +209 -0
  36. find_duplicate_includes.py +198 -0
  37. find_unused_attributes.py +84 -6
  38. format_asciidoc_spacing.py +134 -0
  39. insert_abstract_role.py +163 -0
  40. inventory_conditionals.py +53 -0
  41. replace_link_attributes.py +214 -0
  42. rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
  43. rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
  44. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
  45. rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
  46. rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
  47. validate_links.py +213 -0
  48. rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
  49. rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
  50. rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
  51. rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
  52. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
doc_utils/missing_source_directive.py ADDED
@@ -0,0 +1,211 @@
+ # doc_utils/missing_source_directive.py
+
+ """
+ Detects code blocks (----) that are missing [source] directive on the preceding line.
+
+ This module provides functionality to scan AsciiDoc files for code blocks that lack
+ proper source directives, which can cause issues with AsciiDoc-to-DocBook XML conversion.
+ """
+
+ import os
+ import re
+
+ def is_code_block_start(line):
+     """Check if line is a code block delimiter (4 or more dashes)"""
+     return re.match(r'^-{4,}$', line.strip())
+
+ def has_source_directive(line):
+     """Check if line contains [source] directive"""
+     # Match [source], [source,lang], [source, lang], etc.
+     return re.match(r'^\[source[\s,]', line.strip())
+
+ def is_empty_or_whitespace(line):
+     """Check if line is empty or contains only whitespace"""
+     return len(line.strip()) == 0
+
+ def scan_file(filepath):
+     """
+     Scan a single AsciiDoc file for missing [source] directives.
+
+     Args:
+         filepath: Path to the AsciiDoc file to scan
+
+     Returns:
+         List of issue dictionaries containing line_num, prev_line_num, and prev_line
+     """
+     issues = []
+
+     try:
+         with open(filepath, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+
+         in_code_block = False
+
+         for i, line in enumerate(lines, start=1):
+             # Check if current line is a code block delimiter
+             if is_code_block_start(line):
+                 if not in_code_block:
+                     # This is the START of a code block
+                     prev_line_num = i - 1
+                     prev_line = lines[prev_line_num - 1].rstrip() if prev_line_num > 0 else ""
+
+                     # Check if [source] exists in previous lines (within last 3 lines)
+                     # This handles cases where there's a title between [source] and ----
+                     has_source_in_context = False
+                     for lookback in range(1, min(4, i)):
+                         check_line = lines[i - lookback - 1].strip()
+                         if has_source_directive(check_line):
+                             has_source_in_context = True
+                             break
+                         # Stop looking if we hit an empty line or structural element
+                         if not check_line or check_line.startswith(('=', '----')):
+                             break
+
+                     # Only flag if:
+                     # 1. No [source] directive in recent context
+                     # 2. Previous line is not empty (which might be valid formatting)
+                     if (not has_source_in_context and
+                             not is_empty_or_whitespace(prev_line)):
+
+                         # Additional heuristic: check if previous line looks like it should have [source]
+                         # Skip if previous line is a title, comment, or other structural element
+                         prev_stripped = prev_line.strip()
+
+                         # Skip common valid patterns
+                         if prev_stripped.startswith(('=', '//', 'NOTE:', 'TIP:', 'WARNING:', 'IMPORTANT:', 'CAUTION:')):
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is already an attribute block (but not [source])
+                         if prev_stripped.startswith('[') and prev_stripped.endswith(']'):
+                             # It's some other attribute like [id], [role], etc., might be intentional
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is just a plus sign (continuation)
+                         if prev_stripped == '+':
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is a block title (starts with .)
+                         if prev_stripped.startswith('.') and len(prev_stripped) > 1:
+                             # This might be a title for a source block that's defined earlier
+                             # Check if there's a [source] before the title
+                             if i >= 3:
+                                 two_lines_back = lines[i - 3].strip()
+                                 if has_source_directive(two_lines_back):
+                                     in_code_block = True
+                                     continue
+
+                         issues.append({
+                             'line_num': i,
+                             'prev_line_num': prev_line_num,
+                             'prev_line': prev_line[:80]  # Truncate for display
+                         })
+
+                     in_code_block = True
+                 else:
+                     # This is the END of a code block
+                     in_code_block = False
+
+     except Exception as e:
+         raise IOError(f"Error reading {filepath}: {e}")
+
+     return issues
+
+ def fix_file(filepath, issues):
+     """
+     Insert [source] directives for missing code blocks.
+
+     Args:
+         filepath: Path to the AsciiDoc file to fix
+         issues: List of issue dictionaries from scan_file()
+
+     Returns:
+         True if successful, False otherwise
+     """
+     try:
+         with open(filepath, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+
+         # Sort issues by line number in reverse order so we can insert from bottom to top
+         # This prevents line number shifts from affecting subsequent insertions
+         sorted_issues = sorted(issues, key=lambda x: x['line_num'], reverse=True)
+
+         for issue in sorted_issues:
+             line_num = issue['line_num']
+             # Insert [source] directive before the ---- line (at line_num - 1, which is index line_num - 1)
+             insert_index = line_num - 1
+             lines.insert(insert_index, '[source]\n')
+
+         # Write the modified content back to the file
+         with open(filepath, 'w', encoding='utf-8') as f:
+             f.writelines(lines)
+
+         return True
+
+     except Exception as e:
+         raise IOError(f"Error fixing {filepath}: {e}")
+
+ def find_missing_source_directives(scan_dir='.', auto_fix=False):
+     """
+     Scan directory for AsciiDoc files with missing [source] directives.
+
+     Args:
+         scan_dir: Directory to scan (default: current directory)
+         auto_fix: If True, automatically insert [source] directives
+
+     Returns:
+         Dictionary with statistics:
+         - total_issues: Total number of issues found
+         - files_with_issues: Number of files with issues
+         - files_fixed: Number of files successfully fixed (if auto_fix=True)
+         - file_details: List of dictionaries with file paths and their issues
+     """
+     if not os.path.isdir(scan_dir):
+         raise ValueError(f"Directory '{scan_dir}' does not exist")
+
+     total_issues = 0
+     files_with_issues = 0
+     files_fixed = 0
+     file_details = []
+
+     # Find all .adoc files (excluding symbolic links)
+     adoc_files = []
+     for root, dirs, files in os.walk(scan_dir):
+         for filename in files:
+             if filename.endswith('.adoc'):
+                 filepath = os.path.join(root, filename)
+                 # Skip symbolic links
+                 if not os.path.islink(filepath):
+                     adoc_files.append(filepath)
+
+     for filepath in sorted(adoc_files):
+         issues = scan_file(filepath)
+
+         if issues:
+             files_with_issues += 1
+             total_issues += len(issues)
+
+             file_info = {
+                 'filepath': filepath,
+                 'issues': issues,
+                 'fixed': False
+             }
+
+             if auto_fix:
+                 try:
+                     if fix_file(filepath, issues):
+                         files_fixed += 1
+                         file_info['fixed'] = True
+                 except Exception as e:
+                     file_info['error'] = str(e)
+
+             file_details.append(file_info)
+
+     return {
+         'total_issues': total_issues,
+         'files_with_issues': files_with_issues,
+         'files_fixed': files_fixed,
+         'file_details': file_details
+     }
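The module above exposes find_missing_source_directives() as its entry point; it returns a summary dictionary rather than printing anything itself. The following is a minimal sketch of how a caller might drive it, assuming the module is importable as doc_utils.missing_source_directive from the installed wheel; the 'docs' directory and the report formatting are illustrative, not something this diff confirms.

# Illustrative driver; the import path and the 'docs' directory are assumptions.
from doc_utils.missing_source_directive import find_missing_source_directives

# Dry scan first: report issues without modifying any files.
report = find_missing_source_directives(scan_dir='docs', auto_fix=False)

print(f"{report['total_issues']} code blocks missing [source] "
      f"across {report['files_with_issues']} files")

for detail in report['file_details']:
    for issue in detail['issues']:
        # line_num points at the '----' delimiter; prev_line shows what precedes it.
        print(f"{detail['filepath']}:{issue['line_num']} (after: {issue['prev_line']})")

# A second run with auto_fix=True inserts a bare [source] line above each flagged block.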
doc_utils/replace_link_attributes.py ADDED
@@ -0,0 +1,187 @@
+ """
+ Replace AsciiDoc attributes within link URLs with their actual values.
+
+ This module finds and replaces attribute references (like {attribute-name}) that appear
+ in the URL portion of AsciiDoc link macros (link: and xref:) with their resolved values
+ from attributes.adoc. Link text is preserved unchanged.
+ """
+
+ import re
+ from pathlib import Path
+ from typing import Dict, List, Tuple, Optional
+
+
+ def find_attributes_files(root_dir: Path) -> List[Path]:
+     """Find all attributes.adoc files in the repository."""
+     attributes_files = []
+
+     for path in root_dir.rglob('**/attributes.adoc'):
+         # Skip hidden directories and common build directories
+         parts = path.parts
+         if any(part.startswith('.') or part in ['target', 'build', 'node_modules'] for part in parts):
+             continue
+         attributes_files.append(path)
+
+     return attributes_files
+
+
+ def load_attributes(attributes_file: Path) -> Dict[str, str]:
+     """Load attribute definitions from an attributes.adoc file."""
+     attributes = {}
+
+     with open(attributes_file, 'r', encoding='utf-8') as f:
+         for line in f:
+             # Match attribute definitions
+             # Format: :attribute-name: value
+             match = re.match(r'^:([a-zA-Z0-9_-]+):\s*(.*)$', line)
+             if match:
+                 attr_name = match.group(1)
+                 attr_value = match.group(2).strip()
+                 attributes[attr_name] = attr_value
+
+     return attributes
+
+
+ def resolve_nested_attributes(attributes: Dict[str, str], max_iterations: int = 10) -> Dict[str, str]:
+     """Resolve nested attribute references within attribute values."""
+     for _ in range(max_iterations):
+         changes_made = False
+
+         for attr_name, attr_value in attributes.items():
+             # Find all attribute references in the value
+             refs = re.findall(r'\{([a-zA-Z0-9_-]+)\}', attr_value)
+
+             for ref in refs:
+                 if ref in attributes:
+                     new_value = attr_value.replace(f'{{{ref}}}', attributes[ref])
+                     if new_value != attr_value:
+                         attributes[attr_name] = new_value
+                         changes_made = True
+                         attr_value = new_value
+
+         if not changes_made:
+             break
+
+     return attributes
+
+
+ def replace_link_attributes_in_file(file_path: Path, attributes: Dict[str, str], dry_run: bool = False, macro_type: str = 'both') -> int:
+     """
+     Replace attribute references within link macros in a single file.
+
+     Args:
+         file_path: Path to the file to process
+         attributes: Dictionary of attribute definitions
+         dry_run: Preview changes without modifying files
+         macro_type: Type of macros to process - 'link', 'xref', or 'both' (default: 'both')
+
+     Returns: Number of replacements made
+     """
+     with open(file_path, 'r', encoding='utf-8') as f:
+         content = f.read()
+
+     original_content = content
+     replacement_count = 0
+
+     # Find all link macros containing attributes in the URL portion only
+     # Match link: and xref: macros, capturing URL and text separately
+     link_patterns = []
+
+     if macro_type in ('link', 'both'):
+         # link:url[text] - replace only in URL portion
+         link_patterns.append((r'link:([^[\]]*)\[([^\]]*)\]', 'link'))
+
+     if macro_type in ('xref', 'both'):
+         # xref:target[text] - replace only in target portion
+         link_patterns.append((r'xref:([^[\]]*)\[([^\]]*)\]', 'xref'))
+
+     # Handle empty text cases based on macro type
+     if macro_type == 'both':
+         link_patterns.append((r'(link|xref):([^[\]]*)\[\]', 'empty_text'))
+     elif macro_type == 'link':
+         link_patterns.append((r'(link):([^[\]]*)\[\]', 'empty_text'))
+     elif macro_type == 'xref':
+         link_patterns.append((r'(xref):([^[\]]*)\[\]', 'empty_text'))
+
+     for pattern, link_type in link_patterns:
+         matches = list(re.finditer(pattern, content))
+
+         # Process matches in reverse order to maintain string positions
+         for match in reversed(matches):
+             if link_type == 'empty_text':
+                 # For links with empty text []
+                 macro_type = match.group(1)  # 'link' or 'xref'
+                 url_part = match.group(2)
+                 text_part = ''
+
+                 # Check if URL contains attributes
+                 if re.search(r'\{[a-zA-Z0-9_-]+\}', url_part):
+                     modified_url = url_part
+
+                     # Replace attributes only in URL
+                     attr_matches = re.findall(r'\{([a-zA-Z0-9_-]+)\}', url_part)
+                     for attr_name in attr_matches:
+                         if attr_name in attributes:
+                             attr_pattern = re.escape(f'{{{attr_name}}}')
+                             modified_url = re.sub(attr_pattern, attributes[attr_name], modified_url)
+                             replacement_count += 1
+
+                     if modified_url != url_part:
+                         # Reconstruct the link with modified URL
+                         modified = f'{macro_type}:{modified_url}[]'
+                         start = match.start()
+                         end = match.end()
+                         content = content[:start] + modified + content[end:]
+             else:
+                 # For links with text
+                 url_part = match.group(1)
+                 text_part = match.group(2)
+
+                 # Check if URL contains attributes
+                 if re.search(r'\{[a-zA-Z0-9_-]+\}', url_part):
+                     modified_url = url_part
+
+                     # Replace attributes only in URL
+                     attr_matches = re.findall(r'\{([a-zA-Z0-9_-]+)\}', url_part)
+                     for attr_name in attr_matches:
+                         if attr_name in attributes:
+                             attr_pattern = re.escape(f'{{{attr_name}}}')
+                             modified_url = re.sub(attr_pattern, attributes[attr_name], modified_url)
+                             replacement_count += 1
+
+                     if modified_url != url_part:
+                         # Reconstruct the link with modified URL but original text
+                         if link_type == 'link':
+                             modified = f'link:{modified_url}[{text_part}]'
+                         else:  # xref
+                             modified = f'xref:{modified_url}[{text_part}]'
+
+                         start = match.start()
+                         end = match.end()
+                         content = content[:start] + modified + content[end:]
+
+     # Write changes if not in dry-run mode
+     if content != original_content:
+         if not dry_run:
+             with open(file_path, 'w', encoding='utf-8') as f:
+                 f.write(content)
+
+         return replacement_count
+
+     return 0
+
+
+ def find_adoc_files(root_dir: Path, exclude_dirs: Optional[set] = None) -> List[Path]:
+     """Find all *.adoc files in the repository."""
+     if exclude_dirs is None:
+         exclude_dirs = {'.git', 'target', 'build', 'node_modules'}
+
+     adoc_files = []
+
+     for path in root_dir.rglob('*.adoc'):
+         # Check if any part of the path is in exclude_dirs
+         parts = set(path.parts)
+         if not parts.intersection(exclude_dirs):
+             adoc_files.append(path)
+
+     return adoc_files
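Taken together, the helpers above form a small pipeline: locate attributes.adoc files, load and flatten their definitions, then rewrite attribute references inside link:/xref: URLs while leaving the link text alone. A rough sketch of that flow, assuming the module is importable as doc_utils.replace_link_attributes; the repository root and the dry-run preview are illustrative choices.

# Illustrative wiring of the helpers above; the import path is an assumption.
from pathlib import Path

from doc_utils.replace_link_attributes import (
    find_adoc_files,
    find_attributes_files,
    load_attributes,
    replace_link_attributes_in_file,
    resolve_nested_attributes,
)

root = Path('.')

# Merge every attributes.adoc found in the tree, then flatten nested references.
attributes = {}
for attr_file in find_attributes_files(root):
    attributes.update(load_attributes(attr_file))
attributes = resolve_nested_attributes(attributes)

# Preview replacements in link/xref URLs without writing anything (dry_run=True).
total = 0
for adoc in find_adoc_files(root):
    total += replace_link_attributes_in_file(adoc, attributes, dry_run=True, macro_type='both')

print(f"Would replace {total} attribute references in link/xref URLs")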
doc_utils/spinner.py ADDED
@@ -0,0 +1,119 @@
+ """
+ Spinner utility for showing progress during long-running operations.
+
+ This module provides a simple spinner that can be used by all doc-utils tools
+ to indicate that processing is in progress.
+ """
+
+ import sys
+ import time
+ import threading
+ from typing import Optional
+
+
+ class Spinner:
+     """A simple spinner to show progress during long operations."""
+
+     FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+
+     def __init__(self, message: str = "Processing"):
+         """
+         Initialize the spinner with a message.
+
+         Args:
+             message: The message to display alongside the spinner
+         """
+         self.message = message
+         self.spinning = False
+         self.thread: Optional[threading.Thread] = None
+         self.frame_index = 0
+
+     def _spin(self):
+         """Internal method that runs in a separate thread to animate the spinner."""
+         while self.spinning:
+             frame = self.FRAMES[self.frame_index % len(self.FRAMES)]
+             sys.stdout.write(f'\r{frame} {self.message}...')
+             sys.stdout.flush()
+             self.frame_index += 1
+             time.sleep(0.1)
+
+     def start(self):
+         """Start the spinner animation."""
+         if not self.spinning:
+             self.spinning = True
+             self.thread = threading.Thread(target=self._spin)
+             self.thread.daemon = True
+             self.thread.start()
+
+     def stop(self, final_message: Optional[str] = None, success: bool = True):
+         """
+         Stop the spinner animation.
+
+         Args:
+             final_message: Optional message to display after stopping
+             success: Whether the operation was successful (affects the symbol shown)
+         """
+         if self.spinning:
+             self.spinning = False
+             if self.thread:
+                 self.thread.join()
+
+             # Clear the spinner line completely
+             sys.stdout.write('\r' + ' ' * 80 + '\r')
+
+             # Write final message if provided
+             if final_message:
+                 symbol = '✓' if success else '✗'
+                 sys.stdout.write(f'{symbol} {final_message}\n')
+
+             sys.stdout.flush()
+
+     def __enter__(self):
+         """Context manager entry - start the spinner."""
+         self.start()
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Context manager exit - stop the spinner."""
+         success = exc_type is None
+         self.stop(success=success)
+         return False
+
+
+ def with_spinner(message: str = "Processing"):
+     """
+     Decorator to add a spinner to a function.
+
+     Usage:
+         @with_spinner("Loading data")
+         def load_data():
+             # ... long running operation
+             return data
+     """
+     def decorator(func):
+         def wrapper(*args, **kwargs):
+             spinner = Spinner(message)
+             spinner.start()
+             try:
+                 result = func(*args, **kwargs)
+                 spinner.stop(success=True)
+                 return result
+             except Exception as e:
+                 spinner.stop(success=False)
+                 raise e
+         return wrapper
+     return decorator
+
+
+ # Convenience functions for common operations
+ def show_progress(message: str = "Processing", total: Optional[int] = None):
+     """
+     Show progress with optional item count.
+
+     Args:
+         message: The base message to display
+         total: Optional total number of items being processed
+     """
+     if total:
+         return Spinner(f"{message} ({total} items)")
+     return Spinner(message)
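The class supports three usage styles that the module itself documents: a context manager, the with_spinner decorator, and the show_progress helper for an item-count message. A short illustrative sketch, assuming the module is importable as doc_utils.spinner; the time.sleep calls stand in for real work.

import time

# Import path assumed from the package layout shown in this diff.
from doc_utils.spinner import Spinner, show_progress, with_spinner

# Context-manager style: the spinner starts on entry and clears itself on exit.
with Spinner("Scanning AsciiDoc files"):
    time.sleep(2)

# Decorator style: wraps a long-running function.
@with_spinner("Building link report")
def build_report():
    time.sleep(2)
    return {"links": 42}

report = build_report()

# Convenience helper: include an item count, then stop with a summary line.
spinner = show_progress("Validating links", total=120)
spinner.start()
time.sleep(2)
spinner.stop(final_message="120 links validated", success=True)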