rolfedh-doc-utils 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- doc_utils/extract_link_attributes.py +453 -0
- doc_utils/format_asciidoc_spacing.py +222 -0
- doc_utils/replace_link_attributes.py +168 -0
- extract_link_attributes.py +93 -0
- format_asciidoc_spacing.py +42 -232
- replace_link_attributes.py +186 -0
- {rolfedh_doc_utils-0.1.8.dist-info → rolfedh_doc_utils-0.1.10.dist-info}/METADATA +3 -1
- rolfedh_doc_utils-0.1.10.dist-info/RECORD +23 -0
- {rolfedh_doc_utils-0.1.8.dist-info → rolfedh_doc_utils-0.1.10.dist-info}/entry_points.txt +2 -0
- {rolfedh_doc_utils-0.1.8.dist-info → rolfedh_doc_utils-0.1.10.dist-info}/top_level.txt +2 -0
- rolfedh_doc_utils-0.1.8.dist-info/RECORD +0 -18
- {rolfedh_doc_utils-0.1.8.dist-info → rolfedh_doc_utils-0.1.10.dist-info}/WHEEL +0 -0
- {rolfedh_doc_utils-0.1.8.dist-info → rolfedh_doc_utils-0.1.10.dist-info}/licenses/LICENSE +0 -0

doc_utils/extract_link_attributes.py
@@ -0,0 +1,453 @@
#!/usr/bin/env python3
"""
Extract link and xref macros containing attributes into attribute definitions.
"""

import os
import re
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional
from collections import defaultdict
import unicodedata


def find_attribute_files(base_path: str = '.') -> List[str]:
    """Find potential attribute files in the repository."""
    common_patterns = [
        '**/common-attributes.adoc',
        '**/attributes.adoc',
        '**/*-attributes.adoc',
        '**/attributes-*.adoc',
        '**/common_attributes.adoc',
        '**/_common-attributes.adoc'
    ]

    attribute_files = []
    base = Path(base_path)

    for pattern in common_patterns:
        for file_path in base.glob(pattern):
            if file_path.is_file():
                rel_path = file_path.relative_to(base)
                attribute_files.append(str(rel_path))

    # Remove duplicates while preserving order
    seen = set()
    unique_files = []
    for f in attribute_files:
        if f not in seen:
            seen.add(f)
            unique_files.append(f)

    return sorted(unique_files)


def select_attribute_file(attribute_files: List[str]) -> str:
    """Let user interactively select an attribute file."""
    if not attribute_files:
        return None

    print("\nMultiple attribute files found. Please select one:")
    for i, file_path in enumerate(attribute_files, 1):
        print(f" {i}. {file_path}")

    while True:
        try:
            choice = input(f"\nEnter your choice (1-{len(attribute_files)}): ").strip()
            index = int(choice) - 1
            if 0 <= index < len(attribute_files):
                return attribute_files[index]
            else:
                print(f"Please enter a number between 1 and {len(attribute_files)}")
        except (ValueError, EOFError, KeyboardInterrupt):
            print("\nOperation cancelled.")
            return None


def load_existing_attributes(file_path: str) -> Dict[str, str]:
    """Load existing attributes from file."""
    attributes = {}
    if not os.path.exists(file_path):
        return attributes

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Match attribute definitions
            match = re.match(r'^:([^:]+):\s*(.+)$', line)
            if match:
                attr_name = match.group(1).strip()
                attr_value = match.group(2).strip()
                attributes[attr_name] = attr_value

    return attributes


def find_link_macros(file_path: str) -> List[Tuple[str, str, str, int]]:
    """
    Find all link: and xref: macros containing attributes in their URLs.

    Returns list of tuples: (full_macro, url, link_text, line_number)
    """
    macros = []

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            # Pattern to match link: and xref: macros
            # Matches: (link|xref):url[text] where url contains {attribute}
            patterns = [
                r'(link:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])',
                r'(xref:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])'
            ]

            for pattern in patterns:
                for match in re.finditer(pattern, line, re.IGNORECASE):
                    full_macro = match.group(1)
                    url = match.group(2)
                    link_text = match.group(3)
                    macros.append((full_macro, url, link_text, line_num))

    return macros


def generate_attribute_name(url: str, existing_attrs: Set[str], counter: int) -> str:
    """Generate a unique attribute name from URL."""
    # Start with a base name from the URL
    base_name = url

    # Extract domain or path components
    if '://' in url:
        # Remove protocol
        base_name = re.sub(r'^[^:]+://', '', url)

    # Remove attributes from the name generation
    base_name = re.sub(r'\{[^}]+\}', '', base_name)

    # Extract meaningful parts
    if '/' in base_name:
        parts = base_name.split('/')
        # Use domain and last path component
        if len(parts) > 1:
            domain_part = parts[0].replace('.', '-')
            path_part = parts[-1].split('.')[0] if parts[-1] else ''
            if path_part:
                base_name = f"{domain_part}-{path_part}"
            else:
                base_name = domain_part

    # Clean up the name
    base_name = re.sub(r'[^a-zA-Z0-9-]', '-', base_name)
    base_name = re.sub(r'-+', '-', base_name)
    base_name = base_name.strip('-').lower()

    # Limit length
    if len(base_name) > 30:
        base_name = base_name[:30]

    # Make it unique
    attr_name = f"link-{base_name}"
    original_name = attr_name
    suffix = 1

    while attr_name in existing_attrs:
        attr_name = f"{original_name}-{suffix}"
        suffix += 1

    return attr_name


def group_macros_by_url(macros: List[Tuple[str, str, str, str, int]]) -> Dict[str, List[Tuple[str, str, str, int]]]:
    """
    Group macros by URL, collecting all link text variations.

    Returns: Dict[url, List[(file_path, link_text, full_macro, line_number)]]
    """
    url_groups = defaultdict(list)

    for file_path, full_macro, url, link_text, line_num in macros:
        url_groups[url].append((file_path, link_text, full_macro, line_num))

    return url_groups


def select_link_text(url: str, variations: List[Tuple[str, str, str, int]], interactive: bool = True) -> str:
    """
    Select link text for a URL with multiple variations.

    variations: List[(file_path, link_text, full_macro, line_number)]
    """
    # Extract unique link texts
    unique_texts = {}
    for file_path, link_text, _, line_num in variations:
        if link_text not in unique_texts:
            unique_texts[link_text] = []
        unique_texts[link_text].append(f"{file_path}:{line_num}")

    if len(unique_texts) == 1:
        # Only one variation, use it
        return list(unique_texts.keys())[0]

    if not interactive:
        # Use most common (appears in most locations)
        most_common = max(unique_texts.items(), key=lambda x: len(x[1]))
        return most_common[0]

    # Interactive selection
    print(f"\nMultiple link text variations found for URL: {url}")
    print("Please select the preferred text:")

    text_list = list(unique_texts.items())
    for i, (text, locations) in enumerate(text_list, 1):
        print(f"\n {i}. \"{text}\"")
        print(f" Used in: {', '.join(locations[:3])}")
        if len(locations) > 3:
            print(f" ... and {len(locations) - 3} more locations")

    print(f"\n {len(text_list) + 1}. Enter custom text")

    while True:
        try:
            choice = input(f"\nEnter your choice (1-{len(text_list) + 1}): ").strip()
            index = int(choice) - 1

            if 0 <= index < len(text_list):
                return text_list[index][0]
            elif index == len(text_list):
                custom_text = input("Enter custom link text: ").strip()
                if custom_text:
                    return custom_text
                else:
                    print("Text cannot be empty. Please try again.")
            else:
                print(f"Please enter a number between 1 and {len(text_list) + 1}")
        except (ValueError, EOFError, KeyboardInterrupt):
            print("\nUsing most common text variation.")
            most_common = max(unique_texts.items(), key=lambda x: len(x[1]))
            return most_common[0]


def collect_all_macros(scan_dirs: List[str] = None) -> List[Tuple[str, str, str, str, int]]:
    """
    Collect all link/xref macros with attributes from all .adoc files.

    Returns: List[(file_path, full_macro, url, link_text, line_number)]
    """
    if scan_dirs is None:
        scan_dirs = ['.']

    all_macros = []

    for scan_dir in scan_dirs:
        for root, _, files in os.walk(scan_dir):
            # Skip hidden directories and .archive
            if '/.archive' in root or '/.' in root:
                continue

            for file in files:
                if file.endswith('.adoc'):
                    file_path = os.path.join(root, file)
                    macros = find_link_macros(file_path)
                    for full_macro, url, link_text, line_num in macros:
                        all_macros.append((file_path, full_macro, url, link_text, line_num))

    return all_macros


def create_attributes(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                      existing_attrs: Dict[str, str],
                      interactive: bool = True) -> Dict[str, str]:
    """
    Create new attributes for each unique URL.

    Returns: Dict[attribute_name, attribute_value]
    """
    new_attributes = {}
    existing_attr_names = set(existing_attrs.keys())
    counter = 1

    for url, variations in url_groups.items():
        # Check if this URL already has an attribute
        existing_attr = None
        for attr_name, attr_value in existing_attrs.items():
            if url in attr_value:
                existing_attr = attr_name
                break

        if existing_attr:
            print(f"URL already has attribute {{{existing_attr}}}: {url}")
            continue

        # Select link text
        link_text = select_link_text(url, variations, interactive)

        # Generate attribute name
        attr_name = generate_attribute_name(url, existing_attr_names | set(new_attributes.keys()), counter)
        counter += 1

        # Determine macro type (link or xref)
        first_macro = variations[0][2]  # full_macro from first variation
        macro_type = 'xref' if first_macro.startswith('xref:') else 'link'

        # Create attribute value
        attr_value = f"{macro_type}:{url}[{link_text}]"
        new_attributes[attr_name] = attr_value

        print(f"Created attribute: :{attr_name}: {attr_value}")

    return new_attributes


def update_attribute_file(file_path: str, new_attributes: Dict[str, str], dry_run: bool = False):
    """Add new attributes to the attribute file."""
    if not new_attributes:
        print("No new attributes to add.")
        return

    if dry_run:
        print(f"\n[DRY RUN] Would add {len(new_attributes)} attributes to {file_path}:")
        for attr_name, attr_value in new_attributes.items():
            print(f" :{attr_name}: {attr_value}")
        return

    # Ensure directory exists
    os.makedirs(os.path.dirname(file_path) if os.path.dirname(file_path) else '.', exist_ok=True)

    # Append new attributes
    with open(file_path, 'a', encoding='utf-8') as f:
        if os.path.getsize(file_path) > 0:
            f.write('\n')  # Add newline if file not empty
        f.write('// Extracted link attributes\n')
        for attr_name, attr_value in sorted(new_attributes.items()):
            f.write(f":{attr_name}: {attr_value}\n")

    print(f"Added {len(new_attributes)} attributes to {file_path}")


def replace_macros_with_attributes(file_updates: Dict[str, List[Tuple[str, str]]], dry_run: bool = False):
    """
    Replace link/xref macros with their attribute references.

    file_updates: Dict[file_path, List[(old_macro, attribute_ref)]]
    """
    for file_path, replacements in file_updates.items():
        if dry_run:
            print(f"\n[DRY RUN] Would update {file_path}:")
            for old_macro, attr_ref in replacements[:3]:
                print(f" Replace: {old_macro}")
                print(f" With: {attr_ref}")
            if len(replacements) > 3:
                print(f" ... and {len(replacements) - 3} more replacements")
            continue

        # Read file
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Apply replacements
        for old_macro, attr_ref in replacements:
            content = content.replace(old_macro, attr_ref)

        # Write file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"Updated {file_path}: {len(replacements)} replacements")


def prepare_file_updates(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                         attribute_mapping: Dict[str, str]) -> Dict[str, List[Tuple[str, str]]]:
    """
    Prepare file updates mapping macros to attribute references.

    Returns: Dict[file_path, List[(old_macro, attribute_ref)]]
    """
    file_updates = defaultdict(list)

    # Create reverse mapping from URL to attribute name
    url_to_attr = {}
    for attr_name, attr_value in attribute_mapping.items():
        # Extract URL from attribute value
        match = re.match(r'(?:link|xref):([^\[]+)\[', attr_value)
        if match:
            url = match.group(1)
            url_to_attr[url] = attr_name

    # Map each macro occurrence to its attribute
    for url, variations in url_groups.items():
        if url in url_to_attr:
            attr_name = url_to_attr[url]
            for file_path, _, full_macro, _ in variations:
                file_updates[file_path].append((full_macro, f"{{{attr_name}}}"))

    return dict(file_updates)


def extract_link_attributes(attributes_file: str = None,
                            scan_dirs: List[str] = None,
                            interactive: bool = True,
                            dry_run: bool = False) -> bool:
    """
    Main function to extract link attributes.

    Returns: True if successful, False otherwise
    """
    # Find or confirm attributes file
    if not attributes_file:
        found_files = find_attribute_files()

        if not found_files:
            print("No attribute files found.")
            response = input("Create common-attributes.adoc? (y/n): ").strip().lower()
            if response == 'y':
                attributes_file = 'common-attributes.adoc'
            else:
                print("Please specify an attribute file with --attributes-file")
                return False
        elif len(found_files) == 1:
            attributes_file = found_files[0]
            print(f"Using attribute file: {attributes_file}")
        else:
            attributes_file = select_attribute_file(found_files)
            if not attributes_file:
                return False

    # Load existing attributes
    existing_attrs = load_existing_attributes(attributes_file)
    print(f"Loaded {len(existing_attrs)} existing attributes")

    # Collect all macros
    print("\nScanning for link and xref macros with attributes...")
    all_macros = collect_all_macros(scan_dirs)

    if not all_macros:
        print("No link or xref macros with attributes found.")
        return True

    print(f"Found {len(all_macros)} link/xref macros with attributes")

    # Group by URL
    url_groups = group_macros_by_url(all_macros)
    print(f"Grouped into {len(url_groups)} unique URLs")

    # Create new attributes
    new_attributes = create_attributes(url_groups, existing_attrs, interactive)

    if not new_attributes:
        print("No new attributes to create.")
        return True

    # Update attribute file
    update_attribute_file(attributes_file, new_attributes, dry_run)

    # Prepare file updates
    all_attributes = {**existing_attrs, **new_attributes}
    file_updates = prepare_file_updates(url_groups, all_attributes)

    # Replace macros
    replace_macros_with_attributes(file_updates, dry_run)

    if dry_run:
        print("\n[DRY RUN] No files were modified. Run without --dry-run to apply changes.")
    else:
        print(f"\nSuccessfully extracted {len(new_attributes)} link attributes")

    return True
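
The module's public entry point is extract_link_attributes(), defined at the end of the hunk above. As a rough illustration of how it could be driven, here is a minimal dry-run sketch; the driver script itself and the modules/ and assemblies/ scan directories are hypothetical, not part of the package:

    from doc_utils.extract_link_attributes import extract_link_attributes

    # Preview the extraction without writing anything: scan two (hypothetical)
    # content directories, auto-select the most common link text instead of
    # prompting, and report the attributes that would be created.
    ok = extract_link_attributes(
        attributes_file='common-attributes.adoc',
        scan_dirs=['modules', 'assemblies'],
        interactive=False,
        dry_run=True,
    )
    print('done' if ok else 'cancelled')

Per the code above, a macro such as link:https://example.com/{product-version}/install[Install guide] would be grouped with other occurrences of the same URL, reported as a candidate definition :link-example-com-install: link:https://example.com/{product-version}/install[Install guide], and each occurrence would then be replaced with {link-example-com-install}.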

doc_utils/format_asciidoc_spacing.py
@@ -0,0 +1,222 @@
"""
Format AsciiDoc spacing - ensures blank lines after headings and around include directives.

Core logic for formatting AsciiDoc files with proper spacing.
"""

import re
from pathlib import Path
from typing import List, Tuple


def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> Tuple[bool, List[str]]:
    """
    Process a single AsciiDoc file to fix spacing issues.

    Args:
        file_path: Path to the file to process
        dry_run: If True, show what would be changed without modifying
        verbose: If True, show detailed output

    Returns:
        Tuple of (changes_made, messages) where messages is a list of verbose output
    """
    messages = []

    if verbose:
        messages.append(f"Processing: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (IOError, UnicodeDecodeError) as e:
        raise IOError(f"Error reading {file_path}: {e}")

    # Remove trailing newlines from lines for processing
    lines = [line.rstrip('\n\r') for line in lines]

    new_lines = []
    changes_made = False
    in_block = False  # Track if we're inside a block (admonition, listing, etc.)
    in_conditional = False  # Track if we're inside a conditional block

    for i, current_line in enumerate(lines):
        prev_line = lines[i-1] if i > 0 else ""
        next_line = lines[i+1] if i + 1 < len(lines) else ""

        # Check for conditional start (ifdef:: or ifndef::)
        if re.match(r'^(ifdef::|ifndef::)', current_line):
            in_conditional = True
            # Add blank line before conditional if needed
            if (prev_line and
                    not re.match(r'^\s*$', prev_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', prev_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    messages.append(" Added blank line before conditional block")
            new_lines.append(current_line)

        # Check for conditional end (endif::)
        elif re.match(r'^endif::', current_line):
            new_lines.append(current_line)
            in_conditional = False
            # Add blank line after conditional if needed
            if (next_line and
                    not re.match(r'^\s*$', next_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', next_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    messages.append(" Added blank line after conditional block")

        # Check for block delimiters (====, ----, ...., ____)
        # These are used for admonitions, listing blocks, literal blocks, etc.
        elif re.match(r'^(====+|----+|\.\.\.\.+|____+)$', current_line):
            in_block = not in_block  # Toggle block state
            new_lines.append(current_line)
        # Check if current line is a heading (but not if we're in a block)
        elif not in_block and re.match(r'^=+\s+', current_line):
            new_lines.append(current_line)

            # Check if next line is not empty and not another heading
            if (next_line and
                    not re.match(r'^=+\s+', next_line) and
                    not re.match(r'^\s*$', next_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                    messages.append(f" Added blank line after heading: {truncated}")

        # Check if current line is a comment (AsciiDoc comments start with //)
        elif re.match(r'^//', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if next line is an include directive
                if next_line and re.match(r'^include::', next_line):
                    # This comment belongs to the include, add blank line before comment if needed
                    if (prev_line and
                            not re.match(r'^\s*$', prev_line) and
                            not re.match(r'^//', prev_line) and
                            not re.match(r'^:', prev_line)):  # Don't add if previous is attribute
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            messages.append(" Added blank line before comment above include")
                new_lines.append(current_line)

        # Check if current line is an attribute (starts with :)
        elif re.match(r'^:', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if next line is an include directive
                if next_line and re.match(r'^include::', next_line):
                    # This attribute belongs to the include, add blank line before attribute if needed
                    if (prev_line and
                            not re.match(r'^\s*$', prev_line) and
                            not re.match(r'^//', prev_line) and
                            not re.match(r'^:', prev_line)):  # Don't add if previous is comment or attribute
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            messages.append(" Added blank line before attribute above include")
                new_lines.append(current_line)

        # Check if current line is an include directive
        elif re.match(r'^include::', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if this is an attribute include (contains "attribute" in the path)
                is_attribute_include = 'attribute' in current_line.lower()

                # Check if this appears near the top of the file (within first 10 lines after H1)
                # Find the H1 heading position
                h1_position = -1
                for j in range(min(i, 10)):  # Look back up to 10 lines or to current position
                    if re.match(r'^=\s+', lines[j]):  # H1 heading starts with single =
                        h1_position = j
                        break

                # If this is an attribute include near the H1 heading, don't add surrounding blank lines
                is_near_h1 = h1_position >= 0 and (i - h1_position) <= 2

                # Check if previous line is a comment or attribute (which belongs to this include)
                has_comment_above = prev_line and re.match(r'^//', prev_line)
                has_attribute_above = prev_line and re.match(r'^:', prev_line)

                # If it's an attribute include near H1, only the heading's blank line is needed
                if not (is_attribute_include and is_near_h1):
                    # Don't add blank line if there's a comment or attribute above (it was handled by the comment/attribute logic)
                    if not has_comment_above and not has_attribute_above:
                        # Add blank line before include if previous line is not empty and not an include
                        if (prev_line and
                                not re.match(r'^\s*$', prev_line) and
                                not re.match(r'^include::', prev_line)):
                            new_lines.append("")
                            changes_made = True
                            if verbose:
                                truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                                messages.append(f" Added blank line before include: {truncated}")

                new_lines.append(current_line)

                # If it's an attribute include near H1, don't add blank line after
                if not (is_attribute_include and is_near_h1):
                    # Add blank line after include if next line exists and is not empty and not an include
                    if (next_line and
                            not re.match(r'^\s*$', next_line) and
                            not re.match(r'^include::', next_line)):
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                            messages.append(f" Added blank line after include: {truncated}")

        else:
            new_lines.append(current_line)

    # Apply changes if any were made
    if changes_made:
        # Clean up any consecutive blank lines we may have added
        cleaned_lines = []
        for i, line in enumerate(new_lines):
            # Check if this is a blank line we're about to add
            if line == "":
                # Check if the previous line is also a blank line
                if i > 0 and cleaned_lines and cleaned_lines[-1] == "":
                    # Skip this blank line as we already have one
                    continue
            cleaned_lines.append(line)

        if not dry_run:
            try:
                with open(file_path, 'w', encoding='utf-8') as f:
                    for line in cleaned_lines:
                        f.write(line + '\n')
            except IOError as e:
                raise IOError(f"Error writing {file_path}: {e}")
    else:
        if verbose:
            messages.append(" No changes needed")

    return changes_made, messages


def find_adoc_files(path: Path) -> List[Path]:
    """Find all .adoc files in the given path"""
    adoc_files = []

    if path.is_file():
        if path.suffix == '.adoc':
            adoc_files.append(path)
    elif path.is_dir():
        adoc_files = list(path.rglob('*.adoc'))

    return adoc_files
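
The spacing formatter's two helpers compose directly: find_adoc_files() locates the targets and process_file() fixes each one. A minimal dry-run sketch, assuming a hypothetical docs/ tree (the driver script is illustrative, not part of the wheel):

    from pathlib import Path
    from doc_utils.format_asciidoc_spacing import find_adoc_files, process_file

    # Report every .adoc file that would receive spacing fixes, along with
    # the per-change messages, but leave the files untouched (dry_run=True).
    for adoc in find_adoc_files(Path('docs')):
        changed, messages = process_file(adoc, dry_run=True, verbose=True)
        if changed:
            print(f"Would fix: {adoc}")
            for msg in messages:
                print(msg)

For example, given a file containing

    == Prerequisites
    include::modules/install.adoc[]

process_file() inserts one blank line between the heading and the include (the cleanup pass collapses the consecutive blanks produced by the heading and include rules), yielding

    == Prerequisites

    include::modules/install.adoc[]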