rolfedh-doc-utils 0.1.38__py3-none-any.whl → 0.1.40__py3-none-any.whl
This diff compares publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- convert_freemarker_to_asciidoc.py +288 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/unused_attributes.py +48 -0
- doc_utils/version.py +1 -1
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +16 -1
- inventory_conditionals.py +53 -0
- {rolfedh_doc_utils-0.1.38.dist-info → rolfedh_doc_utils-0.1.40.dist-info}/METADATA +2 -1
- {rolfedh_doc_utils-0.1.38.dist-info → rolfedh_doc_utils-0.1.40.dist-info}/RECORD +17 -9
- {rolfedh_doc_utils-0.1.38.dist-info → rolfedh_doc_utils-0.1.40.dist-info}/WHEEL +1 -1
- {rolfedh_doc_utils-0.1.38.dist-info → rolfedh_doc_utils-0.1.40.dist-info}/entry_points.txt +4 -0
- {rolfedh_doc_utils-0.1.38.dist-info → rolfedh_doc_utils-0.1.40.dist-info}/top_level.txt +4 -0
- {rolfedh_doc_utils-0.1.38.dist-info → rolfedh_doc_utils-0.1.40.dist-info}/licenses/LICENSE +0 -0
doc_utils/duplicate_includes.py (new file)

@@ -0,0 +1,347 @@

```python
"""
Core logic for finding AsciiDoc files that are included more than once.

Scans AsciiDoc files for include:: macros and identifies files that are
included from multiple locations, which may indicate opportunities for
content reuse or potential maintenance issues.
"""

import os
import re
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path


INCLUDE_PATTERN = re.compile(r'^include::([^\[]+)\[', re.MULTILINE)

# Files commonly expected to be included in multiple places
DEFAULT_COMMON_INCLUDES = {
    'attributes.adoc',
    'common/attributes.adoc',
    'common/revision-info.adoc',
    '_attributes.adoc',
}

# Default directories to exclude
DEFAULT_EXCLUDE_DIRS = {'.git', '.archive', 'target', 'build', 'node_modules'}


@dataclass
class IncludeLocation:
    """Represents where an include was found."""
    source_file: str
    line_number: int
    raw_include_path: str


@dataclass
class DuplicateInclude:
    """Represents a file that is included multiple times."""
    resolved_path: str
    locations: list[IncludeLocation] = field(default_factory=list)
    is_common: bool = False

    @property
    def count(self) -> int:
        return len(self.locations)


def find_includes_in_file(file_path: str) -> list[tuple[str, int]]:
    """
    Extract all include:: targets from an AsciiDoc file.

    Returns list of (include_target, line_number) tuples.
    """
    includes = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                match = re.match(r'^include::([^\[]+)\[', line)
                if match:
                    includes.append((match.group(1), line_num))
    except (IOError, UnicodeDecodeError) as e:
        print(f"Warning: Could not read {file_path}: {e}")
    return includes


def resolve_include_path(include_target: str, source_file: str, base_dir: str) -> str:
    """
    Resolve an include target to a normalized path relative to base directory.
    """
    source_dir = os.path.dirname(source_file)

    # Resolve the path relative to source file's directory
    if include_target.startswith('../') or include_target.startswith('./'):
        resolved = os.path.normpath(os.path.join(source_dir, include_target))
    else:
        resolved = os.path.normpath(os.path.join(source_dir, include_target))

    # Make relative to base directory if possible
    try:
        resolved = os.path.relpath(resolved, base_dir)
    except ValueError:
        pass  # Keep absolute path if on different drive (Windows)

    return resolved


def is_common_include(path: str, common_includes: set[str]) -> bool:
    """Check if a path matches a common include pattern."""
    basename = os.path.basename(path)
    return path in common_includes or basename in common_includes


def collect_adoc_files(
    directory: str,
    exclude_dirs: set[str] | None = None,
    exclude_files: set[str] | None = None
) -> list[str]:
    """
    Collect all .adoc files in a directory recursively.

    Args:
        directory: Base directory to scan
        exclude_dirs: Directory names to exclude
        exclude_files: File names or paths to exclude

    Returns:
        List of absolute paths to .adoc files
    """
    exclude_dirs = exclude_dirs or DEFAULT_EXCLUDE_DIRS
    exclude_files = exclude_files or set()

    adoc_files = []
    base_path = os.path.abspath(directory)

    for root, dirs, files in os.walk(base_path, followlinks=False):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if d not in exclude_dirs]

        for filename in files:
            if not filename.endswith('.adoc'):
                continue

            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, base_path)

            # Check exclusions
            if filename in exclude_files or rel_path in exclude_files:
                continue

            adoc_files.append(filepath)

    return sorted(adoc_files)


def find_duplicate_includes(
    directory: str,
    exclude_dirs: set[str] | None = None,
    exclude_files: set[str] | None = None,
    include_common: bool = False,
    common_includes: set[str] | None = None
) -> tuple[list[DuplicateInclude], int, int]:
    """
    Find all files that are included more than once.

    Args:
        directory: Base directory to scan
        exclude_dirs: Directory names to exclude
        exclude_files: File names or paths to exclude
        include_common: If True, include common files in results
        common_includes: Set of paths considered "common" (expected duplicates)

    Returns:
        Tuple of (duplicates, total_files_scanned, excluded_common_count)
    """
    if common_includes is None:
        common_includes = DEFAULT_COMMON_INCLUDES

    # Collect all .adoc files
    adoc_files = collect_adoc_files(directory, exclude_dirs, exclude_files)
    base_dir = os.path.abspath(directory)

    # Track includes: {resolved_path: [IncludeLocation, ...]}
    include_map: dict[str, list[IncludeLocation]] = defaultdict(list)

    for source_file in adoc_files:
        includes = find_includes_in_file(source_file)
        for include_target, line_num in includes:
            resolved = resolve_include_path(include_target, source_file, base_dir)
            rel_source = os.path.relpath(source_file, base_dir)

            include_map[resolved].append(IncludeLocation(
                source_file=rel_source,
                line_number=line_num,
                raw_include_path=include_target
            ))

    # Find duplicates
    duplicates = []
    excluded_common_count = 0

    for path, locations in include_map.items():
        if len(locations) <= 1:
            continue

        is_common = is_common_include(path, common_includes)

        if is_common and not include_common:
            excluded_common_count += 1
            continue

        duplicates.append(DuplicateInclude(
            resolved_path=path,
            locations=locations,
            is_common=is_common
        ))

    # Sort by count descending
    duplicates.sort(key=lambda d: d.count, reverse=True)

    return duplicates, len(adoc_files), excluded_common_count


def format_txt_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as plain text."""
    lines = []

    lines.append(f"Command: {cmd_line}")
    lines.append(f"Directory: {os.path.abspath(directory)}")
    lines.append(f"Files scanned: {total_files}")
    lines.append("")

    if not duplicates:
        if excluded_common:
            lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
            lines.append("Use --include-common to see all duplicates.")
        else:
            lines.append("No files are included more than once.")
        return '\n'.join(lines)

    lines.append(f"Found {len(duplicates)} files included more than once:")
    if excluded_common:
        lines.append(f" ({excluded_common} common files excluded; use --include-common to see all)")
    lines.append("")
    lines.append("=" * 70)

    for i, dup in enumerate(duplicates, 1):
        common_marker = " [COMMON]" if dup.is_common else ""
        lines.append(f"\n[{i}] {dup.resolved_path}{common_marker}")
        lines.append(f" Included {dup.count} times:")
        lines.append("-" * 50)

        for loc in dup.locations:
            lines.append(f" - {loc.source_file}:{loc.line_number}")

    return '\n'.join(lines)


def format_csv_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as CSV."""
    lines = []
    lines.append("Included File,Inclusion Count,Is Common,Source File,Line Number,Raw Include Path")

    for dup in duplicates:
        for loc in dup.locations:
            lines.append(
                f'"{dup.resolved_path}",{dup.count},{dup.is_common},'
                f'"{loc.source_file}",{loc.line_number},"{loc.raw_include_path}"'
            )

    return '\n'.join(lines)


def format_json_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as JSON."""
    import json

    data = {
        "command": cmd_line,
        "directory": os.path.abspath(directory),
        "files_scanned": total_files,
        "excluded_common_count": excluded_common,
        "duplicate_count": len(duplicates),
        "duplicates": [
            {
                "path": dup.resolved_path,
                "count": dup.count,
                "is_common": dup.is_common,
                "locations": [
                    {
                        "source_file": loc.source_file,
                        "line_number": loc.line_number,
                        "raw_include_path": loc.raw_include_path
                    }
                    for loc in dup.locations
                ]
            }
            for dup in duplicates
        ]
    }

    return json.dumps(data, indent=2)


def format_md_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as Markdown."""
    lines = []

    lines.append("# Duplicate Includes Report")
    lines.append("")
    lines.append(f"**Command:** `{cmd_line}`")
    lines.append(f"**Directory:** `{os.path.abspath(directory)}`")
    lines.append(f"**Files scanned:** {total_files}")
    lines.append("")

    if not duplicates:
        if excluded_common:
            lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
        else:
            lines.append("No files are included more than once.")
        return '\n'.join(lines)

    lines.append(f"## Summary")
    lines.append("")
    lines.append(f"Found **{len(duplicates)}** files included more than once.")
    if excluded_common:
        lines.append(f"({excluded_common} common files excluded)")
    lines.append("")

    for i, dup in enumerate(duplicates, 1):
        common_marker = " *(common)*" if dup.is_common else ""
        lines.append(f"### {i}. `{dup.resolved_path}`{common_marker}")
        lines.append("")
        lines.append(f"Included **{dup.count}** times:")
        lines.append("")

        for loc in dup.locations:
            lines.append(f"- `{loc.source_file}:{loc.line_number}`")

        lines.append("")

    return '\n'.join(lines)
```
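Below is a minimal usage sketch for this new module. It is not part of the package diff; the `docs` path and the command string are illustrative placeholders, and only the functions and signatures shown in the file above are assumed.

```python
# Hypothetical usage of the new doc_utils.duplicate_includes module.
from doc_utils.duplicate_includes import find_duplicate_includes, format_txt_report

# 'docs' is a placeholder directory; exclusion and "common include" defaults
# come from the module itself.
duplicates, files_scanned, excluded_common = find_duplicate_includes('docs')

report = format_txt_report(
    duplicates,
    files_scanned,
    excluded_common,
    'docs',
    cmd_line='doc-utils find-duplicate-includes docs',  # placeholder command string
)
print(report)
```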
doc_utils/inventory_conditionals.py (new file)

@@ -0,0 +1,164 @@

```python
"""
Module for inventorying AsciiDoc conditional directives.

Functions:
- find_adoc_files: Recursively find all .adoc files in a directory.
- scan_file_for_conditionals: Scan a file for conditional directives.
- create_inventory: Create an inventory of all conditionals found in .adoc files.
"""

import re
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from typing import List, Tuple, Dict, Set


# Pattern to match AsciiDoc conditionals
CONDITIONAL_PATTERN = re.compile(
    r'^(ifdef|ifndef|endif|ifeval)::(.*)$',
    re.MULTILINE
)


def find_adoc_files(directory: Path) -> List[Path]:
    """Find all .adoc files in the given directory recursively."""
    return sorted(directory.rglob('*.adoc'))


def scan_file_for_conditionals(filepath: Path) -> List[Tuple[int, str, str]]:
    """
    Scan a file for conditional directives.

    Args:
        filepath: Path to the .adoc file to scan.

    Returns:
        A list of tuples: (line_number, directive_type, condition)
    """
    results = []
    try:
        content = filepath.read_text(encoding='utf-8')
        for i, line in enumerate(content.splitlines(), start=1):
            match = CONDITIONAL_PATTERN.match(line.strip())
            if match:
                directive_type = match.group(1)
                condition = match.group(2)
                results.append((i, directive_type, condition))
    except Exception as e:
        print(f"Warning: Could not read {filepath}: {e}")
    return results


def create_inventory(directory: Path, output_dir: Path = None) -> Path:
    """
    Create an inventory of all conditionals found in .adoc files.

    Args:
        directory: Directory to scan for .adoc files.
        output_dir: Directory to write the inventory file. Defaults to current directory.

    Returns:
        The path to the created inventory file.
    """
    if output_dir is None:
        output_dir = Path.cwd()

    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    output_file = output_dir / f'inventory-{timestamp}.txt'

    adoc_files = find_adoc_files(directory)

    # Track statistics
    stats: Dict[str, int] = defaultdict(int)
    conditions_used: Dict[str, List[Tuple[Path, int]]] = defaultdict(list)
    total_files_with_conditionals = 0

    lines = []
    lines.append("AsciiDoc Conditionals Inventory")
    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append(f"Directory: {directory.resolve()}")
    lines.append("=" * 80)
    lines.append("")

    for filepath in adoc_files:
        conditionals = scan_file_for_conditionals(filepath)
        if conditionals:
            total_files_with_conditionals += 1
            relative_path = filepath.relative_to(directory)
            lines.append(f"File: {relative_path}")
            lines.append("-" * 60)

            for line_num, directive, condition in conditionals:
                stats[directive] += 1
                # Extract the condition name (before any brackets)
                cond_name = condition.split('[')[0] if condition else '(empty)'
                if directive in ('ifdef', 'ifndef', 'ifeval'):
                    conditions_used[cond_name].append((relative_path, line_num))

                lines.append(f" Line {line_num:5d}: {directive}::{condition}")

            lines.append("")

    # Add summary section
    lines.append("=" * 80)
    lines.append("SUMMARY")
    lines.append("=" * 80)
    lines.append("")
    lines.append(f"Total .adoc files scanned: {len(adoc_files)}")
    lines.append(f"Files with conditionals: {total_files_with_conditionals}")
    lines.append("")
    lines.append("Directive counts:")
    for directive in sorted(stats.keys()):
        lines.append(f" {directive}: {stats[directive]}")
    lines.append(f" Total: {sum(stats.values())}")
    lines.append("")

    # List unique conditions
    lines.append("=" * 80)
    lines.append("UNIQUE CONDITIONS USED")
    lines.append("=" * 80)
    lines.append("")
    for cond in sorted(conditions_used.keys()):
        occurrences = conditions_used[cond]
        lines.append(f" {cond}: {len(occurrences)} occurrences")

    # Write the inventory file
    output_file.write_text('\n'.join(lines), encoding='utf-8')

    return output_file


def get_inventory_stats(directory: Path) -> Dict:
    """
    Get statistics about conditionals without writing a file.

    Args:
        directory: Directory to scan for .adoc files.

    Returns:
        Dictionary with statistics about conditionals found.
    """
    adoc_files = find_adoc_files(directory)

    stats: Dict[str, int] = defaultdict(int)
    conditions_used: Dict[str, int] = defaultdict(int)
    files_with_conditionals: Set[Path] = set()

    for filepath in adoc_files:
        conditionals = scan_file_for_conditionals(filepath)
        if conditionals:
            files_with_conditionals.add(filepath)
            for line_num, directive, condition in conditionals:
                stats[directive] += 1
                cond_name = condition.split('[')[0] if condition else '(empty)'
                if directive in ('ifdef', 'ifndef', 'ifeval'):
                    conditions_used[cond_name] += 1

    return {
        'total_files': len(adoc_files),
        'files_with_conditionals': len(files_with_conditionals),
        'directive_counts': dict(stats),
        'total_conditionals': sum(stats.values()),
        'unique_conditions': dict(conditions_used),
    }
```
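A hedged usage sketch for the inventory module follows; the `docs` directory is a placeholder, and the dictionary keys are taken from `get_inventory_stats` as shown above.

```python
from pathlib import Path

# Hypothetical usage of the new doc_utils.inventory_conditionals module.
from doc_utils.inventory_conditionals import create_inventory, get_inventory_stats

docs_dir = Path('docs')  # placeholder directory

# Summary statistics without writing a report file.
stats = get_inventory_stats(docs_dir)
print(f"{stats['total_conditionals']} conditionals "
      f"in {stats['files_with_conditionals']} of {stats['total_files']} files")

# Write a timestamped inventory-<YYYYMMDD-HHMMSS>.txt report to the current directory.
report_path = create_inventory(docs_dir)
print(f"Inventory written to {report_path}")
```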
doc_utils/unused_attributes.py (changed)

```diff
@@ -212,3 +212,51 @@ def comment_out_unused_attributes(attr_file: str, unused_attrs: List[str]) -> int:
         f.writelines(new_lines)
 
     return commented_count
+
+
+def remove_unused_attributes(attr_file: str, unused_attrs: List[str] = None) -> int:
+    """
+    Remove unused attributes from the attributes file.
+
+    This removes lines that either:
+    - Define an attribute in the unused_attrs list, or
+    - Are already marked with "// Unused" prefix
+
+    Args:
+        attr_file: Path to the attributes file
+        unused_attrs: Optional list of unused attribute names. If None, only
+            removes lines already marked with "// Unused".
+
+    Returns:
+        Number of lines removed
+    """
+    # Read the file
+    with open(attr_file, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    # Create a set for faster lookup
+    unused_set = set(unused_attrs) if unused_attrs else set()
+    removed_count = 0
+
+    # Process each line
+    new_lines = []
+    for line in lines:
+        # Check if line is already marked as unused
+        if line.startswith('// Unused '):
+            removed_count += 1
+            continue
+
+        # Check if this line defines an unused attribute
+        if unused_attrs:
+            match = re.match(r'^:([\w-]+):', line)
+            if match and match.group(1) in unused_set:
+                removed_count += 1
+                continue
+
+        new_lines.append(line)
+
+    # Write back to the file
+    with open(attr_file, 'w', encoding='utf-8') as f:
+        f.writelines(new_lines)
+
+    return removed_count
```
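A short, hedged sketch of calling the new helper; the attributes file path and the attribute names are placeholders, not values from this package.

```python
# Hypothetical call to the new remove_unused_attributes() helper.
from doc_utils.unused_attributes import remove_unused_attributes

removed = remove_unused_attributes(
    'common/attributes.adoc',                     # placeholder path to an attributes file
    unused_attrs=['legacy-name', 'old-version'],  # placeholder attribute names
)
print(f"Removed {removed} unused attribute lines")
```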