rolfedh-doc-utils 0.1.34__py3-none-any.whl → 0.1.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archive_unused_files.py +3 -2
- archive_unused_images.py +3 -2
- check_source_directives.py +101 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/unused_adoc.py +84 -23
- doc_utils/unused_images.py +81 -9
- doc_utils/version.py +1 -1
- {rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/METADATA +1 -1
- {rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/RECORD +14 -11
- {rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/entry_points.txt +2 -0
- {rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/top_level.txt +2 -0
- {rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/WHEEL +0 -0
- {rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/licenses/LICENSE +0 -0
archive_unused_files.py
CHANGED

@@ -22,6 +22,7 @@ def main():
         epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
     )
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+    parser.add_argument('--commented', action='store_true', help='Include files that are referenced only in commented lines in the archive operation.')
     parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
     parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
     parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
@@ -35,13 +36,13 @@ def main():

     exclude_dirs = list(args.exclude_dir)
     exclude_files = list(args.exclude_file)
-
+
     if args.exclude_list:
         list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
         exclude_dirs.extend(list_dirs)
         exclude_files.extend(list_files)

-    find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
+    find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)

 if __name__ == '__main__':
     main()
archive_unused_images.py
CHANGED

@@ -18,6 +18,7 @@ def main():
     check_version_on_startup()
     parser = argparse.ArgumentParser(description='Archive unused image files.')
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+    parser.add_argument('--commented', action='store_true', help='Include images that are referenced only in commented lines in the archive operation.')
     parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
     parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
     parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
@@ -29,13 +30,13 @@ def main():

     exclude_dirs = list(args.exclude_dir)
     exclude_files = list(args.exclude_file)
-
+
     if args.exclude_list:
         list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
         exclude_dirs.extend(list_dirs)
         exclude_files.extend(list_files)

-    find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
+    find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)

 if __name__ == '__main__':
     main()
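Both wrapper scripts simply thread the new --commented flag through to the library layer as the include_commented keyword argument shown in the diffs below. A minimal sketch of the equivalent programmatic calls, assuming illustrative scan and archive directories (these paths are placeholders, not values shipped with the package):

    # Sketch only: directory names are illustrative placeholders.
    # archive=False previews; pass archive=True to move files into a dated zip.
    from doc_utils.unused_adoc import find_unused_adoc
    from doc_utils.unused_images import find_unused_images

    # include_commented=True mirrors --commented: files or images whose only
    # references are commented-out lines are treated as unused.
    find_unused_adoc(scan_dirs=['modules', 'assemblies'], archive_dir='./archive',
                     archive=False, include_commented=True)
    find_unused_images(['images'], './archive',
                       archive=False, include_commented=True)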
check_source_directives.py
ADDED

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Check Source Directives

Detects code blocks (----) that are missing [source] directive in AsciiDoc files.
This helps prevent AsciiDoc-to-DocBook XML conversion errors.

Usage:
    check-source-directives # Scan current directory
    check-source-directives asciidoc # Scan asciidoc/ directory
    check-source-directives --fix # Scan and fix issues in current directory
    check-source-directives --fix asciidoc # Scan and fix issues in asciidoc/ directory
"""

import argparse
import sys
from doc_utils.missing_source_directive import find_missing_source_directives
from doc_utils.version_check import check_version_on_startup
from doc_utils.version import __version__

# ANSI color codes
RED = '\033[0;31m'
YELLOW = '\033[1;33m'
GREEN = '\033[0;32m'
NC = '\033[0m' # No Color

def main():
    # Check for updates (non-blocking)
    check_version_on_startup()

    parser = argparse.ArgumentParser(
        description='Detect code blocks (----) missing [source] directive in AsciiDoc files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s # Scan current directory
  %(prog)s asciidoc # Scan asciidoc/ directory
  %(prog)s --fix # Scan and fix issues in current directory
  %(prog)s --fix asciidoc # Scan and fix issues in asciidoc/ directory
"""
    )
    parser.add_argument('directory', nargs='?', default='.',
                        help='Directory to scan (default: current directory)')
    parser.add_argument('--fix', action='store_true',
                        help='Automatically insert [source] directives where missing')
    parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')

    args = parser.parse_args()

    mode = "Fixing" if args.fix else "Scanning for"
    print(f"{mode} code blocks missing [source] directive in: {args.directory}")
    print("=" * 64)
    print()

    try:
        results = find_missing_source_directives(
            scan_dir=args.directory,
            auto_fix=args.fix
        )
    except ValueError as e:
        print(f"{RED}Error: {e}{NC}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"{RED}Unexpected error: {e}{NC}", file=sys.stderr)
        sys.exit(1)

    # Display results
    for file_info in results['file_details']:
        filepath = file_info['filepath']
        issues = file_info['issues']

        print(f"{YELLOW}File: {filepath}{NC}")

        for issue in issues:
            print(f" {RED}Line {issue['line_num']}:{NC} Code block without [source] directive")
            print(f" Previous line ({issue['prev_line_num']}): {issue['prev_line']}")
            print()

        if args.fix:
            if file_info.get('fixed'):
                print(f" {GREEN}✓ Fixed {len(issues)} issue(s){NC}")
            elif 'error' in file_info:
                print(f" {RED}✗ Failed to fix file: {file_info['error']}{NC}")
            print()

    # Summary
    print("=" * 64)
    if results['total_issues'] == 0:
        print(f"{GREEN}✓ No issues found!{NC}")
        sys.exit(0)
    else:
        if args.fix:
            print(f"{GREEN}Fixed {results['total_issues']} code block(s) in {results['files_fixed']} file(s){NC}")
            sys.exit(0)
        else:
            print(f"{RED}Found {results['total_issues']} code block(s) missing [source] directive in {results['files_with_issues']} file(s){NC}")
            print(f"\nRun with --fix to automatically fix these issues")
            sys.exit(1)

if __name__ == '__main__':
    main()
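As a small illustration of what the --fix path produces (the AsciiDoc lines here are made-up example data, not content from this package): when a listing opens with ---- and no [source] directive is found in the preceding lines, the fixer inserts a bare [source] line immediately before the delimiter.

    # Illustrative data only: before/after view of one flagged block.
    before = [
        ".Example command\n",  # block title with no [source] above it
        "----\n",              # flagged: code block delimiter
        "$ make install\n",
        "----\n",
    ]
    # After running with --fix, a [source] line is inserted just before the
    # opening ---- (that is, between the title and the delimiter):
    after = [
        ".Example command\n",
        "[source]\n",
        "----\n",
        "$ make install\n",
        "----\n",
    ]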
convert_tables_to_deflists.py
ADDED

@@ -0,0 +1,479 @@
#!/usr/bin/env python3
"""
convert-tables-to-deflists: Convert AsciiDoc tables to definition lists.

Converts 2-column AsciiDoc tables to definition list format, where:
- The first column becomes the term
- The second column becomes the definition

Tables with more than 2 columns are skipped (use --columns to specify which
columns to use as term and definition).

Usage:
    convert-tables-to-deflists [OPTIONS] [PATH]

Examples:
    # Preview changes (dry-run mode)
    convert-tables-to-deflists .

    # Apply changes to all .adoc files
    convert-tables-to-deflists --apply .

    # Process a single file
    convert-tables-to-deflists --apply path/to/file.adoc

    # Use columns 1 and 3 for 3-column tables
    convert-tables-to-deflists --columns 1,3 .

    # Skip tables with headers
    convert-tables-to-deflists --skip-header-tables .
"""

import argparse
import sys
import re
from pathlib import Path
from typing import List, Optional, Tuple

from callout_lib.table_parser import TableParser, AsciiDocTable
from doc_utils.version import __version__
from doc_utils.file_utils import parse_exclude_list_file


class Colors:
    """ANSI color codes for terminal output."""
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'
    BLUE = '\033[0;34m'
    CYAN = '\033[0;36m'
    NC = '\033[0m' # No Color


def print_colored(message: str, color: str = Colors.NC) -> None:
    """Print a message with optional color."""
    print(f"{color}{message}{Colors.NC}")


class TableToDeflistConverter:
    """Converts AsciiDoc tables to definition lists."""

    def __init__(self, dry_run: bool = True, verbose: bool = False,
                 columns: Optional[Tuple[int, int]] = None,
                 skip_header_tables: bool = False,
                 skip_callout_tables: bool = True):
        """
        Initialize the converter.

        Args:
            dry_run: If True, don't modify files (preview mode)
            verbose: If True, show detailed output
            columns: Tuple of (term_col, def_col) for multi-column tables (1-indexed)
            skip_header_tables: If True, skip tables that have header rows
            skip_callout_tables: If True, skip tables that look like callout tables
        """
        self.dry_run = dry_run
        self.verbose = verbose
        self.columns = columns # 1-indexed column numbers
        self.skip_header_tables = skip_header_tables
        self.skip_callout_tables = skip_callout_tables
        self.parser = TableParser()
        self.files_processed = 0
        self.files_modified = 0
        self.tables_converted = 0

    def find_adoc_files(self, path: Path, exclude_dirs: List[str] = None,
                        exclude_files: List[str] = None) -> List[Path]:
        """Find all .adoc files in the given path."""
        exclude_dirs = exclude_dirs or []
        exclude_files = exclude_files or []

        if path.is_file():
            return [path] if path.suffix == '.adoc' else []

        adoc_files = []
        for adoc_file in path.rglob('*.adoc'):
            # Skip excluded directories
            if any(excl in str(adoc_file) for excl in exclude_dirs):
                continue
            # Skip excluded files
            if any(excl in str(adoc_file) for excl in exclude_files):
                continue
            # Skip symlinks
            if adoc_file.is_symlink():
                continue
            adoc_files.append(adoc_file)

        return sorted(adoc_files)

    def _should_skip_table(self, table: AsciiDocTable) -> Tuple[bool, str]:
        """
        Determine if a table should be skipped.

        Returns:
            Tuple of (should_skip, reason)
        """
        # Skip empty tables
        if not table.rows:
            return True, "empty table"

        # Skip callout tables (they're handled by convert-callouts-to-deflist)
        if self.skip_callout_tables:
            if self.parser.is_callout_table(table) or self.parser.is_3column_callout_table(table):
                return True, "callout table (use convert-callouts-to-deflist)"

        # Check column count
        if table.rows:
            first_row_cols = len(table.rows[0].cells)

            # If specific columns are specified, verify they exist
            if self.columns:
                term_col, def_col = self.columns
                if term_col > first_row_cols or def_col > first_row_cols:
                    return True, f"specified columns ({term_col}, {def_col}) exceed table columns ({first_row_cols})"
            else:
                # Default: only process 2-column tables
                if first_row_cols != 2:
                    return True, f"{first_row_cols}-column table (use --columns to specify term and definition columns)"

        # Check for header row
        if self.skip_header_tables and self.parser._has_header_row(table):
            return True, "table has header row"

        return False, ""

    def _convert_table_to_deflist(self, table: AsciiDocTable) -> List[str]:
        """
        Convert a table to definition list format.

        Args:
            table: The AsciiDocTable to convert

        Returns:
            List of lines representing the definition list
        """
        output = []

        # Determine which columns to use (0-indexed internally)
        if self.columns:
            term_idx = self.columns[0] - 1 # Convert to 0-indexed
            def_idx = self.columns[1] - 1
        else:
            term_idx = 0
            def_idx = 1

        # Check if table has a header row
        has_header = self.parser._has_header_row(table)
        data_rows = table.rows[1:] if has_header else table.rows

        for row in data_rows:
            # Verify row has enough cells
            if len(row.cells) <= max(term_idx, def_idx):
                continue

            # Add conditionals before row
            if row.conditionals_before:
                output.extend(row.conditionals_before)

            # Get term (first specified column)
            term_cell = row.cells[term_idx]
            term = ' '.join(line.strip() for line in term_cell.content if line.strip())

            # Get definition (second specified column)
            def_cell = row.cells[def_idx]
            def_lines = def_cell.content

            # Create definition list entry
            if term:
                output.append(f'{term}::')

                # Add definition lines
                first_content_line = True
                for line in def_lines:
                    stripped = line.strip()

                    # Handle conditional directives
                    if stripped.startswith(('ifdef::', 'ifndef::', 'endif::')):
                        output.append(line)
                        continue

                    # Skip empty lines within definition but track them
                    if not stripped:
                        continue

                    # First content line gets no indent, subsequent lines do
                    if first_content_line:
                        output.append(stripped)
                        first_content_line = False
                    else:
                        output.append(f'+\n{stripped}')

                # Add blank line after entry
                output.append('')

            # Add conditionals after row
            if row.conditionals_after:
                output.extend(row.conditionals_after)

        # Remove trailing blank line if present
        if output and not output[-1].strip():
            output.pop()

        return output

    def process_file(self, file_path: Path) -> int:
        """
        Process a single file, converting tables to definition lists.

        Args:
            file_path: Path to the .adoc file

        Returns:
            Number of tables converted
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = [line.rstrip('\n') for line in f]
        except Exception as e:
            print_colored(f"Error reading {file_path}: {e}", Colors.RED)
            return 0

        original_lines = lines.copy()
        tables = self.parser.find_tables(lines)
        conversions = 0

        # Process tables in reverse order to preserve line numbers
        for table in reversed(tables):
            should_skip, reason = self._should_skip_table(table)

            if should_skip:
                if self.verbose:
                    print(f" Skipping table at line {table.start_line + 1}: {reason}")
                continue

            # Convert the table
            deflist_lines = self._convert_table_to_deflist(table)

            if deflist_lines:
                # Replace table with definition list
                lines[table.start_line:table.end_line + 1] = deflist_lines
                conversions += 1

                if self.verbose:
                    print(f" Converted table at line {table.start_line + 1}")

        # Write changes if not in dry-run mode
        if conversions > 0:
            if self.dry_run:
                print_colored(f"Would modify: {file_path} ({conversions} table(s))", Colors.YELLOW)
            else:
                try:
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write('\n'.join(lines) + '\n')
                    print_colored(f"Modified: {file_path} ({conversions} table(s))", Colors.GREEN)
                except Exception as e:
                    print_colored(f"Error writing {file_path}: {e}", Colors.RED)
                    return 0

        return conversions

    def process_path(self, path: Path, exclude_dirs: List[str] = None,
                     exclude_files: List[str] = None) -> None:
        """
        Process all .adoc files in the given path.

        Args:
            path: File or directory path to process
            exclude_dirs: List of directory patterns to exclude
            exclude_files: List of file patterns to exclude
        """
        adoc_files = self.find_adoc_files(path, exclude_dirs, exclude_files)

        if not adoc_files:
            print_colored("No .adoc files found.", Colors.YELLOW)
            return

        if self.dry_run:
            print_colored("DRY RUN MODE - No files will be modified", Colors.YELLOW)
            print()

        for file_path in adoc_files:
            self.files_processed += 1
            conversions = self.process_file(file_path)

            if conversions > 0:
                self.files_modified += 1
                self.tables_converted += conversions

        # Print summary
        print()
        print(f"Processed {self.files_processed} file(s)")
        print(f"Tables converted: {self.tables_converted}")
        print(f"Files {'would be ' if self.dry_run else ''}modified: {self.files_modified}")

        if self.dry_run and self.files_modified > 0:
            print()
            print_colored("DRY RUN - No files were modified. Use --apply to apply changes.", Colors.YELLOW)


def parse_columns(columns_str: str) -> Tuple[int, int]:
    """
    Parse a columns specification like "1,3" into a tuple.

    Args:
        columns_str: String like "1,3" specifying term and definition columns

    Returns:
        Tuple of (term_column, definition_column) as 1-indexed integers

    Raises:
        argparse.ArgumentTypeError: If the format is invalid
    """
    try:
        parts = columns_str.split(',')
        if len(parts) != 2:
            raise ValueError("Expected exactly two column numbers")
        term_col = int(parts[0].strip())
        def_col = int(parts[1].strip())
        if term_col < 1 or def_col < 1:
            raise ValueError("Column numbers must be 1 or greater")
        if term_col == def_col:
            raise ValueError("Term and definition columns must be different")
        return (term_col, def_col)
    except ValueError as e:
        raise argparse.ArgumentTypeError(
            f"Invalid columns format '{columns_str}': {e}. "
            "Use format like '1,2' or '1,3' (1-indexed column numbers)"
        )


def main() -> int:
    """Main entry point for the CLI."""
    parser = argparse.ArgumentParser(
        description='Convert AsciiDoc tables to definition lists.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Preview changes (default dry-run mode)
  convert-tables-to-deflists .

  # Apply changes to all .adoc files
  convert-tables-to-deflists --apply .

  # Process a single file
  convert-tables-to-deflists --apply path/to/file.adoc

  # For 3-column tables, use columns 1 and 3
  convert-tables-to-deflists --columns 1,3 .

  # Skip tables that have header rows
  convert-tables-to-deflists --skip-header-tables .

Notes:
  - By default, only 2-column tables are converted
  - Callout tables are automatically skipped (use convert-callouts-to-deflist)
  - Use --columns to specify which columns to use for multi-column tables
  - The first specified column becomes the term, the second becomes the definition
"""
    )

    parser.add_argument(
        '--version',
        action='version',
        version=f'%(prog)s {__version__}'
    )

    parser.add_argument(
        'path',
        nargs='?',
        default='.',
        help='File or directory to process (default: current directory)'
    )

    parser.add_argument(
        '--apply',
        action='store_true',
        help='Apply changes (default is dry-run mode)'
    )

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Show detailed output'
    )

    parser.add_argument(
        '--columns',
        type=parse_columns,
        metavar='TERM,DEF',
        help='Column numbers to use as term and definition (1-indexed, e.g., "1,3")'
    )

    parser.add_argument(
        '--skip-header-tables',
        action='store_true',
        help='Skip tables that have header rows'
    )

    parser.add_argument(
        '--include-callout-tables',
        action='store_true',
        help='Include callout tables (normally skipped)'
    )

    parser.add_argument(
        '--exclude-dir',
        action='append',
        default=[],
        metavar='DIR',
        help='Directory pattern to exclude (can be specified multiple times)'
    )

    parser.add_argument(
        '--exclude-file',
        action='append',
        default=[],
        metavar='FILE',
        help='File pattern to exclude (can be specified multiple times)'
    )

    parser.add_argument(
        '--exclude-list',
        type=Path,
        metavar='FILE',
        help='Path to file containing exclusion patterns (one per line)'
    )

    args = parser.parse_args()

    # Parse exclusion list if provided
    exclude_dirs = list(args.exclude_dir)
    exclude_files = list(args.exclude_file)

    if args.exclude_list:
        list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
        exclude_dirs.extend(list_dirs)
        exclude_files.extend(list_files)

    # Create converter
    converter = TableToDeflistConverter(
        dry_run=not args.apply,
        verbose=args.verbose,
        columns=args.columns,
        skip_header_tables=args.skip_header_tables,
        skip_callout_tables=not args.include_callout_tables
    )

    # Process files
    path = Path(args.path)
    if not path.exists():
        print_colored(f"Error: Path does not exist: {path}", Colors.RED)
        return 1

    converter.process_path(path, exclude_dirs, exclude_files)

    return 0


if __name__ == '__main__':
    sys.exit(main())
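The same conversion can be driven from Python rather than the CLI. A minimal sketch using the class and helper defined in the file above; the "docs" and "archive" path values are illustrative placeholders:

    # Sketch: programmatic use of the converter shipped in this release.
    from pathlib import Path
    from convert_tables_to_deflists import TableToDeflistConverter, parse_columns

    converter = TableToDeflistConverter(
        dry_run=True,                  # preview only, same as omitting --apply
        columns=parse_columns("1,3"),  # term from column 1, definition from column 3
        skip_header_tables=False,
    )
    # Walks *.adoc under the given path, skipping excluded patterns and symlinks.
    converter.process_path(Path("docs"), exclude_dirs=["archive"], exclude_files=[])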
doc_utils/missing_source_directive.py
ADDED

@@ -0,0 +1,211 @@
# doc_utils/missing_source_directive.py

"""
Detects code blocks (----) that are missing [source] directive on the preceding line.

This module provides functionality to scan AsciiDoc files for code blocks that lack
proper source directives, which can cause issues with AsciiDoc-to-DocBook XML conversion.
"""

import os
import re

def is_code_block_start(line):
    """Check if line is a code block delimiter (4 or more dashes)"""
    return re.match(r'^-{4,}$', line.strip())

def has_source_directive(line):
    """Check if line contains [source] directive"""
    # Match [source], [source,lang], [source, lang], etc.
    return re.match(r'^\[source[\s,]', line.strip())

def is_empty_or_whitespace(line):
    """Check if line is empty or contains only whitespace"""
    return len(line.strip()) == 0

def scan_file(filepath):
    """
    Scan a single AsciiDoc file for missing [source] directives.

    Args:
        filepath: Path to the AsciiDoc file to scan

    Returns:
        List of issue dictionaries containing line_num, prev_line_num, and prev_line
    """
    issues = []

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        in_code_block = False

        for i, line in enumerate(lines, start=1):
            # Check if current line is a code block delimiter
            if is_code_block_start(line):
                if not in_code_block:
                    # This is the START of a code block
                    prev_line_num = i - 1
                    prev_line = lines[prev_line_num - 1].rstrip() if prev_line_num > 0 else ""

                    # Check if [source] exists in previous lines (within last 3 lines)
                    # This handles cases where there's a title between [source] and ----
                    has_source_in_context = False
                    for lookback in range(1, min(4, i)):
                        check_line = lines[i - lookback - 1].strip()
                        if has_source_directive(check_line):
                            has_source_in_context = True
                            break
                        # Stop looking if we hit an empty line or structural element
                        if not check_line or check_line.startswith(('=', '----')):
                            break

                    # Only flag if:
                    # 1. No [source] directive in recent context
                    # 2. Previous line is not empty (which might be valid formatting)
                    if (not has_source_in_context and
                        not is_empty_or_whitespace(prev_line)):

                        # Additional heuristic: check if previous line looks like it should have [source]
                        # Skip if previous line is a title, comment, or other structural element
                        prev_stripped = prev_line.strip()

                        # Skip common valid patterns
                        if prev_stripped.startswith(('=', '//', 'NOTE:', 'TIP:', 'WARNING:', 'IMPORTANT:', 'CAUTION:')):
                            in_code_block = True
                            continue

                        # Skip if previous line is already an attribute block (but not [source])
                        if prev_stripped.startswith('[') and prev_stripped.endswith(']'):
                            # It's some other attribute like [id], [role], etc., might be intentional
                            in_code_block = True
                            continue

                        # Skip if previous line is just a plus sign (continuation)
                        if prev_stripped == '+':
                            in_code_block = True
                            continue

                        # Skip if previous line is a block title (starts with .)
                        if prev_stripped.startswith('.') and len(prev_stripped) > 1:
                            # This might be a title for a source block that's defined earlier
                            # Check if there's a [source] before the title
                            if i >= 3:
                                two_lines_back = lines[i - 3].strip()
                                if has_source_directive(two_lines_back):
                                    in_code_block = True
                                    continue

                        issues.append({
                            'line_num': i,
                            'prev_line_num': prev_line_num,
                            'prev_line': prev_line[:80] # Truncate for display
                        })

                    in_code_block = True
                else:
                    # This is the END of a code block
                    in_code_block = False

    except Exception as e:
        raise IOError(f"Error reading {filepath}: {e}")

    return issues

def fix_file(filepath, issues):
    """
    Insert [source] directives for missing code blocks.

    Args:
        filepath: Path to the AsciiDoc file to fix
        issues: List of issue dictionaries from scan_file()

    Returns:
        True if successful, False otherwise
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Sort issues by line number in reverse order so we can insert from bottom to top
        # This prevents line number shifts from affecting subsequent insertions
        sorted_issues = sorted(issues, key=lambda x: x['line_num'], reverse=True)

        for issue in sorted_issues:
            line_num = issue['line_num']
            # Insert [source] directive before the ---- line (at line_num - 1, which is index line_num - 1)
            insert_index = line_num - 1
            lines.insert(insert_index, '[source]\n')

        # Write the modified content back to the file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.writelines(lines)

        return True

    except Exception as e:
        raise IOError(f"Error fixing {filepath}: {e}")

def find_missing_source_directives(scan_dir='.', auto_fix=False):
    """
    Scan directory for AsciiDoc files with missing [source] directives.

    Args:
        scan_dir: Directory to scan (default: current directory)
        auto_fix: If True, automatically insert [source] directives

    Returns:
        Dictionary with statistics:
        - total_issues: Total number of issues found
        - files_with_issues: Number of files with issues
        - files_fixed: Number of files successfully fixed (if auto_fix=True)
        - file_details: List of dictionaries with file paths and their issues
    """
    if not os.path.isdir(scan_dir):
        raise ValueError(f"Directory '{scan_dir}' does not exist")

    total_issues = 0
    files_with_issues = 0
    files_fixed = 0
    file_details = []

    # Find all .adoc files (excluding symbolic links)
    adoc_files = []
    for root, dirs, files in os.walk(scan_dir):
        for filename in files:
            if filename.endswith('.adoc'):
                filepath = os.path.join(root, filename)
                # Skip symbolic links
                if not os.path.islink(filepath):
                    adoc_files.append(filepath)

    for filepath in sorted(adoc_files):
        issues = scan_file(filepath)

        if issues:
            files_with_issues += 1
            total_issues += len(issues)

            file_info = {
                'filepath': filepath,
                'issues': issues,
                'fixed': False
            }

            if auto_fix:
                try:
                    if fix_file(filepath, issues):
                        files_fixed += 1
                        file_info['fixed'] = True
                except Exception as e:
                    file_info['error'] = str(e)

            file_details.append(file_info)

    return {
        'total_issues': total_issues,
        'files_with_issues': files_with_issues,
        'files_fixed': files_fixed,
        'file_details': file_details
    }
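The module can also be consumed directly, without the CLI wrapper. A minimal sketch of reading the report dictionary returned above; the 'docs' path is an illustrative placeholder, while the keys match find_missing_source_directives():

    # Sketch: programmatic use of the scanner shipped in this release.
    from doc_utils.missing_source_directive import find_missing_source_directives

    results = find_missing_source_directives(scan_dir='docs', auto_fix=False)
    print(f"{results['total_issues']} issue(s) in {results['files_with_issues']} file(s)")
    for file_info in results['file_details']:
        for issue in file_info['issues']:
            # Each issue records where the bare ---- delimiter sits and what precedes it.
            print(f"{file_info['filepath']}:{issue['line_num']} "
                  f"(previous line {issue['prev_line_num']}): {issue['prev_line']}")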
doc_utils/unused_adoc.py
CHANGED

@@ -60,10 +60,10 @@ def find_scan_directories(base_path='.', exclude_dirs=None):

     return scan_dirs

-def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
+def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
     # Print safety warning
     print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
-
+
     # If no scan_dirs provided, auto-discover them
     if not scan_dirs:
         scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
@@ -75,46 +75,107 @@ def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exc
         print("No 'modules' or 'assemblies' directories found containing .adoc files.")
         print("Please run this tool from your documentation repository root.")
         return
-
+
     # Detect repository type
     repo_type = detect_repo_type()
     print(f"Detected repository type: {repo_type}")
-
+
     # Collect all .adoc files in scan directories
     asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
-
-    # Track which files are referenced
-    referenced_files = set()
-
+
+    # Track which files are referenced (uncommented and commented separately)
+    referenced_files = set() # Files in uncommented includes
+    commented_only_files = {} # Files referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
+
     if repo_type == 'topic_map':
         # For OpenShift-docs style repos, get references from topic maps
         topic_references = get_all_topic_map_references()
         # Convert to basenames for comparison
         referenced_files.update(os.path.basename(ref) for ref in topic_references)
-
-    #
+
+    # Patterns for finding includes (both commented and uncommented)
     include_pattern = re.compile(r'include::(.+?)\[')
+    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')
+
     adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
-
+
     for file_path in adoc_files:
         try:
             with open(file_path, 'r', encoding='utf-8') as f:
-
-
-
-
-
-
-
+                lines = f.readlines()
+
+                for line_num, line in enumerate(lines, 1):
+                    # Check if this is a commented include
+                    commented_match = commented_include_pattern.search(line)
+                    if commented_match:
+                        include_basename = os.path.basename(commented_match.group(1))
+                        # Track location of commented reference
+                        if include_basename not in commented_only_files:
+                            commented_only_files[include_basename] = []
+                        commented_only_files[include_basename].append((file_path, line_num, line.strip()))
+                    else:
+                        # Check for uncommented includes
+                        uncommented_match = include_pattern.search(line)
+                        if uncommented_match:
+                            include_basename = os.path.basename(uncommented_match.group(1))
+                            referenced_files.add(include_basename)
+                            # If we found an uncommented reference, remove from commented_only tracking
+                            if include_basename in commented_only_files:
+                                del commented_only_files[include_basename]
         except Exception as e:
             print(f"Warning: could not read {file_path}: {e}")
-
-    #
-
+
+    # Determine which files are unused based on the include_commented flag
+    if include_commented:
+        # When --commented is used: treat files with commented-only references as unused
+        # Only files with uncommented references are considered "used"
+        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
+        commented_only_unused = []
+    else:
+        # Default behavior: files referenced only in commented lines are considered "used"
+        # They should NOT be in the unused list, but we track them for reporting
+        all_referenced = referenced_files.union(set(commented_only_files.keys()))
+        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in all_referenced]
+
+        # Generate list of files referenced only in comments for the report
+        commented_only_unused = []
+        for basename, references in commented_only_files.items():
+            # Find the full path for this basename in asciidoc_files
+            matching_files = [f for f in asciidoc_files if os.path.basename(f) == basename]
+            for f in matching_files:
+                commented_only_unused.append((f, references))
+
     unused_files = list(dict.fromkeys(unused_files)) # Remove duplicates
-
+
+    # Print summary
     print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
-
+
+    # Generate detailed report for commented-only references
+    if commented_only_unused and not include_commented:
+        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
+        os.makedirs(archive_dir, exist_ok=True)
+
+        with open(report_path, 'w', encoding='utf-8') as report:
+            report.write("Files Referenced Only in Commented Lines\n")
+            report.write("=" * 70 + "\n\n")
+            report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
+            report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
+            report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
+            report.write("-" * 70 + "\n\n")
+
+            for file_path, references in sorted(commented_only_unused):
+                report.write(f"File: {file_path}\n")
+                report.write(f"Referenced in {len(references)} commented line(s):\n")
+                for ref_file, line_num, line_text in references:
+                    report.write(f" {ref_file}:{line_num}\n")
+                    report.write(f" {line_text}\n")
                report.write("\n")
+
+        print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
+        print(f" Detailed report saved to: {report_path}")
+        print(f" These files are considered 'used' and will NOT be archived by default.")
+        print(f" To include them in the archive operation, use the --commented flag.\n")
+
     return write_manifest_and_archive(
         unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
     )
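How the two include patterns above divide references, shown on made-up example lines (not taken from any particular repository):

    # Sketch: classifying include lines the way find_unused_adoc() does.
    import re

    include_pattern = re.compile(r'include::(.+?)\[')
    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')

    live = "include::modules/example-module.adoc[leveloffset=+1]"
    commented = "// include::modules/example-module.adoc[leveloffset=+1]"

    assert include_pattern.search(live) and not commented_include_pattern.search(live)
    assert commented_include_pattern.search(commented)
    # Note: include_pattern also matches the commented line, which is why the
    # code checks commented_include_pattern first and only falls back to the
    # plain pattern in the else branch.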
doc_utils/unused_images.py
CHANGED

@@ -6,26 +6,98 @@ from .file_utils import collect_files, write_manifest_and_archive

 IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}

-def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
+def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
     # Print safety warning
     print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
-
+
     image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
     adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
-
+
+    # Track which images are referenced (uncommented and commented separately)
+    referenced_images = set() # Images in uncommented references
+    commented_only_images = {} # Images referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
+
+    # Patterns for finding image references (both commented and uncommented)
     image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
+    commented_line_pattern = re.compile(r'^\s*//')
+
     for adoc_file in adoc_files:
         try:
             with open(adoc_file, 'r', encoding='utf-8') as f:
-
-
-
-
-
+                lines = f.readlines()
+
+                for line_num, line in enumerate(lines, 1):
+                    # Check if this line is commented
+                    is_commented = commented_line_pattern.match(line)
+
+                    # Find all image references in the line
+                    for match in image_ref_pattern.findall(line):
+                        for group in match:
+                            if group:
+                                image_basename = os.path.basename(group)
+
+                                if is_commented:
+                                    # Track location of commented reference
+                                    if image_basename not in commented_only_images:
+                                        commented_only_images[image_basename] = []
+                                    commented_only_images[image_basename].append((adoc_file, line_num, line.strip()))
+                                else:
+                                    # Add to uncommented references
+                                    referenced_images.add(image_basename)
+                                    # If we found an uncommented reference, remove from commented_only tracking
+                                    if image_basename in commented_only_images:
+                                        del commented_only_images[image_basename]
         except Exception as e:
             print(f"Warning: could not read {adoc_file}: {e}")
-
+
+    # Determine which images are unused based on the include_commented flag
+    if include_commented:
+        # When --commented is used: treat images with commented-only references as unused
+        # Only images with uncommented references are considered "used"
+        unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
+        commented_only_unused = []
+    else:
+        # Default behavior: images referenced only in commented lines are considered "used"
+        # They should NOT be in the unused list, but we track them for reporting
+        all_referenced = referenced_images.union(set(commented_only_images.keys()))
+        unused_images = [f for f in image_files if os.path.basename(f) not in all_referenced]
+
+        # Generate list of images referenced only in comments for the report
+        commented_only_unused = []
+        for basename, references in commented_only_images.items():
+            # Find the full path for this basename in image_files
+            matching_files = [f for f in image_files if os.path.basename(f) == basename]
+            for f in matching_files:
+                commented_only_unused.append((f, references))
+
     unused_images = list(dict.fromkeys(unused_images))
+
+    # Generate detailed report for commented-only references
+    if commented_only_unused and not include_commented:
+        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
+        os.makedirs(archive_dir, exist_ok=True)
+
+        with open(report_path, 'w', encoding='utf-8') as report:
+            report.write("Images Referenced Only in Commented Lines\n")
+            report.write("=" * 70 + "\n\n")
+            report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
+            report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
+            report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
+            report.write("-" * 70 + "\n\n")
+
+            for file_path, references in sorted(commented_only_unused):
+                report.write(f"Image: {file_path}\n")
+                report.write(f"Referenced in {len(references)} commented line(s):\n")
+                for ref_file, line_num, line_text in references:
+                    report.write(f" {ref_file}:{line_num}\n")
+                    report.write(f" {line_text}\n")
+                report.write("\n")
+
+        print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
+        print(f" Detailed report saved to: {report_path}")
+        print(f" These images are considered 'used' and will NOT be archived by default.")
+        print(f" To include them in the archive operation, use the --commented flag.\n")
+
     return write_manifest_and_archive(
         unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
     )
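What the image regex above captures, on a made-up example line (illustrative data only):

    # Sketch: how a commented image reference is classified.
    import re

    image_ref_pattern = re.compile(
        r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"'
    )
    commented_line_pattern = re.compile(r'^\s*//')

    line = '// image::diagrams/flow.png[Flow diagram]'
    paths = [g for match in image_ref_pattern.findall(line) for g in match if g]
    # paths == ['diagrams/flow.png']; because the line starts with //, its basename
    # lands in commented_only_images rather than referenced_images.
    print(paths, bool(commented_line_pattern.match(line)))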
doc_utils/version.py
CHANGED

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/RECORD
CHANGED

@@ -1,8 +1,10 @@
-archive_unused_files.py,sha256=
-archive_unused_images.py,sha256=
+archive_unused_files.py,sha256=YKYPtuBHEZcsyQSwSYxSYvw9v9Mh6Of8MqT53A5bM44,2438
+archive_unused_images.py,sha256=EvPhMIwp6_AHKtuNYQ663q6biXBeXaqf88NzWrhvtIE,2029
 check_scannability.py,sha256=O6ROr-e624jVPvPpASpsWo0gTfuCFpA2mTSX61BjAEI,5478
+check_source_directives.py,sha256=JiIvn_ph9VKPMH4zg-aSsuIGQZcnI_imj7rZLLE04L8,3660
 convert_callouts_interactive.py,sha256=4PjiVIOWxNJiJLQuBHT3x6rE46-hgfFHSaoo5quYIs8,22889
 convert_callouts_to_deflist.py,sha256=BoqW5_GkQ-KqNzn4vmE6lsQosrPV0lkB-bfAx3dzyMw,25886
+convert_tables_to_deflists.py,sha256=PIP6xummuMqC3aSzahKKRBYahes_j5ZpHp_-k6BjurY,15599
 doc_utils_cli.py,sha256=J3CE7cTDDCRGkhAknYejNWHhk5t9YFGt27WDVfR98Xk,5111
 extract_link_attributes.py,sha256=wR2SmR2la-jR6DzDbas2PoNONgRZ4dZ6aqwzkwEv8Gs,3516
 find_unused_attributes.py,sha256=77CxFdm72wj6SO81w-auMdDjnvF83jWy_qaM7DsAtBw,4263
@@ -19,20 +21,21 @@ doc_utils/__init__.py,sha256=qqZR3lohzkP63soymrEZPBGzzk6-nFzi4_tSffjmu_0,74
 doc_utils/extract_link_attributes.py,sha256=U0EvPZReJQigNfbT-icBsVT6Li64hYki5W7MQz6qqbc,22743
 doc_utils/file_utils.py,sha256=fpTh3xx759sF8sNocdn_arsP3KAv8XA6cTQTAVIZiZg,4247
 doc_utils/format_asciidoc_spacing.py,sha256=RL2WU_dG_UfGL01LnevcyJfKsvYy_ogNyeoVX-Fyqks,13579
+doc_utils/missing_source_directive.py,sha256=X3Acn0QJTk6XjmBXhGus5JAjlIitCiicCRE3fslifyw,8048
 doc_utils/replace_link_attributes.py,sha256=gmAs68_njBqEz-Qni-UGgeYEDTMxlTWk_IOm76FONNE,7279
 doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
 doc_utils/spinner.py,sha256=lJg15qzODiKoR0G6uFIk2BdVNgn9jFexoTRUMrjiWvk,3554
 doc_utils/topic_map_parser.py,sha256=tKcIO1m9r2K6dvPRGue58zqMr0O2zKU1gnZMzEE3U6o,4571
-doc_utils/unused_adoc.py,sha256=
+doc_utils/unused_adoc.py,sha256=LPQWPGEOizXECxepk7E_5cjTVvKn6RXQYTWG97Ps5VQ,9077
 doc_utils/unused_attributes.py,sha256=OHyAdaBD7aNo357B0SLBN5NC_jNY5TWXMwgtfJNh3X8,7621
-doc_utils/unused_images.py,sha256=
+doc_utils/unused_images.py,sha256=hL8Qrik9QCkVh54eBLuNczRS9tMnsqIEfavNamM1UeQ,5664
 doc_utils/validate_links.py,sha256=iBGXnwdeLlgIT3fo3v01ApT5k0X2FtctsvkrE6E3VMk,19610
-doc_utils/version.py,sha256=
+doc_utils/version.py,sha256=zVnktTYITGhLqPNoyXbSnWi5bQassZ3M9S4LgDCGD-E,203
 doc_utils/version_check.py,sha256=-31Y6AN0KGi_CUCAVOOhf6bPO3r7SQIXPxxeffLAF0w,7535
 doc_utils/warnings_report.py,sha256=20yfwqBjOprfFhQwCujbcsvjJCbHHhmH84uAujm-y-o,8877
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
-rolfedh_doc_utils-0.1.
+rolfedh_doc_utils-0.1.37.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
+rolfedh_doc_utils-0.1.37.dist-info/METADATA,sha256=RZ3wEHeIdcjF52LN0DQ7LbrANY_2WlxK96cSJIP1cAg,8325
+rolfedh_doc_utils-0.1.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rolfedh_doc_utils-0.1.37.dist-info/entry_points.txt,sha256=pICKzbXMRsD5l_9RK4n7WYltHj4cqW4FXu6fv_EuVyE,693
+rolfedh_doc_utils-0.1.37.dist-info/top_level.txt,sha256=ii_0OmWdCjgCBV1RX6LY63jdH4SOEL0aYtfTMsRGAtU,316
+rolfedh_doc_utils-0.1.37.dist-info/RECORD,,
{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/entry_points.txt
CHANGED

@@ -2,8 +2,10 @@
 archive-unused-files = archive_unused_files:main
 archive-unused-images = archive_unused_images:main
 check-scannability = check_scannability:main
+check-source-directives = check_source_directives:main
 convert-callouts-interactive = convert_callouts_interactive:main
 convert-callouts-to-deflist = convert_callouts_to_deflist:main
+convert-tables-to-deflists = convert_tables_to_deflists:main
 doc-utils = doc_utils_cli:main
 extract-link-attributes = extract_link_attributes:main
 find-unused-attributes = find_unused_attributes:main

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/WHEEL
File without changes

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.37.dist-info}/licenses/LICENSE
File without changes