PyPI - rolfedh-doc-utils - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl - Mend

rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

archive_unused_files.py +18 -5
archive_unused_images.py +9 -2
callout_lib/__init__.py +22 -0
callout_lib/converter_bullets.py +103 -0
callout_lib/converter_comments.py +295 -0
callout_lib/converter_deflist.py +134 -0
callout_lib/detector.py +364 -0
callout_lib/table_parser.py +804 -0
check_published_links.py +1083 -0
check_scannability.py +6 -0
check_source_directives.py +101 -0
convert_callouts_interactive.py +567 -0
convert_callouts_to_deflist.py +628 -0
convert_freemarker_to_asciidoc.py +288 -0
convert_tables_to_deflists.py +479 -0
doc_utils/convert_freemarker_to_asciidoc.py +708 -0
doc_utils/duplicate_content.py +409 -0
doc_utils/duplicate_includes.py +347 -0
doc_utils/extract_link_attributes.py +618 -0
doc_utils/format_asciidoc_spacing.py +285 -0
doc_utils/insert_abstract_role.py +220 -0
doc_utils/inventory_conditionals.py +164 -0
doc_utils/missing_source_directive.py +211 -0
doc_utils/replace_link_attributes.py +187 -0
doc_utils/spinner.py +119 -0
doc_utils/unused_adoc.py +150 -22
doc_utils/unused_attributes.py +218 -6
doc_utils/unused_images.py +81 -9
doc_utils/validate_links.py +576 -0
doc_utils/version.py +8 -0
doc_utils/version_check.py +243 -0
doc_utils/warnings_report.py +237 -0
doc_utils_cli.py +158 -0
extract_link_attributes.py +120 -0
find_duplicate_content.py +209 -0
find_duplicate_includes.py +198 -0
find_unused_attributes.py +84 -6
format_asciidoc_spacing.py +134 -0
insert_abstract_role.py +163 -0
inventory_conditionals.py +53 -0
replace_link_attributes.py +214 -0
rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
{rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
validate_links.py +213 -0
rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
{rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0

extract_link_attributes.py ADDED Viewed

@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+"""
+Extract link and xref macros containing attributes into attribute definitions.
+This tool finds all link: and xref: macros whose URLs contain attributes,
+creates attribute definitions for them, and replaces the macros with
+attribute references.
+"""
+import argparse
+import sys
+from doc_utils.extract_link_attributes import extract_link_attributes
+from doc_utils.version_check import check_version_on_startup
+from doc_utils.version import __version__
+def main():
+    # Check for updates (non-blocking, won't interfere with tool operation)
+    check_version_on_startup()
+    """Main entry point for the extract-link-attributes CLI tool."""
+    parser = argparse.ArgumentParser(
+        description='Extract link and xref macros containing attributes into attribute definitions',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Interactive mode with auto-discovery
+  extract-link-attributes
+  # Specify attribute file
+  extract-link-attributes --attributes-file common-attributes.adoc
+  # Non-interactive mode (uses most common link text)
+  extract-link-attributes --non-interactive
+  # Dry run to preview changes
+  extract-link-attributes --dry-run
+  # Scan specific directories
+  extract-link-attributes --scan-dir modules --scan-dir assemblies
+        """
+    )
+    parser.add_argument(
+        '--attributes-file',
+        help='Path to the attributes file to update (auto-discovered if not specified)'
+    )
+    parser.add_argument(
+        '--scan-dir',
+        action='append',
+        help='Directory to scan for .adoc files (can be used multiple times, default: current directory)'
+    )
+    parser.add_argument(
+        '--non-interactive',
+        action='store_true',
+        help='Non-interactive mode: automatically use most common link text for variations'
+    )
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='Preview changes without modifying files'
+    )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Enable verbose output'
+    )
+    parser.add_argument(
+        '--validate-links',
+        action='store_true',
+        help='Validate URLs in link-* attributes before extraction'
+    )
+    parser.add_argument(
+        '--fail-on-broken',
+        action='store_true',
+        help='Exit extraction if broken links are found in attributes (requires --validate-links)'
+    )
+    parser.add_argument(
+        '--macro-type',
+        choices=['link', 'xref', 'both'],
+        default='both',
+        help='Type of macros to process: link, xref, or both (default: both)'
+    )
+    parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')
+    args = parser.parse_args()
+    try:
+        success = extract_link_attributes(
+            attributes_file=args.attributes_file,
+            scan_dirs=args.scan_dir,
+            interactive=not args.non_interactive,
+            dry_run=args.dry_run,
+            validate_links=args.validate_links,
+            fail_on_broken=args.fail_on_broken,
+            macro_type=args.macro_type
+        )
+        if not success:
+            sys.exit(1)
+    except KeyboardInterrupt:
+        print("\nOperation cancelled.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+if __name__ == '__main__':
+    main()

find_duplicate_content.py ADDED Viewed

@@ -0,0 +1,209 @@
+"""
+Find Duplicate Content in AsciiDoc Files
+Scans AsciiDoc files for duplicate and similar content blocks including:
+- Recurring notes (NOTE, TIP, WARNING, IMPORTANT, CAUTION)
+- Tables
+- Step sequences (ordered lists)
+- Code blocks
+This tool helps identify content that could be refactored into reusable components.
+"""
+import argparse
+import os
+import sys
+from datetime import datetime
+from doc_utils.duplicate_content import (
+    find_duplicates,
+    format_report,
+    generate_csv_report
+)
+from doc_utils.spinner import Spinner
+from doc_utils.version_check import check_version_on_startup
+from doc_utils.version import __version__
+def main():
+    # Check for updates (non-blocking, won't interfere with tool operation)
+    check_version_on_startup()
+    parser = argparse.ArgumentParser(
+        description='Find duplicate and similar content in AsciiDoc files.',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  find-duplicate-content                     # Scan current directory, write txt report
+  find-duplicate-content ./docs              # Scan specific directory
+  find-duplicate-content -t note -t table    # Find only notes and tables
+  find-duplicate-content -s 0.7              # Include 70%+ similar content
+  find-duplicate-content --format csv        # Write CSV report to ./reports/
+  find-duplicate-content --no-output         # Display results without saving report
+        """
+    )
+    parser.add_argument(
+        'directory',
+        nargs='?',
+        default='.',
+        help='Directory to scan (default: current directory)'
+    )
+    parser.add_argument(
+        '-t', '--type',
+        dest='block_types',
+        action='append',
+        choices=['note', 'tip', 'warning', 'important', 'caution', 'table', 'steps', 'code'],
+        help='Block types to search for (can be specified multiple times). Default: all types'
+    )
+    parser.add_argument(
+        '-s', '--similarity',
+        type=float,
+        default=0.8,
+        metavar='THRESHOLD',
+        help='Minimum similarity threshold (0.0-1.0). Default: 0.8'
+    )
+    parser.add_argument(
+        '-m', '--min-length',
+        type=int,
+        default=50,
+        metavar='CHARS',
+        help='Minimum content length to consider. Default: 50 characters'
+    )
+    parser.add_argument(
+        '--exact-only',
+        action='store_true',
+        help='Only find exact duplicates (sets similarity to 1.0)'
+    )
+    parser.add_argument(
+        '-e', '--exclude-dir',
+        dest='exclude_dirs',
+        action='append',
+        default=[],
+        metavar='DIR',
+        help='Directory to exclude (can be specified multiple times)'
+    )
+    parser.add_argument(
+        '--no-content',
+        action='store_true',
+        help='Hide content preview in output'
+    )
+    parser.add_argument(
+        '--no-output',
+        action='store_true',
+        help='Do not write report to ./reports/ directory (report is written by default)'
+    )
+    parser.add_argument(
+        '--format',
+        choices=['txt', 'csv', 'json', 'md'],
+        default='txt',
+        help='Output format (default: txt)'
+    )
+    parser.add_argument(
+        '--version',
+        action='version',
+        version=f'%(prog)s {__version__}'
+    )
+    args = parser.parse_args()
+    # Validate arguments
+    if not os.path.isdir(args.directory):
+        print(f"Error: '{args.directory}' is not a valid directory")
+        return 1
+    if args.similarity < 0 or args.similarity > 1:
+        print("Error: Similarity threshold must be between 0.0 and 1.0")
+        return 1
+    # Set up parameters
+    similarity = 1.0 if args.exact_only else args.similarity
+    exclude_dirs = ['.git', '.archive', 'target', 'build', 'node_modules'] + args.exclude_dirs
+    # Build command line options summary
+    cmd_options = ['find-duplicate-content']
+    if args.directory != '.':
+        cmd_options.append(args.directory)
+    if args.block_types:
+        for bt in args.block_types:
+            cmd_options.append(f'-t {bt}')
+    if args.exact_only:
+        cmd_options.append('--exact-only')
+    elif args.similarity != 0.8:
+        cmd_options.append(f'-s {args.similarity}')
+    if args.min_length != 50:
+        cmd_options.append(f'-m {args.min_length}')
+    for ed in args.exclude_dirs:
+        cmd_options.append(f'-e {ed}')
+    if args.no_content:
+        cmd_options.append('--no-content')
+    if args.no_output:
+        cmd_options.append('--no-output')
+    if args.format != 'txt':
+        cmd_options.append(f'--format {args.format}')
+    cmd_line = ' '.join(cmd_options)
+    # Run analysis
+    spinner = Spinner(f"Scanning AsciiDoc files in {args.directory}")
+    spinner.start()
+    try:
+        duplicate_groups = find_duplicates(
+            root_dir=args.directory,
+            min_similarity=similarity,
+            min_content_length=args.min_length,
+            exclude_dirs=exclude_dirs,
+            block_types=args.block_types
+        )
+    except Exception as e:
+        spinner.stop()
+        print(f"Error: {e}")
+        return 1
+    spinner.stop(f"Found {len(duplicate_groups)} groups of duplicate content")
+    # Print command line options used
+    print(f"\nCommand: {cmd_line}")
+    print(f"Directory: {os.path.abspath(args.directory)}\n")
+    # Generate report based on format
+    if args.format == 'csv':
+        report = generate_csv_report(duplicate_groups)
+    else:
+        report = format_report(
+            duplicate_groups,
+            show_content=not args.no_content
+        )
+    print(report)
+    # Write report file by default (unless --no-output)
+    if not args.no_output and duplicate_groups:
+        reports_dir = './reports'
+        os.makedirs(reports_dir, exist_ok=True)
+        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+        filename = f'{reports_dir}/duplicate-content_{timestamp}.{args.format}'
+        with open(filename, 'w', encoding='utf-8') as f:
+            f.write(f"Duplicate content report\n")
+            f.write(f"Command: {cmd_line}\n")
+            f.write(f"Directory: {os.path.abspath(args.directory)}\n")
+            f.write(f"Generated: {datetime.now().isoformat()}\n\n")
+            f.write(report)
+        print(f"\nReport written to: {filename}")
+    return 0
+if __name__ == '__main__':
+    sys.exit(main())

find_duplicate_includes.py ADDED Viewed

@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""
+Find AsciiDoc files that are included more than once.
+Scans AsciiDoc files for include:: macros and identifies files that are
+included from multiple locations, helping identify opportunities for
+content reuse or potential maintenance issues.
+Usage:
+    find-duplicate-includes [directory] [options]
+"""
+import argparse
+import os
+import sys
+from datetime import datetime
+from doc_utils.duplicate_includes import (
+    DEFAULT_COMMON_INCLUDES,
+    DEFAULT_EXCLUDE_DIRS,
+    find_duplicate_includes,
+    format_txt_report,
+    format_csv_report,
+    format_json_report,
+    format_md_report,
+)
+def build_cmd_line(args: argparse.Namespace) -> str:
+    """Reconstruct the command line for display."""
+    parts = ['find-duplicate-includes']
+    if args.directory != '.':
+        parts.append(args.directory)
+    if args.include_common:
+        parts.append('--include-common')
+    for d in (args.exclude_dir or []):
+        parts.append(f'-e {d}')
+    for f in (args.exclude_file or []):
+        parts.append(f'--exclude-file {f}')
+    if args.format != 'txt':
+        parts.append(f'--format {args.format}')
+    if args.no_output:
+        parts.append('--no-output')
+    return ' '.join(parts)
+def main():
+    parser = argparse.ArgumentParser(
+        description='Find AsciiDoc files that are included more than once.',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Scan current directory
+  find-duplicate-includes
+  # Scan a specific directory
+  find-duplicate-includes ./docs
+  # Include common files (attributes.adoc, etc.) in results
+  find-duplicate-includes --include-common
+  # Exclude specific directories
+  find-duplicate-includes -e archive -e drafts
+  # Generate CSV report
+  find-duplicate-includes --format csv
+  # Display only, no report file
+  find-duplicate-includes --no-output
+"""
+    )
+    parser.add_argument(
+        'directory',
+        nargs='?',
+        default='.',
+        help='Directory to scan (default: current directory)'
+    )
+    parser.add_argument(
+        '--include-common',
+        action='store_true',
+        help='Include common files (attributes.adoc, etc.) in results'
+    )
+    parser.add_argument(
+        '-e', '--exclude-dir',
+        action='append',
+        metavar='DIR',
+        help='Directory to exclude (can be repeated)'
+    )
+    parser.add_argument(
+        '--exclude-file',
+        action='append',
+        metavar='FILE',
+        help='File to exclude (can be repeated)'
+    )
+    parser.add_argument(
+        '--no-output',
+        action='store_true',
+        help='Do not write report file (stdout only)'
+    )
+    parser.add_argument(
+        '--format',
+        choices=['txt', 'csv', 'json', 'md'],
+        default='txt',
+        help='Output format (default: txt)'
+    )
+    args = parser.parse_args()
+    # Validate directory
+    if not os.path.isdir(args.directory):
+        print(f"Error: '{args.directory}' is not a valid directory", file=sys.stderr)
+        sys.exit(1)
+    # Build exclusion sets
+    exclude_dirs = set(DEFAULT_EXCLUDE_DIRS)
+    if args.exclude_dir:
+        exclude_dirs.update(args.exclude_dir)
+    exclude_files = set()
+    if args.exclude_file:
+        exclude_files.update(args.exclude_file)
+    # Build command line for display
+    cmd_line = build_cmd_line(args)
+    # Find duplicates
+    duplicates, total_files, excluded_common = find_duplicate_includes(
+        directory=args.directory,
+        exclude_dirs=exclude_dirs,
+        exclude_files=exclude_files,
+        include_common=args.include_common,
+        common_includes=DEFAULT_COMMON_INCLUDES
+    )
+    # Format report
+    formatters = {
+        'txt': format_txt_report,
+        'csv': format_csv_report,
+        'json': format_json_report,
+        'md': format_md_report,
+    }
+    formatter = formatters[args.format]
+    report = formatter(duplicates, total_files, excluded_common, args.directory, cmd_line)
+    # Output summary to stdout
+    if duplicates:
+        print(f"\n\u2713 Found {len(duplicates)} files included more than once")
+    else:
+        if excluded_common:
+            print(f"\n\u2713 No unexpected duplicates found ({excluded_common} common files excluded)")
+        else:
+            print("\n\u2713 No files are included more than once")
+    print(f"\nCommand: {cmd_line}")
+    print(f"Directory: {os.path.abspath(args.directory)}")
+    print(f"Files scanned: {total_files}\n")
+    # Print report content
+    if args.format == 'txt':
+        # Skip header lines already printed
+        lines = report.split('\n')
+        # Find where the actual results start (after the header)
+        start = 0
+        for i, line in enumerate(lines):
+            if line.startswith('=') or line.startswith('No ') or line.startswith('Found '):
+                start = i
+                break
+        print('\n'.join(lines[start:]))
+    else:
+        print(report)
+    # Write report file
+    if not args.no_output and duplicates:
+        reports_dir = './reports'
+        os.makedirs(reports_dir, exist_ok=True)
+        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+        filename = f'{reports_dir}/duplicate-includes_{timestamp}.{args.format}'
+        with open(filename, 'w', encoding='utf-8') as f:
+            f.write(report)
+        print(f"\nReport written to: {filename}")
+    return 1 if duplicates else 0
+if __name__ == '__main__':
+    sys.exit(main())

find_unused_attributes.py CHANGED Viewed

@@ -1,23 +1,74 @@
 """
 Find Unused AsciiDoc Attributes
-Scans a user-specified attributes file (e.g., attributes.adoc) for attribute definitions (e.g., :version: 1.1), then recursively scans all .adoc files in the current directory (ignoring symlinks) for usages of those attributes (e.g., {version}).
+Scans an attributes file for attribute definitions (e.g., :version: 1.1), then recursively scans all .adoc files in the current directory (ignoring symlinks) for usages of those attributes (e.g., {version}).
+If no attributes file is specified, the tool will auto-discover attributes files in the repository and let you choose one interactively.
 Any attribute defined but not used in any .adoc file is reported as NOT USED in both the command line output and a timestamped output file.
 """
 import argparse
 import os
+import sys
 from datetime import datetime
-from doc_utils.unused_attributes import find_unused_attributes
+from doc_utils.unused_attributes import find_unused_attributes, find_attributes_files, select_attributes_file, comment_out_unused_attributes, remove_unused_attributes
+from doc_utils.spinner import Spinner
+from doc_utils.version_check import check_version_on_startup
+from doc_utils.version import __version__
 def main():
+    # Check for updates (non-blocking, won't interfere with tool operation)
+    check_version_on_startup()
     parser = argparse.ArgumentParser(description='Find unused AsciiDoc attributes.')
-    parser.add_argument('attributes_file', help='Path to the attributes.adoc file to scan for attribute definitions.')
+    parser.add_argument(
+        'attributes_file',
+        nargs='?',  # Make it optional
+        help='Path to the attributes file. If not specified, auto-discovers attributes files.'
+    )
     parser.add_argument('-o', '--output', action='store_true', help='Write results to a timestamped txt file in your home directory.')
+    parser.add_argument('-c', '--comment-out', action='store_true', help='Comment out unused attributes in the attributes file with "// Unused".')
+    parser.add_argument('-r', '--remove', action='store_true', help='Remove unused attributes from the attributes file. Also removes lines already marked with "// Unused".')
+    parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')
     args = parser.parse_args()
-    unused = find_unused_attributes(args.attributes_file, '.')
+    # Determine which attributes file to use
+    if args.attributes_file:
+        # User specified a file
+        attr_file = args.attributes_file
+    else:
+        # Auto-discover attributes files
+        spinner = Spinner("Searching for attributes files")
+        spinner.start()
+        attributes_files = find_attributes_files('.')
+        spinner.stop()
+        if not attributes_files:
+            print("No attributes files found in the repository.")
+            print("You can specify a file directly: find-unused-attributes <path-to-attributes-file>")
+            return 1
+        attr_file = select_attributes_file(attributes_files)
+        if not attr_file:
+            print("No attributes file selected.")
+            return 1
+    try:
+        spinner = Spinner(f"Analyzing attributes in {os.path.basename(attr_file)}")
+        spinner.start()
+        unused = find_unused_attributes(attr_file, '.')
+        spinner.stop(f"Found {len(unused)} unused attributes")
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        print(f"\nPlease ensure the file '{attr_file}' exists.")
+        print("Usage: find-unused-attributes [<path-to-attributes-file>]")
+        return 1
+    except (ValueError, PermissionError) as e:
+        print(f"Error: {e}")
+        return 1
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        return 1
     lines = [f":{attr}:  NOT USED" for attr in unused]
     output = '\n'.join(lines)
@@ -33,9 +84,36 @@ def main():
         home_dir = os.path.expanduser('~')
         filename = os.path.join(home_dir, f'unused_attributes_{timestamp}.txt')
         with open(filename, 'w', encoding='utf-8') as f:
-            f.write('Unused attributes in ' + args.attributes_file + '\n')
+            f.write('Unused attributes in ' + attr_file + '\n')
             f.write(output + '\n')
         print(f'Results written to: {filename}')
+    if args.comment_out and output:
+        # Ask for confirmation before modifying the file
+        print(f'\nThis will comment out {len(unused)} unused attributes in: {attr_file}')
+        response = input('Continue? (y/n): ').strip().lower()
+        if response == 'y':
+            commented_count = comment_out_unused_attributes(attr_file, unused)
+            print(f'Commented out {commented_count} unused attributes in: {attr_file}')
+        else:
+            print('Operation cancelled.')
+    if args.remove:
+        # Ask for confirmation before modifying the file
+        if output:
+            print(f'\nThis will remove {len(unused)} unused attributes from: {attr_file}')
+            print('(Also removes any lines already marked with "// Unused")')
+        else:
+            print(f'\nThis will remove lines marked with "// Unused" from: {attr_file}')
+        response = input('Continue? (y/n): ').strip().lower()
+        if response == 'y':
+            removed_count = remove_unused_attributes(attr_file, unused if output else None)
+            print(f'Removed {removed_count} lines from: {attr_file}')
+        else:
+            print('Operation cancelled.')
+    return 0
 if __name__ == '__main__':
-    main()
+    import sys
+    sys.exit(main())

rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl