PyPI - rolfedh-doc-utils - Versions diffs - 0.1.34__tar.gz → 0.1.35__tar.gz - Mend

rolfedh-doc-utils 0.1.34.tar.gz → 0.1.35.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

{rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rolfedh-doc-utils
-Version: 0.1.34
+Version: 0.1.35
 Summary: CLI tools for AsciiDoc documentation projects
 Author: Rolfe Dlugy-Hegwer
 License: MIT License

{rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/archive_unused_files.py RENAMED Viewed

@@ -22,6 +22,7 @@ def main():
         epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
     )
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+    parser.add_argument('--commented', action='store_true', help='Include files that are referenced only in commented lines in the archive operation.')
     parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
     parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
     parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
@@ -35,13 +36,13 @@ def main():
     exclude_dirs = list(args.exclude_dir)
     exclude_files = list(args.exclude_file)
     if args.exclude_list:
         list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
         exclude_dirs.extend(list_dirs)
         exclude_files.extend(list_files)
-    find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
+    find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
 if __name__ == '__main__':
     main()

{rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/archive_unused_images.py RENAMED Viewed

@@ -18,6 +18,7 @@ def main():
     check_version_on_startup()
     parser = argparse.ArgumentParser(description='Archive unused image files.')
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+    parser.add_argument('--commented', action='store_true', help='Include images that are referenced only in commented lines in the archive operation.')
     parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
     parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
     parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
@@ -29,13 +30,13 @@ def main():
     exclude_dirs = list(args.exclude_dir)
     exclude_files = list(args.exclude_file)
     if args.exclude_list:
         list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
         exclude_dirs.extend(list_dirs)
         exclude_files.extend(list_files)
-    find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
+    find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
 if __name__ == '__main__':
     main()

rolfedh_doc_utils-0.1.35/doc_utils/unused_adoc.py ADDED Viewed

@@ -0,0 +1,181 @@
+# doc_utils/unused_adoc.py
+import os
+import re
+from .file_utils import collect_files, write_manifest_and_archive
+from .topic_map_parser import detect_repo_type, get_all_topic_map_references
+def find_scan_directories(base_path='.', exclude_dirs=None):
+    """
+    Automatically find all 'modules' and 'assemblies' directories in the repository.
+    Returns a list of paths to scan.
+    """
+    scan_dirs = []
+    exclude_dirs = exclude_dirs or []
+    for root, dirs, files in os.walk(base_path):
+        # Skip symbolic links to prevent issues
+        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]
+        # Skip excluded directories
+        for exclude_dir in exclude_dirs:
+            abs_exclude = os.path.abspath(exclude_dir)
+            if os.path.abspath(root).startswith(abs_exclude):
+                dirs[:] = []  # Don't descend into excluded directories
+                break
+        # Skip hidden directories and common non-content directories
+        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', 'build', 'dist', 'target']]
+        # Look for modules and assemblies directories
+        for d in dirs:
+            if d in ['modules', 'assemblies']:
+                dir_path = os.path.join(root, d)
+                # Check if this directory or any subdirectory contains .adoc files
+                has_adoc = False
+                for subroot, subdirs, subfiles in os.walk(dir_path):
+                    # Skip symbolic links
+                    subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
+                    if any(f.endswith('.adoc') for f in subfiles):
+                        has_adoc = True
+                        break
+                if has_adoc:
+                    scan_dirs.append(dir_path)
+    # Also check for modules/rn pattern if modules exists
+    modules_dirs = [d for d in scan_dirs if os.path.basename(d) == 'modules']
+    for modules_dir in modules_dirs:
+        rn_dir = os.path.join(modules_dir, 'rn')
+        if os.path.isdir(rn_dir):
+            # Check if rn directory or subdirectories contain .adoc files
+            has_adoc = False
+            for subroot, subdirs, subfiles in os.walk(rn_dir):
+                subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
+                if any(f.endswith('.adoc') for f in subfiles):
+                    has_adoc = True
+                    break
+            if has_adoc:
+                scan_dirs.append(rn_dir)
+    return scan_dirs
+def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
+    # Print safety warning
+    print("\n⚠️  SAFETY: Work in a git branch! Run without --archive first to preview.\n")
+    # If no scan_dirs provided, auto-discover them
+    if not scan_dirs:
+        scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
+        if scan_dirs:
+            print(f"Auto-discovered directories to scan:")
+            for dir_path in sorted(scan_dirs):
+                print(f"  - {dir_path}")
+        else:
+            print("No 'modules' or 'assemblies' directories found containing .adoc files.")
+            print("Please run this tool from your documentation repository root.")
+            return
+    # Detect repository type
+    repo_type = detect_repo_type()
+    print(f"Detected repository type: {repo_type}")
+    # Collect all .adoc files in scan directories
+    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
+    # Track which files are referenced (uncommented and commented separately)
+    referenced_files = set()  # Files in uncommented includes
+    commented_only_files = {}  # Files referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
+    if repo_type == 'topic_map':
+        # For OpenShift-docs style repos, get references from topic maps
+        topic_references = get_all_topic_map_references()
+        # Convert to basenames for comparison
+        referenced_files.update(os.path.basename(ref) for ref in topic_references)
+    # Patterns for finding includes (both commented and uncommented)
+    include_pattern = re.compile(r'include::(.+?)\[')
+    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')
+    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
+    for file_path in adoc_files:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+                for line_num, line in enumerate(lines, 1):
+                    # Check if this is a commented include
+                    commented_match = commented_include_pattern.search(line)
+                    if commented_match:
+                        include_basename = os.path.basename(commented_match.group(1))
+                        # Track location of commented reference
+                        if include_basename not in commented_only_files:
+                            commented_only_files[include_basename] = []
+                        commented_only_files[include_basename].append((file_path, line_num, line.strip()))
+                    else:
+                        # Check for uncommented includes
+                        uncommented_match = include_pattern.search(line)
+                        if uncommented_match:
+                            include_basename = os.path.basename(uncommented_match.group(1))
+                            referenced_files.add(include_basename)
+                            # If we found an uncommented reference, remove from commented_only tracking
+                            if include_basename in commented_only_files:
+                                del commented_only_files[include_basename]
+        except Exception as e:
+            print(f"Warning: could not read {file_path}: {e}")
+    # Determine which files are unused based on the include_commented flag
+    if include_commented:
+        # When --commented is used: treat files with commented-only references as unused
+        # Only files with uncommented references are considered "used"
+        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
+        commented_only_unused = []
+    else:
+        # Default behavior: files referenced only in commented lines are considered "used"
+        # They should NOT be in the unused list, but we track them for reporting
+        all_referenced = referenced_files.union(set(commented_only_files.keys()))
+        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in all_referenced]
+        # Generate list of files referenced only in comments for the report
+        commented_only_unused = []
+        for basename, references in commented_only_files.items():
+            # Find the full path for this basename in asciidoc_files
+            matching_files = [f for f in asciidoc_files if os.path.basename(f) == basename]
+            for f in matching_files:
+                commented_only_unused.append((f, references))
+    unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates
+    # Print summary
+    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
+    # Generate detailed report for commented-only references
+    if commented_only_unused and not include_commented:
+        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
+        os.makedirs(archive_dir, exist_ok=True)
+        with open(report_path, 'w', encoding='utf-8') as report:
+            report.write("Files Referenced Only in Commented Lines\n")
+            report.write("=" * 70 + "\n\n")
+            report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
+            report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
+            report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
+            report.write("-" * 70 + "\n\n")
+            for file_path, references in sorted(commented_only_unused):
+                report.write(f"File: {file_path}\n")
+                report.write(f"Referenced in {len(references)} commented line(s):\n")
+                for ref_file, line_num, line_text in references:
+                    report.write(f"  {ref_file}:{line_num}\n")
+                    report.write(f"    {line_text}\n")
+                report.write("\n")
+        print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
+        print(f"   Detailed report saved to: {report_path}")
+        print(f"   These files are considered 'used' and will NOT be archived by default.")
+        print(f"   To include them in the archive operation, use the --commented flag.\n")
+    return write_manifest_and_archive(
+        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
+    )

rolfedh_doc_utils-0.1.35/doc_utils/unused_images.py ADDED Viewed

@@ -0,0 +1,103 @@
+# doc_utils/unused_images.py
+import os
+import re
+from .file_utils import collect_files, write_manifest_and_archive
+IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
+def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
+    # Print safety warning
+    print("\n⚠️  SAFETY: Work in a git branch! Run without --archive first to preview.\n")
+    image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
+    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
+    # Track which images are referenced (uncommented and commented separately)
+    referenced_images = set()  # Images in uncommented references
+    commented_only_images = {}  # Images referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
+    # Patterns for finding image references (both commented and uncommented)
+    image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
+    commented_line_pattern = re.compile(r'^\s*//')
+    for adoc_file in adoc_files:
+        try:
+            with open(adoc_file, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+                for line_num, line in enumerate(lines, 1):
+                    # Check if this line is commented
+                    is_commented = commented_line_pattern.match(line)
+                    # Find all image references in the line
+                    for match in image_ref_pattern.findall(line):
+                        for group in match:
+                            if group:
+                                image_basename = os.path.basename(group)
+                                if is_commented:
+                                    # Track location of commented reference
+                                    if image_basename not in commented_only_images:
+                                        commented_only_images[image_basename] = []
+                                    commented_only_images[image_basename].append((adoc_file, line_num, line.strip()))
+                                else:
+                                    # Add to uncommented references
+                                    referenced_images.add(image_basename)
+                                    # If we found an uncommented reference, remove from commented_only tracking
+                                    if image_basename in commented_only_images:
+                                        del commented_only_images[image_basename]
+        except Exception as e:
+            print(f"Warning: could not read {adoc_file}: {e}")
+    # Determine which images are unused based on the include_commented flag
+    if include_commented:
+        # When --commented is used: treat images with commented-only references as unused
+        # Only images with uncommented references are considered "used"
+        unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
+        commented_only_unused = []
+    else:
+        # Default behavior: images referenced only in commented lines are considered "used"
+        # They should NOT be in the unused list, but we track them for reporting
+        all_referenced = referenced_images.union(set(commented_only_images.keys()))
+        unused_images = [f for f in image_files if os.path.basename(f) not in all_referenced]
+        # Generate list of images referenced only in comments for the report
+        commented_only_unused = []
+        for basename, references in commented_only_images.items():
+            # Find the full path for this basename in image_files
+            matching_files = [f for f in image_files if os.path.basename(f) == basename]
+            for f in matching_files:
+                commented_only_unused.append((f, references))
+    unused_images = list(dict.fromkeys(unused_images))
+    # Generate detailed report for commented-only references
+    if commented_only_unused and not include_commented:
+        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
+        os.makedirs(archive_dir, exist_ok=True)
+        with open(report_path, 'w', encoding='utf-8') as report:
+            report.write("Images Referenced Only in Commented Lines\n")
+            report.write("=" * 70 + "\n\n")
+            report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
+            report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
+            report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
+            report.write("-" * 70 + "\n\n")
+            for file_path, references in sorted(commented_only_unused):
+                report.write(f"Image: {file_path}\n")
+                report.write(f"Referenced in {len(references)} commented line(s):\n")
+                for ref_file, line_num, line_text in references:
+                    report.write(f"  {ref_file}:{line_num}\n")
+                    report.write(f"    {line_text}\n")
+                report.write("\n")
+        print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
+        print(f"   Detailed report saved to: {report_path}")
+        print(f"   These images are considered 'used' and will NOT be archived by default.")
+        print(f"   To include them in the archive operation, use the --commented flag.\n")
+    return write_manifest_and_archive(
+        unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
+    )

{rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/version.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Version information for doc-utils."""
 # This should match the version in pyproject.toml
-__version__ = "0.1.34"
+__version__ = "0.1.35"
 def get_version():
     """Return the current version string."""

{rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rolfedh-doc-utils"
-version = "0.1.34"
+version = "0.1.35"
 description = "CLI tools for AsciiDoc documentation projects"
 readme = "README.md"
 requires-python = ">=3.8"

{rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rolfedh-doc-utils
-Version: 0.1.34
+Version: 0.1.35
 Summary: CLI tools for AsciiDoc documentation projects
 Author: Rolfe Dlugy-Hegwer
 License: MIT License

rolfedh_doc_utils-0.1.35/tests/test_archive_unused_files.py ADDED Viewed

@@ -0,0 +1,119 @@
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+import tempfile
+import shutil
+import pytest
+from test_fixture_archive_unused_files import setup_test_fixture_archive_unused_files
+import subprocess
+SCRIPT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'archive_unused_files.py'))
+def run_script(args, cwd):
+    result = subprocess.run([sys.executable, SCRIPT] + args, cwd=cwd, capture_output=True, text=True)
+    return result
+def test_archive_unused_files_basic():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        setup_test_fixture_archive_unused_files(tmpdir)
+        result = run_script([], cwd=tmpdir)
+        assert 'unused1.adoc' in result.stdout
+        assert 'unused2.adoc' in result.stdout
+        assert 'used.adoc' not in result.stdout
+        # Manifest file should be created in archive
+        archive_dir = os.path.join(tmpdir, 'archive')
+        manifest_files = [f for f in os.listdir(archive_dir) if f.startswith('to-archive-') and f.endswith('.txt')]
+        assert manifest_files
+@pytest.mark.parametrize('exclude_args,should_find', [
+    (['--exclude-file', './modules/unused1.adoc'], 'unused2.adoc'),
+    (['--exclude-dir', './modules'], None),
+])
+def test_archive_unused_files_exclusions(exclude_args, should_find):
+    with tempfile.TemporaryDirectory() as tmpdir:
+        setup_test_fixture_archive_unused_files(tmpdir)
+        result = run_script(exclude_args, cwd=tmpdir)
+        archive_dir = os.path.join(tmpdir, 'archive')
+        manifest_files = [f for f in os.listdir(archive_dir) if f.startswith('to-archive-') and f.endswith('.txt')]
+        assert manifest_files
+        manifest_path = os.path.join(archive_dir, manifest_files[0])
+        with open(manifest_path, 'r', encoding='utf-8') as f:
+            manifest_content = f.read()
+        if should_find:
+            assert should_find in manifest_content
+        else:
+            # If all modules are excluded, no unused files from './modules' should be present
+            assert 'modules/unused1.adoc' not in manifest_content
+            assert 'modules/unused2.adoc' not in manifest_content
+def test_archive_unused_files_commented_references():
+    """Test that files referenced only in commented lines are tracked correctly"""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create test structure
+        os.makedirs(os.path.join(tmpdir, 'modules'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'assemblies'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'archive'), exist_ok=True)
+        # Create files
+        with open(os.path.join(tmpdir, 'modules', 'commented-only.adoc'), 'w') as f:
+            f.write('This file is only referenced in comments.\n')
+        with open(os.path.join(tmpdir, 'modules', 'truly-unused.adoc'), 'w') as f:
+            f.write('This file has no references at all.\n')
+        with open(os.path.join(tmpdir, 'modules', 'used.adoc'), 'w') as f:
+            f.write('This file is properly used.\n')
+        with open(os.path.join(tmpdir, 'assemblies', 'master.adoc'), 'w') as f:
+            f.write('include::../modules/used.adoc[]\n')
+            f.write('// include::../modules/commented-only.adoc[]\n')
+        # Run without --commented flag
+        result = run_script([], cwd=tmpdir)
+        # Check that commented-only file is NOT in unused list (considered "used")
+        assert 'commented-only.adoc' not in result.stdout
+        # Check that truly unused file IS in the list
+        assert 'truly-unused.adoc' in result.stdout
+        # Check that report was generated
+        archive_dir = os.path.join(tmpdir, 'archive')
+        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
+        assert os.path.exists(report_path), "Commented references report should be created"
+        with open(report_path, 'r') as f:
+            report_content = f.read()
+            assert 'commented-only.adoc' in report_content
+            assert 'master.adoc' in report_content
+def test_archive_unused_files_with_commented_flag():
+    """Test that --commented flag includes files with commented-only references"""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create test structure
+        os.makedirs(os.path.join(tmpdir, 'modules'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'assemblies'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'archive'), exist_ok=True)
+        # Create files
+        with open(os.path.join(tmpdir, 'modules', 'commented-only.adoc'), 'w') as f:
+            f.write('This file is only referenced in comments.\n')
+        with open(os.path.join(tmpdir, 'modules', 'truly-unused.adoc'), 'w') as f:
+            f.write('This file has no references at all.\n')
+        with open(os.path.join(tmpdir, 'modules', 'used.adoc'), 'w') as f:
+            f.write('This file is properly used.\n')
+        with open(os.path.join(tmpdir, 'assemblies', 'master.adoc'), 'w') as f:
+            f.write('include::../modules/used.adoc[]\n')
+            f.write('// include::../modules/commented-only.adoc[]\n')
+        # Run WITH --commented flag
+        result = run_script(['--commented'], cwd=tmpdir)
+        # With --commented flag, both should be in unused list
+        assert 'commented-only.adoc' in result.stdout
+        assert 'truly-unused.adoc' in result.stdout
+        # Check that 'used.adoc' is NOT in the output (should be excluded because it has uncommented reference)
+        # Split by lines to avoid substring matching issues
+        output_lines = result.stdout.strip().split('\n')
+        assert not any(line.endswith('modules/used.adoc') or line == 'modules/used.adoc' for line in output_lines)
+        # No report should be generated when --commented is used
+        archive_dir = os.path.join(tmpdir, 'archive')
+        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
+        # Report may exist from earlier, but shouldn't mention the file is "considered used"
+        # The key is that the file should be in the manifest/stdout

rolfedh_doc_utils-0.1.35/tests/test_archive_unused_images.py ADDED Viewed

@@ -0,0 +1,110 @@
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+import tempfile
+import shutil
+import pytest
+from test_fixture_archive_unused_images import setup_test_fixture
+import subprocess
+SCRIPT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'archive_unused_images.py'))
+def run_script(args, cwd):
+    result = subprocess.run([sys.executable, SCRIPT] + args, cwd=cwd, capture_output=True, text=True)
+    return result
+def test_archive_unused_images_basic():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        setup_test_fixture(tmpdir)
+        result = run_script([], cwd=tmpdir)
+        # Check that unused images are found
+        assert 'images/unused1.png' in result.stdout
+        assert 'images/unused2.jpg' in result.stdout
+        # Check that used images are not in the output
+        # Split by lines to avoid substring matching issues
+        output_lines = result.stdout.strip().split('\n')
+        assert not any('images/used1.png' in line for line in output_lines)
+        assert not any('images/used2.jpg' in line for line in output_lines)
+        # Manifest file should be created in archive
+        archive_dir = os.path.join(tmpdir, 'archive')
+        manifest_files = [f for f in os.listdir(archive_dir) if f.startswith('unused-images-') and f.endswith('.txt')]
+        assert manifest_files
+@pytest.mark.parametrize('exclude_args,should_find', [
+    (['--exclude-file', './images/unused1.png'], 'unused2.jpg'),
+    (['--exclude-dir', './images'], None),
+])
+def test_archive_unused_images_exclusions(exclude_args, should_find):
+    with tempfile.TemporaryDirectory() as tmpdir:
+        setup_test_fixture(tmpdir)
+        result = run_script(exclude_args, cwd=tmpdir)
+        if should_find:
+            assert should_find in result.stdout
+        else:
+            # If all images are excluded, no unused images from './images' should be present
+            assert 'images/unused1.png' not in result.stdout
+            assert 'images/unused2.jpg' not in result.stdout
+def test_archive_unused_images_commented_references():
+    """Test that images referenced only in commented lines are tracked correctly"""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create test structure
+        os.makedirs(os.path.join(tmpdir, 'images'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'modules'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'archive'), exist_ok=True)
+        # Create image files
+        open(os.path.join(tmpdir, 'images', 'commented-only.png'), 'w').close()
+        open(os.path.join(tmpdir, 'images', 'truly-unused.png'), 'w').close()
+        open(os.path.join(tmpdir, 'images', 'used.png'), 'w').close()
+        # Create AsciiDoc file with references
+        with open(os.path.join(tmpdir, 'modules', 'test.adoc'), 'w') as f:
+            f.write('image::../images/used.png[]\n')
+            f.write('// image::../images/commented-only.png[]\n')
+        # Run without --commented flag
+        result = run_script([], cwd=tmpdir)
+        # Check that commented-only image is NOT in unused list (considered "used")
+        assert 'commented-only.png' not in result.stdout
+        # Check that truly unused image IS in the list
+        assert 'truly-unused.png' in result.stdout
+        # Check that report was generated
+        archive_dir = os.path.join(tmpdir, 'archive')
+        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
+        assert os.path.exists(report_path), "Commented image references report should be created"
+        with open(report_path, 'r') as f:
+            report_content = f.read()
+            assert 'commented-only.png' in report_content
+            assert 'test.adoc' in report_content
+def test_archive_unused_images_with_commented_flag():
+    """Test that --commented flag includes images with commented-only references"""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create test structure
+        os.makedirs(os.path.join(tmpdir, 'images'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'modules'), exist_ok=True)
+        os.makedirs(os.path.join(tmpdir, 'archive'), exist_ok=True)
+        # Create image files
+        open(os.path.join(tmpdir, 'images', 'commented-only.png'), 'w').close()
+        open(os.path.join(tmpdir, 'images', 'truly-unused.png'), 'w').close()
+        open(os.path.join(tmpdir, 'images', 'used.png'), 'w').close()
+        # Create AsciiDoc file with references
+        with open(os.path.join(tmpdir, 'modules', 'test.adoc'), 'w') as f:
+            f.write('image::../images/used.png[]\n')
+            f.write('// image::../images/commented-only.png[]\n')
+        # Run WITH --commented flag
+        result = run_script(['--commented'], cwd=tmpdir)
+        # With --commented flag, both should be in unused list
+        assert 'commented-only.png' in result.stdout
+        assert 'truly-unused.png' in result.stdout
+        # Check that 'used.png' is NOT in the output (should be excluded because it has uncommented reference)
+        # Split by lines to avoid substring matching issues
+        output_lines = result.stdout.strip().split('\n')
+        assert not any(line.endswith('images/used.png') or line == 'images/used.png' for line in output_lines)

rolfedh_doc_utils-0.1.34/doc_utils/unused_adoc.py DELETED Viewed

@@ -1,120 +0,0 @@
-# doc_utils/unused_adoc.py
-import os
-import re
-from .file_utils import collect_files, write_manifest_and_archive
-from .topic_map_parser import detect_repo_type, get_all_topic_map_references
-def find_scan_directories(base_path='.', exclude_dirs=None):
-    """
-    Automatically find all 'modules' and 'assemblies' directories in the repository.
-    Returns a list of paths to scan.
-    """
-    scan_dirs = []
-    exclude_dirs = exclude_dirs or []
-    for root, dirs, files in os.walk(base_path):
-        # Skip symbolic links to prevent issues
-        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]
-        # Skip excluded directories
-        for exclude_dir in exclude_dirs:
-            abs_exclude = os.path.abspath(exclude_dir)
-            if os.path.abspath(root).startswith(abs_exclude):
-                dirs[:] = []  # Don't descend into excluded directories
-                break
-        # Skip hidden directories and common non-content directories
-        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', 'build', 'dist', 'target']]
-        # Look for modules and assemblies directories
-        for d in dirs:
-            if d in ['modules', 'assemblies']:
-                dir_path = os.path.join(root, d)
-                # Check if this directory or any subdirectory contains .adoc files
-                has_adoc = False
-                for subroot, subdirs, subfiles in os.walk(dir_path):
-                    # Skip symbolic links
-                    subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
-                    if any(f.endswith('.adoc') for f in subfiles):
-                        has_adoc = True
-                        break
-                if has_adoc:
-                    scan_dirs.append(dir_path)
-    # Also check for modules/rn pattern if modules exists
-    modules_dirs = [d for d in scan_dirs if os.path.basename(d) == 'modules']
-    for modules_dir in modules_dirs:
-        rn_dir = os.path.join(modules_dir, 'rn')
-        if os.path.isdir(rn_dir):
-            # Check if rn directory or subdirectories contain .adoc files
-            has_adoc = False
-            for subroot, subdirs, subfiles in os.walk(rn_dir):
-                subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
-                if any(f.endswith('.adoc') for f in subfiles):
-                    has_adoc = True
-                    break
-            if has_adoc:
-                scan_dirs.append(rn_dir)
-    return scan_dirs
-def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
-    # Print safety warning
-    print("\n⚠️  SAFETY: Work in a git branch! Run without --archive first to preview.\n")
-    # If no scan_dirs provided, auto-discover them
-    if not scan_dirs:
-        scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
-        if scan_dirs:
-            print(f"Auto-discovered directories to scan:")
-            for dir_path in sorted(scan_dirs):
-                print(f"  - {dir_path}")
-        else:
-            print("No 'modules' or 'assemblies' directories found containing .adoc files.")
-            print("Please run this tool from your documentation repository root.")
-            return
-    # Detect repository type
-    repo_type = detect_repo_type()
-    print(f"Detected repository type: {repo_type}")
-    # Collect all .adoc files in scan directories
-    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
-    # Track which files are referenced
-    referenced_files = set()
-    if repo_type == 'topic_map':
-        # For OpenShift-docs style repos, get references from topic maps
-        topic_references = get_all_topic_map_references()
-        # Convert to basenames for comparison
-        referenced_files.update(os.path.basename(ref) for ref in topic_references)
-    # Always scan for include:: directives in all .adoc files
-    include_pattern = re.compile(r'include::(.+?)\[')
-    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
-    for file_path in adoc_files:
-        try:
-            with open(file_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-                includes = include_pattern.findall(content)
-                # Extract just the filename from the include path
-                for include in includes:
-                    # Handle both relative and absolute includes
-                    include_basename = os.path.basename(include)
-                    referenced_files.add(include_basename)
-        except Exception as e:
-            print(f"Warning: could not read {file_path}: {e}")
-    # Find unused files by comparing basenames
-    unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
-    unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates
-    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
-    return write_manifest_and_archive(
-        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
-    )

rolfedh_doc_utils-0.1.34/doc_utils/unused_images.py DELETED Viewed

@@ -1,31 +0,0 @@
-# doc_utils/unused_images.py
-import os
-import re
-from .file_utils import collect_files, write_manifest_and_archive
-IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
-def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
-    # Print safety warning
-    print("\n⚠️  SAFETY: Work in a git branch! Run without --archive first to preview.\n")
-    image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
-    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
-    referenced_images = set()
-    image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
-    for adoc_file in adoc_files:
-        try:
-            with open(adoc_file, 'r', encoding='utf-8') as f:
-                content = f.read()
-                for match in image_ref_pattern.findall(content):
-                    for group in match:
-                        if group:
-                            referenced_images.add(os.path.basename(group))
-        except Exception as e:
-            print(f"Warning: could not read {adoc_file}: {e}")
-    unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
-    unused_images = list(dict.fromkeys(unused_images))
-    return write_manifest_and_archive(
-        unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
-    )

rolfedh_doc_utils-0.1.34/tests/test_archive_unused_files.py DELETED Viewed

@@ -1,47 +0,0 @@
-import os
-import sys
-sys.path.insert(0, os.path.dirname(__file__))
-import tempfile
-import shutil
-import pytest
-from test_fixture_archive_unused_files import setup_test_fixture_archive_unused_files
-import subprocess
-SCRIPT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'archive_unused_files.py'))
-def run_script(args, cwd):
-    result = subprocess.run([sys.executable, SCRIPT] + args, cwd=cwd, capture_output=True, text=True)
-    return result
-def test_archive_unused_files_basic():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        setup_test_fixture_archive_unused_files(tmpdir)
-        result = run_script([], cwd=tmpdir)
-        assert 'unused1.adoc' in result.stdout
-        assert 'unused2.adoc' in result.stdout
-        assert 'used.adoc' not in result.stdout
-        # Manifest file should be created in archive
-        archive_dir = os.path.join(tmpdir, 'archive')
-        manifest_files = [f for f in os.listdir(archive_dir) if f.startswith('to-archive-') and f.endswith('.txt')]
-        assert manifest_files
-@pytest.mark.parametrize('exclude_args,should_find', [
-    (['--exclude-file', './modules/unused1.adoc'], 'unused2.adoc'),
-    (['--exclude-dir', './modules'], None),
-])
-def test_archive_unused_files_exclusions(exclude_args, should_find):
-    with tempfile.TemporaryDirectory() as tmpdir:
-        setup_test_fixture_archive_unused_files(tmpdir)
-        result = run_script(exclude_args, cwd=tmpdir)
-        archive_dir = os.path.join(tmpdir, 'archive')
-        manifest_files = [f for f in os.listdir(archive_dir) if f.startswith('to-archive-') and f.endswith('.txt')]
-        assert manifest_files
-        manifest_path = os.path.join(archive_dir, manifest_files[0])
-        with open(manifest_path, 'r', encoding='utf-8') as f:
-            manifest_content = f.read()
-        if should_find:
-            assert should_find in manifest_content
-        else:
-            # If all modules are excluded, no unused files from './modules' should be present
-            assert 'modules/unused1.adoc' not in manifest_content
-            assert 'modules/unused2.adoc' not in manifest_content

rolfedh_doc_utils-0.1.34/tests/test_archive_unused_images.py DELETED Viewed

@@ -1,46 +0,0 @@
-import os
-import sys
-sys.path.insert(0, os.path.dirname(__file__))
-import tempfile
-import shutil
-import pytest
-from test_fixture_archive_unused_images import setup_test_fixture
-import subprocess
-SCRIPT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'archive_unused_images.py'))
-def run_script(args, cwd):
-    result = subprocess.run([sys.executable, SCRIPT] + args, cwd=cwd, capture_output=True, text=True)
-    return result
-def test_archive_unused_images_basic():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        setup_test_fixture(tmpdir)
-        result = run_script([], cwd=tmpdir)
-        # Check that unused images are found
-        assert 'images/unused1.png' in result.stdout
-        assert 'images/unused2.jpg' in result.stdout
-        # Check that used images are not in the output
-        # Split by lines to avoid substring matching issues
-        output_lines = result.stdout.strip().split('\n')
-        assert not any('images/used1.png' in line for line in output_lines)
-        assert not any('images/used2.jpg' in line for line in output_lines)
-        # Manifest file should be created in archive
-        archive_dir = os.path.join(tmpdir, 'archive')
-        manifest_files = [f for f in os.listdir(archive_dir) if f.startswith('unused-images-') and f.endswith('.txt')]
-        assert manifest_files
-@pytest.mark.parametrize('exclude_args,should_find', [
-    (['--exclude-file', './images/unused1.png'], 'unused2.jpg'),
-    (['--exclude-dir', './images'], None),
-])
-def test_archive_unused_images_exclusions(exclude_args, should_find):
-    with tempfile.TemporaryDirectory() as tmpdir:
-        setup_test_fixture(tmpdir)
-        result = run_script(exclude_args, cwd=tmpdir)
-        if should_find:
-            assert should_find in result.stdout
-        else:
-            # If all images are excluded, no unused images from './images' should be present
-            assert 'images/unused1.png' not in result.stdout
-            assert 'images/unused2.jpg' not in result.stdout