PyPI - rolfedh-doc-utils - Versions diffs - 0.1.34__py3-none-any.whl → 0.1.35__py3-none-any.whl - Mend

rolfedh-doc-utils 0.1.34__py3-none-any.whl → 0.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

archive_unused_files.py CHANGED Viewed

@@ -22,6 +22,7 @@ def main():
         epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
     )
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+    parser.add_argument('--commented', action='store_true', help='Include files that are referenced only in commented lines in the archive operation.')
     parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
     parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
     parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
@@ -35,13 +36,13 @@ def main():
     exclude_dirs = list(args.exclude_dir)
     exclude_files = list(args.exclude_file)
     if args.exclude_list:
         list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
         exclude_dirs.extend(list_dirs)
         exclude_files.extend(list_files)
-    find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
+    find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
 if __name__ == '__main__':
     main()

archive_unused_images.py CHANGED Viewed

@@ -18,6 +18,7 @@ def main():
     check_version_on_startup()
     parser = argparse.ArgumentParser(description='Archive unused image files.')
     parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+    parser.add_argument('--commented', action='store_true', help='Include images that are referenced only in commented lines in the archive operation.')
     parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
     parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
     parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
@@ -29,13 +30,13 @@ def main():
     exclude_dirs = list(args.exclude_dir)
     exclude_files = list(args.exclude_file)
     if args.exclude_list:
         list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
         exclude_dirs.extend(list_dirs)
         exclude_files.extend(list_files)
-    find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
+    find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
 if __name__ == '__main__':
     main()

doc_utils/unused_adoc.py CHANGED Viewed

@@ -60,10 +60,10 @@ def find_scan_directories(base_path='.', exclude_dirs=None):
     return scan_dirs
-def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
+def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
     # Print safety warning
     print("\n⚠️  SAFETY: Work in a git branch! Run without --archive first to preview.\n")
     # If no scan_dirs provided, auto-discover them
     if not scan_dirs:
         scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
@@ -75,46 +75,107 @@ def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exc
             print("No 'modules' or 'assemblies' directories found containing .adoc files.")
             print("Please run this tool from your documentation repository root.")
             return
     # Detect repository type
     repo_type = detect_repo_type()
     print(f"Detected repository type: {repo_type}")
     # Collect all .adoc files in scan directories
     asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
-    # Track which files are referenced
-    referenced_files = set()
+    # Track which files are referenced (uncommented and commented separately)
+    referenced_files = set()  # Files in uncommented includes
+    commented_only_files = {}  # Files referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
     if repo_type == 'topic_map':
         # For OpenShift-docs style repos, get references from topic maps
         topic_references = get_all_topic_map_references()
         # Convert to basenames for comparison
         referenced_files.update(os.path.basename(ref) for ref in topic_references)
-    # Always scan for include:: directives in all .adoc files
+    # Patterns for finding includes (both commented and uncommented)
     include_pattern = re.compile(r'include::(.+?)\[')
+    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')
     adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
     for file_path in adoc_files:
         try:
             with open(file_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-                includes = include_pattern.findall(content)
-                # Extract just the filename from the include path
-                for include in includes:
-                    # Handle both relative and absolute includes
-                    include_basename = os.path.basename(include)
-                    referenced_files.add(include_basename)
+                lines = f.readlines()
+                for line_num, line in enumerate(lines, 1):
+                    # Check if this is a commented include
+                    commented_match = commented_include_pattern.search(line)
+                    if commented_match:
+                        include_basename = os.path.basename(commented_match.group(1))
+                        # Track location of commented reference
+                        if include_basename not in commented_only_files:
+                            commented_only_files[include_basename] = []
+                        commented_only_files[include_basename].append((file_path, line_num, line.strip()))
+                    else:
+                        # Check for uncommented includes
+                        uncommented_match = include_pattern.search(line)
+                        if uncommented_match:
+                            include_basename = os.path.basename(uncommented_match.group(1))
+                            referenced_files.add(include_basename)
+                            # If we found an uncommented reference, remove from commented_only tracking
+                            if include_basename in commented_only_files:
+                                del commented_only_files[include_basename]
         except Exception as e:
             print(f"Warning: could not read {file_path}: {e}")
-    # Find unused files by comparing basenames
-    unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
+    # Determine which files are unused based on the include_commented flag
+    if include_commented:
+        # When --commented is used: treat files with commented-only references as unused
+        # Only files with uncommented references are considered "used"
+        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
+        commented_only_unused = []
+    else:
+        # Default behavior: files referenced only in commented lines are considered "used"
+        # They should NOT be in the unused list, but we track them for reporting
+        all_referenced = referenced_files.union(set(commented_only_files.keys()))
+        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in all_referenced]
+        # Generate list of files referenced only in comments for the report
+        commented_only_unused = []
+        for basename, references in commented_only_files.items():
+            # Find the full path for this basename in asciidoc_files
+            matching_files = [f for f in asciidoc_files if os.path.basename(f) == basename]
+            for f in matching_files:
+                commented_only_unused.append((f, references))
     unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates
+    # Print summary
     print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
+    # Generate detailed report for commented-only references
+    if commented_only_unused and not include_commented:
+        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
+        os.makedirs(archive_dir, exist_ok=True)
+        with open(report_path, 'w', encoding='utf-8') as report:
+            report.write("Files Referenced Only in Commented Lines\n")
+            report.write("=" * 70 + "\n\n")
+            report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
+            report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
+            report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
+            report.write("-" * 70 + "\n\n")
+            for file_path, references in sorted(commented_only_unused):
+                report.write(f"File: {file_path}\n")
+                report.write(f"Referenced in {len(references)} commented line(s):\n")
+                for ref_file, line_num, line_text in references:
+                    report.write(f"  {ref_file}:{line_num}\n")
+                    report.write(f"    {line_text}\n")
+                report.write("\n")
+        print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
+        print(f"   Detailed report saved to: {report_path}")
+        print(f"   These files are considered 'used' and will NOT be archived by default.")
+        print(f"   To include them in the archive operation, use the --commented flag.\n")
     return write_manifest_and_archive(
         unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
     )

doc_utils/unused_images.py CHANGED Viewed

@@ -6,26 +6,98 @@ from .file_utils import collect_files, write_manifest_and_archive
 IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
-def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
+def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
     # Print safety warning
     print("\n⚠️  SAFETY: Work in a git branch! Run without --archive first to preview.\n")
     image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
     adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
-    referenced_images = set()
+    # Track which images are referenced (uncommented and commented separately)
+    referenced_images = set()  # Images in uncommented references
+    commented_only_images = {}  # Images referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
+    # Patterns for finding image references (both commented and uncommented)
     image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
+    commented_line_pattern = re.compile(r'^\s*//')
     for adoc_file in adoc_files:
         try:
             with open(adoc_file, 'r', encoding='utf-8') as f:
-                content = f.read()
-                for match in image_ref_pattern.findall(content):
-                    for group in match:
-                        if group:
-                            referenced_images.add(os.path.basename(group))
+                lines = f.readlines()
+                for line_num, line in enumerate(lines, 1):
+                    # Check if this line is commented
+                    is_commented = commented_line_pattern.match(line)
+                    # Find all image references in the line
+                    for match in image_ref_pattern.findall(line):
+                        for group in match:
+                            if group:
+                                image_basename = os.path.basename(group)
+                                if is_commented:
+                                    # Track location of commented reference
+                                    if image_basename not in commented_only_images:
+                                        commented_only_images[image_basename] = []
+                                    commented_only_images[image_basename].append((adoc_file, line_num, line.strip()))
+                                else:
+                                    # Add to uncommented references
+                                    referenced_images.add(image_basename)
+                                    # If we found an uncommented reference, remove from commented_only tracking
+                                    if image_basename in commented_only_images:
+                                        del commented_only_images[image_basename]
         except Exception as e:
             print(f"Warning: could not read {adoc_file}: {e}")
-    unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
+    # Determine which images are unused based on the include_commented flag
+    if include_commented:
+        # When --commented is used: treat images with commented-only references as unused
+        # Only images with uncommented references are considered "used"
+        unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
+        commented_only_unused = []
+    else:
+        # Default behavior: images referenced only in commented lines are considered "used"
+        # They should NOT be in the unused list, but we track them for reporting
+        all_referenced = referenced_images.union(set(commented_only_images.keys()))
+        unused_images = [f for f in image_files if os.path.basename(f) not in all_referenced]
+        # Generate list of images referenced only in comments for the report
+        commented_only_unused = []
+        for basename, references in commented_only_images.items():
+            # Find the full path for this basename in image_files
+            matching_files = [f for f in image_files if os.path.basename(f) == basename]
+            for f in matching_files:
+                commented_only_unused.append((f, references))
     unused_images = list(dict.fromkeys(unused_images))
+    # Generate detailed report for commented-only references
+    if commented_only_unused and not include_commented:
+        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
+        os.makedirs(archive_dir, exist_ok=True)
+        with open(report_path, 'w', encoding='utf-8') as report:
+            report.write("Images Referenced Only in Commented Lines\n")
+            report.write("=" * 70 + "\n\n")
+            report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
+            report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
+            report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
+            report.write("-" * 70 + "\n\n")
+            for file_path, references in sorted(commented_only_unused):
+                report.write(f"Image: {file_path}\n")
+                report.write(f"Referenced in {len(references)} commented line(s):\n")
+                for ref_file, line_num, line_text in references:
+                    report.write(f"  {ref_file}:{line_num}\n")
+                    report.write(f"    {line_text}\n")
+                report.write("\n")
+        print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
+        print(f"   Detailed report saved to: {report_path}")
+        print(f"   These images are considered 'used' and will NOT be archived by default.")
+        print(f"   To include them in the archive operation, use the --commented flag.\n")
     return write_manifest_and_archive(
         unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
     )

doc_utils/version.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Version information for doc-utils."""
 # This should match the version in pyproject.toml
-__version__ = "0.1.34"
+__version__ = "0.1.35"
 def get_version():
     """Return the current version string."""

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.35.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rolfedh-doc-utils
-Version: 0.1.34
+Version: 0.1.35
 Summary: CLI tools for AsciiDoc documentation projects
 Author: Rolfe Dlugy-Hegwer
 License: MIT License

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.35.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-archive_unused_files.py,sha256=OJZrkqn70hiOXED218jMYPFNFWnsDpjsCYOmBRxYnHU,2274
-archive_unused_images.py,sha256=fZeyEZtTd72Gbd3YBXTy5xoshAAM9qb4qFPMjhHL1Fg,1864
+archive_unused_files.py,sha256=YKYPtuBHEZcsyQSwSYxSYvw9v9Mh6Of8MqT53A5bM44,2438
+archive_unused_images.py,sha256=EvPhMIwp6_AHKtuNYQ663q6biXBeXaqf88NzWrhvtIE,2029
 check_scannability.py,sha256=O6ROr-e624jVPvPpASpsWo0gTfuCFpA2mTSX61BjAEI,5478
 convert_callouts_interactive.py,sha256=4PjiVIOWxNJiJLQuBHT3x6rE46-hgfFHSaoo5quYIs8,22889
 convert_callouts_to_deflist.py,sha256=BoqW5_GkQ-KqNzn4vmE6lsQosrPV0lkB-bfAx3dzyMw,25886
@@ -23,16 +23,16 @@ doc_utils/replace_link_attributes.py,sha256=gmAs68_njBqEz-Qni-UGgeYEDTMxlTWk_IOm
 doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
 doc_utils/spinner.py,sha256=lJg15qzODiKoR0G6uFIk2BdVNgn9jFexoTRUMrjiWvk,3554
 doc_utils/topic_map_parser.py,sha256=tKcIO1m9r2K6dvPRGue58zqMr0O2zKU1gnZMzEE3U6o,4571
-doc_utils/unused_adoc.py,sha256=2cbqcYr1os2EhETUU928BlPRlsZVSdI00qaMhqjSIqQ,5263
+doc_utils/unused_adoc.py,sha256=LPQWPGEOizXECxepk7E_5cjTVvKn6RXQYTWG97Ps5VQ,9077
 doc_utils/unused_attributes.py,sha256=OHyAdaBD7aNo357B0SLBN5NC_jNY5TWXMwgtfJNh3X8,7621
-doc_utils/unused_images.py,sha256=nqn36Bbrmon2KlGlcaruNjJJvTQ8_9H0WU9GvCW7rW8,1456
+doc_utils/unused_images.py,sha256=hL8Qrik9QCkVh54eBLuNczRS9tMnsqIEfavNamM1UeQ,5664
 doc_utils/validate_links.py,sha256=iBGXnwdeLlgIT3fo3v01ApT5k0X2FtctsvkrE6E3VMk,19610
-doc_utils/version.py,sha256=LpXe7kXo5uNMJOga179IYdU101aWLSTOnciZkUlrK0E,203
+doc_utils/version.py,sha256=rh_oI-y8ZbMOpFC_a1Qkwb1Tq4FPWdm-vTVRUBqtHGU,203
 doc_utils/version_check.py,sha256=-31Y6AN0KGi_CUCAVOOhf6bPO3r7SQIXPxxeffLAF0w,7535
 doc_utils/warnings_report.py,sha256=20yfwqBjOprfFhQwCujbcsvjJCbHHhmH84uAujm-y-o,8877
-rolfedh_doc_utils-0.1.34.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
-rolfedh_doc_utils-0.1.34.dist-info/METADATA,sha256=uDcruRVK6RPRkZtBtM5DsH9FZ5q9LXEf8hEqOsg3mig,8325
-rolfedh_doc_utils-0.1.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rolfedh_doc_utils-0.1.34.dist-info/entry_points.txt,sha256=vL_LlLKOiurRzchrq8iRUQG19Xi9lSAFVZGjO-xyErk,577
-rolfedh_doc_utils-0.1.34.dist-info/top_level.txt,sha256=J4xtr3zoyCip27b3GnticFVZoyz5HHtgGqHQ-SZONCA,265
-rolfedh_doc_utils-0.1.34.dist-info/RECORD,,
+rolfedh_doc_utils-0.1.35.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
+rolfedh_doc_utils-0.1.35.dist-info/METADATA,sha256=hsWjS4apZYXM4Qk38o3KOhMBEaqe55gc9e5QGZqAYEc,8325
+rolfedh_doc_utils-0.1.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rolfedh_doc_utils-0.1.35.dist-info/entry_points.txt,sha256=vL_LlLKOiurRzchrq8iRUQG19Xi9lSAFVZGjO-xyErk,577
+rolfedh_doc_utils-0.1.35.dist-info/top_level.txt,sha256=J4xtr3zoyCip27b3GnticFVZoyz5HHtgGqHQ-SZONCA,265
+rolfedh_doc_utils-0.1.35.dist-info/RECORD,,

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.35.dist-info}/WHEEL RENAMED Viewed

File without changes

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.35.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.35.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rolfedh_doc_utils-0.1.34.dist-info → rolfedh_doc_utils-0.1.35.dist-info}/top_level.txt RENAMED Viewed

File without changes

rolfedh-doc-utils 0.1.34__py3-none-any.whl → 0.1.35__py3-none-any.whl

rolfedh-doc-utils 0.1.34__py3-none-any.whl → 0.1.35__py3-none-any.whl