rolfedh-doc-utils 0.1.34__tar.gz → 0.1.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/PKG-INFO +1 -1
  2. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/archive_unused_files.py +3 -2
  3. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/archive_unused_images.py +3 -2
  4. rolfedh_doc_utils-0.1.35/doc_utils/unused_adoc.py +181 -0
  5. rolfedh_doc_utils-0.1.35/doc_utils/unused_images.py +103 -0
  6. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/version.py +1 -1
  7. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/pyproject.toml +1 -1
  8. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/PKG-INFO +1 -1
  9. rolfedh_doc_utils-0.1.35/tests/test_archive_unused_files.py +119 -0
  10. rolfedh_doc_utils-0.1.35/tests/test_archive_unused_images.py +110 -0
  11. rolfedh_doc_utils-0.1.34/doc_utils/unused_adoc.py +0 -120
  12. rolfedh_doc_utils-0.1.34/doc_utils/unused_images.py +0 -31
  13. rolfedh_doc_utils-0.1.34/tests/test_archive_unused_files.py +0 -47
  14. rolfedh_doc_utils-0.1.34/tests/test_archive_unused_images.py +0 -46
  15. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/LICENSE +0 -0
  16. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/README.md +0 -0
  17. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/callout_lib/__init__.py +0 -0
  18. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/callout_lib/converter_bullets.py +0 -0
  19. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/callout_lib/converter_comments.py +0 -0
  20. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/callout_lib/converter_deflist.py +0 -0
  21. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/callout_lib/detector.py +0 -0
  22. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/callout_lib/table_parser.py +0 -0
  23. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/check_scannability.py +0 -0
  24. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/convert_callouts_interactive.py +0 -0
  25. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/convert_callouts_to_deflist.py +0 -0
  26. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/__init__.py +0 -0
  27. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/extract_link_attributes.py +0 -0
  28. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/file_utils.py +0 -0
  29. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/format_asciidoc_spacing.py +0 -0
  30. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/replace_link_attributes.py +0 -0
  31. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/scannability.py +0 -0
  32. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/spinner.py +0 -0
  33. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/topic_map_parser.py +0 -0
  34. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/unused_attributes.py +0 -0
  35. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/validate_links.py +0 -0
  36. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/version_check.py +0 -0
  37. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils/warnings_report.py +0 -0
  38. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/doc_utils_cli.py +0 -0
  39. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/extract_link_attributes.py +0 -0
  40. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/find_unused_attributes.py +0 -0
  41. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/format_asciidoc_spacing.py +0 -0
  42. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/replace_link_attributes.py +0 -0
  43. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/SOURCES.txt +0 -0
  44. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  45. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/entry_points.txt +0 -0
  46. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
  47. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/rolfedh_doc_utils.egg-info/top_level.txt +0 -0
  48. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/setup.cfg +0 -0
  49. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/setup.py +0 -0
  50. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_auto_discovery.py +0 -0
  51. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_check_scannability.py +0 -0
  52. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_cli_entry_points.py +0 -0
  53. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_extract_link_attributes.py +0 -0
  54. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_file_utils.py +0 -0
  55. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_fixture_archive_unused_files.py +0 -0
  56. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_fixture_archive_unused_images.py +0 -0
  57. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_fixture_check_scannability.py +0 -0
  58. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_parse_exclude_list.py +0 -0
  59. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_replace_link_attributes.py +0 -0
  60. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_symlink_handling.py +0 -0
  61. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_table_callout_conversion.py +0 -0
  62. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_table_parser.py +0 -0
  63. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_topic_map_parser.py +0 -0
  64. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_unused_attributes.py +0 -0
  65. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_validate_links.py +0 -0
  66. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/tests/test_version_check.py +0 -0
  67. {rolfedh_doc_utils-0.1.34 → rolfedh_doc_utils-0.1.35}/validate_links.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.34
3
+ Version: 0.1.35
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -22,6 +22,7 @@ def main():
22
22
  epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
23
23
  )
24
24
  parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
25
+ parser.add_argument('--commented', action='store_true', help='Include files that are referenced only in commented lines in the archive operation.')
25
26
  parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
26
27
  parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
27
28
  parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
@@ -35,13 +36,13 @@ def main():
35
36
 
36
37
  exclude_dirs = list(args.exclude_dir)
37
38
  exclude_files = list(args.exclude_file)
38
-
39
+
39
40
  if args.exclude_list:
40
41
  list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
41
42
  exclude_dirs.extend(list_dirs)
42
43
  exclude_files.extend(list_files)
43
44
 
44
- find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
45
+ find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
45
46
 
46
47
  if __name__ == '__main__':
47
48
  main()
@@ -18,6 +18,7 @@ def main():
18
18
  check_version_on_startup()
19
19
  parser = argparse.ArgumentParser(description='Archive unused image files.')
20
20
  parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
21
+ parser.add_argument('--commented', action='store_true', help='Include images that are referenced only in commented lines in the archive operation.')
21
22
  parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
22
23
  parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
23
24
  parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
@@ -29,13 +30,13 @@ def main():
29
30
 
30
31
  exclude_dirs = list(args.exclude_dir)
31
32
  exclude_files = list(args.exclude_file)
32
-
33
+
33
34
  if args.exclude_list:
34
35
  list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
35
36
  exclude_dirs.extend(list_dirs)
36
37
  exclude_files.extend(list_files)
37
38
 
38
- find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
39
+ find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
39
40
 
40
41
  if __name__ == '__main__':
41
42
  main()
@@ -0,0 +1,181 @@
1
+ # doc_utils/unused_adoc.py
2
+
3
+ import os
4
+ import re
5
+ from .file_utils import collect_files, write_manifest_and_archive
6
+ from .topic_map_parser import detect_repo_type, get_all_topic_map_references
7
+
8
def _is_excluded_path(path, abs_excludes):
    """Return True if *path* equals, or lies beneath, one of *abs_excludes*."""
    abs_path = os.path.abspath(path)
    for excluded in abs_excludes:
        # Compare on a path-component boundary: a plain startswith() would
        # treat '/repo/docs' as being inside an excluded '/repo/doc'.
        if abs_path == excluded or abs_path.startswith(os.path.join(excluded, '')):
            return True
    return False


def _dir_contains_adoc(directory):
    """Return True if *directory* or a non-symlinked subdirectory holds a .adoc file."""
    for subroot, subdirs, subfiles in os.walk(directory):
        # Skip symbolic links
        subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
        if any(name.endswith('.adoc') for name in subfiles):
            return True
    return False


def find_scan_directories(base_path='.', exclude_dirs=None):
    """
    Automatically find all 'modules' and 'assemblies' directories in the repository.

    Walks base_path, skipping symlinked directories, hidden directories,
    common build/output directories, and anything in (or under) exclude_dirs.
    A 'modules' or 'assemblies' directory is kept only if it, or one of its
    subdirectories, contains at least one .adoc file. For each 'modules'
    directory found, a 'rn' subdirectory containing .adoc files is added too.

    Returns a list of paths to scan.
    """
    # Ignore empty entries: abspath('') is the current directory, which would
    # otherwise exclude the entire tree.
    abs_excludes = [os.path.abspath(d) for d in (exclude_dirs or []) if d]
    non_content_dirs = {'node_modules', 'build', 'dist', 'target'}
    scan_dirs = []

    for root, dirs, _files in os.walk(base_path):
        # base_path itself may be excluded; don't descend in that case.
        if _is_excluded_path(root, abs_excludes):
            dirs[:] = []
            continue

        # Prune children *before* inspecting them, so an excluded 'modules'
        # directory is never reported and never descended into.
        dirs[:] = [
            d for d in dirs
            if not os.path.islink(os.path.join(root, d))
            and not d.startswith('.')
            and d not in non_content_dirs
            and not _is_excluded_path(os.path.join(root, d), abs_excludes)
        ]

        # Look for modules and assemblies directories
        for d in dirs:
            if d in ('modules', 'assemblies'):
                dir_path = os.path.join(root, d)
                if _dir_contains_adoc(dir_path):
                    scan_dirs.append(dir_path)

    # Also check for modules/rn pattern if modules exists
    for modules_dir in [d for d in scan_dirs if os.path.basename(d) == 'modules']:
        rn_dir = os.path.join(modules_dir, 'rn')
        if (
            os.path.isdir(rn_dir)
            and not _is_excluded_path(rn_dir, abs_excludes)
            and _dir_contains_adoc(rn_dir)
        ):
            scan_dirs.append(rn_dir)

    return scan_dirs
62
+
63
def _collect_include_references(adoc_files):
    """
    Scan adoc_files for include:: directives.

    Returns a tuple (referenced, commented):
      referenced - set of basenames named by includes on uncommented lines
      commented  - dict {basename: [(file, line_num, line_text), ...]} for
                   includes found on lines starting with '//'
    A basename may appear in both; the caller decides what "commented only" means.
    """
    include_pattern = re.compile(r'include::(.+?)\[')
    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')

    referenced = set()
    commented = {}

    for file_path in adoc_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except (OSError, UnicodeError) as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"Warning: could not read {file_path}: {e}")
            continue

        for line_num, line in enumerate(lines, 1):
            commented_match = commented_include_pattern.search(line)
            if commented_match:
                include_basename = os.path.basename(commented_match.group(1))
                commented.setdefault(include_basename, []).append((file_path, line_num, line.strip()))
                continue

            uncommented_match = include_pattern.search(line)
            if uncommented_match:
                referenced.add(os.path.basename(uncommented_match.group(1)))

    return referenced, commented


def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
    """
    Find .adoc files in scan_dirs that no include:: directive references.

    scan_dirs         - directories to check; auto-discovered with
                        find_scan_directories() when empty or None
    archive_dir       - directory that receives the manifest and the report
    archive           - forwarded to write_manifest_and_archive()
    exclude_dirs      - directories forwarded to discovery and collect_files()
    exclude_files     - files forwarded to collect_files()
    include_commented - when True, files referenced only from commented-out
                        includes count as unused; when False (default) they
                        count as used and are listed in
                        'commented-references-report.txt' in archive_dir

    Files are matched by basename. Returns None when no scan directories can
    be discovered; otherwise returns the result of write_manifest_and_archive().
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    # If no scan_dirs provided, auto-discover them
    if not scan_dirs:
        scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
        if not scan_dirs:
            print("No 'modules' or 'assemblies' directories found containing .adoc files.")
            print("Please run this tool from your documentation repository root.")
            return
        print("Auto-discovered directories to scan:")
        for dir_path in sorted(scan_dirs):
            print(f" - {dir_path}")

    # Detect repository type
    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Collect all .adoc files in scan directories
    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    # Every .adoc file under the current directory may contain includes.
    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
    referenced_files, commented_references = _collect_include_references(adoc_files)

    if repo_type == 'topic_map':
        # For OpenShift-docs style repos, get references from topic maps
        topic_references = get_all_topic_map_references()
        # Convert to basenames for comparison
        referenced_files.update(os.path.basename(ref) for ref in topic_references)

    # Decide "commented only" after the whole scan. Doing it while scanning
    # is order-dependent: a commented include seen after a live one would be
    # misreported as commented-only.
    commented_only_files = {
        basename: references
        for basename, references in commented_references.items()
        if basename not in referenced_files
    }

    # Determine which files are unused based on the include_commented flag
    commented_only_unused = []
    if include_commented:
        # Only files with uncommented references are considered "used"
        used_basenames = referenced_files
    else:
        # Files referenced only in commented lines are considered "used",
        # but are tracked for the report.
        used_basenames = referenced_files.union(commented_only_files)
        for basename, references in commented_only_files.items():
            for f in asciidoc_files:
                if os.path.basename(f) == basename:
                    commented_only_unused.append((f, references))

    unused_files = [f for f in asciidoc_files if os.path.basename(f) not in used_basenames]
    unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates

    # Print summary
    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")

    # Generate detailed report for commented-only references
    if commented_only_unused and not include_commented:
        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
        os.makedirs(archive_dir, exist_ok=True)

        with open(report_path, 'w', encoding='utf-8') as report:
            report.write("Files Referenced Only in Commented Lines\n")
            report.write("=" * 70 + "\n\n")
            report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
            report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
            report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
            report.write("-" * 70 + "\n\n")

            for file_path, references in sorted(commented_only_unused):
                report.write(f"File: {file_path}\n")
                report.write(f"Referenced in {len(references)} commented line(s):\n")
                for ref_file, line_num, line_text in references:
                    report.write(f" {ref_file}:{line_num}\n")
                    report.write(f" {line_text}\n")
                report.write("\n")

        print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
        print(f" Detailed report saved to: {report_path}")
        print(" These files are considered 'used' and will NOT be archived by default.")
        print(" To include them in the archive operation, use the --commented flag.\n")

    return write_manifest_and_archive(
        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
@@ -0,0 +1,103 @@
1
+ # doc_utils/unused_images.py
2
+
3
+ import os
4
+ import re
5
+ from .file_utils import collect_files, write_manifest_and_archive
6
+
7
+ IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
8
+
9
def _collect_image_references(adoc_files):
    """
    Scan adoc_files for image references.

    Returns a tuple (referenced, commented):
      referenced - set of image basenames referenced on uncommented lines
      commented  - dict {basename: [(file, line_num, line_text), ...]} for
                   references found on lines starting with '//'
    A basename may appear in both; the caller decides what "commented only" means.
    """
    image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
    commented_line_pattern = re.compile(r'^\s*//')

    referenced = set()
    commented = {}

    for adoc_file in adoc_files:
        try:
            with open(adoc_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except (OSError, UnicodeError) as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"Warning: could not read {adoc_file}: {e}")
            continue

        for line_num, line in enumerate(lines, 1):
            is_commented = commented_line_pattern.match(line)

            # findall() yields one tuple per match with one slot per
            # alternative; only the alternative that matched is non-empty.
            for match in image_ref_pattern.findall(line):
                for group in match:
                    if not group:
                        continue
                    image_basename = os.path.basename(group)
                    if is_commented:
                        commented.setdefault(image_basename, []).append((adoc_file, line_num, line.strip()))
                    else:
                        referenced.add(image_basename)

    return referenced, commented


def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
    """
    Find image files in scan_dirs that no .adoc file under '.' references.

    scan_dirs         - directories forwarded to collect_files() to gather images
    archive_dir       - directory that receives the manifest and the report
    archive           - forwarded to write_manifest_and_archive()
    exclude_dirs      - directories forwarded to collect_files()
    exclude_files     - files forwarded to collect_files()
    include_commented - when True, images referenced only from commented
                        lines count as unused; when False (default) they
                        count as used and are listed in
                        'commented-image-references-report.txt' in archive_dir

    Images are matched by basename. Returns the result of
    write_manifest_and_archive().
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    referenced_images, commented_references = _collect_image_references(adoc_files)

    # Decide "commented only" after the whole scan. Doing it while scanning
    # is order-dependent: a commented reference seen after a live one would
    # be misreported as commented-only.
    commented_only_images = {
        basename: references
        for basename, references in commented_references.items()
        if basename not in referenced_images
    }

    # Determine which images are unused based on the include_commented flag
    commented_only_unused = []
    if include_commented:
        # Only images with uncommented references are considered "used"
        used_basenames = referenced_images
    else:
        # Images referenced only in commented lines are considered "used",
        # but are tracked for the report.
        used_basenames = referenced_images.union(commented_only_images)
        for basename, references in commented_only_images.items():
            for f in image_files:
                if os.path.basename(f) == basename:
                    commented_only_unused.append((f, references))

    unused_images = [f for f in image_files if os.path.basename(f) not in used_basenames]
    unused_images = list(dict.fromkeys(unused_images))

    # Generate detailed report for commented-only references
    if commented_only_unused and not include_commented:
        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
        os.makedirs(archive_dir, exist_ok=True)

        with open(report_path, 'w', encoding='utf-8') as report:
            report.write("Images Referenced Only in Commented Lines\n")
            report.write("=" * 70 + "\n\n")
            report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
            report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
            report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
            report.write("-" * 70 + "\n\n")

            for file_path, references in sorted(commented_only_unused):
                report.write(f"Image: {file_path}\n")
                report.write(f"Referenced in {len(references)} commented line(s):\n")
                for ref_file, line_num, line_text in references:
                    report.write(f" {ref_file}:{line_num}\n")
                    report.write(f" {line_text}\n")
                report.write("\n")

        print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
        print(f" Detailed report saved to: {report_path}")
        print(" These images are considered 'used' and will NOT be archived by default.")
        print(" To include them in the archive operation, use the --commented flag.\n")

    return write_manifest_and_archive(
        unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
    )
@@ -1,7 +1,7 @@
1
1
  """Version information for doc-utils."""
2
2
 
3
3
  # This should match the version in pyproject.toml
4
- __version__ = "0.1.34"
4
+ __version__ = "0.1.35"
5
5
 
6
6
  def get_version():
7
7
  """Return the current version string."""
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rolfedh-doc-utils"
7
- version = "0.1.34"
7
+ version = "0.1.35"
8
8
  description = "CLI tools for AsciiDoc documentation projects"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.34
3
+ Version: 0.1.35
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -0,0 +1,119 @@
1
+ import os
2
+ import sys
3
+ sys.path.insert(0, os.path.dirname(__file__))
4
+ import tempfile
5
+ import shutil
6
+ import pytest
7
+ from test_fixture_archive_unused_files import setup_test_fixture_archive_unused_files
8
+ import subprocess
9
+
10
+ SCRIPT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'archive_unused_files.py'))
11
+
12
def run_script(args, cwd):
    """Run archive_unused_files.py with args from directory cwd; return the CompletedProcess."""
    command = [sys.executable, SCRIPT] + args
    return subprocess.run(command, cwd=cwd, capture_output=True, text=True)
15
+
16
def test_archive_unused_files_basic():
    """Unused fixture files are reported, the used one is not, and a manifest is written."""
    with tempfile.TemporaryDirectory() as workdir:
        setup_test_fixture_archive_unused_files(workdir)
        completed = run_script([], cwd=workdir)
        output = completed.stdout

        for expected in ('unused1.adoc', 'unused2.adoc'):
            assert expected in output
        assert 'used.adoc' not in output

        # Manifest file should be created in archive
        archive_entries = os.listdir(os.path.join(workdir, 'archive'))
        manifests = [
            name for name in archive_entries
            if name.startswith('to-archive-') and name.endswith('.txt')
        ]
        assert manifests
27
+
28
@pytest.mark.parametrize('exclude_args,should_find', [
    (['--exclude-file', './modules/unused1.adoc'], 'unused2.adoc'),
    (['--exclude-dir', './modules'], None),
])
def test_archive_unused_files_exclusions(exclude_args, should_find):
    """Excluded files and directories must not show up in the generated manifest."""
    with tempfile.TemporaryDirectory() as workdir:
        setup_test_fixture_archive_unused_files(workdir)
        run_script(exclude_args, cwd=workdir)

        archive_dir = os.path.join(workdir, 'archive')
        manifests = [
            name for name in os.listdir(archive_dir)
            if name.startswith('to-archive-') and name.endswith('.txt')
        ]
        assert manifests

        with open(os.path.join(archive_dir, manifests[0]), 'r', encoding='utf-8') as fh:
            manifest_text = fh.read()

        if not should_find:
            # If all modules are excluded, no unused files from './modules' should be present
            assert 'modules/unused1.adoc' not in manifest_text
            assert 'modules/unused2.adoc' not in manifest_text
            return
        assert should_find in manifest_text
48
+
49
def test_archive_unused_files_commented_references():
    """Without --commented, a file included only from a comment counts as used and is reported."""
    fixture_files = [
        (('modules', 'commented-only.adoc'), 'This file is only referenced in comments.\n'),
        (('modules', 'truly-unused.adoc'), 'This file has no references at all.\n'),
        (('modules', 'used.adoc'), 'This file is properly used.\n'),
        (
            ('assemblies', 'master.adoc'),
            'include::../modules/used.adoc[]\n'
            '// include::../modules/commented-only.adoc[]\n',
        ),
    ]

    with tempfile.TemporaryDirectory() as workdir:
        # Create test structure
        for dirname in ('modules', 'assemblies', 'archive'):
            os.makedirs(os.path.join(workdir, dirname), exist_ok=True)

        # Create files
        for parts, text in fixture_files:
            with open(os.path.join(workdir, *parts), 'w') as fh:
                fh.write(text)

        # Run without --commented flag
        completed = run_script([], cwd=workdir)

        # Commented-only file is considered "used"; truly unused file is listed
        assert 'commented-only.adoc' not in completed.stdout
        assert 'truly-unused.adoc' in completed.stdout

        # Check that report was generated
        report_path = os.path.join(workdir, 'archive', 'commented-references-report.txt')
        assert os.path.exists(report_path), "Commented references report should be created"

        with open(report_path, 'r') as fh:
            report_text = fh.read()
        assert 'commented-only.adoc' in report_text
        assert 'master.adoc' in report_text
84
+
85
def test_archive_unused_files_with_commented_flag():
    """Test that --commented flag includes files with commented-only references"""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create test structure
        os.makedirs(os.path.join(tmpdir, 'modules'), exist_ok=True)
        os.makedirs(os.path.join(tmpdir, 'assemblies'), exist_ok=True)
        os.makedirs(os.path.join(tmpdir, 'archive'), exist_ok=True)

        # Create files
        with open(os.path.join(tmpdir, 'modules', 'commented-only.adoc'), 'w') as f:
            f.write('This file is only referenced in comments.\n')
        with open(os.path.join(tmpdir, 'modules', 'truly-unused.adoc'), 'w') as f:
            f.write('This file has no references at all.\n')
        with open(os.path.join(tmpdir, 'modules', 'used.adoc'), 'w') as f:
            f.write('This file is properly used.\n')
        with open(os.path.join(tmpdir, 'assemblies', 'master.adoc'), 'w') as f:
            f.write('include::../modules/used.adoc[]\n')
            f.write('// include::../modules/commented-only.adoc[]\n')

        # Run WITH --commented flag
        result = run_script(['--commented'], cwd=tmpdir)

        # With --commented flag, both should be in unused list
        assert 'commented-only.adoc' in result.stdout
        assert 'truly-unused.adoc' in result.stdout
        # 'used.adoc' has an uncommented reference, so it must not be listed.
        # Compare whole line endings to avoid substring matches.
        output_lines = result.stdout.strip().split('\n')
        assert not any(line.endswith('modules/used.adoc') for line in output_lines)

        # No report should be generated when --commented is used. The archive
        # directory starts empty, so any report would come from this run.
        archive_dir = os.path.join(tmpdir, 'archive')
        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
        assert not os.path.exists(report_path), "No commented references report expected with --commented"
@@ -0,0 +1,110 @@
1
+ import os
2
+ import sys
3
+ sys.path.insert(0, os.path.dirname(__file__))
4
+ import tempfile
5
+ import shutil
6
+ import pytest
7
+ from test_fixture_archive_unused_images import setup_test_fixture
8
+ import subprocess
9
+
10
# Absolute path of the CLI script under test, one directory above this file.
SCRIPT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', 'archive_unused_images.py')
)


def run_script(args, cwd):
    """Run SCRIPT with the current interpreter and return the completed process.

    Args:
        args: list of command-line arguments appended after the script path.
        cwd: working directory for the child process.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout/stderr captured as text.
    """
    command = [sys.executable, SCRIPT] + args
    return subprocess.run(command, cwd=cwd, capture_output=True, text=True)
15
+
16
def test_archive_unused_images_basic():
    """Run the script without arguments on the shared fixture.

    Checks that the unused images are reported on stdout, that the used ones
    are not, and that a manifest file is written to the archive directory.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        setup_test_fixture(tmpdir)
        stdout = run_script([], cwd=tmpdir).stdout

        # Unused images must be reported
        for unused in ('images/unused1.png', 'images/unused2.jpg'):
            assert unused in stdout

        # Used images must not appear on any output line
        output_lines = stdout.strip().split('\n')
        for used in ('images/used1.png', 'images/used2.jpg'):
            assert all(used not in line for line in output_lines)

        # A manifest named unused-images-*.txt must exist in archive/
        manifest_files = [
            name
            for name in os.listdir(os.path.join(tmpdir, 'archive'))
            if name.startswith('unused-images-') and name.endswith('.txt')
        ]
        assert manifest_files
32
+
33
@pytest.mark.parametrize('exclude_args,should_find', [
    (['--exclude-file', './images/unused1.png'], 'unused2.jpg'),
    (['--exclude-dir', './images'], None),
])
def test_archive_unused_images_exclusions(exclude_args, should_find):
    """Check that --exclude-file / --exclude-dir remove images from the listing.

    ``should_find`` is a name expected to remain in stdout, or None when the
    whole ./images directory is excluded and neither unused image may appear.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        setup_test_fixture(tmpdir)
        stdout = run_script(exclude_args, cwd=tmpdir).stdout
        if not should_find:
            # Everything under './images' was excluded
            for excluded in ('images/unused1.png', 'images/unused2.jpg'):
                assert excluded not in stdout
        else:
            assert should_find in stdout
47
+
48
def test_archive_unused_images_commented_references():
    """Test that images referenced only in commented lines are tracked correctly.

    Without ``--commented``, an image referenced only from a commented-out
    line is expected to be left out of the unused list and to be recorded in
    archive/commented-image-references-report.txt together with the
    referencing file.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Directory layout
        for subdir in ('images', 'modules', 'archive'):
            os.makedirs(os.path.join(tmpdir, subdir), exist_ok=True)

        # Empty placeholder image files
        for image_name in ('commented-only.png', 'truly-unused.png', 'used.png'):
            open(os.path.join(tmpdir, 'images', image_name), 'w').close()

        # One live reference and one commented-out reference
        adoc_path = os.path.join(tmpdir, 'modules', 'test.adoc')
        with open(adoc_path, 'w') as f:
            f.writelines([
                'image::../images/used.png[]\n',
                '// image::../images/commented-only.png[]\n',
            ])

        # Run without --commented flag
        stdout = run_script([], cwd=tmpdir).stdout

        # Commented-only image counts as "used"; the unreferenced one does not
        assert 'commented-only.png' not in stdout
        assert 'truly-unused.png' in stdout

        # The report must exist and name both the image and the referencing file
        report_path = os.path.join(tmpdir, 'archive', 'commented-image-references-report.txt')
        assert os.path.exists(report_path), "Commented image references report should be created"

        with open(report_path, 'r') as f:
            report_content = f.read()
        assert 'commented-only.png' in report_content
        assert 'test.adoc' in report_content
82
+
83
def test_archive_unused_images_with_commented_flag():
    """Test that --commented flag includes images with commented-only references.

    With ``--commented``, both the commented-only image and the unreferenced
    image are expected on stdout, while the normally referenced image is not.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Directory layout
        for subdir in ('images', 'modules', 'archive'):
            os.makedirs(os.path.join(tmpdir, subdir), exist_ok=True)

        # Empty placeholder image files
        for image_name in ('commented-only.png', 'truly-unused.png', 'used.png'):
            open(os.path.join(tmpdir, 'images', image_name), 'w').close()

        # One live reference and one commented-out reference
        adoc_path = os.path.join(tmpdir, 'modules', 'test.adoc')
        with open(adoc_path, 'w') as f:
            f.writelines([
                'image::../images/used.png[]\n',
                '// image::../images/commented-only.png[]\n',
            ])

        # Run WITH --commented flag
        stdout = run_script(['--commented'], cwd=tmpdir).stdout

        # Both the commented-only and the unreferenced image are listed
        for expected in ('commented-only.png', 'truly-unused.png'):
            assert expected in stdout

        # 'used.png' has an uncommented reference; compare against line
        # endings so other names containing 'used.png' cannot match.
        output_lines = stdout.strip().split('\n')
        assert all(not line.endswith('images/used.png') for line in output_lines)
@@ -1,120 +0,0 @@
1
- # doc_utils/unused_adoc.py
2
-
3
- import os
4
- import re
5
- from .file_utils import collect_files, write_manifest_and_archive
6
- from .topic_map_parser import detect_repo_type, get_all_topic_map_references
7
-
8
def find_scan_directories(base_path='.', exclude_dirs=None):
    """
    Automatically find all 'modules' and 'assemblies' directories in the repository.

    Walks ``base_path`` top-down, skipping symlinked directories, hidden
    directories and common build/output directories ('node_modules', 'build',
    'dist', 'target'). A directory named 'modules' or 'assemblies' is reported
    only if it, or one of its non-symlinked subdirectories, contains a .adoc
    file. For every reported 'modules' directory, a contained 'rn'
    subdirectory with .adoc files is reported as an additional entry.

    Args:
        base_path: directory to start the walk from.
        exclude_dirs: optional iterable of directory paths. A walked directory
            that is one of these paths, or lies beneath one, is not searched.
            The comparison is made on whole path components, so excluding
            'docs' does not affect a sibling such as 'docs-old'. Note that the
            check applies to the directory being walked, not to its children:
            a 'modules' directory directly inside a walked directory is still
            reported even if it is itself listed in ``exclude_dirs``.

    Returns a list of paths to scan.
    """
    scan_dirs = []
    # Resolve the exclusions once up front; they do not change during the walk.
    excluded = [os.path.abspath(d) for d in (exclude_dirs or [])]
    skipped_names = ('node_modules', 'build', 'dist', 'target')

    for root, dirs, _files in os.walk(base_path):
        # Skip symbolic links to prevent issues
        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]

        # Skip excluded directories
        root_abs = os.path.abspath(root)
        if any(_is_same_or_inside(root_abs, ex) for ex in excluded):
            dirs[:] = []  # Don't descend into excluded directories
            continue

        # Skip hidden directories and common non-content directories
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skipped_names]

        # Look for modules and assemblies directories
        for d in dirs:
            if d in ('modules', 'assemblies'):
                dir_path = os.path.join(root, d)
                if _has_adoc_files(dir_path):
                    scan_dirs.append(dir_path)

    # Also check for modules/rn pattern if modules exists.
    # The list of modules directories is materialized first, so appending to
    # scan_dirs inside the loop does not affect the iteration.
    modules_dirs = [d for d in scan_dirs if os.path.basename(d) == 'modules']
    for modules_dir in modules_dirs:
        rn_dir = os.path.join(modules_dir, 'rn')
        if os.path.isdir(rn_dir) and _has_adoc_files(rn_dir):
            scan_dirs.append(rn_dir)

    return scan_dirs


def _has_adoc_files(directory):
    """Return True if directory or any non-symlinked subdirectory contains a .adoc file."""
    for subroot, subdirs, subfiles in os.walk(directory):
        # Skip symbolic links
        subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
        if any(f.endswith('.adoc') for f in subfiles):
            return True
    return False


def _is_same_or_inside(path, parent):
    """Return True if absolute ``path`` equals ``parent`` or lies beneath it."""
    # Appending the separator makes this a path-component match rather than a
    # raw string prefix match; rstrip keeps a filesystem-root parent working.
    return path == parent or path.startswith(parent.rstrip(os.sep) + os.sep)
62
-
63
def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
    """Report .adoc files in the scan directories that nothing references.

    A file counts as referenced when its basename matches the basename of an
    ``include::`` target found in any .adoc file under the current directory,
    or (for 'topic_map' repositories) the basename of a topic-map reference.

    Args:
        scan_dirs: directories to check; auto-discovered with
            find_scan_directories() when empty or None.
        archive_dir: passed through to write_manifest_and_archive.
        archive: passed through to write_manifest_and_archive.
        exclude_dirs: passed to directory discovery and to collect_files.
        exclude_files: passed to collect_files.

    Returns:
        The result of write_manifest_and_archive, or None when no scan
        directories were given and none could be discovered.
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    # Fall back to auto-discovery when the caller supplied no directories
    if not scan_dirs:
        scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
        if not scan_dirs:
            print("No 'modules' or 'assemblies' directories found containing .adoc files.")
            print("Please run this tool from your documentation repository root.")
            return
        print("Auto-discovered directories to scan:")
        for dir_path in sorted(scan_dirs):
            print(f" - {dir_path}")

    # Detect repository type
    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Candidate files: every .adoc file in the scan directories
    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    # Basenames of everything that is referenced from somewhere
    referenced_files = set()

    if repo_type == 'topic_map':
        # Topic-map repositories contribute their topic-map references
        for ref in get_all_topic_map_references():
            referenced_files.add(os.path.basename(ref))

    # include:: directives are scanned in every .adoc file under '.',
    # regardless of repository type
    include_pattern = re.compile(r'include::(.+?)\[')
    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    for file_path in adoc_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # Only the filename part of each include target is recorded
            referenced_files.update(
                os.path.basename(target) for target in include_pattern.findall(content)
            )
        except Exception as e:
            print(f"Warning: could not read {file_path}: {e}")

    # Unused = candidates whose basename is never referenced, de-duplicated
    # while keeping first-seen order
    unused_files = list(dict.fromkeys(
        path for path in asciidoc_files
        if os.path.basename(path) not in referenced_files
    ))

    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")

    return write_manifest_and_archive(
        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
@@ -1,31 +0,0 @@
1
- # doc_utils/unused_images.py
2
-
3
- import os
4
- import re
5
- from .file_utils import collect_files, write_manifest_and_archive
6
-
7
# File extensions treated as images when collecting candidates.
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}


def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
    """Report image files in ``scan_dirs`` that no .adoc file references.

    An image counts as referenced when its basename matches the basename of
    an ``image::`` / ``image:`` target, or of a double-quoted path ending in
    an image extension, found in any .adoc file under the current directory.

    Args:
        scan_dirs: directories searched for image files.
        archive_dir: passed through to write_manifest_and_archive.
        archive: passed through to write_manifest_and_archive.
        exclude_dirs: passed to collect_files.
        exclude_files: passed to collect_files.

    Returns:
        The result of write_manifest_and_archive.
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
    referenced_images = set()

    for adoc_file in adoc_files:
        try:
            with open(adoc_file, 'r', encoding='utf-8') as f:
                content = f.read()
            # Each match is a tuple with one entry per alternative; only the
            # alternative that matched is non-empty.
            referenced_images.update(
                os.path.basename(group)
                for match in image_ref_pattern.findall(content)
                for group in match
                if group
            )
        except Exception as e:
            print(f"Warning: could not read {adoc_file}: {e}")

    # Keep images whose basename is never referenced, de-duplicated in
    # first-seen order
    unused_images = list(dict.fromkeys(
        path for path in image_files
        if os.path.basename(path) not in referenced_images
    ))
    return write_manifest_and_archive(
        unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
    )
@@ -1,47 +0,0 @@
1
- import os
2
- import sys
3
- sys.path.insert(0, os.path.dirname(__file__))
4
- import tempfile
5
- import shutil
6
- import pytest
7
- from test_fixture_archive_unused_files import setup_test_fixture_archive_unused_files
8
- import subprocess
9
-
10
# Absolute path of the CLI script under test, one directory above this file.
SCRIPT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', 'archive_unused_files.py')
)


def run_script(args, cwd):
    """Run SCRIPT with the current interpreter and return the completed process.

    Args:
        args: list of command-line arguments appended after the script path.
        cwd: working directory for the child process.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout/stderr captured as text.
    """
    command = [sys.executable, SCRIPT] + args
    return subprocess.run(command, cwd=cwd, capture_output=True, text=True)
15
-
16
def test_archive_unused_files_basic():
    """Run the script without arguments on the shared fixture.

    Checks that the two unused files are reported on stdout, that
    'used.adoc' is not, and that a manifest is written to archive/.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        setup_test_fixture_archive_unused_files(tmpdir)
        stdout = run_script([], cwd=tmpdir).stdout

        for unused in ('unused1.adoc', 'unused2.adoc'):
            assert unused in stdout
        assert 'used.adoc' not in stdout

        # A manifest named to-archive-*.txt must exist in archive/
        manifest_files = [
            name
            for name in os.listdir(os.path.join(tmpdir, 'archive'))
            if name.startswith('to-archive-') and name.endswith('.txt')
        ]
        assert manifest_files
27
-
28
@pytest.mark.parametrize('exclude_args,should_find', [
    (['--exclude-file', './modules/unused1.adoc'], 'unused2.adoc'),
    (['--exclude-dir', './modules'], None),
])
def test_archive_unused_files_exclusions(exclude_args, should_find):
    """Check that --exclude-file / --exclude-dir are reflected in the manifest.

    ``should_find`` is a name expected to remain in the manifest, or None
    when ./modules is excluded and neither unused module may be listed.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        setup_test_fixture_archive_unused_files(tmpdir)
        # Only the manifest written by the script is inspected, not its output
        run_script(exclude_args, cwd=tmpdir)

        archive_dir = os.path.join(tmpdir, 'archive')
        manifest_files = [
            name
            for name in os.listdir(archive_dir)
            if name.startswith('to-archive-') and name.endswith('.txt')
        ]
        assert manifest_files

        with open(os.path.join(archive_dir, manifest_files[0]), 'r', encoding='utf-8') as f:
            manifest_content = f.read()

        if not should_find:
            # Everything under './modules' was excluded
            for excluded in ('modules/unused1.adoc', 'modules/unused2.adoc'):
                assert excluded not in manifest_content
        else:
            assert should_find in manifest_content
@@ -1,46 +0,0 @@
1
- import os
2
- import sys
3
- sys.path.insert(0, os.path.dirname(__file__))
4
- import tempfile
5
- import shutil
6
- import pytest
7
- from test_fixture_archive_unused_images import setup_test_fixture
8
- import subprocess
9
-
10
# Absolute path of the CLI script under test, one directory above this file.
SCRIPT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', 'archive_unused_images.py')
)


def run_script(args, cwd):
    """Run SCRIPT with the current interpreter and return the completed process.

    Args:
        args: list of command-line arguments appended after the script path.
        cwd: working directory for the child process.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout/stderr captured as text.
    """
    command = [sys.executable, SCRIPT] + args
    return subprocess.run(command, cwd=cwd, capture_output=True, text=True)
15
-
16
def test_archive_unused_images_basic():
    """Run the script without arguments on the shared fixture.

    Checks that the unused images are reported on stdout, that the used ones
    are not, and that a manifest file is written to the archive directory.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        setup_test_fixture(tmpdir)
        stdout = run_script([], cwd=tmpdir).stdout

        # Unused images must be reported
        for unused in ('images/unused1.png', 'images/unused2.jpg'):
            assert unused in stdout

        # Used images must not appear on any output line
        output_lines = stdout.strip().split('\n')
        for used in ('images/used1.png', 'images/used2.jpg'):
            assert all(used not in line for line in output_lines)

        # A manifest named unused-images-*.txt must exist in archive/
        manifest_files = [
            name
            for name in os.listdir(os.path.join(tmpdir, 'archive'))
            if name.startswith('unused-images-') and name.endswith('.txt')
        ]
        assert manifest_files
32
-
33
@pytest.mark.parametrize('exclude_args,should_find', [
    (['--exclude-file', './images/unused1.png'], 'unused2.jpg'),
    (['--exclude-dir', './images'], None),
])
def test_archive_unused_images_exclusions(exclude_args, should_find):
    """Check that --exclude-file / --exclude-dir remove images from the listing.

    ``should_find`` is a name expected to remain in stdout, or None when the
    whole ./images directory is excluded and neither unused image may appear.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        setup_test_fixture(tmpdir)
        stdout = run_script(exclude_args, cwd=tmpdir).stdout
        if not should_find:
            # Everything under './images' was excluded
            for excluded in ('images/unused1.png', 'images/unused2.jpg'):
                assert excluded not in stdout
        else:
            assert should_find in stdout