rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. archive_unused_files.py +18 -5
  2. archive_unused_images.py +9 -2
  3. callout_lib/__init__.py +22 -0
  4. callout_lib/converter_bullets.py +103 -0
  5. callout_lib/converter_comments.py +295 -0
  6. callout_lib/converter_deflist.py +134 -0
  7. callout_lib/detector.py +364 -0
  8. callout_lib/table_parser.py +804 -0
  9. check_published_links.py +1083 -0
  10. check_scannability.py +6 -0
  11. check_source_directives.py +101 -0
  12. convert_callouts_interactive.py +567 -0
  13. convert_callouts_to_deflist.py +628 -0
  14. convert_freemarker_to_asciidoc.py +288 -0
  15. convert_tables_to_deflists.py +479 -0
  16. doc_utils/convert_freemarker_to_asciidoc.py +708 -0
  17. doc_utils/duplicate_content.py +409 -0
  18. doc_utils/duplicate_includes.py +347 -0
  19. doc_utils/extract_link_attributes.py +618 -0
  20. doc_utils/format_asciidoc_spacing.py +285 -0
  21. doc_utils/insert_abstract_role.py +220 -0
  22. doc_utils/inventory_conditionals.py +164 -0
  23. doc_utils/missing_source_directive.py +211 -0
  24. doc_utils/replace_link_attributes.py +187 -0
  25. doc_utils/spinner.py +119 -0
  26. doc_utils/unused_adoc.py +150 -22
  27. doc_utils/unused_attributes.py +218 -6
  28. doc_utils/unused_images.py +81 -9
  29. doc_utils/validate_links.py +576 -0
  30. doc_utils/version.py +8 -0
  31. doc_utils/version_check.py +243 -0
  32. doc_utils/warnings_report.py +237 -0
  33. doc_utils_cli.py +158 -0
  34. extract_link_attributes.py +120 -0
  35. find_duplicate_content.py +209 -0
  36. find_duplicate_includes.py +198 -0
  37. find_unused_attributes.py +84 -6
  38. format_asciidoc_spacing.py +134 -0
  39. insert_abstract_role.py +163 -0
  40. inventory_conditionals.py +53 -0
  41. replace_link_attributes.py +214 -0
  42. rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
  43. rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
  44. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
  45. rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
  46. rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
  47. validate_links.py +213 -0
  48. rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
  49. rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
  50. rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
  51. rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
  52. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
doc_utils/unused_adoc.py CHANGED
@@ -5,49 +5,177 @@ import re
5
5
  from .file_utils import collect_files, write_manifest_and_archive
6
6
  from .topic_map_parser import detect_repo_type, get_all_topic_map_references
7
7
 
8
- def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
8
def _dir_contains_adoc(directory):
    """Return True if *directory* or any non-symlinked subdirectory holds a .adoc file."""
    for subroot, subdirs, subfiles in os.walk(directory):
        # Prune symlinked directories in place so os.walk never follows them
        subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
        if any(f.endswith('.adoc') for f in subfiles):
            return True
    return False


def _is_under_any(path, abs_parents):
    """Return True if *path* equals, or lies beneath, one of the absolute paths in *abs_parents*."""
    abs_path = os.path.abspath(path)
    for parent in abs_parents:
        # Compare against "parent + separator" so that excluding "docs" does
        # not also exclude a sibling such as "docs-old".
        prefix = parent if parent.endswith(os.sep) else parent + os.sep
        if abs_path == parent or abs_path.startswith(prefix):
            return True
    return False


def find_scan_directories(base_path='.', exclude_dirs=None):
    """
    Automatically find all 'modules' and 'assemblies' directories in the repository.

    Walks ``base_path`` without following symbolic links, skipping hidden
    directories, common non-content directories (node_modules, build, dist,
    target) and anything at or below a path listed in ``exclude_dirs``.
    A directory is reported only if it, or one of its subdirectories,
    contains at least one ``.adoc`` file. For every reported ``modules``
    directory, an ``rn`` subdirectory containing ``.adoc`` files is reported
    as well.

    Args:
        base_path: Directory to start the search from.
        exclude_dirs: Optional iterable of directory paths to leave out.

    Returns a list of paths to scan.
    """
    skip_names = ('node_modules', 'build', 'dist', 'target')
    abs_excludes = [os.path.abspath(d) for d in (exclude_dirs or [])]
    scan_dirs = []

    for root, dirs, _files in os.walk(base_path):
        # The starting directory itself may be excluded
        if _is_under_any(root, abs_excludes):
            dirs[:] = []  # Don't descend into excluded directories
            continue

        # Prune symlinks, hidden/non-content directories and excluded children
        # before looking at them, so an excluded directory is never reported.
        dirs[:] = [
            d for d in dirs
            if not os.path.islink(os.path.join(root, d))
            and not d.startswith('.')
            and d not in skip_names
            and not _is_under_any(os.path.join(root, d), abs_excludes)
        ]

        # Look for modules and assemblies directories
        for d in dirs:
            if d in ('modules', 'assemblies'):
                dir_path = os.path.join(root, d)
                if _dir_contains_adoc(dir_path):
                    scan_dirs.append(dir_path)

    # Also check for modules/rn pattern if modules exists
    modules_dirs = [d for d in scan_dirs if os.path.basename(d) == 'modules']
    for modules_dir in modules_dirs:
        rn_dir = os.path.join(modules_dir, 'rn')
        if (
            os.path.isdir(rn_dir)
            and not _is_under_any(rn_dir, abs_excludes)
            and _dir_contains_adoc(rn_dir)
        ):
            scan_dirs.append(rn_dir)

    return scan_dirs
62
+
63
def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
    """
    Find .adoc files in the scan directories that no include (or topic map) references.

    Files are matched by basename. References are gathered from every .adoc
    file under the current directory and, for 'topic_map' repositories, from
    the topic maps as well.

    Args:
        scan_dirs: Directories whose .adoc files are candidates. When empty or
            None, directories are auto-discovered with find_scan_directories().
        archive_dir: Directory passed to write_manifest_and_archive(); the
            commented-references report is also written here.
        archive: Forwarded to write_manifest_and_archive().
        exclude_dirs: Directories to exclude (forwarded to collect_files()).
        exclude_files: Files to exclude (forwarded to collect_files()).
        include_commented: When True, a file referenced only from commented-out
            include lines is treated as unused. When False (default) such a
            file counts as used and is listed in a separate report.

    Returns:
        The result of write_manifest_and_archive(), or None when no scan
        directories were given and none could be discovered.
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    # If no scan_dirs provided, auto-discover them
    if not scan_dirs:
        scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
        if not scan_dirs:
            print("No 'modules' or 'assemblies' directories found containing .adoc files.")
            print("Please run this tool from your documentation repository root.")
            return None
        print(f"Auto-discovered directories to scan:")
        for dir_path in sorted(scan_dirs):
            print(f" - {dir_path}")

    # Detect repository type
    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Collect all .adoc files in scan directories
    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    # Track which files are referenced (uncommented and commented separately)
    referenced_files = set()   # Files in uncommented includes
    commented_only_files = {}  # Files referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}

    if repo_type == 'topic_map':
        # For OpenShift-docs style repos, get references from topic maps
        topic_references = get_all_topic_map_references()
        # Convert to basenames for comparison
        referenced_files.update(os.path.basename(ref) for ref in topic_references)

    # Patterns for finding includes (both commented and uncommented)
    include_pattern = re.compile(r'include::(.+?)\[')
    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')

    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    for file_path in adoc_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            for line_num, line in enumerate(lines, 1):
                commented_match = commented_include_pattern.search(line)
                if commented_match:
                    include_basename = os.path.basename(commented_match.group(1))
                    # Only track the commented reference while no real
                    # reference is known. Without this guard, a commented
                    # include seen AFTER an uncommented one (or after a
                    # topic-map reference) would wrongly be reported as
                    # "referenced only in commented lines".
                    if include_basename not in referenced_files:
                        commented_only_files.setdefault(include_basename, []).append(
                            (file_path, line_num, line.strip())
                        )
                    continue

                uncommented_match = include_pattern.search(line)
                if uncommented_match:
                    include_basename = os.path.basename(uncommented_match.group(1))
                    referenced_files.add(include_basename)
                    # A real reference supersedes any commented ones seen earlier
                    commented_only_files.pop(include_basename, None)
        except Exception as e:
            # Best effort: an unreadable file must not abort the whole scan
            print(f"Warning: could not read {file_path}: {e}")

    # Determine which files are unused based on the include_commented flag
    commented_only_unused = []
    if include_commented:
        # --commented: only uncommented references make a file "used"
        considered_used = referenced_files
    else:
        # Default: files referenced only in commented lines still count as
        # "used"; they are collected separately for the report.
        considered_used = referenced_files.union(commented_only_files)
        for basename, references in commented_only_files.items():
            for f in asciidoc_files:
                if os.path.basename(f) == basename:
                    commented_only_unused.append((f, references))

    unused_files = [f for f in asciidoc_files if os.path.basename(f) not in considered_used]
    unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates, keep order

    # Print summary
    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")

    # Generate detailed report for commented-only references
    if commented_only_unused:
        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
        os.makedirs(archive_dir, exist_ok=True)

        with open(report_path, 'w', encoding='utf-8') as report:
            report.write("Files Referenced Only in Commented Lines\n")
            report.write("=" * 70 + "\n\n")
            report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
            report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
            report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
            report.write("-" * 70 + "\n\n")

            for report_file, references in sorted(commented_only_unused, key=lambda item: item[0]):
                report.write(f"File: {report_file}\n")
                report.write(f"Referenced in {len(references)} commented line(s):\n")
                for ref_file, line_num, line_text in references:
                    report.write(f" {ref_file}:{line_num}\n")
                    report.write(f" {line_text}\n")
                report.write("\n")

        print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
        print(f" Detailed report saved to: {report_path}")
        print(f" These files are considered 'used' and will NOT be archived by default.")
        print(f" To include them in the archive operation, use the --commented flag.\n")

    return write_manifest_and_archive(
        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
@@ -6,19 +6,62 @@ Functions:
6
6
  - find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
7
7
  - scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
8
8
  - find_unused_attributes: Main function to return unused attributes.
9
+ - find_attributes_files: Find all potential attributes files in the repository.
9
10
  """
10
11
 
11
12
  import os
12
13
  import re
13
- from typing import Set, List
14
+ from pathlib import Path
15
+ from typing import Set, List, Optional
14
16
 
15
17
def parse_attributes_file(attr_file: str) -> Set[str]:
    """
    Return the names of the attributes defined in an AsciiDoc attributes file.

    A definition is a line that, after stripping surrounding whitespace,
    starts with ``:name:`` where ``name`` consists of word characters and
    hyphens. Names listed in IGNORED_ATTRIBUTES are left out.

    Args:
        attr_file: Path to the attributes file.

    Returns:
        Set of attribute names found in the file.

    Raises:
        FileNotFoundError: If ``attr_file`` does not exist.
        ValueError: If ``attr_file`` is not a regular file, or cannot be
            decoded as UTF-8.
        PermissionError: If the file cannot be read.
    """
    # AsciiDoc configuration attributes that control the processor itself
    # These should be ignored as they won't appear in content
    IGNORED_ATTRIBUTES = frozenset({
        'data-uri',
        'doctype',
        'experimental',
        'idprefix',
        'imagesdir',
        'includes',
        'sectanchors',
        'sectlinks',
        'source-highlighter',
        'linkattrs',
        'toclevels',
        'idseparator',
        'icons',
        'iconsdir',
        'generated-dir',
        'code-examples',
        'doc-guides',
        'doc-examples',
    })

    # Check if file exists
    if not os.path.exists(attr_file):
        raise FileNotFoundError(f"Attributes file not found: {attr_file}")

    # Check if it's a file (not a directory)
    if not os.path.isfile(attr_file):
        raise ValueError(f"Path is not a file: {attr_file}")

    definition = re.compile(r'^:([\w-]+):')
    attributes = set()

    try:
        with open(attr_file, 'r', encoding='utf-8') as f:
            for line in f:
                match = definition.match(line.strip())
                # Skip ignored configuration attributes
                if match and match.group(1) not in IGNORED_ATTRIBUTES:
                    attributes.add(match.group(1))
    except PermissionError as e:
        # Re-raise with a friendlier message, keeping the original as the cause
        raise PermissionError(f"Permission denied reading file: {attr_file}") from e
    except UnicodeDecodeError as e:
        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}") from e

    return attributes
23
66
 
24
67
  def find_adoc_files(root_dir: str) -> List[str]:
@@ -33,18 +76,187 @@ def find_adoc_files(root_dir: str) -> List[str]:
33
76
 
34
77
def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set[str]:
    """
    Return the subset of ``attributes`` that is used somewhere in ``adoc_files``.

    An attribute counts as used when it appears as a ``{name}`` reference or
    as a target of an ``ifdef::``, ``ifndef::`` or ``endif::`` directive.
    Directive targets may name several attributes joined with ``,`` or ``+``
    (for example ``ifdef::a,b[]``); each of them is counted.

    Args:
        adoc_files: Paths of the files to scan (read as UTF-8).
        attributes: Attribute names to look for.

    Returns:
        Set of attribute names from ``attributes`` that were found.
    """
    # Pattern for attribute references: {attribute-name}
    attr_pattern = re.compile(r'\{([\w-]+)\}')
    # Conditional directives: ifdef::attr[], ifndef::a,b[], endif::a+b[].
    # The target may be a comma- or plus-separated list, so capture the
    # separators too and split afterwards.
    conditional_pattern = re.compile(r'(?:ifdef|ifndef|endif)::([\w,+-]+)\[')
    target_separator = re.compile(r'[,+]')

    used = set()
    for file in adoc_files:
        with open(file, 'r', encoding='utf-8') as f:
            for line in f:
                candidates = attr_pattern.findall(line)
                for target in conditional_pattern.findall(line):
                    candidates.extend(target_separator.split(target))
                used.update(name for name in candidates if name in attributes)
    return used
44
96
 
97
def find_attributes_files(root_dir: str = '.') -> List[str]:
    """
    Find all attributes.adoc-style files in the repository.

    Searches ``root_dir`` recursively for ``.adoc`` files whose name starts
    or ends with ``attributes``. Matches located in hidden directories or in
    common build directories below ``root_dir`` are skipped, as are hidden
    files.

    Args:
        root_dir: Directory to search.

    Returns:
        Sorted list of matching paths (as strings), without duplicates.
    """
    root_path = Path(root_dir)
    skipped_dirs = {'target', 'build', 'node_modules', '.archive'}

    # Common attribute file patterns
    patterns = ['**/attributes.adoc', '**/attributes*.adoc', '**/*attributes.adoc', '**/*-attributes.adoc']

    found = set()  # the patterns overlap, so collect into a set
    for pattern in patterns:
        for path in root_path.glob(pattern):
            # Judge only the part of the path BELOW root_dir. Looking at the
            # full path would reject every match whenever root_dir itself is
            # '..' or lives under a dot-directory or a directory named 'build'.
            parts = path.relative_to(root_path).parts
            if any(part.startswith('.') or part in skipped_dirs for part in parts):
                continue
            found.add(str(path))

    # Sort for consistent ordering
    return sorted(found)
119
+
120
+
121
def _prompt_for_attributes_path() -> Optional[str]:
    """Ask for a custom attributes file path; return it if it is an existing file, else None."""
    response = input("Enter the path to your attributes file: ").strip()
    if os.path.isfile(response):
        return response
    print(f"Error: File not found: {response}")
    return None


def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
    """
    Interactive selection of attributes file from a list.

    With a single candidate the user is asked to confirm it; any other answer
    leads to a prompt for a custom path. With several candidates a numbered
    menu is shown, including an entry for a custom path, and the prompt
    repeats until a valid choice is made or the user quits with 'q'.

    Args:
        attributes_files: Candidate attributes file paths.

    Returns:
        The chosen path, or None if the list is empty, the user quits, the
        custom path is not an existing file (single-candidate case), or input
        ends (EOF) before a choice is made.
    """
    if not attributes_files:
        return None

    option_count = len(attributes_files) + 1  # candidates plus "custom path"

    try:
        if len(attributes_files) == 1:
            print(f"Found attributes file: {attributes_files[0]}")
            response = input("Use this file? (y/n): ").strip().lower()
            if response in ('y', 'yes'):
                return attributes_files[0]
            return _prompt_for_attributes_path()

        # Multiple files found
        print("\nFound multiple attributes files:")
        for i, file_path in enumerate(attributes_files, 1):
            print(f" {i}. {file_path}")
        print(f" {option_count}. Enter custom path")

        while True:
            response = input(f"\nSelect option (1-{option_count}) or 'q' to quit: ").strip()
            if response.lower() == 'q':
                return None

            try:
                choice = int(response)
            except ValueError:
                print("Invalid input. Please enter a number.")
                continue

            if 1 <= choice <= len(attributes_files):
                return attributes_files[choice - 1]
            if choice == option_count:
                custom_path = _prompt_for_attributes_path()
                if custom_path is not None:
                    return custom_path
                # Invalid custom path: fall through and show the menu prompt again
            else:
                print(f"Invalid choice. Please enter a number between 1 and {option_count}")
    except EOFError:
        # stdin was closed (e.g. non-interactive run): treat as "no selection"
        # instead of crashing with a traceback.
        return None
166
+
167
+
45
168
def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
    """
    Return the attributes defined in ``attr_file`` that no .adoc file uses.

    Args:
        attr_file: Path to the attributes file to check.
        adoc_root: Directory searched for .adoc files.

    Returns:
        Sorted list of attribute names that are defined but never used.
    """
    declared = parse_attributes_file(attr_file)
    referenced = scan_for_attribute_usage(find_adoc_files(adoc_root), declared)
    return sorted(declared.difference(referenced))
174
+
175
+
176
def comment_out_unused_attributes(attr_file: str, unused_attrs: List[str]) -> int:
    """
    Comment out unused attributes in the attributes file.

    Every line that defines one of ``unused_attrs`` is prefixed with
    ``// Unused ``. When the definition's value is wrapped over several lines
    (each wrapped line ends with a space followed by a backslash), the
    continuation lines are prefixed too, so no fragment of the value is left
    behind as stray content. The file is rewritten only if something changed.

    Args:
        attr_file: Path to the attributes file
        unused_attrs: List of unused attribute names

    Returns:
        Number of attributes commented out
    """
    if not unused_attrs:
        return 0

    # Read the file
    with open(attr_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Create a set for faster lookup
    unused_set = set(unused_attrs)
    definition = re.compile(r'^:([\w-]+):')
    commented_count = 0
    in_continuation = False  # True while inside a wrapped value being commented out

    new_lines = []
    for line in lines:
        wraps = line.rstrip().endswith(' \\')  # value continues on the next line

        if in_continuation:
            # Continuation of an attribute that is being commented out
            new_lines.append(f'// Unused {line}')
            in_continuation = wraps
            continue

        match = definition.match(line)
        if match and match.group(1) in unused_set:
            # Comment out this line
            new_lines.append(f'// Unused {line}')
            commented_count += 1
            in_continuation = wraps
        else:
            new_lines.append(line)

    # Write back to the file only when a definition was actually commented out
    if commented_count:
        with open(attr_file, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)

    return commented_count
215
+
216
+
217
def remove_unused_attributes(attr_file: str, unused_attrs: Optional[List[str]] = None) -> int:
    """
    Remove unused attributes from the attributes file.

    This removes lines that either:
    - Define an attribute in the unused_attrs list, or
    - Are already marked with "// Unused" prefix

    When a removed definition wraps its value over several lines (each
    wrapped line ends with a space followed by a backslash), the continuation
    lines are removed with it so no fragment of the value is left behind.
    The file is rewritten only if at least one line was removed.

    Args:
        attr_file: Path to the attributes file
        unused_attrs: Optional list of unused attribute names. If None, only
            removes lines already marked with "// Unused".

    Returns:
        Number of lines removed
    """
    marker = '// Unused '

    # Read the file
    with open(attr_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Create a set for faster lookup
    unused_set = set(unused_attrs) if unused_attrs else set()
    definition = re.compile(r'^:([\w-]+):')
    removed_count = 0
    in_continuation = False  # True while inside a wrapped value being removed

    new_lines = []
    for line in lines:
        wraps = line.rstrip().endswith(' \\')  # value continues on the next line

        if in_continuation:
            # Continuation of a definition that was just removed
            removed_count += 1
            in_continuation = wraps
            continue

        if line.startswith(marker):
            # Already marked as unused. If the marked text is a wrapped
            # definition, its continuation lines belong to it as well.
            removed_count += 1
            in_continuation = wraps and definition.match(line[len(marker):]) is not None
            continue

        match = definition.match(line) if unused_set else None
        if match and match.group(1) in unused_set:
            removed_count += 1
            in_continuation = wraps
            continue

        new_lines.append(line)

    # Write back to the file only when something was removed
    if removed_count:
        with open(attr_file, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)

    return removed_count
@@ -6,26 +6,98 @@ from .file_utils import collect_files, write_manifest_and_archive
6
6
 
7
7
  IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
8
8
 
9
- def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
9
def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
    """
    Find image files in the scan directories that no .adoc file references.

    Images are matched by basename against ``image::``/``image:`` macros and
    quoted image file names found in every .adoc file under the current
    directory.

    Args:
        scan_dirs: Directories whose image files are candidates.
        archive_dir: Directory passed to write_manifest_and_archive(); the
            commented-image-references report is also written here.
        archive: Forwarded to write_manifest_and_archive().
        exclude_dirs: Directories to exclude (forwarded to collect_files()).
        exclude_files: Files to exclude (forwarded to collect_files()).
        include_commented: When True, an image referenced only from lines
            starting with ``//`` is treated as unused. When False (default)
            such an image counts as used and is listed in a separate report.

    Returns:
        The result of write_manifest_and_archive().
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    # Track which images are referenced (uncommented and commented separately)
    referenced_images = set()   # Images in uncommented references
    commented_only_images = {}  # Images referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}

    # Patterns for finding image references (both commented and uncommented)
    image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
    commented_line_pattern = re.compile(r'^\s*//')

    for adoc_file in adoc_files:
        try:
            with open(adoc_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            for line_num, line in enumerate(lines, 1):
                # Check if this line is commented
                is_commented = commented_line_pattern.match(line)

                # Find all image references in the line
                for match in image_ref_pattern.findall(line):
                    for group in match:
                        if not group:
                            continue
                        image_basename = os.path.basename(group)

                        if is_commented:
                            # Only track the commented reference while no real
                            # reference is known. Without this guard, a
                            # commented reference seen AFTER an uncommented
                            # one would wrongly be reported as "referenced
                            # only in commented lines".
                            if image_basename not in referenced_images:
                                commented_only_images.setdefault(image_basename, []).append(
                                    (adoc_file, line_num, line.strip())
                                )
                        else:
                            referenced_images.add(image_basename)
                            # A real reference supersedes any commented ones seen earlier
                            commented_only_images.pop(image_basename, None)
        except Exception as e:
            # Best effort: an unreadable file must not abort the whole scan
            print(f"Warning: could not read {adoc_file}: {e}")

    # Determine which images are unused based on the include_commented flag
    commented_only_unused = []
    if include_commented:
        # --commented: only uncommented references make an image "used"
        considered_used = referenced_images
    else:
        # Default: images referenced only in commented lines still count as
        # "used"; they are collected separately for the report.
        considered_used = referenced_images.union(commented_only_images)
        for basename, references in commented_only_images.items():
            for f in image_files:
                if os.path.basename(f) == basename:
                    commented_only_unused.append((f, references))

    unused_images = [f for f in image_files if os.path.basename(f) not in considered_used]
    unused_images = list(dict.fromkeys(unused_images))  # Remove duplicates, keep order

    # Generate detailed report for commented-only references
    if commented_only_unused:
        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
        os.makedirs(archive_dir, exist_ok=True)

        with open(report_path, 'w', encoding='utf-8') as report:
            report.write("Images Referenced Only in Commented Lines\n")
            report.write("=" * 70 + "\n\n")
            report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
            report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
            report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
            report.write("-" * 70 + "\n\n")

            for image_path, references in sorted(commented_only_unused, key=lambda item: item[0]):
                report.write(f"Image: {image_path}\n")
                report.write(f"Referenced in {len(references)} commented line(s):\n")
                for ref_file, line_num, line_text in references:
                    report.write(f" {ref_file}:{line_num}\n")
                    report.write(f" {line_text}\n")
                report.write("\n")

        print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
        print(f" Detailed report saved to: {report_path}")
        print(f" These images are considered 'used' and will NOT be archived by default.")
        print(f" To include them in the archive operation, use the --commented flag.\n")

    return write_manifest_and_archive(
        unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
    )