rolfedh-doc-utils 0.1.34__py3-none-any.whl → 0.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
archive_unused_files.py CHANGED
@@ -22,6 +22,7 @@ def main():
22
22
  epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
23
23
  )
24
24
  parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
25
+ parser.add_argument('--commented', action='store_true', help='Include files that are referenced only in commented lines in the archive operation.')
25
26
  parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
26
27
  parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
27
28
  parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
@@ -35,13 +36,13 @@ def main():
35
36
 
36
37
  exclude_dirs = list(args.exclude_dir)
37
38
  exclude_files = list(args.exclude_file)
38
-
39
+
39
40
  if args.exclude_list:
40
41
  list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
41
42
  exclude_dirs.extend(list_dirs)
42
43
  exclude_files.extend(list_files)
43
44
 
44
- find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
45
+ find_unused_adoc(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
45
46
 
46
47
  if __name__ == '__main__':
47
48
  main()
archive_unused_images.py CHANGED
@@ -18,6 +18,7 @@ def main():
18
18
  check_version_on_startup()
19
19
  parser = argparse.ArgumentParser(description='Archive unused image files.')
20
20
  parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
21
+ parser.add_argument('--commented', action='store_true', help='Include images that are referenced only in commented lines in the archive operation.')
21
22
  parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
22
23
  parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
23
24
  parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
@@ -29,13 +30,13 @@ def main():
29
30
 
30
31
  exclude_dirs = list(args.exclude_dir)
31
32
  exclude_files = list(args.exclude_file)
32
-
33
+
33
34
  if args.exclude_list:
34
35
  list_dirs, list_files = parse_exclude_list_file(args.exclude_list)
35
36
  exclude_dirs.extend(list_dirs)
36
37
  exclude_files.extend(list_files)
37
38
 
38
- find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files)
39
+ find_unused_images(scan_dirs, archive_dir, args.archive, exclude_dirs, exclude_files, args.commented)
39
40
 
40
41
  if __name__ == '__main__':
41
42
  main()
doc_utils/unused_adoc.py CHANGED
@@ -60,10 +60,10 @@ def find_scan_directories(base_path='.', exclude_dirs=None):
60
60
 
61
61
  return scan_dirs
62
62
 
63
- def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
63
+ def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
64
64
  # Print safety warning
65
65
  print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
66
-
66
+
67
67
  # If no scan_dirs provided, auto-discover them
68
68
  if not scan_dirs:
69
69
  scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
@@ -75,46 +75,107 @@ def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exc
75
75
  print("No 'modules' or 'assemblies' directories found containing .adoc files.")
76
76
  print("Please run this tool from your documentation repository root.")
77
77
  return
78
-
78
+
79
79
  # Detect repository type
80
80
  repo_type = detect_repo_type()
81
81
  print(f"Detected repository type: {repo_type}")
82
-
82
+
83
83
  # Collect all .adoc files in scan directories
84
84
  asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
85
-
86
- # Track which files are referenced
87
- referenced_files = set()
88
-
85
+
86
+ # Track which files are referenced (uncommented and commented separately)
87
+ referenced_files = set() # Files in uncommented includes
88
+ commented_only_files = {} # Files referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
89
+
89
90
  if repo_type == 'topic_map':
90
91
  # For OpenShift-docs style repos, get references from topic maps
91
92
  topic_references = get_all_topic_map_references()
92
93
  # Convert to basenames for comparison
93
94
  referenced_files.update(os.path.basename(ref) for ref in topic_references)
94
-
95
- # Always scan for include:: directives in all .adoc files
95
+
96
+ # Patterns for finding includes (both commented and uncommented)
96
97
  include_pattern = re.compile(r'include::(.+?)\[')
98
+ commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')
99
+
97
100
  adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
98
-
101
+
99
102
  for file_path in adoc_files:
100
103
  try:
101
104
  with open(file_path, 'r', encoding='utf-8') as f:
102
- content = f.read()
103
- includes = include_pattern.findall(content)
104
- # Extract just the filename from the include path
105
- for include in includes:
106
- # Handle both relative and absolute includes
107
- include_basename = os.path.basename(include)
108
- referenced_files.add(include_basename)
105
+ lines = f.readlines()
106
+
107
+ for line_num, line in enumerate(lines, 1):
108
+ # Check if this is a commented include
109
+ commented_match = commented_include_pattern.search(line)
110
+ if commented_match:
111
+ include_basename = os.path.basename(commented_match.group(1))
112
+ # Track location of commented reference
113
+ if include_basename not in commented_only_files:
114
+ commented_only_files[include_basename] = []
115
+ commented_only_files[include_basename].append((file_path, line_num, line.strip()))
116
+ else:
117
+ # Check for uncommented includes
118
+ uncommented_match = include_pattern.search(line)
119
+ if uncommented_match:
120
+ include_basename = os.path.basename(uncommented_match.group(1))
121
+ referenced_files.add(include_basename)
122
+ # If we found an uncommented reference, remove from commented_only tracking
123
+ if include_basename in commented_only_files:
124
+ del commented_only_files[include_basename]
109
125
  except Exception as e:
110
126
  print(f"Warning: could not read {file_path}: {e}")
111
-
112
- # Find unused files by comparing basenames
113
- unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
127
+
128
+ # Determine which files are unused based on the include_commented flag
129
+ if include_commented:
130
+ # When --commented is used: treat files with commented-only references as unused
131
+ # Only files with uncommented references are considered "used"
132
+ unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
133
+ commented_only_unused = []
134
+ else:
135
+ # Default behavior: files referenced only in commented lines are considered "used"
136
+ # They should NOT be in the unused list, but we track them for reporting
137
+ all_referenced = referenced_files.union(set(commented_only_files.keys()))
138
+ unused_files = [f for f in asciidoc_files if os.path.basename(f) not in all_referenced]
139
+
140
+ # Generate list of files referenced only in comments for the report
141
+ commented_only_unused = []
142
+ for basename, references in commented_only_files.items():
143
+ # Find the full path for this basename in asciidoc_files
144
+ matching_files = [f for f in asciidoc_files if os.path.basename(f) == basename]
145
+ for f in matching_files:
146
+ commented_only_unused.append((f, references))
147
+
114
148
  unused_files = list(dict.fromkeys(unused_files)) # Remove duplicates
115
-
149
+
150
+ # Print summary
116
151
  print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
117
-
152
+
153
+ # Generate detailed report for commented-only references
154
+ if commented_only_unused and not include_commented:
155
+ report_path = os.path.join(archive_dir, 'commented-references-report.txt')
156
+ os.makedirs(archive_dir, exist_ok=True)
157
+
158
+ with open(report_path, 'w', encoding='utf-8') as report:
159
+ report.write("Files Referenced Only in Commented Lines\n")
160
+ report.write("=" * 70 + "\n\n")
161
+ report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
162
+ report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
163
+ report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
164
+ report.write("-" * 70 + "\n\n")
165
+
166
+ for file_path, references in sorted(commented_only_unused):
167
+ report.write(f"File: {file_path}\n")
168
+ report.write(f"Referenced in {len(references)} commented line(s):\n")
169
+ for ref_file, line_num, line_text in references:
170
+ report.write(f" {ref_file}:{line_num}\n")
171
+ report.write(f" {line_text}\n")
172
+ report.write("\n")
173
+
174
+ print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
175
+ print(f" Detailed report saved to: {report_path}")
176
+ print(f" These files are considered 'used' and will NOT be archived by default.")
177
+ print(f" To include them in the archive operation, use the --commented flag.\n")
178
+
118
179
  return write_manifest_and_archive(
119
180
  unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
120
181
  )
@@ -6,26 +6,98 @@ from .file_utils import collect_files, write_manifest_and_archive
6
6
 
7
7
  IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
8
8
 
9
- def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
9
+ def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
10
10
  # Print safety warning
11
11
  print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
12
-
12
+
13
13
  image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
14
14
  adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
15
- referenced_images = set()
15
+
16
+ # Track which images are referenced (uncommented and commented separately)
17
+ referenced_images = set() # Images in uncommented references
18
+ commented_only_images = {} # Images referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
19
+
20
+ # Patterns for finding image references (both commented and uncommented)
16
21
  image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
22
+ commented_line_pattern = re.compile(r'^\s*//')
23
+
17
24
  for adoc_file in adoc_files:
18
25
  try:
19
26
  with open(adoc_file, 'r', encoding='utf-8') as f:
20
- content = f.read()
21
- for match in image_ref_pattern.findall(content):
22
- for group in match:
23
- if group:
24
- referenced_images.add(os.path.basename(group))
27
+ lines = f.readlines()
28
+
29
+ for line_num, line in enumerate(lines, 1):
30
+ # Check if this line is commented
31
+ is_commented = commented_line_pattern.match(line)
32
+
33
+ # Find all image references in the line
34
+ for match in image_ref_pattern.findall(line):
35
+ for group in match:
36
+ if group:
37
+ image_basename = os.path.basename(group)
38
+
39
+ if is_commented:
40
+ # Track location of commented reference
41
+ if image_basename not in commented_only_images:
42
+ commented_only_images[image_basename] = []
43
+ commented_only_images[image_basename].append((adoc_file, line_num, line.strip()))
44
+ else:
45
+ # Add to uncommented references
46
+ referenced_images.add(image_basename)
47
+ # If we found an uncommented reference, remove from commented_only tracking
48
+ if image_basename in commented_only_images:
49
+ del commented_only_images[image_basename]
25
50
  except Exception as e:
26
51
  print(f"Warning: could not read {adoc_file}: {e}")
27
- unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
52
+
53
+ # Determine which images are unused based on the include_commented flag
54
+ if include_commented:
55
+ # When --commented is used: treat images with commented-only references as unused
56
+ # Only images with uncommented references are considered "used"
57
+ unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
58
+ commented_only_unused = []
59
+ else:
60
+ # Default behavior: images referenced only in commented lines are considered "used"
61
+ # They should NOT be in the unused list, but we track them for reporting
62
+ all_referenced = referenced_images.union(set(commented_only_images.keys()))
63
+ unused_images = [f for f in image_files if os.path.basename(f) not in all_referenced]
64
+
65
+ # Generate list of images referenced only in comments for the report
66
+ commented_only_unused = []
67
+ for basename, references in commented_only_images.items():
68
+ # Find the full path for this basename in image_files
69
+ matching_files = [f for f in image_files if os.path.basename(f) == basename]
70
+ for f in matching_files:
71
+ commented_only_unused.append((f, references))
72
+
28
73
  unused_images = list(dict.fromkeys(unused_images))
74
+
75
+ # Generate detailed report for commented-only references
76
+ if commented_only_unused and not include_commented:
77
+ report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
78
+ os.makedirs(archive_dir, exist_ok=True)
79
+
80
+ with open(report_path, 'w', encoding='utf-8') as report:
81
+ report.write("Images Referenced Only in Commented Lines\n")
82
+ report.write("=" * 70 + "\n\n")
83
+ report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
84
+ report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
85
+ report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
86
+ report.write("-" * 70 + "\n\n")
87
+
88
+ for file_path, references in sorted(commented_only_unused):
89
+ report.write(f"Image: {file_path}\n")
90
+ report.write(f"Referenced in {len(references)} commented line(s):\n")
91
+ for ref_file, line_num, line_text in references:
92
+ report.write(f" {ref_file}:{line_num}\n")
93
+ report.write(f" {line_text}\n")
94
+ report.write("\n")
95
+
96
+ print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
97
+ print(f" Detailed report saved to: {report_path}")
98
+ print(f" These images are considered 'used' and will NOT be archived by default.")
99
+ print(f" To include them in the archive operation, use the --commented flag.\n")
100
+
29
101
  return write_manifest_and_archive(
30
102
  unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
31
103
  )
doc_utils/version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Version information for doc-utils."""
2
2
 
3
3
  # This should match the version in pyproject.toml
4
- __version__ = "0.1.34"
4
+ __version__ = "0.1.35"
5
5
 
6
6
  def get_version():
7
7
  """Return the current version string."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.34
3
+ Version: 0.1.35
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -1,5 +1,5 @@
1
- archive_unused_files.py,sha256=OJZrkqn70hiOXED218jMYPFNFWnsDpjsCYOmBRxYnHU,2274
2
- archive_unused_images.py,sha256=fZeyEZtTd72Gbd3YBXTy5xoshAAM9qb4qFPMjhHL1Fg,1864
1
+ archive_unused_files.py,sha256=YKYPtuBHEZcsyQSwSYxSYvw9v9Mh6Of8MqT53A5bM44,2438
2
+ archive_unused_images.py,sha256=EvPhMIwp6_AHKtuNYQ663q6biXBeXaqf88NzWrhvtIE,2029
3
3
  check_scannability.py,sha256=O6ROr-e624jVPvPpASpsWo0gTfuCFpA2mTSX61BjAEI,5478
4
4
  convert_callouts_interactive.py,sha256=4PjiVIOWxNJiJLQuBHT3x6rE46-hgfFHSaoo5quYIs8,22889
5
5
  convert_callouts_to_deflist.py,sha256=BoqW5_GkQ-KqNzn4vmE6lsQosrPV0lkB-bfAx3dzyMw,25886
@@ -23,16 +23,16 @@ doc_utils/replace_link_attributes.py,sha256=gmAs68_njBqEz-Qni-UGgeYEDTMxlTWk_IOm
23
23
  doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
24
24
  doc_utils/spinner.py,sha256=lJg15qzODiKoR0G6uFIk2BdVNgn9jFexoTRUMrjiWvk,3554
25
25
  doc_utils/topic_map_parser.py,sha256=tKcIO1m9r2K6dvPRGue58zqMr0O2zKU1gnZMzEE3U6o,4571
26
- doc_utils/unused_adoc.py,sha256=2cbqcYr1os2EhETUU928BlPRlsZVSdI00qaMhqjSIqQ,5263
26
+ doc_utils/unused_adoc.py,sha256=LPQWPGEOizXECxepk7E_5cjTVvKn6RXQYTWG97Ps5VQ,9077
27
27
  doc_utils/unused_attributes.py,sha256=OHyAdaBD7aNo357B0SLBN5NC_jNY5TWXMwgtfJNh3X8,7621
28
- doc_utils/unused_images.py,sha256=nqn36Bbrmon2KlGlcaruNjJJvTQ8_9H0WU9GvCW7rW8,1456
28
+ doc_utils/unused_images.py,sha256=hL8Qrik9QCkVh54eBLuNczRS9tMnsqIEfavNamM1UeQ,5664
29
29
  doc_utils/validate_links.py,sha256=iBGXnwdeLlgIT3fo3v01ApT5k0X2FtctsvkrE6E3VMk,19610
30
- doc_utils/version.py,sha256=LpXe7kXo5uNMJOga179IYdU101aWLSTOnciZkUlrK0E,203
30
+ doc_utils/version.py,sha256=rh_oI-y8ZbMOpFC_a1Qkwb1Tq4FPWdm-vTVRUBqtHGU,203
31
31
  doc_utils/version_check.py,sha256=-31Y6AN0KGi_CUCAVOOhf6bPO3r7SQIXPxxeffLAF0w,7535
32
32
  doc_utils/warnings_report.py,sha256=20yfwqBjOprfFhQwCujbcsvjJCbHHhmH84uAujm-y-o,8877
33
- rolfedh_doc_utils-0.1.34.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
34
- rolfedh_doc_utils-0.1.34.dist-info/METADATA,sha256=uDcruRVK6RPRkZtBtM5DsH9FZ5q9LXEf8hEqOsg3mig,8325
35
- rolfedh_doc_utils-0.1.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
36
- rolfedh_doc_utils-0.1.34.dist-info/entry_points.txt,sha256=vL_LlLKOiurRzchrq8iRUQG19Xi9lSAFVZGjO-xyErk,577
37
- rolfedh_doc_utils-0.1.34.dist-info/top_level.txt,sha256=J4xtr3zoyCip27b3GnticFVZoyz5HHtgGqHQ-SZONCA,265
38
- rolfedh_doc_utils-0.1.34.dist-info/RECORD,,
33
+ rolfedh_doc_utils-0.1.35.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
34
+ rolfedh_doc_utils-0.1.35.dist-info/METADATA,sha256=hsWjS4apZYXM4Qk38o3KOhMBEaqe55gc9e5QGZqAYEc,8325
35
+ rolfedh_doc_utils-0.1.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
36
+ rolfedh_doc_utils-0.1.35.dist-info/entry_points.txt,sha256=vL_LlLKOiurRzchrq8iRUQG19Xi9lSAFVZGjO-xyErk,577
37
+ rolfedh_doc_utils-0.1.35.dist-info/top_level.txt,sha256=J4xtr3zoyCip27b3GnticFVZoyz5HHtgGqHQ-SZONCA,265
38
+ rolfedh_doc_utils-0.1.35.dist-info/RECORD,,