rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archive_unused_files.py +18 -5
- archive_unused_images.py +9 -2
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +103 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +134 -0
- callout_lib/detector.py +364 -0
- callout_lib/table_parser.py +804 -0
- check_published_links.py +1083 -0
- check_scannability.py +6 -0
- check_source_directives.py +101 -0
- convert_callouts_interactive.py +567 -0
- convert_callouts_to_deflist.py +628 -0
- convert_freemarker_to_asciidoc.py +288 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/extract_link_attributes.py +618 -0
- doc_utils/format_asciidoc_spacing.py +285 -0
- doc_utils/insert_abstract_role.py +220 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/replace_link_attributes.py +187 -0
- doc_utils/spinner.py +119 -0
- doc_utils/unused_adoc.py +150 -22
- doc_utils/unused_attributes.py +218 -6
- doc_utils/unused_images.py +81 -9
- doc_utils/validate_links.py +576 -0
- doc_utils/version.py +8 -0
- doc_utils/version_check.py +243 -0
- doc_utils/warnings_report.py +237 -0
- doc_utils_cli.py +158 -0
- extract_link_attributes.py +120 -0
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +84 -6
- format_asciidoc_spacing.py +134 -0
- insert_abstract_role.py +163 -0
- inventory_conditionals.py +53 -0
- replace_link_attributes.py +214 -0
- rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
- rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
- rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
- rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
- validate_links.py +213 -0
- rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
- rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
- rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
- rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
doc_utils/unused_adoc.py
CHANGED
|
@@ -5,49 +5,177 @@ import re
|
|
|
5
5
|
from .file_utils import collect_files, write_manifest_and_archive
|
|
6
6
|
from .topic_map_parser import detect_repo_type, get_all_topic_map_references
|
|
7
7
|
|
|
8
|
-
def
|
|
8
|
+
def find_scan_directories(base_path='.', exclude_dirs=None):
    """
    Automatically find all 'modules' and 'assemblies' directories in the repository.

    Walks ``base_path`` without following symbolic links, skipping hidden
    directories, common build/output directories and anything at or below a
    path listed in ``exclude_dirs``. A 'modules' or 'assemblies' directory is
    reported only if it (or one of its subdirectories) contains a .adoc file.
    For every reported 'modules' directory, a direct 'rn' child that contains
    .adoc files is reported as well.

    Args:
        base_path: Directory to start the search from.
        exclude_dirs: Optional iterable of directory paths to leave out.

    Returns a list of paths to scan.
    """
    skip_names = {'node_modules', 'build', 'dist', 'target'}
    excluded = [os.path.abspath(d) for d in (exclude_dirs or [])]

    def _is_excluded(path):
        """Return True if path is an excluded directory or lies inside one."""
        abs_path = os.path.abspath(path)
        # Compare on whole path components: excluding 'docs/old' must not
        # also exclude the sibling 'docs/old-stuff'.
        return any(
            abs_path == ex or abs_path.startswith(os.path.join(ex, ''))
            for ex in excluded
        )

    def _contains_adoc(directory):
        """Return True if directory or any non-symlinked subdirectory holds a .adoc file."""
        for subroot, subdirs, subfiles in os.walk(directory):
            # Skip symbolic links
            subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
            if any(f.endswith('.adoc') for f in subfiles):
                return True
        return False

    scan_dirs = []
    if _is_excluded(base_path):
        return scan_dirs

    for root, dirs, _files in os.walk(base_path):
        # Prune in place so os.walk neither descends into nor reports
        # symlinks, hidden directories, build directories or excluded paths.
        dirs[:] = [
            d for d in dirs
            if not d.startswith('.')
            and d not in skip_names
            and not os.path.islink(os.path.join(root, d))
            and not _is_excluded(os.path.join(root, d))
        ]

        # Look for modules and assemblies directories
        for d in dirs:
            if d in ('modules', 'assemblies'):
                dir_path = os.path.join(root, d)
                if _contains_adoc(dir_path):
                    scan_dirs.append(dir_path)

    # Also check for modules/rn pattern if modules exists
    for modules_dir in [d for d in scan_dirs if os.path.basename(d) == 'modules']:
        rn_dir = os.path.join(modules_dir, 'rn')
        if os.path.isdir(rn_dir) and not _is_excluded(rn_dir) and _contains_adoc(rn_dir):
            scan_dirs.append(rn_dir)

    return scan_dirs
|
|
62
|
+
|
|
63
|
+
def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
    """
    Find .adoc files in the scan directories that no other .adoc file includes.

    Files are compared by basename. References come from ``include::`` lines
    in every .adoc file under the current directory and, for 'topic_map'
    repositories, from the topic maps. A file whose only references are in
    ``//`` commented lines counts as used by default; those files are listed
    in 'commented-references-report.txt' inside ``archive_dir``. With
    ``include_commented`` such files count as unused and no report is written.

    Args:
        scan_dirs: Directories to check; auto-discovered when empty.
        archive_dir: Directory for the manifest, archive and report.
        archive: Passed through to write_manifest_and_archive.
        exclude_dirs: Directories passed to the discovery and collection steps.
        exclude_files: Files passed to the collection steps.
        include_commented: Treat commented-only references as "not referenced".

    Returns:
        The result of write_manifest_and_archive, or None when no scan
        directories could be discovered.
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    # If no scan_dirs provided, auto-discover them
    if not scan_dirs:
        scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
        if scan_dirs:
            print("Auto-discovered directories to scan:")
            for dir_path in sorted(scan_dirs):
                print(f" - {dir_path}")
        else:
            print("No 'modules' or 'assemblies' directories found containing .adoc files.")
            print("Please run this tool from your documentation repository root.")
            return

    # Detect repository type
    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Collect all .adoc files in scan directories
    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    # Track which files are referenced (uncommented and commented separately)
    referenced_files = set()  # Files in uncommented includes
    commented_refs = {}  # {basename: [(file, line_num, line_text)]} for commented includes

    if repo_type == 'topic_map':
        # For OpenShift-docs style repos, get references from topic maps
        topic_references = get_all_topic_map_references()
        # Convert to basenames for comparison
        referenced_files.update(os.path.basename(ref) for ref in topic_references)

    # Patterns for finding includes (both commented and uncommented)
    include_pattern = re.compile(r'include::(.+?)\[')
    commented_include_pattern = re.compile(r'^\s*//.*include::(.+?)\[')

    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    for file_path in adoc_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line_num, line in enumerate(f, 1):
                    commented_match = commented_include_pattern.search(line)
                    if commented_match:
                        include_basename = os.path.basename(commented_match.group(1))
                        # Track location of commented reference
                        commented_refs.setdefault(include_basename, []).append(
                            (file_path, line_num, line.strip())
                        )
                        continue
                    uncommented_match = include_pattern.search(line)
                    if uncommented_match:
                        referenced_files.add(os.path.basename(uncommented_match.group(1)))
        except Exception as e:
            # Best effort: an unreadable file must not abort the whole scan.
            print(f"Warning: could not read {file_path}: {e}")

    # Decide "commented only" after the whole scan. Doing it per line made the
    # result depend on file/line order (a commented include seen after an
    # uncommented one was still reported) and ignored topic-map references.
    commented_only_files = {
        basename: refs
        for basename, refs in commented_refs.items()
        if basename not in referenced_files
    }

    # Determine which files are unused based on the include_commented flag
    commented_only_unused = []
    if include_commented:
        # When --commented is used: only uncommented references make a file "used"
        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
    else:
        # Default behavior: files referenced only in commented lines are "used";
        # they stay out of the unused list but are collected for the report.
        all_referenced = referenced_files.union(commented_only_files)
        unused_files = [f for f in asciidoc_files if os.path.basename(f) not in all_referenced]
        commented_only_unused = [
            (f, commented_only_files[os.path.basename(f)])
            for f in asciidoc_files
            if os.path.basename(f) in commented_only_files
        ]

    unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates

    # Print summary
    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")

    # Generate detailed report for commented-only references
    if commented_only_unused:
        report_path = os.path.join(archive_dir, 'commented-references-report.txt')
        os.makedirs(archive_dir, exist_ok=True)

        with open(report_path, 'w', encoding='utf-8') as report:
            report.write("Files Referenced Only in Commented Lines\n")
            report.write("=" * 70 + "\n\n")
            report.write(f"Found {len(commented_only_unused)} files that are referenced only in commented-out includes.\n")
            report.write("These files are considered 'used' by default and will NOT be archived.\n\n")
            report.write("To archive these files along with other unused files, use the --commented flag.\n\n")
            report.write("-" * 70 + "\n\n")

            for file_path, references in sorted(commented_only_unused):
                report.write(f"File: {file_path}\n")
                report.write(f"Referenced in {len(references)} commented line(s):\n")
                for ref_file, line_num, line_text in references:
                    report.write(f" {ref_file}:{line_num}\n")
                    report.write(f" {line_text}\n")
                report.write("\n")

        print(f"\n📋 Found {len(commented_only_unused)} files referenced only in commented lines.")
        print(f" Detailed report saved to: {report_path}")
        print(" These files are considered 'used' and will NOT be archived by default.")
        print(" To include them in the archive operation, use the --commented flag.\n")

    return write_manifest_and_archive(
        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
|
doc_utils/unused_attributes.py
CHANGED
|
@@ -6,19 +6,62 @@ Functions:
|
|
|
6
6
|
- find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
|
|
7
7
|
- scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
|
|
8
8
|
- find_unused_attributes: Main function to return unused attributes.
|
|
9
|
+
- find_attributes_files: Find all potential attributes files in the repository.
|
|
9
10
|
"""
|
|
10
11
|
|
|
11
12
|
import os
|
|
12
13
|
import re
|
|
13
|
-
from
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Set, List, Optional
|
|
14
16
|
|
|
15
17
|
def parse_attributes_file(attr_file: str) -> Set[str]:
    """
    Collect the attribute names defined in an AsciiDoc attributes file.

    A definition is any line that, after stripping surrounding whitespace,
    starts with ``:name:``. Names listed in IGNORED_ATTRIBUTES are left out.

    Args:
        attr_file: Path to the attributes file.

    Returns:
        Set of attribute names defined in the file.

    Raises:
        FileNotFoundError: attr_file does not exist.
        ValueError: attr_file is not a regular file, or is not valid UTF-8.
        PermissionError: attr_file cannot be read.
    """
    # AsciiDoc configuration attributes that control the processor itself
    # These should be ignored as they won't appear in content
    IGNORED_ATTRIBUTES = frozenset({
        'data-uri',
        'doctype',
        'experimental',
        'idprefix',
        'imagesdir',
        'includes',
        'sectanchors',
        'sectlinks',
        'source-highlighter',
        'linkattrs',
        'toclevels',
        'idseparator',
        'icons',
        'iconsdir',
        'generated-dir',
        'code-examples',
        'doc-guides',
        'doc-examples',
    })

    # Check if file exists
    if not os.path.exists(attr_file):
        raise FileNotFoundError(f"Attributes file not found: {attr_file}")

    # Check if it's a file (not a directory)
    if not os.path.isfile(attr_file):
        raise ValueError(f"Path is not a file: {attr_file}")

    definition = re.compile(r'^:([\w-]+):')
    attributes = set()

    try:
        with open(attr_file, 'r', encoding='utf-8') as f:
            for line in f:
                match = definition.match(line.strip())
                # Skip ignored configuration attributes
                if match and match.group(1) not in IGNORED_ATTRIBUTES:
                    attributes.add(match.group(1))
    except PermissionError as e:
        # Keep the original error as the explicit cause for debugging.
        raise PermissionError(f"Permission denied reading file: {attr_file}") from e
    except UnicodeDecodeError as e:
        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}") from e

    return attributes
|
|
23
66
|
|
|
24
67
|
def find_adoc_files(root_dir: str) -> List[str]:
|
|
@@ -33,18 +76,187 @@ def find_adoc_files(root_dir: str) -> List[str]:
|
|
|
33
76
|
|
|
34
77
|
def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set[str]:
    """
    Find which of the given attributes are used in a set of .adoc files.

    An attribute counts as used when it appears as a ``{name}`` reference or
    as a target of an ``ifdef::``, ``ifndef::`` or ``endif::`` directive,
    including multi-attribute targets such as ``ifdef::a,b[]`` and
    ``ifdef::a+b[]``.

    Args:
        adoc_files: Paths of the files to scan (read as UTF-8).
        attributes: Attribute names to look for.

    Returns:
        The subset of ``attributes`` found in at least one file.
    """
    used = set()
    # Pattern for attribute references: {attribute-name}
    attr_pattern = re.compile(r'\{([\w-]+)\}')
    # Conditional directives: capture the whole target so that combined
    # targets (comma = any, plus = all) are not missed.
    conditional_pattern = re.compile(r'(?:ifdef|ifndef|endif)::([^\[\]\s]*)\[')
    target_separator = re.compile(r'[,+]')

    for file in adoc_files:
        with open(file, 'r', encoding='utf-8') as f:
            for line in f:
                # Check for {attribute} references
                used.update(name for name in attr_pattern.findall(line) if name in attributes)
                # Check for ifdef::attribute[], ifndef::attribute[], endif::attribute[]
                for target in conditional_pattern.findall(line):
                    used.update(
                        name for name in target_separator.split(target) if name in attributes
                    )
    return used
|
|
44
96
|
|
|
97
|
+
def find_attributes_files(root_dir: str = '.') -> List[str]:
    """
    Find all attributes.adoc files in the repository.

    Matches files below ``root_dir`` whose name starts with 'attributes' or
    ends with 'attributes.adoc' (for example 'attributes.adoc',
    'attributes-extra.adoc', 'common-attributes.adoc'). Entries located in
    hidden directories or in 'target', 'build' or 'node_modules' are skipped.

    Args:
        root_dir: Directory to search.

    Returns:
        Sorted list of matching paths as strings, without duplicates.
    """
    root_path = Path(root_dir)
    skipped_dirs = {'target', 'build', 'node_modules'}

    # These two globs cover every name matched by the four original patterns
    # ('attributes.adoc', 'attributes*.adoc', '*attributes.adoc', '*-attributes.adoc').
    patterns = ('**/attributes*.adoc', '**/*attributes.adoc')

    found = set()
    for pattern in patterns:
        for path in root_path.glob(pattern):
            # Judge only the part below root_dir: a root such as '..' or one
            # that lives under a dot-directory must not hide every result.
            rel_parts = path.relative_to(root_path).parts
            if any(part.startswith('.') or part in skipped_dirs for part in rel_parts):
                continue
            if path.is_file():
                found.add(str(path))

    # Sort for consistent ordering
    return sorted(found)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
    """
    Interactive selection of attributes file from a list.

    With one candidate the user confirms it or types another path. With
    several candidates the user picks a number, chooses the extra
    "custom path" option, or quits with 'q'.

    Args:
        attributes_files: Candidate attributes file paths.

    Returns:
        The chosen path, or None when the list is empty, the user quits, a
        custom path given for the single-candidate case does not exist, or
        standard input reaches end-of-file.
    """
    def _ask(prompt: str) -> Optional[str]:
        """Return the stripped answer, or None if stdin is exhausted."""
        try:
            return input(prompt).strip()
        except EOFError:
            # Non-interactive run (closed or empty stdin): behave like "quit".
            return None

    def _ask_custom_path() -> Optional[str]:
        """Prompt for a path and return it only if it names an existing file."""
        response = _ask("Enter the path to your attributes file: ")
        if response is None:
            return None
        if os.path.isfile(response):
            return response
        print(f"Error: File not found: {response}")
        return None

    if not attributes_files:
        return None

    if len(attributes_files) == 1:
        print(f"Found attributes file: {attributes_files[0]}")
        response = _ask("Use this file? (y/n): ")
        if response is None:
            return None
        if response.lower() == 'y':
            return attributes_files[0]
        return _ask_custom_path()

    # Multiple files found
    custom_option = len(attributes_files) + 1
    print("\nFound multiple attributes files:")
    for i, file_path in enumerate(attributes_files, 1):
        print(f" {i}. {file_path}")
    print(f" {custom_option}. Enter custom path")

    while True:
        response = _ask(f"\nSelect option (1-{custom_option}) or 'q' to quit: ")
        if response is None or response.lower() == 'q':
            return None

        try:
            choice = int(response)
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue

        if 1 <= choice <= len(attributes_files):
            return attributes_files[choice - 1]
        if choice == custom_option:
            custom_path = _ask_custom_path()
            if custom_path is not None:
                return custom_path
            # Invalid custom path: fall through and show the menu prompt again.
        else:
            print(f"Invalid choice. Please enter a number between 1 and {custom_option}")
|
|
166
|
+
|
|
167
|
+
|
|
45
168
|
def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
    """
    Return the attributes defined in ``attr_file`` that are never used.

    Args:
        attr_file: Path to the attributes file to parse.
        adoc_root: Directory whose .adoc files are scanned for usage.

    Returns:
        Alphabetically sorted list of unused attribute names.
    """
    defined = parse_attributes_file(attr_file)
    candidates = find_adoc_files(adoc_root)
    referenced = scan_for_attribute_usage(candidates, defined)
    return sorted(defined - referenced)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def comment_out_unused_attributes(attr_file: str, unused_attrs: List[str]) -> int:
    """
    Comment out unused attributes in the attributes file.

    Each matching definition line is prefixed with "// Unused ". When a
    definition is wrapped over several lines (a line ending in a space and a
    backslash continues on the next line), the continuation lines get the
    same prefix so no fragment of the value is left behind as content.
    Existing line endings are preserved and the file is rewritten only when
    something changed.

    Args:
        attr_file: Path to the attributes file
        unused_attrs: List of unused attribute names

    Returns:
        Number of attributes commented out
    """
    if not unused_attrs:
        return 0

    def _continues(text: str) -> bool:
        """Return True if the attribute value wraps onto the next line."""
        return text.rstrip().endswith(' \\')

    # newline='' keeps the file's own line endings (LF / CRLF) intact.
    with open(attr_file, 'r', encoding='utf-8', newline='') as f:
        lines = f.readlines()

    # Create a set for faster lookup
    unused_set = set(unused_attrs)
    definition = re.compile(r'^:([\w-]+):')
    commented_count = 0
    in_continuation = False

    new_lines = []
    for line in lines:
        if in_continuation:
            # Remainder of a wrapped value that is being commented out.
            new_lines.append(f'// Unused {line}')
            in_continuation = _continues(line)
            continue

        # Match on the stripped line, exactly as parse_attributes_file does,
        # so every attribute it reports can also be found here.
        match = definition.match(line.strip())
        if match and match.group(1) in unused_set:
            new_lines.append(f'// Unused {line}')
            commented_count += 1
            in_continuation = _continues(line)
        else:
            new_lines.append(line)

    # Write back to the file only if a line was actually changed
    if commented_count:
        with open(attr_file, 'w', encoding='utf-8', newline='') as f:
            f.writelines(new_lines)

    return commented_count
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def remove_unused_attributes(attr_file: str, unused_attrs: Optional[List[str]] = None) -> int:
    """
    Remove unused attributes from the attributes file.

    This removes lines that either:
    - Define an attribute in the unused_attrs list, or
    - Are already marked with "// Unused" prefix

    When a removed line ends in a space and a backslash (a wrapped attribute
    value), the continuation lines that follow are removed too. Existing line
    endings are preserved and the file is rewritten only when something was
    removed.

    Args:
        attr_file: Path to the attributes file
        unused_attrs: Optional list of unused attribute names. If None, only
            removes lines already marked with "// Unused".

    Returns:
        Number of lines removed
    """
    def _continues(text: str) -> bool:
        """Return True if the attribute value wraps onto the next line."""
        return text.rstrip().endswith(' \\')

    # newline='' keeps the file's own line endings (LF / CRLF) intact.
    with open(attr_file, 'r', encoding='utf-8', newline='') as f:
        lines = f.readlines()

    # Create a set for faster lookup
    unused_set = set(unused_attrs) if unused_attrs else set()
    definition = re.compile(r'^:([\w-]+):')
    removed_count = 0
    in_continuation = False

    new_lines = []
    for line in lines:
        if in_continuation:
            # Remainder of a wrapped value whose first line was removed.
            removed_count += 1
            in_continuation = _continues(line)
            continue

        # Line already marked as unused
        if line.startswith('// Unused '):
            removed_count += 1
            in_continuation = _continues(line)
            continue

        # Line defines an unused attribute; match on the stripped line,
        # exactly as parse_attributes_file does.
        if unused_set:
            match = definition.match(line.strip())
            if match and match.group(1) in unused_set:
                removed_count += 1
                in_continuation = _continues(line)
                continue

        new_lines.append(line)

    # Write back to the file only if a line was actually removed
    if removed_count:
        with open(attr_file, 'w', encoding='utf-8', newline='') as f:
            f.writelines(new_lines)

    return removed_count
|
doc_utils/unused_images.py
CHANGED
|
@@ -6,26 +6,98 @@ from .file_utils import collect_files, write_manifest_and_archive
|
|
|
6
6
|
|
|
7
7
|
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
|
|
8
8
|
|
|
9
|
-
def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
|
|
9
|
+
def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None, include_commented=False):
    """
    Find image files in the scan directories that no .adoc file references.

    Images are compared by basename against ``image::``/``image:`` macros and
    quoted image file names found in every .adoc file under the current
    directory. An image whose only references are in ``//`` commented lines
    counts as used by default; those images are listed in
    'commented-image-references-report.txt' inside ``archive_dir``. With
    ``include_commented`` such images count as unused and no report is written.

    Args:
        scan_dirs: Directories to search for image files.
        archive_dir: Directory for the manifest, archive and report.
        archive: Passed through to write_manifest_and_archive.
        exclude_dirs: Directories passed to the collection steps.
        exclude_files: Files passed to the collection steps.
        include_commented: Treat commented-only references as "not referenced".

    Returns:
        The result of write_manifest_and_archive.
    """
    # Print safety warning
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
    adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)

    # Track which images are referenced (uncommented and commented separately)
    referenced_images = set()  # Images in uncommented references
    commented_refs = {}  # {basename: [(file, line_num, line_text)]} for commented references

    # Patterns for finding image references (both commented and uncommented)
    image_ref_pattern = re.compile(r'(?i)image::([^\[]+)[\[]|image:([^\[]+)[\[]|"([^"\s]+\.(?:png|jpg|jpeg|gif|svg))"')
    commented_line_pattern = re.compile(r'^\s*//')

    for adoc_file in adoc_files:
        try:
            with open(adoc_file, 'r', encoding='utf-8') as f:
                for line_num, line in enumerate(f, 1):
                    # Check if this line is commented
                    is_commented = commented_line_pattern.match(line)

                    # Each match is a tuple with one non-empty group per alternative
                    for match in image_ref_pattern.findall(line):
                        for group in match:
                            if not group:
                                continue
                            image_basename = os.path.basename(group)
                            if is_commented:
                                # Track location of commented reference
                                commented_refs.setdefault(image_basename, []).append(
                                    (adoc_file, line_num, line.strip())
                                )
                            else:
                                referenced_images.add(image_basename)
        except Exception as e:
            # Best effort: an unreadable file must not abort the whole scan.
            print(f"Warning: could not read {adoc_file}: {e}")

    # Decide "commented only" after the whole scan. Doing it per line made the
    # result depend on file/line order: a commented reference seen after an
    # uncommented one was still reported as commented-only.
    commented_only_images = {
        basename: refs
        for basename, refs in commented_refs.items()
        if basename not in referenced_images
    }

    # Determine which images are unused based on the include_commented flag
    commented_only_unused = []
    if include_commented:
        # When --commented is used: only uncommented references make an image "used"
        unused_images = [f for f in image_files if os.path.basename(f) not in referenced_images]
    else:
        # Default behavior: images referenced only in commented lines are "used";
        # they stay out of the unused list but are collected for the report.
        all_referenced = referenced_images.union(commented_only_images)
        unused_images = [f for f in image_files if os.path.basename(f) not in all_referenced]
        commented_only_unused = [
            (f, commented_only_images[os.path.basename(f)])
            for f in image_files
            if os.path.basename(f) in commented_only_images
        ]

    unused_images = list(dict.fromkeys(unused_images))

    # Generate detailed report for commented-only references
    if commented_only_unused:
        report_path = os.path.join(archive_dir, 'commented-image-references-report.txt')
        os.makedirs(archive_dir, exist_ok=True)

        with open(report_path, 'w', encoding='utf-8') as report:
            report.write("Images Referenced Only in Commented Lines\n")
            report.write("=" * 70 + "\n\n")
            report.write(f"Found {len(commented_only_unused)} images that are referenced only in commented-out lines.\n")
            report.write("These images are considered 'used' by default and will NOT be archived.\n\n")
            report.write("To archive these images along with other unused images, use the --commented flag.\n\n")
            report.write("-" * 70 + "\n\n")

            for file_path, references in sorted(commented_only_unused):
                report.write(f"Image: {file_path}\n")
                report.write(f"Referenced in {len(references)} commented line(s):\n")
                for ref_file, line_num, line_text in references:
                    report.write(f" {ref_file}:{line_num}\n")
                    report.write(f" {line_text}\n")
                report.write("\n")

        print(f"\n📋 Found {len(commented_only_unused)} images referenced only in commented lines.")
        print(f" Detailed report saved to: {report_path}")
        print(" These images are considered 'used' and will NOT be archived by default.")
        print(" To include them in the archive operation, use the --commented flag.\n")

    return write_manifest_and_archive(
        unused_images, archive_dir, 'unused-images', 'unused-images', archive=archive
    )
|