rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archive_unused_files.py +18 -5
- archive_unused_images.py +9 -2
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +103 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +134 -0
- callout_lib/detector.py +364 -0
- callout_lib/table_parser.py +804 -0
- check_published_links.py +1083 -0
- check_scannability.py +6 -0
- check_source_directives.py +101 -0
- convert_callouts_interactive.py +567 -0
- convert_callouts_to_deflist.py +628 -0
- convert_freemarker_to_asciidoc.py +288 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/extract_link_attributes.py +618 -0
- doc_utils/format_asciidoc_spacing.py +285 -0
- doc_utils/insert_abstract_role.py +220 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/replace_link_attributes.py +187 -0
- doc_utils/spinner.py +119 -0
- doc_utils/unused_adoc.py +150 -22
- doc_utils/unused_attributes.py +218 -6
- doc_utils/unused_images.py +81 -9
- doc_utils/validate_links.py +576 -0
- doc_utils/version.py +8 -0
- doc_utils/version_check.py +243 -0
- doc_utils/warnings_report.py +237 -0
- doc_utils_cli.py +158 -0
- extract_link_attributes.py +120 -0
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +84 -6
- format_asciidoc_spacing.py +134 -0
- insert_abstract_role.py +163 -0
- inventory_conditionals.py +53 -0
- replace_link_attributes.py +214 -0
- rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
- rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
- rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
- rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
- validate_links.py +213 -0
- rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
- rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
- rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
- rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Format AsciiDoc spacing - ensures blank lines after headings and around include directives.
|
|
3
|
+
|
|
4
|
+
Core logic for formatting AsciiDoc files with proper spacing.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Tuple
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> Tuple[bool, List[str]]:
    """
    Process a single AsciiDoc file to fix spacing issues.

    Ensures blank lines appear after headings, around include directives,
    around conditional (ifdef/ifndef/endif) blocks, after closing ////
    comment blocks, and before block titles, then collapses any doubled
    blank lines it introduced before writing the file back.

    Args:
        file_path: Path to the file to process
        dry_run: If True, show what would be changed without modifying
        verbose: If True, show detailed output

    Returns:
        Tuple of (changes_made, messages) where messages is a list of verbose output

    Raises:
        IOError: If the file cannot be read or written.
    """
    messages: List[str] = []

    if verbose:
        messages.append(f"Processing: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (IOError, UnicodeDecodeError) as e:
        raise IOError(f"Error reading {file_path}: {e}")

    # Remove trailing newlines from lines for processing
    lines = [line.rstrip('\n\r') for line in lines]

    new_lines: List[str] = []
    changes_made = False
    in_block = False  # Track if we're inside a block (admonition, listing, etc.)
    in_conditional = False  # Track if we're inside a conditional block
    in_comment_block = False  # Track if we're inside a //// comment block

    # NOTE(review): a single in_block flag is toggled for every delimiter kind
    # (====, ----, ...., ____), so a nested block of a different type would
    # flip the state prematurely — confirm nested blocks do not occur in the
    # docs this runs against.
    for i, current_line in enumerate(lines):
        # Empty-string sentinels stand in for "no neighbor" at file edges.
        prev_line = lines[i-1] if i > 0 else ""
        next_line = lines[i+1] if i + 1 < len(lines) else ""

        # Check for conditional start (ifdef:: or ifndef::)
        if re.match(r'^(ifdef::|ifndef::)', current_line):
            in_conditional = True
            # Add blank line before conditional if needed
            # Don't add if previous line is a comment (they form a logical unit)
            if (prev_line and
                    not re.match(r'^\s*$', prev_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', prev_line) and
                    not re.match(r'^//', prev_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    messages.append(" Added blank line before conditional block")
            new_lines.append(current_line)

        # Check for conditional end (endif::)
        elif re.match(r'^endif::', current_line):
            new_lines.append(current_line)
            in_conditional = False
            # Add blank line after conditional if needed
            # Don't add if next line is:
            # - a list item (starts with *, -, ., .., or numbered)
            # - list continuation (+)
            # - another conditional
            # - blank
            if (next_line and
                    not re.match(r'^\s*$', next_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', next_line) and
                    not re.match(r'^(\*|\-|\.|\.\.|\d+\.)\s', next_line) and  # List items
                    not re.match(r'^\+\s*$', next_line)):  # List continuation
                new_lines.append("")
                changes_made = True
                if verbose:
                    messages.append(" Added blank line after conditional block")

        # Check for comment block delimiters (////)
        elif re.match(r'^////+$', current_line):
            in_comment_block = not in_comment_block  # Toggle comment block state
            new_lines.append(current_line)

            # If we're closing a comment block, add blank line after if needed
            if not in_comment_block:
                if (next_line and
                        not re.match(r'^\s*$', next_line) and
                        not re.match(r'^////+$', next_line)):
                    new_lines.append("")
                    changes_made = True
                    if verbose:
                        messages.append(" Added blank line after comment block")

        # Check for block delimiters (====, ----, ...., ____)
        # These are used for admonitions, listing blocks, literal blocks, etc.
        elif re.match(r'^(====+|----+|\.\.\.\.+|____+)$', current_line):
            in_block = not in_block  # Toggle block state
            new_lines.append(current_line)

        # Check for role blocks ([role="..."])
        # Role blocks don't need special spacing - they're followed directly by content
        elif not in_block and not in_comment_block and re.match(r'^\[role=', current_line):
            new_lines.append(current_line)

        # Check for block titles (.Title)
        # NOTE(review): only titles starting with a capital letter are matched.
        elif not in_block and not in_comment_block and re.match(r'^\.[A-Z]', current_line):
            # Add blank line before block title if needed
            # Don't add if inside a conditional block or if previous line is a conditional directive
            if (not in_conditional and
                    prev_line and
                    not re.match(r'^\s*$', prev_line) and
                    not re.match(r'^=+\s+', prev_line) and
                    not re.match(r'^\[role=', prev_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', prev_line)):  # Don't add if previous is conditional
                new_lines.append("")
                changes_made = True
                if verbose:
                    truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                    messages.append(f" Added blank line before block title: {truncated}")
            new_lines.append(current_line)

        # Check if current line is a heading (but not if we're in a block)
        elif not in_block and re.match(r'^=+\s+', current_line):
            new_lines.append(current_line)

            # Check if next line is not empty, not another heading, not a comment block, and not a conditional
            if (next_line and
                    not re.match(r'^=+\s+', next_line) and
                    not re.match(r'^\s*$', next_line) and
                    not re.match(r'^////+$', next_line) and  # Don't add if next is comment block
                    not re.match(r'^(ifdef::|ifndef::|endif::)', next_line)):  # Don't add if next is conditional
                new_lines.append("")
                changes_made = True
                if verbose:
                    truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                    messages.append(f" Added blank line after heading: {truncated}")

        # Check if current line is a comment (AsciiDoc comments start with //)
        elif re.match(r'^//', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if next line is an include directive
                if next_line and re.match(r'^include::', next_line):
                    # This comment belongs to the include, add blank line before comment if needed
                    # This includes when previous line is an include (to separate include blocks)
                    if (prev_line and
                            not re.match(r'^\s*$', prev_line) and
                            not re.match(r'^//', prev_line) and
                            not re.match(r'^:', prev_line)):
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            messages.append(" Added blank line before comment above include")
                    new_lines.append(current_line)
                else:
                    # Standalone comment, just add it
                    new_lines.append(current_line)

        # Check if current line is an attribute (starts with :)
        elif re.match(r'^:', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if next line is an include directive
                if next_line and re.match(r'^include::', next_line):
                    # This attribute belongs to the include, add blank line before attribute if needed
                    if (prev_line and
                            not re.match(r'^\s*$', prev_line) and
                            not re.match(r'^//', prev_line) and
                            not re.match(r'^:', prev_line)):  # Don't add if previous is comment or attribute
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            messages.append(" Added blank line before attribute above include")
                    new_lines.append(current_line)
                else:
                    # Standalone attribute, just add it
                    new_lines.append(current_line)

        # Check if current line is an include directive
        elif re.match(r'^include::', current_line):
            # Handle includes inside conditional blocks
            if in_conditional:
                # Add blank line between consecutive includes within conditional blocks
                if prev_line and re.match(r'^include::', prev_line):
                    new_lines.append("")
                    changes_made = True
                    if verbose:
                        messages.append(" Added blank line between includes in conditional block")
                new_lines.append(current_line)
            else:
                # Check if this is an attribute include (contains "attribute" in the path)
                is_attribute_include = 'attribute' in current_line.lower()

                # Check if this appears near the top of the file (within first 10 lines after H1)
                # Find the H1 heading position
                h1_position = -1
                for j in range(min(i, 10)):  # Look back up to 10 lines or to current position
                    if re.match(r'^=\s+', lines[j]):  # H1 heading starts with single =
                        h1_position = j
                        break

                # If this is an attribute include near the H1 heading, don't add surrounding blank lines
                is_near_h1 = h1_position >= 0 and (i - h1_position) <= 2

                # Check if previous line is a comment or attribute (which belongs to this include)
                has_comment_above = prev_line and re.match(r'^//', prev_line)
                has_attribute_above = prev_line and re.match(r'^:', prev_line)

                # If it's an attribute include near H1, only the heading's blank line is needed
                if not (is_attribute_include and is_near_h1):
                    # Don't add blank line if there's a comment or attribute above (it was handled by the comment/attribute logic)
                    if not has_comment_above and not has_attribute_above:
                        # Add blank line before include if previous line is not empty
                        # This includes adding blank lines between consecutive includes
                        if (prev_line and
                                not re.match(r'^\s*$', prev_line)):
                            new_lines.append("")
                            changes_made = True
                            if verbose:
                                truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                                messages.append(f" Added blank line before include: {truncated}")

                new_lines.append(current_line)

                # If it's an attribute include near H1, don't add blank line after
                if not (is_attribute_include and is_near_h1):
                    # Add blank line after include if next line exists and is not empty and not an include
                    if (next_line and
                            not re.match(r'^\s*$', next_line) and
                            not re.match(r'^include::', next_line)):
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                            messages.append(f" Added blank line after include: {truncated}")

        else:
            new_lines.append(current_line)

    # Apply changes if any were made
    if changes_made:
        # Clean up any consecutive blank lines we may have added
        cleaned_lines: List[str] = []
        for i, line in enumerate(new_lines):
            # Check if this is a blank line we're about to add
            if line == "":
                # Check if the previous line is also a blank line
                if i > 0 and cleaned_lines and cleaned_lines[-1] == "":
                    # Skip this blank line as we already have one
                    continue
            cleaned_lines.append(line)

        # NOTE(review): rewriting line-by-line normalizes the file to exactly
        # one '\n' per line, adding a trailing newline if the original lacked
        # one — confirm this normalization is intended.
        if not dry_run:
            try:
                with open(file_path, 'w', encoding='utf-8') as f:
                    for line in cleaned_lines:
                        f.write(line + '\n')
            except IOError as e:
                raise IOError(f"Error writing {file_path}: {e}")
    else:
        if verbose:
            messages.append(" No changes needed")

    return changes_made, messages
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def find_adoc_files(path: Path) -> List[Path]:
    """Collect every .adoc file reachable from *path*.

    A directory is searched recursively; a single .adoc file yields a
    one-element list; anything else yields an empty list.
    """
    if path.is_dir():
        return list(path.rglob('*.adoc'))
    if path.is_file() and path.suffix == '.adoc':
        return [path]
    return []
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Insert abstract role - ensures AsciiDoc files have [role="_abstract"] above the first paragraph.
|
|
3
|
+
|
|
4
|
+
Core logic for adding the [role="_abstract"] attribute required for DITA short description conversion.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Tuple, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def find_first_paragraph_after_title(lines: List[str]) -> Optional[int]:
    """
    Locate the first body paragraph that follows the document title.

    Scans for a level-1 heading (``= Title``) and then returns the index of
    the first subsequent line that is not blank, not an attribute entry
    (``:name:``), not a comment (``//``), not a block attribute (``[...]``),
    not a heading, and not an ``include::`` directive.

    Args:
        lines: List of lines from the file (without trailing newlines)

    Returns:
        Line index of the first paragraph, or None if not found
    """
    # Line shapes that can never be the first paragraph.
    non_paragraph = (
        r'^\s*$',       # blank line
        r'^:',          # attribute definition
        r'^//',         # single-line comment
        r'^\[',         # block attribute like [role=...], [id=...]
        r'^=+\s+',      # any heading
        r'^include::',  # include directive
    )

    seen_title = False
    for idx, text in enumerate(lines):
        # Level-1 heading (document title): "= Title" but not "== ...".
        if re.match(r'^=\s+[^=]', text):
            seen_title = True
            continue
        if not seen_title:
            continue
        if any(re.match(pattern, text) for pattern in non_paragraph):
            continue
        # First line that survives the filters is the paragraph.
        return idx

    return None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def has_abstract_role(lines: List[str], paragraph_index: int) -> bool:
    """
    Report whether [role="_abstract"] already precedes the paragraph.

    Walks backwards from the line just above *paragraph_index*, skipping
    blank lines; the first non-empty line encountered decides the answer,
    so any other content (attribute, heading, etc.) ends the search.

    Args:
        lines: List of lines from the file
        paragraph_index: Index of the first paragraph

    Returns:
        True if [role="_abstract"] already exists before the paragraph
    """
    idx = paragraph_index - 1
    while idx >= 0:
        candidate = lines[idx].strip()
        if candidate:
            # Accepts both single- and double-quoted role values.
            return bool(re.match(r'^\[role=["\']_abstract["\']\]$', candidate))
        idx -= 1
    return False
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> Tuple[bool, List[str]]:
    """
    Ensure a single AsciiDoc file carries [role="_abstract"] on its first paragraph.

    Args:
        file_path: Path to the file to process
        dry_run: If True, show what would be changed without modifying
        verbose: If True, show detailed output

    Returns:
        Tuple of (changes_made, messages) where messages is a list of verbose output

    Raises:
        IOError: If the file cannot be read or written.
    """
    messages: List[str] = []
    if verbose:
        messages.append(f"Processing: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw = handle.readlines()
    except (IOError, UnicodeDecodeError) as exc:
        raise IOError(f"Error reading {file_path}: {exc}")

    # Work on newline-free lines throughout.
    stripped = [entry.rstrip('\n\r') for entry in raw]

    # Nothing to do when there is no paragraph, or the role is already there.
    target = find_first_paragraph_after_title(stripped)
    if target is None:
        if verbose:
            messages.append(" No paragraph found after title")
        return False, messages

    if has_abstract_role(stripped, target):
        if verbose:
            messages.append(" [role=\"_abstract\"] already present")
        return False, messages

    # Splice the role marker in just above the paragraph, inserting a
    # separating blank line when the preceding line has content.
    result = stripped[:target]
    if target > 0 and stripped[target - 1].strip():
        result.append('')
    result.append('[role="_abstract"]')
    result.extend(stripped[target:])

    if verbose:
        preview = stripped[target][:60] + "..." if len(stripped[target]) > 60 else stripped[target]
        messages.append(f" Adding [role=\"_abstract\"] before line {target + 1}: {preview}")

    if not dry_run:
        try:
            with open(file_path, 'w', encoding='utf-8') as handle:
                for entry in result:
                    handle.write(entry + '\n')
        except IOError as exc:
            raise IOError(f"Error writing {file_path}: {exc}")

    return True, messages
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def find_adoc_files(path: Path, exclude_dirs: Optional[List[str]] = None, exclude_files: Optional[List[str]] = None) -> List[Path]:
    """
    Find all .adoc files in the given path.

    Symlinked files under a directory are skipped; exclusions are compared
    on resolved absolute paths, and an excluded directory removes every
    file beneath it.

    Args:
        path: File or directory path to search
        exclude_dirs: List of directory paths to exclude
        exclude_files: List of file paths to exclude

    Returns:
        Sorted list of Path objects for .adoc files
    """
    # Fix: the parameters were annotated `List[str] = None` (implicit
    # Optional, rejected by PEP 484); they are now Optional[List[str]].
    exclude_dirs = exclude_dirs or []
    exclude_files = exclude_files or []

    # Normalize exclusion paths to absolute so comparisons are reliable.
    exclude_dirs_abs = [Path(d).resolve() for d in exclude_dirs]
    # A set gives O(1) membership tests for file exclusions.
    exclude_files_abs = {Path(f).resolve() for f in exclude_files}

    adoc_files: List[Path] = []

    if path.is_file():
        if path.suffix == '.adoc' and path.resolve() not in exclude_files_abs:
            adoc_files.append(path)
    elif path.is_dir():
        for adoc_path in path.rglob('*.adoc'):
            # Skip symlinks to avoid cycles and double-processing.
            if adoc_path.is_symlink():
                continue

            path_abs = adoc_path.resolve()

            # Check if file is excluded
            if path_abs in exclude_files_abs:
                continue

            # Check if any parent directory is excluded:
            # relative_to() raises ValueError when path_abs is not under it.
            skip = False
            for exclude_dir in exclude_dirs_abs:
                try:
                    path_abs.relative_to(exclude_dir)
                    skip = True
                    break
                except ValueError:
                    pass

            if not skip:
                adoc_files.append(adoc_path)

    return sorted(adoc_files)
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for inventorying AsciiDoc conditional directives.
|
|
3
|
+
|
|
4
|
+
Functions:
|
|
5
|
+
- find_adoc_files: Recursively find all .adoc files in a directory.
|
|
6
|
+
- scan_file_for_conditionals: Scan a file for conditional directives.
|
|
7
|
+
- create_inventory: Create an inventory of all conditionals found in .adoc files.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Pattern to match AsciiDoc conditionals
# Group 1 captures the directive keyword (ifdef/ifndef/endif/ifeval);
# group 2 captures everything after "::" — the attribute name(s) plus the
# trailing "[...]" bracket text.
# NOTE(review): re.MULTILINE has no effect here because callers match()
# individual, already-split lines — harmless, kept as-is.
CONDITIONAL_PATTERN = re.compile(
    r'^(ifdef|ifndef|endif|ifeval)::(.*)$',
    re.MULTILINE
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def find_adoc_files(directory: Path) -> List[Path]:
    """Return every .adoc file under *directory*, sorted by path."""
    matches = directory.rglob('*.adoc')
    return sorted(matches)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def scan_file_for_conditionals(filepath: Path) -> List[Tuple[int, str, str]]:
    """
    Scan a file for conditional directives.

    Unreadable files are reported with a warning on stdout and produce an
    empty result instead of raising.

    Args:
        filepath: Path to the .adoc file to scan.

    Returns:
        A list of tuples: (line_number, directive_type, condition)
    """
    found: List[Tuple[int, str, str]] = []
    try:
        text = filepath.read_text(encoding='utf-8')
        for line_no, raw_line in enumerate(text.splitlines(), start=1):
            hit = CONDITIONAL_PATTERN.match(raw_line.strip())
            if hit:
                found.append((line_no, hit.group(1), hit.group(2)))
    except Exception as err:
        print(f"Warning: Could not read {filepath}: {err}")
    return found
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def create_inventory(directory: Path, output_dir: Optional[Path] = None) -> Path:
    """
    Create an inventory of all conditionals found in .adoc files.

    Writes a timestamped ``inventory-YYYYMMDD-HHMMSS.txt`` report listing
    every conditional per file, a directive-count summary, and the unique
    condition names used.

    Args:
        directory: Directory to scan for .adoc files.
        output_dir: Directory to write the inventory file. Defaults to current directory.

    Returns:
        The path to the created inventory file.
    """
    # Fix: the parameter was annotated `Path = None` (implicit Optional,
    # rejected by PEP 484); it is now Optional[Path].
    if output_dir is None:
        output_dir = Path.cwd()

    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    output_file = output_dir / f'inventory-{timestamp}.txt'

    adoc_files = find_adoc_files(directory)

    # Track statistics
    stats: Dict[str, int] = defaultdict(int)
    conditions_used: Dict[str, List[Tuple[Path, int]]] = defaultdict(list)
    total_files_with_conditionals = 0

    lines = []
    lines.append("AsciiDoc Conditionals Inventory")
    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append(f"Directory: {directory.resolve()}")
    lines.append("=" * 80)
    lines.append("")

    # Per-file section: one block per file that contains conditionals.
    for filepath in adoc_files:
        conditionals = scan_file_for_conditionals(filepath)
        if conditionals:
            total_files_with_conditionals += 1
            relative_path = filepath.relative_to(directory)
            lines.append(f"File: {relative_path}")
            lines.append("-" * 60)

            for line_num, directive, condition in conditionals:
                stats[directive] += 1
                # Extract the condition name (before any brackets)
                cond_name = condition.split('[')[0] if condition else '(empty)'
                if directive in ('ifdef', 'ifndef', 'ifeval'):
                    conditions_used[cond_name].append((relative_path, line_num))

                lines.append(f" Line {line_num:5d}: {directive}::{condition}")

            lines.append("")

    # Add summary section
    lines.append("=" * 80)
    lines.append("SUMMARY")
    lines.append("=" * 80)
    lines.append("")
    lines.append(f"Total .adoc files scanned: {len(adoc_files)}")
    lines.append(f"Files with conditionals: {total_files_with_conditionals}")
    lines.append("")
    lines.append("Directive counts:")
    for directive in sorted(stats.keys()):
        lines.append(f" {directive}: {stats[directive]}")
    lines.append(f" Total: {sum(stats.values())}")
    lines.append("")

    # List unique conditions
    lines.append("=" * 80)
    lines.append("UNIQUE CONDITIONS USED")
    lines.append("=" * 80)
    lines.append("")
    for cond in sorted(conditions_used.keys()):
        occurrences = conditions_used[cond]
        lines.append(f" {cond}: {len(occurrences)} occurrences")

    # Write the inventory file
    output_file.write_text('\n'.join(lines), encoding='utf-8')

    return output_file
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def get_inventory_stats(directory: Path) -> Dict:
    """
    Get statistics about conditionals without writing a file.

    Args:
        directory: Directory to scan for .adoc files.

    Returns:
        Dictionary with statistics about conditionals found.
    """
    adoc_files = find_adoc_files(directory)

    directive_counts: Dict[str, int] = defaultdict(int)
    condition_counts: Dict[str, int] = defaultdict(int)
    files_hit: Set[Path] = set()

    for adoc_file in adoc_files:
        hits = scan_file_for_conditionals(adoc_file)
        if not hits:
            continue
        files_hit.add(adoc_file)
        for _line_num, directive, condition in hits:
            directive_counts[directive] += 1
            # Condition name is the text before any "[" bracket.
            name = condition.split('[')[0] if condition else '(empty)'
            if directive in ('ifdef', 'ifndef', 'ifeval'):
                condition_counts[name] += 1

    return {
        'total_files': len(adoc_files),
        'files_with_conditionals': len(files_hit),
        'directive_counts': dict(directive_counts),
        'total_conditionals': sum(directive_counts.values()),
        'unique_conditions': dict(condition_counts),
    }
|