rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archive_unused_files.py +18 -5
- archive_unused_images.py +9 -2
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +103 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +134 -0
- callout_lib/detector.py +364 -0
- callout_lib/table_parser.py +804 -0
- check_published_links.py +1083 -0
- check_scannability.py +6 -0
- check_source_directives.py +101 -0
- convert_callouts_interactive.py +567 -0
- convert_callouts_to_deflist.py +628 -0
- convert_freemarker_to_asciidoc.py +288 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/extract_link_attributes.py +618 -0
- doc_utils/format_asciidoc_spacing.py +285 -0
- doc_utils/insert_abstract_role.py +220 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/replace_link_attributes.py +187 -0
- doc_utils/spinner.py +119 -0
- doc_utils/unused_adoc.py +150 -22
- doc_utils/unused_attributes.py +218 -6
- doc_utils/unused_images.py +81 -9
- doc_utils/validate_links.py +576 -0
- doc_utils/version.py +8 -0
- doc_utils/version_check.py +243 -0
- doc_utils/warnings_report.py +237 -0
- doc_utils_cli.py +158 -0
- extract_link_attributes.py +120 -0
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +84 -6
- format_asciidoc_spacing.py +134 -0
- insert_abstract_role.py +163 -0
- inventory_conditionals.py +53 -0
- replace_link_attributes.py +214 -0
- rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
- rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
- rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
- rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
- validate_links.py +213 -0
- rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
- rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
- rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
- rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
callout_lib/detector.py
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Callout Detection Module
|
|
3
|
+
|
|
4
|
+
Detects code blocks with callouts and extracts callout information from AsciiDoc files.
|
|
5
|
+
Supports both list-format and table-format callout explanations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import List, Dict, Tuple, Optional
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from .table_parser import TableParser
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Callout:
    """A single numbered callout together with its explanation text."""

    # Callout number, i.e. the N of the <N> marker.
    number: int
    # Explanation kept as separate lines so the original formatting survives.
    lines: List[str]
    # Set when the explanation was flagged as optional.
    is_optional: bool = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class CalloutGroup:
    """All callouts that are attached to one and the same line of code."""

    # The code line itself, with the callout markers stripped.
    code_line: str
    # Every callout number that appeared on that line.
    callout_numbers: List[int]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class CodeBlock:
    """A delimited code block: where it sits, what it holds, and its language."""

    # Index of the line that opens the block.
    start_line: int
    # Index of the line that closes the block.
    end_line: int
    # Delimiter string that brackets the block.
    delimiter: str
    # Lines found between the opening and closing delimiters.
    content: List[str]
    # Source language, when one is known.
    language: Optional[str] = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CalloutDetector:
    """Detects and extracts callout information from AsciiDoc code blocks.

    The detector works on a document given as a list of lines. It locates
    delimited code blocks, pulls the ``<N>`` callout markers out of the code,
    and collects the matching explanations that follow the block, either as
    a callout list (``<1> text``) or as a 2- or 3-column table.

    ``last_table`` and ``last_table_title`` are updated by every explanation
    extraction so that later steps (validation, converters) can inspect the
    table that was used, if any.
    """

    # Pattern for code block start: [source,language] or [source] with optional attributes
    CODE_BLOCK_START = re.compile(r'^\[source(?:,\s*(\w+))?(?:[,\s]+[^\]]+)?\]')

    # Pattern for callout number in code block (can appear multiple times per line)
    CALLOUT_IN_CODE = re.compile(r'<(\d+)>')

    # Pattern for callout with optional preceding comment syntax.
    # Matches common comment styles: //, #, --, followed by optional whitespace and <number>.
    # Note: Semicolon (;) is deliberately NOT treated as comment syntax because it is a
    # statement terminator in Java/C/C++/JavaScript and would cause false positives
    # (e.g., "name; <1>" would incorrectly lose the semicolon).
    CALLOUT_WITH_COMMENT = re.compile(r'\s*(?://|#|--)\s*<\d+>|\s*<\d+>')

    # Pattern for callout explanation line: <1> Explanation text
    CALLOUT_EXPLANATION = re.compile(r'^<(\d+)>\s+(.+)$')

    # Pattern to detect user-replaceable values in angle brackets.
    # Excludes heredoc syntax (<<) and comparison operators.
    # Not used inside this class any more (see extract_callouts_from_code); kept as a
    # public attribute in case other code still refers to it.
    USER_VALUE_PATTERN = re.compile(r'(?<!<)<([a-zA-Z][^>]*)>')

    # Lines that open/close a delimited block (listing and literal blocks).
    _BLOCK_DELIMITERS = ('----', '....')

    # Leading markers that flag an explanation as optional; both have the same length.
    _OPTIONAL_PREFIXES = ('optional.', 'optional:')
    _OPTIONAL_PREFIX_LEN = len('optional.')

    # Inline "(optional)" marker, any letter case, with surrounding whitespace.
    _OPTIONAL_PAREN = re.compile(r'\s*\(optional\)\s*', re.IGNORECASE)

    def __init__(self):
        """Initialize detector with table parser."""
        self.table_parser = TableParser()
        self.last_table_title = ""  # Track title from most recent table extraction
        self.last_table = None  # Track last table found for validation diagnostics

    @staticmethod
    def _find_closing_delimiter(lines: List[str], start: int, delimiter: str) -> Optional[int]:
        """Return the index of the first line at or after *start* equal to *delimiter*.

        Lines are compared after stripping whitespace. Returns ``None`` when the
        block is never closed.
        """
        for index in range(start, len(lines)):
            if lines[index].strip() == delimiter:
                return index
        return None

    @classmethod
    def _split_optional_marker(cls, text: str) -> Tuple[str, bool]:
        """Detect and strip an "optional" marker from an explanation's first line.

        Recognizes a leading ``Optional.`` / ``Optional:`` and an inline
        ``(optional)``, all case-insensitively.

        Returns:
            ``(text_without_marker, True)`` when a marker was found, otherwise
            ``(text, False)`` with *text* unchanged.
        """
        if text.lower().startswith(cls._OPTIONAL_PREFIXES):
            return text[cls._OPTIONAL_PREFIX_LEN:].strip(), True
        if cls._OPTIONAL_PAREN.search(text):
            return cls._OPTIONAL_PAREN.sub(' ', text).strip(), True
        return text, False

    def find_code_blocks(self, lines: List[str]) -> "List[CodeBlock]":
        """Find all code blocks in the document.

        Two kinds of block are recognized:

        * a ``[source...]`` line immediately followed by a ``----`` or ``....``
          delimiter line: always reported, with the language taken from the
          ``[source,<language>]`` attribute when present;
        * a bare ``----`` / ``....`` block without ``[source]``: reported only if
          its content contains at least one ``<N>`` callout.

        ``start_line`` is the index of the ``[source]`` line (or of the opening
        delimiter for bare blocks) and ``end_line`` the index of the closing
        delimiter. A block whose closing delimiter is missing is not reported and
        ends the scan, since everything after it would be inside the open block.
        """
        blocks = []
        total = len(lines)
        i = 0

        while i < total:
            match = self.CODE_BLOCK_START.match(lines[i])
            if match:
                start = i
                i += 1
                if i >= total or lines[i].strip() not in self._BLOCK_DELIMITERS:
                    # No delimiter right after [source]: re-examine this line on the
                    # next iteration instead of skipping it (it may itself be a
                    # [source] line).
                    continue
                language = match.group(1)
                callouts_required = False
            elif lines[i].strip() in self._BLOCK_DELIMITERS:
                start = i
                language = None
                callouts_required = True
            else:
                i += 1
                continue

            # Here lines[i] is the opening delimiter.
            delimiter = lines[i].strip()
            content_start = i + 1
            end = self._find_closing_delimiter(lines, content_start, delimiter)
            if end is None:
                break

            content = lines[content_start:end]
            if not callouts_required or any(self.CALLOUT_IN_CODE.search(line) for line in content):
                blocks.append(CodeBlock(
                    start_line=start,
                    end_line=end,
                    delimiter=delimiter,
                    content=content,
                    language=language,
                ))
            i = end + 1

        return blocks

    def extract_callouts_from_code(self, content: List[str]) -> "List[CalloutGroup]":
        """Extract callout numbers from code block content.

        Returns one CalloutGroup per code line that carries at least one callout:

        - ``code_line``: the full line with callouts (and a directly preceding
          ``//``, ``#`` or ``--``) removed and trailing whitespace stripped
        - ``callout_numbers``: all callout numbers on that line, in order

        Multiple callouts on the same line are grouped together so they can be
        merged in the definition list.
        """
        groups = []

        for line in content:
            callout_nums = [int(m.group(1)) for m in self.CALLOUT_IN_CODE.finditer(line)]
            if not callout_nums:
                continue

            # Always use the full code line. Picking out a "user-replaceable value"
            # in angle brackets was dropped because it produced false positives with
            # Java generics (e.g., <MyEntity, Integer>) and other valid syntax.
            code_line = self.CALLOUT_WITH_COMMENT.sub('', line).rstrip()

            groups.append(CalloutGroup(
                code_line=code_line,
                callout_numbers=callout_nums,
            ))

        return groups

    def extract_callout_explanations(self, lines: List[str], start_line: int) -> "Tuple[Dict[int, Callout], int]":
        """Extract callout explanations following a code block.

        Supports list-format (<1> text), 2-column table, and 3-column table formats.
        A table is tried first; if none is found, or the table is not recognized as
        a callout table, list-format extraction is used.

        Args:
            lines: The document lines.
            start_line: Index of the code block's closing delimiter.

        Returns:
            A dict mapping callout number to Callout, and the line number where the
            explanations end.
        """
        table = self.table_parser.find_callout_table_after_code_block(lines, start_line)
        if table:
            # Check for the 3-column layout (Item | Value | Description) first.
            if self.table_parser.is_3column_callout_table(table):
                return self._extract_from_3column_table(table)
            # Then the 2-column layout (<callout> | explanation).
            if self.table_parser.is_callout_table(table):
                return self._extract_from_table(table)

        # Fall back to list-format extraction
        return self._extract_from_list(lines, start_line)

    def _extract_from_table(self, table) -> "Tuple[Dict[int, Callout], int]":
        """Extract callout explanations from a 2-column table.

        Records the table in ``last_table`` / ``last_table_title`` and returns the
        explanations together with ``table.end_line``.
        """
        # Store table for use by converters and validation
        self.last_table = table
        self.last_table_title = getattr(table, 'title', "")

        explanations = {}
        table_data = self.table_parser.extract_callout_explanations_from_table(table)

        for callout_num, (explanation_lines, row_conditionals) in table_data.items():
            # Row-level conditionals (rarely used) go first; explanation_lines already
            # carries inline conditionals and blank lines.
            all_lines = []
            if row_conditionals:
                all_lines.extend(row_conditionals)
            all_lines.extend(explanation_lines)

            is_optional = False
            if all_lines:
                all_lines[0], is_optional = self._split_optional_marker(all_lines[0])

            explanations[callout_num] = Callout(callout_num, all_lines, is_optional)

        return explanations, table.end_line

    def _extract_from_3column_table(self, table) -> "Tuple[Dict[int, Callout], int]":
        """Extract callout explanations from a 3-column table.

        Format: Item (number) | Value | Description. The first value line is emitted
        as ``"<value>:"``, followed by any further value lines and then the
        description lines. Records the table in ``last_table`` /
        ``last_table_title`` and returns the explanations with ``table.end_line``.
        """
        # Store table for use by converters and validation
        self.last_table = table
        self.last_table_title = getattr(table, 'title', "")

        explanations = {}
        table_data = self.table_parser.extract_3column_callout_explanations(table)

        for callout_num, (value_lines, description_lines, row_conditionals) in table_data.items():
            all_lines = []

            # Row-level conditionals go first.
            if row_conditionals:
                all_lines.extend(row_conditionals)

            if value_lines:
                value_text = value_lines[0]
                if value_text:
                    all_lines.append(f"{value_text}:")
                # Remaining value lines (may include conditionals and blank lines)
                all_lines.extend(value_lines[1:])

            # Description lines already include conditionals and blank lines
            all_lines.extend(description_lines)

            # Flag as optional when "optional" occurs within the first 50 characters
            # of the first line. The text is left in place: here it is part of the
            # description.
            is_optional = bool(all_lines) and 'optional' in all_lines[0].lower()[:50]

            explanations[callout_num] = Callout(callout_num, all_lines, is_optional)

        return explanations, table.end_line

    def _extract_from_list(self, lines: List[str], start_line: int) -> "Tuple[Dict[int, Callout], int]":
        """Extract callout explanations from list format (<1> text).

        Scanning starts after ``start_line`` (the closing delimiter), skipping blank
        lines and ``+`` continuation markers. Each ``<N> text`` line starts an
        explanation; following lines are appended to it until a blank line, another
        ``<N>`` line, or a line starting with ``[start=``. Scanning stops at the
        first line that does not start an explanation.

        Returns the explanations and the index of the last line consumed.
        """
        # Clear table data since list format doesn't have tables
        self.last_table = None
        self.last_table_title = ""

        explanations = {}
        total = len(lines)
        i = start_line + 1  # Start after the closing delimiter

        # Skip blank lines and continuation markers (+)
        while i < total and lines[i].strip() in ('', '+'):
            i += 1

        while i < total:
            match = self.CALLOUT_EXPLANATION.match(lines[i])
            if not match:
                break

            num = int(match.group(1))
            # The optional marker is looked for in the first line only.
            first_line, is_optional = self._split_optional_marker(match.group(2).strip())
            explanation_lines = [first_line]
            i += 1

            # Collect continuation lines, preserving their original formatting.
            while i < total:
                line = lines[i]
                if not line.strip() or self.CALLOUT_EXPLANATION.match(line) or line.startswith('[start='):
                    break
                explanation_lines.append(line)
                i += 1

            explanations[num] = Callout(num, explanation_lines, is_optional)

        return explanations, i - 1

    def remove_callouts_from_code(self, content: List[str]) -> List[str]:
        """Remove callout numbers and preceding comment syntax from code block content.

        Handles multiple callouts per line and the comment styles ``//``, ``#`` and
        ``--``. Trailing whitespace is stripped from every returned line.
        """
        return [self.CALLOUT_WITH_COMMENT.sub('', line).rstrip() for line in content]

    def validate_callouts(self, callout_groups: "List[CalloutGroup]", explanations: "Dict[int, Callout]") -> Tuple[bool, List[int], List[int]]:
        """Validate that callout numbers in code match explanation numbers.

        Returns tuple of (is_valid, code_nums_list, explanation_nums_list):

        - is_valid: True if the unique callout numbers match
        - code_nums_list: callout numbers from code (unique, sorted)
        - explanation_nums_list: callout numbers from explanations, sorted; when the
          explanations came from a table the numbers are taken from the table parser
          and may contain duplicates
        """
        code_nums = set()
        for group in callout_groups:
            code_nums.update(group.callout_numbers)

        if self.last_table:
            # Ask the table parser for the raw numbers so duplicates stay visible.
            explanation_nums_list = self.table_parser.get_table_callout_numbers(self.last_table)
        else:
            # List format: dict keys are already unique
            explanation_nums_list = list(explanations)

        # Validation compares unique numbers only
        is_valid = code_nums == set(explanation_nums_list)

        return is_valid, sorted(code_nums), sorted(explanation_nums_list)
|