rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. archive_unused_files.py +18 -5
  2. archive_unused_images.py +9 -2
  3. callout_lib/__init__.py +22 -0
  4. callout_lib/converter_bullets.py +103 -0
  5. callout_lib/converter_comments.py +295 -0
  6. callout_lib/converter_deflist.py +134 -0
  7. callout_lib/detector.py +364 -0
  8. callout_lib/table_parser.py +804 -0
  9. check_published_links.py +1083 -0
  10. check_scannability.py +6 -0
  11. check_source_directives.py +101 -0
  12. convert_callouts_interactive.py +567 -0
  13. convert_callouts_to_deflist.py +628 -0
  14. convert_freemarker_to_asciidoc.py +288 -0
  15. convert_tables_to_deflists.py +479 -0
  16. doc_utils/convert_freemarker_to_asciidoc.py +708 -0
  17. doc_utils/duplicate_content.py +409 -0
  18. doc_utils/duplicate_includes.py +347 -0
  19. doc_utils/extract_link_attributes.py +618 -0
  20. doc_utils/format_asciidoc_spacing.py +285 -0
  21. doc_utils/insert_abstract_role.py +220 -0
  22. doc_utils/inventory_conditionals.py +164 -0
  23. doc_utils/missing_source_directive.py +211 -0
  24. doc_utils/replace_link_attributes.py +187 -0
  25. doc_utils/spinner.py +119 -0
  26. doc_utils/unused_adoc.py +150 -22
  27. doc_utils/unused_attributes.py +218 -6
  28. doc_utils/unused_images.py +81 -9
  29. doc_utils/validate_links.py +576 -0
  30. doc_utils/version.py +8 -0
  31. doc_utils/version_check.py +243 -0
  32. doc_utils/warnings_report.py +237 -0
  33. doc_utils_cli.py +158 -0
  34. extract_link_attributes.py +120 -0
  35. find_duplicate_content.py +209 -0
  36. find_duplicate_includes.py +198 -0
  37. find_unused_attributes.py +84 -6
  38. format_asciidoc_spacing.py +134 -0
  39. insert_abstract_role.py +163 -0
  40. inventory_conditionals.py +53 -0
  41. replace_link_attributes.py +214 -0
  42. rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
  43. rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
  44. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
  45. rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
  46. rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
  47. validate_links.py +213 -0
  48. rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
  49. rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
  50. rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
  51. rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
  52. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,364 @@
1
+ """
2
+ Callout Detection Module
3
+
4
+ Detects code blocks with callouts and extracts callout information from AsciiDoc files.
5
+ Supports both list-format and table-format callout explanations.
6
+ """
7
+
8
+ import re
9
+ from typing import List, Dict, Tuple, Optional
10
+ from dataclasses import dataclass
11
+ from .table_parser import TableParser
12
+
13
+
14
@dataclass
class Callout:
    """A single callout explanation: its number plus the explanation text.

    Attributes:
        number: The callout number, i.e. the N in ``<N>``.
        lines: Explanation text kept as a list of lines so the original
            formatting is preserved.
        is_optional: Flag recording whether the explanation was marked as
            optional; defaults to ``False``.
    """

    number: int
    lines: List[str]
    is_optional: bool = False
20
+
21
+
22
@dataclass
class CalloutGroup:
    """All callouts attached to one line of code.

    Attributes:
        code_line: The code line with its callout markers removed.
        callout_numbers: Every callout number that appeared on that line.
    """

    code_line: str
    callout_numbers: List[int]
27
+
28
+
29
@dataclass
class CodeBlock:
    """A delimited code block together with where it sits in the document.

    Attributes:
        start_line: Index of the line on which the block starts.
        end_line: Index of the line on which the block ends.
        delimiter: The delimiter string that opens and closes the block.
        content: The lines between the opening and closing delimiters.
        language: Source language of the block, or ``None`` when unknown.
    """

    start_line: int
    end_line: int
    delimiter: str
    content: List[str]
    language: Optional[str] = None
37
+
38
+
39
class CalloutDetector:
    """Detects and extracts callout information from AsciiDoc code blocks.

    The detector is stateful: every explanation extraction records the table
    it used (or ``None`` for list format) in ``last_table`` and
    ``last_table_title``. ``validate_callouts`` reads ``last_table``, so it
    should be called after ``extract_callout_explanations`` for the same
    code block.
    """

    # Pattern for code block start: [source,language] or [source] with optional attributes
    CODE_BLOCK_START = re.compile(r'^\[source(?:,\s*(\w+))?(?:[,\s]+[^\]]+)?\]')

    # Pattern for callout number in code block (can appear multiple times per line)
    CALLOUT_IN_CODE = re.compile(r'<(\d+)>')

    # Pattern for callout with optional preceding comment syntax
    # Matches common comment styles: //, #, --, followed by optional whitespace and <number>
    # Note: Semicolon (;) is deliberately excluded because it's a statement
    # terminator in Java/C/C++/JavaScript and causes false positives
    # (e.g., "name; <1>" would incorrectly remove the semicolon)
    CALLOUT_WITH_COMMENT = re.compile(r'\s*(?://|#|--)\s*<\d+>|\s*<\d+>')

    # Pattern for callout explanation line: <1> Explanation text
    CALLOUT_EXPLANATION = re.compile(r'^<(\d+)>\s+(.+)$')

    # Pattern to detect user-replaceable values in angle brackets
    # Excludes heredoc syntax (<<) and comparison operators
    # NOTE: not used by any method of this class; kept for backward compatibility.
    USER_VALUE_PATTERN = re.compile(r'(?<!<)<([a-zA-Z][^>]*)>')

    # Delimiter lines that open/close a listing (----) or literal (....) block
    BLOCK_DELIMITERS = ('----', '....')

    # Leading markers that flag an explanation as optional (compared lowercased)
    OPTIONAL_PREFIXES = ('optional.', 'optional:')

    # Parenthesised "(optional)" marker, in any letter case, with surrounding whitespace
    OPTIONAL_PARENTHETICAL = re.compile(r'\s*\(optional\)\s*', re.IGNORECASE)

    def __init__(self):
        """Initialize detector with table parser."""
        self.table_parser = TableParser()
        self.last_table_title = ""  # Track title from most recent table extraction
        self.last_table = None  # Track last table found for validation diagnostics

    @staticmethod
    def _find_closing_delimiter(lines: List[str], start: int, delimiter: str) -> Optional[int]:
        """Return the index of the first line at or after *start* equal to *delimiter*, else None."""
        for index in range(start, len(lines)):
            if lines[index].strip() == delimiter:
                return index
        return None

    @classmethod
    def _strip_optional_marker(cls, text: str) -> Tuple[str, bool]:
        """
        Detect and remove an "optional" marker from an explanation line.

        Recognizes a leading "Optional." / "Optional:" (any letter case) and
        a parenthesised "(optional)" anywhere in the text (any letter case).

        Returns:
        - the text with the marker removed (unchanged if no marker was found)
        - True if a marker was found, False otherwise
        """
        if text.lower().startswith(cls.OPTIONAL_PREFIXES):
            # Both prefixes have the same length, so one slice handles either.
            return text[len(cls.OPTIONAL_PREFIXES[0]):].strip(), True
        if cls.OPTIONAL_PARENTHETICAL.search(text):
            return cls.OPTIONAL_PARENTHETICAL.sub(' ', text).strip(), True
        return text, False

    def find_code_blocks(self, lines: List[str]) -> List[CodeBlock]:
        """
        Find all code blocks in the document.

        Two forms are recognized:
        - a [source...] line immediately followed by a ---- or .... delimiter;
          always reported, with start_line on the [source...] line
        - a bare ---- or .... delimited block; reported only when its content
          contains at least one callout, with start_line on the delimiter

        end_line is the index of the closing delimiter. A block whose closing
        delimiter is never found is not reported, and scanning stops there.
        """
        blocks = []
        total = len(lines)
        i = 0

        while i < total:
            match = self.CODE_BLOCK_START.match(lines[i])
            if match:
                opener = i + 1
                if opener >= total or lines[opener].strip() not in self.BLOCK_DELIMITERS:
                    # No delimiter follows this [source] line. Advance by one
                    # only, so the next line is still examined (it may itself
                    # be a [source] line) instead of being skipped.
                    i += 1
                    continue
                language = match.group(1)
                callouts_required = False
            elif lines[i].strip() in self.BLOCK_DELIMITERS:
                opener = i
                language = None
                callouts_required = True
            else:
                i += 1
                continue

            delimiter = lines[opener].strip()
            content_start = opener + 1
            closing = self._find_closing_delimiter(lines, content_start, delimiter)
            if closing is None:
                # Unterminated block: the rest of the document belongs to it.
                break

            content = lines[content_start:closing]
            if not callouts_required or any(self.CALLOUT_IN_CODE.search(line) for line in content):
                blocks.append(CodeBlock(
                    start_line=i,
                    end_line=closing,
                    delimiter=delimiter,
                    content=content,
                    language=language
                ))
            i = closing + 1

        return blocks

    def extract_callouts_from_code(self, content: List[str]) -> List[CalloutGroup]:
        """
        Extract callout numbers from code block content.
        Returns list of CalloutGroups, one per line that carries callouts:
        - The full code line with callouts (and any comment marker directly
          preceding them) removed and trailing whitespace stripped
        - List of callout numbers on that line, in order of appearance

        Multiple callouts on the same line are grouped together to be merged
        in the definition list.
        """
        groups = []

        for line in content:
            callout_nums = [int(m.group(1)) for m in self.CALLOUT_IN_CODE.finditer(line)]
            if not callout_nums:
                continue

            # Always use the full code line. Picking out an angle-bracketed
            # "user value" was disabled because it produced false positives
            # with Java generics (e.g., <MyEntity, Integer>) and other valid
            # syntax that uses angle brackets.
            code_line = self.CALLOUT_WITH_COMMENT.sub('', line).rstrip()

            groups.append(CalloutGroup(
                code_line=code_line,
                callout_numbers=callout_nums
            ))

        return groups

    def extract_callout_explanations(self, lines: List[str], start_line: int) -> Tuple[Dict[int, Callout], int]:
        """
        Extract callout explanations following a code block.
        Supports list-format (<1> text), 2-column table, and 3-column table formats.
        Returns dict of callouts and the line number where explanations end.

        A table found after the code block that is neither a 3-column nor a
        2-column callout table is ignored and list-format extraction is used.
        """
        # First, try to find a table-format callout explanation
        table = self.table_parser.find_callout_table_after_code_block(lines, start_line)
        if table:
            # Check if it's a 3-column table (Item | Value | Description)
            if self.table_parser.is_3column_callout_table(table):
                return self._extract_from_3column_table(table)
            # Check if it's a 2-column table (<callout> | explanation)
            if self.table_parser.is_callout_table(table):
                return self._extract_from_table(table)

        # Fall back to list-format extraction
        return self._extract_from_list(lines, start_line)

    def _extract_from_table(self, table) -> Tuple[Dict[int, Callout], int]:
        """
        Extract callout explanations from a table format.

        Returns dict of callouts and the table's end_line.
        """
        # Store table for use by converters and validation
        self.last_table = table
        self.last_table_title = getattr(table, 'title', "")

        explanations = {}
        table_data = self.table_parser.extract_callout_explanations_from_table(table)

        for callout_num, (explanation_lines, row_conditionals) in table_data.items():
            # explanation_lines includes blank lines and conditionals inline;
            # row_conditionals are before/after the entire row (rarely used)
            all_lines = []
            if row_conditionals:
                all_lines.extend(row_conditionals)
            all_lines.extend(explanation_lines)

            # The optional marker is looked for on the first collected line
            # (a row-level conditional when one is present).
            is_optional = False
            if all_lines:
                all_lines[0], is_optional = self._strip_optional_marker(all_lines[0])

            explanations[callout_num] = Callout(callout_num, all_lines, is_optional)

        return explanations, table.end_line

    def _extract_from_3column_table(self, table) -> Tuple[Dict[int, Callout], int]:
        """
        Extract callout explanations from a 3-column table format.
        Format: Item (number) | Value | Description

        Returns dict of callouts and the table's end_line.
        """
        # Store table for use by converters and validation
        self.last_table = table
        self.last_table_title = getattr(table, 'title', "")

        explanations = {}
        table_data = self.table_parser.extract_3column_callout_explanations(table)

        for callout_num, (value_lines, description_lines, row_conditionals) in table_data.items():
            # Both value_lines and description_lines include conditionals and
            # blank lines inline.
            all_lines = []
            if row_conditionals:
                all_lines.extend(row_conditionals)

            if value_lines:
                # First value line becomes a "value:" lead-in (skipped when empty)
                if value_lines[0]:
                    all_lines.append(f"{value_lines[0]}:")
                # Remaining value lines are carried over unchanged
                all_lines.extend(value_lines[1:])

            all_lines.extend(description_lines)

            # "optional" anywhere in the first 50 characters of the first line
            # marks the callout as optional. The text is NOT removed here -
            # it's part of the description.
            is_optional = bool(all_lines) and 'optional' in all_lines[0].lower()[:50]

            explanations[callout_num] = Callout(callout_num, all_lines, is_optional)

        return explanations, table.end_line

    def _extract_from_list(self, lines: List[str], start_line: int) -> Tuple[Dict[int, Callout], int]:
        """
        Extract callout explanations from list format (<1> text).

        start_line is the index of the code block's closing delimiter.
        Returns dict of callouts and the index of the last line consumed.
        """
        # Clear table data since list format doesn't have tables
        self.last_table = None
        self.last_table_title = ""

        explanations = {}
        total = len(lines)
        i = start_line + 1  # Start after the closing delimiter

        # Skip blank lines and continuation markers (+)
        while i < total and lines[i].strip() in ('', '+'):
            i += 1

        # Collect consecutive callout explanations; the first line that is not
        # a callout explanation (including a blank line) ends the list.
        while i < total:
            match = self.CALLOUT_EXPLANATION.match(lines[i])
            if not match:
                break

            num = int(match.group(1))
            first_line = match.group(2).strip()
            explanation_lines = [first_line]
            i += 1

            # Continuation lines run until a blank line, a new callout, or a
            # list start marker; they keep their original formatting.
            while i < total:
                line = lines[i]
                if not line.strip() or self.CALLOUT_EXPLANATION.match(line) or line.startswith('[start='):
                    break
                explanation_lines.append(line)
                i += 1

            # Optional marker is only looked for in the first line
            explanation_lines[0], is_optional = self._strip_optional_marker(first_line)

            explanations[num] = Callout(num, explanation_lines, is_optional)

        return explanations, i - 1

    def remove_callouts_from_code(self, content: List[str]) -> List[str]:
        """
        Remove callout numbers and preceding comment syntax from code block content.
        Handles multiple callouts per line and the comment styles //, # and --.
        Trailing whitespace is stripped from every returned line.
        """
        return [self.CALLOUT_WITH_COMMENT.sub('', line).rstrip() for line in content]

    def validate_callouts(self, callout_groups: List[CalloutGroup], explanations: Dict[int, Callout]) -> Tuple[bool, List[int], List[int]]:
        """
        Validate that callout numbers in code match explanation numbers.
        Returns tuple of (is_valid, code_nums_list, explanation_nums_list).

        Returns:
        - is_valid: True if unique callout numbers match
        - code_nums_list: List of callout numbers from code (unique, sorted)
        - explanation_nums_list: List of callout numbers from explanations
          (preserves duplicates if from table, sorted)
        """
        # Unique callout numbers across all code groups
        code_nums_set = {num for group in callout_groups for num in group.callout_numbers}

        if self.last_table:
            # Use table parser to get raw callout numbers (with duplicates)
            explanation_nums_list = self.table_parser.get_table_callout_numbers(self.last_table)
        else:
            # List format: dict keys are already unique
            explanation_nums_list = list(explanations.keys())

        # Validation compares unique numbers only
        is_valid = code_nums_set == set(explanation_nums_list)

        return is_valid, sorted(code_nums_set), sorted(explanation_nums_list)