rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. archive_unused_files.py +18 -5
  2. archive_unused_images.py +9 -2
  3. callout_lib/__init__.py +22 -0
  4. callout_lib/converter_bullets.py +103 -0
  5. callout_lib/converter_comments.py +295 -0
  6. callout_lib/converter_deflist.py +134 -0
  7. callout_lib/detector.py +364 -0
  8. callout_lib/table_parser.py +804 -0
  9. check_published_links.py +1083 -0
  10. check_scannability.py +6 -0
  11. check_source_directives.py +101 -0
  12. convert_callouts_interactive.py +567 -0
  13. convert_callouts_to_deflist.py +628 -0
  14. convert_freemarker_to_asciidoc.py +288 -0
  15. convert_tables_to_deflists.py +479 -0
  16. doc_utils/convert_freemarker_to_asciidoc.py +708 -0
  17. doc_utils/duplicate_content.py +409 -0
  18. doc_utils/duplicate_includes.py +347 -0
  19. doc_utils/extract_link_attributes.py +618 -0
  20. doc_utils/format_asciidoc_spacing.py +285 -0
  21. doc_utils/insert_abstract_role.py +220 -0
  22. doc_utils/inventory_conditionals.py +164 -0
  23. doc_utils/missing_source_directive.py +211 -0
  24. doc_utils/replace_link_attributes.py +187 -0
  25. doc_utils/spinner.py +119 -0
  26. doc_utils/unused_adoc.py +150 -22
  27. doc_utils/unused_attributes.py +218 -6
  28. doc_utils/unused_images.py +81 -9
  29. doc_utils/validate_links.py +576 -0
  30. doc_utils/version.py +8 -0
  31. doc_utils/version_check.py +243 -0
  32. doc_utils/warnings_report.py +237 -0
  33. doc_utils_cli.py +158 -0
  34. extract_link_attributes.py +120 -0
  35. find_duplicate_content.py +209 -0
  36. find_duplicate_includes.py +198 -0
  37. find_unused_attributes.py +84 -6
  38. format_asciidoc_spacing.py +134 -0
  39. insert_abstract_role.py +163 -0
  40. inventory_conditionals.py +53 -0
  41. replace_link_attributes.py +214 -0
  42. rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
  43. rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
  44. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
  45. rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
  46. rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
  47. validate_links.py +213 -0
  48. rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
  49. rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
  50. rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
  51. rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
  52. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,804 @@
1
+ """
2
+ AsciiDoc Table Parser Module
3
+
4
+ Parses AsciiDoc tables and extracts structured data. Designed to be reusable
5
+ for various table conversion tasks (not just callout explanations).
6
+
7
+ Handles:
8
+ - Two-column tables with callout numbers and explanations
9
+ - Conditional statements (ifdef, ifndef, endif) within table cells
10
+ - Multi-line table cells
11
+ - Table attributes and formatting
12
+ """
13
+
14
+ import re
15
+ from typing import List, Dict, Tuple, Optional
16
+ from dataclasses import dataclass
17
+
18
+
19
@dataclass
class TableCell:
    """A single table cell, held as raw lines plus its conditional directives."""

    # Content lines of the cell, in document order
    content: List[str]
    # ifdef/ifndef/endif lines associated with this cell
    conditionals: List[str]
24
+
25
+
26
@dataclass
class TableRow:
    """One table row: its cells and the conditional directives around it."""

    # Cells of the row, left to right
    cells: List[TableCell]
    # Conditional statements found before this row
    conditionals_before: List[str]
    # Conditional statements found after this row
    conditionals_after: List[str]
32
+
33
+
34
@dataclass
class AsciiDocTable:
    """A complete AsciiDoc table together with its position in the document."""

    # Index of the first line that belongs to the table block
    start_line: int
    # Index of the closing table delimiter line
    end_line: int
    # Table attribute line, e.g. [cols="1,3"] (empty string when absent)
    attributes: str
    # Parsed rows, in document order
    rows: List[TableRow]
    # Block title such as ".Table description" (empty string when absent)
    title: str = ""
42
+
43
+
44
class TableParser:
    """Parse AsciiDoc tables and extract structured data from them.

    The parser is line-oriented: every method works on a list of document
    lines. A table is recognised by its ``|===`` delimiters, an optional
    ``[...]`` attribute line directly above the opening delimiter and an
    optional ``.Title`` line above that. Besides generic parsing, the class
    recognises callout-explanation tables (two- and three-column) and can
    render two-column tables as other line-based layouts.
    """

    # Attribute line directly above a table, e.g. [cols="1,3"]
    TABLE_START = re.compile(r'^\[.*?\]$')
    # Delimiter line that both opens and closes a table
    TABLE_DELIMITER = re.compile(r'^\|===\s*$')

    # Start of a cell: "|" or a cell style specifier such as "a|", "s|"
    CELL_SEPARATOR = re.compile(r'^(\||[ashdmev]\|)')

    # Conditional directives
    IFDEF_PATTERN = re.compile(r'^(ifdef::|ifndef::).+\[\]\s*$')
    ENDIF_PATTERN = re.compile(r'^endif::\[\]\s*$')

    # Text consisting only of a callout (<1>) or of a plain number (1)
    CALLOUT_NUMBER = re.compile(r'^<(\d+)>\s*$')
    PLAIN_NUMBER = re.compile(r'^(\d+)\s*$')

    # Single-letter cell style specifiers accepted in front of "|"
    _CELL_STYLES = 'ashdmev'
    # cols attribute: quoted value (group 1) or bare value up to "," / "]" (group 2)
    _COLS_ATTRIBUTE = re.compile(r'\bcols=(?:["\']([^"\']+)["\']|([^\s,\]"\']+))')
    # Repetition prefix of a column spec, e.g. the "3" in "3*"
    _COL_MULTIPLIER = re.compile(r'^(\d+)\*')
    # List-style callout explanation: "<1>" alone or "<1> text"
    _CALLOUT_LIST_ITEM = re.compile(r'^<\d+>(\s|$)')
    # Words that mark the first row as a header row
    _HEADER_KEYWORDS = re.compile(
        r'\b(?:item|description|value|column|parameter|field|name)\b'
    )

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    def _is_callout_or_number(self, text: str) -> Tuple[bool, int]:
        """
        Check if text is a callout number (<1>) or plain number (1).

        Surrounding whitespace is ignored.
        Returns (True, number) on a match, (False, 0) otherwise.
        """
        text = text.strip()
        # Callout format is tried first, then the plain number format
        for pattern in (self.CALLOUT_NUMBER, self.PLAIN_NUMBER):
            match = pattern.match(text)
            if match:
                return (True, int(match.group(1)))
        return (False, 0)

    def _is_conditional(self, line: str) -> bool:
        """Return True if line is an ifdef/ifndef/endif directive."""
        return bool(self.IFDEF_PATTERN.match(line) or self.ENDIF_PATTERN.match(line))

    @staticmethod
    def _is_block_title(line: str) -> bool:
        """Return True if line looks like a block title (".Title", not "..")."""
        stripped = line.strip()
        return stripped.startswith('.') and not stripped.startswith('..')

    @staticmethod
    def _store_row(rows, cells, conditionals_before, conditionals_after) -> None:
        """Append a TableRow built from copies of the given lists to rows."""
        rows.append(TableRow(
            cells=list(cells),
            conditionals_before=list(conditionals_before),
            conditionals_after=list(conditionals_after),
        ))

    @staticmethod
    def _flush_cell(cell_lines, row_cells):
        """
        Close the cell being collected, if it has any lines.

        The lines become a TableCell appended to row_cells. Always returns a
        fresh empty list to collect the next cell into.
        """
        if cell_lines:
            row_cells.append(TableCell(content=cell_lines, conditionals=[]))
        return []

    def _leading_callout(self, row) -> Tuple[bool, int]:
        """
        Inspect the first line of the first cell of row.

        Returns (True, number) when it is a callout or plain number, and
        (False, 0) when the cell is empty or holds anything else. The caller
        must ensure the row has at least one cell.
        """
        first_cell = row.cells[0]
        if not first_cell.content:
            return (False, 0)
        return self._is_callout_or_number(first_cell.content[0])

    def _data_rows(self, table: "AsciiDocTable"):
        """Return the rows of table without the header row, if one is detected."""
        return table.rows[1:] if self._has_header_row(table) else table.rows

    def _locate_table_start(self, lines: List[str], delimiter_line: int) -> Tuple[int, str]:
        """
        Find where the table block that opens at delimiter_line really starts.

        Looks at the line above the delimiter for an attribute line, and at
        the line above that (or directly above the delimiter when there is no
        attribute line) for a block title.

        Returns: (start_line, title) - title is "" when there is none.
        """
        if delimiter_line == 0:
            return delimiter_line, ""

        previous = lines[delimiter_line - 1]
        if self.TABLE_START.match(previous):
            # Attribute line present; a title may sit right above it
            if delimiter_line > 1 and self._is_block_title(lines[delimiter_line - 2]):
                return delimiter_line - 2, lines[delimiter_line - 2].strip()
            return delimiter_line - 1, ""

        if self._is_block_title(previous):
            # Title directly before delimiter (no attributes)
            return delimiter_line - 1, previous.strip()

        return delimiter_line, ""

    def _finalize_row_if_complete(self, current_row_cells, conditionals_before_row,
                                  conditionals_after_row, expected_columns, rows):
        """
        Check if we have enough cells for a complete row, and if so, save it.

        A row counts as complete only when the column count is known
        (expected_columns > 0) and at least that many cells were collected.

        Returns: (new_current_row_cells, new_conditionals_before, new_conditionals_after)
        """
        if expected_columns > 0 and len(current_row_cells) >= expected_columns:
            self._store_row(rows, current_row_cells,
                            conditionals_before_row, conditionals_after_row)
            return [], [], []  # Reset for next row

        # Row not complete yet
        return current_row_cells, conditionals_before_row, conditionals_after_row

    def _parse_column_count(self, attributes: str) -> int:
        """
        Parse the cols attribute to determine number of columns.

        Example: '[cols="1,7a"]' returns 2
                 '[cols="1,2,3"]' returns 3
                 '[cols="3*"]' and '[cols="2*,1"]' return 3 (repetition syntax)
                 '[cols="3"]' returns 3 (single number = that many columns)
                 '[cols=3*]' returns 3 (unquoted value)

        Returns 0 when there is no cols attribute (unknown column count).
        """
        match = self._COLS_ATTRIBUTE.search(attributes)
        if not match:
            return 0  # Unknown column count

        cols_spec = (match.group(1) or match.group(2)).replace(' ', '')

        # A bare number is a column count, not a single column spec
        if cols_spec.isdecimal():
            return int(cols_spec)

        # Specs are comma-separated; semicolons are used when no comma is present
        separator = ',' if ',' in cols_spec else ';'
        count = 0
        for spec in cols_spec.split(separator):
            multiplier = self._COL_MULTIPLIER.match(spec)
            # "N*..." stands for N columns with the same spec
            count += int(multiplier.group(1)) if multiplier else 1
        return count

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def find_tables(self, lines: List[str]) -> List["AsciiDocTable"]:
        """
        Find all tables in the document.

        Scans for table delimiters; each opening delimiter whose table is
        properly closed yields one AsciiDocTable. Scanning resumes after the
        closing delimiter of a parsed table. An opening delimiter without a
        closing one is skipped.
        """
        tables = []
        i = 0

        while i < len(lines):
            if self.TABLE_DELIMITER.match(lines[i]):
                start_line, title = self._locate_table_start(lines, i)
                table = self._parse_table(lines, start_line, i, title)
                if table is not None:
                    tables.append(table)
                    i = table.end_line + 1
                    continue
            i += 1

        return tables

    def _parse_table(self, lines: List[str], start_line: int, delimiter_line: int,
                     title: str = "") -> Optional["AsciiDocTable"]:
        """
        Parse a single table starting at the delimiter.

        AsciiDoc table format:
            .Optional title
            [optional attributes]
            |===
            |Cell1
            |Cell2
            (blank line separates rows)
            |Cell3
            |Cell4
            |===

        Rows end at a blank line, at a line holding several cells, or - when
        the cols attribute gives a column count - as soon as that many cells
        were collected. Inside an "a|" (AsciiDoc) cell, blank lines and
        conditional directives are kept as cell content.

        Args:
            lines: All lines in the document
            start_line: First line of the table block (title, attributes or delimiter)
            delimiter_line: Line of the opening |=== delimiter
            title: Block title to store on the result

        Returns:
            The parsed table, or None if no closing delimiter is found.
        """
        # Work out which line (if any) between start_line and the delimiter
        # carries the attributes
        attributes = ""
        if start_line < delimiter_line:
            if self._is_block_title(lines[start_line]):
                # Start line is title, attributes might be on next line
                if start_line + 1 < delimiter_line:
                    attributes = lines[start_line + 1]
            else:
                # Start line is attributes
                attributes = lines[start_line]

        expected_columns = self._parse_column_count(attributes)

        rows = []
        current_row_cells = []
        current_cell_lines = []
        conditionals_before_row = []
        conditionals_after_row = []
        in_asciidoc_cell = False  # True while collecting an a| (AsciiDoc) cell

        for i in range(delimiter_line + 1, len(lines)):
            line = lines[i]

            # Table end: flush whatever is pending and return
            if self.TABLE_DELIMITER.match(line):
                current_cell_lines = self._flush_cell(current_cell_lines, current_row_cells)
                if current_row_cells:
                    self._store_row(rows, current_row_cells,
                                    conditionals_before_row, conditionals_after_row)
                return AsciiDocTable(
                    start_line=start_line,
                    end_line=i,
                    attributes=attributes,
                    rows=rows,
                    title=title
                )

            # Conditional directives
            if self._is_conditional(line):
                if current_cell_lines or in_asciidoc_cell:
                    # Inside a cell - conditional is part of cell content
                    current_cell_lines.append(line)
                elif current_row_cells:
                    # Between cells in the same row
                    conditionals_after_row.append(line)
                else:
                    # Conditional before any cells in this row
                    conditionals_before_row.append(line)
                continue

            # Blank line handling
            if not line.strip():
                # In AsciiDoc cells (a|), blank lines are part of cell content
                if in_asciidoc_cell:
                    current_cell_lines.append(line)
                    continue

                # Otherwise, blank line separates rows
                current_cell_lines = self._flush_cell(current_cell_lines, current_row_cells)
                if current_row_cells:
                    self._store_row(rows, current_row_cells,
                                    conditionals_before_row, conditionals_after_row)
                    current_row_cells = []
                    conditionals_before_row = []
                    conditionals_after_row = []
                continue

            # Cell separator (|) or cell style specifier (a|, s|, etc.) at line start
            if self.CELL_SEPARATOR.match(line):
                # Drop the leading | ; a leading specifier ("a|text") is kept
                # so the specifier check below can handle it.
                # NOTE(review): after dropping the |, "|a|text" is also read
                # as specifier "a" + "text" - confirm this is intended.
                cell_content = line[1:] if line.startswith('|') else line

                # A new cell starts here: close the previous one. The AsciiDoc
                # flag belongs to the previous cell, so it is always cleared,
                # even when that cell turned out to be empty.
                current_cell_lines = self._flush_cell(current_cell_lines, current_row_cells)
                in_asciidoc_cell = False

                # Check if row is complete (have enough cells based on cols attribute)
                current_row_cells, conditionals_before_row, conditionals_after_row = \
                    self._finalize_row_if_complete(
                        current_row_cells, conditionals_before_row,
                        conditionals_after_row, expected_columns, rows
                    )

                # Cell style specifier: single character followed by |
                if (len(cell_content) > 1
                        and cell_content[0] in self._CELL_STYLES
                        and cell_content[1] == '|'):
                    if cell_content[0] == 'a':
                        in_asciidoc_cell = True
                    cell_content = cell_content[2:]  # Remove style specifier and |

                cell_content = cell_content.strip()

                if '|' in cell_content:
                    # Several cells on the same line (e.g., |Cell1 |Cell2 |Cell3)
                    for part in cell_content.split('|'):
                        part = part.strip()
                        if part:  # Skip empty parts
                            current_row_cells.append(TableCell(
                                content=[part],
                                conditionals=[]
                            ))

                    # Multi-cell line completes a row - finalize it
                    if current_row_cells:
                        self._store_row(rows, current_row_cells,
                                        conditionals_before_row, conditionals_after_row)
                        current_row_cells = []
                        conditionals_before_row = []
                        conditionals_after_row = []
                    # No cell is left open after a multi-cell line
                    in_asciidoc_cell = False
                elif cell_content:
                    # Single cell on this line
                    current_cell_lines.append(cell_content)
                # If empty, a new cell was started with no content yet
                continue

            # Cell style specifier alone on a line that CELL_SEPARATOR did not
            # catch (i.e. with surrounding whitespace). It separates cells:
            #   |<1>         <- Cell 1
            #    a|          <- Start cell 2 with style 'a' (AsciiDoc)
            #   content...   <- Cell 2 content
            stripped_line = line.strip()
            if (len(stripped_line) == 2 and
                    stripped_line[0] in self._CELL_STYLES and
                    stripped_line[1] == '|' and
                    (current_cell_lines or current_row_cells)):
                current_cell_lines = self._flush_cell(current_cell_lines, current_row_cells)

                # Check if row is complete
                current_row_cells, conditionals_before_row, conditionals_after_row = \
                    self._finalize_row_if_complete(
                        current_row_cells, conditionals_before_row,
                        conditionals_after_row, expected_columns, rows
                    )

                # The flag describes the NEW cell only
                in_asciidoc_cell = stripped_line[0] == 'a'
                continue

            # Regular content line (continuation of current cell). The
            # AsciiDoc flag covers an "a|" that opened a row with its content
            # on the following lines.
            if current_cell_lines or current_row_cells or in_asciidoc_cell:
                current_cell_lines.append(line)

        # No closing delimiter found
        return None

    # ------------------------------------------------------------------
    # Callout table detection and extraction
    # ------------------------------------------------------------------

    def is_callout_table(self, table: "AsciiDocTable") -> bool:
        """
        Determine if a table is a callout explanation table.

        A callout table has two columns: callout number and explanation.
        Every row (header included) must have exactly 2 cells, and the first
        cell of every data row must start with a callout (<1>) or a plain
        number (1). A detected header row is skipped for the number check.
        """
        if not table.rows or any(len(row.cells) != 2 for row in table.rows):
            return False

        data_rows = self._data_rows(table)
        if not data_rows:
            return False

        return all(self._leading_callout(row)[0] for row in data_rows)

    def _has_header_row(self, table: "AsciiDocTable") -> bool:
        """
        Check if table has a header row.

        The first row is a header if:
        - it has at least two cells,
        - it does NOT start with a callout number (<1> or 1), and
        - the first lines of its cells contain a common header keyword as a
          whole word ("item", "description", "value", "column", "parameter",
          "field", "name"; case-insensitive).
        """
        if not table.rows:
            return False

        first_row = table.rows[0]
        if len(first_row.cells) < 2:
            return False

        # If first cell is a callout number, this is NOT a header
        if self._leading_callout(first_row)[0]:
            return False

        # Collect the first line of every cell in the first row
        header_text = ' '.join(
            cell.content[0] if cell.content else ''
            for cell in first_row.cells
        ).lower()

        return self._HEADER_KEYWORDS.search(header_text) is not None

    def is_3column_callout_table(self, table: "AsciiDocTable") -> bool:
        """
        Determine if a table is a 3-column callout explanation table.
        Format: Item (number) | Value | Description

        - Column 1: Item number (1, 2, 3... or <1>, <2>...) matching callout numbers
        - Column 2: The value/code being explained
        - Column 3: Description/explanation text

        A detected header row is skipped; every remaining row must have
        exactly 3 cells and start with a callout or plain number.
        """
        if not table.rows:
            return False

        data_rows = self._data_rows(table)
        if not data_rows:
            return False

        if any(len(row.cells) != 3 for row in data_rows):
            return False

        return all(self._leading_callout(row)[0] for row in data_rows)

    def get_table_callout_numbers(self, table: "AsciiDocTable") -> List[int]:
        """
        Extract just the callout numbers from a table (in order, with duplicates).
        Used for validation and diagnostics.

        Rows with fewer than two cells, an empty first cell, or a first cell
        that is not a callout/plain number are skipped. A detected header row
        is skipped as well.

        Returns:
            List of callout numbers in the order they appear in the table.
            Preserves duplicates to help identify table errors.
        """
        callout_numbers = []

        for row in self._data_rows(table):
            # Handle both 2-column and 3-column tables
            if len(row.cells) < 2:
                continue

            is_match, callout_num = self._leading_callout(row)
            if is_match:
                callout_numbers.append(callout_num)

        return callout_numbers

    def extract_callout_explanations_from_table(
            self, table: "AsciiDocTable") -> Dict[int, Tuple[List[str], List[str]]]:
        """
        Extract callout explanations from a table.
        Returns dict mapping callout number to tuple of (explanation_lines, conditionals).

        explanation_lines are all lines of the second cell, unfiltered
        (conditionals and blank lines included). The conditionals list holds
        the directives recorded before and after the row, in that order.

        Accepts both callout format (<1>) and plain numbers (1).
        Skips header rows if present. Rows that do not have exactly two
        cells, or whose first cell is empty or not a number, are skipped.

        Note: If table contains duplicate callout numbers, the last one wins.
        Use get_table_callout_numbers() to detect duplicates.
        """
        explanations = {}

        for row in self._data_rows(table):
            if len(row.cells) != 2:
                continue

            # An empty first cell is skipped instead of raising IndexError
            is_match, callout_num = self._leading_callout(row)
            if not is_match:
                continue

            explanation_lines = list(row.cells[1].content)
            row_conditionals = list(row.conditionals_before) + list(row.conditionals_after)

            explanations[callout_num] = (explanation_lines, row_conditionals)

        return explanations

    def extract_3column_callout_explanations(
            self, table: "AsciiDocTable") -> Dict[int, Tuple[List[str], List[str], List[str]]]:
        """
        Extract callout explanations from a 3-column table.
        Returns dict mapping callout number to tuple of
        (value_lines, description_lines, conditionals).

        Format: Item | Value | Description
        - Item: Number (1, 2, 3...) or callout (<1>, <2>...) giving the callout number
        - Value: The code/value being explained
        - Description: Explanation text

        value_lines and description_lines are the unfiltered lines of the
        second and third cell. The conditionals list holds the directives
        recorded before and after the row, in that order.

        Skips header rows if present. Rows that do not have exactly three
        cells, or whose first cell is empty or not a number, are skipped.
        If a number occurs more than once, the last row wins.
        """
        explanations = {}

        for row in self._data_rows(table):
            if len(row.cells) != 3:
                continue

            is_match, callout_num = self._leading_callout(row)
            if not is_match:
                continue

            value_lines = list(row.cells[1].content)
            description_lines = list(row.cells[2].content)
            row_conditionals = list(row.conditionals_before) + list(row.conditionals_after)

            explanations[callout_num] = (value_lines, description_lines, row_conditionals)

        return explanations

    def find_callout_table_after_code_block(
            self, lines: List[str], code_block_end: int) -> Optional["AsciiDocTable"]:
        """
        Find a callout explanation table that appears after a code block.

        Starting after code_block_end, one block delimiter line, blank lines
        and continuation markers (+) are skipped; the next 10 lines are then
        searched for a table delimiter. The search gives up when it meets a
        list-style callout explanation ("<1>" or "<1> text"), the start of
        another code block, or a table that is not a callout table.

        Args:
            lines: All lines in the document
            code_block_end: Line number where the code block ends

        Returns:
            AsciiDocTable if a callout table is found, None otherwise
        """
        # Skip the closing delimiter of the code block (----, ...., etc.)
        i = code_block_end + 1
        if i < len(lines) and lines[i].strip() in ('----', '....', '===='):
            i += 1

        # Skip blank lines and continuation markers after code block
        while i < len(lines) and lines[i].strip() in ('', '+'):
            i += 1

        # Look for a table starting within the next few lines
        # (allowing for possible text between code block and table)
        for j in range(i, min(i + 10, len(lines))):
            stripped = lines[j].strip()

            # List format takes precedence over a table further away
            if self._CALLOUT_LIST_ITEM.match(stripped):
                return None

            # Another code block starts here
            if stripped in ('----', '....') or stripped.startswith('[source'):
                return None

            if self.TABLE_DELIMITER.match(lines[j]):
                start_line, title = self._locate_table_start(lines, j)
                table = self._parse_table(lines, start_line, j, title)
                if table is not None and (self.is_callout_table(table)
                                          or self.is_3column_callout_table(table)):
                    return table

                # Found a table but it's not a callout table: stop searching
                break

        return None

    # ------------------------------------------------------------------
    # Conversion
    # ------------------------------------------------------------------

    def convert_table_to_deflist(self, table: "AsciiDocTable",
                                 preserve_conditionals: bool = True) -> List[str]:
        """
        Convert a two-column table to definition-list style lines.

        For each two-cell row the first line of the first cell is emitted
        verbatim as the term (no "::" marker is added here), followed by the
        lines of the second cell prefixed with a space. Entries are separated
        by one blank line. Rows without exactly two cells are skipped.
        NOTE(review): confirm callers add the "::" term marker themselves.

        Args:
            table: The table to convert
            preserve_conditionals: Whether to keep ifdef/ifndef/endif lines
                (row-level and inside the definition); they are emitted
                unindented. When False they are dropped.

        Returns:
            List of lines representing the definition list
        """
        output = []

        for row in table.rows:
            if len(row.cells) != 2:
                continue

            if preserve_conditionals:
                output.extend(row.conditionals_before)

            # First cell is the term
            term_lines = row.cells[0].content
            if term_lines:
                output.append(term_lines[0])

            # Second cell is the definition
            for line in row.cells[1].content:
                if self._is_conditional(line):
                    if preserve_conditionals:
                        output.append(line)
                else:
                    output.append(f" {line}")

            if preserve_conditionals:
                output.extend(row.conditionals_after)

            # Blank line between entries
            output.append("")

        # Remove trailing blank line
        if output and not output[-1].strip():
            output.pop()

        return output

    def convert_table_to_bullets(self, table: "AsciiDocTable",
                                 preserve_conditionals: bool = True) -> List[str]:
        """
        Convert a two-column table to a bulleted list.

        Each two-cell row becomes "* *term*: first definition line", followed
        by the remaining definition lines prefixed with a space. Rows without
        exactly two cells, and rows whose definition has no non-conditional
        line, produce no bullet.

        Conditional directives found inside the definition are not kept in
        place: they are collected and emitted after the bullet's text.
        NOTE(review): this detaches them from the lines they wrapped -
        confirm that is acceptable for callers.

        Args:
            table: The table to convert
            preserve_conditionals: Whether to keep ifdef/ifndef/endif lines;
                when False they are dropped.

        Returns:
            List of lines representing the bulleted list
        """
        output = []

        for row in table.rows:
            if len(row.cells) != 2:
                continue

            if preserve_conditionals:
                output.extend(row.conditionals_before)

            # Term: first line of the first cell ("" when the cell is empty)
            term_lines = row.cells[0].content
            term = term_lines[0] if term_lines else ""

            # Split the definition into text lines and conditional directives
            definition_lines = row.cells[1].content
            text_lines = [ln for ln in definition_lines if not self._is_conditional(ln)]
            inline_conditionals = [ln for ln in definition_lines if self._is_conditional(ln)]

            if text_lines:
                output.append(f"* *{term}*: {text_lines[0]}")
                # Continuation lines
                output.extend(f" {ln}" for ln in text_lines[1:])

            if preserve_conditionals:
                output.extend(inline_conditionals)
                output.extend(row.conditionals_after)

        return output