rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- archive_unused_files.py +18 -5
- archive_unused_images.py +9 -2
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +103 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +134 -0
- callout_lib/detector.py +364 -0
- callout_lib/table_parser.py +804 -0
- check_published_links.py +1083 -0
- check_scannability.py +6 -0
- check_source_directives.py +101 -0
- convert_callouts_interactive.py +567 -0
- convert_callouts_to_deflist.py +628 -0
- convert_freemarker_to_asciidoc.py +288 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/extract_link_attributes.py +618 -0
- doc_utils/format_asciidoc_spacing.py +285 -0
- doc_utils/insert_abstract_role.py +220 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/replace_link_attributes.py +187 -0
- doc_utils/spinner.py +119 -0
- doc_utils/unused_adoc.py +150 -22
- doc_utils/unused_attributes.py +218 -6
- doc_utils/unused_images.py +81 -9
- doc_utils/validate_links.py +576 -0
- doc_utils/version.py +8 -0
- doc_utils/version_check.py +243 -0
- doc_utils/warnings_report.py +237 -0
- doc_utils_cli.py +158 -0
- extract_link_attributes.py +120 -0
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +84 -6
- format_asciidoc_spacing.py +134 -0
- insert_abstract_role.py +163 -0
- inventory_conditionals.py +53 -0
- replace_link_attributes.py +214 -0
- rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
- rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
- rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
- rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
- validate_links.py +213 -0
- rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
- rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
- rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
- rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
callout_lib/table_parser.py
@@ -0,0 +1,804 @@
+"""
+AsciiDoc Table Parser Module
+
+Parses AsciiDoc tables and extracts structured data. Designed to be reusable
+for various table conversion tasks (not just callout explanations).
+
+Handles:
+- Two-column tables with callout numbers and explanations
+- Conditional statements (ifdef, ifndef, endif) within table cells
+- Multi-line table cells
+- Table attributes and formatting
+"""
+
+import re
+from typing import List, Dict, Tuple, Optional
+from dataclasses import dataclass
+
+
+@dataclass
+class TableCell:
+    """Represents a single table cell with its content and any conditional blocks."""
+    content: List[str]  # Lines of content in the cell
+    conditionals: List[str]  # Any ifdef/ifndef/endif lines associated with this cell
+
+
+@dataclass
+class TableRow:
+    """Represents a table row with cells."""
+    cells: List[TableCell]
+    conditionals_before: List[str]  # Conditional statements before this row
+    conditionals_after: List[str]  # Conditional statements after this row
+
+
+@dataclass
+class AsciiDocTable:
+    """Represents a complete AsciiDoc table."""
+    start_line: int
+    end_line: int
+    attributes: str  # Table attributes like [cols="1,3"]
+    rows: List[TableRow]
+    title: str = ""  # Block title like ".Table description"
+
+
+class TableParser:
+    """Parses AsciiDoc tables and extracts structured data."""
+
+    # Pattern for table start delimiter with optional attributes
+    TABLE_START = re.compile(r'^\[.*?\]$')
+    TABLE_DELIMITER = re.compile(r'^\|===\s*$')
+
+    # Pattern for table cell separator (| or cell type specifier like a|, s|, etc.)
+    CELL_SEPARATOR = re.compile(r'^(\||[ashdmev]\|)')
+
+    # Pattern for conditional directives
+    IFDEF_PATTERN = re.compile(r'^(ifdef::|ifndef::).+\[\]\s*$')
+    ENDIF_PATTERN = re.compile(r'^endif::\[\]\s*$')
+
+    # Pattern for callout number (used for callout table detection)
+    CALLOUT_NUMBER = re.compile(r'^<(\d+)>\s*$')
+    PLAIN_NUMBER = re.compile(r'^(\d+)\s*$')
+
+    def _is_callout_or_number(self, text: str) -> tuple[bool, int]:
+        """
+        Check if text is a callout number (<1>) or plain number (1).
+        Returns (is_match, number) or (False, 0) if no match.
+        """
+        text = text.strip()
+
+        # Try callout format first: <1>
+        match = self.CALLOUT_NUMBER.match(text)
+        if match:
+            return (True, int(match.group(1)))
+
+        # Try plain number format: 1
+        match = self.PLAIN_NUMBER.match(text)
+        if match:
+            return (True, int(match.group(1)))
+
+        return (False, 0)
+
+    def _finalize_row_if_complete(self, current_row_cells, conditionals_before_row,
+                                  conditionals_after_row, expected_columns, rows):
+        """
+        Check if we have enough cells for a complete row, and if so, save it.
+
+        Returns: (new_current_row_cells, new_conditionals_before, new_conditionals_after)
+        """
+        if expected_columns > 0 and len(current_row_cells) >= expected_columns:
+            # Row is complete - save it
+            rows.append(TableRow(
+                cells=current_row_cells.copy(),
+                conditionals_before=conditionals_before_row.copy(),
+                conditionals_after=conditionals_after_row.copy()
+            ))
+            return [], [], []  # Reset for next row
+
+        # Row not complete yet
+        return current_row_cells, conditionals_before_row, conditionals_after_row
+
+    def _parse_column_count(self, attributes: str) -> int:
+        """
+        Parse the cols attribute to determine number of columns.
+
+        Example: '[cols="1,7a"]' returns 2
+                 '[cols="1,2,3"]' returns 3
+        """
+        import re
+        # Match cols="..." or cols='...'
+        match = re.search(r'cols=["\']([^"\']+)["\']', attributes)
+        if not match:
+            return 0  # Unknown column count
+
+        cols_spec = match.group(1)
+        # Count comma-separated values
+        # Handle formats like: "1,2", "1a,2a", "1,2,3", etc.
+        columns = cols_spec.split(',')
+        return len(columns)
+
+    def find_tables(self, lines: List[str]) -> List[AsciiDocTable]:
+        """Find all tables in the document."""
+        tables = []
+        i = 0
+
+        while i < len(lines):
+            # Look for table delimiter
+            if self.TABLE_DELIMITER.match(lines[i]):
+                # Check for attributes and title before the table
+                attributes = ""
+                title = ""
+                start_line = i
+
+                # Check line before delimiter for attributes [cols="..."]
+                if i > 0 and self.TABLE_START.match(lines[i - 1]):
+                    attributes = lines[i - 1]
+                    start_line = i - 1
+
+                    # Check line before attributes for title .Title
+                    if i > 1 and lines[i - 2].strip().startswith('.') and not lines[i - 2].strip().startswith('..'):
+                        title = lines[i - 2].strip()
+                        start_line = i - 2
+                elif i > 0 and lines[i - 1].strip().startswith('.') and not lines[i - 1].strip().startswith('..'):
+                    # Title directly before delimiter (no attributes)
+                    title = lines[i - 1].strip()
+                    start_line = i - 1
+
+                # Parse table content
+                table = self._parse_table(lines, start_line, i, title)
+                if table:
+                    tables.append(table)
+                    i = table.end_line + 1
+                    continue
+            i += 1
+
+        return tables
+
+    def _parse_table(self, lines: List[str], start_line: int, delimiter_line: int, title: str = "") -> Optional[AsciiDocTable]:
+        """
+        Parse a single table starting at the delimiter.
+
+        AsciiDoc table format:
+        .Optional title
+        [optional attributes]
+        |===
+        |Cell1
+        |Cell2
+        (blank line separates rows)
+        |Cell3
+        |Cell4
+        |===
+        """
+        # Get attributes and parse column count
+        attributes = ""
+        if start_line < delimiter_line:
+            # Check if start line is title or attributes
+            start_content = lines[start_line].strip()
+            if start_content.startswith('.') and not start_content.startswith('..'):
+                # Start line is title, attributes might be on next line
+                if start_line + 1 < delimiter_line:
+                    attributes = lines[start_line + 1]
+            else:
+                # Start line is attributes
+                attributes = lines[start_line]
+
+        expected_columns = self._parse_column_count(attributes)
+
+        i = delimiter_line + 1
+        rows = []
+        current_row_cells = []
+        current_cell_lines = []
+        conditionals_before_row = []
+        conditionals_after_row = []
+        in_asciidoc_cell = False  # Track if we're in an a| (AsciiDoc) cell
+
+        while i < len(lines):
+            line = lines[i]
+
+            # Check for table end
+            if self.TABLE_DELIMITER.match(line):
+                # Save any pending cell
+                if current_cell_lines:
+                    current_row_cells.append(TableCell(
+                        content=current_cell_lines.copy(),
+                        conditionals=[]
+                    ))
+                    current_cell_lines = []
+
+                # Save any pending row
+                if current_row_cells:
+                    rows.append(TableRow(
+                        cells=current_row_cells.copy(),
+                        conditionals_before=conditionals_before_row.copy(),
+                        conditionals_after=conditionals_after_row.copy()
+                    ))
+
+                # Get attributes if present (already extracted above)
+                return AsciiDocTable(
+                    start_line=start_line,
+                    end_line=i,
+                    attributes=attributes,
+                    rows=rows,
+                    title=title
+                )
+
+            # Check for conditional directives
+            if self.IFDEF_PATTERN.match(line) or self.ENDIF_PATTERN.match(line):
+                # If we're building a cell (current_cell_lines is not empty) OR
+                # we're in an AsciiDoc cell, add conditional to cell content
+                if current_cell_lines or in_asciidoc_cell:
+                    # Inside a cell - conditional is part of cell content
+                    current_cell_lines.append(line)
+                elif current_row_cells:
+                    # Between cells in the same row
+                    conditionals_after_row.append(line)
+                else:
+                    # Conditional before any cells in this row
+                    conditionals_before_row.append(line)
+                i += 1
+                continue
+
+            # Blank line handling
+            if not line.strip():
+                # In AsciiDoc cells (a|), blank lines are part of cell content
+                if in_asciidoc_cell:
+                    current_cell_lines.append(line)
+                    i += 1
+                    continue
+
+                # Otherwise, blank line separates rows
+                # Save pending cell if exists
+                if current_cell_lines:
+                    current_row_cells.append(TableCell(
+                        content=current_cell_lines.copy(),
+                        conditionals=[]
+                    ))
+                    current_cell_lines = []
+                    in_asciidoc_cell = False
+
+                # Save row if we have cells
+                if current_row_cells:
+                    rows.append(TableRow(
+                        cells=current_row_cells.copy(),
+                        conditionals_before=conditionals_before_row.copy(),
+                        conditionals_after=conditionals_after_row.copy()
+                    ))
+                    current_row_cells = []
+                    conditionals_before_row = []
+                    conditionals_after_row = []
+
+                i += 1
+                continue
+
+            # Check for cell separator (|) or cell type specifier (a|, s|, etc.)
+            if self.CELL_SEPARATOR.match(line):
+                # Determine if line starts with | or with a cell type specifier
+                if line.startswith('|'):
+                    # Standard cell separator
+                    cell_content = line[1:]  # Remove leading |
+                else:
+                    # Cell type specifier without leading | (e.g., "a|text")
+                    cell_content = line
+
+                # Save previous cell if exists
+                if current_cell_lines:
+                    current_row_cells.append(TableCell(
+                        content=current_cell_lines.copy(),
+                        conditionals=[]
+                    ))
+                    current_cell_lines = []
+                    in_asciidoc_cell = False  # Reset for next cell
+
+                # Check if row is complete (have enough cells based on cols attribute)
+                current_row_cells, conditionals_before_row, conditionals_after_row = \
+                    self._finalize_row_if_complete(
+                        current_row_cells, conditionals_before_row,
+                        conditionals_after_row, expected_columns, rows
+                    )
+
+                # Check for cell type specifier (a|, s|, etc.)
+                # Type specifiers are single characters followed by |
+                if len(cell_content) > 1 and cell_content[0] in 'ashdmev' and cell_content[1] == '|':
+                    # Track if this is an AsciiDoc cell (a|)
+                    if cell_content[0] == 'a':
+                        in_asciidoc_cell = True
+                    cell_content = cell_content[2:]  # Remove type specifier and |
+
+                cell_content = cell_content.strip()
+
+                # Check if there are multiple cells on the same line (e.g., |Cell1 |Cell2 |Cell3)
+                if '|' in cell_content:
+                    # Split by | to get multiple cells
+                    parts = cell_content.split('|')
+                    for part in parts:
+                        part = part.strip()
+                        if part:  # Skip empty parts
+                            current_row_cells.append(TableCell(
+                                content=[part],
+                                conditionals=[]
+                            ))
+
+                    # Multi-cell line completes a row - finalize it
+                    if current_row_cells:
+                        rows.append(TableRow(
+                            cells=current_row_cells.copy(),
+                            conditionals_before=conditionals_before_row.copy(),
+                            conditionals_after=conditionals_after_row.copy()
+                        ))
+                        current_row_cells = []
+                        conditionals_before_row = []
+                        conditionals_after_row = []
+                else:
+                    # Single cell on this line
+                    if cell_content:
+                        current_cell_lines.append(cell_content)
+                    # If empty, just start a new cell with no content yet
+
+                i += 1
+                continue
+
+            # Check for cell type specifier on its own line (e.g., "a|", "s|", "h|")
+            # This is actually a cell SEPARATOR with type specifier
+            # Example:
+            # |<1>        ← Cell 1
+            # a|          ← Start cell 2 with type 'a' (AsciiDoc)
+            # content...  ← Cell 2 content
+            stripped_line = line.strip()
+            if (len(stripped_line) == 2 and
+                    stripped_line[0] in 'ashdmev' and
+                    stripped_line[1] == '|' and
+                    (current_cell_lines or current_row_cells)):
+                # Save previous cell if we have one
+                if current_cell_lines:
+                    current_row_cells.append(TableCell(
+                        content=current_cell_lines.copy(),
+                        conditionals=[]
+                    ))
+                    current_cell_lines = []
+
+                # Check if row is complete
+                current_row_cells, conditionals_before_row, conditionals_after_row = \
+                    self._finalize_row_if_complete(
+                        current_row_cells, conditionals_before_row,
+                        conditionals_after_row, expected_columns, rows
+                    )
+
+                # Set cell type for the NEW cell we're starting
+                if stripped_line[0] == 'a':
+                    in_asciidoc_cell = True
+                # Start collecting content for the new cell (no content on this line)
+                i += 1
+                continue
+
+            # Regular content line (continuation of current cell)
+            if current_cell_lines or current_row_cells:
+                current_cell_lines.append(line)
+
+            i += 1
+
+        # Return None if we didn't find a proper table end
+        return None
+
+    def is_callout_table(self, table: AsciiDocTable) -> bool:
+        """
+        Determine if a table is a callout explanation table.
+        A callout table has two columns: callout number and explanation.
+        Accepts both callout format (<1>) and plain numbers (1).
+        Skips header rows if present.
+        """
+        if not table.rows:
+            return False
+
+        # Check if all rows have exactly 2 cells
+        if not all(len(row.cells) == 2 for row in table.rows):
+            return False
+
+        # Determine if there's a header row and skip it
+        has_header = self._has_header_row(table)
+        data_rows = table.rows[1:] if has_header else table.rows
+
+        if not data_rows:
+            return False
+
+        # Check if first cell of each data row is a callout number (either <1> or 1)
+        for row in data_rows:
+            first_cell = row.cells[0]
+            if not first_cell.content:
+                return False
+
+            # First line of first cell should be a callout number or plain number
+            first_line = first_cell.content[0].strip()
+            is_match, _ = self._is_callout_or_number(first_line)
+            if not is_match:
+                return False
+
+        return True
+
+    def _has_header_row(self, table: AsciiDocTable) -> bool:
+        """
+        Check if table has a header row.
+        Common header patterns: "Item", "Value", "Description", "Column", etc.
+
+        A row is a header if:
+        - It does NOT start with a callout number (<1> or 1)
+        - It contains common header keywords in the cells
+        """
+        if not table.rows:
+            return False
+
+        first_row = table.rows[0]
+        if not first_row.cells or len(first_row.cells) < 2:
+            return False
+
+        # If first cell is a callout number, this is NOT a header
+        first_cell = first_row.cells[0]
+        if first_cell.content:
+            first_cell_text = first_cell.content[0].strip()
+            is_callout, _ = self._is_callout_or_number(first_cell_text)
+            if is_callout:
+                return False
+
+        # Collect text from all cells in first row
+        header_text = ' '.join(
+            cell.content[0] if cell.content else ''
+            for cell in first_row.cells
+        ).lower()
+
+        # Check for common header keywords (as whole words)
+        header_keywords = ['item', 'description', 'value', 'column', 'parameter', 'field', 'name']
+        import re
+        return any(re.search(r'\b' + re.escape(keyword) + r'\b', header_text) for keyword in header_keywords)
+
+    def is_3column_callout_table(self, table: AsciiDocTable) -> bool:
+        """
+        Determine if a table is a 3-column callout explanation table.
+        Format: Item (number) | Value | Description
+
+        This format is used in some documentation (e.g., Debezium) where:
+        - Column 1: Item number (1, 2, 3...) corresponding to callout numbers
+        - Column 2: The value/code being explained
+        - Column 3: Description/explanation text
+        """
+        if not table.rows:
+            return False
+
+        # Determine if there's a header row
+        has_header = self._has_header_row(table)
+        data_rows = table.rows[1:] if has_header else table.rows
+
+        if not data_rows:
+            return False
+
+        # Check if all data rows have exactly 3 cells
+        if not all(len(row.cells) == 3 for row in data_rows):
+            return False
+
+        # Check if first cell of each data row contains a callout or plain number (1, 2, 3... or <1>, <2>...)
+        for row in data_rows:
+            first_cell = row.cells[0]
+            if not first_cell.content:
+                return False
+
+            # First line of first cell should be a callout number or plain number
+            first_line = first_cell.content[0].strip()
+            is_match, _ = self._is_callout_or_number(first_line)
+            if not is_match:
+                return False
+
+        return True
+
+    def get_table_callout_numbers(self, table: AsciiDocTable) -> List[int]:
+        """
+        Extract just the callout numbers from a table (in order, with duplicates).
+        Used for validation and diagnostics.
+
+        Returns:
+            List of callout numbers in the order they appear in the table.
+            Preserves duplicates to help identify table errors.
+        """
+        callout_numbers = []
+
+        # Determine if there's a header row and skip it
+        has_header = self._has_header_row(table)
+        data_rows = table.rows[1:] if has_header else table.rows
+
+        for row in data_rows:
+            # Handle both 2-column and 3-column tables
+            if len(row.cells) < 2:
+                continue
+
+            first_cell = row.cells[0]
+            if not first_cell.content:
+                continue
+
+            # Extract callout number (supports both <1> and 1 formats)
+            first_line = first_cell.content[0].strip()
+            is_match, callout_num = self._is_callout_or_number(first_line)
+            if is_match:
+                callout_numbers.append(callout_num)
+
+        return callout_numbers
+
+    def extract_callout_explanations_from_table(self, table: AsciiDocTable) -> Dict[int, Tuple[List[str], List[str]]]:
+        """
+        Extract callout explanations from a table.
+        Returns dict mapping callout number to tuple of (explanation_lines, conditionals).
+
+        The conditionals list includes any ifdef/ifndef/endif statements that should
+        be preserved when converting the table to other formats.
+
+        Accepts both callout format (<1>) and plain numbers (1).
+        Skips header rows if present.
+
+        Note: If table contains duplicate callout numbers, the last one wins.
+        Use get_table_callout_numbers() to detect duplicates.
+        """
+        explanations = {}
+
+        # Determine if there's a header row and skip it
+        has_header = self._has_header_row(table)
+        data_rows = table.rows[1:] if has_header else table.rows
+
+        for row in data_rows:
+            if len(row.cells) != 2:
+                continue
+
+            callout_cell = row.cells[0]
+            explanation_cell = row.cells[1]
+
+            # Extract callout number (supports both <1> and 1 formats)
+            first_line = callout_cell.content[0].strip()
+            is_match, callout_num = self._is_callout_or_number(first_line)
+            if not is_match:
+                continue
+
+            # Collect explanation lines, preserving blank lines and conditionals inline
+            # Blank lines will need to become continuation markers (+) in definition lists
+            explanation_lines = []
+            for line in explanation_cell.content:
+                # Preserve ALL lines including conditionals and blank lines
+                # Empty lines will be marked as '' which signals need for continuation marker
+                explanation_lines.append(line)
+
+            # Collect conditionals that appear before/after the row
+            row_conditionals = []
+            row_conditionals.extend(row.conditionals_before)
+            row_conditionals.extend(row.conditionals_after)
+
+            explanations[callout_num] = (explanation_lines, row_conditionals)
+
+        return explanations
+
+    def extract_3column_callout_explanations(self, table: AsciiDocTable) -> Dict[int, Tuple[List[str], List[str], List[str]]]:
+        """
+        Extract callout explanations from a 3-column table.
+        Returns dict mapping callout number to tuple of (value_lines, description_lines, conditionals).
+
+        Format: Item | Value | Description
+        - Item: Number (1, 2, 3...) or callout (<1>, <2>...) corresponding to callout number
+        - Value: The code/value being explained
+        - Description: Explanation text
+
+        The conditionals list includes any ifdef/ifndef/endif statements that should
+        be preserved when converting the table to other formats.
+
+        Accepts both callout format (<1>) and plain numbers (1).
+        """
+        explanations = {}
+
+        # Determine if there's a header row and skip it
+        has_header = self._has_header_row(table)
+        data_rows = table.rows[1:] if has_header else table.rows
+
+        for row in data_rows:
+            if len(row.cells) != 3:
+                continue
+
+            item_cell = row.cells[0]
+            value_cell = row.cells[1]
+            desc_cell = row.cells[2]
+
+            # Extract item number (maps to callout number) - supports both <1> and 1 formats
+            if not item_cell.content:
+                continue
+
+            item_num_str = item_cell.content[0].strip()
+            is_match, callout_num = self._is_callout_or_number(item_num_str)
+            if not is_match:
+                continue
+
+            # Collect value lines (column 2), preserving all content including conditionals
+            value_lines = []
+            for line in value_cell.content:
+                value_lines.append(line)
+
+            # Collect description lines (column 3), preserving all content including conditionals
+            description_lines = []
+            for line in desc_cell.content:
+                description_lines.append(line)
+
+            # Collect conditionals that appear before/after the row
+            row_conditionals = []
+            row_conditionals.extend(row.conditionals_before)
+            row_conditionals.extend(row.conditionals_after)
+
+            explanations[callout_num] = (value_lines, description_lines, row_conditionals)
+
+        return explanations
+
+    def find_callout_table_after_code_block(self, lines: List[str], code_block_end: int) -> Optional[AsciiDocTable]:
+        """
+        Find a callout explanation table that appears after a code block.
+
+        Args:
+            lines: All lines in the document
+            code_block_end: Line number where the code block ends
+
+        Returns:
+            AsciiDocTable if a callout table is found, None otherwise
+        """
+        # Skip the closing delimiter of the code block (----, ...., etc.)
+        i = code_block_end + 1
+        if i < len(lines) and lines[i].strip() in ['----', '....', '====']:
+            i += 1
+
+        # Skip blank lines and continuation markers after code block
+        while i < len(lines) and (not lines[i].strip() or lines[i].strip() == '+'):
+            i += 1
+
+        # Look for a table starting within the next few lines
+        # (allowing for possible text between code block and table)
+        search_limit = min(i + 10, len(lines))
+
+        for j in range(i, search_limit):
+            line = lines[j]
+
+            # If we encounter a list-format callout explanation, stop
+            # (list format takes precedence over table format further away)
+            if self.CALLOUT_NUMBER.match(line.strip()):
+                return None
+
+            # If we encounter another code block start, stop
+            if line.strip() in ['----', '....'] or line.strip().startswith('[source'):
+                return None
+
+            # Check for table delimiter
+            if self.TABLE_DELIMITER.match(line):
+                # Found a table, extract attributes and title
+                attributes = ""
+                title = ""
+                start_line = j
+
+                # Check line before delimiter for attributes [cols="..."]
+                if j > 0 and self.TABLE_START.match(lines[j - 1]):
+                    attributes = lines[j - 1]
+                    start_line = j - 1
+
+                    # Check line before attributes for title .Title
+                    if j > 1 and lines[j - 2].strip().startswith('.') and not lines[j - 2].strip().startswith('..'):
+                        title = lines[j - 2].strip()
+                        start_line = j - 2
+                elif j > 0 and lines[j - 1].strip().startswith('.') and not lines[j - 1].strip().startswith('..'):
+                    # Title directly before delimiter (no attributes)
+                    title = lines[j - 1].strip()
+                    start_line = j - 1
+
+                table = self._parse_table(lines, start_line, j, title)
+                if table and (self.is_callout_table(table) or self.is_3column_callout_table(table)):
+                    return table
+
+                # If we found a table but it's not a callout table, stop searching
+                break
+
+        return None
+
+    def convert_table_to_deflist(self, table: AsciiDocTable, preserve_conditionals: bool = True) -> List[str]:
+        """
+        Convert a two-column table to an AsciiDoc definition list.
+
+        Args:
+            table: The table to convert
+            preserve_conditionals: Whether to preserve ifdef/ifndef/endif statements
+
+        Returns:
+            List of lines representing the definition list
+        """
+        output = []
+
+        for row in table.rows:
+            if len(row.cells) != 2:
+                continue
+
+            # Add conditionals before row
+            if preserve_conditionals and row.conditionals_before:
+                output.extend(row.conditionals_before)
+
+            # First cell is the term
+            term_lines = row.cells[0].content
+            if term_lines:
+                output.append(term_lines[0])
+
+            # Second cell is the definition
+            definition_lines = row.cells[1].content
+            if definition_lines:
+                # Filter out conditionals if needed
+                if preserve_conditionals:
+                    for line in definition_lines:
+                        if self.IFDEF_PATTERN.match(line) or self.ENDIF_PATTERN.match(line):
+                            output.append(line)
+                        else:
+                            output.append(f" {line}")
+                else:
+                    for line in definition_lines:
+                        if not (self.IFDEF_PATTERN.match(line) or self.ENDIF_PATTERN.match(line)):
+                            output.append(f" {line}")
+
+            # Add conditionals after row
+            if preserve_conditionals and row.conditionals_after:
+                output.extend(row.conditionals_after)
+
+            # Add blank line between entries
+            output.append("")
+
+        # Remove trailing blank line
+        if output and not output[-1].strip():
+            output.pop()
+
+        return output
+
+    def convert_table_to_bullets(self, table: AsciiDocTable, preserve_conditionals: bool = True) -> List[str]:
+        """
+        Convert a two-column table to a bulleted list.
+
+        Args:
+            table: The table to convert
+            preserve_conditionals: Whether to preserve ifdef/ifndef/endif statements
+
+        Returns:
+            List of lines representing the bulleted list
+        """
+        output = []
+
+        for row in table.rows:
+            if len(row.cells) != 2:
+                continue
+
+            # Add conditionals before row
+            if preserve_conditionals and row.conditionals_before:
+                output.extend(row.conditionals_before)
+
+            # Get the term (first cell)
+            term_lines = row.cells[0].content
+            term = term_lines[0] if term_lines else ""
+
+            # Get the definition (second cell)
+            definition_lines = row.cells[1].content
+
+            # Filter conditionals from definition if needed
+            filtered_def_lines = []
+            inline_conditionals = []
+
+            for line in definition_lines:
+                if self.IFDEF_PATTERN.match(line) or self.ENDIF_PATTERN.match(line):
+                    if preserve_conditionals:
+                        inline_conditionals.append(line)
+                else:
+                    filtered_def_lines.append(line)
+
+            # Create bullet item
+            if filtered_def_lines:
+                first_line = filtered_def_lines[0]
+                output.append(f"* *{term}*: {first_line}")
+
+                # Add continuation lines with proper indentation
+                for line in filtered_def_lines[1:]:
+                    output.append(f" {line}")
+
+            # Add inline conditionals if present
+            if preserve_conditionals and inline_conditionals:
+                output.extend(inline_conditionals)
+
+            # Add conditionals after row
+            if preserve_conditionals and row.conditionals_after:
+                output.extend(row.conditionals_after)
+
+        return output
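For orientation, here is a minimal, illustrative sketch of how the TableParser added in this version might be driven. It is not part of the package: the sample AsciiDoc table is invented, and it assumes the callout_lib package shipped in this wheel is importable from the installed environment.

# Illustrative sketch only: exercises the TableParser API shown in the diff above,
# assuming callout_lib from this wheel is on the import path.
from callout_lib.table_parser import TableParser

# A made-up two-column callout table, as it might appear in an .adoc file.
doc = """\
[cols="1,3"]
|===
|<1>
|Sets the connector name.

|<2>
|Defines the database hostname.
|===
""".splitlines()

parser = TableParser()
for table in parser.find_tables(doc):
    if parser.is_callout_table(table):
        # Maps callout number -> (explanation lines, surrounding conditionals).
        explanations = parser.extract_callout_explanations_from_table(table)
        print(explanations[1][0])  # ['Sets the connector name.']
        # Rewrite the two-column rows as definition-list-style lines.
        print("\n".join(parser.convert_table_to_deflist(table)))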