rolfedh-doc-utils 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +95 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +79 -0
- callout_lib/detector.py +220 -0
- convert_callouts_interactive.py +532 -0
- convert_callouts_to_deflist.py +118 -316
- {rolfedh_doc_utils-0.1.23.dist-info → rolfedh_doc_utils-0.1.25.dist-info}/METADATA +1 -1
- {rolfedh_doc_utils-0.1.23.dist-info → rolfedh_doc_utils-0.1.25.dist-info}/RECORD +13 -7
- {rolfedh_doc_utils-0.1.23.dist-info → rolfedh_doc_utils-0.1.25.dist-info}/entry_points.txt +1 -0
- {rolfedh_doc_utils-0.1.23.dist-info → rolfedh_doc_utils-0.1.25.dist-info}/top_level.txt +2 -0
- {rolfedh_doc_utils-0.1.23.dist-info → rolfedh_doc_utils-0.1.25.dist-info}/WHEEL +0 -0
- {rolfedh_doc_utils-0.1.23.dist-info → rolfedh_doc_utils-0.1.25.dist-info}/licenses/LICENSE +0 -0
callout_lib/detector.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Callout Detection Module
|
|
3
|
+
|
|
4
|
+
Detects code blocks with callouts and extracts callout information from AsciiDoc files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import List, Dict, Tuple, Optional
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Callout:
    """A numbered callout and the explanation text attached to it.

    Attributes:
        number: The callout number, e.g. ``1`` for ``<1>``.
        lines: The explanation text, one list entry per source line.
        is_optional: Whether the explanation is flagged as optional;
            defaults to ``False``.
    """

    number: int
    # Stored line by line (not joined) so the original formatting survives.
    lines: List[str]
    is_optional: bool = False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class CalloutGroup:
    """The callouts that share a single line of a code block.

    Attributes:
        code_line: The term that identifies the line: either the code line
            with its callout markers removed, or a user-replaceable value
            taken from that line.
        callout_numbers: Every callout number found on the line, in order
            of appearance.
    """

    code_line: str
    callout_numbers: List[int]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class CodeBlock:
    """A delimited code block located in a list of document lines.

    Attributes:
        start_line: Index of the line that opens the block (the
            ``[source]`` attribute line when present, otherwise the
            opening delimiter).
        end_line: Index of the closing delimiter line.
        delimiter: The delimiter text that fences the block.
        content: The lines between the opening and closing delimiters.
        language: The source language, or ``None`` when not declared.
    """

    start_line: int
    end_line: int
    delimiter: str
    content: List[str]
    language: Optional[str] = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CalloutDetector:
    """Detects and extracts callout information from AsciiDoc code blocks.

    All methods operate on lists of text lines and keep no state between
    calls; line positions are zero-based indexes into those lists.
    """

    # Code block attribute line: [source], [source,language], optionally
    # followed by further attributes. Group 1 captures the language.
    CODE_BLOCK_START = re.compile(r'^\[source(?:,\s*(\w+))?(?:[,\s]+[^\]]+)?\]')

    # Callout marker inside code; may occur several times on one line.
    CALLOUT_IN_CODE = re.compile(r'<(\d+)>')

    # Callout explanation line: "<1> Explanation text".
    CALLOUT_EXPLANATION = re.compile(r'^<(\d+)>\s+(.+)$')

    # User-replaceable value in angle brackets, e.g. <my-name>. The negative
    # lookbehind rejects "<<" (heredoc syntax); requiring a leading letter
    # rejects numeric callouts and most comparison expressions.
    USER_VALUE_PATTERN = re.compile(r'(?<!<)<([a-zA-Z][^>]*)>')

    # Delimiter lines recognised as fencing a code block.
    _BLOCK_DELIMITERS = ('----', '....')

    # Leading markers that flag an explanation as optional (compared
    # case-insensitively).
    _OPTIONAL_PREFIXES = ('optional.', 'optional:')

    # Parenthesised optional marker anywhere in the first explanation line.
    _OPTIONAL_PAREN = re.compile(r'\s*\(optional\)\s*', re.IGNORECASE)

    @staticmethod
    def _find_closing_delimiter(lines: List[str], start: int, delimiter: str) -> Optional[int]:
        """Return the index of the first line at or after *start* whose
        stripped text equals *delimiter*, or ``None`` if there is none."""
        for index in range(start, len(lines)):
            if lines[index].strip() == delimiter:
                return index
        return None

    @classmethod
    def _split_optional_marker(cls, first_line: str) -> Tuple[str, bool]:
        """Strip an "optional" marker from the first explanation line.

        Returns:
            ``(text, is_optional)`` where *text* is *first_line* without the
            marker. A leading ``Optional.`` / ``Optional:`` takes precedence
            over a parenthesised ``(optional)``; both are matched without
            regard to case.
        """
        lowered = first_line.lower()
        for prefix in cls._OPTIONAL_PREFIXES:
            if lowered.startswith(prefix):
                return first_line[len(prefix):].strip(), True
        if '(optional)' in lowered:
            return cls._OPTIONAL_PAREN.sub(' ', first_line).strip(), True
        return first_line, False

    def find_code_blocks(self, lines: List[str]) -> List[CodeBlock]:
        """Find the code blocks in a document.

        A block introduced by a ``[source]`` attribute line is always
        reported. A bare delimited block (``----`` or ``....`` with no
        ``[source]`` line) is reported only if its content contains at
        least one callout marker.

        Args:
            lines: The document, one entry per line.

        Returns:
            The blocks in document order. ``start_line`` is the index of the
            ``[source]`` line (or of the opening delimiter for a bare
            block) and ``end_line`` is the index of the closing delimiter.
            A block whose closing delimiter is missing is not reported and
            ends the scan.
        """
        blocks = []
        total = len(lines)
        i = 0

        while i < total:
            match = self.CODE_BLOCK_START.match(lines[i])
            if match:
                opener = i + 1
                if opener >= total or lines[opener].strip() not in self._BLOCK_DELIMITERS:
                    # Attribute line without a delimited block. Advance by
                    # one line only, so the following line is still
                    # examined (it may itself start a block).
                    i += 1
                    continue
                language = match.group(1)
                require_callouts = False
            elif lines[i].strip() in self._BLOCK_DELIMITERS:
                opener = i
                language = None
                require_callouts = True
            else:
                i += 1
                continue

            delimiter = lines[opener].strip()
            content_start = opener + 1
            end = self._find_closing_delimiter(lines, content_start, delimiter)
            if end is None:
                # Unterminated block: the rest of the document belongs to
                # it, so there is nothing further to scan.
                break

            content = lines[content_start:end]
            if not require_callouts or any(self.CALLOUT_IN_CODE.search(line) for line in content):
                blocks.append(CodeBlock(
                    start_line=i,
                    end_line=end,
                    delimiter=delimiter,
                    content=content,
                    language=language
                ))
            i = end + 1

        return blocks

    def extract_callouts_from_code(self, content: List[str]) -> List[CalloutGroup]:
        """Extract callout numbers from code block content.

        Lines without callout markers are ignored. Each remaining line
        yields one group, so several callouts on the same line stay
        together.

        Args:
            content: The lines of a code block.

        Returns:
            One ``CalloutGroup`` per line that carries callouts. Its
            ``code_line`` is the rightmost angle-bracket value on the line
            (without the brackets) when there is one, otherwise the line
            with callout markers and surrounding whitespace removed.
        """
        groups = []

        for line in content:
            callout_nums = [int(num) for num in self.CALLOUT_IN_CODE.findall(line)]
            if not callout_nums:
                continue

            line_without_callouts = self.CALLOUT_IN_CODE.sub('', line).strip()
            user_values = self.USER_VALUE_PATTERN.findall(line_without_callouts)

            # The rightmost value is the one closest to the callout markers.
            code_line = user_values[-1] if user_values else line_without_callouts

            groups.append(CalloutGroup(
                code_line=code_line,
                callout_numbers=callout_nums
            ))

        return groups

    def extract_callout_explanations(self, lines: List[str], start_line: int) -> Tuple[Dict[int, Callout], int]:
        """Extract the callout explanations that follow a code block.

        Blank lines and ``+`` continuation markers directly after
        *start_line* are skipped. Explanations are then read as long as each
        one starts with ``<N> text``; an explanation also absorbs the
        following lines up to a blank line, the next explanation, or a line
        starting with ``[start=``.

        Args:
            lines: The document, one entry per line.
            start_line: Index of the code block's closing delimiter.

        Returns:
            ``(explanations, end)`` where *explanations* maps callout number
            to ``Callout`` and *end* is the index of the last line consumed
            (the line before the one where scanning stopped).
        """
        explanations = {}
        total = len(lines)
        i = start_line + 1  # Start after the closing delimiter

        while i < total and lines[i].strip() in ('', '+'):
            i += 1

        while i < total:
            match = self.CALLOUT_EXPLANATION.match(lines[i])
            if not match:
                break

            num = int(match.group(1))
            first_line = match.group(2).strip()
            i += 1

            # Continuation lines are kept verbatim to preserve formatting.
            continuation = []
            while i < total:
                line = lines[i]
                if not line.strip() or self.CALLOUT_EXPLANATION.match(line) or line.startswith('[start='):
                    break
                continuation.append(line)
                i += 1

            # Only the first line is inspected for an "optional" marker.
            text, is_optional = self._split_optional_marker(first_line)
            explanations[num] = Callout(num, [text] + continuation, is_optional)

        return explanations, i - 1

    def remove_callouts_from_code(self, content: List[str]) -> List[str]:
        """Return *content* with every callout marker removed.

        Trailing whitespace left behind on each line is stripped as well;
        leading whitespace is kept.
        """
        return [self.CALLOUT_IN_CODE.sub('', line).rstrip() for line in content]

    def validate_callouts(self, callout_groups: List[CalloutGroup], explanations: Dict[int, Callout]) -> Tuple[bool, set, set]:
        """Check that the callouts used in code match the explained ones.

        Args:
            callout_groups: Groups extracted from the code block.
            explanations: Explanations keyed by callout number.

        Returns:
            ``(is_valid, code_nums, explanation_nums)``; *is_valid* is true
            when the two sets of numbers are equal.
        """
        code_nums = {num for group in callout_groups for num in group.callout_numbers}
        explanation_nums = set(explanations.keys())
        return code_nums == explanation_nums, code_nums, explanation_nums
|