rolfedh-doc-utils 0.1.24__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ """
2
+ Callout Detection Module
3
+
4
+ Detects code blocks with callouts and extracts callout information from AsciiDoc files.
5
+ """
6
+
7
+ import re
8
+ from typing import List, Dict, Tuple, Optional
9
+ from dataclasses import dataclass
10
+
11
+
12
@dataclass
class Callout:
    """One callout explanation: its number plus the text that explains it.

    Attributes:
        number: The callout number, i.e. the N of a ``<N>`` marker.
        lines: The explanation text, kept as one entry per line so the
            original formatting is preserved.
        is_optional: Whether the explanation is flagged as optional.
            Defaults to ``False``.
    """

    number: int
    lines: List[str]
    is_optional: bool = False
18
+
19
+
20
@dataclass
class CalloutGroup:
    """The callouts that are attached to a single line of code.

    Attributes:
        code_line: The code line itself, with the callout markers removed.
        callout_numbers: Every callout number that appears on that line.
    """

    code_line: str
    callout_numbers: List[int]
25
+
26
+
27
@dataclass
class CodeBlock:
    """A delimited code block together with its position and metadata.

    Attributes:
        start_line: Line number at which the block starts.
        end_line: Line number at which the block ends.
        delimiter: The delimiter string that fences the block.
        content: The lines found between the opening and closing delimiter.
        language: The source language of the block, or ``None`` (the
            default) when no language is known.
    """

    start_line: int
    end_line: int
    delimiter: str
    content: List[str]
    language: Optional[str] = None
35
+
36
+
37
class CalloutDetector:
    """Detects and extracts callout information from AsciiDoc code blocks.

    All methods work on a document that has already been split into a list
    of lines; line positions are 0-based indices into that list.

    Known limitations (unchanged behavior):
      * Only the exact delimiters ``----`` and ``....`` are recognised.
      * Explanation collection stops at the first blank line, so callout
        explanations separated by blank lines are not gathered together.
    """

    # Block attribute line: [source], [source,language] or either form
    # followed by further attributes. Group 1 is the language, if any.
    CODE_BLOCK_START = re.compile(r'^\[source(?:,\s*(\w+))?(?:[,\s]+[^\]]+)?\]')

    # Callout marker inside code, e.g. <1>; may occur several times per line.
    CALLOUT_IN_CODE = re.compile(r'<(\d+)>')

    # Callout explanation line: "<1> Explanation text".
    CALLOUT_EXPLANATION = re.compile(r'^<(\d+)>\s+(.+)$')

    # User-replaceable value such as <my_value>. A '<' that directly follows
    # another '<' (heredoc "<<") or that is not immediately followed by a
    # letter is ignored.
    USER_VALUE_PATTERN = re.compile(r'(?<!<)<([a-zA-Z][^>]*)>')

    # Delimiter lines that open/close a block (compared after stripping).
    _BLOCK_DELIMITERS = ('----', '....')

    # Leading markers that flag an explanation as optional (matched
    # case-insensitively against the first explanation line).
    _OPTIONAL_PREFIXES = ('optional.', 'optional:')

    # Parenthesised optional marker, used both to detect and to remove it.
    _OPTIONAL_PAREN = re.compile(r'\s*\(optional\)\s*', re.IGNORECASE)

    @staticmethod
    def _find_closing_delimiter(lines: List[str], start: int, delimiter: str) -> Optional[int]:
        """Return the index of the first line at or after *start* that equals *delimiter*, else None."""
        for index in range(start, len(lines)):
            if lines[index].strip() == delimiter:
                return index
        return None

    def _split_optional_marker(self, first_line: str) -> Tuple[str, bool]:
        """Return (*first_line* without its optional marker, whether a marker was found)."""
        lowered = first_line.lower()
        for prefix in self._OPTIONAL_PREFIXES:
            if lowered.startswith(prefix):
                return first_line[len(prefix):].strip(), True
        # Detect with the same case-insensitive pattern that performs the
        # removal, so "(OPTIONAL)" is treated like "(Optional)".
        if self._OPTIONAL_PAREN.search(first_line):
            return self._OPTIONAL_PAREN.sub(' ', first_line).strip(), True
        return first_line, False

    def find_code_blocks(self, lines: List[str]) -> "List[CodeBlock]":
        """Find all code blocks in the document.

        Two forms are recognised:
          * a ``[source...]`` line immediately followed by a delimiter line;
            such a block is always reported, with the captured language;
          * a bare delimiter line; such a block is reported only when its
            content contains at least one callout marker, with no language.

        Args:
            lines: The document, one entry per line.

        Returns:
            The blocks in document order. ``start_line`` is the index of the
            ``[source...]`` line (or of the opening delimiter for bare
            blocks) and ``end_line`` is the index of the closing delimiter.
            A block whose closing delimiter is missing is not reported, and
            scanning stops there.
        """
        blocks = []
        total = len(lines)
        i = 0

        while i < total:
            match = self.CODE_BLOCK_START.match(lines[i])
            if match:
                start = i
                language = match.group(1)
                callouts_required = False
                i += 1
                if i >= total or lines[i].strip() not in self._BLOCK_DELIMITERS:
                    # No delimiter follows the attribute line. Re-examine
                    # this line on the next pass instead of skipping it: it
                    # may itself be a [source] line.
                    continue
            elif lines[i].strip() in self._BLOCK_DELIMITERS:
                start = i
                language = None
                callouts_required = True
            else:
                i += 1
                continue

            # lines[i] is the opening delimiter here.
            delimiter = lines[i].strip()
            content_start = i + 1
            closing = self._find_closing_delimiter(lines, content_start, delimiter)
            if closing is None:
                # Unterminated block: the remainder of the document belongs
                # to it, so there is nothing further to scan.
                break

            content = lines[content_start:closing]
            if not callouts_required or any(
                self.CALLOUT_IN_CODE.search(line) for line in content
            ):
                blocks.append(CodeBlock(
                    start_line=start,
                    end_line=closing,
                    delimiter=delimiter,
                    content=content,
                    language=language,
                ))
            i = closing + 1

        return blocks

    def extract_callouts_from_code(self, content: List[str]) -> "List[CalloutGroup]":
        """Extract callout numbers from code block content.

        One group is produced per line that carries at least one callout,
        so several callouts on the same line end up in the same group.

        Args:
            content: The lines of a code block.

        Returns:
            A list of CalloutGroup objects in line order. ``code_line`` is
            the text inside the last ``<value>`` placeholder on the line
            (without the angle brackets) when one exists, otherwise the
            stripped line with its callout markers removed.
        """
        groups = []

        for line in content:
            numbers = [int(n) for n in self.CALLOUT_IN_CODE.findall(line)]
            if not numbers:
                continue

            code_text = self.CALLOUT_IN_CODE.sub('', line).strip()
            user_values = self.USER_VALUE_PATTERN.findall(code_text)
            # Prefer the rightmost placeholder (closest to the callout).
            term = user_values[-1] if user_values else code_text

            groups.append(CalloutGroup(code_line=term, callout_numbers=numbers))

        return groups

    def extract_callout_explanations(self, lines: List[str], start_line: int) -> "Tuple[Dict[int, Callout], int]":
        """Extract callout explanations following a code block.

        Blank lines and list-continuation markers (``+``) directly after the
        block are skipped. Each explanation starts on a ``<N> text`` line
        and continues until a blank line, the next explanation, or a line
        starting with ``[start=``. Collection ends at the first line that
        does not start an explanation.

        An explanation is flagged optional when its first line starts with
        ``Optional.`` / ``Optional:`` or contains ``(optional)`` (any
        letter case); the marker is removed from the stored text.

        Args:
            lines: The document, one entry per line.
            start_line: Index of the code block's closing delimiter.

        Returns:
            A tuple ``(explanations, end)`` where ``explanations`` maps each
            callout number to its Callout (a repeated number overwrites the
            earlier entry) and ``end`` is the index of the line just before
            the position where scanning stopped.
        """
        explanations = {}
        total = len(lines)
        i = start_line + 1  # Start after the closing delimiter

        while i < total and lines[i].strip() in ('', '+'):
            i += 1

        while i < total:
            match = self.CALLOUT_EXPLANATION.match(lines[i])
            if not match:
                break

            number = int(match.group(1))
            first_line = match.group(2).strip()
            i += 1

            # Continuation lines keep their original formatting.
            continuation = []
            while i < total:
                line = lines[i]
                if (
                    not line.strip()
                    or self.CALLOUT_EXPLANATION.match(line)
                    or line.startswith('[start=')
                ):
                    break
                continuation.append(line)
                i += 1

            # Only the first line is checked for an optional marker.
            text, is_optional = self._split_optional_marker(first_line)
            explanations[number] = Callout(number, [text] + continuation, is_optional)

        return explanations, i - 1

    def remove_callouts_from_code(self, content: List[str]) -> List[str]:
        """Remove callout numbers from code block content.

        Every ``<N>`` marker is removed (any number per line) and trailing
        whitespace is stripped from every line, including lines that had no
        callout.

        Args:
            content: The lines of a code block.

        Returns:
            A new list with the cleaned lines; *content* is not modified.
        """
        return [self.CALLOUT_IN_CODE.sub('', line).rstrip() for line in content]

    def validate_callouts(self, callout_groups: "List[CalloutGroup]", explanations: "Dict[int, Callout]") -> Tuple[bool, set, set]:
        """Validate that callout numbers in code match explanation numbers.

        Args:
            callout_groups: Groups extracted from the code block.
            explanations: Explanations keyed by callout number.

        Returns:
            A tuple ``(is_valid, code_nums, explanation_nums)``; ``is_valid``
            is True only when both sets of numbers are equal.
        """
        code_nums = {
            number
            for group in callout_groups
            for number in group.callout_numbers
        }
        explanation_nums = set(explanations.keys())

        return code_nums == explanation_nums, code_nums, explanation_nums