markdown-flow 0.2.18__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of markdown-flow might be problematic. Click here for more details.

markdown_flow/llm.py CHANGED
@@ -15,7 +15,6 @@ from .constants import NO_LLM_PROVIDER_ERROR
15
15
  class ProcessMode(Enum):
16
16
  """LLM processing modes."""
17
17
 
18
- PROMPT_ONLY = "prompt_only" # Return prompt only, no LLM call
19
18
  COMPLETE = "complete" # Complete processing (non-streaming)
20
19
  STREAM = "stream" # Streaming processing
21
20
 
@@ -43,7 +42,8 @@ class LLMProvider(ABC):
43
42
  Non-streaming LLM call.
44
43
 
45
44
  Args:
46
- messages: Message list in format [{"role": "system/user/assistant", "content": "..."}]
45
+ messages: Message list in format [{"role": "system/user/assistant", "content": "..."}].
46
+ This list already includes conversation history context merged by MarkdownFlow.
47
47
 
48
48
  Returns:
49
49
  str: LLM response content
@@ -58,7 +58,8 @@ class LLMProvider(ABC):
58
58
  Streaming LLM call.
59
59
 
60
60
  Args:
61
- messages: Message list in format [{"role": "system/user/assistant", "content": "..."}]
61
+ messages: Message list in format [{"role": "system/user/assistant", "content": "..."}].
62
+ This list already includes conversation history context merged by MarkdownFlow.
62
63
 
63
64
  Yields:
64
65
  str: Incremental LLM response content
markdown_flow/models.py CHANGED
@@ -7,7 +7,7 @@ Simplified and refactored data models focused on core functionality.
7
7
  from dataclasses import dataclass, field
8
8
 
9
9
  from .enums import BlockType, InputType
10
- from .utils import extract_variables_from_text
10
+ from .parser import extract_variables_from_text
11
11
 
12
12
 
13
13
  @dataclass
@@ -0,0 +1,34 @@
1
+ """
2
+ Markdown-Flow Parser Module
3
+
4
+ Provides specialized parsers for different aspects of MarkdownFlow document processing.
5
+ """
6
+
7
+ from .interaction import InteractionParser, InteractionType, extract_interaction_question
8
+ from .json_parser import parse_json_response
9
+ from .output import (
10
+ extract_preserved_content,
11
+ is_preserved_content_block,
12
+ process_output_instructions,
13
+ )
14
+ from .validation import generate_smart_validation_template, parse_validation_response
15
+ from .variable import extract_variables_from_text, replace_variables_in_text
16
+
17
+ __all__ = [
18
+ # Variable parsing
19
+ "extract_variables_from_text",
20
+ "replace_variables_in_text",
21
+ # Interaction parsing
22
+ "InteractionParser",
23
+ "InteractionType",
24
+ "extract_interaction_question",
25
+ # Output and preserved content
26
+ "is_preserved_content_block",
27
+ "extract_preserved_content",
28
+ "process_output_instructions",
29
+ # Validation
30
+ "generate_smart_validation_template",
31
+ "parse_validation_response",
32
+ # JSON parsing
33
+ "parse_json_response",
34
+ ]
@@ -0,0 +1,354 @@
1
+ """
2
+ Interaction Parser Module
3
+
4
+ Provides three-layer interaction parsing for MarkdownFlow ?[] format validation,
5
+ variable detection, and content parsing.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Any
10
+
11
+ from ..constants import (
12
+ COMPILED_INTERACTION_REGEX,
13
+ COMPILED_LAYER1_INTERACTION_REGEX,
14
+ COMPILED_LAYER2_VARIABLE_REGEX,
15
+ COMPILED_LAYER3_ELLIPSIS_REGEX,
16
+ COMPILED_SINGLE_PIPE_SPLIT_REGEX,
17
+ )
18
+
19
+
20
class InteractionType(Enum):
    """Kinds of input an interaction block can request."""

    # Free-text input only: ?[%{{var}}...question]
    TEXT_ONLY = "text_only"

    # Single-choice buttons only: ?[%{{var}} A|B]
    BUTTONS_ONLY = "buttons_only"

    # Single-choice buttons plus free text: ?[%{{var}} A|B|...question]
    BUTTONS_WITH_TEXT = "buttons_with_text"

    # Multi-select buttons: ?[%{{var}} A||B]
    BUTTONS_MULTI_SELECT = "buttons_multi_select"

    # Multi-select buttons plus free text: ?[%{{var}} A||B||...question]
    BUTTONS_MULTI_WITH_TEXT = "buttons_multi_with_text"

    # Display-only buttons with no variable assignment: ?[Continue|Cancel]
    NON_ASSIGNMENT_BUTTON = "non_assignment_button"
29
+
30
+
31
def extract_interaction_question(content: str) -> str | None:
    """
    Return the question part of an interaction block, if it has one.

    The stripped content is matched against ``COMPILED_INTERACTION_REGEX``
    (the ``?[...]`` interaction format). Inside the brackets, everything
    after the first ``...`` separator is treated as the question text.

    Args:
        content: Raw interaction block content

    Returns:
        The stripped text following the first ``...``, or None when the
        content does not match or contains no ``...`` separator
    """
    found = COMPILED_INTERACTION_REGEX.match(content.strip())
    if not found:
        return None

    # Prefer the captured group; when the pattern has no groups, drop the
    # two leading and one trailing wrapper characters ("?[" and "]") by hand.
    inner = found.group(1) if found.groups() else found.group(0)[2:-1]

    if "..." not in inner:
        return None

    # Only the first separator splits; later "..." stay part of the question.
    _, question = inner.split("...", 1)
    return question.strip()
57
+
58
+
59
+ class InteractionParser:
60
+ """
61
+ Three-layer interaction parser for ?[] format validation,
62
+ variable detection, and content parsing.
63
+ """
64
+
65
+ def __init__(self):
66
+ """Initialize parser."""
67
+
68
+ def parse(self, content: str) -> dict[str, Any]:
69
+ """
70
+ Main parsing method.
71
+
72
+ Args:
73
+ content: Raw interaction block content
74
+
75
+ Returns:
76
+ Standardized parsing result with type, variable, buttons, and question fields
77
+ """
78
+ try:
79
+ # Layer 1: Validate basic format
80
+ inner_content = self._layer1_validate_format(content)
81
+ if inner_content is None:
82
+ return self._create_error_result(f"Invalid interaction format: {content}")
83
+
84
+ # Layer 2: Variable detection and pattern classification
85
+ has_variable, variable_name, remaining_content = self._layer2_detect_variable(inner_content)
86
+
87
+ # Layer 3: Specific content parsing
88
+ if has_variable:
89
+ assert variable_name is not None, "variable_name should not be None when has_variable is True"
90
+ return self._layer3_parse_variable_interaction(variable_name, remaining_content)
91
+ return self._layer3_parse_display_buttons(inner_content)
92
+
93
+ except Exception as e:
94
+ return self._create_error_result(f"Parsing error: {str(e)}")
95
+
96
+ def _layer1_validate_format(self, content: str) -> str | None:
97
+ """
98
+ Layer 1: Validate ?[] format and extract content.
99
+
100
+ Args:
101
+ content: Raw content
102
+
103
+ Returns:
104
+ Extracted bracket content, None if validation fails
105
+ """
106
+ content = content.strip()
107
+ match = COMPILED_LAYER1_INTERACTION_REGEX.search(content)
108
+
109
+ if not match:
110
+ return None # type: ignore[unreachable]
111
+
112
+ # Ensure matched content is complete (no other text)
113
+ matched_text = match.group(0)
114
+ if matched_text.strip() != content:
115
+ return None
116
+
117
+ return match.group(1)
118
+
119
+ def _layer2_detect_variable(self, inner_content: str) -> tuple[bool, str | None, str]:
120
+ """
121
+ Layer 2: Detect variables and classify patterns.
122
+
123
+ Args:
124
+ inner_content: Content extracted from layer 1
125
+
126
+ Returns:
127
+ Tuple of (has_variable, variable_name, remaining_content)
128
+ """
129
+ match = COMPILED_LAYER2_VARIABLE_REGEX.match(inner_content)
130
+
131
+ if not match:
132
+ # No variable, use entire content for display button parsing
133
+ return False, None, inner_content # type: ignore[unreachable]
134
+
135
+ variable_name = match.group(1).strip()
136
+ remaining_content = match.group(2).strip()
137
+
138
+ return True, variable_name, remaining_content
139
+
140
+ def _layer3_parse_variable_interaction(self, variable_name: str, content: str) -> dict[str, Any]:
141
+ """
142
+ Layer 3: Parse variable interactions (variable assignment type).
143
+
144
+ Args:
145
+ variable_name: Variable name
146
+ content: Content after variable
147
+
148
+ Returns:
149
+ Parsing result dictionary
150
+ """
151
+ # Detect ... separator
152
+ ellipsis_match = COMPILED_LAYER3_ELLIPSIS_REGEX.match(content)
153
+
154
+ if ellipsis_match:
155
+ # Has ... separator
156
+ before_ellipsis = ellipsis_match.group(1).strip()
157
+ question = ellipsis_match.group(2).strip()
158
+
159
+ if before_ellipsis:
160
+ # Has prefix content (buttons or single option) + text input
161
+ buttons, is_multi_select = self._parse_buttons(before_ellipsis)
162
+ interaction_type = InteractionType.BUTTONS_MULTI_WITH_TEXT if is_multi_select else InteractionType.BUTTONS_WITH_TEXT
163
+ return {
164
+ "type": interaction_type,
165
+ "variable": variable_name,
166
+ "buttons": buttons,
167
+ "question": question,
168
+ "is_multi_select": is_multi_select,
169
+ }
170
+ # Pure text input
171
+ return {
172
+ "type": InteractionType.TEXT_ONLY,
173
+ "variable": variable_name,
174
+ "question": question,
175
+ "is_multi_select": False,
176
+ }
177
+ # No ... separator
178
+ if ("|" in content or "||" in content) and content: # type: ignore[unreachable]
179
+ # Pure button group
180
+ buttons, is_multi_select = self._parse_buttons(content)
181
+ interaction_type = InteractionType.BUTTONS_MULTI_SELECT if is_multi_select else InteractionType.BUTTONS_ONLY
182
+ return {
183
+ "type": interaction_type,
184
+ "variable": variable_name,
185
+ "buttons": buttons,
186
+ "is_multi_select": is_multi_select,
187
+ }
188
+ if content: # type: ignore[unreachable]
189
+ # Single button
190
+ button = self._parse_single_button(content)
191
+ return {
192
+ "type": InteractionType.BUTTONS_ONLY,
193
+ "variable": variable_name,
194
+ "buttons": [button],
195
+ "is_multi_select": False,
196
+ }
197
+ # Pure text input (no hint)
198
+ return {
199
+ "type": InteractionType.TEXT_ONLY,
200
+ "variable": variable_name,
201
+ "question": "",
202
+ "is_multi_select": False,
203
+ }
204
+
205
+ def _layer3_parse_display_buttons(self, content: str) -> dict[str, Any]:
206
+ """
207
+ Layer 3: Parse display buttons (non-variable assignment type).
208
+
209
+ Args:
210
+ content: Content to parse
211
+
212
+ Returns:
213
+ Parsing result dictionary
214
+ """
215
+ if not content:
216
+ # Empty content: ?[]
217
+ return {
218
+ "type": InteractionType.NON_ASSIGNMENT_BUTTON,
219
+ "buttons": [{"display": "", "value": ""}],
220
+ }
221
+
222
+ if "|" in content:
223
+ # Multiple buttons
224
+ buttons, _ = self._parse_buttons(content) # Display buttons don't use multi-select
225
+ return {"type": InteractionType.NON_ASSIGNMENT_BUTTON, "buttons": buttons}
226
+ # Single button
227
+ button = self._parse_single_button(content)
228
+ return {"type": InteractionType.NON_ASSIGNMENT_BUTTON, "buttons": [button]}
229
+
230
+ def _parse_buttons(self, content: str) -> tuple[list[dict[str, str]], bool]:
231
+ """
232
+ Parse button group with fault tolerance.
233
+
234
+ Args:
235
+ content: Button content separated by | or ||
236
+
237
+ Returns:
238
+ Tuple of (button list, is_multi_select)
239
+ """
240
+ if not content or not isinstance(content, str):
241
+ return [], False
242
+
243
+ _, is_multi_select = self._detect_separator_type(content)
244
+
245
+ buttons = []
246
+ try:
247
+ # Use different splitting logic based on separator type
248
+ if is_multi_select:
249
+ # Multi-select mode: split on ||, preserve single |
250
+ button_parts = content.split("||")
251
+ else:
252
+ # Single-select mode: split on single |, but preserve ||
253
+ # Use pre-compiled regex from constants
254
+ button_parts = COMPILED_SINGLE_PIPE_SPLIT_REGEX.split(content)
255
+
256
+ for button_text in button_parts:
257
+ button_text = button_text.strip()
258
+ if button_text:
259
+ button = self._parse_single_button(button_text)
260
+ buttons.append(button)
261
+ except (TypeError, ValueError):
262
+ # Fallback to treating entire content as single button
263
+ return [{"display": content.strip(), "value": content.strip()}], False
264
+
265
+ # For empty content (like just separators), return empty list
266
+ if not buttons and (content.strip() == "||" or content.strip() == "|"):
267
+ return [], is_multi_select
268
+
269
+ # Ensure at least one button exists (but only if there's actual content)
270
+ if not buttons and content.strip():
271
+ buttons = [{"display": content.strip(), "value": content.strip()}]
272
+
273
+ return buttons, is_multi_select
274
+
275
+ def _parse_single_button(self, button_text: str) -> dict[str, str]:
276
+ """
277
+ Parse single button with fault tolerance, supports Button//value format.
278
+
279
+ Args:
280
+ button_text: Button text
281
+
282
+ Returns:
283
+ Dictionary with display and value keys
284
+ """
285
+ if not button_text or not isinstance(button_text, str):
286
+ return {"display": "", "value": ""}
287
+
288
+ button_text = button_text.strip()
289
+ if not button_text:
290
+ return {"display": "", "value": ""}
291
+
292
+ try:
293
+ # Detect Button//value format - split only on first //
294
+ if "//" in button_text:
295
+ parts = button_text.split("//", 1) # Split only on first //
296
+ display = parts[0].strip()
297
+ value = parts[1] if len(parts) > 1 else ""
298
+ # Don't strip value to preserve intentional spacing/formatting
299
+ return {"display": display, "value": value}
300
+ except (ValueError, IndexError):
301
+ # Fallback: use text as both display and value
302
+ pass
303
+
304
+ return {"display": button_text, "value": button_text}
305
+
306
+ def _detect_separator_type(self, content: str) -> tuple[str, bool]:
307
+ """
308
+ Detect separator type and whether it's multi-select.
309
+
310
+ Implements fault tolerance: first separator type encountered determines the behavior.
311
+ Mixed separators are handled by treating the rest as literal text.
312
+
313
+ Args:
314
+ content: Button content to analyze
315
+
316
+ Returns:
317
+ Tuple of (separator, is_multi_select) where separator is '|' or '||'
318
+ """
319
+ if not content or not isinstance(content, str):
320
+ return "|", False
321
+
322
+ # Find first occurrence of separators
323
+ single_pos = content.find("|")
324
+ double_pos = content.find("||")
325
+
326
+ # If no separators found
327
+ if single_pos == -1 and double_pos == -1:
328
+ return "|", False
329
+
330
+ # If only single separator found
331
+ if double_pos == -1:
332
+ return "|", False
333
+
334
+ # If only double separator found
335
+ if single_pos == -1:
336
+ return "||", True
337
+
338
+ # Both found - fault tolerance: first occurrence wins
339
+ # This handles mixed cases like "A||B|C" (multi-select) and "A|B||C" (single-select)
340
+ if double_pos <= single_pos:
341
+ return "||", True
342
+ return "|", False
343
+
344
+ def _create_error_result(self, error_message: str) -> dict[str, Any]:
345
+ """
346
+ Create error result.
347
+
348
+ Args:
349
+ error_message: Error message
350
+
351
+ Returns:
352
+ Error result dictionary
353
+ """
354
+ return {"type": None, "error": error_message} # type: ignore[unreachable]
@@ -0,0 +1,50 @@
1
+ """
2
+ JSON Parser Module
3
+
4
+ Provides robust JSON parsing with support for code blocks and mixed text formats.
5
+ """
6
+
7
+ import json
8
+ import re
9
+ from typing import Any
10
+
11
+ from ..constants import JSON_PARSE_ERROR
12
+
13
+
14
def parse_json_response(response_text: str) -> dict[str, Any]:
    """
    Parse JSON response supporting multiple formats.

    Supports pure JSON strings, ```json code blocks, plain ``` code blocks,
    and JSON objects embedded in surrounding text (including nested and
    empty objects).

    Args:
        response_text: Response text to parse

    Returns:
        Parsed JSON value; for embedded-object recovery this is always a
        dictionary

    Raises:
        ValueError: When no JSON value can be parsed from the text
    """
    text = response_text.strip()

    # Narrow the text to the body of the first fenced code block, if one is
    # present and properly closed; otherwise leave the text untouched.
    for fence_open in ("```json", "```"):
        if fence_open in text:
            start_idx = text.find(fence_open) + len(fence_open)
            end_idx = text.find("```", start_idx)
            if end_idx != -1:
                text = text[start_idx:end_idx].strip()
            break

    try:
        return json.loads(text)
    except json.JSONDecodeError as err:
        # Mixed text: try to decode an object starting at each "{" in turn.
        # raw_decode understands nesting and braces inside strings, unlike a
        # brace-matching regex, and ignores whatever follows the object.
        decoder = json.JSONDecoder()
        brace_at = text.find("{")
        while brace_at != -1:
            try:
                parsed, _ = decoder.raw_decode(text, brace_at)
            except json.JSONDecodeError:
                brace_at = text.find("{", brace_at + 1)
            else:
                return parsed
        raise ValueError(JSON_PARSE_ERROR) from err
@@ -0,0 +1,215 @@
1
+ """
2
+ Output Parser Module
3
+
4
+ Handles output instructions and preserved content processing for MarkdownFlow documents.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..constants import (
10
+ COMPILED_INLINE_PRESERVE_REGEX,
11
+ COMPILED_PRESERVE_FENCE_REGEX,
12
+ OUTPUT_INSTRUCTION_PREFIX,
13
+ OUTPUT_INSTRUCTION_SUFFIX,
14
+ )
15
+
16
+
17
def is_preserved_content_block(content: str) -> bool:
    """
    Tell whether a block consists solely of preserved content.

    Every non-empty line must either sit between a pair of fence markers
    (lines matching ``COMPILED_PRESERVE_FENCE_REGEX``, i.e. ``!===``) or be
    an inline preserved line (matching ``COMPILED_INLINE_PRESERVE_REGEX``,
    i.e. ``===content===``). Both forms may be mixed in one block.

    Args:
        content: Content to check

    Returns:
        True if at least one preserved marker was seen, no line falls
        outside the markers, and every opened fence was closed
    """
    body = content.strip()
    if not body:
        return False

    inside_fence = False
    found_preserved = False

    for raw_line in body.split("\n"):
        text = raw_line.strip()

        if COMPILED_PRESERVE_FENCE_REGEX.match(text):
            # A fence line toggles the state; opening one counts as content.
            if not inside_fence:
                found_preserved = True
            inside_fence = not inside_fence
            continue

        if not text:
            # Blank lines are allowed anywhere.
            continue

        if inside_fence:
            found_preserved = True
            continue

        # Outside a fence the line must be a well-formed inline marker.
        inline = COMPILED_INLINE_PRESERVE_REGEX.match(text)
        if not inline:
            return False

        inner = inline.group(1).strip()
        if not inner or "===" in inner:
            return False
        found_preserved = True

    # An unclosed fence disqualifies the block.
    return found_preserved and not inside_fence
80
+
81
+
82
def process_output_instructions(content: str) -> tuple[str, bool]:
    """
    Rewrite preserved-content markers as output instructions.

    Inline ``===content===`` spans and multiline ``!=== ... !===`` blocks are
    replaced by their content wrapped in ``OUTPUT_INSTRUCTION_PREFIX`` and
    ``OUTPUT_INSTRUCTION_SUFFIX``. A fence that is never closed is left
    untouched, together with every line after it.

    Args:
        content: Raw content containing output instructions

    Returns:
        Tuple of (processed_content, has_preserved_content):
        - processed_content: Content with the markers replaced
        - has_preserved_content: True if at least one marker was converted
    """
    source = content.split("\n")
    total = len(source)
    emitted: list[str] = []
    converted = False
    pos = 0

    while pos < total:
        current = source[pos]

        # Inline form: exactly one ===...=== pair on a line not starting with "!".
        inline = re.search(r"===\s*(.+?)\s*===", current)
        if inline and current.count("===") == 2 and not current.strip().startswith("!"):
            inner = inline.group(1).strip()
            if inner and "===" not in inner:
                wrapped = f"{OUTPUT_INSTRUCTION_PREFIX}{inner}{OUTPUT_INSTRUCTION_SUFFIX}"
                # The rest of the line stays in place around the replacement.
                emitted.append(current.replace(inline.group(0), wrapped))
                converted = True
            else:
                # Empty or malformed inner text: keep the line as written.
                emitted.append(current)
            pos += 1
            continue

        if not COMPILED_PRESERVE_FENCE_REGEX.match(current.strip()):
            # Ordinary line.
            emitted.append(current)
            pos += 1
            continue

        # Opening fence: look ahead for the matching closing fence.
        closing = next(
            (idx for idx in range(pos + 1, total) if COMPILED_PRESERVE_FENCE_REGEX.match(source[idx].strip())),
            None,
        )
        if closing is None:
            # Unclosed fence: emit the marker and all remaining lines verbatim.
            emitted.extend(source[pos:])
            break

        block_text = "\n".join(source[pos + 1 : closing]).strip()

        # Title handling: split off the leading "#..." run up to the first
        # space (or first newline) and re-trim the text that follows it.
        heading = ""
        if block_text.startswith("#"):
            space_at = block_text.find(" ")
            newline_at = block_text.find("\n")
            if space_at != -1 and (newline_at == -1 or space_at < newline_at):
                cut = space_at + 1
            elif newline_at != -1:
                cut = newline_at + 1
            else:
                cut = 0
            heading = block_text[:cut]
            block_text = block_text[cut:].strip()

        emitted.append(f"{OUTPUT_INSTRUCTION_PREFIX}{heading}{block_text}{OUTPUT_INSTRUCTION_SUFFIX}")
        converted = True
        pos = closing + 1

    return "\n".join(emitted), converted
177
+
178
+
179
def extract_preserved_content(content: str) -> str:
    """
    Strip preserved-content markers and return what they enclose.

    Inline lines (``===content===``) are reduced to their inner text, fence
    lines (``!===``) are dropped, and all other lines are kept as they are.

    Args:
        content: Preserved content containing preserved markers

    Returns:
        Actual content with === and !=== markers removed
    """
    body = content.strip()
    if not body:
        return ""

    kept: list[str] = []

    for raw_line in body.split("\n"):
        text = raw_line.strip()

        inline = COMPILED_INLINE_PRESERVE_REGEX.match(text)
        if inline:
            inner = inline.group(1).strip()
            # Inline lines with empty or malformed inner text are discarded.
            if inner and "===" not in inner:
                kept.append(inner)
            continue

        if COMPILED_PRESERVE_FENCE_REGEX.match(text):
            # Fence delimiters carry no content of their own.
            continue

        # Regular line: keep the original, unstripped text.
        kept.append(raw_line)

    return "\n".join(kept)