markdown-flow 0.2.19__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
1
+ """
2
+ Output Parser Module
3
+
4
+ Handles output instructions and preserved content processing for MarkdownFlow documents.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..constants import (
10
+ COMPILED_INLINE_PRESERVE_REGEX,
11
+ COMPILED_PRESERVE_FENCE_REGEX,
12
+ OUTPUT_INSTRUCTION_PREFIX,
13
+ OUTPUT_INSTRUCTION_SUFFIX,
14
+ )
15
+
16
+
17
def is_preserved_content_block(content: str) -> bool:
    """
    Report whether ``content`` consists solely of preserved content.

    Every non-blank line must either sit between a pair of fence markers
    (lines matched by COMPILED_PRESERVE_FENCE_REGEX) or be a complete inline
    preserved line (matched by COMPILED_INLINE_PRESERVE_REGEX). Fenced and
    inline forms may be mixed freely within one block.

    Args:
        content: Content to check

    Returns:
        True when at least one preserved element was seen, nothing lies
        outside the markers, and every opened fence has been closed
    """
    text = content.strip()
    if not text:
        return False

    inside_fence = False
    seen_preserved = False

    for raw_line in text.split("\n"):
        candidate = raw_line.strip()

        # A fence line toggles the block state; opening one already counts
        # as preserved content, even if the block turns out to be empty.
        if COMPILED_PRESERVE_FENCE_REGEX.match(candidate):
            if not inside_fence:
                seen_preserved = True
            inside_fence = not inside_fence
            continue

        # Blank lines are neutral wherever they appear.
        if not candidate:
            continue

        # Anything between fences is preserved by definition.
        if inside_fence:
            seen_preserved = True
            continue

        # Outside a fence the line must be a well-formed inline marker.
        inline = COMPILED_INLINE_PRESERVE_REGEX.match(candidate)
        if inline is None:
            return False

        inner = inline.group(1).strip()
        if not inner or "===" in inner:
            return False
        seen_preserved = True

    # An unterminated fence disqualifies the whole block.
    return seen_preserved and not inside_fence
80
+
81
+
82
def process_output_instructions(content: str) -> tuple[str, bool]:
    """
    Rewrite preserved-content markers as output instructions.

    Two marker forms are recognised, scanning line by line:

    - inline: a line holding exactly two ``===`` runs around some text and
      not starting with ``!``; only the ``===...===`` span is replaced, the
      rest of the line is kept.
    - multiline: a block opened and closed by lines matching
      COMPILED_PRESERVE_FENCE_REGEX; the whole block collapses into a single
      output instruction.

    In both cases the payload is wrapped between OUTPUT_INSTRUCTION_PREFIX
    and OUTPUT_INSTRUCTION_SUFFIX. A fence that is never closed is left
    untouched together with everything after it.

    Args:
        content: Raw content containing output instructions

    Returns:
        Tuple of (processed_content, has_preserved_content):
        - processed_content: Content with the markers converted
        - has_preserved_content: True if at least one marker was converted
    """
    source_lines = content.split("\n")
    total = len(source_lines)
    emitted: list[str] = []
    converted_any = False
    cursor = 0

    while cursor < total:
        raw = source_lines[cursor]

        # Inline form takes precedence over the fence check.
        inline = re.search(r"===\s*(.+?)\s*===", raw)
        if inline and raw.count("===") == 2 and not raw.strip().startswith("!"):
            payload = inline.group(1).strip()
            if payload and "===" not in payload:
                wrapped = f"{OUTPUT_INSTRUCTION_PREFIX}{payload}{OUTPUT_INSTRUCTION_SUFFIX}"
                emitted.append(raw.replace(inline.group(0), wrapped))
                converted_any = True
            else:
                # Empty or malformed payload: pass the line through verbatim.
                emitted.append(raw)
            cursor += 1
            continue

        # Ordinary line: copy as-is.
        if not COMPILED_PRESERVE_FENCE_REGEX.match(raw.strip()):
            emitted.append(raw)
            cursor += 1
            continue

        # Opening fence: look ahead for the matching closing fence.
        closing = cursor + 1
        while closing < total and not COMPILED_PRESERVE_FENCE_REGEX.match(source_lines[closing].strip()):
            closing += 1

        if closing == total:
            # Unterminated block: emit the fence and the remainder unchanged.
            emitted.extend(source_lines[cursor:])
            break

        body = "\n".join(source_lines[cursor + 1 : closing]).strip()

        # Heading payloads: keep the leading "#..." token (up to and including
        # its first space, or its first newline when no space precedes it) and
        # trim whitespace from whatever follows.
        if body.startswith("#"):
            space_at = body.find(" ")
            newline_at = body.find("\n")
            if space_at != -1 and (newline_at == -1 or space_at < newline_at):
                cut = space_at + 1
            elif newline_at != -1:
                cut = newline_at + 1
            else:
                cut = 0
            if cut:
                body = body[:cut] + body[cut:].strip()

        emitted.append(f"{OUTPUT_INSTRUCTION_PREFIX}{body}{OUTPUT_INSTRUCTION_SUFFIX}")
        converted_any = True
        cursor = closing + 1

    return "\n".join(emitted), converted_any
177
+
178
+
179
def extract_preserved_content(content: str) -> str:
    """
    Strip preserved-content markers and return the text they enclose.

    Line handling:
    - a line matching COMPILED_INLINE_PRESERVE_REGEX contributes its trimmed
      inner text; if that text is empty or itself contains ``===`` the line
      is dropped;
    - a line matching COMPILED_PRESERVE_FENCE_REGEX is a delimiter and is
      dropped;
    - any other line is kept exactly as written.

    Args:
        content: Preserved content containing preserved markers

    Returns:
        Remaining lines joined with newlines ("" for blank input)
    """
    body = content.strip()
    if not body:
        return ""

    kept: list[str] = []
    for raw_line in body.split("\n"):
        trimmed = raw_line.strip()

        inline = COMPILED_INLINE_PRESERVE_REGEX.match(trimmed)
        if inline is not None:
            inner = inline.group(1).strip()
            if inner and "===" not in inner:
                kept.append(inner)
            continue

        # Fence delimiters carry no content of their own.
        if COMPILED_PRESERVE_FENCE_REGEX.match(trimmed):
            continue

        kept.append(raw_line)

    return "\n".join(kept)
@@ -0,0 +1,151 @@
1
+ """
2
+ Code Block Preprocessor
3
+
4
+ Extracts code block content before parsing, implementing CommonMark-compliant fenced code blocks.
5
+ """
6
+
7
+ from .code_fence_utils import is_code_fence_end, parse_code_fence_start
8
+
9
+
10
class CodeBlockPreprocessor:
    """
    Code block preprocessor

    Extracts code blocks from document and replaces them with placeholders, so that MarkdownFlow
    syntax inside code blocks is ignored during subsequent parsing.

    Attributes:
        code_blocks: Mapping of placeholder → original code block content (including fence markers)
        counter: Placeholder counter
    """

    # State machine states
    STATE_NORMAL = "NORMAL"
    STATE_IN_CODE_BLOCK = "IN_CODE_BLOCK"

    def __init__(self) -> None:
        """Initialize preprocessor"""
        self.code_blocks: dict[str, str] = {}
        self.counter: int = 0

    def extract_code_blocks(self, document: str) -> str:
        """
        Extract code blocks from document and replace with placeholders

        How it works:
        1. Scan document line by line using a state machine
        2. Detect fenced code blocks via parse_code_fence_start / is_code_fence_end
        3. Replace code block content (including fences) with unique placeholders
        4. Store code block content in internal mapping

        Placeholders accumulate across calls; call reset() before processing
        an unrelated document.

        Args:
            document: Original markdown document

        Returns:
            Processed document (code blocks replaced with placeholders)

        Examples:
            >>> preprocessor = CodeBlockPreprocessor()
            >>> doc = "```python\\nprint('hello')\\n```"
            >>> processed = preprocessor.extract_code_blocks(doc)
            >>> "__MDFLOW_CODE_BLOCK_1__" in processed
            True
        """
        lines = document.split("\n")
        result: list[str] = []

        # State machine variables
        state = self.STATE_NORMAL
        current_fence = None
        code_buffer: list[str] = []

        for line in lines:
            if state == self.STATE_NORMAL:
                # Detect code block opening fence
                fence_info = parse_code_fence_start(line)
                if fence_info is not None:
                    # Enter code block state; the opening fence is part of the block
                    state = self.STATE_IN_CODE_BLOCK
                    current_fence = fence_info
                    code_buffer = [line]
                else:
                    # Normal line, keep as-is
                    result.append(line)

            elif state == self.STATE_IN_CODE_BLOCK:
                # Accumulate code lines (the closing fence is stored too)
                code_buffer.append(line)

                # Detect fence closing
                if is_code_fence_end(line, current_fence):
                    placeholder = self._generate_placeholder()
                    self.code_blocks[placeholder] = "\n".join(code_buffer)

                    # Output placeholder (as a separate line)
                    result.append(placeholder)

                    # Reset state
                    state = self.STATE_NORMAL
                    current_fence = None
                    code_buffer = []

        # Handle unclosed code blocks: put the buffered lines back untouched
        if state == self.STATE_IN_CODE_BLOCK and code_buffer:
            result.extend(code_buffer)

        return "\n".join(result)

    def restore_code_blocks(self, processed: str) -> str:
        """
        Restore placeholders back to original code block content

        All placeholders are substituted in a single pass over ``processed``.
        Restored code is never re-scanned, so a code block whose own text
        happens to contain another placeholder string is returned verbatim.

        Args:
            processed: Processed document containing placeholders

        Returns:
            Restored document

        Examples:
            >>> preprocessor = CodeBlockPreprocessor()
            >>> doc = "```python\\nprint('hello')\\n```"
            >>> processed = preprocessor.extract_code_blocks(doc)
            >>> restored = preprocessor.restore_code_blocks(processed)
            >>> restored == doc
            True
        """
        # Local import: this module has no file-level ``import re``.
        import re

        if not self.code_blocks:
            return processed

        # Longest keys first so no placeholder can shadow a longer one.
        alternatives = "|".join(re.escape(key) for key in sorted(self.code_blocks, key=len, reverse=True))

        # A callable replacement keeps backslashes in the code literal.
        return re.sub(alternatives, lambda match: self.code_blocks[match.group(0)], processed)

    def _generate_placeholder(self) -> str:
        """
        Generate a unique placeholder

        Returns:
            Placeholder in format __MDFLOW_CODE_BLOCK_N__
        """
        self.counter += 1
        return f"__MDFLOW_CODE_BLOCK_{self.counter}__"

    def reset(self) -> None:
        """Reset preprocessor state (for processing new documents)"""
        self.code_blocks = {}
        self.counter = 0

    def get_code_blocks(self) -> dict[str, str]:
        """
        Return all extracted code blocks (for debugging)

        Returns:
            Mapping of placeholder → original code block content (the live
            internal dict, not a copy)
        """
        return self.code_blocks
@@ -0,0 +1,100 @@
1
+ """
2
+ Validation Parser Module
3
+
4
+ Provides validation template generation and response parsing for user input validation.
5
+ """
6
+
7
+ import json
8
+ from typing import Any
9
+
10
+ from ..constants import (
11
+ CONTEXT_BUTTON_OPTIONS_TEMPLATE,
12
+ CONTEXT_CONVERSATION_TEMPLATE,
13
+ CONTEXT_QUESTION_MARKER,
14
+ CONTEXT_QUESTION_TEMPLATE,
15
+ VALIDATION_ILLEGAL_DEFAULT_REASON,
16
+ VALIDATION_RESPONSE_ILLEGAL,
17
+ VALIDATION_RESPONSE_OK,
18
+ VALIDATION_TASK_TEMPLATE,
19
+ )
20
+ from .json_parser import parse_json_response
21
+
22
+
23
def generate_smart_validation_template(
    target_variable: str,
    context: list[dict[str, Any]] | None = None,
    interaction_question: str | None = None,
    buttons: list[dict[str, str]] | None = None,
) -> str:
    """
    Build a minimal validation template for ``target_variable``.

    DEPRECATED: This function is no longer used internally.
    Use _build_validation_messages() in MarkdownFlow class instead.

    Only ``target_variable`` influences the result; ``context``,
    ``interaction_question`` and ``buttons`` are accepted purely so that
    existing call sites keep working.

    Args:
        target_variable: Target variable name
        context: Unused (kept for backward compatibility)
        interaction_question: Unused (kept for backward compatibility)
        buttons: Unused (kept for backward compatibility)

    Returns:
        VALIDATION_TASK_TEMPLATE with its ``{target_variable}`` slot filled
        in, followed by a user-answer section holding the literal
        ``{sys_user_input}`` placeholder, with surrounding whitespace trimmed
    """
    # The trailing section is emitted verbatim; {sys_user_input} is left for
    # a later substitution step and is not filled in here.
    user_answer_section = "\n\n# 用户回答\n{sys_user_input}"
    filled = VALIDATION_TASK_TEMPLATE.replace("{target_variable}", target_variable)
    return (filled + user_answer_section).strip()
49
+
50
+
51
def parse_validation_response(llm_response: str, original_input: str, target_variable: str) -> dict[str, Any]:
    """
    Parse LLM validation response, returning standard format.

    The response is first parsed with parse_json_response. If that yields a
    dict whose ``result`` equals VALIDATION_RESPONSE_OK or
    VALIDATION_RESPONSE_ILLEGAL (case-insensitive), the structured answer is
    used. Otherwise the raw text is scanned for a stand-alone "ok", "okay"
    or "valid".

    Malformed structured answers are tolerated instead of raising: a
    non-string ``result`` falls through to the text check, and a missing or
    non-dict ``parse_vars`` is treated as empty.

    Args:
        llm_response: LLM's raw response
        original_input: User's original input
        target_variable: Target variable name

    Returns:
        ``{"content": "", "variables": {...}}`` on success, where the target
        variable's value is always a list; ``{"content": <reason or raw
        response>, "variables": None}`` on failure
    """
    # Local import: this module has no file-level ``import re``.
    import re

    try:
        parsed_response = parse_json_response(llm_response)
    except (json.JSONDecodeError, ValueError, KeyError):
        # JSON parsing failed, fall back to text mode
        parsed_response = None

    if isinstance(parsed_response, dict):
        raw_result = parsed_response.get("result", "")
        # Models occasionally emit null/true/numbers here; only strings can match.
        result = raw_result.lower() if isinstance(raw_result, str) else ""

        if result == VALIDATION_RESPONSE_OK:
            # Validation successful
            parse_vars = parsed_response.get("parse_vars")
            if not isinstance(parse_vars, dict):
                parse_vars = {}

            if target_variable not in parse_vars:
                parse_vars[target_variable] = original_input.strip()

            # Ensure the variable value is in list format (user_input format)
            if not isinstance(parse_vars[target_variable], list):
                parse_vars[target_variable] = [parse_vars[target_variable]]

            return {"content": "", "variables": parse_vars}

        if result == VALIDATION_RESPONSE_ILLEGAL:
            # Validation failed; an absent, null or empty reason gets the default text
            reason = parsed_response.get("reason") or VALIDATION_ILLEGAL_DEFAULT_REASON
            return {"content": reason, "variables": None}

    # Text response parsing (fallback processing).
    # The keyword must not be embedded in a longer ASCII word, so "invalid",
    # "token" or "look" do not count as approval. ASCII-letter lookarounds are
    # used rather than \b so a keyword directly adjacent to non-Latin text
    # (e.g. CJK characters) is still recognised.
    response_lower = llm_response.lower()
    if re.search(r"(?<![a-z])(?:ok|okay|valid)(?![a-z])", response_lower):
        # Return in list format to match user_input format
        return {"content": "", "variables": {target_variable: [original_input.strip()]}}

    return {"content": llm_response, "variables": None}
@@ -0,0 +1,95 @@
1
+ """
2
+ Variable Parser Module
3
+
4
+ Provides variable extraction and replacement functionality for MarkdownFlow documents.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..constants import (
10
+ COMPILED_BRACE_VARIABLE_REGEX,
11
+ COMPILED_PERCENT_VARIABLE_REGEX,
12
+ VARIABLE_DEFAULT_VALUE,
13
+ )
14
+
15
+
16
def extract_variables_from_text(text: str) -> list[str]:
    """
    Collect the distinct variable names referenced in ``text``.

    Two reference styles are scanned, each with its pre-compiled pattern:
    - %{{variable_name}} via COMPILED_PERCENT_VARIABLE_REGEX (preserved variables)
    - {{variable_name}} via COMPILED_BRACE_VARIABLE_REGEX (replaceable variables)

    Names are whitespace-trimmed before de-duplication.

    Args:
        text: Text content to analyze

    Returns:
        Sorted list of unique variable names
    """
    percent_names = {found.strip() for found in COMPILED_PERCENT_VARIABLE_REGEX.findall(text)}
    brace_names = {found.strip() for found in COMPILED_BRACE_VARIABLE_REGEX.findall(text)}
    return sorted(percent_names | brace_names)
43
+
44
+
45
+ def replace_variables_in_text(text: str, variables: dict[str, str | list[str]]) -> str:
46
+ """
47
+ Replace variables in text, undefined or empty variables are auto-assigned "UNKNOWN".
48
+
49
+ Args:
50
+ text: Text containing variables
51
+ variables: Variable name to value mapping
52
+
53
+ Returns:
54
+ Text with variables replaced
55
+ """
56
+ if not text or not isinstance(text, str):
57
+ return text or ""
58
+
59
+ # Check each variable for null or empty values, assign "UNKNOWN" if so
60
+ if variables:
61
+ for key, value in variables.items():
62
+ if value is None or value == "" or (isinstance(value, list) and not value):
63
+ variables[key] = VARIABLE_DEFAULT_VALUE
64
+
65
+ # Initialize variables as empty dict (if None)
66
+ if not variables:
67
+ variables = {}
68
+
69
+ # Find all {{variable}} format variable references
70
+ variable_pattern = r"\{\{([^{}]+)\}\}"
71
+ matches = re.findall(variable_pattern, text)
72
+
73
+ # Assign "UNKNOWN" to undefined variables
74
+ for var_name in matches:
75
+ var_name = var_name.strip()
76
+ if var_name not in variables:
77
+ variables[var_name] = "UNKNOWN"
78
+
79
+ # Use updated replacement logic, preserve %{{var_name}} format variables
80
+ result = text
81
+ for var_name, var_value in variables.items():
82
+ # Convert value to string based on type
83
+ if isinstance(var_value, list):
84
+ # Multiple values - join with comma
85
+ value_str = ", ".join(str(v) for v in var_value if v is not None and str(v).strip())
86
+ if not value_str:
87
+ value_str = VARIABLE_DEFAULT_VALUE
88
+ else:
89
+ value_str = str(var_value) if var_value is not None else VARIABLE_DEFAULT_VALUE
90
+
91
+ # Use negative lookbehind assertion to exclude %{{var_name}} format
92
+ pattern = f"(?<!%){{{{{re.escape(var_name)}}}}}"
93
+ result = re.sub(pattern, value_str, result)
94
+
95
+ return result
@@ -0,0 +1,16 @@
1
+ """
2
+ Markdown-Flow LLM Providers Module
3
+
4
+ Provides built-in LLM provider implementations.
5
+ """
6
+
7
+ from .config import ProviderConfig
8
+ from .openai import OpenAIProvider, create_default_provider, create_provider
9
+
10
+
11
+ __all__ = [
12
+ "ProviderConfig",
13
+ "OpenAIProvider",
14
+ "create_provider",
15
+ "create_default_provider",
16
+ ]
@@ -0,0 +1,46 @@
1
+ """
2
+ Provider Configuration Module
3
+
4
+ Provides configuration classes for LLM providers.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass, field
9
+
10
+
11
+ @dataclass
12
+ class ProviderConfig:
13
+ """
14
+ Configuration for LLM providers.
15
+
16
+ Supports environment variable defaults for easy configuration.
17
+ """
18
+
19
+ api_key: str = field(default_factory=lambda: os.getenv("LLM_API_KEY", ""))
20
+ """API key for the LLM service. Default: LLM_API_KEY environment variable."""
21
+
22
+ base_url: str = field(default_factory=lambda: os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"))
23
+ """Base URL for the API endpoint. Default: LLM_BASE_URL environment variable or OpenAI default."""
24
+
25
+ model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gpt-3.5-turbo"))
26
+ """Default model name. Default: LLM_MODEL environment variable or gpt-3.5-turbo."""
27
+
28
+ temperature: float = field(default_factory=lambda: float(os.getenv("LLM_TEMPERATURE", "0.7")))
29
+ """Default temperature (0.0-2.0). Default: LLM_TEMPERATURE environment variable or 0.7."""
30
+
31
+ debug: bool = field(default_factory=lambda: os.getenv("LLM_DEBUG", "false").lower() in ("true", "1", "yes"))
32
+ """Enable debug mode (colorized console output). Default: LLM_DEBUG environment variable or False."""
33
+
34
+ timeout: float | None = field(default_factory=lambda: float(os.getenv("LLM_TIMEOUT")) if os.getenv("LLM_TIMEOUT") else None)
35
+ """Request timeout in seconds. None means no timeout. Default: LLM_TIMEOUT environment variable or None."""
36
+
37
+ def __post_init__(self):
38
+ """Validate configuration after initialization."""
39
+ if not self.api_key:
40
+ raise ValueError("API key is required. Set it via ProviderConfig(api_key='...') or LLM_API_KEY environment variable.")
41
+
42
+ if self.temperature < 0.0 or self.temperature > 2.0:
43
+ raise ValueError(f"Temperature must be between 0.0 and 2.0, got {self.temperature}")
44
+
45
+ if self.timeout is not None and self.timeout <= 0:
46
+ raise ValueError(f"Timeout must be positive or None, got {self.timeout}")