markdown-flow 0.2.16__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of markdown-flow might be problematic. Click here for more details.

@@ -0,0 +1,215 @@
1
+ """
2
+ Output Parser Module
3
+
4
+ Handles output instructions and preserved content processing for MarkdownFlow documents.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..constants import (
10
+ COMPILED_INLINE_PRESERVE_REGEX,
11
+ COMPILED_PRESERVE_FENCE_REGEX,
12
+ OUTPUT_INSTRUCTION_PREFIX,
13
+ OUTPUT_INSTRUCTION_SUFFIX,
14
+ )
15
+
16
+
17
def is_preserved_content_block(content: str) -> bool:
    """
    Tell whether ``content`` consists solely of preserved content.

    Every non-blank line must be one of:
    - a fence marker line (as recognised by COMPILED_PRESERVE_FENCE_REGEX),
    - a line located between an opening and a closing fence marker,
    - an inline preserved line (as recognised by COMPILED_INLINE_PRESERVE_REGEX)
      whose captured text is non-empty and contains no ``===``.

    Args:
        content: Content to check

    Returns:
        True if at least one preserved element was seen, nothing lies outside
        the markers, and every opened fence has been closed again
    """
    body = content.strip()
    if not body:
        return False

    inside_fence = False
    found_preserved = False

    for raw in body.split("\n"):
        text = raw.strip()

        # A fence marker toggles the block state; opening one counts as content.
        if COMPILED_PRESERVE_FENCE_REGEX.match(text):
            if not inside_fence:
                found_preserved = True
            inside_fence = not inside_fence
            continue

        # Blank lines are neutral wherever they appear.
        if not text:
            continue

        # Anything between two fences is preserved by definition.
        if inside_fence:
            found_preserved = True
            continue

        # Outside a fence only a well-formed inline marker is acceptable.
        inline = COMPILED_INLINE_PRESERVE_REGEX.match(text)
        if inline is None:
            return False

        inner = inline.group(1).strip()
        if not inner or "===" in inner:
            return False
        found_preserved = True

    # An unterminated fence disqualifies the whole block.
    return found_preserved and not inside_fence
80
+
81
+
82
def process_output_instructions(content: str) -> tuple[str, bool]:
    """
    Rewrite preserved-content markers as output instructions.

    Two marker styles are handled line by line:
    - inline: a line holding exactly two ``===`` runs (and not starting with
      ``!``) has its ``===text===`` part replaced in place;
    - multiline: the lines between two fence marker lines are joined and
      emitted as a single entry.

    In both cases the text is wrapped in OUTPUT_INSTRUCTION_PREFIX and
    OUTPUT_INSTRUCTION_SUFFIX. A fence that is never closed is left untouched.

    Args:
        content: Raw content containing output instructions

    Returns:
        Tuple of (processed_content, has_preserved_content):
        - processed_content: Content with the markers converted
        - has_preserved_content: True if at least one marker was converted
    """
    source = content.split("\n")
    total = len(source)
    emitted: list[str] = []
    found_marker = False
    pos = 0

    while pos < total:
        current = source[pos]

        # --- Inline form: ===text=== somewhere on the line -----------------
        inline = re.search(r"===\s*(.+?)\s*===", current)
        if inline and current.count("===") == 2 and not current.strip().startswith("!"):
            payload = inline.group(1).strip()
            if payload and "===" not in payload:
                wrapped = f"{OUTPUT_INSTRUCTION_PREFIX}{payload}{OUTPUT_INSTRUCTION_SUFFIX}"
                emitted.append(current.replace(inline.group(0), wrapped))
                found_marker = True
            else:
                # Malformed inline marker: pass the line through unchanged.
                emitted.append(current)
            pos += 1
            continue

        # --- Ordinary line -------------------------------------------------
        if not COMPILED_PRESERVE_FENCE_REGEX.match(current.strip()):
            emitted.append(current)
            pos += 1
            continue

        # --- Multiline form: gather lines up to the closing fence ----------
        pos += 1
        collected: list[str] = []
        closed = False
        while pos < total:
            candidate = source[pos]
            pos += 1
            if COMPILED_PRESERVE_FENCE_REGEX.match(candidate.strip()):
                closed = True
                break
            collected.append(candidate)

        if not closed:
            # Input ended before a closing fence: restore the opening fence
            # line and everything gathered after it, verbatim.
            emitted.append(current)
            emitted.extend(collected)
            continue

        body = "\n".join(collected).strip()

        # Title handling: when the block starts with '#', the text up to and
        # including the first space (or, failing that, the first newline) is
        # kept as a prefix and the remainder is stripped again.
        heading = ""
        if body.startswith("#"):
            space_at = body.find(" ")
            newline_at = body.find("\n")
            if space_at != -1 and (newline_at == -1 or space_at < newline_at):
                cut = space_at + 1
            elif newline_at != -1:
                cut = newline_at + 1
            else:
                cut = 0
            if cut:
                heading = body[:cut]
                body = body[cut:].strip()

        emitted.append(f"{OUTPUT_INSTRUCTION_PREFIX}{heading}{body}{OUTPUT_INSTRUCTION_SUFFIX}")
        found_marker = True

    return "\n".join(emitted), found_marker
177
+
178
+
179
def extract_preserved_content(content: str) -> str:
    """
    Strip preserved-content markers and return what they enclose.

    Per line (after trimming the whole input):
    - a line matching the inline pattern contributes its captured text, but
      only when that text is non-empty and contains no ``===``; otherwise the
      line is dropped;
    - a fence marker line is dropped;
    - any other line is kept exactly as it was.

    Args:
        content: Preserved content containing preserved markers

    Returns:
        The remaining lines joined with newlines ("" for blank input)
    """
    trimmed = content.strip()
    if not trimmed:
        return ""

    kept: list[str] = []
    for raw in trimmed.split("\n"):
        candidate = raw.strip()

        inline = COMPILED_INLINE_PRESERVE_REGEX.match(candidate)
        if inline is not None:
            payload = inline.group(1).strip()
            if payload and "===" not in payload:
                kept.append(payload)
            continue

        # Fence delimiters carry no content of their own.
        if not COMPILED_PRESERVE_FENCE_REGEX.match(candidate):
            kept.append(raw)

    return "\n".join(kept)
@@ -0,0 +1,121 @@
1
+ """
2
+ Validation Parser Module
3
+
4
+ Provides validation template generation and response parsing for user input validation.
5
+ """
6
+
7
+ import json
8
+ from typing import Any
9
+
10
+ from ..constants import (
11
+ CONTEXT_BUTTON_OPTIONS_TEMPLATE,
12
+ CONTEXT_CONVERSATION_TEMPLATE,
13
+ CONTEXT_QUESTION_MARKER,
14
+ CONTEXT_QUESTION_TEMPLATE,
15
+ SMART_VALIDATION_TEMPLATE,
16
+ VALIDATION_ILLEGAL_DEFAULT_REASON,
17
+ VALIDATION_RESPONSE_ILLEGAL,
18
+ VALIDATION_RESPONSE_OK,
19
+ )
20
+ from .json_parser import parse_json_response
21
+
22
+
23
def generate_smart_validation_template(
    target_variable: str,
    context: list[dict[str, Any]] | None = None,
    interaction_question: str | None = None,
    buttons: list[dict[str, str]] | None = None,
) -> str:
    """
    Build the validation prompt for ``target_variable``.

    The context section is assembled, in this order, from the interaction
    question, the non-empty button display texts, and every assistant message
    in ``context`` whose content does not contain CONTEXT_QUESTION_MARKER.
    The sections are separated by blank lines.

    Args:
        target_variable: Target variable name
        context: Context message list with role and content fields
        interaction_question: Question text from interaction block
        buttons: Button options list with display and value fields

    Returns:
        SMART_VALIDATION_TEMPLATE filled in and stripped; the
        ``{sys_user_input}`` placeholder is left in place for later replacement
    """
    sections: list[str] = []

    # The question goes first.
    if interaction_question:
        sections.append(CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question))

    # Buttons contribute only when at least one has a display text.
    labels = [btn.get("display", "") for btn in buttons if btn.get("display")] if buttons else []
    if labels:
        sections.append(CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=", ".join(labels)))

    # Assistant messages, minus those carrying the question marker.
    for message in context or []:
        if message.get("role") != "assistant":
            continue
        text = message.get("content", "")
        if CONTEXT_QUESTION_MARKER in text:
            continue
        sections.append(CONTEXT_CONVERSATION_TEMPLATE.format(content=text))

    return SMART_VALIDATION_TEMPLATE.format(
        target_variable=target_variable,
        context_info="\n\n".join(sections),
        sys_user_input="{sys_user_input}",  # re-emit the placeholder unchanged
    ).strip()
75
+
76
+
77
def _contains_approval_keyword(text: str) -> bool:
    """Return True when ``text`` holds "ok", "okay" or "valid" as a whole word."""
    # Everything that is not an ASCII letter becomes a separator. This keeps
    # "invalid", "validation failed" or "token" from being read as approval,
    # while keywords next to punctuation or non-Latin text are still found.
    normalized = "".join(ch if "a" <= ch <= "z" else " " for ch in text.lower())
    return any(word in ("ok", "okay", "valid") for word in normalized.split())


def parse_validation_response(llm_response: str, original_input: str, target_variable: str) -> dict[str, Any]:
    """
    Parse LLM validation response, returning standard format.

    The response is first handed to ``parse_json_response``. When that yields
    a dict whose ``result`` equals VALIDATION_RESPONSE_OK or
    VALIDATION_RESPONSE_ILLEGAL, the structured answer is used. Otherwise the
    raw text is scanned for an approval keyword.

    Args:
        llm_response: LLM's raw response
        original_input: User's original input
        target_variable: Target variable name

    Returns:
        Dict with two keys:
        - "content": "" on success, otherwise the rejection reason (or the raw
          response in text mode)
        - "variables": mapping of variable names to values on success,
          None on failure
    """
    try:
        parsed_response = parse_json_response(llm_response)
    except (json.JSONDecodeError, ValueError, KeyError):
        # JSON parsing failed, fall back to text mode
        parsed_response = None

    if isinstance(parsed_response, dict):
        # "result" may be missing or of an unexpected type (bool, null, ...);
        # treat anything that is not a string as "no verdict".
        raw_result = parsed_response.get("result", "")
        result = raw_result.strip().lower() if isinstance(raw_result, str) else ""

        if result == VALIDATION_RESPONSE_OK:
            # A null / non-dict "parse_vars" is treated as empty; copying
            # keeps the parsed response itself untouched.
            raw_vars = parsed_response.get("parse_vars")
            parse_vars = dict(raw_vars) if isinstance(raw_vars, dict) else {}
            if target_variable not in parse_vars:
                parse_vars[target_variable] = original_input.strip()
            return {"content": "", "variables": parse_vars}

        if result == VALIDATION_RESPONSE_ILLEGAL:
            # A missing, null or empty reason falls back to the default text.
            reason = parsed_response.get("reason") or VALIDATION_ILLEGAL_DEFAULT_REASON
            return {"content": reason, "variables": None}

    # Text response parsing (fallback processing)
    if _contains_approval_keyword(llm_response):
        return {"content": "", "variables": {target_variable: original_input.strip()}}
    return {"content": llm_response, "variables": None}
@@ -0,0 +1,95 @@
1
+ """
2
+ Variable Parser Module
3
+
4
+ Provides variable extraction and replacement functionality for MarkdownFlow documents.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..constants import (
10
+ COMPILED_BRACE_VARIABLE_REGEX,
11
+ COMPILED_PERCENT_VARIABLE_REGEX,
12
+ VARIABLE_DEFAULT_VALUE,
13
+ )
14
+
15
+
16
def extract_variables_from_text(text: str) -> list[str]:
    """
    Collect the distinct variable names referenced in ``text``.

    Names are gathered with both pre-compiled patterns
    (COMPILED_PERCENT_VARIABLE_REGEX for ``%{{name}}`` and
    COMPILED_BRACE_VARIABLE_REGEX for ``{{name}}``) and trimmed of
    surrounding whitespace before de-duplication.

    Args:
        text: Text content to analyze

    Returns:
        Sorted list of unique variable names
    """
    names = {
        found.strip()
        for pattern in (COMPILED_PERCENT_VARIABLE_REGEX, COMPILED_BRACE_VARIABLE_REGEX)
        for found in pattern.findall(text)
    }
    return sorted(names)
43
+
44
+
45
+ def replace_variables_in_text(text: str, variables: dict[str, str | list[str]]) -> str:
46
+ """
47
+ Replace variables in text, undefined or empty variables are auto-assigned "UNKNOWN".
48
+
49
+ Args:
50
+ text: Text containing variables
51
+ variables: Variable name to value mapping
52
+
53
+ Returns:
54
+ Text with variables replaced
55
+ """
56
+ if not text or not isinstance(text, str):
57
+ return text or ""
58
+
59
+ # Check each variable for null or empty values, assign "UNKNOWN" if so
60
+ if variables:
61
+ for key, value in variables.items():
62
+ if value is None or value == "" or (isinstance(value, list) and not value):
63
+ variables[key] = VARIABLE_DEFAULT_VALUE
64
+
65
+ # Initialize variables as empty dict (if None)
66
+ if not variables:
67
+ variables = {}
68
+
69
+ # Find all {{variable}} format variable references
70
+ variable_pattern = r"\{\{([^{}]+)\}\}"
71
+ matches = re.findall(variable_pattern, text)
72
+
73
+ # Assign "UNKNOWN" to undefined variables
74
+ for var_name in matches:
75
+ var_name = var_name.strip()
76
+ if var_name not in variables:
77
+ variables[var_name] = "UNKNOWN"
78
+
79
+ # Use updated replacement logic, preserve %{{var_name}} format variables
80
+ result = text
81
+ for var_name, var_value in variables.items():
82
+ # Convert value to string based on type
83
+ if isinstance(var_value, list):
84
+ # Multiple values - join with comma
85
+ value_str = ", ".join(str(v) for v in var_value if v is not None and str(v).strip())
86
+ if not value_str:
87
+ value_str = VARIABLE_DEFAULT_VALUE
88
+ else:
89
+ value_str = str(var_value) if var_value is not None else VARIABLE_DEFAULT_VALUE
90
+
91
+ # Use negative lookbehind assertion to exclude %{{var_name}} format
92
+ pattern = f"(?<!%){{{{{re.escape(var_name)}}}}}"
93
+ result = re.sub(pattern, value_str, result)
94
+
95
+ return result
@@ -0,0 +1,15 @@
1
+ """
2
+ Markdown-Flow LLM Providers Module
3
+
4
+ Provides built-in LLM provider implementations.
5
+ """
6
+
7
+ from .config import ProviderConfig
8
+ from .openai import OpenAIProvider, create_provider, create_default_provider
9
+
10
# Names re-exported as the public API of the providers package.
__all__ = ["ProviderConfig", "OpenAIProvider", "create_provider", "create_default_provider"]
@@ -0,0 +1,51 @@
1
+ """
2
+ Provider Configuration Module
3
+
4
+ Provides configuration classes for LLM providers.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass, field
9
+
10
+
11
+ @dataclass
12
+ class ProviderConfig:
13
+ """
14
+ Configuration for LLM providers.
15
+
16
+ Supports environment variable defaults for easy configuration.
17
+ """
18
+
19
+ api_key: str = field(default_factory=lambda: os.getenv("LLM_API_KEY", ""))
20
+ """API key for the LLM service. Default: LLM_API_KEY environment variable."""
21
+
22
+ base_url: str = field(default_factory=lambda: os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"))
23
+ """Base URL for the API endpoint. Default: LLM_BASE_URL environment variable or OpenAI default."""
24
+
25
+ model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gpt-3.5-turbo"))
26
+ """Default model name. Default: LLM_MODEL environment variable or gpt-3.5-turbo."""
27
+
28
+ temperature: float = field(default_factory=lambda: float(os.getenv("LLM_TEMPERATURE", "0.7")))
29
+ """Default temperature (0.0-2.0). Default: LLM_TEMPERATURE environment variable or 0.7."""
30
+
31
+ debug: bool = field(default_factory=lambda: os.getenv("LLM_DEBUG", "false").lower() in ("true", "1", "yes"))
32
+ """Enable debug mode (colorized console output). Default: LLM_DEBUG environment variable or False."""
33
+
34
+ timeout: float | None = field(
35
+ default_factory=lambda: float(os.getenv("LLM_TIMEOUT")) if os.getenv("LLM_TIMEOUT") else None
36
+ )
37
+ """Request timeout in seconds. None means no timeout. Default: LLM_TIMEOUT environment variable or None."""
38
+
39
+ def __post_init__(self):
40
+ """Validate configuration after initialization."""
41
+ if not self.api_key:
42
+ raise ValueError(
43
+ "API key is required. Set it via ProviderConfig(api_key='...') "
44
+ "or LLM_API_KEY environment variable."
45
+ )
46
+
47
+ if self.temperature < 0.0 or self.temperature > 2.0:
48
+ raise ValueError(f"Temperature must be between 0.0 and 2.0, got {self.temperature}")
49
+
50
+ if self.timeout is not None and self.timeout <= 0:
51
+ raise ValueError(f"Timeout must be positive or None, got {self.timeout}")