markdown-flow 0.2.18__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -0,0 +1,121 @@
+ """
+ Validation Parser Module
+
+ Provides validation template generation and response parsing for user input validation.
+ """
+
+ import json
+ from typing import Any
+
+ from ..constants import (
+     CONTEXT_BUTTON_OPTIONS_TEMPLATE,
+     CONTEXT_CONVERSATION_TEMPLATE,
+     CONTEXT_QUESTION_MARKER,
+     CONTEXT_QUESTION_TEMPLATE,
+     SMART_VALIDATION_TEMPLATE,
+     VALIDATION_ILLEGAL_DEFAULT_REASON,
+     VALIDATION_RESPONSE_ILLEGAL,
+     VALIDATION_RESPONSE_OK,
+ )
+ from .json_parser import parse_json_response
+
+
+ def generate_smart_validation_template(
+     target_variable: str,
+     context: list[dict[str, Any]] | None = None,
+     interaction_question: str | None = None,
+     buttons: list[dict[str, str]] | None = None,
+ ) -> str:
+     """
+     Generate smart validation template based on context and question.
+
+     Args:
+         target_variable: Target variable name
+         context: Context message list with role and content fields
+         interaction_question: Question text from interaction block
+         buttons: Button options list with display and value fields
+
+     Returns:
+         Generated validation template
+     """
+     # Build context information
+     context_info = ""
+     if interaction_question or context or buttons:
+         context_parts = []
+
+         # Add question information (most important, put first)
+         if interaction_question:
+             context_parts.append(CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question))
+
+         # Add button options information
+         if buttons:
+             button_displays = [btn.get("display", "") for btn in buttons if btn.get("display")]
+             if button_displays:
+                 button_options_str = ", ".join(button_displays)
+                 button_info = CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=button_options_str)
+                 context_parts.append(button_info)
+
+         # Add conversation context
+         if context:
+             for msg in context:
+                 if msg.get("role") == "assistant" and CONTEXT_QUESTION_MARKER not in msg.get("content", ""):
+                     # Other assistant messages as context (exclude extracted questions)
+                     context_parts.append(CONTEXT_CONVERSATION_TEMPLATE.format(content=msg.get("content", "")))
+
+         if context_parts:
+             context_info = "\n\n".join(context_parts)
+
+     # Use template from constants
+     # Note: {sys_user_input} will be replaced later in _build_validation_messages
+     return SMART_VALIDATION_TEMPLATE.format(
+         target_variable=target_variable,
+         context_info=context_info,
+         sys_user_input="{sys_user_input}",  # Keep placeholder for later replacement
+     ).strip()
+
+
+ def parse_validation_response(llm_response: str, original_input: str, target_variable: str) -> dict[str, Any]:
+     """
+     Parse LLM validation response, returning standard format.
+
+     Supports JSON format and natural language text responses.
+
+     Args:
+         llm_response: LLM's raw response
+         original_input: User's original input
+         target_variable: Target variable name
+
+     Returns:
+         Standardized parsing result with content and variables fields
+     """
+     try:
+         # Try to parse JSON response
+         parsed_response = parse_json_response(llm_response)
+
+         if isinstance(parsed_response, dict):
+             result = parsed_response.get("result", "").lower()
+
+             if result == VALIDATION_RESPONSE_OK:
+                 # Validation successful
+                 parse_vars = parsed_response.get("parse_vars", {})
+                 if target_variable not in parse_vars:
+                     parse_vars[target_variable] = original_input.strip()
+
+                 return {"content": "", "variables": parse_vars}
+
+             if result == VALIDATION_RESPONSE_ILLEGAL:
+                 # Validation failed
+                 reason = parsed_response.get("reason", VALIDATION_ILLEGAL_DEFAULT_REASON)
+                 return {"content": reason, "variables": None}
+
+     except (json.JSONDecodeError, ValueError, KeyError):
+         # JSON parsing failed, fallback to text mode
+         pass
+
+     # Text response parsing (fallback processing)
+     response_lower = llm_response.lower()
+
+     # Check against standard response format
+     if "ok" in response_lower or "valid" in response_lower:
+         return {"content": "", "variables": {target_variable: original_input.strip()}}
+     return {"content": llm_response, "variables": None}
@@ -0,0 +1,95 @@
+ """
+ Variable Parser Module
+
+ Provides variable extraction and replacement functionality for MarkdownFlow documents.
+ """
+
+ import re
+
+ from ..constants import (
+     COMPILED_BRACE_VARIABLE_REGEX,
+     COMPILED_PERCENT_VARIABLE_REGEX,
+     VARIABLE_DEFAULT_VALUE,
+ )
+
+
+ def extract_variables_from_text(text: str) -> list[str]:
+     """
+     Extract all variable names from text.
+
+     Recognizes two variable formats:
+     - %{{variable_name}} format (preserved variables)
+     - {{variable_name}} format (replaceable variables)
+
+     Args:
+         text: Text content to analyze
+
+     Returns:
+         Sorted list of unique variable names
+     """
+     variables = set()
+
+     # Match %{{...}} format variables using pre-compiled regex
+     matches = COMPILED_PERCENT_VARIABLE_REGEX.findall(text)
+     for match in matches:
+         variables.add(match.strip())
+
+     # Match {{...}} format variables (excluding %) using pre-compiled regex
+     matches = COMPILED_BRACE_VARIABLE_REGEX.findall(text)
+     for match in matches:
+         variables.add(match.strip())
+
+     return sorted(list(variables))
+
+
+ def replace_variables_in_text(text: str, variables: dict[str, str | list[str]]) -> str:
+     """
+     Replace variables in text; undefined or empty variables are auto-assigned "UNKNOWN".
+
+     Args:
+         text: Text containing variables
+         variables: Variable name to value mapping
+
+     Returns:
+         Text with variables replaced
+     """
+     if not text or not isinstance(text, str):
+         return text or ""
+
+     # Check each variable for null or empty values, assign "UNKNOWN" if so
+     if variables:
+         for key, value in variables.items():
+             if value is None or value == "" or (isinstance(value, list) and not value):
+                 variables[key] = VARIABLE_DEFAULT_VALUE
+
+     # Initialize variables as empty dict (if None)
+     if not variables:
+         variables = {}
+
+     # Find all {{variable}} format variable references
+     variable_pattern = r"\{\{([^{}]+)\}\}"
+     matches = re.findall(variable_pattern, text)
+
+     # Assign "UNKNOWN" to undefined variables
+     for var_name in matches:
+         var_name = var_name.strip()
+         if var_name not in variables:
+             variables[var_name] = "UNKNOWN"
+
+     # Use updated replacement logic, preserve %{{var_name}} format variables
+     result = text
+     for var_name, var_value in variables.items():
+         # Convert value to string based on type
+         if isinstance(var_value, list):
+             # Multiple values - join with comma
+             value_str = ", ".join(str(v) for v in var_value if v is not None and str(v).strip())
+             if not value_str:
+                 value_str = VARIABLE_DEFAULT_VALUE
+         else:
+             value_str = str(var_value) if var_value is not None else VARIABLE_DEFAULT_VALUE
+
+         # Use negative lookbehind assertion to exclude %{{var_name}} format
+         pattern = f"(?<!%){{{{{re.escape(var_name)}}}}}"
+         result = re.sub(pattern, value_str, result)
+
+     return result
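
A quick sketch of how the two variable helpers compose (not part of the diff), assuming VARIABLE_DEFAULT_VALUE is "UNKNOWN" and the pre-compiled regexes match the %{{...}} and {{...}} forms described in the docstrings:

    >>> text = "Hi {{name}}, keep %{{name}} literal; plan: {{plan}}"
    >>> extract_variables_from_text(text)
    ['name', 'plan']
    >>> replace_variables_in_text(text, {"name": "Ada", "plan": []})
    'Hi Ada, keep %{{name}} literal; plan: UNKNOWN'
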
@@ -0,0 +1,15 @@
+ """
+ Markdown-Flow LLM Providers Module
+
+ Provides built-in LLM provider implementations.
+ """
+
+ from .config import ProviderConfig
+ from .openai import OpenAIProvider, create_provider, create_default_provider
+
+ __all__ = [
+     "ProviderConfig",
+     "OpenAIProvider",
+     "create_provider",
+     "create_default_provider",
+ ]
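
Given the layout shown in this diff, the subpackage's public surface would be imported roughly like this (top-level package name assumed to be markdown_flow; the optional openai dependency is needed for the provider itself):

    >>> from markdown_flow.providers import ProviderConfig, OpenAIProvider, create_provider, create_default_provider
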
@@ -0,0 +1,51 @@
+ """
+ Provider Configuration Module
+
+ Provides configuration classes for LLM providers.
+ """
+
+ import os
+ from dataclasses import dataclass, field
+
+
+ @dataclass
+ class ProviderConfig:
+     """
+     Configuration for LLM providers.
+
+     Supports environment variable defaults for easy configuration.
+     """
+
+     api_key: str = field(default_factory=lambda: os.getenv("LLM_API_KEY", ""))
+     """API key for the LLM service. Default: LLM_API_KEY environment variable."""
+
+     base_url: str = field(default_factory=lambda: os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"))
+     """Base URL for the API endpoint. Default: LLM_BASE_URL environment variable or OpenAI default."""
+
+     model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gpt-3.5-turbo"))
+     """Default model name. Default: LLM_MODEL environment variable or gpt-3.5-turbo."""
+
+     temperature: float = field(default_factory=lambda: float(os.getenv("LLM_TEMPERATURE", "0.7")))
+     """Default temperature (0.0-2.0). Default: LLM_TEMPERATURE environment variable or 0.7."""
+
+     debug: bool = field(default_factory=lambda: os.getenv("LLM_DEBUG", "false").lower() in ("true", "1", "yes"))
+     """Enable debug mode (colorized console output). Default: LLM_DEBUG environment variable or False."""
+
+     timeout: float | None = field(
+         default_factory=lambda: float(os.getenv("LLM_TIMEOUT")) if os.getenv("LLM_TIMEOUT") else None
+     )
+     """Request timeout in seconds. None means no timeout. Default: LLM_TIMEOUT environment variable or None."""
+
+     def __post_init__(self):
+         """Validate configuration after initialization."""
+         if not self.api_key:
+             raise ValueError(
+                 "API key is required. Set it via ProviderConfig(api_key='...') "
+                 "or LLM_API_KEY environment variable."
+             )
+
+         if self.temperature < 0.0 or self.temperature > 2.0:
+             raise ValueError(f"Temperature must be between 0.0 and 2.0, got {self.temperature}")
+
+         if self.timeout is not None and self.timeout <= 0:
+             raise ValueError(f"Timeout must be positive or None, got {self.timeout}")
@@ -0,0 +1,371 @@
+ """
+ OpenAI-Compatible Provider Implementation
+
+ Provides a production-ready OpenAI-compatible LLM provider with debug mode,
+ token tracking, and comprehensive metadata.
+ """
+
+ import time
+ from collections.abc import Generator
+ from typing import Any
+
+ from ..llm import LLMProvider
+ from .config import ProviderConfig
+
+ try:
+     from openai import OpenAI
+ except ImportError:
+     OpenAI = None  # type: ignore[misc, assignment]
+
+
+ class OpenAIProvider(LLMProvider):
+     """
+     OpenAI-compatible LLM provider implementation.
+
+     Features:
+     - Debug mode with colorized console output
+     - Automatic token usage tracking
+     - Comprehensive metadata (model, temperature, processing time, tokens, timestamp)
+     - Instance-level model/temperature override support
+     - Streaming and non-streaming modes
+     """
+
+     def __init__(self, config: ProviderConfig):
+         """
+         Initialize OpenAI provider.
+
+         Args:
+             config: Provider configuration
+
+         Raises:
+             ImportError: If openai package is not installed
+             ValueError: If configuration is invalid
+         """
+         if OpenAI is None:
+             raise ImportError(
+                 "The 'openai' package is required for OpenAIProvider. "
+                 "Install it with: pip install openai"
+             )
+
+         self.config = config
+         self.client = OpenAI(
+             api_key=config.api_key,
+             base_url=config.base_url,
+             timeout=config.timeout,
+         )
+         self._last_metadata: dict[str, Any] = {}
+
+     def complete(
+         self,
+         messages: list[dict[str, str]],
+         model: str | None = None,
+         temperature: float | None = None,
+     ) -> str:
+         """
+         Non-streaming LLM call.
+
+         Args:
+             messages: Message list
+             model: Optional model override
+             temperature: Optional temperature override
+
+         Returns:
+             LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         # Determine actual model and temperature (instance override > provider default)
+         actual_model = model if model is not None else self.config.model
+         actual_temperature = temperature if temperature is not None else self.config.temperature
+
+         # Debug output: Request info
+         if self.config.debug:
+             self._print_request_info(messages, actual_model, actual_temperature)
+
+         # Format messages
+         formatted_messages = self._format_messages(messages)
+
+         # Record start time
+         start_time = time.time()
+
+         try:
+             # Make API call
+             response = self.client.chat.completions.create(
+                 model=actual_model,
+                 messages=formatted_messages,
+                 temperature=actual_temperature,
+             )
+
+             # Calculate processing time
+             processing_time_ms = int((time.time() - start_time) * 1000)
+
+             # Extract content
+             if not response.choices or len(response.choices) == 0:
+                 raise Exception("API response error: no choices returned")
+
+             choice = response.choices[0]
+             if not choice.message:
+                 raise Exception("Response has no message field")
+
+             content = choice.message.content or ""
+
+             # Extract token usage
+             usage = response.usage
+             metadata = {
+                 "model": actual_model,
+                 "temperature": actual_temperature,
+                 "provider": "openai-compatible",
+                 "processing_time": processing_time_ms,
+                 "timestamp": int(time.time()),
+             }
+
+             if usage:
+                 metadata.update(
+                     {
+                         "prompt_tokens": usage.prompt_tokens,
+                         "output_tokens": usage.completion_tokens,
+                         "total_tokens": usage.total_tokens,
+                     }
+                 )
+
+             # Save metadata for retrieval by MarkdownFlow
+             self._last_metadata = metadata
+
+             # Debug output: Response metadata
+             if self.config.debug:
+                 self._print_response_metadata(metadata)
+
+             return content
+
+         except Exception as e:
+             raise Exception(f"API request failed: {str(e)}") from e
+
+     def stream(
+         self,
+         messages: list[dict[str, str]],
+         model: str | None = None,
+         temperature: float | None = None,
+     ) -> Generator[str, None, None]:
+         """
+         Streaming LLM call.
+
+         Args:
+             messages: Message list
+             model: Optional model override
+             temperature: Optional temperature override
+
+         Yields:
+             Incremental LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         # Determine actual model and temperature
+         actual_model = model if model is not None else self.config.model
+         actual_temperature = temperature if temperature is not None else self.config.temperature
+
+         # Debug output: Request info
+         if self.config.debug:
+             self._print_request_info(messages, actual_model, actual_temperature)
+
+         # Format messages
+         formatted_messages = self._format_messages(messages)
+
+         # Record start time
+         start_time = time.time()
+
+         try:
+             # Create streaming response
+             stream = self.client.chat.completions.create(
+                 model=actual_model,
+                 messages=formatted_messages,
+                 temperature=actual_temperature,
+                 stream=True,
+             )
+
+             for chunk in stream:
+                 if chunk.choices and chunk.choices[0].delta.content:
+                     yield chunk.choices[0].delta.content
+
+             # Calculate processing time after stream completes
+             processing_time_ms = int((time.time() - start_time) * 1000)
+
+             # Save metadata for retrieval by MarkdownFlow
+             metadata = {
+                 "model": actual_model,
+                 "temperature": actual_temperature,
+                 "provider": "openai-compatible",
+                 "processing_time": processing_time_ms,
+                 "timestamp": int(time.time()),
+                 "stream_done": True,
+             }
+             self._last_metadata = metadata
+
+             # Debug output: Stream completion info
+             if self.config.debug:
+                 self._print_response_metadata(metadata)
+
+         except Exception as e:
+             raise ValueError(f"Streaming request failed: {str(e)}") from e
+
+     def get_last_metadata(self) -> dict[str, Any]:
+         """
+         Get metadata from the last LLM call.
+
+         This method allows MarkdownFlow to retrieve comprehensive metadata including
+         token usage, processing time, and other information from the most recent
+         complete() or stream() call.
+
+         Returns:
+             Dictionary containing metadata:
+             - model: Model name used
+             - temperature: Temperature value used
+             - provider: Provider identifier
+             - processing_time: Processing time in milliseconds
+             - timestamp: Unix timestamp
+             - prompt_tokens: Number of input tokens (if available)
+             - output_tokens: Number of output tokens (if available)
+             - total_tokens: Total tokens (if available)
+             - stream_done: True if this was a completed stream (stream mode only)
+
+         Example:
+             >>> provider = create_default_provider()
+             >>> content = provider.complete(messages)
+             >>> metadata = provider.get_last_metadata()
+             >>> print(f"Used {metadata['total_tokens']} tokens")
+         """
+         return self._last_metadata.copy()
+
+     def _format_messages(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
+         """
+         Format messages for API call.
+
+         Args:
+             messages: Raw message list
+
+         Returns:
+             Formatted message list
+         """
+         formatted = []
+         for msg in messages:
+             if isinstance(msg, dict) and "role" in msg and "content" in msg:
+                 formatted.append(
+                     {
+                         "role": msg["role"],
+                         "content": str(msg["content"]),
+                     }
+                 )
+             else:
+                 # Fallback for non-standard format
+                 formatted.append(
+                     {
+                         "role": "user",
+                         "content": str(msg),
+                     }
+                 )
+         return formatted
+
+     def _print_request_info(self, messages: list[dict[str, str]], model: str, temperature: float) -> None:
+         """
+         Print colorized request information to console (debug mode).
+
+         Args:
+             messages: Message list
+             model: Model name
+             temperature: Temperature value
+         """
+         print("\033[97m\033[44m[ ====== LLM Request Start ====== ]\033[0m")
+         print(f"\033[30m\033[42mmodel\033[0m: {model}")
+         print(f"\033[30m\033[42mtemperature\033[0m: {temperature}")
+
+         for message in messages:
+             role = message.get("role", "user")
+             content = message.get("content", "")
+             # Truncate long content for readability
+             display_content = content if len(content) <= 200 else content[:200] + "..."
+             print(f"\033[30m\033[43m{role}\033[0m: {display_content}")
+
+         print("\033[97m\033[44m[ ====== LLM Request End ====== ]\033[0m")
+
+     def _print_response_metadata(self, metadata: dict[str, Any]) -> None:
+         """
+         Print colorized response metadata to console (debug mode).
+
+         Args:
+             metadata: Response metadata dictionary
+         """
+         print("\033[97m\033[42m[ ====== LLM Response Metadata ====== ]\033[0m")
+
+         # Essential fields
+         print(f"\033[36mmodel:\033[0m {metadata.get('model', 'N/A')}")
+         print(f"\033[36mtemperature:\033[0m {metadata.get('temperature', 'N/A')}")
+         print(f"\033[36mprovider:\033[0m {metadata.get('provider', 'N/A')}")
+         print(f"\033[36mprocessing_time:\033[0m {metadata.get('processing_time', 'N/A')} ms")
+
+         # Token usage (if available)
+         if "prompt_tokens" in metadata:
+             print(
+                 f"\033[36mprompt_tokens:\033[0m \033[33m{metadata['prompt_tokens']}\033[0m "
+                 f"\033[36moutput_tokens:\033[0m \033[33m{metadata['output_tokens']}\033[0m "
+                 f"\033[36mtotal_tokens:\033[0m \033[32m{metadata['total_tokens']}\033[0m"
+             )
+
+         print(f"\033[36mtimestamp:\033[0m {metadata.get('timestamp', 'N/A')}")
+
+         if metadata.get("stream_done"):
+             print("\033[36mstream:\033[0m completed")
+
+         print("\033[97m\033[42m[ ====== ======================= ====== ]\033[0m")
+
+
+ def create_provider(config: ProviderConfig | None = None) -> OpenAIProvider:
+     """
+     Create an OpenAI provider instance.
+
+     Args:
+         config: Optional provider configuration. If None, uses default config
+             (reads from environment variables).
+
+     Returns:
+         OpenAIProvider instance
+
+     Raises:
+         ValueError: If configuration is invalid
+         ImportError: If openai package is not installed
+
+     Example:
+         >>> config = ProviderConfig(api_key="sk-...", model="gpt-4")
+         >>> provider = create_provider(config)
+     """
+     if config is None:
+         config = ProviderConfig()
+     return OpenAIProvider(config)
+
+
+ def create_default_provider() -> OpenAIProvider:
+     """
+     Create an OpenAI provider with default configuration.
+
+     Reads configuration from environment variables:
+     - LLM_API_KEY: API key (required)
+     - LLM_BASE_URL: Base URL (default: https://api.openai.com/v1)
+     - LLM_MODEL: Model name (default: gpt-3.5-turbo)
+     - LLM_TEMPERATURE: Temperature (default: 0.7)
+     - LLM_DEBUG: Debug mode (default: false)
+     - LLM_TIMEOUT: Request timeout in seconds (default: None, no timeout)
+
+     Returns:
+         OpenAIProvider instance with default config
+
+     Raises:
+         ValueError: If LLM_API_KEY is not set
+         ImportError: If openai package is not installed
+
+     Example:
+         >>> # Set environment variable first
+         >>> import os
+         >>> os.environ["LLM_API_KEY"] = "sk-..."
+         >>> provider = create_default_provider()
+     """
+     return create_provider()
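
End-to-end sketch of the provider as added in this release (not a verified recipe; the key and prompts are placeholders, and real calls depend on the configured endpoint being reachable and reporting usage):

    >>> config = ProviderConfig(api_key="sk-...", debug=True)
    >>> provider = create_provider(config)
    >>> reply = provider.complete([{"role": "user", "content": "Say hi"}])
    >>> meta = provider.get_last_metadata()
    >>> sorted(key for key in meta if key.endswith("_tokens"))  # present only if usage was reported
    ['output_tokens', 'prompt_tokens', 'total_tokens']
    >>> for piece in provider.stream([{"role": "user", "content": "Count to three"}]):
    ...     print(piece, end="")  # stream_done metadata becomes available after the loop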