markdown-flow 0.2.16__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of markdown-flow might be problematic.

@@ -0,0 +1,371 @@
+"""
+OpenAI-Compatible Provider Implementation
+
+Provides a production-ready OpenAI-compatible LLM provider with debug mode,
+token tracking, and comprehensive metadata.
+"""
+
+import time
+from collections.abc import Generator
+from typing import Any
+
+from ..llm import LLMProvider
+from .config import ProviderConfig
+
+try:
+    from openai import OpenAI
+except ImportError:
+    OpenAI = None  # type: ignore[misc, assignment]
+
+
+class OpenAIProvider(LLMProvider):
+    """
+    OpenAI-compatible LLM provider implementation.
+
+    Features:
+    - Debug mode with colorized console output
+    - Automatic token usage tracking
+    - Comprehensive metadata (model, temperature, processing time, tokens, timestamp)
+    - Instance-level model/temperature override support
+    - Streaming and non-streaming modes
+    """
+
+    def __init__(self, config: ProviderConfig):
+        """
+        Initialize OpenAI provider.
+
+        Args:
+            config: Provider configuration
+
+        Raises:
+            ImportError: If openai package is not installed
+            ValueError: If configuration is invalid
+        """
+        if OpenAI is None:
+            raise ImportError(
+                "The 'openai' package is required for OpenAIProvider. "
+                "Install it with: pip install openai"
+            )
+
+        self.config = config
+        self.client = OpenAI(
+            api_key=config.api_key,
+            base_url=config.base_url,
+            timeout=config.timeout,
+        )
+        self._last_metadata: dict[str, Any] = {}
+
+    def complete(
+        self,
+        messages: list[dict[str, str]],
+        model: str | None = None,
+        temperature: float | None = None,
+    ) -> str:
+        """
+        Non-streaming LLM call.
+
+        Args:
+            messages: Message list
+            model: Optional model override
+            temperature: Optional temperature override
+
+        Returns:
+            LLM response content
+
+        Raises:
+            Exception: If API call fails
+        """
+        # Determine actual model and temperature (instance override > provider default)
+        actual_model = model if model is not None else self.config.model
+        actual_temperature = temperature if temperature is not None else self.config.temperature
+
+        # Debug output: Request info
+        if self.config.debug:
+            self._print_request_info(messages, actual_model, actual_temperature)
+
+        # Format messages
+        formatted_messages = self._format_messages(messages)
+
+        # Record start time
+        start_time = time.time()
+
+        try:
+            # Make API call
+            response = self.client.chat.completions.create(
+                model=actual_model,
+                messages=formatted_messages,
+                temperature=actual_temperature,
+            )
+
+            # Calculate processing time
+            processing_time_ms = int((time.time() - start_time) * 1000)
+
+            # Extract content
+            if not response.choices or len(response.choices) == 0:
+                raise Exception("API response error: no choices returned")
+
+            choice = response.choices[0]
+            if not choice.message:
+                raise Exception("Response has no message field")
+
+            content = choice.message.content or ""
+
+            # Extract token usage
+            usage = response.usage
+            metadata = {
+                "model": actual_model,
+                "temperature": actual_temperature,
+                "provider": "openai-compatible",
+                "processing_time": processing_time_ms,
+                "timestamp": int(time.time()),
+            }
+
+            if usage:
+                metadata.update(
+                    {
+                        "prompt_tokens": usage.prompt_tokens,
+                        "output_tokens": usage.completion_tokens,
+                        "total_tokens": usage.total_tokens,
+                    }
+                )
+
+            # Save metadata for retrieval by MarkdownFlow
+            self._last_metadata = metadata
+
+            # Debug output: Response metadata
+            if self.config.debug:
+                self._print_response_metadata(metadata)
+
+            return content
+
+        except Exception as e:
+            raise Exception(f"API request failed: {str(e)}") from e
+
+    def stream(
+        self,
+        messages: list[dict[str, str]],
+        model: str | None = None,
+        temperature: float | None = None,
+    ) -> Generator[str, None, None]:
+        """
+        Streaming LLM call.
+
+        Args:
+            messages: Message list
+            model: Optional model override
+            temperature: Optional temperature override
+
+        Yields:
+            Incremental LLM response content
+
+        Raises:
+            Exception: If API call fails
+        """
+        # Determine actual model and temperature
+        actual_model = model if model is not None else self.config.model
+        actual_temperature = temperature if temperature is not None else self.config.temperature
+
+        # Debug output: Request info
+        if self.config.debug:
+            self._print_request_info(messages, actual_model, actual_temperature)
+
+        # Format messages
+        formatted_messages = self._format_messages(messages)
+
+        # Record start time
+        start_time = time.time()
+
+        try:
+            # Create streaming response
+            stream = self.client.chat.completions.create(
+                model=actual_model,
+                messages=formatted_messages,
+                temperature=actual_temperature,
+                stream=True,
+            )
+
+            for chunk in stream:
+                if chunk.choices and chunk.choices[0].delta.content:
+                    yield chunk.choices[0].delta.content
+
+            # Calculate processing time after stream completes
+            processing_time_ms = int((time.time() - start_time) * 1000)
+
+            # Save metadata for retrieval by MarkdownFlow
+            metadata = {
+                "model": actual_model,
+                "temperature": actual_temperature,
+                "provider": "openai-compatible",
+                "processing_time": processing_time_ms,
+                "timestamp": int(time.time()),
+                "stream_done": True,
+            }
+            self._last_metadata = metadata
+
+            # Debug output: Stream completion info
+            if self.config.debug:
+                self._print_response_metadata(metadata)
+
+        except Exception as e:
+            raise ValueError(f"Streaming request failed: {str(e)}") from e
+
+    def get_last_metadata(self) -> dict[str, Any]:
+        """
+        Get metadata from the last LLM call.
+
+        This method allows MarkdownFlow to retrieve comprehensive metadata including
+        token usage, processing time, and other information from the most recent
+        complete() or stream() call.
+
+        Returns:
+            Dictionary containing metadata:
+            - model: Model name used
+            - temperature: Temperature value used
+            - provider: Provider identifier
+            - processing_time: Processing time in milliseconds
+            - timestamp: Unix timestamp
+            - prompt_tokens: Number of input tokens (if available)
+            - output_tokens: Number of output tokens (if available)
+            - total_tokens: Total tokens (if available)
+            - stream_done: True if this was a completed stream (stream mode only)
+
+        Example:
+            >>> provider = create_default_provider()
+            >>> content = provider.complete(messages)
+            >>> metadata = provider.get_last_metadata()
+            >>> print(f"Used {metadata['total_tokens']} tokens")
+        """
+        return self._last_metadata.copy()
+
+    def _format_messages(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
+        """
+        Format messages for API call.
+
+        Args:
+            messages: Raw message list
+
+        Returns:
+            Formatted message list
+        """
+        formatted = []
+        for msg in messages:
+            if isinstance(msg, dict) and "role" in msg and "content" in msg:
+                formatted.append(
+                    {
+                        "role": msg["role"],
+                        "content": str(msg["content"]),
+                    }
+                )
+            else:
+                # Fallback for non-standard format
+                formatted.append(
+                    {
+                        "role": "user",
+                        "content": str(msg),
+                    }
+                )
+        return formatted
+
+    def _print_request_info(self, messages: list[dict[str, str]], model: str, temperature: float) -> None:
+        """
+        Print colorized request information to console (debug mode).
+
+        Args:
+            messages: Message list
+            model: Model name
+            temperature: Temperature value
+        """
+        print("\033[97m\033[44m[ ====== LLM Request Start ====== ]\033[0m")
+        print(f"\033[30m\033[42mmodel\033[0m: {model}")
+        print(f"\033[30m\033[42mtemperature\033[0m: {temperature}")
+
+        for message in messages:
+            role = message.get("role", "user")
+            content = message.get("content", "")
+            # Truncate long content for readability
+            display_content = content if len(content) <= 200 else content[:200] + "..."
+            print(f"\033[30m\033[43m{role}\033[0m: {display_content}")
+
+        print("\033[97m\033[44m[ ====== LLM Request End ====== ]\033[0m")
+
+    def _print_response_metadata(self, metadata: dict[str, Any]) -> None:
+        """
+        Print colorized response metadata to console (debug mode).
+
+        Args:
+            metadata: Response metadata dictionary
+        """
+        print("\033[97m\033[42m[ ====== LLM Response Metadata ====== ]\033[0m")
+
+        # Essential fields
+        print(f"\033[36mmodel:\033[0m {metadata.get('model', 'N/A')}")
+        print(f"\033[36mtemperature:\033[0m {metadata.get('temperature', 'N/A')}")
+        print(f"\033[36mprovider:\033[0m {metadata.get('provider', 'N/A')}")
+        print(f"\033[36mprocessing_time:\033[0m {metadata.get('processing_time', 'N/A')} ms")
+
+        # Token usage (if available)
+        if "prompt_tokens" in metadata:
+            print(
+                f"\033[36mprompt_tokens:\033[0m \033[33m{metadata['prompt_tokens']}\033[0m "
+                f"\033[36moutput_tokens:\033[0m \033[33m{metadata['output_tokens']}\033[0m "
+                f"\033[36mtotal_tokens:\033[0m \033[32m{metadata['total_tokens']}\033[0m"
+            )
+
+        print(f"\033[36mtimestamp:\033[0m {metadata.get('timestamp', 'N/A')}")
+
+        if metadata.get("stream_done"):
+            print("\033[36mstream:\033[0m completed")
+
+        print("\033[97m\033[42m[ ====== ======================= ====== ]\033[0m")
+
+
+def create_provider(config: ProviderConfig | None = None) -> OpenAIProvider:
+    """
+    Create an OpenAI provider instance.
+
+    Args:
+        config: Optional provider configuration. If None, uses default config
+            (reads from environment variables).
+
+    Returns:
+        OpenAIProvider instance
+
+    Raises:
+        ValueError: If configuration is invalid
+        ImportError: If openai package is not installed
+
+    Example:
+        >>> config = ProviderConfig(api_key="sk-...", model="gpt-4")
+        >>> provider = create_provider(config)
+    """
+    if config is None:
+        config = ProviderConfig()
+    return OpenAIProvider(config)
+
+
+def create_default_provider() -> OpenAIProvider:
+    """
+    Create an OpenAI provider with default configuration.
+
+    Reads configuration from environment variables:
+    - LLM_API_KEY: API key (required)
+    - LLM_BASE_URL: Base URL (default: https://api.openai.com/v1)
+    - LLM_MODEL: Model name (default: gpt-3.5-turbo)
+    - LLM_TEMPERATURE: Temperature (default: 0.7)
+    - LLM_DEBUG: Debug mode (default: false)
+    - LLM_TIMEOUT: Request timeout in seconds (default: None, no timeout)
+
+    Returns:
+        OpenAIProvider instance with default config
+
+    Raises:
+        ValueError: If LLM_API_KEY is not set
+        ImportError: If openai package is not installed
+
+    Example:
+        >>> # Set environment variable first
+        >>> import os
+        >>> os.environ["LLM_API_KEY"] = "sk-..."
+        >>> provider = create_default_provider()
+    """
+    return create_provider()
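
Taken together, the docstrings in this new module describe the intended call pattern: set LLM_API_KEY, build the provider via create_default_provider(), call complete() or stream(), and read token and timing details back through get_last_metadata(). A minimal usage sketch follows; the import path is an assumption, since the diff does not show where this module sits inside the package.

    # Usage sketch based on the docstrings above; the import path is an assumption.
    import os

    os.environ["LLM_API_KEY"] = "sk-..."  # required by create_default_provider()

    from markdown_flow.providers import create_default_provider  # assumed path

    provider = create_default_provider()
    messages = [{"role": "user", "content": "Summarize this document."}]

    # Non-streaming call, then inspect the metadata recorded for it
    content = provider.complete(messages)
    meta = provider.get_last_metadata()
    print(content)
    print(meta["model"], meta.get("total_tokens"), meta["processing_time"], "ms")

    # Streaming call yields incremental text chunks
    for chunk in provider.stream(messages):
        print(chunk, end="", flush=True)
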
markdown_flow/utils.py CHANGED
@@ -19,11 +19,11 @@ from .constants import (
     COMPILED_PERCENT_VARIABLE_REGEX,
     COMPILED_PRESERVE_FENCE_REGEX,
     COMPILED_SINGLE_PIPE_SPLIT_REGEX,
+    CONTEXT_BUTTON_OPTIONS_TEMPLATE,
     CONTEXT_CONVERSATION_TEMPLATE,
     CONTEXT_QUESTION_MARKER,
     CONTEXT_QUESTION_TEMPLATE,
     JSON_PARSE_ERROR,
-    OUTPUT_INSTRUCTION_EXPLANATION,
     OUTPUT_INSTRUCTION_PREFIX,
     OUTPUT_INSTRUCTION_SUFFIX,
     SMART_VALIDATION_TEMPLATE,
@@ -68,7 +68,7 @@ def is_preserved_content_block(content: str) -> bool:
     Check if content is completely preserved content block.
 
     Preserved blocks are entirely wrapped by markers with no external content.
-    Supports inline (===content===) and multiline (!=== ... !===) formats.
+    Supports inline (===content===), multiline (!=== ... !===) formats, and mixed formats.
 
     Args:
         content: Content to check
@@ -82,61 +82,50 @@ def is_preserved_content_block(content: str) -> bool:
 
     lines = content.split("\n")
 
-    # Check if all non-empty lines are inline format (!===content!===)
-    all_inline_format = True
-    has_any_content = False
-
-    for line in lines:
-        stripped_line = line.strip()
-        if stripped_line:  # Non-empty line
-            has_any_content = True
-            # Check if inline format: ===content===
-            match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
-            if match:
-                # Ensure inner content exists and contains no ===
-                inner_content = match.group(1).strip()
-                if not inner_content or "===" in inner_content:
-                    all_inline_format = False
-                    break
-            else:
-                all_inline_format = False  # type: ignore[unreachable]
-                break
-
-    # If all lines are inline format, return directly
-    if has_any_content and all_inline_format:
-        return True
-
-    # Check multiline format using state machine
+    # Use state machine to validate that all non-empty content is preserved
     state = "OUTSIDE"  # States: OUTSIDE, INSIDE
-    has_content_outside = False  # Has external content
-    has_preserve_blocks = False  # Has preserve blocks
+    has_preserve_content = False
 
     for line in lines:
         stripped_line = line.strip()
 
+        # Check if this line is a fence marker (!===)
         if COMPILED_PRESERVE_FENCE_REGEX.match(stripped_line):
             if state == "OUTSIDE":
                 # Enter preserve block
                 state = "INSIDE"
-                has_preserve_blocks = True
+                has_preserve_content = True
             elif state == "INSIDE":
                 # Exit preserve block
                 state = "OUTSIDE"
-            # !=== lines don't count as external content
-        else:
-            # Non-!=== lines
-            if stripped_line:  # type: ignore[unreachable]  # Non-empty line
-                if state == "OUTSIDE":
-                    # External content found
-                    has_content_outside = True
-                    break
-                # Internal content doesn't affect judgment
+            # Fence markers themselves are valid preserved content
+            continue
+
+        # Non-fence lines
+        if stripped_line:  # Non-empty line
+            if state == "INSIDE":
+                # Inside fence block, this is valid preserved content
+                has_preserve_content = True
+            else:
+                # Outside fence block, check if it's inline format
+                match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
+                if match:
+                    # Ensure inner content exists and contains no ===
+                    inner_content = match.group(1).strip()
+                    if inner_content and "===" not in inner_content:
+                        # Valid inline format
+                        has_preserve_content = True
+                    else:
+                        # Invalid inline format
+                        return False
+                else:
+                    # Not fence, not inline format -> external content
+                    return False
 
     # Judgment conditions:
-    # 1. Must have preserve blocks
-    # 2. Cannot have external content
-    # 3. Final state must be OUTSIDE (all blocks closed)
-    return has_preserve_blocks and not has_content_outside and state == "OUTSIDE"
+    # 1. Must have preserved content
+    # 2. Final state must be OUTSIDE (all fence blocks closed)
+    return has_preserve_content and state == "OUTSIDE"
 
 
 def extract_interaction_question(content: str) -> str | None:
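
The rewritten check above now accepts inline markers, fenced blocks, and a mix of the two, and rejects any non-empty line that sits outside the markers. A rough illustration of that behavior, assuming the marker syntax described in the comments (these calls are illustrative, not tests shipped with the package):

    # Illustration of the updated is_preserved_content_block behavior (assumed, not package tests).
    from markdown_flow.utils import is_preserved_content_block

    is_preserved_content_block("===Welcome===")                  # inline only -> True
    is_preserved_content_block("!===\nStep 1\nStep 2\n!===")     # fenced block -> True
    is_preserved_content_block("===Title===\n!===\nBody\n!===")  # mixed formats -> True (new)
    is_preserved_content_block("intro text\n!===\nBody\n!===")   # external content -> False
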
@@ -480,6 +469,7 @@ def generate_smart_validation_template(
     target_variable: str,
     context: list[dict[str, Any]] | None = None,
     interaction_question: str | None = None,
+    buttons: list[dict[str, str]] | None = None,
 ) -> str:
     """
     Generate smart validation template based on context and question.
@@ -488,19 +478,28 @@ def generate_smart_validation_template(
         target_variable: Target variable name
         context: Context message list with role and content fields
         interaction_question: Question text from interaction block
+        buttons: Button options list with display and value fields
 
     Returns:
         Generated validation template
     """
     # Build context information
     context_info = ""
-    if interaction_question or context:
+    if interaction_question or context or buttons:
         context_parts = []
 
         # Add question information (most important, put first)
         if interaction_question:
             context_parts.append(CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question))
 
+        # Add button options information
+        if buttons:
+            button_displays = [btn.get("display", "") for btn in buttons if btn.get("display")]
+            if button_displays:
+                button_options_str = ", ".join(button_displays)
+                button_info = CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=button_options_str)
+                context_parts.append(button_info)
+
         # Add conversation context
         if context:
             for msg in context:
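
Per the updated docstring, buttons is a list of dicts with display and value fields, and only the non-empty display values are folded into the validation context. A sketch of the expected argument shape (the values are illustrative, and any parameters of this function outside the hunk are not shown here):

    # Shape of the new buttons argument, per the docstring; values are illustrative.
    buttons = [
        {"display": "Beginner", "value": "beginner"},
        {"display": "Advanced", "value": "advanced"},
    ]
    # Only the non-empty "display" values are used: they are joined as
    # "Beginner, Advanced" and formatted through CONTEXT_BUTTON_OPTIONS_TEMPLATE.
    button_options_str = ", ".join(b["display"] for b in buttons if b.get("display"))
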
@@ -559,7 +558,7 @@ def parse_json_response(response_text: str) -> dict[str, Any]:
     raise ValueError(JSON_PARSE_ERROR)
 
 
-def process_output_instructions(content: str) -> str:
+def process_output_instructions(content: str) -> tuple[str, bool]:
     """
     Process output instruction markers, converting !=== format to [output] format.
 
@@ -569,7 +568,9 @@ def process_output_instructions(content: str) -> str:
         content: Raw content containing output instructions
 
     Returns:
-        Processed content with === and !=== markers converted to [output] format
+        Tuple of (processed_content, has_preserved_content):
+        - processed_content: Content with === and !=== markers converted to XML format
+        - has_preserved_content: True if content contained preserved markers
     """
     lines = content.split("\n")
     result_lines = []
@@ -650,11 +651,8 @@ def process_output_instructions(content: str) -> str:
     # Assemble final content
     processed_content = "\n".join(result_lines)
 
-    # Add explanation prefix (if has output instructions)
-    if has_output_instruction:
-        processed_content = OUTPUT_INSTRUCTION_EXPLANATION + processed_content
-
-    return processed_content
+    # Return both processed content and whether it contains preserved content
+    return processed_content, has_output_instruction
 
 
 def extract_preserved_content(content: str) -> str:
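
Because process_output_instructions() no longer prepends OUTPUT_INSTRUCTION_EXPLANATION itself, callers now unpack the returned flag and decide whether to apply any prefix on their side. A caller-side sketch (the input value is illustrative):

    # Caller-side sketch for the new tuple return; raw_block is an illustrative input.
    from markdown_flow.utils import process_output_instructions

    raw_block = "!===\nAlways show this text verbatim.\n!==="
    processed_content, has_preserved = process_output_instructions(raw_block)
    if has_preserved:
        # The explanation prefix is no longer added inside the helper;
        # the caller now decides whether and how to add it.
        pass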