markdown-flow 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
+ """
+ OpenAI-Compatible Provider Implementation
+
+ Provides a production-ready OpenAI-compatible LLM provider with debug mode,
+ token tracking, and comprehensive metadata.
+ """
+
+ import time
+ from collections.abc import Generator
+ from typing import Any
+
+ from ..llm import LLMProvider
+ from .config import ProviderConfig
+
+
+ try:
+     from openai import OpenAI
+ except ImportError:
+     OpenAI = None  # type: ignore[misc, assignment]
+
+
+ class OpenAIProvider(LLMProvider):
+     """
+     OpenAI-compatible LLM provider implementation.
+
+     Features:
+     - Debug mode with colorized console output
+     - Automatic token usage tracking
+     - Comprehensive metadata (model, temperature, processing time, tokens, timestamp)
+     - Instance-level model/temperature override support
+     - Streaming and non-streaming modes
+     """
+
+     def __init__(self, config: ProviderConfig):
+         """
+         Initialize OpenAI provider.
+
+         Args:
+             config: Provider configuration
+
+         Raises:
+             ImportError: If openai package is not installed
+             ValueError: If configuration is invalid
+         """
+         if OpenAI is None:
+             raise ImportError("The 'openai' package is required for OpenAIProvider. Install it with: pip install openai")
+
+         self.config = config
+         self.client = OpenAI(
+             api_key=config.api_key,
+             base_url=config.base_url,
+             timeout=config.timeout,
+         )
+         self._last_metadata: dict[str, Any] = {}
+
+     def complete(
+         self,
+         messages: list[dict[str, str]],
+         model: str | None = None,
+         temperature: float | None = None,
+     ) -> str:
+         """
+         Non-streaming LLM call.
+
+         Args:
+             messages: Message list
+             model: Optional model override
+             temperature: Optional temperature override
+
+         Returns:
+             LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         # Determine actual model and temperature (instance override > provider default)
+         actual_model = model if model is not None else self.config.model
+         actual_temperature = temperature if temperature is not None else self.config.temperature
+
+         # Debug output: Request info
+         if self.config.debug:
+             self._print_request_info(messages, actual_model, actual_temperature)
+
+         # Format messages
+         formatted_messages = self._format_messages(messages)
+
+         # Record start time
+         start_time = time.time()
+
+         try:
+             # Make API call
+             response = self.client.chat.completions.create(
+                 model=actual_model,
+                 messages=formatted_messages,
+                 temperature=actual_temperature,
+             )
+
+             # Calculate processing time
+             processing_time_ms = int((time.time() - start_time) * 1000)
+
+             # Extract content
+             if not response.choices or len(response.choices) == 0:
+                 raise Exception("API response error: no choices returned")
+
+             choice = response.choices[0]
+             if not choice.message:
+                 raise Exception("Response has no message field")
+
+             content = choice.message.content or ""
+
+             # Extract token usage
+             usage = response.usage
+             metadata = {
+                 "model": actual_model,
+                 "temperature": actual_temperature,
+                 "provider": "openai-compatible",
+                 "processing_time": processing_time_ms,
+                 "timestamp": int(time.time()),
+             }
+
+             if usage:
+                 metadata.update(
+                     {
+                         "prompt_tokens": usage.prompt_tokens,
+                         "output_tokens": usage.completion_tokens,
+                         "total_tokens": usage.total_tokens,
+                     }
+                 )
+
+             # Save metadata for retrieval by MarkdownFlow
+             self._last_metadata = metadata
+
+             # Debug output: Response metadata
+             if self.config.debug:
+                 self._print_response_metadata(metadata)
+
+             return content
+
+         except Exception as e:
+             raise Exception(f"API request failed: {str(e)}") from e
+
+     def stream(
+         self,
+         messages: list[dict[str, str]],
+         model: str | None = None,
+         temperature: float | None = None,
+     ) -> Generator[str, None, None]:
+         """
+         Streaming LLM call.
+
+         Args:
+             messages: Message list
+             model: Optional model override
+             temperature: Optional temperature override
+
+         Yields:
+             Incremental LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         # Determine actual model and temperature
+         actual_model = model if model is not None else self.config.model
+         actual_temperature = temperature if temperature is not None else self.config.temperature
+
+         # Debug output: Request info
+         if self.config.debug:
+             self._print_request_info(messages, actual_model, actual_temperature)
+
+         # Format messages
+         formatted_messages = self._format_messages(messages)
+
+         # Record start time
+         start_time = time.time()
+
+         try:
+             # Create streaming response
+             stream = self.client.chat.completions.create(
+                 model=actual_model,
+                 messages=formatted_messages,
+                 temperature=actual_temperature,
+                 stream=True,
+             )
+
+             for chunk in stream:
+                 if chunk.choices and chunk.choices[0].delta.content:
+                     yield chunk.choices[0].delta.content
+
+             # Calculate processing time after stream completes
+             processing_time_ms = int((time.time() - start_time) * 1000)
+
+             # Save metadata for retrieval by MarkdownFlow
+             metadata = {
+                 "model": actual_model,
+                 "temperature": actual_temperature,
+                 "provider": "openai-compatible",
+                 "processing_time": processing_time_ms,
+                 "timestamp": int(time.time()),
+                 "stream_done": True,
+             }
+             self._last_metadata = metadata
+
+             # Debug output: Stream completion info
+             if self.config.debug:
+                 self._print_response_metadata(metadata)
+
+         except Exception as e:
+             raise ValueError(f"Streaming request failed: {str(e)}") from e
+
+     def get_last_metadata(self) -> dict[str, Any]:
+         """
+         Get metadata from the last LLM call.
+
+         This method allows MarkdownFlow to retrieve comprehensive metadata including
+         token usage, processing time, and other information from the most recent
+         complete() or stream() call.
+
+         Returns:
+             Dictionary containing metadata:
+             - model: Model name used
+             - temperature: Temperature value used
+             - provider: Provider identifier
+             - processing_time: Processing time in milliseconds
+             - timestamp: Unix timestamp
+             - prompt_tokens: Number of input tokens (if available)
+             - output_tokens: Number of output tokens (if available)
+             - total_tokens: Total tokens (if available)
+             - stream_done: True if this was a completed stream (stream mode only)
+
+         Example:
+             >>> provider = create_default_provider()
+             >>> content = provider.complete(messages)
+             >>> metadata = provider.get_last_metadata()
+             >>> print(f"Used {metadata['total_tokens']} tokens")
+         """
+         return self._last_metadata.copy()
+
+     def _format_messages(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
+         """
+         Format messages for API call.
+
+         Args:
+             messages: Raw message list
+
+         Returns:
+             Formatted message list
+         """
+         formatted = []
+         for msg in messages:
+             if isinstance(msg, dict) and "role" in msg and "content" in msg:
+                 formatted.append(
+                     {
+                         "role": msg["role"],
+                         "content": str(msg["content"]),
+                     }
+                 )
+             else:
+                 # Fallback for non-standard format
+                 formatted.append(
+                     {
+                         "role": "user",
+                         "content": str(msg),
+                     }
+                 )
+         return formatted
+
+     def _print_request_info(self, messages: list[dict[str, str]], model: str, temperature: float) -> None:
+         """
+         Print colorized request information to console (debug mode).
+
+         Args:
+             messages: Message list
+             model: Model name
+             temperature: Temperature value
+         """
+         print("\033[97m\033[44m[ ====== LLM Request Start ====== ]\033[0m")
+         print(f"\033[30m\033[42mmodel\033[0m: {model}")
+         print(f"\033[30m\033[42mtemperature\033[0m: {temperature}")
+
+         for message in messages:
+             role = message.get("role", "user")
+             content = message.get("content", "")
+             # Truncate long content for readability
+             display_content = content
+             print(f"\033[30m\033[43m{role}\033[0m: {display_content}")
+
+         print("\033[97m\033[44m[ ====== LLM Request End ====== ]\033[0m")
+
+     def _print_response_metadata(self, metadata: dict[str, Any]) -> None:
+         """
+         Print colorized response metadata to console (debug mode).
+
+         Args:
+             metadata: Response metadata dictionary
+         """
+         print("\033[97m\033[42m[ ====== LLM Response Metadata ====== ]\033[0m")
+
+         # Essential fields
+         print(f"\033[36mmodel:\033[0m {metadata.get('model', 'N/A')}")
+         print(f"\033[36mtemperature:\033[0m {metadata.get('temperature', 'N/A')}")
+         print(f"\033[36mprovider:\033[0m {metadata.get('provider', 'N/A')}")
+         print(f"\033[36mprocessing_time:\033[0m {metadata.get('processing_time', 'N/A')} ms")
+
+         # Token usage (if available)
+         if "prompt_tokens" in metadata:
+             print(
+                 f"\033[36mprompt_tokens:\033[0m \033[33m{metadata['prompt_tokens']}\033[0m "
+                 f"\033[36moutput_tokens:\033[0m \033[33m{metadata['output_tokens']}\033[0m "
+                 f"\033[36mtotal_tokens:\033[0m \033[32m{metadata['total_tokens']}\033[0m"
+             )
+
+         print(f"\033[36mtimestamp:\033[0m {metadata.get('timestamp', 'N/A')}")
+
+         if metadata.get("stream_done"):
+             print("\033[36mstream:\033[0m completed")
+
+         print("\033[97m\033[42m[ ====== ======================= ====== ]\033[0m")
+
+
+ def create_provider(config: ProviderConfig | None = None) -> OpenAIProvider:
+     """
+     Create an OpenAI provider instance.
+
+     Args:
+         config: Optional provider configuration. If None, uses default config
+             (reads from environment variables).
+
+     Returns:
+         OpenAIProvider instance
+
+     Raises:
+         ValueError: If configuration is invalid
+         ImportError: If openai package is not installed
+
+     Example:
+         >>> config = ProviderConfig(api_key="sk-...", model="gpt-4")
+         >>> provider = create_provider(config)
+     """
+     if config is None:
+         config = ProviderConfig()
+     return OpenAIProvider(config)
+
+
+ def create_default_provider() -> OpenAIProvider:
+     """
+     Create an OpenAI provider with default configuration.
+
+     Reads configuration from environment variables:
+     - LLM_API_KEY: API key (required)
+     - LLM_BASE_URL: Base URL (default: https://api.openai.com/v1)
+     - LLM_MODEL: Model name (default: gpt-3.5-turbo)
+     - LLM_TEMPERATURE: Temperature (default: 0.7)
+     - LLM_DEBUG: Debug mode (default: false)
+     - LLM_TIMEOUT: Request timeout in seconds (default: None, no timeout)
+
+     Returns:
+         OpenAIProvider instance with default config
+
+     Raises:
+         ValueError: If LLM_API_KEY is not set
+         ImportError: If openai package is not installed
+
+     Example:
+         >>> # Set environment variable first
+         >>> import os
+         >>> os.environ["LLM_API_KEY"] = "sk-..."
+         >>> provider = create_default_provider()
+     """
+     return create_provider()
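
For orientation, here is a minimal usage sketch of the module added above. It only calls functions that appear in the diff (`create_default_provider`, `complete`, `stream`, `get_last_metadata`); the import path `markdown_flow.providers` is an assumption, since the diff does not show where the new file sits inside the package.

```python
import os

# create_default_provider() reads its configuration from environment variables.
os.environ["LLM_API_KEY"] = "sk-..."

# Hypothetical import path -- the file's location in the package is not shown in this diff.
from markdown_flow.providers import create_default_provider

provider = create_default_provider()
messages = [{"role": "user", "content": "Say hello"}]

# Non-streaming call, then metadata (model, temperature, tokens, timing) from the last call.
content = provider.complete(messages)
metadata = provider.get_last_metadata()
print(content, metadata.get("total_tokens"))

# Streaming call yields incremental chunks.
for chunk in provider.stream(messages):
    print(chunk, end="", flush=True)
```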
markdown_flow/utils.py CHANGED
@@ -19,11 +19,11 @@ from .constants import (
      COMPILED_PERCENT_VARIABLE_REGEX,
      COMPILED_PRESERVE_FENCE_REGEX,
      COMPILED_SINGLE_PIPE_SPLIT_REGEX,
+     CONTEXT_BUTTON_OPTIONS_TEMPLATE,
      CONTEXT_CONVERSATION_TEMPLATE,
      CONTEXT_QUESTION_MARKER,
      CONTEXT_QUESTION_TEMPLATE,
      JSON_PARSE_ERROR,
-     OUTPUT_INSTRUCTION_EXPLANATION,
      OUTPUT_INSTRUCTION_PREFIX,
      OUTPUT_INSTRUCTION_SUFFIX,
      SMART_VALIDATION_TEMPLATE,
@@ -68,7 +68,7 @@ def is_preserved_content_block(content: str) -> bool:
      Check if content is completely preserved content block.

      Preserved blocks are entirely wrapped by markers with no external content.
-     Supports inline (===content===) and multiline (!=== ... !===) formats.
+     Supports inline (===content===), multiline (!=== ... !===) formats, and mixed formats.

      Args:
          content: Content to check
@@ -82,61 +82,50 @@ def is_preserved_content_block(content: str) -> bool:

      lines = content.split("\n")

-     # Check if all non-empty lines are inline format (!===content!===)
-     all_inline_format = True
-     has_any_content = False
-
-     for line in lines:
-         stripped_line = line.strip()
-         if stripped_line:  # Non-empty line
-             has_any_content = True
-             # Check if inline format: ===content===
-             match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
-             if match:
-                 # Ensure inner content exists and contains no ===
-                 inner_content = match.group(1).strip()
-                 if not inner_content or "===" in inner_content:
-                     all_inline_format = False
-                     break
-             else:
-                 all_inline_format = False  # type: ignore[unreachable]
-                 break
-
-     # If all lines are inline format, return directly
-     if has_any_content and all_inline_format:
-         return True
-
-     # Check multiline format using state machine
+     # Use state machine to validate that all non-empty content is preserved
      state = "OUTSIDE"  # States: OUTSIDE, INSIDE
-     has_content_outside = False  # Has external content
-     has_preserve_blocks = False  # Has preserve blocks
+     has_preserve_content = False

      for line in lines:
          stripped_line = line.strip()

+         # Check if this line is a fence marker (!===)
          if COMPILED_PRESERVE_FENCE_REGEX.match(stripped_line):
              if state == "OUTSIDE":
                  # Enter preserve block
                  state = "INSIDE"
-                 has_preserve_blocks = True
+                 has_preserve_content = True
              elif state == "INSIDE":
                  # Exit preserve block
                  state = "OUTSIDE"
-             # !=== lines don't count as external content
-         else:
-             # Non-!=== lines
-             if stripped_line:  # type: ignore[unreachable]  # Non-empty line
-                 if state == "OUTSIDE":
-                     # External content found
-                     has_content_outside = True
-                     break
-                 # Internal content doesn't affect judgment
+             # Fence markers themselves are valid preserved content
+             continue
+
+         # Non-fence lines
+         if stripped_line:  # Non-empty line
+             if state == "INSIDE":
+                 # Inside fence block, this is valid preserved content
+                 has_preserve_content = True
+             else:
+                 # Outside fence block, check if it's inline format
+                 match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
+                 if match:
+                     # Ensure inner content exists and contains no ===
+                     inner_content = match.group(1).strip()
+                     if inner_content and "===" not in inner_content:
+                         # Valid inline format
+                         has_preserve_content = True
+                     else:
+                         # Invalid inline format
+                         return False
+                 else:
+                     # Not fence, not inline format -> external content
+                     return False

      # Judgment conditions:
-     # 1. Must have preserve blocks
-     # 2. Cannot have external content
-     # 3. Final state must be OUTSIDE (all blocks closed)
-     return has_preserve_blocks and not has_content_outside and state == "OUTSIDE"
+     # 1. Must have preserved content
+     # 2. Final state must be OUTSIDE (all fence blocks closed)
+     return has_preserve_content and state == "OUTSIDE"


  def extract_interaction_question(content: str) -> str | None:
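
The rewritten check above collapses the old two-pass logic into a single state machine, so inline and fenced markers can now be mixed in one block. A small sketch of the resulting behavior; the marker syntax follows the docstring, and the exact matching rules live in `COMPILED_INLINE_PRESERVE_REGEX` and `COMPILED_PRESERVE_FENCE_REGEX`, so treat the expected values as illustrative.

```python
from markdown_flow.utils import is_preserved_content_block

# Mixed inline and fenced preserved content: rejected by the 0.2.10 logic,
# accepted by the 0.2.30 state machine.
mixed = "===Keep this line===\n!===\nKeep this block\n!==="
print(is_preserved_content_block(mixed))  # expected: True

# Plain text outside any marker still disqualifies the block in both versions.
leaky = "===Keep this line===\nplain text outside any marker"
print(is_preserved_content_block(leaky))  # expected: False
```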
@@ -480,6 +469,7 @@ def generate_smart_validation_template(
      target_variable: str,
      context: list[dict[str, Any]] | None = None,
      interaction_question: str | None = None,
+     buttons: list[dict[str, str]] | None = None,
  ) -> str:
      """
      Generate smart validation template based on context and question.
@@ -488,19 +478,28 @@ def generate_smart_validation_template(
          target_variable: Target variable name
          context: Context message list with role and content fields
          interaction_question: Question text from interaction block
+         buttons: Button options list with display and value fields

      Returns:
          Generated validation template
      """
      # Build context information
      context_info = ""
-     if interaction_question or context:
+     if interaction_question or context or buttons:
          context_parts = []

          # Add question information (most important, put first)
          if interaction_question:
              context_parts.append(CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question))

+         # Add button options information
+         if buttons:
+             button_displays = [btn.get("display", "") for btn in buttons if btn.get("display")]
+             if button_displays:
+                 button_options_str = ", ".join(button_displays)
+                 button_info = CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=button_options_str)
+                 context_parts.append(button_info)
+
          # Add conversation context
          if context:
              for msg in context:
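
The new `buttons` parameter feeds option labels into the validation prompt through `CONTEXT_BUTTON_OPTIONS_TEMPLATE`. A hedged sketch of a call using the dict shape the code reads (`display`/`value` keys); the argument values are purely illustrative.

```python
from markdown_flow.utils import generate_smart_validation_template

template = generate_smart_validation_template(
    target_variable="favorite_color",              # illustrative variable name
    interaction_question="Which color do you prefer?",
    buttons=[
        {"display": "Red", "value": "red"},
        {"display": "Blue", "value": "blue"},
    ],
)
# Only the "display" strings are joined into the button-options context line.
```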
@@ -559,7 +558,7 @@ def parse_json_response(response_text: str) -> dict[str, Any]:
      raise ValueError(JSON_PARSE_ERROR)


- def process_output_instructions(content: str) -> str:
+ def process_output_instructions(content: str) -> tuple[str, bool]:
      """
      Process output instruction markers, converting !=== format to [output] format.

@@ -569,7 +568,9 @@ def process_output_instructions(content: str) -> str:
          content: Raw content containing output instructions

      Returns:
-         Processed content with === and !=== markers converted to [output] format
+         Tuple of (processed_content, has_preserved_content):
+         - processed_content: Content with === and !=== markers converted to XML format
+         - has_preserved_content: True if content contained preserved markers
      """
      lines = content.split("\n")
      result_lines = []
@@ -650,11 +651,8 @@ def process_output_instructions(content: str) -> str:
      # Assemble final content
      processed_content = "\n".join(result_lines)

-     # Add explanation prefix (if has output instructions)
-     if has_output_instruction:
-         processed_content = OUTPUT_INSTRUCTION_EXPLANATION + processed_content
-
-     return processed_content
+     # Return both processed content and whether it contains preserved content
+     return processed_content, has_output_instruction


  def extract_preserved_content(content: str) -> str:
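
Because `process_output_instructions` now returns `tuple[str, bool]` instead of a plain string, and no longer prepends `OUTPUT_INSTRUCTION_EXPLANATION` itself, callers upgrading from 0.2.10 need a small change. A sketch of the migration; the input string is illustrative.

```python
from markdown_flow.utils import process_output_instructions

raw = "!===\nRender this text exactly as written.\n!==="

# 0.2.10: processed = process_output_instructions(raw)
# 0.2.30: the second element reports whether preserved markers were found.
processed, has_preserved = process_output_instructions(raw)
if has_preserved:
    # The explanation prefix is no longer added by this function.
    print("content contained preserved output instructions")
print(processed)
```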