markdown-flow 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
+ """
+ OpenAI-Compatible Provider Implementation
+
+ Provides a production-ready OpenAI-compatible LLM provider with debug mode,
+ token tracking, and comprehensive metadata.
+ """
+
+ import time
+ from collections.abc import Generator
+ from typing import Any
+
+ from ..llm import LLMProvider
+ from .config import ProviderConfig
+
+
+ try:
+     from openai import OpenAI
+ except ImportError:
+     OpenAI = None  # type: ignore[misc, assignment]
+
+
+ class OpenAIProvider(LLMProvider):
+     """
+     OpenAI-compatible LLM provider implementation.
+
+     Features:
+     - Debug mode with colorized console output
+     - Automatic token usage tracking
+     - Comprehensive metadata (model, temperature, processing time, tokens, timestamp)
+     - Instance-level model/temperature override support
+     - Streaming and non-streaming modes
+     """
+
+     def __init__(self, config: ProviderConfig):
+         """
+         Initialize OpenAI provider.
+
+         Args:
+             config: Provider configuration
+
+         Raises:
+             ImportError: If openai package is not installed
+             ValueError: If configuration is invalid
+         """
+         if OpenAI is None:
+             raise ImportError("The 'openai' package is required for OpenAIProvider. Install it with: pip install openai")
+
+         self.config = config
+         self.client = OpenAI(
+             api_key=config.api_key,
+             base_url=config.base_url,
+             timeout=config.timeout,
+         )
+         self._last_metadata: dict[str, Any] = {}
+
+     def complete(
+         self,
+         messages: list[dict[str, str]],
+         model: str | None = None,
+         temperature: float | None = None,
+     ) -> str:
+         """
+         Non-streaming LLM call.
+
+         Args:
+             messages: Message list
+             model: Optional model override
+             temperature: Optional temperature override
+
+         Returns:
+             LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         # Determine actual model and temperature (instance override > provider default)
+         actual_model = model if model is not None else self.config.model
+         actual_temperature = temperature if temperature is not None else self.config.temperature
+
+         # Debug output: Request info
+         if self.config.debug:
+             self._print_request_info(messages, actual_model, actual_temperature)
+
+         # Format messages
+         formatted_messages = self._format_messages(messages)
+
+         # Record start time
+         start_time = time.time()
+
+         try:
+             # Make API call
+             response = self.client.chat.completions.create(
+                 model=actual_model,
+                 messages=formatted_messages,
+                 temperature=actual_temperature,
+             )
+
+             # Calculate processing time
+             processing_time_ms = int((time.time() - start_time) * 1000)
+
+             # Extract content
+             if not response.choices or len(response.choices) == 0:
+                 raise Exception("API response error: no choices returned")
+
+             choice = response.choices[0]
+             if not choice.message:
+                 raise Exception("Response has no message field")
+
+             content = choice.message.content or ""
+
+             # Extract token usage
+             usage = response.usage
+             metadata = {
+                 "model": actual_model,
+                 "temperature": actual_temperature,
+                 "provider": "openai-compatible",
+                 "processing_time": processing_time_ms,
+                 "timestamp": int(time.time()),
+             }
+
+             if usage:
+                 metadata.update(
+                     {
+                         "prompt_tokens": usage.prompt_tokens,
+                         "output_tokens": usage.completion_tokens,
+                         "total_tokens": usage.total_tokens,
+                     }
+                 )
+
+             # Save metadata for retrieval by MarkdownFlow
+             self._last_metadata = metadata
+
+             # Debug output: Response metadata
+             if self.config.debug:
+                 self._print_response_metadata(metadata)
+
+             return content
+
+         except Exception as e:
+             raise Exception(f"API request failed: {str(e)}") from e
+
+     def stream(
+         self,
+         messages: list[dict[str, str]],
+         model: str | None = None,
+         temperature: float | None = None,
+     ) -> Generator[str, None, None]:
+         """
+         Streaming LLM call.
+
+         Args:
+             messages: Message list
+             model: Optional model override
+             temperature: Optional temperature override
+
+         Yields:
+             Incremental LLM response content
+
+         Raises:
+             Exception: If API call fails
+         """
+         # Determine actual model and temperature
+         actual_model = model if model is not None else self.config.model
+         actual_temperature = temperature if temperature is not None else self.config.temperature
+
+         # Debug output: Request info
+         if self.config.debug:
+             self._print_request_info(messages, actual_model, actual_temperature)
+
+         # Format messages
+         formatted_messages = self._format_messages(messages)
+
+         # Record start time
+         start_time = time.time()
+
+         try:
+             # Create streaming response
+             stream = self.client.chat.completions.create(
+                 model=actual_model,
+                 messages=formatted_messages,
+                 temperature=actual_temperature,
+                 stream=True,
+             )
+
+             for chunk in stream:
+                 if chunk.choices and chunk.choices[0].delta.content:
+                     yield chunk.choices[0].delta.content
+
+             # Calculate processing time after stream completes
+             processing_time_ms = int((time.time() - start_time) * 1000)
+
+             # Save metadata for retrieval by MarkdownFlow
+             metadata = {
+                 "model": actual_model,
+                 "temperature": actual_temperature,
+                 "provider": "openai-compatible",
+                 "processing_time": processing_time_ms,
+                 "timestamp": int(time.time()),
+                 "stream_done": True,
+             }
+             self._last_metadata = metadata
+
+             # Debug output: Stream completion info
+             if self.config.debug:
+                 self._print_response_metadata(metadata)
+
+         except Exception as e:
+             raise ValueError(f"Streaming request failed: {str(e)}") from e
+
+     def get_last_metadata(self) -> dict[str, Any]:
+         """
+         Get metadata from the last LLM call.
+
+         This method allows MarkdownFlow to retrieve comprehensive metadata including
+         token usage, processing time, and other information from the most recent
+         complete() or stream() call.
+
+         Returns:
+             Dictionary containing metadata:
+             - model: Model name used
+             - temperature: Temperature value used
+             - provider: Provider identifier
+             - processing_time: Processing time in milliseconds
+             - timestamp: Unix timestamp
+             - prompt_tokens: Number of input tokens (if available)
+             - output_tokens: Number of output tokens (if available)
+             - total_tokens: Total tokens (if available)
+             - stream_done: True if this was a completed stream (stream mode only)
+
+         Example:
+             >>> provider = create_default_provider()
+             >>> content = provider.complete(messages)
+             >>> metadata = provider.get_last_metadata()
+             >>> print(f"Used {metadata['total_tokens']} tokens")
+         """
+         return self._last_metadata.copy()
+
+     def _format_messages(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
+         """
+         Format messages for API call.
+
+         Args:
+             messages: Raw message list
+
+         Returns:
+             Formatted message list
+         """
+         formatted = []
+         for msg in messages:
+             if isinstance(msg, dict) and "role" in msg and "content" in msg:
+                 formatted.append(
+                     {
+                         "role": msg["role"],
+                         "content": str(msg["content"]),
+                     }
+                 )
+             else:
+                 # Fallback for non-standard format
+                 formatted.append(
+                     {
+                         "role": "user",
+                         "content": str(msg),
+                     }
+                 )
+         return formatted
+
+     def _print_request_info(self, messages: list[dict[str, str]], model: str, temperature: float) -> None:
+         """
+         Print colorized request information to console (debug mode).
+
+         Args:
+             messages: Message list
+             model: Model name
+             temperature: Temperature value
+         """
+         print("\033[97m\033[44m[ ====== LLM Request Start ====== ]\033[0m")
+         print(f"\033[30m\033[42mmodel\033[0m: {model}")
+         print(f"\033[30m\033[42mtemperature\033[0m: {temperature}")
+
+         for message in messages:
+             role = message.get("role", "user")
+             content = message.get("content", "")
+             # Truncate long content for readability
+             display_content = content
+             print(f"\033[30m\033[43m{role}\033[0m: {display_content}")
+
+         print("\033[97m\033[44m[ ====== LLM Request End ====== ]\033[0m")
+
+     def _print_response_metadata(self, metadata: dict[str, Any]) -> None:
+         """
+         Print colorized response metadata to console (debug mode).
+
+         Args:
+             metadata: Response metadata dictionary
+         """
+         print("\033[97m\033[42m[ ====== LLM Response Metadata ====== ]\033[0m")
+
+         # Essential fields
+         print(f"\033[36mmodel:\033[0m {metadata.get('model', 'N/A')}")
+         print(f"\033[36mtemperature:\033[0m {metadata.get('temperature', 'N/A')}")
+         print(f"\033[36mprovider:\033[0m {metadata.get('provider', 'N/A')}")
+         print(f"\033[36mprocessing_time:\033[0m {metadata.get('processing_time', 'N/A')} ms")
+
+         # Token usage (if available)
+         if "prompt_tokens" in metadata:
+             print(
+                 f"\033[36mprompt_tokens:\033[0m \033[33m{metadata['prompt_tokens']}\033[0m "
+                 f"\033[36moutput_tokens:\033[0m \033[33m{metadata['output_tokens']}\033[0m "
+                 f"\033[36mtotal_tokens:\033[0m \033[32m{metadata['total_tokens']}\033[0m"
+             )
+
+         print(f"\033[36mtimestamp:\033[0m {metadata.get('timestamp', 'N/A')}")
+
+         if metadata.get("stream_done"):
+             print("\033[36mstream:\033[0m completed")
+
+         print("\033[97m\033[42m[ ====== ======================= ====== ]\033[0m")
+
+
+ def create_provider(config: ProviderConfig | None = None) -> OpenAIProvider:
+     """
+     Create an OpenAI provider instance.
+
+     Args:
+         config: Optional provider configuration. If None, uses default config
+             (reads from environment variables).
+
+     Returns:
+         OpenAIProvider instance
+
+     Raises:
+         ValueError: If configuration is invalid
+         ImportError: If openai package is not installed
+
+     Example:
+         >>> config = ProviderConfig(api_key="sk-...", model="gpt-4")
+         >>> provider = create_provider(config)
+     """
+     if config is None:
+         config = ProviderConfig()
+     return OpenAIProvider(config)
+
+
+ def create_default_provider() -> OpenAIProvider:
+     """
+     Create an OpenAI provider with default configuration.
+
+     Reads configuration from environment variables:
+     - LLM_API_KEY: API key (required)
+     - LLM_BASE_URL: Base URL (default: https://api.openai.com/v1)
+     - LLM_MODEL: Model name (default: gpt-3.5-turbo)
+     - LLM_TEMPERATURE: Temperature (default: 0.7)
+     - LLM_DEBUG: Debug mode (default: false)
+     - LLM_TIMEOUT: Request timeout in seconds (default: None, no timeout)
+
+     Returns:
+         OpenAIProvider instance with default config
+
+     Raises:
+         ValueError: If LLM_API_KEY is not set
+         ImportError: If openai package is not installed
+
+     Example:
+         >>> # Set environment variable first
+         >>> import os
+         >>> os.environ["LLM_API_KEY"] = "sk-..."
+         >>> provider = create_default_provider()
+     """
+     return create_provider()
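
For orientation, here is a minimal usage sketch of the module added above. It only calls functions that appear in the diff (`create_default_provider`, `complete`, `stream`, `get_last_metadata`); the import path `markdown_flow.providers` is an assumption, since the diff does not show where the new file sits inside the package.

```python
import os

# create_default_provider() reads its configuration from environment variables.
os.environ["LLM_API_KEY"] = "sk-..."

# Hypothetical import path -- the file's location in the package is not shown in this diff.
from markdown_flow.providers import create_default_provider

provider = create_default_provider()
messages = [{"role": "user", "content": "Say hello"}]

# Non-streaming call, then metadata (model, temperature, tokens, timing) from the last call.
content = provider.complete(messages)
metadata = provider.get_last_metadata()
print(content, metadata.get("total_tokens"))

# Streaming call yields incremental chunks.
for chunk in provider.stream(messages):
    print(chunk, end="", flush=True)
```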
markdown_flow/utils.py CHANGED
@@ -19,11 +19,11 @@ from .constants import (
      COMPILED_PERCENT_VARIABLE_REGEX,
      COMPILED_PRESERVE_FENCE_REGEX,
      COMPILED_SINGLE_PIPE_SPLIT_REGEX,
+     CONTEXT_BUTTON_OPTIONS_TEMPLATE,
      CONTEXT_CONVERSATION_TEMPLATE,
      CONTEXT_QUESTION_MARKER,
      CONTEXT_QUESTION_TEMPLATE,
      JSON_PARSE_ERROR,
-     OUTPUT_INSTRUCTION_EXPLANATION,
      OUTPUT_INSTRUCTION_PREFIX,
      OUTPUT_INSTRUCTION_SUFFIX,
      SMART_VALIDATION_TEMPLATE,
@@ -68,7 +68,7 @@ def is_preserved_content_block(content: str) -> bool:
      Check if content is completely preserved content block.

      Preserved blocks are entirely wrapped by markers with no external content.
-     Supports inline (===content===) and multiline (!=== ... !===) formats.
+     Supports inline (===content===), multiline (!=== ... !===) formats, and mixed formats.

      Args:
          content: Content to check
@@ -82,61 +82,50 @@ def is_preserved_content_block(content: str) -> bool:

      lines = content.split("\n")

-     # Check if all non-empty lines are inline format (!===content!===)
-     all_inline_format = True
-     has_any_content = False
-
-     for line in lines:
-         stripped_line = line.strip()
-         if stripped_line:  # Non-empty line
-             has_any_content = True
-             # Check if inline format: ===content===
-             match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
-             if match:
-                 # Ensure inner content exists and contains no ===
-                 inner_content = match.group(1).strip()
-                 if not inner_content or "===" in inner_content:
-                     all_inline_format = False
-                     break
-             else:
-                 all_inline_format = False  # type: ignore[unreachable]
-                 break
-
-     # If all lines are inline format, return directly
-     if has_any_content and all_inline_format:
-         return True
-
-     # Check multiline format using state machine
+     # Use state machine to validate that all non-empty content is preserved
      state = "OUTSIDE"  # States: OUTSIDE, INSIDE
-     has_content_outside = False  # Has external content
-     has_preserve_blocks = False  # Has preserve blocks
+     has_preserve_content = False

      for line in lines:
          stripped_line = line.strip()

+         # Check if this line is a fence marker (!===)
          if COMPILED_PRESERVE_FENCE_REGEX.match(stripped_line):
              if state == "OUTSIDE":
                  # Enter preserve block
                  state = "INSIDE"
-                 has_preserve_blocks = True
+                 has_preserve_content = True
              elif state == "INSIDE":
                  # Exit preserve block
                  state = "OUTSIDE"
-             # !=== lines don't count as external content
-         else:
-             # Non-!=== lines
-             if stripped_line:  # type: ignore[unreachable]  # Non-empty line
-                 if state == "OUTSIDE":
-                     # External content found
-                     has_content_outside = True
-                     break
-                 # Internal content doesn't affect judgment
+             # Fence markers themselves are valid preserved content
+             continue
+
+         # Non-fence lines
+         if stripped_line:  # Non-empty line
+             if state == "INSIDE":
+                 # Inside fence block, this is valid preserved content
+                 has_preserve_content = True
+             else:
+                 # Outside fence block, check if it's inline format
+                 match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
+                 if match:
+                     # Ensure inner content exists and contains no ===
+                     inner_content = match.group(1).strip()
+                     if inner_content and "===" not in inner_content:
+                         # Valid inline format
+                         has_preserve_content = True
+                     else:
+                         # Invalid inline format
+                         return False
+                 else:
+                     # Not fence, not inline format -> external content
+                     return False

      # Judgment conditions:
-     # 1. Must have preserve blocks
-     # 2. Cannot have external content
-     # 3. Final state must be OUTSIDE (all blocks closed)
-     return has_preserve_blocks and not has_content_outside and state == "OUTSIDE"
+     # 1. Must have preserved content
+     # 2. Final state must be OUTSIDE (all fence blocks closed)
+     return has_preserve_content and state == "OUTSIDE"


  def extract_interaction_question(content: str) -> str | None:
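
The rewritten check above collapses the old two-pass logic into a single state machine, so inline and fenced markers can now be mixed in one block. A small sketch of the resulting behavior; the marker syntax follows the docstring, and the exact matching rules live in `COMPILED_INLINE_PRESERVE_REGEX` and `COMPILED_PRESERVE_FENCE_REGEX`, so treat the expected values as illustrative.

```python
from markdown_flow.utils import is_preserved_content_block

# Mixed inline and fenced preserved content: rejected by the 0.2.10 logic,
# accepted by the 0.2.30 state machine.
mixed = "===Keep this line===\n!===\nKeep this block\n!==="
print(is_preserved_content_block(mixed))  # expected: True

# Plain text outside any marker still disqualifies the block in both versions.
leaky = "===Keep this line===\nplain text outside any marker"
print(is_preserved_content_block(leaky))  # expected: False
```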
@@ -480,6 +469,7 @@ def generate_smart_validation_template(
      target_variable: str,
      context: list[dict[str, Any]] | None = None,
      interaction_question: str | None = None,
+     buttons: list[dict[str, str]] | None = None,
  ) -> str:
      """
      Generate smart validation template based on context and question.
@@ -488,19 +478,28 @@ def generate_smart_validation_template(
          target_variable: Target variable name
          context: Context message list with role and content fields
          interaction_question: Question text from interaction block
+         buttons: Button options list with display and value fields

      Returns:
          Generated validation template
      """
      # Build context information
      context_info = ""
-     if interaction_question or context:
+     if interaction_question or context or buttons:
          context_parts = []

          # Add question information (most important, put first)
          if interaction_question:
              context_parts.append(CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question))

+         # Add button options information
+         if buttons:
+             button_displays = [btn.get("display", "") for btn in buttons if btn.get("display")]
+             if button_displays:
+                 button_options_str = ", ".join(button_displays)
+                 button_info = CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=button_options_str)
+                 context_parts.append(button_info)
+
          # Add conversation context
          if context:
              for msg in context:
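
The new `buttons` parameter feeds option labels into the validation prompt through `CONTEXT_BUTTON_OPTIONS_TEMPLATE`. A hedged sketch of a call using the dict shape the code reads (`display`/`value` keys); the argument values are purely illustrative.

```python
from markdown_flow.utils import generate_smart_validation_template

template = generate_smart_validation_template(
    target_variable="favorite_color",              # illustrative variable name
    interaction_question="Which color do you prefer?",
    buttons=[
        {"display": "Red", "value": "red"},
        {"display": "Blue", "value": "blue"},
    ],
)
# Only the "display" strings are joined into the button-options context line.
```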
@@ -559,7 +558,7 @@ def parse_json_response(response_text: str) -> dict[str, Any]:
      raise ValueError(JSON_PARSE_ERROR)


- def process_output_instructions(content: str) -> str:
+ def process_output_instructions(content: str) -> tuple[str, bool]:
      """
      Process output instruction markers, converting !=== format to [output] format.

@@ -569,7 +568,9 @@ def process_output_instructions(content: str) -> str:
          content: Raw content containing output instructions

      Returns:
-         Processed content with === and !=== markers converted to [output] format
+         Tuple of (processed_content, has_preserved_content):
+         - processed_content: Content with === and !=== markers converted to XML format
+         - has_preserved_content: True if content contained preserved markers
      """
      lines = content.split("\n")
      result_lines = []
@@ -650,11 +651,8 @@ def process_output_instructions(content: str) -> str:
      # Assemble final content
      processed_content = "\n".join(result_lines)

-     # Add explanation prefix (if has output instructions)
-     if has_output_instruction:
-         processed_content = OUTPUT_INSTRUCTION_EXPLANATION + processed_content
-
-     return processed_content
+     # Return both processed content and whether it contains preserved content
+     return processed_content, has_output_instruction


  def extract_preserved_content(content: str) -> str:
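
Because `process_output_instructions` now returns `tuple[str, bool]` instead of a plain string, and no longer prepends `OUTPUT_INSTRUCTION_EXPLANATION` itself, callers upgrading from 0.2.10 need a small change. A sketch of the migration; the input string is illustrative.

```python
from markdown_flow.utils import process_output_instructions

raw = "!===\nRender this text exactly as written.\n!==="

# 0.2.10: processed = process_output_instructions(raw)
# 0.2.30: the second element reports whether preserved markers were found.
processed, has_preserved = process_output_instructions(raw)
if has_preserved:
    # The explanation prefix is no longer added by this function.
    print("content contained preserved output instructions")
print(processed)
```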