markdown-flow 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- markdown_flow/__init__.py +7 -7
- markdown_flow/constants.py +212 -49
- markdown_flow/core.py +614 -591
- markdown_flow/llm.py +10 -12
- markdown_flow/models.py +1 -17
- markdown_flow/parser/__init__.py +38 -0
- markdown_flow/parser/code_fence_utils.py +190 -0
- markdown_flow/parser/interaction.py +354 -0
- markdown_flow/parser/json_parser.py +50 -0
- markdown_flow/parser/output.py +215 -0
- markdown_flow/parser/preprocessor.py +151 -0
- markdown_flow/parser/validation.py +100 -0
- markdown_flow/parser/variable.py +95 -0
- markdown_flow/providers/__init__.py +16 -0
- markdown_flow/providers/config.py +46 -0
- markdown_flow/providers/openai.py +369 -0
- markdown_flow/utils.py +49 -51
- {markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/METADATA +18 -107
- markdown_flow-0.2.30.dist-info/RECORD +24 -0
- markdown_flow-0.2.10.dist-info/RECORD +0 -13
- {markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/WHEEL +0 -0
- {markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/licenses/LICENSE +0 -0
- {markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/top_level.txt +0 -0

markdown_flow/parser/json_parser.py (new file)
@@ -0,0 +1,50 @@
+"""
+JSON Parser Module
+
+Provides robust JSON parsing with support for code blocks and mixed text formats.
+"""
+
+import json
+import re
+from typing import Any
+
+from ..constants import JSON_PARSE_ERROR
+
+
+def parse_json_response(response_text: str) -> dict[str, Any]:
+    """
+    Parse JSON response supporting multiple formats.
+
+    Supports pure JSON strings, ```json code blocks, and mixed text formats.
+
+    Args:
+        response_text: Response text to parse
+
+    Returns:
+        Parsed dictionary object
+
+    Raises:
+        ValueError: When JSON cannot be parsed
+    """
+    text = response_text.strip()
+
+    # Extract JSON code block
+    if "```json" in text:
+        start_idx = text.find("```json") + 7
+        end_idx = text.find("```", start_idx)
+        if end_idx != -1:
+            text = text[start_idx:end_idx].strip()
+    elif "```" in text:
+        start_idx = text.find("```") + 3
+        end_idx = text.find("```", start_idx)
+        if end_idx != -1:
+            text = text[start_idx:end_idx].strip()
+
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        # Try to extract first JSON object
+        json_match = re.search(r"\{[^}]+\}", text)
+        if json_match:
+            return json.loads(json_match.group())
+        raise ValueError(JSON_PARSE_ERROR)
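
`parse_json_response` first peels a fenced ```json (or plain ```) block out of the reply, then falls back to grabbing the first `{...}` object it can find. A minimal usage sketch; the reply string and key names below are illustrative, only the import path comes from the hunk above:

```python
# Hedged usage sketch: the reply text and key names are made up for illustration.
from markdown_flow.parser.json_parser import parse_json_response

reply = 'Here is the result:\n```json\n{"result": "ok", "parse_vars": {"nickname": "Ada"}}\n```'

parsed = parse_json_response(reply)      # fence is stripped, inner JSON is loaded
print(parsed["parse_vars"]["nickname"])  # -> Ada
```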

markdown_flow/parser/output.py (new file)
@@ -0,0 +1,215 @@
+"""
+Output Parser Module
+
+Handles output instructions and preserved content processing for MarkdownFlow documents.
+"""
+
+import re
+
+from ..constants import (
+    COMPILED_INLINE_PRESERVE_REGEX,
+    COMPILED_PRESERVE_FENCE_REGEX,
+    OUTPUT_INSTRUCTION_PREFIX,
+    OUTPUT_INSTRUCTION_SUFFIX,
+)
+
+
+def is_preserved_content_block(content: str) -> bool:
+    """
+    Check if content is completely preserved content block.
+
+    Preserved blocks are entirely wrapped by markers with no external content.
+    Supports inline (===content===), multiline (!=== ... !===) formats, and mixed formats.
+
+    Args:
+        content: Content to check
+
+    Returns:
+        True if content is fully wrapped by preserved markers
+    """
+    content = content.strip()
+    if not content:
+        return False
+
+    lines = content.split("\n")
+
+    # Use state machine to validate that all non-empty content is preserved
+    state = "OUTSIDE"  # States: OUTSIDE, INSIDE
+    has_preserve_content = False
+
+    for line in lines:
+        stripped_line = line.strip()
+
+        # Check if this line is a fence marker (!===)
+        if COMPILED_PRESERVE_FENCE_REGEX.match(stripped_line):
+            if state == "OUTSIDE":
+                # Enter preserve block
+                state = "INSIDE"
+                has_preserve_content = True
+            elif state == "INSIDE":
+                # Exit preserve block
+                state = "OUTSIDE"
+            # Fence markers themselves are valid preserved content
+            continue
+
+        # Non-fence lines
+        if stripped_line:  # Non-empty line
+            if state == "INSIDE":
+                # Inside fence block, this is valid preserved content
+                has_preserve_content = True
+            else:
+                # Outside fence block, check if it's inline format
+                match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
+                if match:
+                    # Ensure inner content exists and contains no ===
+                    inner_content = match.group(1).strip()
+                    if inner_content and "===" not in inner_content:
+                        # Valid inline format
+                        has_preserve_content = True
+                    else:
+                        # Invalid inline format
+                        return False
+                else:
+                    # Not fence, not inline format -> external content
+                    return False
+
+    # Judgment conditions:
+    # 1. Must have preserved content
+    # 2. Final state must be OUTSIDE (all fence blocks closed)
+    return has_preserve_content and state == "OUTSIDE"
+
+
+def process_output_instructions(content: str) -> tuple[str, bool]:
+    """
+    Process output instruction markers, converting !=== format to [output] format.
+
+    Uses unified state machine to handle inline (===content===) and multiline (!===...!===) formats.
+
+    Args:
+        content: Raw content containing output instructions
+
+    Returns:
+        Tuple of (processed_content, has_preserved_content):
+        - processed_content: Content with === and !=== markers converted to XML format
+        - has_preserved_content: True if content contained preserved markers
+    """
+    lines = content.split("\n")
+    result_lines = []
+    i = 0
+    has_output_instruction = False
+
+    while i < len(lines):
+        line = lines[i]
+
+        # Check if contains preserved markers (inline ===...=== or multiline !===...)
+        # Check inline format first: ===content===
+        inline_match = re.search(r"===\s*(.+?)\s*===", line)
+        if inline_match and line.count("===") == 2 and not line.strip().startswith("!"):
+            inner_content = inline_match.group(1).strip()
+            # Validate that inner content doesn't contain ===
+            if not inner_content or "===" in inner_content:
+                result_lines.append(line)
+                i += 1
+                continue
+            # Process inline format
+            full_match = inline_match.group(0)
+
+            # Build output instruction - keep inline format on same line
+            output_instruction = f"{OUTPUT_INSTRUCTION_PREFIX}{inner_content}{OUTPUT_INSTRUCTION_SUFFIX}"
+
+            # Replace ===...=== part in original line
+            processed_line = line.replace(full_match, output_instruction)
+            result_lines.append(processed_line)
+            has_output_instruction = True
+            i += 1
+
+        elif COMPILED_PRESERVE_FENCE_REGEX.match(line.strip()):
+            # Multiline format start
+            i += 1
+            output_content_lines: list[str] = []
+
+            # Collect multiline content
+            while i < len(lines):
+                current_line = lines[i]
+                if COMPILED_PRESERVE_FENCE_REGEX.match(current_line.strip()):
+                    # Found end marker, process collected content
+                    output_content = "\n".join(output_content_lines).strip()
+
+                    # Special handling for title format (maintain original logic)
+                    hash_prefix = ""
+                    if output_content.startswith("#"):
+                        first_space = output_content.find(" ")
+                        first_newline = output_content.find("\n")
+
+                        if first_space != -1 and (first_newline == -1 or first_space < first_newline):
+                            hash_prefix = output_content[: first_space + 1]
+                            output_content = output_content[first_space + 1 :].strip()
+                        elif first_newline != -1:
+                            hash_prefix = output_content[: first_newline + 1]
+                            output_content = output_content[first_newline + 1 :].strip()
+
+                    # Build output instruction
+                    if hash_prefix:
+                        result_lines.append(f"{OUTPUT_INSTRUCTION_PREFIX}{hash_prefix}{output_content}{OUTPUT_INSTRUCTION_SUFFIX}")
+                    else:
+                        result_lines.append(f"{OUTPUT_INSTRUCTION_PREFIX}{output_content}{OUTPUT_INSTRUCTION_SUFFIX}")
+
+                    has_output_instruction = True
+                    i += 1
+                    break
+                # Continue collecting content
+                output_content_lines.append(current_line)  # type: ignore[unreachable]
+                i += 1
+            else:
+                # No end marker found, rollback processing
+                result_lines.append(lines[i - len(output_content_lines) - 1])
+                result_lines.extend(output_content_lines)
+        else:
+            # Normal line
+            result_lines.append(line)  # type: ignore[unreachable]
+            i += 1

+    # Assemble final content
+    processed_content = "\n".join(result_lines)
+
+    # Return both processed content and whether it contains preserved content
+    return processed_content, has_output_instruction
+
+
+def extract_preserved_content(content: str) -> str:
+    """
+    Extract actual content from preserved content blocks, removing markers.
+
+    Handles inline (===content===) and multiline (!===...!===) formats.
+
+    Args:
+        content: Preserved content containing preserved markers
+
+    Returns:
+        Actual content with === and !=== markers removed
+    """
+    content = content.strip()
+    if not content:
+        return ""
+
+    lines = content.split("\n")
+    result_lines = []
+
+    for line in lines:
+        stripped_line = line.strip()
+
+        # Check inline format: ===content===
+        inline_match = COMPILED_INLINE_PRESERVE_REGEX.match(stripped_line)
+        if inline_match:
+            # Inline format, extract middle content
+            inner_content = inline_match.group(1).strip()
+            if inner_content and "===" not in inner_content:
+                result_lines.append(inner_content)
+        elif COMPILED_PRESERVE_FENCE_REGEX.match(stripped_line):  # type: ignore[unreachable]
+            # Multiline format delimiter, skip
+            continue
+        else:
+            # Normal content line, keep
+            result_lines.append(line)
+
+    return "\n".join(result_lines)
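
The three helpers above share the `!===` fence and `===content===` inline conventions. A hedged sketch of how they behave on a small block; the exact wrapper emitted by `process_output_instructions` depends on `OUTPUT_INSTRUCTION_PREFIX`/`OUTPUT_INSTRUCTION_SUFFIX` in `constants.py`, so the comments only describe the shape of the result:

```python
# Illustrative input; the !=== fence syntax comes from the docstrings above.
from markdown_flow.parser.output import (
    extract_preserved_content,
    is_preserved_content_block,
    process_output_instructions,
)

block = "!===\nPlease answer in one sentence.\n!==="

print(is_preserved_content_block(block))  # True: every line is a fence or fenced content
print(extract_preserved_content(block))   # "Please answer in one sentence." (markers stripped)

converted, has_preserved = process_output_instructions(block)
print(has_preserved)                      # True
print(converted)                          # inner text wrapped in the configured output-instruction markers
```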

markdown_flow/parser/preprocessor.py (new file)
@@ -0,0 +1,151 @@
+"""
+Code Block Preprocessor
+
+Extracts code block content before parsing, implementing CommonMark-compliant fenced code blocks.
+"""
+
+from .code_fence_utils import is_code_fence_end, parse_code_fence_start
+
+
+class CodeBlockPreprocessor:
+    """
+    Code block preprocessor
+
+    Extracts code blocks from document and replaces them with placeholders, so that MarkdownFlow
+    syntax inside code blocks is ignored during subsequent parsing.
+
+    Attributes:
+        code_blocks: Mapping of placeholder → original code block content (including fence markers)
+        counter: Placeholder counter
+    """
+
+    # State machine states
+    STATE_NORMAL = "NORMAL"
+    STATE_IN_CODE_BLOCK = "IN_CODE_BLOCK"
+
+    def __init__(self):
+        """Initialize preprocessor"""
+        self.code_blocks: dict[str, str] = {}
+        self.counter: int = 0
+
+    def extract_code_blocks(self, document: str) -> str:
+        """
+        Extract code blocks from document and replace with placeholders
+
+        How it works:
+        1. Scan document line by line using a state machine
+        2. Detect CommonMark-compliant fenced code blocks
+        3. Replace code block content (including fences) with unique placeholders
+        4. Store code block content in internal mapping
+
+        Args:
+            document: Original markdown document
+
+        Returns:
+            Processed document (code blocks replaced with placeholders)
+
+        Examples:
+            >>> preprocessor = CodeBlockPreprocessor()
+            >>> doc = "```python\\nprint('hello')\\n```"
+            >>> processed = preprocessor.extract_code_blocks(doc)
+            >>> "__MDFLOW_CODE_BLOCK_1__" in processed
+            True
+        """
+        lines = document.split("\n")
+        result = []
+
+        # State machine variables
+        state = self.STATE_NORMAL
+        current_fence = None
+        code_buffer = []
+
+        for line in lines:
+            if state == self.STATE_NORMAL:
+                # Detect code block opening fence
+                fence_info = parse_code_fence_start(line)
+                if fence_info is not None:
+                    # Enter code block state
+                    state = self.STATE_IN_CODE_BLOCK
+                    current_fence = fence_info
+                    code_buffer = [line]
+                else:
+                    # Normal line, keep as-is
+                    result.append(line)
+
+            elif state == self.STATE_IN_CODE_BLOCK:
+                # Accumulate code lines
+                code_buffer.append(line)
+
+                # Detect fence closing
+                if is_code_fence_end(line, current_fence):
+                    # Generate placeholder
+                    placeholder = self._generate_placeholder()
+
+                    # Store code block
+                    code_content = "\n".join(code_buffer)
+                    self.code_blocks[placeholder] = code_content
+
+                    # Output placeholder (as a separate line)
+                    result.append(placeholder)
+
+                    # Reset state
+                    state = self.STATE_NORMAL
+                    current_fence = None
+                    code_buffer = []
+
+        # Handle unclosed code blocks (keep as-is)
+        if state == self.STATE_IN_CODE_BLOCK and code_buffer:
+            # Restore unclosed code block content to result
+            result.extend(code_buffer)
+
+        return "\n".join(result)
+
+    def restore_code_blocks(self, processed: str) -> str:
+        """
+        Restore placeholders back to original code block content
+
+        Args:
+            processed: Processed document containing placeholders
+
+        Returns:
+            Restored document
+
+        Examples:
+            >>> preprocessor = CodeBlockPreprocessor()
+            >>> doc = "```python\\nprint('hello')\\n```"
+            >>> processed = preprocessor.extract_code_blocks(doc)
+            >>> restored = preprocessor.restore_code_blocks(processed)
+            >>> restored == doc
+            True
+        """
+        result = processed
+
+        # Replace all placeholders
+        for placeholder, original in self.code_blocks.items():
+            result = result.replace(placeholder, original)
+
+        return result
+
+    def _generate_placeholder(self) -> str:
+        """
+        Generate a unique placeholder
+
+        Returns:
+            Placeholder in format __MDFLOW_CODE_BLOCK_N__
+        """
+        self.counter += 1
+        return f"__MDFLOW_CODE_BLOCK_{self.counter}__"
+
+    def reset(self):
+        """Reset preprocessor state (for processing new documents)"""
+        self.code_blocks = {}
+        self.counter = 0
+
+    def get_code_blocks(self) -> dict[str, str]:
+        """
+        Return all extracted code blocks (for debugging)
+
+        Returns:
+            Mapping of placeholder → original code block content
+        """
+        return self.code_blocks
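
Because the placeholders are plain text lines, the extract/restore pair composes as a simple round trip around any downstream parsing step. A short sketch, assuming `parse_code_fence_start`/`is_code_fence_end` in `code_fence_utils` recognize standard triple-backtick fences as their names suggest:

```python
from markdown_flow.parser.preprocessor import CodeBlockPreprocessor

# Illustrative document: the === inside the fenced block must not be treated as a marker.
doc = "Intro text\n```python\nprint('=== not a marker ===')\n```\nOutro text"

pre = CodeBlockPreprocessor()
masked = pre.extract_code_blocks(doc)   # fenced block swapped for __MDFLOW_CODE_BLOCK_1__
# ... run MarkdownFlow parsing on `masked` here ...
restored = pre.restore_code_blocks(masked)
assert restored == doc
```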

markdown_flow/parser/validation.py (new file)
@@ -0,0 +1,100 @@
+"""
+Validation Parser Module
+
+Provides validation template generation and response parsing for user input validation.
+"""
+
+import json
+from typing import Any
+
+from ..constants import (
+    CONTEXT_BUTTON_OPTIONS_TEMPLATE,
+    CONTEXT_CONVERSATION_TEMPLATE,
+    CONTEXT_QUESTION_MARKER,
+    CONTEXT_QUESTION_TEMPLATE,
+    VALIDATION_ILLEGAL_DEFAULT_REASON,
+    VALIDATION_RESPONSE_ILLEGAL,
+    VALIDATION_RESPONSE_OK,
+    VALIDATION_TASK_TEMPLATE,
+)
+from .json_parser import parse_json_response
+
+
+def generate_smart_validation_template(
+    target_variable: str,
+    context: list[dict[str, Any]] | None = None,
+    interaction_question: str | None = None,
+    buttons: list[dict[str, str]] | None = None,
+) -> str:
+    """
+    Generate smart validation template based on context and question.
+
+    DEPRECATED: This function is no longer used internally.
+    Use _build_validation_messages() in MarkdownFlow class instead.
+
+    Args:
+        target_variable: Target variable name
+        context: Context message list with role and content fields
+        interaction_question: Question text from interaction block
+        buttons: Button options list with display and value fields
+
+    Returns:
+        Generated validation template (for backward compatibility)
+    """
+    # For backward compatibility, return a simple template
+    # This function is no longer used in the core validation flow
+    template = VALIDATION_TASK_TEMPLATE.replace("{target_variable}", target_variable)
+    template += "\n\n# 用户回答\n{sys_user_input}"
+    return template.strip()
+
+
+def parse_validation_response(llm_response: str, original_input: str, target_variable: str) -> dict[str, Any]:
+    """
+    Parse LLM validation response, returning standard format.
+
+    Supports JSON format and natural language text responses.
+
+    Args:
+        llm_response: LLM's raw response
+        original_input: User's original input
+        target_variable: Target variable name
+
+    Returns:
+        Standardized parsing result with content and variables fields
+    """
+    try:
+        # Try to parse JSON response
+        parsed_response = parse_json_response(llm_response)
+
+        if isinstance(parsed_response, dict):
+            result = parsed_response.get("result", "").lower()
+
+            if result == VALIDATION_RESPONSE_OK:
+                # Validation successful
+                parse_vars = parsed_response.get("parse_vars", {})
+                if target_variable not in parse_vars:
+                    parse_vars[target_variable] = original_input.strip()
+
+                # Ensure the variable value is in list format (user_input format)
+                if target_variable in parse_vars and not isinstance(parse_vars[target_variable], list):
+                    parse_vars[target_variable] = [parse_vars[target_variable]]
+
+                return {"content": "", "variables": parse_vars}
+
+            if result == VALIDATION_RESPONSE_ILLEGAL:
+                # Validation failed
+                reason = parsed_response.get("reason", VALIDATION_ILLEGAL_DEFAULT_REASON)
+                return {"content": reason, "variables": None}
+
+    except (json.JSONDecodeError, ValueError, KeyError):
+        # JSON parsing failed, fallback to text mode
+        pass
+
+    # Text response parsing (fallback processing)
+    response_lower = llm_response.lower()
+
+    # Check against standard response format
+    if "ok" in response_lower or "valid" in response_lower:
+        # Return in list format to match user_input format
+        return {"content": "", "variables": {target_variable: [original_input.strip()]}}
+    return {"content": llm_response, "variables": None}
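
For reference, the Chinese heading appended by the deprecated template helper, `用户回答`, reads "user answer". A hedged sketch of `parse_validation_response`: it assumes `VALIDATION_RESPONSE_OK` and `VALIDATION_RESPONSE_ILLEGAL` are the lowercase strings "ok" and "illegal" (which the substring fallback at the end of the function hints at), and the reply payloads are invented for illustration:

```python
from markdown_flow.parser.validation import parse_validation_response

ok_reply = '{"result": "ok", "parse_vars": {"nickname": "Ada"}}'
print(parse_validation_response(ok_reply, "I go by Ada", "nickname"))
# -> {"content": "", "variables": {"nickname": ["Ada"]}}

bad_reply = '{"result": "illegal", "reason": "Please provide a real nickname."}'
print(parse_validation_response(bad_reply, "asdf", "nickname"))
# -> {"content": "Please provide a real nickname.", "variables": None}
```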

markdown_flow/parser/variable.py (new file)
@@ -0,0 +1,95 @@
+"""
+Variable Parser Module
+
+Provides variable extraction and replacement functionality for MarkdownFlow documents.
+"""
+
+import re
+
+from ..constants import (
+    COMPILED_BRACE_VARIABLE_REGEX,
+    COMPILED_PERCENT_VARIABLE_REGEX,
+    VARIABLE_DEFAULT_VALUE,
+)
+
+
+def extract_variables_from_text(text: str) -> list[str]:
+    """
+    Extract all variable names from text.
+
+    Recognizes two variable formats:
+    - %{{variable_name}} format (preserved variables)
+    - {{variable_name}} format (replaceable variables)
+
+    Args:
+        text: Text content to analyze
+
+    Returns:
+        Sorted list of unique variable names
+    """
+    variables = set()
+
+    # Match %{{...}} format variables using pre-compiled regex
+    matches = COMPILED_PERCENT_VARIABLE_REGEX.findall(text)
+    for match in matches:
+        variables.add(match.strip())
+
+    # Match {{...}} format variables (excluding %) using pre-compiled regex
+    matches = COMPILED_BRACE_VARIABLE_REGEX.findall(text)
+    for match in matches:
+        variables.add(match.strip())
+
+    return sorted(list(variables))
+
+
+def replace_variables_in_text(text: str, variables: dict[str, str | list[str]]) -> str:
+    """
+    Replace variables in text, undefined or empty variables are auto-assigned "UNKNOWN".
+
+    Args:
+        text: Text containing variables
+        variables: Variable name to value mapping
+
+    Returns:
+        Text with variables replaced
+    """
+    if not text or not isinstance(text, str):
+        return text or ""
+
+    # Check each variable for null or empty values, assign "UNKNOWN" if so
+    if variables:
+        for key, value in variables.items():
+            if value is None or value == "" or (isinstance(value, list) and not value):
+                variables[key] = VARIABLE_DEFAULT_VALUE
+
+    # Initialize variables as empty dict (if None)
+    if not variables:
+        variables = {}
+
+    # Find all {{variable}} format variable references
+    variable_pattern = r"\{\{([^{}]+)\}\}"
+    matches = re.findall(variable_pattern, text)
+
+    # Assign "UNKNOWN" to undefined variables
+    for var_name in matches:
+        var_name = var_name.strip()
+        if var_name not in variables:
+            variables[var_name] = "UNKNOWN"
+
+    # Use updated replacement logic, preserve %{{var_name}} format variables
+    result = text
+    for var_name, var_value in variables.items():
+        # Convert value to string based on type
+        if isinstance(var_value, list):
+            # Multiple values - join with comma
+            value_str = ", ".join(str(v) for v in var_value if v is not None and str(v).strip())
+            if not value_str:
+                value_str = VARIABLE_DEFAULT_VALUE
+        else:
+            value_str = str(var_value) if var_value is not None else VARIABLE_DEFAULT_VALUE
+
+        # Use negative lookbehind assertion to exclude %{{var_name}} format
+        pattern = f"(?<!%){{{{{re.escape(var_name)}}}}}"
+        result = re.sub(pattern, value_str, result)
+
+    return result
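
A sketch of the two variable helpers; it assumes the compiled patterns match the `{{name}}` and `%{{name}}` forms described in the docstrings and that `VARIABLE_DEFAULT_VALUE` is the "UNKNOWN" string the docstring mentions. The text and variable names are illustrative:

```python
from markdown_flow.parser.variable import (
    extract_variables_from_text,
    replace_variables_in_text,
)

text = "Hello {{name}}, your level is {{level}}. Keep %{{name}} as typed."

print(extract_variables_from_text(text))
# -> ['level', 'name']

print(replace_variables_in_text(text, {"name": "Ada", "level": ""}))
# -> "Hello Ada, your level is UNKNOWN. Keep %{{name}} as typed."
```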

markdown_flow/providers/__init__.py (new file)
@@ -0,0 +1,16 @@
+"""
+Markdown-Flow LLM Providers Module
+
+Provides built-in LLM provider implementations.
+"""
+
+from .config import ProviderConfig
+from .openai import OpenAIProvider, create_default_provider, create_provider
+
+
+__all__ = [
+    "ProviderConfig",
+    "OpenAIProvider",
+    "create_provider",
+    "create_default_provider",
+]

markdown_flow/providers/config.py (new file)
@@ -0,0 +1,46 @@
+"""
+Provider Configuration Module
+
+Provides configuration classes for LLM providers.
+"""
+
+import os
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ProviderConfig:
+    """
+    Configuration for LLM providers.
+
+    Supports environment variable defaults for easy configuration.
+    """
+
+    api_key: str = field(default_factory=lambda: os.getenv("LLM_API_KEY", ""))
+    """API key for the LLM service. Default: LLM_API_KEY environment variable."""
+
+    base_url: str = field(default_factory=lambda: os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"))
+    """Base URL for the API endpoint. Default: LLM_BASE_URL environment variable or OpenAI default."""
+
+    model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gpt-3.5-turbo"))
+    """Default model name. Default: LLM_MODEL environment variable or gpt-3.5-turbo."""
+
+    temperature: float = field(default_factory=lambda: float(os.getenv("LLM_TEMPERATURE", "0.7")))
+    """Default temperature (0.0-2.0). Default: LLM_TEMPERATURE environment variable or 0.7."""
+
+    debug: bool = field(default_factory=lambda: os.getenv("LLM_DEBUG", "false").lower() in ("true", "1", "yes"))
+    """Enable debug mode (colorized console output). Default: LLM_DEBUG environment variable or False."""
+
+    timeout: float | None = field(default_factory=lambda: float(os.getenv("LLM_TIMEOUT")) if os.getenv("LLM_TIMEOUT") else None)
+    """Request timeout in seconds. None means no timeout. Default: LLM_TIMEOUT environment variable or None."""
+
+    def __post_init__(self):
+        """Validate configuration after initialization."""
+        if not self.api_key:
+            raise ValueError("API key is required. Set it via ProviderConfig(api_key='...') or LLM_API_KEY environment variable.")
+
+        if self.temperature < 0.0 or self.temperature > 2.0:
+            raise ValueError(f"Temperature must be between 0.0 and 2.0, got {self.temperature}")
+
+        if self.timeout is not None and self.timeout <= 0:
+            raise ValueError(f"Timeout must be positive or None, got {self.timeout}")