PyPI - markdown-flow - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.26__py3-none-any.whl - Mend

markdown-flow 0.2.16__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of markdown-flow might be problematic. Click here for more details.

Files changed (21) hide show

markdown_flow/__init__.py +6 -7
markdown_flow/constants.py +52 -20
markdown_flow/core.py +359 -544
markdown_flow/llm.py +10 -12
markdown_flow/models.py +1 -1
markdown_flow/parser/__init__.py +34 -0
markdown_flow/parser/interaction.py +354 -0
markdown_flow/parser/json_parser.py +50 -0
markdown_flow/parser/output.py +215 -0
markdown_flow/parser/validation.py +121 -0
markdown_flow/parser/variable.py +95 -0
markdown_flow/providers/__init__.py +15 -0
markdown_flow/providers/config.py +51 -0
markdown_flow/providers/openai.py +371 -0
markdown_flow/utils.py +49 -51
{markdown_flow-0.2.16.dist-info → markdown_flow-0.2.26.dist-info}/METADATA +18 -107
markdown_flow-0.2.26.dist-info/RECORD +22 -0
markdown_flow-0.2.16.dist-info/RECORD +0 -13
{markdown_flow-0.2.16.dist-info → markdown_flow-0.2.26.dist-info}/WHEEL +0 -0
{markdown_flow-0.2.16.dist-info → markdown_flow-0.2.26.dist-info}/licenses/LICENSE +0 -0
{markdown_flow-0.2.16.dist-info → markdown_flow-0.2.26.dist-info}/top_level.txt +0 -0

markdown_flow/parser/output.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""
+Output Parser Module
+Handles output instructions and preserved content processing for MarkdownFlow documents.
+"""
+import re
+from ..constants import (
+    COMPILED_INLINE_PRESERVE_REGEX,
+    COMPILED_PRESERVE_FENCE_REGEX,
+    OUTPUT_INSTRUCTION_PREFIX,
+    OUTPUT_INSTRUCTION_SUFFIX,
+)
+def is_preserved_content_block(content: str) -> bool:
+    """
+    Tell whether the given content consists of preserved content only.
+    Every non-blank line must be a fence marker, sit between an opening and a
+    closing fence marker, or be a complete inline ===content=== line whose inner
+    text is non-empty and free of "===".
+    Args:
+        content: Content to check
+    Returns:
+        True when at least one preserved element was seen, nothing lies outside
+        the markers, and every opened fence has been closed again
+    """
+    text = content.strip()
+    if not text:
+        return False
+    in_fence = False
+    found_preserved = False
+    for raw_line in text.split("\n"):
+        candidate = raw_line.strip()
+        if COMPILED_PRESERVE_FENCE_REGEX.match(candidate):
+            # An opening fence already counts as preserved content; a closing
+            # fence only flips the state back.
+            if not in_fence:
+                found_preserved = True
+            in_fence = not in_fence
+            continue
+        if not candidate:
+            # Blank lines are neutral wherever they appear
+            continue
+        if in_fence:
+            found_preserved = True
+            continue
+        # Outside a fence the line has to be a well-formed inline marker
+        inline = COMPILED_INLINE_PRESERVE_REGEX.match(candidate)
+        if not inline:
+            return False
+        payload = inline.group(1).strip()
+        if not payload or "===" in payload:
+            return False
+        found_preserved = True
+    return found_preserved and not in_fence
+def process_output_instructions(content: str) -> tuple[str, bool]:
+    """
+    Rewrite preserved-content markers as output instructions.
+    A line holding exactly two "===" runs (and not starting with "!") has its
+    ===text=== span replaced by the text wrapped in OUTPUT_INSTRUCTION_PREFIX and
+    OUTPUT_INSTRUCTION_SUFFIX; the rest of the line stays in place. A pair of fence
+    lines (as recognised by COMPILED_PRESERVE_FENCE_REGEX) collapses, together with
+    the lines between them, into one wrapped entry. An opening fence that is never
+    closed is emitted unchanged along with every line after it.
+    Args:
+        content: Raw content that may contain preserved markers
+    Returns:
+        Tuple of (processed_content, has_preserved_content):
+        - processed_content: Content with the markers converted
+        - has_preserved_content: True if at least one marker was converted
+    """
+    source_lines = content.split("\n")
+    total = len(source_lines)
+    emitted: list[str] = []
+    converted = False
+    cursor = 0
+    while cursor < total:
+        current = source_lines[cursor]
+        trimmed = current.strip()
+        # Inline form is tried first: ===content=== somewhere on the line
+        found = re.search(r"===\s*(.+?)\s*===", current)
+        if found and current.count("===") == 2 and not trimmed.startswith("!"):
+            payload = found.group(1).strip()
+            if payload and "===" not in payload:
+                wrapped = f"{OUTPUT_INSTRUCTION_PREFIX}{payload}{OUTPUT_INSTRUCTION_SUFFIX}"
+                emitted.append(current.replace(found.group(0), wrapped))
+                converted = True
+            else:
+                # Malformed inline marker: pass the line through untouched
+                emitted.append(current)
+            cursor += 1
+            continue
+        if not COMPILED_PRESERVE_FENCE_REGEX.match(trimmed):
+            # Ordinary line
+            emitted.append(current)
+            cursor += 1
+            continue
+        # Opening fence: look ahead for the matching closing fence
+        closing = next(
+            (
+                pos
+                for pos in range(cursor + 1, total)
+                if COMPILED_PRESERVE_FENCE_REGEX.match(source_lines[pos].strip())
+            ),
+            None,
+        )
+        if closing is None:
+            # Unterminated block: keep the fence line and the remainder verbatim
+            emitted.extend(source_lines[cursor:])
+            break
+        block_text = "\n".join(source_lines[cursor + 1 : closing]).strip()
+        if block_text.startswith("#"):
+            # Title handling: keep the leading "#..." token (up to and including the
+            # first space, or the first newline if that comes earlier) and strip the
+            # whitespace surrounding what follows it.
+            space_at = block_text.find(" ")
+            newline_at = block_text.find("\n")
+            if space_at != -1 and (newline_at == -1 or space_at < newline_at):
+                split_at = space_at + 1
+            elif newline_at != -1:
+                split_at = newline_at + 1
+            else:
+                split_at = 0
+            block_text = block_text[:split_at] + block_text[split_at:].strip()
+        emitted.append(f"{OUTPUT_INSTRUCTION_PREFIX}{block_text}{OUTPUT_INSTRUCTION_SUFFIX}")
+        converted = True
+        cursor = closing + 1
+    return "\n".join(emitted), converted
+def extract_preserved_content(content: str) -> str:
+    """
+    Strip preserved-content markers and return what they enclose.
+    Inline ===content=== lines are reduced to their trimmed inner text, fence
+    marker lines are removed, and every other line is kept exactly as written.
+    An inline line whose inner text is empty or contains "===" is dropped.
+    Args:
+        content: Preserved content containing preserved markers
+    Returns:
+        The remaining lines joined by newlines ("" for blank input)
+    """
+    body = content.strip()
+    if not body:
+        return ""
+    kept: list[str] = []
+    for raw_line in body.split("\n"):
+        probe = raw_line.strip()
+        inline = COMPILED_INLINE_PRESERVE_REGEX.match(probe)
+        if inline:
+            payload = inline.group(1).strip()
+            if payload and "===" not in payload:
+                kept.append(payload)
+            continue
+        if not COMPILED_PRESERVE_FENCE_REGEX.match(probe):
+            # Neither inline marker nor fence: regular content, keep untouched
+            kept.append(raw_line)
+    return "\n".join(kept)

markdown_flow/parser/validation.py ADDED Viewed

@@ -0,0 +1,121 @@
+"""
+Validation Parser Module
+Provides validation template generation and response parsing for user input validation.
+"""
+import json
+from typing import Any
+from ..constants import (
+    CONTEXT_BUTTON_OPTIONS_TEMPLATE,
+    CONTEXT_CONVERSATION_TEMPLATE,
+    CONTEXT_QUESTION_MARKER,
+    CONTEXT_QUESTION_TEMPLATE,
+    SMART_VALIDATION_TEMPLATE,
+    VALIDATION_ILLEGAL_DEFAULT_REASON,
+    VALIDATION_RESPONSE_ILLEGAL,
+    VALIDATION_RESPONSE_OK,
+)
+from .json_parser import parse_json_response
+def generate_smart_validation_template(
+    target_variable: str,
+    context: list[dict[str, Any]] | None = None,
+    interaction_question: str | None = None,
+    buttons: list[dict[str, str]] | None = None,
+) -> str:
+    """
+    Build the validation prompt by filling SMART_VALIDATION_TEMPLATE.
+    The context section is assembled in a fixed order: the interaction question,
+    the button display labels, then the assistant messages from the conversation.
+    Sections are separated by a blank line.
+    Args:
+        target_variable: Target variable name
+        context: Context message list; only entries with role "assistant" whose
+            content lacks CONTEXT_QUESTION_MARKER are used
+        interaction_question: Question text from interaction block
+        buttons: Button options list; only non-empty "display" values are used
+    Returns:
+        Stripped template text that still contains the literal {sys_user_input} placeholder
+    """
+    sections: list[str] = []
+    # The question goes first
+    if interaction_question:
+        sections.append(CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question))
+    # Button labels, skipping buttons without a display text
+    labels = [label for label in (btn.get("display") for btn in buttons or ()) if label]
+    if labels:
+        sections.append(CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=", ".join(labels)))
+    # Assistant messages, except those carrying the question marker
+    for message in context or ():
+        if message.get("role") != "assistant":
+            continue
+        body = message.get("content", "")
+        if CONTEXT_QUESTION_MARKER in body:
+            continue
+        sections.append(CONTEXT_CONVERSATION_TEMPLATE.format(content=body))
+    rendered = SMART_VALIDATION_TEMPLATE.format(
+        target_variable=target_variable,
+        context_info="\n\n".join(sections),
+        # Re-emit the placeholder so it survives this formatting pass
+        sys_user_input="{sys_user_input}",
+    )
+    return rendered.strip()
+def parse_validation_response(llm_response: str, original_input: str, target_variable: str) -> dict[str, Any]:
+    """
+    Parse LLM validation response, returning standard format.
+    A JSON object response is inspected first: its "result" field is compared
+    case-insensitively with VALIDATION_RESPONSE_OK / VALIDATION_RESPONSE_ILLEGAL.
+    Anything else falls back to a keyword check on the raw text.
+    Args:
+        llm_response: LLM's raw response
+        original_input: User's original input
+        target_variable: Target variable name
+    Returns:
+        Dict with two keys:
+        - "content": "" on success, otherwise the rejection reason or the raw response
+        - "variables": mapping of variable names to values on success, otherwise None
+    """
+    import re  # local import: this module has no file-level `re` import
+    try:
+        parsed_response = parse_json_response(llm_response)
+    except (json.JSONDecodeError, ValueError, KeyError):
+        # JSON parsing failed, fall back to text mode
+        parsed_response = None
+    if isinstance(parsed_response, dict):
+        raw_result = parsed_response.get("result", "")
+        # A non-string result (null, number, ...) cannot be a verdict; treat it as
+        # absent instead of crashing on .lower()
+        result = raw_result.lower() if isinstance(raw_result, str) else ""
+        if result == VALIDATION_RESPONSE_OK:
+            # Validation successful
+            parse_vars = parsed_response.get("parse_vars")
+            if not isinstance(parse_vars, dict):
+                # Missing, null or malformed parse_vars: start from an empty mapping
+                parse_vars = {}
+            if target_variable not in parse_vars:
+                parse_vars[target_variable] = original_input.strip()
+            return {"content": "", "variables": parse_vars}
+        if result == VALIDATION_RESPONSE_ILLEGAL:
+            # Validation failed
+            reason = parsed_response.get("reason", VALIDATION_ILLEGAL_DEFAULT_REASON)
+            return {"content": reason, "variables": None}
+    # Text response parsing (fallback processing)
+    response_lower = llm_response.lower()
+    # Match approval keywords as whole words only. A plain substring test would
+    # accept "invalid" (contains "valid") or "token" (contains "ok"). ASCII-letter
+    # lookarounds are used instead of \b so a keyword adjacent to non-Latin text
+    # is still recognised.
+    approved = re.search(r"(?<![a-z])(?:ok|okay|valid)(?![a-z])", response_lower)
+    negated = re.search(r"(?<![a-z])not\s+(?:ok|okay|valid)(?![a-z])", response_lower)
+    if approved and not negated:
+        return {"content": "", "variables": {target_variable: original_input.strip()}}
+    return {"content": llm_response, "variables": None}

markdown_flow/parser/variable.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""
+Variable Parser Module
+Provides variable extraction and replacement functionality for MarkdownFlow documents.
+"""
+import re
+from ..constants import (
+    COMPILED_BRACE_VARIABLE_REGEX,
+    COMPILED_PERCENT_VARIABLE_REGEX,
+    VARIABLE_DEFAULT_VALUE,
+)
+def extract_variables_from_text(text: str) -> list[str]:
+    """
+    Collect the names of all variables referenced in the text.
+    Both notations are scanned with the pre-compiled patterns:
+    - %{{variable_name}} via COMPILED_PERCENT_VARIABLE_REGEX
+    - {{variable_name}} via COMPILED_BRACE_VARIABLE_REGEX
+    Surrounding whitespace is trimmed from every captured name.
+    Args:
+        text: Text content to analyze
+    Returns:
+        Sorted list of unique variable names
+    """
+    percent_names = {name.strip() for name in COMPILED_PERCENT_VARIABLE_REGEX.findall(text)}
+    brace_names = {name.strip() for name in COMPILED_BRACE_VARIABLE_REGEX.findall(text)}
+    return sorted(percent_names | brace_names)
+def replace_variables_in_text(text: str, variables: dict[str, str | list[str]]) -> str:
+    """
+    Replace {{variable}} references in text with their values.
+    References preceded by "%" (%{{variable}}) are left untouched. Whitespace inside
+    the braces is ignored, so {{ name }} resolves like {{name}}. Undefined variables
+    become "UNKNOWN"; None, empty-string and empty-list values become
+    VARIABLE_DEFAULT_VALUE. List values are joined with ", ", skipping None and
+    blank items.
+    The substitution is a single pass: inserted values are taken literally, so
+    backslashes in a value are not interpreted and placeholders inside a value are
+    not expanded again. The caller's mapping is never modified.
+    Args:
+        text: Text containing variables
+        variables: Variable name to value mapping (may be empty or None)
+    Returns:
+        Text with variables replaced; falsy text is returned as-is, or as "" when it is None
+    """
+    if not text or not isinstance(text, str):
+        return text or ""
+    # Private copy: lookups below must not leak defaults into the caller's dict
+    values = dict(variables) if variables else {}
+    def _render(value: object) -> str:
+        """Convert one variable value to its replacement text."""
+        if isinstance(value, list):
+            # Multiple values - join with comma
+            joined = ", ".join(str(v) for v in value if v is not None and str(v).strip())
+            return joined or VARIABLE_DEFAULT_VALUE
+        if value is None or value == "":
+            return VARIABLE_DEFAULT_VALUE
+        return str(value)
+    def _substitute(match: re.Match[str]) -> str:
+        """Resolve one {{...}} reference; the exact key wins over the trimmed one."""
+        raw_name = match.group(1)
+        if raw_name in values:
+            return _render(values[raw_name])
+        name = raw_name.strip()
+        if name in values:
+            return _render(values[name])
+        return "UNKNOWN"
+    # Negative lookbehind keeps %{{var_name}} references intact. Passing a function
+    # as the replacement makes re.sub insert its return value verbatim.
+    return re.sub(r"(?<!%)\{\{([^{}]+)\}\}", _substitute, text)

markdown_flow/providers/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""
+Markdown-Flow LLM Providers Module
+Provides built-in LLM provider implementations.
+"""
+from .config import ProviderConfig
+from .openai import OpenAIProvider, create_provider, create_default_provider
+__all__ = [
+    "ProviderConfig",
+    "OpenAIProvider",
+    "create_provider",
+    "create_default_provider",
+]

markdown_flow/providers/config.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""
+Provider Configuration Module
+Provides configuration classes for LLM providers.
+"""
+import os
+from dataclasses import dataclass, field
+@dataclass
+class ProviderConfig:
+    """
+    Configuration for LLM providers.
+    Each field that is not passed explicitly is read from an LLM_* environment
+    variable when the instance is created, which allows configuration without code.
+    Raises:
+        ValueError: If the API key is empty, the temperature is outside 0.0-2.0
+            (NaN included), the timeout is not a positive number, or a numeric
+            environment variable cannot be converted to float.
+    """
+    api_key: str = field(default_factory=lambda: os.getenv("LLM_API_KEY", ""))
+    """API key for the LLM service. Default: LLM_API_KEY environment variable."""
+    base_url: str = field(default_factory=lambda: os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"))
+    """Base URL for the API endpoint. Default: LLM_BASE_URL environment variable or OpenAI default."""
+    model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gpt-3.5-turbo"))
+    """Default model name. Default: LLM_MODEL environment variable or gpt-3.5-turbo."""
+    temperature: float = field(default_factory=lambda: float(os.getenv("LLM_TEMPERATURE", "0.7")))
+    """Default temperature (0.0-2.0). Default: LLM_TEMPERATURE environment variable or 0.7."""
+    debug: bool = field(default_factory=lambda: os.getenv("LLM_DEBUG", "false").lower() in ("true", "1", "yes"))
+    """Enable debug mode (colorized console output). Default: LLM_DEBUG environment variable or False."""
+    # The variable is read once so the emptiness check and the conversion see the same value
+    timeout: float | None = field(
+        default_factory=lambda: float(raw) if (raw := os.getenv("LLM_TIMEOUT")) else None
+    )
+    """Request timeout in seconds. None means no timeout. Default: LLM_TIMEOUT environment variable or None."""
+    def __post_init__(self) -> None:
+        """Validate configuration after initialization."""
+        if not self.api_key:
+            raise ValueError(
+                "API key is required. Set it via ProviderConfig(api_key='...') "
+                "or LLM_API_KEY environment variable."
+            )
+        # Written as a positive range test: every comparison with NaN is False,
+        # so NaN fails this check instead of slipping past two "<" / ">" tests.
+        if not 0.0 <= self.temperature <= 2.0:
+            raise ValueError(f"Temperature must be between 0.0 and 2.0, got {self.temperature}")
+        # Same reasoning: "not > 0" rejects zero, negatives and NaN alike
+        if self.timeout is not None and not self.timeout > 0:
+            raise ValueError(f"Timeout must be positive or None, got {self.timeout}")

markdown-flow 0.2.16__py3-none-any.whl → 0.2.26__py3-none-any.whl

Potentially problematic release.

markdown-flow 0.2.16__py3-none-any.whl → 0.2.26__py3-none-any.whl