PyPI - markdown-flow - Versions diffs - 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl - Mend

markdown-flow 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

markdown_flow/__init__.py +7 -7
markdown_flow/constants.py +212 -49
markdown_flow/core.py +614 -591
markdown_flow/llm.py +10 -12
markdown_flow/models.py +1 -17
markdown_flow/parser/__init__.py +38 -0
markdown_flow/parser/code_fence_utils.py +190 -0
markdown_flow/parser/interaction.py +354 -0
markdown_flow/parser/json_parser.py +50 -0
markdown_flow/parser/output.py +215 -0
markdown_flow/parser/preprocessor.py +151 -0
markdown_flow/parser/validation.py +100 -0
markdown_flow/parser/variable.py +95 -0
markdown_flow/providers/__init__.py +16 -0
markdown_flow/providers/config.py +46 -0
markdown_flow/providers/openai.py +369 -0
markdown_flow/utils.py +49 -51
{markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/METADATA +18 -107
markdown_flow-0.2.30.dist-info/RECORD +24 -0
markdown_flow-0.2.10.dist-info/RECORD +0 -13
{markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/WHEEL +0 -0
{markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/licenses/LICENSE +0 -0
{markdown_flow-0.2.10.dist-info → markdown_flow-0.2.30.dist-info}/top_level.txt +0 -0

markdown_flow/llm.py CHANGED Viewed

@@ -5,7 +5,6 @@ Provides LLM provider interfaces and related data models, supporting multiple pr
 """
 from abc import ABC, abstractmethod
-from collections.abc import AsyncGenerator, Generator
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any
@@ -16,7 +15,6 @@ from .constants import NO_LLM_PROVIDER_ERROR
 class ProcessMode(Enum):
     """LLM processing modes."""
-    PROMPT_ONLY = "prompt_only"  # Return prompt only, no LLM call
     COMPLETE = "complete"  # Complete processing (non-streaming)
     STREAM = "stream"  # Streaming processing
@@ -29,7 +27,6 @@ class LLMResult:
     prompt: str | None = None  # Used prompt
     variables: dict[str, str | list[str]] | None = None  # Extracted variables
     metadata: dict[str, Any] | None = None  # Metadata
-    transformed_to_interaction: bool = False  # Whether content block was transformed to interaction block
     def __bool__(self):
         """Support boolean evaluation."""
@@ -40,28 +37,29 @@ class LLMProvider(ABC):
     """Abstract LLM provider interface."""
     @abstractmethod
-    def complete(self, messages: list[dict[str, str]], tools: list[dict[str, Any]] | None = None) -> LLMResult:
+    def complete(self, messages: list[dict[str, str]]) -> str:
         """
-        Non-streaming LLM call with optional function calling support.
+        Non-streaming LLM call.
         Args:
-            messages: Message list in format [{"role": "system/user/assistant", "content": "..."}]
-            tools: Optional tools/functions for LLM to call
+            messages: Message list in format [{"role": "system/user/assistant", "content": "..."}].
+                      This list already includes conversation history context merged by MarkdownFlow.
         Returns:
-            LLMResult: Structured result with content and metadata
+            str: LLM response content
         Raises:
             ValueError: When LLM call fails
         """
     @abstractmethod
-    def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
+    def stream(self, messages: list[dict[str, str]]):
         """
         Streaming LLM call.
         Args:
-            messages: Message list in format [{"role": "system/user/assistant", "content": "..."}]
+            messages: Message list in format [{"role": "system/user/assistant", "content": "..."}].
+                      This list already includes conversation history context merged by MarkdownFlow.
         Yields:
             str: Incremental LLM response content
@@ -74,8 +72,8 @@ class LLMProvider(ABC):
 class NoLLMProvider(LLMProvider):
     """Empty LLM provider for prompt-only scenarios."""
-    def complete(self, messages: list[dict[str, str]], tools: list[dict[str, Any]] | None = None) -> LLMResult:
+    def complete(self, messages: list[dict[str, str]]) -> str:
         raise NotImplementedError(NO_LLM_PROVIDER_ERROR)
-    def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
+    def stream(self, messages: list[dict[str, str]]):
         raise NotImplementedError(NO_LLM_PROVIDER_ERROR)

markdown_flow/models.py CHANGED Viewed

@@ -7,7 +7,7 @@ Simplified and refactored data models focused on core functionality.
 from dataclasses import dataclass, field
 from .enums import BlockType, InputType
-from .utils import extract_variables_from_text
+from .parser import extract_variables_from_text
 @dataclass
@@ -26,22 +26,6 @@ class UserInput:
     is_multi_select: bool = False
-@dataclass
-class InteractionValidationConfig:
-    """
-    Simplified interaction validation configuration.
-    Attributes:
-        validation_template (Optional[str]): Validation prompt template
-        target_variable (Optional[str]): Target variable name
-        enable_custom_validation (bool): Enable custom validation, defaults to True
-    """
-    validation_template: str | None = None
-    target_variable: str | None = None
-    enable_custom_validation: bool = True
 @dataclass
 class Block:
     """

markdown_flow/parser/__init__.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+Markdown-Flow Parser Module
+Provides specialized parsers for different aspects of MarkdownFlow document processing.
+"""
+from .interaction import InteractionParser, InteractionType, extract_interaction_question
+from .json_parser import parse_json_response
+from .output import (
+    extract_preserved_content,
+    is_preserved_content_block,
+    process_output_instructions,
+)
+from .preprocessor import CodeBlockPreprocessor
+from .validation import generate_smart_validation_template, parse_validation_response
+from .variable import extract_variables_from_text, replace_variables_in_text
+__all__ = [
+    # Variable parsing
+    "extract_variables_from_text",
+    "replace_variables_in_text",
+    # Interaction parsing
+    "InteractionParser",
+    "InteractionType",
+    "extract_interaction_question",
+    # Output and preserved content
+    "is_preserved_content_block",
+    "extract_preserved_content",
+    "process_output_instructions",
+    # Code block preprocessing
+    "CodeBlockPreprocessor",
+    # Validation
+    "generate_smart_validation_template",
+    "parse_validation_response",
+    # JSON parsing
+    "parse_json_response",
+]

markdown_flow/parser/code_fence_utils.py ADDED Viewed

@@ -0,0 +1,190 @@
+"""
+Code Fence Utilities
+Provides CommonMark-compliant code fence parsing utility functions.
+"""
+from dataclasses import dataclass
+from ..constants import (
+    COMPILED_CODE_FENCE_END_REGEX,
+    COMPILED_CODE_FENCE_START_REGEX,
+)
+@dataclass
+class CodeFenceInfo:
+    """
+    Code fence information
+    Used to track the opening fence of a code block for proper matching with closing fence.
+    Attributes:
+        char: Fence character ('`' or '~')
+        length: Fence length (≥3)
+        indent: Number of indent spaces (≤3)
+        line: Full opening fence line (including info string, e.g., language identifier)
+    """
+    char: str
+    length: int
+    indent: int
+    line: str
+def validate_fence_characters(fence_str: str) -> bool:
+    """
+    Validate that all characters in the fence string are the same
+    CommonMark specification: fence must consist of the same character (all ` or all ~)
+    Args:
+        fence_str: Fence string (e.g., "```" or "~~~~")
+    Returns:
+        True if all characters are the same, False otherwise
+    Examples:
+        >>> validate_fence_characters("```")
+        True
+        >>> validate_fence_characters("~~~~")
+        True
+        >>> validate_fence_characters("``~")
+        False
+        >>> validate_fence_characters("")
+        False
+    """
+    if not fence_str:
+        return False
+    fence_char = fence_str[0]
+    return all(ch == fence_char for ch in fence_str)
+def parse_code_fence_start(line: str) -> CodeFenceInfo | None:
+    """
+    Parse code block opening fence marker
+    CommonMark specification:
+      - 0-3 spaces indent
+      - At least 3 consecutive ` or ~ characters
+      - All characters must be the same
+      - Optional info string (language identifier)
+    Args:
+        line: Line to detect
+    Returns:
+        CodeFenceInfo if valid opening fence marker, None otherwise
+    Examples:
+        >>> parse_code_fence_start("```")
+        CodeFenceInfo(char='`', length=3, ...)
+        >>> parse_code_fence_start("```go")
+        CodeFenceInfo(char='`', length=3, line="```go", ...)
+        >>> parse_code_fence_start("   ~~~python")
+        CodeFenceInfo(char='~', length=3, indent=3, ...)
+        >>> parse_code_fence_start("    ```")
+        None  # indent > 3
+        >>> parse_code_fence_start("``~")
+        None  # mixed characters
+    """
+    match = COMPILED_CODE_FENCE_START_REGEX.match(line)
+    if not match:
+        return None
+    # match.group(1) is the fence string (e.g., ```, ~~~~)
+    # match.group(2) is the info string (e.g., go, python)
+    fence_str = match.group(1)
+    # Validate all characters are the same (backticks or tildes)
+    if not validate_fence_characters(fence_str):
+        return None
+    # Calculate indent
+    indent = len(line) - len(line.lstrip(" "))
+    # Validate indent ≤ 3 (CommonMark specification)
+    if indent > 3:
+        return None
+    # Fence length
+    fence_length = len(fence_str)
+    # Validate fence length ≥ 3 (regex already ensures this, but check to be safe)
+    if fence_length < 3:
+        return None
+    return CodeFenceInfo(
+        char=fence_str[0],
+        length=fence_length,
+        indent=indent,
+        line=line,
+    )
+def is_code_fence_end(line: str, start_fence: CodeFenceInfo) -> bool:
+    """
+    Detect if line is a matching code block closing fence marker
+    CommonMark specification:
+      - Use same type of fence character (` or ~)
+      - Fence length ≥ opening fence
+      - 0-3 spaces indent
+      - Only contains fence characters and whitespace
+    Args:
+        line: Line to detect
+        start_fence: Opening fence information
+    Returns:
+        Whether line is a matching closing fence
+    Examples:
+        >>> start = CodeFenceInfo(char='`', length=3, indent=0, line="```")
+        >>> is_code_fence_end("```", start)
+        True
+        >>> is_code_fence_end("````", start)
+        True  # length ≥ opening fence
+        >>> is_code_fence_end("~~~", start)
+        False  # character type mismatch
+        >>> is_code_fence_end("``", start)
+        False  # length < opening fence
+        >>> is_code_fence_end("    ```", start)
+        False  # indent > 3
+    """
+    match = COMPILED_CODE_FENCE_END_REGEX.match(line)
+    if not match:
+        return False
+    # Extract indent
+    indent = len(line) - len(line.lstrip(" "))
+    # Validate indent ≤ 3
+    if indent > 3:
+        return False
+    # Extract fence string (remove indent and trailing whitespace)
+    fence_str = line.strip()
+    # Validate non-empty
+    if not fence_str:
+        return False
+    first_char = fence_str[0]
+    # Character type must match
+    if first_char != start_fence.char:
+        return False
+    # Calculate fence length (count consecutive same characters)
+    fence_length = 0
+    for ch in fence_str:
+        if ch == first_char:
+            fence_length += 1
+        else:
+            # Contains other characters, not a valid closing fence
+            return False
+    # Length must be ≥ opening fence
+    return fence_length >= start_fence.length

markdown_flow/parser/interaction.py ADDED Viewed

@@ -0,0 +1,354 @@
+"""
+Interaction Parser Module
+Provides three-layer interaction parsing for MarkdownFlow ?[] format validation,
+variable detection, and content parsing.
+"""
+from enum import Enum
+from typing import Any
+from ..constants import (
+    COMPILED_INTERACTION_REGEX,
+    COMPILED_LAYER1_INTERACTION_REGEX,
+    COMPILED_LAYER2_VARIABLE_REGEX,
+    COMPILED_LAYER3_ELLIPSIS_REGEX,
+    COMPILED_SINGLE_PIPE_SPLIT_REGEX,
+)
+class InteractionType(Enum):
+    """Interaction input type enumeration."""
+    TEXT_ONLY = "text_only"  # Text input only: ?[%{{var}}...question]
+    BUTTONS_ONLY = "buttons_only"  # Button selection only: ?[%{{var}} A|B]
+    BUTTONS_WITH_TEXT = "buttons_with_text"  # Buttons + text: ?[%{{var}} A|B|...question]
+    BUTTONS_MULTI_SELECT = "buttons_multi_select"  # Multi-select buttons: ?[%{{var}} A||B]
+    BUTTONS_MULTI_WITH_TEXT = "buttons_multi_with_text"  # Multi-select + text: ?[%{{var}} A||B||...question]
+    NON_ASSIGNMENT_BUTTON = "non_assignment_button"  # Display buttons: ?[Continue|Cancel]
+def extract_interaction_question(content: str) -> str | None:
+    """
+    Extract question text from interaction block content.
+    Args:
+        content: Raw interaction block content
+    Returns:
+        Question text if found, None otherwise
+    """
+    # Match interaction format: ?[...] using pre-compiled regex
+    match = COMPILED_INTERACTION_REGEX.match(content.strip())
+    if not match:
+        return None  # type: ignore[unreachable]
+    # Extract interaction content (remove ?[ and ])
+    interaction_content = match.group(1) if match.groups() else match.group(0)[2:-1]
+    # Find ... separator, question text follows
+    if "..." in interaction_content:
+        # Split and get question part
+        parts = interaction_content.split("...", 1)
+        if len(parts) > 1:
+            return parts[1].strip()
+    return None  # type: ignore[unreachable]
+class InteractionParser:
+    """
+    Three-layer interaction parser for ?[] format validation,
+    variable detection, and content parsing.
+    """
+    def __init__(self):
+        """Initialize parser."""
+    def parse(self, content: str) -> dict[str, Any]:
+        """
+        Main parsing method.
+        Args:
+            content: Raw interaction block content
+        Returns:
+            Standardized parsing result with type, variable, buttons, and question fields
+        """
+        try:
+            # Layer 1: Validate basic format
+            inner_content = self._layer1_validate_format(content)
+            if inner_content is None:
+                return self._create_error_result(f"Invalid interaction format: {content}")
+            # Layer 2: Variable detection and pattern classification
+            has_variable, variable_name, remaining_content = self._layer2_detect_variable(inner_content)
+            # Layer 3: Specific content parsing
+            if has_variable:
+                assert variable_name is not None, "variable_name should not be None when has_variable is True"
+                return self._layer3_parse_variable_interaction(variable_name, remaining_content)
+            return self._layer3_parse_display_buttons(inner_content)
+        except Exception as e:
+            return self._create_error_result(f"Parsing error: {str(e)}")
+    def _layer1_validate_format(self, content: str) -> str | None:
+        """
+        Layer 1: Validate ?[] format and extract content.
+        Args:
+            content: Raw content
+        Returns:
+            Extracted bracket content, None if validation fails
+        """
+        content = content.strip()
+        match = COMPILED_LAYER1_INTERACTION_REGEX.search(content)
+        if not match:
+            return None  # type: ignore[unreachable]
+        # Ensure matched content is complete (no other text)
+        matched_text = match.group(0)
+        if matched_text.strip() != content:
+            return None
+        return match.group(1)
+    def _layer2_detect_variable(self, inner_content: str) -> tuple[bool, str | None, str]:
+        """
+        Layer 2: Detect variables and classify patterns.
+        Args:
+            inner_content: Content extracted from layer 1
+        Returns:
+            Tuple of (has_variable, variable_name, remaining_content)
+        """
+        match = COMPILED_LAYER2_VARIABLE_REGEX.match(inner_content)
+        if not match:
+            # No variable, use entire content for display button parsing
+            return False, None, inner_content  # type: ignore[unreachable]
+        variable_name = match.group(1).strip()
+        remaining_content = match.group(2).strip()
+        return True, variable_name, remaining_content
+    def _layer3_parse_variable_interaction(self, variable_name: str, content: str) -> dict[str, Any]:
+        """
+        Layer 3: Parse variable interactions (variable assignment type).
+        Args:
+            variable_name: Variable name
+            content: Content after variable
+        Returns:
+            Parsing result dictionary
+        """
+        # Detect ... separator
+        ellipsis_match = COMPILED_LAYER3_ELLIPSIS_REGEX.match(content)
+        if ellipsis_match:
+            # Has ... separator
+            before_ellipsis = ellipsis_match.group(1).strip()
+            question = ellipsis_match.group(2).strip()
+            if before_ellipsis:
+                # Has prefix content (buttons or single option) + text input
+                buttons, is_multi_select = self._parse_buttons(before_ellipsis)
+                interaction_type = InteractionType.BUTTONS_MULTI_WITH_TEXT if is_multi_select else InteractionType.BUTTONS_WITH_TEXT
+                return {
+                    "type": interaction_type,
+                    "variable": variable_name,
+                    "buttons": buttons,
+                    "question": question,
+                    "is_multi_select": is_multi_select,
+                }
+            # Pure text input
+            return {
+                "type": InteractionType.TEXT_ONLY,
+                "variable": variable_name,
+                "question": question,
+                "is_multi_select": False,
+            }
+        # No ... separator
+        if ("|" in content or "||" in content) and content:  # type: ignore[unreachable]
+            # Pure button group
+            buttons, is_multi_select = self._parse_buttons(content)
+            interaction_type = InteractionType.BUTTONS_MULTI_SELECT if is_multi_select else InteractionType.BUTTONS_ONLY
+            return {
+                "type": interaction_type,
+                "variable": variable_name,
+                "buttons": buttons,
+                "is_multi_select": is_multi_select,
+            }
+        if content:  # type: ignore[unreachable]
+            # Single button
+            button = self._parse_single_button(content)
+            return {
+                "type": InteractionType.BUTTONS_ONLY,
+                "variable": variable_name,
+                "buttons": [button],
+                "is_multi_select": False,
+            }
+        # Pure text input (no hint)
+        return {
+            "type": InteractionType.TEXT_ONLY,
+            "variable": variable_name,
+            "question": "",
+            "is_multi_select": False,
+        }
+    def _layer3_parse_display_buttons(self, content: str) -> dict[str, Any]:
+        """
+        Layer 3: Parse display buttons (non-variable assignment type).
+        Args:
+            content: Content to parse
+        Returns:
+            Parsing result dictionary
+        """
+        if not content:
+            # Empty content: ?[]
+            return {
+                "type": InteractionType.NON_ASSIGNMENT_BUTTON,
+                "buttons": [{"display": "", "value": ""}],
+            }
+        if "|" in content:
+            # Multiple buttons
+            buttons, _ = self._parse_buttons(content)  # Display buttons don't use multi-select
+            return {"type": InteractionType.NON_ASSIGNMENT_BUTTON, "buttons": buttons}
+        # Single button
+        button = self._parse_single_button(content)
+        return {"type": InteractionType.NON_ASSIGNMENT_BUTTON, "buttons": [button]}
+    def _parse_buttons(self, content: str) -> tuple[list[dict[str, str]], bool]:
+        """
+        Parse button group with fault tolerance.
+        Args:
+            content: Button content separated by | or ||
+        Returns:
+            Tuple of (button list, is_multi_select)
+        """
+        if not content or not isinstance(content, str):
+            return [], False
+        _, is_multi_select = self._detect_separator_type(content)
+        buttons = []
+        try:
+            # Use different splitting logic based on separator type
+            if is_multi_select:
+                # Multi-select mode: split on ||, preserve single |
+                button_parts = content.split("||")
+            else:
+                # Single-select mode: split on single |, but preserve ||
+                # Use pre-compiled regex from constants
+                button_parts = COMPILED_SINGLE_PIPE_SPLIT_REGEX.split(content)
+            for button_text in button_parts:
+                button_text = button_text.strip()
+                if button_text:
+                    button = self._parse_single_button(button_text)
+                    buttons.append(button)
+        except (TypeError, ValueError):
+            # Fallback to treating entire content as single button
+            return [{"display": content.strip(), "value": content.strip()}], False
+        # For empty content (like just separators), return empty list
+        if not buttons and (content.strip() == "||" or content.strip() == "|"):
+            return [], is_multi_select
+        # Ensure at least one button exists (but only if there's actual content)
+        if not buttons and content.strip():
+            buttons = [{"display": content.strip(), "value": content.strip()}]
+        return buttons, is_multi_select
+    def _parse_single_button(self, button_text: str) -> dict[str, str]:
+        """
+        Parse single button with fault tolerance, supports Button//value format.
+        Args:
+            button_text: Button text
+        Returns:
+            Dictionary with display and value keys
+        """
+        if not button_text or not isinstance(button_text, str):
+            return {"display": "", "value": ""}
+        button_text = button_text.strip()
+        if not button_text:
+            return {"display": "", "value": ""}
+        try:
+            # Detect Button//value format - split only on first //
+            if "//" in button_text:
+                parts = button_text.split("//", 1)  # Split only on first //
+                display = parts[0].strip()
+                value = parts[1] if len(parts) > 1 else ""
+                # Don't strip value to preserve intentional spacing/formatting
+                return {"display": display, "value": value}
+        except (ValueError, IndexError):
+            # Fallback: use text as both display and value
+            pass
+        return {"display": button_text, "value": button_text}
+    def _detect_separator_type(self, content: str) -> tuple[str, bool]:
+        """
+        Detect separator type and whether it's multi-select.
+        Implements fault tolerance: first separator type encountered determines the behavior.
+        Mixed separators are handled by treating the rest as literal text.
+        Args:
+            content: Button content to analyze
+        Returns:
+            Tuple of (separator, is_multi_select) where separator is '|' or '||'
+        """
+        if not content or not isinstance(content, str):
+            return "|", False
+        # Find first occurrence of separators
+        single_pos = content.find("|")
+        double_pos = content.find("||")
+        # If no separators found
+        if single_pos == -1 and double_pos == -1:
+            return "|", False
+        # If only single separator found
+        if double_pos == -1:
+            return "|", False
+        # If only double separator found
+        if single_pos == -1:
+            return "||", True
+        # Both found - fault tolerance: first occurrence wins
+        # This handles mixed cases like "A||B|C" (multi-select) and "A|B||C" (single-select)
+        if double_pos <= single_pos:
+            return "||", True
+        return "|", False
+    def _create_error_result(self, error_message: str) -> dict[str, Any]:
+        """
+        Create error result.
+        Args:
+            error_message: Error message
+        Returns:
+            Error result dictionary
+        """
+        return {"type": None, "error": error_message}  # type: ignore[unreachable]

markdown-flow 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl

markdown-flow 0.2.10__py3-none-any.whl → 0.2.30__py3-none-any.whl