markdown-flow 0.2.19__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markdown_flow/__init__.py +4 -4
- markdown_flow/constants.py +210 -99
- markdown_flow/core.py +605 -209
- markdown_flow/llm.py +4 -3
- markdown_flow/models.py +1 -17
- markdown_flow/parser/__init__.py +38 -0
- markdown_flow/parser/code_fence_utils.py +190 -0
- markdown_flow/parser/interaction.py +354 -0
- markdown_flow/parser/json_parser.py +50 -0
- markdown_flow/parser/output.py +215 -0
- markdown_flow/parser/preprocessor.py +151 -0
- markdown_flow/parser/validation.py +100 -0
- markdown_flow/parser/variable.py +95 -0
- markdown_flow/providers/__init__.py +16 -0
- markdown_flow/providers/config.py +46 -0
- markdown_flow/providers/openai.py +369 -0
- markdown_flow/utils.py +43 -43
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/METADATA +45 -52
- markdown_flow-0.2.30.dist-info/RECORD +24 -0
- markdown_flow-0.2.19.dist-info/RECORD +0 -13
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/WHEEL +0 -0
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/licenses/LICENSE +0 -0
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/top_level.txt +0 -0
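
The core.py diff below adds instance-level LLM configuration. As a quick orientation, here is a minimal usage sketch of the new 0.2.30 surface inferred from the signatures in the diff; the top-level `markdown_flow` import and the duck-typed provider stub are assumptions, while the `MarkdownFlow` method names and keyword signatures are taken from the code below:

```python
from markdown_flow import MarkdownFlow  # assumed top-level export


class EchoProvider:
    """Toy stand-in for the package's LLMProvider interface; the keyword
    signature (model=/temperature=) mirrors the calls made in core.py below."""

    def complete(self, messages, model=None, temperature=None):
        return f"[{model or 'default-model'}] {messages[-1]['content']}"

    def stream(self, messages, model=None, temperature=None):
        yield self.complete(messages, model=model, temperature=temperature)


mf = MarkdownFlow(
    "Hello {{name}}!",
    llm_provider=EchoProvider(),
    max_context_length=20,  # keep only the 20 most recent context messages (0 = unlimited)
)

# New in 0.2.30: chainable per-instance overrides, forwarded to every
# provider.complete()/provider.stream() call.
mf.set_model("gpt-4o-mini").set_temperature(0.7)

# New debugging helpers: the preprocessed document (fenced code blocks become
# __MDFLOW_CODE_BLOCK_N__ placeholders) and the exact messages built for block 0.
print(mf.get_processed_document())
print(mf.get_content_messages(0, variables={"name": "Ada"}))
```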
markdown_flow/core.py
CHANGED
```diff
@@ -13,32 +13,30 @@ from typing import Any
 from .constants import (
     BLOCK_INDEX_OUT_OF_RANGE_ERROR,
     BLOCK_SEPARATOR,
-
-
-
-    COMPILED_VARIABLE_REFERENCE_CLEANUP_REGEX,
-    COMPILED_WHITESPACE_CLEANUP_REGEX,
+    CONTEXT_BUTTON_OPTIONS_TEMPLATE,
+    CONTEXT_QUESTION_TEMPLATE,
+    DEFAULT_BASE_SYSTEM_PROMPT,
     DEFAULT_INTERACTION_ERROR_PROMPT,
     DEFAULT_INTERACTION_PROMPT,
-    DEFAULT_VALIDATION_SYSTEM_MESSAGE,
     INPUT_EMPTY_ERROR,
     INTERACTION_ERROR_RENDER_INSTRUCTIONS,
     INTERACTION_PARSE_ERROR,
     INTERACTION_PATTERN_NON_CAPTURING,
     INTERACTION_PATTERN_SPLIT,
-    INTERACTION_RENDER_INSTRUCTIONS,
     LLM_PROVIDER_REQUIRED_ERROR,
     OUTPUT_INSTRUCTION_EXPLANATION,
     UNSUPPORTED_PROMPT_TYPE_ERROR,
+    VALIDATION_REQUIREMENTS_TEMPLATE,
+    VALIDATION_TASK_TEMPLATE,
 )
 from .enums import BlockType
 from .exceptions import BlockIndexError
 from .llm import LLMProvider, LLMResult, ProcessMode
-from .models import Block
-from .
+from .models import Block
+from .parser import (
+    CodeBlockPreprocessor,
     InteractionParser,
     InteractionType,
-    extract_interaction_question,
     extract_preserved_content,
     extract_variables_from_text,
     is_preserved_content_block,
@@ -57,51 +55,156 @@ class MarkdownFlow:
 
     _llm_provider: LLMProvider | None
     _document: str
+    _processed_document: str
     _document_prompt: str | None
     _interaction_prompt: str | None
     _interaction_error_prompt: str | None
+    _max_context_length: int
     _blocks: list[Block] | None
-
+    _model: str | None
+    _temperature: float | None
+    _preprocessor: CodeBlockPreprocessor
 
     def __init__(
         self,
         document: str,
         llm_provider: LLMProvider | None = None,
+        base_system_prompt: str | None = None,
         document_prompt: str | None = None,
         interaction_prompt: str | None = None,
         interaction_error_prompt: str | None = None,
+        max_context_length: int = 0,
     ):
         """
         Initialize MarkdownFlow instance.
 
         Args:
             document: Markdown document content
-            llm_provider: LLM provider
+            llm_provider: LLM provider (required for COMPLETE and STREAM modes)
+            base_system_prompt: MarkdownFlow base system prompt (framework-level, content blocks only)
             document_prompt: Document-level system prompt
             interaction_prompt: Interaction content rendering prompt
             interaction_error_prompt: Interaction error rendering prompt
+            max_context_length: Maximum number of context messages to keep (0 = unlimited)
         """
         self._document = document
        self._llm_provider = llm_provider
+        self._base_system_prompt = base_system_prompt or DEFAULT_BASE_SYSTEM_PROMPT
         self._document_prompt = document_prompt
         self._interaction_prompt = interaction_prompt or DEFAULT_INTERACTION_PROMPT
         self._interaction_error_prompt = interaction_error_prompt or DEFAULT_INTERACTION_ERROR_PROMPT
+        self._max_context_length = max_context_length
         self._blocks = None
-        self.
+        self._model: str | None = None
+        self._temperature: float | None = None
+
+        # Preprocess document: extract code blocks and replace with placeholders
+        # This is done once during initialization, similar to Go implementation
+        self._preprocessor = CodeBlockPreprocessor()
+        self._processed_document = self._preprocessor.extract_code_blocks(document)
 
     def set_llm_provider(self, provider: LLMProvider) -> None:
         """Set LLM provider."""
         self._llm_provider = provider
 
+    def get_processed_document(self) -> str:
+        """
+        Get preprocessed document (for debugging and testing).
+
+        Returns the document content after code blocks have been replaced with placeholders.
+
+        Use cases:
+        - Verify that code block preprocessing was executed correctly
+        - Check placeholder format (__MDFLOW_CODE_BLOCK_N__)
+        - Debug preprocessing stage issues
+
+        Returns:
+            Preprocessed document string
+        """
+        return self._processed_document
+
+    def get_content_messages(
+        self,
+        block_index: int,
+        variables: dict[str, str | list[str]] | None,
+        context: list[dict[str, str]] | None = None,
+    ) -> list[dict[str, str]]:
+        """
+        Get content messages (for debugging and inspection).
+
+        Builds and returns the complete message list that will be sent to LLM.
+
+        Use cases:
+        - Debug: View actual content sent to LLM
+        - Verify: Check if code blocks are correctly restored
+        - Inspect: Verify variable replacement and prompt building logic
+        - Review: Confirm system/user message assembly results
+
+        Args:
+            block_index: Block index
+            variables: Variable mapping
+            context: Context message list
+
+        Returns:
+            List of message dictionaries
+        """
+        return self._build_content_messages(block_index, variables, context)
+
+    def set_model(self, model: str) -> "MarkdownFlow":
+        """
+        Set model name for this instance.
+
+        Args:
+            model: Model name to use
+
+        Returns:
+            Self for method chaining
+        """
+        self._model = model
+        return self
+
+    def set_temperature(self, temperature: float) -> "MarkdownFlow":
+        """
+        Set temperature for this instance.
+
+        Args:
+            temperature: Temperature value (typically 0.0-2.0)
+
+        Returns:
+            Self for method chaining
+        """
+        self._temperature = temperature
+        return self
+
+    def get_model(self) -> str | None:
+        """
+        Get model name for this instance.
+
+        Returns:
+            Model name if set, None otherwise
+        """
+        return self._model
+
+    def get_temperature(self) -> float | None:
+        """
+        Get temperature for this instance.
+
+        Returns:
+            Temperature value if set, None otherwise
+        """
+        return self._temperature
+
     def set_prompt(self, prompt_type: str, value: str | None) -> None:
         """
         Set prompt template.
 
         Args:
-            prompt_type: Prompt type ('document', 'interaction', 'interaction_error')
+            prompt_type: Prompt type ('base_system', 'document', 'interaction', 'interaction_error')
             value: Prompt content
         """
-        if prompt_type == "document":
+        if prompt_type == "base_system":
+            self._base_system_prompt = value or DEFAULT_BASE_SYSTEM_PROMPT
+        elif prompt_type == "document":
             self._document_prompt = value
         elif prompt_type == "interaction":
             self._interaction_prompt = value or DEFAULT_INTERACTION_PROMPT
@@ -110,6 +213,44 @@ class MarkdownFlow:
         else:
             raise ValueError(UNSUPPORTED_PROMPT_TYPE_ERROR.format(prompt_type=prompt_type))
 
+    def _truncate_context(
+        self,
+        context: list[dict[str, str]] | None,
+    ) -> list[dict[str, str]] | None:
+        """
+        Filter and truncate context to specified maximum length.
+
+        Processing steps:
+        1. Filter out messages with empty content (empty string or whitespace only)
+        2. Truncate to max_context_length if configured (0 = unlimited)
+
+        Args:
+            context: Original context list
+
+        Returns:
+            Filtered and truncated context. Returns None if no valid messages remain.
+        """
+        if not context:
+            return None
+
+        # Step 1: Filter out messages with empty or whitespace-only content
+        filtered_context = [msg for msg in context if msg.get("content", "").strip()]
+
+        # Return None if no valid messages remain after filtering
+        if not filtered_context:
+            return None
+
+        # Step 2: Truncate to max_context_length if configured
+        if self._max_context_length == 0:
+            # No limit, return all filtered messages
+            return filtered_context
+
+        # Keep the most recent N messages
+        if len(filtered_context) > self._max_context_length:
+            return filtered_context[-self._max_context_length :]
+
+        return filtered_context
+
     @property
     def document(self) -> str:
         """Get document content."""
@@ -125,8 +266,9 @@ class MarkdownFlow:
         if self._blocks is not None:
             return self._blocks
 
-
-
+        # Parse the preprocessed document (code blocks already replaced with placeholders)
+        # The preprocessing was done once during initialization
+        segments = re.split(BLOCK_SEPARATOR, self._processed_document)
         final_blocks: list[Block] = []
 
         for segment in segments:
@@ -167,14 +309,6 @@ class MarkdownFlow:
         """Extract all variable names from the document."""
         return extract_variables_from_text(self._document)
 
-    def set_interaction_validation_config(self, block_index: int, config: InteractionValidationConfig) -> None:
-        """Set validation config for specified interaction block."""
-        self._interaction_configs[block_index] = config
-
-    def get_interaction_validation_config(self, block_index: int) -> InteractionValidationConfig | None:
-        """Get validation config for specified interaction block."""
-        return self._interaction_configs.get(block_index)
-
     # Core unified interface
 
     def process(
@@ -198,6 +332,10 @@ class MarkdownFlow:
         Returns:
             LLMResult or Generator[LLMResult, None, None]
         """
+        # Process base_system_prompt variable replacement
+        if self._base_system_prompt:
+            self._base_system_prompt = replace_variables_in_text(self._base_system_prompt, variables or {})
+
         # Process document_prompt variable replacement
         if self._document_prompt:
             self._document_prompt = replace_variables_in_text(self._document_prompt, variables or {})
@@ -210,7 +348,7 @@ class MarkdownFlow:
         if block.block_type == BlockType.INTERACTION:
             if user_input is None:
                 # Render interaction content
-                return self._process_interaction_render(block_index, mode, variables)
+                return self._process_interaction_render(block_index, mode, context, variables)
             # Process user input
             return self._process_interaction_input(block_index, user_input, mode, context, variables)
 
@@ -231,17 +369,17 @@ class MarkdownFlow:
         variables: dict[str, str | list[str]] | None,
     ):
         """Process content block."""
-        #
-
+        # Truncate context to configured maximum length
+        truncated_context = self._truncate_context(context)
 
-
-
+        # Build messages with context
+        messages = self._build_content_messages(block_index, variables, truncated_context)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
 
-            content = self._llm_provider.complete(messages)
+            content = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
             return LLMResult(content=content, prompt=messages[-1]["content"])
 
         if mode == ProcessMode.STREAM:
@@ -249,7 +387,7 @@ class MarkdownFlow:
                 raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
 
             def stream_generator():
-                for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+                for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                     yield LLMResult(content=chunk, prompt=messages[-1]["content"])
 
             return stream_generator()
@@ -264,9 +402,18 @@ class MarkdownFlow:
         # Replace variables
         content = replace_variables_in_text(content, variables or {})
 
+        # Restore code blocks (replace placeholders with original code blocks)
+        content = self._preprocessor.restore_code_blocks(content)
+
         return LLMResult(content=content)
 
-    def _process_interaction_render(
+    def _process_interaction_render(
+        self,
+        block_index: int,
+        mode: ProcessMode,
+        context: list[dict[str, str]] | None = None,
+        variables: dict[str, str | list[str]] | None = None,
+    ):
         """Process interaction content rendering."""
         block = self.get_block(block_index)
 
@@ -277,66 +424,92 @@ class MarkdownFlow:
         processed_block = copy(block)
         processed_block.content = processed_content
 
-        #
-
-        if not
-        #
-            return LLMResult(
-
-
-
+        # Extract translatable content (JSON format)
+        translatable_json, interaction_info = self._extract_translatable_content(processed_block.content)
+        if not interaction_info:
+            # Parsing failed, return the original content
+            return LLMResult(
+                content=processed_block.content,
+                metadata={
+                    "block_type": "interaction",
+                    "block_index": block_index,
+                },
+            )
 
-
+        # If there is no translatable content, return directly
+        if not translatable_json or translatable_json == "{}":
             return LLMResult(
-
+                content=processed_block.content,
                 metadata={
-                    "
-                    "
+                    "block_type": "interaction",
+                    "block_index": block_index,
                 },
             )
 
+        # Build translation messages
+        messages = self._build_translation_messages(translatable_json)
+
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
-                return LLMResult(
+                return LLMResult(
+                    content=processed_block.content,
+                    metadata={
+                        "block_type": "interaction",
+                        "block_index": block_index,
+                    },
+                )
 
-
-
+            # Call the LLM to translate
+            translated_json = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
+
+            # Reconstruct the interaction content with the translation result
+            translated_content = self._reconstruct_with_translation(processed_block.content, translatable_json, translated_json, interaction_info)
 
             return LLMResult(
-                content=
+                content=translated_content,
                 prompt=messages[-1]["content"],
                 metadata={
-                    "
-                    "
+                    "block_type": "interaction",
+                    "block_index": block_index,
+                    "original_content": translatable_json,
+                    "translated_content": translated_json,
                 },
             )
 
         if mode == ProcessMode.STREAM:
             if not self._llm_provider:
-                #
-                rendered_content = self._reconstruct_interaction_content(processed_block.content, question_text or "")
-
+                # Fallback handling, return the processed content
                 def stream_generator():
                     yield LLMResult(
-                        content=
+                        content=processed_block.content,
                         prompt=messages[-1]["content"],
+                        metadata={
+                            "block_type": "interaction",
+                            "block_index": block_index,
+                        },
                     )
 
                 return stream_generator()
 
-            #
+            # LLM provider available: collect the full response and return it once
             def stream_generator():
                 full_response = ""
-                for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+                for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                    full_response += chunk
 
-                #
-
+                # Reconstruct the interaction content with the translation result
+                translated_content = self._reconstruct_with_translation(processed_block.content, translatable_json, full_response, interaction_info)
 
-                #
+                # Return the complete content at once (not incrementally)
                 yield LLMResult(
-                    content=
+                    content=translated_content,
                     prompt=messages[-1]["content"],
+                    metadata={
+                        "block_type": "interaction",
+                        "block_index": block_index,
+                        "original_content": translatable_json,
+                        "translated_content": full_response,
+                    },
                 )
 
             return stream_generator()
@@ -356,7 +529,7 @@ class MarkdownFlow:
         # Basic validation
         if not user_input or not any(values for values in user_input.values()):
             error_msg = INPUT_EMPTY_ERROR
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         # Get the target variable value from user_input
         target_values = user_input.get(target_variable, [])
@@ -370,24 +543,98 @@ class MarkdownFlow:
 
         if "error" in parse_result:
             error_msg = INTERACTION_PARSE_ERROR.format(error=parse_result["error"])
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         interaction_type = parse_result.get("type")
 
         # Process user input based on interaction type
         if interaction_type in [
-            InteractionType.BUTTONS_ONLY,
             InteractionType.BUTTONS_WITH_TEXT,
-            InteractionType.BUTTONS_MULTI_SELECT,
             InteractionType.BUTTONS_MULTI_WITH_TEXT,
         ]:
-            #
+            # Buttons with text input: smart validation (match buttons first, then LLM validate custom text)
+            buttons = parse_result.get("buttons", [])
+
+            # Step 1: Match button values
+            matched_values, unmatched_values = self._match_button_values(buttons, target_values)
+
+            # Step 2: If there are unmatched values (custom text), validate with LLM
+            if unmatched_values:
+                # Create user_input for LLM validation (only custom text)
+                custom_input = {target_variable: unmatched_values}
+
+                validation_result = self._process_llm_validation(
+                    block_index=block_index,
+                    user_input=custom_input,
+                    target_variable=target_variable,
+                    mode=mode,
+                    context=context,
+                )
+
+                # Handle validation result based on mode
+                if mode == ProcessMode.COMPLETE:
+                    # Check if validation passed
+                    if isinstance(validation_result, LLMResult) and validation_result.variables:
+                        validated_values = validation_result.variables.get(target_variable, [])
+                        # Merge matched button values + validated custom text
+                        all_values = matched_values + validated_values
+                        return LLMResult(
+                            content="",
+                            variables={target_variable: all_values},
+                            metadata={
+                                "interaction_type": str(interaction_type),
+                                "matched_button_values": matched_values,
+                                "validated_custom_values": validated_values,
+                            },
+                        )
+                    # Validation failed, return error
+                    return validation_result
+
+                if mode == ProcessMode.STREAM:
+                    # For stream mode, collect validation result
+                    def stream_merge_generator():
+                        # Consume the validation stream
+                        for result in validation_result:  # type: ignore[attr-defined]
+                            if isinstance(result, LLMResult) and result.variables:
+                                validated_values = result.variables.get(target_variable, [])
+                                all_values = matched_values + validated_values
+                                yield LLMResult(
+                                    content="",
+                                    variables={target_variable: all_values},
+                                    metadata={
+                                        "interaction_type": str(interaction_type),
+                                        "matched_button_values": matched_values,
+                                        "validated_custom_values": validated_values,
+                                    },
+                                )
+                            else:
+                                # Validation failed
+                                yield result
+
+                    return stream_merge_generator()
+            else:
+                # All values matched buttons, return directly
+                return LLMResult(
+                    content="",
+                    variables={target_variable: matched_values},
+                    metadata={
+                        "interaction_type": str(interaction_type),
+                        "all_matched_buttons": True,
+                    },
+                )
+
+        if interaction_type in [
+            InteractionType.BUTTONS_ONLY,
+            InteractionType.BUTTONS_MULTI_SELECT,
+        ]:
+            # Pure button types: only basic button validation (no LLM)
             return self._process_button_validation(
                 parse_result,
                 target_values,
                 target_variable,
                 mode,
                 interaction_type,
+                context,
             )
 
         if interaction_type == InteractionType.NON_ASSIGNMENT_BUTTON:
@@ -403,19 +650,50 @@ class MarkdownFlow:
             )
 
         # Text-only input type: ?[%{{sys_user_nickname}}...question]
-        #
+        # Use LLM validation to check if input is relevant to the question
         if target_values:
-            return
-
-
-
-
-
-                    "values": target_values,
-                },
+            return self._process_llm_validation(
+                block_index=block_index,
+                user_input=user_input,
+                target_variable=target_variable,
+                mode=mode,
+                context=context,
             )
         error_msg = f"No input provided for variable '{target_variable}'"
-        return self._render_error(error_msg, mode)
+        return self._render_error(error_msg, mode, context)
+
+    def _match_button_values(
+        self,
+        buttons: list[dict[str, str]],
+        target_values: list[str],
+    ) -> tuple[list[str], list[str]]:
+        """
+        Match user input values against button options.
+
+        Args:
+            buttons: List of button dictionaries with 'display' and 'value' keys
+            target_values: User input values to match
+
+        Returns:
+            Tuple of (matched_values, unmatched_values)
+            - matched_values: Values that match button options (using button value)
+            - unmatched_values: Values that don't match any button
+        """
+        matched_values = []
+        unmatched_values = []
+
+        for value in target_values:
+            matched = False
+            for button in buttons:
+                if value in [button["display"], button["value"]]:
+                    matched_values.append(button["value"])  # Use button value
+                    matched = True
+                    break
+
+            if not matched:
+                unmatched_values.append(value)
+
+        return matched_values, unmatched_values
 
     def _process_button_validation(
         self,
@@ -424,6 +702,7 @@ class MarkdownFlow:
         target_variable: str,
         mode: ProcessMode,
         interaction_type: InteractionType,
+        context: list[dict[str, str]] | None = None,
     ) -> LLMResult | Generator[LLMResult, None, None]:
         """
         Simplified button validation with new input format.
@@ -434,6 +713,7 @@ class MarkdownFlow:
             target_variable: Target variable name
             mode: Processing mode
            interaction_type: Type of interaction
+            context: Conversation history context (optional)
         """
         buttons = parse_result.get("buttons", [])
         is_multi_select = interaction_type in [
@@ -459,7 +739,7 @@ class MarkdownFlow:
             # Pure button mode requires input
             button_displays = [btn["display"] for btn in buttons]
             error_msg = f"Please select from: {', '.join(button_displays)}"
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         # Validate input values against available buttons
         valid_values = []
@@ -484,7 +764,7 @@ class MarkdownFlow:
         if invalid_values and not allow_text_input:
             button_displays = [btn["display"] for btn in buttons]
             error_msg = f"Invalid options: {', '.join(invalid_values)}. Please select from: {', '.join(button_displays)}"
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         # Success: return validated values
         return LLMResult(
@@ -505,26 +785,18 @@ class MarkdownFlow:
         user_input: dict[str, list[str]],
         target_variable: str,
         mode: ProcessMode,
+        context: list[dict[str, str]] | None = None,
     ) -> LLMResult | Generator[LLMResult, None, None]:
         """Process LLM validation."""
         # Build validation messages
-        messages = self._build_validation_messages(block_index, user_input, target_variable)
-
-        if mode == ProcessMode.PROMPT_ONLY:
-            return LLMResult(
-                prompt=messages[-1]["content"],
-                metadata={
-                    "validation_target": user_input,
-                    "target_variable": target_variable,
-                },
-            )
+        messages = self._build_validation_messages(block_index, user_input, target_variable, context)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 # Fallback processing, return variables directly
                 return LLMResult(content="", variables=user_input)  # type: ignore[arg-type]
 
-            llm_response = self._llm_provider.complete(messages)
+            llm_response = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
 
             # Parse validation response and convert to LLMResult
             # Use joined target values for fallback; avoids JSON string injection
@@ -538,7 +810,7 @@ class MarkdownFlow:
 
         def stream_generator():
             full_response = ""
-            for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+            for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                full_response += chunk
 
             # Parse complete response and convert to LLMResult
@@ -562,26 +834,15 @@ class MarkdownFlow:
         mode: ProcessMode,
     ) -> LLMResult | Generator[LLMResult, None, None]:
         """Process LLM validation with button options (third case)."""
-        #
-        messages = self.
-
-        if mode == ProcessMode.PROMPT_ONLY:
-            return LLMResult(
-                prompt=messages[-1]["content"],
-                metadata={
-                    "validation_target": user_input,
-                    "target_variable": target_variable,
-                    "options": options,
-                    "question": question,
-                },
-            )
+        # Use unified validation message builder (button context will be included automatically)
+        messages = self._build_validation_messages(block_index, user_input, target_variable, context=None)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 # Fallback processing, return variables directly
                 return LLMResult(content="", variables=user_input)  # type: ignore[arg-type]
 
-            llm_response = self._llm_provider.complete(messages)
+            llm_response = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
 
             # Parse validation response and convert to LLMResult
             # Use joined target values for fallback; avoids JSON string injection
@@ -595,7 +856,7 @@ class MarkdownFlow:
 
         def stream_generator():
             full_response = ""
-            for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+            for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                 full_response += chunk
                 # For validation scenario, don't output chunks in real-time, only final result
@@ -612,21 +873,24 @@ class MarkdownFlow:
 
         return stream_generator()
 
-    def _render_error(
+    def _render_error(
+        self,
+        error_message: str,
+        mode: ProcessMode,
+        context: list[dict[str, str]] | None = None,
+    ) -> LLMResult | Generator[LLMResult, None, None]:
         """Render user-friendly error message."""
-
+        # Truncate context to configured maximum length
+        truncated_context = self._truncate_context(context)
 
-
-
-                prompt=messages[-1]["content"],
-                metadata={"original_error": error_message},
-            )
+        # Build error messages with context
+        messages = self._build_error_render_messages(error_message, truncated_context)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 return LLMResult(content=error_message)  # Fallback processing
 
-            friendly_error = self._llm_provider.complete(messages)
+            friendly_error = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
             return LLMResult(content=friendly_error, prompt=messages[-1]["content"])
 
         if mode == ProcessMode.STREAM:
@@ -634,7 +898,7 @@ class MarkdownFlow:
             return LLMResult(content=error_message)
 
         def stream_generator():
-            for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+            for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                 yield LLMResult(content=chunk, prompt=messages[-1]["content"])
 
         return stream_generator()
@@ -645,6 +909,7 @@ class MarkdownFlow:
         self,
         block_index: int,
         variables: dict[str, str | list[str]] | None,
+        context: list[dict[str, str]] | None = None,
    ) -> list[dict[str, str]]:
         """Build content block messages."""
         block = self.get_block(block_index)
@@ -657,120 +922,261 @@ class MarkdownFlow:
         # Replace variables
         block_content = replace_variables_in_text(block_content, variables or {})
 
+        # Restore code blocks (so the LLM sees the real code block content)
+        # Code block preprocessing is to prevent the parser from misinterpreting
+        # MarkdownFlow syntax inside code blocks, but the LLM needs to see
+        # the real content to correctly understand and generate responses
+        block_content = self._preprocessor.restore_code_blocks(block_content)
+
         # Build message array
         messages = []
 
-        #
+        # Build system message with XML tags
+        # Priority order: preserve_or_translate_instruction > base_system > document_prompt
+        system_parts = []
+
+        # 1. Output instruction (highest priority - if preserved content exists)
+        # Note: OUTPUT_INSTRUCTION_EXPLANATION already contains <preserve_or_translate_instruction> tags
+        if has_preserved_content:
+            system_parts.append(OUTPUT_INSTRUCTION_EXPLANATION.strip())
+
+        # 2. Base system prompt (if exists and non-empty)
+        if self._base_system_prompt:
+            system_parts.append(f"<base_system>\n{self._base_system_prompt}\n</base_system>")
+
+        # 3. Document prompt (if exists and non-empty)
         if self._document_prompt:
-
-
-
-
+            system_parts.append(f"<document_prompt>\n{self._document_prompt}\n</document_prompt>")
+
+        # Combine all parts and add as system message
+        if system_parts:
+            system_msg = "\n\n".join(system_parts)
             messages.append({"role": "system", "content": system_msg})
-        elif has_preserved_content:
-            # No document prompt but has preserved content, add explanation alone
-            messages.append({"role": "system", "content": OUTPUT_INSTRUCTION_EXPLANATION.strip()})
 
-        #
-        #
-
-
-
+        # Add conversation history context if provided
+        # Context is inserted after system message and before current user message
+        truncated_context = self._truncate_context(context)
+        if truncated_context:
+            messages.extend(truncated_context)
 
         # Add processed content as user message (as instruction to LLM)
         messages.append({"role": "user", "content": block_content})
 
         return messages
 
-    def
-        """
-        # Check if using custom interaction prompt
-        if self._interaction_prompt != DEFAULT_INTERACTION_PROMPT:
-            # User custom prompt + mandatory direction protection
-            render_prompt = f"""{self._interaction_prompt}"""
-        else:
-            # Use default prompt and instructions
-            render_prompt = f"""{self._interaction_prompt}
-{INTERACTION_RENDER_INSTRUCTIONS}"""
+    def _extract_translatable_content(self, interaction_content: str) -> tuple[str, dict[str, Any] | None]:
+        """Extract the translatable parts of interaction content as JSON
 
-
+        Args:
+            interaction_content: Interaction content string
 
-
-
+        Returns:
+            tuple: (JSON string, InteractionInfo dict)
+        """
+        # Parse the interaction content
+        interaction_parser = InteractionParser()
+        interaction_info = interaction_parser.parse(interaction_content)
+        if not interaction_info:
+            return "{}", None
 
-
+        translatable = {}
 
-
-
-
-
-
-        if config and config.validation_template:
-            # Use custom validation template
-            validation_prompt = config.validation_template
-            user_input_str = json.dumps(user_input, ensure_ascii=False)
-            validation_prompt = validation_prompt.replace("{sys_user_input}", user_input_str)
-            validation_prompt = validation_prompt.replace("{block_content}", block.content)
-            validation_prompt = validation_prompt.replace("{target_variable}", target_variable)
-            system_message = DEFAULT_VALIDATION_SYSTEM_MESSAGE
-        else:
-            # Use smart default validation template
-            from .utils import (
-                extract_interaction_question,
-                generate_smart_validation_template,
-            )
+        # Extract the buttons' display text
+        if interaction_info.get("buttons"):
+            button_texts = [btn["display"] for btn in interaction_info["buttons"]]
+            translatable["buttons"] = button_texts
 
-
-
+        # Extract the question text
+        if interaction_info.get("question"):
+            translatable["question"] = interaction_info["question"]
 
-
-
-
-
-                interaction_question=interaction_question,
-            )
+        # Convert to JSON
+        import json
+
+        json_str = json.dumps(translatable, ensure_ascii=False)
 
-
-        user_input_str = json.dumps(user_input, ensure_ascii=False)
-        validation_prompt = validation_template.replace("{sys_user_input}", user_input_str)
-        validation_prompt = validation_prompt.replace("{block_content}", block.content)
-        validation_prompt = validation_prompt.replace("{target_variable}", target_variable)
-        system_message = DEFAULT_VALIDATION_SYSTEM_MESSAGE
+        return json_str, interaction_info
 
+    def _build_translation_messages(self, translatable_json: str) -> list[dict[str, str]]:
+        """Build the message list used for translation
+
+        Args:
+            translatable_json: JSON string of the translatable content
+
+        Returns:
+            list: Message list
+        """
         messages = []
 
-
-
+        # Build the system message: interaction translation prompt + document prompt (XML format)
+        # interaction_prompt: defines the translation rules and JSON format requirements (includes <interaction_translation_rules> tags)
+        # document_prompt: supplies language instructions (e.g. "output in English"), wrapped in <document_context> tags for the LLM to detect
+        system_content = self._interaction_prompt
+        if self._document_prompt:
+            # Wrap the document prompt in <document_context> tags
+            system_content = f"{self._interaction_prompt}\n\n<document_context>\n{self._document_prompt}\n</document_context>"
+
+        messages.append({"role": "system", "content": system_content})
+
+        # Add the translatable content as the user message
+        messages.append({"role": "user", "content": translatable_json})
 
         return messages
 
-    def
+    def _reconstruct_with_translation(
         self,
+        original_content: str,
+        original_json: str,
+        translated_json: str,
+        interaction_info: dict[str, Any],
+    ) -> str:
+        """Reconstruct the interaction block with the translated content
+
+        Args:
+            original_content: Original interaction content
+            original_json: Original translatable JSON (before translation)
+            translated_json: Translated JSON string
+            interaction_info: Interaction info dict
+
+        Returns:
+            str: Reconstructed interaction content
+        """
+        import json
+
+        # Parse the original JSON
+        try:
+            original = json.loads(original_json)
+        except json.JSONDecodeError:
+            return original_content
+
+        # Parse the translated JSON
+        try:
+            translated = json.loads(translated_json)
+        except json.JSONDecodeError:
+            return original_content
+
+        reconstructed = original_content
+
+        # Replace button display text (handling values intelligently)
+        if "buttons" in translated and interaction_info.get("buttons"):
+            for i, button in enumerate(interaction_info["buttons"]):
+                if i < len(translated["buttons"]):
+                    old_display = button["display"]
+                    new_display = translated["buttons"][i]
+
+                    # Detect whether a translation actually happened
+                    translation_happened = False
+                    if "buttons" in original and i < len(original["buttons"]):
+                        if original["buttons"][i] != new_display:
+                            translation_happened = True
+
+                    # If display and value are separated (display//value format), keep the value
+                    if button["display"] != button["value"]:
+                        # Value already separated, handle as before
+                        # Replacement format: oldDisplay//value -> newDisplay//value
+                        old_pattern = f"{old_display}//{button['value']}"
+                        new_pattern = f"{new_display}//{button['value']}"
+                        reconstructed = reconstructed.replace(old_pattern, new_pattern, 1)
+                    elif translation_happened:
+                        # No value separation, but a translation happened
+                        # Automatically add a value: translated//original
+                        old_pattern = old_display
+                        new_pattern = f"{new_display}//{old_display}"
+                        reconstructed = reconstructed.replace(old_pattern, new_pattern, 1)
+                    else:
+                        # No translation, keep as-is
+                        reconstructed = reconstructed.replace(old_display, new_display, 1)
+
+        # Replace the question text
+        if "question" in translated and interaction_info.get("question"):
+            old_question = interaction_info["question"]
+            new_question = translated["question"]
+            reconstructed = reconstructed.replace(f"...{old_question}", f"...{new_question}", 1)
+
+        return reconstructed
+
+    def _build_validation_messages(
+        self,
+        block_index: int,
         user_input: dict[str, list[str]],
         target_variable: str,
-
-        question: str,
+        context: list[dict[str, str]] | None = None,
     ) -> list[dict[str, str]]:
-        """
-
-        user_input_str = json.dumps(user_input, ensure_ascii=False)
-        validation_prompt = BUTTONS_WITH_TEXT_VALIDATION_TEMPLATE.format(
-            question=question,
-            options=", ".join(options),
-            user_input=user_input_str,
-            target_variable=target_variable,
-        )
+        """
+        Build validation messages with new structure.
 
-
+        System message contains:
+        - VALIDATION_TASK_TEMPLATE (includes task description and output language rules)
+        - Question context (if exists)
+        - Button options context (if exists)
+        - VALIDATION_REQUIREMENTS_TEMPLATE
+        - document_prompt wrapped in <document_context> tags (if exists)
+
+        User message contains:
+        - User input only
+        """
+        from .parser import InteractionParser, extract_interaction_question
+
+        block = self.get_block(block_index)
+
+        # Extract user input values for target variable
+        target_values = user_input.get(target_variable, [])
+        user_input_str = ", ".join(target_values) if target_values else ""
+
+        # Build System Message (contains all validation rules and context)
+        # VALIDATION_TASK_TEMPLATE already includes system message, directly replace variables
+        task_template = VALIDATION_TASK_TEMPLATE.replace("{target_variable}", target_variable)
+        system_parts = [task_template]
+
+        # Extract interaction question
+        interaction_question = extract_interaction_question(block.content)
+
+        # Add question context (if exists)
+        if interaction_question:
+            question_context = CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question)
+            system_parts.append("")
+            system_parts.append(question_context)
+
+        # Parse interaction to extract button information
+        parser = InteractionParser()
+        parse_result = parser.parse(block.content)
+        buttons = parse_result.get("buttons") if "buttons" in parse_result else None
+
+        # Add button options context (if exists)
+        if buttons:
+            button_displays = [btn.get("display", "") for btn in buttons if btn.get("display")]
+            if button_displays:
+                button_options = "、".join(button_displays)
+                button_context = CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=button_options)
+                system_parts.append("")
+                system_parts.append(button_context)
+
+        # Add extraction requirements (using template)
+        system_parts.append("")
+        system_parts.append(VALIDATION_REQUIREMENTS_TEMPLATE)
+
+        # Add document_prompt (if exists)
         if self._document_prompt:
-
+            system_parts.append("")
+            system_parts.append("<document_context>")
+            system_parts.append(self._document_prompt)
+            system_parts.append("</document_context>")
+
+        system_content = "\n".join(system_parts)
 
-
-        messages
+        # Build message list
+        messages = [
+            {"role": "system", "content": system_content},
+            {"role": "user", "content": user_input_str},  # Only user input
+        ]
 
         return messages
 
-    def _build_error_render_messages(
+    def _build_error_render_messages(
+        self,
+        error_message: str,
+        context: list[dict[str, str]] | None = None,
+    ) -> list[dict[str, str]]:
         """Build error rendering messages."""
         render_prompt = f"""{self._interaction_error_prompt}
@@ -783,24 +1189,14 @@ Original Error: {error_message}
             messages.append({"role": "system", "content": self._document_prompt})
 
         messages.append({"role": "system", "content": render_prompt})
+
+        # Add conversation history context if provided
+        truncated_context = self._truncate_context(context)
+        if truncated_context:
+            messages.extend(truncated_context)
+
         messages.append({"role": "user", "content": error_message})
 
         return messages
 
     # Helper methods
-
-    def _reconstruct_interaction_content(self, original_content: str, rendered_question: str) -> str:
-        """Reconstruct interaction content."""
-        cleaned_question = rendered_question.strip()
-        # Use pre-compiled regex for improved performance
-        cleaned_question = COMPILED_BRACKETS_CLEANUP_REGEX.sub("", cleaned_question)
-        cleaned_question = COMPILED_VARIABLE_REFERENCE_CLEANUP_REGEX.sub("", cleaned_question)
-        cleaned_question = COMPILED_WHITESPACE_CLEANUP_REGEX.sub(" ", cleaned_question).strip()
-
-        match = COMPILED_INTERACTION_CONTENT_RECONSTRUCT_REGEX.search(original_content)
-
-        if match:
-            prefix = match.group(1)
-            suffix = match.group(2)
-            return f"{prefix}{cleaned_question}{suffix}"
-        return original_content  # type: ignore[unreachable]
```
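
Two of the helpers introduced above are pure functions, so their behavior is easy to check outside the package. Here is a standalone restatement of the logic shown in the diff (it mirrors `_truncate_context` and `_match_button_values`; the module-level names are mine, not part of the package's API):

```python
def truncate_context(context, max_context_length=0):
    """Drop empty/whitespace-only messages, then keep the most recent N (0 = unlimited)."""
    if not context:
        return None
    filtered = [msg for msg in context if msg.get("content", "").strip()]
    if not filtered:
        return None
    if max_context_length and len(filtered) > max_context_length:
        return filtered[-max_context_length:]
    return filtered


def match_button_values(buttons, target_values):
    """Split user inputs into (matched canonical button values, unmatched custom text)."""
    matched, unmatched = [], []
    for value in target_values:
        for button in buttons:
            if value in (button["display"], button["value"]):
                matched.append(button["value"])  # canonical value, even when matched by display text
                break
        else:
            unmatched.append(value)
    return matched, unmatched


# One button click plus one free-text answer: the free text is what
# _process_interaction_input would then hand off to LLM validation.
buttons = [{"display": "Yes", "value": "yes"}, {"display": "No", "value": "no"}]
print(match_button_values(buttons, ["Yes", "maybe later"]))  # (['yes'], ['maybe later'])
print(truncate_context([{"role": "user", "content": " "}, {"role": "user", "content": "hi"}], 1))
# [{'role': 'user', 'content': 'hi'}]
```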