flowllm-0.1.1-py3-none-any.whl → flowllm-0.1.2-py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
- flowllm/__init__.py +15 -6
- flowllm/app.py +4 -14
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/{default_config.yaml → default.yaml} +3 -8
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +17 -17
- flowllm/context/base_context.py +27 -7
- flowllm/context/flow_context.py +6 -18
- flowllm/context/registry.py +5 -1
- flowllm/context/service_context.py +81 -37
- flowllm/embedding_model/__init__.py +1 -1
- flowllm/embedding_model/base_embedding_model.py +91 -0
- flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
- flowllm/llm/__init__.py +2 -1
- flowllm/llm/base_llm.py +94 -4
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +205 -5
- flowllm/op/__init__.py +11 -3
- flowllm/op/agent/__init__.py +0 -0
- flowllm/op/agent/react_op.py +83 -0
- flowllm/op/agent/react_prompt.yaml +28 -0
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +14 -22
- flowllm/op/akshare/get_ak_a_info_op.py +17 -20
- flowllm/op/{llm_base_op.py → base_llm_op.py} +6 -5
- flowllm/op/base_op.py +14 -35
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +2 -9
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +1 -9
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/service_config.py +12 -16
- flowllm/schema/tool_call.py +13 -5
- flowllm/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -2
- flowllm/service/base_service.py +50 -41
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +34 -42
- flowllm/service/mcp_service.py +13 -11
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
- flowllm/storage/vector_store/__init__.py +3 -3
- flowllm/storage/vector_store/es_vector_store.py +1 -2
- flowllm/storage/vector_store/local_vector_store.py +0 -1
- flowllm/utils/common_utils.py +9 -21
- flowllm/utils/fetch_url.py +16 -12
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/METADATA +43 -390
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- flowllm/flow_engine/__init__.py +0 -1
- flowllm/flow_engine/base_flow_engine.py +0 -34
- flowllm-0.1.1.dist-info/RECORD +0 -62
- flowllm-0.1.1.dist-info/entry_points.txt +0 -4
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,455 @@
+import asyncio
+import os
+from typing import List, Dict
+
+from litellm import completion, acompletion
+from loguru import logger
+from pydantic import Field, PrivateAttr, model_validator
+
+from flowllm.context.service_context import C
+from flowllm.enumeration.chunk_enum import ChunkEnum
+from flowllm.enumeration.role import Role
+from flowllm.llm.base_llm import BaseLLM
+from flowllm.schema.message import Message
+from flowllm.schema.tool_call import ToolCall
+
+
+@C.register_llm("litellm")
+class LiteLLMBaseLLM(BaseLLM):
+    """
+    LiteLLM-compatible LLM implementation supporting multiple LLM providers through unified interface.
+
+    This class implements the BaseLLM interface using LiteLLM, which provides:
+    - Support for 100+ LLM providers (OpenAI, Anthropic, Cohere, Azure, etc.)
+    - Streaming responses with different chunk types (content, tools, usage)
+    - Tool calling with parallel execution support
+    - Unified API across different providers
+    - Robust error handling and retries
+
+    LiteLLM automatically handles provider-specific authentication and request formatting.
+    """
+
+    # API configuration - LiteLLM handles provider-specific settings
+    api_key: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_API_KEY"),
+                         description="API key for authentication")
+    base_url: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_BASE_URL"),
+                          description="Base URL for custom endpoints")
+
+    # LiteLLM specific configuration
+    custom_llm_provider: str = Field(default="openai", description="Custom LLM provider name for LiteLLM routing")
+
+    # Additional LiteLLM parameters
+    timeout: float = Field(default=600, description="Request timeout in seconds")
+    max_tokens: int = Field(default=None, description="Maximum tokens to generate")
+
+    # Private attributes for LiteLLM configuration
+    _litellm_params: dict = PrivateAttr(default_factory=dict)
+
+    @model_validator(mode="after")
+    def init_litellm_config(self):
+        """
+        Initialize LiteLLM configuration after model validation.
+
+        This validator sets up LiteLLM-specific parameters and environment variables
+        required for different providers. It configures authentication and routing
+        based on the model name and provider settings.
+
+        Returns:
+            Self for method chaining
+        """
+
+        # Configure LiteLLM parameters
+        self._litellm_params = {
+            "api_key": self.api_key,
+            "base_url": self.base_url,  #.replace("/v1", "")
+            "model": self.model_name,
+            "temperature": self.temperature,
+            "seed": self.seed,
+            "timeout": self.timeout,
+        }
+
+        # Add optional parameters
+        if self.top_p is not None:
+            self._litellm_params["top_p"] = self.top_p
+        if self.max_tokens is not None:
+            self._litellm_params["max_tokens"] = self.max_tokens
+        if self.presence_penalty is not None:
+            self._litellm_params["presence_penalty"] = self.presence_penalty
+        if self.custom_llm_provider:
+            self._litellm_params["custom_llm_provider"] = self.custom_llm_provider
+
+        return self
+
+    def stream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
+        """
+        Stream chat completions from LiteLLM with support for multiple providers.
+
+        This method handles streaming responses and categorizes chunks into different types:
+        - ANSWER: Regular response content from the model
+        - TOOL: Tool calls that need to be executed
+        - USAGE: Token usage statistics (when available)
+        - ERROR: Error information from failed requests
+
+        Args:
+            messages: List of conversation messages
+            tools: Optional list of tools available to the model
+            **kwargs: Additional parameters passed to LiteLLM
+
+        Yields:
+            Tuple of (chunk_content, ChunkEnum) for each streaming piece
+        """
+        for i in range(self.max_retries):
+            try:
+                # Prepare parameters for LiteLLM
+                params = self._litellm_params.copy()
+                params.update(kwargs)
+                params.update({
+                    "messages": [x.simple_dump() for x in messages],
+                    "stream": True,
+                })
+
+                # Add tools if provided
+                if tools:
+                    params["tools"] = [x.simple_input_dump() for x in tools]
+                    params["tool_choice"] = self.tool_choice if self.tool_choice else "auto"
+
+                # Create streaming completion using LiteLLM
+                completion_response = completion(**params)
+
+                # Initialize tool call tracking
+                ret_tools: List[ToolCall] = []  # Accumulate tool calls across chunks
+
+                # Process each chunk in the streaming response
+                for chunk in completion_response:
+                    try:
+                        # Handle chunks without choices (usually usage/metadata)
+                        if not hasattr(chunk, 'choices') or not chunk.choices:
+                            # Check for usage information
+                            if hasattr(chunk, 'usage') and chunk.usage:
+                                yield chunk.usage, ChunkEnum.USAGE
+                            continue
+
+                        delta = chunk.choices[0].delta
+
+                        # Handle regular response content
+                        if hasattr(delta, 'content') and delta.content is not None:
+                            yield delta.content, ChunkEnum.ANSWER
+
+                        # Handle tool calls (function calling)
+                        if hasattr(delta, 'tool_calls') and delta.tool_calls is not None:
+                            for tool_call in delta.tool_calls:
+                                index = getattr(tool_call, 'index', 0)
+
+                                # Ensure we have enough tool call slots
+                                while len(ret_tools) <= index:
+                                    ret_tools.append(ToolCall(index=index))
+
+                                # Accumulate tool call information across chunks
+                                if hasattr(tool_call, 'id') and tool_call.id:
+                                    ret_tools[index].id += tool_call.id
+
+                                if (hasattr(tool_call, 'function') and tool_call.function and
+                                        hasattr(tool_call.function, 'name') and tool_call.function.name):
+                                    ret_tools[index].name += tool_call.function.name
+
+                                if (hasattr(tool_call, 'function') and tool_call.function and
+                                        hasattr(tool_call.function, 'arguments') and tool_call.function.arguments):
+                                    ret_tools[index].arguments += tool_call.function.arguments
+
+                    except Exception as chunk_error:
+                        logger.warning(f"Error processing chunk: {chunk_error}")
+                        continue
+
+                # Yield completed tool calls after streaming finishes
+                if ret_tools:
+                    tool_dict: Dict[str, ToolCall] = {x.name: x for x in tools} if tools else {}
+                    for tool in ret_tools:
+                        # Only yield tool calls that correspond to available tools
+                        if tools and tool.name not in tool_dict:
+                            continue
+
+                        yield tool, ChunkEnum.TOOL
+
+                return
+
+            except Exception as e:
+                logger.exception(f"stream chat with LiteLLM model={self.model_name} encounter error: {e}")
+
+                # Handle retry logic
+                if i == self.max_retries - 1 and self.raise_exception:
+                    raise e
+                else:
+                    error_msg = str(e.args) if hasattr(e, 'args') else str(e)
+                    yield error_msg, ChunkEnum.ERROR
+
+    async def astream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
+        """
+        Async stream chat completions from LiteLLM with support for multiple providers.
+
+        This method handles async streaming responses and categorizes chunks into different types:
+        - ANSWER: Regular response content from the model
+        - TOOL: Tool calls that need to be executed
+        - USAGE: Token usage statistics (when available)
+        - ERROR: Error information from failed requests
+
+        Args:
+            messages: List of conversation messages
+            tools: Optional list of tools available to the model
+            **kwargs: Additional parameters passed to LiteLLM
+
+        Yields:
+            Tuple of (chunk_content, ChunkEnum) for each streaming piece
+        """
+        for i in range(self.max_retries):
+            try:
+                # Prepare parameters for LiteLLM
+                params = self._litellm_params.copy()
+                params.update(kwargs)
+                params.update({
+                    "messages": [x.simple_dump() for x in messages],
+                    "stream": True,
+                })
+
+                # Add tools if provided
+                if tools:
+                    params["tools"] = [x.simple_input_dump() for x in tools]
+                    params["tool_choice"] = self.tool_choice if self.tool_choice else "auto"
+
+                # Create async streaming completion using LiteLLM
+                completion_response = await acompletion(**params)
+
+                # Initialize tool call tracking
+                ret_tools: List[ToolCall] = []  # Accumulate tool calls across chunks
+
+                # Process each chunk in the async streaming response
+                async for chunk in completion_response:
+                    try:
+                        # Handle chunks without choices (usually usage/metadata)
+                        if not hasattr(chunk, 'choices') or not chunk.choices:
+                            # Check for usage information
+                            if hasattr(chunk, 'usage') and chunk.usage:
+                                yield chunk.usage, ChunkEnum.USAGE
+                            continue
+
+                        delta = chunk.choices[0].delta
+
+                        # Handle regular response content
+                        if hasattr(delta, 'content') and delta.content is not None:
+                            yield delta.content, ChunkEnum.ANSWER
+
+                        # Handle tool calls (function calling)
+                        if hasattr(delta, 'tool_calls') and delta.tool_calls is not None:
+                            for tool_call in delta.tool_calls:
+                                index = getattr(tool_call, 'index', 0)
+
+                                # Ensure we have enough tool call slots
+                                while len(ret_tools) <= index:
+                                    ret_tools.append(ToolCall(index=index))
+
+                                # Accumulate tool call information across chunks
+                                if hasattr(tool_call, 'id') and tool_call.id:
+                                    ret_tools[index].id += tool_call.id
+
+                                if (hasattr(tool_call, 'function') and tool_call.function and
+                                        hasattr(tool_call.function, 'name') and tool_call.function.name):
+                                    ret_tools[index].name += tool_call.function.name
+
+                                if (hasattr(tool_call, 'function') and tool_call.function and
+                                        hasattr(tool_call.function, 'arguments') and tool_call.function.arguments):
+                                    ret_tools[index].arguments += tool_call.function.arguments
+
+                    except Exception as chunk_error:
+                        logger.warning(f"Error processing async chunk: {chunk_error}")
+                        continue
+
+                # Yield completed tool calls after streaming finishes
+                if ret_tools:
+                    tool_dict: Dict[str, ToolCall] = {x.name: x for x in tools} if tools else {}
+                    for tool in ret_tools:
+                        # Only yield tool calls that correspond to available tools
+                        if tools and tool.name not in tool_dict:
+                            continue
+
+                        yield tool, ChunkEnum.TOOL
+
+                return
+
+            except Exception as e:
+                logger.exception(f"async stream chat with LiteLLM model={self.model_name} encounter error: {e}")
+
+                # Handle retry logic with async sleep
+                await asyncio.sleep(1 + i)
+
+                if i == self.max_retries - 1 and self.raise_exception:
+                    raise e
+                else:
+                    error_msg = str(e.args) if hasattr(e, 'args') else str(e)
+                    yield error_msg, ChunkEnum.ERROR
+
+    def _chat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+              **kwargs) -> Message:
+        """
+        Perform a complete chat completion by aggregating streaming chunks from LiteLLM.
+
+        This method consumes the entire streaming response and combines all
+        chunks into a single Message object. It separates regular answer content
+        and tool calls, providing a complete response.
+
+        Args:
+            messages: List of conversation messages
+            tools: Optional list of tools available to the model
+            enable_stream_print: Whether to print streaming response to console
+            **kwargs: Additional parameters passed to LiteLLM
+
+        Returns:
+            Complete Message with all content aggregated
+        """
+        answer_content = ""  # Final response content
+        tool_calls = []  # List of tool calls to execute
+
+        # Consume streaming response and aggregate chunks by type
+        for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
+            if chunk_enum is ChunkEnum.USAGE:
+                # Display token usage statistics
+                if enable_stream_print:
+                    if hasattr(chunk, 'model_dump_json'):
+                        print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                    else:
+                        print(f"\n<usage>{chunk}</usage>")
+
+            elif chunk_enum is ChunkEnum.ANSWER:
+                if enable_stream_print:
+                    print(chunk, end="")
+                answer_content += chunk
+
+            elif chunk_enum is ChunkEnum.TOOL:
+                if enable_stream_print:
+                    if hasattr(chunk, 'model_dump_json'):
+                        print(f"\n<tool>{chunk.model_dump_json()}</tool>", end="")
+                    else:
+                        print(f"\n<tool>{chunk}</tool>", end="")
+                tool_calls.append(chunk)
+
+            elif chunk_enum is ChunkEnum.ERROR:
+                if enable_stream_print:
+                    print(f"\n<error>{chunk}</error>", end="")
+
+        # Construct complete response message
+        return Message(
+            role=Role.ASSISTANT,
+            content=answer_content,
+            tool_calls=tool_calls
+        )
+
+    async def _achat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+                     **kwargs) -> Message:
+        """
+        Perform an async complete chat completion by aggregating streaming chunks from LiteLLM.
+
+        This method consumes the entire async streaming response and combines all
+        chunks into a single Message object. It separates regular answer content
+        and tool calls, providing a complete response.
+
+        Args:
+            messages: List of conversation messages
+            tools: Optional list of tools available to the model
+            enable_stream_print: Whether to print streaming response to console
+            **kwargs: Additional parameters passed to LiteLLM
+
+        Returns:
+            Complete Message with all content aggregated
+        """
+        answer_content = ""  # Final response content
+        tool_calls = []  # List of tool calls to execute
+
+        # Consume async streaming response and aggregate chunks by type
+        async for chunk, chunk_enum in self.astream_chat(messages, tools, **kwargs):
+            if chunk_enum is ChunkEnum.USAGE:
+                # Display token usage statistics
+                if enable_stream_print:
+                    if hasattr(chunk, 'model_dump_json'):
+                        print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                    else:
+                        print(f"\n<usage>{chunk}</usage>")
+
+            elif chunk_enum is ChunkEnum.ANSWER:
+                if enable_stream_print:
+                    print(chunk, end="")
+                answer_content += chunk
+
+            elif chunk_enum is ChunkEnum.TOOL:
+                if enable_stream_print:
+                    if hasattr(chunk, 'model_dump_json'):
+                        print(f"\n<tool>{chunk.model_dump_json()}</tool>", end="")
+                    else:
+                        print(f"\n<tool>{chunk}</tool>", end="")
+                tool_calls.append(chunk)
+
+            elif chunk_enum is ChunkEnum.ERROR:
+                if enable_stream_print:
+                    print(f"\n<error>{chunk}</error>", end="")
+
+        # Construct complete response message
+        return Message(
+            role=Role.ASSISTANT,
+            content=answer_content,
+            tool_calls=tool_calls
+        )
+
+
+async def async_main():
+    """
+    Async test function for LiteLLMBaseLLM.
+
+    This function demonstrates how to use the LiteLLMBaseLLM class
+    with async operations. It requires proper environment variables
+    to be set for the chosen LLM provider.
+    """
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    # Example with OpenAI model through LiteLLM
+    model_name = "qwen-max-2025-01-25"  # LiteLLM will route to OpenAI
+    llm = LiteLLMBaseLLM(model_name=model_name)
+
+    # Test async chat
+    message: Message = await llm.achat(
+        [Message(role=Role.USER, content="Hello! How are you?")],
+        [],
+        enable_stream_print=True
+    )
+    print("\nAsync result:", message)
+
+
+def main():
+    """
+    Sync test function for LiteLLMBaseLLM.
+
+    This function demonstrates how to use the LiteLLMBaseLLM class
+    with synchronous operations. It requires proper environment variables
+    to be set for the chosen LLM provider.
+    """
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    # Example with OpenAI model through LiteLLM
+    model_name = "qwen-max-2025-01-25"  # LiteLLM will route to OpenAI
+    llm = LiteLLMBaseLLM(model_name=model_name)
+
+    # Test sync chat
+    message: Message = llm.chat(
+        [Message(role=Role.USER, content="Hello! How are you?")],
+        [],
+        enable_stream_print=True
+    )
+    print("\nSync result:", message)
+
+
+if __name__ == "__main__":
+    main()
+
+    # import asyncio
+    #
+    # asyncio.run(async_main())
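
For readers evaluating the new LiteLLM backend, the sketch below mirrors the module's own main()/async_main() helpers above. It is a hypothetical usage example, not part of the package diff, and it assumes FLOW_LLM_API_KEY and FLOW_LLM_BASE_URL point at an OpenAI-compatible endpoint that actually serves the chosen model name.

# Hypothetical usage sketch for flowllm.llm.litellm_llm.LiteLLMBaseLLM (not part of the diff).
# Assumes FLOW_LLM_API_KEY / FLOW_LLM_BASE_URL are set for an OpenAI-compatible endpoint
# and that the model name below is available there.
from flowllm.enumeration.chunk_enum import ChunkEnum
from flowllm.enumeration.role import Role
from flowllm.llm.litellm_llm import LiteLLMBaseLLM
from flowllm.schema.message import Message
from flowllm.utils.common_utils import load_env

load_env()  # populate FLOW_LLM_API_KEY / FLOW_LLM_BASE_URL from the environment

llm = LiteLLMBaseLLM(model_name="qwen-max-2025-01-25")
messages = [Message(role=Role.USER, content="Hello! How are you?")]

# Aggregated call: chat() consumes the stream and returns a single assistant Message.
reply: Message = llm.chat(messages, [], enable_stream_print=False)
print(reply.content)

# Streaming call: stream_chat() yields (chunk, ChunkEnum) pairs as they arrive.
for chunk, chunk_enum in llm.stream_chat(messages):
    if chunk_enum is ChunkEnum.ANSWER:
        print(chunk, end="")
    elif chunk_enum is ChunkEnum.TOOL:
        print(f"\n[tool call] {chunk.name}({chunk.arguments})")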