flowllm-0.1.0-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. flowllm-0.1.0.dist-info/METADATA +597 -0
  2. flowllm-0.1.0.dist-info/RECORD +66 -0
  3. flowllm-0.1.0.dist-info/WHEEL +5 -0
  4. flowllm-0.1.0.dist-info/entry_points.txt +3 -0
  5. flowllm-0.1.0.dist-info/licenses/LICENSE +201 -0
  6. flowllm-0.1.0.dist-info/top_level.txt +1 -0
  7. llmflow/__init__.py +0 -0
  8. llmflow/app.py +53 -0
  9. llmflow/config/__init__.py +0 -0
  10. llmflow/config/config_parser.py +80 -0
  11. llmflow/config/mock_config.yaml +58 -0
  12. llmflow/embedding_model/__init__.py +5 -0
  13. llmflow/embedding_model/base_embedding_model.py +104 -0
  14. llmflow/embedding_model/openai_compatible_embedding_model.py +95 -0
  15. llmflow/enumeration/__init__.py +0 -0
  16. llmflow/enumeration/agent_state.py +8 -0
  17. llmflow/enumeration/chunk_enum.py +9 -0
  18. llmflow/enumeration/http_enum.py +9 -0
  19. llmflow/enumeration/role.py +8 -0
  20. llmflow/llm/__init__.py +5 -0
  21. llmflow/llm/base_llm.py +138 -0
  22. llmflow/llm/openai_compatible_llm.py +283 -0
  23. llmflow/mcp_server.py +110 -0
  24. llmflow/op/__init__.py +10 -0
  25. llmflow/op/base_op.py +125 -0
  26. llmflow/op/mock_op.py +40 -0
  27. llmflow/op/prompt_mixin.py +74 -0
  28. llmflow/op/react/__init__.py +0 -0
  29. llmflow/op/react/react_v1_op.py +88 -0
  30. llmflow/op/react/react_v1_prompt.yaml +28 -0
  31. llmflow/op/vector_store/__init__.py +13 -0
  32. llmflow/op/vector_store/recall_vector_store_op.py +48 -0
  33. llmflow/op/vector_store/update_vector_store_op.py +28 -0
  34. llmflow/op/vector_store/vector_store_action_op.py +46 -0
  35. llmflow/pipeline/__init__.py +0 -0
  36. llmflow/pipeline/pipeline.py +94 -0
  37. llmflow/pipeline/pipeline_context.py +37 -0
  38. llmflow/schema/__init__.py +0 -0
  39. llmflow/schema/app_config.py +69 -0
  40. llmflow/schema/experience.py +144 -0
  41. llmflow/schema/message.py +68 -0
  42. llmflow/schema/request.py +32 -0
  43. llmflow/schema/response.py +29 -0
  44. llmflow/schema/vector_node.py +11 -0
  45. llmflow/service/__init__.py +0 -0
  46. llmflow/service/llmflow_service.py +96 -0
  47. llmflow/tool/__init__.py +9 -0
  48. llmflow/tool/base_tool.py +80 -0
  49. llmflow/tool/code_tool.py +43 -0
  50. llmflow/tool/dashscope_search_tool.py +162 -0
  51. llmflow/tool/mcp_tool.py +77 -0
  52. llmflow/tool/tavily_search_tool.py +109 -0
  53. llmflow/tool/terminate_tool.py +23 -0
  54. llmflow/utils/__init__.py +0 -0
  55. llmflow/utils/common_utils.py +17 -0
  56. llmflow/utils/file_handler.py +25 -0
  57. llmflow/utils/http_client.py +156 -0
  58. llmflow/utils/op_utils.py +102 -0
  59. llmflow/utils/registry.py +33 -0
  60. llmflow/utils/singleton.py +9 -0
  61. llmflow/utils/timer.py +53 -0
  62. llmflow/vector_store/__init__.py +7 -0
  63. llmflow/vector_store/base_vector_store.py +136 -0
  64. llmflow/vector_store/chroma_vector_store.py +188 -0
  65. llmflow/vector_store/es_vector_store.py +227 -0
  66. llmflow/vector_store/file_vector_store.py +163 -0
llmflow/llm/base_llm.py ADDED
@@ -0,0 +1,138 @@
+ import time
+ from abc import ABC
+ from typing import List, Literal, Callable
+
+ from loguru import logger
+ from pydantic import Field, BaseModel
+
+ from llmflow.schema.message import Message
+ from llmflow.tool.base_tool import BaseTool
+
+
+ class BaseLLM(BaseModel, ABC):
+     """
+     Abstract base class for Large Language Model (LLM) implementations.
+
+     This class defines the common interface and configuration parameters
+     that all LLM implementations should support. It provides a standardized
+     way to interact with different LLM providers while handling common
+     concerns like retries, error handling, and streaming.
+     """
+     # Core model configuration
+     model_name: str = Field(..., description="Name of the LLM model to use")
+
+     # Generation parameters
+     seed: int = Field(default=42, description="Random seed for reproducible outputs")
+     top_p: float | None = Field(default=None, description="Top-p (nucleus) sampling parameter")
+     # stream: bool = Field(default=True)  # Commented out - streaming is handled per request
+     stream_options: dict = Field(default={"include_usage": True}, description="Options for streaming responses")
+     temperature: float = Field(default=0.0000001, description="Sampling temperature (low for deterministic outputs)")
+     presence_penalty: float | None = Field(default=None, description="Presence penalty to reduce repetition")
+
+     # Model-specific features
+     enable_thinking: bool = Field(default=True, description="Enable reasoning/thinking mode for supported models")
+
+     # Tool usage configuration
+     tool_choice: Literal["none", "auto", "required"] = Field(default="auto", description="Strategy for tool selection")
+     parallel_tool_calls: bool = Field(default=True, description="Allow multiple tool calls in parallel")
+
+     # Error handling and reliability
+     max_retries: int = Field(default=5, description="Maximum number of retry attempts on failure")
+     raise_exception: bool = Field(default=False, description="Whether to raise exceptions or return default values")
+
+     def stream_chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
+         """
+         Stream chat completions from the LLM.
+
+         This method should yield chunks of the response as they become available,
+         allowing for real-time display of the model's output.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools the model can use
+             **kwargs: Additional model-specific parameters
+
+         Yields:
+             Chunks of the streaming response with their types
+         """
+         raise NotImplementedError
+
+     def stream_print(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
+         """
+         Stream chat completions and print them to the console in real time.
+
+         This is a convenience method for debugging and interactive use,
+         combining streaming with formatted console output.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools the model can use
+             **kwargs: Additional model-specific parameters
+         """
+         raise NotImplementedError
+
+     def _chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs) -> Message:
+         """
+         Internal method to perform a single chat completion.
+
+         This method should be implemented by subclasses to handle the actual
+         communication with the LLM provider. It's called by the public chat()
+         method, which adds retry logic and error handling.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools the model can use
+             **kwargs: Additional model-specific parameters
+
+         Returns:
+             The complete response message from the LLM
+         """
+         raise NotImplementedError
+
+     def chat(self, messages: List[Message], tools: List[BaseTool] = None, callback_fn: Callable = None,
+              default_value=None, **kwargs):
+         """
+         Perform a chat completion with retry logic and error handling.
+
+         This is the main public interface for chat completions. It wraps the
+         internal _chat() method with error handling, linear backoff between
+         retries, and optional callback processing.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools the model can use
+             callback_fn: Optional callback to process the response message
+             default_value: Value to return if all retries fail (when raise_exception=False)
+             **kwargs: Additional model-specific parameters
+
+         Returns:
+             The response message (possibly processed by callback_fn) or default_value
+
+         Raises:
+             Exception: If raise_exception=True and all retries fail
+         """
+         for i in range(self.max_retries):
+             try:
+                 # Attempt to get a response from the model
+                 message: Message = self._chat(messages, tools, **kwargs)
+
+                 # Apply the callback function if provided
+                 if callback_fn:
+                     return callback_fn(message)
+                 else:
+                     return message
+
+             except Exception as e:
+                 logger.exception(f"chat with model={self.model_name} encounter error with e={e.args}")
+
+                 # Linear backoff: wait one second longer after each failure
+                 time.sleep(1 + i)
+
+                 # Handle final retry failure
+                 if i == self.max_retries - 1:
+                     if self.raise_exception:
+                         raise e
+                     else:
+                         return default_value
+
+         return None
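chat() supplies the shared retry behaviour; a provider only needs to implement _chat() (and, optionally, the streaming methods). A minimal sketch of a concrete subclass and a call through the retrying wrapper -- EchoLLM is a made-up stand-in, not part of the package; it just mirrors the last user message:

from llmflow.enumeration.role import Role
from llmflow.llm.base_llm import BaseLLM
from llmflow.schema.message import Message


class EchoLLM(BaseLLM):
    """Toy provider used only to illustrate the BaseLLM contract."""

    def _chat(self, messages, tools=None, **kwargs) -> Message:
        # A real provider would call its API here.
        return Message(role=Role.ASSISTANT, content=messages[-1].content)


llm = EchoLLM(model_name="echo")
# chat() retries _chat() up to max_retries times with linear backoff;
# callback_fn post-processes the reply, default_value is returned on total failure.
reply = llm.chat([Message(role=Role.USER, content="hello")],
                 callback_fn=lambda m: m.content,
                 default_value="")
print(reply)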
llmflow/llm/openai_compatible_llm.py ADDED
@@ -0,0 +1,283 @@
+ import os
+ from typing import List
+
+ from dotenv import load_dotenv
+ from loguru import logger
+ from openai import OpenAI
+ from openai.types import CompletionUsage
+ from pydantic import Field, PrivateAttr, model_validator
+
+ from llmflow.enumeration.chunk_enum import ChunkEnum
+ from llmflow.enumeration.role import Role
+ from llmflow.llm import LLM_REGISTRY
+ from llmflow.llm.base_llm import BaseLLM
+ from llmflow.schema.message import Message, ToolCall
+ from llmflow.tool.base_tool import BaseTool
+
+
+ @LLM_REGISTRY.register("openai_compatible")
+ class OpenAICompatibleBaseLLM(BaseLLM):
+     """
+     OpenAI-compatible LLM implementation supporting streaming and tool calls.
+
+     This class implements the BaseLLM interface for OpenAI-compatible APIs,
+     including support for:
+     - Streaming responses with different chunk types (thinking, answer, tools)
+     - Tool calling with parallel execution
+     - Reasoning/thinking content from supported models
+     - Robust error handling and retries
+     """
+
+     # API configuration
+     api_key: str = Field(default_factory=lambda: os.getenv("LLM_API_KEY"), description="API key for authentication")
+     base_url: str = Field(default_factory=lambda: os.getenv("LLM_BASE_URL"),
+                           description="Base URL for the API endpoint")
+     _client: OpenAI = PrivateAttr()
+
+     @model_validator(mode="after")
+     def init_client(self):
+         """
+         Initialize the OpenAI client after model validation.
+
+         This validator runs after all field validation is complete,
+         ensuring we have valid API credentials before creating the client.
+
+         Returns:
+             Self for method chaining
+         """
+         self._client = OpenAI(api_key=self.api_key, base_url=self.base_url)
+         return self
+
+     def stream_chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
+         """
+         Stream chat completions from an OpenAI-compatible API.
+
+         This method handles streaming responses and categorizes chunks into different types:
+         - THINK: Reasoning/thinking content from the model
+         - ANSWER: Regular response content
+         - TOOL: Tool calls that need to be executed
+         - USAGE: Token usage statistics
+         - ERROR: Error information
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             **kwargs: Additional parameters
+
+         Yields:
+             Tuple of (chunk_content, ChunkEnum) for each streaming piece
+         """
+         for i in range(self.max_retries):
+             try:
+                 # Create streaming completion request
+                 completion = self._client.chat.completions.create(
+                     model=self.model_name,
+                     messages=[x.simple_dump() for x in messages],
+                     seed=self.seed,
+                     top_p=self.top_p,
+                     stream=True,
+                     stream_options=self.stream_options,
+                     temperature=self.temperature,
+                     extra_body={"enable_thinking": self.enable_thinking},  # Enable reasoning mode
+                     tools=[x.simple_dump() for x in tools] if tools else None,
+                     tool_choice=self.tool_choice,
+                     parallel_tool_calls=self.parallel_tool_calls)
+
+                 # Initialize tool call tracking
+                 ret_tools = []  # Accumulate tool calls across chunks
+                 is_answering = False  # Track when model starts answering
+
+                 # Process each chunk in the streaming response
+                 for chunk in completion:
+                     # Handle chunks without choices (usually usage info)
+                     if not chunk.choices:
+                         yield chunk.usage, ChunkEnum.USAGE
+
+                     else:
+                         delta = chunk.choices[0].delta
+
+                         # Handle reasoning/thinking content (model's internal thoughts)
+                         if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
+                             yield delta.reasoning_content, ChunkEnum.THINK
+
+                         else:
+                             # Mark transition from thinking to answering
+                             if not is_answering:
+                                 is_answering = True
+
+                             # Handle regular response content
+                             if delta.content is not None:
+                                 yield delta.content, ChunkEnum.ANSWER
+
+                             # Handle tool calls (function calling)
+                             if delta.tool_calls is not None:
+                                 for tool_call in delta.tool_calls:
+                                     index = tool_call.index
+
+                                     # Ensure we have enough tool call slots
+                                     while len(ret_tools) <= index:
+                                         ret_tools.append(ToolCall(index=index))
+
+                                     # Accumulate tool call information across chunks
+                                     if tool_call.id:
+                                         ret_tools[index].id += tool_call.id
+
+                                     if tool_call.function and tool_call.function.name:
+                                         ret_tools[index].name += tool_call.function.name
+
+                                     if tool_call.function and tool_call.function.arguments:
+                                         ret_tools[index].arguments += tool_call.function.arguments
+
+                 # Yield completed tool calls after streaming finishes
+                 if ret_tools:
+                     tool_dict = {x.name: x for x in tools} if tools else {}
+                     for tool in ret_tools:
+                         # Only yield tool calls that correspond to available tools
+                         if tool.name not in tool_dict:
+                             continue
+
+                         yield tool, ChunkEnum.TOOL
+
+                 return  # Success - exit retry loop
+
+             except Exception as e:
+                 logger.exception(f"stream chat with model={self.model_name} encounter error with e={e.args}")
+
+                 # Handle retry logic
+                 if i == self.max_retries - 1 and self.raise_exception:
+                     raise e
+                 else:
+                     yield e.args, ChunkEnum.ERROR
+
+     def _chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs) -> Message:
+         """
+         Perform a complete chat completion by aggregating streaming chunks.
+
+         This method consumes the entire streaming response and combines all
+         chunks into a single Message object. It separates reasoning content,
+         regular answer content, and tool calls.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             **kwargs: Additional parameters
+
+         Returns:
+             Complete Message with all content aggregated
+         """
+         # Initialize content accumulators
+         reasoning_content = ""  # Model's internal reasoning
+         answer_content = ""  # Final response content
+         tool_calls = []  # List of tool calls to execute
+
+         # Consume streaming response and aggregate chunks by type
+         for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
+             if chunk_enum is ChunkEnum.THINK:
+                 reasoning_content += chunk
+
+             elif chunk_enum is ChunkEnum.ANSWER:
+                 answer_content += chunk
+
+             elif chunk_enum is ChunkEnum.TOOL:
+                 tool_calls.append(chunk)
+
+             # Note: USAGE and ERROR chunks are ignored in non-streaming mode
+
+         # Construct complete response message
+         return Message(role=Role.ASSISTANT,
+                        reasoning_content=reasoning_content,
+                        content=answer_content,
+                        tool_calls=tool_calls)
+
+     def stream_print(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
+         """
+         Stream chat completions with formatted console output.
+
+         This method provides a real-time view of the model's response,
+         with different formatting for different types of content:
+         - Thinking content is wrapped in <think></think> tags
+         - Answer content is printed directly
+         - Tool calls are formatted as JSON
+         - Usage statistics and errors are clearly marked
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             **kwargs: Additional parameters
+         """
+         # Track which sections we've entered for proper formatting
+         enter_think = False  # Whether we've started printing thinking content
+         enter_answer = False  # Whether we've started printing answer content
+
+         # Process each streaming chunk with appropriate formatting
+         for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
+             if chunk_enum is ChunkEnum.USAGE:
+                 # Display token usage statistics
+                 if isinstance(chunk, CompletionUsage):
+                     print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                 else:
+                     print(f"\n<usage>{chunk}</usage>")
+
+             elif chunk_enum is ChunkEnum.THINK:
+                 # Format thinking/reasoning content
+                 if not enter_think:
+                     enter_think = True
+                     print("<think>\n", end="")
+                 print(chunk, end="")
+
+             elif chunk_enum is ChunkEnum.ANSWER:
+                 # Format regular answer content
+                 if not enter_answer:
+                     enter_answer = True
+                     # Close thinking section if we were in it
+                     if enter_think:
+                         print("\n</think>")
+                 print(chunk, end="")
+
+             elif chunk_enum is ChunkEnum.TOOL:
+                 # Format tool calls as structured JSON
+                 assert isinstance(chunk, ToolCall)
+                 print(f"\n<tool>{chunk.model_dump_json(indent=2)}</tool>", end="")
+
+             elif chunk_enum is ChunkEnum.ERROR:
+                 # Display error information
+                 print(f"\n<error>{chunk}</error>", end="")
+
+
+ def main():
+     """
+     Demo function to test the OpenAI-compatible LLM implementation.
+
+     This function demonstrates:
+     1. Basic chat without tools
+     2. Chat with tool usage (search and code tools)
+     3. Real-time streaming output formatting
+     """
+     from llmflow.tool.dashscope_search_tool import DashscopeSearchTool
+     from llmflow.tool.code_tool import CodeTool
+     from llmflow.enumeration.role import Role
+
+     # Load environment variables for API credentials
+     load_dotenv()
+
+     # Initialize the LLM with a specific model
+     model_name = "qwen-max-2025-01-25"
+     llm = OpenAICompatibleBaseLLM(model_name=model_name)
+
+     # Set up available tools
+     tools: List[BaseTool] = [DashscopeSearchTool(), CodeTool()]
+
+     # Test 1: Simple greeting without tools
+     print("=== Test 1: Simple Chat ===")
+     llm.stream_print([Message(role=Role.USER, content="hello")], [])
+
+     print("\n" + "=" * 20)
+
+     # Test 2: Complex query that might use tools
+     print("\n=== Test 2: Chat with Tools ===")
+     llm.stream_print([Message(role=Role.USER, content="What's the weather like in Beijing today?")], tools)
+
+
+ if __name__ == "__main__":
+     main()
+     # Launch with: python -m llmflow.llm.openai_compatible_llm
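For callers that want the raw stream instead of stream_print()'s console formatting, a rough usage sketch of stream_chat(), assuming LLM_API_KEY and LLM_BASE_URL are set in the environment and using an example model name:

from dotenv import load_dotenv

from llmflow.enumeration.chunk_enum import ChunkEnum
from llmflow.enumeration.role import Role
from llmflow.llm.openai_compatible_llm import OpenAICompatibleBaseLLM
from llmflow.schema.message import Message

load_dotenv()  # pulls LLM_API_KEY / LLM_BASE_URL from .env
llm = OpenAICompatibleBaseLLM(model_name="qwen3-32b")  # model name is only an example

answer = ""
for chunk, chunk_enum in llm.stream_chat([Message(role=Role.USER, content="hello")]):
    if chunk_enum is ChunkEnum.ANSWER:
        answer += chunk                    # incremental answer text
    elif chunk_enum is ChunkEnum.TOOL:
        print("tool call:", chunk.name, chunk.arguments)
    elif chunk_enum is ChunkEnum.ERROR:
        print("stream error:", chunk)      # chunk is the exception args
print(answer)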
llmflow/mcp_server.py ADDED
@@ -0,0 +1,110 @@
+ import sys
+ from typing import List
+
+ from dotenv import load_dotenv
+ from fastmcp import FastMCP
+
+ from llmflow.service.llmflow_service import LLMFlowService
+
+ load_dotenv()
+
+ mcp = FastMCP("llmflow")
+ service = LLMFlowService(sys.argv[1:])
+
+
+ @mcp.tool
+ def retriever(query: str,
+               messages: List[dict] = None,
+               top_k: int = 1,
+               workspace_id: str = "default",
+               config: dict = None) -> dict:
+     """
+     Retrieve experiences from the workspace based on a query.
+
+     Args:
+         query: Query string
+         messages: List of messages
+         top_k: Number of top experiences to retrieve
+         workspace_id: Workspace identifier
+         config: Additional configuration parameters
+
+     Returns:
+         Dictionary containing retrieved experiences
+     """
+     return service(api="retriever", request={
+         "query": query,
+         "messages": messages if messages else [],
+         "top_k": top_k,
+         "workspace_id": workspace_id,
+         "config": config if config else {},
+     }).model_dump()
+
+
+ @mcp.tool
+ def summarizer(traj_list: List[dict], workspace_id: str = "default", config: dict = None) -> dict:
+     """
+     Summarize trajectories into experiences.
+
+     Args:
+         traj_list: List of trajectories
+         workspace_id: Workspace identifier
+         config: Additional configuration parameters
+
+     Returns:
+         experiences
+     """
+     return service(api="summarizer", request={
+         "traj_list": traj_list,
+         "workspace_id": workspace_id,
+         "config": config if config else {},
+     }).model_dump()
+
+
+ @mcp.tool
+ def vector_store(action: str,
+                  src_workspace_id: str = "",
+                  workspace_id: str = "",
+                  path: str = "./",
+                  config: dict = None) -> dict:
+     """
+     Perform vector store operations.
+
+     Args:
+         action: Action to perform (e.g., "copy", "delete", "dump", "load")
+         src_workspace_id: Source workspace identifier
+         workspace_id: Workspace identifier
+         path: Path to the vector store
+         config: Additional configuration parameters
+
+     Returns:
+         Dictionary containing the result of the vector store operation
+     """
+     return service(api="vector_store", request={
+         "action": action,
+         "src_workspace_id": src_workspace_id,
+         "workspace_id": workspace_id,
+         "path": path,
+         "config": config if config else {},
+     }).model_dump()
+
+
+ def main():
+     mcp_transport: str = service.init_app_config.mcp_transport
+     if mcp_transport == "sse":
+         mcp.run(transport="sse", host=service.http_service_config.host, port=service.http_service_config.port)
+     elif mcp_transport == "stdio":
+         mcp.run(transport="stdio")
+     else:
+         raise ValueError(f"Unsupported mcp transport: {mcp_transport}")
+
+
+ if __name__ == "__main__":
+     main()
+
+ # start with:
+ # llmflow_mcp \
+ #   mcp_transport=stdio \
+ #   http_service.port=8001 \
+ #   llm.default.model_name=qwen3-32b \
+ #   embedding_model.default.model_name=text-embedding-v4 \
+ #   vector_store.default.backend=local_file
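Each tool above is a thin wrapper that builds a plain request dict and forwards it to LLMFlowService. A sketch of the equivalent direct call, bypassing MCP, assuming LLMFlowService accepts the same key=value overrides shown in the launch comment (that argument format is inferred, not documented here):

from dotenv import load_dotenv

from llmflow.service.llmflow_service import LLMFlowService

load_dotenv()
service = LLMFlowService([
    "llm.default.model_name=qwen3-32b",
    "embedding_model.default.model_name=text-embedding-v4",
    "vector_store.default.backend=local_file",
])

# Same payload shape the retriever tool builds before calling the service.
response = service(api="retriever", request={
    "query": "how do I configure the vector store?",
    "messages": [],
    "top_k": 1,
    "workspace_id": "default",
    "config": {},
})
print(response.model_dump())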
llmflow/op/__init__.py ADDED
@@ -0,0 +1,10 @@
+ from llmflow.utils.registry import Registry
+
+ OP_REGISTRY = Registry()
+
+ from llmflow.op.mock_op import Mock1Op, Mock2Op, Mock3Op, Mock4Op, Mock5Op, Mock6Op
+
+ from llmflow.op.vector_store.update_vector_store_op import UpdateVectorStoreOp
+ from llmflow.op.vector_store.recall_vector_store_op import RecallVectorStoreOp
+ from llmflow.op.vector_store.vector_store_action_op import VectorStoreActionOp
+ from llmflow.op.react.react_v1_op import ReactV1Op
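OP_REGISTRY is built from the same Registry class as LLM_REGISTRY and EMBEDDING_MODEL_REGISTRY, so ops are presumably registered and looked up the same way. An illustrative sketch only -- the "my_op" name and MyOp class are invented for this example:

from llmflow.op import OP_REGISTRY
from llmflow.op.base_op import BaseOp


@OP_REGISTRY.register("my_op")  # assumed: same register(name) decorator used by LLM_REGISTRY
class MyOp(BaseOp):
    def execute(self):
        ...  # op logic goes here


op_cls = OP_REGISTRY["my_op"]   # lookup by name, mirroring LLM_REGISTRY[backend] in BaseOp.llm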
llmflow/op/base_op.py ADDED
@@ -0,0 +1,125 @@
+ from abc import abstractmethod, ABC
+ from concurrent.futures import Future
+ from pathlib import Path
+ from typing import List
+
+ from loguru import logger
+ from tqdm import tqdm
+
+ from llmflow.embedding_model import EMBEDDING_MODEL_REGISTRY
+ from llmflow.embedding_model.base_embedding_model import BaseEmbeddingModel
+ from llmflow.llm import LLM_REGISTRY
+ from llmflow.llm.base_llm import BaseLLM
+ from llmflow.op.prompt_mixin import PromptMixin
+ from llmflow.pipeline.pipeline_context import PipelineContext
+ from llmflow.schema.app_config import OpConfig, LLMConfig, EmbeddingModelConfig
+ from llmflow.utils.common_utils import camel_to_snake
+ from llmflow.utils.timer import Timer
+ from llmflow.vector_store.base_vector_store import BaseVectorStore
+
+
+ class BaseOp(PromptMixin, ABC):
+     current_path: str = __file__
+
+     def __init__(self, context: PipelineContext, op_config: OpConfig):
+         super().__init__()
+         self.context: PipelineContext = context
+         self.op_config: OpConfig = op_config
+         self.timer = Timer(name=self.simple_name)
+
+         self._prepare_prompt()
+
+         self._llm: BaseLLM | None = None
+         self._embedding_model: BaseEmbeddingModel | None = None
+         self._vector_store: BaseVectorStore | None = None
+
+         self.task_list: List[Future] = []
+
+     def _prepare_prompt(self):
+         if self.op_config.prompt_file_path:
+             prompt_file_path = self.op_config.prompt_file_path
+         else:
+             prompt_name = self.simple_name.replace("_op", "_prompt.yaml")
+             prompt_file_path = Path(self.current_path).parent / prompt_name
+
+         # Load custom prompts from prompt file
+         self.load_prompt_by_file(prompt_file_path=prompt_file_path)
+
+         # Load custom prompts from config
+         self.load_prompt_dict(prompt_dict=self.op_config.prompt_dict)
+
+     @property
+     def simple_name(self) -> str:
+         return camel_to_snake(self.__class__.__name__)
+
+     @property
+     def op_params(self) -> dict:
+         return self.op_config.params
+
+     @abstractmethod
+     def execute(self):
+         ...
+
+     def execute_wrap(self):
+         try:
+             with self.timer:
+                 return self.execute()
+
+         except Exception as e:
+             logger.exception(f"op={self.simple_name} execute failed, error={e.args}")
+
+     def submit_task(self, fn, *args, **kwargs):
+         task = self.context.thread_pool.submit(fn, *args, **kwargs)
+         self.task_list.append(task)
+         return self
+
+     def join_task(self, task_desc: str = None) -> list:
+         result = []
+         for task in tqdm(self.task_list, desc=task_desc or (self.simple_name + ".join_task")):
+             t_result = task.result()
+             if t_result:
+                 if isinstance(t_result, list):
+                     result.extend(t_result)
+                 else:
+                     result.append(t_result)
+         self.task_list.clear()
+         return result
+
+     @property
+     def llm(self) -> BaseLLM:
+         if self._llm is None:
+             llm_name: str = self.op_config.llm
+             assert llm_name in self.context.app_config.llm, f"llm={llm_name} not found in app_config.llm!"
+             llm_config: LLMConfig = self.context.app_config.llm[llm_name]
+
+             assert llm_config.backend in LLM_REGISTRY, f"llm.backend={llm_config.backend} not found in LLM_REGISTRY!"
+             llm_cls = LLM_REGISTRY[llm_config.backend]
+             self._llm = llm_cls(model_name=llm_config.model_name, **llm_config.params)
+
+         return self._llm
+
+     @property
+     def embedding_model(self):
+         if self._embedding_model is None:
+             embedding_model_name: str = self.op_config.embedding_model
+             assert embedding_model_name in self.context.app_config.embedding_model, \
+                 f"embedding_model={embedding_model_name} not found in app_config.embedding_model!"
+             embedding_model_config: EmbeddingModelConfig = self.context.app_config.embedding_model[embedding_model_name]
+
+             assert embedding_model_config.backend in EMBEDDING_MODEL_REGISTRY, \
+                 f"embedding_model.backend={embedding_model_config.backend} not found in EMBEDDING_MODEL_REGISTRY!"
+             embedding_model_cls = EMBEDDING_MODEL_REGISTRY[embedding_model_config.backend]
+             self._embedding_model = embedding_model_cls(model_name=embedding_model_config.model_name,
+                                                         **embedding_model_config.params)
+
+         return self._embedding_model
+
+     @property
+     def vector_store(self):
+         if self._vector_store is None:
+             vector_store_name: str = self.op_config.vector_store
+             assert vector_store_name in self.context.vector_store_dict, \
+                 f"vector_store={vector_store_name} not found in vector_store_dict!"
+             self._vector_store = self.context.vector_store_dict[vector_store_name]
+
+         return self._vector_store
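Only execute() is abstract; the llm, embedding_model, and vector_store properties are resolved lazily from the pipeline's app_config, and submit_task()/join_task() fan work out to the context's shared thread pool. A rough sketch of a concrete op using those helpers -- the class is hypothetical, and per _prepare_prompt() above a matching uppercase_prompt.yaml (or an explicit op_config.prompt_file_path) is assumed to exist:

from llmflow.op.base_op import BaseOp


class UppercaseOp(BaseOp):
    """Hypothetical op: upper-cases the texts passed in via op params."""

    def execute(self):
        texts = self.op_params.get("texts", [])       # per-op params from OpConfig.params
        for text in texts:
            self.submit_task(str.upper, text)         # fan out to context.thread_pool
        return self.join_task(task_desc="uppercase")  # gather results, clear task_list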