flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- flowllm/__init__.py +21 -0
- flowllm/app.py +15 -0
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/default.yaml +77 -0
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +242 -0
- flowllm/context/base_context.py +79 -0
- flowllm/context/flow_context.py +16 -0
- llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
- flowllm/context/registry.py +30 -0
- flowllm/context/service_context.py +147 -0
- flowllm/embedding_model/__init__.py +1 -0
- {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
- {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/expression_parser.py +171 -0
- flowllm/llm/__init__.py +2 -0
- {llmflow → flowllm}/llm/base_llm.py +100 -18
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +439 -0
- flowllm/op/__init__.py +11 -0
- llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +108 -0
- flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
- flowllm/op/akshare/get_ak_a_info_op.py +140 -0
- flowllm/op/base_llm_op.py +64 -0
- flowllm/op/base_op.py +148 -0
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/gallery/mock_op.py +42 -0
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +23 -0
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +21 -0
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/flow_response.py +12 -0
- flowllm/schema/message.py +35 -0
- flowllm/schema/service_config.py +72 -0
- flowllm/schema/tool_call.py +118 -0
- {llmflow → flowllm}/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -0
- flowllm/service/base_service.py +68 -0
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +79 -0
- flowllm/service/mcp_service.py +47 -0
- flowllm/storage/__init__.py +1 -0
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/storage/cache/data_cache.py +375 -0
- flowllm/storage/vector_store/__init__.py +3 -0
- flowllm/storage/vector_store/base_vector_store.py +44 -0
- {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
- {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
- llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
- flowllm/utils/common_utils.py +52 -0
- flowllm/utils/fetch_url.py +117 -0
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {llmflow → flowllm}/utils/timer.py +5 -4
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
- flowllm-0.1.2.dist-info/top_level.txt +1 -0
- flowllm-0.1.0.dist-info/RECORD +0 -66
- flowllm-0.1.0.dist-info/entry_points.txt +0 -3
- flowllm-0.1.0.dist-info/top_level.txt +0 -1
- llmflow/app.py +0 -53
- llmflow/config/config_parser.py +0 -80
- llmflow/config/mock_config.yaml +0 -58
- llmflow/embedding_model/__init__.py +0 -5
- llmflow/enumeration/agent_state.py +0 -8
- llmflow/llm/__init__.py +0 -5
- llmflow/llm/openai_compatible_llm.py +0 -283
- llmflow/mcp_server.py +0 -110
- llmflow/op/__init__.py +0 -10
- llmflow/op/base_op.py +0 -125
- llmflow/op/mock_op.py +0 -40
- llmflow/op/vector_store/__init__.py +0 -13
- llmflow/op/vector_store/recall_vector_store_op.py +0 -48
- llmflow/op/vector_store/update_vector_store_op.py +0 -28
- llmflow/op/vector_store/vector_store_action_op.py +0 -46
- llmflow/pipeline/pipeline.py +0 -94
- llmflow/pipeline/pipeline_context.py +0 -37
- llmflow/schema/app_config.py +0 -69
- llmflow/schema/experience.py +0 -144
- llmflow/schema/message.py +0 -68
- llmflow/schema/request.py +0 -32
- llmflow/schema/response.py +0 -29
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +0 -96
- llmflow/tool/__init__.py +0 -9
- llmflow/tool/base_tool.py +0 -80
- llmflow/tool/code_tool.py +0 -43
- llmflow/tool/dashscope_search_tool.py +0 -162
- llmflow/tool/mcp_tool.py +0 -77
- llmflow/tool/tavily_search_tool.py +0 -109
- llmflow/tool/terminate_tool.py +0 -23
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +0 -17
- llmflow/utils/file_handler.py +0 -25
- llmflow/utils/http_client.py +0 -156
- llmflow/utils/op_utils.py +0 -102
- llmflow/utils/registry.py +0 -33
- llmflow/vector_store/__init__.py +0 -7
- llmflow/vector_store/base_vector_store.py +0 -136
- {llmflow → flowllm/context}/__init__.py +0 -0
- {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
- {llmflow → flowllm}/enumeration/http_enum.py +0 -0
- {llmflow → flowllm}/enumeration/role.py +0 -0
- {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
- {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
- /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
- {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
- {llmflow/schema → flowllm/utils}/__init__.py +0 -0
- {llmflow → flowllm}/utils/singleton.py +0 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
llmflow/app.py
DELETED
@@ -1,53 +0,0 @@
import sys

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI

from llmflow.schema.request import RetrieverRequest, SummarizerRequest, VectorStoreRequest, AgentRequest
from llmflow.schema.response import RetrieverResponse, SummarizerResponse, VectorStoreResponse, AgentResponse
from llmflow.service.llmflow_service import LLMFlowService

load_dotenv()

app = FastAPI()
service = LLMFlowService(sys.argv[1:])


@app.post('/retriever', response_model=RetrieverResponse)
def call_retriever(request: RetrieverRequest):
    return service(api="retriever", request=request)


@app.post('/summarizer', response_model=SummarizerResponse)
def call_summarizer(request: SummarizerRequest):
    return service(api="summarizer", request=request)


@app.post('/vector_store', response_model=VectorStoreResponse)
def call_vector_store(request: VectorStoreRequest):
    return service(api="vector_store", request=request)


@app.post('/agent', response_model=AgentResponse)
def call_agent(request: AgentRequest):
    return service(api="agent", request=request)


def main():
    uvicorn.run(app=app,
                host=service.http_service_config.host,
                port=service.http_service_config.port,
                timeout_keep_alive=service.http_service_config.timeout_keep_alive,
                limit_concurrency=service.http_service_config.limit_concurrency)


if __name__ == "__main__":
    main()

# start with:
# llmflow \
# http_service.port=8001 \
# llm.default.model_name=qwen3-32b \
# embedding_model.default.model_name=text-embedding-v4 \
# vector_store.default.backend=local_file
llmflow/config/config_parser.py
DELETED
@@ -1,80 +0,0 @@
import json
from pathlib import Path

from loguru import logger
from omegaconf import OmegaConf, DictConfig

from llmflow.schema.app_config import AppConfig


class ConfigParser:
    """
    Configuration parser that handles loading and merging configurations from multiple sources.

    The configuration loading priority (from lowest to highest):
    1. Default configuration from AppConfig schema
    2. YAML configuration file
    3. Command line arguments
    4. Runtime keyword arguments
    """

    def __init__(self, args: list):
        """
        Initialize the configuration parser with command line arguments.

        Args:
            args: List of command line arguments in dotlist format (e.g., ['key=value'])
        """
        # Step 1: Initialize with default configuration from AppConfig schema
        self.app_config: DictConfig = OmegaConf.structured(AppConfig)

        # Step 2: Load configuration from YAML file
        # First, parse CLI arguments to check if custom config path is specified
        cli_config: DictConfig = OmegaConf.from_dotlist(args)
        temp_config: AppConfig = OmegaConf.to_object(OmegaConf.merge(self.app_config, cli_config))

        # Determine config file path: either from CLI args or use predefined config
        if temp_config.config_path:
            # Use custom config path if provided
            config_path = Path(temp_config.config_path)
        else:
            # Use predefined config name from the config directory
            pre_defined_config = temp_config.pre_defined_config
            if not pre_defined_config.endswith(".yaml"):
                pre_defined_config += ".yaml"
            config_path = Path(__file__).parent / pre_defined_config

        logger.info(f"load config from path={config_path}")
        yaml_config = OmegaConf.load(config_path)
        # Merge YAML config with default config
        self.app_config = OmegaConf.merge(self.app_config, yaml_config)

        # Step 3: Merge CLI arguments (highest priority)
        self.app_config = OmegaConf.merge(self.app_config, cli_config)

        # Log the final merged configuration
        app_config_dict = OmegaConf.to_container(self.app_config, resolve=True)
        logger.info(f"app_config=\n{json.dumps(app_config_dict, indent=2, ensure_ascii=False)}")

    def get_app_config(self, **kwargs) -> AppConfig:
        """
        Get the application configuration with optional runtime overrides.

        Args:
            **kwargs: Additional configuration parameters to override at runtime

        Returns:
            AppConfig: The final application configuration object
        """
        # Create a copy of the current configuration
        app_config = self.app_config.copy()

        # Apply runtime overrides if provided
        if kwargs:
            # Convert kwargs to dotlist format for OmegaConf
            kwargs_list = [f"{k}={v}" for k, v in kwargs.items()]
            update_config = OmegaConf.from_dotlist(kwargs_list)
            app_config = OmegaConf.merge(app_config, update_config)

        # Convert OmegaConf DictConfig to structured AppConfig object
        return OmegaConf.to_object(app_config)
llmflow/config/mock_config.yaml
DELETED
@@ -1,58 +0,0 @@
# demo config.yaml

http_service:
  host: "0.0.0.0"
  port: 8001
  timeout_keep_alive: 600
  limit_concurrency: 64

thread_pool:
  max_workers: 10

api:
  retriever: mock1_op->[mock4_op->mock2_op|mock5_op]->[mock3_op|mock6_op]
  summarizer: mock1_op->[mock4_op->mock2_op|mock5_op]->mock3_op
  vector_store: mock6_op

op:
  mock1_op:
    backend: mock1_op
    llm: default
    vector_store: default
    params:
      a: 1
      b: 2
  mock2_op:
    backend: mock2_op
    params:
      a: 1
  mock3_op:
    backend: mock3_op
  mock4_op:
    backend: mock4_op
  mock5_op:
    backend: mock5_op
  mock6_op:
    backend: mock6_op

llm:
  default:
    backend: openai_compatible
    model_name: qwen3-32b
    params:
      temperature: 0.6

embedding_model:
  default:
    backend: openai_compatible
    model_name: text-embedding-v4
    params:
      dimensions: 1024

vector_store:
  default:
    backend: elasticsearch
    embedding_model: default
    params:
      hosts: "http://localhost:9200"
llmflow/llm/openai_compatible_llm.py
DELETED
@@ -1,283 +0,0 @@
import os
from typing import List

from dotenv import load_dotenv
from loguru import logger
from openai import OpenAI
from openai.types import CompletionUsage
from pydantic import Field, PrivateAttr, model_validator

from llmflow.enumeration.chunk_enum import ChunkEnum
from llmflow.enumeration.role import Role
from llmflow.llm import LLM_REGISTRY
from llmflow.llm.base_llm import BaseLLM
from llmflow.schema.message import Message, ToolCall
from llmflow.tool.base_tool import BaseTool


@LLM_REGISTRY.register("openai_compatible")
class OpenAICompatibleBaseLLM(BaseLLM):
    """
    OpenAI-compatible LLM implementation supporting streaming and tool calls.

    This class implements the BaseLLM interface for OpenAI-compatible APIs,
    including support for:
    - Streaming responses with different chunk types (thinking, answer, tools)
    - Tool calling with parallel execution
    - Reasoning/thinking content from supported models
    - Robust error handling and retries
    """

    # API configuration
    api_key: str = Field(default_factory=lambda: os.getenv("LLM_API_KEY"), description="API key for authentication")
    base_url: str = Field(default_factory=lambda: os.getenv("LLM_BASE_URL"),
                          description="Base URL for the API endpoint")
    _client: OpenAI = PrivateAttr()

    @model_validator(mode="after")
    def init_client(self):
        """
        Initialize the OpenAI client after model validation.

        This validator runs after all field validation is complete,
        ensuring we have valid API credentials before creating the client.

        Returns:
            Self for method chaining
        """
        self._client = OpenAI(api_key=self.api_key, base_url=self.base_url)
        return self

    def stream_chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
        """
        Stream chat completions from OpenAI-compatible API.

        This method handles streaming responses and categorizes chunks into different types:
        - THINK: Reasoning/thinking content from the model
        - ANSWER: Regular response content
        - TOOL: Tool calls that need to be executed
        - USAGE: Token usage statistics
        - ERROR: Error information

        Args:
            messages: List of conversation messages
            tools: Optional list of tools available to the model
            **kwargs: Additional parameters

        Yields:
            Tuple of (chunk_content, ChunkEnum) for each streaming piece
        """
        for i in range(self.max_retries):
            try:
                # Create streaming completion request
                completion = self._client.chat.completions.create(
                    model=self.model_name,
                    messages=[x.simple_dump() for x in messages],
                    seed=self.seed,
                    top_p=self.top_p,
                    stream=True,
                    stream_options=self.stream_options,
                    temperature=self.temperature,
                    extra_body={"enable_thinking": self.enable_thinking},  # Enable reasoning mode
                    tools=[x.simple_dump() for x in tools] if tools else None,
                    tool_choice=self.tool_choice,
                    parallel_tool_calls=self.parallel_tool_calls)

                # Initialize tool call tracking
                ret_tools = []  # Accumulate tool calls across chunks
                is_answering = False  # Track when model starts answering

                # Process each chunk in the streaming response
                for chunk in completion:
                    # Handle chunks without choices (usually usage info)
                    if not chunk.choices:
                        yield chunk.usage, ChunkEnum.USAGE

                    else:
                        delta = chunk.choices[0].delta

                        # Handle reasoning/thinking content (model's internal thoughts)
                        if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
                            yield delta.reasoning_content, ChunkEnum.THINK

                        else:
                            # Mark transition from thinking to answering
                            if not is_answering:
                                is_answering = True

                            # Handle regular response content
                            if delta.content is not None:
                                yield delta.content, ChunkEnum.ANSWER

                            # Handle tool calls (function calling)
                            if delta.tool_calls is not None:
                                for tool_call in delta.tool_calls:
                                    index = tool_call.index

                                    # Ensure we have enough tool call slots
                                    while len(ret_tools) <= index:
                                        ret_tools.append(ToolCall(index=index))

                                    # Accumulate tool call information across chunks
                                    if tool_call.id:
                                        ret_tools[index].id += tool_call.id

                                    if tool_call.function and tool_call.function.name:
                                        ret_tools[index].name += tool_call.function.name

                                    if tool_call.function and tool_call.function.arguments:
                                        ret_tools[index].arguments += tool_call.function.arguments

                # Yield completed tool calls after streaming finishes
                if ret_tools:
                    tool_dict = {x.name: x for x in tools} if tools else {}
                    for tool in ret_tools:
                        # Only yield tool calls that correspond to available tools
                        if tool.name not in tool_dict:
                            continue

                        yield tool, ChunkEnum.TOOL

                return  # Success - exit retry loop

            except Exception as e:
                logger.exception(f"stream chat with model={self.model_name} encounter error with e={e.args}")

                # Handle retry logic
                if i == self.max_retries - 1 and self.raise_exception:
                    raise e
                else:
                    yield e.args, ChunkEnum.ERROR

    def _chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs) -> Message:
        """
        Perform a complete chat completion by aggregating streaming chunks.

        This method consumes the entire streaming response and combines all
        chunks into a single Message object. It separates reasoning content,
        regular answer content, and tool calls.

        Args:
            messages: List of conversation messages
            tools: Optional list of tools available to the model
            **kwargs: Additional parameters

        Returns:
            Complete Message with all content aggregated
        """
        # Initialize content accumulators
        reasoning_content = ""  # Model's internal reasoning
        answer_content = ""  # Final response content
        tool_calls = []  # List of tool calls to execute

        # Consume streaming response and aggregate chunks by type
        for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
            if chunk_enum is ChunkEnum.THINK:
                reasoning_content += chunk

            elif chunk_enum is ChunkEnum.ANSWER:
                answer_content += chunk

            elif chunk_enum is ChunkEnum.TOOL:
                tool_calls.append(chunk)

            # Note: USAGE and ERROR chunks are ignored in non-streaming mode

        # Construct complete response message
        return Message(role=Role.ASSISTANT,
                       reasoning_content=reasoning_content,
                       content=answer_content,
                       tool_calls=tool_calls)

    def stream_print(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
        """
        Stream chat completions with formatted console output.

        This method provides a real-time view of the model's response,
        with different formatting for different types of content:
        - Thinking content is wrapped in <think></think> tags
        - Answer content is printed directly
        - Tool calls are formatted as JSON
        - Usage statistics and errors are clearly marked

        Args:
            messages: List of conversation messages
            tools: Optional list of tools available to the model
            **kwargs: Additional parameters
        """
        # Track which sections we've entered for proper formatting
        enter_think = False  # Whether we've started printing thinking content
        enter_answer = False  # Whether we've started printing answer content

        # Process each streaming chunk with appropriate formatting
        for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
            if chunk_enum is ChunkEnum.USAGE:
                # Display token usage statistics
                if isinstance(chunk, CompletionUsage):
                    print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
                else:
                    print(f"\n<usage>{chunk}</usage>")

            elif chunk_enum is ChunkEnum.THINK:
                # Format thinking/reasoning content
                if not enter_think:
                    enter_think = True
                    print("<think>\n", end="")
                print(chunk, end="")

            elif chunk_enum is ChunkEnum.ANSWER:
                # Format regular answer content
                if not enter_answer:
                    enter_answer = True
                    # Close thinking section if we were in it
                    if enter_think:
                        print("\n</think>")
                print(chunk, end="")

            elif chunk_enum is ChunkEnum.TOOL:
                # Format tool calls as structured JSON
                assert isinstance(chunk, ToolCall)
                print(f"\n<tool>{chunk.model_dump_json(indent=2)}</tool>", end="")

            elif chunk_enum is ChunkEnum.ERROR:
                # Display error information
                print(f"\n<error>{chunk}</error>", end="")


def main():
    """
    Demo function to test the OpenAI-compatible LLM implementation.

    This function demonstrates:
    1. Basic chat without tools
    2. Chat with tool usage (search and code tools)
    3. Real-time streaming output formatting
    """
    from llmflow.tool.dashscope_search_tool import DashscopeSearchTool
    from llmflow.tool.code_tool import CodeTool
    from llmflow.enumeration.role import Role

    # Load environment variables for API credentials
    load_dotenv()

    # Initialize the LLM with a specific model
    model_name = "qwen-max-2025-01-25"
    llm = OpenAICompatibleBaseLLM(model_name=model_name)

    # Set up available tools
    tools: List[BaseTool] = [DashscopeSearchTool(), CodeTool()]

    # Test 1: Simple greeting without tools
    print("=== Test 1: Simple Chat ===")
    llm.stream_print([Message(role=Role.USER, content="hello")], [])

    print("\n" + "=" * 20)

    # Test 2: Complex query that might use tools
    print("\n=== Test 2: Chat with Tools ===")
    llm.stream_print([Message(role=Role.USER, content="What's the weather like in Beijing today?")], tools)


if __name__ == "__main__":
    main()

# Launch with: python -m llmflow.llm.openai_compatible_llm
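Beyond the stream_print demo in main() above, a caller could consume stream_chat directly and dispatch on ChunkEnum. A minimal sketch, assuming LLM_API_KEY and LLM_BASE_URL are exported and that the BaseLLM defaults (max_retries, temperature, and so on) are acceptable:

# Sketch only: relies on LLM_API_KEY / LLM_BASE_URL being set in the environment
# and mirrors the aggregation already done by _chat() above.
from llmflow.enumeration.chunk_enum import ChunkEnum
from llmflow.enumeration.role import Role
from llmflow.llm.openai_compatible_llm import OpenAICompatibleBaseLLM
from llmflow.schema.message import Message

llm = OpenAICompatibleBaseLLM(model_name="qwen3-32b")

thoughts, answer = [], []
for chunk, kind in llm.stream_chat([Message(role=Role.USER, content="Summarize OmegaConf in one line.")]):
    if kind is ChunkEnum.THINK:
        thoughts.append(chunk)
    elif kind is ChunkEnum.ANSWER:
        answer.append(chunk)
    # TOOL / USAGE / ERROR chunks are ignored in this sketch.

print("".join(answer))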
llmflow/mcp_server.py
DELETED
@@ -1,110 +0,0 @@
import sys
from typing import List

from dotenv import load_dotenv
from fastmcp import FastMCP

from llmflow.service.llmflow_service import LLMFlowService

load_dotenv()

mcp = FastMCP("llmflow")
service = LLMFlowService(sys.argv[1:])


@mcp.tool
def retriever(query: str,
              messages: List[dict] = None,
              top_k: int = 1,
              workspace_id: str = "default",
              config: dict = None) -> dict:
    """
    Retrieve experiences from the workspace based on a query.

    Args:
        query: Query string
        messages: List of messages
        top_k: Number of top experiences to retrieve
        workspace_id: Workspace identifier
        config: Additional configuration parameters

    Returns:
        Dictionary containing retrieved experiences
    """
    return service(api="retriever", request={
        "query": query,
        "messages": messages if messages else [],
        "top_k": top_k,
        "workspace_id": workspace_id,
        "config": config if config else {},
    }).model_dump()


@mcp.tool
def summarizer(traj_list: List[dict], workspace_id: str = "default", config: dict = None) -> dict:
    """
    Summarize trajectories into experiences.

    Args:
        traj_list: List of trajectories
        workspace_id: Workspace identifier
        config: Additional configuration parameters

    Returns:
        experiences
    """
    return service(api="summarizer", request={
        "traj_list": traj_list,
        "workspace_id": workspace_id,
        "config": config if config else {},
    }).model_dump()


@mcp.tool
def vector_store(action: str,
                 src_workspace_id: str = "",
                 workspace_id: str = "",
                 path: str = "./",
                 config: dict = None) -> dict:
    """
    Perform vector store operations.

    Args:
        action: Action to perform (e.g., "copy", "delete", "dump", "load")
        src_workspace_id: Source workspace identifier
        workspace_id: Workspace identifier
        path: Path to the vector store
        config: Additional configuration parameters

    Returns:
        Dictionary containing the result of the vector store operation
    """
    return service(api="vector_store", request={
        "action": action,
        "src_workspace_id": src_workspace_id,
        "workspace_id": workspace_id,
        "path": path,
        "config": config if config else {},
    }).model_dump()


def main():
    mcp_transport: str = service.init_app_config.mcp_transport
    if mcp_transport == "sse":
        mcp.run(transport="sse", host=service.http_service_config.host, port=service.http_service_config.port)
    elif mcp_transport == "stdio":
        mcp.run(transport="stdio")
    else:
        raise ValueError(f"Unsupported mcp transport: {mcp_transport}")


if __name__ == "__main__":
    main()

# start with:
# llmflow_mcp \
# mcp_transport=stdio \
# http_service.port=8001 \
# llm.default.model_name=qwen3-32b \
# embedding_model.default.model_name=text-embedding-v4 \
# vector_store.default.backend=local_file
llmflow/op/__init__.py
DELETED
@@ -1,10 +0,0 @@
from llmflow.utils.registry import Registry

OP_REGISTRY = Registry()

from llmflow.op.mock_op import Mock1Op, Mock2Op, Mock3Op, Mock4Op, Mock5Op, Mock6Op

from llmflow.op.vector_store.update_vector_store_op import UpdateVectorStoreOp
from llmflow.op.vector_store.recall_vector_store_op import RecallVectorStoreOp
from llmflow.op.vector_store.vector_store_action_op import VectorStoreActionOp
from llmflow.op.react.react_v1_op import ReactV1Op