flowllm 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +12 -0
- flowllm/app.py +25 -0
- flowllm/config/default_config.yaml +82 -0
- flowllm/config/pydantic_config_parser.py +242 -0
- flowllm/context/base_context.py +59 -0
- flowllm/context/flow_context.py +28 -0
- llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
- flowllm/context/registry.py +26 -0
- flowllm/context/service_context.py +103 -0
- flowllm/embedding_model/__init__.py +1 -0
- {llmflow → flowllm}/embedding_model/base_embedding_model.py +2 -2
- {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +8 -8
- flowllm/flow_engine/__init__.py +1 -0
- flowllm/flow_engine/base_flow_engine.py +34 -0
- flowllm/flow_engine/simple_flow_engine.py +213 -0
- flowllm/llm/__init__.py +1 -0
- {llmflow → flowllm}/llm/base_llm.py +16 -24
- {llmflow → flowllm}/llm/openai_compatible_llm.py +64 -108
- flowllm/op/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +116 -0
- flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
- flowllm/op/akshare/get_ak_a_info_op.py +143 -0
- flowllm/op/base_op.py +169 -0
- flowllm/op/llm_base_op.py +63 -0
- flowllm/op/mock_op.py +42 -0
- flowllm/op/parallel_op.py +30 -0
- flowllm/op/sequential_op.py +29 -0
- flowllm/schema/flow_response.py +12 -0
- flowllm/schema/message.py +35 -0
- flowllm/schema/service_config.py +76 -0
- flowllm/schema/tool_call.py +110 -0
- flowllm/service/__init__.py +2 -0
- flowllm/service/base_service.py +59 -0
- flowllm/service/http_service.py +87 -0
- flowllm/service/mcp_service.py +45 -0
- flowllm/storage/__init__.py +1 -0
- flowllm/storage/vector_store/__init__.py +3 -0
- flowllm/storage/vector_store/base_vector_store.py +44 -0
- {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
- {llmflow → flowllm/storage}/vector_store/es_vector_store.py +10 -9
- llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -10
- flowllm/utils/common_utils.py +64 -0
- flowllm/utils/dataframe_cache.py +331 -0
- flowllm/utils/fetch_url.py +113 -0
- {llmflow → flowllm}/utils/timer.py +5 -4
- {flowllm-0.1.0.dist-info → flowllm-0.1.1.dist-info}/METADATA +31 -27
- flowllm-0.1.1.dist-info/RECORD +62 -0
- flowllm-0.1.1.dist-info/entry_points.txt +4 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.1.dist-info}/licenses/LICENSE +1 -1
- flowllm-0.1.1.dist-info/top_level.txt +1 -0
- flowllm-0.1.0.dist-info/RECORD +0 -66
- flowllm-0.1.0.dist-info/entry_points.txt +0 -3
- flowllm-0.1.0.dist-info/top_level.txt +0 -1
- llmflow/app.py +0 -53
- llmflow/config/config_parser.py +0 -80
- llmflow/config/mock_config.yaml +0 -58
- llmflow/embedding_model/__init__.py +0 -5
- llmflow/enumeration/agent_state.py +0 -8
- llmflow/llm/__init__.py +0 -5
- llmflow/mcp_server.py +0 -110
- llmflow/op/__init__.py +0 -10
- llmflow/op/base_op.py +0 -125
- llmflow/op/mock_op.py +0 -40
- llmflow/op/react/react_v1_op.py +0 -88
- llmflow/op/react/react_v1_prompt.yaml +0 -28
- llmflow/op/vector_store/__init__.py +0 -13
- llmflow/op/vector_store/recall_vector_store_op.py +0 -48
- llmflow/op/vector_store/update_vector_store_op.py +0 -28
- llmflow/op/vector_store/vector_store_action_op.py +0 -46
- llmflow/pipeline/pipeline.py +0 -94
- llmflow/pipeline/pipeline_context.py +0 -37
- llmflow/schema/app_config.py +0 -69
- llmflow/schema/experience.py +0 -144
- llmflow/schema/message.py +0 -68
- llmflow/schema/request.py +0 -32
- llmflow/schema/response.py +0 -29
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +0 -96
- llmflow/tool/__init__.py +0 -9
- llmflow/tool/base_tool.py +0 -80
- llmflow/tool/code_tool.py +0 -43
- llmflow/tool/dashscope_search_tool.py +0 -162
- llmflow/tool/mcp_tool.py +0 -77
- llmflow/tool/tavily_search_tool.py +0 -109
- llmflow/tool/terminate_tool.py +0 -23
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +0 -17
- llmflow/utils/file_handler.py +0 -25
- llmflow/utils/http_client.py +0 -156
- llmflow/utils/op_utils.py +0 -102
- llmflow/utils/registry.py +0 -33
- llmflow/vector_store/__init__.py +0 -7
- llmflow/vector_store/base_vector_store.py +0 -136
- {llmflow → flowllm/config}/__init__.py +0 -0
- {llmflow/config → flowllm/context}/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
- {llmflow → flowllm}/enumeration/http_enum.py +0 -0
- {llmflow → flowllm}/enumeration/role.py +0 -0
- {llmflow/op/react → flowllm/op/akshare}/__init__.py +0 -0
- {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
- {llmflow → flowllm}/schema/vector_node.py +0 -0
- {llmflow/schema → flowllm/utils}/__init__.py +0 -0
- {llmflow → flowllm}/utils/singleton.py +0 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.1.dist-info}/WHEEL +0 -0
{llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py
@@ -1,15 +1,14 @@
 import os
 from typing import Literal, List
 
-from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import Field, PrivateAttr, model_validator
 
-from
-from
+from flowllm.context.service_context import C
+from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
 
 
-@
+@C.register_embedding_model("openai_compatible")
 class OpenAICompatibleEmbeddingModel(BaseEmbeddingModel):
     """
     OpenAI-compatible embedding model implementation.
@@ -19,9 +18,9 @@ class OpenAICompatibleEmbeddingModel(BaseEmbeddingModel):
     other services that follow the same interface.
     """
     # API configuration fields
-    api_key: str = Field(default_factory=lambda: os.getenv("
+    api_key: str = Field(default_factory=lambda: os.getenv("FLOW_EMBEDDING_API_KEY"),
                          description="API key for authentication")
-    base_url: str = Field(default_factory=lambda: os.getenv("
+    base_url: str = Field(default_factory=lambda: os.getenv("FLOW_EMBEDDING_BASE_URL"),
                          description="Base URL for the API endpoint")
     model_name: str = Field(default="", description="Name of the embedding model to use")
     dimensions: int = Field(default=1024, description="Dimensionality of the embedding vectors")
@@ -81,7 +80,9 @@ class OpenAICompatibleEmbeddingModel(BaseEmbeddingModel):
 
 
 def main():
-
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
     model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
     res1 = model.get_embeddings(
         "The clothes are of good quality and look good, definitely worth the wait. I love them.")
@@ -92,4 +93,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-# launch with: python -m llmflow.model.openai_compatible_embedding_model
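With this change the embedding client reads its credentials from the FLOW_EMBEDDING_API_KEY and FLOW_EMBEDDING_BASE_URL environment variables and loads them via load_env() instead of dotenv. A minimal usage sketch mirroring the main() above; the key and endpoint values are placeholders, not part of the package:

import os

os.environ.setdefault("FLOW_EMBEDDING_API_KEY", "sk-your-key")                 # assumed credential
os.environ.setdefault("FLOW_EMBEDDING_BASE_URL", "https://your-provider/v1")   # placeholder endpoint

from flowllm.embedding_model.openai_compatible_embedding_model import OpenAICompatibleEmbeddingModel

model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
print(model.get_embeddings("The clothes are of good quality and look good."))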
flowllm/flow_engine/__init__.py ADDED
@@ -0,0 +1 @@
+from flowllm.flow_engine import simple_flow_engine
flowllm/flow_engine/base_flow_engine.py ADDED
@@ -0,0 +1,34 @@
+from abc import ABC
+from typing import Optional
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.op.base_op import BaseOp
+from flowllm.utils.timer import timer
+
+
+class BaseFlowEngine(ABC):
+
+    def __init__(self, flow_name: str, flow_content: str, flow_context: FlowContext):
+        self.flow_name: str = flow_name
+        self.flow_content: str = flow_content
+        self.flow_context: FlowContext = flow_context
+
+        self._parsed_flow: Optional[BaseOp] = None
+        self._parsed_ops_cache = {}
+
+    def _parse_flow(self):
+        raise NotImplementedError
+
+    def _create_op(self, op_name: str):
+        raise NotImplementedError
+
+    def _print_flow(self):
+        raise NotImplementedError
+
+    def _execute_flow(self):
+        raise NotImplementedError
+
+    def __call__(self):
+        self._parse_flow()
+        self._print_flow()
+        return self._execute_flow()
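BaseFlowEngine is a template class: calling an engine instance runs _parse_flow(), then _print_flow(), then _execute_flow(). A minimal subclass sketch (the EchoFlowEngine name and its trivial behaviour are hypothetical, only to show the contract):

from flowllm.flow_engine.base_flow_engine import BaseFlowEngine


class EchoFlowEngine(BaseFlowEngine):
    def _parse_flow(self):
        # a real engine would build a BaseOp tree here; this sketch keeps the raw string
        self._parsed_flow = self.flow_content

    def _create_op(self, op_name: str):
        return op_name

    def _print_flow(self):
        print(f"flow '{self.flow_name}': {self._parsed_flow}")

    def _execute_flow(self):
        return self._parsed_flow

# engine = EchoFlowEngine("demo", "mock1_op >> mock2_op", flow_context)  # flow_context assumed
# engine()  # parses, prints, then executes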
flowllm/flow_engine/simple_flow_engine.py ADDED
@@ -0,0 +1,213 @@
+import re
+
+from loguru import logger
+
+from flowllm.context.service_context import C
+from flowllm.flow_engine.base_flow_engine import BaseFlowEngine
+from flowllm.op.base_op import BaseOp
+from flowllm.op.parallel_op import ParallelOp
+from flowllm.op.sequential_op import SequentialOp
+from flowllm.schema.service_config import OpConfig
+
+
+@C.register_flow_engine("simple")
+class SimpleFlowEngine(BaseFlowEngine):
+    SEQ_SYMBOL = ">>"
+    PARALLEL_SYMBOL = "|"
+
+    """
+    Simple flow implementation that supports parsing and executing operation expressions.
+
+    Supports flow expressions like:
+    - "op1 >> op2" (sequential execution)
+    - "op1 | op2" (parallel execution)
+    - "op1 >> (op2 | op3) >> op4" (mixed execution)
+    - "op1 >> (op1 | (op2 >> op3)) >> op4" (complex nested execution)
+    """
+
+    def _parse_flow(self):
+        expression = re.sub(r'\s+', ' ', self.flow_content.strip())
+        self._parsed_flow = self._parse_expression(expression)
+
+    def _parse_expression(self, expression: str) -> BaseOp:
+        """
+        Parse the flow content string into executable operations.
+
+        Supports expressions with operators:
+        - ">>" for sequential execution
+        - "|" for parallel execution
+        - Parentheses for grouping operations
+
+        Args:
+            expression: The expression string to parse. If None, uses self.flow_content
+
+        Returns:
+            BaseOp: The parsed flow as an executable operation tree
+        """
+        # handle parentheses by finding and replacing innermost groups
+        while '(' in expression:
+            # Find the innermost parentheses
+            start = -1
+            for i, char in enumerate(expression):
+                if char == '(':
+                    start = i
+                elif char == ')':
+                    if start == -1:
+                        raise ValueError(f"mismatched parentheses in expression: {expression}")
+
+                    # extract and parse the inner expression
+                    inner_expr = expression[start + 1:i]
+                    inner_result = self._parse_expression(inner_expr)
+
+                    # create a placeholder for the parsed inner expression
+                    placeholder = f"__PARSED_OP_{len(self._parsed_ops_cache)}__"
+
+                    # store the parsed operation for later retrieval
+                    self._parsed_ops_cache[placeholder] = inner_result
+
+                    # Replace the parentheses group with placeholder
+                    expression = expression[:start] + placeholder + expression[i + 1:]
+                    break
+            else:
+                if start != -1:
+                    raise ValueError(f"mismatched parentheses in expression: {expression}")
+
+        # Parse the expression without parentheses
+        return self._parse_flat_expression(expression)
+
+    def _parse_flat_expression(self, expression: str) -> BaseOp:
+        """
+        Parse a flat expression (no parentheses) into operation objects.
+
+        Args:
+            expression: The flat expression string
+
+        Returns:
+            BaseOp: The parsed operation tree
+        """
+        # split by '>>' first (sequential has higher precedence)
+        sequential_parts = [part.strip() for part in expression.split(self.SEQ_SYMBOL)]
+
+        if len(sequential_parts) > 1:
+            # parse each part and create sequential operation
+            ops = []
+            for part in sequential_parts:
+                part = part.strip()
+                if part in self._parsed_ops_cache:
+                    ops.append(self._parsed_ops_cache[part])
+                else:
+                    ops.append(self._parse_parallel_expression(part))
+
+            return SequentialOp(ops=ops, flow_context=self.flow_context)
+
+        else:
+            # no sequential operators, parse for parallel
+            return self._parse_parallel_expression(expression)
+
+    def _parse_parallel_expression(self, expression: str) -> BaseOp:
+        """
+        Parse a parallel expression (operations separated by |).
+
+        Args:
+            expression: The expression string
+
+        Returns:
+            BaseOp: The parsed operation (single op or parallel op)
+        """
+        parallel_parts = [part.strip() for part in expression.split(self.PARALLEL_SYMBOL)]
+
+        if len(parallel_parts) > 1:
+            # create parallel operation
+            ops = []
+            for part in parallel_parts:
+                part = part.strip()
+                if part in self._parsed_ops_cache:
+                    ops.append(self._parsed_ops_cache[part])
+                else:
+                    ops.append(self._create_op(part))
+
+            return ParallelOp(ops=ops, flow_context=self.flow_context)
+
+        else:
+            # single operation
+            part = expression.strip()
+            if part in self._parsed_ops_cache:
+                return self._parsed_ops_cache[part]
+            else:
+                return self._create_op(part)
+
+    def _create_op(self, op_name: str) -> BaseOp:
+        if op_name in self.flow_context.service_config.op:
+            op_config: OpConfig = self.flow_context.service_config.op[op_name]
+            op_cls = C.resolve_op(op_config.backend)
+
+
+        elif op_name in C.registry_dict["op"]:
+            op_config: OpConfig = OpConfig()
+            op_cls = C.resolve_op(op_name)
+
+        else:
+            raise ValueError(f"op='{op_name}' is not registered!")
+
+        kwargs = {
+            "name": op_name,
+            "raise_exception": op_config.raise_exception,
+            "flow_context": self.flow_context,
+            **op_config.params
+        }
+
+        if op_config.language:
+            kwargs["language"] = op_config.language
+        if op_config.prompt_path:
+            kwargs["prompt_path"] = op_config.prompt_path
+        if op_config.llm:
+            kwargs["llm"] = op_config.llm
+        if op_config.embedding_model:
+            kwargs["embedding_model"] = op_config.embedding_model
+        if op_config.vector_store:
+            kwargs["vector_store"] = op_config.vector_store
+
+        return op_cls(**kwargs)
+
+    def _print_flow(self):
+        """
+        Print the parsed flow structure in a readable format.
+        Allows users to visualize the execution flow on screen.
+        """
+        assert self._parsed_flow is not None, "flow_content is not parsed!"
+
+        logger.info(f"Expression: {self.flow_content}")
+        self._print_operation_tree(self._parsed_flow, indent=0)
+
+    def _print_operation_tree(self, op: BaseOp, indent: int = 0):
+        """
+        Recursively print the operation tree structure.
+
+        Args:
+            op: The operation to print
+            indent: Current indentation level
+        """
+        prefix = " " * indent
+        if isinstance(op, SequentialOp):
+            logger.info(f"{prefix}Sequential Execution:")
+            for i, sub_op in enumerate(op.ops):
+                logger.info(f"{prefix} Step {i + 1}:")
+                self._print_operation_tree(sub_op, indent + 2)
+
+        elif isinstance(op, ParallelOp):
+            logger.info(f"{prefix}Parallel Execution:")
+            for i, sub_op in enumerate(op.ops):
+                logger.info(f"{prefix} Branch {i + 1}:")
+                self._print_operation_tree(sub_op, indent + 2)
+
+        else:
+            logger.info(f"{prefix}Operation: {op.name}")
+
+    def _execute_flow(self):
+        """
+        Execute the parsed flow and return the result.
+
+        Returns:
+            The result of executing the flow
+        """
+        return self._parsed_flow.execute()
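The grammar is deliberately small: ">>" separates sequential steps, "|" separates parallel branches within a step, and parentheses group sub-expressions, which the engine stashes behind placeholders in _parsed_ops_cache. A standalone sketch of the same precedence rules (a hypothetical helper that returns nested tuples instead of op objects):

import re

def parse(expr: str, cache=None):
    """Return a nested tuple: ('seq', [...]), ('par', [...]), or an op name."""
    cache = {} if cache is None else cache
    expr = re.sub(r"\s+", " ", expr.strip())
    while "(" in expr:
        inner = re.search(r"\(([^()]*)\)", expr)  # innermost group
        if inner is None:
            raise ValueError(f"mismatched parentheses: {expr}")
        key = f"__G{len(cache)}__"
        cache[key] = parse(inner.group(1), cache)
        expr = expr[:inner.start()] + key + expr[inner.end():]

    def flat(part):
        branches = [p.strip() for p in part.split("|")]
        if len(branches) > 1:
            return ("par", [cache.get(b, b) for b in branches])
        return cache.get(branches[0], branches[0])

    steps = [p.strip() for p in expr.split(">>")]
    return ("seq", [flat(p) for p in steps]) if len(steps) > 1 else flat(expr)

print(parse("op1 >> (op2 | op3) >> op4"))
# ('seq', ['op1', ('par', ['op2', 'op3']), 'op4'])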
flowllm/llm/__init__.py ADDED
@@ -0,0 +1 @@
+from flowllm.llm.openai_compatible_llm import OpenAICompatibleBaseLLM
{llmflow → flowllm}/llm/base_llm.py
@@ -1,12 +1,12 @@
 import time
 from abc import ABC
-from typing import List,
+from typing import List, Callable
 
 from loguru import logger
 from pydantic import Field, BaseModel
 
-from
-from
+from flowllm.schema.message import Message
+from flowllm.schema.tool_call import ToolCall
 
 
 class BaseLLM(BaseModel, ABC):
@@ -30,17 +30,17 @@ class BaseLLM(BaseModel, ABC):
    presence_penalty: float | None = Field(default=None, description="Presence penalty to reduce repetition")
 
    # Model-specific features
-    enable_thinking: bool = Field(default=
+    enable_thinking: bool = Field(default=False, description="Enable reasoning/thinking mode for supported models")
 
    # Tool usage configuration
-    tool_choice:
+    tool_choice: str = Field(default=None, description="Strategy for tool selection")
    parallel_tool_calls: bool = Field(default=True, description="Allow multiple tool calls in parallel")
 
    # Error handling and reliability
    max_retries: int = Field(default=5, description="Maximum number of retry attempts on failure")
    raise_exception: bool = Field(default=False, description="Whether to raise exceptions or return default values")
 
-    def stream_chat(self, messages: List[Message], tools: List[
+    def stream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
        """
        Stream chat completions from the LLM.
 
@@ -57,21 +57,8 @@ class BaseLLM(BaseModel, ABC):
        """
        raise NotImplementedError
 
-    def
-
-        Stream chat completions and print them to console in real-time.
-
-        This is a convenience method for debugging and interactive use,
-        combining streaming with formatted console output.
-
-        Args:
-            messages: List of conversation messages
-            tools: Optional list of tools the model can use
-            **kwargs: Additional model-specific parameters
-        """
-        raise NotImplementedError
-
-    def _chat(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs) -> Message:
+    def _chat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+              **kwargs) -> Message:
        """
        Internal method to perform a single chat completion.
 
@@ -82,6 +69,7 @@ class BaseLLM(BaseModel, ABC):
        Args:
            messages: List of conversation messages
            tools: Optional list of tools the model can use
+            enable_stream_print: Whether to print streaming response to console
            **kwargs: Additional model-specific parameters
 
        Returns:
@@ -89,8 +77,8 @@ class BaseLLM(BaseModel, ABC):
        """
        raise NotImplementedError
 
-    def chat(self, messages: List[Message], tools: List[
-             default_value=None, **kwargs):
+    def chat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+             callback_fn: Callable = None, default_value=None, **kwargs):
        """
        Perform a chat completion with retry logic and error handling.
 
@@ -103,6 +91,7 @@ class BaseLLM(BaseModel, ABC):
            tools: Optional list of tools the model can use
            callback_fn: Optional callback to process the response message
            default_value: Value to return if all retries fail (when raise_exception=False)
+            enable_stream_print: Whether to print streaming response to console
            **kwargs: Additional model-specific parameters
 
        Returns:
@@ -114,7 +103,10 @@ class BaseLLM(BaseModel, ABC):
        for i in range(self.max_retries):
            try:
                # Attempt to get response from the model
-                message: Message = self._chat(messages,
+                message: Message = self._chat(messages=messages,
+                                              tools=tools,
+                                              enable_stream_print=enable_stream_print,
+                                              **kwargs)
 
                # Apply callback function if provided
                if callback_fn:
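After this change, streaming console output is folded into chat()/_chat() behind the enable_stream_print flag rather than living in a separate stream_print() method. A hedged usage sketch of the new signature (the model name and environment are assumptions):

from flowllm.enumeration.role import Role
from flowllm.llm.openai_compatible_llm import OpenAICompatibleBaseLLM
from flowllm.schema.message import Message
from flowllm.utils.common_utils import load_env

load_env()  # expects FLOW_LLM_API_KEY / FLOW_LLM_BASE_URL to be set
llm = OpenAICompatibleBaseLLM(model_name="qwen-max-2025-01-25")  # assumed model name
reply: Message = llm.chat(
    messages=[Message(role=Role.USER, content="hello")],
    tools=None,
    enable_stream_print=True,   # print think/answer/tool chunks while aggregating
    callback_fn=None,           # optional post-processing of the reply
    default_value=None,         # returned if every retry fails
)
print(reply)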
{llmflow → flowllm}/llm/openai_compatible_llm.py
@@ -1,21 +1,20 @@
 import os
-from typing import List
+from typing import List, Dict
 
-from dotenv import load_dotenv
 from loguru import logger
 from openai import OpenAI
 from openai.types import CompletionUsage
 from pydantic import Field, PrivateAttr, model_validator
 
-from
-from
-from
-from
-from
-from
+from flowllm.context.service_context import C
+from flowllm.enumeration.chunk_enum import ChunkEnum
+from flowllm.enumeration.role import Role
+from flowllm.llm.base_llm import BaseLLM
+from flowllm.schema.message import Message
+from flowllm.schema.tool_call import ToolCall
 
 
-@
+@C.register_llm("openai_compatible")
 class OpenAICompatibleBaseLLM(BaseLLM):
    """
    OpenAI-compatible LLM implementation supporting streaming and tool calls.
@@ -29,8 +28,9 @@ class OpenAICompatibleBaseLLM(BaseLLM):
    """
 
    # API configuration
-    api_key: str = Field(default_factory=lambda: os.getenv("
-
+    api_key: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_API_KEY"),
+                         description="API key for authentication")
+    base_url: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_BASE_URL"),
                          description="Base URL for the API endpoint")
    _client: OpenAI = PrivateAttr()
 
@@ -48,7 +48,7 @@ class OpenAICompatibleBaseLLM(BaseLLM):
        self._client = OpenAI(api_key=self.api_key, base_url=self.base_url)
        return self
 
-    def stream_chat(self, messages: List[Message], tools: List[
+    def stream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
        """
        Stream chat completions from OpenAI-compatible API.
 
@@ -69,7 +69,10 @@ class OpenAICompatibleBaseLLM(BaseLLM):
        """
        for i in range(self.max_retries):
            try:
-
+                extra_body = {}
+                if self.enable_thinking:
+                    extra_body["enable_thinking"] = True  # qwen3 params
+
                completion = self._client.chat.completions.create(
                    model=self.model_name,
                    messages=[x.simple_dump() for x in messages],
@@ -78,14 +81,13 @@ class OpenAICompatibleBaseLLM(BaseLLM):
                    stream=True,
                    stream_options=self.stream_options,
                    temperature=self.temperature,
-                    extra_body=
-                    tools=[x.
-                    tool_choice=self.tool_choice,
+                    extra_body=extra_body,
+                    tools=[x.simple_input_dump() for x in tools] if tools else None,
                    parallel_tool_calls=self.parallel_tool_calls)
 
                # Initialize tool call tracking
-                ret_tools = []  # Accumulate tool calls across chunks
-                is_answering = False  # Track when model starts answering
+                ret_tools: List[ToolCall] = []  # Accumulate tool calls across chunks
+                is_answering: bool = False  # Track when model starts answering
 
                # Process each chunk in the streaming response
                for chunk in completion:
@@ -130,7 +132,7 @@ class OpenAICompatibleBaseLLM(BaseLLM):
 
                # Yield completed tool calls after streaming finishes
                if ret_tools:
-                    tool_dict = {x.name: x for x in tools} if tools else {}
+                    tool_dict: Dict[str, ToolCall] = {x.name: x for x in tools} if tools else {}
                    for tool in ret_tools:
                        # Only yield tool calls that correspond to available tools
                        if tool.name not in tool_dict:
@@ -138,7 +140,7 @@ class OpenAICompatibleBaseLLM(BaseLLM):
 
                        yield tool, ChunkEnum.TOOL
 
-                return
+                return
 
            except Exception as e:
                logger.exception(f"stream chat with model={self.model_name} encounter error with e={e.args}")
@@ -149,7 +151,8 @@ class OpenAICompatibleBaseLLM(BaseLLM):
                else:
                    yield e.args, ChunkEnum.ERROR
 
-    def _chat(self, messages: List[Message], tools: List[
+    def _chat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+              **kwargs) -> Message:
        """
        Perform a complete chat completion by aggregating streaming chunks.
 
@@ -160,28 +163,60 @@ class OpenAICompatibleBaseLLM(BaseLLM):
        Args:
            messages: List of conversation messages
            tools: Optional list of tools available to the model
+            enable_stream_print: Whether to print streaming response to console
            **kwargs: Additional parameters
 
        Returns:
            Complete Message with all content aggregated
        """
-
+
+        enter_think = False  # Whether we've started printing thinking content
+        enter_answer = False  # Whether we've started printing answer content
        reasoning_content = ""  # Model's internal reasoning
        answer_content = ""  # Final response content
        tool_calls = []  # List of tool calls to execute
 
        # Consume streaming response and aggregate chunks by type
        for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
-            if chunk_enum is ChunkEnum.
+            if chunk_enum is ChunkEnum.USAGE:
+                # Display token usage statistics
+                if enable_stream_print:
+                    if isinstance(chunk, CompletionUsage):
+                        print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                    else:
+                        print(f"\n<usage>{chunk}</usage>")
+
+            elif chunk_enum is ChunkEnum.THINK:
+                if enable_stream_print:
+                    # Format thinking/reasoning content
+                    if not enter_think:
+                        enter_think = True
+                        print("<think>\n", end="")
+                    print(chunk, end="")
+
                reasoning_content += chunk
 
            elif chunk_enum is ChunkEnum.ANSWER:
+                if enable_stream_print:
+                    if not enter_answer:
+                        enter_answer = True
+                        # Close thinking section if we were in it
+                        if enter_think:
+                            print("\n</think>")
+                    print(chunk, end="")
+
                answer_content += chunk
 
            elif chunk_enum is ChunkEnum.TOOL:
+                if enable_stream_print:
+                    print(f"\n<tool>{chunk.model_dump_json()}</tool>", end="")
+
                tool_calls.append(chunk)
 
-
+            elif chunk_enum is ChunkEnum.ERROR:
+                if enable_stream_print:
+                    # Display error information
+                    print(f"\n<error>{chunk}</error>", end="")
 
        # Construct complete response message
        return Message(role=Role.ASSISTANT,
@@ -189,95 +224,16 @@ class OpenAICompatibleBaseLLM(BaseLLM):
                       content=answer_content,
                       tool_calls=tool_calls)
 
-    def stream_print(self, messages: List[Message], tools: List[BaseTool] = None, **kwargs):
-        """
-        Stream chat completions with formatted console output.
-
-        This method provides a real-time view of the model's response,
-        with different formatting for different types of content:
-        - Thinking content is wrapped in <think></think> tags
-        - Answer content is printed directly
-        - Tool calls are formatted as JSON
-        - Usage statistics and errors are clearly marked
-
-        Args:
-            messages: List of conversation messages
-            tools: Optional list of tools available to the model
-            **kwargs: Additional parameters
-        """
-        # Track which sections we've entered for proper formatting
-        enter_think = False  # Whether we've started printing thinking content
-        enter_answer = False  # Whether we've started printing answer content
-
-        # Process each streaming chunk with appropriate formatting
-        for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
-            if chunk_enum is ChunkEnum.USAGE:
-                # Display token usage statistics
-                if isinstance(chunk, CompletionUsage):
-                    print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
-                else:
-                    print(f"\n<usage>{chunk}</usage>")
-
-            elif chunk_enum is ChunkEnum.THINK:
-                # Format thinking/reasoning content
-                if not enter_think:
-                    enter_think = True
-                    print("<think>\n", end="")
-                print(chunk, end="")
-
-            elif chunk_enum is ChunkEnum.ANSWER:
-                # Format regular answer content
-                if not enter_answer:
-                    enter_answer = True
-                    # Close thinking section if we were in it
-                    if enter_think:
-                        print("\n</think>")
-                print(chunk, end="")
-
-            elif chunk_enum is ChunkEnum.TOOL:
-                # Format tool calls as structured JSON
-                assert isinstance(chunk, ToolCall)
-                print(f"\n<tool>{chunk.model_dump_json(indent=2)}</tool>", end="")
-
-            elif chunk_enum is ChunkEnum.ERROR:
-                # Display error information
-                print(f"\n<error>{chunk}</error>", end="")
-
-
 def main():
-
-    Demo function to test the OpenAI-compatible LLM implementation.
-
-    This function demonstrates:
-    1. Basic chat without tools
-    2. Chat with tool usage (search and code tools)
-    3. Real-time streaming output formatting
-    """
-    from llmflow.tool.dashscope_search_tool import DashscopeSearchTool
-    from llmflow.tool.code_tool import CodeTool
-    from llmflow.enumeration.role import Role
+    from flowllm.utils.common_utils import load_env
 
-
-    load_dotenv()
+    load_env()
 
-    # Initialize the LLM with a specific model
    model_name = "qwen-max-2025-01-25"
    llm = OpenAICompatibleBaseLLM(model_name=model_name)
-
-
-
-
-    # Test 1: Simple greeting without tools
-    print("=== Test 1: Simple Chat ===")
-    llm.stream_print([Message(role=Role.USER, content="hello")], [])
-
-    print("\n" + "=" * 20)
-
-    # Test 2: Complex query that might use tools
-    print("\n=== Test 2: Chat with Tools ===")
-    llm.stream_print([Message(role=Role.USER, content="What's the weather like in Beijing today?")], tools)
-
+    message: Message = llm.chat([Message(role=Role.USER, content="hello")], [],
+                                enable_stream_print=False)
+    print(message)
 
 if __name__ == "__main__":
    main()
-# Launch with: python -m llmflow.llm.openai_compatible_llm
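Callers that want raw streaming can still consume stream_chat(), which yields (chunk, ChunkEnum) pairs that _chat() aggregates. A small consumption sketch, reusing the llm, Message, and Role objects from the sketch above:

from flowllm.enumeration.chunk_enum import ChunkEnum

for chunk, kind in llm.stream_chat([Message(role=Role.USER, content="hello")]):
    if kind is ChunkEnum.THINK:
        print(chunk, end="")             # reasoning tokens
    elif kind is ChunkEnum.ANSWER:
        print(chunk, end="")             # answer tokens
    elif kind is ChunkEnum.TOOL:
        print(chunk.model_dump_json())   # a completed ToolCall
    elif kind is ChunkEnum.ERROR:
        print(f"error: {chunk}")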
flowllm/op/__init__.py ADDED
@@ -0,0 +1,3 @@
+from flowllm.op.akshare.get_ak_a_code_op import GetAkACodeOp
+from flowllm.op.akshare.get_ak_a_info_op import GetAkAInfoOp, GetAkASpotOp, GetAkAMoneyFlowOp, GetAkAFinancialInfoOp, GetAkANewsOp, MergeAkAInfoOp
+from flowllm.op.mock_op import Mock1Op, Mock2Op, Mock3Op, Mock4Op, Mock5Op, Mock6Op
|