flowllm 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. flowllm/__init__.py +19 -6
  2. flowllm/app.py +4 -14
  3. flowllm/client/__init__.py +25 -0
  4. flowllm/client/async_http_client.py +81 -0
  5. flowllm/client/http_client.py +81 -0
  6. flowllm/client/mcp_client.py +133 -0
  7. flowllm/client/sync_mcp_client.py +116 -0
  8. flowllm/config/__init__.py +1 -0
  9. flowllm/config/{default_config.yaml → default.yaml} +3 -8
  10. flowllm/config/empty.yaml +37 -0
  11. flowllm/config/pydantic_config_parser.py +17 -17
  12. flowllm/context/base_context.py +27 -7
  13. flowllm/context/flow_context.py +6 -18
  14. flowllm/context/registry.py +5 -1
  15. flowllm/context/service_context.py +83 -37
  16. flowllm/embedding_model/__init__.py +1 -1
  17. flowllm/embedding_model/base_embedding_model.py +91 -0
  18. flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
  19. flowllm/flow/__init__.py +1 -0
  20. flowllm/flow/base_flow.py +74 -0
  21. flowllm/flow/base_tool_flow.py +15 -0
  22. flowllm/flow/gallery/__init__.py +8 -0
  23. flowllm/flow/gallery/cmd_flow.py +11 -0
  24. flowllm/flow/gallery/code_tool_flow.py +30 -0
  25. flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
  26. flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
  27. flowllm/flow/gallery/expression_tool_flow.py +18 -0
  28. flowllm/flow/gallery/mock_tool_flow.py +62 -0
  29. flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
  30. flowllm/flow/gallery/terminate_tool_flow.py +30 -0
  31. flowllm/flow/parser/__init__.py +0 -0
  32. flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
  33. flowllm/llm/__init__.py +2 -1
  34. flowllm/llm/base_llm.py +94 -4
  35. flowllm/llm/litellm_llm.py +456 -0
  36. flowllm/llm/openai_compatible_llm.py +205 -5
  37. flowllm/op/__init__.py +12 -3
  38. flowllm/op/agent/__init__.py +1 -0
  39. flowllm/op/agent/react_v1_op.py +109 -0
  40. flowllm/op/agent/react_v1_prompt.yaml +54 -0
  41. flowllm/op/agent/react_v2_op.py +86 -0
  42. flowllm/op/agent/react_v2_prompt.yaml +35 -0
  43. flowllm/op/akshare/__init__.py +3 -0
  44. flowllm/op/akshare/get_ak_a_code_op.py +14 -22
  45. flowllm/op/akshare/get_ak_a_info_op.py +17 -20
  46. flowllm/op/{llm_base_op.py → base_llm_op.py} +7 -5
  47. flowllm/op/base_op.py +40 -44
  48. flowllm/op/base_ray_op.py +313 -0
  49. flowllm/op/code/__init__.py +1 -0
  50. flowllm/op/code/execute_code_op.py +42 -0
  51. flowllm/op/gallery/__init__.py +2 -0
  52. flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
  53. flowllm/op/gallery/terminate_op.py +29 -0
  54. flowllm/op/parallel_op.py +2 -9
  55. flowllm/op/search/__init__.py +3 -0
  56. flowllm/op/search/dashscope_deep_research_op.py +267 -0
  57. flowllm/op/search/dashscope_search_op.py +186 -0
  58. flowllm/op/search/dashscope_search_prompt.yaml +13 -0
  59. flowllm/op/search/tavily_search_op.py +109 -0
  60. flowllm/op/sequential_op.py +1 -9
  61. flowllm/schema/flow_request.py +12 -0
  62. flowllm/schema/message.py +2 -0
  63. flowllm/schema/service_config.py +12 -16
  64. flowllm/schema/tool_call.py +20 -8
  65. flowllm/schema/vector_node.py +1 -0
  66. flowllm/service/__init__.py +3 -2
  67. flowllm/service/base_service.py +50 -41
  68. flowllm/service/cmd_service.py +15 -0
  69. flowllm/service/http_service.py +34 -42
  70. flowllm/service/mcp_service.py +13 -11
  71. flowllm/storage/cache/__init__.py +1 -0
  72. flowllm/storage/cache/cache_data_handler.py +104 -0
  73. flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
  74. flowllm/storage/vector_store/__init__.py +3 -3
  75. flowllm/storage/vector_store/base_vector_store.py +3 -0
  76. flowllm/storage/vector_store/es_vector_store.py +4 -5
  77. flowllm/storage/vector_store/local_vector_store.py +0 -1
  78. flowllm/utils/common_utils.py +9 -21
  79. flowllm/utils/fetch_url.py +16 -12
  80. flowllm/utils/llm_utils.py +28 -0
  81. flowllm/utils/logger_utils.py +28 -0
  82. flowllm/utils/ridge_v2.py +54 -0
  83. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/METADATA +43 -390
  84. flowllm-0.1.3.dist-info/RECORD +102 -0
  85. flowllm-0.1.3.dist-info/entry_points.txt +2 -0
  86. flowllm/flow_engine/__init__.py +0 -1
  87. flowllm/flow_engine/base_flow_engine.py +0 -34
  88. flowllm-0.1.1.dist-info/RECORD +0 -62
  89. flowllm-0.1.1.dist-info/entry_points.txt +0 -4
  90. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/WHEEL +0 -0
  91. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/licenses/LICENSE +0 -0
  92. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/top_level.txt +0 -0
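The bulk of this release is the new LiteLLM backend, flowllm/llm/litellm_llm.py, reproduced in full below. As a quick orientation, here is a minimal usage sketch inferred only from the code in this diff; the model name, endpoint, and key are placeholders, and chat(...) is assumed to be the public wrapper that the file's own main() helper calls:

    import os

    # Hedged sketch, not documented flowllm API: configuration is inferred from the
    # Field defaults and the main() helper in the diff below. Values are placeholders.
    os.environ.setdefault("FLOW_LLM_API_KEY", "sk-...")                   # read by LiteLLMBaseLLM.api_key
    os.environ.setdefault("FLOW_LLM_BASE_URL", "https://example.com/v1")  # read by LiteLLMBaseLLM.base_url

    from flowllm.llm.litellm_llm import LiteLLMBaseLLM
    from flowllm.schema.message import Message
    from flowllm.enumeration.role import Role

    llm = LiteLLMBaseLLM(model_name="gpt-4o-mini")  # any model name LiteLLM can route
    reply = llm.chat([Message(role=Role.USER, content="Hello!")], [], enable_stream_print=False)
    print(reply.content)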
flowllm/llm/litellm_llm.py (new file)
@@ -0,0 +1,456 @@
+ import asyncio
+ import os
+ from typing import List, Dict
+
+ from loguru import logger
+ from pydantic import Field, PrivateAttr, model_validator
+
+ from flowllm.context.service_context import C
+ from flowllm.enumeration.chunk_enum import ChunkEnum
+ from flowllm.enumeration.role import Role
+ from flowllm.llm.base_llm import BaseLLM
+ from flowllm.schema.message import Message
+ from flowllm.schema.tool_call import ToolCall
+
+
+ @C.register_llm("litellm")
+ class LiteLLMBaseLLM(BaseLLM):
+     """
+     LiteLLM-compatible LLM implementation supporting multiple LLM providers through unified interface.
+
+     This class implements the BaseLLM interface using LiteLLM, which provides:
+     - Support for 100+ LLM providers (OpenAI, Anthropic, Cohere, Azure, etc.)
+     - Streaming responses with different chunk types (content, tools, usage)
+     - Tool calling with parallel execution support
+     - Unified API across different providers
+     - Robust error handling and retries
+
+     LiteLLM automatically handles provider-specific authentication and request formatting.
+     """
+
+     # API configuration - LiteLLM handles provider-specific settings
+     api_key: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_API_KEY"),
+                          description="API key for authentication")
+     base_url: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_BASE_URL"),
+                           description="Base URL for custom endpoints")
+
+     # LiteLLM specific configuration
+     custom_llm_provider: str = Field(default="openai", description="Custom LLM provider name for LiteLLM routing")
+
+     # Additional LiteLLM parameters
+     timeout: float = Field(default=600, description="Request timeout in seconds")
+     max_tokens: int = Field(default=None, description="Maximum tokens to generate")
+
+     # Private attributes for LiteLLM configuration
+     _litellm_params: dict = PrivateAttr(default_factory=dict)
+
+     @model_validator(mode="after")
+     def init_litellm_config(self):
+         """
+         Initialize LiteLLM configuration after model validation.
+
+         This validator sets up LiteLLM-specific parameters and environment variables
+         required for different providers. It configures authentication and routing
+         based on the model name and provider settings.
+
+         Returns:
+             Self for method chaining
+         """
+
+         # Configure LiteLLM parameters
+         self._litellm_params = {
+             "api_key": self.api_key,
+             "base_url": self.base_url, #.replace("/v1", "")
+             "model": self.model_name,
+             "temperature": self.temperature,
+             "seed": self.seed,
+             "timeout": self.timeout,
+         }
+
+         # Add optional parameters
+         if self.top_p is not None:
+             self._litellm_params["top_p"] = self.top_p
+         if self.max_tokens is not None:
+             self._litellm_params["max_tokens"] = self.max_tokens
+         if self.presence_penalty is not None:
+             self._litellm_params["presence_penalty"] = self.presence_penalty
+         if self.custom_llm_provider:
+             self._litellm_params["custom_llm_provider"] = self.custom_llm_provider
+
+         return self
+
+     def stream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
+         """
+         Stream chat completions from LiteLLM with support for multiple providers.
+
+         This method handles streaming responses and categorizes chunks into different types:
+         - ANSWER: Regular response content from the model
+         - TOOL: Tool calls that need to be executed
+         - USAGE: Token usage statistics (when available)
+         - ERROR: Error information from failed requests
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             **kwargs: Additional parameters passed to LiteLLM
+
+         Yields:
+             Tuple of (chunk_content, ChunkEnum) for each streaming piece
+         """
+         from litellm import completion
+         for i in range(self.max_retries):
+             try:
+                 # Prepare parameters for LiteLLM
+                 params = self._litellm_params.copy()
+                 params.update(kwargs)
+                 params.update({
+                     "messages": [x.simple_dump() for x in messages],
+                     "stream": True,
+                 })
+
+                 # Add tools if provided
+                 if tools:
+                     params["tools"] = [x.simple_input_dump() for x in tools]
+                     params["tool_choice"] = self.tool_choice if self.tool_choice else "auto"
+
+                 # Create streaming completion using LiteLLM
+                 completion_response = completion(**params)
+
+                 # Initialize tool call tracking
+                 ret_tools: List[ToolCall] = [] # Accumulate tool calls across chunks
+
+                 # Process each chunk in the streaming response
+                 for chunk in completion_response:
+                     try:
+                         # Handle chunks without choices (usually usage/metadata)
+                         if not hasattr(chunk, 'choices') or not chunk.choices:
+                             # Check for usage information
+                             if hasattr(chunk, 'usage') and chunk.usage:
+                                 yield chunk.usage, ChunkEnum.USAGE
+                             continue
+
+                         delta = chunk.choices[0].delta
+
+                         # Handle regular response content
+                         if hasattr(delta, 'content') and delta.content is not None:
+                             yield delta.content, ChunkEnum.ANSWER
+
+                         # Handle tool calls (function calling)
+                         if hasattr(delta, 'tool_calls') and delta.tool_calls is not None:
+                             for tool_call in delta.tool_calls:
+                                 index = getattr(tool_call, 'index', 0)
+
+                                 # Ensure we have enough tool call slots
+                                 while len(ret_tools) <= index:
+                                     ret_tools.append(ToolCall(index=index))
+
+                                 # Accumulate tool call information across chunks
+                                 if hasattr(tool_call, 'id') and tool_call.id:
+                                     ret_tools[index].id += tool_call.id
+
+                                 if (hasattr(tool_call, 'function') and tool_call.function and
+                                         hasattr(tool_call.function, 'name') and tool_call.function.name):
+                                     ret_tools[index].name += tool_call.function.name
+
+                                 if (hasattr(tool_call, 'function') and tool_call.function and
+                                         hasattr(tool_call.function, 'arguments') and tool_call.function.arguments):
+                                     ret_tools[index].arguments += tool_call.function.arguments
+
+                     except Exception as chunk_error:
+                         logger.warning(f"Error processing chunk: {chunk_error}")
+                         continue
+
+                 # Yield completed tool calls after streaming finishes
+                 if ret_tools:
+                     tool_dict: Dict[str, ToolCall] = {x.name: x for x in tools} if tools else {}
+                     for tool in ret_tools:
+                         # Only yield tool calls that correspond to available tools
+                         if tools and tool.name not in tool_dict:
+                             continue
+
+                         yield tool, ChunkEnum.TOOL
+
+                 return
+
+             except Exception as e:
+                 logger.exception(f"stream chat with LiteLLM model={self.model_name} encounter error: {e}")
+
+                 # Handle retry logic
+                 if i == self.max_retries - 1 and self.raise_exception:
+                     raise e
+                 else:
+                     error_msg = str(e.args) if hasattr(e, 'args') else str(e)
+                     yield error_msg, ChunkEnum.ERROR
+
+     async def astream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
+         """
+         Async stream chat completions from LiteLLM with support for multiple providers.
+
+         This method handles async streaming responses and categorizes chunks into different types:
+         - ANSWER: Regular response content from the model
+         - TOOL: Tool calls that need to be executed
+         - USAGE: Token usage statistics (when available)
+         - ERROR: Error information from failed requests
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             **kwargs: Additional parameters passed to LiteLLM
+
+         Yields:
+             Tuple of (chunk_content, ChunkEnum) for each streaming piece
+         """
+         from litellm import acompletion
+         for i in range(self.max_retries):
+             try:
+                 # Prepare parameters for LiteLLM
+                 params = self._litellm_params.copy()
+                 params.update(kwargs)
+                 params.update({
+                     "messages": [x.simple_dump() for x in messages],
+                     "stream": True,
+                 })
+
+                 # Add tools if provided
+                 if tools:
+                     params["tools"] = [x.simple_input_dump() for x in tools]
+                     params["tool_choice"] = self.tool_choice if self.tool_choice else "auto"
+
+                 # Create async streaming completion using LiteLLM
+                 completion_response = await acompletion(**params)
+
+                 # Initialize tool call tracking
+                 ret_tools: List[ToolCall] = [] # Accumulate tool calls across chunks
+
+                 # Process each chunk in the async streaming response
+                 async for chunk in completion_response:
+                     try:
+                         # Handle chunks without choices (usually usage/metadata)
+                         if not hasattr(chunk, 'choices') or not chunk.choices:
+                             # Check for usage information
+                             if hasattr(chunk, 'usage') and chunk.usage:
+                                 yield chunk.usage, ChunkEnum.USAGE
+                             continue
+
+                         delta = chunk.choices[0].delta
+
+                         # Handle regular response content
+                         if hasattr(delta, 'content') and delta.content is not None:
+                             yield delta.content, ChunkEnum.ANSWER
+
+                         # Handle tool calls (function calling)
+                         if hasattr(delta, 'tool_calls') and delta.tool_calls is not None:
+                             for tool_call in delta.tool_calls:
+                                 index = getattr(tool_call, 'index', 0)
+
+                                 # Ensure we have enough tool call slots
+                                 while len(ret_tools) <= index:
+                                     ret_tools.append(ToolCall(index=index))
+
+                                 # Accumulate tool call information across chunks
+                                 if hasattr(tool_call, 'id') and tool_call.id:
+                                     ret_tools[index].id += tool_call.id
+
+                                 if (hasattr(tool_call, 'function') and tool_call.function and
+                                         hasattr(tool_call.function, 'name') and tool_call.function.name):
+                                     ret_tools[index].name += tool_call.function.name
+
+                                 if (hasattr(tool_call, 'function') and tool_call.function and
+                                         hasattr(tool_call.function, 'arguments') and tool_call.function.arguments):
+                                     ret_tools[index].arguments += tool_call.function.arguments
+
+                     except Exception as chunk_error:
+                         logger.warning(f"Error processing async chunk: {chunk_error}")
+                         continue
+
+                 # Yield completed tool calls after streaming finishes
+                 if ret_tools:
+                     tool_dict: Dict[str, ToolCall] = {x.name: x for x in tools} if tools else {}
+                     for tool in ret_tools:
+                         # Only yield tool calls that correspond to available tools
+                         if tools and tool.name not in tool_dict:
+                             continue
+
+                         yield tool, ChunkEnum.TOOL
+
+                 return
+
+             except Exception as e:
+                 logger.exception(f"async stream chat with LiteLLM model={self.model_name} encounter error: {e}")
+
+                 # Handle retry logic with async sleep
+                 await asyncio.sleep(1 + i)
+
+                 if i == self.max_retries - 1 and self.raise_exception:
+                     raise e
+                 else:
+                     error_msg = str(e.args) if hasattr(e, 'args') else str(e)
+                     yield error_msg, ChunkEnum.ERROR
+
+     def _chat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+               **kwargs) -> Message:
+         """
+         Perform a complete chat completion by aggregating streaming chunks from LiteLLM.
+
+         This method consumes the entire streaming response and combines all
+         chunks into a single Message object. It separates regular answer content
+         and tool calls, providing a complete response.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             enable_stream_print: Whether to print streaming response to console
+             **kwargs: Additional parameters passed to LiteLLM
+
+         Returns:
+             Complete Message with all content aggregated
+         """
+         answer_content = "" # Final response content
+         tool_calls = [] # List of tool calls to execute
+
+         # Consume streaming response and aggregate chunks by type
+         for chunk, chunk_enum in self.stream_chat(messages, tools, **kwargs):
+             if chunk_enum is ChunkEnum.USAGE:
+                 # Display token usage statistics
+                 if enable_stream_print:
+                     if hasattr(chunk, 'model_dump_json'):
+                         print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                     else:
+                         print(f"\n<usage>{chunk}</usage>")
+
+             elif chunk_enum is ChunkEnum.ANSWER:
+                 if enable_stream_print:
+                     print(chunk, end="")
+                 answer_content += chunk
+
+             elif chunk_enum is ChunkEnum.TOOL:
+                 if enable_stream_print:
+                     if hasattr(chunk, 'model_dump_json'):
+                         print(f"\n<tool>{chunk.model_dump_json()}</tool>", end="")
+                     else:
+                         print(f"\n<tool>{chunk}</tool>", end="")
+                 tool_calls.append(chunk)
+
+             elif chunk_enum is ChunkEnum.ERROR:
+                 if enable_stream_print:
+                     print(f"\n<error>{chunk}</error>", end="")
+
+         # Construct complete response message
+         return Message(
+             role=Role.ASSISTANT,
+             content=answer_content,
+             tool_calls=tool_calls
+         )
+
+     async def _achat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+                      **kwargs) -> Message:
+         """
+         Perform an async complete chat completion by aggregating streaming chunks from LiteLLM.
+
+         This method consumes the entire async streaming response and combines all
+         chunks into a single Message object. It separates regular answer content
+         and tool calls, providing a complete response.
+
+         Args:
+             messages: List of conversation messages
+             tools: Optional list of tools available to the model
+             enable_stream_print: Whether to print streaming response to console
+             **kwargs: Additional parameters passed to LiteLLM
+
+         Returns:
+             Complete Message with all content aggregated
+         """
+         answer_content = "" # Final response content
+         tool_calls = [] # List of tool calls to execute
+
+         # Consume async streaming response and aggregate chunks by type
+         async for chunk, chunk_enum in self.astream_chat(messages, tools, **kwargs):
+             if chunk_enum is ChunkEnum.USAGE:
+                 # Display token usage statistics
+                 if enable_stream_print:
+                     if hasattr(chunk, 'model_dump_json'):
+                         print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                     else:
+                         print(f"\n<usage>{chunk}</usage>")
+
+             elif chunk_enum is ChunkEnum.ANSWER:
+                 if enable_stream_print:
+                     print(chunk, end="")
+                 answer_content += chunk
+
+             elif chunk_enum is ChunkEnum.TOOL:
+                 if enable_stream_print:
+                     if hasattr(chunk, 'model_dump_json'):
+                         print(f"\n<tool>{chunk.model_dump_json()}</tool>", end="")
+                     else:
+                         print(f"\n<tool>{chunk}</tool>", end="")
+                 tool_calls.append(chunk)
+
+             elif chunk_enum is ChunkEnum.ERROR:
+                 if enable_stream_print:
+                     print(f"\n<error>{chunk}</error>", end="")
+
+         # Construct complete response message
+         return Message(
+             role=Role.ASSISTANT,
+             content=answer_content,
+             tool_calls=tool_calls
+         )
+
+
+ async def async_main():
+     """
+     Async test function for LiteLLMBaseLLM.
+
+     This function demonstrates how to use the LiteLLMBaseLLM class
+     with async operations. It requires proper environment variables
+     to be set for the chosen LLM provider.
+     """
+     from flowllm.utils.common_utils import load_env
+
+     load_env()
+
+     # Example with OpenAI model through LiteLLM
+     model_name = "qwen-max-2025-01-25" # LiteLLM will route to OpenAI
+     llm = LiteLLMBaseLLM(model_name=model_name)
+
+     # Test async chat
+     message: Message = await llm.achat(
+         [Message(role=Role.USER, content="Hello! How are you?")],
+         [],
+         enable_stream_print=True
+     )
+     print("\nAsync result:", message)
+
+
+ def main():
+     """
+     Sync test function for LiteLLMBaseLLM.
+
+     This function demonstrates how to use the LiteLLMBaseLLM class
+     with synchronous operations. It requires proper environment variables
+     to be set for the chosen LLM provider.
+     """
+     from flowllm.utils.common_utils import load_env
+
+     load_env()
+
+     # Example with OpenAI model through LiteLLM
+     model_name = "qwen-max-2025-01-25" # LiteLLM will route to OpenAI
+     llm = LiteLLMBaseLLM(model_name=model_name)
+
+     # Test sync chat
+     message: Message = llm.chat(
+         [Message(role=Role.USER, content="Hello! How are you?")],
+         [],
+         enable_stream_print=True
+     )
+     print("\nSync result:", message)
+
+
+ if __name__ == "__main__":
+     main()
+
+     # import asyncio
+     #
+     # asyncio.run(async_main())
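
For callers that want the raw stream rather than the aggregated Message, the (chunk, ChunkEnum) protocol described in the docstrings above can be consumed directly. A minimal sketch, assuming the same placeholder model name and environment configuration as in the earlier example:

    from flowllm.enumeration.chunk_enum import ChunkEnum
    from flowllm.enumeration.role import Role
    from flowllm.llm.litellm_llm import LiteLLMBaseLLM
    from flowllm.schema.message import Message

    llm = LiteLLMBaseLLM(model_name="gpt-4o-mini")  # placeholder model name

    answer, tool_calls = "", []
    for chunk, kind in llm.stream_chat([Message(role=Role.USER, content="What is 2 + 2?")]):
        if kind is ChunkEnum.ANSWER:
            answer += chunk              # incremental text tokens
        elif kind is ChunkEnum.TOOL:
            tool_calls.append(chunk)     # fully accumulated ToolCall objects
        elif kind is ChunkEnum.USAGE:
            print(chunk)                 # token usage, when the provider reports it
        elif kind is ChunkEnum.ERROR:
            print(f"stream error: {chunk}")

    print(answer, tool_calls)

Note that tool calls only surface after the stream finishes: the implementation accumulates partial id/name/arguments fragments across chunks and yields complete ToolCall objects at the end.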