flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +21 -0
- flowllm/app.py +15 -0
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/default.yaml +77 -0
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +242 -0
- flowllm/context/base_context.py +79 -0
- flowllm/context/flow_context.py +16 -0
- llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
- flowllm/context/registry.py +30 -0
- flowllm/context/service_context.py +147 -0
- flowllm/embedding_model/__init__.py +1 -0
- {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
- {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/expression_parser.py +171 -0
- flowllm/llm/__init__.py +2 -0
- {llmflow → flowllm}/llm/base_llm.py +100 -18
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +439 -0
- flowllm/op/__init__.py +11 -0
- llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +108 -0
- flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
- flowllm/op/akshare/get_ak_a_info_op.py +140 -0
- flowllm/op/base_llm_op.py +64 -0
- flowllm/op/base_op.py +148 -0
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/gallery/mock_op.py +42 -0
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +23 -0
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +21 -0
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/flow_response.py +12 -0
- flowllm/schema/message.py +35 -0
- flowllm/schema/service_config.py +72 -0
- flowllm/schema/tool_call.py +118 -0
- {llmflow → flowllm}/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -0
- flowllm/service/base_service.py +68 -0
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +79 -0
- flowllm/service/mcp_service.py +47 -0
- flowllm/storage/__init__.py +1 -0
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/storage/cache/data_cache.py +375 -0
- flowllm/storage/vector_store/__init__.py +3 -0
- flowllm/storage/vector_store/base_vector_store.py +44 -0
- {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
- {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
- llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
- flowllm/utils/common_utils.py +52 -0
- flowllm/utils/fetch_url.py +117 -0
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {llmflow → flowllm}/utils/timer.py +5 -4
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
- flowllm-0.1.2.dist-info/top_level.txt +1 -0
- flowllm-0.1.0.dist-info/RECORD +0 -66
- flowllm-0.1.0.dist-info/entry_points.txt +0 -3
- flowllm-0.1.0.dist-info/top_level.txt +0 -1
- llmflow/app.py +0 -53
- llmflow/config/config_parser.py +0 -80
- llmflow/config/mock_config.yaml +0 -58
- llmflow/embedding_model/__init__.py +0 -5
- llmflow/enumeration/agent_state.py +0 -8
- llmflow/llm/__init__.py +0 -5
- llmflow/llm/openai_compatible_llm.py +0 -283
- llmflow/mcp_server.py +0 -110
- llmflow/op/__init__.py +0 -10
- llmflow/op/base_op.py +0 -125
- llmflow/op/mock_op.py +0 -40
- llmflow/op/vector_store/__init__.py +0 -13
- llmflow/op/vector_store/recall_vector_store_op.py +0 -48
- llmflow/op/vector_store/update_vector_store_op.py +0 -28
- llmflow/op/vector_store/vector_store_action_op.py +0 -46
- llmflow/pipeline/pipeline.py +0 -94
- llmflow/pipeline/pipeline_context.py +0 -37
- llmflow/schema/app_config.py +0 -69
- llmflow/schema/experience.py +0 -144
- llmflow/schema/message.py +0 -68
- llmflow/schema/request.py +0 -32
- llmflow/schema/response.py +0 -29
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +0 -96
- llmflow/tool/__init__.py +0 -9
- llmflow/tool/base_tool.py +0 -80
- llmflow/tool/code_tool.py +0 -43
- llmflow/tool/dashscope_search_tool.py +0 -162
- llmflow/tool/mcp_tool.py +0 -77
- llmflow/tool/tavily_search_tool.py +0 -109
- llmflow/tool/terminate_tool.py +0 -23
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +0 -17
- llmflow/utils/file_handler.py +0 -25
- llmflow/utils/http_client.py +0 -156
- llmflow/utils/op_utils.py +0 -102
- llmflow/utils/registry.py +0 -33
- llmflow/vector_store/__init__.py +0 -7
- llmflow/vector_store/base_vector_store.py +0 -136
- {llmflow → flowllm/context}/__init__.py +0 -0
- {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
- {llmflow → flowllm}/enumeration/http_enum.py +0 -0
- {llmflow → flowllm}/enumeration/role.py +0 -0
- {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
- {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
- /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
- {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
- {llmflow/schema → flowllm/utils}/__init__.py +0 -0
- {llmflow → flowllm}/utils/singleton.py +0 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
@@ -1,109 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
import os
|
3
|
-
import re
|
4
|
-
import time
|
5
|
-
from typing import Literal
|
6
|
-
|
7
|
-
from loguru import logger
|
8
|
-
from pydantic import Field, model_validator, PrivateAttr
|
9
|
-
from tavily import TavilyClient
|
10
|
-
|
11
|
-
from llmflow.tool import TOOL_REGISTRY
|
12
|
-
from llmflow.tool.base_tool import BaseTool
|
13
|
-
|
14
|
-
|
15
|
-
@TOOL_REGISTRY.register()
class TavilySearchTool(BaseTool):
    """Web-search tool backed by the Tavily API, with an optional on-disk cache.

    ``execute`` runs a Tavily search, fetches the page text for every hit via
    ``extract`` and returns a dict keyed by URL. When ``enable_cache`` is set,
    results are stored per query in a JSON file under ``cache_path``.
    """

    name: str = "web_search"
    description: str = "Use query to retrieve relevant information from the internet."
    parameters: dict = {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "search query",
            }
        },
        "required": ["query"]
    }
    enable_print: bool = Field(default=True)
    enable_cache: bool = Field(default=False)
    cache_path: str = Field(default="./web_search_cache")
    topic: Literal["general", "news", "finance"] = Field(default="general", description="finance, general")

    _client: TavilyClient | None = PrivateAttr()

    @model_validator(mode="after")
    def init(self):
        """Create the cache directory and the Tavily client after validation."""
        # exist_ok avoids the race between an exists() check and makedirs()
        # when several tool instances start at the same time.
        os.makedirs(self.cache_path, exist_ok=True)

        self._client = TavilyClient()
        return self

    def load_cache(self, cache_name: str = "default") -> dict:
        """Return the cached ``{query: result}`` mapping, or ``{}`` if no cache file exists."""
        # NOTE: the file carries a ".jsonl" suffix but holds one JSON document.
        cache_file = os.path.join(self.cache_path, cache_name + ".jsonl")
        if not os.path.exists(cache_file):
            return {}

        # The cache is written with ensure_ascii=False, so it must be read
        # back as UTF-8 regardless of the platform default encoding.
        with open(cache_file, encoding="utf-8") as f:
            return json.load(f)

    def dump_cache(self, cache_dict: dict, cache_name: str = "default"):
        """Overwrite the cache file with ``cache_dict``."""
        cache_file = os.path.join(self.cache_path, cache_name + ".jsonl")
        # Explicit UTF-8: non-ASCII queries/results are written verbatim.
        with open(cache_file, "w", encoding="utf-8") as f:
            return json.dump(cache_dict, f, indent=2, ensure_ascii=False)

    @staticmethod
    def remove_urls_and_images(text):
        """Return ``text`` with every http(s) URL removed."""
        pattern = re.compile(r'https?://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]')
        return pattern.sub("", text)

    def post_process(self, response):
        """Log the response when ``enable_print`` is set and return it unchanged."""
        if self.enable_print:
            logger.info("response=\n" + json.dumps(response, indent=2, ensure_ascii=False))

        return response

    def execute(self, query: str = "", **kwargs):
        """Search the web for ``query`` and return the extracted page contents.

        Args:
            query: Non-empty search query.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A dict mapping each result URL to its Tavily search entry with an
            added ``raw_content`` field, or ``None`` when every attempt failed.

        Raises:
            AssertionError: If ``query`` is empty.
        """
        # Kept as an assert so callers catching AssertionError keep working.
        assert query, "Query cannot be empty"

        cache_dict = {}
        if self.enable_cache:
            cache_dict = self.load_cache()
            if query in cache_dict:
                return self.post_process(cache_dict[query])

        # presumably max_retries is declared on BaseTool — it is not defined in this class
        for i in range(self.max_retries):
            try:
                response = self._client.search(query=query, topic=self.topic)
                url_info_dict = {item["url"]: item for item in response["results"]}
                response_extract = self._client.extract(urls=list(url_info_dict), format="text")

                # Only URLs whose content was extracted make it into the result.
                final_result = {}
                for item in response_extract["results"]:
                    url = item["url"]
                    final_result[url] = url_info_dict[url]
                    final_result[url]["raw_content"] = item["raw_content"]

                if self.enable_cache:
                    cache_dict[query] = final_result
                    self.dump_cache(cache_dict)

                return self.post_process(final_result)

            except Exception as e:
                logger.exception(f"tavily search with query={query} encounter error with e={e.args}")
                # Linear back-off between attempts; no point waiting after the last one.
                if i < self.max_retries - 1:
                    time.sleep(i + 1)

        return None
|
102
|
-
|
103
|
-
|
104
|
-
if __name__ == "__main__":
    # Manual smoke test: load API credentials from .env and run one search.
    from dotenv import load_dotenv

    load_dotenv()
    TavilySearchTool().execute(query="A股医药为什么一直涨")
|
llmflow/tool/terminate_tool.py
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
from llmflow.tool import TOOL_REGISTRY
|
2
|
-
from llmflow.tool.base_tool import BaseTool
|
3
|
-
|
4
|
-
|
5
|
-
@TOOL_REGISTRY.register()
class TerminateTool(BaseTool):
    """Tool the model calls to end the interaction with a final status."""

    name: str = "terminate"
    description: str = "If you can answer the user's question based on the context, be sure to use the **terminate** tool."
    parameters: dict = {
        "type": "object",
        "properties": {
            "status": {
                "type": "string",
                "description": "Please determine whether the user's question has been completed. (success / failure)",
                "enum": ["success", "failure"],
            },
        },
        "required": ["status"],
    }

    def execute(self, status: str):
        """Record whether ``status`` is an accepted value and return a closing message."""
        # NOTE(review): `success` is True for "failure" as well — it flags a
        # valid status, not a successful outcome. Confirm this is intended.
        accepted_statuses = ("success", "failure")
        self.success = status in accepted_statuses
        return f"The interaction has been completed with status: {status}"
|
llmflow/utils/__init__.py
DELETED
File without changes
|
llmflow/utils/common_utils.py
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
|
3
|
-
|
4
|
-
def camel_to_snake(content: str) -> str:
    """Convert a CamelCase identifier to snake_case.

    An underscore is inserted before every uppercase letter except one at the
    very start, then the result is lower-cased, e.g.
    ``BaseWorker`` -> ``base_worker``. Consecutive capitals are split
    individually (``HTTPClient`` -> ``h_t_t_p_client``).
    """
    upper_boundary = re.compile(r'(?<!^)(?=[A-Z])')
    return upper_boundary.sub('_', content).lower()
|
10
|
-
|
11
|
-
|
12
|
-
def snake_to_camel(content: str) -> str:
    """Convert a snake_case identifier to CamelCase.

    Each underscore-separated piece is passed through ``str.capitalize`` (first
    character upper-cased, the rest lower-cased) and the pieces are joined,
    e.g. ``base_worker`` -> ``BaseWorker``.
    """
    pieces = content.split("_")
    return "".join(map(str.capitalize, pieces))
|
llmflow/utils/file_handler.py
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
import yaml
|
5
|
-
|
6
|
-
|
7
|
-
class FileHandler:
|
8
|
-
|
9
|
-
def __init__(self, file_path: str | Path):
|
10
|
-
self.file_path: Path = Path(file_path)
|
11
|
-
suffix = Path(self.file_path).suffix
|
12
|
-
if suffix == ".json":
|
13
|
-
self._obj = json
|
14
|
-
elif suffix == ".yaml":
|
15
|
-
self._obj = yaml
|
16
|
-
else:
|
17
|
-
raise ValueError(f"unsupported file type={suffix}")
|
18
|
-
|
19
|
-
def dump(self, config, **kwargs):
|
20
|
-
with open(self.file_path, "w") as f:
|
21
|
-
self._obj.dump(config, f, **kwargs)
|
22
|
-
|
23
|
-
def load(self, **kwargs):
|
24
|
-
with open(self.file_path, "r") as f:
|
25
|
-
return self._obj.load(f, **kwargs)
|
llmflow/utils/http_client.py
DELETED
@@ -1,156 +0,0 @@
|
|
1
|
-
import http
|
2
|
-
import time
|
3
|
-
from typing import Any
|
4
|
-
|
5
|
-
import requests
|
6
|
-
from loguru import logger
|
7
|
-
from pydantic import BaseModel, Field, PrivateAttr, model_validator
|
8
|
-
|
9
|
-
from llmflow.enumeration.http_enum import HttpEnum
|
10
|
-
|
11
|
-
|
12
|
-
class HttpClient(BaseModel):
    """Small ``requests`` wrapper with retries, back-off and optional keep-alive.

    Usable as a context manager: leaving the ``with`` block closes the session
    (if any) and stores the elapsed time in ``request_time_cost``. Subclasses
    customise ``parse_result`` and ``return_default``.
    """

    url: str = Field(default="")
    keep_alive: bool = Field(default=False, description="if true, use session to keep long connection")
    timeout: int = Field(default=300, description="request timeout, second")

    return_default_if_error: bool = Field(default=True)
    request_start_time: float = Field(default_factory=time.time)
    request_time_cost: float = Field(default=0.0, description="request time cost")

    retry_sleep_time: float = Field(default=0.5, description="interval time for retry")
    retry_time_multiplier: float = Field(default=2.0, description="retry time multiplier")
    retry_max_count: int = Field(default=1, description="maximum number of retries")

    _client: Any = PrivateAttr()

    @model_validator(mode="after")
    def init_client(self):
        """Use a ``requests.Session`` when ``keep_alive`` is set, else the module itself."""
        self._client = requests.Session() if self.keep_alive else requests
        return self

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
        self.request_time_cost = time.time() - self.request_start_time

    def close(self):
        """Close the underlying session; a no-op when no session is in use."""
        if isinstance(self._client, requests.Session):
            self._client.close()

    def _request(self,
                 data: str = None,
                 json_data: dict = None,
                 headers: dict = None,
                 stream: bool = False,
                 http_enum: HttpEnum | str = HttpEnum.POST):
        """Send one request to ``self.url`` and return the raw response.

        Raises:
            NotImplementedError: For methods other than POST and GET.
            RuntimeError: If the status code is not 200.
        """
        if isinstance(http_enum, str):
            http_enum = HttpEnum(http_enum)

        if http_enum is HttpEnum.POST:
            response: requests.Response = self._client.post(url=self.url,
                                                            data=data,
                                                            json=json_data,
                                                            headers=headers,
                                                            stream=stream,
                                                            timeout=self.timeout)

        elif http_enum is HttpEnum.GET:
            response: requests.Response = self._client.get(url=self.url,
                                                           data=data,
                                                           json=json_data,
                                                           headers=headers,
                                                           stream=stream,
                                                           timeout=self.timeout)

        else:
            raise NotImplementedError

        if response.status_code != http.HTTPStatus.OK:
            # Error bodies are often not JSON (HTML error pages, empty body);
            # fall back to the raw text so the real failure is reported
            # instead of a secondary decode error.
            try:
                content = response.json()
            except ValueError:
                content = response.text
            raise RuntimeError(f"request failed! content={content}")

        return response

    def parse_result(self, response: requests.Response | Any = None, **kwargs):
        """Turn a response into the caller-facing result (default: decoded JSON).

        ``request_stream`` calls this with ``line=...`` and no ``response``,
        so streaming subclasses must override it.
        """
        return response.json()

    def return_default(self, **kwargs):
        """Value returned when all retries fail and ``return_default_if_error`` is set."""
        return None

    def request(self,
                data: str | Any = None,
                json_data: dict = None,
                headers: dict = None,
                http_enum: HttpEnum | str = HttpEnum.POST,
                **kwargs):
        """Send a request with retries and return ``parse_result``'s output.

        After the last failed attempt, returns ``return_default()`` when
        ``return_default_if_error`` is set, otherwise re-raises the error.
        Extra ``kwargs`` are forwarded to ``parse_result`` only.
        """
        retry_sleep_time = self.retry_sleep_time
        for i in range(self.retry_max_count):
            try:
                response = self._request(data=data, json_data=json_data, headers=headers, http_enum=http_enum)
                return self.parse_result(response=response,
                                         data=data,
                                         json_data=json_data,
                                         headers=headers,
                                         http_enum=http_enum,
                                         **kwargs)

            except Exception as e:
                logger.exception(f"{self.__class__.__name__} {i}th request failed with args={e.args}")

                if i == self.retry_max_count - 1:
                    if self.return_default_if_error:
                        return self.return_default()
                    raise

                # Wait the configured interval first, then grow it, so the
                # first retry honours retry_sleep_time as documented.
                time.sleep(retry_sleep_time)
                retry_sleep_time *= self.retry_time_multiplier

        return None

    def request_stream(self,
                       data: str = None,
                       json_data: dict = None,
                       headers: dict = None,
                       http_enum: HttpEnum | str = HttpEnum.POST,
                       **kwargs):
        """Generator variant of ``request``: yields one parsed item per response line.

        ``request_context`` is a dict shared across all ``parse_result`` calls
        of one attempt. Because this is a generator, the ``return_default()``
        value on failure is not yielded, and a retry after a mid-stream error
        yields from the beginning of the new response again.
        """
        retry_sleep_time = self.retry_sleep_time
        for i in range(self.retry_max_count):
            try:
                response = self._request(data=data,
                                         json_data=json_data,
                                         headers=headers,
                                         stream=True,
                                         http_enum=http_enum)
                request_context = {}
                try:
                    for iter_idx, line in enumerate(response.iter_lines()):
                        yield self.parse_result(line=line,
                                                request_context=request_context,
                                                index=iter_idx,
                                                data=data,
                                                json_data=json_data,
                                                headers=headers,
                                                http_enum=http_enum,
                                                **kwargs)
                finally:
                    # Release the streamed connection even when the consumer
                    # stops early or parsing fails.
                    response.close()

                return None

            except Exception as e:
                logger.exception(f"{self.__class__.__name__} {i}th request failed with args={e.args}")

                if i == self.retry_max_count - 1:
                    if self.return_default_if_error:
                        return self.return_default()
                    raise

                time.sleep(retry_sleep_time)
                retry_sleep_time *= self.retry_time_multiplier

        return None
|
llmflow/utils/op_utils.py
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
from typing import List
|
2
|
-
|
3
|
-
from llmflow.enumeration.role import Role
|
4
|
-
from llmflow.schema.message import Message, Trajectory
|
5
|
-
import json
|
6
|
-
import re
|
7
|
-
from loguru import logger
|
8
|
-
|
9
|
-
def merge_messages_content(messages: List[Message | dict]) -> str:
    """Render a conversation as one text block, one "### step.N" section per message.

    Dict entries are first converted to ``Message``. Only assistant, user and
    tool messages are rendered; other roles are skipped, though they still
    consume a step index.
    """

    def _render(step: int, msg: Message):
        # Returns the section text for one message, or None for skipped roles.
        role = msg.role
        if role is Role.ASSISTANT:
            header = f"### step.{step} role={msg.role.value} content=\n{msg.reasoning_content}\n\n{msg.content}\n"
            calls = "".join(
                f" - tool call={tool_call.name}\n params={tool_call.arguments}\n"
                for tool_call in (msg.tool_calls or [])
            )
            return header + calls
        if role is Role.USER:
            return f"### step.{step} role={msg.role.value} content=\n{msg.content}\n"
        if role is Role.TOOL:
            return f"### step.{step} role={msg.role.value} tool call result=\n{msg.content}\n"
        return None

    sections = []
    for i, message in enumerate(messages):
        if isinstance(message, dict):
            message = Message(**message)

        rendered = _render(i, message)
        if rendered is not None:
            sections.append(rendered)

    return "\n".join(sections)
|
31
|
-
|
32
|
-
|
33
|
-
def parse_json_experience_response(response: str) -> List[dict]:
    """Extract experience dicts from a JSON-formatted model response.

    The first ```json fenced block is tried first: a list is filtered down to
    entries that look like experiences, and a single matching object is
    wrapped in a list. Otherwise the whole response is parsed as JSON and a
    list or dict is returned unfiltered. Invalid JSON is logged and yields
    ``[]``.
    """

    def _is_experience(candidate) -> bool:
        # An experience needs "experience" plus "when_to_use" or "condition".
        if not isinstance(candidate, dict):
            return False
        if "experience" not in candidate:
            return False
        return "when_to_use" in candidate or "condition" in candidate

    try:
        fenced = re.findall(r'```json\s*([\s\S]*?)\s*```', response)
        if fenced:
            payload = json.loads(fenced[0])

            if isinstance(payload, list):
                return [entry for entry in payload if _is_experience(entry)]

            if _is_experience(payload):
                return [payload]

        # Fallback: treat the entire response as JSON.
        payload = json.loads(response)
        if isinstance(payload, list):
            return payload
        if isinstance(payload, dict):
            return [payload]

    except json.JSONDecodeError as e:
        logger.warning(f"Failed to parse JSON experience response: {e}")

    return []
|
74
|
-
|
75
|
-
def get_trajectory_context(trajectory: Trajectory, step_sequence: List[Message]) -> str:
    """Describe the surroundings of ``step_sequence`` inside ``trajectory``.

    The result starts with the trajectory's query, followed by up to two
    messages before and up to two after the sequence, each cut to 100
    characters. The sequence is located by the first message equal to
    ``step_sequence[0]``; when none matches, position 0 is used. Any error is
    logged and only the query line is returned.
    """
    try:
        all_steps = trajectory.messages
        # First index whose message equals the head of the sequence, else 0.
        anchor = next(
            (idx for idx, candidate in enumerate(all_steps) if candidate == step_sequence[0]),
            0,
        )
        tail_start = anchor + len(step_sequence)

        preceding = all_steps[max(0, anchor - 2):anchor]
        following = all_steps[tail_start:tail_start + 2]

        pieces = [f"Query: {trajectory.metadata.get('query', 'N/A')}\n"]

        if preceding:
            bullet_lines = [f"- {step.content[:100]}..." for step in preceding]
            pieces.append("Previous steps:\n" + "\n".join(bullet_lines) + "\n")

        if following:
            bullet_lines = [f"- {step.content[:100]}..." for step in following]
            pieces.append("Following steps:\n" + "\n".join(bullet_lines))

        return "".join(pieces)

    except Exception as e:
        logger.error(f"Error getting trajectory context: {e}")
        return f"Query: {trajectory.metadata.get('query', 'N/A')}"
|
llmflow/utils/registry.py
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
from typing import List
|
2
|
-
|
3
|
-
from loguru import logger
|
4
|
-
|
5
|
-
from llmflow.utils.common_utils import camel_to_snake
|
6
|
-
|
7
|
-
|
8
|
-
class Registry(object):
    """Name -> class lookup table filled through a decorator."""

    def __init__(self):
        self._registry = {}

    def register(self, name: str = ""):
        """Return a class decorator that stores the class under ``name``.

        With an empty ``name`` the key is the snake_case form of the class
        name. Registering an existing key logs a warning and overwrites it.
        """

        def decorator(cls):
            key = name or camel_to_snake(cls.__name__)
            already_present = key in self._registry
            if already_present:
                logger.warning(f"name={key} is already registered, will be overwritten.")
            self._registry[key] = cls
            return cls

        return decorator

    def __getitem__(self, name: str):
        """Return the class registered under ``name``; raise ``KeyError`` if absent."""
        if name in self._registry:
            return self._registry[name]
        raise KeyError(f"name={name} is not registered!")

    def __contains__(self, name: str):
        return name in self._registry

    @property
    def registered_names(self) -> List[str]:
        """All registered keys in sorted order."""
        return sorted(self._registry)
|
llmflow/vector_store/__init__.py
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
from llmflow.utils.registry import Registry
|
2
|
-
|
3
|
-
VECTOR_STORE_REGISTRY = Registry()
|
4
|
-
|
5
|
-
from llmflow.vector_store.es_vector_store import EsVectorStore
|
6
|
-
from llmflow.vector_store.chroma_vector_store import ChromaVectorStore
|
7
|
-
from llmflow.vector_store.file_vector_store import FileVectorStore
|
@@ -1,136 +0,0 @@
|
|
1
|
-
import fcntl
|
2
|
-
import json
|
3
|
-
from abc import ABC
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import List, Iterable
|
6
|
-
|
7
|
-
from loguru import logger
|
8
|
-
from pydantic import BaseModel, Field
|
9
|
-
from tqdm import tqdm
|
10
|
-
|
11
|
-
from llmflow.embedding_model.base_embedding_model import BaseEmbeddingModel
|
12
|
-
from llmflow.schema.vector_node import VectorNode
|
13
|
-
|
14
|
-
|
15
|
-
class BaseVectorStore(BaseModel, ABC):
    """Base class for vector stores organised into workspaces.

    Subclasses implement the primitive operations (``exist_workspace``,
    ``create_workspace``, ``delete_workspace``, ``_iter_workspace_nodes``,
    ``search``, ``insert``, ``delete``). This class builds dump / load / copy
    on top of them, using one ``<workspace_id>.jsonl`` file per workspace.
    """

    embedding_model: BaseEmbeddingModel | None = Field(default=None)
    batch_size: int = Field(default=1024)

    @staticmethod
    def _load_from_path(workspace_id: str, path: str | Path, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
        """Yield nodes from ``<path>/<workspace_id>.jsonl`` under a shared file lock.

        Each non-blank line is one JSON object. ``callback_fn`` (if given)
        turns that dict into a node; otherwise ``VectorNode(**node_dict,
        **kwargs)`` is built. Every yielded node has ``workspace_id`` set.
        A missing file is logged and yields nothing.
        """
        workspace_path = Path(path) / f"{workspace_id}.jsonl"
        if not workspace_path.exists():
            logger.warning(f"workspace_path={workspace_path} is not exists!")
            return

        # Dumps are written with ensure_ascii=False by default, so read as
        # UTF-8 rather than the platform default encoding.
        with workspace_path.open(encoding="utf-8") as f:
            fcntl.flock(f, fcntl.LOCK_SH)
            try:
                for line in tqdm(f, desc="load from path"):
                    if line.strip():
                        node_dict = json.loads(line.strip())
                        if callback_fn:
                            node = callback_fn(node_dict)
                        else:
                            node = VectorNode(**node_dict, **kwargs)
                        node.workspace_id = workspace_id
                        yield node

            finally:
                fcntl.flock(f, fcntl.LOCK_UN)

    @staticmethod
    def _dump_to_path(nodes: Iterable[VectorNode], workspace_id: str, path: str | Path = "", callback_fn=None,
                      ensure_ascii: bool = False, **kwargs):
        """Write ``nodes`` to ``<path>/<workspace_id>.jsonl``, one JSON object per line.

        ``callback_fn`` (if given) converts a node to a dict; otherwise
        ``node.model_dump()`` is used. Extra ``kwargs`` are passed to
        ``json.dumps``. Returns ``{"size": <number of nodes written>}``.
        """
        dump_path: Path = Path(path)
        dump_path.mkdir(parents=True, exist_ok=True)
        dump_file = dump_path / f"{workspace_id}.jsonl"

        count = 0
        # Open in append mode so existing content is NOT truncated before the
        # exclusive lock is held; opening with "w" would wipe the file under
        # a reader that still holds the shared lock.
        with dump_file.open("a", encoding="utf-8") as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                f.truncate(0)
                for node in tqdm(nodes, desc="dump to path"):
                    node.workspace_id = workspace_id
                    if callback_fn:
                        node_dict = callback_fn(node)
                    else:
                        node_dict = node.model_dump()
                    assert isinstance(node_dict, dict)
                    f.write(json.dumps(node_dict, ensure_ascii=ensure_ascii, **kwargs))
                    f.write("\n")
                    count += 1

                return {"size": count}
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        raise NotImplementedError

    def delete_workspace(self, workspace_id: str, **kwargs):
        raise NotImplementedError

    def create_workspace(self, workspace_id: str, **kwargs):
        raise NotImplementedError

    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
        raise NotImplementedError

    def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
        """Dump a workspace to disk; returns ``{}`` if the workspace does not exist.

        Note that ``kwargs`` are forwarded to ``exist_workspace``,
        ``_iter_workspace_nodes`` and ``_dump_to_path`` alike.
        """
        if not self.exist_workspace(workspace_id=workspace_id, **kwargs):
            logger.warning(f"workspace_id={workspace_id} is not exist!")
            return {}

        return self._dump_to_path(nodes=self._iter_workspace_nodes(workspace_id=workspace_id, **kwargs),
                                  workspace_id=workspace_id,
                                  path=path,
                                  callback_fn=callback_fn,
                                  **kwargs)

    def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None, callback_fn=None,
                       **kwargs):
        """Recreate a workspace from ``nodes`` plus the nodes stored under ``path``.

        An existing workspace with the same id is deleted first. Returns
        ``{"size": <number of nodes inserted>}``.
        """
        if self.exist_workspace(workspace_id, **kwargs):
            self.delete_workspace(workspace_id=workspace_id, **kwargs)
            logger.info(f"delete workspace_id={workspace_id}")

        self.create_workspace(workspace_id=workspace_id, **kwargs)

        all_nodes: List[VectorNode] = []
        if nodes:
            all_nodes.extend(nodes)
        all_nodes.extend(
            self._load_from_path(path=path, workspace_id=workspace_id, callback_fn=callback_fn, **kwargs))
        self.insert(nodes=all_nodes, workspace_id=workspace_id, **kwargs)
        return {"size": len(all_nodes)}

    def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
        """Copy all nodes from one workspace to another in ``batch_size`` chunks.

        The destination is created when missing. Returns ``{}`` if the source
        does not exist, otherwise ``{"size": <number of nodes copied>}``.
        """
        if not self.exist_workspace(workspace_id=src_workspace_id, **kwargs):
            logger.warning(f"src_workspace_id={src_workspace_id} is not exist!")
            return {}

        if not self.exist_workspace(dest_workspace_id, **kwargs):
            self.create_workspace(workspace_id=dest_workspace_id, **kwargs)

        nodes = []
        node_size = 0
        for node in self._iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
            nodes.append(node)
            node_size += 1
            if len(nodes) >= self.batch_size:
                self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
                # Start a fresh list instead of clearing in place, so an
                # insert() that keeps a reference to the batch is unaffected.
                nodes = []

        if nodes:
            self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
        return {"size": node_size}

    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
        raise NotImplementedError

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        raise NotImplementedError

    def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
        raise NotImplementedError
|
136
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|