flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (141)
  1. flowllm/__init__.py +21 -0
  2. flowllm/app.py +15 -0
  3. flowllm/client/__init__.py +25 -0
  4. flowllm/client/async_http_client.py +81 -0
  5. flowllm/client/http_client.py +81 -0
  6. flowllm/client/mcp_client.py +133 -0
  7. flowllm/client/sync_mcp_client.py +116 -0
  8. flowllm/config/__init__.py +1 -0
  9. flowllm/config/default.yaml +77 -0
  10. flowllm/config/empty.yaml +37 -0
  11. flowllm/config/pydantic_config_parser.py +242 -0
  12. flowllm/context/base_context.py +79 -0
  13. flowllm/context/flow_context.py +16 -0
  14. llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
  15. flowllm/context/registry.py +30 -0
  16. flowllm/context/service_context.py +147 -0
  17. flowllm/embedding_model/__init__.py +1 -0
  18. {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
  19. {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
  20. flowllm/flow/__init__.py +1 -0
  21. flowllm/flow/base_flow.py +72 -0
  22. flowllm/flow/base_tool_flow.py +15 -0
  23. flowllm/flow/gallery/__init__.py +8 -0
  24. flowllm/flow/gallery/cmd_flow.py +11 -0
  25. flowllm/flow/gallery/code_tool_flow.py +30 -0
  26. flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
  27. flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
  28. flowllm/flow/gallery/expression_tool_flow.py +18 -0
  29. flowllm/flow/gallery/mock_tool_flow.py +67 -0
  30. flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
  31. flowllm/flow/gallery/terminate_tool_flow.py +30 -0
  32. flowllm/flow/parser/expression_parser.py +171 -0
  33. flowllm/llm/__init__.py +2 -0
  34. {llmflow → flowllm}/llm/base_llm.py +100 -18
  35. flowllm/llm/litellm_llm.py +455 -0
  36. flowllm/llm/openai_compatible_llm.py +439 -0
  37. flowllm/op/__init__.py +11 -0
  38. llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
  39. flowllm/op/akshare/__init__.py +3 -0
  40. flowllm/op/akshare/get_ak_a_code_op.py +108 -0
  41. flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
  42. flowllm/op/akshare/get_ak_a_info_op.py +140 -0
  43. flowllm/op/base_llm_op.py +64 -0
  44. flowllm/op/base_op.py +148 -0
  45. flowllm/op/base_ray_op.py +313 -0
  46. flowllm/op/code/__init__.py +1 -0
  47. flowllm/op/code/execute_code_op.py +42 -0
  48. flowllm/op/gallery/__init__.py +2 -0
  49. flowllm/op/gallery/mock_op.py +42 -0
  50. flowllm/op/gallery/terminate_op.py +29 -0
  51. flowllm/op/parallel_op.py +23 -0
  52. flowllm/op/search/__init__.py +3 -0
  53. flowllm/op/search/dashscope_deep_research_op.py +260 -0
  54. flowllm/op/search/dashscope_search_op.py +179 -0
  55. flowllm/op/search/dashscope_search_prompt.yaml +13 -0
  56. flowllm/op/search/tavily_search_op.py +102 -0
  57. flowllm/op/sequential_op.py +21 -0
  58. flowllm/schema/flow_request.py +12 -0
  59. flowllm/schema/flow_response.py +12 -0
  60. flowllm/schema/message.py +35 -0
  61. flowllm/schema/service_config.py +72 -0
  62. flowllm/schema/tool_call.py +118 -0
  63. {llmflow → flowllm}/schema/vector_node.py +1 -0
  64. flowllm/service/__init__.py +3 -0
  65. flowllm/service/base_service.py +68 -0
  66. flowllm/service/cmd_service.py +15 -0
  67. flowllm/service/http_service.py +79 -0
  68. flowllm/service/mcp_service.py +47 -0
  69. flowllm/storage/__init__.py +1 -0
  70. flowllm/storage/cache/__init__.py +1 -0
  71. flowllm/storage/cache/cache_data_handler.py +104 -0
  72. flowllm/storage/cache/data_cache.py +375 -0
  73. flowllm/storage/vector_store/__init__.py +3 -0
  74. flowllm/storage/vector_store/base_vector_store.py +44 -0
  75. {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
  76. {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
  77. llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
  78. flowllm/utils/common_utils.py +52 -0
  79. flowllm/utils/fetch_url.py +117 -0
  80. flowllm/utils/llm_utils.py +28 -0
  81. flowllm/utils/ridge_v2.py +54 -0
  82. {llmflow → flowllm}/utils/timer.py +5 -4
  83. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
  84. flowllm-0.1.2.dist-info/RECORD +99 -0
  85. flowllm-0.1.2.dist-info/entry_points.txt +2 -0
  86. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
  87. flowllm-0.1.2.dist-info/top_level.txt +1 -0
  88. flowllm-0.1.0.dist-info/RECORD +0 -66
  89. flowllm-0.1.0.dist-info/entry_points.txt +0 -3
  90. flowllm-0.1.0.dist-info/top_level.txt +0 -1
  91. llmflow/app.py +0 -53
  92. llmflow/config/config_parser.py +0 -80
  93. llmflow/config/mock_config.yaml +0 -58
  94. llmflow/embedding_model/__init__.py +0 -5
  95. llmflow/enumeration/agent_state.py +0 -8
  96. llmflow/llm/__init__.py +0 -5
  97. llmflow/llm/openai_compatible_llm.py +0 -283
  98. llmflow/mcp_server.py +0 -110
  99. llmflow/op/__init__.py +0 -10
  100. llmflow/op/base_op.py +0 -125
  101. llmflow/op/mock_op.py +0 -40
  102. llmflow/op/vector_store/__init__.py +0 -13
  103. llmflow/op/vector_store/recall_vector_store_op.py +0 -48
  104. llmflow/op/vector_store/update_vector_store_op.py +0 -28
  105. llmflow/op/vector_store/vector_store_action_op.py +0 -46
  106. llmflow/pipeline/pipeline.py +0 -94
  107. llmflow/pipeline/pipeline_context.py +0 -37
  108. llmflow/schema/app_config.py +0 -69
  109. llmflow/schema/experience.py +0 -144
  110. llmflow/schema/message.py +0 -68
  111. llmflow/schema/request.py +0 -32
  112. llmflow/schema/response.py +0 -29
  113. llmflow/service/__init__.py +0 -0
  114. llmflow/service/llmflow_service.py +0 -96
  115. llmflow/tool/__init__.py +0 -9
  116. llmflow/tool/base_tool.py +0 -80
  117. llmflow/tool/code_tool.py +0 -43
  118. llmflow/tool/dashscope_search_tool.py +0 -162
  119. llmflow/tool/mcp_tool.py +0 -77
  120. llmflow/tool/tavily_search_tool.py +0 -109
  121. llmflow/tool/terminate_tool.py +0 -23
  122. llmflow/utils/__init__.py +0 -0
  123. llmflow/utils/common_utils.py +0 -17
  124. llmflow/utils/file_handler.py +0 -25
  125. llmflow/utils/http_client.py +0 -156
  126. llmflow/utils/op_utils.py +0 -102
  127. llmflow/utils/registry.py +0 -33
  128. llmflow/vector_store/__init__.py +0 -7
  129. llmflow/vector_store/base_vector_store.py +0 -136
  130. {llmflow → flowllm/context}/__init__.py +0 -0
  131. {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
  132. {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
  133. {llmflow → flowllm}/enumeration/http_enum.py +0 -0
  134. {llmflow → flowllm}/enumeration/role.py +0 -0
  135. {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
  136. {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
  137. /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
  138. {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
  139. {llmflow/schema → flowllm/utils}/__init__.py +0 -0
  140. {llmflow → flowllm}/utils/singleton.py +0 -0
  141. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
llmflow/tool/tavily_search_tool.py DELETED
@@ -1,109 +0,0 @@
- import json
- import os
- import re
- import time
- from typing import Literal
-
- from loguru import logger
- from pydantic import Field, model_validator, PrivateAttr
- from tavily import TavilyClient
-
- from llmflow.tool import TOOL_REGISTRY
- from llmflow.tool.base_tool import BaseTool
-
-
- @TOOL_REGISTRY.register()
- class TavilySearchTool(BaseTool):
-     name: str = "web_search"
-     description: str = "Use query to retrieve relevant information from the internet."
-     parameters: dict = {
-         "type": "object",
-         "properties": {
-             "query": {
-                 "type": "string",
-                 "description": "search query",
-             }
-         },
-         "required": ["query"]
-     }
-     enable_print: bool = Field(default=True)
-     enable_cache: bool = Field(default=False)
-     cache_path: str = Field(default="./web_search_cache")
-     topic: Literal["general", "news", "finance"] = Field(default="general", description="finance, general")
-
-     _client: TavilyClient | None = PrivateAttr()
-
-     @model_validator(mode="after")
-     def init(self):
-         if not os.path.exists(self.cache_path):
-             os.makedirs(self.cache_path)
-
-         self._client = TavilyClient()
-         return self
-
-     def load_cache(self, cache_name: str = "default") -> dict:
-         cache_file = os.path.join(self.cache_path, cache_name + ".jsonl")
-         if not os.path.exists(cache_file):
-             return {}
-
-         with open(cache_file) as f:
-             return json.load(f)
-
-     def dump_cache(self, cache_dict: dict, cache_name: str = "default"):
-         cache_file = os.path.join(self.cache_path, cache_name + ".jsonl")
-         with open(cache_file, "w") as f:
-             return json.dump(cache_dict, f, indent=2, ensure_ascii=False)
-
-     @staticmethod
-     def remove_urls_and_images(text):
-         pattern = re.compile(r'https?://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]')
-         result = pattern.sub("", text)
-         return result
-
-     def post_process(self, response):
-         if self.enable_print:
-             logger.info("response=\n" + json.dumps(response, indent=2, ensure_ascii=False))
-
-         return response
-
-     def execute(self, query: str = "", **kwargs):
-         assert query, "Query cannot be empty"
-
-         cache_dict = {}
-         if self.enable_cache:
-             cache_dict = self.load_cache()
-             if query in cache_dict:
-                 return self.post_process(cache_dict[query])
-
-         for i in range(self.max_retries):
-             try:
-                 response = self._client.search(query=query, topic=self.topic)
-                 url_info_dict = {item["url"]: item for item in response["results"]}
-                 response_extract = self._client.extract(urls=[item["url"] for item in response["results"]],
-                                                         format="text")
-
-                 final_result = {}
-                 for item in response_extract["results"]:
-                     url = item["url"]
-                     final_result[url] = url_info_dict[url]
-                     final_result[url]["raw_content"] = item["raw_content"]
-
-                 if self.enable_cache:
-                     cache_dict[query] = final_result
-                     self.dump_cache(cache_dict)
-
-                 return self.post_process(final_result)
-
-             except Exception as e:
-                 logger.exception(f"tavily search with query={query} encounter error with e={e.args}")
-                 time.sleep(i + 1)
-
-         return None
-
-
- if __name__ == "__main__":
-     from dotenv import load_dotenv
-
-     load_dotenv()
-     tool = TavilySearchTool()
-     tool.execute(query="A股医药为什么一直涨")
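
Note that despite the ".jsonl" suffix, load_cache/dump_cache read and write a single JSON object per file (one dict keyed by query), not line-delimited JSON. A minimal offline sketch of that round-trip, with illustrative data:

import json
import os
import tempfile

# Emulate the deleted cache helpers: "default.jsonl" holds one JSON object
# mapping query -> search results, written with json.dump.
cache_path = tempfile.mkdtemp()
cache_file = os.path.join(cache_path, "default.jsonl")

cache_dict = {"example query": {"https://example.com": {"title": "stub result"}}}
with open(cache_file, "w") as f:
    json.dump(cache_dict, f, indent=2, ensure_ascii=False)

with open(cache_file) as f:
    assert json.load(f) == cache_dict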
llmflow/tool/terminate_tool.py DELETED
@@ -1,23 +0,0 @@
- from llmflow.tool import TOOL_REGISTRY
- from llmflow.tool.base_tool import BaseTool
-
-
- @TOOL_REGISTRY.register()
- class TerminateTool(BaseTool):
-     name: str = "terminate"
-     description: str = "If you can answer the user's question based on the context, be sure to use the **terminate** tool."
-     parameters: dict = {
-         "type": "object",
-         "properties": {
-             "status": {
-                 "type": "string",
-                 "description": "Please determine whether the user's question has been completed. (success / failure)",
-                 "enum": ["success", "failure"],
-             }
-         },
-         "required": ["status"],
-     }
-
-     def execute(self, status: str):
-         self.success = status in ["success", "failure"]
-         return f"The interaction has been completed with status: {status}"
llmflow/utils/__init__.py DELETED
File without changes
llmflow/utils/common_utils.py DELETED
@@ -1,17 +0,0 @@
- import re
-
-
- def camel_to_snake(content: str) -> str:
-     """
-     BaseWorker -> base_worker
-     """
-     snake_str = re.sub(r'(?<!^)(?=[A-Z])', '_', content).lower()
-     return snake_str
-
-
- def snake_to_camel(content: str) -> str:
-     """
-     base_worker -> BaseWorker
-     """
-     camel_str = "".join(x.capitalize() for x in content.split("_"))
-     return camel_str
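
These helpers back the registry's default naming (see llmflow/utils/registry.py below, where register() falls back to camel_to_snake of the class name). A self-contained check of the behavior, including the acronym edge case:

import re

def camel_to_snake(content: str) -> str:
    return re.sub(r'(?<!^)(?=[A-Z])', '_', content).lower()

def snake_to_camel(content: str) -> str:
    return "".join(x.capitalize() for x in content.split("_"))

assert camel_to_snake("TavilySearchTool") == "tavily_search_tool"
assert snake_to_camel("base_worker") == "BaseWorker"
# Every capital opens a new segment, so acronyms splinter:
assert camel_to_snake("HTTPClient") == "h_t_t_p_client"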
llmflow/utils/file_handler.py DELETED
@@ -1,25 +0,0 @@
- import json
- from pathlib import Path
-
- import yaml
-
-
- class FileHandler:
-
-     def __init__(self, file_path: str | Path):
-         self.file_path: Path = Path(file_path)
-         suffix = Path(self.file_path).suffix
-         if suffix == ".json":
-             self._obj = json
-         elif suffix == ".yaml":
-             self._obj = yaml
-         else:
-             raise ValueError(f"unsupported file type={suffix}")
-
-     def dump(self, config, **kwargs):
-         with open(self.file_path, "w") as f:
-             self._obj.dump(config, f, **kwargs)
-
-     def load(self, **kwargs):
-         with open(self.file_path, "r") as f:
-             return self._obj.load(f, **kwargs)
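
The class duck-types the json and yaml modules through their shared dump/load signatures. One caveat: under PyYAML >= 6.0, yaml.load requires an explicit Loader, so for .yaml files it must be passed through **kwargs. A usage sketch, assuming the class above and an illustrative file name and payload:

import yaml

handler = FileHandler("service_config.yaml")     # ".yaml" routes to the yaml module
handler.dump({"backend": "http", "port": 8001})  # hypothetical config payload
config = handler.load(Loader=yaml.SafeLoader)    # Loader is mandatory in PyYAML >= 6
assert config == {"backend": "http", "port": 8001}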
llmflow/utils/http_client.py DELETED
@@ -1,156 +0,0 @@
- import http
- import time
- from typing import Any
-
- import requests
- from loguru import logger
- from pydantic import BaseModel, Field, PrivateAttr, model_validator
-
- from llmflow.enumeration.http_enum import HttpEnum
-
-
- class HttpClient(BaseModel):
-     url: str = Field(default="")
-     keep_alive: bool = Field(default=False, description="if true, use session to keep long connection")
-     timeout: int = Field(default=300, description="request timeout, second")
-
-     return_default_if_error: bool = Field(default=True)
-     request_start_time: float = Field(default_factory=time.time)
-     request_time_cost: float = Field(default=0.0, description="request time cost")
-
-     retry_sleep_time: float = Field(default=0.5, description="interval time for retry")
-     retry_time_multiplier: float = Field(default=2.0, description="retry time multiplier")
-     retry_max_count: int = Field(default=1, description="maximum number of retries")
-
-     _client: Any = PrivateAttr()
-
-     @model_validator(mode="after")
-     def init_client(self):
-         self._client = requests.Session() if self.keep_alive else requests
-         return self
-
-     def __enter__(self):
-         return self
-
-     def __exit__(self, *args):
-         self.close()
-         self.request_time_cost: float = time.time() - self.request_start_time
-
-     def close(self):
-         if isinstance(self._client, requests.Session):
-             self._client.close()
-
-     def _request(self,
-                  data: str = None,
-                  json_data: dict = None,
-                  headers: dict = None,
-                  stream: bool = False,
-                  http_enum: HttpEnum | str = HttpEnum.POST):
-
-         if isinstance(http_enum, str):
-             http_enum = HttpEnum(http_enum)
-
-         if http_enum is HttpEnum.POST:
-             response: requests.Response = self._client.post(url=self.url,
-                                                             data=data,
-                                                             json=json_data,
-                                                             headers=headers,
-                                                             stream=stream,
-                                                             timeout=self.timeout)
-
-         elif http_enum is HttpEnum.GET:
-             response: requests.Response = self._client.get(url=self.url,
-                                                            data=data,
-                                                            json=json_data,
-                                                            headers=headers,
-                                                            stream=stream,
-                                                            timeout=self.timeout)
-
-         else:
-             raise NotImplementedError
-
-         if response.status_code != http.HTTPStatus.OK:
-             raise RuntimeError(f"request failed! content={response.json()}")
-
-         return response
-
-     def parse_result(self, response: requests.Response | Any = None, **kwargs):
-         return response.json()
-
-     def return_default(self, **kwargs):
-         return None
-
-     def request(self,
-                 data: str | Any = None,
-                 json_data: dict = None,
-                 headers: dict = None,
-                 http_enum: HttpEnum | str = HttpEnum.POST,
-                 **kwargs):
-
-         retry_sleep_time = self.retry_sleep_time
-         for i in range(self.retry_max_count):
-             try:
-                 response = self._request(data=data, json_data=json_data, headers=headers, http_enum=http_enum)
-                 result = self.parse_result(response=response,
-                                            data=data,
-                                            json_data=json_data,
-                                            headers=headers,
-                                            http_enum=http_enum,
-                                            **kwargs)
-                 return result
-
-             except Exception as e:
-                 logger.exception(f"{self.__class__.__name__} {i}th request failed with args={e.args}")
-
-                 if i == self.retry_max_count - 1:
-                     if self.return_default_if_error:
-                         return self.return_default()
-                     else:
-                         raise e
-
-                 retry_sleep_time *= self.retry_time_multiplier
-                 time.sleep(retry_sleep_time)
-
-         return None
-
-     def request_stream(self,
-                        data: str = None,
-                        json_data: dict = None,
-                        headers: dict = None,
-                        http_enum: HttpEnum | str = HttpEnum.POST,
-                        **kwargs):
-
-         retry_sleep_time = self.retry_sleep_time
-         for i in range(self.retry_max_count):
-             try:
-                 response = self._request(data=data,
-                                          json_data=json_data,
-                                          headers=headers,
-                                          stream=True,
-                                          http_enum=http_enum)
-                 request_context = {}
-                 for iter_idx, line in enumerate(response.iter_lines()):
-                     yield self.parse_result(line=line,
-                                             request_context=request_context,
-                                             index=iter_idx,
-                                             data=data,
-                                             json_data=json_data,
-                                             headers=headers,
-                                             http_enum=http_enum,
-                                             **kwargs)
-
-                 return None
-
-             except Exception as e:
-                 logger.exception(f"{self.__class__.__name__} {i}th request failed with args={e.args}")
-
-                 if i == self.retry_max_count - 1:
-                     if self.return_default_if_error:
-                         return self.return_default()
-                     else:
-                         raise e
-
-                 retry_sleep_time *= self.retry_time_multiplier
-                 time.sleep(retry_sleep_time)
-
-         return None
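
For reference, typical use of the deleted client looked like this (a minimal sketch assuming the class above; httpbin.org stands in for a real endpoint, and request() returns response.json() or None once retries are exhausted):

# POST JSON with a keep-alive session and two attempts, closing the
# session via the context manager.
with HttpClient(url="https://httpbin.org/post", keep_alive=True, retry_max_count=2) as client:
    result = client.request(json_data={"query": "ping"})
    print(result["json"] if result else "request failed")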
llmflow/utils/op_utils.py DELETED
@@ -1,102 +0,0 @@
- from typing import List
-
- from llmflow.enumeration.role import Role
- from llmflow.schema.message import Message, Trajectory
- import json
- import re
- from loguru import logger
-
- def merge_messages_content(messages: List[Message | dict]) -> str:
-     content_collector = []
-     for i, message in enumerate(messages):
-         if isinstance(message, dict):
-             message = Message(**message)
-
-         if message.role is Role.ASSISTANT:
-             line = f"### step.{i} role={message.role.value} content=\n{message.reasoning_content}\n\n{message.content}\n"
-             if message.tool_calls:
-                 for tool_call in message.tool_calls:
-                     line += f" - tool call={tool_call.name}\n params={tool_call.arguments}\n"
-             content_collector.append(line)
-
-         elif message.role is Role.USER:
-             line = f"### step.{i} role={message.role.value} content=\n{message.content}\n"
-             content_collector.append(line)
-
-         elif message.role is Role.TOOL:
-             line = f"### step.{i} role={message.role.value} tool call result=\n{message.content}\n"
-             content_collector.append(line)
-
-     return "\n".join(content_collector)
-
-
- def parse_json_experience_response(response: str) -> List[dict]:
-     """Parse JSON formatted experience response"""
-     try:
-         # Extract JSON blocks
-         json_pattern = r'```json\s*([\s\S]*?)\s*```'
-         json_blocks = re.findall(json_pattern, response)
-
-         if json_blocks:
-             parsed = json.loads(json_blocks[0])
-
-             # Handle array format
-             if isinstance(parsed, list):
-                 experiences = []
-                 for exp_data in parsed:
-                     if isinstance(exp_data, dict) and (
-                             ("when_to_use" in exp_data and "experience" in exp_data) or
-                             ("condition" in exp_data and "experience" in exp_data)
-                     ):
-                         experiences.append(exp_data)
-
-                 return experiences
-
-
-             # Handle single object
-             elif isinstance(parsed, dict) and (
-                     ("when_to_use" in parsed and "experience" in parsed) or
-                     ("condition" in parsed and "experience" in parsed)
-             ):
-                 return [parsed]
-
-         # Fallback: try to parse entire response
-         parsed = json.loads(response)
-         if isinstance(parsed, list):
-             return parsed
-         elif isinstance(parsed, dict):
-             return [parsed]
-
-     except json.JSONDecodeError as e:
-         logger.warning(f"Failed to parse JSON experience response: {e}")
-
-     return []
-
- def get_trajectory_context(trajectory: Trajectory, step_sequence: List[Message]) -> str:
-     """Get context of step sequence within trajectory"""
-     try:
-         # Find position of step sequence in trajectory
-         start_idx = 0
-         for i, step in enumerate(trajectory.messages):
-             if step == step_sequence[0]:
-                 start_idx = i
-                 break
-
-         # Extract before and after context
-         context_before = trajectory.messages[max(0, start_idx - 2):start_idx]
-         context_after = trajectory.messages[start_idx + len(step_sequence):start_idx + len(step_sequence) + 2]
-
-         context = f"Query: {trajectory.metadata.get('query', 'N/A')}\n"
-
-         if context_before:
-             context += "Previous steps:\n" + "\n".join(
-                 [f"- {step.content[:100]}..." for step in context_before]) + "\n"
-
-         if context_after:
-             context += "Following steps:\n" + "\n".join([f"- {step.content[:100]}..." for step in context_after])
-
-         return context
-
-     except Exception as e:
-         logger.error(f"Error getting trajectory context: {e}")
-         return f"Query: {trajectory.metadata.get('query', 'N/A')}"
llmflow/utils/registry.py DELETED
@@ -1,33 +0,0 @@
- from typing import List
-
- from loguru import logger
-
- from llmflow.utils.common_utils import camel_to_snake
-
-
- class Registry(object):
-     def __init__(self):
-         self._registry = {}
-
-     def register(self, name: str = ""):
-
-         def decorator(cls):
-             class_name = name if name else camel_to_snake(cls.__name__)
-             if class_name in self._registry:
-                 logger.warning(f"name={class_name} is already registered, will be overwritten.")
-             self._registry[class_name] = cls
-             return cls
-
-         return decorator
-
-     def __getitem__(self, name: str):
-         if name not in self._registry:
-             raise KeyError(f"name={name} is not registered!")
-         return self._registry[name]
-
-     def __contains__(self, name: str):
-         return name in self._registry
-
-     @property
-     def registered_names(self) -> List[str]:
-         return sorted(self._registry.keys())
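
Usage, for reference: register() keys the class under an explicit name or, by default, the snake_case of its class name. A minimal sketch with a hypothetical class, assuming Registry as shown above:

TOOL_REGISTRY = Registry()

@TOOL_REGISTRY.register()
class MockSearchTool:  # hypothetical; stored under "mock_search_tool"
    pass

assert "mock_search_tool" in TOOL_REGISTRY
assert TOOL_REGISTRY["mock_search_tool"] is MockSearchTool
print(TOOL_REGISTRY.registered_names)  # ['mock_search_tool']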
llmflow/vector_store/__init__.py DELETED
@@ -1,7 +0,0 @@
- from llmflow.utils.registry import Registry
-
- VECTOR_STORE_REGISTRY = Registry()
-
- from llmflow.vector_store.es_vector_store import EsVectorStore
- from llmflow.vector_store.chroma_vector_store import ChromaVectorStore
- from llmflow.vector_store.file_vector_store import FileVectorStore
llmflow/vector_store/base_vector_store.py DELETED
@@ -1,136 +0,0 @@
- import fcntl
- import json
- from abc import ABC
- from pathlib import Path
- from typing import List, Iterable
-
- from loguru import logger
- from pydantic import BaseModel, Field
- from tqdm import tqdm
-
- from llmflow.embedding_model.base_embedding_model import BaseEmbeddingModel
- from llmflow.schema.vector_node import VectorNode
-
-
- class BaseVectorStore(BaseModel, ABC):
-     embedding_model: BaseEmbeddingModel | None = Field(default=None)
-     batch_size: int = Field(default=1024)
-
-     @staticmethod
-     def _load_from_path(workspace_id: str, path: str | Path, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
-         workspace_path = Path(path) / f"{workspace_id}.jsonl"
-         if not workspace_path.exists():
-             logger.warning(f"workspace_path={workspace_path} is not exists!")
-             return
-
-         with workspace_path.open() as f:
-             fcntl.flock(f, fcntl.LOCK_SH)
-             try:
-                 for line in tqdm(f, desc="load from path"):
-                     if line.strip():
-                         node_dict = json.loads(line.strip())
-                         if callback_fn:
-                             node = callback_fn(node_dict)
-                         else:
-                             node = VectorNode(**node_dict, **kwargs)
-                         node.workspace_id = workspace_id
-                         yield node
-
-             finally:
-                 fcntl.flock(f, fcntl.LOCK_UN)
-
-     @staticmethod
-     def _dump_to_path(nodes: Iterable[VectorNode], workspace_id: str, path: str | Path = "", callback_fn=None,
-                       ensure_ascii: bool = False, **kwargs):
-         dump_path: Path = Path(path)
-         dump_path.mkdir(parents=True, exist_ok=True)
-         dump_file = dump_path / f"{workspace_id}.jsonl"
-
-         count = 0
-         with dump_file.open("w") as f:
-             fcntl.flock(f, fcntl.LOCK_EX)
-             try:
-                 for node in tqdm(nodes, desc="dump to path"):
-                     node.workspace_id = workspace_id
-                     if callback_fn:
-                         node_dict = callback_fn(node)
-                     else:
-                         node_dict = node.model_dump()
-                     assert isinstance(node_dict, dict)
-                     f.write(json.dumps(node_dict, ensure_ascii=ensure_ascii, **kwargs))
-                     f.write("\n")
-                     count += 1
-
-                 return {"size": count}
-             finally:
-                 fcntl.flock(f, fcntl.LOCK_UN)
-
-     def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
-         raise NotImplementedError
-
-     def delete_workspace(self, workspace_id: str, **kwargs):
-         raise NotImplementedError
-
-     def create_workspace(self, workspace_id: str, **kwargs):
-         raise NotImplementedError
-
-     def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
-         raise NotImplementedError
-
-     def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
-         if not self.exist_workspace(workspace_id=workspace_id, **kwargs):
-             logger.warning(f"workspace_id={workspace_id} is not exist!")
-             return {}
-
-         return self._dump_to_path(nodes=self._iter_workspace_nodes(workspace_id=workspace_id, **kwargs),
-                                   workspace_id=workspace_id,
-                                   path=path,
-                                   callback_fn=callback_fn,
-                                   **kwargs)
-
-     def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None, callback_fn=None,
-                        **kwargs):
-         if self.exist_workspace(workspace_id, **kwargs):
-             self.delete_workspace(workspace_id=workspace_id, **kwargs)
-             logger.info(f"delete workspace_id={workspace_id}")
-
-         self.create_workspace(workspace_id=workspace_id, **kwargs)
-
-         all_nodes: List[VectorNode] = []
-         if nodes:
-             all_nodes.extend(nodes)
-         for node in self._load_from_path(path=path, workspace_id=workspace_id, callback_fn=callback_fn, **kwargs):
-             all_nodes.append(node)
-         self.insert(nodes=all_nodes, workspace_id=workspace_id, **kwargs)
-         return {"size": len(all_nodes)}
-
-     def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
-         if not self.exist_workspace(workspace_id=src_workspace_id, **kwargs):
-             logger.warning(f"src_workspace_id={src_workspace_id} is not exist!")
-             return {}
-
-         if not self.exist_workspace(dest_workspace_id, **kwargs):
-             self.create_workspace(workspace_id=dest_workspace_id, **kwargs)
-
-         nodes = []
-         node_size = 0
-         for node in self._iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
-             nodes.append(node)
-             node_size += 1
-             if len(nodes) >= self.batch_size:
-                 self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
-                 nodes.clear()
-
-         if nodes:
-             self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
-         return {"size": node_size}
-
-     def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
-         raise NotImplementedError
-
-     def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
-         raise NotImplementedError
-
-     def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
-         raise NotImplementedError
-
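
The base class supplies the flock-guarded JSONL dump/load and the workspace copy/load orchestration; subclasses (EsVectorStore, ChromaVectorStore, FileVectorStore, imported in the __init__.py above) implement the storage hooks. A hypothetical in-memory subclass showing the minimum surface, assuming BaseVectorStore and VectorNode as defined above:

from typing import Iterable, List
from pydantic import PrivateAttr

class InMemoryVectorStore(BaseVectorStore):
    # Illustration only; not one of the real (deleted) subclasses.
    _spaces: dict = PrivateAttr(default_factory=dict)

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        return workspace_id in self._spaces

    def create_workspace(self, workspace_id: str, **kwargs):
        self._spaces[workspace_id] = []

    def delete_workspace(self, workspace_id: str, **kwargs):
        self._spaces.pop(workspace_id, None)

    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
        yield from self._spaces.get(workspace_id, [])

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        if not isinstance(nodes, list):
            nodes = [nodes]
        self._spaces.setdefault(workspace_id, []).extend(nodes)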