flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +21 -0
- flowllm/app.py +15 -0
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/default.yaml +77 -0
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +242 -0
- flowllm/context/base_context.py +79 -0
- flowllm/context/flow_context.py +16 -0
- llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
- flowllm/context/registry.py +30 -0
- flowllm/context/service_context.py +147 -0
- flowllm/embedding_model/__init__.py +1 -0
- {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
- {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/expression_parser.py +171 -0
- flowllm/llm/__init__.py +2 -0
- {llmflow → flowllm}/llm/base_llm.py +100 -18
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +439 -0
- flowllm/op/__init__.py +11 -0
- llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +108 -0
- flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
- flowllm/op/akshare/get_ak_a_info_op.py +140 -0
- flowllm/op/base_llm_op.py +64 -0
- flowllm/op/base_op.py +148 -0
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/gallery/mock_op.py +42 -0
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +23 -0
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +21 -0
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/flow_response.py +12 -0
- flowllm/schema/message.py +35 -0
- flowllm/schema/service_config.py +72 -0
- flowllm/schema/tool_call.py +118 -0
- {llmflow → flowllm}/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -0
- flowllm/service/base_service.py +68 -0
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +79 -0
- flowllm/service/mcp_service.py +47 -0
- flowllm/storage/__init__.py +1 -0
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/storage/cache/data_cache.py +375 -0
- flowllm/storage/vector_store/__init__.py +3 -0
- flowllm/storage/vector_store/base_vector_store.py +44 -0
- {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
- {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
- llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
- flowllm/utils/common_utils.py +52 -0
- flowllm/utils/fetch_url.py +117 -0
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {llmflow → flowllm}/utils/timer.py +5 -4
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
- flowllm-0.1.2.dist-info/top_level.txt +1 -0
- flowllm-0.1.0.dist-info/RECORD +0 -66
- flowllm-0.1.0.dist-info/entry_points.txt +0 -3
- flowllm-0.1.0.dist-info/top_level.txt +0 -1
- llmflow/app.py +0 -53
- llmflow/config/config_parser.py +0 -80
- llmflow/config/mock_config.yaml +0 -58
- llmflow/embedding_model/__init__.py +0 -5
- llmflow/enumeration/agent_state.py +0 -8
- llmflow/llm/__init__.py +0 -5
- llmflow/llm/openai_compatible_llm.py +0 -283
- llmflow/mcp_server.py +0 -110
- llmflow/op/__init__.py +0 -10
- llmflow/op/base_op.py +0 -125
- llmflow/op/mock_op.py +0 -40
- llmflow/op/vector_store/__init__.py +0 -13
- llmflow/op/vector_store/recall_vector_store_op.py +0 -48
- llmflow/op/vector_store/update_vector_store_op.py +0 -28
- llmflow/op/vector_store/vector_store_action_op.py +0 -46
- llmflow/pipeline/pipeline.py +0 -94
- llmflow/pipeline/pipeline_context.py +0 -37
- llmflow/schema/app_config.py +0 -69
- llmflow/schema/experience.py +0 -144
- llmflow/schema/message.py +0 -68
- llmflow/schema/request.py +0 -32
- llmflow/schema/response.py +0 -29
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +0 -96
- llmflow/tool/__init__.py +0 -9
- llmflow/tool/base_tool.py +0 -80
- llmflow/tool/code_tool.py +0 -43
- llmflow/tool/dashscope_search_tool.py +0 -162
- llmflow/tool/mcp_tool.py +0 -77
- llmflow/tool/tavily_search_tool.py +0 -109
- llmflow/tool/terminate_tool.py +0 -23
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +0 -17
- llmflow/utils/file_handler.py +0 -25
- llmflow/utils/http_client.py +0 -156
- llmflow/utils/op_utils.py +0 -102
- llmflow/utils/registry.py +0 -33
- llmflow/vector_store/__init__.py +0 -7
- llmflow/vector_store/base_vector_store.py +0 -136
- {llmflow → flowllm/context}/__init__.py +0 -0
- {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
- {llmflow → flowllm}/enumeration/http_enum.py +0 -0
- {llmflow → flowllm}/enumeration/role.py +0 -0
- {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
- {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
- /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
- {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
- {llmflow/schema → flowllm/utils}/__init__.py +0 -0
- {llmflow → flowllm}/utils/singleton.py +0 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
flowllm/storage/cache/cache_data_handler.py
@@ -0,0 +1,104 @@
+import json
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Dict, Any
+
+import pandas as pd
+
+
+class CacheDataHandler(ABC):
+    """Abstract base class for data type handlers"""
+
+    @abstractmethod
+    def save(self, data: Any, file_path: Path, **kwargs) -> Dict[str, Any]:
+        """
+        Save data to file and return metadata
+
+        Args:
+            data: Data to save
+            file_path: File path to save to
+            **kwargs: Additional parameters
+
+        Returns:
+            Dict containing metadata about the saved data
+        """
+        pass
+
+    @abstractmethod
+    def load(self, file_path: Path, **kwargs) -> Any:
+        """
+        Load data from file
+
+        Args:
+            file_path: File path to load from
+            **kwargs: Additional parameters
+
+        Returns:
+            Loaded data
+        """
+        pass
+
+    @abstractmethod
+    def get_file_extension(self) -> str:
+        """Get the file extension for this data type"""
+        pass
+
+
+class DataFrameHandler(CacheDataHandler):
+    """Handler for pandas DataFrame data type"""
+
+    def save(self, data: pd.DataFrame, file_path: Path, **kwargs) -> Dict[str, Any]:
+        """Save DataFrame as CSV"""
+        csv_params = {
+            "index": False,
+            "encoding": "utf-8"
+        }
+        csv_params.update(kwargs)
+
+        data.to_csv(file_path, **csv_params)
+
+        return {
+            'row_count': len(data),
+            'column_count': len(data.columns),
+            'file_size': file_path.stat().st_size
+        }
+
+    def load(self, file_path: Path, **kwargs) -> pd.DataFrame:
+        """Load DataFrame from CSV"""
+        csv_params = {
+            'encoding': 'utf-8'
+        }
+        csv_params.update(kwargs)
+
+        return pd.read_csv(file_path, **csv_params)
+
+    def get_file_extension(self) -> str:
+        return ".csv"
+
+
+class DictHandler(CacheDataHandler):
+    """Handler for dict data type"""
+
+    def save(self, data: dict, file_path: Path, **kwargs) -> Dict[str, Any]:
+        """Save dict as JSON"""
+        json_params = {
+            "ensure_ascii": False,
+            "indent": 2
+        }
+        json_params.update(kwargs)
+
+        with open(file_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, **json_params)
+
+        return {
+            'key_count': len(data),
+            'file_size': file_path.stat().st_size
+        }
+
+    def load(self, file_path: Path, **kwargs) -> dict:
+        """Load dict from JSON"""
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    def get_file_extension(self) -> str:
+        return ".json"
flowllm/storage/cache/data_cache.py
@@ -0,0 +1,375 @@
+"""
+DataCache utility that supports multiple data types with local storage and data expiration functionality
+"""
+
+import json
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, Any, Union, Type
+
+import pandas as pd
+
+from flowllm.storage.cache.cache_data_handler import CacheDataHandler, DataFrameHandler, DictHandler
+
+
+class DataCache:
+    """
+    Generic data cache utility class
+
+    Features:
+    - Support for multiple data types (DataFrame, dict, and extensible for others)
+    - Support for data expiration time settings
+    - Automatic cleanup of expired data
+    - Recording and managing update timestamps
+    - Type-specific storage formats (CSV for DataFrame, JSON for dict)
+    """
+
+    def __init__(self, cache_dir: str = "cache"):
+        self.cache_dir = Path(cache_dir)
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+        self.metadata_file = self.cache_dir / "metadata.json"
+        self.metadata = {}
+
+        # Register default handlers
+        self.handlers: Dict[Type, CacheDataHandler] = {
+            pd.DataFrame: DataFrameHandler(),
+            dict: DictHandler()
+        }
+
+        self._load_metadata()
+
+    def register_handler(self, data_type: Type, handler: CacheDataHandler):
+        """
+        Register a custom data handler for a specific data type
+
+        Args:
+            data_type: The data type to handle
+            handler: The handler instance
+        """
+        self.handlers[data_type] = handler
+
+    def _get_handler(self, data_type: Type) -> CacheDataHandler:
+        """Get the appropriate handler for a data type"""
+        if data_type in self.handlers:
+            return self.handlers[data_type]
+
+        # Try to find a handler for parent classes
+        for registered_type, handler in self.handlers.items():
+            if issubclass(data_type, registered_type):
+                return handler
+
+        raise ValueError(f"No handler registered for data type: {data_type}")
+
+    def _load_metadata(self):
+        """Load metadata"""
+        if self.metadata_file.exists():
+            with open(self.metadata_file) as f:
+                self.metadata = json.load(f)
+
+    def _save_metadata(self):
+        """Save metadata"""
+        with open(self.metadata_file, "w") as f:
+            json.dump(self.metadata, f, ensure_ascii=False, indent=2)
+
+    def _get_file_path(self, key: str, data_type: Type = None) -> Path:
+        """Get data file path with appropriate extension"""
+        if data_type is None:
+            # Try to get extension from metadata
+            if key in self.metadata and 'data_type' in self.metadata[key]:
+                stored_type_name = self.metadata[key]['data_type']
+                if stored_type_name == 'DataFrame':
+                    extension = '.csv'
+                elif stored_type_name == 'dict':
+                    extension = '.json'
+                elif stored_type_name == 'str':
+                    extension = '.txt'
+                else:
+                    # Try to find extension from registered handlers
+                    extension = '.dat'  # Default extension
+                    for registered_type, handler in self.handlers.items():
+                        if registered_type.__name__ == stored_type_name:
+                            extension = handler.get_file_extension()
+                            break
+            else:
+                extension = '.dat'  # Default extension
+        else:
+            try:
+                handler = self._get_handler(data_type)
+                extension = handler.get_file_extension()
+            except ValueError:
+                extension = '.dat'  # Default extension
+
+        return self.cache_dir / f"{key}{extension}"
+
+    def _is_expired(self, key: str) -> bool:
+        """Check if data is expired"""
+        if key not in self.metadata:
+            return True
+
+        expire_time_str = self.metadata[key].get('expire_time')
+        if not expire_time_str:
+            return False  # No expiration time set, never expires
+
+        expire_time = datetime.fromisoformat(expire_time_str)
+        return datetime.now() > expire_time
+
+    def save(self, key: str, data: Union[pd.DataFrame, dict, Any], expire_hours: Optional[float] = None,
+             **handler_kwargs) -> bool:
+        """
+        Save data to cache
+
+        Args:
+            key: Cache key name
+            data: Data to save (DataFrame, dict, or other supported types)
+            expire_hours: Expiration time in hours, None means never expires
+            **handler_kwargs: Additional parameters passed to the data handler
+
+        Returns:
+            bool: Whether save was successful
+        """
+        try:
+            data_type = type(data)
+            handler = self._get_handler(data_type)
+            file_path = self._get_file_path(key, data_type)
+
+            # Save data using appropriate handler
+            handler_metadata = handler.save(data, file_path, **handler_kwargs)
+
+            # Update metadata
+            current_time = datetime.now()
+            self.metadata[key] = {
+                'created_time': current_time.isoformat(),
+                'updated_time': current_time.isoformat(),
+                'expire_time': (current_time + timedelta(hours=expire_hours)).isoformat() if expire_hours else None,
+                'data_type': data_type.__name__,
+                **handler_metadata
+            }
+
+            self._save_metadata()
+            return True
+
+        except Exception as e:
+            print(f"Failed to save data: {e}")
+            return False
+
+    def load(self, key: str, auto_clean_expired: bool = True, **handler_kwargs) -> Optional[Any]:
+        """
+        Load data from cache
+
+        Args:
+            key: Cache key name
+            auto_clean_expired: Whether to automatically clean expired data
+            **handler_kwargs: Additional parameters passed to the data handler
+
+        Returns:
+            Optional[Any]: Loaded data, returns None if not exists or expired
+        """
+        try:
+            # Check if expired
+            if self._is_expired(key):
+                if auto_clean_expired:
+                    self.delete(key)
+                    print(f"Cache '{key}' has expired and was automatically cleaned")
+                return None
+
+            file_path = self._get_file_path(key)
+            if not file_path.exists():
+                return None
+
+            # Get data type from metadata
+            if key not in self.metadata or 'data_type' not in self.metadata[key]:
+                print(f"No data type information found for key '{key}'")
+                return None
+
+            data_type_name = self.metadata[key]['data_type']
+
+            # Map type name back to actual type
+            if data_type_name == 'DataFrame':
+                data_type = pd.DataFrame
+            elif data_type_name == 'dict':
+                data_type = dict
+            elif data_type_name == 'str':
+                data_type = str
+            else:
+                # For other custom types, try to find a handler by checking registered types
+                data_type = None
+                for registered_type in self.handlers.keys():
+                    if registered_type.__name__ == data_type_name:
+                        data_type = registered_type
+                        break
+
+                if data_type is None:
+                    print(f"Unknown data type: {data_type_name}")
+                    return None
+
+            handler = self._get_handler(data_type)
+
+            # Load data using appropriate handler
+            data = handler.load(file_path, **handler_kwargs)
+
+            # Update last access time
+            if key in self.metadata:
+                self.metadata[key]['last_accessed'] = datetime.now().isoformat()
+                self._save_metadata()
+
+            return data
+
+        except Exception as e:
+            print(f"Failed to load data: {e}")
+            return None
+
+    def exists(self, key: str, check_expired: bool = True) -> bool:
+        """
+        Check if cache exists
+
+        Args:
+            key: Cache key name
+            check_expired: Whether to check expiration status
+
+        Returns:
+            bool: Whether cache exists and is not expired
+        """
+        if check_expired and self._is_expired(key):
+            return False
+
+        file_path = self._get_file_path(key)
+        return file_path.exists() and key in self.metadata
+
+    def delete(self, key: str) -> bool:
+        """
+        Delete cache
+
+        Args:
+            key: Cache key name
+
+        Returns:
+            bool: Whether deletion was successful
+        """
+        try:
+            file_path = self._get_file_path(key)
+
+            # Delete data file
+            if file_path.exists():
+                file_path.unlink()
+
+            # Delete metadata
+            if key in self.metadata:
+                del self.metadata[key]
+                self._save_metadata()
+
+            return True
+
+        except Exception as e:
+            print(f"Failed to delete cache: {e}")
+            return False
+
+    def clean_expired(self) -> int:
+        """
+        Clean all expired caches
+
+        Returns:
+            int: Number of cleaned caches
+        """
+        expired_keys = []
+
+        for key in list(self.metadata.keys()):
+            if self._is_expired(key):
+                expired_keys.append(key)
+
+        cleaned_count = 0
+        for key in expired_keys:
+            if self.delete(key):
+                cleaned_count += 1
+
+        return cleaned_count
+
+    def get_info(self, key: str) -> Optional[Dict[str, Any]]:
+        """
+        Get cache information
+
+        Args:
+            key: Cache key name
+
+        Returns:
+            Optional[Dict]: Cache information including creation time, update time, expiration time, etc.
+        """
+        if key not in self.metadata:
+            return None
+
+        info = self.metadata[key].copy()
+        info['key'] = key
+        info['is_expired'] = self._is_expired(key)
+        info['file_path'] = str(self._get_file_path(key))
+
+        return info
+
+    def list_all(self, include_expired: bool = False) -> Dict[str, Dict[str, Any]]:
+        """
+        List all caches
+
+        Args:
+            include_expired: Whether to include expired caches
+
+        Returns:
+            Dict: Information of all caches
+        """
+        result = {}
+
+        for key in self.metadata:
+            if not include_expired and self._is_expired(key):
+                continue
+
+            info = self.get_info(key)
+            if info:
+                result[key] = info
+
+        return result
+
+    def get_cache_stats(self) -> Dict[str, Any]:
+        """
+        Get cache statistics
+
+        Returns:
+            Dict: Cache statistics information
+        """
+        total_count = len(self.metadata)
+        expired_count = sum(1 for key in self.metadata if self._is_expired(key))
+        active_count = total_count - expired_count
+
+        total_size = 0
+        for key in self.metadata:
+            file_path = self._get_file_path(key)
+            if file_path.exists():
+                total_size += file_path.stat().st_size
+
+        return {
+            'total_count': total_count,
+            'active_count': active_count,
+            'expired_count': expired_count,
+            'total_size_bytes': total_size,
+            'total_size_mb': round(total_size / (1024 * 1024), 2),
+            'cache_dir': str(self.cache_dir)
+        }
+
+    def clear_all(self) -> bool:
+        """
+        Clear all caches
+
+        Returns:
+            bool: Whether clearing was successful
+        """
+        try:
+            # Delete all data files (CSV, JSON, and other supported formats)
+            for data_file in self.cache_dir.glob("*"):
+                if data_file.is_file() and data_file.name != "metadata.json":
+                    data_file.unlink()
+
+            # Clear metadata
+            self.metadata = {}
+            self._save_metadata()
+
+            return True
+
+        except Exception as e:
+            print(f"Failed to clear cache: {e}")
+            return False
flowllm/storage/vector_store/base_vector_store.py
@@ -0,0 +1,44 @@
+from abc import ABC
+from pathlib import Path
+from typing import List, Iterable
+
+from pydantic import BaseModel, Field
+
+from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
+from flowllm.schema.vector_node import VectorNode
+
+
+class BaseVectorStore(BaseModel, ABC):
+    embedding_model: BaseEmbeddingModel | None = Field(default=None)
+    batch_size: int = Field(default=1024)
+
+    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
+        raise NotImplementedError
+
+    def delete_workspace(self, workspace_id: str, **kwargs):
+        raise NotImplementedError
+
+    def create_workspace(self, workspace_id: str, **kwargs):
+        raise NotImplementedError
+
+    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
+        raise NotImplementedError
+
+    def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
+        raise NotImplementedError
+
+    def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None, callback_fn=None,
+                       **kwargs):
+        raise NotImplementedError
+
+    def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
+        raise NotImplementedError
+
+    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
+        raise NotImplementedError
+
+    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
+        raise NotImplementedError
+
+    def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
+        raise NotImplementedError
{llmflow → flowllm/storage}/vector_store/chroma_vector_store.py
@@ -6,17 +6,16 @@ from chromadb.config import Settings
 from loguru import logger
 from pydantic import Field, PrivateAttr, model_validator
 
-from 
-from 
-from 
-from llmflow.vector_store.base_vector_store import BaseVectorStore
+from flowllm.context.service_context import C
+from flowllm.schema.vector_node import VectorNode
+from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
 
 
-@
-class ChromaVectorStore(
+@C.register_vector_store("chroma")
+class ChromaVectorStore(LocalVectorStore):
     store_dir: str = Field(default="./chroma_vector_store")
     collections: dict = Field(default_factory=dict)
-    _client: chromadb.
+    _client: chromadb.ClientAPI = PrivateAttr()
 
     @model_validator(mode="after")
     def init_client(self):
@@ -97,8 +96,10 @@ class ChromaVectorStore(BaseVectorStore):
 
 
 def main():
-    from 
-
+    from flowllm.utils.common_utils import load_env
+    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+
+    load_env()
 
     embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
     workspace_id = "chroma_test_index"
@@ -185,4 +186,4 @@ def main():
 
 if __name__ == "__main__":
     main()
-# launch with: python -m
+# launch with: python -m flowllm.storage.chroma_vector_store
{llmflow → flowllm/storage}/vector_store/es_vector_store.py
@@ -6,15 +6,14 @@ from elasticsearch.helpers import bulk
 from loguru import logger
 from pydantic import Field, PrivateAttr, model_validator
 
-from 
-from 
-from 
-from llmflow.vector_store.base_vector_store import BaseVectorStore
+from flowllm.context.service_context import C
+from flowllm.schema.vector_node import VectorNode
+from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
 
 
-@
-class EsVectorStore(
-    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("
+@C.register_vector_store("elasticsearch")
+class EsVectorStore(LocalVectorStore):
+    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("FLOW_ES_HOSTS", "http://localhost:9200"))
     basic_auth: str | Tuple[str, str] | None = Field(default=None)
     retrieve_filters: List[dict] = []
     _client: Elasticsearch = PrivateAttr()
@@ -24,6 +23,7 @@ class EsVectorStore(BaseVectorStore):
         if isinstance(self.hosts, str):
             self.hosts = [self.hosts]
         self._client = Elasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
+        logger.info(f"Elasticsearch client initialized with hosts: {self.hosts}")
         return self
 
     def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
@@ -160,10 +160,11 @@ class EsVectorStore(BaseVectorStore):
         if refresh:
             self.refresh(workspace_id=workspace_id)
 
-
 def main():
-    from 
-
+    from flowllm.utils.common_utils import load_env
+    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+
+    load_env()
 
     embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
     workspace_id = "rag_nodes_index"
@@ -224,4 +225,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-# launch with: python -m llmflow.storage.es_vector_store