flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. flowllm/__init__.py +21 -0
  2. flowllm/app.py +15 -0
  3. flowllm/client/__init__.py +25 -0
  4. flowllm/client/async_http_client.py +81 -0
  5. flowllm/client/http_client.py +81 -0
  6. flowllm/client/mcp_client.py +133 -0
  7. flowllm/client/sync_mcp_client.py +116 -0
  8. flowllm/config/__init__.py +1 -0
  9. flowllm/config/default.yaml +77 -0
  10. flowllm/config/empty.yaml +37 -0
  11. flowllm/config/pydantic_config_parser.py +242 -0
  12. flowllm/context/base_context.py +79 -0
  13. flowllm/context/flow_context.py +16 -0
  14. llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
  15. flowllm/context/registry.py +30 -0
  16. flowllm/context/service_context.py +147 -0
  17. flowllm/embedding_model/__init__.py +1 -0
  18. {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
  19. {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
  20. flowllm/flow/__init__.py +1 -0
  21. flowllm/flow/base_flow.py +72 -0
  22. flowllm/flow/base_tool_flow.py +15 -0
  23. flowllm/flow/gallery/__init__.py +8 -0
  24. flowllm/flow/gallery/cmd_flow.py +11 -0
  25. flowllm/flow/gallery/code_tool_flow.py +30 -0
  26. flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
  27. flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
  28. flowllm/flow/gallery/expression_tool_flow.py +18 -0
  29. flowllm/flow/gallery/mock_tool_flow.py +67 -0
  30. flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
  31. flowllm/flow/gallery/terminate_tool_flow.py +30 -0
  32. flowllm/flow/parser/expression_parser.py +171 -0
  33. flowllm/llm/__init__.py +2 -0
  34. {llmflow → flowllm}/llm/base_llm.py +100 -18
  35. flowllm/llm/litellm_llm.py +455 -0
  36. flowllm/llm/openai_compatible_llm.py +439 -0
  37. flowllm/op/__init__.py +11 -0
  38. llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
  39. flowllm/op/akshare/__init__.py +3 -0
  40. flowllm/op/akshare/get_ak_a_code_op.py +108 -0
  41. flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
  42. flowllm/op/akshare/get_ak_a_info_op.py +140 -0
  43. flowllm/op/base_llm_op.py +64 -0
  44. flowllm/op/base_op.py +148 -0
  45. flowllm/op/base_ray_op.py +313 -0
  46. flowllm/op/code/__init__.py +1 -0
  47. flowllm/op/code/execute_code_op.py +42 -0
  48. flowllm/op/gallery/__init__.py +2 -0
  49. flowllm/op/gallery/mock_op.py +42 -0
  50. flowllm/op/gallery/terminate_op.py +29 -0
  51. flowllm/op/parallel_op.py +23 -0
  52. flowllm/op/search/__init__.py +3 -0
  53. flowllm/op/search/dashscope_deep_research_op.py +260 -0
  54. flowllm/op/search/dashscope_search_op.py +179 -0
  55. flowllm/op/search/dashscope_search_prompt.yaml +13 -0
  56. flowllm/op/search/tavily_search_op.py +102 -0
  57. flowllm/op/sequential_op.py +21 -0
  58. flowllm/schema/flow_request.py +12 -0
  59. flowllm/schema/flow_response.py +12 -0
  60. flowllm/schema/message.py +35 -0
  61. flowllm/schema/service_config.py +72 -0
  62. flowllm/schema/tool_call.py +118 -0
  63. {llmflow → flowllm}/schema/vector_node.py +1 -0
  64. flowllm/service/__init__.py +3 -0
  65. flowllm/service/base_service.py +68 -0
  66. flowllm/service/cmd_service.py +15 -0
  67. flowllm/service/http_service.py +79 -0
  68. flowllm/service/mcp_service.py +47 -0
  69. flowllm/storage/__init__.py +1 -0
  70. flowllm/storage/cache/__init__.py +1 -0
  71. flowllm/storage/cache/cache_data_handler.py +104 -0
  72. flowllm/storage/cache/data_cache.py +375 -0
  73. flowllm/storage/vector_store/__init__.py +3 -0
  74. flowllm/storage/vector_store/base_vector_store.py +44 -0
  75. {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
  76. {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
  77. llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
  78. flowllm/utils/common_utils.py +52 -0
  79. flowllm/utils/fetch_url.py +117 -0
  80. flowllm/utils/llm_utils.py +28 -0
  81. flowllm/utils/ridge_v2.py +54 -0
  82. {llmflow → flowllm}/utils/timer.py +5 -4
  83. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
  84. flowllm-0.1.2.dist-info/RECORD +99 -0
  85. flowllm-0.1.2.dist-info/entry_points.txt +2 -0
  86. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
  87. flowllm-0.1.2.dist-info/top_level.txt +1 -0
  88. flowllm-0.1.0.dist-info/RECORD +0 -66
  89. flowllm-0.1.0.dist-info/entry_points.txt +0 -3
  90. flowllm-0.1.0.dist-info/top_level.txt +0 -1
  91. llmflow/app.py +0 -53
  92. llmflow/config/config_parser.py +0 -80
  93. llmflow/config/mock_config.yaml +0 -58
  94. llmflow/embedding_model/__init__.py +0 -5
  95. llmflow/enumeration/agent_state.py +0 -8
  96. llmflow/llm/__init__.py +0 -5
  97. llmflow/llm/openai_compatible_llm.py +0 -283
  98. llmflow/mcp_server.py +0 -110
  99. llmflow/op/__init__.py +0 -10
  100. llmflow/op/base_op.py +0 -125
  101. llmflow/op/mock_op.py +0 -40
  102. llmflow/op/vector_store/__init__.py +0 -13
  103. llmflow/op/vector_store/recall_vector_store_op.py +0 -48
  104. llmflow/op/vector_store/update_vector_store_op.py +0 -28
  105. llmflow/op/vector_store/vector_store_action_op.py +0 -46
  106. llmflow/pipeline/pipeline.py +0 -94
  107. llmflow/pipeline/pipeline_context.py +0 -37
  108. llmflow/schema/app_config.py +0 -69
  109. llmflow/schema/experience.py +0 -144
  110. llmflow/schema/message.py +0 -68
  111. llmflow/schema/request.py +0 -32
  112. llmflow/schema/response.py +0 -29
  113. llmflow/service/__init__.py +0 -0
  114. llmflow/service/llmflow_service.py +0 -96
  115. llmflow/tool/__init__.py +0 -9
  116. llmflow/tool/base_tool.py +0 -80
  117. llmflow/tool/code_tool.py +0 -43
  118. llmflow/tool/dashscope_search_tool.py +0 -162
  119. llmflow/tool/mcp_tool.py +0 -77
  120. llmflow/tool/tavily_search_tool.py +0 -109
  121. llmflow/tool/terminate_tool.py +0 -23
  122. llmflow/utils/__init__.py +0 -0
  123. llmflow/utils/common_utils.py +0 -17
  124. llmflow/utils/file_handler.py +0 -25
  125. llmflow/utils/http_client.py +0 -156
  126. llmflow/utils/op_utils.py +0 -102
  127. llmflow/utils/registry.py +0 -33
  128. llmflow/vector_store/__init__.py +0 -7
  129. llmflow/vector_store/base_vector_store.py +0 -136
  130. {llmflow → flowllm/context}/__init__.py +0 -0
  131. {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
  132. {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
  133. {llmflow → flowllm}/enumeration/http_enum.py +0 -0
  134. {llmflow → flowllm}/enumeration/role.py +0 -0
  135. {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
  136. {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
  137. /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
  138. {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
  139. {llmflow/schema → flowllm/utils}/__init__.py +0 -0
  140. {llmflow → flowllm}/utils/singleton.py +0 -0
  141. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
flowllm/storage/cache/cache_data_handler.py
@@ -0,0 +1,104 @@
+ import json
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Dict, Any
+
+ import pandas as pd
+
+
+ class CacheDataHandler(ABC):
+     """Abstract base class for data type handlers"""
+
+     @abstractmethod
+     def save(self, data: Any, file_path: Path, **kwargs) -> Dict[str, Any]:
+         """
+         Save data to file and return metadata
+
+         Args:
+             data: Data to save
+             file_path: File path to save to
+             **kwargs: Additional parameters
+
+         Returns:
+             Dict containing metadata about the saved data
+         """
+         pass
+
+     @abstractmethod
+     def load(self, file_path: Path, **kwargs) -> Any:
+         """
+         Load data from file
+
+         Args:
+             file_path: File path to load from
+             **kwargs: Additional parameters
+
+         Returns:
+             Loaded data
+         """
+         pass
+
+     @abstractmethod
+     def get_file_extension(self) -> str:
+         """Get the file extension for this data type"""
+         pass
+
+
+ class DataFrameHandler(CacheDataHandler):
+     """Handler for pandas DataFrame data type"""
+
+     def save(self, data: pd.DataFrame, file_path: Path, **kwargs) -> Dict[str, Any]:
+         """Save DataFrame as CSV"""
+         csv_params = {
+             "index": False,
+             "encoding": "utf-8"
+         }
+         csv_params.update(kwargs)
+
+         data.to_csv(file_path, **csv_params)
+
+         return {
+             'row_count': len(data),
+             'column_count': len(data.columns),
+             'file_size': file_path.stat().st_size
+         }
+
+     def load(self, file_path: Path, **kwargs) -> pd.DataFrame:
+         """Load DataFrame from CSV"""
+         csv_params = {
+             'encoding': 'utf-8'
+         }
+         csv_params.update(kwargs)
+
+         return pd.read_csv(file_path, **csv_params)
+
+     def get_file_extension(self) -> str:
+         return ".csv"
+
+
+ class DictHandler(CacheDataHandler):
+     """Handler for dict data type"""
+
+     def save(self, data: dict, file_path: Path, **kwargs) -> Dict[str, Any]:
+         """Save dict as JSON"""
+         json_params = {
+             "ensure_ascii": False,
+             "indent": 2
+         }
+         json_params.update(kwargs)
+
+         with open(file_path, 'w', encoding='utf-8') as f:
+             json.dump(data, f, **json_params)
+
+         return {
+             'key_count': len(data),
+             'file_size': file_path.stat().st_size
+         }
+
+     def load(self, file_path: Path, **kwargs) -> dict:
+         """Load dict from JSON"""
+         with open(file_path, 'r', encoding='utf-8') as f:
+             return json.load(f)
+
+     def get_file_extension(self) -> str:
+         return ".json"
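The save/load/get_file_extension trio above is the full contract that DataCache (next file) dispatches on. A minimal sketch of a custom handler, assuming plain-text str payloads stored as .txt files; the TextHandler name is illustrative and not part of the package:

from pathlib import Path
from typing import Any, Dict

from flowllm.storage.cache.cache_data_handler import CacheDataHandler


class TextHandler(CacheDataHandler):
    """Illustrative handler that persists str payloads as UTF-8 text files."""

    def save(self, data: str, file_path: Path, **kwargs) -> Dict[str, Any]:
        # Write the string and report the same style of metadata as the built-in handlers.
        file_path.write_text(data, encoding="utf-8")
        return {"char_count": len(data), "file_size": file_path.stat().st_size}

    def load(self, file_path: Path, **kwargs) -> str:
        return file_path.read_text(encoding="utf-8")

    def get_file_extension(self) -> str:
        return ".txt"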
flowllm/storage/cache/data_cache.py
@@ -0,0 +1,375 @@
+ """
+ DataCache utility that supports multiple data types with local storage and data expiration functionality
+ """
+
+ import json
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Optional, Dict, Any, Union, Type
+
+ import pandas as pd
+
+ from flowllm.storage.cache.cache_data_handler import CacheDataHandler, DataFrameHandler, DictHandler
+
+
+ class DataCache:
+     """
+     Generic data cache utility class
+
+     Features:
+     - Support for multiple data types (DataFrame, dict, and extensible for others)
+     - Support for data expiration time settings
+     - Automatic cleanup of expired data
+     - Recording and managing update timestamps
+     - Type-specific storage formats (CSV for DataFrame, JSON for dict)
+     """
+
+     def __init__(self, cache_dir: str = "cache"):
+         self.cache_dir = Path(cache_dir)
+         self.cache_dir.mkdir(parents=True, exist_ok=True)
+         self.metadata_file = self.cache_dir / "metadata.json"
+         self.metadata = {}
+
+         # Register default handlers
+         self.handlers: Dict[Type, CacheDataHandler] = {
+             pd.DataFrame: DataFrameHandler(),
+             dict: DictHandler()
+         }
+
+         self._load_metadata()
+
+     def register_handler(self, data_type: Type, handler: CacheDataHandler):
+         """
+         Register a custom data handler for a specific data type
+
+         Args:
+             data_type: The data type to handle
+             handler: The handler instance
+         """
+         self.handlers[data_type] = handler
+
+     def _get_handler(self, data_type: Type) -> CacheDataHandler:
+         """Get the appropriate handler for a data type"""
+         if data_type in self.handlers:
+             return self.handlers[data_type]
+
+         # Try to find a handler for parent classes
+         for registered_type, handler in self.handlers.items():
+             if issubclass(data_type, registered_type):
+                 return handler
+
+         raise ValueError(f"No handler registered for data type: {data_type}")
+
+     def _load_metadata(self):
+         """Load metadata"""
+         if self.metadata_file.exists():
+             with open(self.metadata_file) as f:
+                 self.metadata = json.load(f)
+
+     def _save_metadata(self):
+         """Save metadata"""
+         with open(self.metadata_file, "w") as f:
+             json.dump(self.metadata, f, ensure_ascii=False, indent=2)
+
+     def _get_file_path(self, key: str, data_type: Type = None) -> Path:
+         """Get data file path with appropriate extension"""
+         if data_type is None:
+             # Try to get extension from metadata
+             if key in self.metadata and 'data_type' in self.metadata[key]:
+                 stored_type_name = self.metadata[key]['data_type']
+                 if stored_type_name == 'DataFrame':
+                     extension = '.csv'
+                 elif stored_type_name == 'dict':
+                     extension = '.json'
+                 elif stored_type_name == 'str':
+                     extension = '.txt'
+                 else:
+                     # Try to find extension from registered handlers
+                     extension = '.dat' # Default extension
+                     for registered_type, handler in self.handlers.items():
+                         if registered_type.__name__ == stored_type_name:
+                             extension = handler.get_file_extension()
+                             break
+             else:
+                 extension = '.dat' # Default extension
+         else:
+             try:
+                 handler = self._get_handler(data_type)
+                 extension = handler.get_file_extension()
+             except ValueError:
+                 extension = '.dat' # Default extension
+
+         return self.cache_dir / f"{key}{extension}"
+
+     def _is_expired(self, key: str) -> bool:
+         """Check if data is expired"""
+         if key not in self.metadata:
+             return True
+
+         expire_time_str = self.metadata[key].get('expire_time')
+         if not expire_time_str:
+             return False # No expiration time set, never expires
+
+         expire_time = datetime.fromisoformat(expire_time_str)
+         return datetime.now() > expire_time
+
+     def save(self, key: str, data: Union[pd.DataFrame, dict, Any], expire_hours: Optional[float] = None,
+              **handler_kwargs) -> bool:
+         """
+         Save data to cache
+
+         Args:
+             key: Cache key name
+             data: Data to save (DataFrame, dict, or other supported types)
+             expire_hours: Expiration time in hours, None means never expires
+             **handler_kwargs: Additional parameters passed to the data handler
+
+         Returns:
+             bool: Whether save was successful
+         """
+         try:
+             data_type = type(data)
+             handler = self._get_handler(data_type)
+             file_path = self._get_file_path(key, data_type)
+
+             # Save data using appropriate handler
+             handler_metadata = handler.save(data, file_path, **handler_kwargs)
+
+             # Update metadata
+             current_time = datetime.now()
+             self.metadata[key] = {
+                 'created_time': current_time.isoformat(),
+                 'updated_time': current_time.isoformat(),
+                 'expire_time': (current_time + timedelta(hours=expire_hours)).isoformat() if expire_hours else None,
+                 'data_type': data_type.__name__,
+                 **handler_metadata
+             }
+
+             self._save_metadata()
+             return True
+
+         except Exception as e:
+             print(f"Failed to save data: {e}")
+             return False
+
+     def load(self, key: str, auto_clean_expired: bool = True, **handler_kwargs) -> Optional[Any]:
+         """
+         Load data from cache
+
+         Args:
+             key: Cache key name
+             auto_clean_expired: Whether to automatically clean expired data
+             **handler_kwargs: Additional parameters passed to the data handler
+
+         Returns:
+             Optional[Any]: Loaded data, returns None if not exists or expired
+         """
+         try:
+             # Check if expired
+             if self._is_expired(key):
+                 if auto_clean_expired:
+                     self.delete(key)
+                     print(f"Cache '{key}' has expired and was automatically cleaned")
+                 return None
+
+             file_path = self._get_file_path(key)
+             if not file_path.exists():
+                 return None
+
+             # Get data type from metadata
+             if key not in self.metadata or 'data_type' not in self.metadata[key]:
+                 print(f"No data type information found for key '{key}'")
+                 return None
+
+             data_type_name = self.metadata[key]['data_type']
+
+             # Map type name back to actual type
+             if data_type_name == 'DataFrame':
+                 data_type = pd.DataFrame
+             elif data_type_name == 'dict':
+                 data_type = dict
+             elif data_type_name == 'str':
+                 data_type = str
+             else:
+                 # For other custom types, try to find a handler by checking registered types
+                 data_type = None
+                 for registered_type in self.handlers.keys():
+                     if registered_type.__name__ == data_type_name:
+                         data_type = registered_type
+                         break
+
+                 if data_type is None:
+                     print(f"Unknown data type: {data_type_name}")
+                     return None
+
+             handler = self._get_handler(data_type)
+
+             # Load data using appropriate handler
+             data = handler.load(file_path, **handler_kwargs)
+
+             # Update last access time
+             if key in self.metadata:
+                 self.metadata[key]['last_accessed'] = datetime.now().isoformat()
+                 self._save_metadata()
+
+             return data
+
+         except Exception as e:
+             print(f"Failed to load data: {e}")
+             return None
+
+     def exists(self, key: str, check_expired: bool = True) -> bool:
+         """
+         Check if cache exists
+
+         Args:
+             key: Cache key name
+             check_expired: Whether to check expiration status
+
+         Returns:
+             bool: Whether cache exists and is not expired
+         """
+         if check_expired and self._is_expired(key):
+             return False
+
+         file_path = self._get_file_path(key)
+         return file_path.exists() and key in self.metadata
+
+     def delete(self, key: str) -> bool:
+         """
+         Delete cache
+
+         Args:
+             key: Cache key name
+
+         Returns:
+             bool: Whether deletion was successful
+         """
+         try:
+             file_path = self._get_file_path(key)
+
+             # Delete data file
+             if file_path.exists():
+                 file_path.unlink()
+
+             # Delete metadata
+             if key in self.metadata:
+                 del self.metadata[key]
+                 self._save_metadata()
+
+             return True
+
+         except Exception as e:
+             print(f"Failed to delete cache: {e}")
+             return False
+
+     def clean_expired(self) -> int:
+         """
+         Clean all expired caches
+
+         Returns:
+             int: Number of cleaned caches
+         """
+         expired_keys = []
+
+         for key in list(self.metadata.keys()):
+             if self._is_expired(key):
+                 expired_keys.append(key)
+
+         cleaned_count = 0
+         for key in expired_keys:
+             if self.delete(key):
+                 cleaned_count += 1
+
+         return cleaned_count
+
+     def get_info(self, key: str) -> Optional[Dict[str, Any]]:
+         """
+         Get cache information
+
+         Args:
+             key: Cache key name
+
+         Returns:
+             Optional[Dict]: Cache information including creation time, update time, expiration time, etc.
+         """
+         if key not in self.metadata:
+             return None
+
+         info = self.metadata[key].copy()
+         info['key'] = key
+         info['is_expired'] = self._is_expired(key)
+         info['file_path'] = str(self._get_file_path(key))
+
+         return info
+
+     def list_all(self, include_expired: bool = False) -> Dict[str, Dict[str, Any]]:
+         """
+         List all caches
+
+         Args:
+             include_expired: Whether to include expired caches
+
+         Returns:
+             Dict: Information of all caches
+         """
+         result = {}
+
+         for key in self.metadata:
+             if not include_expired and self._is_expired(key):
+                 continue
+
+             info = self.get_info(key)
+             if info:
+                 result[key] = info
+
+         return result
+
+     def get_cache_stats(self) -> Dict[str, Any]:
+         """
+         Get cache statistics
+
+         Returns:
+             Dict: Cache statistics information
+         """
+         total_count = len(self.metadata)
+         expired_count = sum(1 for key in self.metadata if self._is_expired(key))
+         active_count = total_count - expired_count
+
+         total_size = 0
+         for key in self.metadata:
+             file_path = self._get_file_path(key)
+             if file_path.exists():
+                 total_size += file_path.stat().st_size
+
+         return {
+             'total_count': total_count,
+             'active_count': active_count,
+             'expired_count': expired_count,
+             'total_size_bytes': total_size,
+             'total_size_mb': round(total_size / (1024 * 1024), 2),
+             'cache_dir': str(self.cache_dir)
+         }
+
+     def clear_all(self) -> bool:
+         """
+         Clear all caches
+
+         Returns:
+             bool: Whether clearing was successful
+         """
+         try:
+             # Delete all data files (CSV, JSON, and other supported formats)
+             for data_file in self.cache_dir.glob("*"):
+                 if data_file.is_file() and data_file.name != "metadata.json":
+                     data_file.unlink()
+
+             # Clear metadata
+             self.metadata = {}
+             self._save_metadata()
+
+             return True
+
+         except Exception as e:
+             print(f"Failed to clear cache: {e}")
+             return False
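A minimal usage sketch of DataCache, assuming the default DataFrame/dict handlers and a local ./cache directory; the keys and sample frame are illustrative only:

import pandas as pd

from flowllm.storage.cache.data_cache import DataCache

cache = DataCache(cache_dir="./cache")

# Save a DataFrame (stored as CSV) that expires in 24 hours.
df = pd.DataFrame({"code": ["600519", "000001"], "close": [1700.0, 10.5]})
cache.save("daily_quotes", df, expire_hours=24)

# Save a dict (stored as JSON) with no expiration.
cache.save("run_config", {"top_k": 5, "refresh": True})

# load() returns None when the key is missing or expired; expired entries are cleaned automatically.
quotes = cache.load("daily_quotes")
if quotes is not None:
    print(quotes.head())

print(cache.get_cache_stats())  # counts, total size on disk, cache directory
cache.clean_expired()           # remove anything past its expire_time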
flowllm/storage/vector_store/__init__.py
@@ -0,0 +1,3 @@
+ from .chroma_vector_store import ChromaVectorStore
+ from .es_vector_store import EsVectorStore
+ from .local_vector_store import LocalVectorStore
flowllm/storage/vector_store/base_vector_store.py
@@ -0,0 +1,44 @@
+ from abc import ABC
+ from pathlib import Path
+ from typing import List, Iterable
+
+ from pydantic import BaseModel, Field
+
+ from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
+ from flowllm.schema.vector_node import VectorNode
+
+
+ class BaseVectorStore(BaseModel, ABC):
+     embedding_model: BaseEmbeddingModel | None = Field(default=None)
+     batch_size: int = Field(default=1024)
+
+     def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
+         raise NotImplementedError
+
+     def delete_workspace(self, workspace_id: str, **kwargs):
+         raise NotImplementedError
+
+     def create_workspace(self, workspace_id: str, **kwargs):
+         raise NotImplementedError
+
+     def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
+         raise NotImplementedError
+
+     def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
+         raise NotImplementedError
+
+     def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None, callback_fn=None,
+                        **kwargs):
+         raise NotImplementedError
+
+     def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
+         raise NotImplementedError
+
+     def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
+         raise NotImplementedError
+
+     def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
+         raise NotImplementedError
+
+     def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
+         raise NotImplementedError
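Every hook on BaseVectorStore raises NotImplementedError, so concrete stores (ChromaVectorStore, EsVectorStore, LocalVectorStore below) override the subset they support. A rough skeleton of a custom store, assuming a simple per-workspace in-memory list; the class name and storage strategy are illustrative, not part of the package:

from typing import List

from pydantic import Field

from flowllm.schema.vector_node import VectorNode
from flowllm.storage.vector_store.base_vector_store import BaseVectorStore


class InMemoryVectorStore(BaseVectorStore):
    """Illustrative store keeping nodes in a dict keyed by workspace_id (no search/embedding support)."""
    workspaces: dict = Field(default_factory=dict)

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        return workspace_id in self.workspaces

    def create_workspace(self, workspace_id: str, **kwargs):
        self.workspaces.setdefault(workspace_id, [])

    def delete_workspace(self, workspace_id: str, **kwargs):
        self.workspaces.pop(workspace_id, None)

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        # Accept a single node or a list, mirroring the base signature.
        if isinstance(nodes, VectorNode):
            nodes = [nodes]
        self.workspaces.setdefault(workspace_id, []).extend(nodes)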
flowllm/storage/vector_store/chroma_vector_store.py
@@ -6,17 +6,16 @@ from chromadb.config import Settings
  from loguru import logger
  from pydantic import Field, PrivateAttr, model_validator
 
- from llmflow.embedding_model.openai_compatible_embedding_model import OpenAICompatibleEmbeddingModel
- from llmflow.schema.vector_node import VectorNode
- from llmflow.vector_store import VECTOR_STORE_REGISTRY
- from llmflow.vector_store.base_vector_store import BaseVectorStore
+ from flowllm.context.service_context import C
+ from flowllm.schema.vector_node import VectorNode
+ from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
 
 
- @VECTOR_STORE_REGISTRY.register("chroma")
- class ChromaVectorStore(BaseVectorStore):
+ @C.register_vector_store("chroma")
+ class ChromaVectorStore(LocalVectorStore):
      store_dir: str = Field(default="./chroma_vector_store")
      collections: dict = Field(default_factory=dict)
-     _client: chromadb.Client = PrivateAttr()
+     _client: chromadb.ClientAPI = PrivateAttr()
 
      @model_validator(mode="after")
      def init_client(self):
@@ -97,8 +96,10 @@ class ChromaVectorStore(BaseVectorStore):
 
 
  def main():
-     from dotenv import load_dotenv
-     load_dotenv()
+     from flowllm.utils.common_utils import load_env
+     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+
+     load_env()
 
      embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
      workspace_id = "chroma_test_index"
@@ -185,4 +186,4 @@ def main():
 
  if __name__ == "__main__":
      main()
- # launch with: python -m llmflow.storage.chroma_vector_store
+ # launch with: python -m flowllm.storage.chroma_vector_store
flowllm/storage/vector_store/es_vector_store.py
@@ -6,15 +6,14 @@ from elasticsearch.helpers import bulk
  from loguru import logger
  from pydantic import Field, PrivateAttr, model_validator
 
- from llmflow.embedding_model.openai_compatible_embedding_model import OpenAICompatibleEmbeddingModel
- from llmflow.schema.vector_node import VectorNode
- from llmflow.vector_store import VECTOR_STORE_REGISTRY
- from llmflow.vector_store.base_vector_store import BaseVectorStore
+ from flowllm.context.service_context import C
+ from flowllm.schema.vector_node import VectorNode
+ from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
 
 
- @VECTOR_STORE_REGISTRY.register("elasticsearch")
- class EsVectorStore(BaseVectorStore):
-     hosts: str | List[str] = Field(default_factory=lambda: os.getenv("ES_HOSTS", "http://localhost:9200"))
+ @C.register_vector_store("elasticsearch")
+ class EsVectorStore(LocalVectorStore):
+     hosts: str | List[str] = Field(default_factory=lambda: os.getenv("FLOW_ES_HOSTS", "http://localhost:9200"))
      basic_auth: str | Tuple[str, str] | None = Field(default=None)
      retrieve_filters: List[dict] = []
      _client: Elasticsearch = PrivateAttr()
@@ -24,6 +23,7 @@ class EsVectorStore(BaseVectorStore):
          if isinstance(self.hosts, str):
              self.hosts = [self.hosts]
          self._client = Elasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
+         logger.info(f"Elasticsearch client initialized with hosts: {self.hosts}")
          return self
 
      def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
@@ -160,10 +160,11 @@ class EsVectorStore(BaseVectorStore):
          if refresh:
              self.refresh(workspace_id=workspace_id)
 
-
  def main():
-     from dotenv import load_dotenv
-     load_dotenv()
+     from flowllm.utils.common_utils import load_env
+     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+
+     load_env()
 
      embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
      workspace_id = "rag_nodes_index"
@@ -224,4 +225,3 @@ def main():
 
  if __name__ == "__main__":
      main()
- # launch with: python -m llmflow.storage.es_vector_store
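Both store diffs replace the old VECTOR_STORE_REGISTRY.register(...) decorator with registration on the shared service context C. Following that pattern, a third-party store would presumably be registered the same way; the backend name "my_store" and the class below are illustrative, not part of the package:

from flowllm.context.service_context import C
from flowllm.storage.vector_store.local_vector_store import LocalVectorStore


@C.register_vector_store("my_store")
class MyVectorStore(LocalVectorStore):
    """Illustrative subclass; "my_store" is the name a service config would presumably reference."""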