flowllm 0.1.3__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +4 -3
- flowllm/app.py +2 -1
- flowllm/config/base.yaml +75 -0
- flowllm/config/fin_supply.yaml +39 -0
- flowllm/config/pydantic_config_parser.py +16 -1
- flowllm/context/__init__.py +2 -0
- flowllm/context/base_context.py +10 -20
- flowllm/context/flow_context.py +45 -2
- flowllm/context/service_context.py +69 -10
- flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
- flowllm/enumeration/chunk_enum.py +1 -0
- flowllm/flow/__init__.py +9 -0
- flowllm/flow/base_flow.py +44 -13
- flowllm/flow/expression/__init__.py +1 -0
- flowllm/flow/{parser → expression}/expression_parser.py +5 -2
- flowllm/flow/expression/expression_tool_flow.py +25 -0
- flowllm/flow/gallery/__init__.py +1 -8
- flowllm/flow/gallery/mock_tool_flow.py +46 -28
- flowllm/flow/tool_op_flow.py +97 -0
- flowllm/llm/base_llm.py +0 -2
- flowllm/op/__init__.py +3 -4
- flowllm/op/akshare/get_ak_a_code_op.py +1 -1
- flowllm/op/akshare/get_ak_a_info_op.py +1 -1
- flowllm/op/base_op.py +232 -16
- flowllm/op/base_tool_op.py +47 -0
- flowllm/op/gallery/__init__.py +0 -1
- flowllm/op/gallery/mock_op.py +13 -7
- flowllm/op/llm/__init__.py +3 -0
- flowllm/op/{agent/react_v2_op.py → llm/react_llm_op.py} +43 -24
- flowllm/op/llm/simple_llm_op.py +48 -0
- flowllm/op/llm/stream_llm_op.py +61 -0
- flowllm/op/mcp/__init__.py +2 -0
- flowllm/op/mcp/ant_op.py +42 -0
- flowllm/op/mcp/base_sse_mcp_op.py +28 -0
- flowllm/op/parallel_op.py +5 -1
- flowllm/op/search/__init__.py +1 -2
- flowllm/op/search/dashscope_search_op.py +73 -128
- flowllm/op/search/tavily_search_op.py +64 -82
- flowllm/op/sequential_op.py +4 -0
- flowllm/schema/flow_stream_chunk.py +11 -0
- flowllm/schema/service_config.py +8 -3
- flowllm/schema/tool_call.py +46 -1
- flowllm/service/__init__.py +0 -1
- flowllm/service/base_service.py +31 -14
- flowllm/service/http_service.py +45 -36
- flowllm/service/mcp_service.py +17 -23
- flowllm/storage/vector_store/__init__.py +1 -0
- flowllm/storage/vector_store/base_vector_store.py +99 -15
- flowllm/storage/vector_store/chroma_vector_store.py +250 -8
- flowllm/storage/vector_store/es_vector_store.py +288 -32
- flowllm/storage/vector_store/local_vector_store.py +206 -9
- flowllm/storage/vector_store/memory_vector_store.py +509 -0
- flowllm/utils/common_utils.py +55 -1
- flowllm/utils/miner_u_pdf_processor.py +726 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.6.dist-info}/METADATA +7 -6
- flowllm-0.1.6.dist-info/RECORD +98 -0
- flowllm/config/default.yaml +0 -77
- flowllm/config/empty.yaml +0 -37
- flowllm/flow/gallery/cmd_flow.py +0 -11
- flowllm/flow/gallery/code_tool_flow.py +0 -30
- flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
- flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
- flowllm/flow/gallery/expression_tool_flow.py +0 -18
- flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
- flowllm/flow/gallery/terminate_tool_flow.py +0 -30
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/op/agent/__init__.py +0 -1
- flowllm/op/agent/react_v1_op.py +0 -109
- flowllm/op/agent/react_v1_prompt.yaml +0 -54
- flowllm/op/base_ray_op.py +0 -313
- flowllm/op/code/__init__.py +0 -1
- flowllm/op/code/execute_code_op.py +0 -42
- flowllm/op/gallery/terminate_op.py +0 -29
- flowllm/op/search/dashscope_deep_research_op.py +0 -267
- flowllm/service/cmd_service.py +0 -15
- flowllm-0.1.3.dist-info/RECORD +0 -102
- /flowllm/op/{agent/react_v2_prompt.yaml → llm/react_llm_prompt.yaml} +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.6.dist-info}/WHEEL +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.6.dist-info}/entry_points.txt +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,509 @@
|
|
1
|
+
import asyncio
|
2
|
+
from functools import partial
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import List, Dict, Optional, Any
|
5
|
+
|
6
|
+
from loguru import logger
|
7
|
+
from pydantic import Field
|
8
|
+
|
9
|
+
from flowllm.context.service_context import C
|
10
|
+
from flowllm.schema.vector_node import VectorNode
|
11
|
+
from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
|
12
|
+
|
13
|
+
|
14
|
+
@C.register_vector_store("memory")
class MemoryVectorStore(LocalVectorStore):
    """
    Vector store that keeps every workspace in a process-local dictionary.

    Nodes live in ``self._memory_store`` as ``{workspace_id: {unique_id: VectorNode}}``.
    Nothing is written to disk until ``dump_workspace`` is called, and previously dumped
    data can be brought back with ``load_workspace``.

    Helpers such as ``store_path``, ``batch_size``, ``embedding_model``,
    ``calculate_similarity``, ``_matches_filters``, ``_dump_to_path`` and
    ``_load_from_path`` are not defined in this class (presumably inherited from
    ``LocalVectorStore``).
    """
    store_dir: str = Field(default="./memory_vector_store")

    def __init__(self, **data):
        super().__init__(**data)
        # workspace_id -> {node.unique_id -> node}
        self._memory_store: Dict[str, Dict[str, VectorNode]] = {}

    # ------------------------------------------------------------------ helpers

    def _rank_nodes(self, query_vector, workspace_id: str, top_k: int,
                    filter_dict: Optional[Dict[str, Any]]) -> List[VectorNode]:
        """Score the workspace's nodes against ``query_vector`` and return the best ``top_k``.

        Only nodes that have a vector and pass ``_matches_filters`` are scored. The
        returned nodes are copies carrying the similarity in ``metadata["score"]``, so
        the stored nodes are never modified.
        """
        scored = [
            (self.calculate_similarity(query_vector, node.vector), node)
            for node in self._memory_store[workspace_id].values()
            if node.vector and self._matches_filters(node, filter_dict)
        ]
        # Stable sort: nodes with equal scores keep their insertion order.
        scored.sort(key=lambda pair: pair[0], reverse=True)

        ranked: List[VectorNode] = []
        # Clamp top_k so a negative value yields no results instead of slicing off the tail.
        # Only the nodes that are actually returned get copied.
        for score, node in scored[:max(top_k, 0)]:
            result_node = VectorNode(**node.model_dump())
            result_node.metadata["score"] = score
            ranked.append(result_node)
        return ranked

    def _store_nodes(self, nodes: List[VectorNode], workspace_id: str, log_prefix: str):
        """Write already-embedded ``nodes`` into an existing workspace and log the outcome.

        A node whose ``unique_id`` is already present replaces the stored one and is
        counted as an update. Each node's ``workspace_id`` is overwritten in place.
        """
        workspace = self._memory_store[workspace_id]
        update_cnt = 0
        for node in nodes:
            if node.unique_id in workspace:
                update_cnt += 1

            node.workspace_id = workspace_id
            workspace[node.unique_id] = node

        logger.info(f"{log_prefix} into workspace_id={workspace_id} nodes.size={len(nodes)} "
                    f"total.size={len(workspace)} update_cnt={update_cnt}")

    def _iter_copy_batches(self, src_nodes: List[VectorNode], dest_workspace_id: str):
        """Yield ``batch_size``-sized lists of node copies retargeted at ``dest_workspace_id``."""
        for start in range(0, len(src_nodes), self.batch_size):
            batch: List[VectorNode] = []
            for node in src_nodes[start:start + self.batch_size]:
                new_node = VectorNode(**node.model_dump())
                new_node.workspace_id = dest_workspace_id
                batch.append(new_node)
            yield batch

    # ---------------------------------------------------------------- sync API

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        """Return True when ``workspace_id`` is currently held in memory."""
        return workspace_id in self._memory_store

    def delete_workspace(self, workspace_id: str, **kwargs):
        """Drop ``workspace_id`` from memory; a missing workspace is silently ignored."""
        if workspace_id in self._memory_store:
            del self._memory_store[workspace_id]
            logger.info(f"Deleted workspace_id={workspace_id} from memory")

    def create_workspace(self, workspace_id: str, **kwargs):
        """Create an empty workspace unless one with this id already exists."""
        if workspace_id not in self._memory_store:
            self._memory_store[workspace_id] = {}
            logger.info(f"Created workspace_id={workspace_id} in memory")

    def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs):
        """Yield each node of the workspace, passed through ``callback_fn`` when one is given.

        Yields nothing when the workspace does not exist.
        """
        if workspace_id in self._memory_store:
            for node in self._memory_store[workspace_id].values():
                if callback_fn:
                    yield callback_fn(node)
                else:
                    yield node

    def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
        """Write the workspace's nodes to disk via ``_dump_to_path``.

        Args:
            workspace_id: workspace to dump.
            path: target location; falls back to ``self.store_path`` when empty.
            callback_fn: forwarded unchanged to ``_dump_to_path``.

        Returns:
            Whatever ``_dump_to_path`` returns, or ``{}`` when the workspace is not in memory.
        """
        if workspace_id not in self._memory_store:
            logger.warning(f"workspace_id={workspace_id} not found in memory!")
            return {}

        dump_path = Path(path) if path else self.store_path
        nodes = list(self._memory_store[workspace_id].values())

        return self._dump_to_path(nodes=nodes,
                                  workspace_id=workspace_id,
                                  path=dump_path,
                                  callback_fn=callback_fn,
                                  **kwargs)

    def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: Optional[List[VectorNode]] = None,
                       callback_fn=None, **kwargs):
        """Replace the in-memory workspace with ``nodes`` plus whatever is loaded from ``path``.

        Any existing workspace with this id is discarded first. Explicit ``nodes`` come
        before nodes read through ``_load_from_path``; all of them go through ``insert``.

        Returns:
            ``{"size": <number of nodes handed to insert>}``.
        """
        if workspace_id in self._memory_store:
            del self._memory_store[workspace_id]
            logger.info(f"Cleared existing workspace_id={workspace_id} from memory")

        self.create_workspace(workspace_id=workspace_id, **kwargs)

        all_nodes: List[VectorNode] = []

        if nodes:
            all_nodes.extend(nodes)

        if path:
            all_nodes.extend(
                self._load_from_path(path=path, workspace_id=workspace_id, callback_fn=callback_fn, **kwargs))

        if all_nodes:
            self.insert(nodes=all_nodes, workspace_id=workspace_id, **kwargs)

        logger.info(f"Loaded workspace_id={workspace_id} with {len(all_nodes)} nodes into memory")
        return {"size": len(all_nodes)}

    def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
        """Copy every node of ``src_workspace_id`` into ``dest_workspace_id``.

        The destination is created when missing. Nodes are duplicated (the source nodes
        are left untouched) and inserted in batches of ``self.batch_size``.

        Returns:
            ``{"size": <number of source nodes>}``, or ``{}`` when the source is not in memory.
        """
        if src_workspace_id not in self._memory_store:
            logger.warning(f"src_workspace_id={src_workspace_id} not found in memory!")
            return {}

        if dest_workspace_id not in self._memory_store:
            self.create_workspace(workspace_id=dest_workspace_id, **kwargs)

        # Snapshot first so inserting into the destination cannot disturb the iteration.
        src_nodes = list(self._memory_store[src_workspace_id].values())
        node_size = len(src_nodes)

        for new_nodes in self._iter_copy_batches(src_nodes, dest_workspace_id):
            self.insert(nodes=new_nodes, workspace_id=dest_workspace_id, **kwargs)

        logger.info(f"Copied {node_size} nodes from {src_workspace_id} to {dest_workspace_id}")
        return {"size": node_size}

    def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
               **kwargs) -> List[VectorNode]:
        """Return up to ``top_k`` nodes most similar to ``query``, best match first.

        The query is embedded with ``self.embedding_model.get_embeddings``. Results are
        copies with the similarity stored in ``metadata["score"]``. An unknown workspace
        yields an empty list.
        """
        if workspace_id not in self._memory_store:
            logger.warning(f"workspace_id={workspace_id} not found in memory!")
            return []

        query_vector = self.embedding_model.get_embeddings(query)
        return self._rank_nodes(query_vector, workspace_id, top_k, filter_dict)

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        """Embed ``nodes`` and store them, creating the workspace when needed.

        A single node is accepted as well as a list. Nodes whose ``unique_id`` already
        exists overwrite the stored entry.
        """
        if isinstance(nodes, VectorNode):
            nodes = [nodes]

        if workspace_id not in self._memory_store:
            self.create_workspace(workspace_id=workspace_id, **kwargs)

        embedded: List[VectorNode] = self.embedding_model.get_node_embeddings(nodes)
        self._store_nodes(embedded, workspace_id, log_prefix="Inserted")

    def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
        """Remove the given node id(s) from the workspace; unknown ids are skipped."""
        if workspace_id not in self._memory_store:
            logger.warning(f"workspace_id={workspace_id} not found in memory!")
            return

        if isinstance(node_ids, str):
            node_ids = [node_ids]

        workspace = self._memory_store[workspace_id]
        before_size = len(workspace)
        deleted_cnt = 0

        for node_id in node_ids:
            if node_id in workspace:
                del workspace[node_id]
                deleted_cnt += 1

        after_size = len(workspace)
        logger.info(f"Deleted from workspace_id={workspace_id} before_size={before_size} "
                    f"after_size={after_size} deleted_cnt={deleted_cnt}")

    # --------------------------------------------------------------- async API

    async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
                           filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
        """Async version of search; the query is embedded with ``get_embeddings_async``."""
        if workspace_id not in self._memory_store:
            logger.warning(f"workspace_id={workspace_id} not found in memory!")
            return []

        query_vector = await self.embedding_model.get_embeddings_async(query)
        return self._rank_nodes(query_vector, workspace_id, top_k, filter_dict)

    async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        """Async version of insert; nodes are embedded with ``get_node_embeddings_async``."""
        if isinstance(nodes, VectorNode):
            nodes = [nodes]

        # Ensure workspace exists
        if workspace_id not in self._memory_store:
            self.create_workspace(workspace_id=workspace_id, **kwargs)

        embedded = await self.embedding_model.get_node_embeddings_async(nodes)
        self._store_nodes(embedded, workspace_id, log_prefix="Async inserted")

    async def async_dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
        """Async version of dump_workspace; the dump runs on ``C.thread_pool``."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            C.thread_pool,
            partial(self.dump_workspace, workspace_id=workspace_id, path=path, callback_fn=callback_fn, **kwargs)
        )

    async def async_load_workspace(self, workspace_id: str, path: str | Path = "",
                                   nodes: Optional[List[VectorNode]] = None,
                                   callback_fn=None, **kwargs):
        """Async version of load_workspace; the load runs on ``C.thread_pool``."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            C.thread_pool,
            partial(self.load_workspace, workspace_id=workspace_id, path=path, nodes=nodes,
                    callback_fn=callback_fn, **kwargs)
        )

    async def async_exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        """Async version of exist_workspace (pure dictionary lookup)."""
        return self.exist_workspace(workspace_id, **kwargs)

    async def async_delete_workspace(self, workspace_id: str, **kwargs):
        """Async version of delete_workspace (pure dictionary update)."""
        return self.delete_workspace(workspace_id, **kwargs)

    async def async_create_workspace(self, workspace_id: str, **kwargs):
        """Async version of create_workspace (pure dictionary update)."""
        return self.create_workspace(workspace_id, **kwargs)

    async def async_delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
        """Async version of delete (pure dictionary update)."""
        return self.delete(node_ids, workspace_id, **kwargs)

    async def async_copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
        """Async version of copy_workspace.

        Batches are written with ``async_insert`` so the embedding call is awaited rather
        than executed synchronously on the event loop. Return value matches
        ``copy_workspace``.
        """
        if src_workspace_id not in self._memory_store:
            logger.warning(f"src_workspace_id={src_workspace_id} not found in memory!")
            return {}

        if dest_workspace_id not in self._memory_store:
            self.create_workspace(workspace_id=dest_workspace_id, **kwargs)

        # Snapshot before the first await so concurrent writers cannot change what is copied.
        src_nodes = list(self._memory_store[src_workspace_id].values())
        node_size = len(src_nodes)

        for new_nodes in self._iter_copy_batches(src_nodes, dest_workspace_id):
            await self.async_insert(nodes=new_nodes, workspace_id=dest_workspace_id, **kwargs)

        logger.info(f"Copied {node_size} nodes from {src_workspace_id} to {dest_workspace_id}")
        return {"size": node_size}
|
251
|
+
|
252
|
+
|
253
|
+
def main():
    """Walk MemoryVectorStore through its synchronous API end to end.

    Steps, in order: insert four sample nodes, search, search with a metadata filter,
    overwrite one node by re-inserting its id, delete a node, dump the workspace to
    disk, copy it to a second workspace, and finally drop both workspaces from memory.
    Requires the embedding service configured through the environment loaded by
    ``load_env``.
    """
    from flowllm.utils.common_utils import load_env
    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel

    load_env()

    store = MemoryVectorStore(
        embedding_model=OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4"))
    ws = "memory_test_workspace"
    rule = "=" * 20

    # Start from an empty workspace.
    if store.exist_workspace(ws):
        store.delete_workspace(ws)
    store.create_workspace(ws)

    # (unique_id, content, node_type, category)
    seed_rows = (
        ("memory_node1", "Artificial intelligence is a technology that simulates human intelligence.",
         "tech", "AI"),
        ("memory_node2", "Machine learning is a subset of artificial intelligence.",
         "tech", "ML"),
        ("memory_node3", "I love eating delicious seafood, especially fresh fish.",
         "food", "preference"),
        ("memory_node4", "Deep learning uses neural networks with multiple layers.",
         "tech", "DL"),
    )
    seed_nodes = [
        VectorNode(
            unique_id=uid,
            workspace_id=ws,
            content=text,
            metadata={"node_type": node_type, "category": category},
        )
        for uid, text, node_type, category in seed_rows
    ]

    # Insert
    logger.info("Testing insert...")
    store.insert(seed_nodes, ws)

    # Plain search
    logger.info(f"{rule} SEARCH TEST {rule}")
    hits = store.search("What is artificial intelligence?", workspace_id=ws, top_k=3)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Search restricted by metadata
    logger.info(f"{rule} FILTER TEST {rule}")
    hits = store.search("What is artificial intelligence?", workspace_id=ws, top_k=5,
                        filter_dict={"node_type": "tech"})
    logger.info(f"Filtered results (node_type=tech): {len(hits)} results")
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Filtered Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Update: inserting a node whose unique_id already exists replaces the stored one.
    logger.info(f"{rule} UPDATE TEST {rule}")
    replacement = VectorNode(
        unique_id="memory_node2",
        workspace_id=ws,
        content="Machine learning is a powerful subset of AI that learns from data.",
        metadata={"node_type": "tech", "category": "ML", "updated": True},
    )
    store.insert(replacement, ws)

    hits = store.search("machine learning", workspace_id=ws, top_k=2)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Updated Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Delete the only food node, then query for food again.
    logger.info(f"{rule} DELETE TEST {rule}")
    store.delete("memory_node3", workspace_id=ws)

    hits = store.search("food fish", workspace_id=ws, top_k=5)
    logger.info(f"After deletion, found {len(hits)} food-related results")

    # Persist to disk
    logger.info(f"{rule} DUMP TEST {rule}")
    dump_info = store.dump_workspace(ws)
    logger.info(f"Dumped {dump_info['size']} nodes to disk")

    # Copy into a second workspace and query the copy
    logger.info(f"{rule} COPY TEST {rule}")
    ws_copy = "memory_copy_workspace"
    copy_info = store.copy_workspace(ws, ws_copy)
    logger.info(f"Copied {copy_info['size']} nodes to new workspace")

    hits = store.search("AI technology", workspace_id=ws_copy, top_k=2)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Copy Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Drop both workspaces from memory.
    store.delete_workspace(ws)
    store.delete_workspace(ws_copy)
    logger.info("Cleanup completed")
|
374
|
+
|
375
|
+
|
376
|
+
async def async_main():
    """Walk MemoryVectorStore through its asynchronous API end to end.

    Steps, in order: insert four sample nodes, search, overwrite one node, delete a
    node, dump the workspace to disk, drop it from memory and reload it from the dump,
    copy it to a second workspace, and finally drop both workspaces from memory.
    Requires the embedding service configured through the environment loaded by
    ``load_env``.
    """
    from flowllm.utils.common_utils import load_env
    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel

    load_env()

    store = MemoryVectorStore(
        embedding_model=OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4"),
        store_dir="./async_memory_vector_store",
    )
    ws = "async_memory_test_workspace"
    rule = "=" * 20

    # Start from an empty workspace.
    if await store.async_exist_workspace(ws):
        await store.async_delete_workspace(ws)
    await store.async_create_workspace(ws)

    # (unique_id, content, node_type, category)
    seed_rows = (
        ("async_memory_node1", "Quantum computing represents the future of computational power.",
         "tech", "quantum"),
        ("async_memory_node2", "Blockchain technology enables decentralized applications.",
         "tech", "blockchain"),
        ("async_memory_node3", "Cloud computing provides scalable infrastructure solutions.",
         "tech", "cloud"),
        ("async_memory_node4", "Pizza is my favorite Italian food with cheese and tomatoes.",
         "food", "italian"),
    )
    seed_nodes = [
        VectorNode(
            unique_id=uid,
            workspace_id=ws,
            content=text,
            metadata={"node_type": node_type, "category": category},
        )
        for uid, text, node_type, category in seed_rows
    ]

    # Insert
    logger.info("ASYNC TEST - Testing insert...")
    await store.async_insert(seed_nodes, ws)

    # Search
    logger.info(f"ASYNC TEST - {rule} SEARCH TEST {rule}")
    hits = await store.async_search("What is quantum computing?", workspace_id=ws, top_k=3)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Async Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Update: inserting a node whose unique_id already exists replaces the stored one.
    logger.info(f"ASYNC TEST - {rule} UPDATE TEST {rule}")
    replacement = VectorNode(
        unique_id="async_memory_node2",
        workspace_id=ws,
        content="Blockchain is a revolutionary distributed ledger technology for secure transactions.",
        metadata={"node_type": "tech", "category": "blockchain", "updated": True, "version": "2.0"},
    )
    await store.async_insert(replacement, ws)

    hits = await store.async_search("blockchain distributed", workspace_id=ws, top_k=2)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Async Updated Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Delete the only food node, then query for food again.
    logger.info(f"ASYNC TEST - {rule} DELETE TEST {rule}")
    await store.async_delete("async_memory_node4", workspace_id=ws)

    hits = await store.async_search("pizza food", workspace_id=ws, top_k=5)
    logger.info(f"After async deletion, found {len(hits)} food-related results")

    # Persist to disk
    logger.info(f"ASYNC TEST - {rule} DUMP TEST {rule}")
    dump_info = await store.async_dump_workspace(ws)
    logger.info(f"Async dumped {dump_info['size']} nodes to disk")

    # Round trip: drop the workspace from memory, then restore it from the dump.
    logger.info(f"ASYNC TEST - {rule} LOAD TEST {rule}")
    await store.async_delete_workspace(ws)
    load_info = await store.async_load_workspace(ws, path=store.store_path)
    logger.info(f"Async loaded {load_info['size']} nodes from disk")

    hits = await store.async_search("quantum technology", workspace_id=ws, top_k=3)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Loaded Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Copy into a second workspace and query the copy
    logger.info(f"ASYNC TEST - {rule} COPY TEST {rule}")
    ws_copy = "async_memory_copy_workspace"
    copy_info = await store.async_copy_workspace(ws, ws_copy)
    logger.info(f"Async copied {copy_info['size']} nodes to new workspace")

    hits = await store.async_search("computing technology", workspace_id=ws_copy, top_k=2)
    for rank, hit in enumerate(hits, start=1):
        logger.info(f"Async Copy Result {rank}: {hit.model_dump(exclude={'vector'})}")

    # Drop both workspaces from memory.
    await store.async_delete_workspace(ws)
    await store.async_delete_workspace(ws_copy)
    logger.info("Async cleanup completed")
|
500
|
+
|
501
|
+
|
502
|
+
if __name__ == "__main__":
    # Synchronous walkthrough first.
    logger.info(f"{'=' * 50} SYNC TESTS {'=' * 50}")
    main()

    # Then the asynchronous walkthrough, driven by asyncio.run.
    logger.info(f"\n{'=' * 50} ASYNC TESTS {'=' * 50}")
    asyncio.run(async_main())
|
flowllm/utils/common_utils.py
CHANGED
@@ -3,12 +3,20 @@ import re
|
|
3
3
|
from pathlib import Path
|
4
4
|
|
5
5
|
from loguru import logger
|
6
|
+
from pyfiglet import Figlet
|
7
|
+
from rich.console import Console, Group
|
8
|
+
from rich.panel import Panel
|
9
|
+
from rich.table import Table
|
10
|
+
from rich.text import Text
|
6
11
|
|
7
12
|
|
8
13
|
def camel_to_snake(content: str) -> str:
    """
    Convert a CamelCase identifier to snake_case, e.g. BaseWorker -> base_worker.

    An underscore is placed in front of every ASCII capital letter except one at the
    very start of the string, and the result is lower-cased. Consecutive capitals are
    therefore split letter by letter; only "LLM" is special-cased.
    """
    # FIXME: "LLM" is rewritten to "Llm" first so it becomes a single "llm" segment
    # instead of "l_l_m".
    normalized = content.replace("LLM", "Llm")

    pieces = []
    for index, char in enumerate(normalized):
        # index 0 never gets a leading underscore.
        if index and "A" <= char <= "Z":
            pieces.append("_")
        pieces.append(char)
    return "".join(pieces).lower()
|
14
22
|
|
@@ -18,6 +26,9 @@ def snake_to_camel(content: str) -> str:
|
|
18
26
|
base_worker -> BaseWorker
|
19
27
|
"""
|
20
28
|
camel_str = "".join(x.capitalize() for x in content.split("_"))
|
29
|
+
|
30
|
+
# FIXME
|
31
|
+
camel_str = camel_str.replace("Llm", "LLM")
|
21
32
|
return camel_str
|
22
33
|
|
23
34
|
|
@@ -49,4 +60,47 @@ def load_env(path: str | Path = None):
|
|
49
60
|
_load_env(path)
|
50
61
|
return
|
51
62
|
|
52
|
-
|
63
|
+
logger.warning(".env not found")
|
64
|
+
|
65
|
+
|
66
|
+
def print_banner(name: str, service_config, width: int = 200):
    """
    Print a startup banner for the service to stderr.

    The banner is a bordered panel containing ``name`` rendered as "slant" figlet art,
    followed by a small table with the backend, its URL/transport when applicable, and
    the installed FlowLLM and FastMCP versions.

    Args:
        name: text rendered as the logo and used as the panel title.
        service_config: must be a ``ServiceConfig`` instance (checked with ``assert``).
        width: maximum width handed to the figlet renderer.
    """
    # Imported here rather than at module level — presumably to avoid an import cycle; verify.
    from flowllm.schema.service_config import ServiceConfig
    assert isinstance(service_config, ServiceConfig)

    logo_text = Text(Figlet(font="slant", width=width).renderText(name), style="bold green")

    # Collect the (emoji, label, value) rows first, then feed them to the table.
    backend = service_config.backend
    rows = [("📦", "Backend:", backend)]

    if backend == "http":
        http_cfg = service_config.http
        rows.append(("🔗", "URL:", f"http://{http_cfg.host}:{http_cfg.port}"))
    elif backend == "mcp":
        mcp_cfg = service_config.mcp
        rows.append(("📚", "Transport:", mcp_cfg.transport))
        if mcp_cfg.transport == "sse":
            rows.append(("🔗", "URL:", f"http://{mcp_cfg.host}:{mcp_cfg.port}/sse"))

    # Blank spacer row between connection details and version details.
    rows.append(("", "", ""))

    import flowllm
    rows.append(("🚀", "FlowLLM version:", Text(flowllm.__version__, style="dim white", no_wrap=True)))
    import fastmcp
    rows.append(("📚", "FastMCP version:", Text(fastmcp.__version__, style="dim white", no_wrap=True)))

    info_table = Table.grid(padding=(0, 1))
    # Columns, in order: emoji, label, value.
    for column_style, alignment in (("bold", "center"), ("bold cyan", "left"), ("white", "left")):
        info_table.add_column(style=column_style, justify=alignment)
    for row in rows:
        info_table.add_row(*row)

    panel = Panel(
        Group(logo_text, "", info_table),
        title=name,
        title_align="left",
        border_style="dim",
        padding=(1, 4),
        expand=False,
    )

    # stderr keeps the banner out of stdout.
    Console(stderr=True).print(Group("\n", panel, "\n"))
|