flowllm 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +4 -3
- flowllm/app.py +1 -1
- flowllm/config/base.yaml +75 -0
- flowllm/config/fin_supply.yaml +39 -0
- flowllm/config/pydantic_config_parser.py +16 -1
- flowllm/context/__init__.py +2 -0
- flowllm/context/base_context.py +10 -20
- flowllm/context/flow_context.py +45 -2
- flowllm/context/service_context.py +69 -10
- flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
- flowllm/enumeration/chunk_enum.py +1 -0
- flowllm/flow/__init__.py +9 -0
- flowllm/flow/base_flow.py +44 -13
- flowllm/flow/expression/__init__.py +1 -0
- flowllm/flow/{parser → expression}/expression_parser.py +5 -2
- flowllm/flow/expression/expression_tool_flow.py +25 -0
- flowllm/flow/gallery/__init__.py +1 -8
- flowllm/flow/gallery/mock_tool_flow.py +46 -28
- flowllm/flow/tool_op_flow.py +97 -0
- flowllm/llm/base_llm.py +0 -2
- flowllm/op/__init__.py +3 -4
- flowllm/op/akshare/get_ak_a_code_op.py +1 -1
- flowllm/op/akshare/get_ak_a_info_op.py +1 -1
- flowllm/op/base_op.py +232 -16
- flowllm/op/base_tool_op.py +47 -0
- flowllm/op/gallery/__init__.py +0 -1
- flowllm/op/gallery/mock_op.py +13 -7
- flowllm/op/llm/__init__.py +3 -0
- flowllm/op/{agent/react_v2_op.py → llm/react_llm_op.py} +43 -24
- flowllm/op/llm/simple_llm_op.py +48 -0
- flowllm/op/llm/stream_llm_op.py +61 -0
- flowllm/op/mcp/__init__.py +2 -0
- flowllm/op/mcp/ant_op.py +42 -0
- flowllm/op/mcp/base_sse_mcp_op.py +28 -0
- flowllm/op/parallel_op.py +5 -1
- flowllm/op/search/__init__.py +1 -2
- flowllm/op/search/dashscope_search_op.py +73 -128
- flowllm/op/search/tavily_search_op.py +64 -82
- flowllm/op/sequential_op.py +4 -0
- flowllm/schema/flow_stream_chunk.py +11 -0
- flowllm/schema/service_config.py +8 -3
- flowllm/schema/tool_call.py +46 -1
- flowllm/service/__init__.py +0 -1
- flowllm/service/base_service.py +31 -14
- flowllm/service/http_service.py +45 -36
- flowllm/service/mcp_service.py +17 -23
- flowllm/storage/vector_store/__init__.py +1 -0
- flowllm/storage/vector_store/base_vector_store.py +99 -15
- flowllm/storage/vector_store/chroma_vector_store.py +250 -8
- flowllm/storage/vector_store/es_vector_store.py +288 -32
- flowllm/storage/vector_store/local_vector_store.py +206 -9
- flowllm/storage/vector_store/memory_vector_store.py +509 -0
- flowllm/utils/common_utils.py +54 -0
- flowllm/utils/miner_u_pdf_processor.py +726 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/METADATA +7 -6
- flowllm-0.1.5.dist-info/RECORD +98 -0
- flowllm/config/default.yaml +0 -77
- flowllm/config/empty.yaml +0 -37
- flowllm/flow/gallery/cmd_flow.py +0 -11
- flowllm/flow/gallery/code_tool_flow.py +0 -30
- flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
- flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
- flowllm/flow/gallery/expression_tool_flow.py +0 -18
- flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
- flowllm/flow/gallery/terminate_tool_flow.py +0 -30
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/op/agent/__init__.py +0 -1
- flowllm/op/agent/react_v1_op.py +0 -109
- flowllm/op/agent/react_v1_prompt.yaml +0 -54
- flowllm/op/base_ray_op.py +0 -313
- flowllm/op/code/__init__.py +0 -1
- flowllm/op/code/execute_code_op.py +0 -42
- flowllm/op/gallery/terminate_op.py +0 -29
- flowllm/op/search/dashscope_deep_research_op.py +0 -267
- flowllm/service/cmd_service.py +0 -15
- flowllm-0.1.3.dist-info/RECORD +0 -102
- /flowllm/op/{agent/react_v2_prompt.yaml → llm/react_llm_prompt.yaml} +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/WHEEL +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/entry_points.txt +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,12 @@
|
|
1
|
-
|
1
|
+
import asyncio
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from functools import partial
|
2
4
|
from pathlib import Path
|
3
|
-
from typing import List, Iterable
|
5
|
+
from typing import List, Iterable, Dict, Any, Optional
|
4
6
|
|
5
7
|
from pydantic import BaseModel, Field
|
6
8
|
|
9
|
+
from flowllm.context.service_context import C
|
7
10
|
from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
|
8
11
|
from flowllm.schema.vector_node import VectorNode
|
9
12
|
|
@@ -12,36 +15,117 @@ class BaseVectorStore(BaseModel, ABC):
|
|
12
15
|
embedding_model: BaseEmbeddingModel | None = Field(default=None)
|
13
16
|
batch_size: int = Field(default=1024)
|
14
17
|
|
18
|
+
@abstractmethod
def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
    """Report whether the workspace ``workspace_id`` exists in this vector store.

    Abstract hook: the base implementation only raises ``NotImplementedError``,
    so concrete stores must override it.
    """
    raise NotImplementedError()
|
17
22
|
|
18
|
-
|
23
|
+
@abstractmethod
def delete_workspace(self, workspace_id: str, **kwargs) -> None:
    """Remove the workspace ``workspace_id`` from the vector store.

    Abstract hook: the base implementation only raises ``NotImplementedError``,
    so concrete stores must override it.
    """
    raise NotImplementedError()
|
20
27
|
|
21
|
-
|
28
|
+
@abstractmethod
def create_workspace(self, workspace_id: str, **kwargs) -> None:
    """Create a new workspace named ``workspace_id`` in the vector store.

    Abstract hook: the base implementation only raises ``NotImplementedError``,
    so concrete stores must override it.
    """
    raise NotImplementedError()
|
23
32
|
|
24
|
-
|
33
|
+
@abstractmethod
def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
    """Iterate over every node stored in the workspace ``workspace_id``.

    ``callback_fn`` is an optional hook whose handling is left to the
    concrete store. Abstract hook: the base implementation only raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()
|
26
37
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
|
38
|
+
@abstractmethod
|
39
|
+
def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs) -> None:
|
40
|
+
"""Dump workspace data to a file or path."""
|
31
41
|
raise NotImplementedError
|
32
42
|
|
33
|
-
|
34
|
-
|
43
|
+
@abstractmethod
def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: Optional[List[VectorNode]] = None,
                   callback_fn=None, **kwargs) -> None:
    """Load workspace ``workspace_id`` from a file or path, or from ``nodes``.

    Abstract hook: the base implementation only raises ``NotImplementedError``,
    so concrete stores must override it.
    """
    raise NotImplementedError()
|
36
48
|
|
37
|
-
|
49
|
+
@abstractmethod
def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs) -> None:
    """Copy the workspace ``src_workspace_id`` into ``dest_workspace_id``.

    Abstract hook: the base implementation only raises ``NotImplementedError``,
    so concrete stores must override it.
    """
    raise NotImplementedError()
|
39
53
|
|
40
|
-
|
54
|
+
@abstractmethod
def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
           **kwargs) -> List[VectorNode]:
    """Search workspace ``workspace_id`` for vectors similar to ``query``.

    ``top_k`` defaults to 1 and ``filter_dict`` is an optional filter mapping;
    their exact handling is left to the concrete store. Abstract hook: the
    base implementation only raises ``NotImplementedError``.
    """
    raise NotImplementedError()
|
42
59
|
|
43
|
-
|
60
|
+
@abstractmethod
def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs) -> None:
    """Insert a single node or a list of nodes into workspace ``workspace_id``.

    Abstract hook: the base implementation only raises ``NotImplementedError``,
    so concrete stores must override it.
    """
    raise NotImplementedError()
|
45
64
|
|
46
|
-
|
65
|
+
@abstractmethod
|
66
|
+
def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs) -> None:
|
67
|
+
"""Delete nodes from the workspace by their IDs."""
|
47
68
|
raise NotImplementedError
|
69
|
+
|
70
|
+
def close(self) -> None:
    """Close the vector store and release its resources.

    The default implementation is a no-op; stores that hold resources can
    override it.
    """
    return None
|
73
|
+
|
74
|
+
"""
|
75
|
+
Async versions of all methods
|
76
|
+
"""
|
77
|
+
|
78
|
+
async def async_exist_workspace(self, workspace_id: str, **kwargs) -> bool:
    """Async version of ``exist_workspace``.

    Runs the blocking ``exist_workspace`` call on ``C.thread_pool`` through
    ``loop.run_in_executor`` and returns its result.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.exist_workspace, workspace_id, **kwargs))
|
82
|
+
|
83
|
+
async def async_delete_workspace(self, workspace_id: str, **kwargs) -> None:
    """Async version of ``delete_workspace``.

    Runs the blocking ``delete_workspace`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.delete_workspace, workspace_id, **kwargs))
|
87
|
+
|
88
|
+
async def async_create_workspace(self, workspace_id: str, **kwargs) -> None:
    """Async version of ``create_workspace``.

    Runs the blocking ``create_workspace`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.create_workspace, workspace_id, **kwargs))
|
92
|
+
|
93
|
+
async def async_iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
    """Async version of ``iter_workspace_nodes``. Returns an iterable, not an async iterator.

    The nodes are fully collected into a list inside ``C.thread_pool``. When
    ``iter_workspace_nodes`` is implemented as a generator, merely calling it
    in the executor would only create the generator object and the actual
    (blocking) iteration would then run on the event-loop thread; draining it
    in the worker keeps that work off the loop.

    Returns:
        A list holding whatever ``iter_workspace_nodes`` yields, in order.
    """

    def _collect() -> list:
        # Exhaust the (possibly lazy) iterable inside the worker thread.
        return list(self.iter_workspace_nodes(workspace_id, callback_fn, **kwargs))

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, _collect)
|
98
|
+
|
99
|
+
async def async_dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs) -> None:
    """Async version of ``dump_workspace``.

    Runs the blocking ``dump_workspace`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``; ``workspace_id``, ``path`` and ``callback_fn``
    are forwarded positionally, ``kwargs`` by keyword.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.dump_workspace, workspace_id, path,
                                                             callback_fn, **kwargs))
|
103
|
+
|
104
|
+
async def async_load_workspace(self, workspace_id: str, path: str | Path = "",
                               nodes: Optional[List[VectorNode]] = None,
                               callback_fn=None, **kwargs) -> None:
    """Async version of ``load_workspace``.

    Runs the blocking ``load_workspace`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``; ``workspace_id``, ``path``, ``nodes`` and
    ``callback_fn`` are forwarded positionally, ``kwargs`` by keyword.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.load_workspace, workspace_id, path, nodes,
                                                             callback_fn, **kwargs))
|
109
|
+
|
110
|
+
async def async_copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs) -> None:
    """Async version of ``copy_workspace``.

    Runs the blocking ``copy_workspace`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.copy_workspace, src_workspace_id,
                                                             dest_workspace_id, **kwargs))
|
114
|
+
|
115
|
+
async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
                       filter_dict: Optional[Dict[str, Any]] = None,
                       **kwargs) -> List[VectorNode]:
    """Async version of ``search``.

    Runs the blocking ``search`` call on ``C.thread_pool`` through
    ``loop.run_in_executor`` and returns its result; ``query``,
    ``workspace_id``, ``top_k`` and ``filter_dict`` are forwarded positionally.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.search, query, workspace_id, top_k,
                                                             filter_dict, **kwargs))
|
120
|
+
|
121
|
+
async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs) -> None:
    """Async version of ``insert``.

    Runs the blocking ``insert`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.insert, nodes, workspace_id, **kwargs))
|
124
|
+
|
125
|
+
async def async_delete(self, node_ids: str | List[str], workspace_id: str, **kwargs) -> None:
    """Async version of ``delete``.

    Runs the blocking ``delete`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, partial(self.delete, node_ids, workspace_id, **kwargs))
|
128
|
+
|
129
|
+
async def async_close(self) -> None:
    """Async version of ``close``.

    Runs the blocking ``close`` call on ``C.thread_pool`` through
    ``loop.run_in_executor``.
    """
    # get_running_loop() is the preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(C.thread_pool, self.close)
|
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
import asyncio
|
2
|
+
import os
|
3
|
+
from functools import partial
|
4
|
+
from typing import List, Iterable, Dict, Any, Optional
|
2
5
|
|
3
6
|
import chromadb
|
4
7
|
from chromadb import Collection
|
@@ -6,6 +9,9 @@ from chromadb.config import Settings
|
|
6
9
|
from loguru import logger
|
7
10
|
from pydantic import Field, PrivateAttr, model_validator
|
8
11
|
|
12
|
+
# Disable ChromaDB telemetry to avoid PostHog warnings
|
13
|
+
os.environ.setdefault("ANONYMIZED_TELEMETRY", "false")
|
14
|
+
|
9
15
|
from flowllm.context.service_context import C
|
10
16
|
from flowllm.schema.vector_node import VectorNode
|
11
17
|
from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
|
@@ -19,7 +25,12 @@ class ChromaVectorStore(LocalVectorStore):
|
|
19
25
|
|
20
26
|
@model_validator(mode="after")
def init_client(self):
    """Create the ChromaDB client after model validation and return ``self``.

    The client is built from ``Settings`` pointing at ``self.store_dir`` with
    anonymized telemetry switched off (avoids PostHog warnings).
    """
    # NOTE(review): depending on the chromadb version, on-disk persistence may
    # also need ``is_persistent=True`` / ``PersistentClient`` — confirm against
    # the pinned chromadb release.
    self._client = chromadb.Client(
        Settings(persist_directory=self.store_dir, anonymized_telemetry=False)
    )
    return self
|
24
35
|
|
25
36
|
def _get_collection(self, workspace_id: str) -> Collection:
|
@@ -38,7 +49,8 @@ class ChromaVectorStore(LocalVectorStore):
|
|
38
49
|
def create_workspace(self, workspace_id: str, **kwargs):
    """Get or create the Chroma collection for ``workspace_id`` and cache it in ``self.collections``."""
    collection = self._client.get_or_create_collection(workspace_id)
    self.collections[workspace_id] = collection
|
40
51
|
|
41
|
-
def
|
52
|
+
def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
|
53
|
+
"""Iterate over all nodes in a workspace."""
|
42
54
|
collection: Collection = self._get_collection(workspace_id)
|
43
55
|
results = collection.get()
|
44
56
|
for i in range(len(results["ids"])):
|
@@ -46,23 +58,69 @@ class ChromaVectorStore(LocalVectorStore):
|
|
46
58
|
unique_id=results["ids"][i],
|
47
59
|
content=results["documents"][i],
|
48
60
|
metadata=results["metadatas"][i])
|
49
|
-
|
50
|
-
|
51
|
-
|
61
|
+
if callback_fn:
|
62
|
+
yield callback_fn(node)
|
63
|
+
else:
|
64
|
+
yield node
|
65
|
+
|
66
|
+
@staticmethod
def _build_chroma_filters(filter_dict: Optional[Dict[str, Any]] = None) -> Optional[Dict]:
    """Build a ChromaDB ``where`` clause from ``filter_dict``.

    Each entry of ``filter_dict`` is either a term filter (``{"field": value}``)
    or a range filter (``{"field": {"gte": 1, "lte": 10}}`` with any of
    ``gte``/``lte``/``gt``/``lt``).

    ChromaDB validates that a ``where`` dict, and every operator expression in
    it, contains exactly one entry. Every condition is therefore emitted as a
    single-operator dict and multiple conditions are joined with ``$and``.

    Args:
        filter_dict: Mapping of metadata field name to a term value or a
            range-spec dict. ``None`` or empty means "no filter".

    Returns:
        ``None`` when there is nothing to filter on, the single condition when
        there is exactly one, otherwise ``{"$and": [condition, ...]}``.
    """
    if not filter_dict:
        return None

    # (key accepted in filter_dict, Chroma operator), in a fixed, stable order.
    range_operators = (("gte", "$gte"), ("lte", "$lte"), ("gt", "$gt"), ("lt", "$lt"))

    conditions = []
    for key, filter_value in filter_dict.items():
        if isinstance(filter_value, dict):
            # Range filter: one single-operator condition per bound present.
            conditions.extend(
                {key: {chroma_op: filter_value[bound]}}
                for bound, chroma_op in range_operators
                if bound in filter_value
            )
        else:
            # Term filter: direct value comparison.
            conditions.append({key: filter_value})

    if not conditions:
        return None
    if len(conditions) == 1:
        return conditions[0]
    return {"$and": conditions}
|
92
|
+
|
93
|
+
def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
           **kwargs) -> List[VectorNode]:
    """Embed ``query`` and return up to ``top_k`` matching nodes from ``workspace_id``.

    Logs a warning and returns an empty list when the workspace does not
    exist. ``filter_dict`` is translated by ``_build_chroma_filters`` and
    passed as the ``where`` argument of the Chroma query. When the query
    result carries distances, ``1.0 - distance`` is stored under
    ``metadata["score"]`` of each node.
    """
    if not self.exist_workspace(workspace_id=workspace_id):
        logger.warning(f"workspace_id={workspace_id} is not exists!")
        return []

    collection: Collection = self._get_collection(workspace_id)
    query_vector = self.embedding_model.get_embeddings(query)

    results = collection.query(
        query_embeddings=[query_vector],
        n_results=top_k,
        where=self._build_chroma_filters(filter_dict),
    )

    hits = []
    for idx, node_id in enumerate(results["ids"][0]):
        hit = VectorNode(workspace_id=workspace_id,
                         unique_id=node_id,
                         content=results["documents"][0][idx],
                         metadata=results["metadatas"][0][idx])
        # NOTE(review): the conversion below assumes cosine distance; the
        # collection is created without an explicit distance setting, so
        # confirm the metric actually in use.
        distances = results.get("distances")
        if distances and idx < len(distances[0]):
            hit.metadata["score"] = 1.0 - distances[0][idx]
        hits.append(hit)

    return hits
|
67
125
|
|
68
126
|
def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
|
@@ -95,6 +153,85 @@ class ChromaVectorStore(LocalVectorStore):
|
|
95
153
|
collection.delete(ids=node_ids)
|
96
154
|
|
97
155
|
|
156
|
+
async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
                       filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
    """Async version of ``search``.

    The query is embedded with ``embedding_model.get_embeddings_async``; the
    collection lookup and the Chroma query run on ``C.thread_pool``. Logs a
    warning and returns an empty list when the workspace does not exist.
    When the query result carries distances, ``1.0 - distance`` is stored
    under ``metadata["score"]`` of each node.
    """
    if not await self.async_exist_workspace(workspace_id=workspace_id):
        logger.warning(f"workspace_id={workspace_id} is not exists!")
        return []

    # Use async embedding
    query_vector = await self.embedding_model.get_embeddings_async(query)

    # Build where clause from filter_dict
    where_clause = self._build_chroma_filters(filter_dict)

    # Execute ChromaDB query in thread pool; get_running_loop() is the
    # preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    collection = await loop.run_in_executor(C.thread_pool, self._get_collection, workspace_id)
    results = await loop.run_in_executor(
        C.thread_pool,
        partial(collection.query, query_embeddings=[query_vector], n_results=top_k, where=where_clause)
    )

    nodes = []
    for i, node_id in enumerate(results["ids"][0]):
        node = VectorNode(workspace_id=workspace_id,
                          unique_id=node_id,
                          content=results["documents"][0][i],
                          metadata=results["metadatas"][0][i])
        # ChromaDB returns distances, convert to similarity score.
        # NOTE(review): assumes cosine distance — confirm the collection's metric.
        if results.get("distances") and len(results["distances"][0]) > i:
            node.metadata["score"] = 1.0 - results["distances"][0][i]
        nodes.append(node)

    return nodes
|
191
|
+
|
192
|
+
async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
    """Async version of ``insert``.

    Creates the workspace when it does not exist, embeds the nodes that have
    no vector yet via ``embedding_model.get_node_embeddings_async``, and adds
    all nodes to the Chroma collection on ``C.thread_pool``. Nodes that
    already carry a vector are placed before the newly embedded ones.
    """
    if not await self.async_exist_workspace(workspace_id=workspace_id):
        await self.async_create_workspace(workspace_id=workspace_id)

    if isinstance(nodes, VectorNode):
        nodes = [nodes]

    embedded_nodes = [node for node in nodes if node.vector]
    not_embedded_nodes = [node for node in nodes if not node.vector]

    # Use async embedding
    now_embedded_nodes = await self.embedding_model.get_node_embeddings_async(not_embedded_nodes)

    all_nodes = embedded_nodes + now_embedded_nodes

    # Execute ChromaDB operations in thread pool; get_running_loop() is the
    # preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    collection = await loop.run_in_executor(C.thread_pool, self._get_collection, workspace_id)
    await loop.run_in_executor(
        C.thread_pool,
        partial(collection.add,
                ids=[n.unique_id for n in all_nodes],
                embeddings=[n.vector for n in all_nodes],
                documents=[n.content for n in all_nodes],
                metadatas=[n.metadata for n in all_nodes])
    )
|
219
|
+
|
220
|
+
async def async_delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
    """Async version of ``delete``.

    Logs a warning and returns when the workspace does not exist. A single
    id is wrapped into a list; the collection lookup and the deletion run on
    ``C.thread_pool``.
    """
    if not await self.async_exist_workspace(workspace_id=workspace_id):
        logger.warning(f"workspace_id={workspace_id} is not exists!")
        return

    if isinstance(node_ids, str):
        node_ids = [node_ids]

    # Execute ChromaDB operations in thread pool; get_running_loop() is the
    # preferred way to obtain the loop inside a coroutine.
    loop = asyncio.get_running_loop()
    collection = await loop.run_in_executor(C.thread_pool, self._get_collection, workspace_id)
    await loop.run_in_executor(C.thread_pool, partial(collection.delete, ids=node_ids))
|
233
|
+
|
234
|
+
|
98
235
|
def main():
|
99
236
|
from flowllm.utils.common_utils import load_env
|
100
237
|
from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
|
@@ -160,6 +297,15 @@ def main():
|
|
160
297
|
logger.info(r.model_dump(exclude={"vector"}))
|
161
298
|
logger.info("=" * 20)
|
162
299
|
|
300
|
+
# Test filter_dict
|
301
|
+
logger.info("=" * 20 + " FILTER TEST " + "=" * 20)
|
302
|
+
filter_dict = {"node_type": "n1"}
|
303
|
+
results = chroma_store.search("What is AI?", top_k=5, workspace_id=workspace_id, filter_dict=filter_dict)
|
304
|
+
logger.info(f"Filtered results (node_type=n1): {len(results)} results")
|
305
|
+
for r in results:
|
306
|
+
logger.info(r.model_dump(exclude={"vector"}))
|
307
|
+
logger.info("=" * 20)
|
308
|
+
|
163
309
|
node2_update = VectorNode(
|
164
310
|
unique_id="node2",
|
165
311
|
workspace_id=workspace_id,
|
@@ -184,6 +330,102 @@ def main():
|
|
184
330
|
chroma_store.delete_workspace(workspace_id=workspace_id)
|
185
331
|
|
186
332
|
|
333
|
+
async def async_main():
    """Exercise the async API of ``ChromaVectorStore`` end to end.

    Recreates a test workspace, inserts four sample nodes, searches, replaces
    one node (delete + insert), searches again, then dumps and deletes the
    workspace. Search results are written to the log.
    """
    from flowllm.utils.common_utils import load_env
    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel

    load_env()

    def _log_results(found):
        # Log every hit without its vector, followed by a separator line.
        for item in found:
            logger.info(item.model_dump(exclude={"vector"}))
        logger.info("=" * 20)

    embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
    workspace_id = "chroma_async_test_index"
    chroma_store = ChromaVectorStore(embedding_model=embedding_model,
                                     store_dir="./async_chroma_async_test_db")

    # Clean up and create workspace
    if await chroma_store.async_exist_workspace(workspace_id):
        await chroma_store.async_delete_workspace(workspace_id)
    await chroma_store.async_create_workspace(workspace_id)

    # (unique_id, content, node_type, category) for each sample node.
    node_specs = [
        ("async_node1", "Artificial intelligence is a technology that simulates human intelligence.",
         "n1", "tech"),
        ("async_node2", "AI is the future of mankind.", "n1", "tech"),
        ("async_node3", "I want to eat fish!", "n2", "food"),
        ("async_node4", "The bigger the storm, the more expensive the fish.", "n1", "food"),
    ]
    sample_nodes = [
        VectorNode(unique_id=unique_id,
                   workspace_id=workspace_id,
                   content=content,
                   metadata={"node_type": node_type, "category": category})
        for unique_id, content, node_type, category in node_specs
    ]

    # Test async insert
    await chroma_store.async_insert(sample_nodes, workspace_id=workspace_id)

    logger.info("ASYNC TEST - " + "=" * 20)
    # Test async search
    _log_results(await chroma_store.async_search("What is AI?", top_k=5, workspace_id=workspace_id))

    # Test async update (delete + insert)
    node2_update = VectorNode(unique_id="async_node2",
                              workspace_id=workspace_id,
                              content="AI is the future of humanity and technology.",
                              metadata={"node_type": "n1", "category": "tech", "updated": True})
    await chroma_store.async_delete(node2_update.unique_id, workspace_id=workspace_id)
    await chroma_store.async_insert(node2_update, workspace_id=workspace_id)

    logger.info("ASYNC Updated Result:")
    _log_results(await chroma_store.async_search("fish?", top_k=10, workspace_id=workspace_id))

    # Clean up
    await chroma_store.async_dump_workspace(workspace_id=workspace_id)
    await chroma_store.async_delete_workspace(workspace_id=workspace_id)
|
424
|
+
|
425
|
+
|
187
426
|
if __name__ == "__main__":
    # Script entry point: run the synchronous demo.
    main()

    # Run async test
    # NOTE: only the banner below is logged; the async_main() call on the last
    # line is currently commented out, so the async demo does not run.
    logger.info("\n" + "=" * 50 + " ASYNC TESTS " + "=" * 50)
    # asyncio.run(async_main())
|