flowllm 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. flowllm/__init__.py +4 -3
  2. flowllm/app.py +1 -1
  3. flowllm/config/base.yaml +75 -0
  4. flowllm/config/fin_supply.yaml +39 -0
  5. flowllm/config/pydantic_config_parser.py +16 -1
  6. flowllm/context/__init__.py +2 -0
  7. flowllm/context/base_context.py +10 -20
  8. flowllm/context/flow_context.py +45 -2
  9. flowllm/context/service_context.py +69 -10
  10. flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
  11. flowllm/enumeration/chunk_enum.py +1 -0
  12. flowllm/flow/__init__.py +9 -0
  13. flowllm/flow/base_flow.py +44 -13
  14. flowllm/flow/expression/__init__.py +1 -0
  15. flowllm/flow/{parser → expression}/expression_parser.py +5 -2
  16. flowllm/flow/expression/expression_tool_flow.py +25 -0
  17. flowllm/flow/gallery/__init__.py +1 -8
  18. flowllm/flow/gallery/mock_tool_flow.py +46 -28
  19. flowllm/flow/tool_op_flow.py +97 -0
  20. flowllm/llm/base_llm.py +0 -2
  21. flowllm/op/__init__.py +3 -4
  22. flowllm/op/akshare/get_ak_a_code_op.py +1 -1
  23. flowllm/op/akshare/get_ak_a_info_op.py +1 -1
  24. flowllm/op/base_op.py +232 -16
  25. flowllm/op/base_tool_op.py +47 -0
  26. flowllm/op/gallery/__init__.py +0 -1
  27. flowllm/op/gallery/mock_op.py +13 -7
  28. flowllm/op/llm/__init__.py +3 -0
  29. flowllm/op/{agent/react_v2_op.py → llm/react_llm_op.py} +43 -24
  30. flowllm/op/llm/simple_llm_op.py +48 -0
  31. flowllm/op/llm/stream_llm_op.py +61 -0
  32. flowllm/op/mcp/__init__.py +2 -0
  33. flowllm/op/mcp/ant_op.py +42 -0
  34. flowllm/op/mcp/base_sse_mcp_op.py +28 -0
  35. flowllm/op/parallel_op.py +5 -1
  36. flowllm/op/search/__init__.py +1 -2
  37. flowllm/op/search/dashscope_search_op.py +73 -128
  38. flowllm/op/search/tavily_search_op.py +64 -82
  39. flowllm/op/sequential_op.py +4 -0
  40. flowllm/schema/flow_stream_chunk.py +11 -0
  41. flowllm/schema/service_config.py +8 -3
  42. flowllm/schema/tool_call.py +46 -1
  43. flowllm/service/__init__.py +0 -1
  44. flowllm/service/base_service.py +31 -14
  45. flowllm/service/http_service.py +45 -36
  46. flowllm/service/mcp_service.py +17 -23
  47. flowllm/storage/vector_store/__init__.py +1 -0
  48. flowllm/storage/vector_store/base_vector_store.py +99 -15
  49. flowllm/storage/vector_store/chroma_vector_store.py +250 -8
  50. flowllm/storage/vector_store/es_vector_store.py +288 -32
  51. flowllm/storage/vector_store/local_vector_store.py +206 -9
  52. flowllm/storage/vector_store/memory_vector_store.py +509 -0
  53. flowllm/utils/common_utils.py +54 -0
  54. flowllm/utils/miner_u_pdf_processor.py +726 -0
  55. {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/METADATA +7 -6
  56. flowllm-0.1.5.dist-info/RECORD +98 -0
  57. flowllm/config/default.yaml +0 -77
  58. flowllm/config/empty.yaml +0 -37
  59. flowllm/flow/gallery/cmd_flow.py +0 -11
  60. flowllm/flow/gallery/code_tool_flow.py +0 -30
  61. flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
  62. flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
  63. flowllm/flow/gallery/expression_tool_flow.py +0 -18
  64. flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
  65. flowllm/flow/gallery/terminate_tool_flow.py +0 -30
  66. flowllm/flow/parser/__init__.py +0 -0
  67. flowllm/op/agent/__init__.py +0 -1
  68. flowllm/op/agent/react_v1_op.py +0 -109
  69. flowllm/op/agent/react_v1_prompt.yaml +0 -54
  70. flowllm/op/base_ray_op.py +0 -313
  71. flowllm/op/code/__init__.py +0 -1
  72. flowllm/op/code/execute_code_op.py +0 -42
  73. flowllm/op/gallery/terminate_op.py +0 -29
  74. flowllm/op/search/dashscope_deep_research_op.py +0 -267
  75. flowllm/service/cmd_service.py +0 -15
  76. flowllm-0.1.3.dist-info/RECORD +0 -102
  77. flowllm/op/{agent/react_v2_prompt.yaml → llm/react_llm_prompt.yaml} +0 -0
  78. {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/WHEEL +0 -0
  79. {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/entry_points.txt +0 -0
  80. {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/licenses/LICENSE +0 -0
  81. {flowllm-0.1.3.dist-info → flowllm-0.1.5.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,12 @@
1
- from abc import ABC
1
+ import asyncio
2
+ from abc import ABC, abstractmethod
3
+ from functools import partial
2
4
  from pathlib import Path
3
- from typing import List, Iterable
5
+ from typing import List, Iterable, Dict, Any, Optional
4
6
 
5
7
  from pydantic import BaseModel, Field
6
8
 
9
+ from flowllm.context.service_context import C
7
10
  from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
8
11
  from flowllm.schema.vector_node import VectorNode
9
12
 
@@ -12,36 +15,117 @@ class BaseVectorStore(BaseModel, ABC):
12
15
  embedding_model: BaseEmbeddingModel | None = Field(default=None)
13
16
  batch_size: int = Field(default=1024)
14
17
 
18
+ @abstractmethod
15
19
  def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
20
+ """Check if a workspace exists in the vector store."""
16
21
  raise NotImplementedError
17
22
 
18
- def delete_workspace(self, workspace_id: str, **kwargs):
23
+ @abstractmethod
24
+ def delete_workspace(self, workspace_id: str, **kwargs) -> None:
25
+ """Delete a workspace from the vector store."""
19
26
  raise NotImplementedError
20
27
 
21
- def create_workspace(self, workspace_id: str, **kwargs):
28
+ @abstractmethod
29
+ def create_workspace(self, workspace_id: str, **kwargs) -> None:
30
+ """Create a new workspace in the vector store."""
22
31
  raise NotImplementedError
23
32
 
24
- def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
33
+ @abstractmethod
34
+ def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
35
+ """Iterate over all nodes in a workspace."""
25
36
  raise NotImplementedError
26
37
 
27
- def iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
28
- return self._iter_workspace_nodes(workspace_id, **kwargs)
29
-
30
- def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
38
+ @abstractmethod
39
+ def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs) -> None:
40
+ """Dump workspace data to a file or path."""
31
41
  raise NotImplementedError
32
42
 
33
- def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None, callback_fn=None,
34
- **kwargs):
43
+ @abstractmethod
44
+ def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: Optional[List[VectorNode]] = None,
45
+ callback_fn=None, **kwargs) -> None:
46
+ """Load workspace data from a file or path, or from provided nodes."""
35
47
  raise NotImplementedError
36
48
 
37
- def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
49
+ @abstractmethod
50
+ def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs) -> None:
51
+ """Copy one workspace to another."""
38
52
  raise NotImplementedError
39
53
 
40
- def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
54
+ @abstractmethod
55
+ def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
56
+ **kwargs) -> List[VectorNode]:
57
+ """Search for similar vectors in the workspace."""
41
58
  raise NotImplementedError
42
59
 
43
- def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
60
+ @abstractmethod
61
+ def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs) -> None:
62
+ """Insert nodes into the workspace."""
44
63
  raise NotImplementedError
45
64
 
46
- def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
65
+ @abstractmethod
66
+ def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs) -> None:
67
+ """Delete nodes from the workspace by their IDs."""
47
68
  raise NotImplementedError
69
+
70
+ def close(self) -> None:
71
+ """Close the vector store and clean up resources. Default implementation does nothing."""
72
+ pass
73
+
74
+ """
75
+ Async versions of all methods
76
+ """
77
+
78
+ async def async_exist_workspace(self, workspace_id: str, **kwargs) -> bool:
79
+ """Async version of exist_workspace."""
80
+ loop = asyncio.get_event_loop()
81
+ return await loop.run_in_executor(C.thread_pool, partial(self.exist_workspace, workspace_id, **kwargs))
82
+
83
+ async def async_delete_workspace(self, workspace_id: str, **kwargs) -> None:
84
+ """Async version of delete_workspace."""
85
+ loop = asyncio.get_event_loop()
86
+ return await loop.run_in_executor(C.thread_pool, partial(self.delete_workspace, workspace_id, **kwargs))
87
+
88
+ async def async_create_workspace(self, workspace_id: str, **kwargs) -> None:
89
+ """Async version of create_workspace."""
90
+ loop = asyncio.get_event_loop()
91
+ return await loop.run_in_executor(C.thread_pool, partial(self.create_workspace, workspace_id, **kwargs))
92
+
93
+ async def async_iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
94
+ """Async version of iter_workspace_nodes. Returns an iterable, not an async iterator."""
95
+ loop = asyncio.get_event_loop()
96
+ return await loop.run_in_executor(C.thread_pool, partial(self.iter_workspace_nodes, workspace_id,
97
+ callback_fn, **kwargs))
98
+
99
+ async def async_dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
100
+ loop = asyncio.get_event_loop()
101
+ return await loop.run_in_executor(C.thread_pool, partial(self.dump_workspace, workspace_id, path,
102
+ callback_fn, **kwargs))
103
+
104
+ async def async_load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None,
105
+ callback_fn=None, **kwargs):
106
+ loop = asyncio.get_event_loop()
107
+ return await loop.run_in_executor(C.thread_pool, partial(self.load_workspace, workspace_id, path, nodes,
108
+ callback_fn, **kwargs))
109
+
110
+ async def async_copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
111
+ loop = asyncio.get_event_loop()
112
+ return await loop.run_in_executor(C.thread_pool, partial(self.copy_workspace, src_workspace_id,
113
+ dest_workspace_id, **kwargs))
114
+
115
+ async def async_search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: dict = None,
116
+ **kwargs) -> List[VectorNode]:
117
+ loop = asyncio.get_event_loop()
118
+ return await loop.run_in_executor(C.thread_pool, partial(self.search, query, workspace_id, top_k,
119
+ filter_dict, **kwargs))
120
+
121
+ async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
122
+ loop = asyncio.get_event_loop()
123
+ return await loop.run_in_executor(C.thread_pool, partial(self.insert, nodes, workspace_id, **kwargs))
124
+
125
+ async def async_delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
126
+ loop = asyncio.get_event_loop()
127
+ return await loop.run_in_executor(C.thread_pool, partial(self.delete, node_ids, workspace_id, **kwargs))
128
+
129
+ async def async_close(self):
130
+ loop = asyncio.get_event_loop()
131
+ return await loop.run_in_executor(C.thread_pool, self.close)
@@ -1,4 +1,7 @@
1
- from typing import List, Iterable
1
+ import asyncio
2
+ import os
3
+ from functools import partial
4
+ from typing import List, Iterable, Dict, Any, Optional
2
5
 
3
6
  import chromadb
4
7
  from chromadb import Collection
@@ -6,6 +9,9 @@ from chromadb.config import Settings
6
9
  from loguru import logger
7
10
  from pydantic import Field, PrivateAttr, model_validator
8
11
 
12
+ # Disable ChromaDB telemetry to avoid PostHog warnings
13
+ os.environ.setdefault("ANONYMIZED_TELEMETRY", "false")
14
+
9
15
  from flowllm.context.service_context import C
10
16
  from flowllm.schema.vector_node import VectorNode
11
17
  from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
@@ -19,7 +25,12 @@ class ChromaVectorStore(LocalVectorStore):
19
25
 
20
26
  @model_validator(mode="after")
21
27
  def init_client(self):
22
- self._client = chromadb.Client(Settings(persist_directory=self.store_dir))
28
+ # Disable telemetry to avoid PostHog warnings
29
+ settings = Settings(
30
+ persist_directory=self.store_dir,
31
+ anonymized_telemetry=False
32
+ )
33
+ self._client = chromadb.Client(settings)
23
34
  return self
24
35
 
25
36
  def _get_collection(self, workspace_id: str) -> Collection:
@@ -38,7 +49,8 @@ class ChromaVectorStore(LocalVectorStore):
38
49
  def create_workspace(self, workspace_id: str, **kwargs):
39
50
  self.collections[workspace_id] = self._client.get_or_create_collection(workspace_id)
40
51
 
41
- def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
52
+ def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
53
+ """Iterate over all nodes in a workspace."""
42
54
  collection: Collection = self._get_collection(workspace_id)
43
55
  results = collection.get()
44
56
  for i in range(len(results["ids"])):
@@ -46,23 +58,69 @@ class ChromaVectorStore(LocalVectorStore):
46
58
  unique_id=results["ids"][i],
47
59
  content=results["documents"][i],
48
60
  metadata=results["metadatas"][i])
49
- yield node
50
-
51
- def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
61
+ if callback_fn:
62
+ yield callback_fn(node)
63
+ else:
64
+ yield node
65
+
66
+ @staticmethod
67
+ def _build_chroma_filters(filter_dict: Optional[Dict[str, Any]] = None) -> Optional[Dict]:
68
+ """Build ChromaDB where clause from filter_dict"""
69
+ if not filter_dict:
70
+ return None
71
+
72
+ where_conditions = {}
73
+ for key, filter_value in filter_dict.items():
74
+ if isinstance(filter_value, dict):
75
+ # Range filter: {"gte": 1, "lte": 10}
76
+ range_conditions = {}
77
+ if "gte" in filter_value:
78
+ range_conditions["$gte"] = filter_value["gte"]
79
+ if "lte" in filter_value:
80
+ range_conditions["$lte"] = filter_value["lte"]
81
+ if "gt" in filter_value:
82
+ range_conditions["$gt"] = filter_value["gt"]
83
+ if "lt" in filter_value:
84
+ range_conditions["$lt"] = filter_value["lt"]
85
+ if range_conditions:
86
+ where_conditions[key] = range_conditions
87
+ else:
88
+ # Term filter: direct value comparison
89
+ where_conditions[key] = filter_value
90
+
91
+ return where_conditions if where_conditions else None
92
+
93
+ def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
94
+ **kwargs) -> List[VectorNode]:
52
95
  if not self.exist_workspace(workspace_id=workspace_id):
53
96
  logger.warning(f"workspace_id={workspace_id} is not exists!")
54
97
  return []
55
98
 
56
99
  collection: Collection = self._get_collection(workspace_id)
57
100
  query_vector = self.embedding_model.get_embeddings(query)
58
- results = collection.query(query_embeddings=[query_vector], n_results=top_k)
101
+
102
+ # Build where clause from filter_dict
103
+ where_clause = self._build_chroma_filters(filter_dict)
104
+
105
+ results = collection.query(
106
+ query_embeddings=[query_vector],
107
+ n_results=top_k,
108
+ where=where_clause
109
+ )
110
+
59
111
  nodes = []
60
112
  for i in range(len(results["ids"][0])):
61
113
  node = VectorNode(workspace_id=workspace_id,
62
114
  unique_id=results["ids"][0][i],
63
115
  content=results["documents"][0][i],
64
116
  metadata=results["metadatas"][0][i])
117
+ # ChromaDB returns distances, convert to similarity score
118
+ if results.get("distances") and len(results["distances"][0]) > i:
119
+ distance = results["distances"][0][i]
120
+ # Convert distance to similarity (assuming cosine distance)
121
+ node.metadata["score"] = 1.0 - distance
65
122
  nodes.append(node)
123
+
66
124
  return nodes
67
125
 
68
126
  def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
@@ -95,6 +153,85 @@ class ChromaVectorStore(LocalVectorStore):
95
153
  collection.delete(ids=node_ids)
96
154
 
97
155
 
156
+ async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
157
+ filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
158
+ """Async version of search using async embedding and run_in_executor for ChromaDB operations"""
159
+ if not await self.async_exist_workspace(workspace_id=workspace_id):
160
+ logger.warning(f"workspace_id={workspace_id} is not exists!")
161
+ return []
162
+
163
+ # Use async embedding
164
+ query_vector = await self.embedding_model.get_embeddings_async(query)
165
+
166
+ # Build where clause from filter_dict
167
+ where_clause = self._build_chroma_filters(filter_dict)
168
+
169
+ # Execute ChromaDB query in thread pool
170
+ loop = asyncio.get_event_loop()
171
+ collection = await loop.run_in_executor(C.thread_pool, self._get_collection, workspace_id)
172
+ results = await loop.run_in_executor(
173
+ C.thread_pool,
174
+ partial(collection.query, query_embeddings=[query_vector], n_results=top_k, where=where_clause)
175
+ )
176
+
177
+ nodes = []
178
+ for i in range(len(results["ids"][0])):
179
+ node = VectorNode(workspace_id=workspace_id,
180
+ unique_id=results["ids"][0][i],
181
+ content=results["documents"][0][i],
182
+ metadata=results["metadatas"][0][i])
183
+ # ChromaDB returns distances, convert to similarity score
184
+ if results.get("distances") and len(results["distances"][0]) > i:
185
+ distance = results["distances"][0][i]
186
+ # Convert distance to similarity (assuming cosine distance)
187
+ node.metadata["score"] = 1.0 - distance
188
+ nodes.append(node)
189
+
190
+ return nodes
191
+
192
+ async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
193
+ """Async version of insert using async embedding and run_in_executor for ChromaDB operations"""
194
+ if not await self.async_exist_workspace(workspace_id=workspace_id):
195
+ await self.async_create_workspace(workspace_id=workspace_id)
196
+
197
+ if isinstance(nodes, VectorNode):
198
+ nodes = [nodes]
199
+
200
+ embedded_nodes = [node for node in nodes if node.vector]
201
+ not_embedded_nodes = [node for node in nodes if not node.vector]
202
+
203
+ # Use async embedding
204
+ now_embedded_nodes = await self.embedding_model.get_node_embeddings_async(not_embedded_nodes)
205
+
206
+ all_nodes = embedded_nodes + now_embedded_nodes
207
+
208
+ # Execute ChromaDB operations in thread pool
209
+ loop = asyncio.get_event_loop()
210
+ collection = await loop.run_in_executor(C.thread_pool, self._get_collection, workspace_id)
211
+ await loop.run_in_executor(
212
+ C.thread_pool,
213
+ partial(collection.add,
214
+ ids=[n.unique_id for n in all_nodes],
215
+ embeddings=[n.vector for n in all_nodes],
216
+ documents=[n.content for n in all_nodes],
217
+ metadatas=[n.metadata for n in all_nodes])
218
+ )
219
+
220
+ async def async_delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
221
+ """Async version of delete using run_in_executor for ChromaDB operations"""
222
+ if not await self.async_exist_workspace(workspace_id=workspace_id):
223
+ logger.warning(f"workspace_id={workspace_id} is not exists!")
224
+ return
225
+
226
+ if isinstance(node_ids, str):
227
+ node_ids = [node_ids]
228
+
229
+ # Execute ChromaDB operations in thread pool
230
+ loop = asyncio.get_event_loop()
231
+ collection = await loop.run_in_executor(C.thread_pool, self._get_collection, workspace_id)
232
+ await loop.run_in_executor(C.thread_pool, partial(collection.delete, ids=node_ids))
233
+
234
+
98
235
  def main():
99
236
  from flowllm.utils.common_utils import load_env
100
237
  from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
@@ -160,6 +297,15 @@ def main():
160
297
  logger.info(r.model_dump(exclude={"vector"}))
161
298
  logger.info("=" * 20)
162
299
 
300
+ # Test filter_dict
301
+ logger.info("=" * 20 + " FILTER TEST " + "=" * 20)
302
+ filter_dict = {"node_type": "n1"}
303
+ results = chroma_store.search("What is AI?", top_k=5, workspace_id=workspace_id, filter_dict=filter_dict)
304
+ logger.info(f"Filtered results (node_type=n1): {len(results)} results")
305
+ for r in results:
306
+ logger.info(r.model_dump(exclude={"vector"}))
307
+ logger.info("=" * 20)
308
+
163
309
  node2_update = VectorNode(
164
310
  unique_id="node2",
165
311
  workspace_id=workspace_id,
@@ -184,6 +330,102 @@ def main():
184
330
  chroma_store.delete_workspace(workspace_id=workspace_id)
185
331
 
186
332
 
333
+ async def async_main():
334
+ from flowllm.utils.common_utils import load_env
335
+ from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
336
+
337
+ load_env()
338
+
339
+ embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
340
+ workspace_id = "chroma_async_test_index"
341
+
342
+ chroma_store = ChromaVectorStore(
343
+ embedding_model=embedding_model,
344
+ store_dir="./async_chroma_async_test_db"
345
+ )
346
+
347
+ # Clean up and create workspace
348
+ if await chroma_store.async_exist_workspace(workspace_id):
349
+ await chroma_store.async_delete_workspace(workspace_id)
350
+ await chroma_store.async_create_workspace(workspace_id)
351
+
352
+ sample_nodes = [
353
+ VectorNode(
354
+ unique_id="async_node1",
355
+ workspace_id=workspace_id,
356
+ content="Artificial intelligence is a technology that simulates human intelligence.",
357
+ metadata={
358
+ "node_type": "n1",
359
+ "category": "tech"
360
+ }
361
+ ),
362
+ VectorNode(
363
+ unique_id="async_node2",
364
+ workspace_id=workspace_id,
365
+ content="AI is the future of mankind.",
366
+ metadata={
367
+ "node_type": "n1",
368
+ "category": "tech"
369
+ }
370
+ ),
371
+ VectorNode(
372
+ unique_id="async_node3",
373
+ workspace_id=workspace_id,
374
+ content="I want to eat fish!",
375
+ metadata={
376
+ "node_type": "n2",
377
+ "category": "food"
378
+ }
379
+ ),
380
+ VectorNode(
381
+ unique_id="async_node4",
382
+ workspace_id=workspace_id,
383
+ content="The bigger the storm, the more expensive the fish.",
384
+ metadata={
385
+ "node_type": "n1",
386
+ "category": "food"
387
+ }
388
+ ),
389
+ ]
390
+
391
+ # Test async insert
392
+ await chroma_store.async_insert(sample_nodes, workspace_id=workspace_id)
393
+
394
+ logger.info("ASYNC TEST - " + "=" * 20)
395
+ # Test async search
396
+ results = await chroma_store.async_search("What is AI?", top_k=5, workspace_id=workspace_id)
397
+ for r in results:
398
+ logger.info(r.model_dump(exclude={"vector"}))
399
+ logger.info("=" * 20)
400
+
401
+ # Test async update (delete + insert)
402
+ node2_update = VectorNode(
403
+ unique_id="async_node2",
404
+ workspace_id=workspace_id,
405
+ content="AI is the future of humanity and technology.",
406
+ metadata={
407
+ "node_type": "n1",
408
+ "category": "tech",
409
+ "updated": True
410
+ }
411
+ )
412
+ await chroma_store.async_delete(node2_update.unique_id, workspace_id=workspace_id)
413
+ await chroma_store.async_insert(node2_update, workspace_id=workspace_id)
414
+
415
+ logger.info("ASYNC Updated Result:")
416
+ results = await chroma_store.async_search("fish?", top_k=10, workspace_id=workspace_id)
417
+ for r in results:
418
+ logger.info(r.model_dump(exclude={"vector"}))
419
+ logger.info("=" * 20)
420
+
421
+ # Clean up
422
+ await chroma_store.async_dump_workspace(workspace_id=workspace_id)
423
+ await chroma_store.async_delete_workspace(workspace_id=workspace_id)
424
+
425
+
187
426
  if __name__ == "__main__":
188
427
  main()
189
- # launch with: python -m flowllm.storage.chroma_vector_store
428
+
429
+ # Run async test
430
+ logger.info("\n" + "=" * 50 + " ASYNC TESTS " + "=" * 50)
431
+ # asyncio.run(async_main())