flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. flowllm/__init__.py +8 -3
  2. flowllm/app.py +1 -1
  3. flowllm/config/base.yaml +75 -0
  4. flowllm/config/fin_supply.yaml +39 -0
  5. flowllm/config/pydantic_config_parser.py +16 -1
  6. flowllm/context/__init__.py +2 -0
  7. flowllm/context/base_context.py +10 -20
  8. flowllm/context/flow_context.py +45 -2
  9. flowllm/context/service_context.py +73 -12
  10. flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
  11. flowllm/enumeration/chunk_enum.py +1 -0
  12. flowllm/flow/__init__.py +9 -0
  13. flowllm/flow/base_flow.py +44 -11
  14. flowllm/flow/expression/__init__.py +1 -0
  15. flowllm/flow/{parser → expression}/expression_parser.py +5 -2
  16. flowllm/flow/expression/expression_tool_flow.py +25 -0
  17. flowllm/flow/gallery/__init__.py +1 -8
  18. flowllm/flow/gallery/mock_tool_flow.py +46 -33
  19. flowllm/flow/tool_op_flow.py +97 -0
  20. flowllm/llm/base_llm.py +0 -2
  21. flowllm/llm/litellm_llm.py +2 -1
  22. flowllm/op/__init__.py +3 -3
  23. flowllm/op/akshare/get_ak_a_code_op.py +1 -1
  24. flowllm/op/akshare/get_ak_a_info_op.py +1 -1
  25. flowllm/op/base_llm_op.py +3 -2
  26. flowllm/op/base_op.py +258 -25
  27. flowllm/op/base_tool_op.py +47 -0
  28. flowllm/op/gallery/__init__.py +0 -1
  29. flowllm/op/gallery/mock_op.py +13 -7
  30. flowllm/op/llm/__init__.py +3 -0
  31. flowllm/op/llm/react_llm_op.py +105 -0
  32. flowllm/op/{agent/react_prompt.yaml → llm/react_llm_prompt.yaml} +17 -10
  33. flowllm/op/llm/simple_llm_op.py +48 -0
  34. flowllm/op/llm/stream_llm_op.py +61 -0
  35. flowllm/op/mcp/__init__.py +2 -0
  36. flowllm/op/mcp/ant_op.py +42 -0
  37. flowllm/op/mcp/base_sse_mcp_op.py +28 -0
  38. flowllm/op/parallel_op.py +5 -1
  39. flowllm/op/search/__init__.py +1 -2
  40. flowllm/op/search/dashscope_search_op.py +73 -121
  41. flowllm/op/search/tavily_search_op.py +69 -80
  42. flowllm/op/sequential_op.py +4 -0
  43. flowllm/schema/flow_stream_chunk.py +11 -0
  44. flowllm/schema/message.py +2 -0
  45. flowllm/schema/service_config.py +8 -3
  46. flowllm/schema/tool_call.py +53 -4
  47. flowllm/service/__init__.py +0 -1
  48. flowllm/service/base_service.py +31 -14
  49. flowllm/service/http_service.py +46 -37
  50. flowllm/service/mcp_service.py +17 -23
  51. flowllm/storage/vector_store/__init__.py +1 -0
  52. flowllm/storage/vector_store/base_vector_store.py +99 -12
  53. flowllm/storage/vector_store/chroma_vector_store.py +250 -8
  54. flowllm/storage/vector_store/es_vector_store.py +291 -35
  55. flowllm/storage/vector_store/local_vector_store.py +206 -9
  56. flowllm/storage/vector_store/memory_vector_store.py +509 -0
  57. flowllm/utils/common_utils.py +54 -0
  58. flowllm/utils/logger_utils.py +28 -0
  59. flowllm/utils/miner_u_pdf_processor.py +726 -0
  60. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/METADATA +7 -6
  61. flowllm-0.1.5.dist-info/RECORD +98 -0
  62. flowllm/config/default.yaml +0 -77
  63. flowllm/config/empty.yaml +0 -37
  64. flowllm/flow/gallery/cmd_flow.py +0 -11
  65. flowllm/flow/gallery/code_tool_flow.py +0 -30
  66. flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
  67. flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
  68. flowllm/flow/gallery/expression_tool_flow.py +0 -18
  69. flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
  70. flowllm/flow/gallery/terminate_tool_flow.py +0 -30
  71. flowllm/flow/parser/__init__.py +0 -0
  72. flowllm/op/agent/__init__.py +0 -0
  73. flowllm/op/agent/react_op.py +0 -83
  74. flowllm/op/base_ray_op.py +0 -313
  75. flowllm/op/code/__init__.py +0 -1
  76. flowllm/op/code/execute_code_op.py +0 -42
  77. flowllm/op/gallery/terminate_op.py +0 -29
  78. flowllm/op/search/dashscope_deep_research_op.py +0 -260
  79. flowllm/service/cmd_service.py +0 -15
  80. flowllm-0.1.2.dist-info/RECORD +0 -99
  81. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/WHEEL +0 -0
  82. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/entry_points.txt +0 -0
  83. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/licenses/LICENSE +0 -0
  84. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
1
+ import asyncio
1
2
  import fcntl
2
3
  import json
3
4
  import math
5
+ from functools import partial
4
6
  from pathlib import Path
5
- from typing import List, Iterable
7
+ from typing import List, Iterable, Optional, Dict, Any
6
8
 
7
9
  from loguru import logger
8
10
  from pydantic import Field, model_validator
@@ -15,7 +17,7 @@ from flowllm.storage.vector_store.base_vector_store import BaseVectorStore
15
17
 
16
18
  @C.register_vector_store("local")
17
19
  class LocalVectorStore(BaseVectorStore):
18
- store_dir: str = Field(default="./file_vector_store")
20
+ store_dir: str = Field(default="./local_vector_store")
19
21
 
20
22
  @model_validator(mode="after")
21
23
  def init_client(self):
@@ -88,8 +90,9 @@ class LocalVectorStore(BaseVectorStore):
88
90
  def create_workspace(self, workspace_id: str, **kwargs):
89
91
  self._dump_to_path(nodes=[], workspace_id=workspace_id, path=self.store_path, **kwargs)
90
92
 
91
- def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
92
- for i, node in enumerate(self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs)):
93
+ def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs):
94
+ for node in self._load_from_path(path=self.store_path, workspace_id=workspace_id, callback_fn=callback_fn,
95
+ **kwargs):
93
96
  yield node
94
97
 
95
98
  def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
@@ -97,7 +100,8 @@ class LocalVectorStore(BaseVectorStore):
97
100
  logger.warning(f"workspace_id={workspace_id} is not exist!")
98
101
  return {}
99
102
 
100
- return self._dump_to_path(nodes=self._iter_workspace_nodes(workspace_id=workspace_id, **kwargs),
103
+ return self._dump_to_path(
104
+ nodes=self.iter_workspace_nodes(workspace_id=workspace_id, callback_fn=callback_fn, **kwargs),
101
105
  workspace_id=workspace_id,
102
106
  path=path,
103
107
  callback_fn=callback_fn,
@@ -129,7 +133,7 @@ class LocalVectorStore(BaseVectorStore):
129
133
 
130
134
  nodes = []
131
135
  node_size = 0
132
- for node in self._iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
136
+ for node in self.iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
133
137
  nodes.append(node)
134
138
  node_size += 1
135
139
  if len(nodes) >= self.batch_size:
@@ -140,6 +144,39 @@ class LocalVectorStore(BaseVectorStore):
140
144
  self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
141
145
  return {"size": node_size}
142
146
 
147
+ @staticmethod
148
+ def _matches_filters(node: VectorNode, filter_dict: dict = None) -> bool:
149
+ """Check if a node matches all filters in filter_dict"""
150
+ if not filter_dict:
151
+ return True
152
+
153
+ for key, filter_value in filter_dict.items():
154
+ # Navigate nested keys (e.g., "metadata.node_type")
155
+ value = node.metadata
156
+ for key_part in key.split('.'):
157
+ if isinstance(value, dict) and key_part in value:
158
+ value = value[key_part]
159
+ else:
160
+ return False # Key not found
161
+
162
+ # Handle different filter types
163
+ if isinstance(filter_value, dict):
164
+ # Range filter: {"gte": 1, "lte": 10}
165
+ if "gte" in filter_value and value < filter_value["gte"]:
166
+ return False
167
+ if "lte" in filter_value and value > filter_value["lte"]:
168
+ return False
169
+ if "gt" in filter_value and value <= filter_value["gt"]:
170
+ return False
171
+ if "lt" in filter_value and value >= filter_value["lt"]:
172
+ return False
173
+ else:
174
+ # Term filter: direct value comparison
175
+ if value != filter_value:
176
+ return False
177
+
178
+ return True
179
+
143
180
  @staticmethod
144
181
  def calculate_similarity(query_vector: List[float], node_vector: List[float]):
145
182
  assert query_vector, f"query_vector is empty!"
@@ -152,12 +189,15 @@ class LocalVectorStore(BaseVectorStore):
152
189
  norm_v2 = math.sqrt(sum(y ** 2 for y in node_vector))
153
190
  return dot_product / (norm_v1 * norm_v2)
154
191
 
155
- def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
192
+ def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
193
+ **kwargs) -> List[VectorNode]:
156
194
  query_vector = self.embedding_model.get_embeddings(query)
157
195
  nodes: List[VectorNode] = []
158
196
  for node in self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs):
159
- node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
160
- nodes.append(node)
197
+ # Apply filters
198
+ if self._matches_filters(node, filter_dict):
199
+ node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
200
+ nodes.append(node)
161
201
 
162
202
  nodes = sorted(nodes, key=lambda x: x.metadata["score"], reverse=True)
163
203
  return nodes[:top_k]
@@ -203,6 +243,66 @@ class LocalVectorStore(BaseVectorStore):
203
243
  self._dump_to_path(nodes=all_nodes, workspace_id=workspace_id, path=self.store_path, **kwargs)
204
244
  logger.info(f"delete workspace_id={workspace_id} before_size={before_size} after_size={after_size}")
205
245
 
246
+ # Override async methods for better performance with file I/O
247
+ async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
248
+ filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
249
+ """Async version of search using embedding model async capabilities"""
250
+ query_vector = await self.embedding_model.get_embeddings_async(query)
251
+
252
+ # Load nodes asynchronously
253
+ loop = asyncio.get_event_loop()
254
+ nodes_iter = await loop.run_in_executor(
255
+ C.thread_pool,
256
+ partial(self._load_from_path, path=self.store_path, workspace_id=workspace_id, **kwargs)
257
+ )
258
+
259
+ nodes: List[VectorNode] = []
260
+ for node in nodes_iter:
261
+ # Apply filters
262
+ if self._matches_filters(node, filter_dict):
263
+ node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
264
+ nodes.append(node)
265
+
266
+ nodes = sorted(nodes, key=lambda x: x.metadata["score"], reverse=True)
267
+ return nodes[:top_k]
268
+
269
+ async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
270
+ """Async version of insert using embedding model async capabilities"""
271
+ if isinstance(nodes, VectorNode):
272
+ nodes = [nodes]
273
+
274
+ # Use async embedding
275
+ nodes = await self.embedding_model.get_node_embeddings_async(nodes)
276
+
277
+ # Load existing nodes asynchronously
278
+ loop = asyncio.get_event_loop()
279
+ exist_nodes_iter = await loop.run_in_executor(
280
+ C.thread_pool,
281
+ partial(self._load_from_path, path=self.store_path, workspace_id=workspace_id)
282
+ )
283
+
284
+ all_node_dict = {}
285
+ exist_nodes: List[VectorNode] = list(exist_nodes_iter)
286
+ for node in exist_nodes:
287
+ all_node_dict[node.unique_id] = node
288
+
289
+ update_cnt = 0
290
+ for node in nodes:
291
+ if node.unique_id in all_node_dict:
292
+ update_cnt += 1
293
+ all_node_dict[node.unique_id] = node
294
+
295
+ # Dump to path asynchronously
296
+ await loop.run_in_executor(
297
+ C.thread_pool,
298
+ partial(self._dump_to_path, nodes=list(all_node_dict.values()),
299
+ workspace_id=workspace_id, path=self.store_path, **kwargs)
300
+ )
301
+
302
+ logger.info(f"update workspace_id={workspace_id} nodes.size={len(nodes)} all.size={len(all_node_dict)} "
303
+ f"update_cnt={update_cnt}")
304
+
305
+
206
306
  def main():
207
307
  from flowllm.utils.common_utils import load_env
208
308
  from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
@@ -253,10 +353,107 @@ def main():
253
353
  for r in results:
254
354
  logger.info(r.model_dump(exclude={"vector"}))
255
355
  logger.info("=" * 20)
356
+
357
+ # Test filter_dict
358
+ logger.info("=" * 20 + " FILTER TEST " + "=" * 20)
359
+ filter_dict = {"node_type": "n1"}
360
+ results = client.search("What is AI?", workspace_id=workspace_id, top_k=5, filter_dict=filter_dict)
361
+ logger.info(f"Filtered results (node_type=n1): {len(results)} results")
362
+ for r in results:
363
+ logger.info(r.model_dump(exclude={"vector"}))
364
+ logger.info("=" * 20)
256
365
  client.dump_workspace(workspace_id)
257
366
 
258
367
  client.delete_workspace(workspace_id)
259
368
 
260
369
 
370
+ async def async_main():
371
+ from flowllm.utils.common_utils import load_env
372
+ from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
373
+
374
+ load_env()
375
+
376
+ embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
377
+ workspace_id = "async_rag_nodes_index"
378
+ client = LocalVectorStore(embedding_model=embedding_model, store_dir="./async_file_vector_store")
379
+
380
+ # Clean up and create workspace
381
+ if await client.async_exist_workspace(workspace_id):
382
+ await client.async_delete_workspace(workspace_id)
383
+ await client.async_create_workspace(workspace_id)
384
+
385
+ sample_nodes = [
386
+ VectorNode(
387
+ unique_id="async_local_node1",
388
+ workspace_id=workspace_id,
389
+ content="Artificial intelligence is a technology that simulates human intelligence.",
390
+ metadata={
391
+ "node_type": "n1",
392
+ }
393
+ ),
394
+ VectorNode(
395
+ unique_id="async_local_node2",
396
+ workspace_id=workspace_id,
397
+ content="AI is the future of mankind.",
398
+ metadata={
399
+ "node_type": "n1",
400
+ }
401
+ ),
402
+ VectorNode(
403
+ unique_id="async_local_node3",
404
+ workspace_id=workspace_id,
405
+ content="I want to eat fish!",
406
+ metadata={
407
+ "node_type": "n2",
408
+ }
409
+ ),
410
+ VectorNode(
411
+ unique_id="async_local_node4",
412
+ workspace_id=workspace_id,
413
+ content="The bigger the storm, the more expensive the fish.",
414
+ metadata={
415
+ "node_type": "n1",
416
+ }
417
+ ),
418
+ ]
419
+
420
+ # Test async insert
421
+ await client.async_insert(sample_nodes, workspace_id)
422
+
423
+ logger.info("ASYNC TEST - " + "=" * 20)
424
+ # Test async search
425
+ results = await client.async_search("What is AI?", workspace_id=workspace_id, top_k=5)
426
+ for r in results:
427
+ logger.info(r.model_dump(exclude={"vector"}))
428
+ logger.info("=" * 20)
429
+
430
+ # Test async update (delete + insert)
431
+ node2_update = VectorNode(
432
+ unique_id="async_local_node2",
433
+ workspace_id=workspace_id,
434
+ content="AI is the future of humanity and technology.",
435
+ metadata={
436
+ "node_type": "n1",
437
+ "updated": True
438
+ }
439
+ )
440
+ await client.async_delete(node2_update.unique_id, workspace_id=workspace_id)
441
+ await client.async_insert(node2_update, workspace_id=workspace_id)
442
+
443
+ logger.info("ASYNC Updated Result:")
444
+ results = await client.async_search("fish?", workspace_id=workspace_id, top_k=10)
445
+ for r in results:
446
+ logger.info(r.model_dump(exclude={"vector"}))
447
+ logger.info("=" * 20)
448
+
449
+ # Clean up
450
+ await client.async_dump_workspace(workspace_id)
451
+ await client.async_delete_workspace(workspace_id)
452
+
453
+
261
454
  if __name__ == "__main__":
262
455
  main()
456
+
457
+ # Run async test
458
+ logger.info("\n" + "=" * 50 + " ASYNC TESTS " + "=" * 50)
459
+ asyncio.run(async_main())