flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. flowllm/__init__.py +8 -3
  2. flowllm/app.py +1 -1
  3. flowllm/config/base.yaml +75 -0
  4. flowllm/config/fin_supply.yaml +39 -0
  5. flowllm/config/pydantic_config_parser.py +16 -1
  6. flowllm/context/__init__.py +2 -0
  7. flowllm/context/base_context.py +10 -20
  8. flowllm/context/flow_context.py +45 -2
  9. flowllm/context/service_context.py +73 -12
  10. flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
  11. flowllm/enumeration/chunk_enum.py +1 -0
  12. flowllm/flow/__init__.py +9 -0
  13. flowllm/flow/base_flow.py +44 -11
  14. flowllm/flow/expression/__init__.py +1 -0
  15. flowllm/flow/{parser → expression}/expression_parser.py +5 -2
  16. flowllm/flow/expression/expression_tool_flow.py +25 -0
  17. flowllm/flow/gallery/__init__.py +1 -8
  18. flowllm/flow/gallery/mock_tool_flow.py +46 -33
  19. flowllm/flow/tool_op_flow.py +97 -0
  20. flowllm/llm/base_llm.py +0 -2
  21. flowllm/llm/litellm_llm.py +2 -1
  22. flowllm/op/__init__.py +3 -3
  23. flowllm/op/akshare/get_ak_a_code_op.py +1 -1
  24. flowllm/op/akshare/get_ak_a_info_op.py +1 -1
  25. flowllm/op/base_llm_op.py +3 -2
  26. flowllm/op/base_op.py +258 -25
  27. flowllm/op/base_tool_op.py +47 -0
  28. flowllm/op/gallery/__init__.py +0 -1
  29. flowllm/op/gallery/mock_op.py +13 -7
  30. flowllm/op/llm/__init__.py +3 -0
  31. flowllm/op/llm/react_llm_op.py +105 -0
  32. flowllm/op/{agent/react_prompt.yaml → llm/react_llm_prompt.yaml} +17 -10
  33. flowllm/op/llm/simple_llm_op.py +48 -0
  34. flowllm/op/llm/stream_llm_op.py +61 -0
  35. flowllm/op/mcp/__init__.py +2 -0
  36. flowllm/op/mcp/ant_op.py +42 -0
  37. flowllm/op/mcp/base_sse_mcp_op.py +28 -0
  38. flowllm/op/parallel_op.py +5 -1
  39. flowllm/op/search/__init__.py +1 -2
  40. flowllm/op/search/dashscope_search_op.py +73 -121
  41. flowllm/op/search/tavily_search_op.py +69 -80
  42. flowllm/op/sequential_op.py +4 -0
  43. flowllm/schema/flow_stream_chunk.py +11 -0
  44. flowllm/schema/message.py +2 -0
  45. flowllm/schema/service_config.py +8 -3
  46. flowllm/schema/tool_call.py +53 -4
  47. flowllm/service/__init__.py +0 -1
  48. flowllm/service/base_service.py +31 -14
  49. flowllm/service/http_service.py +46 -37
  50. flowllm/service/mcp_service.py +17 -23
  51. flowllm/storage/vector_store/__init__.py +1 -0
  52. flowllm/storage/vector_store/base_vector_store.py +99 -12
  53. flowllm/storage/vector_store/chroma_vector_store.py +250 -8
  54. flowllm/storage/vector_store/es_vector_store.py +291 -35
  55. flowllm/storage/vector_store/local_vector_store.py +206 -9
  56. flowllm/storage/vector_store/memory_vector_store.py +509 -0
  57. flowllm/utils/common_utils.py +54 -0
  58. flowllm/utils/logger_utils.py +28 -0
  59. flowllm/utils/miner_u_pdf_processor.py +726 -0
  60. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/METADATA +7 -6
  61. flowllm-0.1.5.dist-info/RECORD +98 -0
  62. flowllm/config/default.yaml +0 -77
  63. flowllm/config/empty.yaml +0 -37
  64. flowllm/flow/gallery/cmd_flow.py +0 -11
  65. flowllm/flow/gallery/code_tool_flow.py +0 -30
  66. flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
  67. flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
  68. flowllm/flow/gallery/expression_tool_flow.py +0 -18
  69. flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
  70. flowllm/flow/gallery/terminate_tool_flow.py +0 -30
  71. flowllm/flow/parser/__init__.py +0 -0
  72. flowllm/op/agent/__init__.py +0 -0
  73. flowllm/op/agent/react_op.py +0 -83
  74. flowllm/op/base_ray_op.py +0 -313
  75. flowllm/op/code/__init__.py +0 -1
  76. flowllm/op/code/execute_code_op.py +0 -42
  77. flowllm/op/gallery/terminate_op.py +0 -29
  78. flowllm/op/search/dashscope_deep_research_op.py +0 -260
  79. flowllm/service/cmd_service.py +0 -15
  80. flowllm-0.1.2.dist-info/RECORD +0 -99
  81. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/WHEEL +0 -0
  82. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/entry_points.txt +0 -0
  83. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/licenses/LICENSE +0 -0
  84. {flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
1
+ import asyncio
1
2
  import os
2
- from typing import List, Tuple, Iterable
3
+ from typing import List, Tuple, Iterable, Dict, Any, Optional
3
4
 
4
- from elasticsearch import Elasticsearch
5
- from elasticsearch.helpers import bulk
5
+ from elasticsearch import Elasticsearch, AsyncElasticsearch
6
+ from elasticsearch.helpers import bulk, async_bulk
6
7
  from loguru import logger
7
8
  from pydantic import Field, PrivateAttr, model_validator
8
9
 
@@ -15,14 +16,15 @@ from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
15
16
  class EsVectorStore(LocalVectorStore):
16
17
  hosts: str | List[str] = Field(default_factory=lambda: os.getenv("FLOW_ES_HOSTS", "http://localhost:9200"))
17
18
  basic_auth: str | Tuple[str, str] | None = Field(default=None)
18
- retrieve_filters: List[dict] = []
19
19
  _client: Elasticsearch = PrivateAttr()
20
+ _async_client: AsyncElasticsearch = PrivateAttr()
20
21
 
21
22
  @model_validator(mode="after")
22
23
  def init_client(self):
23
24
  if isinstance(self.hosts, str):
24
25
  self.hosts = [self.hosts]
25
26
  self._client = Elasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
27
+ self._async_client = AsyncElasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
26
28
  logger.info(f"Elasticsearch client initialized with hosts: {self.hosts}")
27
29
  return self
28
30
 
@@ -48,10 +50,16 @@ class EsVectorStore(LocalVectorStore):
48
50
  }
49
51
  return self._client.indices.create(index=workspace_id, body=body)
50
52
 
51
- def _iter_workspace_nodes(self, workspace_id: str, max_size: int = 10000, **kwargs) -> Iterable[VectorNode]:
53
+ def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, max_size: int = 10000, **kwargs) -> Iterable[
54
+ VectorNode]:
55
+ """Iterate over all nodes in a workspace."""
52
56
  response = self._client.search(index=workspace_id, body={"query": {"match_all": {}}, "size": max_size})
53
57
  for doc in response['hits']['hits']:
54
- yield self.doc2node(doc, workspace_id)
58
+ node = self.doc2node(doc, workspace_id)
59
+ if callback_fn:
60
+ yield callback_fn(node)
61
+ else:
62
+ yield node
55
63
 
56
64
  def refresh(self, workspace_id: str):
57
65
  self._client.indices.refresh(index=workspace_id)
@@ -62,38 +70,54 @@ class EsVectorStore(LocalVectorStore):
62
70
  node.workspace_id = workspace_id
63
71
  node.unique_id = doc["_id"]
64
72
  if "_score" in doc:
65
- node.metadata["_score"] = doc["_score"] - 1
73
+ node.metadata["score"] = doc["_score"] - 1
66
74
  return node
67
75
 
68
- def add_term_filter(self, key: str, value):
69
- if key:
70
- self.retrieve_filters.append({"term": {key: value}})
71
- return self
72
-
73
- def add_range_filter(self, key: str, gte=None, lte=None):
74
- if key:
75
- if gte is not None and lte is not None:
76
- self.retrieve_filters.append({"range": {key: {"gte": gte, "lte": lte}}})
77
- elif gte is not None:
78
- self.retrieve_filters.append({"range": {key: {"gte": gte}}})
79
- elif lte is not None:
80
- self.retrieve_filters.append({"range": {key: {"lte": lte}}})
81
- return self
82
-
83
- def clear_filter(self):
84
- self.retrieve_filters.clear()
85
- return self
76
+ @staticmethod
77
+ def _build_es_filters(filter_dict: Optional[Dict[str, Any]] = None) -> List[Dict]:
78
+ """Build Elasticsearch filter clauses from filter_dict"""
79
+ if not filter_dict:
80
+ return []
86
81
 
87
- def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
82
+ filters = []
83
+ for key, filter_value in filter_dict.items():
84
+ # Handle nested keys by prefixing with metadata.
85
+ es_key = f"metadata.{key}" if not key.startswith("metadata.") else key
86
+
87
+ if isinstance(filter_value, dict):
88
+ # Range filter: {"gte": 1, "lte": 10}
89
+ range_conditions = {}
90
+ if "gte" in filter_value:
91
+ range_conditions["gte"] = filter_value["gte"]
92
+ if "lte" in filter_value:
93
+ range_conditions["lte"] = filter_value["lte"]
94
+ if "gt" in filter_value:
95
+ range_conditions["gt"] = filter_value["gt"]
96
+ if "lt" in filter_value:
97
+ range_conditions["lt"] = filter_value["lt"]
98
+ if range_conditions:
99
+ filters.append({"range": {es_key: range_conditions}})
100
+ else:
101
+ # Term filter: direct value comparison
102
+ filters.append({"term": {es_key: filter_value}})
103
+
104
+ return filters
105
+
106
+ def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
107
+ **kwargs) -> List[VectorNode]:
88
108
  if not self.exist_workspace(workspace_id=workspace_id):
89
109
  logger.warning(f"workspace_id={workspace_id} is not exists!")
90
110
  return []
91
111
 
92
112
  query_vector = self.embedding_model.get_embeddings(query)
113
+
114
+ # Build filters from filter_dict
115
+ es_filters = self._build_es_filters(filter_dict)
116
+
93
117
  body = {
94
118
  "query": {
95
119
  "script_score": {
96
- "query": {"bool": {"must": self.retrieve_filters}},
120
+ "query": {"bool": {"must": es_filters}} if es_filters else {"match_all": {}},
97
121
  "script": {
98
122
  "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
99
123
  "params": {"query_vector": query_vector},
@@ -106,12 +130,13 @@ class EsVectorStore(LocalVectorStore):
106
130
 
107
131
  nodes: List[VectorNode] = []
108
132
  for doc in response['hits']['hits']:
109
- nodes.append(self.doc2node(doc, workspace_id))
133
+ node = self.doc2node(doc, workspace_id)
134
+ node.metadata["score"] = doc["_score"] - 1 # Adjust score since we added 1.0
135
+ nodes.append(node)
110
136
 
111
- self.retrieve_filters.clear()
112
137
  return nodes
113
138
 
114
- def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = False, **kwargs):
139
+ def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = True, **kwargs):
115
140
  if not self.exist_workspace(workspace_id=workspace_id):
116
141
  self.create_workspace(workspace_id=workspace_id)
117
142
 
@@ -140,7 +165,7 @@ class EsVectorStore(LocalVectorStore):
140
165
  if refresh:
141
166
  self.refresh(workspace_id=workspace_id)
142
167
 
143
- def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = False, **kwargs):
168
+ def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = True, **kwargs):
144
169
  if not self.exist_workspace(workspace_id=workspace_id):
145
170
  logger.warning(f"workspace_id={workspace_id} is not exists!")
146
171
  return
@@ -160,6 +185,134 @@ class EsVectorStore(LocalVectorStore):
160
185
  if refresh:
161
186
  self.refresh(workspace_id=workspace_id)
162
187
 
188
+
189
+ # Async methods using native Elasticsearch async APIs
190
+ async def async_exist_workspace(self, workspace_id: str, **kwargs) -> bool:
191
+ """Async version of exist_workspace using native ES async client"""
192
+ return await self._async_client.indices.exists(index=workspace_id)
193
+
194
+ async def async_delete_workspace(self, workspace_id: str, **kwargs):
195
+ """Async version of delete_workspace using native ES async client"""
196
+ return await self._async_client.indices.delete(index=workspace_id, **kwargs)
197
+
198
+ async def async_create_workspace(self, workspace_id: str, **kwargs):
199
+ """Async version of create_workspace using native ES async client"""
200
+ body = {
201
+ "mappings": {
202
+ "properties": {
203
+ "workspace_id": {"type": "keyword"},
204
+ "content": {"type": "text"},
205
+ "metadata": {"type": "object"},
206
+ "vector": {
207
+ "type": "dense_vector",
208
+ "dims": self.embedding_model.dimensions
209
+ }
210
+ }
211
+ }
212
+ }
213
+ return await self._async_client.indices.create(index=workspace_id, body=body)
214
+
215
+ async def async_refresh(self, workspace_id: str):
216
+ """Async version of refresh using native ES async client"""
217
+ await self._async_client.indices.refresh(index=workspace_id)
218
+
219
+ async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
220
+ filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
221
+ """Async version of search using native ES async client and async embedding"""
222
+ if not await self.async_exist_workspace(workspace_id=workspace_id):
223
+ logger.warning(f"workspace_id={workspace_id} is not exists!")
224
+ return []
225
+
226
+ # Use async embedding
227
+ query_vector = await self.embedding_model.get_embeddings_async(query)
228
+
229
+ # Build filters from filter_dict
230
+ es_filters = self._build_es_filters(filter_dict)
231
+
232
+ body = {
233
+ "query": {
234
+ "script_score": {
235
+ "query": {"bool": {"must": es_filters}} if es_filters else {"match_all": {}},
236
+ "script": {
237
+ "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
238
+ "params": {"query_vector": query_vector},
239
+ }
240
+ }
241
+ },
242
+ "size": top_k
243
+ }
244
+ response = await self._async_client.search(index=workspace_id, body=body, **kwargs)
245
+
246
+ nodes: List[VectorNode] = []
247
+ for doc in response['hits']['hits']:
248
+ node = self.doc2node(doc, workspace_id)
249
+ node.metadata["score"] = doc["_score"] - 1 # Adjust score since we added 1.0
250
+ nodes.append(node)
251
+
252
+ return nodes
253
+
254
+ async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = True,
255
+ **kwargs):
256
+ """Async version of insert using native ES async client and async embedding"""
257
+ if not await self.async_exist_workspace(workspace_id=workspace_id):
258
+ await self.async_create_workspace(workspace_id=workspace_id)
259
+
260
+ if isinstance(nodes, VectorNode):
261
+ nodes = [nodes]
262
+
263
+ embedded_nodes = [node for node in nodes if node.vector]
264
+ not_embedded_nodes = [node for node in nodes if not node.vector]
265
+
266
+ # Use async embedding
267
+ now_embedded_nodes = await self.embedding_model.get_node_embeddings_async(not_embedded_nodes)
268
+
269
+ docs = [
270
+ {
271
+ "_op_type": "index",
272
+ "_index": workspace_id,
273
+ "_id": node.unique_id,
274
+ "_source": {
275
+ "workspace_id": workspace_id,
276
+ "content": node.content,
277
+ "metadata": node.metadata,
278
+ "vector": node.vector
279
+ }
280
+ } for node in embedded_nodes + now_embedded_nodes]
281
+
282
+ status, error = await async_bulk(self._async_client, docs, chunk_size=self.batch_size, **kwargs)
283
+ logger.info(f"async insert docs.size={len(docs)} status={status} error={error}")
284
+
285
+ if refresh:
286
+ await self.async_refresh(workspace_id=workspace_id)
287
+
288
+ async def async_delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = True, **kwargs):
289
+ """Async version of delete using native ES async client"""
290
+ if not await self.async_exist_workspace(workspace_id=workspace_id):
291
+ logger.warning(f"workspace_id={workspace_id} is not exists!")
292
+ return
293
+
294
+ if isinstance(node_ids, str):
295
+ node_ids = [node_ids]
296
+
297
+ actions = [
298
+ {
299
+ "_op_type": "delete",
300
+ "_index": workspace_id,
301
+ "_id": node_id
302
+ } for node_id in node_ids]
303
+
304
+ status, error = await async_bulk(self._async_client, actions, chunk_size=self.batch_size, **kwargs)
305
+ logger.info(f"async delete actions.size={len(actions)} status={status} error={error}")
306
+
307
+ if refresh:
308
+ await self.async_refresh(workspace_id=workspace_id)
309
+
310
+ def close(self):
311
+ self._client.close()
312
+
313
+ async def async_close(self):
314
+ await self._async_client.close()
315
+
163
316
  def main():
164
317
  from flowllm.utils.common_utils import load_env
165
318
  from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
@@ -207,21 +360,124 @@ def main():
207
360
 
208
361
  es.insert(sample_nodes, workspace_id=workspace_id, refresh=True)
209
362
 
210
- logger.info("=" * 20)
211
- results = es.add_term_filter(key="metadata.node_type", value="n1") \
212
- .search("What is AI?", top_k=5, workspace_id=workspace_id)
363
+ logger.info("=" * 20 + " FILTER TEST " + "=" * 20)
364
+ filter_dict = {"node_type": "n1"}
365
+ results = es.search("What is AI?", top_k=5, workspace_id=workspace_id, filter_dict=filter_dict)
366
+ logger.info(f"Filtered results (node_type=n1): {len(results)} results")
213
367
  for r in results:
214
368
  logger.info(r.model_dump(exclude={"vector"}))
215
369
  logger.info("=" * 20)
216
370
 
217
- logger.info("=" * 20)
371
+ logger.info("=" * 20 + " UNFILTERED TEST " + "=" * 20)
218
372
  results = es.search("What is AI?", top_k=5, workspace_id=workspace_id)
373
+ logger.info(f"Unfiltered results: {len(results)} results")
219
374
  for r in results:
220
375
  logger.info(r.model_dump(exclude={"vector"}))
221
376
  logger.info("=" * 20)
222
377
  es.dump_workspace(workspace_id=workspace_id)
223
378
  es.delete_workspace(workspace_id=workspace_id)
224
379
 
380
+ es.close()
381
+
382
+ async def async_main():
383
+ from flowllm.utils.common_utils import load_env
384
+ from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
385
+
386
+ load_env()
387
+
388
+ embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
389
+ workspace_id = "async_rag_nodes_index"
390
+ hosts = "http://11.160.132.46:8200"
391
+
392
+ # Use async context manager to ensure proper cleanup
393
+ es = EsVectorStore(hosts=hosts, embedding_model=embedding_model)
394
+ # Clean up and create workspace
395
+ if await es.async_exist_workspace(workspace_id=workspace_id):
396
+ await es.async_delete_workspace(workspace_id=workspace_id)
397
+ await es.async_create_workspace(workspace_id=workspace_id)
398
+
399
+ sample_nodes = [
400
+ VectorNode(
401
+ unique_id="async_es_node1",
402
+ workspace_id=workspace_id,
403
+ content="Artificial intelligence is a technology that simulates human intelligence.",
404
+ metadata={
405
+ "node_type": "n1",
406
+ }
407
+ ),
408
+ VectorNode(
409
+ unique_id="async_es_node2",
410
+ workspace_id=workspace_id,
411
+ content="AI is the future of mankind.",
412
+ metadata={
413
+ "node_type": "n1",
414
+ }
415
+ ),
416
+ VectorNode(
417
+ unique_id="async_es_node3",
418
+ workspace_id=workspace_id,
419
+ content="I want to eat fish!",
420
+ metadata={
421
+ "node_type": "n2",
422
+ }
423
+ ),
424
+ VectorNode(
425
+ unique_id="async_es_node4",
426
+ workspace_id=workspace_id,
427
+ content="The bigger the storm, the more expensive the fish.",
428
+ metadata={
429
+ "node_type": "n1",
430
+ }
431
+ ),
432
+ ]
433
+
434
+ # Test async insert
435
+ await es.async_insert(sample_nodes, workspace_id=workspace_id, refresh=True)
436
+
437
+ logger.info("ASYNC TEST - " + "=" * 20)
438
+ # Test async search with filter
439
+ filter_dict = {"node_type": "n1"}
440
+ results = await es.async_search("What is AI?", top_k=5, workspace_id=workspace_id, filter_dict=filter_dict)
441
+ for r in results:
442
+ logger.info(r.model_dump(exclude={"vector"}))
443
+ logger.info("=" * 20)
444
+
445
+ # Test async search without filter
446
+ logger.info("ASYNC TEST WITHOUT FILTER - " + "=" * 20)
447
+ results = await es.async_search("What is AI?", top_k=5, workspace_id=workspace_id)
448
+ for r in results:
449
+ logger.info(r.model_dump(exclude={"vector"}))
450
+ logger.info("=" * 20)
451
+
452
+ # Test async update (delete + insert)
453
+ node2_update = VectorNode(
454
+ unique_id="async_es_node2",
455
+ workspace_id=workspace_id,
456
+ content="AI is the future of humanity and technology.",
457
+ metadata={
458
+ "node_type": "n1",
459
+ "updated": True
460
+ }
461
+ )
462
+ await es.async_delete(node2_update.unique_id, workspace_id=workspace_id, refresh=True)
463
+ await es.async_insert(node2_update, workspace_id=workspace_id, refresh=True)
464
+
465
+ logger.info("ASYNC Updated Result:")
466
+ results = await es.async_search("fish?", workspace_id=workspace_id, top_k=10)
467
+ for r in results:
468
+ logger.info(r.model_dump(exclude={"vector"}))
469
+ logger.info("=" * 20)
470
+
471
+ # Clean up
472
+ await es.async_dump_workspace(workspace_id=workspace_id)
473
+ await es.async_delete_workspace(workspace_id=workspace_id)
474
+
475
+ await es.async_close()
476
+
225
477
 
226
478
  if __name__ == "__main__":
227
479
  main()
480
+
481
+ # Run async test
482
+ logger.info("\n" + "=" * 50 + " ASYNC TESTS " + "=" * 50)
483
+ asyncio.run(async_main())