PyPI - flowllm - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)

flowllm/__init__.py +8 -3
flowllm/app.py +1 -1
flowllm/config/base.yaml +75 -0
flowllm/config/fin_supply.yaml +39 -0
flowllm/config/pydantic_config_parser.py +16 -1
flowllm/context/__init__.py +2 -0
flowllm/context/base_context.py +10 -20
flowllm/context/flow_context.py +45 -2
flowllm/context/service_context.py +73 -12
flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
flowllm/enumeration/chunk_enum.py +1 -0
flowllm/flow/__init__.py +9 -0
flowllm/flow/base_flow.py +44 -11
flowllm/flow/expression/__init__.py +1 -0
flowllm/flow/{parser → expression}/expression_parser.py +5 -2
flowllm/flow/expression/expression_tool_flow.py +25 -0
flowllm/flow/gallery/__init__.py +1 -8
flowllm/flow/gallery/mock_tool_flow.py +46 -33
flowllm/flow/tool_op_flow.py +97 -0
flowllm/llm/base_llm.py +0 -2
flowllm/llm/litellm_llm.py +2 -1
flowllm/op/__init__.py +3 -3
flowllm/op/akshare/get_ak_a_code_op.py +1 -1
flowllm/op/akshare/get_ak_a_info_op.py +1 -1
flowllm/op/base_llm_op.py +3 -2
flowllm/op/base_op.py +258 -25
flowllm/op/base_tool_op.py +47 -0
flowllm/op/gallery/__init__.py +0 -1
flowllm/op/gallery/mock_op.py +13 -7
flowllm/op/llm/__init__.py +3 -0
flowllm/op/llm/react_llm_op.py +105 -0
flowllm/op/{agent/react_prompt.yaml → llm/react_llm_prompt.yaml} +17 -10
flowllm/op/llm/simple_llm_op.py +48 -0
flowllm/op/llm/stream_llm_op.py +61 -0
flowllm/op/mcp/__init__.py +2 -0
flowllm/op/mcp/ant_op.py +42 -0
flowllm/op/mcp/base_sse_mcp_op.py +28 -0
flowllm/op/parallel_op.py +5 -1
flowllm/op/search/__init__.py +1 -2
flowllm/op/search/dashscope_search_op.py +73 -121
flowllm/op/search/tavily_search_op.py +69 -80
flowllm/op/sequential_op.py +4 -0
flowllm/schema/flow_stream_chunk.py +11 -0
flowllm/schema/message.py +2 -0
flowllm/schema/service_config.py +8 -3
flowllm/schema/tool_call.py +53 -4
flowllm/service/__init__.py +0 -1
flowllm/service/base_service.py +31 -14
flowllm/service/http_service.py +46 -37
flowllm/service/mcp_service.py +17 -23
flowllm/storage/vector_store/__init__.py +1 -0
flowllm/storage/vector_store/base_vector_store.py +99 -12
flowllm/storage/vector_store/chroma_vector_store.py +250 -8
flowllm/storage/vector_store/es_vector_store.py +291 -35
flowllm/storage/vector_store/local_vector_store.py +206 -9
flowllm/storage/vector_store/memory_vector_store.py +509 -0
flowllm/utils/common_utils.py +54 -0
flowllm/utils/logger_utils.py +28 -0
flowllm/utils/miner_u_pdf_processor.py +726 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/METADATA +7 -6
flowllm-0.1.5.dist-info/RECORD +98 -0
flowllm/config/default.yaml +0 -77
flowllm/config/empty.yaml +0 -37
flowllm/flow/gallery/cmd_flow.py +0 -11
flowllm/flow/gallery/code_tool_flow.py +0 -30
flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
flowllm/flow/gallery/expression_tool_flow.py +0 -18
flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
flowllm/flow/gallery/terminate_tool_flow.py +0 -30
flowllm/flow/parser/__init__.py +0 -0
flowllm/op/agent/__init__.py +0 -0
flowllm/op/agent/react_op.py +0 -83
flowllm/op/base_ray_op.py +0 -313
flowllm/op/code/__init__.py +0 -1
flowllm/op/code/execute_code_op.py +0 -42
flowllm/op/gallery/terminate_op.py +0 -29
flowllm/op/search/dashscope_deep_research_op.py +0 -260
flowllm/service/cmd_service.py +0 -15
flowllm-0.1.2.dist-info/RECORD +0 -99
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/WHEEL +0 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/entry_points.txt +0 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/licenses/LICENSE +0 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/top_level.txt +0 -0

flowllm/storage/vector_store/local_vector_store.py CHANGED

@@ -1,8 +1,10 @@
+import asyncio
 import fcntl
 import json
 import math
+from functools import partial
 from pathlib import Path
-from typing import List, Iterable
+from typing import List, Iterable, Optional, Dict, Any
 from loguru import logger
 from pydantic import Field, model_validator
@@ -15,7 +17,7 @@ from flowllm.storage.vector_store.base_vector_store import BaseVectorStore
 @C.register_vector_store("local")
 class LocalVectorStore(BaseVectorStore):
-    store_dir: str = Field(default="./file_vector_store")
+    store_dir: str = Field(default="./local_vector_store")
     @model_validator(mode="after")
     def init_client(self):
@@ -88,8 +90,9 @@ class LocalVectorStore(BaseVectorStore):
     def create_workspace(self, workspace_id: str, **kwargs):
         self._dump_to_path(nodes=[], workspace_id=workspace_id, path=self.store_path, **kwargs)
-    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
-        for i, node in enumerate(self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs)):
+    def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, **kwargs):
+        for node in self._load_from_path(path=self.store_path, workspace_id=workspace_id, callback_fn=callback_fn,
+                                         **kwargs):
             yield node
     def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
@@ -97,7 +100,8 @@ class LocalVectorStore(BaseVectorStore):
             logger.warning(f"workspace_id={workspace_id} is not exist!")
             return {}
-        return self._dump_to_path(nodes=self._iter_workspace_nodes(workspace_id=workspace_id, **kwargs),
+        return self._dump_to_path(
+            nodes=self.iter_workspace_nodes(workspace_id=workspace_id, callback_fn=callback_fn, **kwargs),
                                   workspace_id=workspace_id,
                                   path=path,
                                   callback_fn=callback_fn,
@@ -129,7 +133,7 @@ class LocalVectorStore(BaseVectorStore):
         nodes = []
         node_size = 0
-        for node in self._iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
+        for node in self.iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
             nodes.append(node)
             node_size += 1
             if len(nodes) >= self.batch_size:
@@ -140,6 +144,39 @@ class LocalVectorStore(BaseVectorStore):
             self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
         return {"size": node_size}
+    @staticmethod
+    def _matches_filters(node: VectorNode, filter_dict: dict = None) -> bool:
+        """Check if a node matches all filters in filter_dict"""
+        if not filter_dict:
+            return True
+        for key, filter_value in filter_dict.items():
+            # Navigate nested keys (e.g., "metadata.node_type")
+            value = node.metadata
+            for key_part in key.split('.'):
+                if isinstance(value, dict) and key_part in value:
+                    value = value[key_part]
+                else:
+                    return False  # Key not found
+            # Handle different filter types
+            if isinstance(filter_value, dict):
+                # Range filter: {"gte": 1, "lte": 10}
+                if "gte" in filter_value and value < filter_value["gte"]:
+                    return False
+                if "lte" in filter_value and value > filter_value["lte"]:
+                    return False
+                if "gt" in filter_value and value <= filter_value["gt"]:
+                    return False
+                if "lt" in filter_value and value >= filter_value["lt"]:
+                    return False
+            else:
+                # Term filter: direct value comparison
+                if value != filter_value:
+                    return False
+        return True
     @staticmethod
     def calculate_similarity(query_vector: List[float], node_vector: List[float]):
         assert query_vector, f"query_vector is empty!"
@@ -152,12 +189,15 @@ class LocalVectorStore(BaseVectorStore):
         norm_v2 = math.sqrt(sum(y ** 2 for y in node_vector))
         return dot_product / (norm_v1 * norm_v2)
-    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
+    def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
+               **kwargs) -> List[VectorNode]:
         query_vector = self.embedding_model.get_embeddings(query)
         nodes: List[VectorNode] = []
         for node in self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs):
-            node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
-            nodes.append(node)
+            # Apply filters
+            if self._matches_filters(node, filter_dict):
+                node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
+                nodes.append(node)
         nodes = sorted(nodes, key=lambda x: x.metadata["score"], reverse=True)
         return nodes[:top_k]
@@ -203,6 +243,66 @@ class LocalVectorStore(BaseVectorStore):
         self._dump_to_path(nodes=all_nodes, workspace_id=workspace_id, path=self.store_path, **kwargs)
         logger.info(f"delete workspace_id={workspace_id} before_size={before_size} after_size={after_size}")
+    # Override async methods for better performance with file I/O
+    async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
+                           filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
+        """Async version of search using embedding model async capabilities"""
+        query_vector = await self.embedding_model.get_embeddings_async(query)
+        # Load nodes asynchronously
+        loop = asyncio.get_event_loop()
+        nodes_iter = await loop.run_in_executor(
+            C.thread_pool,
+            partial(self._load_from_path, path=self.store_path, workspace_id=workspace_id, **kwargs)
+        )
+        nodes: List[VectorNode] = []
+        for node in nodes_iter:
+            # Apply filters
+            if self._matches_filters(node, filter_dict):
+                node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
+                nodes.append(node)
+        nodes = sorted(nodes, key=lambda x: x.metadata["score"], reverse=True)
+        return nodes[:top_k]
+    async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
+        """Async version of insert using embedding model async capabilities"""
+        if isinstance(nodes, VectorNode):
+            nodes = [nodes]
+        # Use async embedding
+        nodes = await self.embedding_model.get_node_embeddings_async(nodes)
+        # Load existing nodes asynchronously
+        loop = asyncio.get_event_loop()
+        exist_nodes_iter = await loop.run_in_executor(
+            C.thread_pool,
+            partial(self._load_from_path, path=self.store_path, workspace_id=workspace_id)
+        )
+        all_node_dict = {}
+        exist_nodes: List[VectorNode] = list(exist_nodes_iter)
+        for node in exist_nodes:
+            all_node_dict[node.unique_id] = node
+        update_cnt = 0
+        for node in nodes:
+            if node.unique_id in all_node_dict:
+                update_cnt += 1
+            all_node_dict[node.unique_id] = node
+        # Dump to path asynchronously
+        await loop.run_in_executor(
+            C.thread_pool,
+            partial(self._dump_to_path, nodes=list(all_node_dict.values()),
+                    workspace_id=workspace_id, path=self.store_path, **kwargs)
+        )
+        logger.info(f"update workspace_id={workspace_id} nodes.size={len(nodes)} all.size={len(all_node_dict)} "
+                    f"update_cnt={update_cnt}")
 def main():
     from flowllm.utils.common_utils import load_env
     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
@@ -253,10 +353,107 @@ def main():
     for r in results:
         logger.info(r.model_dump(exclude={"vector"}))
     logger.info("=" * 20)
+    # Test filter_dict
+    logger.info("=" * 20 + " FILTER TEST " + "=" * 20)
+    filter_dict = {"node_type": "n1"}
+    results = client.search("What is AI?", workspace_id=workspace_id, top_k=5, filter_dict=filter_dict)
+    logger.info(f"Filtered results (node_type=n1): {len(results)} results")
+    for r in results:
+        logger.info(r.model_dump(exclude={"vector"}))
+    logger.info("=" * 20)
     client.dump_workspace(workspace_id)
     client.delete_workspace(workspace_id)
+async def async_main():
+    from flowllm.utils.common_utils import load_env
+    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+    load_env()
+    embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
+    workspace_id = "async_rag_nodes_index"
+    client = LocalVectorStore(embedding_model=embedding_model, store_dir="./async_file_vector_store")
+    # Clean up and create workspace
+    if await client.async_exist_workspace(workspace_id):
+        await client.async_delete_workspace(workspace_id)
+    await client.async_create_workspace(workspace_id)
+    sample_nodes = [
+        VectorNode(
+            unique_id="async_local_node1",
+            workspace_id=workspace_id,
+            content="Artificial intelligence is a technology that simulates human intelligence.",
+            metadata={
+                "node_type": "n1",
+            }
+        ),
+        VectorNode(
+            unique_id="async_local_node2",
+            workspace_id=workspace_id,
+            content="AI is the future of mankind.",
+            metadata={
+                "node_type": "n1",
+            }
+        ),
+        VectorNode(
+            unique_id="async_local_node3",
+            workspace_id=workspace_id,
+            content="I want to eat fish!",
+            metadata={
+                "node_type": "n2",
+            }
+        ),
+        VectorNode(
+            unique_id="async_local_node4",
+            workspace_id=workspace_id,
+            content="The bigger the storm, the more expensive the fish.",
+            metadata={
+                "node_type": "n1",
+            }
+        ),
+    ]
+    # Test async insert
+    await client.async_insert(sample_nodes, workspace_id)
+    logger.info("ASYNC TEST - " + "=" * 20)
+    # Test async search
+    results = await client.async_search("What is AI?", workspace_id=workspace_id, top_k=5)
+    for r in results:
+        logger.info(r.model_dump(exclude={"vector"}))
+    logger.info("=" * 20)
+    # Test async update (delete + insert)
+    node2_update = VectorNode(
+        unique_id="async_local_node2",
+        workspace_id=workspace_id,
+        content="AI is the future of humanity and technology.",
+        metadata={
+            "node_type": "n1",
+            "updated": True
+        }
+    )
+    await client.async_delete(node2_update.unique_id, workspace_id=workspace_id)
+    await client.async_insert(node2_update, workspace_id=workspace_id)
+    logger.info("ASYNC Updated Result:")
+    results = await client.async_search("fish?", workspace_id=workspace_id, top_k=10)
+    for r in results:
+        logger.info(r.model_dump(exclude={"vector"}))
+    logger.info("=" * 20)
+    # Clean up
+    await client.async_dump_workspace(workspace_id)
+    await client.async_delete_workspace(workspace_id)
 if __name__ == "__main__":
     main()
+    # Run async test
+    logger.info("\n" + "=" * 50 + " ASYNC TESTS " + "=" * 50)
+    asyncio.run(async_main())

flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl