PyPI - flowllm - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

flowllm/__init__.py +8 -3
flowllm/app.py +1 -1
flowllm/config/base.yaml +75 -0
flowllm/config/fin_supply.yaml +39 -0
flowllm/config/pydantic_config_parser.py +16 -1
flowllm/context/__init__.py +2 -0
flowllm/context/base_context.py +10 -20
flowllm/context/flow_context.py +45 -2
flowllm/context/service_context.py +73 -12
flowllm/embedding_model/openai_compatible_embedding_model.py +1 -2
flowllm/enumeration/chunk_enum.py +1 -0
flowllm/flow/__init__.py +9 -0
flowllm/flow/base_flow.py +44 -11
flowllm/flow/expression/__init__.py +1 -0
flowllm/flow/{parser → expression}/expression_parser.py +5 -2
flowllm/flow/expression/expression_tool_flow.py +25 -0
flowllm/flow/gallery/__init__.py +1 -8
flowllm/flow/gallery/mock_tool_flow.py +46 -33
flowllm/flow/tool_op_flow.py +97 -0
flowllm/llm/base_llm.py +0 -2
flowllm/llm/litellm_llm.py +2 -1
flowllm/op/__init__.py +3 -3
flowllm/op/akshare/get_ak_a_code_op.py +1 -1
flowllm/op/akshare/get_ak_a_info_op.py +1 -1
flowllm/op/base_llm_op.py +3 -2
flowllm/op/base_op.py +258 -25
flowllm/op/base_tool_op.py +47 -0
flowllm/op/gallery/__init__.py +0 -1
flowllm/op/gallery/mock_op.py +13 -7
flowllm/op/llm/__init__.py +3 -0
flowllm/op/llm/react_llm_op.py +105 -0
flowllm/op/{agent/react_prompt.yaml → llm/react_llm_prompt.yaml} +17 -10
flowllm/op/llm/simple_llm_op.py +48 -0
flowllm/op/llm/stream_llm_op.py +61 -0
flowllm/op/mcp/__init__.py +2 -0
flowllm/op/mcp/ant_op.py +42 -0
flowllm/op/mcp/base_sse_mcp_op.py +28 -0
flowllm/op/parallel_op.py +5 -1
flowllm/op/search/__init__.py +1 -2
flowllm/op/search/dashscope_search_op.py +73 -121
flowllm/op/search/tavily_search_op.py +69 -80
flowllm/op/sequential_op.py +4 -0
flowllm/schema/flow_stream_chunk.py +11 -0
flowllm/schema/message.py +2 -0
flowllm/schema/service_config.py +8 -3
flowllm/schema/tool_call.py +53 -4
flowllm/service/__init__.py +0 -1
flowllm/service/base_service.py +31 -14
flowllm/service/http_service.py +46 -37
flowllm/service/mcp_service.py +17 -23
flowllm/storage/vector_store/__init__.py +1 -0
flowllm/storage/vector_store/base_vector_store.py +99 -12
flowllm/storage/vector_store/chroma_vector_store.py +250 -8
flowllm/storage/vector_store/es_vector_store.py +291 -35
flowllm/storage/vector_store/local_vector_store.py +206 -9
flowllm/storage/vector_store/memory_vector_store.py +509 -0
flowllm/utils/common_utils.py +54 -0
flowllm/utils/logger_utils.py +28 -0
flowllm/utils/miner_u_pdf_processor.py +726 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/METADATA +7 -6
flowllm-0.1.5.dist-info/RECORD +98 -0
flowllm/config/default.yaml +0 -77
flowllm/config/empty.yaml +0 -37
flowllm/flow/gallery/cmd_flow.py +0 -11
flowllm/flow/gallery/code_tool_flow.py +0 -30
flowllm/flow/gallery/dashscope_search_tool_flow.py +0 -34
flowllm/flow/gallery/deepsearch_tool_flow.py +0 -39
flowllm/flow/gallery/expression_tool_flow.py +0 -18
flowllm/flow/gallery/tavily_search_tool_flow.py +0 -30
flowllm/flow/gallery/terminate_tool_flow.py +0 -30
flowllm/flow/parser/__init__.py +0 -0
flowllm/op/agent/__init__.py +0 -0
flowllm/op/agent/react_op.py +0 -83
flowllm/op/base_ray_op.py +0 -313
flowllm/op/code/__init__.py +0 -1
flowllm/op/code/execute_code_op.py +0 -42
flowllm/op/gallery/terminate_op.py +0 -29
flowllm/op/search/dashscope_deep_research_op.py +0 -260
flowllm/service/cmd_service.py +0 -15
flowllm-0.1.2.dist-info/RECORD +0 -99
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/WHEEL +0 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/entry_points.txt +0 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/licenses/LICENSE +0 -0
{flowllm-0.1.2.dist-info → flowllm-0.1.5.dist-info}/top_level.txt +0 -0

flowllm/storage/vector_store/es_vector_store.py CHANGED Viewed

@@ -1,8 +1,9 @@
+import asyncio
 import os
-from typing import List, Tuple, Iterable
+from typing import List, Tuple, Iterable, Dict, Any, Optional
-from elasticsearch import Elasticsearch
-from elasticsearch.helpers import bulk
+from elasticsearch import Elasticsearch, AsyncElasticsearch
+from elasticsearch.helpers import bulk, async_bulk
 from loguru import logger
 from pydantic import Field, PrivateAttr, model_validator
@@ -15,14 +16,15 @@ from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
 class EsVectorStore(LocalVectorStore):
     hosts: str | List[str] = Field(default_factory=lambda: os.getenv("FLOW_ES_HOSTS", "http://localhost:9200"))
     basic_auth: str | Tuple[str, str] | None = Field(default=None)
-    retrieve_filters: List[dict] = []
     _client: Elasticsearch = PrivateAttr()
+    _async_client: AsyncElasticsearch = PrivateAttr()
     @model_validator(mode="after")
     def init_client(self):
         if isinstance(self.hosts, str):
             self.hosts = [self.hosts]
         self._client = Elasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
+        self._async_client = AsyncElasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
         logger.info(f"Elasticsearch client initialized with hosts: {self.hosts}")
         return self
@@ -48,10 +50,16 @@ class EsVectorStore(LocalVectorStore):
         }
         return self._client.indices.create(index=workspace_id, body=body)
-    def _iter_workspace_nodes(self, workspace_id: str, max_size: int = 10000, **kwargs) -> Iterable[VectorNode]:
+    def iter_workspace_nodes(self, workspace_id: str, callback_fn=None, max_size: int = 10000, **kwargs) -> Iterable[
+        VectorNode]:
+        """Iterate over all nodes in a workspace."""
         response = self._client.search(index=workspace_id, body={"query": {"match_all": {}}, "size": max_size})
         for doc in response['hits']['hits']:
-            yield self.doc2node(doc, workspace_id)
+            node = self.doc2node(doc, workspace_id)
+            if callback_fn:
+                yield callback_fn(node)
+            else:
+                yield node
     def refresh(self, workspace_id: str):
         self._client.indices.refresh(index=workspace_id)
@@ -62,38 +70,54 @@ class EsVectorStore(LocalVectorStore):
         node.workspace_id = workspace_id
         node.unique_id = doc["_id"]
         if "_score" in doc:
-            node.metadata["_score"] = doc["_score"] - 1
+            node.metadata["score"] = doc["_score"] - 1
         return node
-    def add_term_filter(self, key: str, value):
-        if key:
-            self.retrieve_filters.append({"term": {key: value}})
-        return self
-    def add_range_filter(self, key: str, gte=None, lte=None):
-        if key:
-            if gte is not None and lte is not None:
-                self.retrieve_filters.append({"range": {key: {"gte": gte, "lte": lte}}})
-            elif gte is not None:
-                self.retrieve_filters.append({"range": {key: {"gte": gte}}})
-            elif lte is not None:
-                self.retrieve_filters.append({"range": {key: {"lte": lte}}})
-        return self
-    def clear_filter(self):
-        self.retrieve_filters.clear()
-        return self
+    @staticmethod
+    def _build_es_filters(filter_dict: Optional[Dict[str, Any]] = None) -> List[Dict]:
+        """Build Elasticsearch filter clauses from filter_dict"""
+        if not filter_dict:
+            return []
-    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
+        filters = []
+        for key, filter_value in filter_dict.items():
+            # Handle nested keys by prefixing with metadata.
+            es_key = f"metadata.{key}" if not key.startswith("metadata.") else key
+            if isinstance(filter_value, dict):
+                # Range filter: {"gte": 1, "lte": 10}
+                range_conditions = {}
+                if "gte" in filter_value:
+                    range_conditions["gte"] = filter_value["gte"]
+                if "lte" in filter_value:
+                    range_conditions["lte"] = filter_value["lte"]
+                if "gt" in filter_value:
+                    range_conditions["gt"] = filter_value["gt"]
+                if "lt" in filter_value:
+                    range_conditions["lt"] = filter_value["lt"]
+                if range_conditions:
+                    filters.append({"range": {es_key: range_conditions}})
+            else:
+                # Term filter: direct value comparison
+                filters.append({"term": {es_key: filter_value}})
+        return filters
+    def search(self, query: str, workspace_id: str, top_k: int = 1, filter_dict: Optional[Dict[str, Any]] = None,
+               **kwargs) -> List[VectorNode]:
         if not self.exist_workspace(workspace_id=workspace_id):
             logger.warning(f"workspace_id={workspace_id} is not exists!")
             return []
         query_vector = self.embedding_model.get_embeddings(query)
+        # Build filters from filter_dict
+        es_filters = self._build_es_filters(filter_dict)
         body = {
             "query": {
                 "script_score": {
-                    "query": {"bool": {"must": self.retrieve_filters}},
+                    "query": {"bool": {"must": es_filters}} if es_filters else {"match_all": {}},
                     "script": {
                         "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
                         "params": {"query_vector": query_vector},
@@ -106,12 +130,13 @@ class EsVectorStore(LocalVectorStore):
         nodes: List[VectorNode] = []
         for doc in response['hits']['hits']:
-            nodes.append(self.doc2node(doc, workspace_id))
+            node = self.doc2node(doc, workspace_id)
+            node.metadata["score"] = doc["_score"] - 1  # Adjust score since we added 1.0
+            nodes.append(node)
-        self.retrieve_filters.clear()
         return nodes
-    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = False, **kwargs):
+    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = True, **kwargs):
         if not self.exist_workspace(workspace_id=workspace_id):
             self.create_workspace(workspace_id=workspace_id)
@@ -140,7 +165,7 @@ class EsVectorStore(LocalVectorStore):
         if refresh:
             self.refresh(workspace_id=workspace_id)
-    def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = False, **kwargs):
+    def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = True, **kwargs):
         if not self.exist_workspace(workspace_id=workspace_id):
             logger.warning(f"workspace_id={workspace_id} is not exists!")
             return
@@ -160,6 +185,134 @@ class EsVectorStore(LocalVectorStore):
         if refresh:
             self.refresh(workspace_id=workspace_id)
+    # Async methods using native Elasticsearch async APIs
+    async def async_exist_workspace(self, workspace_id: str, **kwargs) -> bool:
+        """Async version of exist_workspace using native ES async client"""
+        return await self._async_client.indices.exists(index=workspace_id)
+    async def async_delete_workspace(self, workspace_id: str, **kwargs):
+        """Async version of delete_workspace using native ES async client"""
+        return await self._async_client.indices.delete(index=workspace_id, **kwargs)
+    async def async_create_workspace(self, workspace_id: str, **kwargs):
+        """Async version of create_workspace using native ES async client"""
+        body = {
+            "mappings": {
+                "properties": {
+                    "workspace_id": {"type": "keyword"},
+                    "content": {"type": "text"},
+                    "metadata": {"type": "object"},
+                    "vector": {
+                        "type": "dense_vector",
+                        "dims": self.embedding_model.dimensions
+                    }
+                }
+            }
+        }
+        return await self._async_client.indices.create(index=workspace_id, body=body)
+    async def async_refresh(self, workspace_id: str):
+        """Async version of refresh using native ES async client"""
+        await self._async_client.indices.refresh(index=workspace_id)
+    async def async_search(self, query: str, workspace_id: str, top_k: int = 1,
+                           filter_dict: Optional[Dict[str, Any]] = None, **kwargs) -> List[VectorNode]:
+        """Async version of search using native ES async client and async embedding"""
+        if not await self.async_exist_workspace(workspace_id=workspace_id):
+            logger.warning(f"workspace_id={workspace_id} is not exists!")
+            return []
+        # Use async embedding
+        query_vector = await self.embedding_model.get_embeddings_async(query)
+        # Build filters from filter_dict
+        es_filters = self._build_es_filters(filter_dict)
+        body = {
+            "query": {
+                "script_score": {
+                    "query": {"bool": {"must": es_filters}} if es_filters else {"match_all": {}},
+                    "script": {
+                        "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
+                        "params": {"query_vector": query_vector},
+                    }
+                }
+            },
+            "size": top_k
+        }
+        response = await self._async_client.search(index=workspace_id, body=body, **kwargs)
+        nodes: List[VectorNode] = []
+        for doc in response['hits']['hits']:
+            node = self.doc2node(doc, workspace_id)
+            node.metadata["score"] = doc["_score"] - 1  # Adjust score since we added 1.0
+            nodes.append(node)
+        return nodes
+    async def async_insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = True,
+                           **kwargs):
+        """Async version of insert using native ES async client and async embedding"""
+        if not await self.async_exist_workspace(workspace_id=workspace_id):
+            await self.async_create_workspace(workspace_id=workspace_id)
+        if isinstance(nodes, VectorNode):
+            nodes = [nodes]
+        embedded_nodes = [node for node in nodes if node.vector]
+        not_embedded_nodes = [node for node in nodes if not node.vector]
+        # Use async embedding
+        now_embedded_nodes = await self.embedding_model.get_node_embeddings_async(not_embedded_nodes)
+        docs = [
+            {
+                "_op_type": "index",
+                "_index": workspace_id,
+                "_id": node.unique_id,
+                "_source": {
+                    "workspace_id": workspace_id,
+                    "content": node.content,
+                    "metadata": node.metadata,
+                    "vector": node.vector
+                }
+            } for node in embedded_nodes + now_embedded_nodes]
+        status, error = await async_bulk(self._async_client, docs, chunk_size=self.batch_size, **kwargs)
+        logger.info(f"async insert docs.size={len(docs)} status={status} error={error}")
+        if refresh:
+            await self.async_refresh(workspace_id=workspace_id)
+    async def async_delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = True, **kwargs):
+        """Async version of delete using native ES async client"""
+        if not await self.async_exist_workspace(workspace_id=workspace_id):
+            logger.warning(f"workspace_id={workspace_id} is not exists!")
+            return
+        if isinstance(node_ids, str):
+            node_ids = [node_ids]
+        actions = [
+            {
+                "_op_type": "delete",
+                "_index": workspace_id,
+                "_id": node_id
+            } for node_id in node_ids]
+        status, error = await async_bulk(self._async_client, actions, chunk_size=self.batch_size, **kwargs)
+        logger.info(f"async delete actions.size={len(actions)} status={status} error={error}")
+        if refresh:
+            await self.async_refresh(workspace_id=workspace_id)
+    def close(self):
+        self._client.close()
+    async def async_close(self):
+        await self._async_client.close()
 def main():
     from flowllm.utils.common_utils import load_env
     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
@@ -207,21 +360,124 @@ def main():
     es.insert(sample_nodes, workspace_id=workspace_id, refresh=True)
-    logger.info("=" * 20)
-    results = es.add_term_filter(key="metadata.node_type", value="n1") \
-        .search("What is AI?", top_k=5, workspace_id=workspace_id)
+    logger.info("=" * 20 + " FILTER TEST " + "=" * 20)
+    filter_dict = {"node_type": "n1"}
+    results = es.search("What is AI?", top_k=5, workspace_id=workspace_id, filter_dict=filter_dict)
+    logger.info(f"Filtered results (node_type=n1): {len(results)} results")
     for r in results:
         logger.info(r.model_dump(exclude={"vector"}))
     logger.info("=" * 20)
-    logger.info("=" * 20)
+    logger.info("=" * 20 + " UNFILTERED TEST " + "=" * 20)
     results = es.search("What is AI?", top_k=5, workspace_id=workspace_id)
+    logger.info(f"Unfiltered results: {len(results)} results")
     for r in results:
         logger.info(r.model_dump(exclude={"vector"}))
     logger.info("=" * 20)
     es.dump_workspace(workspace_id=workspace_id)
     es.delete_workspace(workspace_id=workspace_id)
+    es.close()
+async def async_main():
+    from flowllm.utils.common_utils import load_env
+    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+    load_env()
+    embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
+    workspace_id = "async_rag_nodes_index"
+    hosts = "http://11.160.132.46:8200"
+    # Use async context manager to ensure proper cleanup
+    es = EsVectorStore(hosts=hosts, embedding_model=embedding_model)
+    # Clean up and create workspace
+    if await es.async_exist_workspace(workspace_id=workspace_id):
+        await es.async_delete_workspace(workspace_id=workspace_id)
+    await es.async_create_workspace(workspace_id=workspace_id)
+    sample_nodes = [
+        VectorNode(
+            unique_id="async_es_node1",
+            workspace_id=workspace_id,
+            content="Artificial intelligence is a technology that simulates human intelligence.",
+            metadata={
+                "node_type": "n1",
+            }
+        ),
+        VectorNode(
+            unique_id="async_es_node2",
+            workspace_id=workspace_id,
+            content="AI is the future of mankind.",
+            metadata={
+                "node_type": "n1",
+            }
+        ),
+        VectorNode(
+            unique_id="async_es_node3",
+            workspace_id=workspace_id,
+            content="I want to eat fish!",
+            metadata={
+                "node_type": "n2",
+            }
+        ),
+        VectorNode(
+            unique_id="async_es_node4",
+            workspace_id=workspace_id,
+            content="The bigger the storm, the more expensive the fish.",
+            metadata={
+                "node_type": "n1",
+            }
+        ),
+    ]
+    # Test async insert
+    await es.async_insert(sample_nodes, workspace_id=workspace_id, refresh=True)
+    logger.info("ASYNC TEST - " + "=" * 20)
+    # Test async search with filter
+    filter_dict = {"node_type": "n1"}
+    results = await es.async_search("What is AI?", top_k=5, workspace_id=workspace_id, filter_dict=filter_dict)
+    for r in results:
+        logger.info(r.model_dump(exclude={"vector"}))
+    logger.info("=" * 20)
+    # Test async search without filter
+    logger.info("ASYNC TEST WITHOUT FILTER - " + "=" * 20)
+    results = await es.async_search("What is AI?", top_k=5, workspace_id=workspace_id)
+    for r in results:
+        logger.info(r.model_dump(exclude={"vector"}))
+    logger.info("=" * 20)
+    # Test async update (delete + insert)
+    node2_update = VectorNode(
+        unique_id="async_es_node2",
+        workspace_id=workspace_id,
+        content="AI is the future of humanity and technology.",
+        metadata={
+            "node_type": "n1",
+            "updated": True
+        }
+    )
+    await es.async_delete(node2_update.unique_id, workspace_id=workspace_id, refresh=True)
+    await es.async_insert(node2_update, workspace_id=workspace_id, refresh=True)
+    logger.info("ASYNC Updated Result:")
+    results = await es.async_search("fish?", workspace_id=workspace_id, top_k=10)
+    for r in results:
+        logger.info(r.model_dump(exclude={"vector"}))
+    logger.info("=" * 20)
+    # Clean up
+    await es.async_dump_workspace(workspace_id=workspace_id)
+    await es.async_delete_workspace(workspace_id=workspace_id)
+    await es.async_close()
 if __name__ == "__main__":
     main()
+    # Run async test
+    logger.info("\n" + "=" * 50 + " ASYNC TESTS " + "=" * 50)
+    asyncio.run(async_main())

flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

flowllm 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl