PyPI - aiagents4pharma - Versions diffs - 1.45.1__py3-none-any.whl → 1.46.1__py3-none-any.whl - Mend

aiagents4pharma 1.45.1__py3-none-any.whl → 1.46.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py CHANGED Viewed

@@ -2,11 +2,15 @@
 Tool for performing multimodal subgraph extraction.
 """
+import asyncio
+import concurrent.futures
 import logging
+from dataclasses import dataclass
 from typing import Annotated
 import hydra
 import pandas as pd
+import pcst_fast
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool
 from langchain_core.tools.base import InjectedToolCallId
@@ -15,6 +19,8 @@ from langgraph.types import Command
 from pydantic import BaseModel, Field
 from pymilvus import Collection
+from ..utils.database import MilvusConnectionManager
+from ..utils.database.milvus_connection_manager import QueryParams
 from ..utils.extractions.milvus_multimodal_pcst import (
     DynamicLibraryLoader,
     MultimodalPCSTPruning,
@@ -22,11 +28,23 @@ from ..utils.extractions.milvus_multimodal_pcst import (
 )
 from .load_arguments import ArgumentData
+# pylint: disable=too-many-lines
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+@dataclass
+class ExtractionParams:
+    """Parameters for subgraph extraction."""
+    state: dict
+    cfg: dict
+    cfg_db: dict
+    query_df: object
+    connection_manager: object
 class MultimodalSubgraphExtractionInput(BaseModel):
     """
     MultimodalSubgraphExtractionInput is a Pydantic model representing an input
@@ -118,7 +136,15 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         q_node_names = getattr(
             node_names_series, "to_pandas", lambda series=node_names_series: series
         )().tolist()
-        q_columns = ["node_id", "node_name", "node_type", "feat", "feat_emb", "desc", "desc_emb"]
+        q_columns = [
+            "node_id",
+            "node_name",
+            "node_type",
+            "feat",
+            "feat_emb",
+            "desc",
+            "desc_emb",
+        ]
         res = collection.query(
             expr=f"node_name IN [{','.join(f'"{name}"' for name in q_node_names)}]",
             output_fields=q_columns,
@@ -133,6 +159,52 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         res_df["use_description"] = False
         return res_df
+    async def _query_milvus_collection_async(
+        self, node_type, node_type_df, cfg_db, connection_manager
+    ):
+        """Helper method to query Milvus collection asynchronously for a specific node type."""
+        collection_name = f"{cfg_db.milvus_db.database_name}_nodes_{node_type.replace('/', '_')}"
+        # Query the collection with node names from multimodal_df
+        node_names_series = node_type_df["q_node_name"]
+        q_node_names = getattr(
+            node_names_series, "to_pandas", lambda series=node_names_series: series
+        )().tolist()
+        # Create filter expression for async query
+        node_names_str = ",".join(f'"{name}"' for name in q_node_names)
+        expr = f"node_name IN [{node_names_str}]"
+        q_columns = [
+            "node_id",
+            "node_name",
+            "node_type",
+            "feat",
+            "feat_emb",
+            "desc",
+            "desc_emb",
+        ]
+        # Create query parameters and perform async query
+        query_params = QueryParams(
+            collection_name=collection_name, expr=expr, output_fields=q_columns
+        )
+        res = await connection_manager.async_query(query_params)
+        # Convert the embeddings into floats
+        for r_ in res:
+            r_["feat_emb"] = [float(x) for x in r_["feat_emb"]]
+            r_["desc_emb"] = [float(x) for x in r_["desc_emb"]]
+        # Convert the result to a DataFrame
+        res_df = (
+            self.loader.df.DataFrame(res)[q_columns]
+            if res
+            else self.loader.df.DataFrame(columns=q_columns)
+        )
+        res_df["use_description"] = False
+        return res_df
     def _prepare_query_modalities(
         self, prompt: dict, state: Annotated[dict, InjectedState], cfg_db: dict
     ):
@@ -201,6 +273,97 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         return query_df
+    async def _prepare_query_modalities_async(
+        self,
+        prompt: dict,
+        state: Annotated[dict, InjectedState],
+        cfg_db: dict,
+        connection_manager,
+    ):
+        """
+        Prepare the modality-specific query for subgraph extraction asynchronously.
+        Args:
+            prompt: The dictionary containing the user prompt and embeddings
+            state: The injected state for the tool
+            cfg_db: The configuration dictionary for Milvus database
+            connection_manager: The MilvusConnectionManager instance
+        Returns:
+            A DataFrame containing the query embeddings and modalities
+        """
+        # Initialize dataframes
+        logger.log(logging.INFO, "Initializing dataframes (async)")
+        query_df = []
+        prompt_df = self.loader.df.DataFrame(
+            {
+                "node_id": "user_prompt",
+                "node_name": "User Prompt",
+                "node_type": "prompt",
+                "feat": prompt["text"],
+                "feat_emb": prompt["emb"],
+                "desc": prompt["text"],
+                "desc_emb": prompt["emb"],
+                "use_description": True,  # set to True for user prompt embedding
+            }
+        )
+        # Read multimodal files uploaded by the user
+        multimodal_df = self._read_multimodal_files(state)
+        # Check if the multimodal_df is empty
+        logger.log(logging.INFO, "Prepare query modalities (async)")
+        if len(multimodal_df) > 0:
+            # Create parallel tasks for querying each node type
+            logger.log(
+                logging.INFO,
+                "Querying Milvus database for each node type in multimodal_df (parallel)",
+            )
+            # Create async tasks for each node type
+            tasks = []
+            for node_type, node_type_df in multimodal_df.groupby("q_node_type"):
+                print(f"Processing node type: {node_type}")
+                task = self._query_milvus_collection_async(
+                    node_type, node_type_df, cfg_db, connection_manager
+                )
+                tasks.append(task)
+            # Execute all queries in parallel using hybrid approach
+            if len(tasks) == 1:
+                # Single task, run directly
+                query_results = [await tasks[0]]
+            else:
+                # Multiple tasks, but use sequential execution to avoid event loop issues
+                query_results = []
+                for task in tasks:
+                    result = await task
+                    query_results.append(result)
+            query_df.extend(query_results)
+            # Concatenate all results into a single DataFrame
+            logger.log(logging.INFO, "Concatenating all results into a single DataFrame")
+            query_df = self.loader.df.concat(query_df, ignore_index=True)
+            # Update the state by adding the selected node IDs
+            logger.log(logging.INFO, "Updating state with selected node IDs")
+            state["selections"] = (
+                getattr(query_df, "to_pandas", lambda: query_df)()
+                .groupby("node_type")["node_id"]
+                .apply(list)
+                .to_dict()
+            )
+            # Append a user prompt to the query dataframe
+            logger.log(logging.INFO, "Adding user prompt to query dataframe")
+            query_df = self.loader.df.concat([query_df, prompt_df]).reset_index(drop=True)
+        else:
+            # If no multimodal files are uploaded, use the prompt embeddings
+            query_df = prompt_df
+        return query_df
     def _perform_subgraph_extraction(
         self,
         state: Annotated[dict, InjectedState],
@@ -287,7 +450,13 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         # Convert the unified subgraph and subgraphs to DataFrames
         unified_subgraph = self.loader.df.DataFrame(
-            [("Unified Subgraph", unified_subgraph["nodes"], unified_subgraph["edges"])],
+            [
+                (
+                    "Unified Subgraph",
+                    unified_subgraph["nodes"],
+                    unified_subgraph["edges"],
+                )
+            ],
             columns=["name", "nodes", "edges"],
         )
         subgraphs = self.loader.df.DataFrame(subgraphs, columns=["name", "nodes", "edges"])
@@ -297,8 +466,199 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         return subgraphs
+    async def _perform_subgraph_extraction_async(self, params: ExtractionParams) -> dict:
+        """
+        Perform multimodal subgraph extraction based on modal-specific embeddings asynchronously.
+        Args:
+            state: The injected state for the tool
+            cfg: The configuration dictionary
+            cfg_db: The configuration dictionary for Milvus database
+            query_df: The DataFrame containing the query embeddings and modalities
+            connection_manager: The MilvusConnectionManager instance
+        Returns:
+            A dictionary containing the extracted subgraph with nodes and edges
+        """
+        # Initialize the subgraph dictionary
+        subgraphs = []
+        unified_subgraph = {"nodes": [], "edges": []}
+        # Create parallel tasks for each query
+        tasks = []
+        query_info = []
+        for q in getattr(params.query_df, "to_pandas", lambda: params.query_df)().iterrows():
+            logger.log(logging.INFO, "===========================================")
+            logger.log(logging.INFO, "Processing query: %s", q[1]["node_name"])
+            # Store query info for later processing
+            query_info.append(q[1])
+            # Get dynamic metric type using helper method
+            dynamic_metric_type = self._get_dynamic_metric_type(params.cfg)
+            # Create PCST pruning instance using helper
+            pcst_instance = self._create_pcst_instance(params, q[1], dynamic_metric_type)
+            # Create async task for subgraph extraction
+            task = self._extract_single_subgraph_async(
+                pcst_instance, q[1], params.cfg_db, params.connection_manager
+            )
+            tasks.append(task)
+        # Execute all subgraph extractions sequentially to avoid event loop conflicts
+        subgraph_results = []
+        for i, task in enumerate(tasks):
+            logger.log(logging.INFO, "Processing subgraph %d/%d", i + 1, len(tasks))
+            result = await task
+            subgraph_results.append(result)
+        # Process results and finalize
+        self._process_subgraph_results(subgraph_results, query_info, unified_subgraph, subgraphs)
+        return self._finalize_subgraph_results(subgraphs, unified_subgraph)
+    def _process_subgraph_results(self, subgraph_results, query_info, unified_subgraph, subgraphs):
+        """Process individual subgraph results."""
+        for i, subgraph in enumerate(subgraph_results):
+            query_row = query_info[i]
+            unified_subgraph["nodes"].append(subgraph["nodes"].tolist())
+            unified_subgraph["edges"].append(subgraph["edges"].tolist())
+            subgraphs.append(
+                (
+                    query_row["node_name"],
+                    subgraph["nodes"].tolist(),
+                    subgraph["edges"].tolist(),
+                )
+            )
+    def _finalize_subgraph_results(self, subgraphs, unified_subgraph):
+        """Process and finalize subgraph results into DataFrames."""
+        # Concatenate and get unique node and edge indices
+        nodes_arrays = [self.loader.py.array(list_) for list_ in unified_subgraph["nodes"]]
+        unified_subgraph["nodes"] = self.loader.py.unique(
+            self.loader.py.concatenate(nodes_arrays)
+        ).tolist()
+        edges_arrays = [self.loader.py.array(list_) for list_ in unified_subgraph["edges"]]
+        unified_subgraph["edges"] = self.loader.py.unique(
+            self.loader.py.concatenate(edges_arrays)
+        ).tolist()
+        # Convert the unified subgraph and subgraphs to DataFrames
+        unified_subgraph_df = self.loader.df.DataFrame(
+            [
+                (
+                    "Unified Subgraph",
+                    unified_subgraph["nodes"],
+                    unified_subgraph["edges"],
+                )
+            ],
+            columns=["name", "nodes", "edges"],
+        )
+        subgraphs_df = self.loader.df.DataFrame(subgraphs, columns=["name", "nodes", "edges"])
+        # Concatenate both DataFrames
+        return self.loader.df.concat([unified_subgraph_df, subgraphs_df], ignore_index=True)
+    async def _extract_single_subgraph_async(
+        self, pcst_instance, query_row, cfg_db, connection_manager
+    ):
+        """
+        Extract a single subgraph asynchronously using the new async methods.
+        """
+        # Load data and compute prizes
+        edge_index, prizes, num_nodes = await self._load_subgraph_data(
+            pcst_instance, query_row, cfg_db, connection_manager
+        )
+        # Run PCST algorithm and get results
+        return self._run_pcst_algorithm(pcst_instance, edge_index, num_nodes, prizes)
+    async def _load_subgraph_data(self, pcst_instance, query_row, cfg_db, connection_manager):
+        """Load edge index, compute prizes, and get node count."""
+        # Load edge index asynchronously
+        edge_index = await pcst_instance.load_edge_index_async(cfg_db, connection_manager)
+        # Compute prizes asynchronously
+        prizes = await pcst_instance.compute_prizes_async(
+            query_row["desc_emb"],
+            query_row["feat_emb"],
+            cfg_db,
+            query_row["node_type"],
+        )
+        # Get number of nodes
+        nodes_collection = f"{cfg_db.milvus_db.database_name}_nodes"
+        stats = await connection_manager.async_get_collection_stats(nodes_collection)
+        num_nodes = stats["num_entities"]
+        return edge_index, prizes, num_nodes
+    def _run_pcst_algorithm(self, pcst_instance, edge_index, num_nodes, prizes):
+        """Run PCST algorithm and get subgraph results."""
+        # Compute costs in constructing the subgraph
+        edges_dict, prizes_final, costs, mapping = pcst_instance.compute_subgraph_costs(
+            edge_index, num_nodes, prizes
+        )
+        # Retrieve the subgraph using the PCST algorithm
+        result_vertices, result_edges = pcst_fast.pcst_fast(
+            edges_dict["edges"].tolist(),
+            prizes_final.tolist(),
+            costs.tolist(),
+            pcst_instance.root,
+            pcst_instance.num_clusters,
+            pcst_instance.pruning,
+            pcst_instance.verbosity_level,
+        )
+        # Get subgraph nodes and edges based on the PCST result
+        return pcst_instance.get_subgraph_nodes_edges(
+            num_nodes,
+            pcst_instance.loader.py.asarray(result_vertices),
+            {
+                "edges": pcst_instance.loader.py.asarray(result_edges),
+                "num_prior_edges": edges_dict["num_prior_edges"],
+                "edge_index": edge_index,
+            },
+            mapping,
+        )
+    def _run(
+        self,
+        tool_call_id: Annotated[str, InjectedToolCallId],
+        state: Annotated[dict, InjectedState],
+        prompt: str,
+        arg_data: ArgumentData = None,
+    ) -> Command:
+        """
+        Synchronous wrapper for the async _run_async method.
+        This maintains compatibility with LangGraph while using async operations internally.
+        """
+        # concurrent.futures imported at top level
+        def run_in_thread():
+            """Run async method in a new thread with its own event loop."""
+            # Create a new event loop for this thread
+            new_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(new_loop)
+            try:
+                result = new_loop.run_until_complete(
+                    self._run_async(tool_call_id, state, prompt, arg_data)
+                )
+                return result
+            finally:
+                # Properly cleanup the event loop
+                new_loop.close()
+                asyncio.set_event_loop(None)
+        # Always use a separate thread to avoid event loop conflicts
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(run_in_thread)
+            return future.result()
     def _prepare_final_subgraph(
-        self, state: Annotated[dict, InjectedState], subgraph: dict, cfg: dict, cfg_db
+        self, state: Annotated[dict, InjectedState], subgraph: dict, cfg_db
     ) -> dict:
         """
         Prepare the subgraph based on the extracted subgraph.
@@ -306,8 +666,6 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         Args:
             state: The injected state for the tool.
             subgraph: The extracted subgraph.
-            graph: The graph dictionary.
-            cfg: The configuration dictionary for the tool.
             cfg_db: The configuration dictionary for Milvus database.
         Returns:
@@ -315,7 +673,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         """
         # Convert the dict to a DataFrame
         node_colors = {
-            n: cfg.node_colors_dict[k] for k, v in state["selections"].items() for n in v
+            n: cfg_db.node_colors_dict[k] for k, v in state["selections"].items() for n in v
         }
         color_df = self.loader.df.DataFrame(list(node_colors.items()), columns=["node_id", "color"])
         # print(color_df)
@@ -345,7 +703,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
                         },
                     )
                     for row in getattr(
-                        graph_nodes, "to_pandas", lambda graph_nodes=graph_nodes: graph_nodes
+                        graph_nodes,
+                        "to_pandas",
+                        lambda graph_nodes=graph_nodes: graph_nodes,
                     )().itertuples(index=False)
                 ]
             )
@@ -353,7 +713,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
                 [
                     (row.head_id, row.tail_id, {"label": tuple(row.edge_type)})
                     for row in getattr(
-                        graph_edges, "to_pandas", lambda graph_edges=graph_edges: graph_edges
+                        graph_edges,
+                        "to_pandas",
+                        lambda graph_edges=graph_edges: graph_edges,
                     )().itertuples(index=False)
                 ]
             )
@@ -364,11 +726,15 @@ class MultimodalSubgraphExtractionTool(BaseTool):
                 graph_nodes.rename(columns={"desc": "node_attr"}, inplace=True)
                 graph_edges = graph_edges[["head_id", "edge_type", "tail_id"]]
                 nodes_pandas = getattr(
-                    graph_nodes, "to_pandas", lambda graph_nodes=graph_nodes: graph_nodes
+                    graph_nodes,
+                    "to_pandas",
+                    lambda graph_nodes=graph_nodes: graph_nodes,
                 )()
                 nodes_csv = nodes_pandas.to_csv(index=False)
                 edges_pandas = getattr(
-                    graph_edges, "to_pandas", lambda graph_edges=graph_edges: graph_edges
+                    graph_edges,
+                    "to_pandas",
+                    lambda graph_edges=graph_edges: graph_edges,
                 )()
                 edges_csv = edges_pandas.to_csv(index=False)
                 graph_dict["text"] = nodes_csv + "\n" + edges_csv
@@ -414,6 +780,35 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         return graph_nodes, graph_edges
+    def _get_dynamic_metric_type(self, cfg: dict) -> str:
+        """Helper method to get dynamic metric type."""
+        has_vector_processing = hasattr(cfg, "vector_processing")
+        if has_vector_processing:
+            dynamic_metrics_enabled = getattr(cfg.vector_processing, "dynamic_metrics", True)
+        else:
+            dynamic_metrics_enabled = False
+        if has_vector_processing and dynamic_metrics_enabled:
+            return self.loader.metric_type
+        return getattr(cfg, "search_metric_type", self.loader.metric_type)
+    def _create_pcst_instance(
+        self, params: ExtractionParams, query_row: dict, dynamic_metric_type: str
+    ) -> MultimodalPCSTPruning:
+        """Helper method to create PCST pruning instance."""
+        return MultimodalPCSTPruning(
+            topk=params.state["topk_nodes"],
+            topk_e=params.state["topk_edges"],
+            cost_e=params.cfg.cost_e,
+            c_const=params.cfg.c_const,
+            root=params.cfg.root,
+            num_clusters=params.cfg.num_clusters,
+            pruning=params.cfg.pruning,
+            verbosity_level=params.cfg.verbosity_level,
+            use_description=query_row["use_description"],
+            metric_type=dynamic_metric_type,
+            loader=self.loader,
+        )
     def normalize_vector(self, v: list) -> list:
         """
         Normalize a vector using appropriate library (CuPy for GPU, NumPy for CPU).
@@ -432,7 +827,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         # CPU mode: return as-is for COSINE similarity
         return v
-    def _run(
+    async def _run_async(
         self,
         tool_call_id: Annotated[str, InjectedToolCallId],
         state: Annotated[dict, InjectedState],
@@ -459,55 +854,71 @@ class MultimodalSubgraphExtractionTool(BaseTool):
                 config_name="config",
                 overrides=["tools/multimodal_subgraph_extraction=default"],
             )
-            cfg_db = cfg.app.frontend
             cfg = cfg.tools.multimodal_subgraph_extraction
-        # Check if the Milvus connection exists
-        # logger.log(logging.INFO, "Checking Milvus connection")
-        # logger.log(logging.INFO, "Milvus connection name: %s", cfg_db.milvus_db.alias)
-        # logger.log(logging.INFO, "Milvus connection DB: %s", cfg_db.milvus_db.database_name)
-        # logger.log(logging.INFO, "Is connection established? %s",
-        #            connections.has_connection(cfg_db.milvus_db.alias))
-        # if connections.has_connection(cfg_db.milvus_db.alias):
-        #     logger.log(logging.INFO, "Milvus connection is established.")
-        #     for collection_name in utility.list_collections():
-        #         logger.log(logging.INFO, "Collection: %s", collection_name)
-        # Prepare the query embeddings and modalities
-        logger.log(logging.INFO, "_prepare_query_modalities")
-        # start = datetime.datetime.now()
-        query_df = self._prepare_query_modalities(
+        # Load database configuration separately
+        with hydra.initialize(version_base=None, config_path="../configs"):
+            cfg_all = hydra.compose(config_name="config")
+            cfg_db = cfg_all.utils.database.milvus
+        # Establish Milvus connection using singleton connection manager
+        logger.log(logging.INFO, "Getting Milvus connection manager (singleton)")
+        connection_manager = MilvusConnectionManager(cfg_db)
+        try:
+            connection_manager.ensure_connection()
+            logger.log(logging.INFO, "Milvus connection established successfully")
+            # Log connection info
+            conn_info = connection_manager.get_connection_info()
+            logger.log(logging.INFO, "Connected to database: %s", conn_info.get("database"))
+            logger.log(
+                logging.INFO,
+                "Connection healthy: %s",
+                connection_manager.test_connection(),
+            )
+        except Exception as e:
+            logger.error("Failed to establish Milvus connection: %s", str(e))
+            raise RuntimeError(f"Cannot connect to Milvus database: {str(e)}") from e
+        # Prepare the query embeddings and modalities (async)
+        logger.log(logging.INFO, "_prepare_query_modalities_async")
+        query_df = await self._prepare_query_modalities_async(
             {
                 "text": prompt,
                 "emb": [self.normalize_vector(state["embedding_model"].embed_query(prompt))],
             },
             state,
             cfg_db,
+            connection_manager,
         )
-        # end = datetime.datetime.now()
-        # logger.log(logging.INFO, "_prepare_query_modalities time: %s seconds",
-        #            (end - start).total_seconds())
-        # Perform subgraph extraction
-        logger.log(logging.INFO, "_perform_subgraph_extraction")
-        # start = datetime.datetime.now()
-        subgraphs = self._perform_subgraph_extraction(state, cfg, cfg_db, query_df)
-        # end = datetime.datetime.now()
-        # logger.log(logging.INFO, "_perform_subgraph_extraction time: %s seconds",
-        #            (end - start).total_seconds())
+        # Perform subgraph extraction (async)
+        logger.log(logging.INFO, "_perform_subgraph_extraction_async")
+        extraction_params = ExtractionParams(
+            state=state,
+            cfg=cfg,
+            cfg_db=cfg_db,
+            query_df=query_df,
+            connection_manager=connection_manager,
+        )
+        subgraphs = await self._perform_subgraph_extraction_async(extraction_params)
         # Prepare subgraph as a NetworkX graph and textualized graph
         logger.log(logging.INFO, "_prepare_final_subgraph")
         logger.log(logging.INFO, "Subgraphs extracted: %s", len(subgraphs))
         # start = datetime.datetime.now()
-        final_subgraph = self._prepare_final_subgraph(state, subgraphs, cfg, cfg_db)
+        final_subgraph = self._prepare_final_subgraph(state, subgraphs, cfg_db)
         # end = datetime.datetime.now()
         # logger.log(logging.INFO, "_prepare_final_subgraph time: %s seconds",
         #            (end - start).total_seconds())
+        # Create final result and return command
+        return self._create_extraction_result(tool_call_id, state, final_subgraph, arg_data)
+    def _create_extraction_result(self, tool_call_id, state, final_subgraph, arg_data):
+        """Create the final extraction result and command."""
         # Prepare the dictionary of extracted graph
         logger.log(logging.INFO, "dic_extracted_graph")
-        # start = datetime.datetime.now()
         dic_extracted_graph = {
             "name": arg_data.extraction_name,
             "tool_call_id": tool_call_id,
@@ -522,28 +933,33 @@ class MultimodalSubgraphExtractionTool(BaseTool):
             "graph_text": final_subgraph["text"],
             "graph_summary": None,
         }
-        # end = datetime.datetime.now()
-        # logger.log(logging.INFO, "dic_extracted_graph time: %s seconds",
-        #            (end - start).total_seconds())
-        # Prepare the dictionary of updated state
-        dic_updated_state_for_model = {}
-        for key, value in {
-            "dic_extracted_graph": [dic_extracted_graph],
-        }.items():
-            if value:
-                dic_updated_state_for_model[key] = value
+        # Debug logging
+        logger.info(
+            "Created dic_extracted_graph with keys: %s",
+            list(dic_extracted_graph.keys()),
+        )
+        logger.info(
+            "Graph dict structure - name count: %d, nodes count: %d, edges count: %d",
+            len(dic_extracted_graph["graph_dict"]["name"]),
+            len(dic_extracted_graph["graph_dict"]["nodes"]),
+            len(dic_extracted_graph["graph_dict"]["edges"]),
+        )
-        # Return the updated state of the tool
+        # Create success message
+        success_message = (
+            f"Successfully extracted subgraph '{arg_data.extraction_name}' "
+            f"with {len(final_subgraph['name'])} graph(s). The subgraph contains "
+            f"{sum(len(nodes) for nodes in final_subgraph['nodes'])} nodes and "
+            f"{sum(len(edges) for edges in final_subgraph['edges'])} edges. "
+            "The extracted subgraph has been stored and is ready for "
+            "visualization and analysis."
+        )
+        # Return the command with updated state
         return Command(
-            update=dic_updated_state_for_model
+            update={"dic_extracted_graph": [dic_extracted_graph]}
             | {
-                # update the message history
-                "messages": [
-                    ToolMessage(
-                        content=f"Subgraph Extraction Result of {arg_data.extraction_name}",
-                        tool_call_id=tool_call_id,
-                    )
-                ],
+                "messages": [ToolMessage(content=success_message, tool_call_id=tool_call_id)],
             }
         )

aiagents4pharma 1.45.1__py3-none-any.whl → 1.46.1__py3-none-any.whl

aiagents4pharma 1.45.1__py3-none-any.whl → 1.46.1__py3-none-any.whl