PyPI - aiagents4pharma - Versions diffs - 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl - Mend

aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py CHANGED Viewed

@@ -18,7 +18,7 @@ import os
 import platform
 import subprocess
 import sys
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Union
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="[DATA LOADER] %(message)s")
@@ -32,9 +32,7 @@ class SystemDetector:
         self.os_type = platform.system().lower()  # 'windows', 'linux', 'darwin'
         self.architecture = platform.machine().lower()  # 'x86_64', 'arm64', etc.
         self.has_nvidia_gpu = self._detect_nvidia_gpu()
-        self.use_gpu = (
-            self.has_nvidia_gpu and self.os_type != "darwin"
-        )  # No CUDA on macOS
+        self.use_gpu = self.has_nvidia_gpu and self.os_type != "darwin"  # No CUDA on macOS
         logger.info("System Detection Results:")
         logger.info("  OS: %s", self.os_type)
@@ -46,9 +44,7 @@ class SystemDetector:
         """Detect if NVIDIA GPU is available."""
         try:
             # Try nvidia-smi command
-            result = subprocess.run(
-                ["nvidia-smi"], capture_output=True, text=True, timeout=10
-            )
+            result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
             return result.returncode == 0
         except (
             subprocess.TimeoutExpired,
@@ -57,7 +53,7 @@ class SystemDetector:
         ):
             return False
-    def get_required_packages(self) -> List[str]:
+    def get_required_packages(self) -> list[str]:
         """Get list of packages to install based on system capabilities - matches original logic."""
         if self.use_gpu and self.os_type == "linux":
             # Exact package list from original script for GPU mode
@@ -103,9 +99,7 @@ class SystemDetector:
                 if result.returncode != 0:
                     logger.error("Error installing package: %s", result.stderr)
                     if "cudf" in package_cmd or "dask-cudf" in package_cmd:
-                        logger.warning(
-                            "GPU package installation failed, falling back to CPU mode"
-                        )
+                        logger.warning("GPU package installation failed, falling back to CPU mode")
                         self.use_gpu = False
                         return self.install_packages()  # Retry with CPU packages
                     else:
@@ -115,9 +109,7 @@ class SystemDetector:
             except subprocess.CalledProcessError as e:
                 logger.error("Failed to install %s: %s", package_cmd, e.stderr)
                 if "cudf" in package_cmd:
-                    logger.warning(
-                        "GPU package installation failed, falling back to CPU mode"
-                    )
+                    logger.warning("GPU package installation failed, falling back to CPU mode")
                     self.use_gpu = False
                     return self.install_packages()  # Retry with CPU packages
                 else:
@@ -130,7 +122,7 @@ class SystemDetector:
 class DynamicDataLoader:
     """Dynamic data loader that adapts to system capabilities."""
-    def __init__(self, config: Dict[str, Any]):
+    def __init__(self, config: dict[str, Any]):
         """Initialize with system detection and dynamic library loading."""
         self.config = config
         self.detector = SystemDetector()
@@ -207,13 +199,15 @@ class DynamicDataLoader:
                 logger.info("Successfully imported GPU libraries (cudf, cupy)")
             except ImportError as e:
                 logger.error(
-                    "[DATA LOADER] cudf or cupy not found. Please ensure they are installed correctly."
+                    "[DATA LOADER] cudf or cupy not found. "
+                    "Please ensure they are installed correctly."
                 )
                 logger.error("Import error: %s", str(e))
                 # Match original script's exit behavior for critical GPU import failure
                 if not os.getenv("FORCE_CPU", "false").lower() == "true":
                     logger.error(
-                        "GPU libraries required but not available. Set FORCE_CPU=true to use CPU mode."
+                        "GPU libraries required but not available. "
+                        "Set FORCE_CPU=true to use CPU mode."
                     )
                     sys.exit(1)
                 else:
@@ -222,7 +216,7 @@ class DynamicDataLoader:
                     self.use_gpu = False
     def _read_dataframe(
-        self, file_path: str, columns: Optional[List[str]] = None
+        self, file_path: str, columns: list[str] | None = None
     ) -> Union["pd.DataFrame", "cudf.DataFrame"]:  # type: ignore[reportUndefinedVariable]  # noqa: F821
         """Read dataframe using appropriate library."""
         if self.use_gpu:
@@ -231,7 +225,7 @@ class DynamicDataLoader:
             return self.pd.read_parquet(file_path, columns=columns)
     def _concat_dataframes(
-        self, df_list: List, ignore_index: bool = True
+        self, df_list: list, ignore_index: bool = True
     ) -> Union["pd.DataFrame", "cudf.DataFrame"]:  # type: ignore[reportUndefinedVariable]  # noqa: F821
         """Concatenate dataframes using appropriate library."""
         if self.use_gpu:
@@ -257,9 +251,7 @@ class DynamicDataLoader:
         """Extract embeddings and convert to appropriate format."""
         if self.use_gpu:
             # cuDF list extraction
-            emb_data = self.cp.asarray(df[column_name].list.leaves).astype(
-                self.cp.float32
-            )
+            emb_data = self.cp.asarray(df[column_name].list.leaves).astype(self.cp.float32)
             return emb_data.reshape(df.shape[0], -1)
         else:
             # pandas extraction
@@ -325,9 +317,7 @@ class DynamicDataLoader:
             for stage in ["enrichment", "embedding"]:
                 logger.info("Processing %s %s", element, stage)
-                file_list = glob.glob(
-                    os.path.join(self.data_dir, element, stage, "*.parquet.gzip")
-                )
+                file_list = glob.glob(os.path.join(self.data_dir, element, stage, "*.parquet.gzip"))
                 logger.info("Found %d files for %s %s", len(file_list), element, stage)
                 if not file_list:
@@ -342,13 +332,9 @@ class DynamicDataLoader:
                         chunk_files = file_list[i : i + chunk_size]
                         chunk_df_list = []
                         for f in chunk_files:
-                            df = self._read_dataframe(
-                                f, columns=["triplet_index", "edge_emb"]
-                            )
+                            df = self._read_dataframe(f, columns=["triplet_index", "edge_emb"])
                             chunk_df_list.append(df)
-                        chunk_df = self._concat_dataframes(
-                            chunk_df_list, ignore_index=True
-                        )
+                        chunk_df = self._concat_dataframes(chunk_df_list, ignore_index=True)
                         graph[element][stage].append(chunk_df)
                 else:
                     # For other combinations, read all files
@@ -356,9 +342,7 @@ class DynamicDataLoader:
                     for f in file_list:
                         df = self._read_dataframe(f)
                         df_list.append(df)
-                    graph[element][stage] = self._concat_dataframes(
-                        df_list, ignore_index=True
-                    )
+                    graph[element][stage] = self._concat_dataframes(df_list, ignore_index=True)
         logger.info("Graph data loaded successfully")
         return graph
@@ -367,16 +351,15 @@ class DynamicDataLoader:
         """Get embedding dimension using original script's exact logic."""
         first_emb = df.iloc[0][column_name]
         if self.use_gpu:
-            # cuDF format - matches original: len(nodes_df.iloc[0]['desc_emb'].to_arrow().to_pylist()[0])
+            # cuDF format - matches original:
+            # len(nodes_df.iloc[0]['desc_emb'].to_arrow().to_pylist()[0])
             return len(first_emb.to_arrow().to_pylist()[0])
         else:
             # pandas format
             if isinstance(first_emb, list):
                 return len(first_emb)
             else:
-                return len(
-                    first_emb.tolist() if hasattr(first_emb, "tolist") else first_emb
-                )
+                return len(first_emb.tolist() if hasattr(first_emb, "tolist") else first_emb)
     def create_nodes_collection(self, nodes_df):
         """Create and populate the main nodes collection."""
@@ -431,9 +414,7 @@ class DynamicDataLoader:
         # Create collection if it doesn't exist
         if not self.pymilvus_modules["utility"].has_collection(node_coll_name):
-            collection = self.pymilvus_modules["Collection"](
-                name=node_coll_name, schema=schema
-            )
+            collection = self.pymilvus_modules["Collection"](name=node_coll_name, schema=schema)
         else:
             collection = self.pymilvus_modules["Collection"](name=node_coll_name)
@@ -487,9 +468,7 @@ class DynamicDataLoader:
             collection.insert(batch)
         collection.flush()
-        logger.info(
-            "Nodes collection created with %d entities", collection.num_entities
-        )
+        logger.info("Nodes collection created with %d entities", collection.num_entities)
     def create_node_type_collections(self, nodes_df):
         """Create separate collections for each node type."""
@@ -498,9 +477,7 @@ class DynamicDataLoader:
         for node_type, nodes_df_ in self.tqdm(
             nodes_df.groupby("node_type"), desc="Processing node types"
         ):
-            node_coll_name = (
-                f"{self.milvus_database}_nodes_{node_type.replace('/', '_')}"
-            )
+            node_coll_name = f"{self.milvus_database}_nodes_{node_type.replace('/', '_')}"
             # Get embedding dimensions
             desc_dim = self._get_embedding_dimension(nodes_df_, "desc_emb")
@@ -564,9 +541,7 @@ class DynamicDataLoader:
             )
             if not self.pymilvus_modules["utility"].has_collection(node_coll_name):
-                collection = self.pymilvus_modules["Collection"](
-                    name=node_coll_name, schema=schema
-                )
+                collection = self.pymilvus_modules["Collection"](name=node_coll_name, schema=schema)
             else:
                 collection = self.pymilvus_modules["Collection"](name=node_coll_name)
@@ -639,7 +614,7 @@ class DynamicDataLoader:
                 collection.num_entities,
             )
-    def create_edges_collection(self, edges_enrichment_df, edges_embedding_df: List):
+    def create_edges_collection(self, edges_enrichment_df, edges_embedding_df: list):
         """Create and populate the edges collection - exact original logic."""
         logger.info("Creating edges collection...")
@@ -647,9 +622,7 @@ class DynamicDataLoader:
         # Get embedding dimension from first chunk - exact original logic
         if self.use_gpu:
-            emb_dim = len(
-                edges_embedding_df[0].loc[0, "edge_emb"]
-            )  # Original cudf access
+            emb_dim = len(edges_embedding_df[0].loc[0, "edge_emb"])  # Original cudf access
         else:
             first_edge_emb = edges_embedding_df[0].iloc[0]["edge_emb"]
             emb_dim = (
@@ -772,24 +745,18 @@ class DynamicDataLoader:
             # Insert data in batches
             total = len(data[0])
-            for i in self.tqdm(
-                range(0, total, self.batch_size), desc="Inserting edges"
-            ):
+            for i in self.tqdm(range(0, total, self.batch_size), desc="Inserting edges"):
                 batch_data = [d[i : i + self.batch_size] for d in data]
                 collection.insert(batch_data)
         collection.flush()
-        logger.info(
-            "Edges collection created with %d entities", collection.num_entities
-        )
+        logger.info("Edges collection created with %d entities", collection.num_entities)
     def run(self):
         """Main execution method."""
         try:
             logger.info("Starting Dynamic Milvus data loading process...")
-            logger.info(
-                "System: %s %s", self.detector.os_type, self.detector.architecture
-            )
+            logger.info("System: %s %s", self.detector.os_type, self.detector.architecture)
             logger.info("GPU acceleration: %s", self.use_gpu)
             # Connect to Milvus
@@ -851,8 +818,7 @@ def main():
         "data_dir": os.getenv("DATA_DIR", default_data_dir),
         "batch_size": int(os.getenv("BATCH_SIZE", "500")),
         "chunk_size": int(os.getenv("CHUNK_SIZE", "5")),
-        "auto_install_packages": os.getenv("AUTO_INSTALL_PACKAGES", "true").lower()
-        == "true",
+        "auto_install_packages": os.getenv("AUTO_INSTALL_PACKAGES", "true").lower() == "true",
     }
     # Override detection for testing/forcing specific modes

aiagents4pharma/talk2knowledgegraphs/states/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
-'''
+"""
 This file is used to import all the models in the package.
-'''
+"""
 from . import state_talk2knowledgegraphs

aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py CHANGED Viewed

@@ -3,6 +3,7 @@ This is the state file for the Talk2KnowledgeGraphs agent.
 """
 from typing import Annotated
 # import operator
 from langchain_core.embeddings.embeddings import Embeddings
 from langchain_core.language_models.chat_models import BaseChatModel

aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py CHANGED Viewed

@@ -1,15 +1,19 @@
 """
 Test cases for agents/t2kg_agent.py
 """
-from unittest.mock import patch, MagicMock
+from unittest.mock import MagicMock, patch
+import pandas as pd
 import pytest
 from langchain_core.messages import HumanMessage
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-import pandas as pd
 from ..agents.t2kg_agent import get_app
 DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
 @pytest.fixture(name="input_dict")
 def input_dict_fixture():
     """
@@ -24,7 +28,7 @@ def input_dict_fixture():
             "cellular_component": [],
             "biological_process": [],
             "drug": [],
-            "disease": []
+            "disease": [],
         },
         "uploaded_files": [
             {
@@ -44,42 +48,53 @@ def input_dict_fixture():
                 "kg_text_path": f"{DATA_PATH}/biobridge_multimodal_text_graph.pkl",
             }
         ],
-        "dic_extracted_graph": []
+        "dic_extracted_graph": [],
     }
     return input_dict
 def mock_milvus_collection(name):
     """
     Mock Milvus collection for testing.
     """
     nodes = MagicMock()
     nodes.query.return_value = [
-        {"node_index": 0,
-         "node_id": "id1",
-         "node_name": "Adalimumab",
-         "node_type": "drug",
-         "feat": "featA", "feat_emb": [0.1, 0.2, 0.3],
-         "desc": "descA", "desc_emb": [0.1, 0.2, 0.3]},
-        {"node_index": 1,
-         "node_id": "id2",
-         "node_name": "TNF",
-         "node_type": "gene/protein",
-         "feat": "featB", "feat_emb": [0.4, 0.5, 0.6],
-         "desc": "descB", "desc_emb": [0.4, 0.5, 0.6]}
+        {
+            "node_index": 0,
+            "node_id": "id1",
+            "node_name": "Adalimumab",
+            "node_type": "drug",
+            "feat": "featA",
+            "feat_emb": [0.1, 0.2, 0.3],
+            "desc": "descA",
+            "desc_emb": [0.1, 0.2, 0.3],
+        },
+        {
+            "node_index": 1,
+            "node_id": "id2",
+            "node_name": "TNF",
+            "node_type": "gene/protein",
+            "feat": "featB",
+            "feat_emb": [0.4, 0.5, 0.6],
+            "desc": "descB",
+            "desc_emb": [0.4, 0.5, 0.6],
+        },
     ]
     nodes.load.return_value = None
     edges = MagicMock()
     edges.query.return_value = [
-        {"triplet_index": 0,
-         "head_id": "id1",
-         "head_index": 0,
-         "tail_id": "id2",
-         "tail_index": 1,
-         "edge_type": "drug,acts_on,gene/protein",
-         "display_relation": "acts_on",
-         "feat": "featC",
-         "feat_emb": [0.7, 0.8, 0.9]}
+        {
+            "triplet_index": 0,
+            "head_id": "id1",
+            "head_index": 0,
+            "tail_id": "id2",
+            "tail_index": 1,
+            "edge_type": "drug,acts_on,gene/protein",
+            "display_relation": "acts_on",
+            "feat": "featC",
+            "feat_emb": [0.7, 0.8, 0.9],
+        }
     ]
     edges.load.return_value = None
@@ -89,6 +104,7 @@ def mock_milvus_collection(name):
         return edges
     return None
 def test_t2kg_agent_openai_milvus_mock(input_dict):
     """
     Test the T2KG agent using OpenAI model and Milvus mock.
@@ -103,11 +119,11 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
     config = {"configurable": {"thread_id": unique_id}}
     app.update_state(config, input_dict)
     prompt = """
-    Adalimumab is a fully human monoclonal antibody (IgG1)
+    Adalimumab is a fully human monoclonal antibody (IgG1)
     that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
     I would like to get evidence from the knowledge graph about the mechanism of actions related to
-    Adalimumab in treating inflammatory bowel disease
+    Adalimumab in treating inflammatory bowel disease
     (IBD). Please follow these steps:
     - Extract a subgraph from the PrimeKG that contains information about Adalimumab.
     - Summarize the extracted subgraph.
@@ -116,21 +132,31 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
     Please set the extraction name for the extraction process as `subkg_12345`.
     """
-    with patch("aiagents4pharma.talk2knowledgegraphs.tools."
-               "milvus_multimodal_subgraph_extraction.Collection",
-               side_effect=mock_milvus_collection), \
-         patch("aiagents4pharma.talk2knowledgegraphs.tools."
-               "milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning") as mock_pcst, \
-         patch("pymilvus.connections") as mock_connections, \
-         patch("aiagents4pharma.talk2knowledgegraphs.tools."
-               "milvus_multimodal_subgraph_extraction.hydra.initialize"), \
-         patch("aiagents4pharma.talk2knowledgegraphs.tools."
-               "milvus_multimodal_subgraph_extraction.hydra.compose") as mock_compose:
+    with (
+        patch(
+            "aiagents4pharma.talk2knowledgegraphs.tools."
+            "milvus_multimodal_subgraph_extraction.Collection",
+            side_effect=mock_milvus_collection,
+        ),
+        patch(
+            "aiagents4pharma.talk2knowledgegraphs.tools."
+            "milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning"
+        ) as mock_pcst,
+        patch("pymilvus.connections") as mock_connections,
+        patch(
+            "aiagents4pharma.talk2knowledgegraphs.tools."
+            "milvus_multimodal_subgraph_extraction.hydra.initialize"
+        ),
+        patch(
+            "aiagents4pharma.talk2knowledgegraphs.tools."
+            "milvus_multimodal_subgraph_extraction.hydra.compose"
+        ) as mock_compose,
+    ):
         mock_connections.has_connection.return_value = True
         mock_pcst_instance = MagicMock()
         mock_pcst_instance.extract_subgraph.return_value = {
             "nodes": pd.Series([0, 1]),
-            "edges": pd.Series([0])
+            "edges": pd.Series([0]),
         }
         mock_pcst.return_value = mock_pcst_instance
         mock_cfg = MagicMock()
@@ -144,8 +170,7 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
         mock_cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}
         mock_compose.return_value = MagicMock()
         mock_compose.return_value.tools.multimodal_subgraph_extraction = mock_cfg
-        mock_compose.return_value.tools.subgraph_summarization.\
-            prompt_subgraph_summarization = (
+        mock_compose.return_value.tools.subgraph_summarization.prompt_subgraph_summarization = (
             "Summarize the following subgraph: {textualized_subgraph}"
         )

aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py CHANGED Viewed

@@ -4,7 +4,9 @@ Test cases for datasets/primekg_loader.py
 import os
 import shutil
 import pytest
 from ..datasets.biobridge_primekg import BioBridgePrimeKG
 # Remove the data folder for testing if it exists
@@ -12,13 +14,14 @@ PRIMEKG_LOCAL_DIR = "../data/primekg_test/"
 LOCAL_DIR = "../data/biobridge_primekg_test/"
 shutil.rmtree(LOCAL_DIR, ignore_errors=True)
 @pytest.fixture(name="biobridge_primekg")
 def biobridge_primekg_fixture():
     """
     Fixture for creating an instance of PrimeKG.
     """
-    return BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR,
-                            local_dir=LOCAL_DIR)
+    return BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR, local_dir=LOCAL_DIR)
 def test_download_primekg(biobridge_primekg):
     """
@@ -39,8 +42,7 @@ def test_download_primekg(biobridge_primekg):
     assert os.path.exists(biobridge_primekg.local_dir)
     # Check if downloaded and processed files exist
     # PrimeKG files
-    files = ["nodes.tab", "primekg_nodes.tsv.gz",
-             "edges.csv", "primekg_edges.tsv.gz"]
+    files = ["nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"]
     for file in files:
         path = f"{biobridge_primekg.primekg_dir}/{file}"
         assert os.path.exists(path)
@@ -54,7 +56,7 @@ def test_download_primekg(biobridge_primekg):
         "bp.pkl",
         "drug.pkl",
         "disease.pkl",
-        "embedding_dict.pkl"
+        "embedding_dict.pkl",
     ]
     for file in files:
         path = f"{biobridge_primekg.local_dir}/embeddings/{file}"
@@ -89,9 +91,9 @@ def test_download_primekg(biobridge_primekg):
     # Check processed BioBridge data config
     assert biobridge_data_config is not None
     assert len(biobridge_data_config) > 0
-    assert len(biobridge_data_config['node_type']) == 10
-    assert len(biobridge_data_config['relation_type']) == 18
-    assert len(biobridge_data_config['emb_dim']) == 6
+    assert len(biobridge_data_config["node_type"]) == 10
+    assert len(biobridge_data_config["relation_type"]) == 18
+    assert len(biobridge_data_config["emb_dim"]) == 6
     # Check processed BioBridge embeddings
     assert biobridge_emb_dict is not None
     assert len(biobridge_emb_dict) > 0
@@ -100,24 +102,26 @@ def test_download_primekg(biobridge_primekg):
     assert biobridge_triplets is not None
     assert len(biobridge_triplets) > 0
     assert biobridge_triplets.shape[0] == 3904610
-    assert list(biobridge_splits.keys()) == ['train', 'node_train', 'test', 'node_test']
-    assert len(biobridge_splits['train']) == 3510930
-    assert len(biobridge_splits['node_train']) == 76486
-    assert len(biobridge_splits['test']) == 393680
-    assert len(biobridge_splits['node_test']) == 8495
+    assert list(biobridge_splits.keys()) == ["train", "node_train", "test", "node_test"]
+    assert len(biobridge_splits["train"]) == 3510930
+    assert len(biobridge_splits["node_train"]) == 76486
+    assert len(biobridge_splits["test"]) == 393680
+    assert len(biobridge_splits["node_test"]) == 8495
     # Check node info dictionary
-    assert list(biobridge_node_info.keys()) == ['gene/protein',
-                                                'molecular_function',
-                                                'cellular_component',
-                                                'biological_process',
-                                                'drug',
-                                                'disease']
-    assert len(biobridge_node_info['gene/protein']) == 19162
-    assert len(biobridge_node_info['molecular_function']) == 10966
-    assert len(biobridge_node_info['cellular_component']) == 4013
-    assert len(biobridge_node_info['biological_process']) == 27478
-    assert len(biobridge_node_info['drug']) == 6948
-    assert len(biobridge_node_info['disease']) == 44133
+    assert list(biobridge_node_info.keys()) == [
+        "gene/protein",
+        "molecular_function",
+        "cellular_component",
+        "biological_process",
+        "drug",
+        "disease",
+    ]
+    assert len(biobridge_node_info["gene/protein"]) == 19162
+    assert len(biobridge_node_info["molecular_function"]) == 10966
+    assert len(biobridge_node_info["cellular_component"]) == 4013
+    assert len(biobridge_node_info["biological_process"]) == 27478
+    assert len(biobridge_node_info["drug"]) == 6948
+    assert len(biobridge_node_info["disease"]) == 44133
 def test_load_existing_primekg(biobridge_primekg):
@@ -139,8 +143,7 @@ def test_load_existing_primekg(biobridge_primekg):
     assert os.path.exists(biobridge_primekg.local_dir)
     # Check if downloaded and processed files exist
     # PrimeKG files
-    files = ["nodes.tab", "primekg_nodes.tsv.gz",
-             "edges.csv", "primekg_edges.tsv.gz"]
+    files = ["nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"]
     for file in files:
         path = f"{biobridge_primekg.primekg_dir}/{file}"
         assert os.path.exists(path)
@@ -154,7 +157,7 @@ def test_load_existing_primekg(biobridge_primekg):
         "bp.pkl",
         "drug.pkl",
         "disease.pkl",
-        "embedding_dict.pkl"
+        "embedding_dict.pkl",
     ]
     for file in files:
         path = f"{biobridge_primekg.local_dir}/embeddings/{file}"
@@ -189,9 +192,9 @@ def test_load_existing_primekg(biobridge_primekg):
     # Check processed BioBridge data config
     assert biobridge_data_config is not None
     assert len(biobridge_data_config) > 0
-    assert len(biobridge_data_config['node_type']) == 10
-    assert len(biobridge_data_config['relation_type']) == 18
-    assert len(biobridge_data_config['emb_dim']) == 6
+    assert len(biobridge_data_config["node_type"]) == 10
+    assert len(biobridge_data_config["relation_type"]) == 18
+    assert len(biobridge_data_config["emb_dim"]) == 6
     # Check processed BioBridge embeddings
     assert biobridge_emb_dict is not None
     assert len(biobridge_emb_dict) > 0
@@ -200,24 +203,27 @@ def test_load_existing_primekg(biobridge_primekg):
     assert biobridge_triplets is not None
     assert len(biobridge_triplets) > 0
     assert biobridge_triplets.shape[0] == 3904610
-    assert list(biobridge_splits.keys()) == ['train', 'node_train', 'test', 'node_test']
-    assert len(biobridge_splits['train']) == 3510930
-    assert len(biobridge_splits['node_train']) == 76486
-    assert len(biobridge_splits['test']) == 393680
-    assert len(biobridge_splits['node_test']) == 8495
+    assert list(biobridge_splits.keys()) == ["train", "node_train", "test", "node_test"]
+    assert len(biobridge_splits["train"]) == 3510930
+    assert len(biobridge_splits["node_train"]) == 76486
+    assert len(biobridge_splits["test"]) == 393680
+    assert len(biobridge_splits["node_test"]) == 8495
     # Check node info dictionary
-    assert list(biobridge_node_info.keys()) == ['gene/protein',
-                                                'molecular_function',
-                                                'cellular_component',
-                                                'biological_process',
-                                                'drug',
-                                                'disease']
-    assert len(biobridge_node_info['gene/protein']) == 19162
-    assert len(biobridge_node_info['molecular_function']) == 10966
-    assert len(biobridge_node_info['cellular_component']) == 4013
-    assert len(biobridge_node_info['biological_process']) == 27478
-    assert len(biobridge_node_info['drug']) == 6948
-    assert len(biobridge_node_info['disease']) == 44133
+    assert list(biobridge_node_info.keys()) == [
+        "gene/protein",
+        "molecular_function",
+        "cellular_component",
+        "biological_process",
+        "drug",
+        "disease",
+    ]
+    assert len(biobridge_node_info["gene/protein"]) == 19162
+    assert len(biobridge_node_info["molecular_function"]) == 10966
+    assert len(biobridge_node_info["cellular_component"]) == 4013
+    assert len(biobridge_node_info["biological_process"]) == 27478
+    assert len(biobridge_node_info["drug"]) == 6948
+    assert len(biobridge_node_info["disease"]) == 44133
 # def test_load_existing_primekg_with_negative_triplets(biobridge_primekg):
 #     """

aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl

aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl