PyPI - aiagents4pharma - Versions diffs - 0.0.0__py3-none-any.whl - Mend

aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""
+Test cases for utils/embeddings/embeddings.py
+"""
+import pytest
+from ..utils.embeddings.embeddings import Embeddings
+class TestEmbeddings(Embeddings):
+    """Test implementation of the Embeddings interface for testing purposes."""
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        return [[0.1, 0.2, 0.3] for _ in texts]
+    def embed_query(self, text: str) -> list[float]:
+        return [0.1, 0.2, 0.3]
+def test_embed_documents():
+    """Test embedding documents using the Embeddings interface."""
+    embeddings = TestEmbeddings()
+    texts = ["text1", "text2"]
+    result = embeddings.embed_documents(texts)
+    assert result == [[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]
+def test_embed_query():
+    """Test embedding a query using the Embeddings interface."""
+    embeddings = TestEmbeddings()
+    text = "query"
+    result = embeddings.embed_query(text)
+    assert result == [0.1, 0.2, 0.3]
+@pytest.mark.asyncio
+async def test_aembed_documents():
+    """Test asynchronous embedding of documents using the Embeddings interface."""
+    embeddings = TestEmbeddings()
+    texts = ["text1", "text2"]
+    result = await embeddings.aembed_documents(texts)
+    assert result == [[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]
+@pytest.mark.asyncio
+async def test_aembed_query():
+    """Test asynchronous embedding of a query using the Embeddings interface."""
+    embeddings = TestEmbeddings()
+    text = "query"
+    result = await embeddings.aembed_query(text)
+    assert result == [0.1, 0.2, 0.3]

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""
+Test cases for utils/embeddings/huggingface.py
+"""
+import pytest
+from ..utils.embeddings.huggingface import EmbeddingWithHuggingFace
+@pytest.fixture(name="embedding_model")
+def embedding_model_fixture():
+    """Return the configuration object for the HuggingFace embedding model and model object"""
+    return EmbeddingWithHuggingFace(
+        model_name="NeuML/pubmedbert-base-embeddings",
+        model_cache_dir="../../cache",
+        truncation=True,
+    )
+def test_embedding_with_huggingface_embed_documents(embedding_model):
+    """Test embedding documents using the EmbeddingWithHuggingFace class."""
+    # Perform embedding
+    texts = ["Adalimumab", "Infliximab", "Vedolizumab"]
+    result = embedding_model.embed_documents(texts)
+    # Check the result
+    assert len(result) == 3
+    assert len(result[0]) == 768
+def test_embedding_with_huggingface_embed_query(embedding_model):
+    """Test embedding a query using the EmbeddingWithHuggingFace class."""
+    # Perform embedding
+    text = "Adalimumab"
+    result = embedding_model.embed_query(text)
+    # Check the result
+    assert len(result) == 768
+def test_embedding_with_huggingface_failed():
+    """Test embedding documents using the EmbeddingWithHuggingFace class."""
+    # Check if the model is available on HuggingFace Hub
+    model_name = "aiagents4pharma/embeddings"
+    err_msg = f"Model {model_name} is not available on HuggingFace Hub."
+    with pytest.raises(ValueError, match=err_msg):
+        EmbeddingWithHuggingFace(
+            model_name=model_name,
+            model_cache_dir="../../cache",
+            truncation=True,
+        )

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py ADDED Viewed

@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+Test cases for utils/embeddings/nim_molmim.py
+"""
+import unittest
+from unittest.mock import MagicMock, patch
+from ..utils.embeddings.nim_molmim import EmbeddingWithMOLMIM
+class TestEmbeddingWithMOLMIM(unittest.TestCase):
+    """
+    Test cases for EmbeddingWithMOLMIM class.
+    """
+    def setUp(self):
+        self.base_url = "https://fake-nim-api.com/embeddings"
+        self.embeddings_model = EmbeddingWithMOLMIM(self.base_url)
+        self.test_texts = ["CCO", "CCC", "C=O"]
+        self.test_query = "CCO"
+        self.mock_response = {"embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]}
+    @patch("requests.post")
+    def test_embed_documents(self, mock_post):
+        """
+        Test the embed_documents method.
+        """
+        # Mock the response from requests.post
+        mock_post.return_value = MagicMock()
+        mock_post.return_value.json.return_value = self.mock_response
+        embeddings = self.embeddings_model.embed_documents(self.test_texts)
+        # Assertions
+        self.assertEqual(embeddings, self.mock_response["embeddings"])
+        mock_post.assert_called_once_with(
+            self.base_url,
+            headers={"accept": "application/json", "Content-Type": "application/json"},
+            data='{"sequences": ["CCO", "CCC", "C=O"]}',
+            timeout=60,
+        )
+    @patch("requests.post")
+    def test_embed_query(self, mock_post):
+        """
+        Test the embed_query method.
+        """
+        # Mock the response from requests.post
+        mock_post.return_value = MagicMock()
+        mock_post.return_value.json.return_value = {"embeddings": [[0.1, 0.2, 0.3]]}
+        embedding = self.embeddings_model.embed_query(self.test_query)
+        # Assertions
+        self.assertEqual(embedding, [[0.1, 0.2, 0.3]])
+        mock_post.assert_called_once_with(
+            self.base_url,
+            headers={"accept": "application/json", "Content-Type": "application/json"},
+            data='{"sequences": ["CCO"]}',
+            timeout=60,
+        )

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""
+Test cases for utils/embeddings/ollama.py
+"""
+import ollama
+import pytest
+from ..utils.embeddings.ollama import EmbeddingWithOllama
+@pytest.fixture(name="ollama_config")
+def fixture_ollama_config():
+    """Return a dictionary with Ollama configuration."""
+    return {
+        "model_name": "all-minilm",  # Choose a small model
+    }
+def test_no_model_ollama(ollama_config):
+    """Test the case when the Ollama model is not available."""
+    cfg = ollama_config
+    # Delete the Ollama model
+    try:
+        ollama.delete(cfg["model_name"])
+    except ollama.ResponseError:
+        pass
+    # Check if the model is available
+    with pytest.raises(
+        ValueError,
+        match=f"Error: Pulled {cfg['model_name']} model and restarted Ollama server.",
+    ):
+        EmbeddingWithOllama(model_name=cfg["model_name"])
+@pytest.fixture(name="embedding_model")
+def embedding_model_fixture(ollama_config):
+    """Return the configuration object for the Ollama embedding model and model object"""
+    cfg = ollama_config
+    return EmbeddingWithOllama(model_name=cfg["model_name"])
+def test_embedding_with_ollama_embed_documents(embedding_model):
+    """Test embedding documents using the EmbeddingWithOllama class."""
+    # Perform embedding
+    texts = ["Adalimumab", "Infliximab", "Vedolizumab"]
+    result = embedding_model.embed_documents(texts)
+    # Check the result
+    assert len(result) == 3
+    assert len(result[0]) == 384
+def test_embedding_with_ollama_embed_query(embedding_model):
+    """Test embedding a query using the EmbeddingWithOllama class."""
+    # Perform embedding
+    text = "Adalimumab"
+    result = embedding_model.embed_query(text)
+    # Check the result
+    assert len(result) == 384
+    # Delete the Ollama model so that it will not be cached afterward
+    ollama.delete(embedding_model.model_name)

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""
+Test cases for utils/embeddings/sentence_transformer.py
+"""
+import numpy as np
+import pytest
+from ..utils.embeddings.sentence_transformer import EmbeddingWithSentenceTransformer
+@pytest.fixture(name="embedding_model")
+def embedding_model_fixture():
+    """
+    Fixture for creating an instance of EmbeddingWithSentenceTransformer.
+    """
+    model_name = "sentence-transformers/all-MiniLM-L6-v1"  # Small model for testing
+    embedding_model = EmbeddingWithSentenceTransformer(model_name=model_name)
+    # Move underlying model to CPU for testing
+    embedding_model.model.to("cpu")
+    return embedding_model
+def test_embed_documents(embedding_model):
+    """
+    Test the embed_documents method of EmbeddingWithSentenceTransformer class.
+    """
+    # Perform embedding
+    texts = ["This is a test sentence.", "Another test sentence."]
+    embeddings = embedding_model.embed_documents(texts)
+    # Check the result
+    assert len(embeddings) == len(texts)
+    assert len(embeddings[0]) > 0
+    assert len(embeddings[0]) == 384
+    assert embeddings.dtype == np.float32
+def test_embed_query(embedding_model):
+    """
+    Test the embed_query method of EmbeddingWithSentenceTransformer class.
+    """
+    # Perform embedding
+    text = "This is a test query."
+    embedding = embedding_model.embed_query(text)
+    # Check the result
+    assert len(embedding) > 0
+    assert len(embedding) == 384
+    assert embedding.dtype == np.float32

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""
+Test cases for utils/enrichments/enrichments.py
+"""
+from ..utils.enrichments.enrichments import Enrichments
+class TestEnrichments(Enrichments):
+    """Test implementation of the Enrichments interface for testing purposes."""
+    def enrich_documents(self, texts: list[str]) -> list[list[float]]:
+        return [f"Additional text description of {text} as the input." for text in texts]
+    def enrich_documents_with_rag(self, texts, docs):
+        # Currently we don't have a RAG model to test this method.
+        # Thus, we will just call the enrich_documents method instead.
+        return self.enrich_documents(texts)
+def test_enrich_documents():
+    """Test enriching documents using the Enrichments interface."""
+    enrichments = TestEnrichments()
+    texts = ["text1", "text2"]
+    result = enrichments.enrich_documents(texts)
+    assert result == [
+        "Additional text description of text1 as the input.",
+        "Additional text description of text2 as the input.",
+    ]
+def test_enrich_documents_with_rag():
+    """Test enriching documents with RAG using the Enrichments interface."""
+    enrichments = TestEnrichments()
+    texts = ["text1", "text2"]
+    docs = ["doc1", "doc2"]
+    result = enrichments.enrich_documents_with_rag(texts, docs)
+    assert result == [
+        "Additional text description of text1 as the input.",
+        "Additional text description of text2 as the input.",
+    ]

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""
+Test cases for utils/enrichments/ollama.py
+"""
+import ollama
+import pytest
+from ..utils.enrichments.ollama import EnrichmentWithOllama
+@pytest.fixture(name="ollama_config")
+def fixture_ollama_config():
+    """Return a dictionary with Ollama configuration."""
+    return {
+        "model_name": "llama3.2:1b",
+        "prompt_enrichment": """
+            Given the input as a list of strings, please return the list of addditional information
+            of each input terms using your prior knowledge.
+            Example:
+            Input: ['acetaminophen', 'aspirin']
+            Ouput: ['acetaminophen is a medication used to treat pain and fever',
+            'aspirin is a medication used to treat pain, fever, and inflammation']
+            Do not include any pretext as the output, only the list of strings enriched.
+            Input: {input}
+        """,
+        "temperature": 0.0,
+        "streaming": False,
+    }
+def test_no_model_ollama(ollama_config):
+    """Test the case when the Ollama model is not available."""
+    cfg = ollama_config
+    cfg_model = "smollm2:135m"  # Choose a small model
+    # Delete the Ollama model
+    try:
+        ollama.delete(cfg_model)
+    except ollama.ResponseError:
+        pass
+    # Check if the model is available
+    with pytest.raises(
+        ValueError,
+        match=f"Error: Pulled {cfg_model} model and restarted Ollama server.",
+    ):
+        EnrichmentWithOllama(
+            model_name=cfg_model,
+            prompt_enrichment=cfg["prompt_enrichment"],
+            temperature=cfg["temperature"],
+            streaming=cfg["streaming"],
+        )
+    ollama.delete(cfg_model)
+def test_enrich_ollama(ollama_config):
+    """Test the Ollama textual enrichment class for node enrichment."""
+    # Prepare enrichment model
+    cfg = ollama_config
+    enr_model = EnrichmentWithOllama(
+        model_name=cfg["model_name"],
+        prompt_enrichment=cfg["prompt_enrichment"],
+        temperature=cfg["temperature"],
+        streaming=cfg["streaming"],
+    )
+    # Perform enrichment for nodes
+    nodes = ["acetaminophen"]
+    enriched_nodes = enr_model.enrich_documents(nodes)
+    # Check the enriched nodes
+    assert len(enriched_nodes) == 1
+    assert all(enriched_nodes[i] != nodes[i] for i in range(len(nodes)))
+def test_enrich_ollama_rag(ollama_config):
+    """Test the Ollama textual enrichment class for enrichment with RAG (not implemented)."""
+    # Prepare enrichment model
+    cfg = ollama_config
+    enr_model = EnrichmentWithOllama(
+        model_name=cfg["model_name"],
+        prompt_enrichment=cfg["prompt_enrichment"],
+        temperature=cfg["temperature"],
+        streaming=cfg["streaming"],
+    )
+    # Perform enrichment for nodes
+    nodes = ["acetaminophen"]
+    docs = [r"\path\to\doc1", r"\path\to\doc2"]
+    enriched_nodes = enr_model.enrich_documents_with_rag(nodes, docs)
+    # Check the enriched nodes
+    assert len(enriched_nodes) == 1
+    assert all(enriched_nodes[i] != nodes[i] for i in range(len(nodes)))

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py ADDED Viewed

@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""
+Test cases for utils/enrichments/ols_terms.py
+"""
+import pytest
+from ..utils.enrichments.ols_terms import EnrichmentWithOLS
+# In this test, we will consider 6 examples:
+# 1. CL_0000899: T-helper 17 cell (Cell Ontology)
+# 2. GO_0046427: positive regulation of receptor signaling pathway via JAK-STAT (GO)
+# 3. UBERON_0000004: nose (Uberon)
+# 4. HP_0009739: Hypoplasia of the antihelix (Human Phenotype Ontology)
+# 5. MONDO_0005011: Crohn disease (MONDO)
+# 6. XYZ_0000000: Non-existing term (for testing error handling)
+# The expected description for each existing term contains:
+CL_DESC = "CD4-positive, alpha-beta T cell"
+GO_DESC = "Any process that activates or increases the frequency, rate or extent"
+UBERON_DESC = "The olfactory organ of vertebrates, consisting of nares"
+HP_DESC = "Developmental hypoplasia of the antihelix"
+MONDO_DESC = "A gastrointestinal disorder characterized by chronic inflammation"
+# The expected description for the non-existing term is an empty string
+@pytest.fixture(name="enrich_obj")
+def fixture_uniprot_config():
+    """Return a dictionary with the configuration for OLS enrichment."""
+    return EnrichmentWithOLS()
+def test_enrich_documents(enrich_obj):
+    """Test the enrich_documents method."""
+    ols_terms = [
+        "CL_0000899",
+        "GO_0046427",
+        "UBERON_0000004",
+        "HP_0009739",
+        "MONDO_0005011",
+        "XYZ_0000000",
+    ]
+    descriptions = enrich_obj.enrich_documents(ols_terms)
+    assert CL_DESC in descriptions[0]
+    assert GO_DESC in descriptions[1]
+    assert UBERON_DESC in descriptions[2]
+    assert HP_DESC in descriptions[3]
+    assert MONDO_DESC in descriptions[4]
+    assert descriptions[5] == ""
+def test_enrich_documents_with_rag(enrich_obj):
+    """Test the enrich_documents_with_rag method."""
+    ols_terms = [
+        "CL_0000899",
+        "GO_0046427",
+        "UBERON_0000004",
+        "HP_0009739",
+        "MONDO_0005011",
+        "XYZ_0000000",
+    ]
+    descriptions = enrich_obj.enrich_documents_with_rag(ols_terms, None)
+    assert CL_DESC in descriptions[0]
+    assert GO_DESC in descriptions[1]
+    assert UBERON_DESC in descriptions[2]
+    assert HP_DESC in descriptions[3]
+    assert MONDO_DESC in descriptions[4]
+    assert descriptions[5] == ""

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py ADDED Viewed

@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""
+Test cases for utils/enrichments/pubchem_strings.py
+"""
+import pytest
+from ..utils.enrichments.pubchem_strings import EnrichmentWithPubChem
+# In this test, we will consider 2 examples:
+# 1. PubChem ID: 5311000 (Alclometasone)
+# 2. PubChem ID: 1X (Fake ID)
+# The expected SMILES representation for the first PubChem ID is
+# (split over two statements to keep the lines short):
+SMILES_FIRST = "C[C@@H]1C[C@H]2[C@@H]3[C@@H](CC4=CC(=O)C=C[C@@]"
+SMILES_FIRST += "4([C@H]3[C@H](C[C@@]2([C@]1(C(=O)CO)O)C)O)C)Cl"
+# The expected description for the first PubChem ID starts with:
+DESCRIPTION_FIRST = "Alclometasone is a prednisolone compound having an alpha-chloro substituent"
+# The expected SMILES representation and description for the second PubChem ID are both None.
+@pytest.fixture(name="enrich_obj")
+def fixture_pubchem_config():
+    """Return a dictionary with the configuration for the PubChem enrichment."""
+    return EnrichmentWithPubChem()
+def test_enrich_documents(enrich_obj):
+    """Test the enrich_documents method."""
+    pubchem_ids = ["5311000", "1X"]
+    enriched_descriptions, enriched_strings = enrich_obj.enrich_documents(pubchem_ids)
+    assert enriched_strings == [SMILES_FIRST, None]
+    assert enriched_descriptions[0].startswith(DESCRIPTION_FIRST)
+    assert enriched_descriptions[1] is None
+def test_enrich_documents_with_rag(enrich_obj):
+    """Test the enrich_documents_with_rag method."""
+    pubchem_ids = ["5311000", "1X"]
+    enriched_descriptions, enriched_strings = enrich_obj.enrich_documents_with_rag(
+        pubchem_ids, None
+    )
+    assert enriched_strings == [SMILES_FIRST, None]
+    assert enriched_descriptions[0].startswith(DESCRIPTION_FIRST)
+    assert enriched_descriptions[1] is None

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py ADDED Viewed

@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""
+Test cases for utils/enrichments/reactome_pathways.py
+"""
+import pytest
+from ..utils.enrichments.reactome_pathways import EnrichmentWithReactome
+# In this test, we will consider 3 examples:
+# 1. R-HSA-3244647: cGAS binds cytosolic DNA
+# 2. R-HSA-9905952: ATP binds P2RX7 in P2RX7 trimer:PANX1 heptamer
+# 3. R-HSA-1234567: Fake pathway
+# The expected description of pathway R-HSA-3244647 starts with:
+FIRST_PATHWAY = "Cyclic GMP-AMP (cGAMP) synthase (cGAS) was identified as a cytosolic DNA"
+# The expected description of pathway R-HSA-9905952 starts with:
+SECOND_PATHWAY = "The P2RX7 (P2X7, P2Z) trimer binds ATP,"
+# The expected description of pathway R-HSA-1234567 is None.
+@pytest.fixture(name="enrich_obj")
+def fixture_uniprot_config():
+    """Return a dictionary with the configuration for Reactome enrichment."""
+    return EnrichmentWithReactome()
+def test_enrich_documents(enrich_obj):
+    """Test the enrich_documents method."""
+    reactome_pathways = ["R-HSA-3244647", "R-HSA-9905952", "R-HSA-1234567"]
+    descriptions = enrich_obj.enrich_documents(reactome_pathways)
+    assert descriptions[0].startswith(FIRST_PATHWAY)
+    assert descriptions[1].startswith(SECOND_PATHWAY)
+    assert descriptions[2] is None
+def test_enrich_documents_with_rag(enrich_obj):
+    """Test the enrich_documents_with_rag method."""
+    reactome_pathways = ["R-HSA-3244647", "R-HSA-9905952", "R-HSA-1234567"]
+    descriptions = enrich_obj.enrich_documents_with_rag(reactome_pathways, None)
+    assert descriptions[0].startswith(FIRST_PATHWAY)
+    assert descriptions[1].startswith(SECOND_PATHWAY)
+    assert descriptions[2] is None

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""
+Test cases for utils/enrichments/uniprot_proteins.py
+"""
+import pytest
+from ..utils.enrichments.uniprot_proteins import EnrichmentWithUniProt
+# In this test, we will consider 3 examples:
+# 1. Gene Name: TP53
+# 2. Gene Name: TP5 (Incomplete; must return empty results)
+# 3. Gene Name: XZ (Shorter than 3 characters; must return empty results)
+# The expected description of TP53 starts with:
+START_DESCP = "Multifunctional transcription factor"
+# The expected amino acid sequence of TP53 starts with:
+START_SEQ = "MEEPQSDPSV"
+@pytest.fixture(name="enrich_obj")
+def fixture_uniprot_config():
+    """Return a dictionary with the configuration for UniProt enrichment."""
+    return EnrichmentWithUniProt()
+def test_enrich_documents(enrich_obj):
+    """Test the enrich_documents method."""
+    gene_names = ["TP53", "TP5", "XZ"]
+    descriptions, sequences = enrich_obj.enrich_documents(gene_names)
+    assert descriptions[0].startswith(START_DESCP)
+    assert sequences[0].startswith(START_SEQ)
+    assert descriptions[1] is None
+    assert sequences[1] is None
+    assert descriptions[2] is None
+    assert sequences[2] is None
+def test_enrich_documents_with_rag(enrich_obj):
+    """Test the enrich_documents_with_rag method."""
+    gene_names = ["TP53", "TP5", "XZ"]
+    descriptions, sequences = enrich_obj.enrich_documents_with_rag(gene_names, None)
+    assert descriptions[0].startswith(START_DESCP)
+    assert sequences[0].startswith(START_SEQ)
+    assert descriptions[1] is None
+    assert sequences[1] is None
+    assert descriptions[2] is None
+    assert sequences[2] is None