aiagents4pharma 1.21.0__py3-none-any.whl → 1.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,3 +5,4 @@ Import all the modules in the package
5
5
  from . import agents
6
6
  from . import tools
7
7
  from . import app
8
+ from . import utils
@@ -4,4 +4,5 @@ defaults:
4
4
  - tools/subgraph_extraction: default
5
5
  - tools/subgraph_summarization: default
6
6
  - tools/graphrag_reasoning: default
7
+ - utils/pubchem_utils: default
7
8
  - app/frontend: default
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Test cases for utils/embeddings/nim_molmim.py
5
+ """
6
+
7
+ import unittest
8
+ from unittest.mock import patch, MagicMock
9
+ from ..utils.embeddings.nim_molmim import EmbeddingWithMOLMIM
10
+
11
class TestEmbeddingWithMOLMIM(unittest.TestCase):
    """
    Unit tests for EmbeddingWithMOLMIM with the HTTP layer mocked out.
    """

    # Headers every request sent by the model is expected to carry.
    EXPECTED_HEADERS = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }

    def setUp(self):
        """
        Create a model bound to a fake endpoint plus sample inputs/outputs.
        """
        self.base_url = "https://fake-nim-api.com/embeddings"
        self.embeddings_model = EmbeddingWithMOLMIM(self.base_url)
        self.test_texts = ["CCO", "CCC", "C=O"]
        self.test_query = "CCO"
        self.mock_response = {
            "embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        }

    @patch("requests.post")
    def test_embed_documents(self, mock_post):
        """
        embed_documents returns the "embeddings" payload and posts all inputs.
        """
        # Stand-in for the HTTP response returned by requests.post
        fake_response = MagicMock()
        fake_response.json.return_value = self.mock_response
        mock_post.return_value = fake_response

        result = self.embeddings_model.embed_documents(self.test_texts)

        # The payload is passed through untouched ...
        self.assertEqual(result, self.mock_response["embeddings"])
        # ... and exactly one request was made with the serialized inputs.
        mock_post.assert_called_once_with(
            self.base_url,
            headers=self.EXPECTED_HEADERS,
            data='{"sequences": ["CCO", "CCC", "C=O"]}',
            timeout=60
        )

    @patch("requests.post")
    def test_embed_query(self, mock_post):
        """
        embed_query wraps the single query in a list and returns the payload.
        """
        # Stand-in for the HTTP response returned by requests.post
        fake_response = MagicMock()
        fake_response.json.return_value = {"embeddings": [[0.1, 0.2, 0.3]]}
        mock_post.return_value = fake_response

        result = self.embeddings_model.embed_query(self.test_query)

        # The result is the list of embeddings as returned by the service.
        self.assertEqual(result, [[0.1, 0.2, 0.3]])
        mock_post.assert_called_once_with(
            self.base_url,
            headers=self.EXPECTED_HEADERS,
            data='{"sequences": ["CCO"]}',
            timeout=60
        )
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Test cases for utils/enrichments/pubchem_strings.py
5
+ """
6
+
7
+ import pytest
8
+ from ..utils.enrichments.pubchem_strings import EnrichmentWithPubChem
9
+
10
+ # In this test, we will consider 2 examples:
11
+ # 1. PubChem ID: 5311000 (Alclometasone)
12
+ # 2. PubChem ID: 1X (Fake ID)
13
+ # The expected SMILES representation for the first PubChem ID is:
14
SMILES_FIRST = (
    'C[C@@H]1C[C@H]2[C@@H]3[C@@H](CC4=CC(=O)C=C[C@@]'
    '4([C@H]3[C@H](C[C@@]2([C@]1(C(=O)CO)O)C)O)C)Cl'
)
16
+ # The expected SMILES representation for the second PubChem ID is None.
17
+
18
@pytest.fixture(name="enrich_obj")
def fixture_pubchem_config():
    """Provide an EnrichmentWithPubChem instance for the tests."""
    enricher = EnrichmentWithPubChem()
    return enricher
22
+
23
def test_enrich_documents(enrich_obj):
    """enrich_documents maps a valid ID to its SMILES and a fake ID to None."""
    expected = [SMILES_FIRST, None]
    result = enrich_obj.enrich_documents(["5311000", "1X"])
    assert result == expected
28
+
29
def test_enrich_documents_with_rag(enrich_obj):
    """enrich_documents_with_rag gives the same output when docs is None."""
    expected = [SMILES_FIRST, None]
    result = enrich_obj.enrich_documents_with_rag(["5311000", "1X"], None)
    assert result == expected
@@ -0,0 +1,16 @@
1
+ """
2
+ Test cases for utils/pubchem_utils.py
3
+ """
4
+
5
+ from ..utils import pubchem_utils
6
+
7
def test_drugbank_id2pubchem_cid():
    """
    Check the DrugBank-to-PubChem lookup for Alclometasone.

    DrugBank ID DB00240 is expected to resolve to PubChem CID 5311000.
    """
    expected_cid = 5311000
    assert pubchem_utils.drugbank_id2pubchem_cid("DB00240") == expected_cid
@@ -5,3 +5,4 @@ from . import embeddings
5
5
  from . import enrichments
6
6
  from . import extractions
7
7
  from . import kg_utils
8
+ from . import pubchem_utils
@@ -5,3 +5,4 @@ from . import embeddings
5
5
  from . import sentence_transformer
6
6
  from . import huggingface
7
7
  from . import ollama
8
+ from . import nim_molmim
@@ -0,0 +1,54 @@
1
+ """
2
+ Embedding class using MOLMIM model from NVIDIA NIM.
3
+ """
4
+
5
+ import json
6
+ from typing import List
7
+ import requests
8
+ from .embeddings import Embeddings
9
+
10
class EmbeddingWithMOLMIM(Embeddings):
    """
    Embedding class using MOLMIM model from NVIDIA NIM.

    Embeddings are obtained by POSTing SMILES strings as JSON to the
    endpoint given at construction time.
    """
    def __init__(self, base_url: str):
        """
        Initialize the EmbeddingWithMOLMIM class.

        Args:
            base_url: The URL of the NIM/MOLMIM embedding endpoint; requests
                are posted to this URL as-is.
        """
        # Set base URL
        self.base_url = base_url

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for a list of SMILES strings using MOLMIM model.

        Args:
            texts: The list of SMILES strings to be embedded.

        Returns:
            The value of the "embeddings" key of the JSON response
            (presumably one embedding vector per input string, in order).

        Raises:
            requests.HTTPError: If the endpoint answers with an HTTP error
                status.
            KeyError: If the response JSON has no "embeddings" key.
        """
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = json.dumps({"sequences": texts})
        response = requests.post(self.base_url, headers=headers, data=data, timeout=60)
        # Surface HTTP failures directly instead of letting them show up as
        # an opaque KeyError / JSON decoding error further down.
        response.raise_for_status()
        return response.json()["embeddings"]

    def embed_query(self, text: str) -> List[List[float]]:
        """
        Generate embeddings for an input query using MOLMIM model.

        Args:
            text: A query (SMILES string) to be embedded.

        Returns:
            The result of embed_documents for the single query, i.e. still a
            list of embeddings (with one entry for the one query), not a
            bare vector.
        """
        # Reuse the batch path with a one-element batch.
        return self.embed_documents([text])
@@ -3,3 +3,4 @@ This package contains modules to use the enrichment model
3
3
  """
4
4
  from . import enrichments
5
5
  from . import ollama
6
+ from . import pubchem_strings
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching PubChem IDs with their SMILES representation.
5
+ """
6
+
7
+ from typing import List
8
+ import pubchempy as pcp
9
+ from .enrichments import Enrichments
10
+
11
class EnrichmentWithPubChem(Enrichments):
    """
    Enrichment class using PubChem: maps PubChem compound IDs to SMILES.
    """
    def enrich_documents(self, texts: List[str]) -> List[str]:
        """
        Enrich a list of input PubChem IDs with their SMILES representation.

        Args:
            texts: The list of pubchem IDs to be enriched.

        Returns:
            A list aligned with `texts`. Each entry is the isomeric SMILES
            string of the compound, or None when PubChem rejects the ID as
            malformed or cannot find a record for it.
        """
        enriched_pubchem_ids = []
        for pubchem_cid in texts:
            try:
                compound = pcp.Compound.from_cid(pubchem_cid)
            except (pcp.BadRequestError, pcp.NotFoundError):
                # Malformed IDs raise BadRequestError; well-formed IDs with no
                # record raise NotFoundError. Either way keep the slot (as
                # None) so the output stays aligned with the input and one bad
                # ID does not abort the whole batch.
                enriched_pubchem_ids.append(None)
                continue
            enriched_pubchem_ids.append(compound.isomeric_smiles)

        return enriched_pubchem_ids

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input PubChem IDs with their SMILES representation.

        Args:
            texts: The list of pubchem IDs to be enriched.
            docs: Ignored; the lookup does not use any supporting documents.

        Returns:
            Same as enrich_documents(texts).
        """
        return self.enrich_documents(texts)
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Utility functions for PubChem, e.g. converting a DrugBank ID to a PubChem CID.
5
+ """
6
+
7
+ import logging
8
+ import requests
9
+ import hydra
10
+
11
+ # Initialize logger
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
def drugbank_id2pubchem_cid(drugbank_id):
    """
    Convert DrugBank ID to PubChem CID.

    The lookup URL is built from the Hydra config entry
    `utils.pubchem_utils.drugbank_id_to_pubchem_cid_url`, followed by the
    DrugBank ID and '/JSON'.

    Args:
        drugbank_id: The DrugBank ID of the drug.

    Returns:
        The PubChem CID of the first substance whose compound entry of
        type 1 carries a CID, or None if the response contains no such
        entry.
    """
    logger.info("Load Hydra configuration for PubChem ID conversion.")
    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(config_name='config',
                            overrides=['utils/pubchem_utils=default'])
        cfg = cfg.utils.pubchem_utils
    # Prepare the URL
    pubchem_url_for_drug = cfg.drugbank_id_to_pubchem_cid_url + drugbank_id + '/JSON'
    # Get the data
    response = requests.get(pubchem_url_for_drug, timeout=60)
    data = response.json()
    # Extract the PubChem CID
    for substance in data.get("PC_Substances", []):
        for compound in substance.get("compound", []):
            compound_id = compound.get("id", {})
            # NOTE(review): type 1 is presumably the standardized compound
            # record -- confirm against the PubChem substance schema.
            if compound_id.get("type") == 1:
                cid = compound_id.get("id", {}).get("cid")
                if cid is not None:
                    # Return at once so a later substance cannot overwrite
                    # the CID that was already found.
                    return cid
                # Only the first type-1 entry of a substance is considered;
                # move on to the next substance.
                break
    return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: aiagents4pharma
3
- Version: 1.21.0
3
+ Version: 1.22.0
4
4
  Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -30,6 +30,7 @@ Requires-Dist: ollama==0.4.6
30
30
  Requires-Dist: pandas==2.2.3
31
31
  Requires-Dist: pcst_fast==1.0.10
32
32
  Requires-Dist: plotly==5.24.1
33
+ Requires-Dist: pubchempy==1.0.4
33
34
  Requires-Dist: pydantic==2.9.2
34
35
  Requires-Dist: pylint==3.3.1
35
36
  Requires-Dist: pypdf==5.2.0
@@ -59,8 +59,8 @@ aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py,sha256=MLe-twtFnOu-
59
59
  aiagents4pharma/talk2knowledgegraphs/__init__.py,sha256=Z0Eo7LTiKk0STsr8VI7wkCLq7PHrK1vYlH4I1hSNLiA,165
60
60
  aiagents4pharma/talk2knowledgegraphs/agents/__init__.py,sha256=iOAzuy_8A03tQDFtSBhC9dldUo62z5gfxcVtXAdLOJs,92
61
61
  aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py,sha256=j6MA1LB28mqpb6ZEmNLGcvDZvOnlGbJB9r7VXyEGask,3079
62
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py,sha256=Y49ucO22v9oe9EwFiXN6MU2wvyB3_ZBpmHwHbeh-ZVQ,106
63
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=rwUIZ2t5j5hlFyre7VnV8zMsP0qpPTwvAFExgvQD6q0,196
62
+ aiagents4pharma/talk2knowledgegraphs/configs/__init__.py,sha256=4_DVdpahaJ55yPl0aZotlFA_MYWLFF2cubWyKtBVI_Q,126
63
+ aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=bag4w3JCSqaojG37MTksy3ZehAPe3qoVzjIN2uh3nrc,229
64
64
  aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py,sha256=-fAORvyFmG2iSvFOFDixmt9OTQRR58y89uhhu2EgbA8,46
65
65
  aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml,sha256=ENCGROwYFpR6g4QD518h73sshdn3vPVpotBMk1QJcpU,4830
66
66
  aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py,sha256=fKfc3FR7g5KjY9b6jzrU6cwKTVVpkoVZQS3dvUowu34,69
@@ -91,26 +91,32 @@ aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py,sha
91
91
  aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py,sha256=oBqfspXXOxH04OQuPb8BCW0liIQTGKXtaPNSrPpQtFc,7597
92
92
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py,sha256=uYFoE_6zeU10_1mLLAHUr5c4S2XZMSc0Q_860o-KWEw,1517
93
93
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py,sha256=hzX84pheZdEsTtikF2KtBFiH44_xPjYXxLA6p4Ax1CY,1623
94
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py,sha256=LwtTZ-M7lHGxvRrGBXbyIT8AkA3T2OpeKqtNq3RK7Ik,2164
94
95
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py,sha256=jn-TrPwF0aR9kVoerwkbMZa3U6Hc6HjV6Zoau4qSH4g,1834
95
96
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py,sha256=Qxo6WeIDRy8aLh1tNKw0kSlzmUj3MtTak63oW2YwB24,1327
96
97
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py,sha256=N6HRr4lWHXY7bTHe2uXJe4D_EG9WqZPibZne6qLl9_k,1447
97
98
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py,sha256=JhY7axvVULLywDJ2ctA-gob5YPeaJYWsaMNjHT6L9CU,3021
99
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py,sha256=bk27KElJxOvKJ2RTz4ftleExQPMyWWS755KKmlImzbk,1241
98
100
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py,sha256=pal76wi7WgQWUNk56BrzfFV8jKpbDaHHdbwtgx_gXLI,2410
101
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=C07YqUNYW7ofpKAzKh0lBovXKLvaiXFb3oJU6k1dvu4,411
99
102
  aiagents4pharma/talk2knowledgegraphs/tools/__init__.py,sha256=zpD4h7EYtyq0QNOqLd6bkxrPlPb2XN64ceI9ncgESrA,171
100
103
  aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py,sha256=OEuOFncDRdb7TQEGq4rkT5On-jI-R7Nt8K5EBzaND8w,5338
101
104
  aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py,sha256=zhmsRp-8vjB5rRekqTA07d3yb-42HWqng9dDMkvK6hM,623
102
105
  aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py,sha256=te06QMFQfgJWrjaGrqpcOYeaV38jwm0KY_rXVSMHkeI,11468
103
106
  aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py,sha256=mDSBOxopDfNhEJeU8fVI8b5lXTYrRzcc97aLbFgYSy4,4413
104
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py,sha256=Q9mzcSmkmhdnOn13fxGh1fNECYoUR5Y5CCuEJTIxwAI,167
107
+ aiagents4pharma/talk2knowledgegraphs/utils/__init__.py,sha256=cZqb3LZLmBnmyAtWFv2Z-4uJvQmx0M4zKsfiWrlM3Pk,195
105
108
  aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py,sha256=6vQnPkeOWae_8jePjhma3sJuMTngy0I0tqzdFt6OqKg,2507
106
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py,sha256=4TGK0XIVkkfGOyrSVwFQ-Lp-rzH9CCl-fWcqkFJKRLc,174
109
+ aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py,sha256=IlrdGbRGD0IM7eMcpkOjuRjKNuH3lz_X8zN6RHwk61c,1340
110
+ aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py,sha256=POSDrSdFAWsBCueOPD-Fok-ARdTywJU1ivwpT9EU1Kw,199
107
111
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py,sha256=1nGznrAj-xT0xuSMBGz2dOujJ7M_IwSR84njxtxsy9A,2523
108
112
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py,sha256=2vi_elf6EgzfagFAO5QnL3a_aXZyN7B1EBziu44MTfM,3806
113
+ aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py,sha256=XH6JNfmMS38UEU7UGJeeabHfRykharnQpQaqjO86OlQ,1537
109
114
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py,sha256=8w0sjt3Ex5YJ_XvpKl9UbhdTiiaoMIarbPUxLBU-1Uw,2378
110
115
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py,sha256=36iKlisOpMtGR5xfTAlSHXWvPqVC_Jbezod8kbBBMVg,2136
111
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=tW426knki2DBIHcWyF_K04iMMdbpIn_e_TpPmTgz2dI,113
116
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=JKGavA-umsGX3ng17_UYAvDBdbg-W-mPn8Q6JfP7J9U,143
112
117
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py,sha256=Bx8x6zzk5614ApWB90N_iv4_Y_Uq0-KwUeBwYSdQMU4,924
113
118
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py,sha256=8eoxR-VHo0G7ReQIwje7xEhE-SJlHdef7_wJRpnvFIc,4116
119
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=qsVlDCGGDkUCv-R5_xFGhrtLS7P0CfagnM2qATwiOFM,1333
114
120
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtfzKhB8GuOBD47XRi0NprwEXkOzwNl5eeu-hDTI,86
115
121
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
116
122
  aiagents4pharma/talk2scholars/__init__.py,sha256=gphERyVKZHvOnMQsml7TIHlaIshHJ75R1J3FKExkfuY,120
@@ -152,8 +158,8 @@ aiagents4pharma/talk2scholars/tools/s2/query_results.py,sha256=EUfzRh5Qc_tMl5fDI
152
158
  aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha256=Lg1L4HQCN2LaQEyWtLD73O67PMoXkPHi-Y8rCzHS0A4,2499
153
159
  aiagents4pharma/talk2scholars/tools/s2/search.py,sha256=mnBQWDuQ50UVw6B-bRuL8Ek1av-pEtdgzVMxpEA2BpI,4296
154
160
  aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py,sha256=xgnUj9W9JkeTvB2VJBJUAnia789GGNGqdqgJ_G16v2s,5120
155
- aiagents4pharma-1.21.0.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
156
- aiagents4pharma-1.21.0.dist-info/METADATA,sha256=YsjDHw3yfqfPClv0N3j35AObxHBhStDojFUUslyd_1Q,7757
157
- aiagents4pharma-1.21.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
158
- aiagents4pharma-1.21.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
159
- aiagents4pharma-1.21.0.dist-info/RECORD,,
161
+ aiagents4pharma-1.22.0.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
162
+ aiagents4pharma-1.22.0.dist-info/METADATA,sha256=qMCGdj4nouCcxKZOt2SwH53fDMIgPkoUTirEIvk2Mfs,7789
163
+ aiagents4pharma-1.22.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
164
+ aiagents4pharma-1.22.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
165
+ aiagents4pharma-1.22.0.dist-info/RECORD,,