aiagents4pharma 1.39.3__py3-none-any.whl → 1.39.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +3 -1
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +10 -10
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +11 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +15 -7
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +31 -9
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +33 -2
- {aiagents4pharma-1.39.3.dist-info → aiagents4pharma-1.39.5.dist-info}/METADATA +1 -1
- {aiagents4pharma-1.39.3.dist-info → aiagents4pharma-1.39.5.dist-info}/RECORD +11 -11
- {aiagents4pharma-1.39.3.dist-info → aiagents4pharma-1.39.5.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.39.3.dist-info → aiagents4pharma-1.39.5.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.39.3.dist-info → aiagents4pharma-1.39.5.dist-info}/top_level.txt +0 -0
@@ -3,4 +3,6 @@ prompt: >
|
|
3
3
|
Given the user question, extract the relevant species names.
|
4
4
|
If the user asks for a specific species, extract that species.
|
5
5
|
If none of the species match the user question, return None.
|
6
|
-
If the user asks for all species, return all species.
|
6
|
+
If the user asks for all species, return all species.
|
7
|
+
If the user does not specify any species or simply asks for
|
8
|
+
model annotations, return all species.
|
@@ -42,11 +42,11 @@ def test_enrich_documents(enrich_obj):
|
|
42
42
|
"XYZ_0000000",
|
43
43
|
]
|
44
44
|
descriptions = enrich_obj.enrich_documents(ols_terms)
|
45
|
-
assert descriptions[0]
|
46
|
-
assert descriptions[1]
|
47
|
-
assert descriptions[2]
|
48
|
-
assert descriptions[3]
|
49
|
-
assert descriptions[4]
|
45
|
+
assert CL_DESC in descriptions[0]
|
46
|
+
assert GO_DESC in descriptions[1]
|
47
|
+
assert UBERON_DESC in descriptions[2]
|
48
|
+
assert HP_DESC in descriptions[3]
|
49
|
+
assert MONDO_DESC in descriptions[4]
|
50
50
|
assert descriptions[5] is None
|
51
51
|
|
52
52
|
|
@@ -61,9 +61,9 @@ def test_enrich_documents_with_rag(enrich_obj):
|
|
61
61
|
"XYZ_0000000",
|
62
62
|
]
|
63
63
|
descriptions = enrich_obj.enrich_documents_with_rag(ols_terms, None)
|
64
|
-
assert descriptions[0]
|
65
|
-
assert descriptions[1]
|
66
|
-
assert descriptions[2]
|
67
|
-
assert descriptions[3]
|
68
|
-
assert descriptions[4]
|
64
|
+
assert CL_DESC in descriptions[0]
|
65
|
+
assert GO_DESC in descriptions[1]
|
66
|
+
assert UBERON_DESC in descriptions[2]
|
67
|
+
assert HP_DESC in descriptions[3]
|
68
|
+
assert MONDO_DESC in descriptions[4]
|
69
69
|
assert descriptions[5] is None
|
@@ -4,6 +4,17 @@ Test cases for utils/pubchem_utils.py
|
|
4
4
|
|
5
5
|
from ..utils import pubchem_utils
|
6
6
|
|
7
|
+
def test_cas_rn2pubchem_cid():
|
8
|
+
"""
|
9
|
+
Test the casRN2pubchem_cid function.
|
10
|
+
|
11
|
+
The CAS RN for ethyl carbonate is 105-58-8.
|
12
|
+
The PubChem CID for ethyl carbonate is 7766.
|
13
|
+
"""
|
14
|
+
casrn = "105-58-8"
|
15
|
+
pubchem_cid = pubchem_utils.cas_rn2pubchem_cid(casrn)
|
16
|
+
assert pubchem_cid == 7766
|
17
|
+
|
7
18
|
def test_external_id2pubchem_cid():
|
8
19
|
"""
|
9
20
|
Test the external_id2pubchem_cid function.
|
@@ -54,13 +54,21 @@ class EnrichmentWithOLS(Enrichments):
|
|
54
54
|
descriptions.append(None)
|
55
55
|
continue
|
56
56
|
# Add the description to the list
|
57
|
-
description =
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
57
|
+
description = []
|
58
|
+
for term in response_body['_embedded']['terms']:
|
59
|
+
# If the term has a description, add it to the list
|
60
|
+
description += term.get('description', [])
|
61
|
+
# Add synonyms to the description
|
62
|
+
description += term.get('synonyms', [])
|
63
|
+
# Add the label to the description
|
64
|
+
# Label is not provided as list, so we need to convert it to a list
|
65
|
+
description += [term.get('label', [])]
|
66
|
+
# Remove duplicate entries from the description
|
67
|
+
description = list(set(description))
|
68
|
+
# Join the description with new line
|
69
|
+
description = '\n'.join(description)
|
70
|
+
# Add the description to the list
|
71
|
+
descriptions.append(description)
|
64
72
|
return descriptions
|
65
73
|
|
66
74
|
def enrich_documents_with_rag(self, texts, docs):
|
@@ -4,11 +4,17 @@
|
|
4
4
|
Enrichment class for enriching PubChem IDs with their STRINGS representation and descriptions.
|
5
5
|
"""
|
6
6
|
|
7
|
+
import logging
|
7
8
|
from typing import List
|
8
|
-
import
|
9
|
+
import requests
|
10
|
+
import hydra
|
9
11
|
from .enrichments import Enrichments
|
10
12
|
from ..pubchem_utils import pubchem_cid_description
|
11
13
|
|
14
|
+
# Initialize logger
|
15
|
+
logging.basicConfig(level=logging.INFO)
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
12
18
|
class EnrichmentWithPubChem(Enrichments):
|
13
19
|
"""
|
14
20
|
Enrichment class using PubChem
|
@@ -27,16 +33,32 @@ class EnrichmentWithPubChem(Enrichments):
|
|
27
33
|
enriched_pubchem_ids_smiles = []
|
28
34
|
enriched_pubchem_ids_descriptions = []
|
29
35
|
|
36
|
+
# Load Hydra configuration to get the base URL for PubChem
|
37
|
+
with hydra.initialize(version_base=None, config_path="../../configs"):
|
38
|
+
cfg = hydra.compose(config_name='config',
|
39
|
+
overrides=['utils/pubchem_utils=default'])
|
40
|
+
cfg = cfg.utils.pubchem_utils
|
41
|
+
# Iterate over each PubChem ID in the input list
|
30
42
|
pubchem_cids = texts
|
31
43
|
for pubchem_cid in pubchem_cids:
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
44
|
+
# Prepare the URL
|
45
|
+
pubchem_url = f"{cfg.pubchem_cid2smiles_url}/{pubchem_cid}/property/smiles/JSON"
|
46
|
+
# Get the data
|
47
|
+
response = requests.get(pubchem_url, timeout=60)
|
48
|
+
data = response.json()
|
49
|
+
# Extract the PubChem CID SMILES
|
50
|
+
smiles = ''
|
51
|
+
description = ''
|
52
|
+
if "PropertyTable" in data:
|
53
|
+
for prop in data["PropertyTable"]['Properties']:
|
54
|
+
smiles = prop.get("SMILES", '')
|
55
|
+
description = pubchem_cid_description(pubchem_cid)
|
56
|
+
else:
|
57
|
+
# If the PubChem ID is not found, set smiles and description to None
|
58
|
+
smiles = None
|
59
|
+
description = None
|
60
|
+
enriched_pubchem_ids_smiles.append(smiles)
|
61
|
+
enriched_pubchem_ids_descriptions.append(description)
|
40
62
|
|
41
63
|
return enriched_pubchem_ids_descriptions, enriched_pubchem_ids_smiles
|
42
64
|
|
@@ -12,6 +12,37 @@ import hydra
|
|
12
12
|
logging.basicConfig(level=logging.INFO)
|
13
13
|
logger = logging.getLogger(__name__)
|
14
14
|
|
15
|
+
def cas_rn2pubchem_cid(casrn):
|
16
|
+
"""
|
17
|
+
Convert CAS RN to PubChem CID.
|
18
|
+
|
19
|
+
Args:
|
20
|
+
casrn: The CAS RN of the drug.
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
The PubChem CID of the drug.
|
24
|
+
"""
|
25
|
+
# Load Hydra configuration for PubChem ID conversion
|
26
|
+
with hydra.initialize(version_base=None, config_path="../configs"):
|
27
|
+
cfg = hydra.compose(config_name='config',
|
28
|
+
overrides=['utils/pubchem_utils=default'])
|
29
|
+
cfg = cfg.utils.pubchem_utils
|
30
|
+
# Prepare the URL
|
31
|
+
pubchem_url_for_drug = f"{cfg.pubchem_casrn2cid_url}{casrn}/record/JSON"
|
32
|
+
# Get the data
|
33
|
+
response = requests.get(pubchem_url_for_drug, timeout=60)
|
34
|
+
data = response.json()
|
35
|
+
# Extract the PubChem CID
|
36
|
+
cid = None
|
37
|
+
for substance in data.get("PC_Substances", []):
|
38
|
+
for compound in substance.get("compound", []):
|
39
|
+
if "id" in compound and "type" in compound["id"] and compound["id"]["type"] == 1:
|
40
|
+
cid = compound["id"].get("id", {}).get("cid")
|
41
|
+
break
|
42
|
+
if cid is not None:
|
43
|
+
break
|
44
|
+
return cid
|
45
|
+
|
15
46
|
def external_id2pubchem_cid(db, db_id):
|
16
47
|
"""
|
17
48
|
Convert external DB ID to PubChem CID.
|
@@ -26,7 +57,7 @@ def external_id2pubchem_cid(db, db_id):
|
|
26
57
|
Returns:
|
27
58
|
The PubChem CID of the drug.
|
28
59
|
"""
|
29
|
-
|
60
|
+
# Load Hydra configuration for PubChem ID conversion
|
30
61
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
31
62
|
cfg = hydra.compose(config_name='config',
|
32
63
|
overrides=['utils/pubchem_utils=default'])
|
@@ -55,7 +86,7 @@ def pubchem_cid_description(cid):
|
|
55
86
|
Returns:
|
56
87
|
The description of the PubChem CID.
|
57
88
|
"""
|
58
|
-
|
89
|
+
# Load Hydra configuration for PubChem CID description
|
59
90
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
60
91
|
cfg = hydra.compose(config_name='config',
|
61
92
|
overrides=['utils/pubchem_utils=default'])
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: aiagents4pharma
|
3
|
-
Version: 1.39.3
|
3
|
+
Version: 1.39.5
|
4
4
|
Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -28,7 +28,7 @@ aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml,sha256=7k
|
|
28
28
|
aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py,sha256=-fAORvyFmG2iSvFOFDixmt9OTQRR58y89uhhu2EgbA8,46
|
29
29
|
aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml,sha256=14Aic9IDr1eOyeLo_YGMwZL40CYsgqOXix2j9ucB4EA,464
|
30
30
|
aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py,sha256=-fAORvyFmG2iSvFOFDixmt9OTQRR58y89uhhu2EgbA8,46
|
31
|
-
aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml,sha256=
|
31
|
+
aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml,sha256=Dx6afMdafUEZjN4LWEFQWkjXHw7DChSpQeb_wlQXRUo,408
|
32
32
|
aiagents4pharma/talk2biomodels/models/__init__.py,sha256=5fTHHm3PVloYPNKXbgNlcPgv3-u28ZquxGydFYDfhJA,122
|
33
33
|
aiagents4pharma/talk2biomodels/models/basico_model.py,sha256=4LjeHKGcXsdm1gFxs9SGVNDerajZ7REvCeNch0s2pDg,5242
|
34
34
|
aiagents4pharma/talk2biomodels/models/sys_bio_model.py,sha256=JeoiGQAvQABHnG0wKR2XBmmxqQdtgO6kxaLDUTUmr1s,2001
|
@@ -113,12 +113,12 @@ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py,sha25
|
|
113
113
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py,sha256=Xkuf2UFGCXldj1zcsh6kqfQYLDf5i0B6KP3KcmNLSzQ,1452
|
114
114
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py,sha256=N6HRr4lWHXY7bTHe2uXJe4D_EG9WqZPibZne6qLl9_k,1447
|
115
115
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py,sha256=JhY7axvVULLywDJ2ctA-gob5YPeaJYWsaMNjHT6L9CU,3021
|
116
|
-
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py,sha256=
|
116
|
+
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py,sha256=5nSBLwfBnwq4K78Q4GM0zTeym5csxHhOwGwx2utBBEU,2280
|
117
117
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py,sha256=0SgYvqdvxseUYTHx2KuSNI2hnmQ3VVVz0F-79_-P41o,1769
|
118
118
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py,sha256=r1D74mavsnSCm4xnWl0n0nM9PZqgm3doD2dulNrKNVQ,1754
|
119
119
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py,sha256=G13Diw7cA5TGINUNO1CDnN4rM6KbepxRXNjuzY578DI,1611
|
120
120
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py,sha256=pal76wi7WgQWUNk56BrzfFV8jKpbDaHHdbwtgx_gXLI,2410
|
121
|
-
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=
|
121
|
+
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=31WPX8MrhnztoHUROAlH5KvHeXMbB_Jndp3ypAKJO9E,1543
|
122
122
|
aiagents4pharma/talk2knowledgegraphs/tools/__init__.py,sha256=uleTEbhgvlYw4fOqV4NmoFvxGTon2Oim7jTQ5qPmYoU,216
|
123
123
|
aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py,sha256=OEuOFncDRdb7TQEGq4rkT5On-jI-R7Nt8K5EBzaND8w,5338
|
124
124
|
aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py,sha256=zhmsRp-8vjB5rRekqTA07d3yb-42HWqng9dDMkvK6hM,623
|
@@ -127,7 +127,7 @@ aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py,sha256=te06QMF
|
|
127
127
|
aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py,sha256=mDSBOxopDfNhEJeU8fVI8b5lXTYrRzcc97aLbFgYSy4,4413
|
128
128
|
aiagents4pharma/talk2knowledgegraphs/utils/__init__.py,sha256=cZqb3LZLmBnmyAtWFv2Z-4uJvQmx0M4zKsfiWrlM3Pk,195
|
129
129
|
aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py,sha256=6vQnPkeOWae_8jePjhma3sJuMTngy0I0tqzdFt6OqKg,2507
|
130
|
-
aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py,sha256=
|
130
|
+
aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py,sha256=kMSabEZDIa6BruoFtnvWQnUWX47FUmTo22CLihYz8F8,3458
|
131
131
|
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py,sha256=POSDrSdFAWsBCueOPD-Fok-ARdTywJU1ivwpT9EU1Kw,199
|
132
132
|
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py,sha256=1nGznrAj-xT0xuSMBGz2dOujJ7M_IwSR84njxtxsy9A,2523
|
133
133
|
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py,sha256=2vi_elf6EgzfagFAO5QnL3a_aXZyN7B1EBziu44MTfM,3806
|
@@ -137,8 +137,8 @@ aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py,sh
|
|
137
137
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=K157MWJ4dn2fj3G5ClhyAOXg9jI2H02GP07J6UpasJw,230
|
138
138
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py,sha256=Bx8x6zzk5614ApWB90N_iv4_Y_Uq0-KwUeBwYSdQMU4,924
|
139
139
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py,sha256=8eoxR-VHo0G7ReQIwje7xEhE-SJlHdef7_wJRpnvFIc,4116
|
140
|
-
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py,sha256=
|
141
|
-
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=
|
140
|
+
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py,sha256=QLINhZM51A8JwFoF0INzdUFT2pdlc_h0rdYRgKr49vQ,2772
|
141
|
+
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=zdBYhRxYfdNSViBwDNwc-9DTYkYGTPlyyHB_OIgbb3E,2630
|
142
142
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py,sha256=I0cD0Fk2Uk27_4jEaIhpoGhoMh_RphY1VtkMnk4dkPg,2011
|
143
143
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py,sha256=z0Jb3tt8VzRjzqI9oVcUvRlPPg6BUdmslfKDIEFE_h8,3013
|
144
144
|
aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=5bt3H6gGSAwN2K-IG7AHwG2lC4yQeMd2_jbhu2z5XKg,116
|
@@ -251,8 +251,8 @@ aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9y
|
|
251
251
|
aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
|
252
252
|
aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
|
253
253
|
aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py,sha256=ERBha8afU6Q1EaRBe9qB8tchOzZ4_KfFgDW6EElOJoU,4816
|
254
|
-
aiagents4pharma-1.39.
|
255
|
-
aiagents4pharma-1.39.
|
256
|
-
aiagents4pharma-1.39.
|
257
|
-
aiagents4pharma-1.39.
|
258
|
-
aiagents4pharma-1.39.
|
254
|
+
aiagents4pharma-1.39.5.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
|
255
|
+
aiagents4pharma-1.39.5.dist-info/METADATA,sha256=plRPPLrdrpqnpbLLT_Kkaw3aONTG5VqpG1_q_1wddAg,15462
|
256
|
+
aiagents4pharma-1.39.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
257
|
+
aiagents4pharma-1.39.5.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
|
258
|
+
aiagents4pharma-1.39.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|