aiagents4pharma 1.32.0__py3-none-any.whl → 1.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +44 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +90 -0
- {aiagents4pharma-1.32.0.dist-info → aiagents4pharma-1.33.0.dist-info}/METADATA +1 -1
- {aiagents4pharma-1.32.0.dist-info → aiagents4pharma-1.33.0.dist-info}/RECORD +9 -7
- {aiagents4pharma-1.32.0.dist-info → aiagents4pharma-1.33.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.32.0.dist-info → aiagents4pharma-1.33.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.32.0.dist-info → aiagents4pharma-1.33.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Test cases for utils/enrichments/uniprot_proteins.py
|
5
|
+
"""
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from ..utils.enrichments.uniprot_proteins import EnrichmentWithUniProt
|
9
|
+
|
10
|
+
# In this test, we will consider 3 examples:
|
11
|
+
# 1. Gene Name: TP53
|
12
|
+
# 2. Gene Name: TP5 (Incomplete; must return empty results)
|
13
|
+
# 3. Gene Name: XZ (Shorter than 3 characters; must return empty results)
|
14
|
+
# The expected description of TP53 startswith:
|
15
|
+
START_DESCP = "Multifunctional transcription factor"
|
16
|
+
# The expected amino acid sequence of TP53 startswith:
|
17
|
+
START_SEQ = "MEEPQSDPSV"
|
18
|
+
|
19
|
+
@pytest.fixture(name="enrich_obj")
def fixture_uniprot_config():
    """Provide an EnrichmentWithUniProt instance to the tests as `enrich_obj`."""
    enrichment = EnrichmentWithUniProt()
    return enrichment
|
23
|
+
|
24
|
+
def test_enrich_documents(enrich_obj):
    """Test the enrich_documents method on one complete and two invalid gene names."""
    descriptions, sequences = enrich_obj.enrich_documents(["TP53", "TP5", "XZ"])

    # TP53 must come back with its known description and sequence prefixes.
    assert descriptions[0].startswith(START_DESCP)
    assert sequences[0].startswith(START_SEQ)

    # "TP5" (incomplete) and "XZ" (too short) must yield no enrichment at all.
    for position in (1, 2):
        assert descriptions[position] is None
        assert sequences[position] is None
|
34
|
+
|
35
|
+
def test_enrich_documents_with_rag(enrich_obj):
    """Test the enrich_documents_with_rag method, passing None as the documents."""
    descriptions, sequences = enrich_obj.enrich_documents_with_rag(
        ["TP53", "TP5", "XZ"], None
    )

    # TP53 must come back with its known description and sequence prefixes.
    assert descriptions[0].startswith(START_DESCP)
    assert sequences[0].startswith(START_SEQ)

    # "TP5" (incomplete) and "XZ" (too short) must yield no enrichment at all.
    for position in (1, 2):
        assert descriptions[position] is None
        assert sequences[position] is None
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Enrichment class for enriching Gene names with their function and sequence using UniProt.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import List
|
8
|
+
import logging
|
9
|
+
import json
|
10
|
+
import hydra
|
11
|
+
import requests
|
12
|
+
from .enrichments import Enrichments
|
13
|
+
|
14
|
+
# Initialize logger
|
15
|
+
logging.basicConfig(level=logging.INFO)
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
class EnrichmentWithUniProt(Enrichments):
    """
    Enrichment class using UniProt.

    Each gene name is looked up through the HTTP endpoint configured under
    `utils.enrichments.uniprot_proteins` in the Hydra configuration, and the
    function description and amino acid sequence of the first returned entry
    are collected.
    """

    @staticmethod
    def _fetch_first_entry(gene: str, cfg):
        """
        Query the configured UniProt endpoint for a single gene name.

        Args:
            gene: The gene name, sent as the `exact_gene` query parameter.
            cfg: The Hydra config node providing `uniprot_url`, `reviewed`,
                `isoform`, `organism` and `timeout`.

        Returns:
            The first entry of the JSON response, or None when the request
            fails, the HTTP status is not OK, or the response body is empty.
        """
        params = {
            "reviewed": cfg.reviewed,
            "isoform": cfg.isoform,
            "exact_gene": gene,
            "organism": cfg.organism,
            # You can get the list of all available organisms here:
            # https://www.uniprot.org/help/taxonomy
        }
        try:
            response = requests.get(cfg.uniprot_url,
                                    headers={"Accept": "application/json"},
                                    params=params,
                                    timeout=cfg.timeout)
        except requests.RequestException as err:
            # A timeout or connection error for one gene is treated like a
            # non-OK response, so it does not abort the rest of the batch.
            logger.warning("UniProt request failed for gene %s: %s", gene, err)
            return None
        # if the response is not ok
        if not response.ok:
            return None
        # NOTE(review): assumes the body is a JSON list of entries - confirm
        # against the API behind cfg.uniprot_url.
        response_body = json.loads(response.text)
        # if the response body is empty
        if not response_body:
            return None
        return response_body[0]

    @staticmethod
    def _function_description(entry: dict) -> str:
        """
        Concatenate the text values of all FUNCTION comments of an entry.

        Entries without a `comments` key, or without any FUNCTION comment,
        yield an empty string instead of raising a KeyError.
        """
        return ''.join(
            value['value']
            for comment in entry.get('comments', [])
            if comment.get('type') == 'FUNCTION'
            for value in comment.get('text', [])
        )

    def enrich_documents(self, texts: List[str]) -> tuple:
        """
        Enrich a list of input UniProt gene names with their function and sequence.

        Args:
            texts: The list of gene names to be enriched.

        Returns:
            A tuple `(descriptions, sequences)` of two lists aligned with
            `texts`. A position holds None in both lists when no entry could
            be retrieved for that gene name.
        """
        logger.info(
            "Load Hydra configuration for Gene enrichment with description and sequence.")
        with hydra.initialize(version_base=None, config_path="../../configs"):
            cfg = hydra.compose(config_name='config',
                                overrides=['utils/enrichments/uniprot_proteins=default'])
            cfg = cfg.utils.enrichments.uniprot_proteins

        descriptions = []
        sequences = []
        for gene in texts:
            entry = self._fetch_first_entry(gene, cfg)
            if entry is None:
                descriptions.append(None)
                sequences.append(None)
                continue
            descriptions.append(self._function_description(entry))
            sequences.append(entry['sequence']['sequence'])
        return descriptions, sequences

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input UniProt gene names with their function and sequence.

        Args:
            texts: The list of gene names to be enriched.
            docs: Not used by this implementation; the call is delegated to
                `enrich_documents` with `texts` only.

        Returns:
            The same `(descriptions, sequences)` tuple as `enrich_documents`.
        """
        return self.enrich_documents(texts)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: aiagents4pharma
|
3
|
-
Version: 1.32.0
|
3
|
+
Version: 1.33.0
|
4
4
|
Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -75,7 +75,7 @@ aiagents4pharma/talk2knowledgegraphs/__init__.py,sha256=Z0Eo7LTiKk0STsr8VI7wkCLq
|
|
75
75
|
aiagents4pharma/talk2knowledgegraphs/agents/__init__.py,sha256=iOAzuy_8A03tQDFtSBhC9dldUo62z5gfxcVtXAdLOJs,92
|
76
76
|
aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py,sha256=IcXSZ2qQA1m-gS-o0Pj_g1oar8uPdhsbaovloUFka3Q,3058
|
77
77
|
aiagents4pharma/talk2knowledgegraphs/configs/__init__.py,sha256=4_DVdpahaJ55yPl0aZotlFA_MYWLFF2cubWyKtBVI_Q,126
|
78
|
-
aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=
|
78
|
+
aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=KubbjqcaMgSMiJ4085F7O-cul-dYUAF7cWLqaM8Ss-g,277
|
79
79
|
aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py,sha256=-fAORvyFmG2iSvFOFDixmt9OTQRR58y89uhhu2EgbA8,46
|
80
80
|
aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml,sha256=ENCGROwYFpR6g4QD518h73sshdn3vPVpotBMk1QJcpU,4830
|
81
81
|
aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py,sha256=fKfc3FR7g5KjY9b6jzrU6cwKTVVpkoVZQS3dvUowu34,69
|
@@ -112,6 +112,7 @@ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransfo
|
|
112
112
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py,sha256=N6HRr4lWHXY7bTHe2uXJe4D_EG9WqZPibZne6qLl9_k,1447
|
113
113
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py,sha256=JhY7axvVULLywDJ2ctA-gob5YPeaJYWsaMNjHT6L9CU,3021
|
114
114
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py,sha256=bk27KElJxOvKJ2RTz4ftleExQPMyWWS755KKmlImzbk,1241
|
115
|
+
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py,sha256=G13Diw7cA5TGINUNO1CDnN4rM6KbepxRXNjuzY578DI,1611
|
115
116
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py,sha256=pal76wi7WgQWUNk56BrzfFV8jKpbDaHHdbwtgx_gXLI,2410
|
116
117
|
aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=C07YqUNYW7ofpKAzKh0lBovXKLvaiXFb3oJU6k1dvu4,411
|
117
118
|
aiagents4pharma/talk2knowledgegraphs/tools/__init__.py,sha256=zpD4h7EYtyq0QNOqLd6bkxrPlPb2XN64ceI9ncgESrA,171
|
@@ -128,10 +129,11 @@ aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py,sha256=2vi_
|
|
128
129
|
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py,sha256=XH6JNfmMS38UEU7UGJeeabHfRykharnQpQaqjO86OlQ,1537
|
129
130
|
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py,sha256=8w0sjt3Ex5YJ_XvpKl9UbhdTiiaoMIarbPUxLBU-1Uw,2378
|
130
131
|
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py,sha256=36iKlisOpMtGR5xfTAlSHXWvPqVC_Jbezod8kbBBMVg,2136
|
131
|
-
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=
|
132
|
+
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=gkHVwImeTnDBP_q4TWjn_CMJ-BMmt8NJqNkA1IOLvuI,174
|
132
133
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py,sha256=Bx8x6zzk5614ApWB90N_iv4_Y_Uq0-KwUeBwYSdQMU4,924
|
133
134
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py,sha256=8eoxR-VHo0G7ReQIwje7xEhE-SJlHdef7_wJRpnvFIc,4116
|
134
135
|
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=qsVlDCGGDkUCv-R5_xFGhrtLS7P0CfagnM2qATwiOFM,1333
|
136
|
+
aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py,sha256=z0Jb3tt8VzRjzqI9oVcUvRlPPg6BUdmslfKDIEFE_h8,3013
|
135
137
|
aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtfzKhB8GuOBD47XRi0NprwEXkOzwNl5eeu-hDTI,86
|
136
138
|
aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
|
137
139
|
aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
|
@@ -219,8 +221,8 @@ aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=lyrfpx8NH
|
|
219
221
|
aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9yrBYaDnRe7sR6PrpwR82OBJbA2P_Tc6RbxAbM,2748
|
220
222
|
aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
|
221
223
|
aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
|
222
|
-
aiagents4pharma-1.
|
223
|
-
aiagents4pharma-1.
|
224
|
-
aiagents4pharma-1.
|
225
|
-
aiagents4pharma-1.
|
226
|
-
aiagents4pharma-1.
|
224
|
+
aiagents4pharma-1.33.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
|
225
|
+
aiagents4pharma-1.33.0.dist-info/METADATA,sha256=cv2sm_J63wHmZipscCFn4rQtFNAJSz-trdl-bAsHwDw,16043
|
226
|
+
aiagents4pharma-1.33.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
227
|
+
aiagents4pharma-1.33.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
|
228
|
+
aiagents4pharma-1.33.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|