aiagents4pharma 1.32.0__py3-none-any.whl → 1.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,4 +5,5 @@ defaults:
5
5
  - tools/subgraph_summarization: default
6
6
  - tools/graphrag_reasoning: default
7
7
  - utils/pubchem_utils: default
8
+ - utils/enrichments/uniprot_proteins: default
8
9
  - app/frontend: default
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Test cases for utils/enrichments/uniprot_proteins.py
5
+ """
6
+
7
+ import pytest
8
+ from ..utils.enrichments.uniprot_proteins import EnrichmentWithUniProt
9
+
10
+ # In this test, we will consider 3 examples:
11
+ # 1. Gene Name: TP53
12
+ # 2. Gene Name: TP5 (Incomplete; must return empty results)
13
+ # 3. Gene Name: XZ (Shorter than 3 characters; must return empty results)
14
+ # The expected description of TP53 startswith:
15
+ START_DESCP = "Multifunctional transcription factor"
16
+ # The expected amino acid sequence of TP53 startswith:
17
+ START_SEQ = "MEEPQSDPSV"
18
+
19
@pytest.fixture(name="enrich_obj")
def fixture_uniprot_config():
    """Return the EnrichmentWithUniProt instance shared by the tests as `enrich_obj`."""
    return EnrichmentWithUniProt()
23
+
24
def test_enrich_documents(enrich_obj):
    """Check enrich_documents on one resolvable and two unresolvable gene names."""
    queried_genes = ["TP53", "TP5", "XZ"]
    descs, seqs = enrich_obj.enrich_documents(queried_genes)
    # TP53 must come back with the expected description and sequence prefixes.
    assert descs[0].startswith(START_DESCP)
    assert seqs[0].startswith(START_SEQ)
    # TP5 and XZ must yield no data at all.
    for idx in (1, 2):
        assert descs[idx] is None
        assert seqs[idx] is None
34
+
35
def test_enrich_documents_with_rag(enrich_obj):
    """Check enrich_documents_with_rag on one resolvable and two unresolvable gene names."""
    queried_genes = ["TP53", "TP5", "XZ"]
    # No supporting documents are supplied; None is passed for `docs`.
    descs, seqs = enrich_obj.enrich_documents_with_rag(queried_genes, None)
    # TP53 must come back with the expected description and sequence prefixes.
    assert descs[0].startswith(START_DESCP)
    assert seqs[0].startswith(START_SEQ)
    # TP5 and XZ must yield no data at all.
    for idx in (1, 2):
        assert descs[idx] is None
        assert seqs[idx] is None
@@ -4,3 +4,4 @@ This package contains modules to use the enrichment model
4
4
  from . import enrichments
5
5
  from . import ollama
6
6
  from . import pubchem_strings
7
+ from . import uniprot_proteins
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching Gene names with their function and sequence using UniProt.
5
+ """
6
+
7
+ from typing import List
8
+ import logging
9
+ import json
10
+ import hydra
11
+ import requests
12
+ from .enrichments import Enrichments
13
+
14
+ # Initialize logger
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
class EnrichmentWithUniProt(Enrichments):
    """
    Enrichment class using UniProt.

    For each gene name, queries the UniProt endpoint configured under
    ``utils/enrichments/uniprot_proteins`` and extracts the text of the
    entry's FUNCTION comments together with its sequence.
    """

    def enrich_documents(self, texts: List[str]) -> List[str]:
        """
        Enrich a list of input UniProt gene names with their function and sequence.

        Args:
            texts: The list of gene names to be enriched.

        Returns:
            A ``(descriptions, sequences)`` pair of lists, each aligned with
            ``texts``. Both entries for a gene are ``None`` when the request
            fails, the response is not OK, or the response body is empty.
            The description is an empty string when the matched entry has
            no FUNCTION comment.

        NOTE(review): the ``List[str]`` return annotation is kept as-is for
        interface compatibility, but the value returned is a pair of lists.
        """
        logger.info("Load Hydra configuration for Gene enrichment with description and sequence.")
        with hydra.initialize(version_base=None, config_path="../../configs"):
            cfg = hydra.compose(config_name='config',
                                overrides=['utils/enrichments/uniprot_proteins=default'])
            cfg = cfg.utils.enrichments.uniprot_proteins

        descriptions = []
        sequences = []
        for gene in texts:
            description, sequence = self._lookup_gene(cfg, gene)
            descriptions.append(description)
            sequences.append(sequence)
        return descriptions, sequences

    @staticmethod
    def _lookup_gene(cfg, gene):
        """Query UniProt for one gene; return (description, sequence) or (None, None)."""
        params = {
            "reviewed": cfg.reviewed,
            "isoform": cfg.isoform,
            "exact_gene": gene,
            "organism": cfg.organism,
            # You can get the list of all available organisms here:
            # https://www.uniprot.org/help/taxonomy
        }
        try:
            response = requests.get(cfg.uniprot_url,
                                    headers={"Accept": "application/json"},
                                    params=params,
                                    timeout=cfg.timeout)
        except requests.RequestException as err:
            # A timeout or connection error on one gene must not abort the
            # whole batch; treat it like any other failed lookup.
            logger.warning("UniProt request failed for gene %s: %s", gene, err)
            return None, None
        # if the response is not ok
        if not response.ok:
            return None, None
        try:
            response_body = json.loads(response.text)
        except json.JSONDecodeError as err:
            logger.warning("UniProt returned invalid JSON for gene %s: %s", gene, err)
            return None, None
        # if the response body is empty
        if not response_body:
            return None, None
        # Only the first returned entry is used.
        entry = response_body[0]
        # Concatenate the text of every FUNCTION comment; entries without
        # comments yield an empty description instead of raising KeyError.
        description = ''.join(
            value['value']
            for comment in entry.get('comments', [])
            if comment.get('type') == 'FUNCTION'
            for value in comment.get('text', [])
        )
        sequence = entry.get('sequence', {}).get('sequence')
        return description, sequence

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input UniProt gene names with their function and sequence.

        Args:
            texts: The list of gene names to be enriched.
            docs: Not used by this implementation; the call is forwarded
                to ``enrich_documents`` with ``texts`` only.

        Returns:
            The same ``(descriptions, sequences)`` pair of lists that
            ``enrich_documents`` returns for ``texts``.
        """
        return self.enrich_documents(texts)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiagents4pharma
3
- Version: 1.32.0
3
+ Version: 1.33.0
4
4
  Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -75,7 +75,7 @@ aiagents4pharma/talk2knowledgegraphs/__init__.py,sha256=Z0Eo7LTiKk0STsr8VI7wkCLq
75
75
  aiagents4pharma/talk2knowledgegraphs/agents/__init__.py,sha256=iOAzuy_8A03tQDFtSBhC9dldUo62z5gfxcVtXAdLOJs,92
76
76
  aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py,sha256=IcXSZ2qQA1m-gS-o0Pj_g1oar8uPdhsbaovloUFka3Q,3058
77
77
  aiagents4pharma/talk2knowledgegraphs/configs/__init__.py,sha256=4_DVdpahaJ55yPl0aZotlFA_MYWLFF2cubWyKtBVI_Q,126
78
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=bag4w3JCSqaojG37MTksy3ZehAPe3qoVzjIN2uh3nrc,229
78
+ aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=KubbjqcaMgSMiJ4085F7O-cul-dYUAF7cWLqaM8Ss-g,277
79
79
  aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py,sha256=-fAORvyFmG2iSvFOFDixmt9OTQRR58y89uhhu2EgbA8,46
80
80
  aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml,sha256=ENCGROwYFpR6g4QD518h73sshdn3vPVpotBMk1QJcpU,4830
81
81
  aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py,sha256=fKfc3FR7g5KjY9b6jzrU6cwKTVVpkoVZQS3dvUowu34,69
@@ -112,6 +112,7 @@ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransfo
112
112
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py,sha256=N6HRr4lWHXY7bTHe2uXJe4D_EG9WqZPibZne6qLl9_k,1447
113
113
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py,sha256=JhY7axvVULLywDJ2ctA-gob5YPeaJYWsaMNjHT6L9CU,3021
114
114
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py,sha256=bk27KElJxOvKJ2RTz4ftleExQPMyWWS755KKmlImzbk,1241
115
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py,sha256=G13Diw7cA5TGINUNO1CDnN4rM6KbepxRXNjuzY578DI,1611
115
116
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py,sha256=pal76wi7WgQWUNk56BrzfFV8jKpbDaHHdbwtgx_gXLI,2410
116
117
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=C07YqUNYW7ofpKAzKh0lBovXKLvaiXFb3oJU6k1dvu4,411
117
118
  aiagents4pharma/talk2knowledgegraphs/tools/__init__.py,sha256=zpD4h7EYtyq0QNOqLd6bkxrPlPb2XN64ceI9ncgESrA,171
@@ -128,10 +129,11 @@ aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py,sha256=2vi_
128
129
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py,sha256=XH6JNfmMS38UEU7UGJeeabHfRykharnQpQaqjO86OlQ,1537
129
130
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py,sha256=8w0sjt3Ex5YJ_XvpKl9UbhdTiiaoMIarbPUxLBU-1Uw,2378
130
131
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py,sha256=36iKlisOpMtGR5xfTAlSHXWvPqVC_Jbezod8kbBBMVg,2136
131
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=JKGavA-umsGX3ng17_UYAvDBdbg-W-mPn8Q6JfP7J9U,143
132
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=gkHVwImeTnDBP_q4TWjn_CMJ-BMmt8NJqNkA1IOLvuI,174
132
133
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py,sha256=Bx8x6zzk5614ApWB90N_iv4_Y_Uq0-KwUeBwYSdQMU4,924
133
134
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py,sha256=8eoxR-VHo0G7ReQIwje7xEhE-SJlHdef7_wJRpnvFIc,4116
134
135
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=qsVlDCGGDkUCv-R5_xFGhrtLS7P0CfagnM2qATwiOFM,1333
136
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py,sha256=z0Jb3tt8VzRjzqI9oVcUvRlPPg6BUdmslfKDIEFE_h8,3013
135
137
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtfzKhB8GuOBD47XRi0NprwEXkOzwNl5eeu-hDTI,86
136
138
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
137
139
  aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
@@ -219,8 +221,8 @@ aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=lyrfpx8NH
219
221
  aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9yrBYaDnRe7sR6PrpwR82OBJbA2P_Tc6RbxAbM,2748
220
222
  aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
221
223
  aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
222
- aiagents4pharma-1.32.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
223
- aiagents4pharma-1.32.0.dist-info/METADATA,sha256=o3uHTzLfOPglRJUlK6_vKAuJ0qwKhGt7BeiQHeF4U_o,16043
224
- aiagents4pharma-1.32.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
225
- aiagents4pharma-1.32.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
226
- aiagents4pharma-1.32.0.dist-info/RECORD,,
224
+ aiagents4pharma-1.33.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
225
+ aiagents4pharma-1.33.0.dist-info/METADATA,sha256=cv2sm_J63wHmZipscCFn4rQtFNAJSz-trdl-bAsHwDw,16043
226
+ aiagents4pharma-1.33.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
227
+ aiagents4pharma-1.33.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
228
+ aiagents4pharma-1.33.0.dist-info/RECORD,,