aiagents4pharma 1.32.0__py3-none-any.whl → 1.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,4 +5,7 @@ defaults:
5
5
  - tools/subgraph_summarization: default
6
6
  - tools/graphrag_reasoning: default
7
7
  - utils/pubchem_utils: default
8
+ - utils/enrichments/uniprot_proteins: default
9
+ - utils/enrichments/ols_terms: default
10
+ - utils/enrichments/reactome_pathways: default
8
11
  - app/frontend: default
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Test cases for utils/enrichments/ols_terms.py
5
+ """
6
+
7
+ import pytest
8
+ from ..utils.enrichments.ols_terms import EnrichmentWithOLS
9
+
10
+ # In this test, we will consider 6 examples:
11
+ # 1. CL_0000899: T-helper 17 cell (Cell Ontology)
12
+ # 2. GO_0046427: positive regulation of receptor signaling pathway via JAK-STAT (GO)
13
+ # 3. UBERON_0000004: nose (Uberon)
14
+ # 4. HP_0009739: Hypoplasia of the antihelix (Human Phenotype Ontology)
15
+ # 5. MONDO_0005011: Crohn disease (MONDO)
16
+ # 6. XYZ_0000000: Non-existing term (for testing error handling)
17
+
18
+ # The expected description for each term starts with:
19
+ CL_DESC = "CD4-positive, alpha-beta T cell"
20
+ GO_DESC = "Any process that activates or increases the frequency, rate or extent"
21
+ UBERON_DESC = "The olfactory organ of vertebrates, consisting of nares"
22
+ HP_DESC = "Hypoplasia of the antihelix"
23
+ MONDO_DESC = "A gastrointestinal disorder characterized by chronic inflammation"
24
+
25
+ # The expected description for the non-existing term is None
26
+
27
+ @pytest.fixture(name="enrich_obj")
28
+ def fixture_uniprot_config():
29
+ """Return an EnrichmentWithOLS instance to be used by the tests."""
30
+ return EnrichmentWithOLS()
31
+
32
+ def test_enrich_documents(enrich_obj):
33
+ """Test the enrich_documents method."""
34
+ ols_terms = ["CL_0000899",
35
+ "GO_0046427",
36
+ "UBERON_0000004",
37
+ "HP_0009739",
38
+ "MONDO_0005011",
39
+ "XYZ_0000000"]
40
+ descriptions = enrich_obj.enrich_documents(ols_terms)
41
+ assert descriptions[0].startswith(CL_DESC)
42
+ assert descriptions[1].startswith(GO_DESC)
43
+ assert descriptions[2].startswith(UBERON_DESC)
44
+ assert descriptions[3].startswith(HP_DESC)
45
+ assert descriptions[4].startswith(MONDO_DESC)
46
+ assert descriptions[5] is None
47
+
48
+ def test_enrich_documents_with_rag(enrich_obj):
49
+ """Test the enrich_documents_with_rag method."""
50
+ ols_terms = ["CL_0000899",
51
+ "GO_0046427",
52
+ "UBERON_0000004",
53
+ "HP_0009739",
54
+ "MONDO_0005011",
55
+ "XYZ_0000000"]
56
+ descriptions = enrich_obj.enrich_documents_with_rag(ols_terms, None)
57
+ assert descriptions[0].startswith(CL_DESC)
58
+ assert descriptions[1].startswith(GO_DESC)
59
+ assert descriptions[2].startswith(UBERON_DESC)
60
+ assert descriptions[3].startswith(HP_DESC)
61
+ assert descriptions[4].startswith(MONDO_DESC)
62
+ assert descriptions[5] is None
@@ -13,7 +13,9 @@ from ..utils.enrichments.pubchem_strings import EnrichmentWithPubChem
13
13
  # The expected SMILES representation for the first PubChem ID is:
14
14
  SMILES_FIRST = 'C[C@@H]1C[C@H]2[C@@H]3[C@@H](CC4=CC(=O)C=C[C@@]'
15
15
  SMILES_FIRST += '4([C@H]3[C@H](C[C@@]2([C@]1(C(=O)CO)O)C)O)C)Cl'
16
- # The expected SMILES representation for the second PubChem ID is None.
16
+ # The expected description for the first PubChem ID starts with:
17
+ DESCRIPTION_FIRST = "Alclometasone is a prednisolone compound having an alpha-chloro substituent"
18
+ # The expected SMILES representation and description for the second PubChem ID are both None.
17
19
 
18
20
  @pytest.fixture(name="enrich_obj")
19
21
  def fixture_pubchem_config():
@@ -23,11 +25,16 @@ def fixture_pubchem_config():
23
25
  def test_enrich_documents(enrich_obj):
24
26
  """Test the enrich_documents method."""
25
27
  pubchem_ids = ["5311000", "1X"]
26
- enriched_strings = enrich_obj.enrich_documents(pubchem_ids)
28
+ enriched_descriptions, enriched_strings = enrich_obj.enrich_documents(pubchem_ids)
27
29
  assert enriched_strings == [SMILES_FIRST, None]
30
+ assert enriched_descriptions[0].startswith(DESCRIPTION_FIRST)
31
+ assert enriched_descriptions[1] is None
28
32
 
29
33
  def test_enrich_documents_with_rag(enrich_obj):
30
34
  """Test the enrich_documents_with_rag method."""
31
35
  pubchem_ids = ["5311000", "1X"]
32
- enriched_strings = enrich_obj.enrich_documents_with_rag(pubchem_ids, None)
36
+ enriched_descriptions, enriched_strings = enrich_obj.enrich_documents_with_rag(pubchem_ids,
37
+ None)
33
38
  assert enriched_strings == [SMILES_FIRST, None]
39
+ assert enriched_descriptions[0].startswith(DESCRIPTION_FIRST)
40
+ assert enriched_descriptions[1] is None
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Test cases for utils/enrichments/reactome_pathways.py
5
+ """
6
+
7
+ import pytest
8
+ from ..utils.enrichments.reactome_pathways import EnrichmentWithReactome
9
+
10
+ # In this test, we will consider 3 examples:
11
+ # 1. R-HSA-3244647: cGAS binds cytosolic DNA
12
+ # 2. R-HSA-9905952: ATP binds P2RX7 in P2RX7 trimer:PANX1 heptamer
13
+ # 3. R-HSA-1234567: Fake pathway
14
+
15
+ # The expected description of pathway R-HSA-3244647 starts with:
16
+ FIRST_PATHWAY = "Cyclic GMP-AMP (cGAMP) synthase (cGAS) was identified as a cytosolic DNA"
17
+ # The expected description of pathway R-HSA-9905952 starts with:
18
+ SECOND_PATHWAY = "The P2RX7 (P2X7, P2Z) trimer binds ATP,"
19
+ # The expected description of pathway R-HSA-1234567 is None.
20
+
21
+ @pytest.fixture(name="enrich_obj")
22
+ def fixture_uniprot_config():
23
+ """Return an EnrichmentWithReactome instance to be used by the tests."""
24
+ return EnrichmentWithReactome()
25
+
26
+ def test_enrich_documents(enrich_obj):
27
+ """Test the enrich_documents method."""
28
+ reactome_pathways = ["R-HSA-3244647",
29
+ "R-HSA-9905952",
30
+ "R-HSA-1234567"]
31
+ descriptions = enrich_obj.enrich_documents(reactome_pathways)
32
+ assert descriptions[0].startswith(FIRST_PATHWAY)
33
+ assert descriptions[1].startswith(SECOND_PATHWAY)
34
+ assert descriptions[2] is None
35
+
36
+ def test_enrich_documents_with_rag(enrich_obj):
37
+ """Test the enrich_documents_with_rag method."""
38
+ reactome_pathways = ["R-HSA-3244647",
39
+ "R-HSA-9905952",
40
+ "R-HSA-1234567"]
41
+ descriptions = enrich_obj.enrich_documents_with_rag(reactome_pathways, None)
42
+ assert descriptions[0].startswith(FIRST_PATHWAY)
43
+ assert descriptions[1].startswith(SECOND_PATHWAY)
44
+ assert descriptions[2] is None
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Test cases for utils/enrichments/uniprot_proteins.py
5
+ """
6
+
7
+ import pytest
8
+ from ..utils.enrichments.uniprot_proteins import EnrichmentWithUniProt
9
+
10
+ # In this test, we will consider 3 examples:
11
+ # 1. Gene Name: TP53
12
+ # 2. Gene Name: TP5 (Incomplete; must return empty results)
13
+ # 3. Gene Name: XZ (Shorter than 3 characters; must return empty results)
14
+ # The expected description of TP53 starts with:
15
+ START_DESCP = "Multifunctional transcription factor"
16
+ # The expected amino acid sequence of TP53 starts with:
17
+ START_SEQ = "MEEPQSDPSV"
18
+
19
+ @pytest.fixture(name="enrich_obj")
20
+ def fixture_uniprot_config():
21
+ """Return an EnrichmentWithUniProt instance to be used by the tests."""
22
+ return EnrichmentWithUniProt()
23
+
24
+ def test_enrich_documents(enrich_obj):
25
+ """Test the enrich_documents method."""
26
+ gene_names = ["TP53", "TP5", "XZ"]
27
+ descriptions, sequences = enrich_obj.enrich_documents(gene_names)
28
+ assert descriptions[0].startswith(START_DESCP)
29
+ assert sequences[0].startswith(START_SEQ)
30
+ assert descriptions[1] is None
31
+ assert sequences[1] is None
32
+ assert descriptions[2] is None
33
+ assert sequences[2] is None
34
+
35
+ def test_enrich_documents_with_rag(enrich_obj):
36
+ """Test the enrich_documents_with_rag method."""
37
+ gene_names = ["TP53", "TP5", "XZ"]
38
+ descriptions, sequences = enrich_obj.enrich_documents_with_rag(gene_names, None)
39
+ assert descriptions[0].startswith(START_DESCP)
40
+ assert sequences[0].startswith(START_SEQ)
41
+ assert descriptions[1] is None
42
+ assert sequences[1] is None
43
+ assert descriptions[2] is None
44
+ assert sequences[2] is None
@@ -4,13 +4,35 @@ Test cases for utils/pubchem_utils.py
4
4
 
5
5
  from ..utils import pubchem_utils
6
6
 
7
- def test_drugbank_id2pubchem_cid():
7
+ def test_external_id2pubchem_cid():
8
8
  """
9
- Test the drugbank_id2pubchem_cid method.
9
+ Test the external_id2pubchem_cid function.
10
10
 
11
11
  The DrugBank ID for Alclometasone is DB00240.
12
12
  The PubChem CID for Alclometasone is 5311000.
13
+
14
+ The CTD ID for Butylated Hydroxyanisole is D002083
15
+ The PubChem CID for Butylated Hydroxyanisole is 24667.
13
16
  """
14
17
  drugbank_id = "DB00240"
15
- pubchem_cid = pubchem_utils.drugbank_id2pubchem_cid(drugbank_id)
18
+ pubchem_cid = pubchem_utils.external_id2pubchem_cid('drugbank', drugbank_id)
16
19
  assert pubchem_cid == 5311000
20
+
21
+ ctd_id = "D002083"
22
+ pubchem_cid = pubchem_utils.external_id2pubchem_cid(
23
+ 'comparative toxicogenomics database',
24
+ ctd_id)
25
+ assert pubchem_cid == 24667
26
+
27
+ def test_pubchem_cid_description():
28
+ """
29
+ Test the pubchem_cid_description function.
30
+
31
+ The PubChem CID for Alclometasone is 5311000.
32
+ The description for Alclometasone starts with
33
+ "Alclometasone is a prednisolone compound having an alpha-chloro substituent".
34
+ """
35
+ pubchem_cid = 5311000
36
+ description = pubchem_utils.pubchem_cid_description(pubchem_cid)
37
+ assert description.startswith(
38
+ "Alclometasone is a prednisolone compound having an alpha-chloro substituent")
@@ -4,3 +4,6 @@ This package contains modules to use the enrichment model
4
4
  from . import enrichments
5
5
  from . import ollama
6
6
  from . import pubchem_strings
7
+ from . import uniprot_proteins
8
+ from . import reactome_pathways
9
+ from . import ols_terms
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching OLS terms with textual descriptions
5
+ """
6
+
7
+ from typing import List
8
+ import logging
9
+ import json
10
+ import hydra
11
+ import requests
12
+ from .enrichments import Enrichments
13
+
14
+ # Initialize logger
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
+ class EnrichmentWithOLS(Enrichments):
19
+ """
20
+ Enrichment class using OLS terms
21
+ """
22
+ def enrich_documents(self, texts: List[str]) -> List[str]:
23
+ """
24
+ Enrich a list of input OLS terms
25
+
26
+ Args:
27
+ texts: The list of OLS terms to be enriched.
28
+
29
+ Returns:
30
+ The list of enriched descriptions
31
+ """
32
+
33
+ ols_ids = texts
34
+
35
+ logger.log(logging.INFO,
36
+ "Load Hydra configuration for OLS enrichments.")
37
+ with hydra.initialize(version_base=None, config_path="../../configs"):
38
+ cfg = hydra.compose(config_name='config',
39
+ overrides=['utils/enrichments/ols_terms=default'])
40
+ cfg = cfg.utils.enrichments.ols_terms
41
+
42
+ descriptions = []
43
+ for ols_id in ols_ids:
44
+ params = {
45
+ 'short_form': ols_id
46
+ }
47
+ r = requests.get(cfg.base_url,
48
+ headers={ "Accept" : "application/json"},
49
+ params=params,
50
+ timeout=cfg.timeout)
51
+ response_body = json.loads(r.text)
52
+ # if the response body has no '_embedded' section, there is no term to describe
53
+ if '_embedded' not in response_body:
54
+ descriptions.append(None)
55
+ continue
56
+ # Add the description to the list
57
+ description = response_body['_embedded']['terms'][0]['description']
58
+ # Add synonyms to the description
59
+ description += response_body['_embedded']['terms'][0]['synonyms']
60
+ # Add the label to the description
61
+ # Label is not provided as list, so we need to convert it to a list
62
+ description += [response_body['_embedded']['terms'][0]['label']]
63
+ descriptions.append('\n'.join(description))
64
+ return descriptions
65
+
66
+ def enrich_documents_with_rag(self, texts, docs):
67
+ """
68
+ Enrich a list of input OLS terms
69
+
70
+ Args:
71
+ texts: The list of OLS terms to be enriched.
72
+
73
+ Returns:
74
+ The list of enriched descriptions
75
+ """
76
+ return self.enrich_documents(texts)
@@ -1,12 +1,13 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  """
4
- Enrichment class for enriching PubChem IDs with their STRINGS representation.
4
+ Enrichment class for enriching PubChem IDs with their STRINGS representation and descriptions.
5
5
  """
6
6
 
7
7
  from typing import List
8
8
  import pubchempy as pcp
9
9
  from .enrichments import Enrichments
10
+ from ..pubchem_utils import pubchem_cid_description
10
11
 
11
12
  class EnrichmentWithPubChem(Enrichments):
12
13
  """
@@ -20,20 +21,24 @@ class EnrichmentWithPubChem(Enrichments):
20
21
  texts: The list of pubchem IDs to be enriched.
21
22
 
22
23
  Returns:
23
- The list of enriched STRINGS
24
+ The list of enriched descriptions and the list of enriched STRINGS (in that order).
24
25
  """
25
26
 
26
- enriched_pubchem_ids = []
27
+ enriched_pubchem_ids_smiles = []
28
+ enriched_pubchem_ids_descriptions = []
29
+
27
30
  pubchem_cids = texts
28
31
  for pubchem_cid in pubchem_cids:
29
32
  try:
30
33
  c = pcp.Compound.from_cid(pubchem_cid)
31
34
  except pcp.BadRequestError:
32
- enriched_pubchem_ids.append(None)
35
+ enriched_pubchem_ids_smiles.append(None)
36
+ enriched_pubchem_ids_descriptions.append(None)
33
37
  continue
34
- enriched_pubchem_ids.append(c.isomeric_smiles)
38
+ enriched_pubchem_ids_smiles.append(c.isomeric_smiles)
39
+ enriched_pubchem_ids_descriptions.append(pubchem_cid_description(pubchem_cid))
35
40
 
36
- return enriched_pubchem_ids
41
+ return enriched_pubchem_ids_descriptions, enriched_pubchem_ids_smiles
37
42
 
38
43
  def enrich_documents_with_rag(self, texts, docs):
39
44
  """
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching Reactome pathways with textual descriptions
5
+ """
6
+
7
+ from typing import List
8
+ import logging
9
+ import hydra
10
+ import requests
11
+ from .enrichments import Enrichments
12
+
13
+ # Initialize logger
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+ class EnrichmentWithReactome(Enrichments):
18
+ """
19
+ Enrichment class using Reactome pathways
20
+ """
21
+ def enrich_documents(self, texts: List[str]) -> List[str]:
22
+ """
23
+ Enrich a list of input Reactome pathways
24
+
25
+ Args:
26
+ texts: The list of Reactome pathways to be enriched.
27
+
28
+ Returns:
29
+ The list of enriched descriptions
30
+ """
31
+
32
+ reactome_pathways_ids = texts
33
+
34
+ logger.log(logging.INFO,
35
+ "Load Hydra configuration for reactome enrichment")
36
+ with hydra.initialize(version_base=None, config_path="../../configs"):
37
+ cfg = hydra.compose(config_name='config',
38
+ overrides=['utils/enrichments/reactome_pathways=default'])
39
+ cfg = cfg.utils.enrichments.reactome_pathways
40
+
41
+ descriptions = []
42
+ for reactome_pathway_id in reactome_pathways_ids:
43
+ r = requests.get(cfg.base_url + reactome_pathway_id + '/summation',
44
+ headers={ "Accept" : "text/plain"},
45
+ timeout=cfg.timeout)
46
+ # if the response is not ok
47
+ if not r.ok:
48
+ descriptions.append(None)
49
+ continue
50
+ response_body = r.text
51
+ # if the response is ok
52
+ descriptions.append(response_body.split('\t')[1])
53
+ return descriptions
54
+
55
+ def enrich_documents_with_rag(self, texts, docs):
56
+ """
57
+ Enrich a list of input Reactome pathways
58
+
59
+ Args:
60
+ texts: The list of Reactome pathways to be enriched.
61
+
62
+ Returns:
63
+ The list of enriched descriptions
64
+ """
65
+ return self.enrich_documents(texts)
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching Gene names with their function and sequence using UniProt.
5
+ """
6
+
7
+ from typing import List
8
+ import logging
9
+ import json
10
+ import hydra
11
+ import requests
12
+ from .enrichments import Enrichments
13
+
14
+ # Initialize logger
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
+ class EnrichmentWithUniProt(Enrichments):
19
+ """
20
+ Enrichment class using UniProt
21
+ """
22
+ def enrich_documents(self, texts: List[str]) -> List[str]:
23
+ """
24
+ Enrich a list of input UniProt gene names with their function and sequence.
25
+
26
+ Args:
27
+ texts: The list of gene names to be enriched.
28
+
29
+ Returns:
30
+ A tuple of two lists: the enriched function descriptions and the sequences
31
+ """
32
+
33
+ enriched_gene_names = texts
34
+
35
+ logger.log(logging.INFO,
36
+ "Load Hydra configuration for Gene enrichment with description and sequence.")
37
+ with hydra.initialize(version_base=None, config_path="../../configs"):
38
+ cfg = hydra.compose(config_name='config',
39
+ overrides=['utils/enrichments/uniprot_proteins=default'])
40
+ cfg = cfg.utils.enrichments.uniprot_proteins
41
+
42
+
43
+ descriptions = []
44
+ sequences = []
45
+ for gene in enriched_gene_names:
46
+ params = {
47
+ "reviewed": cfg.reviewed,
48
+ "isoform": cfg.isoform,
49
+ "exact_gene": gene,
50
+ "organism": cfg.organism,
51
+ # You can get the list of all available organisms here:
52
+ # https://www.uniprot.org/help/taxonomy
53
+ }
54
+
55
+ r = requests.get(cfg.uniprot_url,
56
+ headers={ "Accept" : "application/json"},
57
+ params=params,
58
+ timeout=cfg.timeout)
59
+ # if the response is not ok
60
+ if not r.ok:
61
+ descriptions.append(None)
62
+ sequences.append(None)
63
+ continue
64
+ response_body = json.loads(r.text)
65
+ # if the response body is empty
66
+ if not response_body:
67
+ descriptions.append(None)
68
+ sequences.append(None)
69
+ continue
70
+ description = ''
71
+ for comment in response_body[0]['comments']:
72
+ if comment['type'] == 'FUNCTION':
73
+ for value in comment['text']:
74
+ description += value['value']
75
+ sequence = response_body[0]['sequence']['sequence']
76
+ descriptions.append(description)
77
+ sequences.append(sequence)
78
+ return descriptions, sequences
79
+
80
+ def enrich_documents_with_rag(self, texts, docs):
81
+ """
82
+ Enrich a list of input UniProt gene names with their function and sequence.
83
+
84
+ Args:
85
+ texts: The list of gene names to be enriched.
86
+
87
+ Returns:
88
+ A tuple of two lists: the enriched function descriptions and the sequences
89
+ """
90
+ return self.enrich_documents(texts)
@@ -12,12 +12,16 @@ import hydra
12
12
  logging.basicConfig(level=logging.INFO)
13
13
  logger = logging.getLogger(__name__)
14
14
 
15
- def drugbank_id2pubchem_cid(drugbank_id):
15
+ def external_id2pubchem_cid(db, db_id):
16
16
  """
17
- Convert DrugBank ID to PubChem CID.
17
+ Convert external DB ID to PubChem CID.
18
+ Please refer to the following URL for more information
19
+ on data sources:
20
+ https://pubchem.ncbi.nlm.nih.gov/sources/
18
21
 
19
22
  Args:
20
- drugbank_id: The DrugBank ID of the drug.
23
+ db: The database name.
24
+ db_id: The database ID of the drug.
21
25
 
22
26
  Returns:
23
27
  The PubChem CID of the drug.
@@ -28,7 +32,7 @@ def drugbank_id2pubchem_cid(drugbank_id):
28
32
  overrides=['utils/pubchem_utils=default'])
29
33
  cfg = cfg.utils.pubchem_utils
30
34
  # Prepare the URL
31
- pubchem_url_for_drug = cfg.drugbank_id_to_pubchem_cid_url + drugbank_id + '/JSON'
35
+ pubchem_url_for_drug = f"{cfg.pubchem_cid_base_url}/{db}/{db_id}/JSON"
32
36
  # Get the data
33
37
  response = requests.get(pubchem_url_for_drug, timeout=60)
34
38
  data = response.json()
@@ -40,3 +44,29 @@ def drugbank_id2pubchem_cid(drugbank_id):
40
44
  cid = compound["id"].get("id", {}).get("cid")
41
45
  break
42
46
  return cid
47
+
48
+ def pubchem_cid_description(cid):
49
+ """
50
+ Get the description of a PubChem CID.
51
+
52
+ Args:
53
+ cid: The PubChem CID of the drug.
54
+
55
+ Returns:
56
+ The description of the PubChem CID.
57
+ """
58
+ logger.log(logging.INFO, "Load Hydra configuration for PubChem CID description.")
59
+ with hydra.initialize(version_base=None, config_path="../configs"):
60
+ cfg = hydra.compose(config_name='config',
61
+ overrides=['utils/pubchem_utils=default'])
62
+ cfg = cfg.utils.pubchem_utils
63
+ # Prepare the URL
64
+ pubchem_url_for_descpription = f"{cfg.pubchem_cid_description_url}/{cid}/description/JSON"
65
+ # Get the data
66
+ response = requests.get(pubchem_url_for_descpription, timeout=60)
67
+ data = response.json()
68
+ # Extract the PubChem CID description
69
+ description = ''
70
+ for information in data["InformationList"]['Information']:
71
+ description += information.get("Description", '')
72
+ return description
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiagents4pharma
3
- Version: 1.32.0
3
+ Version: 1.34.0
4
4
  Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -75,7 +75,7 @@ aiagents4pharma/talk2knowledgegraphs/__init__.py,sha256=Z0Eo7LTiKk0STsr8VI7wkCLq
75
75
  aiagents4pharma/talk2knowledgegraphs/agents/__init__.py,sha256=iOAzuy_8A03tQDFtSBhC9dldUo62z5gfxcVtXAdLOJs,92
76
76
  aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py,sha256=IcXSZ2qQA1m-gS-o0Pj_g1oar8uPdhsbaovloUFka3Q,3058
77
77
  aiagents4pharma/talk2knowledgegraphs/configs/__init__.py,sha256=4_DVdpahaJ55yPl0aZotlFA_MYWLFF2cubWyKtBVI_Q,126
78
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=bag4w3JCSqaojG37MTksy3ZehAPe3qoVzjIN2uh3nrc,229
78
+ aiagents4pharma/talk2knowledgegraphs/configs/config.yaml,sha256=X91262b-wkygiH4HrEr0bIzHxHDuDWwuxLQAmdUe-E4,367
79
79
  aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py,sha256=-fAORvyFmG2iSvFOFDixmt9OTQRR58y89uhhu2EgbA8,46
80
80
  aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml,sha256=ENCGROwYFpR6g4QD518h73sshdn3vPVpotBMk1QJcpU,4830
81
81
  aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py,sha256=fKfc3FR7g5KjY9b6jzrU6cwKTVVpkoVZQS3dvUowu34,69
@@ -111,9 +111,12 @@ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py,sha25
111
111
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py,sha256=Qxo6WeIDRy8aLh1tNKw0kSlzmUj3MtTak63oW2YwB24,1327
112
112
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py,sha256=N6HRr4lWHXY7bTHe2uXJe4D_EG9WqZPibZne6qLl9_k,1447
113
113
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py,sha256=JhY7axvVULLywDJ2ctA-gob5YPeaJYWsaMNjHT6L9CU,3021
114
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py,sha256=bk27KElJxOvKJ2RTz4ftleExQPMyWWS755KKmlImzbk,1241
114
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py,sha256=woSm723ns9fHieu-QWFiniLlm5h22v1qzO4v6n20K5g,2413
115
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py,sha256=0SgYvqdvxseUYTHx2KuSNI2hnmQ3VVVz0F-79_-P41o,1769
116
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py,sha256=r1D74mavsnSCm4xnWl0n0nM9PZqgm3doD2dulNrKNVQ,1754
117
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py,sha256=G13Diw7cA5TGINUNO1CDnN4rM6KbepxRXNjuzY578DI,1611
115
118
  aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py,sha256=pal76wi7WgQWUNk56BrzfFV8jKpbDaHHdbwtgx_gXLI,2410
116
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=C07YqUNYW7ofpKAzKh0lBovXKLvaiXFb3oJU6k1dvu4,411
119
+ aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py,sha256=K1Y6QM0MDP1IrAdcWkigl8R-O-i-lsL4NCyOrWewhdM,1246
117
120
  aiagents4pharma/talk2knowledgegraphs/tools/__init__.py,sha256=zpD4h7EYtyq0QNOqLd6bkxrPlPb2XN64ceI9ncgESrA,171
118
121
  aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py,sha256=OEuOFncDRdb7TQEGq4rkT5On-jI-R7Nt8K5EBzaND8w,5338
119
122
  aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py,sha256=zhmsRp-8vjB5rRekqTA07d3yb-42HWqng9dDMkvK6hM,623
@@ -121,17 +124,20 @@ aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py,sha256=te06QMF
121
124
  aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py,sha256=mDSBOxopDfNhEJeU8fVI8b5lXTYrRzcc97aLbFgYSy4,4413
122
125
  aiagents4pharma/talk2knowledgegraphs/utils/__init__.py,sha256=cZqb3LZLmBnmyAtWFv2Z-4uJvQmx0M4zKsfiWrlM3Pk,195
123
126
  aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py,sha256=6vQnPkeOWae_8jePjhma3sJuMTngy0I0tqzdFt6OqKg,2507
124
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py,sha256=IlrdGbRGD0IM7eMcpkOjuRjKNuH3lz_X8zN6RHwk61c,1340
127
+ aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py,sha256=8cve_KLtQUhG3uMKYpyelZvpETSsNGRdGE4X0NXMk4M,2442
125
128
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py,sha256=POSDrSdFAWsBCueOPD-Fok-ARdTywJU1ivwpT9EU1Kw,199
126
129
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py,sha256=1nGznrAj-xT0xuSMBGz2dOujJ7M_IwSR84njxtxsy9A,2523
127
130
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py,sha256=2vi_elf6EgzfagFAO5QnL3a_aXZyN7B1EBziu44MTfM,3806
128
131
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py,sha256=XH6JNfmMS38UEU7UGJeeabHfRykharnQpQaqjO86OlQ,1537
129
132
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py,sha256=8w0sjt3Ex5YJ_XvpKl9UbhdTiiaoMIarbPUxLBU-1Uw,2378
130
133
  aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py,sha256=36iKlisOpMtGR5xfTAlSHXWvPqVC_Jbezod8kbBBMVg,2136
131
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=JKGavA-umsGX3ng17_UYAvDBdbg-W-mPn8Q6JfP7J9U,143
134
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py,sha256=K157MWJ4dn2fj3G5ClhyAOXg9jI2H02GP07J6UpasJw,230
132
135
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py,sha256=Bx8x6zzk5614ApWB90N_iv4_Y_Uq0-KwUeBwYSdQMU4,924
133
136
  aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py,sha256=8eoxR-VHo0G7ReQIwje7xEhE-SJlHdef7_wJRpnvFIc,4116
134
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=qsVlDCGGDkUCv-R5_xFGhrtLS7P0CfagnM2qATwiOFM,1333
137
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py,sha256=xSPP-h2q9fABz6Sd6ZlH9WiyoO8KZeEnPI5n2nJpWL4,2443
138
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256=CQEGQ6Qsex2T91Vw7zTrclJBbSGGhxeWaVJb8tnURAQ,1691
139
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py,sha256=I0cD0Fk2Uk27_4jEaIhpoGhoMh_RphY1VtkMnk4dkPg,2011
140
+ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py,sha256=z0Jb3tt8VzRjzqI9oVcUvRlPPg6BUdmslfKDIEFE_h8,3013
135
141
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtfzKhB8GuOBD47XRi0NprwEXkOzwNl5eeu-hDTI,86
136
142
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
137
143
  aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
@@ -219,8 +225,8 @@ aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=lyrfpx8NH
219
225
  aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9yrBYaDnRe7sR6PrpwR82OBJbA2P_Tc6RbxAbM,2748
220
226
  aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
221
227
  aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
222
- aiagents4pharma-1.32.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
223
- aiagents4pharma-1.32.0.dist-info/METADATA,sha256=o3uHTzLfOPglRJUlK6_vKAuJ0qwKhGt7BeiQHeF4U_o,16043
224
- aiagents4pharma-1.32.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
225
- aiagents4pharma-1.32.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
226
- aiagents4pharma-1.32.0.dist-info/RECORD,,
228
+ aiagents4pharma-1.34.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
229
+ aiagents4pharma-1.34.0.dist-info/METADATA,sha256=P4bmxMTSkbYdRNmw6mijR5O19PBaYRqqP3SgtFhdtpk,16043
230
+ aiagents4pharma-1.34.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
231
+ aiagents4pharma-1.34.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
232
+ aiagents4pharma-1.34.0.dist-info/RECORD,,