PyPI - aiagents4pharma - Versions diffs - 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl - Mend

aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (289) hide show

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py CHANGED Viewed

@@ -2,17 +2,20 @@
 Embedding class using Ollama model based on LangChain Embeddings class.
 """
-import time
-from typing import List
 import subprocess
+import time
 import ollama
 from langchain_ollama import OllamaEmbeddings
 from .embeddings import Embeddings
 class EmbeddingWithOllama(Embeddings):
     """
     Embedding class using Ollama model based on LangChain Embeddings class.
     """
     def __init__(self, model_name: str):
         """
         Initialize the EmbeddingWithOllama class.
@@ -38,18 +41,21 @@ class EmbeddingWithOllama(Embeddings):
         """
         try:
             models_list = ollama.list()["models"]
-            if model_name not in [m['model'].replace(":latest", "") for m in models_list]:
+            if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
                 ollama.pull(model_name)
                 time.sleep(30)
                 raise ValueError(f"Pulled {model_name} model")
         except Exception as e:
             with subprocess.Popen(
-                "ollama serve", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+                "ollama serve",
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
             ):
                 time.sleep(10)
             raise ValueError(f"Error: {e} and restarted Ollama server.") from e
-    def embed_documents(self, texts: List[str]) -> List[float]:
+    def embed_documents(self, texts: list[str]) -> list[float]:
         """
         Generate embedding for a list of input texts using Ollama model.
@@ -65,7 +71,7 @@ class EmbeddingWithOllama(Embeddings):
         return embeddings
-    def embed_query(self, text: str) -> List[float]:
+    def embed_query(self, text: str) -> list[float]:
         """
         Generate embeddings for an input text using Ollama model.

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py CHANGED Viewed

@@ -4,8 +4,8 @@
 Embedding class using SentenceTransformer model based on LangChain Embeddings class.
 """
-from typing import List
 from sentence_transformers import SentenceTransformer
 from .embeddings import Embeddings
@@ -35,11 +35,13 @@ class EmbeddingWithSentenceTransformer(Embeddings):
         self.trust_remote_code = trust_remote_code
         # Load the model
-        self.model = SentenceTransformer(self.model_name,
-                                         cache_folder=self.model_cache_dir,
-                                         trust_remote_code=self.trust_remote_code)
+        self.model = SentenceTransformer(
+            self.model_name,
+            cache_folder=self.model_cache_dir,
+            trust_remote_code=self.trust_remote_code,
+        )
-    def embed_documents(self, texts: List[str]) -> List[float]:
+    def embed_documents(self, texts: list[str]) -> list[float]:
         """
         Generate embedding for a list of input texts using SentenceTransformer model.
@@ -55,7 +57,7 @@ class EmbeddingWithSentenceTransformer(Embeddings):
         return embeddings
-    def embed_query(self, text: str) -> List[float]:
+    def embed_query(self, text: str) -> list[float]:
         """
         Generate embeddings for an input text using SentenceTransformer model.

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py CHANGED Viewed

@@ -1,9 +1,12 @@
 """
 This package contains modules to use the enrichment model
 """
-from . import enrichments
-from . import ollama
-from . import pubchem_strings
-from . import uniprot_proteins
-from . import reactome_pathways
-from . import ols_terms
+from . import (
+    enrichments,
+    ollama,
+    ols_terms,
+    pubchem_strings,
+    reactome_pathways,
+    uniprot_proteins,
+)

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py CHANGED Viewed

@@ -4,6 +4,7 @@ Enrichments interface
 from abc import ABC, abstractmethod
 class Enrichments(ABC):
     """Interface for enrichment models.

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py CHANGED Viewed

@@ -4,20 +4,23 @@
 Enrichment class using Ollama model based on LangChain Enrichment class.
 """
-import time
-from typing import List
-import subprocess
 import ast
+import subprocess
+import time
 import ollama
-from langchain_ollama import ChatOllama
-from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_ollama import ChatOllama
 from .enrichments import Enrichments
 class EnrichmentWithOllama(Enrichments):
     """
     Enrichment class using Ollama model based on the Enrichment abstract class.
     """
     def __init__(
         self,
         model_name: str,
@@ -67,18 +70,21 @@ class EnrichmentWithOllama(Enrichments):
         """
         try:
             models_list = ollama.list()["models"]
-            if model_name not in [m['model'].replace(":latest", "") for m in models_list]:
+            if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
                 ollama.pull(model_name)
                 time.sleep(30)
                 raise ValueError(f"Pulled {model_name} model")
         except Exception as e:
             with subprocess.Popen(
-                "ollama serve", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+                "ollama serve",
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
             ):
                 time.sleep(10)
             raise ValueError(f"Error: {e} and restarted Ollama server.") from e
-    def enrich_documents(self, texts: List[str]) -> List[str]:
+    def enrich_documents(self, texts: list[str]) -> list[str]:
         """
         Enrich a list of input texts with additional textual features using OLLAMA model.
         Important: Make sure the input is a list of texts based on the defined prompt template
@@ -116,7 +122,7 @@ class EnrichmentWithOllama(Enrichments):
         Args:
             texts: The list of texts to be enriched.
             docs: The list of reference documents to enrich the input texts.
         Returns:
             The list of enriched texts
         """

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py CHANGED Viewed

@@ -4,22 +4,25 @@
 Enrichment class for enriching OLS terms with textual descriptions
 """
-from typing import List
-import logging
 import json
+import logging
 import hydra
 import requests
 from .enrichments import Enrichments
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class EnrichmentWithOLS(Enrichments):
     """
     Enrichment class using OLS terms
     """
-    def enrich_documents(self, texts: List[str]) -> List[str]:
+    def enrich_documents(self, texts: list[str]) -> list[str]:
         """
         Enrich a list of input OLS terms
@@ -32,41 +35,41 @@ class EnrichmentWithOLS(Enrichments):
         ols_ids = texts
-        logger.log(logging.INFO,
-                   "Load Hydra configuration for OLS enrichments.")
+        logger.log(logging.INFO, "Load Hydra configuration for OLS enrichments.")
         with hydra.initialize(version_base=None, config_path="../../configs"):
-            cfg = hydra.compose(config_name='config',
-                                overrides=['utils/enrichments/ols_terms=default'])
+            cfg = hydra.compose(
+                config_name="config", overrides=["utils/enrichments/ols_terms=default"]
+            )
             cfg = cfg.utils.enrichments.ols_terms
         descriptions = []
         for ols_id in ols_ids:
-            params = {
-                'short_form': ols_id
-            }
-            r = requests.get(cfg.base_url,
-                             headers={ "Accept" : "application/json"},
-                             params=params,
-                             timeout=cfg.timeout)
+            params = {"short_form": ols_id}
+            r = requests.get(
+                cfg.base_url,
+                headers={"Accept": "application/json"},
+                params=params,
+                timeout=cfg.timeout,
+            )
             response_body = json.loads(r.text)
             # if the response body is empty
-            if '_embedded' not in response_body:
+            if "_embedded" not in response_body:
                 descriptions.append(None)
                 continue
             # Add the description to the list
             description = []
-            for term in response_body['_embedded']['terms']:
+            for term in response_body["_embedded"]["terms"]:
                 # If the term has a description, add it to the list
-                description += term.get('description', [])
+                description += term.get("description", [])
                 # Add synonyms to the description
-                description += term.get('synonyms', [])
+                description += term.get("synonyms", [])
                 # Add the label to the description
                 # Label is not provided as list, so we need to convert it to a list
-                description += [term.get('label', [])]
+                description += [term.get("label", [])]
             # Make unique the description
             description = list(set(description))
             # Join the description with new line
-            description = '\n'.join(description)
+            description = "\n".join(description)
             # Add the description to the list
             descriptions.append(description)
         return descriptions

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py CHANGED Viewed

@@ -5,21 +5,24 @@ Enrichment class for enriching PubChem IDs with their STRINGS representation and
 """
 import logging
-from typing import List
-import requests
 import hydra
-from .enrichments import Enrichments
+import requests
 from ..pubchem_utils import pubchem_cid_description
+from .enrichments import Enrichments
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class EnrichmentWithPubChem(Enrichments):
     """
     Enrichment class using PubChem
     """
-    def enrich_documents(self, texts: List[str]) -> List[str]:
+    def enrich_documents(self, texts: list[str]) -> list[str]:
         """
         Enrich a list of input PubChem IDs with their STRINGS representation.
@@ -35,8 +38,7 @@ class EnrichmentWithPubChem(Enrichments):
         # Load Hydra configuration to get the base URL for PubChem
         with hydra.initialize(version_base=None, config_path="../../configs"):
-            cfg = hydra.compose(config_name='config',
-                                overrides=['utils/pubchem_utils=default'])
+            cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
             cfg = cfg.utils.pubchem_utils
         # Iterate over each PubChem ID in the input list
         pubchem_cids = texts
@@ -47,11 +49,11 @@ class EnrichmentWithPubChem(Enrichments):
             response = requests.get(pubchem_url, timeout=60)
             data = response.json()
             # Extract the PubChem CID SMILES
-            smiles = ''
-            description = ''
+            smiles = ""
+            description = ""
             if "PropertyTable" in data:
-                for prop in data["PropertyTable"]['Properties']:
-                    smiles = prop.get("SMILES", '')
+                for prop in data["PropertyTable"]["Properties"]:
+                    smiles = prop.get("SMILES", "")
                     description = pubchem_cid_description(pubchem_cid)
             else:
                 # If the PubChem ID is not found, set smiles and description to None

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py CHANGED Viewed

@@ -4,21 +4,24 @@
 Enrichment class for enriching Reactome pathways with textual descriptions
 """
-from typing import List
 import logging
 import hydra
 import requests
 from .enrichments import Enrichments
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class EnrichmentWithReactome(Enrichments):
     """
     Enrichment class using Reactome pathways
     """
-    def enrich_documents(self, texts: List[str]) -> List[str]:
+    def enrich_documents(self, texts: list[str]) -> list[str]:
         """
         Enrich a list of input Reactome pathways
@@ -31,25 +34,28 @@ class EnrichmentWithReactome(Enrichments):
         reactome_pathways_ids = texts
-        logger.log(logging.INFO,
-                   "Load Hydra configuration for reactome enrichment")
+        logger.log(logging.INFO, "Load Hydra configuration for reactome enrichment")
         with hydra.initialize(version_base=None, config_path="../../configs"):
-            cfg = hydra.compose(config_name='config',
-                                overrides=['utils/enrichments/reactome_pathways=default'])
+            cfg = hydra.compose(
+                config_name="config",
+                overrides=["utils/enrichments/reactome_pathways=default"],
+            )
             cfg = cfg.utils.enrichments.reactome_pathways
         descriptions = []
         for reactome_pathway_id in reactome_pathways_ids:
-            r = requests.get(cfg.base_url + reactome_pathway_id + '/summation',
-                             headers={ "Accept" : "text/plain"},
-                             timeout=cfg.timeout)
+            r = requests.get(
+                cfg.base_url + reactome_pathway_id + "/summation",
+                headers={"Accept": "text/plain"},
+                timeout=cfg.timeout,
+            )
             # if the response is not ok
             if not r.ok:
                 descriptions.append(None)
                 continue
             response_body = r.text
             # if the response is ok
-            descriptions.append(response_body.split('\t')[1])
+            descriptions.append(response_body.split("\t")[1])
         return descriptions
     def enrich_documents_with_rag(self, texts, docs):

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py CHANGED Viewed

@@ -4,22 +4,25 @@
 Enrichment class for enriching Gene names with their function and sequence using UniProt.
 """
-from typing import List
-import logging
 import json
+import logging
 import hydra
 import requests
 from .enrichments import Enrichments
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class EnrichmentWithUniProt(Enrichments):
     """
     Enrichment class using UniProt
     """
-    def enrich_documents(self, texts: List[str]) -> List[str]:
+    def enrich_documents(self, texts: list[str]) -> list[str]:
         """
         Enrich a list of input UniProt gene names with their function and sequence.
@@ -32,14 +35,17 @@ class EnrichmentWithUniProt(Enrichments):
         enriched_gene_names = texts
-        logger.log(logging.INFO,
-                   "Load Hydra configuration for Gene enrichment with description and sequence.")
+        logger.log(
+            logging.INFO,
+            "Load Hydra configuration for Gene enrichment with description and sequence.",
+        )
         with hydra.initialize(version_base=None, config_path="../../configs"):
-            cfg = hydra.compose(config_name='config',
-                                overrides=['utils/enrichments/uniprot_proteins=default'])
+            cfg = hydra.compose(
+                config_name="config",
+                overrides=["utils/enrichments/uniprot_proteins=default"],
+            )
             cfg = cfg.utils.enrichments.uniprot_proteins
         descriptions = []
         sequences = []
         for gene in enriched_gene_names:
@@ -52,10 +58,12 @@ class EnrichmentWithUniProt(Enrichments):
                 # https://www.uniprot.org/help/taxonomy
             }
-            r = requests.get(cfg.uniprot_url,
-                             headers={ "Accept" : "application/json"},
-                             params=params,
-                             timeout=cfg.timeout)
+            r = requests.get(
+                cfg.uniprot_url,
+                headers={"Accept": "application/json"},
+                params=params,
+                timeout=cfg.timeout,
+            )
             # if the response is not ok
             if not r.ok:
                 descriptions.append(None)
@@ -67,12 +75,12 @@ class EnrichmentWithUniProt(Enrichments):
                 descriptions.append(None)
                 sequences.append(None)
                 continue
-            description = ''
-            for comment in response_body[0]['comments']:
-                if comment['type'] == 'FUNCTION':
-                    for value in comment['text']:
-                        description += value['value']
-            sequence = response_body[0]['sequence']['sequence']
+            description = ""
+            for comment in response_body[0]["comments"]:
+                if comment["type"] == "FUNCTION":
+                    for value in comment["text"]:
+                        description += value["value"]
+            sequence = response_body[0]["sequence"]["sequence"]
             descriptions.append(description)
             sequences.append(sequence)
         return descriptions, sequences

aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py CHANGED Viewed

@@ -1,6 +1,5 @@
-'''
+"""
 This file is used to import all the models in the package.
-'''
-from . import pcst
-from . import multimodal_pcst
-from . import milvus_multimodal_pcst
+"""
+from . import milvus_multimodal_pcst, multimodal_pcst, pcst

aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py CHANGED Viewed

@@ -16,6 +16,7 @@ from pymilvus import Collection
 try:
     import cudf
     import cupy as cp
     CUDF_AVAILABLE = True
 except ImportError:
     CUDF_AVAILABLE = False
@@ -34,9 +35,7 @@ class SystemDetector:
         self.os_type = platform.system().lower()  # 'windows', 'linux', 'darwin'
         self.architecture = platform.machine().lower()  # 'x86_64', 'arm64', etc.
         self.has_nvidia_gpu = self._detect_nvidia_gpu()
-        self.use_gpu = (
-            self.has_nvidia_gpu and self.os_type != "darwin"
-        )  # No CUDA on macOS
+        self.use_gpu = self.has_nvidia_gpu and self.os_type != "darwin"  # No CUDA on macOS
         logger.info("System Detection Results:")
         logger.info("  OS: %s", self.os_type)
@@ -232,9 +231,7 @@ class MultimodalPCSTPruning(NamedTuple):
         """
         # Initialize several variables
         topk = min(self.topk, colls["nodes"].num_entities)
-        n_prizes = self.loader.py.zeros(
-            colls["nodes"].num_entities, dtype=self.loader.py.float32
-        )
+        n_prizes = self.loader.py.zeros(colls["nodes"].num_entities, dtype=self.loader.py.float32)
         # Get the actual metric type to use
         actual_metric_type = self.metric_type or self.loader.metric_type
@@ -279,9 +276,7 @@ class MultimodalPCSTPruning(NamedTuple):
         """
         # Initialize several variables
         topk_e = min(self.topk_e, colls["edges"].num_entities)
-        e_prizes = self.loader.py.zeros(
-            colls["edges"].num_entities, dtype=self.loader.py.float32
-        )
+        e_prizes = self.loader.py.zeros(colls["edges"].num_entities, dtype=self.loader.py.float32)
         # Get the actual metric type to use
         actual_metric_type = self.metric_type or self.loader.metric_type
@@ -299,15 +294,11 @@ class MultimodalPCSTPruning(NamedTuple):
         e_prizes[[r.id for r in res[0]]] = [r.score for r in res[0]]
         # Further process the edge_prizes
-        unique_prizes, inverse_indices = self.loader.py.unique(
-            e_prizes, return_inverse=True
-        )
+        unique_prizes, inverse_indices = self.loader.py.unique(e_prizes, return_inverse=True)
         topk_e_values = unique_prizes[self.loader.py.argsort(-unique_prizes)[:topk_e]]
         last_topk_e_value = topk_e
         for k in range(topk_e):
-            indices = (
-                inverse_indices == (unique_prizes == topk_e_values[k]).nonzero()[0]
-            )
+            indices = inverse_indices == (unique_prizes == topk_e_values[k]).nonzero()[0]
             value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
             e_prizes[indices] = value
             last_topk_e_value = value * (1 - self.c_const)
@@ -381,7 +372,7 @@ class MultimodalPCSTPruning(NamedTuple):
         # Edge index mapping: local real edge idx -> original global index
         logger.log(logging.INFO, "Creating mapping for real edges")
         mapping_edges = dict(
-            zip(range(len(real_["indices"])), self.loader.to_list(real_["indices"]))
+            zip(range(len(real_["indices"])), self.loader.to_list(real_["indices"]), strict=False)
         )
         # Virtual edge handling
@@ -398,15 +389,9 @@ class MultimodalPCSTPruning(NamedTuple):
         # Virtual edges: (src → virtual), (virtual → dst)
         logger.log(logging.INFO, "Creating virtual edges")
-        virt_["edges_1"] = self.loader.py.stack(
-            [virt_["src"], virt_["node_ids"]], axis=1
-        )
-        virt_["edges_2"] = self.loader.py.stack(
-            [virt_["node_ids"], virt_["dst"]], axis=1
-        )
-        virt_["edges"] = self.loader.py.concatenate(
-            [virt_["edges_1"], virt_["edges_2"]], axis=0
-        )
+        virt_["edges_1"] = self.loader.py.stack([virt_["src"], virt_["node_ids"]], axis=1)
+        virt_["edges_2"] = self.loader.py.stack([virt_["node_ids"], virt_["dst"]], axis=1)
+        virt_["edges"] = self.loader.py.concatenate([virt_["edges_1"], virt_["edges_2"]], axis=0)
         virt_["costs"] = self.loader.py.zeros(
             (virt_["edges"].shape[0],), dtype=real_["costs"].dtype
         )
@@ -418,9 +403,7 @@ class MultimodalPCSTPruning(NamedTuple):
         # Final prizes
         logger.log(logging.INFO, "Getting final prizes")
-        final_prizes = self.loader.py.concatenate(
-            [prizes["nodes"], virt_["prizes"]], axis=0
-        )
+        final_prizes = self.loader.py.concatenate([prizes["nodes"], virt_["prizes"]], axis=0)
         # Mapping virtual node ID -> edge index in original graph
         logger.log(logging.INFO, "Creating mapping for virtual nodes")
@@ -428,6 +411,7 @@ class MultimodalPCSTPruning(NamedTuple):
             zip(
                 self.loader.to_list(virt_["node_ids"]),
                 self.loader.to_list(virt_["indices"]),
+                strict=False,
             )
         )
@@ -466,9 +450,7 @@ class MultimodalPCSTPruning(NamedTuple):
         # Retrieve the selected nodes and edges based on the given vertices and edges
         subgraph_nodes = vertices[vertices < num_nodes]
-        subgraph_edges = [
-            mapping["edges"][e.item()] for e in edges if e < num_prior_edges
-        ]
+        subgraph_edges = [mapping["edges"][e.item()] for e in edges if e < num_prior_edges]
         virtual_vertices = vertices[vertices >= num_nodes]
         if len(virtual_vertices) > 0:
             virtual_edges = [mapping["nodes"][i.item()] for i in virtual_vertices]
@@ -480,9 +462,7 @@ class MultimodalPCSTPruning(NamedTuple):
         return {"nodes": subgraph_nodes, "edges": subgraph_edges}
-    def extract_subgraph(
-        self, text_emb: list, query_emb: list, modality: str, cfg: dict
-    ) -> dict:
+    def extract_subgraph(self, text_emb: list, query_emb: list, modality: str, cfg: dict) -> dict:
         """
         Perform the Prize-Collecting Steiner Tree (PCST) algorithm to extract the subgraph.

aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl

aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl