PyPI - aiagents4pharma - Versions diffs - 1.5.4__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

aiagents4pharma 1.5.4__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

aiagents4pharma/talk2knowledgegraphs/utils/__init__.py ADDED Viewed

File without changes

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+'''
+This file is used to import all the models in the package.
+'''
+from . import embeddings
+from . import sentence_transformer
+from . import huggingface

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""
+Embeddings interface from LangChain Core.
+https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/embeddings/embeddings.py
+"""
+from abc import ABC, abstractmethod
+from langchain_core.runnables.config import run_in_executor
+class Embeddings(ABC):
+    """Interface for embedding models.
+    This is an interface meant for implementing text embedding models.
+    Text embedding models are used to map text to a vector (a point in n-dimensional
+    space).
+    Texts that are similar will usually be mapped to points that are close to each
+    other in this space. The exact details of what's considered "similar" and how
+    "distance" is measured in this space are dependent on the specific embedding model.
+    This abstraction contains a method for embedding a list of documents and a method
+    for embedding a query text. The embedding of a query text is expected to be a single
+    vector, while the embedding of a list of documents is expected to be a list of
+    vectors.
+    Usually the query embedding is identical to the document embedding, but the
+    abstraction allows treating them independently.
+    In addition to the synchronous methods, this interface also provides asynchronous
+    versions of the methods.
+    By default, the asynchronous methods are implemented using the synchronous methods;
+    however, implementations may choose to override the asynchronous methods with
+    an async native implementation for performance reasons.
+    """
+    @abstractmethod
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Embed search docs.
+        Args:
+            texts: List of text to embed.
+        Returns:
+            List of embeddings.
+        """
+    @abstractmethod
+    def embed_query(self, text: str) -> list[float]:
+        """Embed query text.
+        Args:
+            text: Text to embed.
+        Returns:
+            Embedding.
+        """
+    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Asynchronous Embed search docs.
+        Args:
+            texts: List of text to embed.
+        Returns:
+            List of embeddings.
+        """
+        return await run_in_executor(None, self.embed_documents, texts)
+    async def aembed_query(self, text: str) -> list[float]:
+        """Asynchronous Embed query text.
+        Args:
+            text: Text to embed.
+        Returns:
+            Embedding.
+        """
+        return await run_in_executor(None, self.embed_query, text)

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""
+Embedding class using HuggingFace model based on LangChain Embeddings class.
+"""
+from typing import List
+import torch
+from transformers import AutoModel, AutoTokenizer, AutoConfig
+from .embeddings import Embeddings
+class EmbeddingWithHuggingFace(Embeddings):
+    """
+    Embedding class using HuggingFace model based on LangChain Embeddings class.
+    """
+    def __init__(
+        self,
+        model_name: str,
+        model_cache_dir: str = None,
+        truncation: bool = True,
+        device: str = "cpu",
+    ):
+        """
+        Initialize the EmbeddingWithHuggingFace class.
+        Args:
+            model_name: The name of the HuggingFace model to be used.
+            model_cache_dir: The directory to cache the HuggingFace model.
+            truncation: The truncation flag for the HuggingFace tokenizer.
+            return_tensors: The return_tensors flag for the HuggingFace tokenizer.
+            device: The device to run the model on.
+        """
+        # Set parameters
+        self.model_name = model_name
+        self.model_cache_dir = model_cache_dir
+        self.truncation = truncation
+        self.device = device
+        # Try to load the model from HuggingFace Hub
+        try:
+            AutoConfig.from_pretrained(self.model_name)
+        except EnvironmentError as e:
+            raise ValueError(
+                f"Model {self.model_name} is not available on HuggingFace Hub."
+            ) from e
+        # Load HuggingFace tokenizer and model
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name, cache_dir=self.model_cache_dir
+        )
+        self.model = AutoModel.from_pretrained(
+            self.model_name, cache_dir=self.model_cache_dir
+        )
+    def meanpooling(self, output, mask) -> torch.Tensor:
+        """
+        Mean Pooling - Take attention mask into account for correct averaging.
+        According to the following documentation:
+        https://huggingface.co/NeuML/pubmedbert-base-embeddings
+        Args:
+            output: The output of the model.
+            mask: The mask of the model.
+        """
+        embeddings = output[0] # First element of model_output contains all token embeddings
+        mask = mask.unsqueeze(-1).expand(embeddings.size()).float()
+        return torch.sum(embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)
+    def embed_documents(self, texts: List[str]) -> List[float]:
+        """
+        Generate embedding for a list of input texts using HuggingFace model.
+        Args:
+            texts: The list of texts to be embedded.
+        Returns:
+            The list of embeddings for the given texts.
+        """
+        # Generate the embedding
+        with torch.no_grad():
+            inputs = self.tokenizer(
+                texts,
+                padding=True,
+                truncation=self.truncation,
+                return_tensors="pt",
+            ).to(self.device)
+            outputs = self.model.to(self.device)(**inputs)
+            embeddings = self.meanpooling(outputs, inputs['attention_mask']).cpu()
+        return embeddings
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Generate embeddings for an input text using HuggingFace model.
+        Args:
+            text: A query to be embedded.
+        Returns:
+            The embeddings for the given query.
+        """
+        # Generate the embedding
+        with torch.no_grad():
+            inputs = self.tokenizer(
+                text,
+                padding=True,
+                truncation=self.truncation,
+                return_tensors="pt",
+            ).to(self.device)
+            outputs = self.model.to(self.device)(**inputs)
+            embeddings = self.meanpooling(outputs, inputs['attention_mask']).cpu()[0]
+        return embeddings

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py ADDED Viewed

@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""
+Embedding class using SentenceTransformer model based on LangChain Embeddings class.
+"""
+from typing import List
+from sentence_transformers import SentenceTransformer
+from .embeddings import Embeddings
+class EmbeddingWithSentenceTransformer(Embeddings):
+    """
+    Embedding class using SentenceTransformer model based on LangChain Embeddings class.
+    """
+    def __init__(
+        self,
+        model_name: str,
+        model_cache_dir: str = None,
+        trust_remote_code: bool = True,
+    ):
+        """
+        Initialize the EmbeddingWithSentenceTransformer class.
+        Args:
+            model_name: The name of the SentenceTransformer model to be used.
+            model_cache_dir: The directory to cache the SentenceTransformer model.
+            trust_remote_code: Whether to trust the remote code of the model.
+        """
+        # Set parameters
+        self.model_name = model_name
+        self.model_cache_dir = model_cache_dir
+        self.trust_remote_code = trust_remote_code
+        # Load the model
+        self.model = SentenceTransformer(self.model_name,
+                                         cache_folder=self.model_cache_dir,
+                                         trust_remote_code=self.trust_remote_code)
+    def embed_documents(self, texts: List[str]) -> List[float]:
+        """
+        Generate embedding for a list of input texts using SentenceTransformer model.
+        Args:
+            texts: The list of texts to be embedded.
+        Returns:
+            The list of embeddings for the given texts.
+        """
+        # Generate the embedding
+        embeddings = self.model.encode(texts, show_progress_bar=False)
+        return embeddings
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Generate embeddings for an input text using SentenceTransformer model.
+        Args:
+            text: A query to be embedded.
+        Returns:
+            The embeddings for the given query.
+        """
+        # Generate the embedding
+        embeddings = self.model.encode(text, show_progress_bar=False)
+        return embeddings

aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py ADDED Viewed

@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+'''A utility module for knowledge graph operations'''
+from typing import Tuple
+import networkx as nx
+import pandas as pd
+def kg_to_df_pandas(kg: nx.DiGraph) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """
+    Convert a directed knowledge graph to a pandas DataFrame.
+    Args:
+        kg: The directed knowledge graph in networkX format.
+    Returns:
+        df_nodes: A pandas DataFrame of the nodes in the knowledge graph.
+        df_edges: A pandas DataFrame of the edges in the knowledge graph.
+    """
+    # Create a pandas DataFrame of the nodes
+    df_nodes = pd.DataFrame.from_dict(kg.nodes, orient='index')
+    # Create a pandas DataFrame of the edges
+    df_edges = nx.to_pandas_edgelist(kg,
+                                    source='node_source',
+                                    target='node_target')
+    return df_nodes, df_edges
+def df_pandas_to_kg(df: pd.DataFrame,
+                    df_nodes_attrs: pd.DataFrame,
+                    node_source: str,
+                    node_target: str
+                    ) -> nx.DiGraph:
+    """
+    Convert a pandas DataFrame to a directed knowledge graph.
+    Args:
+        df: A pandas DataFrame of the edges in the knowledge graph.
+        df_nodes_attrs: A pandas DataFrame of the nodes in the knowledge graph.
+        node_source: The column name of the source node in the df.
+        node_target: The column name of the target node in the df.
+    Returns:
+        kg: The directed knowledge graph in networkX format.
+    """
+    # Assert if the columns node_source and node_target are in the df
+    assert node_source in df.columns, f'{node_source} not in df'
+    assert node_target in df.columns, f'{node_target} not in df'
+    # Assert that the nodes in the index of the df_nodes_attrs
+    # are present in the source and target columns of the df
+    assert set(df_nodes_attrs.index).issubset(set(df[node_source]).\
+                                        union(set(df[node_target]))), \
+                                        'Nodes in index of df_nodes not found in df_edges'
+    # Create a knowledge graph from the dataframes
+    # Add edges and nodes to the knowledge graph
+    kg = nx.from_pandas_edgelist(df,
+                                source=node_source,
+                                target=node_target,
+                                create_using=nx.DiGraph,
+                                edge_attr=True)
+    kg.add_nodes_from(df_nodes_attrs.to_dict('index').items())
+    return kg

{aiagents4pharma-1.5.4.dist-info → aiagents4pharma-1.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: aiagents4pharma
-Version: 1.5.4
+Version: 1.6.0
 Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
@@ -10,6 +10,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: copasi_basico==0.78
 Requires-Dist: coverage==7.6.4
+Requires-Dist: einops==0.8.0
 Requires-Dist: gdown==5.2.0
 Requires-Dist: huggingface_hub==0.26.5
 Requires-Dist: joblib==1.4.2
@@ -25,10 +26,14 @@ Requires-Dist: plotly==5.24.1
 Requires-Dist: pydantic==2.9.2
 Requires-Dist: pylint==3.3.1
 Requires-Dist: pytest==8.3.3
+Requires-Dist: pytest-asyncio==0.25.2
 Requires-Dist: streamlit==1.39.0
+Requires-Dist: sentence_transformers==3.3.1
 Requires-Dist: tabulate==0.9.0
-Requires-Dist: torch==2.5.1
+Requires-Dist: torch==2.2.2
+Requires-Dist: torch_geometric==2.6.1
 Requires-Dist: tqdm==4.66.6
+Requires-Dist: transformers==4.48.0
 Requires-Dist: mkdocs==1.6.1
 Requires-Dist: mkdocs-jupyter==0.25.1
 Requires-Dist: mkdocs-material==9.5.47

{aiagents4pharma-1.5.4.dist-info → aiagents4pharma-1.6.0.dist-info}/RECORD RENAMED Viewed

@@ -16,8 +16,14 @@ aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py,sha256=QlzDXm
 aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py,sha256=-LaPLse8BkALqwFetNK7wch2dt9Dz6QKGKZKBKM6bIk,409
 aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py,sha256=KBMhCJ7yjMWqQJJctFYdpjYAlwv48Jl6i1dddXP4f08,7599
 aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py,sha256=Y-6-nORsnBJlU6rH0skyfr9S9J4PfTWK-af_p5UuknQ,7483
-aiagents4pharma-1.5.4.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
-aiagents4pharma-1.5.4.dist-info/METADATA,sha256=h2EUjQ_tbIGPYJkx7solW6NNWArVlykxyZkLI9uY0Gk,6746
-aiagents4pharma-1.5.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-aiagents4pharma-1.5.4.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
-aiagents4pharma-1.5.4.dist-info/RECORD,,
+aiagents4pharma/talk2knowledgegraphs/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py,sha256=6vQnPkeOWae_8jePjhma3sJuMTngy0I0tqzdFt6OqKg,2507
+aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py,sha256=xRb0x7SoAb0nSVZYgjrqxWvENOMDuqIdL43NMjoOaCs,153
+aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py,sha256=1nGznrAj-xT0xuSMBGz2dOujJ7M_IwSR84njxtxsy9A,2523
+aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py,sha256=2vi_elf6EgzfagFAO5QnL3a_aXZyN7B1EBziu44MTfM,3806
+aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py,sha256=36iKlisOpMtGR5xfTAlSHXWvPqVC_Jbezod8kbBBMVg,2136
+aiagents4pharma-1.6.0.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
+aiagents4pharma-1.6.0.dist-info/METADATA,sha256=kad1BCtIEP5RVf2x6uI_w4UJyM1mnDZz2R2JT7ilnZo,6931
+aiagents4pharma-1.6.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+aiagents4pharma-1.6.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
+aiagents4pharma-1.6.0.dist-info/RECORD,,

{aiagents4pharma-1.5.4.dist-info → aiagents4pharma-1.6.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{aiagents4pharma-1.5.4.dist-info → aiagents4pharma-1.6.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{aiagents4pharma-1.5.4.dist-info → aiagents4pharma-1.6.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

aiagents4pharma 1.5.4__py3-none-any.whl → 1.6.0__py3-none-any.whl

aiagents4pharma 1.5.4__py3-none-any.whl → 1.6.0__py3-none-any.whl