aiagents4pharma 1.5.4__tar.gz → 1.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/PKG-INFO +11 -5
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/README.md +3 -3
- aiagents4pharma-1.6.1/aiagents4pharma/talk2cells/__init__.py +6 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2cells/agents/__init__.py +5 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2cells/agents/scp_agent.py +85 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2cells/states/__init__.py +5 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2cells/states/state_talk2cells.py +13 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2cells/tools/__init__.py +5 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +0 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +6 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +77 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +114 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +71 -0
- aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +68 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma.egg-info/PKG-INFO +11 -5
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma.egg-info/SOURCES.txt +13 -1
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma.egg-info/requires.txt +7 -1
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/pyproject.toml +14 -2
- aiagents4pharma-1.6.1/release_version.txt +1 -0
- aiagents4pharma-1.5.4/release_version.txt +0 -1
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/LICENSE +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/__init__.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/__init__.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/models/__init__.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/models/basico_model.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/models/sys_bio_model.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/__init__.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/ask_question.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/custom_plotter.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/fetch_parameters.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/model_description.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/search_models.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/simulate_model.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/__init__.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma.egg-info/dependency_links.txt +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma.egg-info/top_level.txt +0 -0
- {aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: aiagents4pharma
|
3
|
-
Version: 1.5.4
|
3
|
+
Version: 1.6.1
|
4
4
|
Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -10,6 +10,7 @@ Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Dist: copasi_basico==0.78
|
12
12
|
Requires-Dist: coverage==7.6.4
|
13
|
+
Requires-Dist: einops==0.8.0
|
13
14
|
Requires-Dist: gdown==5.2.0
|
14
15
|
Requires-Dist: huggingface_hub==0.26.5
|
15
16
|
Requires-Dist: joblib==1.4.2
|
@@ -18,6 +19,7 @@ Requires-Dist: langchain-community==0.3.5
|
|
18
19
|
Requires-Dist: langchain-core==0.3.15
|
19
20
|
Requires-Dist: langchain-experimental==0.3.3
|
20
21
|
Requires-Dist: langchain-openai==0.2.5
|
22
|
+
Requires-Dist: langgraph==0.2.62
|
21
23
|
Requires-Dist: matplotlib==3.9.2
|
22
24
|
Requires-Dist: openai==1.59.4
|
23
25
|
Requires-Dist: pandas==2.2.3
|
@@ -25,10 +27,14 @@ Requires-Dist: plotly==5.24.1
|
|
25
27
|
Requires-Dist: pydantic==2.9.2
|
26
28
|
Requires-Dist: pylint==3.3.1
|
27
29
|
Requires-Dist: pytest==8.3.3
|
30
|
+
Requires-Dist: pytest-asyncio==0.25.2
|
28
31
|
Requires-Dist: streamlit==1.39.0
|
32
|
+
Requires-Dist: sentence_transformers==3.3.1
|
29
33
|
Requires-Dist: tabulate==0.9.0
|
30
|
-
Requires-Dist: torch==2.
|
34
|
+
Requires-Dist: torch==2.2.2
|
35
|
+
Requires-Dist: torch_geometric==2.6.1
|
31
36
|
Requires-Dist: tqdm==4.66.6
|
37
|
+
Requires-Dist: transformers==4.48.0
|
32
38
|
Requires-Dist: mkdocs==1.6.1
|
33
39
|
Requires-Dist: mkdocs-jupyter==0.25.1
|
34
40
|
Requires-Dist: mkdocs-material==9.5.47
|
@@ -46,8 +52,8 @@ Welcome to **AIAgents4Pharma** – an open-source project by [Team VPE](https://
|
|
46
52
|
Our toolkit currently consists of three intelligent agents, each designed to simplify and enhance access to specialized data in biology:
|
47
53
|
|
48
54
|
- **Talk2BioModels**: Engage directly with mathematical models in systems biology.
|
49
|
-
- **Talk2Cells** *(
|
50
|
-
- **Talk2KnowledgeGraphs** *(
|
55
|
+
- **Talk2Cells** *(Work in progress)*: Query and analyze sequencing data with ease.
|
56
|
+
- **Talk2KnowledgeGraphs** *(Work in progress)*: Access and explore complex biological knowledge graphs for insightful data connections.
|
51
57
|
|
52
58
|
---
|
53
59
|
|
@@ -61,7 +67,7 @@ Our toolkit currently consists of three intelligent agents, each designed to sim
|
|
61
67
|
- Adjust parameters within the model to simulate different conditions.
|
62
68
|
- Query simulation results.
|
63
69
|
|
64
|
-
### 2. Talk2Cells *(
|
70
|
+
### 2. Talk2Cells *(Work in Progress)*
|
65
71
|
|
66
72
|
**Talk2Cells** is being developed to provide direct access to and analysis of sequencing data, such as RNA-Seq or DNA-Seq, using natural language.
|
67
73
|
|
@@ -7,8 +7,8 @@ Welcome to **AIAgents4Pharma** – an open-source project by [Team VPE](https://
|
|
7
7
|
Our toolkit currently consists of three intelligent agents, each designed to simplify and enhance access to specialized data in biology:
|
8
8
|
|
9
9
|
- **Talk2BioModels**: Engage directly with mathematical models in systems biology.
|
10
|
-
- **Talk2Cells** *(
|
11
|
-
- **Talk2KnowledgeGraphs** *(
|
10
|
+
- **Talk2Cells** *(Work in progress)*: Query and analyze sequencing data with ease.
|
11
|
+
- **Talk2KnowledgeGraphs** *(Work in progress)*: Access and explore complex biological knowledge graphs for insightful data connections.
|
12
12
|
|
13
13
|
---
|
14
14
|
|
@@ -22,7 +22,7 @@ Our toolkit currently consists of three intelligent agents, each designed to sim
|
|
22
22
|
- Adjust parameters within the model to simulate different conditions.
|
23
23
|
- Query simulation results.
|
24
24
|
|
25
|
-
### 2. Talk2Cells *(
|
25
|
+
### 2. Talk2Cells *(Work in Progress)*
|
26
26
|
|
27
27
|
**Talk2Cells** is being developed to provide direct access to and analysis of sequencing data, such as RNA-Seq or DNA-Seq, using natural language.
|
28
28
|
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
'''
|
4
|
+
This is the agent file for the Talk2Cells graph.
|
5
|
+
'''
|
6
|
+
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
from langchain_openai import ChatOpenAI
|
10
|
+
from langgraph.checkpoint.memory import MemorySaver
|
11
|
+
from langgraph.graph import START, StateGraph
|
12
|
+
from langgraph.prebuilt import create_react_agent, ToolNode
|
13
|
+
from ..tools.scp_agent.search_studies import search_studies
|
14
|
+
from ..tools.scp_agent.display_studies import display_studies
|
15
|
+
from ..states.state_talk2cells import Talk2Cells
|
16
|
+
|
17
|
+
# Initialize logger
|
18
|
+
logging.basicConfig(level=logging.INFO)
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
def get_app(uniq_id):
    '''
    Build and compile the Talk2Cells LangGraph application.

    Args:
        uniq_id: Identifier forwarded as the ``thread_id`` in the config of
            every call made to the inner ReAct agent.

    Returns:
        The compiled graph: a single ``agent_scp`` node reached directly
        from ``START``, compiled with an in-memory checkpointer.
    '''
    # Tools the ReAct agent is allowed to call.
    tool_node = ToolNode([search_studies, display_studies])

    # The LLM name is read from AIAGENTS4PHARMA_LLM_MODEL and falls back
    # to 'gpt-4o-mini' when the variable is not set.
    model_name = os.getenv('AIAGENTS4PHARMA_LLM_MODEL', 'gpt-4o-mini')
    chat_llm = ChatOpenAI(model=model_name, temperature=0)

    # Prebuilt ReAct agent; it gets its own MemorySaver, separate from the
    # one used for the outer graph below.
    react_agent = create_react_agent(
        chat_llm,
        tools=tool_node,
        state_schema=Talk2Cells,
        state_modifier="You are Talk2Cells agent.",
        checkpointer=MemorySaver(),
    )

    def agent_scp_node(state: Talk2Cells):
        '''
        Graph node: run the ReAct agent on the current state and return
        whatever the agent returns, unchanged.
        '''
        logger.info("Creating SCP_Agent node with thread_id %s", uniq_id)
        run_config = {"configurable": {"thread_id": uniq_id}}
        return react_agent.invoke(state, run_config)

    # Outer graph: START -> agent_scp (the only node).
    graph = StateGraph(Talk2Cells)
    graph.add_node("agent_scp", agent_scp_node)
    graph.add_edge(START, "agent_scp")

    # In-memory checkpointer handed to the compiled graph.
    graph_memory = MemorySaver()
    compiled_app = graph.compile(checkpointer=graph_memory)
    logger.info("Compiled the graph")

    return compiled_app
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
'''
|
4
|
+
This is the state file for the Talk2Cells agent.
|
5
|
+
'''
|
6
|
+
|
7
|
+
from langgraph.prebuilt.chat_agent_executor import AgentState
|
8
|
+
|
9
|
+
class Talk2Cells(AgentState):
    """
    Graph state used by the Talk2Cells agent.

    Extends the prebuilt ``AgentState`` with one additional field.
    """
    # Declared as a string; what produces or consumes it is not visible
    # in this module.
    search_table: str
|
File without changes
|
@@ -0,0 +1,77 @@
|
|
1
|
+
"""
|
2
|
+
Embeddings interface from LangChain Core.
|
3
|
+
https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/embeddings/embeddings.py
|
4
|
+
"""
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from langchain_core.runnables.config import run_in_executor
|
7
|
+
|
8
|
+
class Embeddings(ABC):
    """Abstract interface for text embedding models.

    A text embedding model maps a piece of text to a vector, i.e. a point
    in an n-dimensional space. Similar texts are usually mapped to nearby
    points; what counts as "similar" and how "distance" is measured both
    depend on the concrete model.

    Two operations are defined:

    * ``embed_documents`` takes a list of texts and is expected to return
      one vector per text.
    * ``embed_query`` takes a single text and is expected to return a
      single vector.

    Query and document embeddings are usually identical, but keeping the
    two methods separate lets an implementation treat them differently.

    Asynchronous counterparts (``aembed_documents`` / ``aembed_query``) are
    provided as well. By default they delegate to the synchronous methods
    through ``run_in_executor``; implementations may override them with a
    natively asynchronous version for performance reasons.
    """

    @abstractmethod
    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a list of documents.

        Args:
            texts: The texts to embed.

        Returns:
            The embeddings, one per input text.
        """

    @abstractmethod
    def embed_query(self, text: str) -> list[float]:
        """Embed a single query text.

        Args:
            text: The text to embed.

        Returns:
            The embedding of the text.
        """

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        """Asynchronously embed a list of documents.

        Delegates to ``embed_documents`` through ``run_in_executor``.

        Args:
            texts: The texts to embed.

        Returns:
            The embeddings, one per input text.
        """
        return await run_in_executor(None, self.embed_documents, texts)

    async def aembed_query(self, text: str) -> list[float]:
        """Asynchronously embed a single query text.

        Delegates to ``embed_query`` through ``run_in_executor``.

        Args:
            text: The text to embed.

        Returns:
            The embedding of the text.
        """
        return await run_in_executor(None, self.embed_query, text)
|
@@ -0,0 +1,114 @@
|
|
1
|
+
"""
|
2
|
+
Embedding class using HuggingFace model based on LangChain Embeddings class.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import List
|
6
|
+
import torch
|
7
|
+
from transformers import AutoModel, AutoTokenizer, AutoConfig
|
8
|
+
from .embeddings import Embeddings
|
9
|
+
|
10
|
+
class EmbeddingWithHuggingFace(Embeddings):
    """
    Embedding class using HuggingFace model based on LangChain Embeddings class.

    Texts are tokenized, passed through the model, and the token embeddings
    are mean-pooled using the attention mask. Results are returned as
    ``torch.Tensor`` objects on the CPU (not as Python lists).
    """

    def __init__(
        self,
        model_name: str,
        model_cache_dir: str = None,
        truncation: bool = True,
        device: str = "cpu",
    ):
        """
        Initialize the EmbeddingWithHuggingFace class.

        Args:
            model_name: The name of the HuggingFace model to be used.
            model_cache_dir: The directory to cache the HuggingFace model.
            truncation: The truncation flag for the HuggingFace tokenizer.
            device: The device to run the model on.

        Raises:
            ValueError: If the model configuration cannot be loaded for
                ``model_name``.
        """

        # Set parameters
        self.model_name = model_name
        self.model_cache_dir = model_cache_dir
        self.truncation = truncation
        self.device = device

        # Probe the model configuration first so that an unknown model name
        # fails with a clear message. The probe uses the same cache
        # directory as the tokenizer/model loads below.
        try:
            AutoConfig.from_pretrained(
                self.model_name, cache_dir=self.model_cache_dir
            )
        except OSError as e:
            raise ValueError(
                f"Model {self.model_name} is not available on HuggingFace Hub."
            ) from e

        # Load HuggingFace tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, cache_dir=self.model_cache_dir
        )
        self.model = AutoModel.from_pretrained(
            self.model_name, cache_dir=self.model_cache_dir
        )

    def meanpooling(self, output, mask) -> torch.Tensor:
        """
        Mean Pooling - Take attention mask into account for correct averaging.
        According to the following documentation:
        https://huggingface.co/NeuML/pubmedbert-base-embeddings

        Args:
            output: The output of the model; its first element holds the
                token embeddings.
            mask: The attention mask produced by the tokenizer.

        Returns:
            The mask-weighted mean of the token embeddings along dimension 1.
        """
        embeddings = output[0]  # First element of model_output contains all token embeddings
        mask = mask.unsqueeze(-1).expand(embeddings.size()).float()
        # The clamp keeps the denominator away from zero.
        return torch.sum(embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)

    def _pooled_embeddings(self, texts) -> torch.Tensor:
        """
        Tokenize ``texts`` (a string or a list of strings), run the model
        without gradient tracking and return the mean-pooled embeddings on
        the CPU.
        """
        with torch.no_grad():
            inputs = self.tokenizer(
                texts,
                padding=True,
                truncation=self.truncation,
                return_tensors="pt",
            ).to(self.device)
            # Device is resolved on every call, so changes to self.device
            # after construction are honoured.
            outputs = self.model.to(self.device)(**inputs)
            return self.meanpooling(outputs, inputs['attention_mask']).cpu()

    def embed_documents(self, texts: List[str]) -> torch.Tensor:
        """
        Generate embedding for a list of input texts using HuggingFace model.

        Args:
            texts: The list of texts to be embedded.

        Returns:
            A tensor holding the pooled embeddings of the given texts.
        """
        return self._pooled_embeddings(texts)

    def embed_query(self, text: str) -> torch.Tensor:
        """
        Generate embeddings for an input text using HuggingFace model.

        Args:
            text: A query to be embedded.

        Returns:
            The first row of the pooled embeddings, i.e. the embedding of
            the given query.
        """
        return self._pooled_embeddings(text)[0]
|
aiagents4pharma-1.6.1/aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Embedding class using SentenceTransformer model based on LangChain Embeddings class.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import List
|
8
|
+
from sentence_transformers import SentenceTransformer
|
9
|
+
from .embeddings import Embeddings
|
10
|
+
|
11
|
+
|
12
|
+
class EmbeddingWithSentenceTransformer(Embeddings):
    """
    Embeddings implementation that delegates to a SentenceTransformer model.
    """

    def __init__(
        self,
        model_name: str,
        model_cache_dir: str = None,
        trust_remote_code: bool = True,
    ):
        """
        Store the settings and load the SentenceTransformer model.

        Args:
            model_name: Name of the SentenceTransformer model to load.
            model_cache_dir: Directory passed to SentenceTransformer as its
                cache folder.
            trust_remote_code: Forwarded unchanged to SentenceTransformer.
        """
        # NOTE(review): trust_remote_code defaults to True here; confirm this
        # default is intended when model names come from untrusted input.
        self.model_name = model_name
        self.model_cache_dir = model_cache_dir
        self.trust_remote_code = trust_remote_code

        # Load the model with the settings stored above.
        self.model = SentenceTransformer(
            self.model_name,
            cache_folder=self.model_cache_dir,
            trust_remote_code=self.trust_remote_code,
        )

    def embed_documents(self, texts: List[str]) -> List[float]:
        """
        Embed a list of texts with the SentenceTransformer model.

        Args:
            texts: The texts to embed.

        Returns:
            Whatever ``SentenceTransformer.encode`` returns for the list of
            texts (progress bar disabled).
        """
        return self.model.encode(texts, show_progress_bar=False)

    def embed_query(self, text: str) -> List[float]:
        """
        Embed a single query text with the SentenceTransformer model.

        Args:
            text: The query to embed.

        Returns:
            Whatever ``SentenceTransformer.encode`` returns for the query
            (progress bar disabled).
        """
        return self.model.encode(text, show_progress_bar=False)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
'''A utility module for knowledge graph operations'''
|
4
|
+
|
5
|
+
from typing import Tuple
|
6
|
+
import networkx as nx
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
def kg_to_df_pandas(kg: nx.DiGraph) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Split a directed knowledge graph into node and edge DataFrames.

    Args:
        kg: The directed knowledge graph in networkX format.

    Returns:
        A ``(df_nodes, df_edges)`` tuple. ``df_nodes`` is built from
        ``kg.nodes`` with the nodes as the index; ``df_edges`` is the edge
        list with the endpoint columns named ``node_source`` and
        ``node_target``.
    """
    # Edge list first; the two conversions are independent of each other.
    edge_frame = nx.to_pandas_edgelist(
        kg,
        source='node_source',
        target='node_target',
    )

    # One row per node, indexed by node.
    node_frame = pd.DataFrame.from_dict(kg.nodes, orient='index')

    return node_frame, edge_frame
|
30
|
+
|
31
|
+
def df_pandas_to_kg(df: pd.DataFrame,
                    df_nodes_attrs: pd.DataFrame,
                    node_source: str,
                    node_target: str
                    ) -> nx.DiGraph:
    """
    Convert a pandas DataFrame to a directed knowledge graph.

    Args:
        df: A pandas DataFrame of the edges in the knowledge graph.
        df_nodes_attrs: A pandas DataFrame of the nodes in the knowledge graph,
            indexed by node.
        node_source: The column name of the source node in the df.
        node_target: The column name of the target node in the df.

    Returns:
        kg: The directed knowledge graph in networkX format.

    Raises:
        AssertionError: If ``node_source`` or ``node_target`` is not a column
            of ``df``, or if ``df_nodes_attrs`` has an index entry that does
            not appear in either of those columns.
    """

    # Validate with explicit raises rather than `assert` statements so the
    # checks still run under `python -O`. The exception type and messages
    # are unchanged.
    for column in (node_source, node_target):
        if column not in df.columns:
            raise AssertionError(f'{column} not in df')

    # Every node that carries attributes must occur as a source or a target
    # of at least one edge.
    edge_nodes = set(df[node_source]) | set(df[node_target])
    if not set(df_nodes_attrs.index).issubset(edge_nodes):
        raise AssertionError('Nodes in index of df_nodes not found in df_edges')

    # Build the graph from the edge list; the remaining columns of df become
    # edge attributes.
    kg = nx.from_pandas_edgelist(df,
                                 source=node_source,
                                 target=node_target,
                                 create_using=nx.DiGraph,
                                 edge_attr=True)

    # Attach the node attributes, one (node, attribute-dict) pair per row.
    kg.add_nodes_from(df_nodes_attrs.to_dict('index').items())

    return kg
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: aiagents4pharma
|
3
|
-
Version: 1.5.4
|
3
|
+
Version: 1.6.1
|
4
4
|
Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -10,6 +10,7 @@ Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Dist: copasi_basico==0.78
|
12
12
|
Requires-Dist: coverage==7.6.4
|
13
|
+
Requires-Dist: einops==0.8.0
|
13
14
|
Requires-Dist: gdown==5.2.0
|
14
15
|
Requires-Dist: huggingface_hub==0.26.5
|
15
16
|
Requires-Dist: joblib==1.4.2
|
@@ -18,6 +19,7 @@ Requires-Dist: langchain-community==0.3.5
|
|
18
19
|
Requires-Dist: langchain-core==0.3.15
|
19
20
|
Requires-Dist: langchain-experimental==0.3.3
|
20
21
|
Requires-Dist: langchain-openai==0.2.5
|
22
|
+
Requires-Dist: langgraph==0.2.62
|
21
23
|
Requires-Dist: matplotlib==3.9.2
|
22
24
|
Requires-Dist: openai==1.59.4
|
23
25
|
Requires-Dist: pandas==2.2.3
|
@@ -25,10 +27,14 @@ Requires-Dist: plotly==5.24.1
|
|
25
27
|
Requires-Dist: pydantic==2.9.2
|
26
28
|
Requires-Dist: pylint==3.3.1
|
27
29
|
Requires-Dist: pytest==8.3.3
|
30
|
+
Requires-Dist: pytest-asyncio==0.25.2
|
28
31
|
Requires-Dist: streamlit==1.39.0
|
32
|
+
Requires-Dist: sentence_transformers==3.3.1
|
29
33
|
Requires-Dist: tabulate==0.9.0
|
30
|
-
Requires-Dist: torch==2.
|
34
|
+
Requires-Dist: torch==2.2.2
|
35
|
+
Requires-Dist: torch_geometric==2.6.1
|
31
36
|
Requires-Dist: tqdm==4.66.6
|
37
|
+
Requires-Dist: transformers==4.48.0
|
32
38
|
Requires-Dist: mkdocs==1.6.1
|
33
39
|
Requires-Dist: mkdocs-jupyter==0.25.1
|
34
40
|
Requires-Dist: mkdocs-material==9.5.47
|
@@ -46,8 +52,8 @@ Welcome to **AIAgents4Pharma** – an open-source project by [Team VPE](https://
|
|
46
52
|
Our toolkit currently consists of three intelligent agents, each designed to simplify and enhance access to specialized data in biology:
|
47
53
|
|
48
54
|
- **Talk2BioModels**: Engage directly with mathematical models in systems biology.
|
49
|
-
- **Talk2Cells** *(
|
50
|
-
- **Talk2KnowledgeGraphs** *(
|
55
|
+
- **Talk2Cells** *(Work in progress)*: Query and analyze sequencing data with ease.
|
56
|
+
- **Talk2KnowledgeGraphs** *(Work in progress)*: Access and explore complex biological knowledge graphs for insightful data connections.
|
51
57
|
|
52
58
|
---
|
53
59
|
|
@@ -61,7 +67,7 @@ Our toolkit currently consists of three intelligent agents, each designed to sim
|
|
61
67
|
- Adjust parameters within the model to simulate different conditions.
|
62
68
|
- Query simulation results.
|
63
69
|
|
64
|
-
### 2. Talk2Cells *(
|
70
|
+
### 2. Talk2Cells *(Work in Progress)*
|
65
71
|
|
66
72
|
**Talk2Cells** is being developed to provide direct access to and analysis of sequencing data, such as RNA-Seq or DNA-Seq, using natural language.
|
67
73
|
|
@@ -19,9 +19,21 @@ aiagents4pharma/talk2biomodels/tools/fetch_parameters.py
|
|
19
19
|
aiagents4pharma/talk2biomodels/tools/model_description.py
|
20
20
|
aiagents4pharma/talk2biomodels/tools/search_models.py
|
21
21
|
aiagents4pharma/talk2biomodels/tools/simulate_model.py
|
22
|
+
aiagents4pharma/talk2cells/__init__.py
|
23
|
+
aiagents4pharma/talk2cells/agents/__init__.py
|
24
|
+
aiagents4pharma/talk2cells/agents/scp_agent.py
|
25
|
+
aiagents4pharma/talk2cells/states/__init__.py
|
26
|
+
aiagents4pharma/talk2cells/states/state_talk2cells.py
|
27
|
+
aiagents4pharma/talk2cells/tools/__init__.py
|
22
28
|
aiagents4pharma/talk2knowledgegraphs/__init__.py
|
23
29
|
aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py
|
24
30
|
aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py
|
25
31
|
aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py
|
26
32
|
aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py
|
27
|
-
aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py
|
33
|
+
aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py
|
34
|
+
aiagents4pharma/talk2knowledgegraphs/utils/__init__.py
|
35
|
+
aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py
|
36
|
+
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py
|
37
|
+
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py
|
38
|
+
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py
|
39
|
+
aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py
|
@@ -1,5 +1,6 @@
|
|
1
1
|
copasi_basico==0.78
|
2
2
|
coverage==7.6.4
|
3
|
+
einops==0.8.0
|
3
4
|
gdown==5.2.0
|
4
5
|
huggingface_hub==0.26.5
|
5
6
|
joblib==1.4.2
|
@@ -8,6 +9,7 @@ langchain-community==0.3.5
|
|
8
9
|
langchain-core==0.3.15
|
9
10
|
langchain-experimental==0.3.3
|
10
11
|
langchain-openai==0.2.5
|
12
|
+
langgraph==0.2.62
|
11
13
|
matplotlib==3.9.2
|
12
14
|
openai==1.59.4
|
13
15
|
pandas==2.2.3
|
@@ -15,10 +17,14 @@ plotly==5.24.1
|
|
15
17
|
pydantic==2.9.2
|
16
18
|
pylint==3.3.1
|
17
19
|
pytest==8.3.3
|
20
|
+
pytest-asyncio==0.25.2
|
18
21
|
streamlit==1.39.0
|
22
|
+
sentence_transformers==3.3.1
|
19
23
|
tabulate==0.9.0
|
20
|
-
torch==2.
|
24
|
+
torch==2.2.2
|
25
|
+
torch_geometric==2.6.1
|
21
26
|
tqdm==4.66.6
|
27
|
+
transformers==4.48.0
|
22
28
|
mkdocs==1.6.1
|
23
29
|
mkdocs-jupyter==0.25.1
|
24
30
|
mkdocs-material==9.5.47
|
@@ -15,6 +15,7 @@ classifiers = [
|
|
15
15
|
dependencies = [
|
16
16
|
"copasi_basico==0.78",
|
17
17
|
"coverage==7.6.4",
|
18
|
+
"einops==0.8.0",
|
18
19
|
"gdown==5.2.0",
|
19
20
|
"huggingface_hub==0.26.5",
|
20
21
|
"joblib==1.4.2",
|
@@ -23,6 +24,7 @@ dependencies = [
|
|
23
24
|
"langchain-core==0.3.15",
|
24
25
|
"langchain-experimental==0.3.3",
|
25
26
|
"langchain-openai==0.2.5",
|
27
|
+
"langgraph==0.2.62",
|
26
28
|
"matplotlib==3.9.2",
|
27
29
|
"openai==1.59.4",
|
28
30
|
"pandas==2.2.3",
|
@@ -30,10 +32,14 @@ dependencies = [
|
|
30
32
|
"pydantic==2.9.2",
|
31
33
|
"pylint==3.3.1",
|
32
34
|
"pytest==8.3.3",
|
35
|
+
"pytest-asyncio==0.25.2",
|
33
36
|
"streamlit==1.39.0",
|
37
|
+
"sentence_transformers==3.3.1",
|
34
38
|
"tabulate==0.9.0",
|
35
|
-
"torch==2.
|
39
|
+
"torch==2.2.2",
|
40
|
+
"torch_geometric==2.6.1",
|
36
41
|
"tqdm==4.66.6",
|
42
|
+
"transformers==4.48.0",
|
37
43
|
"mkdocs==1.6.1",
|
38
44
|
"mkdocs-jupyter==0.25.1",
|
39
45
|
"mkdocs-material==9.5.47",
|
@@ -53,8 +59,14 @@ packages = ["aiagents4pharma",
|
|
53
59
|
"aiagents4pharma.talk2biomodels",
|
54
60
|
"aiagents4pharma.talk2biomodels.models",
|
55
61
|
"aiagents4pharma.talk2biomodels.tools",
|
62
|
+
"aiagents4pharma.talk2cells",
|
63
|
+
"aiagents4pharma.talk2cells.agents",
|
64
|
+
"aiagents4pharma.talk2cells.states",
|
65
|
+
"aiagents4pharma.talk2cells.tools",
|
56
66
|
"aiagents4pharma.talk2knowledgegraphs",
|
57
|
-
"aiagents4pharma.talk2knowledgegraphs.datasets"
|
67
|
+
"aiagents4pharma.talk2knowledgegraphs.datasets",
|
68
|
+
"aiagents4pharma.talk2knowledgegraphs.utils",
|
69
|
+
"aiagents4pharma.talk2knowledgegraphs.utils.embeddings"]
|
58
70
|
|
59
71
|
# [tool.setuptools.packages.find]
|
60
72
|
# where = ["aiagents4pharma", "aiagents4pharma.talk2biomodels"]
|
@@ -0,0 +1 @@
|
|
1
|
+
v1.6.1
|
@@ -1 +0,0 @@
|
|
1
|
-
v1.5.4
|
File without changes
|
File without changes
|
File without changes
|
{aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/models/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/__init__.py
RENAMED
File without changes
|
{aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2biomodels/tools/ask_question.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma/talk2knowledgegraphs/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{aiagents4pharma-1.5.4 → aiagents4pharma-1.6.1}/aiagents4pharma.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|