aiagents4pharma 1.39.5__py3-none-any.whl → 1.40.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +26 -13
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +83 -3
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +4 -1
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +36 -5
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +509 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +85 -23
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +413 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +175 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +509 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +393 -0
- {aiagents4pharma-1.39.5.dist-info → aiagents4pharma-1.40.1.dist-info}/METADATA +13 -14
- {aiagents4pharma-1.39.5.dist-info → aiagents4pharma-1.40.1.dist-info}/RECORD +17 -12
- {aiagents4pharma-1.39.5.dist-info → aiagents4pharma-1.40.1.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.39.5.dist-info → aiagents4pharma-1.40.1.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.39.5.dist-info → aiagents4pharma-1.40.1.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,29 @@
|
|
1
1
|
_target_: agents.main_agent.get_app
|
2
2
|
system_prompt: >
|
3
|
-
You are Talk2AIAgents4Pharma agent.
|
4
|
-
|
5
|
-
|
6
|
-
1. Talk2Biomodels (T2B) agent: This agent can search and
|
7
|
-
operate on mathematical models of biological systems. This
|
8
|
-
agent can also query an uploaded document/pdf/article.
|
9
|
-
|
10
|
-
2. Talk2KnowledgeGraphs (T2KG) agent: This agent can
|
11
|
-
reason over a knowledge graph of biological entities
|
12
|
-
and their relationships to answer subjective questions.
|
13
|
-
Do not call this agent for objective questions or questions
|
14
|
-
related to the mathematical models of biological systems.
|
3
|
+
You are the Talk2AIAgents4Pharma main agent.
|
4
|
+
Your primary responsibility is to analyze the user's query and intelligently route it to the most appropriate specialized agent based on the user's intent.
|
15
5
|
|
16
|
-
|
6
|
+
You manage the following two expert agents:
|
7
|
+
|
8
|
+
1. Talk2Biomodels (T2B) Agent:
|
9
|
+
- This agent specializes in working with **mathematical models of biological systems**.
|
10
|
+
- It can perform tasks such as querying and simulating models, conducting parameter scans, analyzing time series,
|
11
|
+
and accessing annotated model components like species and reactions.
|
12
|
+
- It is also capable of processing and answering questions based on **uploaded documents, PDFs, or articles**.
|
13
|
+
- You should route queries to T2B if they contain or imply topics such as:
|
14
|
+
**"biological models", "mathematical models", "parameter scanning", "simulations", "time series", "species", "reactions", "annotations", "SBML", "model dynamics", "ODEs", "systems biology"**, etc.
|
15
|
+
- For annotation-related queries, this agent should consider the annotated species and provide insights based on the retrieved context.
|
16
|
+
|
17
|
+
2. Talk2KnowledgeGraphs (T2KG) Agent:
|
18
|
+
- This agent specializes in **reasoning over biomedical knowledge graphs**.
|
19
|
+
- It is capable of answering **subjective, inferential, or relational questions** that involve **nodes, edges, relationships, and graph structures**.
|
20
|
+
- You should route queries to T2KG if they involve or imply topics such as:
|
21
|
+
**"knowledge graph", "biomedical graph", "subgraph", "graph structure", "node relationships", "graph reasoning", "graph connections", "pathways", "entity linkage", "graph queries", "subgraph extraction", "subgraph summarization"**, etc.
|
22
|
+
|
23
|
+
Important Guidelines:
|
24
|
+
- **Think carefully about the user's true intent**. Keywords are helpful indicators, but you must use reasoning to determine the best fit.
|
25
|
+
- If the question involves **mathematical modeling**, **simulation**, or **objective factual analysis**, select the **Talk2Biomodels (T2B)** agent.
|
26
|
+
- If the question involves **graph-based reasoning**, **subjective inference**, or **relationship analysis between entities**, select the **Talk2KnowledgeGraphs (T2KG)** agent.
|
27
|
+
- **Never** route a question about **biological models** or **objective computational simulations** to T2KG.
|
28
|
+
|
29
|
+
The following are the respective agent prompts:
|
@@ -1,10 +1,11 @@
|
|
1
1
|
'''
|
2
2
|
Test Talk2AIAgents4Pharma supervisor agent.
|
3
3
|
'''
|
4
|
-
|
4
|
+
from unittest.mock import patch, MagicMock
|
5
5
|
import pytest
|
6
6
|
from langchain_core.messages import HumanMessage
|
7
7
|
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
import pandas as pd
|
8
9
|
from ..agents.main_agent import get_app
|
9
10
|
|
10
11
|
# Define the data path for the test files of Talk2KnowledgeGraphs agent
|
@@ -41,6 +42,47 @@ def input_dict_fixture():
|
|
41
42
|
|
42
43
|
return input_dict
|
43
44
|
|
45
|
+
def mock_milvus_collection(name):
|
46
|
+
"""
|
47
|
+
Mock Milvus collection for testing.
|
48
|
+
"""
|
49
|
+
nodes = MagicMock()
|
50
|
+
nodes.query.return_value = [
|
51
|
+
{"node_index": 0,
|
52
|
+
"node_id": "id1",
|
53
|
+
"node_name": "Adalimumab",
|
54
|
+
"node_type": "drug",
|
55
|
+
"feat": "featA", "feat_emb": [0.1, 0.2, 0.3],
|
56
|
+
"desc": "descA", "desc_emb": [0.1, 0.2, 0.3]},
|
57
|
+
{"node_index": 1,
|
58
|
+
"node_id": "id2",
|
59
|
+
"node_name": "TNF",
|
60
|
+
"node_type": "gene/protein",
|
61
|
+
"feat": "featB", "feat_emb": [0.4, 0.5, 0.6],
|
62
|
+
"desc": "descB", "desc_emb": [0.4, 0.5, 0.6]}
|
63
|
+
]
|
64
|
+
nodes.load.return_value = None
|
65
|
+
|
66
|
+
edges = MagicMock()
|
67
|
+
edges.query.return_value = [
|
68
|
+
{"triplet_index": 0,
|
69
|
+
"head_id": "id1",
|
70
|
+
"head_index": 0,
|
71
|
+
"tail_id": "id2",
|
72
|
+
"tail_index": 1,
|
73
|
+
"edge_type": "drug,acts_on,gene/protein",
|
74
|
+
"display_relation": "acts_on",
|
75
|
+
"feat": "featC",
|
76
|
+
"feat_emb": [0.7, 0.8, 0.9]}
|
77
|
+
]
|
78
|
+
edges.load.return_value = None
|
79
|
+
|
80
|
+
if "nodes" in name:
|
81
|
+
return nodes
|
82
|
+
if "edges" in name:
|
83
|
+
return edges
|
84
|
+
return None
|
85
|
+
|
44
86
|
def test_main_agent_invokes_t2kg(input_dict):
|
45
87
|
"""
|
46
88
|
In the following test, we will ask the main agent (supervisor)
|
@@ -67,8 +109,42 @@ def test_main_agent_invokes_t2kg(input_dict):
|
|
67
109
|
)
|
68
110
|
prompt = "List drugs that target the gene Interleukin-6"
|
69
111
|
|
70
|
-
|
71
|
-
|
112
|
+
with patch("aiagents4pharma.talk2knowledgegraphs.tools."
|
113
|
+
"milvus_multimodal_subgraph_extraction.Collection",
|
114
|
+
side_effect=mock_milvus_collection), \
|
115
|
+
patch("aiagents4pharma.talk2knowledgegraphs.tools."
|
116
|
+
"milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning") as mock_pcst, \
|
117
|
+
patch("pymilvus.connections") as mock_connections, \
|
118
|
+
patch("aiagents4pharma.talk2knowledgegraphs.tools."
|
119
|
+
"milvus_multimodal_subgraph_extraction.hydra.initialize"), \
|
120
|
+
patch("aiagents4pharma.talk2knowledgegraphs.tools."
|
121
|
+
"milvus_multimodal_subgraph_extraction.hydra.compose") as mock_compose:
|
122
|
+
mock_connections.has_connection.return_value = True
|
123
|
+
mock_pcst_instance = MagicMock()
|
124
|
+
mock_pcst_instance.extract_subgraph.return_value = {
|
125
|
+
"nodes": pd.Series([0, 1]),
|
126
|
+
"edges": pd.Series([0])
|
127
|
+
}
|
128
|
+
mock_pcst.return_value = mock_pcst_instance
|
129
|
+
mock_cfg = MagicMock()
|
130
|
+
mock_cfg.cost_e = 1.0
|
131
|
+
mock_cfg.c_const = 1.0
|
132
|
+
mock_cfg.root = 0
|
133
|
+
mock_cfg.num_clusters = 1
|
134
|
+
mock_cfg.pruning = True
|
135
|
+
mock_cfg.verbosity_level = 0
|
136
|
+
mock_cfg.search_metric_type = "L2"
|
137
|
+
mock_cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}
|
138
|
+
mock_compose.return_value = MagicMock()
|
139
|
+
mock_compose.return_value.tools.multimodal_subgraph_extraction = mock_cfg
|
140
|
+
mock_compose.return_value.tools.subgraph_summarization.\
|
141
|
+
prompt_subgraph_summarization = (
|
142
|
+
"Summarize the following subgraph: {textualized_subgraph}"
|
143
|
+
)
|
144
|
+
|
145
|
+
# Invoke the agent
|
146
|
+
response = app.invoke({"messages": [HumanMessage(content=prompt)]},
|
147
|
+
config=config)
|
72
148
|
|
73
149
|
# Check assistant message
|
74
150
|
assistant_msg = response["messages"][-1].content
|
@@ -88,6 +164,10 @@ def test_main_agent_invokes_t2kg(input_dict):
|
|
88
164
|
# Check summarized subgraph
|
89
165
|
assert isinstance(dic_extracted_graph["graph_summary"], str)
|
90
166
|
|
167
|
+
# Another test for unknown collection
|
168
|
+
result = mock_milvus_collection("unknown")
|
169
|
+
assert result is None
|
170
|
+
|
91
171
|
def test_main_agent_invokes_t2b():
|
92
172
|
'''
|
93
173
|
In the following test, we will ask the main agent (supervisor)
|
@@ -9,7 +9,10 @@ from langchain_core.language_models.chat_models import BaseChatModel
|
|
9
9
|
from langgraph.checkpoint.memory import MemorySaver
|
10
10
|
from langgraph.graph import START, StateGraph
|
11
11
|
from langgraph.prebuilt import create_react_agent, ToolNode, InjectedState
|
12
|
-
from ..tools.multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
|
12
|
+
# from ..tools.multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
|
13
|
+
from ..tools.milvus_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
|
14
|
+
# from ..tools.cu2_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
|
15
|
+
# from ..tools.gsfs_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
|
13
16
|
from ..tools.subgraph_summarization import SubgraphSummarizationTool
|
14
17
|
from ..tools.graphrag_reasoning import GraphRAGReasoningTool
|
15
18
|
from ..states.state_talk2knowledgegraphs import Talk2KnowledgeGraphs
|
@@ -6,7 +6,16 @@ multimodal_allowed_file_types:
|
|
6
6
|
- "xls"
|
7
7
|
- "xlsx"
|
8
8
|
upload_data_dir: "../files"
|
9
|
-
kg_name: "PrimeKG"
|
9
|
+
kg_name: "BioBridge-PrimeKG"
|
10
|
+
kg_node_types:
|
11
|
+
- "gene/protein"
|
12
|
+
- "molecular_function"
|
13
|
+
- "cellular_component"
|
14
|
+
- "biological_process"
|
15
|
+
- "drug"
|
16
|
+
- "disease"
|
17
|
+
# kg_nodes_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_nodes.parquet.gzip"
|
18
|
+
# kg_edges_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_edges.parquet.gzip"
|
10
19
|
kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal_pyg_graph.pkl"
|
11
20
|
kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal_text_graph.pkl"
|
12
21
|
openai_api_key: ${oc.env:OPENAI_API_KEY}
|
@@ -15,6 +24,7 @@ openai_llms:
|
|
15
24
|
- "gpt-4-turbo"
|
16
25
|
- "gpt-3.5-turbo"
|
17
26
|
openai_embeddings:
|
27
|
+
- "text-embedding-ada-002"
|
18
28
|
- "text-embedding-3-small"
|
19
29
|
ollama_llms:
|
20
30
|
- "llama3.2:1b"
|
@@ -22,11 +32,32 @@ ollama_llms:
|
|
22
32
|
- "llama3.1"
|
23
33
|
ollama_embeddings:
|
24
34
|
- "nomic-embed-text"
|
35
|
+
default_embedding_model: "openai"
|
25
36
|
temperature: 0.1
|
26
37
|
streaming: False
|
27
|
-
reasoning_subgraph_topk_nodes:
|
38
|
+
reasoning_subgraph_topk_nodes: 15
|
28
39
|
reasoning_subgraph_topk_nodes_min: 1
|
29
|
-
reasoning_subgraph_topk_nodes_max:
|
30
|
-
reasoning_subgraph_topk_edges:
|
40
|
+
reasoning_subgraph_topk_nodes_max: 50
|
41
|
+
reasoning_subgraph_topk_edges: 15
|
31
42
|
reasoning_subgraph_topk_edges_min: 1
|
32
|
-
reasoning_subgraph_topk_edges_max:
|
43
|
+
reasoning_subgraph_topk_edges_max: 50
|
44
|
+
# Configuration for Milvus
|
45
|
+
milvus_db:
|
46
|
+
alias: "default"
|
47
|
+
host: ${oc.env:MILVUS_HOST,localhost}
|
48
|
+
port: ${oc.env:MILVUS_PORT,19530}
|
49
|
+
uri: "http://${oc.env:MILVUS_HOST,localhost}:${oc.env:MILVUS_PORT,19530}"
|
50
|
+
token: "root:Milvus"
|
51
|
+
user: "root"
|
52
|
+
password: "Milvus"
|
53
|
+
database_name: "t2kg_primekg"
|
54
|
+
collection_edges: "t2kg_primekg_edges"
|
55
|
+
collection_nodes: "t2kg_primekg_nodes"
|
56
|
+
collection_nodes_gene_protein: "t2kg_primekg_nodes_gene_protein"
|
57
|
+
collection_nodes_molecular_function: "t2kg_primekg_nodes_molecular_function"
|
58
|
+
collection_nodes_cellular_component: "t2kg_primekg_nodes_cellular_component"
|
59
|
+
collection_nodes_biological_process: "t2kg_primekg_nodes_biological_process"
|
60
|
+
collection_nodes_drug: "t2kg_primekg_nodes_drug"
|
61
|
+
collection_nodes_disease: "t2kg_primekg_nodes_disease"
|
62
|
+
query_batch_size: 1000000
|
63
|
+
cache_edge_index_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/t2kg_primekg_edge_index.pkl"
|