aiagents4pharma 1.45.1__py3-none-any.whl → 1.46.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -0
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +144 -54
- aiagents4pharma/talk2biomodels/api/__init__.py +1 -1
- aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +0 -30
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +1 -1
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +1 -10
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +42 -26
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +4 -23
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +1 -11
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +11 -10
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +193 -73
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1375 -667
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +723 -539
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +474 -58
- aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +240 -8
- aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +67 -31
- {aiagents4pharma-1.45.1.dist-info → aiagents4pharma-1.46.1.dist-info}/METADATA +10 -1
- {aiagents4pharma-1.45.1.dist-info → aiagents4pharma-1.46.1.dist-info}/RECORD +33 -23
- aiagents4pharma/talk2biomodels/api/kegg.py +0 -87
- {aiagents4pharma-1.45.1.dist-info → aiagents4pharma-1.46.1.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.45.1.dist-info → aiagents4pharma-1.46.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
# Milvus Database Configuration
|
2
|
+
# This config is used by backend tools and the MilvusConnectionManager
|
3
|
+
# Separated from frontend config for proper backend-frontend separation
|
4
|
+
#
|
5
|
+
# Environment Variables (all optional with sensible defaults):
|
6
|
+
# MILVUS_HOST - Milvus server host (default: localhost)
|
7
|
+
# MILVUS_PORT - Milvus server port (default: 19530)
|
8
|
+
# MILVUS_USER - Milvus username (default: root)
|
9
|
+
# MILVUS_PASSWORD - Milvus password (default: Milvus)
|
10
|
+
# MILVUS_DATABASE - Database name (default: t2kg_primekg)
|
11
|
+
|
12
|
+
milvus_db:
|
13
|
+
# Connection settings
|
14
|
+
alias: "default"
|
15
|
+
host: ${oc.env:MILVUS_HOST,localhost}
|
16
|
+
port: ${oc.env:MILVUS_PORT,19530}
|
17
|
+
uri: "http://${oc.env:MILVUS_HOST,localhost}:${oc.env:MILVUS_PORT,19530}"
|
18
|
+
token: "${oc.env:MILVUS_USER,root}:${oc.env:MILVUS_PASSWORD,Milvus}"
|
19
|
+
user: ${oc.env:MILVUS_USER,root}
|
20
|
+
password: ${oc.env:MILVUS_PASSWORD,Milvus}
|
21
|
+
|
22
|
+
# Database and collection names
|
23
|
+
database_name: ${oc.env:MILVUS_DATABASE,t2kg_primekg}
|
24
|
+
collection_edges: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_edges"
|
25
|
+
collection_nodes: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes"
|
26
|
+
collection_nodes_gene_protein: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes_gene_protein"
|
27
|
+
collection_nodes_molecular_function: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes_molecular_function"
|
28
|
+
collection_nodes_cellular_component: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes_cellular_component"
|
29
|
+
collection_nodes_biological_process: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes_biological_process"
|
30
|
+
collection_nodes_drug: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes_drug"
|
31
|
+
collection_nodes_disease: "${oc.env:MILVUS_DATABASE,t2kg_primekg}_nodes_disease"
|
32
|
+
|
33
|
+
# Query performance settings
|
34
|
+
query_batch_size: 10000
|
35
|
+
|
36
|
+
# Node and edge column mappings
|
37
|
+
node_id_column: "node_id"
|
38
|
+
node_attr_column: "node_attr"
|
39
|
+
edge_src_column: "edge_src"
|
40
|
+
edge_attr_column: "edge_attr"
|
41
|
+
edge_dst_column: "edge_dst"
|
42
|
+
|
43
|
+
# Node colors for visualization (moved from frontend)
|
44
|
+
node_colors_dict:
|
45
|
+
"gene/protein": "#6a79f7"
|
46
|
+
"molecular_function": "#82cafc"
|
47
|
+
"cellular_component": "#3f9b0b"
|
48
|
+
"biological_process": "#c5c9c7"
|
49
|
+
"drug": "#c4a661"
|
50
|
+
"disease": "#80013f"
|
51
|
+
|
52
|
+
# BioBridge data source configuration
|
53
|
+
biobridge:
|
54
|
+
source: "/mnt/blockstorage/biobridge_multimodal/"
|
55
|
+
node_type:
|
56
|
+
- "gene/protein"
|
57
|
+
- "molecular_function"
|
58
|
+
- "cellular_component"
|
59
|
+
- "biological_process"
|
60
|
+
- "drug"
|
61
|
+
- "disease"
|
@@ -173,18 +173,8 @@ log "Data loading phase completed. Starting main application..."
|
|
173
173
|
# Ensure Python path includes the app directory
|
174
174
|
export PYTHONPATH="/app:${PYTHONPATH}"
|
175
175
|
|
176
|
-
# Create cache directory and set path for container
|
177
|
-
cache_dir="/app/aiagents4pharma/talk2knowledgegraphs/tests/files"
|
178
|
-
if [ ! -d "$cache_dir" ]; then
|
179
|
-
log "Creating cache directory: $cache_dir"
|
180
|
-
mkdir -p "$cache_dir"
|
181
|
-
fi
|
182
|
-
|
183
|
-
# Set container-specific cache path
|
184
|
-
export CACHE_EDGE_INDEX_PATH="/app/aiagents4pharma/talk2knowledgegraphs/tests/files/t2kg_primekg_edge_index.pkl"
|
185
|
-
|
186
176
|
log "Starting main application..."
|
187
177
|
log "Python path: $PYTHONPATH"
|
188
|
-
log "
|
178
|
+
log "Note: Edge index is now loaded on-demand from Milvus (no cache file needed)"
|
189
179
|
log "Executing command: $@"
|
190
180
|
exec "$@"
|
@@ -58,21 +58,22 @@ class SystemDetector:
|
|
58
58
|
if self.use_gpu and self.os_type == "linux":
|
59
59
|
# Exact package list from original script for GPU mode
|
60
60
|
packages = [
|
61
|
-
|
62
|
-
|
63
|
-
"pip install pymilvus
|
64
|
-
"pip install numpy
|
65
|
-
"pip install pandas
|
66
|
-
"pip install tqdm
|
61
|
+
"pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12",
|
62
|
+
"pip install --extra-index-url=https://pypi.nvidia.com dask-cudf-cu12",
|
63
|
+
"pip install pymilvus",
|
64
|
+
"pip install numpy",
|
65
|
+
"pip install pandas",
|
66
|
+
"pip install tqdm",
|
67
67
|
]
|
68
68
|
return packages
|
69
69
|
else:
|
70
70
|
# CPU-only packages
|
71
71
|
packages = [
|
72
|
-
"pip install pymilvus
|
73
|
-
"pip install numpy
|
74
|
-
"pip install pandas
|
75
|
-
"pip install tqdm
|
72
|
+
"pip install pymilvus",
|
73
|
+
"pip install numpy",
|
74
|
+
"pip install pandas",
|
75
|
+
"pip install tqdm",
|
76
|
+
"pip install pyarrow",
|
76
77
|
]
|
77
78
|
return packages
|
78
79
|
|
@@ -2,14 +2,19 @@
|
|
2
2
|
Test cases for agents/t2kg_agent.py
|
3
3
|
"""
|
4
4
|
|
5
|
+
from contextlib import ExitStack
|
5
6
|
from unittest.mock import MagicMock, patch
|
6
7
|
|
7
8
|
import pandas as pd
|
8
9
|
import pytest
|
9
|
-
from langchain_core.messages import HumanMessage
|
10
|
+
from langchain_core.messages import HumanMessage, ToolMessage
|
10
11
|
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
12
|
+
from langgraph.types import Command
|
11
13
|
|
12
14
|
from ..agents.t2kg_agent import get_app
|
15
|
+
from ..tools.milvus_multimodal_subgraph_extraction import (
|
16
|
+
MultimodalSubgraphExtractionTool,
|
17
|
+
)
|
13
18
|
|
14
19
|
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
15
20
|
|
@@ -57,6 +62,8 @@ def mock_milvus_collection(name):
|
|
57
62
|
"""
|
58
63
|
Mock Milvus collection for testing.
|
59
64
|
"""
|
65
|
+
# name is intentionally unused in this simplified mock
|
66
|
+
del name
|
60
67
|
nodes = MagicMock()
|
61
68
|
nodes.query.return_value = [
|
62
69
|
{
|
@@ -98,88 +105,202 @@ def mock_milvus_collection(name):
|
|
98
105
|
]
|
99
106
|
edges.load.return_value = None
|
100
107
|
|
101
|
-
|
102
|
-
return nodes
|
103
|
-
if "edges" in name:
|
104
|
-
return edges
|
105
|
-
return None
|
108
|
+
# Default path in tests expects None for unknown collections (implicit)
|
106
109
|
|
107
110
|
|
108
|
-
def
|
109
|
-
"""
|
110
|
-
Test the T2KG agent using OpenAI model and Milvus mock.
|
111
|
-
|
112
|
-
Args:
|
113
|
-
input_dict: Input dictionary
|
114
|
-
"""
|
115
|
-
input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
|
116
|
-
input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
|
117
|
-
unique_id = 12345
|
111
|
+
def _invoke_app_with_mocks(unique_id, input_dict):
|
112
|
+
"""Run the app with patched Milvus + tool stack and return (app, config, response)."""
|
118
113
|
app = get_app(unique_id, llm_model=input_dict["llm_model"])
|
119
114
|
config = {"configurable": {"thread_id": unique_id}}
|
120
115
|
app.update_state(config, input_dict)
|
121
|
-
prompt =
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
116
|
+
prompt = (
|
117
|
+
"Adalimumab is a fully human monoclonal antibody (IgG1) that "
|
118
|
+
"specifically binds to tumor necrosis factor-alpha (TNF-α), a "
|
119
|
+
"pro-inflammatory cytokine.\n\n"
|
120
|
+
"I would like to get evidence from the knowledge graph about the "
|
121
|
+
"mechanism of actions related to Adalimumab in treating inflammatory "
|
122
|
+
"bowel disease (IBD). Please follow these steps:\n"
|
123
|
+
"- Extract a subgraph from the PrimeKG that contains information about "
|
124
|
+
"Adalimumab.\n- Summarize the extracted subgraph.\n"
|
125
|
+
"- Reason about the mechanism of action of Adalimumab in treating IBD.\n\n"
|
126
|
+
"Please set the extraction name for the extraction process as `subkg_"
|
127
|
+
f"{unique_id}`."
|
128
|
+
)
|
129
|
+
|
130
|
+
mocks = {
|
131
|
+
"pcst": MagicMock(),
|
132
|
+
"connections": MagicMock(),
|
133
|
+
"compose": MagicMock(),
|
134
|
+
"connections_manager": MagicMock(),
|
135
|
+
"db": MagicMock(),
|
136
|
+
"conn_mgr": MagicMock(),
|
137
|
+
}
|
138
|
+
|
139
|
+
with ExitStack() as stack:
|
140
|
+
stack.enter_context(
|
141
|
+
patch(
|
142
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
143
|
+
"milvus_multimodal_subgraph_extraction.Collection",
|
144
|
+
side_effect=mock_milvus_collection,
|
145
|
+
)
|
146
|
+
)
|
147
|
+
stack.enter_context(
|
148
|
+
patch(
|
149
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
150
|
+
"milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning",
|
151
|
+
mocks["pcst"],
|
152
|
+
)
|
153
|
+
)
|
154
|
+
stack.enter_context(patch("pymilvus.connections", mocks["connections"]))
|
155
|
+
stack.enter_context(
|
156
|
+
patch(
|
157
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
158
|
+
"milvus_multimodal_subgraph_extraction.hydra.initialize"
|
159
|
+
)
|
160
|
+
)
|
161
|
+
stack.enter_context(
|
162
|
+
patch(
|
163
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
164
|
+
"milvus_multimodal_subgraph_extraction.hydra.compose",
|
165
|
+
mocks["compose"],
|
166
|
+
)
|
167
|
+
)
|
168
|
+
stack.enter_context(
|
169
|
+
patch(
|
170
|
+
"aiagents4pharma.talk2knowledgegraphs.utils.database."
|
171
|
+
"milvus_connection_manager.connections",
|
172
|
+
mocks["connections_manager"],
|
173
|
+
)
|
174
|
+
)
|
175
|
+
stack.enter_context(
|
176
|
+
patch(
|
177
|
+
"aiagents4pharma.talk2knowledgegraphs.utils.database."
|
178
|
+
"milvus_connection_manager.Collection",
|
179
|
+
side_effect=mock_milvus_collection,
|
180
|
+
)
|
181
|
+
)
|
182
|
+
stack.enter_context(
|
183
|
+
patch(
|
184
|
+
"aiagents4pharma.talk2knowledgegraphs.utils.database.milvus_connection_manager.db",
|
185
|
+
mocks["db"],
|
186
|
+
)
|
187
|
+
)
|
188
|
+
stack.enter_context(
|
189
|
+
patch(
|
190
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
191
|
+
"milvus_multimodal_subgraph_extraction.MilvusConnectionManager",
|
192
|
+
mocks["conn_mgr"],
|
193
|
+
)
|
194
|
+
)
|
134
195
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
196
|
+
def mock_tool_execution(tool_call_id, state, prompt, arg_data=None):
|
197
|
+
del prompt, arg_data
|
198
|
+
mock_extracted_graph = {
|
199
|
+
"name": f"subkg_{unique_id}",
|
200
|
+
"tool_call_id": tool_call_id,
|
201
|
+
"graph_source": "BioBridge",
|
202
|
+
"topk_nodes": 3,
|
203
|
+
"topk_edges": 3,
|
204
|
+
"graph_dict": {
|
205
|
+
"name": "extracted_subgraph",
|
206
|
+
"nodes": ["Adalimumab", "TNF"],
|
207
|
+
"edges": [("Adalimumab", "acts_on", "TNF")],
|
208
|
+
},
|
209
|
+
"graph_text": (
|
210
|
+
"Adalimumab acts on TNF protein for treating inflammatory diseases."
|
211
|
+
),
|
212
|
+
"graph_summary": None,
|
213
|
+
}
|
214
|
+
tool_message = ToolMessage(
|
215
|
+
content=(
|
216
|
+
"Subgraph extraction completed successfully. "
|
217
|
+
"Extracted subgraph containing Adalimumab and TNF interactions."
|
218
|
+
),
|
219
|
+
tool_call_id=tool_call_id,
|
220
|
+
name="subgraph_extraction",
|
221
|
+
)
|
222
|
+
return Command(
|
223
|
+
update={
|
224
|
+
"messages": [tool_message],
|
225
|
+
"dic_extracted_graph": state.get("dic_extracted_graph", [])
|
226
|
+
+ [mock_extracted_graph],
|
227
|
+
}
|
228
|
+
)
|
229
|
+
|
230
|
+
stack.enter_context(
|
231
|
+
patch.object(MultimodalSubgraphExtractionTool, "_run", side_effect=mock_tool_execution)
|
232
|
+
)
|
233
|
+
|
234
|
+
# set return values via the mocks dict
|
235
|
+
mocks["connections"].has_connection.return_value = True
|
236
|
+
mocks["connections_manager"].has_connection.return_value = True
|
237
|
+
mocks["db"].using_database.return_value = None
|
238
|
+
|
239
|
+
pcst_instance = MagicMock()
|
240
|
+
pcst_instance.extract_subgraph.return_value = {
|
158
241
|
"nodes": pd.Series([0, 1]),
|
159
242
|
"edges": pd.Series([0]),
|
160
243
|
}
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
244
|
+
mocks["pcst"].return_value = pcst_instance
|
245
|
+
|
246
|
+
cfg = MagicMock()
|
247
|
+
for k, v in {
|
248
|
+
"cost_e": 1.0,
|
249
|
+
"c_const": 1.0,
|
250
|
+
"root": 0,
|
251
|
+
"num_clusters": 1,
|
252
|
+
"pruning": True,
|
253
|
+
"verbosity_level": 0,
|
254
|
+
"search_metric_type": "L2",
|
255
|
+
}.items():
|
256
|
+
setattr(cfg, k, v)
|
257
|
+
cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}
|
258
|
+
|
259
|
+
mocks["compose"].return_value = MagicMock()
|
260
|
+
mocks["compose"].return_value.tools.multimodal_subgraph_extraction = cfg
|
261
|
+
mocks[
|
262
|
+
"compose"
|
263
|
+
].return_value.tools.subgraph_summarization.prompt_subgraph_summarization = (
|
174
264
|
"Summarize the following subgraph: {textualized_subgraph}"
|
175
265
|
)
|
176
266
|
|
267
|
+
db_cfg = MagicMock()
|
268
|
+
for k, v in {
|
269
|
+
"alias": "test_alias",
|
270
|
+
"host": "localhost",
|
271
|
+
"port": "19530",
|
272
|
+
"user": "root",
|
273
|
+
"password": "password",
|
274
|
+
"database_name": "test_db",
|
275
|
+
}.items():
|
276
|
+
setattr(db_cfg.milvus_db, k, v)
|
277
|
+
mocks["compose"].return_value.utils.database.milvus = db_cfg.milvus_db
|
278
|
+
|
279
|
+
conn = MagicMock()
|
280
|
+
conn.ensure_connection.return_value = True
|
281
|
+
conn.get_connection_info.return_value = {"database": "test_db", "connected": True}
|
282
|
+
conn.test_connection.return_value = True
|
283
|
+
mocks["conn_mgr"].return_value = conn
|
284
|
+
|
177
285
|
response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
|
178
286
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
287
|
+
return app, config, response
|
288
|
+
|
289
|
+
|
290
|
+
def test_t2kg_agent_openai_milvus_mock(input_dict):
|
291
|
+
"""
|
292
|
+
Test the T2KG agent using OpenAI model and Milvus mock.
|
293
|
+
|
294
|
+
Args:
|
295
|
+
input_dict: Input dictionary
|
296
|
+
"""
|
297
|
+
input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
|
298
|
+
input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
|
299
|
+
unique_id = 12345
|
300
|
+
app, config, response = _invoke_app_with_mocks(unique_id, input_dict)
|
301
|
+
|
302
|
+
assert isinstance(response["messages"][-1].content, str)
|
303
|
+
dic_extracted_graph = app.get_state(config).values["dic_extracted_graph"][0]
|
183
304
|
assert isinstance(dic_extracted_graph, dict)
|
184
305
|
assert dic_extracted_graph["name"] == "subkg_12345"
|
185
306
|
assert dic_extracted_graph["graph_source"] == "BioBridge"
|
@@ -190,9 +311,8 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
|
|
190
311
|
assert len(dic_extracted_graph["graph_dict"]["edges"]) > 0
|
191
312
|
assert isinstance(dic_extracted_graph["graph_text"], str)
|
192
313
|
assert isinstance(dic_extracted_graph["graph_summary"], str)
|
193
|
-
assert "Adalimumab" in
|
194
|
-
assert "TNF" in
|
314
|
+
assert "Adalimumab" in response["messages"][-1].content
|
315
|
+
assert "TNF" in response["messages"][-1].content
|
195
316
|
|
196
317
|
# Another test for unknown collection
|
197
|
-
|
198
|
-
assert result is None
|
318
|
+
assert mock_milvus_collection("unknown") is None
|