aiagents4pharma 1.39.4__py3-none-any.whl → 1.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +26 -13
  2. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +83 -3
  3. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +4 -1
  4. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +36 -5
  5. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +509 -0
  6. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +85 -23
  7. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +413 -0
  8. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +10 -10
  9. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +175 -0
  10. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +11 -0
  11. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +1 -0
  12. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +509 -0
  13. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +15 -7
  14. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +31 -9
  15. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +1 -0
  16. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +393 -0
  17. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +33 -2
  18. {aiagents4pharma-1.39.4.dist-info → aiagents4pharma-1.40.0.dist-info}/METADATA +13 -14
  19. {aiagents4pharma-1.39.4.dist-info → aiagents4pharma-1.40.0.dist-info}/RECORD +22 -17
  20. {aiagents4pharma-1.39.4.dist-info → aiagents4pharma-1.40.0.dist-info}/WHEEL +0 -0
  21. {aiagents4pharma-1.39.4.dist-info → aiagents4pharma-1.40.0.dist-info}/licenses/LICENSE +0 -0
  22. {aiagents4pharma-1.39.4.dist-info → aiagents4pharma-1.40.0.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,29 @@
1
1
  _target_: agents.main_agent.get_app
2
2
  system_prompt: >
3
- You are Talk2AIAgents4Pharma agent.
4
- You are managing a team of the following 2 agents:
5
-
6
- 1. Talk2Biomodels (T2B) agent: This agent can search and
7
- operate on mathematical models of biological systems. This
8
- agent can also query an uploaded document/pdf/article.
9
-
10
- 2. Talk2KnowledgeGraphs (T2KG) agent: This agent can
11
- reason over a knowledge graph of biological entities
12
- and their relationships to answer subjective questions.
13
- Do not call this agent for objective questions or questions
14
- related to the mathematical models of biological systems.
3
+ You are the Talk2AIAgents4Pharma main agent.
4
+ Your primary responsibility is to analyze the user's query and intelligently route it to the most appropriate specialized agent based on the user's intent.
15
5
 
16
- Below mentioned are the prompts for each agent:
6
+ You manage the following two expert agents:
7
+
8
+ 1. Talk2Biomodels (T2B) Agent:
9
+ - This agent specializes in working with **mathematical models of biological systems**.
10
+ - It can perform tasks such as querying and simulating models, conducting parameter scans, analyzing time series,
11
+ and accessing annotated model components like species and reactions.
12
+ - It is also capable of processing and answering questions based on **uploaded documents, PDFs, or articles**.
13
+ - You should route queries to T2B if they contain or imply topics such as:
14
+ **"biological models", "mathematical models", "parameter scanning", "simulations", "time series", "species", "reactions", "annotations", "SBML", "model dynamics", "ODEs", "system biology"**, etc.
15
+ - For annotation-related queries, this agent should consider the annotated species and provide insights based on the retrieved context.
16
+
17
+ 2. Talk2KnowledgeGraphs (T2KG) Agent:
18
+ - This agent specializes in **reasoning over biomedical knowledge graphs**.
19
+ - It is capable of answering **subjective, inferential, or relational questions** that involve **nodes, edges, relationships, and graph structures**.
20
+ - You should route queries to T2KG if they involve or imply topics such as:
21
+ **"knowledge graph", "biomedical graph", "subgraph", "graph structure", "node relationships", "graph reasoning", "graph connections", "pathways", "entity linkage", "graph queries", "subgraph extraction", "subgraph summarization"**, etc.
22
+
23
+ Important Guidelines:
24
+ - **Think carefully about the user's true intent**. Keywords are helpful indicators, but you must use reasoning to determine the best fit.
25
+ - If the question involves **mathematical modeling**, **simulation**, or **objective factual analysis**, select the **Talk2Biomodels (T2B)** agent.
26
+ - If the question involves **graph-based reasoning**, **subjective inference**, or **relationship analysis between entities**, select the **Talk2KnowledgeGraphs (T2KG)** agent.
27
+ - **Never** route a question about **biological models** or **objective computational simulations** to T2KG.
28
+
29
+ The following are the respective agent prompts:
@@ -1,10 +1,11 @@
1
1
  '''
2
2
  Test Talk2AIAgents4Pharma supervisor agent.
3
3
  '''
4
-
4
+ from unittest.mock import patch, MagicMock
5
5
  import pytest
6
6
  from langchain_core.messages import HumanMessage
7
7
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ import pandas as pd
8
9
  from ..agents.main_agent import get_app
9
10
 
10
11
  # Define the data path for the test files of Talk2KnowledgeGraphs agent
@@ -41,6 +42,47 @@ def input_dict_fixture():
41
42
 
42
43
  return input_dict
43
44
 
45
+ def mock_milvus_collection(name):
46
+ """
47
+ Mock Milvus collection for testing.
48
+ """
49
+ nodes = MagicMock()
50
+ nodes.query.return_value = [
51
+ {"node_index": 0,
52
+ "node_id": "id1",
53
+ "node_name": "Adalimumab",
54
+ "node_type": "drug",
55
+ "feat": "featA", "feat_emb": [0.1, 0.2, 0.3],
56
+ "desc": "descA", "desc_emb": [0.1, 0.2, 0.3]},
57
+ {"node_index": 1,
58
+ "node_id": "id2",
59
+ "node_name": "TNF",
60
+ "node_type": "gene/protein",
61
+ "feat": "featB", "feat_emb": [0.4, 0.5, 0.6],
62
+ "desc": "descB", "desc_emb": [0.4, 0.5, 0.6]}
63
+ ]
64
+ nodes.load.return_value = None
65
+
66
+ edges = MagicMock()
67
+ edges.query.return_value = [
68
+ {"triplet_index": 0,
69
+ "head_id": "id1",
70
+ "head_index": 0,
71
+ "tail_id": "id2",
72
+ "tail_index": 1,
73
+ "edge_type": "drug,acts_on,gene/protein",
74
+ "display_relation": "acts_on",
75
+ "feat": "featC",
76
+ "feat_emb": [0.7, 0.8, 0.9]}
77
+ ]
78
+ edges.load.return_value = None
79
+
80
+ if "nodes" in name:
81
+ return nodes
82
+ if "edges" in name:
83
+ return edges
84
+ return None
85
+
44
86
  def test_main_agent_invokes_t2kg(input_dict):
45
87
  """
46
88
  In the following test, we will ask the main agent (supervisor)
@@ -67,8 +109,42 @@ def test_main_agent_invokes_t2kg(input_dict):
67
109
  )
68
110
  prompt = "List drugs that target the gene Interleukin-6"
69
111
 
70
- # Invoke the agent
71
- response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
112
+ with patch("aiagents4pharma.talk2knowledgegraphs.tools."
113
+ "milvus_multimodal_subgraph_extraction.Collection",
114
+ side_effect=mock_milvus_collection), \
115
+ patch("aiagents4pharma.talk2knowledgegraphs.tools."
116
+ "milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning") as mock_pcst, \
117
+ patch("pymilvus.connections") as mock_connections, \
118
+ patch("aiagents4pharma.talk2knowledgegraphs.tools."
119
+ "milvus_multimodal_subgraph_extraction.hydra.initialize"), \
120
+ patch("aiagents4pharma.talk2knowledgegraphs.tools."
121
+ "milvus_multimodal_subgraph_extraction.hydra.compose") as mock_compose:
122
+ mock_connections.has_connection.return_value = True
123
+ mock_pcst_instance = MagicMock()
124
+ mock_pcst_instance.extract_subgraph.return_value = {
125
+ "nodes": pd.Series([0, 1]),
126
+ "edges": pd.Series([0])
127
+ }
128
+ mock_pcst.return_value = mock_pcst_instance
129
+ mock_cfg = MagicMock()
130
+ mock_cfg.cost_e = 1.0
131
+ mock_cfg.c_const = 1.0
132
+ mock_cfg.root = 0
133
+ mock_cfg.num_clusters = 1
134
+ mock_cfg.pruning = True
135
+ mock_cfg.verbosity_level = 0
136
+ mock_cfg.search_metric_type = "L2"
137
+ mock_cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}
138
+ mock_compose.return_value = MagicMock()
139
+ mock_compose.return_value.tools.multimodal_subgraph_extraction = mock_cfg
140
+ mock_compose.return_value.tools.subgraph_summarization.\
141
+ prompt_subgraph_summarization = (
142
+ "Summarize the following subgraph: {textualized_subgraph}"
143
+ )
144
+
145
+ # Invoke the agent
146
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]},
147
+ config=config)
72
148
 
73
149
  # Check assistant message
74
150
  assistant_msg = response["messages"][-1].content
@@ -88,6 +164,10 @@ def test_main_agent_invokes_t2kg(input_dict):
88
164
  # Check summarized subgraph
89
165
  assert isinstance(dic_extracted_graph["graph_summary"], str)
90
166
 
167
+ # Another test for unknown collection
168
+ result = mock_milvus_collection("unknown")
169
+ assert result is None
170
+
91
171
  def test_main_agent_invokes_t2b():
92
172
  '''
93
173
  In the following test, we will ask the main agent (supervisor)
@@ -9,7 +9,10 @@ from langchain_core.language_models.chat_models import BaseChatModel
9
9
  from langgraph.checkpoint.memory import MemorySaver
10
10
  from langgraph.graph import START, StateGraph
11
11
  from langgraph.prebuilt import create_react_agent, ToolNode, InjectedState
12
- from ..tools.multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
12
+ # from ..tools.multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
13
+ from ..tools.milvus_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
14
+ # from ..tools.cu2_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
15
+ # from ..tools.gsfs_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
13
16
  from ..tools.subgraph_summarization import SubgraphSummarizationTool
14
17
  from ..tools.graphrag_reasoning import GraphRAGReasoningTool
15
18
  from ..states.state_talk2knowledgegraphs import Talk2KnowledgeGraphs
@@ -6,7 +6,16 @@ multimodal_allowed_file_types:
6
6
  - "xls"
7
7
  - "xlsx"
8
8
  upload_data_dir: "../files"
9
- kg_name: "PrimeKG"
9
+ kg_name: "BioBridge-PrimeKG"
10
+ kg_node_types:
11
+ - "gene/protein"
12
+ - "molecular_function"
13
+ - "cellular_component"
14
+ - "biological_process"
15
+ - "drug"
16
+ - "disease"
17
+ # kg_nodes_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_nodes.parquet.gzip"
18
+ # kg_edges_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_edges.parquet.gzip"
10
19
  kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal_pyg_graph.pkl"
11
20
  kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal_text_graph.pkl"
12
21
  openai_api_key: ${oc.env:OPENAI_API_KEY}
@@ -15,6 +24,7 @@ openai_llms:
15
24
  - "gpt-4-turbo"
16
25
  - "gpt-3.5-turbo"
17
26
  openai_embeddings:
27
+ - "text-embedding-ada-002"
18
28
  - "text-embedding-3-small"
19
29
  ollama_llms:
20
30
  - "llama3.2:1b"
@@ -22,11 +32,32 @@ ollama_llms:
22
32
  - "llama3.1"
23
33
  ollama_embeddings:
24
34
  - "nomic-embed-text"
35
+ default_embedding_model: "openai"
25
36
  temperature: 0.1
26
37
  streaming: False
27
- reasoning_subgraph_topk_nodes: 5
38
+ reasoning_subgraph_topk_nodes: 15
28
39
  reasoning_subgraph_topk_nodes_min: 1
29
- reasoning_subgraph_topk_nodes_max: 100
30
- reasoning_subgraph_topk_edges: 5
40
+ reasoning_subgraph_topk_nodes_max: 50
41
+ reasoning_subgraph_topk_edges: 15
31
42
  reasoning_subgraph_topk_edges_min: 1
32
- reasoning_subgraph_topk_edges_max: 100
43
+ reasoning_subgraph_topk_edges_max: 50
44
+ # Configuration for Milvus
45
+ milvus_db:
46
+ alias: "default"
47
+ host: ${oc.env:MILVUS_HOST,localhost}
48
+ port: ${oc.env:MILVUS_PORT,19530}
49
+ uri: "http://${oc.env:MILVUS_HOST,localhost}:${oc.env:MILVUS_PORT,19530}"
50
+ token: "root:Milvus"
51
+ user: "root"
52
+ password: "Milvus"
53
+ database_name: "t2kg_primekg"
54
+ collection_edges: "t2kg_primekg_edges"
55
+ collection_nodes: "t2kg_primekg_nodes"
56
+ collection_nodes_gene_protein: "t2kg_primekg_nodes_gene_protein"
57
+ collection_nodes_molecular_function: "t2kg_primekg_nodes_molecular_function"
58
+ collection_nodes_cellular_component: "t2kg_primekg_nodes_cellular_component"
59
+ collection_nodes_biological_process: "t2kg_primekg_nodes_biological_process"
60
+ collection_nodes_drug: "t2kg_primekg_nodes_drug"
61
+ collection_nodes_disease: "t2kg_primekg_nodes_disease"
62
+ query_batch_size: 1000000
63
+ cache_edge_index_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/t2kg_primekg_edge_index.pkl"