aiagents4pharma 1.17.1__py3-none-any.whl → 1.19.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +4 -4
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +7 -15
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +4 -1
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +4 -2
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +4 -2
- aiagents4pharma/talk2biomodels/tests/test_integration.py +34 -30
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +7 -1
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +3 -1
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +6 -3
- aiagents4pharma/talk2biomodels/tools/ask_question.py +1 -2
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +23 -10
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +11 -10
- aiagents4pharma/talk2biomodels/tools/query_article.py +6 -2
- aiagents4pharma/talk2biomodels/tools/search_models.py +8 -2
- aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +12 -3
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +56 -24
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
_target_: agents.t2kg_agent.get_app
|
2
|
+
state_modifier: >
|
3
|
+
You are talk2knowledgegraphs agent, a helpful assistant for reasoning over knowledge graphs.
|
4
|
+
User can ask questions related to the knowledge graphs, and you will provide the answers using
|
5
|
+
the provided tools as follows (if necessary):
|
6
|
+
[`subgraph_extraction`, `subgraph_summarization`, `graphrag_reasoning`].
|
7
|
+
|
8
|
+
**Tools Descriptions**:
|
9
|
+
- `subgraph_extraction`: Extract a subgraph from the knowledge graph that contains the relevant
|
10
|
+
information to answer the user's query. This tool can be used to provide a subgraph context
|
11
|
+
as a part of the reasoning process. The extracted subgraph should contain the most relevant
|
12
|
+
nodes and edges to the user's query in the form of a textualized subgraph.
|
13
|
+
- `subgraph_summarization`: Summarize the extracted textualized subgraph obtained from the
|
14
|
+
`subgraph_extraction` tool. This tool can be used to provide a concise and informative summary
|
15
|
+
of the subgraph to be used for reasoning as subgraph context. This tool highlights the most
|
16
|
+
important nodes and edges in the subgraph to respond to the user's request.
|
17
|
+
- `graphrag_reasoning`: Reason over the extracted textualized subgraph to answer the user's
|
18
|
+
prompt by also considering the context from the extracted subgraph and the retrieved
|
19
|
+
documents. User may also have a set of uploaded files that can be used to provide additional
|
20
|
+
information for reasoning. The history of previous conversations should be considered as well,
|
21
|
+
and you as an agent should provide which conversations can be included as chat history.
|
22
|
+
|
23
|
+
As an agent, you should approach each request by first understanding the user's query and then
|
24
|
+
following the appropriate steps to provide the best answer possible.
|
25
|
+
|
26
|
+
**Execution Steps**:
|
27
|
+
- Understand thoroughly the user's query and think over the best approach to answer it.
|
28
|
+
- You may not need to call any tool for each user's query. Use the related tool(s) as needed.
|
29
|
+
Think deeply whether it is necessary to call any tool to respond to the user's request.
|
30
|
+
- Call `subgraph_extraction` if there is any indication that the user needs to get the
|
31
|
+
information from the knowledge graph, which is not directly available as context in the prompt or
|
32
|
+
in the previous extracted subgraph.
|
33
|
+
If the user asks for subgraph extraction, suggest a value for the `extraction_name` argument.
|
34
|
+
You should always follow it with `subgraph_summarization` as the next tool to be invoked.
|
35
|
+
- Call `subgraph_summarization` tool to summarize the extracted subgraph and provide
|
36
|
+
useful insights over the subgraph. This tool also has the ability to filter endotypes
|
37
|
+
in the forms of differentially expressed genes that are relevant to the input query. Make sure
|
38
|
+
to include the most relevant genes if the user provides endotype-related documents.
|
39
|
+
The summary of the subgraph will be stored as `graph_summary` in the state in which you can use
|
40
|
+
it for reasoning over the subgraph in the `graphrag_reasoning` tool afterwards.
|
41
|
+
- If the user asks follow-up questions related to the extracted subgraph, you should
|
42
|
+
call `subgraph_summarization` followed by `graphrag_reasoning` tools if you think
|
43
|
+
the answer can be retrieved from the previously extracted subgraph.
|
44
|
+
- Call `graphrag_reasoning` tool to reason over the extracted subgraph and documents.
|
45
|
+
Always perform reasoning over the extracted subgraph and documents to provide
|
46
|
+
the best possible answer to the user's query. Before calling this tool,
|
47
|
+
make sure you have access to the summarized subgraph obtained from `subgraph_summarization` tool.
|
48
|
+
- By default, if the user asks for a specific question about the extracted graph, you should
|
49
|
+
call `subgraph_summarization` followed by `graphrag_reasoning` if the most recent subgraphs
|
50
|
+
contain the relevant information to answer the user's question.
|
51
|
+
Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
|
52
|
+
- It is strongly recommended to avoid calling the same tool multiple times unless
|
53
|
+
it is necessary to get the correct and thorough answer to the user's request.
|
54
|
+
|
55
|
+
**Tool Calling Workflow Examples**:
|
56
|
+
- `subgraph_extraction` -> `subgraph_summarization` -> `graphrag_reasoning` when the user asks
|
57
|
+
for specific instructions to extract the subgraph and reason over it. Follow this order to
|
58
|
+
provide the most accurate and relevant information if you think the currently available context
|
59
|
+
is not enough to answer the user's question.
|
60
|
+
- `subgraph_summarization` -> `graphrag_reasoning` when the user asks for the previously extracted
|
61
|
+
subgraph. Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
|
62
|
+
- Do not call `graphrag_reasoning` tool without calling `subgraph_summarization` tool first.
|
@@ -0,0 +1,31 @@
|
|
1
|
+
_target_: app.frontend.streamlit_app_talk2knowledgegraphs
|
2
|
+
default_user: "talk2kg_user"
|
3
|
+
data_package_allowed_file_types:
|
4
|
+
- "pdf"
|
5
|
+
endotype_allowed_file_types:
|
6
|
+
- "pdf"
|
7
|
+
upload_data_dir: "../files"
|
8
|
+
kg_name: "PrimeKG"
|
9
|
+
kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_pyg_graph.pkl"
|
10
|
+
kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_text_graph.pkl"
|
11
|
+
openai_api_key: ${oc.env:OPENAI_API_KEY}
|
12
|
+
openai_llms:
|
13
|
+
- "gpt-4o-mini"
|
14
|
+
- "gpt-4-turbo"
|
15
|
+
- "gpt-3.5-turbo"
|
16
|
+
openai_embeddings:
|
17
|
+
- "text-embedding-3-small"
|
18
|
+
ollama_llms:
|
19
|
+
- "llama3.2:1b"
|
20
|
+
- "llama3.2"
|
21
|
+
- "llama3.1"
|
22
|
+
ollama_embeddings:
|
23
|
+
- "nomic-embed-text"
|
24
|
+
temperature: 0.1
|
25
|
+
streaming: False
|
26
|
+
reasoning_subgraph_topk_nodes: 10
|
27
|
+
reasoning_subgraph_topk_nodes_min: 1
|
28
|
+
reasoning_subgraph_topk_nodes_max: 100
|
29
|
+
reasoning_subgraph_topk_edges: 10
|
30
|
+
reasoning_subgraph_topk_edges_min: 1
|
31
|
+
reasoning_subgraph_topk_edges_max: 100
|
@@ -0,0 +1,24 @@
|
|
1
|
+
_target_: talk2knowledgegraphs.tools.graphrag_reasoning
|
2
|
+
splitter_chunk_size: 1024
|
3
|
+
splitter_chunk_overlap: 256
|
4
|
+
retriever_search_type: "mmr"
|
5
|
+
retriever_k: 3
|
6
|
+
retriever_fetch_k: 10
|
7
|
+
retriever_lambda_mult: 0.3
|
8
|
+
prompt_graphrag_w_docs_context: >
|
9
|
+
Given a chat history and the latest user question, which might reference context
|
10
|
+
in the chat history, formulate a standalone question that can be understood
|
11
|
+
without the chat history. Do NOT answer the question, just reformulate it if needed
|
12
|
+
and otherwise return it as is.
|
13
|
+
|
14
|
+
Question: {input}
|
15
|
+
prompt_graphrag_w_docs: >
|
16
|
+
You are talk2knowledgegraphs, a helpful assistant performing retrieval-augmented generation (RAG)
|
17
|
+
over knowledge graphs.
|
18
|
+
One of your tasks is to answer react-based questions by using the following pieces of
|
19
|
+
retrieved context to answer the question. You can leverage a summarization of the subgraph
|
20
|
+
and the retrieved documents to provide the best possible answer to the user's query.
|
21
|
+
|
22
|
+
Subgraph Summary: {subgraph_summary}
|
23
|
+
Context: {context}
|
24
|
+
Question: {input}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
_target_: talk2knowledgegraphs.tools.subgraph_extraction
|
2
|
+
ollama_embeddings:
|
3
|
+
- "nomic-embed-text"
|
4
|
+
temperature: 0.1
|
5
|
+
streaming: False
|
6
|
+
topk: 5
|
7
|
+
topk_e: 5
|
8
|
+
cost_e: 0.5
|
9
|
+
c_const: 0.01
|
10
|
+
root: -1
|
11
|
+
num_clusters: 1
|
12
|
+
pruning: "gw"
|
13
|
+
verbosity_level: 0
|
14
|
+
node_id_column: "node_id"
|
15
|
+
node_attr_column: "node_attr"
|
16
|
+
edge_src_column: "edge_src"
|
17
|
+
edge_attr_column: "edge_attr"
|
18
|
+
edge_dst_column: "edge_dst"
|
19
|
+
prompt_endotype_filtering: >
|
20
|
+
You are talk2knowledgegraphs agent, a helpful assistant in filtering the most relevant endotype
|
21
|
+
for the subgraph extraction process.
|
22
|
+
Given the retrieved endotype documents, you need to filter the most relevant
|
23
|
+
endotype that will be used for the following reasoning process.
|
24
|
+
Only include a list of genes that exist in the provided documents
|
25
|
+
that are relevant to the input query.
|
26
|
+
For this task, you may modify your prompt to optimize the filtering process
|
27
|
+
based on factual information between each gene in the documents and the input query.
|
28
|
+
Discover as many genes as possible that are relevant for enriching the subgraph extraction process.
|
29
|
+
|
30
|
+
You do not need to include any other information in the output.
|
31
|
+
Use the following output format:
|
32
|
+
[gene_1, gene_2, ..., gene_n]
|
33
|
+
|
34
|
+
{context}
|
35
|
+
Input: {input}
|
36
|
+
prompt_endotype_addition: >
|
37
|
+
Include the following endotype for the subgraph extraction process:
|
38
|
+
splitter_chunk_size: 64
|
39
|
+
splitter_chunk_overlap: 16
|
40
|
+
retriever_search_type: "mmr"
|
41
|
+
retriever_k: 3
|
42
|
+
retriever_fetch_k: 10
|
43
|
+
retriever_lambda_mult: 0.3
|
@@ -0,0 +1,9 @@
|
|
1
|
+
_target_: talk2knowledgegraphs.tools.subgraph_summarization
|
2
|
+
prompt_subgraph_summarization: >
|
3
|
+
You are talk2knowledgegraphs agent, a helpful assistant in reasoning over a biomedical knowledge graph.
|
4
|
+
Your task is to summarize the extracted textualized subgraph to provide a concise and informative
|
5
|
+
summary of the subgraph to be used for reasoning as subgraph context. You are responsible for
|
6
|
+
highlighting the most important nodes and edges in the subgraph to respond to the user's question.
|
7
|
+
|
8
|
+
Textualized Subgraph: {textualized_subgraph}
|
9
|
+
Question: {input}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
"""
|
2
|
+
This is the state file for the Talk2KnowledgeGraphs agent.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Annotated
|
6
|
+
# import operator
|
7
|
+
from langchain_core.embeddings.embeddings import Embeddings
|
8
|
+
from langchain_core.language_models.chat_models import BaseChatModel
|
9
|
+
from langgraph.prebuilt.chat_agent_executor import AgentState
|
10
|
+
|
11
|
+
|
12
|
+
def add_data(data1: list[dict], data2: list[dict]) -> list[dict]:
    """
    A reducer function to merge two lists of named dictionaries.

    Entries are matched on their "name" key. An entry of ``data2`` whose name
    is already present replaces the existing entry in place (its position is
    kept); an entry with a new name is appended. Neither input is mutated.

    Args:
        data1: The current entries; every entry must provide a "name" key.
        data2: The incoming entries; every entry must provide a "name" key.

    Returns:
        A new list containing the merged entries.

    Raises:
        KeyError: If any entry has no "name" key.
    """
    merged = list(data1)
    idx_by_name = {entry["name"]: idx for idx, entry in enumerate(merged)}
    for entry in data2:
        name = entry["name"]
        idx = idx_by_name.get(name)
        if idx is None:
            # Remember where the new entry lands so that a later entry of
            # data2 with the same name replaces it instead of being appended
            # as a duplicate.
            idx_by_name[name] = len(merged)
            merged.append(entry)
        else:
            merged[idx] = entry
    return merged
|
25
|
+
|
26
|
+
|
27
|
+
class Talk2KnowledgeGraphs(AgentState):
    """
    State schema of the Talk2KnowledgeGraphs agent (extends ``AgentState``).
    """

    # Chat model carried in the state.
    llm_model: BaseChatModel
    # Embedding model carried in the state.
    embedding_model: Embeddings
    # Uploaded files; presumably one metadata dict per file -- verify against callers.
    uploaded_files: list
    # Top-k settings for nodes and edges; how they are applied is decided by the tools.
    topk_nodes: int
    topk_edges: int
    # Both graph lists are annotated with `add_data` as their reducer.
    dic_source_graph: Annotated[list[dict], add_data]
    dic_extracted_graph: Annotated[list[dict], add_data]
|
@@ -0,0 +1,110 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for agents/t2kg_agent.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the initial state dictionary used by the agent test cases.
    """
    # Two uploaded documents: one drug data file and one endotype file
    uploaded_files = [
        {
            "file_name": "adalimumab.pdf",
            "file_path": f"{DATA_PATH}/adalimumab.pdf",
            "file_type": "drug_data",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
        {
            "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_type": "endotype",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
    ]

    # Source knowledge graph files
    source_graphs = [
        {
            "name": "PrimeKG",
            "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
            "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
        }
    ]

    return {
        "llm_model": None,  # TBA for each test case
        "embedding_model": None,  # TBA for each test case
        "uploaded_files": uploaded_files,
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": source_graphs,
        "dic_extracted_graph": [],
    }
|
51
|
+
|
52
|
+
|
53
|
+
def test_t2kg_agent_openai(input_dict):
    """
    Run the T2KG agent on an extraction/summarization/reasoning request
    using OpenAI models.

    Args:
        input_dict: Input dictionary
    """
    # Plug the OpenAI chat and embedding models into the initial state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Create the app and seed the state of this thread
    unique_id = 12345
    app = get_app(unique_id, llm_model=llm)
    config = {"configurable": {"thread_id": unique_id}}
    app.update_state(config, input_dict)

    # NOTE: the prompt text is kept flush-left so the string content stays
    # exactly as given in the source.
    prompt = """
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.

I would like to get evidence from the knowledge graph about the mechanism of actions related to
Adalimumab in treating inflammatory bowel disease
(IBD). Please follow these steps:
- Extract a subgraph from the PrimeKG that contains information about Adalimumab.
- Summarize the extracted subgraph.
- Reason about the mechanism of action of Adalimumab in treating IBD.

Please set the extraction name for the extraction process as `subkg_12345`.
"""

    # Invoke the agent with the prompt
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)

    # The final message is the assistant's answer
    assistant_msg = response["messages"][-1].content
    assert isinstance(assistant_msg, str)

    # Inspect the first extracted subgraph stored in the state
    extracted = app.get_state(config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3
    assert isinstance(extracted["graph_dict"], dict)
    assert len(extracted["graph_dict"]["nodes"]) > 0
    assert len(extracted["graph_dict"]["edges"]) > 0
    assert isinstance(extracted["graph_text"], str)
    # The summary of the subgraph must be stored as text
    assert isinstance(extracted["graph_summary"], str)
    # The answer must mention the drug and its target
    assert "Adalimumab" in assistant_msg
    assert "TNF" in assistant_msg
|
@@ -0,0 +1,210 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/graphrag_reasoning.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the initial state dictionary, including one already extracted
    and summarized subgraph named `subkg_12345`.
    """
    # Two uploaded documents: one drug data file and one endotype file
    uploaded_files = [
        {
            "file_name": "adalimumab.pdf",
            "file_path": f"{DATA_PATH}/adalimumab.pdf",
            "file_type": "drug_data",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
        {
            "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_type": "endotype",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
    ]

    # Source knowledge graph files
    source_graphs = [
        {
            "name": "PrimeKG",
            "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
            "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
        }
    ]

    # Nodes and edges of the pre-extracted subgraph
    graph_dict = {
        "nodes": [
            ("IFNG_(3495)", {}),
            ("IKBKG_(3672)", {}),
            ("ATG16L1_(6661)", {}),
            ("inflammatory bowel disease_(28158)", {}),
            ("Crohn ileitis and jejunitis_(35814)", {}),
            ("Crohn's colitis_(83770)", {}),
        ],
        "edges": [
            (
                "IFNG_(3495)",
                "inflammatory bowel disease_(28158)",
                {
                    "relation": ["gene/protein", "associated with", "disease"],
                    "label": ["gene/protein", "associated with", "disease"],
                },
            ),
            (
                "IFNG_(3495)",
                "Crohn's colitis_(83770)",
                {
                    "relation": ["gene/protein", "associated with", "disease"],
                    "label": ["gene/protein", "associated with", "disease"],
                },
            ),
            (
                "IFNG_(3495)",
                "Crohn ileitis and jejunitis_(35814)",
                {
                    "relation": ["gene/protein", "associated with", "disease"],
                    "label": ["gene/protein", "associated with", "disease"],
                },
            ),
            (
                "ATG16L1_(6661)",
                "IKBKG_(3672)",
                {
                    "relation": ["gene/protein", "ppi", "gene/protein"],
                    "label": ["gene/protein", "ppi", "gene/protein"],
                },
            ),
            (
                "Crohn's colitis_(83770)",
                "ATG16L1_(6661)",
                {
                    "relation": ["disease", "associated with", "gene/protein"],
                    "label": ["disease", "associated with", "gene/protein"],
                },
            ),
        ],
    }

    # NOTE: the two text blocks below are kept flush-left so the string
    # content stays exactly as given in the source.
    graph_text = """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
"""

    graph_summary = """
The subgraph extracted from `subkg_12345` includes several important genes and
their associations with inflammatory bowel diseases, particularly Crohn's disease.

Key Nodes:
1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
role in immune response. It is associated with several diseases, including
inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
and autoimmune diseases.

2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
regulation of NF-kappaB, which is critical for inflammation and immune responses.
Mutations can lead to immunodeficiencies and other disorders.

3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
bowel disease type 10 (IBD10) and are associated with Crohn's colitis.

4. **Inflammatory Bowel Disease**: A category of diseases characterized by
chronic inflammation of the gastrointestinal tract, with specific mention of
mutations in the NOD2 gene as a cause.

5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
indicating a pathogenic inflammatory response.

6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
inflammation in the ileum.

Key Edges:
- **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
- **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
between the gene and the disease.
- **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
functional relationship between these two genes in the context of immune response
and inflammation.

In summary, the subgraph illustrates the connections between key genes
(IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
"""

    # The single pre-extracted subgraph entry
    extracted_graphs = [
        {
            "name": "subkg_12345",
            "tool_call_id": "tool_12345",
            "graph_source": "PrimeKG",
            "topk_nodes": 3,
            "topk_edges": 3,
            "graph_dict": graph_dict,
            "graph_text": graph_text,
            "graph_summary": graph_summary,
        }
    ]

    return {
        "llm_model": None,  # TBA for each test case
        "embedding_model": None,  # TBA for each test case
        "uploaded_files": uploaded_files,
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": source_graphs,
        "dic_extracted_graph": extracted_graphs,
    }
|
165
|
+
|
166
|
+
|
167
|
+
def test_graphrag_reasoning_openai(input_dict):
    """
    Run GraphRAG reasoning over the pre-extracted subgraph using OpenAI models.

    Args:
        input_dict: Input dictionary
    """
    # Plug the OpenAI chat and embedding models into the initial state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Create the app and seed the state of this thread
    unique_id = 12345
    app = get_app(unique_id, llm_model=llm)
    config = {"configurable": {"thread_id": unique_id}}
    app.update_state(config, input_dict)

    # NOTE: the prompt text is kept flush-left so the string content stays
    # exactly as given in the source.
    prompt = """
Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
perform Graph RAG reasoning to get insights related to nodes of genes
mentioned in the knowledge graph related to Adalimumab.

Here is an additional context:
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
"""

    # Invoke the agent; the prompt asks for the graphrag_reasoning tool
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)

    # The final message is the assistant's answer
    assistant_msg = response["messages"][-1].content
    assert isinstance(assistant_msg, str)

    # The message right before the answer must carry the reasoning tool's name
    tool_msg = response["messages"][-2]
    assert tool_msg.name == "graphrag_reasoning"

    # The answer must mention the drug and its target
    assert "Adalimumab" in assistant_msg
    assert "TNF" in assistant_msg
|