aiagents4pharma 1.17.1__py3-none-any.whl → 1.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +4 -4
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +7 -15
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +4 -1
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +4 -2
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +4 -2
- aiagents4pharma/talk2biomodels/tests/test_integration.py +34 -30
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +7 -1
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +3 -1
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +6 -3
- aiagents4pharma/talk2biomodels/tools/ask_question.py +1 -2
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +23 -10
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +11 -10
- aiagents4pharma/talk2biomodels/tools/query_article.py +6 -2
- aiagents4pharma/talk2biomodels/tools/search_models.py +8 -2
- aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +12 -3
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +56 -24
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
_target_: agents.t2kg_agent.get_app
|
2
|
+
state_modifier: >
|
3
|
+
You are talk2knowledgegraphs agent, a helpful assistant for reasoning over knowledge graphs.
|
4
|
+
User can ask questions related to the knowledge graphs, and you will provide the answers using
|
5
|
+
the provided tools as follows (if necessary):
|
6
|
+
[`subgraph_extraction`, `subgraph_summarization`, `graphrag_reasoning`].
|
7
|
+
|
8
|
+
**Tools Descriptions**:
|
9
|
+
- `subgraph_extraction`: Extract a subgraph from the knowledge graph that contains the relevant
|
10
|
+
information to answer the user's query. This tool can be used to provide a subgraph context
|
11
|
+
as a part of the reasoning process. The extracted subgraph should contain the most relevant
|
12
|
+
nodes and edges to the user's query in the form of a textualized subgraph.
|
13
|
+
- `subgraph_summarization`: Summarize the extracted textualized subgraph obtained from the
|
14
|
+
`subgraph_extraction` tool. This tool can be used to provide a concise and informative summary
|
15
|
+
of the subgraph to be used for reasoning as subgraph context. This tool highlights the most
|
16
|
+
important nodes and edges in the subgraph to respond to the user's request.
|
17
|
+
- `graphrag_reasoning`: Reason over the extracted textualized subgraph to answer the user's
|
18
|
+
prompt by also considering the context from the extracted subgraph and the retrieved
|
19
|
+
documents. User may also have a set of uploaded files that can be used to provide additional
|
20
|
+
information for reasoning. The history of previous conversations should be considered as well,
|
21
|
+
and you as an agent should provide which conversations can be included as chat history.
|
22
|
+
|
23
|
+
As an agent, you should approach each request by first understanding the user's query and then
|
24
|
+
following the appropriate steps to provide the best answer possible.
|
25
|
+
|
26
|
+
**Execution Steps**:
|
27
|
+
- Understand thoroughly the user's query and think over the best approach to answer it.
|
28
|
+
- You may not need to call any tool for each user's query. Use the related tool(s) as needed.
|
29
|
+
Think deeply whether it is necessary to call any tool to respond to the user's request.
|
30
|
+
- Call `subgraph_extraction` if there is any indication that the user needs to get the
|
31
|
+
information from the knowledge graph, which is not directly available as context in the prompt or
|
32
|
+
in the previous extracted subgraph.
|
33
|
+
If the user asks for subgraph extraction, suggest a value for the `extraction_name` argument.
|
34
|
+
You should always follow it with `subgraph_summarization` as the next tool to be invoked.
|
35
|
+
- Call `subgraph_summarization` tool to summarize the extracted subgraph and provide
|
36
|
+
useful insights over the subgraph. This tool also has the ability to filter endotypes
|
37
|
+
in the forms of differentially expressed genes that are relevant to the input query. Make sure
|
38
|
+
to include the most relevant genes if the user provides endotype-related documents.
|
39
|
+
The summary of the subgraph will be stored as `graph_summary` in the state in which you can use
|
40
|
+
it for reasoning over the subgraph in the `graphrag_reasoning` tool afterwards.
|
41
|
+
- If the user asks follow-up questions related to the extracted subgraph, you should
|
42
|
+
call `subgraph_summarization` followed by `graphrag_reasoning` tools if you think
|
43
|
+
the answer can be retrieved from the previously extracted subgraph.
|
44
|
+
- Call `graphrag_reasoning` tool to reason over the extracted subgraph and documents.
|
45
|
+
Always perform reasoning over the extracted subgraph and documents to provide
|
46
|
+
the best possible answer to the user's query. Before calling this tool,
|
47
|
+
make sure you have access to the summarized subgraph obtained from `subgraph_summarization` tool.
|
48
|
+
- By default, if the user asks for a specific question about the extracted graph, you should
|
49
|
+
call `subgraph_summarization` followed by `graphrag_reasoning` if the most recent subgraphs
|
50
|
+
contain the relevant information to answer the user's question.
|
51
|
+
Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
|
52
|
+
- It is strongly recommended to avoid calling the same tool multiple times unless
|
53
|
+
it is necessary to get the correct and thorough answer to the user's request.
|
54
|
+
|
55
|
+
**Tool Calling Workflow Examples**:
|
56
|
+
- `subgraph_extraction` -> `subgraph_summarization` -> `graphrag_reasoning` when the user asks
|
57
|
+
for specific instructions to extract the subgraph and reason over it. Follow this order to
|
58
|
+
provide the most accurate and relevant information if you think the currently available context
|
59
|
+
is not enough to answer the user's question.
|
60
|
+
- `subgraph_summarization` -> `graphrag_reasoning` when the user asks for the previously extracted
|
61
|
+
subgraph. Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
|
62
|
+
- Do not call `graphrag_reasoning` tool without calling `subgraph_summarization` tool first.
|
@@ -0,0 +1,31 @@
|
|
1
|
+
_target_: app.frontend.streamlit_app_talk2knowledgegraphs
|
2
|
+
default_user: "talk2kg_user"
|
3
|
+
data_package_allowed_file_types:
|
4
|
+
- "pdf"
|
5
|
+
endotype_allowed_file_types:
|
6
|
+
- "pdf"
|
7
|
+
upload_data_dir: "../files"
|
8
|
+
kg_name: "PrimeKG"
|
9
|
+
kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_pyg_graph.pkl"
|
10
|
+
kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_text_graph.pkl"
|
11
|
+
openai_api_key: ${oc.env:OPENAI_API_KEY}
|
12
|
+
openai_llms:
|
13
|
+
- "gpt-4o-mini"
|
14
|
+
- "gpt-4-turbo"
|
15
|
+
- "gpt-3.5-turbo"
|
16
|
+
openai_embeddings:
|
17
|
+
- "text-embedding-3-small"
|
18
|
+
ollama_llms:
|
19
|
+
- "llama3.2:1b"
|
20
|
+
- "llama3.2"
|
21
|
+
- "llama3.1"
|
22
|
+
ollama_embeddings:
|
23
|
+
- "nomic-embed-text"
|
24
|
+
temperature: 0.1
|
25
|
+
streaming: False
|
26
|
+
reasoning_subgraph_topk_nodes: 10
|
27
|
+
reasoning_subgraph_topk_nodes_min: 1
|
28
|
+
reasoning_subgraph_topk_nodes_max: 100
|
29
|
+
reasoning_subgraph_topk_edges: 10
|
30
|
+
reasoning_subgraph_topk_edges_min: 1
|
31
|
+
reasoning_subgraph_topk_edges_max: 100
|
@@ -0,0 +1,24 @@
|
|
1
|
+
_target_: talk2knowledgegraphs.tools.graphrag_reasoning
|
2
|
+
splitter_chunk_size: 1024
|
3
|
+
splitter_chunk_overlap: 256
|
4
|
+
retriever_search_type: "mmr"
|
5
|
+
retriever_k: 3
|
6
|
+
retriever_fetch_k: 10
|
7
|
+
retriever_lambda_mult: 0.3
|
8
|
+
prompt_graphrag_w_docs_context: >
|
9
|
+
Given a chat history and the latest user question, which might reference context
|
10
|
+
in the chat history, formulate a standalone question that can be understood
|
11
|
+
without the chat history. Do NOT answer the question, just reformulate it if needed
|
12
|
+
and otherwise return it as is.
|
13
|
+
|
14
|
+
Question: {input}
|
15
|
+
prompt_graphrag_w_docs: >
|
16
|
+
You are talk2knowledgegraphs, a helpful assistant performing retrieval-augmented generation (RAG)
|
17
|
+
over knowledge graphs.
|
18
|
+
One of your tasks is to answer react-based questions by using the following pieces of
|
19
|
+
retrieved context to answer the question. You can leverage a summarization of the subgraph
|
20
|
+
and the retrieved documents to provide the best possible answer to the user's query.
|
21
|
+
|
22
|
+
Subgraph Summary: {subgraph_summary}
|
23
|
+
Context: {context}
|
24
|
+
Question: {input}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
_target_: talk2knowledgegraphs.tools.subgraph_extraction
|
2
|
+
ollama_embeddings:
|
3
|
+
- "nomic-embed-text"
|
4
|
+
temperature: 0.1
|
5
|
+
streaming: False
|
6
|
+
topk: 5
|
7
|
+
topk_e: 5
|
8
|
+
cost_e: 0.5
|
9
|
+
c_const: 0.01
|
10
|
+
root: -1
|
11
|
+
num_clusters: 1
|
12
|
+
pruning: "gw"
|
13
|
+
verbosity_level: 0
|
14
|
+
node_id_column: "node_id"
|
15
|
+
node_attr_column: "node_attr"
|
16
|
+
edge_src_column: "edge_src"
|
17
|
+
edge_attr_column: "edge_attr"
|
18
|
+
edge_dst_column: "edge_dst"
|
19
|
+
prompt_endotype_filtering: >
|
20
|
+
You are talk2knowledgegraphs agent, a helpful assistant in filtering the most relevant endotype
|
21
|
+
for the subgraph extraction process.
|
22
|
+
Given the retrieved endotype documents, you need to filter the most relevant
|
23
|
+
endotype that will be used for the following reasoning process.
|
24
|
+
Only include a list of genes that exist in the provided documents
|
25
|
+
that are relevant to the input query.
|
26
|
+
For this task, you may modify your prompt to optimize the filtering process
|
27
|
+
based on factual information between each gene in the documents and the input query.
|
28
|
+
Discover as many genes as possible that are relevant for enriching the subgraph extraction process.
|
29
|
+
|
30
|
+
You do not need to include any other information in the output.
|
31
|
+
Use the following output format:
|
32
|
+
[gene_1, gene_2, ..., gene_n]
|
33
|
+
|
34
|
+
{context}
|
35
|
+
Input: {input}
|
36
|
+
prompt_endotype_addition: >
|
37
|
+
Include the following endotype for the subgraph extraction process:
|
38
|
+
splitter_chunk_size: 64
|
39
|
+
splitter_chunk_overlap: 16
|
40
|
+
retriever_search_type: "mmr"
|
41
|
+
retriever_k: 3
|
42
|
+
retriever_fetch_k: 10
|
43
|
+
retriever_lambda_mult: 0.3
|
@@ -0,0 +1,9 @@
|
|
1
|
+
_target_: talk2knowledgegraphs.tools.subgraph_summarization
|
2
|
+
prompt_subgraph_summarization: >
|
3
|
+
You are talk2knowledgegraphs agent, a helpful assistant in reasoning over a biomedical knowledge graph.
|
4
|
+
Your task is to summarize the extracted textualized subgraph to provide a concise and informative
|
5
|
+
summary of the subgraph to be used for reasoning as subgraph context. You are responsible for
|
6
|
+
highlighting the most important nodes and edges in the subgraph to respond to the user's question.
|
7
|
+
|
8
|
+
Textualized Subgraph: {textualized_subgraph}
|
9
|
+
Question: {input}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
"""
|
2
|
+
This is the state file for the Talk2KnowledgeGraphs agent.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Annotated
|
6
|
+
# import operator
|
7
|
+
from langchain_core.embeddings.embeddings import Embeddings
|
8
|
+
from langchain_core.language_models.chat_models import BaseChatModel
|
9
|
+
from langgraph.prebuilt.chat_agent_executor import AgentState
|
10
|
+
|
11
|
+
|
12
|
+
def add_data(data1: list[dict], data2: list[dict]) -> list[dict]:
    """
    A reducer function to merge two lists of named dictionaries.

    Entries are matched on their ``"name"`` key. An entry of ``data2`` whose
    name is already present replaces the existing entry at the same position;
    an entry with a new name is appended. If ``data2`` itself contains several
    entries with the same new name, the last one wins instead of producing
    duplicate entries.

    Args:
        data1: The current list of dictionaries, each holding a "name" key.
        data2: The incoming list of dictionaries, each holding a "name" key.

    Returns:
        A new merged list; neither ``data1`` nor ``data2`` is modified.

    Raises:
        KeyError: If any entry does not have a "name" key.
    """
    merged = list(data1)
    idx_by_name = {entry["name"]: idx for idx, entry in enumerate(merged)}
    for entry in data2:
        idx = idx_by_name.get(entry["name"])
        if idx is None:
            # Remember where the new entry lands so that a later entry with
            # the same name replaces it rather than being appended again.
            idx_by_name[entry["name"]] = len(merged)
            merged.append(entry)
        else:
            merged[idx] = entry
    return merged
|
25
|
+
|
26
|
+
|
27
|
+
class Talk2KnowledgeGraphs(AgentState):
    """
    The state for the Talk2KnowledgeGraphs agent.

    Extends the prebuilt ``AgentState`` with the model handles, uploaded
    files, top-k settings and graph records declared below.
    """

    # Chat model carried in the state.
    llm_model: BaseChatModel
    # Embedding model carried in the state.
    embedding_model: Embeddings
    # Uploaded files; presumably a list of per-file metadata dicts -- TODO confirm.
    uploaded_files: list
    # Top-k settings for nodes and edges; presumably consumed by the
    # subgraph extraction tool -- verify against the tool implementation.
    topk_nodes: int
    topk_edges: int
    # Both graph lists use `add_data` as their reducer, so updates are merged
    # by each entry's "name" key instead of overwriting the whole list.
    dic_source_graph: Annotated[list[dict], add_data]
    dic_extracted_graph: Annotated[list[dict], add_data]
|
@@ -0,0 +1,110 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for agents/t2kg_agent.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Directory holding the files referenced by the fixture below (PDF documents
# and pickled graphs). The path is relative, so it is resolved against the
# directory the tests are started from.
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Provide the base state dictionary for the agent test cases.

    The `llm_model` and `embedding_model` entries are left as None here and
    are filled in by each test case. No subgraph has been extracted yet, so
    `dic_extracted_graph` starts out empty.
    """
    # Uploaded document tagged as drug data
    drug_file = {
        "file_name": "adalimumab.pdf",
        "file_path": f"{DATA_PATH}/adalimumab.pdf",
        "file_type": "drug_data",
        "uploaded_by": "VPEUser",
        "uploaded_timestamp": "2024-11-05 00:00:00",
    }
    # Uploaded document tagged as endotype
    endotype_file = {
        "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
        "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
        "file_type": "endotype",
        "uploaded_by": "VPEUser",
        "uploaded_timestamp": "2024-11-05 00:00:00",
    }
    # Source knowledge graph record
    primekg = {
        "name": "PrimeKG",
        "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
        "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
    }

    return {
        "llm_model": None,  # set by each test case
        "embedding_model": None,  # set by each test case
        "uploaded_files": [drug_file, endotype_file],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [primekg],
        "dic_extracted_graph": [],
    }
|
51
|
+
|
52
|
+
|
53
|
+
def test_t2kg_agent_openai(input_dict):
    """
    Run the T2KG agent end to end with OpenAI models.

    The prompt asks for subgraph extraction, summarization and reasoning in
    one request; the test then inspects both the final assistant message and
    the extracted-subgraph record stored in the agent state.

    Args:
        input_dict: Input dictionary
    """
    # Plug the OpenAI chat and embedding models into the state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state for this thread
    thread_id = 12345
    agent = get_app(thread_id, llm_model=llm)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    prompt = """
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.

I would like to get evidence from the knowledge graph about the mechanism of actions related to
Adalimumab in treating inflammatory bowel disease
(IBD). Please follow these steps:
- Extract a subgraph from the PrimeKG that contains information about Adalimumab.
- Summarize the extracted subgraph.
- Reason about the mechanism of action of Adalimumab in treating IBD.

Please set the extraction name for the extraction process as `subkg_12345`.
"""

    # Invoke the agent with the extraction/summarization/reasoning request
    result = agent.invoke({"messages": [HumanMessage(content=prompt)]}, config=run_config)

    # The final message must be plain text
    answer = result["messages"][-1].content
    assert isinstance(answer, str)

    # The first extracted-subgraph record must describe the requested extraction
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    expected_fields = {
        "name": "subkg_12345",
        "graph_source": "PrimeKG",
        "topk_nodes": 3,
        "topk_edges": 3,
    }
    for field, expected in expected_fields.items():
        assert extracted[field] == expected

    # The extracted graph must be non-empty and come with a textual form
    assert isinstance(extracted["graph_dict"], dict)
    for part in ("nodes", "edges"):
        assert len(extracted["graph_dict"][part]) > 0
    assert isinstance(extracted["graph_text"], str)

    # The summarization step must have stored a summary
    assert isinstance(extracted["graph_summary"], str)

    # The reasoning answer must mention the drug and its target
    for keyword in ("Adalimumab", "TNF"):
        assert keyword in answer
|
@@ -0,0 +1,210 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/graphrag_reasoning.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Directory holding the files referenced by the fixture below (PDF documents
# and pickled graphs). The path is relative, so it is resolved against the
# directory the tests are started from.
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Input dictionary fixture.

    Returns the state dictionary used by the test cases in this module. The
    model entries are None and are set by each test case. Unlike an empty
    starting state, `dic_extracted_graph` already holds one complete record
    named `subkg_12345` (graph dictionary, textualized graph and summary).
    """
    input_dict = {
        "llm_model": None,  # TBA for each test case
        "embedding_model": None,  # TBA for each test case
        # Two uploaded documents: one tagged "drug_data", one tagged "endotype"
        "uploaded_files": [
            {
                "file_name": "adalimumab.pdf",
                "file_path": f"{DATA_PATH}/adalimumab.pdf",
                "file_type": "drug_data",
                "uploaded_by": "VPEUser",
                "uploaded_timestamp": "2024-11-05 00:00:00",
            },
            {
                "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
                "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
                "file_type": "endotype",
                "uploaded_by": "VPEUser",
                "uploaded_timestamp": "2024-11-05 00:00:00",
            },
        ],
        "topk_nodes": 3,
        "topk_edges": 3,
        # Source knowledge graph record with the paths of its two pickle files
        "dic_source_graph": [
            {
                "name": "PrimeKG",
                "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
                "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
            }
        ],
        # Pre-populated extraction record; the test prompt in this module asks
        # the agent to reason over `subkg_12345` without extracting a new subgraph
        "dic_extracted_graph": [
            {
                "name": "subkg_12345",
                "tool_call_id": "tool_12345",
                "graph_source": "PrimeKG",
                "topk_nodes": 3,
                "topk_edges": 3,
                # Nodes as (node_id, attributes) pairs and edges as
                # (head, tail, attributes) triples
                "graph_dict": {
                    'nodes': [('IFNG_(3495)', {}),
                              ('IKBKG_(3672)', {}),
                              ('ATG16L1_(6661)', {}),
                              ('inflammatory bowel disease_(28158)', {}),
                              ('Crohn ileitis and jejunitis_(35814)', {}),
                              ("Crohn's colitis_(83770)", {})],
                    'edges': [('IFNG_(3495)', 'inflammatory bowel disease_(28158)',
                               {'relation': ['gene/protein', 'associated with', 'disease'],
                                'label': ['gene/protein', 'associated with', 'disease']}),
                              ('IFNG_(3495)', "Crohn's colitis_(83770)",
                               {'relation': ['gene/protein', 'associated with', 'disease'],
                                'label': ['gene/protein', 'associated with', 'disease']}),
                              ('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)',
                               {'relation': ['gene/protein', 'associated with', 'disease'],
                                'label': ['gene/protein', 'associated with', 'disease']}),
                              ('ATG16L1_(6661)', 'IKBKG_(3672)',
                               {'relation': ['gene/protein', 'ppi', 'gene/protein'],
                                'label': ['gene/protein', 'ppi', 'gene/protein']}),
                              ("Crohn's colitis_(83770)", 'ATG16L1_(6661)',
                               {'relation': ['disease', 'associated with', 'gene/protein'],
                                'label': ['disease', 'associated with', 'gene/protein']})]},
                # Textualized subgraph: a node table followed by an edge table
                "graph_text": """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
""",
                # Summary text stored for the subgraph above
                "graph_summary": """
The subgraph extracted from `subkg_12345` includes several important genes and
their associations with inflammatory bowel diseases, particularly Crohn's disease.

Key Nodes:
1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
role in immune response. It is associated with several diseases, including
inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
and autoimmune diseases.

2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
regulation of NF-kappaB, which is critical for inflammation and immune responses.
Mutations can lead to immunodeficiencies and other disorders.

3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
bowel disease type 10 (IBD10) and are associated with Crohn's colitis.

4. **Inflammatory Bowel Disease**: A category of diseases characterized by
chronic inflammation of the gastrointestinal tract, with specific mention of
mutations in the NOD2 gene as a cause.

5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
indicating a pathogenic inflammatory response.

6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
inflammation in the ileum.

Key Edges:
- **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
- **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
between the gene and the disease.
- **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
functional relationship between these two genes in the context of immune response
and inflammation.

In summary, the subgraph illustrates the connections between key genes
(IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
""",
            }
        ],
    }

    return input_dict
|
165
|
+
|
166
|
+
|
167
|
+
def test_graphrag_reasoning_openai(input_dict):
    """
    Exercise the GraphRAG reasoning tool through the agent with OpenAI models.

    The state already contains the extracted subgraph `subkg_12345`, and the
    prompt asks the agent to reason over it without a new extraction. The
    test checks that the message preceding the final answer comes from the
    `graphrag_reasoning` tool.

    Args:
        input_dict: Input dictionary
    """
    # Plug the OpenAI chat and embedding models into the state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state for this thread
    thread_id = 12345
    agent = get_app(thread_id, llm_model=llm)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    prompt = """
Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
perform Graph RAG reasoning to get insights related to nodes of genes
mentioned in the knowledge graph related to Adalimumab.

Here is an additional context:
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
"""

    # Invoke the agent so that it calls the graphrag_reasoning tool
    result = agent.invoke({"messages": [HumanMessage(content=prompt)]}, config=run_config)
    messages = result["messages"]

    # The final message must be plain text
    answer = messages[-1].content
    assert isinstance(answer, str)

    # The message right before the answer must be the reasoning tool's output
    tool_message = messages[-2]
    assert tool_message.name == "graphrag_reasoning"

    # The reasoning answer must mention the drug and its target
    for keyword in ("Adalimumab", "TNF"):
        assert keyword in answer
|