aiagents4pharma 1.17.1__py3-none-any.whl → 1.19.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +4 -4
  2. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +7 -15
  3. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +4 -1
  4. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +4 -2
  5. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +4 -2
  6. aiagents4pharma/talk2biomodels/tests/test_integration.py +34 -30
  7. aiagents4pharma/talk2biomodels/tests/test_query_article.py +7 -1
  8. aiagents4pharma/talk2biomodels/tests/test_search_models.py +3 -1
  9. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +6 -3
  10. aiagents4pharma/talk2biomodels/tools/ask_question.py +1 -2
  11. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +23 -10
  12. aiagents4pharma/talk2biomodels/tools/get_annotation.py +11 -10
  13. aiagents4pharma/talk2biomodels/tools/query_article.py +6 -2
  14. aiagents4pharma/talk2biomodels/tools/search_models.py +8 -2
  15. aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
  16. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
  17. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
  18. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
  19. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  20. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  21. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
  22. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  23. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
  24. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
  25. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
  26. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  27. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  28. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  29. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  30. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  31. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  32. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
  33. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
  34. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
  35. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
  36. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
  37. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
  38. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
  39. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
  40. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
  41. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
  42. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
  43. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
  44. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  45. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
  46. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
  47. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
  48. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
  49. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
  50. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
  51. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
  52. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +12 -3
  53. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +56 -24
  54. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
  55. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
  56. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,62 @@
1
+ _target_: agents.t2kg_agent.get_app
2
+ state_modifier: >
3
+ You are talk2knowledgegraphs agent, a helpful assistant for reasoning over knowledge graphs.
4
+ User can ask questions related to the knowledge graphs, and you will provide the answers using
5
+ the provided tools as follows (if necessary):
6
+ [`subgraph_extraction`, `subgraph_summarization`, `graphrag_reasoning`].
7
+
8
+ **Tools Descriptions**:
9
+ - `subgraph_extraction`: Extract a subgraph from the knowledge graph that contains the relevant
10
+ information to answer the user's query. This tool can be used to provide a subgraph context
11
+ as a part of the reasoning process. The extracted subgraph should contain the most relevant
12
+ nodes and edges to the user's query in the form of a textualized subgraph.
13
+ - `subgraph_summarization`: Summarize the extracted textualized subgraph obtained from the
14
+ `subgraph_extraction` tool. This tool can be used to provide a concise and informative summary
15
+ of the subgraph to be used for reasoning as subgraph context. This tool highlights the most
16
+ important nodes and edges in the subgraph to respond to the user's request.
17
+ - `graphrag_reasoning`: Reason over the extracted textualized subgraph to answer the user's
18
+ prompt by also considering the context from the extracted subgraph and the retrieved
19
+ documents. User may also have a set of uploaded files that can be used to provide additional
20
+ information for reasoning. The history of previous conversations should be considered as well,
21
+ and you as an agent should provide which conversations can be included as chat history.
22
+
23
+ As an agent, you should approach each request by first understanding the user's query and then
24
+ following the appropriate steps to provide the best answer possible.
25
+
26
+ **Execution Steps**:
27
+ - Understand thoroughly the user's query and think over the best approach to answer it.
28
+ - You may not need to call any tool for each user's query. Use the related tool(s) as needed.
29
+ Think deeply whether it is necessary to call any tool to respond to the user's request.
30
+ - Call `subgraph_extraction` if there is any indication that the user needs to get the
31
+ information from the knowledge graph, which is not directly available as context in the prompt or
32
+ in the previous extracted subgraph.
33
+ If the user asks for subgraph extraction, suggest a value for the `extraction_name` argument.
34
+ You should always follow it with `subgraph_summarization` as the next tool to be invoked.
35
+ - Call `subgraph_summarization` tool to summarize the extracted subgraph and provide
36
+ useful insights over the subgraph. This tool also has the ability to filter endotypes
37
+ in the forms of differentially expressed genes that are relevant to the input query. Make sure
38
+ to include the most relevant genes if the user provides endotype-related documents.
39
+ The summary of the subgraph will be stored as `graph_summary` in the state in which you can use
40
+ it for reasoning over the subgraph in the `graphrag_reasoning` tool afterwards.
41
+ - If the user asks follow-up questions related to the extracted subgraph, you should
42
+ call `subgraph_summarization` followed by `graphrag_reasoning` tools if you think
43
+ the answer can be retrieved from the previously extracted subgraph.
44
+ - Call `graphrag_reasoning` tool to reason over the extracted subgraph and documents.
45
+ Always perform reasoning over the extracted subgraph and documents to provide
46
+ the best possible answer to the user's query. Before calling this tool,
47
+ make sure you have access to the summarized subgraph obtained from `subgraph_summarization` tool.
48
+ - By default, if the user asks for a specific question about the extracted graph, you should
49
+ call `subgraph_summarization` followed by `graphrag_reasoning` if the most recent subgraphs
50
+ contain the relevant information to answer the user's question.
51
+ Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
52
+ - It is strongly recommended to avoid calling the same tool multiple times unless
53
+ it is necessary to get the correct and thorough answer to the user's request.
54
+
55
+ **Tool Calling Workflow Examples**:
56
+ - `subgraph_extraction` -> `subgraph_summarization` -> `graphrag_reasoning` when the user asks
57
+ for specific instructions to extract the subgraph and reason over it. Follow this order to
58
+ provide the most accurate and relevant information if you think the currently available context
59
+ is not enough to answer the user's question.
60
+ - `subgraph_summarization` -> `graphrag_reasoning` when the user asks for the previously extracted
61
+ subgraph. Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
62
+ - Do not call `graphrag_reasoning` tool without calling `subgraph_summarization` tool first.
@@ -0,0 +1,4 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
4
+ from . import frontend
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,31 @@
1
+ _target_: app.frontend.streamlit_app_talk2knowledgegraphs
2
+ default_user: "talk2kg_user"
3
+ data_package_allowed_file_types:
4
+ - "pdf"
5
+ endotype_allowed_file_types:
6
+ - "pdf"
7
+ upload_data_dir: "../files"
8
+ kg_name: "PrimeKG"
9
+ kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_pyg_graph.pkl"
10
+ kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_text_graph.pkl"
11
+ openai_api_key: ${oc.env:OPENAI_API_KEY}
12
+ openai_llms:
13
+ - "gpt-4o-mini"
14
+ - "gpt-4-turbo"
15
+ - "gpt-3.5-turbo"
16
+ openai_embeddings:
17
+ - "text-embedding-3-small"
18
+ ollama_llms:
19
+ - "llama3.2:1b"
20
+ - "llama3.2"
21
+ - "llama3.1"
22
+ ollama_embeddings:
23
+ - "nomic-embed-text"
24
+ temperature: 0.1
25
+ streaming: False
26
+ reasoning_subgraph_topk_nodes: 10
27
+ reasoning_subgraph_topk_nodes_min: 1
28
+ reasoning_subgraph_topk_nodes_max: 100
29
+ reasoning_subgraph_topk_edges: 10
30
+ reasoning_subgraph_topk_edges_min: 1
31
+ reasoning_subgraph_topk_edges_max: 100
@@ -0,0 +1,7 @@
1
+ defaults:
2
+ - _self_
3
+ - agents/t2kg_agent: default
4
+ - tools/subgraph_extraction: default
5
+ - tools/subgraph_summarization: default
6
+ - tools/graphrag_reasoning: default
7
+ - app/frontend: default
@@ -0,0 +1,6 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
4
+ from . import subgraph_extraction
5
+ from . import subgraph_summarization
6
+ from . import graphrag_reasoning
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,24 @@
1
+ _target_: talk2knowledgegraphs.tools.graphrag_reasoning
2
+ splitter_chunk_size: 1024
3
+ splitter_chunk_overlap: 256
4
+ retriever_search_type: "mmr"
5
+ retriever_k: 3
6
+ retriever_fetch_k: 10
7
+ retriever_lambda_mult: 0.3
8
+ prompt_graphrag_w_docs_context: >
9
+ Given a chat history and the latest user question, which might reference context
10
+ in the chat history, formulate a standalone question that can be understood
11
+ without the chat history. Do NOT answer the question, just reformulate it if needed
12
+ and otherwise return it as is.
13
+
14
+ Question: {input}
15
+ prompt_graphrag_w_docs: >
16
+ You are talk2knowledgegraphs, a helpful assistant performing retrieval-augmented generation (RAG)
17
+ over knowledge graphs.
18
+ One of your tasks is to answer react-based questions by using the following pieces of
19
+ retrieved context to answer the question. You can leverage a summarization of the subgraph
20
+ and the retrieved documents to provide the best possible answer to the user's query.
21
+
22
+ Subgraph Summary: {subgraph_summary}
23
+ Context: {context}
24
+ Question: {input}
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,43 @@
1
+ _target_: talk2knowledgegraphs.tools.subgraph_extraction
2
+ ollama_embeddings:
3
+ - "nomic-embed-text"
4
+ temperature: 0.1
5
+ streaming: False
6
+ topk: 5
7
+ topk_e: 5
8
+ cost_e: 0.5
9
+ c_const: 0.01
10
+ root: -1
11
+ num_clusters: 1
12
+ pruning: "gw"
13
+ verbosity_level: 0
14
+ node_id_column: "node_id"
15
+ node_attr_column: "node_attr"
16
+ edge_src_column: "edge_src"
17
+ edge_attr_column: "edge_attr"
18
+ edge_dst_column: "edge_dst"
19
+ prompt_endotype_filtering: >
20
+ You are talk2knowledgegraphs agent, a helpful assistant in filtering the most relevant endotype
21
+ for the subgraph extraction process.
22
+ Given the retrieved endotype documents, you need to filter the most relevant
23
+ endotype that will be used for the following reasoning process.
24
+ Only include a list of genes that exist in the provided documents
25
+ that are relevant to the input query.
26
+ For this task, you may modify your prompt to optimize the filtering process
27
+ based on factual information between each gene in the documents and the input query.
28
+ Discover as many genes as possible that are relevant for enriching the subgraph extraction process.
29
+
30
+ You do not need to include any other information in the output.
31
+ Use the following output format:
32
+ [gene_1, gene_2, ..., gene_n]
33
+
34
+ {context}
35
+ Input: {input}
36
+ prompt_endotype_addition: >
37
+ Include the following endotype for the subgraph extraction process:
38
+ splitter_chunk_size: 64
39
+ splitter_chunk_overlap: 16
40
+ retriever_search_type: "mmr"
41
+ retriever_k: 3
42
+ retriever_fetch_k: 10
43
+ retriever_lambda_mult: 0.3
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,9 @@
1
+ _target_: talk2knowledgegraphs.tools.subgraph_summarization
2
+ prompt_subgraph_summarization: >
3
+ You are talk2knowledgegraphs agent, a helpful assistant in reasoning over biomedical knowledge graph.
4
+ Your task is to summarize the extracted textualized subgraph to provide a concise and informative
5
+ summary of the subgraph to be used for reasoning as subgraph context. You are responsible for
6
+ highlighting the most important nodes and edges in the subgraph to respond to the user's question.
7
+
8
+ Textualized Subgraph: {textualized_subgraph}
9
+ Question: {input}
@@ -0,0 +1,4 @@
1
+ '''
2
+ This file is used to import all the modules in the package.
3
+ '''
4
+ from . import state_talk2knowledgegraphs
@@ -0,0 +1,38 @@
1
+ """
2
+ This is the state file for the Talk2KnowledgeGraphs agent.
3
+ """
4
+
5
+ from typing import Annotated
6
+ # import operator
7
+ from langchain_core.embeddings.embeddings import Embeddings
8
+ from langchain_core.language_models.chat_models import BaseChatModel
9
+ from langgraph.prebuilt.chat_agent_executor import AgentState
10
+
11
+
12
def add_data(data1: list[dict], data2: list[dict]) -> list[dict]:
    """
    A reducer function to merge two lists of named dictionaries.

    Entries are matched on their ``"name"`` key: an entry of ``data2`` replaces
    the entry with the same name (keeping its position in the list); an entry
    with an unseen name is appended. Neither input list is modified.

    Args:
        data1: The current list of entries; each entry must have a ``"name"`` key.
        data2: The incoming list of entries; each entry must have a ``"name"`` key.

    Returns:
        A new list holding the merged entries.

    Raises:
        KeyError: If an entry does not have a ``"name"`` key.
    """
    merged = list(data1)
    idx_by_name = {entry["name"]: idx for idx, entry in enumerate(merged)}
    for entry in data2:
        idx = idx_by_name.get(entry["name"])
        if idx is None:
            # Remember where the new entry lands so that a later entry of
            # data2 with the same name replaces it instead of being appended
            # as a duplicate.
            idx_by_name[entry["name"]] = len(merged)
            merged.append(entry)
        else:
            merged[idx] = entry
    return merged
25
+
26
+
27
class Talk2KnowledgeGraphs(AgentState):
    """
    The state for the Talk2KnowledgeGraphs agent.

    Extends ``AgentState`` with the models, uploaded files, top-k settings and
    the source/extracted graph records declared below.
    """

    # Chat model and embedding model carried in the state.
    llm_model: BaseChatModel
    embedding_model: Embeddings
    # Uploaded files; the package tests pass a list of dicts with keys such as
    # "file_name", "file_path" and "file_type".
    uploaded_files: list
    # Top-k settings for nodes and edges; presumably consumed by the subgraph
    # extraction tool -- confirm against the tool implementation.
    topk_nodes: int
    topk_edges: int
    # Graph records; updates are combined through the add_data reducer, which
    # matches entries on their "name" key.
    dic_source_graph: Annotated[list[dict], add_data]
    dic_extracted_graph: Annotated[list[dict], add_data]
@@ -0,0 +1,110 @@
1
+ """
2
+ Test cases for agents/t2kg_agent.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the initial agent state used by the T2KG agent tests.

    The models are left as ``None`` and are filled in by each test case; no
    subgraph has been extracted yet.
    """
    uploaded_files = [
        {
            "file_name": "adalimumab.pdf",
            "file_path": f"{DATA_PATH}/adalimumab.pdf",
            "file_type": "drug_data",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
        {
            "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_type": "endotype",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
    ]
    source_graphs = [
        {
            "name": "PrimeKG",
            "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
            "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
        }
    ]

    return {
        "llm_model": None,  # set by each test case
        "embedding_model": None,  # set by each test case
        "uploaded_files": uploaded_files,
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": source_graphs,
        "dic_extracted_graph": [],
    }
51
+
52
+
53
def test_t2kg_agent_openai(input_dict):
    """
    Test the T2KG agent using OpenAI model.

    Sends a single prompt asking the agent to extract a subgraph, summarize it
    and reason over it, then checks the final assistant message and the
    extracted-subgraph record stored in the agent state.

    Args:
        input_dict: Input dictionary
    """
    # Prepare LLM and embedding model
    input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Setup the app
    unique_id = 12345
    app = get_app(unique_id, llm_model=input_dict["llm_model"])
    config = {"configurable": {"thread_id": unique_id}}
    # Update state
    app.update_state(
        config,
        input_dict,
    )
    prompt = """
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.

I would like to get evidence from the knowledge graph about the mechanism of actions related to
Adalimumab in treating inflammatory bowel disease
(IBD). Please follow these steps:
- Extract a subgraph from the PrimeKG that contains information about Adalimumab.
- Summarize the extracted subgraph.
- Reason about the mechanism of action of Adalimumab in treating IBD.

Please set the extraction name for the extraction process as `subkg_12345`.
"""

    # Invoke the agent with the prompt
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)

    # Check assistant message
    assistant_msg = response["messages"][-1].content
    assert isinstance(assistant_msg, str)

    # Check extracted subgraph dictionary
    current_state = app.get_state(config)
    dic_extracted_graph = current_state.values["dic_extracted_graph"][0]
    assert isinstance(dic_extracted_graph, dict)
    # The name must match the extraction name requested in the prompt
    assert dic_extracted_graph["name"] == "subkg_12345"
    assert dic_extracted_graph["graph_source"] == "PrimeKG"
    # The top-k values must match those set in the input state
    assert dic_extracted_graph["topk_nodes"] == 3
    assert dic_extracted_graph["topk_edges"] == 3
    assert isinstance(dic_extracted_graph["graph_dict"], dict)
    assert len(dic_extracted_graph["graph_dict"]["nodes"]) > 0
    assert len(dic_extracted_graph["graph_dict"]["edges"]) > 0
    assert isinstance(dic_extracted_graph["graph_text"], str)
    # Check summarized subgraph
    assert isinstance(dic_extracted_graph["graph_summary"], str)
    # Check reasoning results
    assert "Adalimumab" in assistant_msg
    assert "TNF" in assistant_msg
@@ -0,0 +1,210 @@
1
+ """
2
+ Test cases for tools/graphrag_reasoning.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Input dictionary fixture.

    The state is pre-populated with one extracted subgraph record named
    ``subkg_12345`` (graph dictionary, textualized graph and summary), and the
    models are left as ``None`` to be set by each test case.
    """
    input_dict = {
        "llm_model": None, # TBA for each test case
        "embedding_model": None, # TBA for each test case
        "uploaded_files": [
            {
                "file_name": "adalimumab.pdf",
                "file_path": f"{DATA_PATH}/adalimumab.pdf",
                "file_type": "drug_data",
                "uploaded_by": "VPEUser",
                "uploaded_timestamp": "2024-11-05 00:00:00",
            },
            {
                "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
                "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
                "file_type": "endotype",
                "uploaded_by": "VPEUser",
                "uploaded_timestamp": "2024-11-05 00:00:00",
            },
        ],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [
            {
                "name": "PrimeKG",
                "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
                "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
            }
        ],
        # A previously extracted subgraph record with its text and summary
        "dic_extracted_graph": [
            {
                "name": "subkg_12345",
                "tool_call_id": "tool_12345",
                "graph_source": "PrimeKG",
                "topk_nodes": 3,
                "topk_edges": 3,
                "graph_dict": {
                    'nodes': [('IFNG_(3495)', {}),
                              ('IKBKG_(3672)', {}),
                              ('ATG16L1_(6661)', {}),
                              ('inflammatory bowel disease_(28158)', {}),
                              ('Crohn ileitis and jejunitis_(35814)', {}),
                              ("Crohn's colitis_(83770)", {})],
                    'edges': [('IFNG_(3495)', 'inflammatory bowel disease_(28158)',
                               {'relation': ['gene/protein', 'associated with', 'disease'],
                                'label': ['gene/protein', 'associated with', 'disease']}),
                              ('IFNG_(3495)', "Crohn's colitis_(83770)",
                               {'relation': ['gene/protein', 'associated with', 'disease'],
                                'label': ['gene/protein', 'associated with', 'disease']}),
                              ('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)',
                               {'relation': ['gene/protein', 'associated with', 'disease'],
                                'label': ['gene/protein', 'associated with', 'disease']}),
                              ('ATG16L1_(6661)', 'IKBKG_(3672)',
                               {'relation': ['gene/protein', 'ppi', 'gene/protein'],
                                'label': ['gene/protein', 'ppi', 'gene/protein']}),
                              ("Crohn's colitis_(83770)", 'ATG16L1_(6661)',
                               {'relation': ['disease', 'associated with', 'gene/protein'],
                                'label': ['disease', 'associated with', 'gene/protein']})]},
                "graph_text": """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
""",
                "graph_summary": """
The subgraph extracted from `subkg_12345` includes several important genes and
their associations with inflammatory bowel diseases, particularly Crohn's disease.

Key Nodes:
1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
role in immune response. It is associated with several diseases, including
inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
and autoimmune diseases.

2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
regulation of NF-kappaB, which is critical for inflammation and immune responses.
Mutations can lead to immunodeficiencies and other disorders.

3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
bowel disease type 10 (IBD10) and are associated with Crohn's colitis.

4. **Inflammatory Bowel Disease**: A category of diseases characterized by
chronic inflammation of the gastrointestinal tract, with specific mention of
mutations in the NOD2 gene as a cause.

5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
indicating a pathogenic inflammatory response.

6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
inflammation in the ileum.

Key Edges:
- **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
- **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
between the gene and the disease.
- **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
functional relationship between these two genes in the context of immune response
and inflammation.

In summary, the subgraph illustrates the connections between key genes
(IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
""",
            }
        ],
    }

    return input_dict
165
+
166
+
167
def test_graphrag_reasoning_openai(input_dict):
    """
    Test the GraphRAG reasoning tool using OpenAI model.

    The state already holds an extracted subgraph named ``subkg_12345``; the
    prompt asks the agent to reason over it without extracting a new one. The
    test checks that the message before the final answer comes from the
    ``graphrag_reasoning`` tool.

    Args:
        input_dict: Input dictionary
    """
    # Prepare LLM and embedding model
    input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Setup the app
    unique_id = 12345
    app = get_app(unique_id, llm_model=input_dict["llm_model"])
    config = {"configurable": {"thread_id": unique_id}}
    # Update state
    app.update_state(
        config,
        input_dict,
    )
    prompt = """
Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
perform Graph RAG reasoning to get insights related to nodes of genes
mentioned in the knowledge graph related to Adalimumab.

Here is an additional context:
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
"""

    # Test the tool graphrag_reasoning
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)

    # Check assistant message
    assistant_msg = response["messages"][-1].content
    assert isinstance(assistant_msg, str)

    # Check tool message (the message right before the final assistant message)
    tool_msg = response["messages"][-2]
    assert tool_msg.name == "graphrag_reasoning"

    # Check reasoning results
    assert "Adalimumab" in assistant_msg
    assert "TNF" in assistant_msg