aiagents4pharma 1.17.1__py3-none-any.whl → 1.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +4 -4
  2. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +7 -15
  3. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +4 -1
  4. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +4 -2
  5. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +4 -2
  6. aiagents4pharma/talk2biomodels/tests/test_integration.py +34 -30
  7. aiagents4pharma/talk2biomodels/tests/test_query_article.py +7 -1
  8. aiagents4pharma/talk2biomodels/tests/test_search_models.py +3 -1
  9. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +6 -3
  10. aiagents4pharma/talk2biomodels/tools/ask_question.py +1 -2
  11. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +23 -10
  12. aiagents4pharma/talk2biomodels/tools/get_annotation.py +11 -10
  13. aiagents4pharma/talk2biomodels/tools/query_article.py +6 -2
  14. aiagents4pharma/talk2biomodels/tools/search_models.py +8 -2
  15. aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
  16. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
  17. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
  18. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
  19. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  20. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  21. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
  22. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  23. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
  24. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
  25. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
  26. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  27. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  28. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  29. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  30. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  31. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  32. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
  33. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
  34. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
  35. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
  36. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
  37. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
  38. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
  39. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
  40. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
  41. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
  42. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
  43. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
  44. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  45. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
  46. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
  47. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
  48. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
  49. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
  50. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
  51. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
  52. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +12 -3
  53. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +56 -24
  54. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
  55. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
  56. {aiagents4pharma-1.17.1.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,62 @@
1
+ _target_: agents.t2kg_agent.get_app
2
+ state_modifier: >
3
+ You are talk2knowledgegraphs agent, a helpful assistant for reasoning over knowledge graphs.
4
+ User can ask questions related to the knowledge graphs, and you will provide the answers using
5
+ the provided tools as follows (if necessary):
6
+ [`subgraph_extraction`, `subgraph_summarization`, `graphrag_reasoning`].
7
+
8
+ **Tools Descriptions**:
9
+ - `subgraph_extraction`: Extract a subgraph from the knowledge graph that contains the relevant
10
+ information to answer the user's query. This tool can be used to provide a subgraph context
11
+ as a part of the reasoning process. The extracted subgraph should contain the most relevant
12
+ nodes and edges to the user's query in the form of a textualized subgraph.
13
+ - `subgraph_summarization`: Summarize the extracted textualized subgraph obtained from the
14
+ `subgraph_extraction` tool. This tool can be used to provide a concise and informative summary
15
+ of the subgraph to be used for reasoning as subgraph context. This tool highlights the most
16
+ important nodes and edges in the subgraph to respond to the user's request.
17
+ - `graphrag_reasoning`: Reason over the extracted textualized subgraph to answer the user's
18
+ prompt by also considering the context from the extracted subgraph and the retrieved
19
+ documents. User may also have a set of uploaded files that can be used to provide additional
20
+ information for reasoning. The history of previous conversations should be considered as well,
21
+ and you as an agent should provide which conversations can be included as chat history.
22
+
23
+ As an agent, you should approach each request by first understanding the user's query and then
24
+ following the appropriate steps to provide the best answer possible.
25
+
26
+ **Execution Steps**:
27
+ - Understand thoroughly the user's query and think over the best approach to answer it.
28
+ - You may not need to call any tool for each user's query. Use the related tool(s) as needed.
29
+ Think deeply whether it is necessary to call any tool to respond to the user's request.
30
+ - Call `subgraph_extraction` if there is any indication that the user needs to get the
31
+ information from the knowledge graph, which is not directly available as context in the prompt or
32
+ in the previous extracted subgraph.
33
+ If the user asks for subgraph extraction, suggest a value for the `extraction_name` argument.
34
+ You should always follow it with `subgraph_summarization` as the next tool to be invoked.
35
+ - Call `subgraph_summarization` tool to summarize the extracted subgraph and provide
36
+ useful insights over the subgraph. This tool also has the ability to filter endotypes
37
+ in the form of differentially expressed genes that are relevant to the input query. Make sure
38
+ to include the most relevant genes if the user provides endotype-related documents.
39
+ The summary of the subgraph will be stored as `graph_summary` in the state in which you can use
40
+ it for reasoning over the subgraph in the `graphrag_reasoning` tool afterwards.
41
+ - If the user asks follow-up questions related to the extracted subgraph, you should
42
+ call `subgraph_summarization` followed by `graphrag_reasoning` tools if you think
43
+ the answer can be retrieved from the previously extracted subgraph.
44
+ - Call `graphrag_reasoning` tool to reason over the extracted subgraph and documents.
45
+ Always perform reasoning over the extracted subgraph and documents to provide
46
+ the best possible answer to the user's query. Before calling this tool,
47
+ make sure you have access to the summarized subgraph obtained from `subgraph_summarization` tool.
48
+ - By default, if the user asks for a specific question about the extracted graph, you should
49
+ call `subgraph_summarization` followed by `graphrag_reasoning` if the most recent subgraphs
50
+ contain the relevant information to answer the user's question.
51
+ Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
52
+ - It is strongly recommended to avoid calling the same tool multiple times unless
53
+ it is necessary to get the correct and thorough answer to the user's request.
54
+
55
+ **Tool Calling Workflow Examples**:
56
+ - `subgraph_extraction` -> `subgraph_summarization` -> `graphrag_reasoning` when the user asks
57
+ for specific instructions to extract the subgraph and reason over it. Follow this order to
58
+ provide the most accurate and relevant information if you think the currently available context
59
+ is not enough to answer the user's question.
60
+ - `subgraph_summarization` -> `graphrag_reasoning` when the user asks for the previously extracted
61
+ subgraph. Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
62
+ - Do not call `graphrag_reasoning` tool without calling `subgraph_summarization` tool first.
@@ -0,0 +1,4 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
4
+ from . import frontend
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,31 @@
1
+ _target_: app.frontend.streamlit_app_talk2knowledgegraphs
2
+ default_user: "talk2kg_user"
3
+ data_package_allowed_file_types:
4
+ - "pdf"
5
+ endotype_allowed_file_types:
6
+ - "pdf"
7
+ upload_data_dir: "../files"
8
+ kg_name: "PrimeKG"
9
+ kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_pyg_graph.pkl"
10
+ kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_text_graph.pkl"
11
+ openai_api_key: ${oc.env:OPENAI_API_KEY}
12
+ openai_llms:
13
+ - "gpt-4o-mini"
14
+ - "gpt-4-turbo"
15
+ - "gpt-3.5-turbo"
16
+ openai_embeddings:
17
+ - "text-embedding-3-small"
18
+ ollama_llms:
19
+ - "llama3.2:1b"
20
+ - "llama3.2"
21
+ - "llama3.1"
22
+ ollama_embeddings:
23
+ - "nomic-embed-text"
24
+ temperature: 0.1
25
+ streaming: False
26
+ reasoning_subgraph_topk_nodes: 10
27
+ reasoning_subgraph_topk_nodes_min: 1
28
+ reasoning_subgraph_topk_nodes_max: 100
29
+ reasoning_subgraph_topk_edges: 10
30
+ reasoning_subgraph_topk_edges_min: 1
31
+ reasoning_subgraph_topk_edges_max: 100
@@ -0,0 +1,7 @@
1
+ defaults:
2
+ - _self_
3
+ - agents/t2kg_agent: default
4
+ - tools/subgraph_extraction: default
5
+ - tools/subgraph_summarization: default
6
+ - tools/graphrag_reasoning: default
7
+ - app/frontend: default
@@ -0,0 +1,6 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
4
+ from . import subgraph_extraction
5
+ from . import subgraph_summarization
6
+ from . import graphrag_reasoning
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,24 @@
1
+ _target_: talk2knowledgegraphs.tools.graphrag_reasoning
2
+ splitter_chunk_size: 1024
3
+ splitter_chunk_overlap: 256
4
+ retriever_search_type: "mmr"
5
+ retriever_k: 3
6
+ retriever_fetch_k: 10
7
+ retriever_lambda_mult: 0.3
8
+ prompt_graphrag_w_docs_context: >
9
+ Given a chat history and the latest user question, which might reference context
10
+ in the chat history, formulate a standalone question that can be understood
11
+ without the chat history. Do NOT answer the question, just reformulate it if needed
12
+ and otherwise return it as is.
13
+
14
+ Question: {input}
15
+ prompt_graphrag_w_docs: >
16
+ You are talk2knowledgegraphs, a helpful assistant performing retrieval-augmented generation (RAG)
17
+ over knowledge graphs.
18
+ One of your tasks is to answer react-based questions by using the following pieces of
19
+ retrieved context to answer the question. You can leverage a summarization of the subgraph
20
+ and the retrieved documents to provide the best possible answer to the user's query.
21
+
22
+ Subgraph Summary: {subgraph_summary}
23
+ Context: {context}
24
+ Question: {input}
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,43 @@
1
+ _target_: talk2knowledgegraphs.tools.subgraph_extraction
2
+ ollama_embeddings:
3
+ - "nomic-embed-text"
4
+ temperature: 0.1
5
+ streaming: False
6
+ topk: 5
7
+ topk_e: 5
8
+ cost_e: 0.5
9
+ c_const: 0.01
10
+ root: -1
11
+ num_clusters: 1
12
+ pruning: "gw"
13
+ verbosity_level: 0
14
+ node_id_column: "node_id"
15
+ node_attr_column: "node_attr"
16
+ edge_src_column: "edge_src"
17
+ edge_attr_column: "edge_attr"
18
+ edge_dst_column: "edge_dst"
19
+ prompt_endotype_filtering: >
20
+ You are talk2knowledgegraphs agent, a helpful assistant in filtering the most relevant endotype
21
+ for the subgraph extraction process.
22
+ Given the retrieved endotype documents, you need to filter the most relevant
23
+ endotype that will be used for the following reasoning process.
24
+ Only include a list of genes that exist in the provided documents
25
+ that are relevant to the input query.
26
+ For this task, you may modify your prompt to optimize the filtering process
27
+ based on factual information between each gene in the documents and the input query.
28
+ Discover as many genes as possible that are relevant for enriching the subgraph extraction process.
29
+
30
+ You do not need to include any other information in the output.
31
+ Use the following output format:
32
+ [gene_1, gene_2, ..., gene_n]
33
+
34
+ {context}
35
+ Input: {input}
36
+ prompt_endotype_addition: >
37
+ Include the following endotype for the subgraph extraction process:
38
+ splitter_chunk_size: 64
39
+ splitter_chunk_overlap: 16
40
+ retriever_search_type: "mmr"
41
+ retriever_k: 3
42
+ retriever_fetch_k: 10
43
+ retriever_lambda_mult: 0.3
@@ -0,0 +1,3 @@
1
+ '''
2
+ Import all the modules in the package
3
+ '''
@@ -0,0 +1,9 @@
1
+ _target_: talk2knowledgegraphs.tools.subgraph_summarization
2
+ prompt_subgraph_summarization: >
3
+ You are talk2knowledgegraphs agent, a helpful assistant in reasoning over biomedical knowledge graphs.
4
+ Your task is to summarize the extracted textualized subgraph to provide a concise and informative
5
+ summary of the subgraph to be used for reasoning as subgraph context. You are responsible for
6
+ highlighting the most important nodes and edges in the subgraph to respond to the user's question.
7
+
8
+ Textualized Subgraph: {textualized_subgraph}
9
+ Question: {input}
@@ -0,0 +1,4 @@
1
+ '''
2
+ This file is used to import all the modules in the package.
3
+ '''
4
+ from . import state_talk2knowledgegraphs
@@ -0,0 +1,38 @@
1
+ """
2
+ This is the state file for the Talk2KnowledgeGraphs agent.
3
+ """
4
+
5
+ from typing import Annotated
6
+ # import operator
7
+ from langchain_core.embeddings.embeddings import Embeddings
8
+ from langchain_core.language_models.chat_models import BaseChatModel
9
+ from langgraph.prebuilt.chat_agent_executor import AgentState
10
+
11
+
12
+ def add_data(data1: dict, data2: dict) -> dict:
13
+ """
14
+ A reducer function to merge two lists of dictionaries, matching entries by their "name" key.
15
+ """
16
+ left_idx_by_name = {data["name"]: idx for idx, data in enumerate(data1)}
17
+ merged = data1.copy()
18
+ for data in data2:
19
+ idx = left_idx_by_name.get(data["name"])
20
+ if idx is not None:
21
+ merged[idx] = data
22
+ else:
23
+ merged.append(data)
24
+ return merged
25
+
26
+
27
+ class Talk2KnowledgeGraphs(AgentState):
28
+ """
29
+ The state for the Talk2KnowledgeGraphs agent.
30
+ """
31
+
32
+ llm_model: BaseChatModel
33
+ embedding_model: Embeddings
34
+ uploaded_files: list
35
+ topk_nodes: int
36
+ topk_edges: int
37
+ dic_source_graph: Annotated[list[dict], add_data]
38
+ dic_extracted_graph: Annotated[list[dict], add_data]
@@ -0,0 +1,110 @@
1
+ """
2
+ Test cases for agents/t2kg_agent.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
+ @pytest.fixture(name="input_dict")
15
+ def input_dict_fixture():
16
+ """
17
+ Input dictionary fixture.
18
+ """
19
+ input_dict = {
20
+ "llm_model": None, # TBA for each test case
21
+ "embedding_model": None, # TBA for each test case
22
+ "uploaded_files": [
23
+ {
24
+ "file_name": "adalimumab.pdf",
25
+ "file_path": f"{DATA_PATH}/adalimumab.pdf",
26
+ "file_type": "drug_data",
27
+ "uploaded_by": "VPEUser",
28
+ "uploaded_timestamp": "2024-11-05 00:00:00",
29
+ },
30
+ {
31
+ "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
32
+ "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
33
+ "file_type": "endotype",
34
+ "uploaded_by": "VPEUser",
35
+ "uploaded_timestamp": "2024-11-05 00:00:00",
36
+ },
37
+ ],
38
+ "topk_nodes": 3,
39
+ "topk_edges": 3,
40
+ "dic_source_graph": [
41
+ {
42
+ "name": "PrimeKG",
43
+ "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
44
+ "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
45
+ }
46
+ ],
47
+ "dic_extracted_graph": []
48
+ }
49
+
50
+ return input_dict
51
+
52
+
53
+ def test_t2kg_agent_openai(input_dict):
54
+ """
55
+ Test the T2KG agent using OpenAI model.
56
+
57
+ Args:
58
+ input_dict: Input dictionary
59
+ """
60
+ # Prepare LLM and embedding model
61
+ input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
62
+ input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
63
+
64
+ # Setup the app
65
+ unique_id = 12345
66
+ app = get_app(unique_id, llm_model=input_dict["llm_model"])
67
+ config = {"configurable": {"thread_id": unique_id}}
68
+ # Update state
69
+ app.update_state(
70
+ config,
71
+ input_dict,
72
+ )
73
+ prompt = """
74
+ Adalimumab is a fully human monoclonal antibody (IgG1)
75
+ that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
76
+
77
+ I would like to get evidence from the knowledge graph about the mechanism of actions related to
78
+ Adalimumab in treating inflammatory bowel disease
79
+ (IBD). Please follow these steps:
80
+ - Extract a subgraph from the PrimeKG that contains information about Adalimumab.
81
+ - Summarize the extracted subgraph.
82
+ - Reason about the mechanism of action of Adalimumab in treating IBD.
83
+
84
+ Please set the extraction name for the extraction process as `subkg_12345`.
85
+ """
86
+
87
+ # Invoke the agent with the prompt
88
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
89
+
90
+ # Check assistant message
91
+ assistant_msg = response["messages"][-1].content
92
+ assert isinstance(assistant_msg, str)
93
+
94
+ # Check extracted subgraph dictionary
95
+ current_state = app.get_state(config)
96
+ dic_extracted_graph = current_state.values["dic_extracted_graph"][0]
97
+ assert isinstance(dic_extracted_graph, dict)
98
+ assert dic_extracted_graph["name"] == "subkg_12345"
99
+ assert dic_extracted_graph["graph_source"] == "PrimeKG"
100
+ assert dic_extracted_graph["topk_nodes"] == 3
101
+ assert dic_extracted_graph["topk_edges"] == 3
102
+ assert isinstance(dic_extracted_graph["graph_dict"], dict)
103
+ assert len(dic_extracted_graph["graph_dict"]["nodes"]) > 0
104
+ assert len(dic_extracted_graph["graph_dict"]["edges"]) > 0
105
+ assert isinstance(dic_extracted_graph["graph_text"], str)
106
+ # Check summarized subgraph
107
+ assert isinstance(dic_extracted_graph["graph_summary"], str)
108
+ # Check reasoning results
109
+ assert "Adalimumab" in assistant_msg
110
+ assert "TNF" in assistant_msg
@@ -0,0 +1,210 @@
1
+ """
2
+ Test cases for tools/graphrag_reasoning.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
+ @pytest.fixture(name="input_dict")
15
+ def input_dict_fixture():
16
+ """
17
+ Input dictionary fixture.
18
+ """
19
+ input_dict = {
20
+ "llm_model": None, # TBA for each test case
21
+ "embedding_model": None, # TBA for each test case
22
+ "uploaded_files": [
23
+ {
24
+ "file_name": "adalimumab.pdf",
25
+ "file_path": f"{DATA_PATH}/adalimumab.pdf",
26
+ "file_type": "drug_data",
27
+ "uploaded_by": "VPEUser",
28
+ "uploaded_timestamp": "2024-11-05 00:00:00",
29
+ },
30
+ {
31
+ "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
32
+ "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
33
+ "file_type": "endotype",
34
+ "uploaded_by": "VPEUser",
35
+ "uploaded_timestamp": "2024-11-05 00:00:00",
36
+ },
37
+ ],
38
+ "topk_nodes": 3,
39
+ "topk_edges": 3,
40
+ "dic_source_graph": [
41
+ {
42
+ "name": "PrimeKG",
43
+ "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
44
+ "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
45
+ }
46
+ ],
47
+ "dic_extracted_graph": [
48
+ {
49
+ "name": "subkg_12345",
50
+ "tool_call_id": "tool_12345",
51
+ "graph_source": "PrimeKG",
52
+ "topk_nodes": 3,
53
+ "topk_edges": 3,
54
+ "graph_dict": {
55
+ 'nodes': [('IFNG_(3495)', {}),
56
+ ('IKBKG_(3672)', {}),
57
+ ('ATG16L1_(6661)', {}),
58
+ ('inflammatory bowel disease_(28158)', {}),
59
+ ('Crohn ileitis and jejunitis_(35814)', {}),
60
+ ("Crohn's colitis_(83770)", {})],
61
+ 'edges': [('IFNG_(3495)', 'inflammatory bowel disease_(28158)',
62
+ {'relation': ['gene/protein', 'associated with', 'disease'],
63
+ 'label': ['gene/protein', 'associated with', 'disease']}),
64
+ ('IFNG_(3495)', "Crohn's colitis_(83770)",
65
+ {'relation': ['gene/protein', 'associated with', 'disease'],
66
+ 'label': ['gene/protein', 'associated with', 'disease']}),
67
+ ('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)',
68
+ {'relation': ['gene/protein', 'associated with', 'disease'],
69
+ 'label': ['gene/protein', 'associated with', 'disease']}),
70
+ ('ATG16L1_(6661)', 'IKBKG_(3672)',
71
+ {'relation': ['gene/protein', 'ppi', 'gene/protein'],
72
+ 'label': ['gene/protein', 'ppi', 'gene/protein']}),
73
+ ("Crohn's colitis_(83770)", 'ATG16L1_(6661)',
74
+ {'relation': ['disease', 'associated with', 'gene/protein'],
75
+ 'label': ['disease', 'associated with', 'gene/protein']})]},
76
+ "graph_text": """
77
+ node_id,node_attr
78
+ IFNG_(3495),"IFNG belongs to gene/protein category.
79
+ This gene encodes a soluble cytokine that is a member of the type II interferon class.
80
+ The encoded protein is secreted by cells of both the innate and adaptive immune systems.
81
+ The active protein is a homodimer that binds to the interferon gamma receptor
82
+ which triggers a cellular response to viral and microbial infections.
83
+ Mutations in this gene are associated with an increased susceptibility to viral,
84
+ bacterial and parasitic infections and to several autoimmune diseases.
85
+ [provided by RefSeq, Dec 2015]."
86
+ IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
87
+ subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
88
+ resulting in activation of genes involved in inflammation, immunity, cell survival,
89
+ and other pathways. Mutations in this gene result in incontinentia pigmenti,
90
+ hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
91
+ A pseudogene highly similar to this locus is located in an adjacent region of the
92
+ X chromosome. [provided by RefSeq, Mar 2016]."
93
+ ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
94
+ by this gene is part of a large protein complex that is necessary for autophagy,
95
+ the major process by which intracellular components are targeted to lysosomes
96
+ for degradation. Defects in this gene are a cause of susceptibility to inflammatory
97
+ bowel disease type 10 (IBD10). Several transcript variants encoding different
98
+ isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
99
+ inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
100
+ category. Any inflammatory bowel disease in which the cause of the disease
101
+ is a mutation in the NOD2 gene.
102
+ Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
103
+ disease category. An Crohn disease involving a pathogenic inflammatory
104
+ response in the ileum.
105
+ Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
106
+ Crohn's disease affecting the colon.
107
+
108
+ head_id,edge_type,tail_id
109
+ Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
110
+ ATG16L1_(6661)
111
+ ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
112
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
113
+ inflammatory bowel disease_(28158)
114
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
115
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
116
+ Crohn ileitis and jejunitis_(35814)
117
+ """,
118
+ "graph_summary": """
119
+ The subgraph extracted from `subkg_12345` includes several important genes and
120
+ their associations with inflammatory bowel diseases, particularly Crohn's disease.
121
+
122
+ Key Nodes:
123
+ 1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
124
+ role in immune response. It is associated with several diseases, including
125
+ inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
126
+ jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
127
+ and autoimmune diseases.
128
+
129
+ 2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
130
+ regulation of NF-kappaB, which is critical for inflammation and immune responses.
131
+ Mutations can lead to immunodeficiencies and other disorders.
132
+
133
+ 3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
134
+ degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
135
+ bowel disease type 10 (IBD10) and are associated with Crohn's colitis.
136
+
137
+ 4. **Inflammatory Bowel Disease**: A category of diseases characterized by
138
+ chronic inflammation of the gastrointestinal tract, with specific mention of
139
+ mutations in the NOD2 gene as a cause.
140
+
141
+ 5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
142
+ indicating a pathogenic inflammatory response.
143
+
144
+ 6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
145
+ inflammation in the ileum.
146
+
147
+ Key Edges:
148
+ - **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
149
+ Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
150
+ - **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
151
+ between the gene and the disease.
152
+ - **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
153
+ functional relationship between these two genes in the context of immune response
154
+ and inflammation.
155
+
156
+ In summary, the subgraph illustrates the connections between key genes
157
+ (IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
158
+ particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
159
+ """,
160
+ }
161
+ ],
162
+ }
163
+
164
+ return input_dict
165
+
166
+
167
+ def test_graphrag_reasoning_openai(input_dict):
168
+ """
169
+ Test the GraphRAG reasoning tool using OpenAI model.
170
+
171
+ Args:
172
+ input_dict: Input dictionary
173
+ """
174
+ # Prepare LLM and embedding model
175
+ input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
176
+ input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
177
+
178
+ # Setup the app
179
+ unique_id = 12345
180
+ app = get_app(unique_id, llm_model=input_dict["llm_model"])
181
+ config = {"configurable": {"thread_id": unique_id}}
182
+ # Update state
183
+ app.update_state(
184
+ config,
185
+ input_dict,
186
+ )
187
+ prompt = """
188
+ Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
189
+ perform Graph RAG reasoning to get insights related to nodes of genes
190
+ mentioned in the knowledge graph related to Adalimumab.
191
+
192
+ Here is an additional context:
193
+ Adalimumab is a fully human monoclonal antibody (IgG1)
194
+ that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
195
+ """
196
+
197
+ # Test the tool graphrag_reasoning
198
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
199
+
200
+ # Check assistant message
201
+ assistant_msg = response["messages"][-1].content
202
+ assert isinstance(assistant_msg, str)
203
+
204
+ # Check tool message
205
+ tool_msg = response["messages"][-2]
206
+ assert tool_msg.name == "graphrag_reasoning"
207
+
208
+ # Check reasoning results
209
+ assert "Adalimumab" in assistant_msg
210
+ assert "TNF" in assistant_msg