aiagents4pharma 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +3 -1
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +42 -10
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/graphrag_reasoning.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
# NOTE(review): this is a relative path, so the test files are presumably looked up
# from the current working directory (pytest run from the repository root) — confirm.
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Provide the initial agent state for the GraphRAG reasoning test.

    The state lists two uploaded documents, the PrimeKG source graph and one
    previously extracted subgraph (`subkg_12345`) with its graph dictionary,
    textualized form and summary. `llm_model` and `embedding_model` are left
    as `None`; each test case fills them in.
    """

    def upload_entry(file_name, file_type):
        # Metadata record of one uploaded document stored under DATA_PATH
        return {
            "file_name": file_name,
            "file_path": f"{DATA_PATH}/{file_name}",
            "file_type": file_type,
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        }

    def edge_entry(head, tail, triple):
        # `relation` and `label` hold the same triple as two separate lists
        return (head, tail, {"relation": list(triple), "label": list(triple)})

    # Relation triples used by the edges of the extracted subgraph
    gene_disease = ("gene/protein", "associated with", "disease")
    gene_gene = ("gene/protein", "ppi", "gene/protein")
    disease_gene = ("disease", "associated with", "gene/protein")

    node_names = (
        "IFNG_(3495)",
        "IKBKG_(3672)",
        "ATG16L1_(6661)",
        "inflammatory bowel disease_(28158)",
        "Crohn ileitis and jejunitis_(35814)",
        "Crohn's colitis_(83770)",
    )
    graph_dict = {
        # Every node gets its own (empty) attribute dictionary
        "nodes": [(name, {}) for name in node_names],
        "edges": [
            edge_entry("IFNG_(3495)", "inflammatory bowel disease_(28158)", gene_disease),
            edge_entry("IFNG_(3495)", "Crohn's colitis_(83770)", gene_disease),
            edge_entry("IFNG_(3495)", "Crohn ileitis and jejunitis_(35814)", gene_disease),
            edge_entry("ATG16L1_(6661)", "IKBKG_(3672)", gene_gene),
            edge_entry("Crohn's colitis_(83770)", "ATG16L1_(6661)", disease_gene),
        ],
    }

    # Textualized subgraph: a node table followed by an edge table
    graph_text = """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
"""

    # Summary of the subgraph stored alongside the extracted graph
    graph_summary = """
The subgraph extracted from `subkg_12345` includes several important genes and
their associations with inflammatory bowel diseases, particularly Crohn's disease.

Key Nodes:
1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
role in immune response. It is associated with several diseases, including
inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
and autoimmune diseases.

2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
regulation of NF-kappaB, which is critical for inflammation and immune responses.
Mutations can lead to immunodeficiencies and other disorders.

3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
bowel disease type 10 (IBD10) and are associated with Crohn's colitis.

4. **Inflammatory Bowel Disease**: A category of diseases characterized by
chronic inflammation of the gastrointestinal tract, with specific mention of
mutations in the NOD2 gene as a cause.

5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
indicating a pathogenic inflammatory response.

6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
inflammation in the ileum.

Key Edges:
- **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
- **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
between the gene and the disease.
- **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
functional relationship between these two genes in the context of immune response
and inflammation.

In summary, the subgraph illustrates the connections between key genes
(IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
"""

    return {
        "llm_model": None,  # set by each test case
        "embedding_model": None,  # set by each test case
        "uploaded_files": [
            upload_entry("adalimumab.pdf", "drug_data"),
            upload_entry("DGE_human_Colon_UC-vs-Colon_Control.pdf", "endotype"),
        ],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [
            {
                "name": "PrimeKG",
                "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
                "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
            }
        ],
        "dic_extracted_graph": [
            {
                "name": "subkg_12345",
                "tool_call_id": "tool_12345",
                "graph_source": "PrimeKG",
                "topk_nodes": 3,
                "topk_edges": 3,
                "graph_dict": graph_dict,
                "graph_text": graph_text,
                "graph_summary": graph_summary,
            }
        ],
    }
|
165
|
+
|
166
|
+
|
167
|
+
def test_graphrag_reasoning_openai(input_dict):
    """
    Run GraphRAG reasoning over a pre-extracted subgraph with OpenAI models.

    The agent is asked to reason on `subkg_12345`, which is already present
    in the state; the final answer must mention both Adalimumab and TNF.

    Args:
        input_dict: Initial agent state provided by the fixture.
    """
    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state for this thread
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    question = """
Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
perform Graph RAG reasoning to get insights related to nodes of genes
mentioned in the knowledge graph related to Adalimumab.

Here is an additional context:
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
"""

    # Invoke the agent, which is expected to call graphrag_reasoning
    result = agent.invoke({"messages": [HumanMessage(content=question)]}, config=run_config)
    messages = result["messages"]

    # The last message is the assistant's answer
    answer = messages[-1].content
    assert isinstance(answer, str)

    # The message right before it must come from the reasoning tool
    assert messages[-2].name == "graphrag_reasoning"

    # The answer has to refer to the drug and its target
    assert "Adalimumab" in answer
    assert "TNF" in answer
|
@@ -0,0 +1,174 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/subgraph_extraction.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
# NOTE(review): this is a relative path, so the test files are presumably looked up
# from the current working directory (pytest run from the repository root) — confirm.
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Provide the initial agent state for the subgraph extraction tests.

    The state has no uploaded files and a single source graph (PrimeKG).
    `llm_model` and `embedding_model` are left as `None`; each test case
    fills them in.
    """
    # Source knowledge graph: PyG and textualized pickles under DATA_PATH
    primekg = {
        "name": "PrimeKG",
        "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
        "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
    }

    return {
        "llm_model": None,  # set by each test case
        "embedding_model": None,  # set by each test case
        "uploaded_files": [],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [primekg],
    }
|
35
|
+
|
36
|
+
|
37
|
+
def test_extract_subgraph_wo_docs(input_dict):
    """
    Extract a subgraph with OpenAI models when no document is uploaded.

    Checks that the `subgraph_extraction` tool was called and that the
    extracted subgraph stored in the state is consistent with its
    textualized form.

    Args:
        input_dict: Initial agent state provided by the fixture.
    """

    def edge_row(edge):
        # Expected row of an edge in the text: head,"(relation tuple)",tail
        return ",".join((edge[0], f'"{tuple(edge[2]["relation"])}"', edge[1]))

    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state for this thread
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    question = """
Please directly invoke `subgraph_extraction` tool without calling any other tools
to respond to the following prompt:

Extract all relevant information related to nodes of genes related to inflammatory bowel disease
(IBD) that existed in the knowledge graph.
Please set the extraction name for this process as `subkg_12345`.
"""

    # Invoke the agent, which is expected to call subgraph_extraction
    result = agent.invoke({"messages": [HumanMessage(content=question)]}, config=run_config)
    messages = result["messages"]

    # The last message is the assistant's answer
    assert isinstance(messages[-1].content, str)

    # The message right before it must come from the extraction tool
    assert messages[-2].name == "subgraph_extraction"

    # Inspect the first extracted subgraph kept in the agent state
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3
    assert isinstance(extracted["graph_dict"], dict)
    assert len(extracted["graph_dict"]["nodes"]) > 0
    assert len(extracted["graph_dict"]["edges"]) > 0
    assert isinstance(extracted["graph_text"], str)

    text = extracted["graph_text"]
    # Every node identifier must occur in the textualized graph
    assert all(node[0] in text for node in extracted["graph_dict"]["nodes"])
    # Every edge must occur in the textualized graph as well
    assert all(edge_row(edge) in text for edge in extracted["graph_dict"]["edges"])
|
100
|
+
|
101
|
+
|
102
|
+
def test_extract_subgraph_w_docs(input_dict):
    """
    Extract a subgraph with OpenAI models when an endotype document is uploaded.

    Checks that the `subgraph_extraction` tool was called and that the
    extracted subgraph stored in the state is consistent with its
    textualized form.

    Args:
        input_dict: Initial agent state provided by the fixture.
    """

    def edge_row(edge):
        # Expected row of an edge in the text: head,"(relation tuple)",tail
        return ",".join((edge[0], f'"{tuple(edge[2]["relation"])}"', edge[1]))

    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent for this thread
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}

    # Register the endotype document as the only uploaded file, then seed the state
    endotype_name = "DGE_human_Colon_UC-vs-Colon_Control.pdf"
    input_dict["uploaded_files"] = [
        {
            "file_name": endotype_name,
            "file_path": f"{DATA_PATH}/{endotype_name}",
            "file_type": "endotype",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        }
    ]
    agent.update_state(run_config, input_dict)

    question = """
Please ONLY invoke `subgraph_extraction` tool without calling any other tools
to respond to the following prompt:

Extract all relevant information related to nodes of genes related to inflammatory bowel disease
(IBD) that existed in the knowledge graph.
Please set the extraction name for this process as `subkg_12345`.
"""

    # Invoke the agent, which is expected to call subgraph_extraction
    result = agent.invoke({"messages": [HumanMessage(content=question)]}, config=run_config)
    messages = result["messages"]

    # The last message is the assistant's answer
    assert isinstance(messages[-1].content, str)

    # The message right before it must come from the extraction tool
    assert messages[-2].name == "subgraph_extraction"

    # Inspect the first extracted subgraph kept in the agent state
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3
    assert isinstance(extracted["graph_dict"], dict)
    assert len(extracted["graph_dict"]["nodes"]) > 0
    assert len(extracted["graph_dict"]["edges"]) > 0
    assert isinstance(extracted["graph_text"], str)

    text = extracted["graph_text"]
    # Every node identifier must occur in the textualized graph
    assert all(node[0] in text for node in extracted["graph_dict"]["nodes"])
    # Every edge must occur in the textualized graph as well
    assert all(edge_row(edge) in text for edge in extracted["graph_dict"]["edges"])
|
@@ -0,0 +1,154 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/subgraph_summarization.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
# NOTE(review): this is a relative path, so the test files are presumably looked up
# from the current working directory (pytest run from the repository root) — confirm.
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Provide the initial agent state for the subgraph summarization test.

    The state has no uploaded files, the PrimeKG source graph and one
    previously extracted subgraph (`subkg_12345`) whose `graph_summary` is
    still `None`. `llm_model` and `embedding_model` are left as `None`;
    each test case fills them in.
    """

    def edge_entry(head, tail, triple):
        # `relation` and `label` hold the same triple as two separate lists
        return (head, tail, {"relation": list(triple), "label": list(triple)})

    # Relation triples used by the edges of the extracted subgraph
    gene_disease = ("gene/protein", "associated with", "disease")
    gene_gene = ("gene/protein", "ppi", "gene/protein")
    disease_gene = ("disease", "associated with", "gene/protein")

    node_names = (
        "IFNG_(3495)",
        "IKBKG_(3672)",
        "ATG16L1_(6661)",
        "inflammatory bowel disease_(28158)",
        "Crohn ileitis and jejunitis_(35814)",
        "Crohn's colitis_(83770)",
    )
    graph_dict = {
        # Every node gets its own (empty) attribute dictionary
        "nodes": [(name, {}) for name in node_names],
        "edges": [
            edge_entry("IFNG_(3495)", "inflammatory bowel disease_(28158)", gene_disease),
            edge_entry("IFNG_(3495)", "Crohn's colitis_(83770)", gene_disease),
            edge_entry("IFNG_(3495)", "Crohn ileitis and jejunitis_(35814)", gene_disease),
            edge_entry("ATG16L1_(6661)", "IKBKG_(3672)", gene_gene),
            edge_entry("Crohn's colitis_(83770)", "ATG16L1_(6661)", disease_gene),
        ],
    }

    # Textualized subgraph: a node table followed by an edge table
    graph_text = """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
"""

    return {
        "llm_model": None,  # set by each test case
        "embedding_model": None,  # set by each test case
        "uploaded_files": [],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [
            {
                "name": "PrimeKG",
                "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
                "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
            }
        ],
        "dic_extracted_graph": [
            {
                "name": "subkg_12345",
                "tool_call_id": "tool_12345",
                "graph_source": "PrimeKG",
                "topk_nodes": 3,
                "topk_edges": 3,
                "graph_dict": graph_dict,
                "graph_text": graph_text,
                # Not summarized yet
                "graph_summary": None,
            }
        ],
    }
|
109
|
+
|
110
|
+
|
111
|
+
def test_summarize_subgraph(input_dict):
    """
    Summarize a pre-extracted subgraph with OpenAI models, without any documents.

    Checks that the `subgraph_summarization` tool was called and that the
    extracted subgraph in the state now carries a string summary.

    Args:
        input_dict: Initial agent state provided by the fixture.
    """
    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state for this thread
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    question = """
Please directly invoke `subgraph_summarization` tool without calling any other tools
to respond to the following prompt:

You are given a subgraph in the forms of textualized subgraph representing
nodes and edges (triples) obtained from extraction_name `subkg_12345`.
Summarize the given subgraph and higlight the importance nodes and edges.
"""

    # Invoke the agent, which is expected to call subgraph_summarization
    result = agent.invoke({"messages": [HumanMessage(content=question)]}, config=run_config)
    messages = result["messages"]

    # The last message is the assistant's answer
    assert isinstance(messages[-1].content, str)

    # The message right before it must come from the summarization tool
    assert messages[-2].name == "subgraph_summarization"

    # The first extracted subgraph in the state must now hold a summary
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted["graph_summary"], str)
|
@@ -31,7 +31,6 @@ def test_embedding_with_huggingface_embed_query(embedding_model):
|
|
31
31
|
# Check the result
|
32
32
|
assert len(result) == 768
|
33
33
|
|
34
|
-
|
35
34
|
def test_embedding_with_huggingface_failed():
|
36
35
|
"""Test embedding documents using the EmbeddingWithHuggingFace class."""
|
37
36
|
# Check if the model is available on HuggingFace Hub
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for utils/embeddings/ollama.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
import ollama
|
7
|
+
from ..utils.embeddings.ollama import EmbeddingWithOllama
|
8
|
+
|
9
|
+
@pytest.fixture(name="ollama_config")
def fixture_ollama_config():
    """Provide the Ollama settings shared by the tests, as a dictionary."""
    config = {}
    # A small model is chosen on purpose
    config["model_name"] = "all-minilm"
    return config
|
15
|
+
|
16
|
+
def test_no_model_ollama(ollama_config):
    """
    Test the case when the Ollama model is not available.

    The model is deleted first, so constructing `EmbeddingWithOllama` is
    expected to raise a `ValueError` whose message matches the pattern below.

    Args:
        ollama_config: Dictionary holding the Ollama `model_name`.
    """
    model_name = ollama_config["model_name"]

    # Best-effort removal: a ResponseError is ignored on purpose
    # (presumably raised when the model is not installed — confirm).
    try:
        ollama.delete(model_name)
    except ollama.ResponseError:
        pass

    # The subscript is resolved outside the f-string: reusing the enclosing
    # quote type inside a replacement field is a SyntaxError before Python 3.12.
    expected_error = f"Error: Pulled {model_name} model and restarted Ollama server."

    # Check that creating the embedding model reports the missing model
    with pytest.raises(ValueError, match=expected_error):
        EmbeddingWithOllama(model_name=model_name)
|
31
|
+
|
32
|
+
@pytest.fixture(name="embedding_model")
def embedding_model_fixture(ollama_config):
    """Create the Ollama embedding model named in the Ollama configuration."""
    return EmbeddingWithOllama(model_name=ollama_config["model_name"])
|
37
|
+
|
38
|
+
def test_embedding_with_ollama_embed_documents(embedding_model):
    """Embed several documents with EmbeddingWithOllama and check the output sizes."""
    # Embed three drug names in one call
    drug_names = ["Adalimumab", "Infliximab", "Vedolizumab"]
    vectors = embedding_model.embed_documents(drug_names)

    # One vector per document, each with 384 entries
    assert len(vectors) == 3
    assert len(vectors[0]) == 384
|
46
|
+
|
47
|
+
def test_embedding_with_ollama_embed_query(embedding_model):
    """
    Test embedding a query using the EmbeddingWithOllama class.

    The Ollama model is deleted at the end of the test, whether or not the
    embedding call and the assertion succeed.

    Args:
        embedding_model: EmbeddingWithOllama instance provided by the fixture.
    """
    try:
        # Perform embedding
        text = "Adalimumab"
        result = embedding_model.embed_query(text)
        # Check the result
        assert len(result) == 384
    finally:
        # Delete the Ollama model so that it will not be cached afterward;
        # `finally` keeps this cleanup from being skipped on a failure above
        ollama.delete(embedding_model.model_name)
|