aiagents4pharma 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +3 -1
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +42 -10
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/graphrag_reasoning.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the agent state used by the GraphRAG reasoning test.

    The state lists two uploaded documents, the PrimeKG source graph files and
    one previously extracted subgraph (`subkg_12345`) that already carries a
    textual summary. The model slots stay empty so each test can fill them in.
    """

    def _upload(file_name, file_type):
        # One uploaded-file record; the path is resolved against DATA_PATH.
        return {
            "file_name": file_name,
            "file_path": f"{DATA_PATH}/{file_name}",
            "file_type": file_type,
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        }

    def _edge(head, tail, triple):
        # Every edge receives its own 'relation' and 'label' list objects.
        return (head, tail, {'relation': list(triple), 'label': list(triple)})

    gene_to_disease = ('gene/protein', 'associated with', 'disease')
    gene_to_gene = ('gene/protein', 'ppi', 'gene/protein')
    disease_to_gene = ('disease', 'associated with', 'gene/protein')

    node_ids = (
        'IFNG_(3495)',
        'IKBKG_(3672)',
        'ATG16L1_(6661)',
        'inflammatory bowel disease_(28158)',
        'Crohn ileitis and jejunitis_(35814)',
        "Crohn's colitis_(83770)",
    )
    subgraph = {
        'nodes': [(node_id, {}) for node_id in node_ids],
        'edges': [
            _edge('IFNG_(3495)', 'inflammatory bowel disease_(28158)', gene_to_disease),
            _edge('IFNG_(3495)', "Crohn's colitis_(83770)", gene_to_disease),
            _edge('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)', gene_to_disease),
            _edge('ATG16L1_(6661)', 'IKBKG_(3672)', gene_to_gene),
            _edge("Crohn's colitis_(83770)", 'ATG16L1_(6661)', disease_to_gene),
        ],
    }

    # Textualized form of the subgraph: a node table followed by an edge table.
    subgraph_text = """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
"""

    # Pre-written summary of the subgraph stored alongside it in the state.
    subgraph_summary = """
The subgraph extracted from `subkg_12345` includes several important genes and
their associations with inflammatory bowel diseases, particularly Crohn's disease.

Key Nodes:
1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
role in immune response. It is associated with several diseases, including
inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
and autoimmune diseases.

2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
regulation of NF-kappaB, which is critical for inflammation and immune responses.
Mutations can lead to immunodeficiencies and other disorders.

3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
bowel disease type 10 (IBD10) and are associated with Crohn's colitis.

4. **Inflammatory Bowel Disease**: A category of diseases characterized by
chronic inflammation of the gastrointestinal tract, with specific mention of
mutations in the NOD2 gene as a cause.

5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
indicating a pathogenic inflammatory response.

6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
inflammation in the ileum.

Key Edges:
- **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
- **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
between the gene and the disease.
- **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
functional relationship between these two genes in the context of immune response
and inflammation.

In summary, the subgraph illustrates the connections between key genes
(IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
"""

    return {
        # Both model entries are assigned inside the individual test cases.
        "llm_model": None,
        "embedding_model": None,
        "uploaded_files": [
            _upload("adalimumab.pdf", "drug_data"),
            _upload("DGE_human_Colon_UC-vs-Colon_Control.pdf", "endotype"),
        ],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [
            {
                "name": "PrimeKG",
                "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
                "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
            }
        ],
        "dic_extracted_graph": [
            {
                "name": "subkg_12345",
                "tool_call_id": "tool_12345",
                "graph_source": "PrimeKG",
                "topk_nodes": 3,
                "topk_edges": 3,
                "graph_dict": subgraph,
                "graph_text": subgraph_text,
                "graph_summary": subgraph_summary,
            }
        ],
    }
|
165
|
+
|
166
|
+
|
167
|
+
def test_graphrag_reasoning_openai(input_dict):
    """
    Exercise the GraphRAG reasoning tool through the agent with OpenAI models.

    The agent state is seeded with a pre-extracted subgraph, the agent is asked
    to reason over it, and the final reply plus the preceding tool message are
    inspected.

    Args:
        input_dict: Initial agent state supplied by the fixture.
    """
    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state under a fixed thread id
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    question = """
Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
perform Graph RAG reasoning to get insights related to nodes of genes
mentioned in the knowledge graph related to Adalimumab.

Here is an additional context:
Adalimumab is a fully human monoclonal antibody (IgG1)
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
"""

    # Run the agent on the question
    result = agent.invoke(
        {"messages": [HumanMessage(content=question)]},
        config=run_config,
    )
    history = result["messages"]

    # The last message is the assistant's textual answer
    answer = history[-1].content
    assert isinstance(answer, str)

    # The message right before it must come from the reasoning tool
    tool_reply = history[-2]
    assert tool_reply.name == "graphrag_reasoning"

    # The answer is expected to mention the drug and its target
    for keyword in ("Adalimumab", "TNF"):
        assert keyword in answer
|
@@ -0,0 +1,174 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/subgraph_extraction.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the minimal agent state used by the subgraph extraction tests.

    No documents are uploaded and no subgraph has been extracted yet; only the
    PrimeKG source graph files and the top-k settings are provided.
    """
    primekg_source = {
        "name": "PrimeKG",
        "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
        "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
    }

    return {
        # Both model entries are assigned inside the individual test cases.
        "llm_model": None,
        "embedding_model": None,
        "uploaded_files": [],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [primekg_source],
    }
|
35
|
+
|
36
|
+
|
37
|
+
def test_extract_subgraph_wo_docs(input_dict):
    """
    Run the subgraph extraction tool with no uploaded documents (OpenAI models).

    After the agent has answered, the extracted subgraph stored in the agent
    state is checked for its metadata and for agreement between its graph
    dictionary and its textual form.

    Args:
        input_dict: Initial agent state supplied by the fixture.
    """
    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state under a fixed thread id
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    question = """
Please directly invoke `subgraph_extraction` tool without calling any other tools
to respond to the following prompt:

Extract all relevant information related to nodes of genes related to inflammatory bowel disease
(IBD) that existed in the knowledge graph.
Please set the extraction name for this process as `subkg_12345`.
"""

    # Run the agent on the question
    result = agent.invoke(
        {"messages": [HumanMessage(content=question)]},
        config=run_config,
    )
    history = result["messages"]

    # The last message is the assistant's textual answer
    answer = history[-1].content
    assert isinstance(answer, str)

    # The message right before it must come from the extraction tool
    tool_reply = history[-2]
    assert tool_reply.name == "subgraph_extraction"

    # Inspect the first extracted subgraph recorded in the agent state
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3

    graph = extracted["graph_dict"]
    assert isinstance(graph, dict)
    assert len(graph["nodes"]) > 0
    assert len(graph["edges"]) > 0

    text = extracted["graph_text"]
    assert isinstance(text, str)

    # Every node id has to show up in the textual form
    for node in graph["nodes"]:
        assert node[0] in text

    # Every edge has to show up as a `head,"(relation tuple)",tail` row
    for edge in graph["edges"]:
        head, tail, attrs = edge[:3]
        row = f'{head},"{tuple(attrs["relation"])}",{tail}'
        assert row in text
|
100
|
+
|
101
|
+
|
102
|
+
def test_extract_subgraph_w_docs(input_dict):
    """
    Run the subgraph extraction tool with one uploaded endotype document
    (OpenAI models).

    After the agent has answered, the extracted subgraph stored in the agent
    state is checked for its metadata and for agreement between its graph
    dictionary and its textual form.

    Args:
        input_dict: Initial agent state supplied by the fixture.
    """
    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent under a fixed thread id
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}

    # Register the endotype document, then seed the agent state
    endotype_file = "DGE_human_Colon_UC-vs-Colon_Control.pdf"
    input_dict["uploaded_files"] = [
        {
            "file_name": endotype_file,
            "file_path": f"{DATA_PATH}/{endotype_file}",
            "file_type": "endotype",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        }
    ]
    agent.update_state(run_config, input_dict)

    question = """
Please ONLY invoke `subgraph_extraction` tool without calling any other tools
to respond to the following prompt:

Extract all relevant information related to nodes of genes related to inflammatory bowel disease
(IBD) that existed in the knowledge graph.
Please set the extraction name for this process as `subkg_12345`.
"""

    # Run the agent on the question
    result = agent.invoke(
        {"messages": [HumanMessage(content=question)]},
        config=run_config,
    )
    history = result["messages"]

    # The last message is the assistant's textual answer
    answer = history[-1].content
    assert isinstance(answer, str)

    # The message right before it must come from the extraction tool
    tool_reply = history[-2]
    assert tool_reply.name == "subgraph_extraction"

    # Inspect the first extracted subgraph recorded in the agent state
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3

    graph = extracted["graph_dict"]
    assert isinstance(graph, dict)
    assert len(graph["nodes"]) > 0
    assert len(graph["edges"]) > 0

    text = extracted["graph_text"]
    assert isinstance(text, str)

    # Every node id has to show up in the textual form
    for node in graph["nodes"]:
        assert node[0] in text

    # Every edge has to show up as a `head,"(relation tuple)",tail` row
    for edge in graph["edges"]:
        head, tail, attrs = edge[:3]
        row = f'{head},"{tuple(attrs["relation"])}",{tail}'
        assert row in text
|
@@ -0,0 +1,154 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for tools/subgraph_summarization.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.messages import HumanMessage
|
7
|
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
+
from ..agents.t2kg_agent import get_app
|
9
|
+
|
10
|
+
# Define the data path
|
11
|
+
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the agent state used by the subgraph summarization test.

    The state has no uploaded documents, points at the PrimeKG source graph
    files and holds one previously extracted subgraph (`subkg_12345`) whose
    summary is still unset.
    """

    def _edge(head, tail, triple):
        # Every edge receives its own 'relation' and 'label' list objects.
        return (head, tail, {'relation': list(triple), 'label': list(triple)})

    gene_to_disease = ('gene/protein', 'associated with', 'disease')
    gene_to_gene = ('gene/protein', 'ppi', 'gene/protein')
    disease_to_gene = ('disease', 'associated with', 'gene/protein')

    node_ids = (
        'IFNG_(3495)',
        'IKBKG_(3672)',
        'ATG16L1_(6661)',
        'inflammatory bowel disease_(28158)',
        'Crohn ileitis and jejunitis_(35814)',
        "Crohn's colitis_(83770)",
    )
    subgraph = {
        'nodes': [(node_id, {}) for node_id in node_ids],
        'edges': [
            _edge('IFNG_(3495)', 'inflammatory bowel disease_(28158)', gene_to_disease),
            _edge('IFNG_(3495)', "Crohn's colitis_(83770)", gene_to_disease),
            _edge('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)', gene_to_disease),
            _edge('ATG16L1_(6661)', 'IKBKG_(3672)', gene_to_gene),
            _edge("Crohn's colitis_(83770)", 'ATG16L1_(6661)', disease_to_gene),
        ],
    }

    # Textualized form of the subgraph: a node table followed by an edge table.
    subgraph_text = """
node_id,node_attr
IFNG_(3495),"IFNG belongs to gene/protein category.
This gene encodes a soluble cytokine that is a member of the type II interferon class.
The encoded protein is secreted by cells of both the innate and adaptive immune systems.
The active protein is a homodimer that binds to the interferon gamma receptor
which triggers a cellular response to viral and microbial infections.
Mutations in this gene are associated with an increased susceptibility to viral,
bacterial and parasitic infections and to several autoimmune diseases.
[provided by RefSeq, Dec 2015]."
IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
resulting in activation of genes involved in inflammation, immunity, cell survival,
and other pathways. Mutations in this gene result in incontinentia pigmenti,
hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
A pseudogene highly similar to this locus is located in an adjacent region of the
X chromosome. [provided by RefSeq, Mar 2016]."
ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
by this gene is part of a large protein complex that is necessary for autophagy,
the major process by which intracellular components are targeted to lysosomes
for degradation. Defects in this gene are a cause of susceptibility to inflammatory
bowel disease type 10 (IBD10). Several transcript variants encoding different
isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
category. Any inflammatory bowel disease in which the cause of the disease
is a mutation in the NOD2 gene.
Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
disease category. An Crohn disease involving a pathogenic inflammatory
response in the ileum.
Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
Crohn's disease affecting the colon.

head_id,edge_type,tail_id
Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
ATG16L1_(6661)
ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
inflammatory bowel disease_(28158)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
Crohn ileitis and jejunitis_(35814)
"""

    return {
        # Both model entries are assigned inside the individual test cases.
        "llm_model": None,
        "embedding_model": None,
        "uploaded_files": [],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [
            {
                "name": "PrimeKG",
                "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
                "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
            }
        ],
        "dic_extracted_graph": [
            {
                "name": "subkg_12345",
                "tool_call_id": "tool_12345",
                "graph_source": "PrimeKG",
                "topk_nodes": 3,
                "topk_edges": 3,
                "graph_dict": subgraph,
                "graph_text": subgraph_text,
                # Left empty here; the test checks it is a string after the run.
                "graph_summary": None,
            }
        ],
    }
|
109
|
+
|
110
|
+
|
111
|
+
def test_summarize_subgraph(input_dict):
    """
    Exercise the subgraph summarization tool through the agent, without any
    uploaded documents, using OpenAI chat and embedding models.

    Args:
        input_dict: Initial agent state supplied by the fixture.
    """
    # Plug the OpenAI chat and embedding models into the state
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = chat_model
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build the agent and seed its state under a fixed thread id
    thread_id = 12345
    agent = get_app(thread_id, llm_model=chat_model)
    run_config = {"configurable": {"thread_id": thread_id}}
    agent.update_state(run_config, input_dict)

    question = """
Please directly invoke `subgraph_summarization` tool without calling any other tools
to respond to the following prompt:

You are given a subgraph in the forms of textualized subgraph representing
nodes and edges (triples) obtained from extraction_name `subkg_12345`.
Summarize the given subgraph and higlight the importance nodes and edges.
"""

    # Run the agent on the question
    result = agent.invoke(
        {"messages": [HumanMessage(content=question)]},
        config=run_config,
    )
    history = result["messages"]

    # The last message is the assistant's textual answer
    answer = history[-1].content
    assert isinstance(answer, str)

    # The message right before it must come from the summarization tool
    tool_reply = history[-2]
    assert tool_reply.name == "subgraph_summarization"

    # The first extracted subgraph in the state must now hold a string summary
    extracted = agent.get_state(run_config).values["dic_extracted_graph"][0]
    assert isinstance(extracted["graph_summary"], str)
|
@@ -31,7 +31,6 @@ def test_embedding_with_huggingface_embed_query(embedding_model):
|
|
31
31
|
# Check the result
|
32
32
|
assert len(result) == 768
|
33
33
|
|
34
|
-
|
35
34
|
def test_embedding_with_huggingface_failed():
|
36
35
|
"""Test embedding documents using the EmbeddingWithHuggingFace class."""
|
37
36
|
# Check if the model is available on HuggingFace Hub
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""
|
2
|
+
Test cases for utils/embeddings/ollama.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
import ollama
|
7
|
+
from ..utils.embeddings.ollama import EmbeddingWithOllama
|
8
|
+
|
9
|
+
@pytest.fixture(name="ollama_config")
def fixture_ollama_config():
    """Provide the Ollama settings shared by the tests in this module."""
    # A small model is picked on purpose.
    config = {"model_name": "all-minilm"}
    return config
|
15
|
+
|
16
|
+
def test_no_model_ollama(ollama_config):
    """
    Check how EmbeddingWithOllama behaves when the model is not available.

    The model is removed first (best effort), then constructing
    EmbeddingWithOllama is expected to raise a ValueError whose message
    reports that the model was pulled and the Ollama server restarted.

    Args:
        ollama_config: Dictionary holding the Ollama model name.
    """
    model_name = ollama_config["model_name"]

    # Delete the Ollama model; a ResponseError from the delete call is ignored
    # (presumably the model was not installed in the first place).
    try:
        ollama.delete(model_name)
    except ollama.ResponseError:
        pass

    # The name is bound to a local before formatting: reusing double quotes
    # inside a double-quoted f-string is a syntax error before Python 3.12.
    expected_message = (
        f"Error: Pulled {model_name} model and restarted Ollama server."
    )
    with pytest.raises(ValueError, match=expected_message):
        EmbeddingWithOllama(model_name=model_name)
|
31
|
+
|
32
|
+
@pytest.fixture(name="embedding_model")
def embedding_model_fixture(ollama_config):
    """Create an EmbeddingWithOllama instance for the configured model name."""
    return EmbeddingWithOllama(model_name=ollama_config["model_name"])
|
37
|
+
|
38
|
+
def test_embedding_with_ollama_embed_documents(embedding_model):
    """Embed several documents with EmbeddingWithOllama and check the sizes."""
    drug_names = ["Adalimumab", "Infliximab", "Vedolizumab"]
    vectors = embedding_model.embed_documents(drug_names)

    # One vector per input text, each with 384 components
    assert len(vectors) == 3
    assert len(vectors[0]) == 384
|
46
|
+
|
47
|
+
def test_embedding_with_ollama_embed_query(embedding_model):
    """
    Embed a single query with EmbeddingWithOllama and check the vector size.

    The Ollama model is deleted afterwards so that it will not stay cached.
    The deletion sits in a `finally` clause so it also runs when the embedding
    call or the assertion fails.

    Args:
        embedding_model: EmbeddingWithOllama instance supplied by the fixture.
    """
    try:
        # Perform embedding
        result = embedding_model.embed_query("Adalimumab")
        # Check the result
        assert len(result) == 384
    finally:
        # Delete the Ollama model so that it will not be cached afterward
        ollama.delete(embedding_model.model_name)
|