aiagents4pharma 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
  2. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
  3. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
  4. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
  5. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  6. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  7. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
  8. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  9. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
  10. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
  11. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
  12. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  13. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  14. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  15. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  16. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  17. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  18. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
  19. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
  20. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
  21. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
  22. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
  23. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
  24. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
  25. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
  26. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
  27. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
  28. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
  29. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
  30. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  31. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
  32. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
  33. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
  34. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
  35. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
  36. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
  37. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
  38. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +3 -1
  39. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +42 -10
  40. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
  41. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
  42. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
1
+ """
2
+ Test cases for tools/graphrag_reasoning.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the agent state used by the GraphRAG reasoning test.

    The state holds two uploaded documents, the PrimeKG source graph and one
    previously extracted subgraph (`subkg_12345`) together with its textual
    form and its summary. The LLM and embedding model slots are left empty
    and are filled in by the test case.
    """

    def _edge(head, relation, tail):
        # Every edge carries the same triple under both "relation" and "label";
        # fresh lists are created for each so that no two edges share objects.
        return (head, tail, {"relation": list(relation), "label": list(relation)})

    gene_to_disease = ("gene/protein", "associated with", "disease")

    uploaded_files = [
        {
            "file_name": "adalimumab.pdf",
            "file_path": f"{DATA_PATH}/adalimumab.pdf",
            "file_type": "drug_data",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
        {
            "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
            "file_type": "endotype",
            "uploaded_by": "VPEUser",
            "uploaded_timestamp": "2024-11-05 00:00:00",
        },
    ]

    source_graphs = [
        {
            "name": "PrimeKG",
            "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
            "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
        }
    ]

    # Nodes and edges of the pre-extracted subgraph
    graph_dict = {
        "nodes": [
            (node_id, {})
            for node_id in (
                "IFNG_(3495)",
                "IKBKG_(3672)",
                "ATG16L1_(6661)",
                "inflammatory bowel disease_(28158)",
                "Crohn ileitis and jejunitis_(35814)",
                "Crohn's colitis_(83770)",
            )
        ],
        "edges": [
            _edge("IFNG_(3495)", gene_to_disease, "inflammatory bowel disease_(28158)"),
            _edge("IFNG_(3495)", gene_to_disease, "Crohn's colitis_(83770)"),
            _edge("IFNG_(3495)", gene_to_disease, "Crohn ileitis and jejunitis_(35814)"),
            _edge("ATG16L1_(6661)", ("gene/protein", "ppi", "gene/protein"), "IKBKG_(3672)"),
            _edge(
                "Crohn's colitis_(83770)",
                ("disease", "associated with", "gene/protein"),
                "ATG16L1_(6661)",
            ),
        ],
    }

    # Textualized version of the same subgraph (node table, then edge table)
    graph_text = """
    node_id,node_attr
    IFNG_(3495),"IFNG belongs to gene/protein category.
    This gene encodes a soluble cytokine that is a member of the type II interferon class.
    The encoded protein is secreted by cells of both the innate and adaptive immune systems.
    The active protein is a homodimer that binds to the interferon gamma receptor
    which triggers a cellular response to viral and microbial infections.
    Mutations in this gene are associated with an increased susceptibility to viral,
    bacterial and parasitic infections and to several autoimmune diseases.
    [provided by RefSeq, Dec 2015]."
    IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
    subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
    resulting in activation of genes involved in inflammation, immunity, cell survival,
    and other pathways. Mutations in this gene result in incontinentia pigmenti,
    hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
    A pseudogene highly similar to this locus is located in an adjacent region of the
    X chromosome. [provided by RefSeq, Mar 2016]."
    ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
    by this gene is part of a large protein complex that is necessary for autophagy,
    the major process by which intracellular components are targeted to lysosomes
    for degradation. Defects in this gene are a cause of susceptibility to inflammatory
    bowel disease type 10 (IBD10). Several transcript variants encoding different
    isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
    inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
    category. Any inflammatory bowel disease in which the cause of the disease
    is a mutation in the NOD2 gene.
    Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
    disease category. An Crohn disease involving a pathogenic inflammatory
    response in the ileum.
    Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
    Crohn's disease affecting the colon.

    head_id,edge_type,tail_id
    Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
    ATG16L1_(6661)
    ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
    IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
    inflammatory bowel disease_(28158)
    IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
    IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
    Crohn ileitis and jejunitis_(35814)
    """

    # Summary of the subgraph, stored alongside the extraction
    graph_summary = """
    The subgraph extracted from `subkg_12345` includes several important genes and
    their associations with inflammatory bowel diseases, particularly Crohn's disease.

    Key Nodes:
    1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
    role in immune response. It is associated with several diseases, including
    inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
    jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
    and autoimmune diseases.

    2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
    regulation of NF-kappaB, which is critical for inflammation and immune responses.
    Mutations can lead to immunodeficiencies and other disorders.

    3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
    degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
    bowel disease type 10 (IBD10) and are associated with Crohn's colitis.

    4. **Inflammatory Bowel Disease**: A category of diseases characterized by
    chronic inflammation of the gastrointestinal tract, with specific mention of
    mutations in the NOD2 gene as a cause.

    5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
    indicating a pathogenic inflammatory response.

    6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
    inflammation in the ileum.

    Key Edges:
    - **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
    Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
    - **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
    between the gene and the disease.
    - **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
    functional relationship between these two genes in the context of immune response
    and inflammation.

    In summary, the subgraph illustrates the connections between key genes
    (IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
    particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
    """

    extracted_graph = {
        "name": "subkg_12345",
        "tool_call_id": "tool_12345",
        "graph_source": "PrimeKG",
        "topk_nodes": 3,
        "topk_edges": 3,
        "graph_dict": graph_dict,
        "graph_text": graph_text,
        "graph_summary": graph_summary,
    }

    return {
        "llm_model": None,  # TBA for each test case
        "embedding_model": None,  # TBA for each test case
        "uploaded_files": uploaded_files,
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": source_graphs,
        "dic_extracted_graph": [extracted_graph],
    }
165
+
166
+
167
def test_graphrag_reasoning_openai(input_dict):
    """
    Run the GraphRAG reasoning tool through the agent with OpenAI models.

    The agent state is seeded from the fixture (which already contains the
    extracted subgraph `subkg_12345`), then the agent is prompted to reason
    over that subgraph. The test checks that the last tool call was
    `graphrag_reasoning` and that the answer mentions Adalimumab and TNF.

    Args:
        input_dict: Input dictionary
    """
    # Attach the OpenAI chat and embedding models to the state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Create the agent app and push the prepared state into its thread
    thread_id = 12345
    app = get_app(thread_id, llm_model=llm)
    config = {"configurable": {"thread_id": thread_id}}
    app.update_state(config, input_dict)

    prompt = """
    Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
    perform Graph RAG reasoning to get insights related to nodes of genes
    mentioned in the knowledge graph related to Adalimumab.

    Here is an additional context:
    Adalimumab is a fully human monoclonal antibody (IgG1)
    that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
    """

    # Invoke the agent, which is expected to call graphrag_reasoning
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
    messages = response["messages"]

    # The final message is the assistant's answer
    answer = messages[-1].content
    assert isinstance(answer, str)

    # The message right before it must come from the reasoning tool
    assert messages[-2].name == "graphrag_reasoning"

    # The answer has to refer to the drug and its target
    for keyword in ("Adalimumab", "TNF"):
        assert keyword in answer
@@ -0,0 +1,174 @@
1
+ """
2
+ Test cases for tools/subgraph_extraction.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the initial agent state for the subgraph extraction tests.

    No files are uploaded and no subgraph has been extracted yet; only the
    PrimeKG source graph and the top-k settings are provided. The LLM and
    embedding model slots are filled in by each test case.
    """
    primekg_source = {
        "name": "PrimeKG",
        "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
        "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
    }

    return {
        "llm_model": None,  # TBA for each test case
        "embedding_model": None,  # TBA for each test case
        "uploaded_files": [],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [primekg_source],
    }
35
+
36
+
37
def test_extract_subgraph_wo_docs(input_dict):
    """
    Run the subgraph extraction tool through the agent with no uploaded documents.

    OpenAI models are used for both the LLM and the embeddings. After the
    agent call, the extracted subgraph stored in the state is checked for its
    metadata and for consistency between `graph_dict` and `graph_text`.

    Args:
        input_dict: Input dictionary.
    """
    # Attach the OpenAI chat and embedding models to the state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Create the agent app and push the prepared state into its thread
    thread_id = 12345
    app = get_app(thread_id, llm_model=llm)
    config = {"configurable": {"thread_id": thread_id}}
    app.update_state(config, input_dict)

    prompt = """
    Please directly invoke `subgraph_extraction` tool without calling any other tools
    to respond to the following prompt:

    Extract all relevant information related to nodes of genes related to inflammatory bowel disease
    (IBD) that existed in the knowledge graph.
    Please set the extraction name for this process as `subkg_12345`.
    """

    # Invoke the agent, which is expected to call subgraph_extraction
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
    messages = response["messages"]

    # The final message is the assistant's answer
    assert isinstance(messages[-1].content, str)

    # The message right before it must come from the extraction tool
    assert messages[-2].name == "subgraph_extraction"

    # Inspect the first extracted subgraph recorded in the state
    extracted = app.get_state(config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3

    graph_dict = extracted["graph_dict"]
    assert isinstance(graph_dict, dict)
    assert len(graph_dict["nodes"]) > 0
    assert len(graph_dict["edges"]) > 0

    graph_text = extracted["graph_text"]
    assert isinstance(graph_text, str)

    # Every node identifier has to show up in the textualized graph
    for node in graph_dict["nodes"]:
        assert node[0] in graph_text

    # Every edge has to show up as a `head,"(relation tuple)",tail` row
    for edge in graph_dict["edges"]:
        relation = '"' + str(tuple(edge[2]["relation"])) + '"'
        assert ",".join([edge[0], relation, edge[1]]) in graph_text
100
+
101
+
102
def test_extract_subgraph_w_docs(input_dict):
    """
    Run the subgraph extraction tool through the agent with one uploaded document.

    An endotype PDF is registered in the state as reference material and
    OpenAI models are used for both the LLM and the embeddings. After the
    agent call, the extracted subgraph stored in the state is checked for its
    metadata and for consistency between `graph_dict` and `graph_text`.

    Args:
        input_dict: Input dictionary.
    """
    # Attach the OpenAI chat and embedding models to the state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Create the agent app
    thread_id = 12345
    app = get_app(thread_id, llm_model=llm)
    config = {"configurable": {"thread_id": thread_id}}

    # Register the endotype document before pushing the state
    endotype_doc = {
        "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
        "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
        "file_type": "endotype",
        "uploaded_by": "VPEUser",
        "uploaded_timestamp": "2024-11-05 00:00:00",
    }
    input_dict["uploaded_files"] = [endotype_doc]
    app.update_state(config, input_dict)

    prompt = """
    Please ONLY invoke `subgraph_extraction` tool without calling any other tools
    to respond to the following prompt:

    Extract all relevant information related to nodes of genes related to inflammatory bowel disease
    (IBD) that existed in the knowledge graph.
    Please set the extraction name for this process as `subkg_12345`.
    """

    # Invoke the agent, which is expected to call subgraph_extraction
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
    messages = response["messages"]

    # The final message is the assistant's answer
    assert isinstance(messages[-1].content, str)

    # The message right before it must come from the extraction tool
    assert messages[-2].name == "subgraph_extraction"

    # Inspect the first extracted subgraph recorded in the state
    extracted = app.get_state(config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "PrimeKG"
    assert extracted["topk_nodes"] == 3
    assert extracted["topk_edges"] == 3

    graph_dict = extracted["graph_dict"]
    assert isinstance(graph_dict, dict)
    assert len(graph_dict["nodes"]) > 0
    assert len(graph_dict["edges"]) > 0

    graph_text = extracted["graph_text"]
    assert isinstance(graph_text, str)

    # Every node identifier has to show up in the textualized graph
    for node in graph_dict["nodes"]:
        assert node[0] in graph_text

    # Every edge has to show up as a `head,"(relation tuple)",tail` row
    for edge in graph_dict["edges"]:
        relation = '"' + str(tuple(edge[2]["relation"])) + '"'
        assert ",".join([edge[0], relation, edge[1]]) in graph_text
@@ -0,0 +1,154 @@
1
+ """
2
+ Test cases for tools/subgraph_summarization.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the agent state used by the subgraph summarization test.

    The state holds the PrimeKG source graph and one previously extracted
    subgraph (`subkg_12345`) with its textual form; its summary is still
    `None`. The LLM and embedding model slots are left empty and are filled
    in by the test case.
    """

    def _edge(head, relation, tail):
        # Every edge carries the same triple under both "relation" and "label";
        # fresh lists are created for each so that no two edges share objects.
        return (head, tail, {"relation": list(relation), "label": list(relation)})

    gene_to_disease = ("gene/protein", "associated with", "disease")

    source_graphs = [
        {
            "name": "PrimeKG",
            "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
            "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
        }
    ]

    # Nodes and edges of the pre-extracted subgraph
    graph_dict = {
        "nodes": [
            (node_id, {})
            for node_id in (
                "IFNG_(3495)",
                "IKBKG_(3672)",
                "ATG16L1_(6661)",
                "inflammatory bowel disease_(28158)",
                "Crohn ileitis and jejunitis_(35814)",
                "Crohn's colitis_(83770)",
            )
        ],
        "edges": [
            _edge("IFNG_(3495)", gene_to_disease, "inflammatory bowel disease_(28158)"),
            _edge("IFNG_(3495)", gene_to_disease, "Crohn's colitis_(83770)"),
            _edge("IFNG_(3495)", gene_to_disease, "Crohn ileitis and jejunitis_(35814)"),
            _edge("ATG16L1_(6661)", ("gene/protein", "ppi", "gene/protein"), "IKBKG_(3672)"),
            _edge(
                "Crohn's colitis_(83770)",
                ("disease", "associated with", "gene/protein"),
                "ATG16L1_(6661)",
            ),
        ],
    }

    # Textualized version of the same subgraph (node table, then edge table)
    graph_text = """
    node_id,node_attr
    IFNG_(3495),"IFNG belongs to gene/protein category.
    This gene encodes a soluble cytokine that is a member of the type II interferon class.
    The encoded protein is secreted by cells of both the innate and adaptive immune systems.
    The active protein is a homodimer that binds to the interferon gamma receptor
    which triggers a cellular response to viral and microbial infections.
    Mutations in this gene are associated with an increased susceptibility to viral,
    bacterial and parasitic infections and to several autoimmune diseases.
    [provided by RefSeq, Dec 2015]."
    IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
    subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
    resulting in activation of genes involved in inflammation, immunity, cell survival,
    and other pathways. Mutations in this gene result in incontinentia pigmenti,
    hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
    A pseudogene highly similar to this locus is located in an adjacent region of the
    X chromosome. [provided by RefSeq, Mar 2016]."
    ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
    by this gene is part of a large protein complex that is necessary for autophagy,
    the major process by which intracellular components are targeted to lysosomes
    for degradation. Defects in this gene are a cause of susceptibility to inflammatory
    bowel disease type 10 (IBD10). Several transcript variants encoding different
    isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
    inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
    category. Any inflammatory bowel disease in which the cause of the disease
    is a mutation in the NOD2 gene.
    Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
    disease category. An Crohn disease involving a pathogenic inflammatory
    response in the ileum.
    Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
    Crohn's disease affecting the colon.

    head_id,edge_type,tail_id
    Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
    ATG16L1_(6661)
    ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
    IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
    inflammatory bowel disease_(28158)
    IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
    IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
    Crohn ileitis and jejunitis_(35814)
    """

    extracted_graph = {
        "name": "subkg_12345",
        "tool_call_id": "tool_12345",
        "graph_source": "PrimeKG",
        "topk_nodes": 3,
        "topk_edges": 3,
        "graph_dict": graph_dict,
        "graph_text": graph_text,
        "graph_summary": None,
    }

    return {
        "llm_model": None,  # TBA for each test case
        "embedding_model": None,  # TBA for each test case
        "uploaded_files": [],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": source_graphs,
        "dic_extracted_graph": [extracted_graph],
    }
109
+
110
+
111
def test_summarize_subgraph(input_dict):
    """
    Run the subgraph summarization tool through the agent.

    No documents are uploaded. The LLM and embedding model configured here
    are the OpenAI ones (`ChatOpenAI` / `OpenAIEmbeddings`). After the agent
    call, the summary stored for the extracted subgraph must be a string.

    Args:
        input_dict: Input dictionary fixture.
    """
    # Attach the OpenAI chat and embedding models to the state
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["llm_model"] = llm
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")

    # Create the agent app and push the prepared state into its thread
    thread_id = 12345
    app = get_app(thread_id, llm_model=llm)
    config = {"configurable": {"thread_id": thread_id}}
    app.update_state(config, input_dict)

    prompt = """
    Please directly invoke `subgraph_summarization` tool without calling any other tools
    to respond to the following prompt:

    You are given a subgraph in the forms of textualized subgraph representing
    nodes and edges (triples) obtained from extraction_name `subkg_12345`.
    Summarize the given subgraph and higlight the importance nodes and edges.
    """

    # Invoke the agent, which is expected to call subgraph_summarization
    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
    messages = response["messages"]

    # The final message is the assistant's answer
    assert isinstance(messages[-1].content, str)

    # The message right before it must come from the summarization tool
    assert messages[-2].name == "subgraph_summarization"

    # The summary of the first extracted subgraph must now be filled in
    extracted = app.get_state(config).values["dic_extracted_graph"][0]
    assert isinstance(extracted["graph_summary"], str)
@@ -31,7 +31,6 @@ def test_embedding_with_huggingface_embed_query(embedding_model):
31
31
  # Check the result
32
32
  assert len(result) == 768
33
33
 
34
-
35
34
  def test_embedding_with_huggingface_failed():
36
35
  """Test embedding documents using the EmbeddingWithHuggingFace class."""
37
36
  # Check if the model is available on HuggingFace Hub
@@ -0,0 +1,56 @@
1
+ """
2
+ Test cases for utils/embeddings/ollama.py
3
+ """
4
+
5
+ import pytest
6
+ import ollama
7
+ from ..utils.embeddings.ollama import EmbeddingWithOllama
8
+
9
@pytest.fixture(name="ollama_config")
def fixture_ollama_config():
    """Provide the Ollama settings shared by the tests in this module."""
    config = {}
    config["model_name"] = "all-minilm"  # Choose a small model
    return config
15
+
16
def test_no_model_ollama(ollama_config):
    """
    Test the case when the Ollama model is not available.

    The model is removed first (if present), then constructing
    `EmbeddingWithOllama` is expected to raise a `ValueError` whose message
    reports that the model was pulled and the Ollama server restarted.

    Args:
        ollama_config: Dictionary with the Ollama configuration; only
            `model_name` is read here.
    """
    # Local import keeps this block self-contained; `re` is only needed here.
    import re

    model_name = ollama_config["model_name"]

    # Best-effort removal: a ResponseError here is deliberately ignored so the
    # test can proceed whether or not the model was installed beforehand.
    try:
        ollama.delete(model_name)
    except ollama.ResponseError:
        pass

    # Build the message outside the f-string's replacement field so that no
    # same-type quotes are nested (a SyntaxError before Python 3.12), and
    # escape it because `match` is interpreted as a regular expression.
    expected_message = f"Error: Pulled {model_name} model and restarted Ollama server."
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        EmbeddingWithOllama(model_name=model_name)
31
+
32
@pytest.fixture(name="embedding_model")
def embedding_model_fixture(ollama_config):
    """Create an `EmbeddingWithOllama` instance for the configured model name."""
    model_name = ollama_config["model_name"]
    model = EmbeddingWithOllama(model_name=model_name)
    return model
37
+
38
def test_embedding_with_ollama_embed_documents(embedding_model):
    """Check that `embed_documents` returns one 384-dimensional vector per text."""
    drug_names = ["Adalimumab", "Infliximab", "Vedolizumab"]

    # Embed all three texts in one call
    vectors = embedding_model.embed_documents(drug_names)

    # One vector per input, and the first one has the expected dimension
    assert len(vectors) == 3
    first_vector = vectors[0]
    assert len(first_vector) == 384
46
+
47
def test_embedding_with_ollama_embed_query(embedding_model):
    """
    Test embedding a query using the EmbeddingWithOllama class.

    The Ollama model is deleted at the end so that it will not be cached
    afterward. The deletion runs in a `finally` clause so the cleanup also
    happens when the embedding call or the assertion fails.

    Args:
        embedding_model: Embedding model fixture; `embed_query` and
            `model_name` are used here.
    """
    try:
        # Perform embedding
        text = "Adalimumab"
        result = embedding_model.embed_query(text)
        # Check the result
        assert len(result) == 384
    finally:
        # Delete the Ollama model so that it will not be cached afterward
        ollama.delete(embedding_model.model_name)