aiagents4pharma 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. aiagents4pharma/talk2knowledgegraphs/__init__.py +3 -0
  2. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +4 -0
  3. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +85 -0
  4. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +7 -0
  5. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  6. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  7. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +4 -0
  8. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  9. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +31 -0
  10. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +7 -0
  11. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +6 -0
  12. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  13. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  14. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  15. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  16. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  17. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  18. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +4 -0
  19. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +38 -0
  20. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +110 -0
  21. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +210 -0
  22. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +174 -0
  23. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +154 -0
  24. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +0 -1
  25. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +56 -0
  26. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +18 -42
  27. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +79 -0
  28. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +6 -0
  29. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +143 -0
  30. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  31. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +305 -0
  32. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +126 -0
  33. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -2
  34. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
  35. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +81 -0
  36. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -0
  37. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +225 -0
  38. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/METADATA +3 -1
  39. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/RECORD +42 -10
  40. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/LICENSE +0 -0
  41. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/WHEEL +0 -0
  42. {aiagents4pharma-1.18.0.dist-info → aiagents4pharma-1.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
1
+ """
2
+ Test cases for tools/graphrag_reasoning.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
+ @pytest.fixture(name="input_dict")
15
+ def input_dict_fixture():
16
+ """
17
+ Input dictionary fixture.
18
+ """
19
+ input_dict = {
20
+ "llm_model": None, # TBA for each test case
21
+ "embedding_model": None, # TBA for each test case
22
+ "uploaded_files": [
23
+ {
24
+ "file_name": "adalimumab.pdf",
25
+ "file_path": f"{DATA_PATH}/adalimumab.pdf",
26
+ "file_type": "drug_data",
27
+ "uploaded_by": "VPEUser",
28
+ "uploaded_timestamp": "2024-11-05 00:00:00",
29
+ },
30
+ {
31
+ "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
32
+ "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
33
+ "file_type": "endotype",
34
+ "uploaded_by": "VPEUser",
35
+ "uploaded_timestamp": "2024-11-05 00:00:00",
36
+ },
37
+ ],
38
+ "topk_nodes": 3,
39
+ "topk_edges": 3,
40
+ "dic_source_graph": [
41
+ {
42
+ "name": "PrimeKG",
43
+ "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
44
+ "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
45
+ }
46
+ ],
47
+ "dic_extracted_graph": [
48
+ {
49
+ "name": "subkg_12345",
50
+ "tool_call_id": "tool_12345",
51
+ "graph_source": "PrimeKG",
52
+ "topk_nodes": 3,
53
+ "topk_edges": 3,
54
+ "graph_dict": {
55
+ 'nodes': [('IFNG_(3495)', {}),
56
+ ('IKBKG_(3672)', {}),
57
+ ('ATG16L1_(6661)', {}),
58
+ ('inflammatory bowel disease_(28158)', {}),
59
+ ('Crohn ileitis and jejunitis_(35814)', {}),
60
+ ("Crohn's colitis_(83770)", {})],
61
+ 'edges': [('IFNG_(3495)', 'inflammatory bowel disease_(28158)',
62
+ {'relation': ['gene/protein', 'associated with', 'disease'],
63
+ 'label': ['gene/protein', 'associated with', 'disease']}),
64
+ ('IFNG_(3495)', "Crohn's colitis_(83770)",
65
+ {'relation': ['gene/protein', 'associated with', 'disease'],
66
+ 'label': ['gene/protein', 'associated with', 'disease']}),
67
+ ('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)',
68
+ {'relation': ['gene/protein', 'associated with', 'disease'],
69
+ 'label': ['gene/protein', 'associated with', 'disease']}),
70
+ ('ATG16L1_(6661)', 'IKBKG_(3672)',
71
+ {'relation': ['gene/protein', 'ppi', 'gene/protein'],
72
+ 'label': ['gene/protein', 'ppi', 'gene/protein']}),
73
+ ("Crohn's colitis_(83770)", 'ATG16L1_(6661)',
74
+ {'relation': ['disease', 'associated with', 'gene/protein'],
75
+ 'label': ['disease', 'associated with', 'gene/protein']})]},
76
+ "graph_text": """
77
+ node_id,node_attr
78
+ IFNG_(3495),"IFNG belongs to gene/protein category.
79
+ This gene encodes a soluble cytokine that is a member of the type II interferon class.
80
+ The encoded protein is secreted by cells of both the innate and adaptive immune systems.
81
+ The active protein is a homodimer that binds to the interferon gamma receptor
82
+ which triggers a cellular response to viral and microbial infections.
83
+ Mutations in this gene are associated with an increased susceptibility to viral,
84
+ bacterial and parasitic infections and to several autoimmune diseases.
85
+ [provided by RefSeq, Dec 2015]."
86
+ IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
87
+ subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
88
+ resulting in activation of genes involved in inflammation, immunity, cell survival,
89
+ and other pathways. Mutations in this gene result in incontinentia pigmenti,
90
+ hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
91
+ A pseudogene highly similar to this locus is located in an adjacent region of the
92
+ X chromosome. [provided by RefSeq, Mar 2016]."
93
+ ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
94
+ by this gene is part of a large protein complex that is necessary for autophagy,
95
+ the major process by which intracellular components are targeted to lysosomes
96
+ for degradation. Defects in this gene are a cause of susceptibility to inflammatory
97
+ bowel disease type 10 (IBD10). Several transcript variants encoding different
98
+ isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
99
+ inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
100
+ category. Any inflammatory bowel disease in which the cause of the disease
101
+ is a mutation in the NOD2 gene.
102
+ Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
103
+ disease category. An Crohn disease involving a pathogenic inflammatory
104
+ response in the ileum.
105
+ Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
106
+ Crohn's disease affecting the colon.
107
+
108
+ head_id,edge_type,tail_id
109
+ Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
110
+ ATG16L1_(6661)
111
+ ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
112
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
113
+ inflammatory bowel disease_(28158)
114
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
115
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
116
+ Crohn ileitis and jejunitis_(35814)
117
+ """,
118
+ "graph_summary": """
119
+ The subgraph extracted from `subkg_12345` includes several important genes and
120
+ their associations with inflammatory bowel diseases, particularly Crohn's disease.
121
+
122
+ Key Nodes:
123
+ 1. **IFNG (Interferon gamma)**: This gene encodes a cytokine that plays a crucial
124
+ role in immune response. It is associated with several diseases, including
125
+ inflammatory bowel disease and specifically Crohn's colitis and Crohn ileitis and
126
+ jejunitis. Mutations in IFNG can lead to increased susceptibility to infections
127
+ and autoimmune diseases.
128
+
129
+ 2. **IKBKG (Inhibitor of kappaB kinase gamma)**: This gene is involved in the
130
+ regulation of NF-kappaB, which is critical for inflammation and immune responses.
131
+ Mutations can lead to immunodeficiencies and other disorders.
132
+
133
+ 3. **ATG16L1**: This gene is essential for autophagy, a process that helps in
134
+ degrading intracellular components. Defects in ATG16L1 are linked to inflammatory
135
+ bowel disease type 10 (IBD10) and are associated with Crohn's colitis.
136
+
137
+ 4. **Inflammatory Bowel Disease**: A category of diseases characterized by
138
+ chronic inflammation of the gastrointestinal tract, with specific mention of
139
+ mutations in the NOD2 gene as a cause.
140
+
141
+ 5. **Crohn's Colitis**: A specific type of Crohn's disease affecting the colon,
142
+ indicating a pathogenic inflammatory response.
143
+
144
+ 6. **Crohn Ileitis and Jejunitis**: Another form of Crohn's disease that involves
145
+ inflammation in the ileum.
146
+
147
+ Key Edges:
148
+ - **IFNG is associated with inflammatory bowel disease, Crohn's colitis, and
149
+ Crohn ileitis and jejunitis**: This highlights the role of IFNG in these diseases.
150
+ - **ATG16L1 is associated with Crohn's colitis**: This indicates a direct link
151
+ between the gene and the disease.
152
+ - **ATG16L1 interacts with IKBKG**: This protein-protein interaction suggests a
153
+ functional relationship between these two genes in the context of immune response
154
+ and inflammation.
155
+
156
+ In summary, the subgraph illustrates the connections between key genes
157
+ (IFNG, IKBKG, ATG16L1) and their associations with inflammatory bowel diseases,
158
+ particularly Crohn's disease, emphasizing the genetic underpinnings of these conditions.
159
+ """,
160
+ }
161
+ ],
162
+ }
163
+
164
+ return input_dict
165
+
166
+
167
+ def test_graphrag_reasoning_openai(input_dict):
168
+ """
169
+ Test the GraphRAG reasoning tool using OpenAI model.
170
+
171
+ Args:
172
+ input_dict: Input dictionary
173
+ """
174
+ # Prepare LLM and embedding model
175
+ input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
176
+ input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
177
+
178
+ # Setup the app
179
+ unique_id = 12345
180
+ app = get_app(unique_id, llm_model=input_dict["llm_model"])
181
+ config = {"configurable": {"thread_id": unique_id}}
182
+ # Update state
183
+ app.update_state(
184
+ config,
185
+ input_dict,
186
+ )
187
+ prompt = """
188
+ Without extracting a new subgraph, based on subgraph extracted from `subkg_12345`
189
+ perform Graph RAG reasoning to get insights related to nodes of genes
190
+ mentioned in the knowledge graph related to Adalimumab.
191
+
192
+ Here is an additional context:
193
+ Adalimumab is a fully human monoclonal antibody (IgG1)
194
+ that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
195
+ """
196
+
197
+ # Test the tool graphrag_reasoning
198
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
199
+
200
+ # Check assistant message
201
+ assistant_msg = response["messages"][-1].content
202
+ assert isinstance(assistant_msg, str)
203
+
204
+ # Check tool message
205
+ tool_msg = response["messages"][-2]
206
+ assert tool_msg.name == "graphrag_reasoning"
207
+
208
+ # Check reasoning results
209
+ assert "Adalimumab" in assistant_msg
210
+ assert "TNF" in assistant_msg
@@ -0,0 +1,174 @@
1
+ """
2
+ Test cases for tools/subgraph_extraction.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
+ @pytest.fixture(name="input_dict")
15
+ def input_dict_fixture():
16
+ """
17
+ Input dictionary fixture.
18
+ """
19
+ input_dict = {
20
+ "llm_model": None, # TBA for each test case
21
+ "embedding_model": None, # TBA for each test case
22
+ "uploaded_files": [],
23
+ "topk_nodes": 3,
24
+ "topk_edges": 3,
25
+ "dic_source_graph": [
26
+ {
27
+ "name": "PrimeKG",
28
+ "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
29
+ "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
30
+ }
31
+ ],
32
+ }
33
+
34
+ return input_dict
35
+
36
+
37
+ def test_extract_subgraph_wo_docs(input_dict):
38
+ """
39
+ Test the subgraph extraction tool without any documents using OpenAI model.
40
+
41
+ Args:
42
+ input_dict: Input dictionary.
43
+ """
44
+ # Prepare LLM and embedding model
45
+ input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
46
+ input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
47
+
48
+ # Setup the app
49
+ unique_id = 12345
50
+ app = get_app(unique_id, llm_model=input_dict["llm_model"])
51
+ config = {"configurable": {"thread_id": unique_id}}
52
+ # Update state
53
+ app.update_state(
54
+ config,
55
+ input_dict,
56
+ )
57
+ prompt = """
58
+ Please directly invoke `subgraph_extraction` tool without calling any other tools
59
+ to respond to the following prompt:
60
+
61
+ Extract all relevant information related to nodes of genes related to inflammatory bowel disease
62
+ (IBD) that existed in the knowledge graph.
63
+ Please set the extraction name for this process as `subkg_12345`.
64
+ """
65
+
66
+ # Test the tool subgraph_extraction
67
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
68
+
69
+ # Check assistant message
70
+ assistant_msg = response["messages"][-1].content
71
+ assert isinstance(assistant_msg, str)
72
+
73
+ # Check tool message
74
+ tool_msg = response["messages"][-2]
75
+ assert tool_msg.name == "subgraph_extraction"
76
+
77
+ # Check extracted subgraph dictionary
78
+ current_state = app.get_state(config)
79
+ dic_extracted_graph = current_state.values["dic_extracted_graph"][0]
80
+ assert isinstance(dic_extracted_graph, dict)
81
+ assert dic_extracted_graph["name"] == "subkg_12345"
82
+ assert dic_extracted_graph["graph_source"] == "PrimeKG"
83
+ assert dic_extracted_graph["topk_nodes"] == 3
84
+ assert dic_extracted_graph["topk_edges"] == 3
85
+ assert isinstance(dic_extracted_graph["graph_dict"], dict)
86
+ assert len(dic_extracted_graph["graph_dict"]["nodes"]) > 0
87
+ assert len(dic_extracted_graph["graph_dict"]["edges"]) > 0
88
+ assert isinstance(dic_extracted_graph["graph_text"], str)
89
+ # Check if the nodes are in the graph_text
90
+ assert all(
91
+ n[0] in dic_extracted_graph["graph_text"]
92
+ for n in dic_extracted_graph["graph_dict"]["nodes"]
93
+ )
94
+ # Check if the edges are in the graph_text
95
+ assert all(
96
+ ",".join([e[0], '"' + str(tuple(e[2]["relation"])) + '"', e[1]])
97
+ in dic_extracted_graph["graph_text"]
98
+ for e in dic_extracted_graph["graph_dict"]["edges"]
99
+ )
100
+
101
+
102
+ def test_extract_subgraph_w_docs(input_dict):
103
+ """
104
+ Test the subgraph extraction tool with a document as reference (i.e., endotype document)
105
+ using OpenAI model.
106
+
107
+ Args:
108
+ input_dict: Input dictionary.
109
+ """
110
+ # Prepare LLM and embedding model
111
+ input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
112
+ input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
113
+
114
+ # Setup the app
115
+ unique_id = 12345
116
+ app = get_app(unique_id, llm_model=input_dict["llm_model"])
117
+ config = {"configurable": {"thread_id": unique_id}}
118
+ # Update state
119
+ input_dict["uploaded_files"] = [
120
+ {
121
+ "file_name": "DGE_human_Colon_UC-vs-Colon_Control.pdf",
122
+ "file_path": f"{DATA_PATH}/DGE_human_Colon_UC-vs-Colon_Control.pdf",
123
+ "file_type": "endotype",
124
+ "uploaded_by": "VPEUser",
125
+ "uploaded_timestamp": "2024-11-05 00:00:00",
126
+ }
127
+ ]
128
+ app.update_state(
129
+ config,
130
+ input_dict,
131
+ )
132
+ prompt = """
133
+ Please ONLY invoke `subgraph_extraction` tool without calling any other tools
134
+ to respond to the following prompt:
135
+
136
+ Extract all relevant information related to nodes of genes related to inflammatory bowel disease
137
+ (IBD) that existed in the knowledge graph.
138
+ Please set the extraction name for this process as `subkg_12345`.
139
+ """
140
+
141
+ # Test the tool subgraph_extraction
142
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
143
+
144
+ # Check assistant message
145
+ assistant_msg = response["messages"][-1].content
146
+ assert isinstance(assistant_msg, str)
147
+
148
+ # Check tool message
149
+ tool_msg = response["messages"][-2]
150
+ assert tool_msg.name == "subgraph_extraction"
151
+
152
+ # Check extracted subgraph dictionary
153
+ current_state = app.get_state(config)
154
+ dic_extracted_graph = current_state.values["dic_extracted_graph"][0]
155
+ assert isinstance(dic_extracted_graph, dict)
156
+ assert dic_extracted_graph["name"] == "subkg_12345"
157
+ assert dic_extracted_graph["graph_source"] == "PrimeKG"
158
+ assert dic_extracted_graph["topk_nodes"] == 3
159
+ assert dic_extracted_graph["topk_edges"] == 3
160
+ assert isinstance(dic_extracted_graph["graph_dict"], dict)
161
+ assert len(dic_extracted_graph["graph_dict"]["nodes"]) > 0
162
+ assert len(dic_extracted_graph["graph_dict"]["edges"]) > 0
163
+ assert isinstance(dic_extracted_graph["graph_text"], str)
164
+ # Check if the nodes are in the graph_text
165
+ assert all(
166
+ n[0] in dic_extracted_graph["graph_text"]
167
+ for n in dic_extracted_graph["graph_dict"]["nodes"]
168
+ )
169
+ # Check if the edges are in the graph_text
170
+ assert all(
171
+ ",".join([e[0], '"' + str(tuple(e[2]["relation"])) + '"', e[1]])
172
+ in dic_extracted_graph["graph_text"]
173
+ for e in dic_extracted_graph["graph_dict"]["edges"]
174
+ )
@@ -0,0 +1,154 @@
1
+ """
2
+ Test cases for tools/subgraph_summarization.py
3
+ """
4
+
5
+ import pytest
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
+ from ..agents.t2kg_agent import get_app
9
+
10
+ # Define the data path
11
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
+
13
+
14
+ @pytest.fixture(name="input_dict")
15
+ def input_dict_fixture():
16
+ """
17
+ Input dictionary fixture.
18
+ """
19
+ input_dict = {
20
+ "llm_model": None, # TBA for each test case
21
+ "embedding_model": None, # TBA for each test case
22
+ "uploaded_files": [],
23
+ "topk_nodes": 3,
24
+ "topk_edges": 3,
25
+ "dic_source_graph": [
26
+ {
27
+ "name": "PrimeKG",
28
+ "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
29
+ "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
30
+ }
31
+ ],
32
+ "dic_extracted_graph": [
33
+ {
34
+ "name": "subkg_12345",
35
+ "tool_call_id": "tool_12345",
36
+ "graph_source": "PrimeKG",
37
+ "topk_nodes": 3,
38
+ "topk_edges": 3,
39
+ "graph_dict": {
40
+ 'nodes': [('IFNG_(3495)', {}),
41
+ ('IKBKG_(3672)', {}),
42
+ ('ATG16L1_(6661)', {}),
43
+ ('inflammatory bowel disease_(28158)', {}),
44
+ ('Crohn ileitis and jejunitis_(35814)', {}),
45
+ ("Crohn's colitis_(83770)", {})],
46
+ 'edges': [('IFNG_(3495)', 'inflammatory bowel disease_(28158)',
47
+ {'relation': ['gene/protein', 'associated with', 'disease'],
48
+ 'label': ['gene/protein', 'associated with', 'disease']}),
49
+ ('IFNG_(3495)', "Crohn's colitis_(83770)",
50
+ {'relation': ['gene/protein', 'associated with', 'disease'],
51
+ 'label': ['gene/protein', 'associated with', 'disease']}),
52
+ ('IFNG_(3495)', 'Crohn ileitis and jejunitis_(35814)',
53
+ {'relation': ['gene/protein', 'associated with', 'disease'],
54
+ 'label': ['gene/protein', 'associated with', 'disease']}),
55
+ ('ATG16L1_(6661)', 'IKBKG_(3672)',
56
+ {'relation': ['gene/protein', 'ppi', 'gene/protein'],
57
+ 'label': ['gene/protein', 'ppi', 'gene/protein']}),
58
+ ("Crohn's colitis_(83770)", 'ATG16L1_(6661)',
59
+ {'relation': ['disease', 'associated with', 'gene/protein'],
60
+ 'label': ['disease', 'associated with', 'gene/protein']})]},
61
+ "graph_text": """
62
+ node_id,node_attr
63
+ IFNG_(3495),"IFNG belongs to gene/protein category.
64
+ This gene encodes a soluble cytokine that is a member of the type II interferon class.
65
+ The encoded protein is secreted by cells of both the innate and adaptive immune systems.
66
+ The active protein is a homodimer that binds to the interferon gamma receptor
67
+ which triggers a cellular response to viral and microbial infections.
68
+ Mutations in this gene are associated with an increased susceptibility to viral,
69
+ bacterial and parasitic infections and to several autoimmune diseases.
70
+ [provided by RefSeq, Dec 2015]."
71
+ IKBKG_(3672),"IKBKG belongs to gene/protein category. This gene encodes the regulatory
72
+ subunit of the inhibitor of kappaB kinase (IKK) complex, which activates NF-kappaB
73
+ resulting in activation of genes involved in inflammation, immunity, cell survival,
74
+ and other pathways. Mutations in this gene result in incontinentia pigmenti,
75
+ hypohidrotic ectodermal dysplasia, and several other types of immunodeficiencies.
76
+ A pseudogene highly similar to this locus is located in an adjacent region of the
77
+ X chromosome. [provided by RefSeq, Mar 2016]."
78
+ ATG16L1_(6661),"ATG16L1 belongs to gene/protein category. The protein encoded
79
+ by this gene is part of a large protein complex that is necessary for autophagy,
80
+ the major process by which intracellular components are targeted to lysosomes
81
+ for degradation. Defects in this gene are a cause of susceptibility to inflammatory
82
+ bowel disease type 10 (IBD10). Several transcript variants encoding different
83
+ isoforms have been found for this gene.[provided by RefSeq, Jun 2010]."
84
+ inflammatory bowel disease_(28158),inflammatory bowel disease belongs to disease
85
+ category. Any inflammatory bowel disease in which the cause of the disease
86
+ is a mutation in the NOD2 gene.
87
+ Crohn ileitis and jejunitis_(35814),Crohn ileitis and jejunitis belongs to
88
+ disease category. An Crohn disease involving a pathogenic inflammatory
89
+ response in the ileum.
90
+ Crohn's colitis_(83770),Crohn's colitis belongs to disease category.
91
+ Crohn's disease affecting the colon.
92
+
93
+ head_id,edge_type,tail_id
94
+ Crohn's colitis_(83770),"('disease', 'associated with', 'gene/protein')",
95
+ ATG16L1_(6661)
96
+ ATG16L1_(6661),"('gene/protein', 'ppi', 'gene/protein')",IKBKG_(3672)
97
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
98
+ inflammatory bowel disease_(28158)
99
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",Crohn's colitis_(83770)
100
+ IFNG_(3495),"('gene/protein', 'associated with', 'disease')",
101
+ Crohn ileitis and jejunitis_(35814)
102
+ """,
103
+ "graph_summary": None,
104
+ }
105
+ ],
106
+ }
107
+
108
+ return input_dict
109
+
110
+
111
+ def test_summarize_subgraph(input_dict):
112
+ """
113
+ Test the subgraph summarization tool without any documents using OpenAI model.
114
+
115
+ Args:
116
+ input_dict: Input dictionary fixture.
117
+ """
118
+ # Prepare LLM and embedding model
119
+ input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
120
+ input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
121
+
122
+ # Setup the app
123
+ unique_id = 12345
124
+ app = get_app(unique_id, llm_model=input_dict["llm_model"])
125
+ config = {"configurable": {"thread_id": unique_id}}
126
+ # Update state
127
+ app.update_state(
128
+ config,
129
+ input_dict,
130
+ )
131
+ prompt = """
132
+ Please directly invoke `subgraph_summarization` tool without calling any other tools
133
+ to respond to the following prompt:
134
+
135
+ You are given a subgraph in the forms of textualized subgraph representing
136
+ nodes and edges (triples) obtained from extraction_name `subkg_12345`.
137
+ Summarize the given subgraph and higlight the importance nodes and edges.
138
+ """
139
+
140
+ # Test the tool subgraph_summarization
141
+ response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
142
+
143
+ # Check assistant message
144
+ assistant_msg = response["messages"][-1].content
145
+ assert isinstance(assistant_msg, str)
146
+
147
+ # Check tool message
148
+ tool_msg = response["messages"][-2]
149
+ assert tool_msg.name == "subgraph_summarization"
150
+
151
+ # Check summarized subgraph
152
+ current_state = app.get_state(config)
153
+ dic_extracted_graph = current_state.values["dic_extracted_graph"][0]
154
+ assert isinstance(dic_extracted_graph["graph_summary"], str)
@@ -31,7 +31,6 @@ def test_embedding_with_huggingface_embed_query(embedding_model):
31
31
  # Check the result
32
32
  assert len(result) == 768
33
33
 
34
-
35
34
  def test_embedding_with_huggingface_failed():
36
35
  """Test embedding documents using the EmbeddingWithHuggingFace class."""
37
36
  # Check if the model is available on HuggingFace Hub
@@ -0,0 +1,56 @@
1
+ """
2
+ Test cases for utils/embeddings/ollama.py
3
+ """
4
+
5
+ import pytest
6
+ import ollama
7
+ from ..utils.embeddings.ollama import EmbeddingWithOllama
8
+
9
+ @pytest.fixture(name="ollama_config")
10
+ def fixture_ollama_config():
11
+ """Return a dictionary with Ollama configuration."""
12
+ return {
13
+ "model_name": "all-minilm", # Choose a small model
14
+ }
15
+
16
+ def test_no_model_ollama(ollama_config):
17
+ """Test the case when the Ollama model is not available."""
18
+ cfg = ollama_config
19
+
20
+ # Delete the Ollama model
21
+ try:
22
+ ollama.delete(cfg["model_name"])
23
+ except ollama.ResponseError:
24
+ pass
25
+
26
+ # Constructing the embedding model is expected to raise ValueError with the message below
27
+ with pytest.raises(
28
+ ValueError, match=f"Error: Pulled {cfg["model_name"]} model and restarted Ollama server."
29
+ ):
30
+ EmbeddingWithOllama(model_name=cfg["model_name"])
31
+
32
+ @pytest.fixture(name="embedding_model")
33
+ def embedding_model_fixture(ollama_config):
34
+ """Return the configuration object for the Ollama embedding model and model object"""
35
+ cfg = ollama_config
36
+ return EmbeddingWithOllama(model_name=cfg["model_name"])
37
+
38
+ def test_embedding_with_ollama_embed_documents(embedding_model):
39
+ """Test embedding documents using the EmbeddingWithOllama class."""
40
+ # Perform embedding
41
+ texts = ["Adalimumab", "Infliximab", "Vedolizumab"]
42
+ result = embedding_model.embed_documents(texts)
43
+ # Check the result
44
+ assert len(result) == 3
45
+ assert len(result[0]) == 384
46
+
47
+ def test_embedding_with_ollama_embed_query(embedding_model):
48
+ """Test embedding a query using the EmbeddingWithOllama class."""
49
+ # Perform embedding
50
+ text = "Adalimumab"
51
+ result = embedding_model.embed_query(text)
52
+ # Check the result
53
+ assert len(result) == 384
54
+
55
+ # Delete the Ollama model so that it will not be cached afterward
56
+ ollama.delete(embedding_model.model_name)