aiagents4pharma 1.40.1__py3-none-any.whl → 1.42.0__py3-none-any.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +37 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +752 -350
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/METADATA +52 -126
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/RECORD +52 -25
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/top_level.txt +0 -0
aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py
@@ -2,528 +2,108 @@
 Unit tests for question_and_answer tool functionality.
 """
 
-import unittest
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
-
-from langchain_core.
+import pytest
+from langchain_core.messages import ToolMessage
 
 from aiagents4pharma.talk2scholars.tools.pdf.question_and_answer import (
     question_and_answer,
 )
-from aiagents4pharma.talk2scholars.tools.pdf.utils.generate_answer import (
-    generate_answer,
-    load_hydra_config,
-)
-from aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker import (
-    rank_papers_by_query,
-)
-from aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks import (
-    retrieve_relevant_chunks,
-)
-from aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store import Vectorstore
-
-
-class TestQuestionAndAnswerTool(unittest.TestCase):
-    """tests for question_and_answer tool functionality."""
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper(self, mock_pypdf_loader):
-        """test adding a paper to the vector store."""
-        # Mock the PDF loader
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [Document(page_content="Page content")]
-
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model,
-            config=load_hydra_config(),
-        )
-
-        # Add a paper
-        vector_store.add_paper(
-            paper_id="test_paper",
-            pdf_url="http://example.com/test.pdf",
-            paper_metadata={"Title": "Test Paper"},
-        )
-
-        # Check if the paper was added
-        self.assertIn("test_paper_0", vector_store.documents)
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper_already_loaded(self, mock_pypdf_loader):
-        """Test that adding a paper that is already loaded does not re-load or add new documents."""
-        # Mock the PDF loader (it should not be used when the paper is already loaded)
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [Document(page_content="Page content")]
-
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model,
-            config=load_hydra_config(),
-        )
-
-        # Simulate the paper already being loaded.
-        vector_store.loaded_papers.add("test_paper")
-        # Capture the initial state of documents (should be empty)
-        initial_documents = dict(vector_store.documents)
-
-        # Attempt to add the same paper again.
-        vector_store.add_paper(
-            paper_id="test_paper",
-            pdf_url="http://example.com/test.pdf",
-            paper_metadata={"Title": "Test Paper"},
-        )
-
-        # Verify that no new paper was added by checking:
-        # 1. The loaded papers set remains unchanged.
-        self.assertEqual(vector_store.loaded_papers, {"test_paper"})
-        # 2. The documents dictionary remains unchanged.
-        self.assertEqual(vector_store.documents, initial_documents)
-        # 3. The PDF loader was not called at all.
-        mock_loader.load.assert_not_called()
-
-    def test_build_vector_store(self):
-        """test building the vector store."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Add a mock document
-        vector_store.documents["test_doc"] = Document(page_content="Test content")
-
-        # Mock the embed_documents method to return a list of embeddings
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        # Build vector store
-        vector_store.build_vector_store()
-
-        # Check if the vector store is built
-        self.assertIsNotNone(vector_store.vector_store)
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank"
-    )
-    def test_rank_papers_by_query(self, mock_nvidia_rerank):
-        """test ranking papers by query."""
-        # Create a mock config object with the top_k_papers attribute
-        # Create a mock config object with required reranker settings and top_k_papers
-        mock_config = SimpleNamespace(
-            reranker=SimpleNamespace(model="dummy", api_key="key"),
-            top_k_papers=1,
-        )
-
-        # Mock the re-ranker instance.
-        mock_reranker = mock_nvidia_rerank.return_value
-        mock_reranker.compress_documents.return_value = [
-            Document(
-                page_content="Aggregated content", metadata={"paper_id": "test_paper"}
-            )
-        ]
-
-        # Create a mock embedding model.
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore.
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Add a mock document.
-        vector_store.documents["test_doc"] = Document(
-            page_content="Test content", metadata={"paper_id": "test_paper"}
-        )
-
-        # Rank papers using the standalone function
-        ranked_papers = rank_papers_by_query(
-            vector_store, "test query", mock_config, top_k=mock_config.top_k_papers
-        )
-
-        # Check if the ranking is correct (updated expectation: a list of paper IDs)
-        self.assertEqual(ranked_papers[0], "test_paper")
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.maximal_marginal_relevance"
-    )
-    def test_retrieve_relevant_chunks(self, mock_mmr):
-        """Test retrieving relevant chunks without filters."""
-        mock_mmr.return_value = [0]
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-        vector_store.vector_store = True
-        # Add a document chunk with required metadata including chunk_id
-        vector_store.documents["test_doc"] = Document(
-            page_content="Test content",
-            metadata={"paper_id": "test_paper", "chunk_id": 0},
-        )
-
-        results = retrieve_relevant_chunks(vector_store, query="test query")
-        assert len(results) == 1
-        assert results[0].metadata["paper_id"] == "test_paper"
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.utils.generate_answer.BaseChatModel"
-    )
-    def test_generate_answer(self, mock_base_chat_model):
-        """test generating an answer."""
-        # Mock the language model
-        mock_llm = mock_base_chat_model.return_value
-        mock_llm.invoke.return_value.content = "Generated answer"
-
-        # Create a mock document
-        mock_document = Document(
-            page_content="Test content", metadata={"paper_id": "test_paper"}
-        )
-
-        # Generate answer with dummy config
-        config = {"prompt_template": "{context} {question}"}
-        result = generate_answer(
-            question="What is the test?",
-            retrieved_chunks=[mock_document],
-            llm_model=mock_llm,
-            config=config,
-        )
-
-        # Check if the answer is generated correctly
-        self.assertEqual(result["output_text"], "Generated answer")
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper_exception_handling(self, mock_pypdf_loader):
-        """Test exception handling when adding a paper."""
-        # Mock the PDF loader to raise an exception.
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.side_effect = Exception("Loading error")
-
-        # Mock embedding model.
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore.
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Attempt to add a paper and expect an exception.
-        with self.assertRaises(Exception) as context:
-            vector_store.add_paper(
-                paper_id="test_paper",
-                pdf_url="http://example.com/test.pdf",
-                paper_metadata={"Title": "Test Paper"},
-            )
-
-        # Verify that the exception message is as expected.
-        self.assertEqual(str(context.exception), "Loading error")
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper_missing_config(self, mock_pypdf_loader):
-        """Test that add_paper raises ValueError when config is missing."""
-        # Mock the PDF loader to return a single page
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [Document(page_content="Page content")]
-
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
 
-        # Initialize Vectorstore without config (default None)
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
 
⋮
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Add a mock document
-        vector_store.documents["test_doc"] = Document(page_content="Test content")
-
-        # Mock the embed_documents method to return a list of embeddings
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        # Build vector store once
-        vector_store.build_vector_store()
-        first_build = vector_store.vector_store
-
-        # Attempt to build vector store again
-        vector_store.build_vector_store()
-
-        # Check that the vector store remains unchanged (i.e. same object/state)
-        self.assertEqual(vector_store.vector_store, first_build)
-
-    def test_retrieve_relevant_chunks_vector_store_not_built(self):
-        """Test retrieving relevant chunks when the vector store is not built."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore without adding any documents
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Attempt to retrieve relevant chunks (vector_store.vector_store is None)
-        result = retrieve_relevant_chunks(vector_store, query="test query")
-
-        # Verify that an empty list is returned since the vector store is not built.
-        self.assertEqual(result, [])
-
-    def test_retrieve_relevant_chunks_with_paper_ids(self):
-        """Test retrieving relevant chunks with specific paper_ids when the store is not built."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        # Mock embed_documents method to return embeddings of fixed length
-        mock_embedding_model.embed_documents.return_value = [MagicMock()] * 2
-
-        # Initialize Vectorstore and add documents
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-        vector_store.documents = {
-            "doc1": Document(page_content="content1", metadata={"paper_id": "paper1"}),
-            "doc2": Document(page_content="content2", metadata={"paper_id": "paper2"}),
+@pytest.fixture(name="dependencies_fixture")
+def _dependencies_fixture():
+    """Patches all dependencies for question_and_answer."""
+    with (
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.format_answer"
+        ) as mock_format,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.retrieve_and_rerank_chunks"
+        ) as mock_rerank,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_all_papers"
+        ) as mock_load_papers,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_hydra_config"
+        ) as mock_config,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.QAToolHelper"
+        ) as mock_helper_cls,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.get_vectorstore"
+        ) as mock_get_vs,
+    ):
+        yield {
+            "mock_get_vectorstore": mock_get_vs,
+            "mock_helper_cls": mock_helper_cls,
+            "mock_load_config": mock_config,
+            "mock_load_all_papers": mock_load_papers,
+            "mock_retrieve_rerank": mock_rerank,
+            "mock_format_answer": mock_format,
         }
 
-        # Leave vector_store.vector_store as None to trigger the branch that returns an empty list
-        vector_store.vector_store = None
-
-        # Call retrieve_relevant_chunks with specific paper_ids
-        paper_ids = ["paper1"]
-        # Use module-level retrieve_relevant_chunks
-
-        result = retrieve_relevant_chunks(
-            vector_store, query="test query", paper_ids=paper_ids
-        )
-
-        # Verify that an empty list is returned since the vector store is not built.
-        self.assertEqual(result, [])
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_additional_metadata_field_added(self, mock_pypdf_loader):
-        """test that additional metadata fields are added correctly."""
-        # Setup the PDF loader to return a single document with empty metadata
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [
-            Document(page_content="Test content", metadata={})
-        ]
-
-        # Create a dummy embedding model
-        dummy_embedding_model = MagicMock(spec=Embeddings)
 
⋮
-        self.assertEqual(added_doc.metadata.get("custom_field"), "custom_value")
-
-    def test_generate_answer_missing_config_fields(self):
-        """test that generate_answer raises ValueError for missing config fields."""
-        # Create a dummy document and dummy LLM model
-        dummy_doc = Document(
-            page_content="Test content", metadata={"paper_id": "test_paper"}
-        )
-        dummy_llm_model = MagicMock()
-
-        # Case 1: Configuration is None, expect a ValueError
-        with self.assertRaises(ValueError) as context_none:
-            generate_answer("What is the test?", [dummy_doc], dummy_llm_model, None)
-        self.assertEqual(
-            str(context_none.exception),
-            "Configuration for generate_answer is required.",
-        )
-
-        # Case 2: Configuration missing 'prompt_template', expect a ValueError
-        with self.assertRaises(ValueError) as context_missing:
-            generate_answer("What is the test?", [dummy_doc], dummy_llm_model, {})
-        self.assertEqual(
-            str(context_missing.exception),
-            "The prompt_template is missing from the configuration.",
-        )
-
-    def test_state_validation_errors(self):
-        """Test errors raised for missing state entries."""
-        valid_articles = {"paper1": {"pdf_url": "u", "Title": "T1"}}
-        cases = [
-            ({"llm_model": MagicMock(), "article_data": valid_articles},
-             "No text embedding model found in state."),
-            ({"text_embedding_model": MagicMock(), "article_data": valid_articles},
-             "No LLM model found in state."),
-            ({"text_embedding_model": MagicMock(), "llm_model": MagicMock()},
-             "No article_data found in state."),
-            ({"text_embedding_model": MagicMock(), "llm_model": MagicMock(), "article_data": {}},
-             "No article_data found in state."),
-        ]
-        for state_dict, expected_msg in cases:
-            with self.subTest(state=state_dict):
-                tool_input = {"question": "Q?", "state": state_dict, "tool_call_id": "id"}
-                with self.assertRaises(ValueError) as cm:
-                    question_and_answer.run(tool_input)
-                self.assertEqual(str(cm.exception), expected_msg)
-
-    def test_retrieve_relevant_chunks_with_filtering(self):
-        """Test that filtering works by paper_ids."""
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model, config=load_hydra_config()
-        )
-        vector_store.vector_store = True
-        # Add document chunks with necessary metadata including chunk_ids
-        doc1 = Document(
-            page_content="Doc 1", metadata={"paper_id": "paper1", "chunk_id": 0}
-        )
-        doc2 = Document(
-            page_content="Doc 2", metadata={"paper_id": "paper2", "chunk_id": 1}
-        )
-        vector_store.documents = {"doc1": doc1, "doc2": doc2}
-
-        results = retrieve_relevant_chunks(
-            vector_store, query="query", paper_ids=["paper1"]
-        )
-        assert len(results) == 1
-        assert results[0].metadata["paper_id"] == "paper1"
-
-    def test_retrieve_relevant_chunks_no_matching_docs(self):
-        """Ensure it returns empty list and logs warning if no docs match."""
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
-        mock_embedding_model.embed_documents.return_value = []
-
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model, config=load_hydra_config()
-        )
-        vector_store.vector_store = True
-        # Add doc with paper_id that won't match
-        vector_store.documents["doc1"] = Document(
-            page_content="No match", metadata={"paper_id": "unmatched_paper"}
-        )
-        # Use util function for retrieval
-        results = retrieve_relevant_chunks(
-            vector_store, query="test", paper_ids=["nonexistent_id"]
-        )
-        assert results == []
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer."
-        "helper.get_state_models_and_data"
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer."
-        "helper.init_vector_store"
+@pytest.fixture(name="input_fixture")
+def _input_fixture():
+    """Simulates input for the question_and_answer tool."""
+    return {
+        "question": "What is the main contribution of the paper?",
+        "tool_call_id": "test_tool_call_id",
+        "state": {
+            "article_data": {"paper1": {"title": "Test Paper", "pdf_url": "url1"}},
+            "text_embedding_model": MagicMock(),
+            "llm_model": MagicMock(),
+        },
+    }
+
+
+def test_question_and_answer_success(dependencies_fixture, input_fixture):
+    """question_and_answer should return a ToolMessage with the answer."""
+    mock_helper = MagicMock()
+    mock_helper.get_state_models_and_data.return_value = (
+        input_fixture["state"]["text_embedding_model"],
+        input_fixture["state"]["llm_model"],
+        input_fixture["state"]["article_data"],
     )
⋮
+    mock_helper.init_vector_store.return_value = MagicMock()
+    mock_helper.has_gpu = True
+
+    dependencies_fixture["mock_helper_cls"].return_value = mock_helper
+    dependencies_fixture["mock_load_config"].return_value = {"config_key": "value"}
+    dependencies_fixture["mock_get_vectorstore"].return_value = MagicMock()
+    dependencies_fixture["mock_retrieve_rerank"].return_value = [
+        {"chunk": "relevant content"}
+    ]
+    dependencies_fixture["mock_format_answer"].return_value = "Here is your answer."
+
+    result = question_and_answer.invoke(input_fixture)
+
+    assert isinstance(result.update["messages"][0], ToolMessage)
+    assert result.update["messages"][0].content == "Here is your answer."
+
+
+def test_question_and_answer_no_reranked_chunks(dependencies_fixture, input_fixture):
+    """question_and_answer should return a ToolMessage with no relevant information found."""
+    mock_helper = MagicMock()
+    mock_helper.get_state_models_and_data.return_value = (
+        input_fixture["state"]["text_embedding_model"],
+        input_fixture["state"]["llm_model"],
+        input_fixture["state"]["article_data"],
     )
⋮
+    mock_helper.init_vector_store.return_value = MagicMock()
+    mock_helper.has_gpu = False
+
+    dependencies_fixture["mock_helper_cls"].return_value = mock_helper
+    dependencies_fixture["mock_load_config"].return_value = {"config_key": "value"}
+    dependencies_fixture["mock_get_vectorstore"].return_value = MagicMock()
+    dependencies_fixture["mock_retrieve_rerank"].return_value = []
+    dependencies_fixture["mock_format_answer"].return_value = (
+        "No relevant information found."
     )
-    def test_question_and_answer_happy_path(
-        self, mock_retrieve, mock_init, mock_state
-    ):
-        """Test happy path for question_and_answer tool."""
-        # Setup helper and utility mocks
-        emb = object()
-        llm = object()
-        articles = {"p1": {"pdf_url": "u"}}
-        mock_state.return_value = (emb, llm, articles)
-        # Provide dummy vector store for loading
-        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
-        mock_init.return_value = vs
-        # Dummy chunk list for retrieval
-        dummy_chunk = Document(page_content="c", metadata={"paper_id": "p1"})
-        mock_retrieve.return_value = [dummy_chunk]
-
-        # Use module-level question_and_answer
-
-        state = {}
-        tool_input = {"question": "Q?", "state": state, "tool_call_id": "tid"}
-        result = question_and_answer.run(tool_input)
-        # Verify Command message content and tool_call_id
-        msgs = result.update.get("messages", [])
-        self.assertEqual(len(msgs), 1)
-        msg = msgs[0]
-        self.assertEqual(msg.content, "formatted answer")
-        self.assertEqual(msg.tool_call_id, "tid")
 
⋮
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper."
-        "get_state_models_and_data"
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper.init_vector_store"
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper.run_reranker",
-        return_value=["p1"],
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.retrieve_relevant_chunks",
-        return_value=[],
-    )
-    def test_question_and_answer_no_chunks(
-        self, _mock_retrieve, _mock_rerank, mock_init, mock_state
-    ):
-        """Test that no chunks raises RuntimeError."""
-        emb = object()
-        llm = object()
-        articles = {"p1": {"pdf_url": "u"}}
-        mock_state.return_value = (emb, llm, articles)
-        # Provide dummy vector store to satisfy load_candidate_papers
-        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
-        mock_init.return_value = vs
+    result = question_and_answer.invoke(input_fixture)
 
⋮
-            question_and_answer.run(tool_input)
-        self.assertIn("No relevant chunks found for question", str(cm.exception))
+    assert isinstance(result.update["messages"][0], ToolMessage)
+    assert result.update["messages"][0].content == "No relevant information found."