aiagents4pharma 1.40.0__py3-none-any.whl → 1.41.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (46)
  1. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
  2. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
  3. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
  4. aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
  5. aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
  6. aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
  7. aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
  8. aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
  9. aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
  10. aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
  11. aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
  12. aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
  13. aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
  14. aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
  15. aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
  16. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
  17. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
  18. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
  19. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
  20. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
  21. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
  22. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  23. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
  24. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  25. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
  32. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
  33. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  34. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
  35. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
  36. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
  37. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
  38. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
  39. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
  40. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
  41. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/METADATA +27 -115
  42. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/RECORD +45 -23
  43. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
  44. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/WHEEL +0 -0
  45. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/licenses/LICENSE +0 -0
  46. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/top_level.txt +0 -0
@@ -2,528 +2,108 @@
 Unit tests for question_and_answer tool functionality.
 """
 
-import unittest
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
-from langchain_core.documents import Document
-from langchain_core.embeddings import Embeddings
+import pytest
+from langchain_core.messages import ToolMessage
 
 from aiagents4pharma.talk2scholars.tools.pdf.question_and_answer import (
     question_and_answer,
 )
-from aiagents4pharma.talk2scholars.tools.pdf.utils.generate_answer import (
-    generate_answer,
-    load_hydra_config,
-)
-from aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker import (
-    rank_papers_by_query,
-)
-from aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks import (
-    retrieve_relevant_chunks,
-)
-from aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store import Vectorstore
-
-
-class TestQuestionAndAnswerTool(unittest.TestCase):
-    """tests for question_and_answer tool functionality."""
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper(self, mock_pypdf_loader):
-        """test adding a paper to the vector store."""
-        # Mock the PDF loader
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [Document(page_content="Page content")]
-
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model,
-            config=load_hydra_config(),
-        )
-
-        # Add a paper
-        vector_store.add_paper(
-            paper_id="test_paper",
-            pdf_url="http://example.com/test.pdf",
-            paper_metadata={"Title": "Test Paper"},
-        )
-
-        # Check if the paper was added
-        self.assertIn("test_paper_0", vector_store.documents)
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper_already_loaded(self, mock_pypdf_loader):
-        """Test that adding a paper that is already loaded does not re-load or add new documents."""
-        # Mock the PDF loader (it should not be used when the paper is already loaded)
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [Document(page_content="Page content")]
-
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model,
-            config=load_hydra_config(),
-        )
-
-        # Simulate the paper already being loaded.
-        vector_store.loaded_papers.add("test_paper")
-        # Capture the initial state of documents (should be empty)
-        initial_documents = dict(vector_store.documents)
-
-        # Attempt to add the same paper again.
-        vector_store.add_paper(
-            paper_id="test_paper",
-            pdf_url="http://example.com/test.pdf",
-            paper_metadata={"Title": "Test Paper"},
-        )
-
-        # Verify that no new paper was added by checking:
-        # 1. The loaded papers set remains unchanged.
-        self.assertEqual(vector_store.loaded_papers, {"test_paper"})
-        # 2. The documents dictionary remains unchanged.
-        self.assertEqual(vector_store.documents, initial_documents)
-        # 3. The PDF loader was not called at all.
-        mock_loader.load.assert_not_called()
-
-    def test_build_vector_store(self):
-        """test building the vector store."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Add a mock document
-        vector_store.documents["test_doc"] = Document(page_content="Test content")
-
-        # Mock the embed_documents method to return a list of embeddings
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        # Build vector store
-        vector_store.build_vector_store()
-
-        # Check if the vector store is built
-        self.assertIsNotNone(vector_store.vector_store)
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank"
-    )
-    def test_rank_papers_by_query(self, mock_nvidia_rerank):
-        """test ranking papers by query."""
-        # Create a mock config object with the top_k_papers attribute
-        # Create a mock config object with required reranker settings and top_k_papers
-        mock_config = SimpleNamespace(
-            reranker=SimpleNamespace(model="dummy", api_key="key"),
-            top_k_papers=1,
-        )
-
-        # Mock the re-ranker instance.
-        mock_reranker = mock_nvidia_rerank.return_value
-        mock_reranker.compress_documents.return_value = [
-            Document(
-                page_content="Aggregated content", metadata={"paper_id": "test_paper"}
-            )
-        ]
-
-        # Create a mock embedding model.
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore.
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Add a mock document.
-        vector_store.documents["test_doc"] = Document(
-            page_content="Test content", metadata={"paper_id": "test_paper"}
-        )
-
-        # Rank papers using the standalone function
-        ranked_papers = rank_papers_by_query(
-            vector_store, "test query", mock_config, top_k=mock_config.top_k_papers
-        )
-
-        # Check if the ranking is correct (updated expectation: a list of paper IDs)
-        self.assertEqual(ranked_papers[0], "test_paper")
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.maximal_marginal_relevance"
-    )
-    def test_retrieve_relevant_chunks(self, mock_mmr):
-        """Test retrieving relevant chunks without filters."""
-        mock_mmr.return_value = [0]
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-        vector_store.vector_store = True
-        # Add a document chunk with required metadata including chunk_id
-        vector_store.documents["test_doc"] = Document(
-            page_content="Test content",
-            metadata={"paper_id": "test_paper", "chunk_id": 0},
-        )
-
-        results = retrieve_relevant_chunks(vector_store, query="test query")
-        assert len(results) == 1
-        assert results[0].metadata["paper_id"] == "test_paper"
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.utils.generate_answer.BaseChatModel"
-    )
-    def test_generate_answer(self, mock_base_chat_model):
-        """test generating an answer."""
-        # Mock the language model
-        mock_llm = mock_base_chat_model.return_value
-        mock_llm.invoke.return_value.content = "Generated answer"
-
-        # Create a mock document
-        mock_document = Document(
-            page_content="Test content", metadata={"paper_id": "test_paper"}
-        )
-
-        # Generate answer with dummy config
-        config = {"prompt_template": "{context} {question}"}
-        result = generate_answer(
-            question="What is the test?",
-            retrieved_chunks=[mock_document],
-            llm_model=mock_llm,
-            config=config,
-        )
-
-        # Check if the answer is generated correctly
-        self.assertEqual(result["output_text"], "Generated answer")
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper_exception_handling(self, mock_pypdf_loader):
-        """Test exception handling when adding a paper."""
-        # Mock the PDF loader to raise an exception.
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.side_effect = Exception("Loading error")
-
-        # Mock embedding model.
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore.
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Attempt to add a paper and expect an exception.
-        with self.assertRaises(Exception) as context:
-            vector_store.add_paper(
-                paper_id="test_paper",
-                pdf_url="http://example.com/test.pdf",
-                paper_metadata={"Title": "Test Paper"},
-            )
-
-        # Verify that the exception message is as expected.
-        self.assertEqual(str(context.exception), "Loading error")
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_add_paper_missing_config(self, mock_pypdf_loader):
-        """Test that add_paper raises ValueError when config is missing."""
-        # Mock the PDF loader to return a single page
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [Document(page_content="Page content")]
-
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
 
-        # Initialize Vectorstore without config (default None)
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
 
-        # Attempt to add a paper and expect a configuration error
-        with self.assertRaises(ValueError) as cm:
-            vector_store.add_paper(
-                paper_id="test_paper",
-                pdf_url="http://example.com/test.pdf",
-                paper_metadata={"Title": "Test Paper"},
-            )
-        self.assertEqual(
-            str(cm.exception),
-            "Configuration is required for text splitting in Vectorstore.",
-        )
-
-    def test_build_vector_store_no_documents(self):
-        """Test building vector store with no documents results in an unchanged vector_store."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore without adding any documents
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Attempt to build vector store
-        vector_store.build_vector_store()
-
-        # Instead of checking log output, check that vector_store remains None (or unchanged)
-        self.assertIsNone(vector_store.vector_store)
-
-    def test_build_vector_store_already_built(self):
-        """Test that calling build_vector_store when
-        it is already built does not change the store."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Add a mock document
-        vector_store.documents["test_doc"] = Document(page_content="Test content")
-
-        # Mock the embed_documents method to return a list of embeddings
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        # Build vector store once
-        vector_store.build_vector_store()
-        first_build = vector_store.vector_store
-
-        # Attempt to build vector store again
-        vector_store.build_vector_store()
-
-        # Check that the vector store remains unchanged (i.e. same object/state)
-        self.assertEqual(vector_store.vector_store, first_build)
-
-    def test_retrieve_relevant_chunks_vector_store_not_built(self):
-        """Test retrieving relevant chunks when the vector store is not built."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-
-        # Initialize Vectorstore without adding any documents
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-
-        # Attempt to retrieve relevant chunks (vector_store.vector_store is None)
-        result = retrieve_relevant_chunks(vector_store, query="test query")
-
-        # Verify that an empty list is returned since the vector store is not built.
-        self.assertEqual(result, [])
-
-    def test_retrieve_relevant_chunks_with_paper_ids(self):
-        """Test retrieving relevant chunks with specific paper_ids when the store is not built."""
-        # Mock embedding model
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        # Mock embed_documents method to return embeddings of fixed length
-        mock_embedding_model.embed_documents.return_value = [MagicMock()] * 2
-
-        # Initialize Vectorstore and add documents
-        vector_store = Vectorstore(embedding_model=mock_embedding_model)
-        vector_store.documents = {
-            "doc1": Document(page_content="content1", metadata={"paper_id": "paper1"}),
-            "doc2": Document(page_content="content2", metadata={"paper_id": "paper2"}),
+@pytest.fixture(name="dependencies_fixture")
+def _dependencies_fixture():
+    """Patches all dependencies for question_and_answer."""
+    with (
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.format_answer"
+        ) as mock_format,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.retrieve_and_rerank_chunks"
+        ) as mock_rerank,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_all_papers"
+        ) as mock_load_papers,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_hydra_config"
+        ) as mock_config,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.QAToolHelper"
+        ) as mock_helper_cls,
+        patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.get_vectorstore"
+        ) as mock_get_vs,
+    ):
+        yield {
+            "mock_get_vectorstore": mock_get_vs,
+            "mock_helper_cls": mock_helper_cls,
+            "mock_load_config": mock_config,
+            "mock_load_all_papers": mock_load_papers,
+            "mock_retrieve_rerank": mock_rerank,
+            "mock_format_answer": mock_format,
         }
 
-        # Leave vector_store.vector_store as None to trigger the branch that returns an empty list
-        vector_store.vector_store = None
-
-        # Call retrieve_relevant_chunks with specific paper_ids
-        paper_ids = ["paper1"]
-        # Use module-level retrieve_relevant_chunks
-
-        result = retrieve_relevant_chunks(
-            vector_store, query="test query", paper_ids=paper_ids
-        )
-
-        # Verify that an empty list is returned since the vector store is not built.
-        self.assertEqual(result, [])
-
-    @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.vector_store.PyPDFLoader")
-    def test_additional_metadata_field_added(self, mock_pypdf_loader):
-        """test that additional metadata fields are added correctly."""
-        # Setup the PDF loader to return a single document with empty metadata
-        mock_loader = mock_pypdf_loader.return_value
-        mock_loader.load.return_value = [
-            Document(page_content="Test content", metadata={})
-        ]
-
-        # Create a dummy embedding model
-        dummy_embedding_model = MagicMock(spec=Embeddings)
 
-        # Define custom metadata fields including an additional field "custom_field"
-        custom_fields = ["title", "paper_id", "page", "chunk_id", "custom_field"]
-        vector_store = Vectorstore(
-            embedding_model=dummy_embedding_model,
-            metadata_fields=custom_fields,
-            config=load_hydra_config(),
-        )
-
-        # Paper metadata includes "Title" (for default title) and the additional "custom_field"
-        paper_metadata = {"Title": "Test Paper", "custom_field": "custom_value"}
-
-        # Call add_paper to process the document and add metadata
-        vector_store.add_paper(
-            paper_id="test_paper",
-            pdf_url="http://example.com/test.pdf",
-            paper_metadata=paper_metadata,
-        )
-
-        # Verify that the document was added with the custom field included in its metadata
-        self.assertIn("test_paper_0", vector_store.documents)
-        added_doc = vector_store.documents["test_paper_0"]
-        self.assertEqual(added_doc.metadata.get("custom_field"), "custom_value")
-
-    def test_generate_answer_missing_config_fields(self):
-        """test that generate_answer raises ValueError for missing config fields."""
-        # Create a dummy document and dummy LLM model
-        dummy_doc = Document(
-            page_content="Test content", metadata={"paper_id": "test_paper"}
-        )
-        dummy_llm_model = MagicMock()
-
-        # Case 1: Configuration is None, expect a ValueError
-        with self.assertRaises(ValueError) as context_none:
-            generate_answer("What is the test?", [dummy_doc], dummy_llm_model, None)
-        self.assertEqual(
-            str(context_none.exception),
-            "Configuration for generate_answer is required.",
-        )
-
-        # Case 2: Configuration missing 'prompt_template', expect a ValueError
-        with self.assertRaises(ValueError) as context_missing:
-            generate_answer("What is the test?", [dummy_doc], dummy_llm_model, {})
-        self.assertEqual(
-            str(context_missing.exception),
-            "The prompt_template is missing from the configuration.",
-        )
-
-    def test_state_validation_errors(self):
-        """Test errors raised for missing state entries."""
-        valid_articles = {"paper1": {"pdf_url": "u", "Title": "T1"}}
-        cases = [
-            ({"llm_model": MagicMock(), "article_data": valid_articles},
-             "No text embedding model found in state."),
-            ({"text_embedding_model": MagicMock(), "article_data": valid_articles},
-             "No LLM model found in state."),
-            ({"text_embedding_model": MagicMock(), "llm_model": MagicMock()},
-             "No article_data found in state."),
-            ({"text_embedding_model": MagicMock(), "llm_model": MagicMock(), "article_data": {}},
-             "No article_data found in state."),
-        ]
-        for state_dict, expected_msg in cases:
-            with self.subTest(state=state_dict):
-                tool_input = {"question": "Q?", "state": state_dict, "tool_call_id": "id"}
-                with self.assertRaises(ValueError) as cm:
-                    question_and_answer.run(tool_input)
-                self.assertEqual(str(cm.exception), expected_msg)
-
-    def test_retrieve_relevant_chunks_with_filtering(self):
-        """Test that filtering works by paper_ids."""
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
-        mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
-
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model, config=load_hydra_config()
-        )
-        vector_store.vector_store = True
-        # Add document chunks with necessary metadata including chunk_ids
-        doc1 = Document(
-            page_content="Doc 1", metadata={"paper_id": "paper1", "chunk_id": 0}
-        )
-        doc2 = Document(
-            page_content="Doc 2", metadata={"paper_id": "paper2", "chunk_id": 1}
-        )
-        vector_store.documents = {"doc1": doc1, "doc2": doc2}
-
-        results = retrieve_relevant_chunks(
-            vector_store, query="query", paper_ids=["paper1"]
-        )
-        assert len(results) == 1
-        assert results[0].metadata["paper_id"] == "paper1"
-
-    def test_retrieve_relevant_chunks_no_matching_docs(self):
-        """Ensure it returns empty list and logs warning if no docs match."""
-        mock_embedding_model = MagicMock(spec=Embeddings)
-        mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
-        mock_embedding_model.embed_documents.return_value = []
-
-        vector_store = Vectorstore(
-            embedding_model=mock_embedding_model, config=load_hydra_config()
-        )
-        vector_store.vector_store = True
-        # Add doc with paper_id that won't match
-        vector_store.documents["doc1"] = Document(
-            page_content="No match", metadata={"paper_id": "unmatched_paper"}
-        )
-        # Use util function for retrieval
-        results = retrieve_relevant_chunks(
-            vector_store, query="test", paper_ids=["nonexistent_id"]
-        )
-        assert results == []
-
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer."
-        "helper.get_state_models_and_data"
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer."
-        "helper.init_vector_store"
+@pytest.fixture(name="input_fixture")
+def _input_fixture():
+    """Simulates input for the question_and_answer tool."""
+    return {
+        "question": "What is the main contribution of the paper?",
+        "tool_call_id": "test_tool_call_id",
+        "state": {
+            "article_data": {"paper1": {"title": "Test Paper", "pdf_url": "url1"}},
+            "text_embedding_model": MagicMock(),
+            "llm_model": MagicMock(),
+        },
+    }
+
+
+def test_question_and_answer_success(dependencies_fixture, input_fixture):
+    """question_and_answer should return a ToolMessage with the answer."""
+    mock_helper = MagicMock()
+    mock_helper.get_state_models_and_data.return_value = (
+        input_fixture["state"]["text_embedding_model"],
+        input_fixture["state"]["llm_model"],
+        input_fixture["state"]["article_data"],
     )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer."
-        "retrieve_relevant_chunks"
+-    @patch(
+-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer."
+-        "retrieve_relevant_chunks"
+
+    mock_helper.init_vector_store.return_value = MagicMock()
+    mock_helper.has_gpu = True
+
+    dependencies_fixture["mock_helper_cls"].return_value = mock_helper
+    dependencies_fixture["mock_load_config"].return_value = {"config_key": "value"}
+    dependencies_fixture["mock_get_vectorstore"].return_value = MagicMock()
+    dependencies_fixture["mock_retrieve_rerank"].return_value = [
+        {"chunk": "relevant content"}
+    ]
+    dependencies_fixture["mock_format_answer"].return_value = "Here is your answer."
+
+    result = question_and_answer.invoke(input_fixture)
+
+    assert isinstance(result.update["messages"][0], ToolMessage)
+    assert result.update["messages"][0].content == "Here is your answer."
+
+
+def test_question_and_answer_no_reranked_chunks(dependencies_fixture, input_fixture):
+    """question_and_answer should return a ToolMessage with no relevant information found."""
+    mock_helper = MagicMock()
+    mock_helper.get_state_models_and_data.return_value = (
+        input_fixture["state"]["text_embedding_model"],
+        input_fixture["state"]["llm_model"],
+        input_fixture["state"]["article_data"],
     )
-    @patch.multiple(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper",
-        run_reranker=lambda vs, query, candidates: ["p1"],
-        format_answer=lambda question, chunks, llm, articles: "formatted answer",
+-    @patch.multiple(
+-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper",
+-        run_reranker=lambda vs, query, candidates: ["p1"],
+-        format_answer=lambda question, chunks, llm, articles: "formatted answer",
+
+    mock_helper.init_vector_store.return_value = MagicMock()
+    mock_helper.has_gpu = False
+
+    dependencies_fixture["mock_helper_cls"].return_value = mock_helper
+    dependencies_fixture["mock_load_config"].return_value = {"config_key": "value"}
+    dependencies_fixture["mock_get_vectorstore"].return_value = MagicMock()
+    dependencies_fixture["mock_retrieve_rerank"].return_value = []
+    dependencies_fixture["mock_format_answer"].return_value = (
+        "No relevant information found."
     )
-    def test_question_and_answer_happy_path(
-        self, mock_retrieve, mock_init, mock_state
-    ):
-        """Test happy path for question_and_answer tool."""
-        # Setup helper and utility mocks
-        emb = object()
-        llm = object()
-        articles = {"p1": {"pdf_url": "u"}}
-        mock_state.return_value = (emb, llm, articles)
-        # Provide dummy vector store for loading
-        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
-        mock_init.return_value = vs
-        # Dummy chunk list for retrieval
-        dummy_chunk = Document(page_content="c", metadata={"paper_id": "p1"})
-        mock_retrieve.return_value = [dummy_chunk]
-
-        # Use module-level question_and_answer
-
-        state = {}
-        tool_input = {"question": "Q?", "state": state, "tool_call_id": "tid"}
-        result = question_and_answer.run(tool_input)
-        # Verify Command message content and tool_call_id
-        msgs = result.update.get("messages", [])
-        self.assertEqual(len(msgs), 1)
-        msg = msgs[0]
-        self.assertEqual(msg.content, "formatted answer")
-        self.assertEqual(msg.tool_call_id, "tid")
+-    def test_question_and_answer_happy_path(
+-        self, mock_retrieve, mock_init, mock_state
+-    ):
+-        """Test happy path for question_and_answer tool."""
+-        # Setup helper and utility mocks
+-        emb = object()
+-        llm = object()
+-        articles = {"p1": {"pdf_url": "u"}}
+-        mock_state.return_value = (emb, llm, articles)
+-        # Provide dummy vector store for loading
+-        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
+-        mock_init.return_value = vs
+-        # Dummy chunk list for retrieval
+-        dummy_chunk = Document(page_content="c", metadata={"paper_id": "p1"})
+-        mock_retrieve.return_value = [dummy_chunk]
+-
+-        # Use module-level question_and_answer
+-
+-        state = {}
+-        tool_input = {"question": "Q?", "state": state, "tool_call_id": "tid"}
+-        result = question_and_answer.run(tool_input)
+-        # Verify Command message content and tool_call_id
+-        msgs = result.update.get("messages", [])
+-        self.assertEqual(len(msgs), 1)
+-        msg = msgs[0]
+-        self.assertEqual(msg.content, "formatted answer")
+-        self.assertEqual(msg.tool_call_id, "tid")
 
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper."
-        "get_state_models_and_data"
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper.init_vector_store"
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper.run_reranker",
-        return_value=["p1"],
-    )
-    @patch(
-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.retrieve_relevant_chunks",
-        return_value=[],
-    )
-    def test_question_and_answer_no_chunks(
-        self, _mock_retrieve, _mock_rerank, mock_init, mock_state
-    ):
-        """Test that no chunks raises RuntimeError."""
-        emb = object()
-        llm = object()
-        articles = {"p1": {"pdf_url": "u"}}
-        mock_state.return_value = (emb, llm, articles)
-        # Provide dummy vector store to satisfy load_candidate_papers
-        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
-        mock_init.return_value = vs
+-    @patch(
+-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper."
+-        "get_state_models_and_data"
+-    )
+-    @patch(
+-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper.init_vector_store"
+-    )
+-    @patch(
+-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.helper.run_reranker",
+-        return_value=["p1"],
+-    )
+-    @patch(
+-        "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.retrieve_relevant_chunks",
+-        return_value=[],
+-    )
+-    def test_question_and_answer_no_chunks(
+-        self, _mock_retrieve, _mock_rerank, mock_init, mock_state
+-    ):
+-        """Test that no chunks raises RuntimeError."""
+-        emb = object()
+-        llm = object()
+-        articles = {"p1": {"pdf_url": "u"}}
+-        mock_state.return_value = (emb, llm, articles)
+-        # Provide dummy vector store to satisfy load_candidate_papers
+-        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
+-        mock_init.return_value = vs
++    result = question_and_answer.invoke(input_fixture)
 
-        tool_input = {"question": "Q?", "state": {}, "tool_call_id": "id"}
-        with self.assertRaises(RuntimeError) as cm:
-            question_and_answer.run(tool_input)
-        self.assertIn("No relevant chunks found for question", str(cm.exception))
+-        tool_input = {"question": "Q?", "state": {}, "tool_call_id": "id"}
+-        with self.assertRaises(RuntimeError) as cm:
+-            question_and_answer.run(tool_input)
+-        self.assertIn("No relevant chunks found for question", str(cm.exception))
++    assert isinstance(result.update["messages"][0], ToolMessage)
++    assert result.update["messages"][0].content == "No relevant information found."