aiagents4pharma 1.31.0__py3-none-any.whl → 1.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -0
  2. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +44 -0
  3. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +1 -0
  4. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +90 -0
  5. aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
  6. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
  7. aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
  8. aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
  9. aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
  10. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
  11. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
  12. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
  13. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
  14. aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
  15. aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
  16. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
  17. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
  18. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
  19. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
  20. aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
  21. aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
  23. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
  24. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
  25. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
  26. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
  27. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
  28. aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
  29. aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
  30. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
  31. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
  32. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
  33. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/METADATA +3 -1
  34. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/RECORD +37 -37
  35. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/WHEEL +1 -1
  36. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
  37. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
  38. aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
  39. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/licenses/LICENSE +0 -0
  40. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/top_level.txt +0 -0
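The unlabeled hunk below is the rewrite of aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py (item 18 above). For orientation only, here is a minimal usage sketch of the Vectorstore / generate_answer / question_and_answer surface that those tests exercise. The class, method, and argument names and the state keys are taken directly from the test code below; the OpenAI models, the API key, and the example.com PDF URL are illustrative assumptions, not something this diff prescribes, and the package's own Hydra config (prompt_template, reranker settings) must be available at runtime.

from langchain_openai import ChatOpenAI, OpenAIEmbeddings  # illustrative stand-ins, assumes OPENAI_API_KEY

from aiagents4pharma.talk2scholars.tools.pdf.question_and_answer import (
    Vectorstore,
    generate_answer,
    question_and_answer,
)

# Stand-in models; the agent normally injects these through its graph state.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
llm_model = ChatOpenAI(model="gpt-4o-mini")

# Build a store of PDF chunks, as test_add_paper / test_build_vector_store do.
store = Vectorstore(embedding_model=embedding_model)
store.add_paper(
    paper_id="paper1",
    pdf_url="http://example.com/paper1.pdf",  # placeholder URL; add_paper loads the PDF from it
    paper_metadata={"Title": "Paper One"},
)
store.build_vector_store()

# Retrieve chunks and synthesize an answer (generate_answer reads prompt_template from the Hydra config).
chunks = store.retrieve_relevant_chunks(query="What is the conclusion?", paper_ids=["paper1"])
answer = generate_answer("What is the conclusion?", chunks, llm_model)
print(answer["output_text"], answer["papers_used"])

# Or drive the whole flow through the tool, as the question_and_answer tests do.
state = {
    "article_data": {
        "paper1": {"pdf_url": "http://example.com/paper1.pdf", "Title": "Paper One"}
    },
    "text_embedding_model": embedding_model,
    "llm_model": llm_model,
    "vector_store": store,
}
result = question_and_answer.run(
    {
        "question": "What is the conclusion?",
        "paper_ids": ["paper1"],
        "use_all_papers": False,
        "tool_call_id": "example_call",
        "state": state,
    }
)
print(result.update["messages"][0].content)  # answer text followed by a "Sources:" list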
@@ -2,249 +2,821 @@
  Unit tests for question_and_answer tool functionality.
  """

- from langchain.docstore.document import Document
- from ..tools.pdf import question_and_answer
- from ..tools.pdf.question_and_answer import (
-     extract_text_from_pdf_data,
-     question_and_answer_tool,
+ import unittest
+ from unittest.mock import MagicMock, patch
+
+ from langchain_core.documents import Document
+ from langchain_core.embeddings import Embeddings
+
+ from aiagents4pharma.talk2scholars.tools.pdf.question_and_answer import (
+     Vectorstore,
      generate_answer,
+     question_and_answer,
  )

- # pylint: disable=redefined-outer-name,too-few-public-methods
-
-
- def test_extract_text_from_pdf_data():
-     """
-     Test that extract_text_from_pdf_data returns text containing 'Hello World'.
-     """
-     extracted_text = extract_text_from_pdf_data(DUMMY_PDF_BYTES)
-     assert "Hello World" in extracted_text
-
-
- DUMMY_PDF_BYTES = (
-     b"%PDF-1.4\n"
-     b"%\xe2\xe3\xcf\xd3\n"
-     b"1 0 obj\n"
-     b"<< /Type /Catalog /Pages 2 0 R >>\n"
-     b"endobj\n"
-     b"2 0 obj\n"
-     b"<< /Type /Pages /Count 1 /Kids [3 0 R] >>\n"
-     b"endobj\n"
-     b"3 0 obj\n"
-     b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R "
-     b"/Resources << /Font << /F1 5 0 R >> >> >>\n"
-     b"endobj\n"
-     b"4 0 obj\n"
-     b"<< /Length 44 >>\n"
-     b"stream\nBT\n/F1 24 Tf\n72 712 Td\n(Hello World) Tj\nET\nendstream\n"
-     b"endobj\n"
-     b"5 0 obj\n"
-     b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\n"
-     b"endobj\n"
-     b"xref\n0 6\n0000000000 65535 f \n0000000010 00000 n \n0000000053 00000 n \n"
-     b"0000000100 00000 n \n0000000150 00000 n \n0000000200 00000 n \n"
-     b"trailer\n<< /Size 6 /Root 1 0 R >>\nstartxref\n250\n%%EOF\n"
- )

+ class TestQuestionAndAnswerTool(unittest.TestCase):
+     """tests for question_and_answer tool functionality."""

- def fake_generate_answer2(question, pdf_url, _text_embedding_model):
-     """
-     Fake generate_answer2 function to bypass external dependencies.
-     """
-     return {
-         "answer": "Mock answer",
-         "question": question,
-         "pdf_url": pdf_url,
-     }
-
-
- def test_question_and_answer_tool_success(monkeypatch):
-     """
-     Test that question_and_answer_tool returns the expected result on success.
-     """
-     # Patch generate_answer2 because the tool calls that.
-     monkeypatch.setattr(question_and_answer, "generate_answer2", fake_generate_answer2)
-     dummy_text_embedding_model = object()  # Provide a dummy text embedding model.
-     # Create a valid state with pdf_data and include dummy llm_model and text_embedding_model.
-     state = {
-         "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
-         "llm_model": object(),  # Provide a dummy LLM model instance.
-         "text_embedding_model": dummy_text_embedding_model,
-     }
-     question = "What is in the PDF?"
-     result = question_and_answer_tool.func(
-         question=question, tool_call_id="test_call_id", state=state
-     )
-     assert result["answer"] == "Mock answer"
-     assert result["question"] == question
-     assert result["pdf_url"] == "http://dummy.url"
-
-
- def test_question_and_answer_tool_no_pdf_data():
-     """
-     Test that an error is returned if the state lacks the 'pdf_data' key.
-     """
-     state = {
-         "text_embedding_model": object(),  # Added to avoid KeyError.
-     }
-     question = "Any question?"
-     result = question_and_answer_tool.func(
-         question=question, tool_call_id="test_call_id", state=state
-     )
-     messages = result.update["messages"]
-     assert any("No pdf_data found in state." in msg.content for msg in messages)
-
-
- def test_question_and_answer_tool_no_pdf_object():
-     """
-     Test that an error is returned if the pdf_object is missing within pdf_data.
-     """
-     state = {
-         "pdf_data": {"pdf_object": None},
-         "text_embedding_model": object(),  # Added to avoid KeyError.
-         "llm_model": object(),  # Dummy LLM model.
-     }
-     question = "Any question?"
-     result = question_and_answer_tool.func(
-         question=question, tool_call_id="test_call_id", state=state
-     )
-     messages = result.update["messages"]
-     assert any(
-         "PDF binary data is missing in the pdf_data from state." in msg.content
-         for msg in messages
-     )
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.PyPDFLoader")
+     def test_add_paper(self, mock_pypdf_loader):
+         """test adding a paper to the vector store."""
+         # Mock the PDF loader
+         mock_loader = mock_pypdf_loader.return_value
+         mock_loader.load.return_value = [Document(page_content="Page content")]

+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)

- def test_question_and_answer_tool_no_llm_model():
-     """
-     Test that an error is returned if the LLM model is missing in the state.
-     """
-     state = {
-         "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
-         "text_embedding_model": object(),  # Added to avoid KeyError.
-         # llm_model is intentionally omitted.
-     }
-     question = "What is in the PDF?"
-     result = question_and_answer_tool.func(
-         question=question, tool_call_id="test_call_id", state=state
-     )
-     assert result == {"error": "No LLM model found in state."}
+         # Initialize Vectorstore
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)

+         # Add a paper
+         vector_store.add_paper(
+             paper_id="test_paper",
+             pdf_url="http://example.com/test.pdf",
+             paper_metadata={"Title": "Test Paper"},
+         )

- def test_generate_answer2_actual(monkeypatch):
-     """
-     Test the actual behavior of generate_answer2 using fake dependencies
-     to exercise its internal logic.
-     """
+         # Check if the paper was added
+         self.assertIn("test_paper_0", vector_store.documents)

-     # Create a fake PyPDFLoader that does not perform a network call.
-     class FakePyPDFLoader:
-         """class to fake PyPDFLoader"""
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.PyPDFLoader")
+     def test_add_paper_already_loaded(self, mock_pypdf_loader):
+         """Test that adding a paper that is already loaded does not re-load or add new documents."""
+         # Mock the PDF loader (it should not be used when the paper is already loaded)
+         mock_loader = mock_pypdf_loader.return_value
+         mock_loader.load.return_value = [Document(page_content="Page content")]

-         def __init__(self, file_path, headers=None):
-             """Initialize the fake PyPDFLoader."""
-             self.file_path = file_path
-             self.headers = headers
+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)

-         def lazy_load(self):
-             """Return a list with one fake Document."""
-             # Return a list with one fake Document.
-             return [Document(page_content="Answer for Test question?")]
+         # Initialize Vectorstore
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)

-     monkeypatch.setattr(question_and_answer, "PyPDFLoader", FakePyPDFLoader)
+         # Simulate the paper already being loaded.
+         vector_store.loaded_papers.add("test_paper")
+         # Capture the initial state of documents (should be empty)
+         initial_documents = dict(vector_store.documents)

-     # Create a fake vector store that returns a controlled result for similarity_search.
-     class FakeVectorStore:
-         """Fake vector store for similarity search."""
+         # Attempt to add the same paper again.
+         vector_store.add_paper(
+             paper_id="test_paper",
+             pdf_url="http://example.com/test.pdf",
+             paper_metadata={"Title": "Test Paper"},
+         )

-         def similarity_search(self, query):
-             """Return a list with one Document containing our expected answer."""
-             # Return a list with one Document containing our expected answer.
-             return [Document(page_content=f"Answer for {query}")]
+         # Verify that no new paper was added by checking:
+         # 1. The loaded papers set remains unchanged.
+         self.assertEqual(vector_store.loaded_papers, {"test_paper"})
+         # 2. The documents dictionary remains unchanged.
+         self.assertEqual(vector_store.documents, initial_documents)
+         # 3. The PDF loader was not called at all.
+         mock_loader.load.assert_not_called()

-     monkeypatch.setattr(
-         question_and_answer.InMemoryVectorStore,
-         "from_documents",
-         lambda docs, emb: FakeVectorStore(),
-     )
+     def test_build_vector_store(self):
+         """test building the vector store."""
+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)

-     # Provide a dummy text embedding model.
-     dummy_text_embedding_model = object()
-     question = "Test question?"
-     pdf_url = "http://dummy.pdf"
+         # Initialize Vectorstore
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)

-     # Call generate_answer2 without triggering an actual network call.
-     result = question_and_answer.generate_answer2(
-         question, pdf_url, dummy_text_embedding_model
-     )
-     # The function should join the page content from the similarity search.
-     expected = "Answer for Test question?"
-     assert result == expected
+         # Add a mock document
+         vector_store.documents["test_doc"] = Document(page_content="Test content")

+         # Mock the embed_documents method to return a list of embeddings
+         mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]

- def test_generate_answer(monkeypatch):
-     """
-     Test generate_answer function with controlled monkeypatched dependencies.
-     """
+         # Build vector store
+         vector_store.build_vector_store()

-     def fake_split_text(_self, _text):
-         """Fake split_text method that returns controlled chunks."""
-         return ["chunk1", "chunk2"]
+         # Check if the vector store is built
+         self.assertIsNotNone(vector_store.vector_store)

-     monkeypatch.setattr(
-         question_and_answer.CharacterTextSplitter, "split_text", fake_split_text
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_hydra_config"
      )
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.NVIDIARerank")
+     def test_rank_papers_by_query(self, mock_nvidia_rerank, mock_load_config):
+         """test ranking papers by query."""
+         # Create a mock config object with attributes
+         mock_config = MagicMock()
+         mock_config.reranker.model = "nvidia/llama-3.2-nv-rerankqa-1b-v2"
+         mock_config.reranker.api_key = "dummy_api_key"
+
+         # Patch load_hydra_config to return the mock config object
+         mock_load_config.return_value = mock_config
+
+         # Mock the re-ranker instance.
+         mock_reranker = mock_nvidia_rerank.return_value
+         mock_reranker.compress_documents.return_value = [
+             Document(
+                 page_content="Aggregated content", metadata={"paper_id": "test_paper"}
+             )
+         ]
+
+         # Create a mock embedding model.
+         mock_embedding_model = MagicMock(spec=Embeddings)
+
+         # Initialize Vectorstore.
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+
+         # Add a mock document.
+         vector_store.documents["test_doc"] = Document(
+             page_content="Test content", metadata={"paper_id": "test_paper"}
+         )
+
+         # Rank papers.
+         ranked_papers = vector_store.rank_papers_by_query(query="test query")
+
+         # Check if the ranking is correct (updated expectation: a list of paper IDs)
+         self.assertEqual(ranked_papers[0], "test_paper")
+
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.maximal_marginal_relevance"
+     )
+     def test_retrieve_relevant_chunks(self, mock_mmr):
+         """Test retrieving relevant chunks without filters."""
+         mock_mmr.return_value = [0]
+         mock_embedding_model = MagicMock(spec=Embeddings)
+         mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
+         mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
+
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+         vector_store.vector_store = True
+         vector_store.documents["test_doc"] = Document(
+             page_content="Test content", metadata={"paper_id": "test_paper"}
+         )
+
+         results = vector_store.retrieve_relevant_chunks(query="test query")
+         assert len(results) == 1
+         assert results[0].metadata["paper_id"] == "test_paper"
+
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.BaseChatModel")
+     def test_generate_answer(self, mock_base_chat_model):
+         """test generating an answer."""
+         # Mock the language model
+         mock_llm = mock_base_chat_model.return_value
+         mock_llm.invoke.return_value.content = "Generated answer"
+
+         # Create a mock document
+         mock_document = Document(
+             page_content="Test content", metadata={"paper_id": "test_paper"}
+         )
+
+         # Generate answer
+         result = generate_answer(
+             question="What is the test?",
+             retrieved_chunks=[mock_document],
+             llm_model=mock_llm,
+         )
+
+         # Check if the answer is generated correctly
+         self.assertEqual(result["output_text"], "Generated answer")
+
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.PyPDFLoader")
+     def test_add_paper_exception_handling(self, mock_pypdf_loader):
+         """Test exception handling when adding a paper."""
+         # Mock the PDF loader to raise an exception.
+         mock_loader = mock_pypdf_loader.return_value
+         mock_loader.load.side_effect = Exception("Loading error")
+
+         # Mock embedding model.
+         mock_embedding_model = MagicMock(spec=Embeddings)
+
+         # Initialize Vectorstore.
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+
+         # Attempt to add a paper and expect an exception.
+         with self.assertRaises(Exception) as context:
+             vector_store.add_paper(
+                 paper_id="test_paper",
+                 pdf_url="http://example.com/test.pdf",
+                 paper_metadata={"Title": "Test Paper"},
+             )
+
+         # Verify that the exception message is as expected.
+         self.assertEqual(str(context.exception), "Loading error")
+
+     def test_build_vector_store_no_documents(self):
+         """Test building vector store with no documents results in an unchanged vector_store."""
+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)
+
+         # Initialize Vectorstore without adding any documents
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+
+         # Attempt to build vector store
+         vector_store.build_vector_store()
+
+         # Instead of checking log output, check that vector_store remains None (or unchanged)
+         self.assertIsNone(vector_store.vector_store)
+
+     def test_build_vector_store_already_built(self):
+         """Test that calling build_vector_store when
+         it is already built does not change the store."""
+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)
+
+         # Initialize Vectorstore
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+
+         # Add a mock document
+         vector_store.documents["test_doc"] = Document(page_content="Test content")
+
+         # Mock the embed_documents method to return a list of embeddings
+         mock_embedding_model.embed_documents.return_value = [[0.1, 0.2, 0.3]]
+
+         # Build vector store once
+         vector_store.build_vector_store()
+         first_build = vector_store.vector_store
+
+         # Attempt to build vector store again
+         vector_store.build_vector_store()
+
+         # Check that the vector store remains unchanged (i.e. same object/state)
+         self.assertEqual(vector_store.vector_store, first_build)
+
+     def test_retrieve_relevant_chunks_vector_store_not_built(self):
+         """Test retrieving relevant chunks when the vector store is not built."""
+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)
+
+         # Initialize Vectorstore without adding any documents
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+
+         # Attempt to retrieve relevant chunks (vector_store.vector_store is None)
+         result = vector_store.retrieve_relevant_chunks(query="test query")
+
+         # Verify that an empty list is returned since the vector store is not built.
+         self.assertEqual(result, [])
+
+     def test_retrieve_relevant_chunks_with_paper_ids(self):
+         """Test retrieving relevant chunks with specific paper_ids when the store is not built."""
+         # Mock embedding model
+         mock_embedding_model = MagicMock(spec=Embeddings)
+         # Mock embed_documents method to return embeddings of fixed length
+         mock_embedding_model.embed_documents.return_value = [MagicMock()] * 2
+
+         # Initialize Vectorstore and add documents
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+         vector_store.documents = {
+             "doc1": Document(page_content="content1", metadata={"paper_id": "paper1"}),
+             "doc2": Document(page_content="content2", metadata={"paper_id": "paper2"}),
+         }
+
+         # Leave vector_store.vector_store as None to trigger the branch that returns an empty list
+         vector_store.vector_store = None
+
+         # Call retrieve_relevant_chunks with specific paper_ids
+         paper_ids = ["paper1"]
+         result = vector_store.retrieve_relevant_chunks(
+             query="test query", paper_ids=paper_ids
+         )

-     def fake_annoy_from_documents(_documents, _embeddings):
-         """
-         Fake Annoy.from_documents function that returns a fake vector store.
-         """
-
-         # pylint: disable=too-few-public-methods, unused-argument
-         class FakeVectorStore:
-             """Fake vector store for similarity search."""
-
-             def similarity_search(self, _question, k):
-                 """Return a list with a single dummy Document."""
-                 return [Document(page_content="dummy content")]
-
-         return FakeVectorStore()
+         # Verify that an empty list is returned since the vector store is not built.
+         self.assertEqual(result, [])

-     monkeypatch.setattr(
-         question_and_answer.Annoy, "from_documents", fake_annoy_from_documents
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.generate_answer"
      )
-
-     def fake_load_qa_chain(_llm, chain_type):  # chain_type matches the keyword argument
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.Vectorstore")
+     def test_question_and_answer_success(self, mock_vectorstore, mock_generate_answer):
+         """test the main functionality of the question_and_answer tool."""
+         # Create a dummy document to simulate a retrieved chunk
+         dummy_doc = Document(
+             page_content="Dummy content",
+             metadata={"paper_id": "paper1", "title": "Paper One", "page": 1},
+         )
+
+         # Configure generate_answer to return a dummy answer result
+         mock_generate_answer.return_value = {
+             "output_text": "Test Answer",
+             "papers_used": ["paper1"],
+         }
+
+         # Create a dummy embedding model
+         dummy_embedding_model = MagicMock(spec=Embeddings)
+
+         # Create a dummy vector store and simulate that it is already built and has the paper loaded
+         dummy_vector_store = Vectorstore(embedding_model=dummy_embedding_model)
+         dummy_vector_store.vector_store = (
+             True  # Simulate that the vector store is built
+         )
+         dummy_vector_store.loaded_papers.add("paper1")
+         dummy_vector_store.retrieve_relevant_chunks = MagicMock(
+             return_value=[dummy_doc]
+         )
+         # Return our dummy vector store when Vectorstore() is instantiated
+         mock_vectorstore.return_value = dummy_vector_store
+
+         # Create a dummy LLM model
+         dummy_llm_model = MagicMock()
+
+         # Construct the state with required keys
+         state = {
+             "article_data": {
+                 "paper1": {
+                     "pdf_url": "http://example.com/paper1.pdf",
+                     "Title": "Paper One",
+                 }
+             },
+             "text_embedding_model": dummy_embedding_model,
+             "llm_model": dummy_llm_model,
+             "vector_store": dummy_vector_store,
+         }
+
+         input_data = {
+             "question": "What is the content?",
+             "paper_ids": ["paper1"],
+             "use_all_papers": False,
+             "tool_call_id": "test_tool_call",
+             "state": state,
+         }
+         result = question_and_answer.run(input_data)
+
+         # Verify that generate_answer was called with expected arguments
+         mock_generate_answer.assert_called_once()
+         args, _ = mock_generate_answer.call_args
+         self.assertEqual(args[0], "What is the content?")
+         self.assertEqual(args[2], dummy_llm_model)
+
+         # Verify the final response content and tool_call_id in the returned Command
+         response_message = result.update["messages"][0]
+         expected_output = "Test Answer\n\nSources:\n- Paper One"
+         self.assertEqual(response_message.content, expected_output)
+         self.assertEqual(response_message.tool_call_id, "test_tool_call")
+
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.generate_answer"
+     )
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.Vectorstore")
+     def test_question_and_answer_semantic_branch(
+         self, mock_vectorstore, mock_generate_answer
+     ):
+         """test the semantic ranking branch of the question_and_answer tool."""
+         # Create a dummy document to simulate a retrieved chunk from semantic ranking
+         dummy_doc = Document(
+             page_content="Semantic chunk",
+             metadata={"paper_id": "paper_sem", "title": "Paper Semantic", "page": 2},
+         )
+
+         # Configure generate_answer to return a dummy answer result
+         mock_generate_answer.return_value = {
+             "output_text": "Semantic Answer",
+             "papers_used": ["paper_sem"],
+         }
+
+         # Create a dummy Vectorstore instance to simulate the semantic branch behavior
+         dummy_vs = MagicMock()
+         # Initially, no papers are loaded
+         dummy_vs.loaded_papers = set()
+         # Explicitly set vector_store to None so that the build_vector_store branch is taken
+         dummy_vs.vector_store = None
+         # When build_vector_store is called, simulate that the vector store is built
+         dummy_vs.build_vector_store.side_effect = lambda: setattr(
+             dummy_vs, "vector_store", True
+         )
+         # Simulate ranking: return a single paper id with score as a tuple for unpacking
+         dummy_vs.rank_papers_by_query.return_value = [("paper_sem", 1.0)]
+         # Simulate retrieval: return our dummy document
+         dummy_vs.retrieve_relevant_chunks.return_value = [dummy_doc]
+         # Ensure add_paper is available (it may be called more than once)
+         dummy_vs.add_paper.return_value = None
+
+         # When the tool instantiates Vectorstore, return our dummy instance
+         mock_vectorstore.return_value = dummy_vs
+
+         # Create dummy embedding and LLM models
+         dummy_embedding_model = MagicMock(spec=Embeddings)
+         dummy_llm_model = MagicMock()
+
+         # Construct the state WITHOUT a vector_store to force creation,
+         # and without explicit paper_ids so the semantic branch is taken.
+         state = {
+             "article_data": {
+                 "paper_sem": {
+                     "pdf_url": "http://example.com/paper_sem.pdf",
+                     "Title": "Paper Semantic",
+                 }
+             },
+             "text_embedding_model": dummy_embedding_model,
+             "llm_model": dummy_llm_model,
+             # Note: "vector_store" key is omitted intentionally
+         }
+
+         input_data = {
+             "question": "What is semantic content?",
+             "paper_ids": None,
+             "use_all_papers": False,
+             "tool_call_id": "test_semantic_tool_call",
+             "state": state,
+         }
+         result = question_and_answer.run(input_data)
+
+         # Instead of checking that 'vector_store' was added to the original state dict,
+         # verify that a new vector store was created by checking that Vectorstore was instantiated.
+         mock_vectorstore.assert_called_once_with(embedding_model=dummy_embedding_model)
+
+         # Check that add_paper was called at least once (semantic branch should load the paper)
+         self.assertTrue(dummy_vs.add_paper.call_count >= 1)
+
+         # Verify that build_vector_store was called to set up the store
+         dummy_vs.build_vector_store.assert_called()
+
+         # Verify that rank_papers_by_query was called with the expected question and top_k=3
+         dummy_vs.rank_papers_by_query.assert_called_with(
+             "What is semantic content?", top_k=40
+         )
+
+         # Verify that retrieve_relevant_chunks was called with the selected paper id.
+         dummy_vs.retrieve_relevant_chunks.assert_called_with(
+             query="What is semantic content?", paper_ids=["paper_sem"], top_k=25
+         )
+
+         # Verify that generate_answer was called with the expected arguments
+         mock_generate_answer.assert_called_once()
+         args, _ = mock_generate_answer.call_args
+         self.assertEqual(args[0], "What is semantic content?")
+         self.assertEqual(args[2], dummy_llm_model)
+
+         # Verify that the final response message is correctly
+         # formatted with answer and source attribution
+         response_message = result.update["messages"][0]
+         expected_output = "Semantic Answer\n\nSources:\n- Paper Semantic"
+         self.assertEqual(response_message.content, expected_output)
+         self.assertEqual(response_message.tool_call_id, "test_semantic_tool_call")
+
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.Vectorstore")
+     def test_question_and_answer_fallback_no_relevant_chunks(self, mock_vectorstore):
+         """Test the fallback branch of the question_and_answer
+         tool when no relevant chunks are found."""
+         # Create a dummy Vectorstore instance to simulate fallback and error conditions.
+         dummy_vs = MagicMock()
+         # Ensure no papers are loaded initially.
+         dummy_vs.loaded_papers = set()
+         # Simulate that the vector store is not built.
+         dummy_vs.vector_store = None
+         # Simulate ranking returning an empty list to force the fallback branch.
+         dummy_vs.rank_papers_by_query.return_value = []
+         # In the "load selected papers" loop, simulate that add_paper raises an exception.
+         dummy_vs.add_paper.side_effect = IOError("Test error")
+         # When build_vector_store is called, simulate setting the vector store.
+         dummy_vs.build_vector_store.side_effect = lambda: setattr(
+             dummy_vs, "vector_store", True
+         )
+         # Simulate retrieval returning an empty list so that a RuntimeError is raised.
+         dummy_vs.retrieve_relevant_chunks.return_value = []
+         mock_vectorstore.return_value = dummy_vs
+
+         # Create dummy embedding and LLM models.
+         dummy_embedding_model = MagicMock(spec=Embeddings)
+         dummy_llm_model = MagicMock()
+
+         # Construct state with article_data containing one paper.
+         state = {
+             "article_data": {
+                 "paper1": {
+                     "pdf_url": "http://example.com/paper1.pdf",
+                     "Title": "Paper One",
+                 }
+             },
+             "text_embedding_model": dummy_embedding_model,
+             "llm_model": dummy_llm_model,
+             # "vector_store" key is omitted intentionally to force creation.
+         }
+
+         input_data = {
+             "question": "What is fallback test?",
+             # Provide paper_ids that do not match article_data so that the
+             # fallback branch is triggered.
+             "paper_ids": ["nonexistent"],
+             "use_all_papers": False,
+             "tool_call_id": "test_fallback_call",
+             "state": state,
+         }
+
+         with self.assertRaises(RuntimeError) as context:
+             question_and_answer.run(input_data)
+
+         # Verify that build_vector_store was called to ensure the store is built.
+         dummy_vs.build_vector_store.assert_called()
+
+         # Verify that the RuntimeError contains the expected error message.
+         self.assertIn(
+             "I couldn't find relevant information to answer your question",
+             str(context.exception),
+         )
+
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.generate_answer"
+     )
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.Vectorstore")
+     def test_question_and_answer_use_all_papers(
+         self, mock_vectorstore, mock_generate_answer
+     ):
+         """test the use_all_papers branch of the question_and_answer tool."""
+         # Test the branch where use_all_papers is True.
+         # Create a dummy document for retrieval.
+         dummy_doc = Document(
+             page_content="Content from all papers branch",
+             metadata={"paper_id": "paper_all", "title": "Paper All", "page": 1},
+         )
+         # Configure generate_answer to return a dummy answer.
+         mock_generate_answer.return_value = {
+             "output_text": "Answer from all papers",
+             "papers_used": ["paper_all"],
+         }
+
+         # Create a dummy vector store that is already built and already loaded with the paper.
+         dummy_vs = MagicMock()
+         dummy_vs.vector_store = True
+         # Simulate that the paper is already loaded.
+         dummy_vs.loaded_papers = {"paper_all"}
+         # Simulate retrieval returning the dummy document.
+         dummy_vs.retrieve_relevant_chunks.return_value = [dummy_doc]
+         # No add_paper call should be needed.
+         dummy_vs.add_paper.return_value = None
+         # Return our dummy vector store when Vectorstore() is instantiated
+         mock_vectorstore.return_value = dummy_vs
+
+         # Construct state with article_data containing one paper and an existing vector_store.
+         dummy_embedding_model = MagicMock(spec=Embeddings)
+         dummy_llm_model = MagicMock()
+         state = {
+             "article_data": {
+                 "paper_all": {
+                     "pdf_url": "http://example.com/paper_all.pdf",
+                     "Title": "Paper All",
+                 }
+             },
+             "text_embedding_model": dummy_embedding_model,
+             "llm_model": dummy_llm_model,
+             "vector_store": dummy_vs,  # Existing vector store
+         }
+
+         input_data = {
+             "question": "What is the content from all papers?",
+             "paper_ids": None,
+             "use_all_papers": True,
+             "tool_call_id": "test_use_all_papers",
+             "state": state,
+         }
+         result = question_and_answer.run(input_data)
+
+         # Verify that the use_all_papers branch was
+         # taken by checking that all article keys were selected.
+         # (This is logged; here we indirectly verify
+         # that generate_answer was called with the dummy_llm_model.)
+         mock_generate_answer.assert_called_once()
+         args, _ = mock_generate_answer.call_args
+         self.assertEqual(args[0], "What is the content from all papers?")
+         self.assertEqual(args[2], dummy_llm_model)
+
+         # Verify that the final response message includes the answer and source attribution.
+         response_message = result.update["messages"][0]
+         expected_output = "Answer from all papers\n\nSources:\n- Paper All"
+         self.assertEqual(response_message.content, expected_output)
+         self.assertEqual(response_message.tool_call_id, "test_use_all_papers")
+
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.Vectorstore")
+     def test_question_and_answer_add_paper_exception(self, mock_vectorstore):
+         """test exception handling when add_paper fails."""
+         # Test that in the semantic ranking branch, if add_paper raises an exception,
+         # the error is logged and then re-raised.
+         dummy_vs = MagicMock()
+         # No papers are loaded.
+         dummy_vs.loaded_papers = set()
+         # Simulate that the vector store is not built.
+         dummy_vs.vector_store = None
+         # In the semantic branch, when trying to load the paper, add_paper will raise an exception.
+         dummy_vs.add_paper.side_effect = IOError("Add paper failure")
+         # Simulate that build_vector_store would set the store
+         # (if reached, but it won't in this test).
+         dummy_vs.build_vector_store.side_effect = lambda: setattr(
+             dummy_vs, "vector_store", True
+         )
+         # Ensure retrieval is never reached because add_paper fails.
+         dummy_vs.retrieve_relevant_chunks.return_value = []
+         mock_vectorstore.return_value = dummy_vs
+
+         dummy_embedding_model = MagicMock(spec=Embeddings)
+         dummy_llm_model = MagicMock()
+         # Construct state with article_data containing one paper.
+         state = {
+             "article_data": {
+                 "paper_err": {
+                     "pdf_url": "http://example.com/paper_err.pdf",
+                     "Title": "Paper Error",
+                 }
+             },
+             "text_embedding_model": dummy_embedding_model,
+             "llm_model": dummy_llm_model,
+             # No vector_store key provided to force creation of a new one.
+         }
+
+         # Use paper_ids=None and use_all_papers=False to trigger semantic ranking branch.
+         input_data = {
+             "question": "What happens when add_paper fails?",
+             "paper_ids": None,
+             "use_all_papers": False,
+             "tool_call_id": "test_add_paper_exception",
+             "state": state,
+         }
+         with self.assertRaises(IOError) as context:
+             question_and_answer.run(input_data)
+         self.assertIn("Add paper failure", str(context.exception))
+
+     @patch("aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.PyPDFLoader")
+     def test_additional_metadata_field_added(self, mock_pypdf_loader):
+         """test that additional metadata fields are added correctly."""
+         # Setup the PDF loader to return a single document with empty metadata
+         mock_loader = mock_pypdf_loader.return_value
+         mock_loader.load.return_value = [
+             Document(page_content="Test content", metadata={})
+         ]
+
+         # Create a dummy embedding model
+         dummy_embedding_model = MagicMock(spec=Embeddings)
+
+         # Define custom metadata fields including an additional field "custom_field"
+         custom_fields = ["title", "paper_id", "page", "chunk_id", "custom_field"]
+         vector_store = Vectorstore(
+             embedding_model=dummy_embedding_model, metadata_fields=custom_fields
+         )
+
+         # Paper metadata includes "Title" (for default title) and the additional "custom_field"
+         paper_metadata = {"Title": "Test Paper", "custom_field": "custom_value"}
+
+         # Call add_paper to process the document and add metadata
+         vector_store.add_paper(
+             paper_id="test_paper",
+             pdf_url="http://example.com/test.pdf",
+             paper_metadata=paper_metadata,
+         )
+
+         # Verify that the document was added with the custom field included in its metadata
+         self.assertIn("test_paper_0", vector_store.documents)
+         added_doc = vector_store.documents["test_paper_0"]
+         self.assertEqual(added_doc.metadata.get("custom_field"), "custom_value")
+
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_hydra_config"
+     )
+     def test_generate_answer_missing_config_fields(self, mock_load_config):
+         """test that generate_answer raises ValueError for missing config fields."""
+         # Create a dummy document and dummy LLM model
+         dummy_doc = Document(
+             page_content="Test content", metadata={"paper_id": "test_paper"}
+         )
+         dummy_llm_model = MagicMock()
+
+         # Case 1: Configuration is None, expect a ValueError
+         mock_load_config.return_value = None
+         with self.assertRaises(ValueError) as context_none:
+             generate_answer("What is the test?", [dummy_doc], dummy_llm_model)
+         self.assertEqual(
+             str(context_none.exception), "Hydra config loading failed: config is None."
+         )
+
+         # Case 2: Configuration missing 'prompt_template', expect a ValueError
+         mock_load_config.return_value = {}
+         with self.assertRaises(ValueError) as context_missing:
+             generate_answer("What is the test?", [dummy_doc], dummy_llm_model)
+         self.assertEqual(
+             str(context_missing.exception),
+             "The prompt_template is missing from the configuration.",
+         )
+
+
+ class TestMissingState(unittest.TestCase):
+     """Test error when missing from state."""
+
+     def test_missing_text_embedding_model(self):
+         """Test error when text_embedding_model is missing from state."""
+         state = {
+             # Missing text_embedding_model
+             "llm_model": MagicMock(),
+             "article_data": {
+                 "paper1": {
+                     "pdf_url": "http://example.com/test.pdf",
+                     "Title": "Test Paper",
+                 }
+             },
+         }
+         tool_call_id = "test_call_2"
+         question = "What is the conclusion?"
+         tool_input = {
+             "question": question,
+             "tool_call_id": tool_call_id,
+             "state": state,
+         }
+         with self.assertRaises(ValueError) as context:
+             question_and_answer.run(tool_input)
+         self.assertEqual(
+             str(context.exception), "No text embedding model found in state."
+         )
+
+     def test_missing_llm_model(self):
+         """Test error when llm_model is missing from state."""
+         state = {
+             "text_embedding_model": MagicMock(),
+             # Missing llm_model
+             "article_data": {
+                 "paper1": {
+                     "pdf_url": "http://example.com/test.pdf",
+                     "Title": "Test Paper",
+                 }
+             },
+         }
+         tool_call_id = "test_call_3"
+         question = "What is the conclusion?"
+         tool_input = {
+             "question": question,
+             "tool_call_id": tool_call_id,
+             "state": state,
+         }
+         with self.assertRaises(ValueError) as context:
+             question_and_answer.run(tool_input)
+         self.assertEqual(str(context.exception), "No LLM model found in state.")
+
+     def test_missing_article_data(self):
+         """Test error when article_data is missing from state."""
+         state = {
+             "text_embedding_model": MagicMock(),
+             "llm_model": MagicMock(),
+             # Missing article_data
+         }
+         tool_call_id = "test_call_4"
+         question = "What is the conclusion?"
+         tool_input = {
+             "question": question,
+             "tool_call_id": tool_call_id,
+             "state": state,
+         }
+         with self.assertRaises(ValueError) as context:
+             question_and_answer.run(tool_input)
+         self.assertEqual(str(context.exception), "No article_data found in state.")
+
+     def test_empty_article_data(self):
          """
-         Fake load_qa_chain function that returns a fake QA chain.
+         Test that when article_data exists but is empty (no paper keys), a ValueError is raised.
          """
-
-         # pylint: disable=too-few-public-methods, unused-argument
-         class FakeChain:
-             """Fake QA chain for testing generate_answer."""
-
-             def invoke(self, **kwargs):
-                 """
-                 Fake invoke method that returns a mock answer.
-                 """
-                 input_data = kwargs.get("input")
-                 return {
-                     "answer": "real mock answer",
-                     "question": input_data.get("question"),
-                 }
-
-         return FakeChain()
-
-     monkeypatch.setattr(question_and_answer, "load_qa_chain", fake_load_qa_chain)
-     # Set dummy configuration values so that generate_answer can run.
-     question_and_answer.cfg.chunk_size = 1000
-     question_and_answer.cfg.chunk_overlap = 0
-     question_and_answer.cfg.openai_api_key = "dummy_key"
-     question_and_answer.cfg.num_retrievals = 1
-     question_and_answer.cfg.qa_chain_type = "dummy-chain"
-
-     question = "What is in the PDF?"
-     dummy_llm_model = object()  # A dummy model placeholder.
-     answer = generate_answer(question, DUMMY_PDF_BYTES, dummy_llm_model)
-     assert answer["answer"] == "real mock answer"
-     assert answer["question"] == question
+         state = {
+             "text_embedding_model": MagicMock(),
+             "llm_model": MagicMock(),
+             "article_data": {},  # empty dict
+         }
+         tool_call_id = "test_empty_article_data"
+         question = "What is the summary?"
+         tool_input = {
+             "question": question,
+             "tool_call_id": tool_call_id,
+             "state": state,
+         }
+         with self.assertRaises(ValueError) as context:
+             question_and_answer.run(tool_input)
+         self.assertEqual(str(context.exception), "No article_data found in state.")
+
+     @patch(
+         "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.maximal_marginal_relevance"
+     )
+     def test_retrieve_relevant_chunks_with_filtering(self, mock_mmr):
+         """Test that filtering works by paper_ids."""
+         mock_mmr.return_value = [0]
+         dummy_embedding = [0.1, 0.2, 0.3]
+
+         mock_embedding_model = MagicMock(spec=Embeddings)
+         mock_embedding_model.embed_query.return_value = dummy_embedding
+         mock_embedding_model.embed_documents.return_value = [dummy_embedding]
+
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+         vector_store.vector_store = True
+         doc1 = Document(page_content="Doc 1", metadata={"paper_id": "paper1"})
+         doc2 = Document(page_content="Doc 2", metadata={"paper_id": "paper2"})
+         vector_store.documents = {"doc1": doc1, "doc2": doc2}
+
+         results = vector_store.retrieve_relevant_chunks(
+             query="query", paper_ids=["paper1"]
+         )
+         assert len(results) == 1
+         assert results[0].metadata["paper_id"] == "paper1"
+
+     def test_retrieve_relevant_chunks_no_matching_docs(self):
+         """Ensure it returns empty list and logs warning if no docs match."""
+         mock_embedding_model = MagicMock(spec=Embeddings)
+         mock_embedding_model.embed_query.return_value = [0.1, 0.2, 0.3]
+         mock_embedding_model.embed_documents.return_value = []
+
+         vector_store = Vectorstore(embedding_model=mock_embedding_model)
+         vector_store.vector_store = True
+         # Add doc with paper_id that won't match
+         vector_store.documents["doc1"] = Document(
+             page_content="No match", metadata={"paper_id": "unmatched_paper"}
+         )
+
+         results = vector_store.retrieve_relevant_chunks(
+             query="test", paper_ids=["nonexistent_id"]
+         )
+         assert results == []