aiagents4pharma-1.40.0-py3-none-any.whl → aiagents4pharma-1.41.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
  2. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
  3. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
  4. aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
  5. aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
  6. aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
  7. aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
  8. aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
  9. aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
  10. aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
  11. aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
  12. aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
  13. aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
  14. aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
  15. aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
  16. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
  17. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
  18. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
  19. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
  20. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
  21. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
  22. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  23. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
  24. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  25. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
  32. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
  33. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  34. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
  35. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
  36. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
  37. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
  38. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
  39. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
  40. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
  41. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/METADATA +27 -115
  42. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/RECORD +45 -23
  43. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
  44. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/WHEEL +0 -0
  45. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/licenses/LICENSE +0 -0
  46. {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/top_level.txt +0 -0
aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py (new file)
@@ -0,0 +1,140 @@
1
+ """gpu detection and index configuration tests."""
2
+
3
+ import subprocess
4
+ from types import SimpleNamespace
5
+ from unittest.mock import MagicMock, patch
6
+
7
+
8
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection import (
9
+ detect_nvidia_gpu,
10
+ get_optimal_index_config,
11
+ log_index_configuration,
12
+ )
13
+
14
+ # === detect_nvidia_gpu ===
15
+
16
+
17
+ def test_detect_nvidia_gpu_force_cpu_from_config():
18
+ """detect_nvidia_gpu should return False if force_cpu_mode is set."""
19
+ config = SimpleNamespace(gpu_detection=SimpleNamespace(force_cpu_mode=True))
20
+ assert detect_nvidia_gpu(config) is False
21
+
22
+
23
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
24
+ def test_detect_nvidia_gpu_success(mock_run):
25
+ """detect_nvidia_gpu should return True if NVIDIA GPUs are detected."""
26
+ mock_run.return_value = MagicMock(
27
+ returncode=0, stdout="NVIDIA A100\nNVIDIA RTX 3090"
28
+ )
29
+
30
+ assert detect_nvidia_gpu() is True
31
+ mock_run.assert_called_once()
32
+
33
+
34
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
35
+ def test_detect_nvidia_gpu_no_output(mock_run):
36
+ """detect_nvidia_gpu should return False if no GPUs are detected."""
37
+ mock_run.return_value = MagicMock(returncode=0, stdout="")
38
+
39
+ assert detect_nvidia_gpu() is False
40
+
41
+
42
+ # === get_optimal_index_config ===
43
+
44
+
45
+ def test_get_optimal_index_config_gpu():
46
+ """get_optimal_index_config should return GPU_CAGRA for GPU setup."""
47
+ index_params, search_params = get_optimal_index_config(
48
+ has_gpu=True, embedding_dim=768
49
+ )
50
+
51
+ assert index_params["index_type"] == "GPU_CAGRA"
52
+ assert "cache_dataset_on_device" in index_params["params"]
53
+ assert search_params["params"]["search_width"] == 16
54
+
55
+
56
+ def test_get_optimal_index_config_cpu():
57
+ """get_optimal_index_config should return IVF_FLAT for CPU setup."""
58
+ index_params, search_params = get_optimal_index_config(
59
+ has_gpu=False, embedding_dim=768
60
+ )
61
+
62
+ assert index_params["index_type"] == "IVF_FLAT"
63
+ assert index_params["params"]["nlist"] == 96 # 768 / 8 = 96
64
+ assert search_params["params"]["nprobe"] == 16
65
+
66
+
67
+ # === log_index_configuration ===
68
+
69
+
70
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
71
+ def test_log_index_configuration_logs_all(mock_logger):
72
+ """log_index_configuration should log all parameters correctly."""
73
+ index_params = {
74
+ "index_type": "IVF_FLAT",
75
+ "metric_type": "COSINE",
76
+ "params": {"nlist": 128},
77
+ }
78
+ search_params = {"metric_type": "COSINE", "params": {"nprobe": 16}}
79
+
80
+ log_index_configuration(index_params, search_params)
81
+
82
+ assert mock_logger.info.call_count >= 5
83
+
84
+
85
+ def test_get_optimal_index_config_gpu_without_cosine():
86
+ """Ensure GPU config defaults to IP when use_cosine is False."""
87
+ index_params, search_params = get_optimal_index_config(
88
+ has_gpu=True, embedding_dim=768, use_cosine=False
89
+ )
90
+
91
+ assert index_params["index_type"] == "GPU_CAGRA"
92
+ assert index_params["metric_type"] == "IP"
93
+ assert search_params["metric_type"] == "IP"
94
+
95
+
96
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
97
+ def test_log_index_configuration_logs_cosine_simulation_note(mock_logger):
98
+ """Test GPU_CAGRA COSINE -> IP note is logged properly."""
99
+ index_params = {
100
+ "index_type": "GPU_CAGRA",
101
+ "metric_type": "IP",
102
+ "params": {"itopk_size": 128},
103
+ }
104
+ search_params = {
105
+ "metric_type": "IP",
106
+ "params": {"search_width": 16},
107
+ }
108
+
109
+ log_index_configuration(index_params, search_params, use_cosine=True)
110
+
111
+ log_messages = [str(call.args[0]) for call in mock_logger.info.call_args_list]
112
+ assert any("simulate COSINE for GPU" in msg for msg in log_messages)
113
+
114
+
115
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
116
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
117
+ def test_detect_nvidia_gpu_timeout_raises_false(mock_run, mock_logger):
118
+ """detect_nvidia_gpu should return False and log info on subprocess.TimeoutExpired."""
119
+ # Simulate a timeout
120
+ mock_run.side_effect = subprocess.TimeoutExpired(cmd="nvidia-smi", timeout=10)
121
+
122
+ result = detect_nvidia_gpu()
123
+ assert result is False
124
+ mock_logger.info.assert_called_with(
125
+ "NVIDIA GPU detection failed: %s", mock_run.side_effect
126
+ )
127
+
128
+
129
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
130
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
131
+ def test_detect_nvidia_gpu_file_not_found_raises_false(mock_run, mock_logger):
132
+ """detect_nvidia_gpu should return False and log info on FileNotFoundError."""
133
+ # Simulate nvidia-smi not installed
134
+ mock_run.side_effect = FileNotFoundError("nvidia-smi not found")
135
+
136
+ result = detect_nvidia_gpu()
137
+ assert result is False
138
+ mock_logger.info.assert_called_with(
139
+ "NVIDIA GPU detection failed: %s", mock_run.side_effect
140
+ )
aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py (new file)
@@ -0,0 +1,116 @@
1
+ """paper_loader tests for the load_all_papers function."""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import pytest
6
+
7
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader import (
8
+ load_all_papers,
9
+ )
10
+
11
+
12
+ @pytest.fixture
13
+ def articles():
14
+ """A fixture to provide a sample articles dictionary."""
15
+ return {
16
+ "p1": {"pdf_url": "http://example.com/p1.pdf", "title": "Paper 1"},
17
+ "p2": {"pdf_url": "http://example.com/p2.pdf", "title": "Paper 2"},
18
+ "p3": {"title": "No PDF paper"},
19
+ }
20
+
21
+
22
+ @pytest.fixture
23
+ def mock_vector_store():
24
+ """Mock vector store fixture."""
25
+ return MagicMock(
26
+ loaded_papers={"p1"},
27
+ paper_metadata={},
28
+ documents={},
29
+ metadata_fields=["title"],
30
+ config={"embedding_batch_size": 1234},
31
+ has_gpu=False,
32
+ vector_store=MagicMock(),
33
+ )
34
+
35
+
36
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
37
+ def test_all_papers_loaded_returns_early(mock_batch, request):
38
+ """Test early return when all papers are already loaded."""
39
+ article_data = request.getfixturevalue("articles")
40
+ vector_store = request.getfixturevalue("mock_vector_store")
41
+ vector_store.loaded_papers = set(article_data.keys())
42
+
43
+ load_all_papers(
44
+ vector_store=vector_store,
45
+ articles=article_data,
46
+ call_id="test_call",
47
+ config={"embedding_batch_size": 1000},
48
+ has_gpu=False,
49
+ )
50
+
51
+ mock_batch.assert_not_called()
52
+
53
+
54
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
55
+ def test_skips_papers_without_pdf(mock_batch, request):
56
+ """Test that papers without PDF URLs are skipped."""
57
+ article_data = request.getfixturevalue("articles")
58
+ vector_store = request.getfixturevalue("mock_vector_store")
59
+ vector_store.loaded_papers = {"p2"} # p1 not loaded, p3 has no pdf
60
+
61
+ load_all_papers(
62
+ vector_store=vector_store,
63
+ articles=article_data,
64
+ call_id="test_call",
65
+ config={"embedding_batch_size": 1000},
66
+ has_gpu=False,
67
+ )
68
+
69
+ assert mock_batch.call_count == 1
70
+ call_args = mock_batch.call_args[1]["papers_to_add"]
71
+ assert len(call_args) == 1
72
+ assert call_args[0][0] == "p1"
73
+
74
+
75
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
76
+ def test_gpu_parameters_used(mock_batch, request):
77
+ """Test GPU-based parameters are used if has_gpu is True."""
78
+ article_data = request.getfixturevalue("articles")
79
+ vector_store = request.getfixturevalue("mock_vector_store")
80
+ vector_store.loaded_papers = set()
81
+ vector_store.has_gpu = True
82
+
83
+ load_all_papers(
84
+ vector_store=vector_store,
85
+ articles=article_data,
86
+ call_id="gpu_call",
87
+ config={"embedding_batch_size": 2048},
88
+ has_gpu=True,
89
+ )
90
+
91
+ args = mock_batch.call_args[1]
92
+ assert args["has_gpu"] is True
93
+ assert args["batch_size"] == 2048
94
+ assert args["max_workers"] >= 4
95
+
96
+
97
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
98
+ def test_cpu_parameters_used(mock_batch, request):
99
+ """Test CPU-based parameters are used if has_gpu is False."""
100
+ article_data = request.getfixturevalue("articles")
101
+ vector_store = request.getfixturevalue("mock_vector_store")
102
+ vector_store.loaded_papers = set()
103
+ vector_store.has_gpu = False
104
+
105
+ load_all_papers(
106
+ vector_store=vector_store,
107
+ articles=article_data,
108
+ call_id="cpu_call",
109
+ config={"embedding_batch_size": 512},
110
+ has_gpu=False,
111
+ )
112
+
113
+ args = mock_batch.call_args[1]
114
+ assert args["has_gpu"] is False
115
+ assert args["batch_size"] == 512
116
+ assert args["max_workers"] >= 3
aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py (new file)
@@ -0,0 +1,98 @@
1
+ """pdf rag pipeline tests."""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+ import pytest
5
+ from langchain_core.documents import Document
6
+
7
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline import (
8
+ retrieve_and_rerank_chunks,
9
+ )
10
+
11
+
12
+ @pytest.fixture(name="base_config_fixture")
13
+ def _base_config_fixture():
14
+ """Provides a config-like object for testing."""
15
+ config = MagicMock()
16
+ config.get.side_effect = lambda key, default=None: {
17
+ "initial_retrieval_k": 120,
18
+ "mmr_diversity": 0.7,
19
+ }.get(key, default)
20
+ config.top_k_chunks = 5
21
+ return config
22
+
23
+
24
+ @pytest.fixture(name="mock_docs_fixture")
25
+ def _mock_docs_fixture():
26
+ """Simulates PDF document chunks."""
27
+ return [
28
+ Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i % 2}"})
29
+ for i in range(10)
30
+ ]
31
+
32
+
33
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
34
+ @patch(
35
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks"
36
+ )
37
+ def test_rag_pipeline_gpu_path(
38
+ mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture
39
+ ):
40
+ """test RAG pipeline with GPU path."""
41
+ mock_retrieve.return_value = mock_docs_fixture
42
+ mock_rerank.return_value = mock_docs_fixture[:5]
43
+
44
+ result = retrieve_and_rerank_chunks(
45
+ vector_store=MagicMock(),
46
+ query="Explain AI.",
47
+ config=base_config_fixture,
48
+ call_id="gpu_test",
49
+ has_gpu=True,
50
+ )
51
+
52
+ assert result == mock_docs_fixture[:5]
53
+ mock_retrieve.assert_called_once()
54
+ mock_rerank.assert_called_once()
55
+
56
+
57
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
58
+ @patch(
59
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks"
60
+ )
61
+ def test_rag_pipeline_cpu_path(
62
+ mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture
63
+ ):
64
+ """rag pipeline with CPU path."""
65
+ mock_retrieve.return_value = mock_docs_fixture
66
+ mock_rerank.return_value = mock_docs_fixture[:5]
67
+
68
+ result = retrieve_and_rerank_chunks(
69
+ vector_store=MagicMock(),
70
+ query="Explain quantum physics.",
71
+ config=base_config_fixture,
72
+ call_id="cpu_test",
73
+ has_gpu=False,
74
+ )
75
+
76
+ assert result == mock_docs_fixture[:5]
77
+ mock_retrieve.assert_called_once()
78
+ mock_rerank.assert_called_once()
79
+
80
+
81
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
82
+ @patch(
83
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks"
84
+ )
85
+ def test_rag_pipeline_empty_results(mock_retrieve, mock_rerank, base_config_fixture):
86
+ """rag pipeline with no results."""
87
+ mock_retrieve.return_value = []
88
+
89
+ result = retrieve_and_rerank_chunks(
90
+ vector_store=MagicMock(),
91
+ query="No match?",
92
+ config=base_config_fixture,
93
+ call_id="empty_test",
94
+ has_gpu=False,
95
+ )
96
+
97
+ assert result == []
98
+ mock_rerank.assert_not_called()
aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py (new file)
@@ -0,0 +1,197 @@
1
+ """retrieve_chunks for PDF tool tests"""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import pytest
6
+ from langchain_core.documents import Document
7
+
8
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks import (
9
+ retrieve_relevant_chunks,
10
+ retrieve_relevant_chunks_with_scores,
11
+ )
12
+
13
+
14
+ @pytest.fixture
15
+ def mock_vector_store():
16
+ """Fixture to simulate a vector store."""
17
+ return MagicMock()
18
+
19
+
20
+ @pytest.fixture
21
+ def mock_chunks():
22
+ """Fixture to simulate PDF chunks."""
23
+ return [
24
+ Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i%2}"})
25
+ for i in range(5)
26
+ ]
27
+
28
+
29
+ @pytest.fixture
30
+ def mock_scored_chunks():
31
+ """Fixture to simulate scored PDF chunks."""
32
+ return [
33
+ (Document(page_content=f"chunk {i}", metadata={}), score)
34
+ for i, score in enumerate([0.9, 0.8, 0.4, 0.95])
35
+ ]
36
+
37
+
38
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
39
+ def test_retrieve_chunks_cpu_success(mock_logger, request):
40
+ """Test retrieve_relevant_chunks with CPU path."""
41
+ vector_store = request.getfixturevalue("mock_vector_store")
42
+ chunks = request.getfixturevalue("mock_chunks")
43
+ vector_store.has_gpu = False
44
+ mock_logger.debug = MagicMock()
45
+ vector_store.max_marginal_relevance_search.return_value = chunks
46
+
47
+ results = retrieve_relevant_chunks(vector_store, query="AI", top_k=5)
48
+
49
+ assert results == chunks
50
+ vector_store.max_marginal_relevance_search.assert_called_once()
51
+
52
+
53
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
54
+ def test_retrieve_chunks_gpu_success(mock_logger, request):
55
+ """Test retrieve_relevant_chunks with GPU path."""
56
+ vector_store = request.getfixturevalue("mock_vector_store")
57
+ chunks = request.getfixturevalue("mock_chunks")
58
+ vector_store.has_gpu = True
59
+ mock_logger.debug = MagicMock()
60
+ vector_store.max_marginal_relevance_search.return_value = chunks
61
+
62
+ results = retrieve_relevant_chunks(vector_store, query="AI", top_k=5)
63
+
64
+ assert results == chunks
65
+ vector_store.max_marginal_relevance_search.assert_called_once()
66
+
67
+
68
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
69
+ def test_retrieve_chunks_with_filter(mock_logger, request):
70
+ """Test retrieve_relevant_chunks with paper_id filter."""
71
+ vector_store = request.getfixturevalue("mock_vector_store")
72
+ chunks = request.getfixturevalue("mock_chunks")
73
+ vector_store.has_gpu = False
74
+ mock_logger.debug = MagicMock()
75
+ vector_store.max_marginal_relevance_search.return_value = chunks
76
+
77
+ results = retrieve_relevant_chunks(
78
+ vector_store, query="filter test", paper_ids=["P1"], top_k=3
79
+ )
80
+ assert results == chunks
81
+ args, kwargs = vector_store.max_marginal_relevance_search.call_args
82
+ assert len(args) == 0
83
+ assert kwargs["filter"] == {"paper_id": ["P1"]}
84
+
85
+
86
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
87
+ def test_retrieve_chunks_no_vector_store(mock_logger):
88
+ """Test when vector store is None."""
89
+ result = retrieve_relevant_chunks(vector_store=None, query="irrelevant")
90
+ assert result == []
91
+ mock_logger.error.assert_called_with("Vector store is not initialized")
92
+
93
+
94
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
95
+ def test_retrieve_chunks_with_scores_no_vector_store(mock_logger):
96
+ """Test retrieve_relevant_chunks_with_scores when vector store is None."""
97
+ result = retrieve_relevant_chunks_with_scores(vector_store=None, query="none")
98
+ assert result == []
99
+ mock_logger.error.assert_called_with("Vector store is not initialized")
100
+
101
+
102
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
103
+ def test_retrieve_chunks_default_search_params(mock_logger, request):
104
+ """Test default search params used when not defined."""
105
+ vector_store = request.getfixturevalue("mock_vector_store")
106
+ chunks = request.getfixturevalue("mock_chunks")
107
+ vector_store.has_gpu = False
108
+ delattr(vector_store, "search_params")
109
+ vector_store.max_marginal_relevance_search.return_value = chunks
110
+
111
+ results = retrieve_relevant_chunks(
112
+ vector_store,
113
+ query="default search param test",
114
+ top_k=5,
115
+ )
116
+
117
+ assert results == chunks
118
+ mock_logger.debug.assert_any_call(
119
+ "Using default search parameters (no hardware optimization)"
120
+ )
121
+
122
+
123
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
124
+ def test_retrieve_chunks_with_scores_paper_filter(mock_logger, request):
125
+ """Test retrieve_relevant_chunks_with_scores applies paper_id filter."""
126
+ vector_store = request.getfixturevalue("mock_vector_store")
127
+ scored_chunks = request.getfixturevalue("mock_scored_chunks")
128
+ vector_store.similarity_search_with_score.return_value = scored_chunks
129
+ mock_logger.debug = MagicMock()
130
+
131
+ results = retrieve_relevant_chunks_with_scores(
132
+ vector_store=vector_store,
133
+ query="filtered score",
134
+ paper_ids=["P123"],
135
+ top_k=5,
136
+ score_threshold=0.0,
137
+ )
138
+
139
+ assert isinstance(results, list)
140
+ assert vector_store.similarity_search_with_score.call_args[1]["filter"] == {
141
+ "paper_id": ["P123"]
142
+ }
143
+
144
+
145
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
146
+ def test_retrieve_chunks_with_scores_gpu_debug(mock_logger, request):
147
+ """Test GPU debug log and correct return in retrieve_relevant_chunks_with_scores."""
148
+ vector_store = request.getfixturevalue("mock_vector_store")
149
+ scored_chunks = request.getfixturevalue("mock_scored_chunks")
150
+ vector_store.has_gpu = True
151
+ vector_store.similarity_search_with_score.return_value = scored_chunks
152
+ mock_logger.debug = MagicMock()
153
+
154
+ results = retrieve_relevant_chunks_with_scores(
155
+ vector_store=vector_store, query="gpu test", top_k=4, score_threshold=0.0
156
+ )
157
+
158
+ # Should return all scored_chunks since threshold=0.0
159
+ assert results == scored_chunks
160
+ mock_logger.debug.assert_called_with("GPU-accelerated similarity search enabled")
161
+
162
+
163
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
164
+ def test_retrieve_chunks_with_scores_cpu_debug(mock_logger, request):
165
+ """Test CPU debug log and correct return in retrieve_relevant_chunks_with_scores."""
166
+ vector_store = request.getfixturevalue("mock_vector_store")
167
+ scored_chunks = request.getfixturevalue("mock_scored_chunks")
168
+ vector_store.has_gpu = False
169
+ vector_store.similarity_search_with_score.return_value = scored_chunks
170
+ mock_logger.debug = MagicMock()
171
+
172
+ results = retrieve_relevant_chunks_with_scores(
173
+ vector_store=vector_store, query="cpu test", top_k=2, score_threshold=0.0
174
+ )
175
+
176
+ assert results == scored_chunks
177
+ mock_logger.debug.assert_called_with("Standard CPU similarity search")
178
+
179
+
180
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
181
+ def test_retrieve_chunks_with_scores_not_implemented(mock_logger, request):
182
+ """Test NotImplementedError path when similarity_search_with_score is missing."""
183
+ vector_store = request.getfixturevalue("mock_vector_store")
184
+ vector_store.has_gpu = True
185
+ # Remove the method to trigger NotImplementedError
186
+ if hasattr(vector_store, "similarity_search_with_score"):
187
+ delattr(vector_store, "similarity_search_with_score")
188
+ mock_logger.debug = MagicMock()
189
+
190
+ with pytest.raises(NotImplementedError) as excinfo:
191
+ retrieve_relevant_chunks_with_scores(
192
+ vector_store=vector_store, query="fail test", top_k=1, score_threshold=0.0
193
+ )
194
+ assert "Vector store does not support similarity_search_with_score" in str(
195
+ excinfo.value
196
+ )
197
+ mock_logger.debug.assert_called_with("GPU-accelerated similarity search enabled")