aiagents4pharma 1.42.0__py3-none-any.whl → 1.44.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +618 -413
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +362 -25
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +146 -109
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +240 -83
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +7 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +49 -95
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +15 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +16 -2
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +40 -5
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +15 -5
- aiagents4pharma/talk2scholars/configs/config.yaml +1 -3
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py +478 -0
- aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py +620 -0
- aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py +697 -0
- aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py +534 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +22 -12
- aiagents4pharma/talk2scholars/tests/test_paper_downloader.py +545 -0
- aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py +1067 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +2 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +457 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +20 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +209 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +343 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +321 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +198 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +337 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +97 -45
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +47 -29
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.44.0.dist-info}/METADATA +3 -1
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.44.0.dist-info}/RECORD +36 -33
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +0 -4
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +0 -249
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +0 -177
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +0 -114
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +0 -114
- /aiagents4pharma/talk2scholars/configs/tools/{download_arxiv_paper → paper_download}/__init__.py +0 -0
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.44.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.44.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.44.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,545 @@
|
|
1
|
+
"""
|
2
|
+
Unit tests for the unified paper downloader functionality.
|
3
|
+
|
4
|
+
These tests drive coverage through the public surface (factory .create(),
|
5
|
+
tool wrappers, and the tool entry) to exercise the key branches in
|
6
|
+
aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py:
|
7
|
+
- Service selection in create()
|
8
|
+
- Hydra config load with/without cache, and failure path
|
9
|
+
- GlobalHydra clear when already initialized
|
10
|
+
- Service config extraction via OmegaConf, __dict__, items(), and dir() fallback
|
11
|
+
- _apply_config warning path (handled via public create())
|
12
|
+
- Success and both error paths in _download_papers_impl()
|
13
|
+
"""
|
14
|
+
|
15
|
+
import unittest
|
16
|
+
from types import SimpleNamespace
|
17
|
+
from unittest.mock import Mock, patch
|
18
|
+
|
19
|
+
from langchain_core.messages import ToolMessage
|
20
|
+
from langgraph.types import Command
|
21
|
+
|
22
|
+
from aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader import (
|
23
|
+
PaperDownloaderFactory,
|
24
|
+
_download_papers_impl,
|
25
|
+
download_arxiv_papers,
|
26
|
+
download_biorxiv_papers,
|
27
|
+
download_medrxiv_papers,
|
28
|
+
download_papers,
|
29
|
+
download_pubmed_papers,
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
# --- tiny helpers to manipulate factory state without protected-access lint ---
|
34
|
+
def _set_cached_config(value):
|
35
|
+
"""set cached config in the factory for testing purposes."""
|
36
|
+
setattr(PaperDownloaderFactory, "_cached_config", value)
|
37
|
+
|
38
|
+
|
39
|
+
def _set_config_lock(lock_obj):
|
40
|
+
"""set the config lock object in the factory for testing purposes."""
|
41
|
+
setattr(PaperDownloaderFactory, "_config_lock", lock_obj)
|
42
|
+
|
43
|
+
|
44
|
+
class PaperDownloaderFactoryTestShim(PaperDownloaderFactory):
|
45
|
+
"""Public shim for manipulating internal cache/lock in tests."""
|
46
|
+
|
47
|
+
__test__ = False # avoid pytest test collection
|
48
|
+
|
49
|
+
|
50
|
+
def _cfg_obj(common_obj, services_map):
|
51
|
+
"""Build a fake hydra cfg structure with tools.paper_download."""
|
52
|
+
tools = SimpleNamespace(
|
53
|
+
paper_download=SimpleNamespace(common=common_obj, services=services_map)
|
54
|
+
)
|
55
|
+
return SimpleNamespace(tools=tools)
|
56
|
+
|
57
|
+
|
58
|
+
class _SlotsSource:
|
59
|
+
"""Object with __slots__ to avoid __dict__, forcing dir() fallback."""
|
60
|
+
|
61
|
+
__slots__ = ("public_attr", "_private")
|
62
|
+
|
63
|
+
def __init__(self, public_val, private_val):
|
64
|
+
"""initialize with public and private attributes."""
|
65
|
+
self.public_attr = public_val
|
66
|
+
self._private = private_val
|
67
|
+
|
68
|
+
# 1st public method
|
69
|
+
def peek(self): # pragma: no cover - used in tiny coverage test
|
70
|
+
"""Return the public value."""
|
71
|
+
return self.public_attr
|
72
|
+
|
73
|
+
# 2nd public method to satisfy R0903
|
74
|
+
def echo(self, value=None): # pragma: no cover - used in tiny coverage test
|
75
|
+
"""Echo given value or the public attribute."""
|
76
|
+
return self.public_attr if value is None else value
|
77
|
+
|
78
|
+
|
79
|
+
class _ItemsNoDict:
|
80
|
+
"""items()-only object (no __dict__) to force items() extraction path."""
|
81
|
+
|
82
|
+
__slots__ = ("_data",)
|
83
|
+
|
84
|
+
def __init__(self, data):
|
85
|
+
"""initialize with a dict-like data structure."""
|
86
|
+
self._data = data
|
87
|
+
|
88
|
+
def items(self):
|
89
|
+
"""implement items() to return the internal data."""
|
90
|
+
return list(self._data.items())
|
91
|
+
|
92
|
+
# add a second public method to satisfy R0903
|
93
|
+
def size(self): # pragma: no cover - simple helper to satisfy pylint
|
94
|
+
"""Return number of keys."""
|
95
|
+
return len(self._data)
|
96
|
+
|
97
|
+
|
98
|
+
class _ExplodingItemsSlots:
|
99
|
+
"""items()-only object (no __dict__) that raises to hit _apply_config warning."""
|
100
|
+
|
101
|
+
__slots__ = ()
|
102
|
+
|
103
|
+
def items(self): # pragma: no cover - we only care that it raises
|
104
|
+
"""implement items() that raises an error to test warning handling."""
|
105
|
+
raise AttributeError("boom in items()")
|
106
|
+
|
107
|
+
# add a second public method to satisfy R0903
|
108
|
+
def noop(self): # pragma: no cover - simple helper to satisfy pylint
|
109
|
+
"""No-op method."""
|
110
|
+
return None
|
111
|
+
|
112
|
+
|
113
|
+
class TestPaperDownloaderFactory(unittest.TestCase):
|
114
|
+
"""Tests for PaperDownloaderFactory behavior via public APIs."""
|
115
|
+
|
116
|
+
def setUp(self):
|
117
|
+
"""setup before each test."""
|
118
|
+
PaperDownloaderFactory.clear_cache()
|
119
|
+
|
120
|
+
def tearDown(self):
|
121
|
+
"""tear down after each test."""
|
122
|
+
PaperDownloaderFactory.clear_cache()
|
123
|
+
|
124
|
+
@patch(
|
125
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.ArxivDownloader"
|
126
|
+
)
|
127
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
128
|
+
@patch(
|
129
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
130
|
+
)
|
131
|
+
def test_create_arxiv_and_cached_config(
|
132
|
+
self, mock_global_hydra, mock_hydra, mock_arxiv
|
133
|
+
):
|
134
|
+
"""First create loads config, second create returns cached config (no re-init)."""
|
135
|
+
# First call: GlobalHydra not initialized
|
136
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
137
|
+
# Common via __dict__ path; service via items() path
|
138
|
+
common_obj = SimpleNamespace(request_timeout=15, chunk_size=4096)
|
139
|
+
svc_obj = _ItemsNoDict({"api_url": "https://api", "extra": 1})
|
140
|
+
mock_hydra.compose.return_value = _cfg_obj(common_obj, {"arxiv": svc_obj})
|
141
|
+
|
142
|
+
# Create arxiv
|
143
|
+
result1 = PaperDownloaderFactory.create("arxiv")
|
144
|
+
self.assertIs(result1, mock_arxiv.return_value)
|
145
|
+
mock_arxiv.assert_called_once()
|
146
|
+
passed_cfg = mock_arxiv.call_args[0][0]
|
147
|
+
self.assertTrue(passed_cfg.has_attribute("api_url"))
|
148
|
+
self.assertEqual(passed_cfg.get_config_dict()["request_timeout"], 15)
|
149
|
+
self.assertEqual(passed_cfg.get_config_dict()["chunk_size"], 4096)
|
150
|
+
self.assertEqual(passed_cfg.get_config_dict()["api_url"], "https://api")
|
151
|
+
|
152
|
+
# Second create (cached): hydra.initialize should not be called again
|
153
|
+
mock_hydra.initialize.reset_mock()
|
154
|
+
mock_hydra.compose.reset_mock()
|
155
|
+
PaperDownloaderFactory.create("arxiv")
|
156
|
+
mock_hydra.initialize.assert_not_called()
|
157
|
+
mock_hydra.compose.assert_not_called()
|
158
|
+
|
159
|
+
@patch(
|
160
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.MedrxivDownloader"
|
161
|
+
)
|
162
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
163
|
+
@patch(
|
164
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
165
|
+
)
|
166
|
+
@patch(
|
167
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.OmegaConf"
|
168
|
+
)
|
169
|
+
def test_create_medrxiv_omegaconf_and_clear_existing(
|
170
|
+
self, mock_omegaconf, mock_global_hydra, mock_hydra, mock_medrxiv
|
171
|
+
):
|
172
|
+
"""When GlobalHydra is initialized, it should clear;
|
173
|
+
OmegaConf extraction should populate fields."""
|
174
|
+
PaperDownloaderFactory.clear_cache()
|
175
|
+
# Force "already initialized" branch
|
176
|
+
mock_global_hydra.return_value.is_initialized.return_value = True
|
177
|
+
|
178
|
+
# OmegaConf conversion for both common and service
|
179
|
+
common_oc = SimpleNamespace(_content=True)
|
180
|
+
svc_oc = SimpleNamespace(_content=True)
|
181
|
+
mock_omegaconf.to_container.side_effect = [
|
182
|
+
{"request_timeout": 20, "chunk_size": 8192},
|
183
|
+
{"api_url": "https://med", "pdf_url_template": "T"},
|
184
|
+
]
|
185
|
+
mock_hydra.compose.return_value = _cfg_obj(common_oc, {"medrxiv": svc_oc})
|
186
|
+
|
187
|
+
PaperDownloaderFactory.create("medrxiv")
|
188
|
+
# GlobalHydra.instance().clear should be called once
|
189
|
+
mock_global_hydra.instance.return_value.clear.assert_called_once()
|
190
|
+
# Verify the config passed
|
191
|
+
cfg = mock_medrxiv.call_args[0][0]
|
192
|
+
cfg_d = cfg.get_config_dict()
|
193
|
+
self.assertEqual(cfg_d["request_timeout"], 20)
|
194
|
+
self.assertEqual(cfg_d["chunk_size"], 8192)
|
195
|
+
self.assertEqual(cfg_d["api_url"], "https://med")
|
196
|
+
self.assertEqual(cfg_d["pdf_url_template"], "T")
|
197
|
+
|
198
|
+
@patch(
|
199
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.BiorxivDownloader"
|
200
|
+
)
|
201
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
202
|
+
@patch(
|
203
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
204
|
+
)
|
205
|
+
def test_create_biorxiv_dir_fallback(
|
206
|
+
self, mock_global_hydra, mock_hydra, mock_biorxiv
|
207
|
+
):
|
208
|
+
"""dir() fallback path with __slots__ object should populate public, skip private."""
|
209
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
210
|
+
common_obj = _SlotsSource(public_val=30, private_val="hide")
|
211
|
+
svc_obj = _SlotsSource(public_val="https://biorxiv", private_val="x")
|
212
|
+
mock_hydra.compose.return_value = _cfg_obj(common_obj, {"biorxiv": svc_obj})
|
213
|
+
|
214
|
+
PaperDownloaderFactory.create("biorxiv")
|
215
|
+
cfg = mock_biorxiv.call_args[0][0]
|
216
|
+
cfg_d = cfg.get_config_dict()
|
217
|
+
# Both "public_attr" from common and service appear;
|
218
|
+
# service wins on name clash? not needed, just check present.
|
219
|
+
self.assertIn("public_attr", cfg_d)
|
220
|
+
self.assertEqual(cfg_d["public_attr"], "https://biorxiv")
|
221
|
+
# Ensure private key not present
|
222
|
+
self.assertNotIn("_private", cfg_d)
|
223
|
+
|
224
|
+
@patch(
|
225
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.PubmedDownloader"
|
226
|
+
)
|
227
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
228
|
+
@patch(
|
229
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
230
|
+
)
|
231
|
+
def test_create_pubmed_apply_config_warning_path(
|
232
|
+
self, mock_global_hydra, mock_hydra, mock_pubmed
|
233
|
+
):
|
234
|
+
"""If extraction raises in a path, _apply_config should log a warning and continue."""
|
235
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
236
|
+
# First (common) will raise inside _extract_from_items -> warning
|
237
|
+
common_obj = _ExplodingItemsSlots()
|
238
|
+
# Service path is sane to still build config
|
239
|
+
svc_obj = SimpleNamespace(
|
240
|
+
api_url="https://pubmed", request_timeout=55, chunk_size=1024
|
241
|
+
)
|
242
|
+
mock_hydra.compose.return_value = _cfg_obj(common_obj, {"pubmed": svc_obj})
|
243
|
+
|
244
|
+
with patch(
|
245
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.logger"
|
246
|
+
) as mock_logger:
|
247
|
+
PaperDownloaderFactory.create("pubmed")
|
248
|
+
# Warning logged once for common
|
249
|
+
self.assertTrue(mock_logger.warning.called)
|
250
|
+
|
251
|
+
cfg = mock_pubmed.call_args[0][0]
|
252
|
+
cfg_d = cfg.get_config_dict()
|
253
|
+
self.assertEqual(cfg_d["api_url"], "https://pubmed")
|
254
|
+
self.assertEqual(cfg_d["request_timeout"], 55)
|
255
|
+
self.assertEqual(cfg_d["chunk_size"], 1024)
|
256
|
+
|
257
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
258
|
+
@patch(
|
259
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
260
|
+
)
|
261
|
+
def test_create_missing_service_error_message(self, mock_global_hydra, mock_hydra):
|
262
|
+
"""Missing service should raise ValueError with 'Service ... not found' message."""
|
263
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
264
|
+
mock_hydra.compose.return_value = _cfg_obj(SimpleNamespace(), {"arxiv": {}})
|
265
|
+
with self.assertRaises(ValueError) as ctx:
|
266
|
+
PaperDownloaderFactory.create("unsupported")
|
267
|
+
self.assertIn(
|
268
|
+
"Service 'unsupported' not found in configuration", str(ctx.exception)
|
269
|
+
)
|
270
|
+
|
271
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
272
|
+
@patch(
|
273
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
274
|
+
)
|
275
|
+
def test_get_unified_config_failure_raises_runtimeerror(
|
276
|
+
self, mock_global_hydra, mock_hydra
|
277
|
+
):
|
278
|
+
"""Hydra initialize failure should surface as RuntimeError from create()."""
|
279
|
+
PaperDownloaderFactory.clear_cache()
|
280
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
281
|
+
mock_hydra.initialize.side_effect = Exception("Config error")
|
282
|
+
# Using arxiv path to trigger load; patch downloader to avoid import side-effects
|
283
|
+
with patch(
|
284
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.ArxivDownloader"
|
285
|
+
):
|
286
|
+
with self.assertRaises(RuntimeError):
|
287
|
+
PaperDownloaderFactory.create("arxiv")
|
288
|
+
|
289
|
+
|
290
|
+
class TestDownloadPapersFunction(unittest.TestCase):
|
291
|
+
"""Tests for the download_papers tool and internal impl."""
|
292
|
+
|
293
|
+
@patch(
|
294
|
+
"aiagents4pharma.talk2scholars.tools.paper_download."
|
295
|
+
"paper_downloader.PaperDownloaderFactory.create"
|
296
|
+
)
|
297
|
+
def test_download_papers_success(self, mock_create):
|
298
|
+
"""Successful run returns article data and ToolMessage with summary."""
|
299
|
+
dl = Mock()
|
300
|
+
dl.get_service_name.return_value = "arXiv"
|
301
|
+
dl.process_identifiers.return_value = {
|
302
|
+
"1234.5678": {"Title": "T", "access_type": "open_access_downloaded"}
|
303
|
+
}
|
304
|
+
dl.build_summary.return_value = "Summary OK"
|
305
|
+
mock_create.return_value = dl
|
306
|
+
|
307
|
+
cmd = _download_papers_impl("arxiv", ["1234.5678"], "tid1")
|
308
|
+
self.assertIsInstance(cmd, Command)
|
309
|
+
self.assertIn("1234.5678", cmd.update["article_data"])
|
310
|
+
msg = cmd.update["messages"][0]
|
311
|
+
self.assertIsInstance(msg, ToolMessage)
|
312
|
+
self.assertEqual(msg.tool_call_id, "tid1")
|
313
|
+
self.assertEqual(msg.content, "Summary OK")
|
314
|
+
|
315
|
+
@patch(
|
316
|
+
"aiagents4pharma.talk2scholars.tools.paper_download."
|
317
|
+
"paper_downloader.PaperDownloaderFactory.get_default_service"
|
318
|
+
)
|
319
|
+
@patch(
|
320
|
+
"aiagents4pharma.talk2scholars.tools.paper_download."
|
321
|
+
"paper_downloader.PaperDownloaderFactory.create"
|
322
|
+
)
|
323
|
+
def test_download_papers_none_service_uses_default(self, mock_create, mock_get_default):
|
324
|
+
"""When service=None, should use get_default_service() result."""
|
325
|
+
mock_get_default.return_value = "pubmed"
|
326
|
+
dl = Mock()
|
327
|
+
dl.get_service_name.return_value = "PubMed"
|
328
|
+
dl.process_identifiers.return_value = {
|
329
|
+
"12345": {"Title": "Test", "access_type": "abstract_only"}
|
330
|
+
}
|
331
|
+
dl.build_summary.return_value = "PubMed Summary"
|
332
|
+
mock_create.return_value = dl
|
333
|
+
|
334
|
+
cmd = _download_papers_impl(None, ["12345"], "tid1")
|
335
|
+
|
336
|
+
# Verify default service was requested
|
337
|
+
mock_get_default.assert_called_once()
|
338
|
+
# Verify create was called with the default service
|
339
|
+
mock_create.assert_called_once_with("pubmed")
|
340
|
+
|
341
|
+
self.assertIsInstance(cmd, Command)
|
342
|
+
self.assertIn("12345", cmd.update["article_data"])
|
343
|
+
|
344
|
+
@patch(
|
345
|
+
"aiagents4pharma.talk2scholars.tools.paper_download."
|
346
|
+
"paper_downloader.PaperDownloaderFactory.create"
|
347
|
+
)
|
348
|
+
def test_download_papers_service_error_branch(self, mock_create):
|
349
|
+
"""ValueError from factory becomes a 'Service error' ToolMessage and empty data."""
|
350
|
+
mock_create.side_effect = ValueError("Unsupported service: nope")
|
351
|
+
cmd = _download_papers_impl("nope", ["x"], "tid2")
|
352
|
+
self.assertEqual(cmd.update["article_data"], {})
|
353
|
+
self.assertIn(
|
354
|
+
"Error: Service error for 'nope': Unsupported service: nope",
|
355
|
+
cmd.update["messages"][0].content,
|
356
|
+
)
|
357
|
+
|
358
|
+
@patch(
|
359
|
+
"aiagents4pharma.talk2scholars.tools.paper_download."
|
360
|
+
"paper_downloader.PaperDownloaderFactory.create"
|
361
|
+
)
|
362
|
+
def test_download_papers_unexpected_error_branch(self, mock_create):
|
363
|
+
"""Unexpected error from downloader is caught and surfaced."""
|
364
|
+
dl = Mock()
|
365
|
+
dl.get_service_name.return_value = "arXiv"
|
366
|
+
dl.process_identifiers.side_effect = RuntimeError("kaboom")
|
367
|
+
mock_create.return_value = dl
|
368
|
+
|
369
|
+
cmd = _download_papers_impl("arxiv", ["x"], "tid3")
|
370
|
+
self.assertEqual(cmd.update["article_data"], {})
|
371
|
+
self.assertIn(
|
372
|
+
"Error: Unexpected error during paper download: kaboom",
|
373
|
+
cmd.update["messages"][0].content,
|
374
|
+
)
|
375
|
+
|
376
|
+
@patch(
|
377
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader._download_papers_impl"
|
378
|
+
)
|
379
|
+
def test_convenience_wrappers(self, mock_impl):
|
380
|
+
"""The convenience functions forward to the core impl with the right service string."""
|
381
|
+
mock_impl.return_value = Command(update={"ok": True})
|
382
|
+
download_arxiv_papers(["a"], "tc1")
|
383
|
+
mock_impl.assert_called_with("arxiv", ["a"], "tc1")
|
384
|
+
download_medrxiv_papers(["b"], "tc2")
|
385
|
+
mock_impl.assert_called_with("medrxiv", ["b"], "tc2")
|
386
|
+
download_biorxiv_papers(["c"], "tc3")
|
387
|
+
mock_impl.assert_called_with("biorxiv", ["c"], "tc3")
|
388
|
+
download_pubmed_papers(["d"], "tc4")
|
389
|
+
mock_impl.assert_called_with("pubmed", ["d"], "tc4")
|
390
|
+
|
391
|
+
@patch(
|
392
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader._download_papers_impl"
|
393
|
+
)
|
394
|
+
def test_tool_entry(self, mock_impl):
|
395
|
+
"""The download_papers tool entry should call the core impl."""
|
396
|
+
mock_impl.return_value = Command(update={"ok": True})
|
397
|
+
payload = {"service": "arxiv", "identifiers": ["123"], "tool_call_id": "tid"}
|
398
|
+
result = download_papers.invoke(payload)
|
399
|
+
mock_impl.assert_called_once_with("arxiv", ["123"], "tid")
|
400
|
+
self.assertTrue(result.update["ok"])
|
401
|
+
|
402
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
403
|
+
@patch(
|
404
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
405
|
+
)
|
406
|
+
def test_get_default_service_functionality(self, mock_global_hydra, mock_hydra):
|
407
|
+
"""Test get_default_service method with various configurations."""
|
408
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
409
|
+
|
410
|
+
# Test default service from config
|
411
|
+
common_cfg = SimpleNamespace(request_timeout=30, chunk_size=8192)
|
412
|
+
services = {
|
413
|
+
"arxiv": SimpleNamespace(api_url="https://arxiv.org"),
|
414
|
+
"pubmed": SimpleNamespace(id_converter_url="https://pmc.ncbi.nlm.nih.gov")
|
415
|
+
}
|
416
|
+
tool_cfg = SimpleNamespace(default_service="arxiv")
|
417
|
+
mock_hydra.compose.return_value = SimpleNamespace(
|
418
|
+
tools=SimpleNamespace(
|
419
|
+
paper_download=SimpleNamespace(
|
420
|
+
tool=tool_cfg, common=common_cfg, services=services
|
421
|
+
)
|
422
|
+
)
|
423
|
+
)
|
424
|
+
|
425
|
+
# Clear cache to ensure fresh config load
|
426
|
+
PaperDownloaderFactory.clear_cache()
|
427
|
+
result = PaperDownloaderFactory.get_default_service()
|
428
|
+
self.assertEqual(result, "arxiv")
|
429
|
+
|
430
|
+
# Test invalid default service fallback
|
431
|
+
tool_cfg.default_service = "invalid_service"
|
432
|
+
PaperDownloaderFactory.clear_cache()
|
433
|
+
result = PaperDownloaderFactory.get_default_service()
|
434
|
+
self.assertEqual(result, "pubmed") # Should fallback to pubmed
|
435
|
+
|
436
|
+
# Test missing default service (fallback to pubmed)
|
437
|
+
mock_hydra.compose.return_value = SimpleNamespace(
|
438
|
+
tools=SimpleNamespace(
|
439
|
+
paper_download=SimpleNamespace(
|
440
|
+
tool=SimpleNamespace(), common=common_cfg, services=services
|
441
|
+
)
|
442
|
+
)
|
443
|
+
)
|
444
|
+
PaperDownloaderFactory.clear_cache()
|
445
|
+
result = PaperDownloaderFactory.get_default_service()
|
446
|
+
self.assertEqual(result, "pubmed")
|
447
|
+
|
448
|
+
# Test medrxiv default service
|
449
|
+
tool_cfg.default_service = "medrxiv"
|
450
|
+
mock_hydra.compose.return_value = SimpleNamespace(
|
451
|
+
tools=SimpleNamespace(
|
452
|
+
paper_download=SimpleNamespace(
|
453
|
+
tool=tool_cfg, common=common_cfg, services=services
|
454
|
+
)
|
455
|
+
)
|
456
|
+
)
|
457
|
+
PaperDownloaderFactory.clear_cache()
|
458
|
+
result = PaperDownloaderFactory.get_default_service()
|
459
|
+
self.assertEqual(result, "medrxiv")
|
460
|
+
|
461
|
+
# Test biorxiv default service
|
462
|
+
tool_cfg.default_service = "biorxiv"
|
463
|
+
PaperDownloaderFactory.clear_cache()
|
464
|
+
result = PaperDownloaderFactory.get_default_service()
|
465
|
+
self.assertEqual(result, "biorxiv")
|
466
|
+
|
467
|
+
|
468
|
+
class TestUnifiedConfigDoubleCheck(unittest.TestCase):
|
469
|
+
"""Covers the double-check return branch in _get_unified_config."""
|
470
|
+
|
471
|
+
@patch("aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.hydra")
|
472
|
+
@patch(
|
473
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.paper_downloader.GlobalHydra"
|
474
|
+
)
|
475
|
+
def test_double_check_inside_lock(self, mock_global_hydra, _mock_hydra):
|
476
|
+
"""tests the double-check branch in _get_unified_config using public create()."""
|
477
|
+
# avoid real hydra init path if we accidentally go there
|
478
|
+
mock_global_hydra.return_value.is_initialized.return_value = False
|
479
|
+
|
480
|
+
# start clean
|
481
|
+
_set_cached_config(None)
|
482
|
+
|
483
|
+
class _LockCtx:
|
484
|
+
"""lock context manager that simulates another thread setting the cache."""
|
485
|
+
|
486
|
+
def __enter__(self):
|
487
|
+
# simulate another thread setting the cache while the lock is held
|
488
|
+
_set_cached_config({"via": "enter"})
|
489
|
+
return self
|
490
|
+
|
491
|
+
def __exit__(self, exc_type, exc, tb):
|
492
|
+
"""exit context manager, no-op."""
|
493
|
+
return False
|
494
|
+
|
495
|
+
_set_config_lock(_LockCtx())
|
496
|
+
|
497
|
+
# Patch build_service_config so we can assert the *exact* object returned by double-check
|
498
|
+
with (
|
499
|
+
patch.object(PaperDownloaderFactory, "_build_service_config") as mock_build,
|
500
|
+
patch(
|
501
|
+
"aiagents4pharma.talk2scholars.tools."
|
502
|
+
"paper_download.paper_downloader.ArxivDownloader"
|
503
|
+
) as mock_arxiv,
|
504
|
+
):
|
505
|
+
|
506
|
+
def _check_and_return(cfg, _svc):
|
507
|
+
# ensure double-check returned our injected dict
|
508
|
+
self.assertEqual(cfg, {"via": "enter"})
|
509
|
+
# return a trivial config object for the downloader ctor
|
510
|
+
return SimpleNamespace()
|
511
|
+
|
512
|
+
mock_build.side_effect = _check_and_return
|
513
|
+
|
514
|
+
# call public API; this will invoke the double-check path internally
|
515
|
+
PaperDownloaderFactory.create("arxiv")
|
516
|
+
mock_arxiv.assert_called_once()
|
517
|
+
|
518
|
+
# cleanup
|
519
|
+
_set_cached_config(None)
|
520
|
+
_set_config_lock(None)
|
521
|
+
|
522
|
+
|
523
|
+
class TestHelperTinyCoverage(unittest.TestCase):
|
524
|
+
"""Covers tiny helper methods added to satisfy R0903."""
|
525
|
+
|
526
|
+
def test_slots_source_helpers(self):
|
527
|
+
"""yields public_attr via peek() and echo() methods."""
|
528
|
+
obj = _SlotsSource(public_val="x", private_val="y")
|
529
|
+
self.assertEqual(obj.peek(), "x")
|
530
|
+
self.assertEqual(obj.echo(), "x")
|
531
|
+
self.assertEqual(obj.echo("z"), "z")
|
532
|
+
|
533
|
+
def test_items_no_dict_size(self):
|
534
|
+
"""tests items() and size() methods of _ItemsNoDict."""
|
535
|
+
data = {"a": 1, "b": 2}
|
536
|
+
src = _ItemsNoDict(data)
|
537
|
+
self.assertEqual(src.size(), 2)
|
538
|
+
self.assertEqual(dict(src.items()), data)
|
539
|
+
|
540
|
+
def test_exploding_items_noop(self):
|
541
|
+
"""tests the _ExplodingItemsSlots class that raises in items()."""
|
542
|
+
src = _ExplodingItemsSlots()
|
543
|
+
# we only call the extra public method (noop) for coverage;
|
544
|
+
# .items() is meant to raise in other tests
|
545
|
+
self.assertIsNone(src.noop())
|