aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
  2. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
  3. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
  7. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
  8. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
  9. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
  10. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
  11. aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
  12. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
  13. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
  14. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
  15. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
  16. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
  17. aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
  18. aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
  19. aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
  20. aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
  21. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
  23. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
  24. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
  25. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
  32. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
  33. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
  34. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
  35. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
  36. aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
  37. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
  38. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
  39. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
  40. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
  41. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
  42. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
  43. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
  44. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
  45. aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
  46. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
  47. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
  48. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,6 @@
2
2
  Updated Unit Tests for the S2 agent (Semantic Scholar sub-agent).
3
3
  """
4
4
 
5
- # pylint: disable=redefined-outer-name
6
5
  from unittest import mock
7
6
  import pytest
8
7
  from langchain_core.messages import HumanMessage, AIMessage
@@ -10,7 +9,8 @@ from langchain_openai import ChatOpenAI
10
9
  from ..agents.s2_agent import get_app
11
10
  from ..state.state_talk2scholars import Talk2Scholars
12
11
 
13
- LLM_MODEL = ChatOpenAI(model='gpt-4o-mini', temperature=0)
12
+ LLM_MODEL = ChatOpenAI(model="gpt-4o-mini", temperature=0)
13
+
14
14
 
15
15
  @pytest.fixture(autouse=True)
16
16
  def mock_hydra_fixture():
@@ -2,7 +2,6 @@
2
2
  Unit tests for S2 tools functionality.
3
3
  """
4
4
 
5
- # pylint: disable=redefined-outer-name
6
5
  import pytest
7
6
  from langgraph.types import Command
8
7
  from ..tools.s2.display_dataframe import (
@@ -11,8 +10,8 @@ from ..tools.s2.display_dataframe import (
11
10
  )
12
11
 
13
12
 
14
- @pytest.fixture
15
- def initial_state():
13
+ @pytest.fixture(name="initial_state")
14
+ def initial_state_fixture():
16
15
  """Provides an empty initial state for tests."""
17
16
  return {"papers": {}, "multi_papers": {}}
18
17
 
@@ -0,0 +1,201 @@
1
+ """
2
+ Unit tests for S2 tools functionality.
3
+ """
4
+
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ import pytest
8
+ from langchain_core.messages import ToolMessage
9
+
10
+ from ..tools.s2.query_dataframe import NoPapersFoundError, query_dataframe
11
+
12
+
13
+ @pytest.fixture(name="initial_state")
14
+ def initial_state_fixture():
15
+ """Provides an empty initial state for tests with a dummy llm_model."""
16
+ return {"papers": {}, "multi_papers": {}, "llm_model": MagicMock()}
17
+
18
+
19
+ # Fixed test data for deterministic results
20
+ MOCK_SEARCH_RESPONSE = {
21
+ "data": [
22
+ {
23
+ "paperId": "123",
24
+ "title": "Machine Learning Basics",
25
+ "abstract": "An introduction to ML",
26
+ "year": 2023,
27
+ "citationCount": 100,
28
+ "url": "https://example.com/paper1",
29
+ "authors": [{"name": "Test Author"}],
30
+ }
31
+ ]
32
+ }
33
+
34
+ MOCK_STATE_PAPER = {
35
+ "123": {
36
+ "Title": "Machine Learning Basics",
37
+ "Abstract": "An introduction to ML",
38
+ "Year": 2023,
39
+ "Citation Count": 100,
40
+ "URL": "https://example.com/paper1",
41
+ }
42
+ }
43
+
44
+
45
+ class TestS2Tools:
46
+ """Unit tests for individual S2 tools"""
47
+
48
+ def test_query_dataframe_empty_state(self, initial_state):
49
+ """Tests query_dataframe tool behavior when no papers are found."""
50
+ # Calling without any papers should raise NoPapersFoundError
51
+ tool_input = {
52
+ "question": "List all papers",
53
+ "state": initial_state,
54
+ "tool_call_id": "test_id",
55
+ }
56
+ with pytest.raises(
57
+ NoPapersFoundError,
58
+ match="No papers found. A search needs to be performed first.",
59
+ ):
60
+ query_dataframe.run(tool_input)
61
+
62
+ @patch(
63
+ "aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
64
+ )
65
+ def test_query_dataframe_with_papers(self, mock_create_agent, initial_state):
66
+ """Tests querying papers when data is available."""
67
+ state = initial_state.copy()
68
+ state["last_displayed_papers"] = "papers"
69
+ state["papers"] = MOCK_STATE_PAPER
70
+
71
+ # Mock the dataframe agent instead of the LLM
72
+ mock_agent = MagicMock()
73
+ mock_agent.invoke.return_value = {"output": "Mocked response"}
74
+
75
+ mock_create_agent.return_value = (
76
+ mock_agent # Mock the function returning the agent
77
+ )
78
+
79
+ # Ensure that the output of query_dataframe is correctly structured
80
+ # Invoke the tool with a test tool_call_id
81
+ tool_input = {
82
+ "question": "List all papers",
83
+ "state": state,
84
+ "tool_call_id": "test_id",
85
+ }
86
+ result = query_dataframe.run(tool_input)
87
+ # The tool returns a Command with messages
88
+ assert hasattr(result, "update")
89
+ update = result.update
90
+ assert "messages" in update
91
+ msgs = update["messages"]
92
+ assert len(msgs) == 1
93
+ msg = msgs[0]
94
+ assert isinstance(msg, ToolMessage)
95
+ assert msg.content == "Mocked response"
96
+
97
+ @patch(
98
+ "aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
99
+ )
100
+ def test_query_dataframe_direct_mapping(self, mock_create_agent, initial_state):
101
+ """Tests query_dataframe when last_displayed_papers is a direct dict mapping."""
102
+ # Prepare state with direct mapping
103
+ state = initial_state.copy()
104
+ state["last_displayed_papers"] = MOCK_STATE_PAPER
105
+ # Mock the dataframe agent
106
+ mock_agent = MagicMock()
107
+ mock_agent.invoke.return_value = {"output": "Direct mapping response"}
108
+ mock_create_agent.return_value = mock_agent
109
+ # Invoke tool
110
+ # Invoke the tool with direct mapping and test tool_call_id
111
+ tool_input = {
112
+ "question": "Filter papers",
113
+ "state": state,
114
+ "tool_call_id": "test_id",
115
+ }
116
+ result = query_dataframe.run(tool_input)
117
+ update = result.update
118
+ assert "messages" in update
119
+ msgs = update["messages"]
120
+ assert len(msgs) == 1
121
+ msg = msgs[0]
122
+ assert isinstance(msg, ToolMessage)
123
+ assert msg.content == "Direct mapping response"
124
+
125
+ def test_query_dataframe_missing_llm(self, initial_state):
126
+ """Test that missing llm_model raises ValueError."""
127
+ # Remove llm_model
128
+ state = {k: v for k, v in initial_state.items() if k != "llm_model"}
129
+ state["last_displayed_papers"] = MOCK_STATE_PAPER
130
+ tool_input = {"question": "Test", "state": state, "tool_call_id": "test_id"}
131
+ with pytest.raises(ValueError) as exc:
132
+ query_dataframe.run(tool_input)
133
+ assert "Missing 'llm_model' in state." in str(exc.value)
134
+
135
+ def test_query_dataframe_invalid_mapping(self, initial_state):
136
+ """Test that invalid last_displayed_papers mapping raises ValueError."""
137
+ # Provide invalid mapping key
138
+ state = initial_state.copy()
139
+ state["last_displayed_papers"] = "nonexistent_key"
140
+ # llm_model present
141
+ tool_input = {"question": "Test", "state": state, "tool_call_id": "test_id"}
142
+ with pytest.raises(ValueError) as exc:
143
+ query_dataframe.run(tool_input)
144
+ assert "Could not resolve a valid metadata dictionary" in str(exc.value)
145
+
146
+ @patch(
147
+ "aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
148
+ )
149
+ def test_query_dataframe_extract_ids(self, mock_create_agent):
150
+ """Test extract_ids returns the raw list or single element correctly."""
151
+ # Prepare state with fake paper_ids column
152
+ state = {"llm_model": MagicMock()}
153
+ state_key = "papers"
154
+ dic = {
155
+ "p1": {"paper_ids": ["id1", "id2"]},
156
+ "p2": {"paper_ids": ["id3"]},
157
+ }
158
+ state["last_displayed_papers"] = dic
159
+ state[state_key] = dic # simulate indirect mapping
160
+ # Mock agent to echo the Python expression
161
+ mock_agent = MagicMock()
162
+ mock_agent.invoke.side_effect = lambda args, stream_mode=None: {
163
+ "output": args["input"]
164
+ }
165
+ mock_create_agent.return_value = mock_agent
166
+ # Test full list
167
+ tool_input = {
168
+ "question": "",
169
+ "state": state,
170
+ "tool_call_id": "tid",
171
+ "extract_ids": True,
172
+ "id_column": "paper_ids",
173
+ }
174
+ result = query_dataframe.run(tool_input)
175
+ output = result.update["messages"][0].content
176
+ # Should be the base list expression
177
+ expected = "df['paper_ids'].dropna().str[0].tolist()"
178
+ assert output == expected
179
+ # Test single element
180
+ tool_input["row_number"] = 2
181
+ result2 = query_dataframe.run(tool_input)
182
+ output2 = result2.update["messages"][0].content
183
+ expected2 = "df['paper_ids'].dropna().str[0].tolist()[1]"
184
+ assert output2 == expected2
185
+
186
+ def test_query_dataframe_extract_ids_missing_column(self, initial_state):
187
+ """Test that missing id_column raises ValueError when extract_ids=True."""
188
+ state = initial_state.copy()
189
+ state["last_displayed_papers"] = {"p1": {"paper_ids": ["id1"]}}
190
+ state["papers"] = state["last_displayed_papers"]
191
+ with pytest.raises(ValueError) as exc:
192
+ query_dataframe.run(
193
+ {
194
+ "question": "",
195
+ "state": state,
196
+ "tool_call_id": "tid",
197
+ "extract_ids": True,
198
+ "id_column": "",
199
+ }
200
+ )
201
+ assert "Must specify 'id_column' when extract_ids=True." in str(exc.value)
@@ -2,7 +2,6 @@
2
2
  Unit tests for S2 tools functionality.
3
3
  """
4
4
 
5
- # pylint: disable=redefined-outer-name
6
5
  from unittest.mock import patch
7
6
  import pytest
8
7
  from langgraph.types import Command
@@ -52,13 +51,15 @@ class TestS2Tools:
52
51
 
53
52
  assert isinstance(result, Command)
54
53
  assert "messages" in result.update
55
- assert (
56
- "Paper ID for 'Machine Learning Basics' is: 123"
57
- in result.update["messages"][0].content
58
- )
54
+ # The tool now returns the raw paper ID as the message content
55
+ content = result.update["messages"][0].content
56
+ assert content == "123"
59
57
 
60
- def test_retrieve_semantic_scholar_paper_id_no_results(self):
58
+ @patch("requests.get")
59
+ def test_retrieve_semantic_scholar_paper_id_no_results(self, mock_get):
61
60
  """Test retrieving a paper ID when no results are found."""
61
+ mock_get.return_value.json.return_value = {"data": []}
62
+ mock_get.return_value.status_code = 200
62
63
  with pytest.raises(ValueError, match="No papers found for query: UnknownPaper"):
63
64
  retrieve_semantic_scholar_paper_id.invoke(
64
65
  input={"paper_title": "UnknownPaper", "tool_call_id": "test123"}
@@ -0,0 +1,413 @@
1
+ """
2
+ Unit tests for external ID handling in S2 helper modules.
3
+ """
4
+
5
+ from types import SimpleNamespace
6
+
7
+ import hydra
8
+ import pytest
9
+ import requests
10
+
11
+ from aiagents4pharma.talk2scholars.tools.s2.utils.multi_helper import MultiPaperRecData
12
+ from aiagents4pharma.talk2scholars.tools.s2.utils.search_helper import SearchData
13
+ from aiagents4pharma.talk2scholars.tools.s2.utils.single_helper import (
14
+ SinglePaperRecData,
15
+ )
16
+
17
+
18
+ @pytest.fixture(autouse=True)
19
+ def patch_hydra(monkeypatch):
20
+ """Patch Hydra's initialize and compose to provide dummy configs for tests."""
21
+
22
+ class DummyHydraContext:
23
+ """Dummy Hydra context manager to bypass config loading."""
24
+
25
+ def __enter__(self):
26
+ return None
27
+
28
+ def __exit__(self, exc_type, exc_val, traceback):
29
+ return False
30
+
31
+ # Dummy config with necessary fields for multi, search, and single helpers
32
+ dummy_cfg = SimpleNamespace(
33
+ tools=SimpleNamespace(
34
+ multi_paper_recommendation=SimpleNamespace(
35
+ api_endpoint="",
36
+ headers={},
37
+ api_fields=["paperId", "title", "authors", "externalIds"],
38
+ request_timeout=1,
39
+ ),
40
+ search=SimpleNamespace(
41
+ api_endpoint="",
42
+ api_fields=["paperId", "title", "authors", "externalIds"],
43
+ ),
44
+ single_paper_recommendation=SimpleNamespace(
45
+ api_endpoint="",
46
+ api_fields=["paperId", "title", "authors", "externalIds"],
47
+ request_timeout=1,
48
+ recommendation_params=SimpleNamespace(from_pool="test_pool"),
49
+ ),
50
+ )
51
+ )
52
+ monkeypatch.setattr(
53
+ hydra, "initialize", lambda version_base, config_path: DummyHydraContext()
54
+ )
55
+ monkeypatch.setattr(hydra, "compose", lambda config_name, overrides: dummy_cfg)
56
+
57
+
58
+ def test_multi_helper_pmc_and_doi_ids(monkeypatch):
59
+ """Test PubMedCentral and DOI ID handling in MultiPaperRecData."""
60
+ rec = MultiPaperRecData(paper_ids=["p"], limit=1, year=None, tool_call_id="tid")
61
+ # Setup dummy API response
62
+ data = {
63
+ "recommendedPapers": [
64
+ {
65
+ "paperId": "p1",
66
+ "title": "Test",
67
+ "authors": [{"name": "A", "authorId": "A1"}],
68
+ "externalIds": {"PubMedCentral": "pmc1", "DOI": "doi1"},
69
+ }
70
+ ]
71
+ }
72
+ response = SimpleNamespace(
73
+ status_code=200, json=lambda: data, raise_for_status=lambda: None
74
+ )
75
+ monkeypatch.setattr(requests, "post", lambda *args, **kwargs: response)
76
+ results = rec.process_recommendations()
77
+ ids_list = results["papers"]["p1"]["paper_ids"]
78
+ assert ids_list == ["pmc:pmc1", "doi:doi1"]
79
+
80
+
81
+ def test_search_helper_pmc_and_doi_ids(monkeypatch):
82
+ """Test PubMedCentral and DOI ID handling in SearchData."""
83
+ sd = SearchData(query="q", limit=1, year=None, tool_call_id="tid")
84
+ data = {
85
+ "data": [
86
+ {
87
+ "paperId": "s1",
88
+ "title": "Test",
89
+ "authors": [{"name": "B", "authorId": "B1"}],
90
+ "externalIds": {"PubMedCentral": "pmc2", "DOI": "doi2"},
91
+ }
92
+ ]
93
+ }
94
+ response = SimpleNamespace(
95
+ status_code=200, json=lambda: data, raise_for_status=lambda: None
96
+ )
97
+ monkeypatch.setattr(requests, "get", lambda *args, **kwargs: response)
98
+ results = sd.process_search()
99
+ ids_list = results["papers"]["s1"]["paper_ids"]
100
+ assert ids_list == ["pmc:pmc2", "doi:doi2"]
101
+
102
+
103
+ def test_single_helper_pmc_and_doi_ids(monkeypatch):
104
+ """Test PubMedCentral and DOI ID handling in SinglePaperRecData."""
105
+ sp = SinglePaperRecData(paper_id="x", limit=1, year=None, tool_call_id="tid")
106
+ data = {
107
+ "recommendedPapers": [
108
+ {
109
+ "paperId": "x1",
110
+ "title": "Test",
111
+ "authors": [{"name": "C", "authorId": "C1"}],
112
+ "externalIds": {"PubMedCentral": "pmc3", "DOI": "doi3"},
113
+ }
114
+ ]
115
+ }
116
+ response = SimpleNamespace(
117
+ status_code=200, json=lambda: data, raise_for_status=lambda: None
118
+ )
119
+ monkeypatch.setattr(requests, "get", lambda *args, **kwargs: response)
120
+ results = sp.process_recommendations()
121
+ ids_list = results["papers"]["x1"]["paper_ids"]
122
+ assert ids_list == ["pmc:pmc3", "doi:doi3"]
123
+
124
+
125
+ def test_helpers_empty_when_no_external_ids(monkeypatch):
126
+ """Test that MultiPaperRecData, SearchData, and SinglePaperRecData
127
+ return empty lists when externalIds are missing or empty."""
128
+ # Test that no IDs are returned when externalIds is empty or missing
129
+ rec = MultiPaperRecData(paper_ids=["p"], limit=1, year=None, tool_call_id="tid")
130
+
131
+ # Simulate empty externalIds in API response
132
+ class DummyResp1:
133
+ """dummy response for multi-paper recommendation with empty externalIds"""
134
+
135
+ def __init__(self, data):
136
+ """initialize with data"""
137
+ self._data = data
138
+ self.status_code = 200
139
+
140
+ def json(self):
141
+ """json method to return data"""
142
+ return self._data
143
+
144
+ def raise_for_status(self):
145
+ """raise_for_status method to simulate successful response"""
146
+ return None
147
+
148
+ def dummy_post1(*_, **__):
149
+ """dummy response for multi-paper recommendation with empty externalIds"""
150
+ return DummyResp1(
151
+ {
152
+ "recommendedPapers": [
153
+ {
154
+ "paperId": "p2",
155
+ "title": "Test2",
156
+ "authors": [{"name": "D", "authorId": "D1"}],
157
+ "externalIds": {},
158
+ },
159
+ ]
160
+ }
161
+ )
162
+
163
+ monkeypatch.setattr(requests, "post", dummy_post1)
164
+ assert rec.process_recommendations()["papers"].get("p2", {}).get("paper_ids") == []
165
+ sd = SearchData(query="q2", limit=1, year=None, tool_call_id="tid2")
166
+
167
+ # Simulate empty externalIds in search API response
168
+ class DummyResp2:
169
+ """dummy response for search with empty externalIds"""
170
+
171
+ def __init__(self, data):
172
+ """initialize with data"""
173
+ self._data = data
174
+ self.status_code = 200
175
+
176
+ def json(self):
177
+ """json method to return data"""
178
+ return self._data
179
+
180
+ def raise_for_status(self):
181
+ """raise_for_status method to simulate successful response"""
182
+ return None
183
+
184
+ def dummy_get2(*_, **__):
185
+ """dummy response for search with empty externalIds"""
186
+ return DummyResp2(
187
+ {
188
+ "data": [
189
+ {
190
+ "paperId": "s2",
191
+ "title": "Test2",
192
+ "authors": [{"name": "E", "authorId": "E1"}],
193
+ "externalIds": {},
194
+ },
195
+ ]
196
+ }
197
+ )
198
+
199
+ monkeypatch.setattr(requests, "get", dummy_get2)
200
+ assert sd.process_search()["papers"].get("s2", {}).get("paper_ids") == []
201
+ sp = SinglePaperRecData(paper_id="y", limit=1, year=None, tool_call_id="tid3")
202
+
203
+ # Simulate empty externalIds in single-paper API response
204
+ class DummyResp3:
205
+ """dummy response for single paper recommendation with empty externalIds"""
206
+
207
+ def __init__(self, data):
208
+ """initialize with data"""
209
+ self._data = data
210
+ self.status_code = 200
211
+
212
+ def json(self):
213
+ """json method to return data"""
214
+ return self._data
215
+
216
+ def raise_for_status(self):
217
+ """raise_for_status method to simulate successful response"""
218
+ return None
219
+
220
+ def dummy_get3(*_, **__):
221
+ """dummy response for single paper recommendation with empty externalIds"""
222
+ return DummyResp3(
223
+ {
224
+ "recommendedPapers": [
225
+ {
226
+ "paperId": "y1",
227
+ "title": "Test3",
228
+ "authors": [{"name": "F", "authorId": "F1"}],
229
+ "externalIds": {},
230
+ },
231
+ ]
232
+ }
233
+ )
234
+
235
+ monkeypatch.setattr(requests, "get", dummy_get3)
236
+ assert sp.process_recommendations()["papers"].get("y1", {}).get("paper_ids") == []
237
+
238
+
239
+ def test_multi_helper_arxiv_and_pubmed_ids(monkeypatch):
240
+ """Test ArXiv and PubMed ID handling in MultiPaperRecData."""
241
+ rec = MultiPaperRecData(paper_ids=["p"], limit=1, year=None, tool_call_id="tid")
242
+
243
+ class DummyResp5:
244
+ """dummy response for multi-paper recommendation with ArXiv and PubMed IDs"""
245
+
246
+ def __init__(self, data):
247
+ """initialize with data"""
248
+ self._data = data
249
+ self.status_code = 200
250
+
251
+ def json(self):
252
+ """json method to return data"""
253
+ return self._data
254
+
255
+ def raise_for_status(self):
256
+ """raise_for_status method to simulate successful response"""
257
+ return None
258
+
259
+ def dummy_post5(*_, **__):
260
+ """dummy response for multi-paper recommendation with ArXiv and PubMed IDs"""
261
+ return DummyResp5(
262
+ {
263
+ "recommendedPapers": [
264
+ {
265
+ "paperId": "pX",
266
+ "title": "TestX",
267
+ "authors": [{"name": "A", "authorId": "A1"}],
268
+ "externalIds": {"ArXiv": "ax1", "PubMed": "pm1"},
269
+ },
270
+ ]
271
+ }
272
+ )
273
+
274
+ monkeypatch.setattr(requests, "post", dummy_post5)
275
+ ids_list = rec.process_recommendations()["papers"].get("pX", {}).get("paper_ids")
276
+ assert ids_list == ["arxiv:ax1", "pubmed:pm1"]
277
+
278
+
279
+ def test_search_helper_arxiv_and_pubmed_ids(monkeypatch):
280
+ """Test ArXiv and PubMed ID handling in SearchData."""
281
+ sd = SearchData(query="q", limit=1, year=None, tool_call_id="tid")
282
+
283
+ class DummyResp6:
284
+ """dummy response for search with ArXiv and PubMed IDs"""
285
+
286
+ def __init__(self, data):
287
+ """initialize with data"""
288
+ self._data = data
289
+ self.status_code = 200
290
+
291
+ def json(self):
292
+ """json method to return data"""
293
+ return self._data
294
+
295
+ def raise_for_status(self):
296
+ """ "raise_for_status method to simulate successful response"""
297
+ return None
298
+
299
+ def dummy_get6(*_, **__):
300
+ """dummy response for search with ArXiv and PubMed IDs"""
301
+ return DummyResp6(
302
+ {
303
+ "data": [
304
+ {
305
+ "paperId": "sX",
306
+ "title": "TestS",
307
+ "authors": [{"name": "B", "authorId": "B1"}],
308
+ "externalIds": {"ArXiv": "ax2", "PubMed": "pm2"},
309
+ },
310
+ ]
311
+ }
312
+ )
313
+
314
+ monkeypatch.setattr(requests, "get", dummy_get6)
315
+ ids_list = sd.process_search()["papers"].get("sX", {}).get("paper_ids")
316
+ assert ids_list == ["arxiv:ax2", "pubmed:pm2"]
317
+
318
+
319
+ def test_single_helper_arxiv_and_pubmed_ids(monkeypatch):
320
+ """Test ArXiv and PubMed ID handling in SinglePaperRecData."""
321
+ sp = SinglePaperRecData(paper_id="x", limit=1, year=None, tool_call_id="tid")
322
+
323
+ class DummyResp7:
324
+ """dummy response for single paper recommendation with ArXiv and PubMed IDs"""
325
+
326
+ def __init__(self, data):
327
+ """initialize with data"""
328
+ self._data = data
329
+ self.status_code = 200
330
+
331
+ def json(self):
332
+ """json method to return data"""
333
+ return self._data
334
+
335
+ def raise_for_status(self):
336
+ """raise_for_status method to simulate successful response"""
337
+ return None
338
+
339
+ def dummy_get7(*_, **__):
340
+ """dummy response for single paper recommendation with ArXiv and PubMed IDs"""
341
+ return DummyResp7(
342
+ {
343
+ "recommendedPapers": [
344
+ {
345
+ "paperId": "xY",
346
+ "title": "TestY",
347
+ "authors": [{"name": "C", "authorId": "C1"}],
348
+ "externalIds": {"ArXiv": "ax3", "PubMed": "pm3"},
349
+ },
350
+ ]
351
+ }
352
+ )
353
+
354
+ monkeypatch.setattr(requests, "get", dummy_get7)
355
+ ids_list = sp.process_recommendations()["papers"].get("xY", {}).get("paper_ids")
356
+ assert ids_list == ["arxiv:ax3", "pubmed:pm3"]
357
+
358
+
359
+ def test_search_helper_create_content_snippet(monkeypatch):
360
+ """Test that SearchData._create_content includes snippets appropriately."""
361
+ sd = SearchData(query="QueryX", limit=3, year="2022", tool_call_id="tid")
362
+ sd.filtered_papers = {
363
+ "p1": {"Title": "Title1", "Year": "2021", "Abstract": "First. Second. Third."},
364
+ "p2": {"Title": "Title2", "Year": "2020", "Abstract": ""},
365
+ }
366
+ # Stub out network fetch/filter to rely on preset filtered_papers
367
+ monkeypatch.setattr(SearchData, "_fetch_papers", lambda self: None)
368
+ monkeypatch.setattr(SearchData, "_filter_papers", lambda self: None)
369
+ results = sd.process_search()
370
+ content = results["content"]
371
+ assert "1. Title1 (2021)" in content
372
+ assert "Abstract snippet: First. Second." in content
373
+ assert "2. Title2 (2020)" in content
374
+ # Only one snippet present
375
+ assert content.count("Abstract snippet:") == 1
376
+
377
+
378
+ def test_single_helper_create_content_snippet(monkeypatch):
379
+ """Test that SinglePaperRecData._create_content includes snippets appropriately."""
380
+ sp = SinglePaperRecData(paper_id="pid", limit=2, year=None, tool_call_id="tid")
381
+ sp.filtered_papers = {
382
+ "x1": {"Title": "STitle1", "Year": "2019", "Abstract": "SOne. STwo. SThree."},
383
+ "x2": {"Title": "STitle2", "Year": "2018", "Abstract": ""},
384
+ }
385
+ # Stub out network fetch/filter to rely on preset filtered_papers
386
+ monkeypatch.setattr(SinglePaperRecData, "_fetch_recommendations", lambda self: None)
387
+ monkeypatch.setattr(SinglePaperRecData, "_filter_papers", lambda self: None)
388
+ results = sp.process_recommendations()
389
+ content = results["content"]
390
+ assert "1. STitle1 (2019)" in content
391
+ assert "Abstract snippet: SOne. STwo." in content
392
+ assert "2. STitle2 (2018)" in content
393
+ assert content.count("Abstract snippet:") == 1
394
+
395
+
396
+ def test_multi_helper_create_content_snippet(monkeypatch):
397
+ """Test that MultiPaperRecData._create_content includes snippets appropriately."""
398
+ mr = MultiPaperRecData(
399
+ paper_ids=["a", "b"], limit=2, year="2021", tool_call_id="tid"
400
+ )
401
+ mr.filtered_papers = {
402
+ "m1": {"Title": "MTitle1", "Year": "2017", "Abstract": "MOne. MTwo. MThree."},
403
+ "m2": {"Title": "MTitle2", "Year": "2016", "Abstract": ""},
404
+ }
405
+ # Stub out network fetch/filter to rely on preset filtered_papers
406
+ monkeypatch.setattr(MultiPaperRecData, "_fetch_recommendations", lambda self: None)
407
+ monkeypatch.setattr(MultiPaperRecData, "_filter_papers", lambda self: None)
408
+ results = mr.process_recommendations()
409
+ content = results["content"]
410
+ assert "1. MTitle1 (2017)" in content
411
+ assert "Abstract snippet: MOne. MTwo." in content
412
+ assert "2. MTitle2 (2016)" in content
413
+ assert content.count("Abstract snippet:") == 1