aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
- aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
- aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
- aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
- aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
- aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
- aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,6 @@
|
|
2
2
|
Updated Unit Tests for the S2 agent (Semantic Scholar sub-agent).
|
3
3
|
"""
|
4
4
|
|
5
|
-
# pylint: disable=redefined-outer-name
|
6
5
|
from unittest import mock
|
7
6
|
import pytest
|
8
7
|
from langchain_core.messages import HumanMessage, AIMessage
|
@@ -10,7 +9,8 @@ from langchain_openai import ChatOpenAI
|
|
10
9
|
from ..agents.s2_agent import get_app
|
11
10
|
from ..state.state_talk2scholars import Talk2Scholars
|
12
11
|
|
13
|
-
LLM_MODEL = ChatOpenAI(model=
|
12
|
+
LLM_MODEL = ChatOpenAI(model="gpt-4o-mini", temperature=0)
|
13
|
+
|
14
14
|
|
15
15
|
@pytest.fixture(autouse=True)
|
16
16
|
def mock_hydra_fixture():
|
@@ -2,7 +2,6 @@
|
|
2
2
|
Unit tests for S2 tools functionality.
|
3
3
|
"""
|
4
4
|
|
5
|
-
# pylint: disable=redefined-outer-name
|
6
5
|
import pytest
|
7
6
|
from langgraph.types import Command
|
8
7
|
from ..tools.s2.display_dataframe import (
|
@@ -11,8 +10,8 @@ from ..tools.s2.display_dataframe import (
|
|
11
10
|
)
|
12
11
|
|
13
12
|
|
14
|
-
@pytest.fixture
|
15
|
-
def
|
13
|
+
@pytest.fixture(name="initial_state")
|
14
|
+
def initial_state_fixture():
|
16
15
|
"""Provides an empty initial state for tests."""
|
17
16
|
return {"papers": {}, "multi_papers": {}}
|
18
17
|
|
@@ -0,0 +1,201 @@
|
|
1
|
+
"""
|
2
|
+
Unit tests for S2 tools functionality.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from unittest.mock import MagicMock, patch
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from langchain_core.messages import ToolMessage
|
9
|
+
|
10
|
+
from ..tools.s2.query_dataframe import NoPapersFoundError, query_dataframe
|
11
|
+
|
12
|
+
|
13
|
+
@pytest.fixture(name="initial_state")
|
14
|
+
def initial_state_fixture():
|
15
|
+
"""Provides an empty initial state for tests with a dummy llm_model."""
|
16
|
+
return {"papers": {}, "multi_papers": {}, "llm_model": MagicMock()}
|
17
|
+
|
18
|
+
|
19
|
+
# Fixed test data for deterministic results
|
20
|
+
MOCK_SEARCH_RESPONSE = {
|
21
|
+
"data": [
|
22
|
+
{
|
23
|
+
"paperId": "123",
|
24
|
+
"title": "Machine Learning Basics",
|
25
|
+
"abstract": "An introduction to ML",
|
26
|
+
"year": 2023,
|
27
|
+
"citationCount": 100,
|
28
|
+
"url": "https://example.com/paper1",
|
29
|
+
"authors": [{"name": "Test Author"}],
|
30
|
+
}
|
31
|
+
]
|
32
|
+
}
|
33
|
+
|
34
|
+
MOCK_STATE_PAPER = {
|
35
|
+
"123": {
|
36
|
+
"Title": "Machine Learning Basics",
|
37
|
+
"Abstract": "An introduction to ML",
|
38
|
+
"Year": 2023,
|
39
|
+
"Citation Count": 100,
|
40
|
+
"URL": "https://example.com/paper1",
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
class TestS2Tools:
|
46
|
+
"""Unit tests for individual S2 tools"""
|
47
|
+
|
48
|
+
def test_query_dataframe_empty_state(self, initial_state):
|
49
|
+
"""Tests query_dataframe tool behavior when no papers are found."""
|
50
|
+
# Calling without any papers should raise NoPapersFoundError
|
51
|
+
tool_input = {
|
52
|
+
"question": "List all papers",
|
53
|
+
"state": initial_state,
|
54
|
+
"tool_call_id": "test_id",
|
55
|
+
}
|
56
|
+
with pytest.raises(
|
57
|
+
NoPapersFoundError,
|
58
|
+
match="No papers found. A search needs to be performed first.",
|
59
|
+
):
|
60
|
+
query_dataframe.run(tool_input)
|
61
|
+
|
62
|
+
@patch(
|
63
|
+
"aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
|
64
|
+
)
|
65
|
+
def test_query_dataframe_with_papers(self, mock_create_agent, initial_state):
|
66
|
+
"""Tests querying papers when data is available."""
|
67
|
+
state = initial_state.copy()
|
68
|
+
state["last_displayed_papers"] = "papers"
|
69
|
+
state["papers"] = MOCK_STATE_PAPER
|
70
|
+
|
71
|
+
# Mock the dataframe agent instead of the LLM
|
72
|
+
mock_agent = MagicMock()
|
73
|
+
mock_agent.invoke.return_value = {"output": "Mocked response"}
|
74
|
+
|
75
|
+
mock_create_agent.return_value = (
|
76
|
+
mock_agent # Mock the function returning the agent
|
77
|
+
)
|
78
|
+
|
79
|
+
# Ensure that the output of query_dataframe is correctly structured
|
80
|
+
# Invoke the tool with a test tool_call_id
|
81
|
+
tool_input = {
|
82
|
+
"question": "List all papers",
|
83
|
+
"state": state,
|
84
|
+
"tool_call_id": "test_id",
|
85
|
+
}
|
86
|
+
result = query_dataframe.run(tool_input)
|
87
|
+
# The tool returns a Command with messages
|
88
|
+
assert hasattr(result, "update")
|
89
|
+
update = result.update
|
90
|
+
assert "messages" in update
|
91
|
+
msgs = update["messages"]
|
92
|
+
assert len(msgs) == 1
|
93
|
+
msg = msgs[0]
|
94
|
+
assert isinstance(msg, ToolMessage)
|
95
|
+
assert msg.content == "Mocked response"
|
96
|
+
|
97
|
+
@patch(
|
98
|
+
"aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
|
99
|
+
)
|
100
|
+
def test_query_dataframe_direct_mapping(self, mock_create_agent, initial_state):
|
101
|
+
"""Tests query_dataframe when last_displayed_papers is a direct dict mapping."""
|
102
|
+
# Prepare state with direct mapping
|
103
|
+
state = initial_state.copy()
|
104
|
+
state["last_displayed_papers"] = MOCK_STATE_PAPER
|
105
|
+
# Mock the dataframe agent
|
106
|
+
mock_agent = MagicMock()
|
107
|
+
mock_agent.invoke.return_value = {"output": "Direct mapping response"}
|
108
|
+
mock_create_agent.return_value = mock_agent
|
109
|
+
# Invoke tool
|
110
|
+
# Invoke the tool with direct mapping and test tool_call_id
|
111
|
+
tool_input = {
|
112
|
+
"question": "Filter papers",
|
113
|
+
"state": state,
|
114
|
+
"tool_call_id": "test_id",
|
115
|
+
}
|
116
|
+
result = query_dataframe.run(tool_input)
|
117
|
+
update = result.update
|
118
|
+
assert "messages" in update
|
119
|
+
msgs = update["messages"]
|
120
|
+
assert len(msgs) == 1
|
121
|
+
msg = msgs[0]
|
122
|
+
assert isinstance(msg, ToolMessage)
|
123
|
+
assert msg.content == "Direct mapping response"
|
124
|
+
|
125
|
+
def test_query_dataframe_missing_llm(self, initial_state):
|
126
|
+
"""Test that missing llm_model raises ValueError."""
|
127
|
+
# Remove llm_model
|
128
|
+
state = {k: v for k, v in initial_state.items() if k != "llm_model"}
|
129
|
+
state["last_displayed_papers"] = MOCK_STATE_PAPER
|
130
|
+
tool_input = {"question": "Test", "state": state, "tool_call_id": "test_id"}
|
131
|
+
with pytest.raises(ValueError) as exc:
|
132
|
+
query_dataframe.run(tool_input)
|
133
|
+
assert "Missing 'llm_model' in state." in str(exc.value)
|
134
|
+
|
135
|
+
def test_query_dataframe_invalid_mapping(self, initial_state):
|
136
|
+
"""Test that invalid last_displayed_papers mapping raises ValueError."""
|
137
|
+
# Provide invalid mapping key
|
138
|
+
state = initial_state.copy()
|
139
|
+
state["last_displayed_papers"] = "nonexistent_key"
|
140
|
+
# llm_model present
|
141
|
+
tool_input = {"question": "Test", "state": state, "tool_call_id": "test_id"}
|
142
|
+
with pytest.raises(ValueError) as exc:
|
143
|
+
query_dataframe.run(tool_input)
|
144
|
+
assert "Could not resolve a valid metadata dictionary" in str(exc.value)
|
145
|
+
|
146
|
+
@patch(
|
147
|
+
"aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
|
148
|
+
)
|
149
|
+
def test_query_dataframe_extract_ids(self, mock_create_agent):
|
150
|
+
"""Test extract_ids returns the raw list or single element correctly."""
|
151
|
+
# Prepare state with fake paper_ids column
|
152
|
+
state = {"llm_model": MagicMock()}
|
153
|
+
state_key = "papers"
|
154
|
+
dic = {
|
155
|
+
"p1": {"paper_ids": ["id1", "id2"]},
|
156
|
+
"p2": {"paper_ids": ["id3"]},
|
157
|
+
}
|
158
|
+
state["last_displayed_papers"] = dic
|
159
|
+
state[state_key] = dic # simulate indirect mapping
|
160
|
+
# Mock agent to echo the Python expression
|
161
|
+
mock_agent = MagicMock()
|
162
|
+
mock_agent.invoke.side_effect = lambda args, stream_mode=None: {
|
163
|
+
"output": args["input"]
|
164
|
+
}
|
165
|
+
mock_create_agent.return_value = mock_agent
|
166
|
+
# Test full list
|
167
|
+
tool_input = {
|
168
|
+
"question": "",
|
169
|
+
"state": state,
|
170
|
+
"tool_call_id": "tid",
|
171
|
+
"extract_ids": True,
|
172
|
+
"id_column": "paper_ids",
|
173
|
+
}
|
174
|
+
result = query_dataframe.run(tool_input)
|
175
|
+
output = result.update["messages"][0].content
|
176
|
+
# Should be the base list expression
|
177
|
+
expected = "df['paper_ids'].dropna().str[0].tolist()"
|
178
|
+
assert output == expected
|
179
|
+
# Test single element
|
180
|
+
tool_input["row_number"] = 2
|
181
|
+
result2 = query_dataframe.run(tool_input)
|
182
|
+
output2 = result2.update["messages"][0].content
|
183
|
+
expected2 = "df['paper_ids'].dropna().str[0].tolist()[1]"
|
184
|
+
assert output2 == expected2
|
185
|
+
|
186
|
+
def test_query_dataframe_extract_ids_missing_column(self, initial_state):
|
187
|
+
"""Test that missing id_column raises ValueError when extract_ids=True."""
|
188
|
+
state = initial_state.copy()
|
189
|
+
state["last_displayed_papers"] = {"p1": {"paper_ids": ["id1"]}}
|
190
|
+
state["papers"] = state["last_displayed_papers"]
|
191
|
+
with pytest.raises(ValueError) as exc:
|
192
|
+
query_dataframe.run(
|
193
|
+
{
|
194
|
+
"question": "",
|
195
|
+
"state": state,
|
196
|
+
"tool_call_id": "tid",
|
197
|
+
"extract_ids": True,
|
198
|
+
"id_column": "",
|
199
|
+
}
|
200
|
+
)
|
201
|
+
assert "Must specify 'id_column' when extract_ids=True." in str(exc.value)
|
@@ -2,7 +2,6 @@
|
|
2
2
|
Unit tests for S2 tools functionality.
|
3
3
|
"""
|
4
4
|
|
5
|
-
# pylint: disable=redefined-outer-name
|
6
5
|
from unittest.mock import patch
|
7
6
|
import pytest
|
8
7
|
from langgraph.types import Command
|
@@ -52,13 +51,15 @@ class TestS2Tools:
|
|
52
51
|
|
53
52
|
assert isinstance(result, Command)
|
54
53
|
assert "messages" in result.update
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
)
|
54
|
+
# The tool now returns the raw paper ID as the message content
|
55
|
+
content = result.update["messages"][0].content
|
56
|
+
assert content == "123"
|
59
57
|
|
60
|
-
|
58
|
+
@patch("requests.get")
|
59
|
+
def test_retrieve_semantic_scholar_paper_id_no_results(self, mock_get):
|
61
60
|
"""Test retrieving a paper ID when no results are found."""
|
61
|
+
mock_get.return_value.json.return_value = {"data": []}
|
62
|
+
mock_get.return_value.status_code = 200
|
62
63
|
with pytest.raises(ValueError, match="No papers found for query: UnknownPaper"):
|
63
64
|
retrieve_semantic_scholar_paper_id.invoke(
|
64
65
|
input={"paper_title": "UnknownPaper", "tool_call_id": "test123"}
|
@@ -0,0 +1,413 @@
|
|
1
|
+
"""
|
2
|
+
Unit tests for external ID handling in S2 helper modules.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from types import SimpleNamespace
|
6
|
+
|
7
|
+
import hydra
|
8
|
+
import pytest
|
9
|
+
import requests
|
10
|
+
|
11
|
+
from aiagents4pharma.talk2scholars.tools.s2.utils.multi_helper import MultiPaperRecData
|
12
|
+
from aiagents4pharma.talk2scholars.tools.s2.utils.search_helper import SearchData
|
13
|
+
from aiagents4pharma.talk2scholars.tools.s2.utils.single_helper import (
|
14
|
+
SinglePaperRecData,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
@pytest.fixture(autouse=True)
|
19
|
+
def patch_hydra(monkeypatch):
|
20
|
+
"""Patch Hydra's initialize and compose to provide dummy configs for tests."""
|
21
|
+
|
22
|
+
class DummyHydraContext:
|
23
|
+
"""Dummy Hydra context manager to bypass config loading."""
|
24
|
+
|
25
|
+
def __enter__(self):
|
26
|
+
return None
|
27
|
+
|
28
|
+
def __exit__(self, exc_type, exc_val, traceback):
|
29
|
+
return False
|
30
|
+
|
31
|
+
# Dummy config with necessary fields for multi, search, and single helpers
|
32
|
+
dummy_cfg = SimpleNamespace(
|
33
|
+
tools=SimpleNamespace(
|
34
|
+
multi_paper_recommendation=SimpleNamespace(
|
35
|
+
api_endpoint="",
|
36
|
+
headers={},
|
37
|
+
api_fields=["paperId", "title", "authors", "externalIds"],
|
38
|
+
request_timeout=1,
|
39
|
+
),
|
40
|
+
search=SimpleNamespace(
|
41
|
+
api_endpoint="",
|
42
|
+
api_fields=["paperId", "title", "authors", "externalIds"],
|
43
|
+
),
|
44
|
+
single_paper_recommendation=SimpleNamespace(
|
45
|
+
api_endpoint="",
|
46
|
+
api_fields=["paperId", "title", "authors", "externalIds"],
|
47
|
+
request_timeout=1,
|
48
|
+
recommendation_params=SimpleNamespace(from_pool="test_pool"),
|
49
|
+
),
|
50
|
+
)
|
51
|
+
)
|
52
|
+
monkeypatch.setattr(
|
53
|
+
hydra, "initialize", lambda version_base, config_path: DummyHydraContext()
|
54
|
+
)
|
55
|
+
monkeypatch.setattr(hydra, "compose", lambda config_name, overrides: dummy_cfg)
|
56
|
+
|
57
|
+
|
58
|
+
def test_multi_helper_pmc_and_doi_ids(monkeypatch):
|
59
|
+
"""Test PubMedCentral and DOI ID handling in MultiPaperRecData."""
|
60
|
+
rec = MultiPaperRecData(paper_ids=["p"], limit=1, year=None, tool_call_id="tid")
|
61
|
+
# Setup dummy API response
|
62
|
+
data = {
|
63
|
+
"recommendedPapers": [
|
64
|
+
{
|
65
|
+
"paperId": "p1",
|
66
|
+
"title": "Test",
|
67
|
+
"authors": [{"name": "A", "authorId": "A1"}],
|
68
|
+
"externalIds": {"PubMedCentral": "pmc1", "DOI": "doi1"},
|
69
|
+
}
|
70
|
+
]
|
71
|
+
}
|
72
|
+
response = SimpleNamespace(
|
73
|
+
status_code=200, json=lambda: data, raise_for_status=lambda: None
|
74
|
+
)
|
75
|
+
monkeypatch.setattr(requests, "post", lambda *args, **kwargs: response)
|
76
|
+
results = rec.process_recommendations()
|
77
|
+
ids_list = results["papers"]["p1"]["paper_ids"]
|
78
|
+
assert ids_list == ["pmc:pmc1", "doi:doi1"]
|
79
|
+
|
80
|
+
|
81
|
+
def test_search_helper_pmc_and_doi_ids(monkeypatch):
|
82
|
+
"""Test PubMedCentral and DOI ID handling in SearchData."""
|
83
|
+
sd = SearchData(query="q", limit=1, year=None, tool_call_id="tid")
|
84
|
+
data = {
|
85
|
+
"data": [
|
86
|
+
{
|
87
|
+
"paperId": "s1",
|
88
|
+
"title": "Test",
|
89
|
+
"authors": [{"name": "B", "authorId": "B1"}],
|
90
|
+
"externalIds": {"PubMedCentral": "pmc2", "DOI": "doi2"},
|
91
|
+
}
|
92
|
+
]
|
93
|
+
}
|
94
|
+
response = SimpleNamespace(
|
95
|
+
status_code=200, json=lambda: data, raise_for_status=lambda: None
|
96
|
+
)
|
97
|
+
monkeypatch.setattr(requests, "get", lambda *args, **kwargs: response)
|
98
|
+
results = sd.process_search()
|
99
|
+
ids_list = results["papers"]["s1"]["paper_ids"]
|
100
|
+
assert ids_list == ["pmc:pmc2", "doi:doi2"]
|
101
|
+
|
102
|
+
|
103
|
+
def test_single_helper_pmc_and_doi_ids(monkeypatch):
|
104
|
+
"""Test PubMedCentral and DOI ID handling in SinglePaperRecData."""
|
105
|
+
sp = SinglePaperRecData(paper_id="x", limit=1, year=None, tool_call_id="tid")
|
106
|
+
data = {
|
107
|
+
"recommendedPapers": [
|
108
|
+
{
|
109
|
+
"paperId": "x1",
|
110
|
+
"title": "Test",
|
111
|
+
"authors": [{"name": "C", "authorId": "C1"}],
|
112
|
+
"externalIds": {"PubMedCentral": "pmc3", "DOI": "doi3"},
|
113
|
+
}
|
114
|
+
]
|
115
|
+
}
|
116
|
+
response = SimpleNamespace(
|
117
|
+
status_code=200, json=lambda: data, raise_for_status=lambda: None
|
118
|
+
)
|
119
|
+
monkeypatch.setattr(requests, "get", lambda *args, **kwargs: response)
|
120
|
+
results = sp.process_recommendations()
|
121
|
+
ids_list = results["papers"]["x1"]["paper_ids"]
|
122
|
+
assert ids_list == ["pmc:pmc3", "doi:doi3"]
|
123
|
+
|
124
|
+
|
125
|
+
def test_helpers_empty_when_no_external_ids(monkeypatch):
|
126
|
+
"""Test that MultiPaperRecData, SearchData, and SinglePaperRecData
|
127
|
+
return empty lists when externalIds are missing or empty."""
|
128
|
+
# Test that no IDs are returned when externalIds is empty or missing
|
129
|
+
rec = MultiPaperRecData(paper_ids=["p"], limit=1, year=None, tool_call_id="tid")
|
130
|
+
|
131
|
+
# Simulate empty externalIds in API response
|
132
|
+
class DummyResp1:
|
133
|
+
"""dummy response for multi-paper recommendation with empty externalIds"""
|
134
|
+
|
135
|
+
def __init__(self, data):
|
136
|
+
"""initialize with data"""
|
137
|
+
self._data = data
|
138
|
+
self.status_code = 200
|
139
|
+
|
140
|
+
def json(self):
|
141
|
+
"""json method to return data"""
|
142
|
+
return self._data
|
143
|
+
|
144
|
+
def raise_for_status(self):
|
145
|
+
"""raise_for_status method to simulate successful response"""
|
146
|
+
return None
|
147
|
+
|
148
|
+
def dummy_post1(*_, **__):
|
149
|
+
"""dummy response for multi-paper recommendation with empty externalIds"""
|
150
|
+
return DummyResp1(
|
151
|
+
{
|
152
|
+
"recommendedPapers": [
|
153
|
+
{
|
154
|
+
"paperId": "p2",
|
155
|
+
"title": "Test2",
|
156
|
+
"authors": [{"name": "D", "authorId": "D1"}],
|
157
|
+
"externalIds": {},
|
158
|
+
},
|
159
|
+
]
|
160
|
+
}
|
161
|
+
)
|
162
|
+
|
163
|
+
monkeypatch.setattr(requests, "post", dummy_post1)
|
164
|
+
assert rec.process_recommendations()["papers"].get("p2", {}).get("paper_ids") == []
|
165
|
+
sd = SearchData(query="q2", limit=1, year=None, tool_call_id="tid2")
|
166
|
+
|
167
|
+
# Simulate empty externalIds in search API response
|
168
|
+
class DummyResp2:
|
169
|
+
"""dummy response for search with empty externalIds"""
|
170
|
+
|
171
|
+
def __init__(self, data):
|
172
|
+
"""initialize with data"""
|
173
|
+
self._data = data
|
174
|
+
self.status_code = 200
|
175
|
+
|
176
|
+
def json(self):
|
177
|
+
"""json method to return data"""
|
178
|
+
return self._data
|
179
|
+
|
180
|
+
def raise_for_status(self):
|
181
|
+
"""raise_for_status method to simulate successful response"""
|
182
|
+
return None
|
183
|
+
|
184
|
+
def dummy_get2(*_, **__):
|
185
|
+
"""dummy response for search with empty externalIds"""
|
186
|
+
return DummyResp2(
|
187
|
+
{
|
188
|
+
"data": [
|
189
|
+
{
|
190
|
+
"paperId": "s2",
|
191
|
+
"title": "Test2",
|
192
|
+
"authors": [{"name": "E", "authorId": "E1"}],
|
193
|
+
"externalIds": {},
|
194
|
+
},
|
195
|
+
]
|
196
|
+
}
|
197
|
+
)
|
198
|
+
|
199
|
+
monkeypatch.setattr(requests, "get", dummy_get2)
|
200
|
+
assert sd.process_search()["papers"].get("s2", {}).get("paper_ids") == []
|
201
|
+
sp = SinglePaperRecData(paper_id="y", limit=1, year=None, tool_call_id="tid3")
|
202
|
+
|
203
|
+
# Simulate empty externalIds in single-paper API response
|
204
|
+
class DummyResp3:
|
205
|
+
"""dummy response for single paper recommendation with empty externalIds"""
|
206
|
+
|
207
|
+
def __init__(self, data):
|
208
|
+
"""initialize with data"""
|
209
|
+
self._data = data
|
210
|
+
self.status_code = 200
|
211
|
+
|
212
|
+
def json(self):
|
213
|
+
"""json method to return data"""
|
214
|
+
return self._data
|
215
|
+
|
216
|
+
def raise_for_status(self):
|
217
|
+
"""raise_for_status method to simulate successful response"""
|
218
|
+
return None
|
219
|
+
|
220
|
+
def dummy_get3(*_, **__):
|
221
|
+
"""dummy response for single paper recommendation with empty externalIds"""
|
222
|
+
return DummyResp3(
|
223
|
+
{
|
224
|
+
"recommendedPapers": [
|
225
|
+
{
|
226
|
+
"paperId": "y1",
|
227
|
+
"title": "Test3",
|
228
|
+
"authors": [{"name": "F", "authorId": "F1"}],
|
229
|
+
"externalIds": {},
|
230
|
+
},
|
231
|
+
]
|
232
|
+
}
|
233
|
+
)
|
234
|
+
|
235
|
+
monkeypatch.setattr(requests, "get", dummy_get3)
|
236
|
+
assert sp.process_recommendations()["papers"].get("y1", {}).get("paper_ids") == []
|
237
|
+
|
238
|
+
|
239
|
+
def test_multi_helper_arxiv_and_pubmed_ids(monkeypatch):
|
240
|
+
"""Test ArXiv and PubMed ID handling in MultiPaperRecData."""
|
241
|
+
rec = MultiPaperRecData(paper_ids=["p"], limit=1, year=None, tool_call_id="tid")
|
242
|
+
|
243
|
+
class DummyResp5:
|
244
|
+
"""dummy response for multi-paper recommendation with ArXiv and PubMed IDs"""
|
245
|
+
|
246
|
+
def __init__(self, data):
|
247
|
+
"""initialize with data"""
|
248
|
+
self._data = data
|
249
|
+
self.status_code = 200
|
250
|
+
|
251
|
+
def json(self):
|
252
|
+
"""json method to return data"""
|
253
|
+
return self._data
|
254
|
+
|
255
|
+
def raise_for_status(self):
|
256
|
+
"""raise_for_status method to simulate successful response"""
|
257
|
+
return None
|
258
|
+
|
259
|
+
def dummy_post5(*_, **__):
|
260
|
+
"""dummy response for multi-paper recommendation with ArXiv and PubMed IDs"""
|
261
|
+
return DummyResp5(
|
262
|
+
{
|
263
|
+
"recommendedPapers": [
|
264
|
+
{
|
265
|
+
"paperId": "pX",
|
266
|
+
"title": "TestX",
|
267
|
+
"authors": [{"name": "A", "authorId": "A1"}],
|
268
|
+
"externalIds": {"ArXiv": "ax1", "PubMed": "pm1"},
|
269
|
+
},
|
270
|
+
]
|
271
|
+
}
|
272
|
+
)
|
273
|
+
|
274
|
+
monkeypatch.setattr(requests, "post", dummy_post5)
|
275
|
+
ids_list = rec.process_recommendations()["papers"].get("pX", {}).get("paper_ids")
|
276
|
+
assert ids_list == ["arxiv:ax1", "pubmed:pm1"]
|
277
|
+
|
278
|
+
|
279
|
+
def test_search_helper_arxiv_and_pubmed_ids(monkeypatch):
|
280
|
+
"""Test ArXiv and PubMed ID handling in SearchData."""
|
281
|
+
sd = SearchData(query="q", limit=1, year=None, tool_call_id="tid")
|
282
|
+
|
283
|
+
class DummyResp6:
|
284
|
+
"""dummy response for search with ArXiv and PubMed IDs"""
|
285
|
+
|
286
|
+
def __init__(self, data):
|
287
|
+
"""initialize with data"""
|
288
|
+
self._data = data
|
289
|
+
self.status_code = 200
|
290
|
+
|
291
|
+
def json(self):
|
292
|
+
"""json method to return data"""
|
293
|
+
return self._data
|
294
|
+
|
295
|
+
def raise_for_status(self):
|
296
|
+
""" "raise_for_status method to simulate successful response"""
|
297
|
+
return None
|
298
|
+
|
299
|
+
def dummy_get6(*_, **__):
|
300
|
+
"""dummy response for search with ArXiv and PubMed IDs"""
|
301
|
+
return DummyResp6(
|
302
|
+
{
|
303
|
+
"data": [
|
304
|
+
{
|
305
|
+
"paperId": "sX",
|
306
|
+
"title": "TestS",
|
307
|
+
"authors": [{"name": "B", "authorId": "B1"}],
|
308
|
+
"externalIds": {"ArXiv": "ax2", "PubMed": "pm2"},
|
309
|
+
},
|
310
|
+
]
|
311
|
+
}
|
312
|
+
)
|
313
|
+
|
314
|
+
monkeypatch.setattr(requests, "get", dummy_get6)
|
315
|
+
ids_list = sd.process_search()["papers"].get("sX", {}).get("paper_ids")
|
316
|
+
assert ids_list == ["arxiv:ax2", "pubmed:pm2"]
|
317
|
+
|
318
|
+
|
319
|
+
def test_single_helper_arxiv_and_pubmed_ids(monkeypatch):
|
320
|
+
"""Test ArXiv and PubMed ID handling in SinglePaperRecData."""
|
321
|
+
sp = SinglePaperRecData(paper_id="x", limit=1, year=None, tool_call_id="tid")
|
322
|
+
|
323
|
+
class DummyResp7:
|
324
|
+
"""dummy response for single paper recommendation with ArXiv and PubMed IDs"""
|
325
|
+
|
326
|
+
def __init__(self, data):
|
327
|
+
"""initialize with data"""
|
328
|
+
self._data = data
|
329
|
+
self.status_code = 200
|
330
|
+
|
331
|
+
def json(self):
|
332
|
+
"""json method to return data"""
|
333
|
+
return self._data
|
334
|
+
|
335
|
+
def raise_for_status(self):
|
336
|
+
"""raise_for_status method to simulate successful response"""
|
337
|
+
return None
|
338
|
+
|
339
|
+
def dummy_get7(*_, **__):
|
340
|
+
"""dummy response for single paper recommendation with ArXiv and PubMed IDs"""
|
341
|
+
return DummyResp7(
|
342
|
+
{
|
343
|
+
"recommendedPapers": [
|
344
|
+
{
|
345
|
+
"paperId": "xY",
|
346
|
+
"title": "TestY",
|
347
|
+
"authors": [{"name": "C", "authorId": "C1"}],
|
348
|
+
"externalIds": {"ArXiv": "ax3", "PubMed": "pm3"},
|
349
|
+
},
|
350
|
+
]
|
351
|
+
}
|
352
|
+
)
|
353
|
+
|
354
|
+
monkeypatch.setattr(requests, "get", dummy_get7)
|
355
|
+
ids_list = sp.process_recommendations()["papers"].get("xY", {}).get("paper_ids")
|
356
|
+
assert ids_list == ["arxiv:ax3", "pubmed:pm3"]
|
357
|
+
|
358
|
+
|
359
|
+
def test_search_helper_create_content_snippet(monkeypatch):
|
360
|
+
"""Test that SearchData._create_content includes snippets appropriately."""
|
361
|
+
sd = SearchData(query="QueryX", limit=3, year="2022", tool_call_id="tid")
|
362
|
+
sd.filtered_papers = {
|
363
|
+
"p1": {"Title": "Title1", "Year": "2021", "Abstract": "First. Second. Third."},
|
364
|
+
"p2": {"Title": "Title2", "Year": "2020", "Abstract": ""},
|
365
|
+
}
|
366
|
+
# Stub out network fetch/filter to rely on preset filtered_papers
|
367
|
+
monkeypatch.setattr(SearchData, "_fetch_papers", lambda self: None)
|
368
|
+
monkeypatch.setattr(SearchData, "_filter_papers", lambda self: None)
|
369
|
+
results = sd.process_search()
|
370
|
+
content = results["content"]
|
371
|
+
assert "1. Title1 (2021)" in content
|
372
|
+
assert "Abstract snippet: First. Second." in content
|
373
|
+
assert "2. Title2 (2020)" in content
|
374
|
+
# Only one snippet present
|
375
|
+
assert content.count("Abstract snippet:") == 1
|
376
|
+
|
377
|
+
|
378
|
+
def test_single_helper_create_content_snippet(monkeypatch):
|
379
|
+
"""Test that SinglePaperRecData._create_content includes snippets appropriately."""
|
380
|
+
sp = SinglePaperRecData(paper_id="pid", limit=2, year=None, tool_call_id="tid")
|
381
|
+
sp.filtered_papers = {
|
382
|
+
"x1": {"Title": "STitle1", "Year": "2019", "Abstract": "SOne. STwo. SThree."},
|
383
|
+
"x2": {"Title": "STitle2", "Year": "2018", "Abstract": ""},
|
384
|
+
}
|
385
|
+
# Stub out network fetch/filter to rely on preset filtered_papers
|
386
|
+
monkeypatch.setattr(SinglePaperRecData, "_fetch_recommendations", lambda self: None)
|
387
|
+
monkeypatch.setattr(SinglePaperRecData, "_filter_papers", lambda self: None)
|
388
|
+
results = sp.process_recommendations()
|
389
|
+
content = results["content"]
|
390
|
+
assert "1. STitle1 (2019)" in content
|
391
|
+
assert "Abstract snippet: SOne. STwo." in content
|
392
|
+
assert "2. STitle2 (2018)" in content
|
393
|
+
assert content.count("Abstract snippet:") == 1
|
394
|
+
|
395
|
+
|
396
|
+
def test_multi_helper_create_content_snippet(monkeypatch):
|
397
|
+
"""Test that MultiPaperRecData._create_content includes snippets appropriately."""
|
398
|
+
mr = MultiPaperRecData(
|
399
|
+
paper_ids=["a", "b"], limit=2, year="2021", tool_call_id="tid"
|
400
|
+
)
|
401
|
+
mr.filtered_papers = {
|
402
|
+
"m1": {"Title": "MTitle1", "Year": "2017", "Abstract": "MOne. MTwo. MThree."},
|
403
|
+
"m2": {"Title": "MTitle2", "Year": "2016", "Abstract": ""},
|
404
|
+
}
|
405
|
+
# Stub out network fetch/filter to rely on preset filtered_papers
|
406
|
+
monkeypatch.setattr(MultiPaperRecData, "_fetch_recommendations", lambda self: None)
|
407
|
+
monkeypatch.setattr(MultiPaperRecData, "_filter_papers", lambda self: None)
|
408
|
+
results = mr.process_recommendations()
|
409
|
+
content = results["content"]
|
410
|
+
assert "1. MTitle1 (2017)" in content
|
411
|
+
assert "Abstract snippet: MOne. MTwo." in content
|
412
|
+
assert "2. MTitle2 (2016)" in content
|
413
|
+
assert content.count("Abstract snippet:") == 1
|