aiagents4pharma 1.31.0__py3-none-any.whl → 1.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
- aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
- aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
- aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/METADATA +3 -1
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/RECORD +33 -35
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/WHEEL +1 -1
- aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
- aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
- aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/top_level.txt +0 -0
@@ -1,154 +1,171 @@
|
|
1
1
|
"""
|
2
2
|
Unit tests for arXiv paper downloading functionality, including:
|
3
|
-
- AbstractPaperDownloader (base class)
|
4
|
-
- ArxivPaperDownloader (arXiv-specific implementation)
|
5
3
|
- download_arxiv_paper tool function.
|
6
4
|
"""
|
7
5
|
|
8
|
-
|
9
|
-
import
|
10
|
-
|
11
|
-
from requests.exceptions import HTTPError
|
12
|
-
from langgraph.types import Command
|
6
|
+
import unittest
|
7
|
+
from unittest.mock import MagicMock, patch
|
8
|
+
|
13
9
|
from langchain_core.messages import ToolMessage
|
14
10
|
|
15
|
-
# Import the classes and function under test
|
16
|
-
from aiagents4pharma.talk2scholars.tools.paper_download.abstract_downloader import (
|
17
|
-
AbstractPaperDownloader,
|
18
|
-
)
|
19
|
-
from aiagents4pharma.talk2scholars.tools.paper_download.arxiv_downloader import (
|
20
|
-
ArxivPaperDownloader,
|
21
|
-
)
|
22
11
|
from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
|
23
12
|
download_arxiv_paper,
|
24
13
|
)
|
25
14
|
|
26
|
-
@pytest.mark.parametrize("class_obj", [AbstractPaperDownloader])
|
27
|
-
|
28
|
-
def test_abstract_downloader_cannot_be_instantiated(class_obj):
|
29
|
-
"""
|
30
|
-
Validates that AbstractPaperDownloader is indeed abstract and raises TypeError
|
31
|
-
if anyone attempts to instantiate it directly.
|
32
|
-
"""
|
33
|
-
with pytest.raises(TypeError):
|
34
|
-
class_obj()
|
35
|
-
|
36
|
-
|
37
|
-
@pytest.fixture(name="arxiv_downloader_fixture")
|
38
|
-
@pytest.mark.usefixtures("mock_hydra_config_setup")
|
39
|
-
def fixture_arxiv_downloader():
|
40
|
-
"""
|
41
|
-
Provides an ArxivPaperDownloader instance with a mocked Hydra config.
|
42
|
-
"""
|
43
|
-
return ArxivPaperDownloader()
|
44
|
-
|
45
|
-
|
46
|
-
def test_fetch_metadata_success(arxiv_downloader_fixture,):
|
47
|
-
"""
|
48
|
-
Ensures fetch_metadata retrieves XML data correctly, given a successful HTTP response.
|
49
|
-
"""
|
50
|
-
mock_response = MagicMock()
|
51
|
-
mock_response.text = "<xml>Mock ArXiv Metadata</xml>"
|
52
|
-
mock_response.raise_for_status = MagicMock()
|
53
|
-
|
54
|
-
with patch.object(requests, "get", return_value=mock_response) as mock_get:
|
55
|
-
paper_id = "1234.5678"
|
56
|
-
result = arxiv_downloader_fixture.fetch_metadata(paper_id)
|
57
|
-
mock_get.assert_called_once_with(
|
58
|
-
"http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1",
|
59
|
-
timeout=10,
|
60
|
-
)
|
61
|
-
assert result["xml"] == "<xml>Mock ArXiv Metadata</xml>"
|
62
|
-
|
63
15
|
|
64
|
-
|
65
|
-
"""
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
16
|
+
class TestDownloadArxivPaper(unittest.TestCase):
|
17
|
+
"""tests for the download_arxiv_paper tool."""
|
18
|
+
|
19
|
+
@patch(
|
20
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
21
|
+
)
|
22
|
+
@patch(
|
23
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
24
|
+
)
|
25
|
+
@patch(
|
26
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
|
27
|
+
)
|
28
|
+
def test_download_arxiv_paper_success(
|
29
|
+
self, mock_get, mock_compose, mock_initialize
|
30
|
+
):
|
31
|
+
"""test the download_arxiv_paper tool for successful retrieval of metadata and PDF URL."""
|
32
|
+
# Set up a dummy Hydra config.
|
33
|
+
dummy_cfg = MagicMock()
|
34
|
+
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
|
35
|
+
dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
|
36
|
+
mock_compose.return_value = dummy_cfg
|
37
|
+
mock_initialize.return_value.__enter__.return_value = None
|
38
|
+
|
39
|
+
# Set up a dummy XML response with a valid entry including a pdf link.
|
40
|
+
arxiv_id = "1234.56789"
|
41
|
+
dummy_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
|
42
|
+
<feed xmlns="http://www.w3.org/2005/Atom">
|
43
|
+
<entry>
|
44
|
+
<title>Sample Paper Title</title>
|
45
|
+
<author>
|
46
|
+
<name>Author One</name>
|
47
|
+
</author>
|
48
|
+
<author>
|
49
|
+
<name>Author Two</name>
|
50
|
+
</author>
|
51
|
+
<summary>This is a sample abstract.</summary>
|
52
|
+
<published>2020-01-01T00:00:00Z</published>
|
53
|
+
<link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>
|
54
|
+
</entry>
|
55
|
+
</feed>
|
56
|
+
"""
|
57
|
+
dummy_response = MagicMock()
|
58
|
+
dummy_response.text = dummy_xml
|
59
|
+
dummy_response.raise_for_status = MagicMock()
|
60
|
+
mock_get.return_value = dummy_response
|
61
|
+
|
62
|
+
tool_call_id = "test_tool_id"
|
63
|
+
tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
|
64
|
+
result = download_arxiv_paper.run(tool_input)
|
65
|
+
update = result.update
|
66
|
+
|
67
|
+
# Check that article_data was correctly set.
|
68
|
+
self.assertIn("article_data", update)
|
69
|
+
self.assertIn(arxiv_id, update["article_data"])
|
70
|
+
metadata = update["article_data"][arxiv_id]
|
71
|
+
self.assertEqual(metadata["Title"], "Sample Paper Title")
|
72
|
+
self.assertEqual(metadata["Authors"], ["Author One", "Author Two"])
|
73
|
+
self.assertEqual(metadata["Abstract"], "This is a sample abstract.")
|
74
|
+
self.assertEqual(metadata["Publication Date"], "2020-01-01T00:00:00Z")
|
75
|
+
self.assertEqual(metadata["URL"], f"http://arxiv.org/pdf/{arxiv_id}v1")
|
76
|
+
self.assertEqual(metadata["pdf_url"], f"http://arxiv.org/pdf/{arxiv_id}v1")
|
77
|
+
self.assertEqual(metadata["filename"], f"{arxiv_id}.pdf")
|
78
|
+
self.assertEqual(metadata["source"], "arxiv")
|
79
|
+
self.assertEqual(metadata["arxiv_id"], arxiv_id)
|
80
|
+
|
81
|
+
# Check that the message content is as expected.
|
82
|
+
messages = update["messages"]
|
83
|
+
self.assertTrue(len(messages) >= 1)
|
84
|
+
self.assertIsInstance(messages[0], ToolMessage)
|
85
|
+
self.assertIn(
|
86
|
+
f"Successfully retrieved metadata and PDF URL for arXiv ID {arxiv_id}",
|
87
|
+
messages[0].content,
|
88
|
+
)
|
74
89
|
|
90
|
+
@patch(
|
91
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
92
|
+
)
|
93
|
+
@patch(
|
94
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
95
|
+
)
|
96
|
+
@patch(
|
97
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
|
98
|
+
)
|
99
|
+
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
100
|
+
"""test the download_arxiv_paper tool for no entry found in XML response."""
|
101
|
+
# Dummy config as before.
|
102
|
+
dummy_cfg = MagicMock()
|
103
|
+
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
|
104
|
+
dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
|
105
|
+
mock_compose.return_value = dummy_cfg
|
106
|
+
mock_initialize.return_value.__enter__.return_value = None
|
107
|
+
|
108
|
+
# Set up XML with no entry element.
|
109
|
+
arxiv_id = "1234.56789"
|
110
|
+
dummy_xml = (
|
111
|
+
"""<?xml version="1.0" encoding="UTF-8"?>"""
|
112
|
+
"""<feed xmlns="http://www.w3.org/2005/Atom"></feed>"""
|
113
|
+
)
|
114
|
+
dummy_response = MagicMock()
|
115
|
+
dummy_response.text = dummy_xml
|
116
|
+
dummy_response.raise_for_status = MagicMock()
|
117
|
+
mock_get.return_value = dummy_response
|
118
|
+
|
119
|
+
tool_call_id = "test_tool_id"
|
120
|
+
tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
|
121
|
+
with self.assertRaises(ValueError) as context:
|
122
|
+
download_arxiv_paper.run(tool_input)
|
123
|
+
self.assertEqual(
|
124
|
+
str(context.exception), f"No entry found for arXiv ID {arxiv_id}"
|
125
|
+
)
|
75
126
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
127
|
+
@patch(
|
128
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
129
|
+
)
|
130
|
+
@patch(
|
131
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
132
|
+
)
|
133
|
+
@patch(
|
134
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
|
135
|
+
)
|
136
|
+
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
137
|
+
"""test the download_arxiv_paper tool for no PDF URL found in XML response."""
|
138
|
+
# Dummy config.
|
139
|
+
dummy_cfg = MagicMock()
|
140
|
+
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
|
141
|
+
dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
|
142
|
+
mock_compose.return_value = dummy_cfg
|
143
|
+
mock_initialize.return_value.__enter__.return_value = None
|
144
|
+
|
145
|
+
# Set up XML with an entry that does not contain a pdf link.
|
146
|
+
arxiv_id = "1234.56789"
|
147
|
+
dummy_xml = """<?xml version="1.0" encoding="UTF-8"?>
|
83
148
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
84
149
|
<entry>
|
85
|
-
<
|
150
|
+
<title>Sample Paper Title</title>
|
151
|
+
<author>
|
152
|
+
<name>Author One</name>
|
153
|
+
</author>
|
154
|
+
<summary>This is a sample abstract.</summary>
|
155
|
+
<published>2020-01-01T00:00:00Z</published>
|
156
|
+
<!-- Missing pdf link -->
|
86
157
|
</entry>
|
87
158
|
</feed>
|
88
159
|
"""
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
with
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
mock_get.assert_called_once_with(
|
102
|
-
"http://test.arxiv.org/pdf/1234.5678v1.pdf",
|
103
|
-
stream=True,
|
104
|
-
timeout=10,
|
105
|
-
)
|
106
|
-
|
107
|
-
|
108
|
-
def test_download_pdf_no_pdf_link(arxiv_downloader_fixture):
|
109
|
-
"""
|
110
|
-
Ensures a RuntimeError is raised if no <link> with title="pdf" is found in the XML.
|
111
|
-
"""
|
112
|
-
mock_metadata = {"xml": "<feed></feed>"}
|
113
|
-
|
114
|
-
with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
|
115
|
-
with pytest.raises(RuntimeError, match="Failed to download PDF"):
|
116
|
-
arxiv_downloader_fixture.download_pdf("1234.5678")
|
117
|
-
|
118
|
-
|
119
|
-
def test_download_arxiv_paper_tool_success(arxiv_downloader_fixture):
|
120
|
-
"""
|
121
|
-
Validates download_arxiv_paper orchestrates the ArxivPaperDownloader correctly,
|
122
|
-
returning a Command with PDF data and success messages.
|
123
|
-
"""
|
124
|
-
mock_metadata = {"xml": "<mockxml></mockxml>"}
|
125
|
-
mock_pdf_response = {
|
126
|
-
"pdf_object": b"FAKE_PDF_CONTENT",
|
127
|
-
"pdf_url": "http://test.arxiv.org/mock.pdf",
|
128
|
-
"arxiv_id": "9999.8888",
|
129
|
-
}
|
130
|
-
|
131
|
-
with patch(
|
132
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input."
|
133
|
-
"ArxivPaperDownloader",
|
134
|
-
return_value=arxiv_downloader_fixture,
|
135
|
-
):
|
136
|
-
with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
|
137
|
-
with patch.object(
|
138
|
-
arxiv_downloader_fixture,
|
139
|
-
"download_pdf",
|
140
|
-
return_value=mock_pdf_response,
|
141
|
-
):
|
142
|
-
command_result = download_arxiv_paper.invoke(
|
143
|
-
{"arxiv_id": "9999.8888", "tool_call_id": "test_tool_call"}
|
144
|
-
)
|
145
|
-
|
146
|
-
assert isinstance(command_result, Command)
|
147
|
-
assert "pdf_data" in command_result.update
|
148
|
-
assert command_result.update["pdf_data"] == mock_pdf_response
|
149
|
-
|
150
|
-
messages = command_result.update.get("messages", [])
|
151
|
-
assert len(messages) == 1
|
152
|
-
assert isinstance(messages[0], ToolMessage)
|
153
|
-
assert "Successfully downloaded PDF" in messages[0].content
|
154
|
-
assert "9999.8888" in messages[0].content
|
160
|
+
dummy_response = MagicMock()
|
161
|
+
dummy_response.text = dummy_xml
|
162
|
+
dummy_response.raise_for_status = MagicMock()
|
163
|
+
mock_get.return_value = dummy_response
|
164
|
+
|
165
|
+
tool_call_id = "test_tool_id"
|
166
|
+
tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
|
167
|
+
with self.assertRaises(RuntimeError) as context:
|
168
|
+
download_arxiv_paper.run(tool_input)
|
169
|
+
self.assertEqual(
|
170
|
+
str(context.exception), f"Could not find PDF URL for arXiv ID {arxiv_id}"
|
171
|
+
)
|
@@ -27,17 +27,13 @@ def mock_tools_fixture():
|
|
27
27
|
"""Mock PDF agent tools to prevent execution of real API calls."""
|
28
28
|
with (
|
29
29
|
mock.patch(
|
30
|
-
"aiagents4pharma.talk2scholars.agents.pdf_agent.question_and_answer_tool"
|
31
|
-
) as mock_question_and_answer_tool,
|
32
|
-
mock.patch(
|
33
|
-
"aiagents4pharma.talk2scholars.agents.pdf_agent.query_results"
|
34
|
-
) as mock_query_results,
|
30
|
+
"aiagents4pharma.talk2scholars.agents.pdf_agent.question_and_answer"
|
31
|
+
) as mock_question_and_answer,
|
35
32
|
):
|
36
|
-
|
33
|
+
mock_question_and_answer.return_value = {
|
37
34
|
"result": "Mock Question and Answer Result"
|
38
35
|
}
|
39
|
-
|
40
|
-
yield [mock_question_and_answer_tool, mock_query_results]
|
36
|
+
yield [mock_question_and_answer]
|
41
37
|
|
42
38
|
|
43
39
|
@pytest.fixture
|
@@ -73,10 +69,8 @@ def test_pdf_agent_invocation(mock_llm):
|
|
73
69
|
mock_create.return_value = mock_agent
|
74
70
|
# Simulate a response from the PDF agent.
|
75
71
|
mock_agent.invoke.return_value = {
|
76
|
-
"messages": [
|
77
|
-
|
78
|
-
],
|
79
|
-
"pdf_data": {"page": 1, "text": "Sample PDF text"},
|
72
|
+
"messages": [AIMessage(content="PDF content extracted successfully")],
|
73
|
+
"article_data": {"page": 1, "text": "Sample PDF text"},
|
80
74
|
}
|
81
75
|
app = get_app(thread_id, mock_llm)
|
82
76
|
result = app.invoke(
|
@@ -90,8 +84,8 @@ def test_pdf_agent_invocation(mock_llm):
|
|
90
84
|
},
|
91
85
|
)
|
92
86
|
assert "messages" in result
|
93
|
-
assert "pdf_data" in result
|
94
|
-
assert result["pdf_data"]["page"] == 1
|
87
|
+
assert "article_data" in result
|
88
|
+
assert result["article_data"]["page"] == 1
|
95
89
|
|
96
90
|
|
97
91
|
def test_pdf_agent_tools_assignment(request, mock_llm):
|
@@ -109,12 +103,11 @@ def test_pdf_agent_tools_assignment(request, mock_llm):
|
|
109
103
|
mock_agent = mock.Mock()
|
110
104
|
mock_create.return_value = mock_agent
|
111
105
|
mock_tool_instance = mock.Mock()
|
112
|
-
# For the PDF agent, we expect two tools: question_and_answer_tool and query_results.
|
113
106
|
mock_tool_instance.tools = mock_tools
|
114
107
|
mock_toolnode.return_value = mock_tool_instance
|
115
108
|
get_app(thread_id, mock_llm)
|
116
109
|
assert mock_toolnode.called
|
117
|
-
assert len(mock_tool_instance.tools) == 2
|
110
|
+
assert len(mock_tool_instance.tools) == 1
|
118
111
|
|
119
112
|
|
120
113
|
def test_pdf_agent_hydra_failure(mock_llm):
|