aiagents4pharma 1.31.0__py3-none-any.whl → 1.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
  2. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
  3. aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
  4. aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
  5. aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
  7. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
  8. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
  9. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
  10. aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
  11. aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
  12. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
  13. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
  14. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
  15. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
  16. aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
  17. aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
  18. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
  19. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
  20. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
  21. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
  22. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
  23. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
  24. aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
  25. aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
  26. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
  27. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
  28. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
  29. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/METADATA +3 -1
  30. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/RECORD +33 -35
  31. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/WHEEL +1 -1
  32. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
  33. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
  34. aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
  35. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/licenses/LICENSE +0 -0
  36. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/top_level.txt +0 -0
@@ -1,154 +1,171 @@
1
1
  """
2
2
  Unit tests for arXiv paper downloading functionality, including:
3
- - AbstractPaperDownloader (base class)
4
- - ArxivPaperDownloader (arXiv-specific implementation)
5
3
  - download_arxiv_paper tool function.
6
4
  """
7
5
 
8
- from unittest.mock import patch, MagicMock
9
- import pytest
10
- import requests
11
- from requests.exceptions import HTTPError
12
- from langgraph.types import Command
6
+ import unittest
7
+ from unittest.mock import MagicMock, patch
8
+
13
9
  from langchain_core.messages import ToolMessage
14
10
 
15
- # Import the classes and function under test
16
- from aiagents4pharma.talk2scholars.tools.paper_download.abstract_downloader import (
17
- AbstractPaperDownloader,
18
- )
19
- from aiagents4pharma.talk2scholars.tools.paper_download.arxiv_downloader import (
20
- ArxivPaperDownloader,
21
- )
22
11
  from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
23
12
  download_arxiv_paper,
24
13
  )
25
14
 
26
- @pytest.mark.parametrize("class_obj", [AbstractPaperDownloader])
27
-
28
- def test_abstract_downloader_cannot_be_instantiated(class_obj):
29
- """
30
- Validates that AbstractPaperDownloader is indeed abstract and raises TypeError
31
- if anyone attempts to instantiate it directly.
32
- """
33
- with pytest.raises(TypeError):
34
- class_obj()
35
-
36
-
37
- @pytest.fixture(name="arxiv_downloader_fixture")
38
- @pytest.mark.usefixtures("mock_hydra_config_setup")
39
- def fixture_arxiv_downloader():
40
- """
41
- Provides an ArxivPaperDownloader instance with a mocked Hydra config.
42
- """
43
- return ArxivPaperDownloader()
44
-
45
-
46
- def test_fetch_metadata_success(arxiv_downloader_fixture,):
47
- """
48
- Ensures fetch_metadata retrieves XML data correctly, given a successful HTTP response.
49
- """
50
- mock_response = MagicMock()
51
- mock_response.text = "<xml>Mock ArXiv Metadata</xml>"
52
- mock_response.raise_for_status = MagicMock()
53
-
54
- with patch.object(requests, "get", return_value=mock_response) as mock_get:
55
- paper_id = "1234.5678"
56
- result = arxiv_downloader_fixture.fetch_metadata(paper_id)
57
- mock_get.assert_called_once_with(
58
- "http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1",
59
- timeout=10,
60
- )
61
- assert result["xml"] == "<xml>Mock ArXiv Metadata</xml>"
62
-
63
15
 
64
- def test_fetch_metadata_http_error(arxiv_downloader_fixture):
65
- """
66
- Validates that fetch_metadata raises HTTPError when the response indicates a failure.
67
- """
68
- mock_response = MagicMock()
69
- mock_response.raise_for_status.side_effect = HTTPError("Mocked HTTP failure")
70
-
71
- with patch.object(requests, "get", return_value=mock_response):
72
- with pytest.raises(HTTPError):
73
- arxiv_downloader_fixture.fetch_metadata("invalid_id")
16
+ class TestDownloadArxivPaper(unittest.TestCase):
17
+ """tests for the download_arxiv_paper tool."""
18
+
19
+ @patch(
20
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
21
+ )
22
+ @patch(
23
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
24
+ )
25
+ @patch(
26
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
27
+ )
28
+ def test_download_arxiv_paper_success(
29
+ self, mock_get, mock_compose, mock_initialize
30
+ ):
31
+ """test the download_arxiv_paper tool for successful retrieval of metadata and PDF URL."""
32
+ # Set up a dummy Hydra config.
33
+ dummy_cfg = MagicMock()
34
+ dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
35
+ dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
36
+ mock_compose.return_value = dummy_cfg
37
+ mock_initialize.return_value.__enter__.return_value = None
38
+
39
+ # Set up a dummy XML response with a valid entry including a pdf link.
40
+ arxiv_id = "1234.56789"
41
+ dummy_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
42
+ <feed xmlns="http://www.w3.org/2005/Atom">
43
+ <entry>
44
+ <title>Sample Paper Title</title>
45
+ <author>
46
+ <name>Author One</name>
47
+ </author>
48
+ <author>
49
+ <name>Author Two</name>
50
+ </author>
51
+ <summary>This is a sample abstract.</summary>
52
+ <published>2020-01-01T00:00:00Z</published>
53
+ <link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>
54
+ </entry>
55
+ </feed>
56
+ """
57
+ dummy_response = MagicMock()
58
+ dummy_response.text = dummy_xml
59
+ dummy_response.raise_for_status = MagicMock()
60
+ mock_get.return_value = dummy_response
61
+
62
+ tool_call_id = "test_tool_id"
63
+ tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
64
+ result = download_arxiv_paper.run(tool_input)
65
+ update = result.update
66
+
67
+ # Check that article_data was correctly set.
68
+ self.assertIn("article_data", update)
69
+ self.assertIn(arxiv_id, update["article_data"])
70
+ metadata = update["article_data"][arxiv_id]
71
+ self.assertEqual(metadata["Title"], "Sample Paper Title")
72
+ self.assertEqual(metadata["Authors"], ["Author One", "Author Two"])
73
+ self.assertEqual(metadata["Abstract"], "This is a sample abstract.")
74
+ self.assertEqual(metadata["Publication Date"], "2020-01-01T00:00:00Z")
75
+ self.assertEqual(metadata["URL"], f"http://arxiv.org/pdf/{arxiv_id}v1")
76
+ self.assertEqual(metadata["pdf_url"], f"http://arxiv.org/pdf/{arxiv_id}v1")
77
+ self.assertEqual(metadata["filename"], f"{arxiv_id}.pdf")
78
+ self.assertEqual(metadata["source"], "arxiv")
79
+ self.assertEqual(metadata["arxiv_id"], arxiv_id)
80
+
81
+ # Check that the message content is as expected.
82
+ messages = update["messages"]
83
+ self.assertTrue(len(messages) >= 1)
84
+ self.assertIsInstance(messages[0], ToolMessage)
85
+ self.assertIn(
86
+ f"Successfully retrieved metadata and PDF URL for arXiv ID {arxiv_id}",
87
+ messages[0].content,
88
+ )
74
89
 
90
+ @patch(
91
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
92
+ )
93
+ @patch(
94
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
95
+ )
96
+ @patch(
97
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
98
+ )
99
+ def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
100
+ """test the download_arxiv_paper tool for no entry found in XML response."""
101
+ # Dummy config as before.
102
+ dummy_cfg = MagicMock()
103
+ dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
104
+ dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
105
+ mock_compose.return_value = dummy_cfg
106
+ mock_initialize.return_value.__enter__.return_value = None
107
+
108
+ # Set up XML with no entry element.
109
+ arxiv_id = "1234.56789"
110
+ dummy_xml = (
111
+ """<?xml version="1.0" encoding="UTF-8"?>"""
112
+ """<feed xmlns="http://www.w3.org/2005/Atom"></feed>"""
113
+ )
114
+ dummy_response = MagicMock()
115
+ dummy_response.text = dummy_xml
116
+ dummy_response.raise_for_status = MagicMock()
117
+ mock_get.return_value = dummy_response
118
+
119
+ tool_call_id = "test_tool_id"
120
+ tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
121
+ with self.assertRaises(ValueError) as context:
122
+ download_arxiv_paper.run(tool_input)
123
+ self.assertEqual(
124
+ str(context.exception), f"No entry found for arXiv ID {arxiv_id}"
125
+ )
75
126
 
76
- def test_download_pdf_success(arxiv_downloader_fixture):
77
- """
78
- Tests that download_pdf fetches the PDF link from metadata and successfully
79
- retrieves the binary content.
80
- """
81
- mock_metadata = {
82
- "xml": """
127
+ @patch(
128
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
129
+ )
130
+ @patch(
131
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
132
+ )
133
+ @patch(
134
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
135
+ )
136
+ def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
137
+ """test the download_arxiv_paper tool for no PDF URL found in XML response."""
138
+ # Dummy config.
139
+ dummy_cfg = MagicMock()
140
+ dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
141
+ dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
142
+ mock_compose.return_value = dummy_cfg
143
+ mock_initialize.return_value.__enter__.return_value = None
144
+
145
+ # Set up XML with an entry that does not contain a pdf link.
146
+ arxiv_id = "1234.56789"
147
+ dummy_xml = """<?xml version="1.0" encoding="UTF-8"?>
83
148
  <feed xmlns="http://www.w3.org/2005/Atom">
84
149
  <entry>
85
- <link title="pdf" href="http://test.arxiv.org/pdf/1234.5678v1.pdf"/>
150
+ <title>Sample Paper Title</title>
151
+ <author>
152
+ <name>Author One</name>
153
+ </author>
154
+ <summary>This is a sample abstract.</summary>
155
+ <published>2020-01-01T00:00:00Z</published>
156
+ <!-- Missing pdf link -->
86
157
  </entry>
87
158
  </feed>
88
159
  """
89
- }
90
-
91
- mock_pdf_response = MagicMock()
92
- mock_pdf_response.raise_for_status = MagicMock()
93
- mock_pdf_response.iter_content = lambda chunk_size: [b"FAKE_PDF_CONTENT"]
94
-
95
- with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
96
- with patch.object(requests, "get", return_value=mock_pdf_response) as mock_get:
97
- result = arxiv_downloader_fixture.download_pdf("1234.5678")
98
- assert result["pdf_object"] == b"FAKE_PDF_CONTENT"
99
- assert result["pdf_url"] == "http://test.arxiv.org/pdf/1234.5678v1.pdf"
100
- assert result["arxiv_id"] == "1234.5678"
101
- mock_get.assert_called_once_with(
102
- "http://test.arxiv.org/pdf/1234.5678v1.pdf",
103
- stream=True,
104
- timeout=10,
105
- )
106
-
107
-
108
- def test_download_pdf_no_pdf_link(arxiv_downloader_fixture):
109
- """
110
- Ensures a RuntimeError is raised if no <link> with title="pdf" is found in the XML.
111
- """
112
- mock_metadata = {"xml": "<feed></feed>"}
113
-
114
- with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
115
- with pytest.raises(RuntimeError, match="Failed to download PDF"):
116
- arxiv_downloader_fixture.download_pdf("1234.5678")
117
-
118
-
119
- def test_download_arxiv_paper_tool_success(arxiv_downloader_fixture):
120
- """
121
- Validates download_arxiv_paper orchestrates the ArxivPaperDownloader correctly,
122
- returning a Command with PDF data and success messages.
123
- """
124
- mock_metadata = {"xml": "<mockxml></mockxml>"}
125
- mock_pdf_response = {
126
- "pdf_object": b"FAKE_PDF_CONTENT",
127
- "pdf_url": "http://test.arxiv.org/mock.pdf",
128
- "arxiv_id": "9999.8888",
129
- }
130
-
131
- with patch(
132
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input."
133
- "ArxivPaperDownloader",
134
- return_value=arxiv_downloader_fixture,
135
- ):
136
- with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
137
- with patch.object(
138
- arxiv_downloader_fixture,
139
- "download_pdf",
140
- return_value=mock_pdf_response,
141
- ):
142
- command_result = download_arxiv_paper.invoke(
143
- {"arxiv_id": "9999.8888", "tool_call_id": "test_tool_call"}
144
- )
145
-
146
- assert isinstance(command_result, Command)
147
- assert "pdf_data" in command_result.update
148
- assert command_result.update["pdf_data"] == mock_pdf_response
149
-
150
- messages = command_result.update.get("messages", [])
151
- assert len(messages) == 1
152
- assert isinstance(messages[0], ToolMessage)
153
- assert "Successfully downloaded PDF" in messages[0].content
154
- assert "9999.8888" in messages[0].content
160
+ dummy_response = MagicMock()
161
+ dummy_response.text = dummy_xml
162
+ dummy_response.raise_for_status = MagicMock()
163
+ mock_get.return_value = dummy_response
164
+
165
+ tool_call_id = "test_tool_id"
166
+ tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
167
+ with self.assertRaises(RuntimeError) as context:
168
+ download_arxiv_paper.run(tool_input)
169
+ self.assertEqual(
170
+ str(context.exception), f"Could not find PDF URL for arXiv ID {arxiv_id}"
171
+ )
@@ -27,17 +27,13 @@ def mock_tools_fixture():
27
27
  """Mock PDF agent tools to prevent execution of real API calls."""
28
28
  with (
29
29
  mock.patch(
30
- "aiagents4pharma.talk2scholars.agents.pdf_agent.question_and_answer_tool"
31
- ) as mock_question_and_answer_tool,
32
- mock.patch(
33
- "aiagents4pharma.talk2scholars.agents.pdf_agent.query_results"
34
- ) as mock_query_results,
30
+ "aiagents4pharma.talk2scholars.agents.pdf_agent.question_and_answer"
31
+ ) as mock_question_and_answer,
35
32
  ):
36
- mock_question_and_answer_tool.return_value = {
33
+ mock_question_and_answer.return_value = {
37
34
  "result": "Mock Question and Answer Result"
38
35
  }
39
- mock_query_results.return_value = {"result": "Mock Query Result"}
40
- yield [mock_question_and_answer_tool, mock_query_results]
36
+ yield [mock_question_and_answer]
41
37
 
42
38
 
43
39
  @pytest.fixture
@@ -73,10 +69,8 @@ def test_pdf_agent_invocation(mock_llm):
73
69
  mock_create.return_value = mock_agent
74
70
  # Simulate a response from the PDF agent.
75
71
  mock_agent.invoke.return_value = {
76
- "messages": [
77
- AIMessage(content="PDF content extracted successfully")
78
- ],
79
- "pdf_data": {"page": 1, "text": "Sample PDF text"},
72
+ "messages": [AIMessage(content="PDF content extracted successfully")],
73
+ "article_data": {"page": 1, "text": "Sample PDF text"},
80
74
  }
81
75
  app = get_app(thread_id, mock_llm)
82
76
  result = app.invoke(
@@ -90,8 +84,8 @@ def test_pdf_agent_invocation(mock_llm):
90
84
  },
91
85
  )
92
86
  assert "messages" in result
93
- assert "pdf_data" in result
94
- assert result["pdf_data"]["page"] == 1
87
+ assert "article_data" in result
88
+ assert result["article_data"]["page"] == 1
95
89
 
96
90
 
97
91
  def test_pdf_agent_tools_assignment(request, mock_llm):
@@ -109,12 +103,11 @@ def test_pdf_agent_tools_assignment(request, mock_llm):
109
103
  mock_agent = mock.Mock()
110
104
  mock_create.return_value = mock_agent
111
105
  mock_tool_instance = mock.Mock()
112
- # For the PDF agent, we expect two tools: question_and_answer_tool and query_results.
113
106
  mock_tool_instance.tools = mock_tools
114
107
  mock_toolnode.return_value = mock_tool_instance
115
108
  get_app(thread_id, mock_llm)
116
109
  assert mock_toolnode.called
117
- assert len(mock_tool_instance.tools) == 2
110
+ assert len(mock_tool_instance.tools) == 1
118
111
 
119
112
 
120
113
  def test_pdf_agent_hydra_failure(mock_llm):