aiagents4pharma 1.41.0__py3-none-any.whl → 1.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  2. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +37 -0
  3. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  4. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  5. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  6. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  7. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +752 -350
  8. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +7 -4
  9. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +49 -95
  10. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +15 -1
  11. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +16 -2
  12. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +40 -5
  13. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +15 -5
  14. aiagents4pharma/talk2scholars/configs/config.yaml +1 -3
  15. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  16. aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py +478 -0
  17. aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py +620 -0
  18. aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py +697 -0
  19. aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py +534 -0
  20. aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +22 -12
  21. aiagents4pharma/talk2scholars/tests/test_paper_downloader.py +545 -0
  22. aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py +1067 -0
  23. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +2 -4
  24. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +457 -0
  25. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +20 -0
  26. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +209 -0
  27. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +343 -0
  28. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +321 -0
  29. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +198 -0
  30. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +337 -0
  31. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +97 -45
  32. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +47 -29
  33. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/METADATA +30 -14
  34. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/RECORD +38 -30
  35. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +0 -4
  36. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +0 -3
  37. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +0 -2
  38. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +0 -3
  39. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +0 -2
  40. aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +0 -151
  41. aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +0 -151
  42. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +0 -249
  43. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +0 -177
  44. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +0 -114
  45. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +0 -114
  46. /aiagents4pharma/talk2scholars/configs/tools/{download_arxiv_paper → paper_download}/__init__.py +0 -0
  47. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/WHEEL +0 -0
  48. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/licenses/LICENSE +0 -0
  49. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/top_level.txt +0 -0
@@ -1,151 +0,0 @@
1
- """
2
- Unit tests for bioRxiv paper downloading functionality, including:
3
- - download_bioRxiv_paper tool function.
4
- """
5
-
6
- import unittest
7
- from unittest.mock import MagicMock, patch
8
- from langchain_core.messages import ToolMessage
9
-
10
- from aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input import (
11
- download_biorxiv_paper,
12
- )
13
-
14
-
15
- class TestDownloadBiorxivPaper(unittest.TestCase):
16
- """Tests for the download_bioRxiv_paper tool."""
17
-
18
- @patch(
19
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
20
- )
21
- @patch(
22
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
23
- )
24
- @patch(
25
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
26
- )
27
- def test_download_biorxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
28
- """Test successful metadata and PDF URL retrieval."""
29
- dummy_cfg = MagicMock()
30
- dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
31
- dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
32
- mock_compose.return_value = dummy_cfg
33
- mock_initialize.return_value.__enter__.return_value = None
34
-
35
- doi = "10.1101/2025.05.13.653102"
36
-
37
- dummy_response = MagicMock()
38
- dummy_response.status_code = 200
39
- dummy_response.raise_for_status = MagicMock()
40
- dummy_response.json.return_value = {
41
- "collection": [
42
- {
43
- "title": "Sample BioRxiv Paper",
44
- "authors": "Author One; Author Two",
45
- "abstract": "This is a bioRxiv abstract.",
46
- "date": "2025-04-25",
47
- "doi": doi,
48
- "link": f"https://www.biorxiv.org/content/{doi}.full.pdf"
49
- }
50
- ]
51
- }
52
- mock_get.return_value = dummy_response
53
-
54
- tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
55
- result = download_biorxiv_paper.run(tool_input)
56
- update = result.update
57
-
58
- self.assertIn("article_data", update)
59
- self.assertIn(doi, update["article_data"])
60
- metadata = update["article_data"][doi]
61
- self.assertEqual(metadata["Title"], "Sample BioRxiv Paper")
62
- self.assertEqual(metadata["Authors"], "Author One; Author Two")
63
- self.assertEqual(metadata["Abstract"], "This is a bioRxiv abstract.")
64
- self.assertEqual(metadata["Publication Date"], "2025-04-25")
65
- self.assertEqual(metadata["URL"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
66
- self.assertEqual(metadata["pdf_url"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
67
- self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
68
- self.assertEqual(metadata["source"], "biorxiv")
69
- self.assertEqual(metadata["biorxiv_id"], doi)
70
-
71
- self.assertTrue(len(update["messages"]) >= 1)
72
- self.assertIsInstance(update["messages"][0], ToolMessage)
73
- self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
74
-
75
- @patch(
76
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
77
- )
78
- @patch(
79
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
80
- )
81
- @patch(
82
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
83
- )
84
- def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
85
- """Test behavior when no 'entry' is in response."""
86
- dummy_cfg = MagicMock()
87
- dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
88
- dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
89
- mock_compose.return_value = dummy_cfg
90
- mock_initialize.return_value.__enter__.return_value = None
91
-
92
- dummy_response = MagicMock()
93
- dummy_response.status_code = 200
94
- dummy_response.raise_for_status = MagicMock()
95
- dummy_response.json.return_value = {} # No entry
96
- mock_get.return_value = dummy_response
97
-
98
- doi = "10.1101/2025.05.13.653102"
99
- tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
100
-
101
- with self.assertRaises(ValueError) as context:
102
- download_biorxiv_paper.run(tool_input)
103
-
104
- self.assertEqual(str(context.exception), f"No metadata found for DOI: {doi}")
105
-
106
- @patch(
107
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
108
- )
109
- @patch(
110
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
111
- )
112
- @patch(
113
- "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
114
- )
115
- def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
116
- """Test fallback to DOI-based PDF URL construction when 'link' is missing."""
117
- dummy_cfg = MagicMock()
118
- dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
119
- dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
120
- mock_compose.return_value = dummy_cfg
121
- mock_initialize.return_value.__enter__.return_value = None
122
-
123
- doi = "10.1101/2025.05.13.653102"
124
-
125
- dummy_response = MagicMock()
126
- dummy_response.status_code = 200
127
- dummy_response.raise_for_status = MagicMock()
128
- dummy_response.json.return_value = {
129
- "collection": [
130
- {
131
- "title": "Sample Biorxiv Paper",
132
- "authors": "Author One; Author Two",
133
- "abstract": "This is a BioRxiv abstract.",
134
- "date": "2025-04-25",
135
- "doi": doi
136
- # 'link' is intentionally omitted
137
- }
138
- ]
139
- }
140
- mock_get.return_value = dummy_response
141
-
142
- tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
143
- result = download_biorxiv_paper.run(tool_input)
144
- update = result.update
145
- metadata = update["article_data"][doi]
146
-
147
- # Assert that the PDF URL was constructed from DOI
148
- expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
149
- expected_url = f"https://www.biorxiv.org/content/10.1101/{expected_suffix}.full.pdf"
150
-
151
- self.assertEqual(metadata["pdf_url"], expected_url)
@@ -1,151 +0,0 @@
1
- """
2
- Unit tests for medrXiv paper downloading functionality, including:
3
- - download_medrxiv_paper tool function.
4
- """
5
-
6
- import unittest
7
- from unittest.mock import MagicMock, patch
8
- from langchain_core.messages import ToolMessage
9
-
10
- from aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input import (
11
- download_medrxiv_paper,
12
- )
13
-
14
-
15
- class TestDownloadMedrxivPaper(unittest.TestCase):
16
- """Tests for the download_medrxiv_paper tool."""
17
-
18
- @patch(
19
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
20
- )
21
- @patch(
22
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
23
- )
24
- @patch(
25
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
26
- )
27
- def test_download_medrxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
28
- """Test successful metadata and PDF URL retrieval."""
29
- dummy_cfg = MagicMock()
30
- dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
31
- dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
32
- mock_compose.return_value = dummy_cfg
33
- mock_initialize.return_value.__enter__.return_value = None
34
-
35
- doi = "10.1101/2025.04.25.25326432"
36
-
37
- dummy_response = MagicMock()
38
- dummy_response.status_code = 200
39
- dummy_response.raise_for_status = MagicMock()
40
- dummy_response.json.return_value = {
41
- "collection": [
42
- {
43
- "title": "Sample Medrxiv Paper",
44
- "authors": "Author One; Author Two",
45
- "abstract": "This is a medRxiv abstract.",
46
- "date": "2025-04-25",
47
- "doi": doi,
48
- "link": f"https://www.medrxiv.org/content/{doi}.full.pdf"
49
- }
50
- ]
51
- }
52
- mock_get.return_value = dummy_response
53
-
54
- tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
55
- result = download_medrxiv_paper.run(tool_input)
56
- update = result.update
57
-
58
- self.assertIn("article_data", update)
59
- self.assertIn(doi, update["article_data"])
60
- metadata = update["article_data"][doi]
61
- self.assertEqual(metadata["Title"], "Sample Medrxiv Paper")
62
- self.assertEqual(metadata["Authors"], "Author One; Author Two")
63
- self.assertEqual(metadata["Abstract"], "This is a medRxiv abstract.")
64
- self.assertEqual(metadata["Publication Date"], "2025-04-25")
65
- self.assertEqual(metadata["URL"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
66
- self.assertEqual(metadata["pdf_url"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
67
- self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
68
- self.assertEqual(metadata["source"], "medrxiv")
69
- self.assertEqual(metadata["medrxiv_id"], doi)
70
-
71
- self.assertTrue(len(update["messages"]) >= 1)
72
- self.assertIsInstance(update["messages"][0], ToolMessage)
73
- self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
74
-
75
- @patch(
76
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
77
- )
78
- @patch(
79
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
80
- )
81
- @patch(
82
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
83
- )
84
- def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
85
- """Test behavior when no 'entry' is in response."""
86
- dummy_cfg = MagicMock()
87
- dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
88
- dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
89
- mock_compose.return_value = dummy_cfg
90
- mock_initialize.return_value.__enter__.return_value = None
91
-
92
- dummy_response = MagicMock()
93
- dummy_response.status_code = 200
94
- dummy_response.raise_for_status = MagicMock()
95
- dummy_response.json.return_value = {} # No entry
96
- mock_get.return_value = dummy_response
97
-
98
- doi = "10.1101/2025.04.25.25326432"
99
- tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
100
-
101
- with self.assertRaises(ValueError) as context:
102
- download_medrxiv_paper.run(tool_input)
103
-
104
- self.assertEqual(str(context.exception), f"No entry found for medRxiv ID {doi}")
105
-
106
- @patch(
107
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
108
- )
109
- @patch(
110
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
111
- )
112
- @patch(
113
- "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
114
- )
115
- def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
116
- """Test fallback to DOI-based PDF URL construction when 'link' is missing."""
117
- dummy_cfg = MagicMock()
118
- dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
119
- dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
120
- mock_compose.return_value = dummy_cfg
121
- mock_initialize.return_value.__enter__.return_value = None
122
-
123
- doi = "10.1101/2025.04.25.25326432"
124
-
125
- dummy_response = MagicMock()
126
- dummy_response.status_code = 200
127
- dummy_response.raise_for_status = MagicMock()
128
- dummy_response.json.return_value = {
129
- "collection": [
130
- {
131
- "title": "Sample Medrxiv Paper",
132
- "authors": "Author One; Author Two",
133
- "abstract": "This is a medRxiv abstract.",
134
- "date": "2025-04-25",
135
- "doi": doi
136
- # 'link' is intentionally omitted
137
- }
138
- ]
139
- }
140
- mock_get.return_value = dummy_response
141
-
142
- tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
143
- result = download_medrxiv_paper.run(tool_input)
144
- update = result.update
145
- metadata = update["article_data"][doi]
146
-
147
- # Assert that the PDF URL was constructed from DOI
148
- expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
149
- expected_url = f"https://www.medrxiv.org/content/10.1101/{expected_suffix}.full.pdf"
150
-
151
- self.assertEqual(metadata["pdf_url"], expected_url)
@@ -1,249 +0,0 @@
1
- """
2
- Unit tests for arXiv paper downloading functionality, including:
3
- - download_arxiv_paper tool function.
4
- """
5
-
6
- import unittest
7
- from unittest.mock import MagicMock, patch
8
-
9
- import pytest
10
- from langchain_core.messages import ToolMessage
11
-
12
- from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
13
- _get_snippet,
14
- download_arxiv_paper,
15
- )
16
-
17
-
18
- class TestDownloadArxivPaper(unittest.TestCase):
19
- """tests for the download_arxiv_paper tool."""
20
-
21
- @patch(
22
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
23
- )
24
- @patch(
25
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
26
- )
27
- @patch(
28
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
29
- )
30
- def test_download_arxiv_paper_success(
31
- self, mock_get, mock_compose, mock_initialize
32
- ):
33
- """test the download_arxiv_paper tool for successful retrieval of metadata and PDF URL."""
34
- # Set up a dummy Hydra config.
35
- dummy_cfg = MagicMock()
36
- dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
37
- dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
38
- mock_compose.return_value = dummy_cfg
39
- mock_initialize.return_value.__enter__.return_value = None
40
-
41
- # Set up a dummy XML response with a valid entry including a pdf link.
42
- arxiv_id = "1234.56789"
43
- dummy_response = MagicMock()
44
- dummy_response.text = (
45
- f"""<?xml version=\"1.0\" encoding=\"UTF-8\"?>
46
- <feed xmlns=\"http://www.w3.org/2005/Atom\">"""
47
- f" <entry>"
48
- f"<title>Sample Paper Title</title>"
49
- f"<author><name>Author One</name></author>"
50
- f"<author><name>Author Two</name></author>"
51
- f"<summary>This is a sample abstract.</summary>"
52
- f"<published>2020-01-01T00:00:00Z</published>"
53
- f'<link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>'
54
- f"</entry></feed>"
55
- )
56
- dummy_response.raise_for_status = MagicMock()
57
- mock_get.return_value = dummy_response
58
-
59
- tool_call_id = "test_tool_id"
60
- tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
61
- result = download_arxiv_paper.run(tool_input)
62
- update = result.update
63
-
64
- # Check that article_data was correctly set.
65
- self.assertIn("article_data", update)
66
- self.assertIn(arxiv_id, update["article_data"])
67
- metadata = update["article_data"][arxiv_id]
68
- self.assertEqual(metadata["Title"], "Sample Paper Title")
69
- self.assertEqual(metadata["Authors"], ["Author One", "Author Two"])
70
- self.assertEqual(metadata["Abstract"], "This is a sample abstract.")
71
- self.assertEqual(metadata["Publication Date"], "2020-01-01T00:00:00Z")
72
- self.assertEqual(metadata["URL"], f"http://arxiv.org/pdf/{arxiv_id}v1")
73
- self.assertEqual(metadata["pdf_url"], f"http://arxiv.org/pdf/{arxiv_id}v1")
74
- self.assertEqual(metadata["filename"], f"{arxiv_id}.pdf")
75
- self.assertEqual(metadata["source"], "arxiv")
76
- self.assertEqual(metadata["arxiv_id"], arxiv_id)
77
-
78
- # Check that the message content matches the new summary format
79
- messages = update["messages"]
80
- self.assertEqual(len(messages), 1)
81
- self.assertIsInstance(messages[0], ToolMessage)
82
- content = messages[0].content
83
- # Build expected summary
84
- expected = (
85
- "Download was successful. Papers metadata are attached as an artifact. "
86
- "Here is a summary of the results:\n"
87
- f"Number of papers found: 1\n"
88
- "Top 3 papers:\n"
89
- f"1. Sample Paper Title (2020-01-01T00:00:00Z)\n"
90
- f" View PDF: http://arxiv.org/pdf/{arxiv_id}v1\n"
91
- " Abstract snippet: This is a sample abstract."
92
- )
93
- self.assertEqual(content, expected)
94
-
95
- @patch(
96
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
97
- )
98
- @patch(
99
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
100
- )
101
- @patch(
102
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
103
- )
104
- def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
105
- """test the download_arxiv_paper tool for no entry found in XML response."""
106
- # Dummy config as before.
107
- dummy_cfg = MagicMock()
108
- dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
109
- dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
110
- mock_compose.return_value = dummy_cfg
111
- mock_initialize.return_value.__enter__.return_value = None
112
-
113
- # Set up XML with no entry element.
114
- arxiv_id = "1234.56789"
115
- dummy_xml = (
116
- """<?xml version="1.0" encoding="UTF-8"?>"""
117
- """<feed xmlns="http://www.w3.org/2005/Atom"></feed>"""
118
- )
119
- dummy_response = MagicMock()
120
- dummy_response.text = dummy_xml
121
- dummy_response.raise_for_status = MagicMock()
122
- mock_get.return_value = dummy_response
123
-
124
- tool_call_id = "test_tool_id"
125
- tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
126
- # No entry found should result in empty article_data and header-only summary
127
- result = download_arxiv_paper.run(tool_input)
128
- update = result.update
129
- self.assertIn("article_data", update)
130
- self.assertEqual(update["article_data"], {})
131
- messages = update.get("messages", [])
132
- self.assertEqual(len(messages), 1)
133
- content = messages[0].content
134
- expected = (
135
- "Download was successful. Papers metadata are attached as an artifact. "
136
- "Here is a summary of the results:\n"
137
- "Number of papers found: 0\n"
138
- "Top 3 papers:\n"
139
- )
140
- self.assertEqual(content, expected)
141
-
142
- @patch(
143
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
144
- )
145
- @patch(
146
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
147
- )
148
- @patch(
149
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
150
- )
151
- def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
152
- """test the download_arxiv_paper tool for no PDF URL found in XML response."""
153
- # Dummy config.
154
- dummy_cfg = MagicMock()
155
- dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
156
- dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
157
- mock_compose.return_value = dummy_cfg
158
- mock_initialize.return_value.__enter__.return_value = None
159
-
160
- # Set up XML with an entry that does not contain a pdf link.
161
- arxiv_id = "1234.56789"
162
- dummy_xml = """<?xml version="1.0" encoding="UTF-8"?>
163
- <feed xmlns="http://www.w3.org/2005/Atom">
164
- <entry>
165
- <title>Sample Paper Title</title>
166
- <author>
167
- <name>Author One</name>
168
- </author>
169
- <summary>This is a sample abstract.</summary>
170
- <published>2020-01-01T00:00:00Z</published>
171
- <!-- Missing pdf link -->
172
- </entry>
173
- </feed>
174
- """
175
- dummy_response = MagicMock()
176
- dummy_response.text = dummy_xml
177
- dummy_response.raise_for_status = MagicMock()
178
- mock_get.return_value = dummy_response
179
-
180
- tool_call_id = "test_tool_id"
181
- tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
182
- with self.assertRaises(RuntimeError) as context:
183
- download_arxiv_paper.run(tool_input)
184
- self.assertEqual(
185
- str(context.exception), f"Could not find PDF URL for arXiv ID {arxiv_id}"
186
- )
187
-
188
- @patch(
189
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.extract_metadata"
190
- )
191
- @patch(
192
- "aiagents4pharma.talk2scholars.tools.paper_download.download_"
193
- "arxiv_input.fetch_arxiv_metadata"
194
- )
195
- @patch(
196
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
197
- )
198
- @patch(
199
- "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
200
- )
201
- def test_summary_multiple_papers(
202
- self, mock_initialize, mock_compose, _mock_fetch, mock_extract
203
- ):
204
- """Test summary includes '...and N more papers.' when more than 3 papers."""
205
- # Dummy config
206
- dummy_cfg = MagicMock()
207
- dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy"
208
- dummy_cfg.tools.download_arxiv_paper.request_timeout = 5
209
- mock_compose.return_value = dummy_cfg
210
- mock_initialize.return_value.__enter__.return_value = None
211
-
212
- # Simulate metadata extraction for multiple papers
213
- def dummy_meta(_entry, _ns, aid):
214
- """dummy metadata extraction function."""
215
- return {
216
- "Title": f"T{aid}",
217
- "Publication Date": "2020-01-01T00:00:00Z",
218
- "URL": f"u{aid}v1",
219
- }
220
-
221
- mock_extract.side_effect = dummy_meta
222
- # Prepare 5 paper IDs
223
- ids = [str(i) for i in range(5)]
224
- tool_input = {"arxiv_ids": ids, "tool_call_id": "tid"}
225
- result = download_arxiv_paper.run(tool_input)
226
- summary = result.update["messages"][0].content
227
- # Should report total count of 5 and list only top 3 without ellipsis
228
- assert "Number of papers found: 5" in summary
229
- assert "Top 3 papers:" in summary
230
- # Entries for first three IDs should include URL and no ellipsis
231
- assert "1. T0 (2020-01-01T00:00:00Z)" in summary
232
- assert " View PDF: u0v1" in summary
233
- assert "3. T2 (2020-01-01T00:00:00Z)" in summary
234
- assert "...and" not in summary
235
-
236
-
237
- @pytest.mark.parametrize(
238
- "input_text,expected",
239
- [
240
- ("", ""),
241
- ("N/A", ""),
242
- ("Just one sentence", "Just one sentence."),
243
- ("First. Second", "First. Second."),
244
- ("Hello. World.", "Hello. World."),
245
- ],
246
- )
247
- def test_get_snippet_various(input_text, expected):
248
- """Test _get_snippet behavior for various abstracts."""
249
- assert _get_snippet(input_text) == expected