aiagents4pharma 1.41.0__py3-none-any.whl → 1.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +37 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +752 -350
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +7 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +49 -95
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +15 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +16 -2
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +40 -5
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +15 -5
- aiagents4pharma/talk2scholars/configs/config.yaml +1 -3
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py +478 -0
- aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py +620 -0
- aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py +697 -0
- aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py +534 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +22 -12
- aiagents4pharma/talk2scholars/tests/test_paper_downloader.py +545 -0
- aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py +1067 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +2 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +457 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +20 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +209 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +343 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +321 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +198 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +337 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +97 -45
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +47 -29
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/METADATA +30 -14
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/RECORD +38 -30
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +0 -4
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +0 -249
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +0 -177
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +0 -114
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +0 -114
- /aiagents4pharma/talk2scholars/configs/tools/{download_arxiv_paper → paper_download}/__init__.py +0 -0
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/top_level.txt +0 -0
@@ -1,151 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Unit tests for bioRxiv paper downloading functionality, including:
|
3
|
-
- download_bioRxiv_paper tool function.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import unittest
|
7
|
-
from unittest.mock import MagicMock, patch
|
8
|
-
from langchain_core.messages import ToolMessage
|
9
|
-
|
10
|
-
from aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input import (
|
11
|
-
download_biorxiv_paper,
|
12
|
-
)
|
13
|
-
|
14
|
-
|
15
|
-
class TestDownloadBiorxivPaper(unittest.TestCase):
|
16
|
-
"""Tests for the download_bioRxiv_paper tool."""
|
17
|
-
|
18
|
-
@patch(
|
19
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
20
|
-
)
|
21
|
-
@patch(
|
22
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
23
|
-
)
|
24
|
-
@patch(
|
25
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
26
|
-
)
|
27
|
-
def test_download_biorxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
|
28
|
-
"""Test successful metadata and PDF URL retrieval."""
|
29
|
-
dummy_cfg = MagicMock()
|
30
|
-
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
31
|
-
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
32
|
-
mock_compose.return_value = dummy_cfg
|
33
|
-
mock_initialize.return_value.__enter__.return_value = None
|
34
|
-
|
35
|
-
doi = "10.1101/2025.05.13.653102"
|
36
|
-
|
37
|
-
dummy_response = MagicMock()
|
38
|
-
dummy_response.status_code = 200
|
39
|
-
dummy_response.raise_for_status = MagicMock()
|
40
|
-
dummy_response.json.return_value = {
|
41
|
-
"collection": [
|
42
|
-
{
|
43
|
-
"title": "Sample BioRxiv Paper",
|
44
|
-
"authors": "Author One; Author Two",
|
45
|
-
"abstract": "This is a bioRxiv abstract.",
|
46
|
-
"date": "2025-04-25",
|
47
|
-
"doi": doi,
|
48
|
-
"link": f"https://www.biorxiv.org/content/{doi}.full.pdf"
|
49
|
-
}
|
50
|
-
]
|
51
|
-
}
|
52
|
-
mock_get.return_value = dummy_response
|
53
|
-
|
54
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
55
|
-
result = download_biorxiv_paper.run(tool_input)
|
56
|
-
update = result.update
|
57
|
-
|
58
|
-
self.assertIn("article_data", update)
|
59
|
-
self.assertIn(doi, update["article_data"])
|
60
|
-
metadata = update["article_data"][doi]
|
61
|
-
self.assertEqual(metadata["Title"], "Sample BioRxiv Paper")
|
62
|
-
self.assertEqual(metadata["Authors"], "Author One; Author Two")
|
63
|
-
self.assertEqual(metadata["Abstract"], "This is a bioRxiv abstract.")
|
64
|
-
self.assertEqual(metadata["Publication Date"], "2025-04-25")
|
65
|
-
self.assertEqual(metadata["URL"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
|
66
|
-
self.assertEqual(metadata["pdf_url"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
|
67
|
-
self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
|
68
|
-
self.assertEqual(metadata["source"], "biorxiv")
|
69
|
-
self.assertEqual(metadata["biorxiv_id"], doi)
|
70
|
-
|
71
|
-
self.assertTrue(len(update["messages"]) >= 1)
|
72
|
-
self.assertIsInstance(update["messages"][0], ToolMessage)
|
73
|
-
self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
|
74
|
-
|
75
|
-
@patch(
|
76
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
77
|
-
)
|
78
|
-
@patch(
|
79
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
80
|
-
)
|
81
|
-
@patch(
|
82
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
83
|
-
)
|
84
|
-
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
85
|
-
"""Test behavior when no 'entry' is in response."""
|
86
|
-
dummy_cfg = MagicMock()
|
87
|
-
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
88
|
-
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
89
|
-
mock_compose.return_value = dummy_cfg
|
90
|
-
mock_initialize.return_value.__enter__.return_value = None
|
91
|
-
|
92
|
-
dummy_response = MagicMock()
|
93
|
-
dummy_response.status_code = 200
|
94
|
-
dummy_response.raise_for_status = MagicMock()
|
95
|
-
dummy_response.json.return_value = {} # No entry
|
96
|
-
mock_get.return_value = dummy_response
|
97
|
-
|
98
|
-
doi = "10.1101/2025.05.13.653102"
|
99
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
100
|
-
|
101
|
-
with self.assertRaises(ValueError) as context:
|
102
|
-
download_biorxiv_paper.run(tool_input)
|
103
|
-
|
104
|
-
self.assertEqual(str(context.exception), f"No metadata found for DOI: {doi}")
|
105
|
-
|
106
|
-
@patch(
|
107
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
108
|
-
)
|
109
|
-
@patch(
|
110
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
111
|
-
)
|
112
|
-
@patch(
|
113
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
114
|
-
)
|
115
|
-
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
116
|
-
"""Test fallback to DOI-based PDF URL construction when 'link' is missing."""
|
117
|
-
dummy_cfg = MagicMock()
|
118
|
-
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
119
|
-
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
120
|
-
mock_compose.return_value = dummy_cfg
|
121
|
-
mock_initialize.return_value.__enter__.return_value = None
|
122
|
-
|
123
|
-
doi = "10.1101/2025.05.13.653102"
|
124
|
-
|
125
|
-
dummy_response = MagicMock()
|
126
|
-
dummy_response.status_code = 200
|
127
|
-
dummy_response.raise_for_status = MagicMock()
|
128
|
-
dummy_response.json.return_value = {
|
129
|
-
"collection": [
|
130
|
-
{
|
131
|
-
"title": "Sample Biorxiv Paper",
|
132
|
-
"authors": "Author One; Author Two",
|
133
|
-
"abstract": "This is a BioRxiv abstract.",
|
134
|
-
"date": "2025-04-25",
|
135
|
-
"doi": doi
|
136
|
-
# 'link' is intentionally omitted
|
137
|
-
}
|
138
|
-
]
|
139
|
-
}
|
140
|
-
mock_get.return_value = dummy_response
|
141
|
-
|
142
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
143
|
-
result = download_biorxiv_paper.run(tool_input)
|
144
|
-
update = result.update
|
145
|
-
metadata = update["article_data"][doi]
|
146
|
-
|
147
|
-
# Assert that the PDF URL was constructed from DOI
|
148
|
-
expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
|
149
|
-
expected_url = f"https://www.biorxiv.org/content/10.1101/{expected_suffix}.full.pdf"
|
150
|
-
|
151
|
-
self.assertEqual(metadata["pdf_url"], expected_url)
|
@@ -1,151 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Unit tests for medrXiv paper downloading functionality, including:
|
3
|
-
- download_medrxiv_paper tool function.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import unittest
|
7
|
-
from unittest.mock import MagicMock, patch
|
8
|
-
from langchain_core.messages import ToolMessage
|
9
|
-
|
10
|
-
from aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input import (
|
11
|
-
download_medrxiv_paper,
|
12
|
-
)
|
13
|
-
|
14
|
-
|
15
|
-
class TestDownloadMedrxivPaper(unittest.TestCase):
|
16
|
-
"""Tests for the download_medrxiv_paper tool."""
|
17
|
-
|
18
|
-
@patch(
|
19
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
|
20
|
-
)
|
21
|
-
@patch(
|
22
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
|
23
|
-
)
|
24
|
-
@patch(
|
25
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
|
26
|
-
)
|
27
|
-
def test_download_medrxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
|
28
|
-
"""Test successful metadata and PDF URL retrieval."""
|
29
|
-
dummy_cfg = MagicMock()
|
30
|
-
dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
|
31
|
-
dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
|
32
|
-
mock_compose.return_value = dummy_cfg
|
33
|
-
mock_initialize.return_value.__enter__.return_value = None
|
34
|
-
|
35
|
-
doi = "10.1101/2025.04.25.25326432"
|
36
|
-
|
37
|
-
dummy_response = MagicMock()
|
38
|
-
dummy_response.status_code = 200
|
39
|
-
dummy_response.raise_for_status = MagicMock()
|
40
|
-
dummy_response.json.return_value = {
|
41
|
-
"collection": [
|
42
|
-
{
|
43
|
-
"title": "Sample Medrxiv Paper",
|
44
|
-
"authors": "Author One; Author Two",
|
45
|
-
"abstract": "This is a medRxiv abstract.",
|
46
|
-
"date": "2025-04-25",
|
47
|
-
"doi": doi,
|
48
|
-
"link": f"https://www.medrxiv.org/content/{doi}.full.pdf"
|
49
|
-
}
|
50
|
-
]
|
51
|
-
}
|
52
|
-
mock_get.return_value = dummy_response
|
53
|
-
|
54
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
55
|
-
result = download_medrxiv_paper.run(tool_input)
|
56
|
-
update = result.update
|
57
|
-
|
58
|
-
self.assertIn("article_data", update)
|
59
|
-
self.assertIn(doi, update["article_data"])
|
60
|
-
metadata = update["article_data"][doi]
|
61
|
-
self.assertEqual(metadata["Title"], "Sample Medrxiv Paper")
|
62
|
-
self.assertEqual(metadata["Authors"], "Author One; Author Two")
|
63
|
-
self.assertEqual(metadata["Abstract"], "This is a medRxiv abstract.")
|
64
|
-
self.assertEqual(metadata["Publication Date"], "2025-04-25")
|
65
|
-
self.assertEqual(metadata["URL"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
|
66
|
-
self.assertEqual(metadata["pdf_url"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
|
67
|
-
self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
|
68
|
-
self.assertEqual(metadata["source"], "medrxiv")
|
69
|
-
self.assertEqual(metadata["medrxiv_id"], doi)
|
70
|
-
|
71
|
-
self.assertTrue(len(update["messages"]) >= 1)
|
72
|
-
self.assertIsInstance(update["messages"][0], ToolMessage)
|
73
|
-
self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
|
74
|
-
|
75
|
-
@patch(
|
76
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
|
77
|
-
)
|
78
|
-
@patch(
|
79
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
|
80
|
-
)
|
81
|
-
@patch(
|
82
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
|
83
|
-
)
|
84
|
-
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
85
|
-
"""Test behavior when no 'entry' is in response."""
|
86
|
-
dummy_cfg = MagicMock()
|
87
|
-
dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
|
88
|
-
dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
|
89
|
-
mock_compose.return_value = dummy_cfg
|
90
|
-
mock_initialize.return_value.__enter__.return_value = None
|
91
|
-
|
92
|
-
dummy_response = MagicMock()
|
93
|
-
dummy_response.status_code = 200
|
94
|
-
dummy_response.raise_for_status = MagicMock()
|
95
|
-
dummy_response.json.return_value = {} # No entry
|
96
|
-
mock_get.return_value = dummy_response
|
97
|
-
|
98
|
-
doi = "10.1101/2025.04.25.25326432"
|
99
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
100
|
-
|
101
|
-
with self.assertRaises(ValueError) as context:
|
102
|
-
download_medrxiv_paper.run(tool_input)
|
103
|
-
|
104
|
-
self.assertEqual(str(context.exception), f"No entry found for medRxiv ID {doi}")
|
105
|
-
|
106
|
-
@patch(
|
107
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
|
108
|
-
)
|
109
|
-
@patch(
|
110
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
|
111
|
-
)
|
112
|
-
@patch(
|
113
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
|
114
|
-
)
|
115
|
-
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
116
|
-
"""Test fallback to DOI-based PDF URL construction when 'link' is missing."""
|
117
|
-
dummy_cfg = MagicMock()
|
118
|
-
dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
|
119
|
-
dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
|
120
|
-
mock_compose.return_value = dummy_cfg
|
121
|
-
mock_initialize.return_value.__enter__.return_value = None
|
122
|
-
|
123
|
-
doi = "10.1101/2025.04.25.25326432"
|
124
|
-
|
125
|
-
dummy_response = MagicMock()
|
126
|
-
dummy_response.status_code = 200
|
127
|
-
dummy_response.raise_for_status = MagicMock()
|
128
|
-
dummy_response.json.return_value = {
|
129
|
-
"collection": [
|
130
|
-
{
|
131
|
-
"title": "Sample Medrxiv Paper",
|
132
|
-
"authors": "Author One; Author Two",
|
133
|
-
"abstract": "This is a medRxiv abstract.",
|
134
|
-
"date": "2025-04-25",
|
135
|
-
"doi": doi
|
136
|
-
# 'link' is intentionally omitted
|
137
|
-
}
|
138
|
-
]
|
139
|
-
}
|
140
|
-
mock_get.return_value = dummy_response
|
141
|
-
|
142
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
143
|
-
result = download_medrxiv_paper.run(tool_input)
|
144
|
-
update = result.update
|
145
|
-
metadata = update["article_data"][doi]
|
146
|
-
|
147
|
-
# Assert that the PDF URL was constructed from DOI
|
148
|
-
expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
|
149
|
-
expected_url = f"https://www.medrxiv.org/content/10.1101/{expected_suffix}.full.pdf"
|
150
|
-
|
151
|
-
self.assertEqual(metadata["pdf_url"], expected_url)
|
@@ -1,249 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Unit tests for arXiv paper downloading functionality, including:
|
3
|
-
- download_arxiv_paper tool function.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import unittest
|
7
|
-
from unittest.mock import MagicMock, patch
|
8
|
-
|
9
|
-
import pytest
|
10
|
-
from langchain_core.messages import ToolMessage
|
11
|
-
|
12
|
-
from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
|
13
|
-
_get_snippet,
|
14
|
-
download_arxiv_paper,
|
15
|
-
)
|
16
|
-
|
17
|
-
|
18
|
-
class TestDownloadArxivPaper(unittest.TestCase):
|
19
|
-
"""tests for the download_arxiv_paper tool."""
|
20
|
-
|
21
|
-
@patch(
|
22
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
23
|
-
)
|
24
|
-
@patch(
|
25
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
26
|
-
)
|
27
|
-
@patch(
|
28
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
|
29
|
-
)
|
30
|
-
def test_download_arxiv_paper_success(
|
31
|
-
self, mock_get, mock_compose, mock_initialize
|
32
|
-
):
|
33
|
-
"""test the download_arxiv_paper tool for successful retrieval of metadata and PDF URL."""
|
34
|
-
# Set up a dummy Hydra config.
|
35
|
-
dummy_cfg = MagicMock()
|
36
|
-
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
|
37
|
-
dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
|
38
|
-
mock_compose.return_value = dummy_cfg
|
39
|
-
mock_initialize.return_value.__enter__.return_value = None
|
40
|
-
|
41
|
-
# Set up a dummy XML response with a valid entry including a pdf link.
|
42
|
-
arxiv_id = "1234.56789"
|
43
|
-
dummy_response = MagicMock()
|
44
|
-
dummy_response.text = (
|
45
|
-
f"""<?xml version=\"1.0\" encoding=\"UTF-8\"?>
|
46
|
-
<feed xmlns=\"http://www.w3.org/2005/Atom\">"""
|
47
|
-
f" <entry>"
|
48
|
-
f"<title>Sample Paper Title</title>"
|
49
|
-
f"<author><name>Author One</name></author>"
|
50
|
-
f"<author><name>Author Two</name></author>"
|
51
|
-
f"<summary>This is a sample abstract.</summary>"
|
52
|
-
f"<published>2020-01-01T00:00:00Z</published>"
|
53
|
-
f'<link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>'
|
54
|
-
f"</entry></feed>"
|
55
|
-
)
|
56
|
-
dummy_response.raise_for_status = MagicMock()
|
57
|
-
mock_get.return_value = dummy_response
|
58
|
-
|
59
|
-
tool_call_id = "test_tool_id"
|
60
|
-
tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
|
61
|
-
result = download_arxiv_paper.run(tool_input)
|
62
|
-
update = result.update
|
63
|
-
|
64
|
-
# Check that article_data was correctly set.
|
65
|
-
self.assertIn("article_data", update)
|
66
|
-
self.assertIn(arxiv_id, update["article_data"])
|
67
|
-
metadata = update["article_data"][arxiv_id]
|
68
|
-
self.assertEqual(metadata["Title"], "Sample Paper Title")
|
69
|
-
self.assertEqual(metadata["Authors"], ["Author One", "Author Two"])
|
70
|
-
self.assertEqual(metadata["Abstract"], "This is a sample abstract.")
|
71
|
-
self.assertEqual(metadata["Publication Date"], "2020-01-01T00:00:00Z")
|
72
|
-
self.assertEqual(metadata["URL"], f"http://arxiv.org/pdf/{arxiv_id}v1")
|
73
|
-
self.assertEqual(metadata["pdf_url"], f"http://arxiv.org/pdf/{arxiv_id}v1")
|
74
|
-
self.assertEqual(metadata["filename"], f"{arxiv_id}.pdf")
|
75
|
-
self.assertEqual(metadata["source"], "arxiv")
|
76
|
-
self.assertEqual(metadata["arxiv_id"], arxiv_id)
|
77
|
-
|
78
|
-
# Check that the message content matches the new summary format
|
79
|
-
messages = update["messages"]
|
80
|
-
self.assertEqual(len(messages), 1)
|
81
|
-
self.assertIsInstance(messages[0], ToolMessage)
|
82
|
-
content = messages[0].content
|
83
|
-
# Build expected summary
|
84
|
-
expected = (
|
85
|
-
"Download was successful. Papers metadata are attached as an artifact. "
|
86
|
-
"Here is a summary of the results:\n"
|
87
|
-
f"Number of papers found: 1\n"
|
88
|
-
"Top 3 papers:\n"
|
89
|
-
f"1. Sample Paper Title (2020-01-01T00:00:00Z)\n"
|
90
|
-
f" View PDF: http://arxiv.org/pdf/{arxiv_id}v1\n"
|
91
|
-
" Abstract snippet: This is a sample abstract."
|
92
|
-
)
|
93
|
-
self.assertEqual(content, expected)
|
94
|
-
|
95
|
-
@patch(
|
96
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
97
|
-
)
|
98
|
-
@patch(
|
99
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
100
|
-
)
|
101
|
-
@patch(
|
102
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
|
103
|
-
)
|
104
|
-
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
105
|
-
"""test the download_arxiv_paper tool for no entry found in XML response."""
|
106
|
-
# Dummy config as before.
|
107
|
-
dummy_cfg = MagicMock()
|
108
|
-
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
|
109
|
-
dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
|
110
|
-
mock_compose.return_value = dummy_cfg
|
111
|
-
mock_initialize.return_value.__enter__.return_value = None
|
112
|
-
|
113
|
-
# Set up XML with no entry element.
|
114
|
-
arxiv_id = "1234.56789"
|
115
|
-
dummy_xml = (
|
116
|
-
"""<?xml version="1.0" encoding="UTF-8"?>"""
|
117
|
-
"""<feed xmlns="http://www.w3.org/2005/Atom"></feed>"""
|
118
|
-
)
|
119
|
-
dummy_response = MagicMock()
|
120
|
-
dummy_response.text = dummy_xml
|
121
|
-
dummy_response.raise_for_status = MagicMock()
|
122
|
-
mock_get.return_value = dummy_response
|
123
|
-
|
124
|
-
tool_call_id = "test_tool_id"
|
125
|
-
tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
|
126
|
-
# No entry found should result in empty article_data and header-only summary
|
127
|
-
result = download_arxiv_paper.run(tool_input)
|
128
|
-
update = result.update
|
129
|
-
self.assertIn("article_data", update)
|
130
|
-
self.assertEqual(update["article_data"], {})
|
131
|
-
messages = update.get("messages", [])
|
132
|
-
self.assertEqual(len(messages), 1)
|
133
|
-
content = messages[0].content
|
134
|
-
expected = (
|
135
|
-
"Download was successful. Papers metadata are attached as an artifact. "
|
136
|
-
"Here is a summary of the results:\n"
|
137
|
-
"Number of papers found: 0\n"
|
138
|
-
"Top 3 papers:\n"
|
139
|
-
)
|
140
|
-
self.assertEqual(content, expected)
|
141
|
-
|
142
|
-
@patch(
|
143
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
144
|
-
)
|
145
|
-
@patch(
|
146
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
147
|
-
)
|
148
|
-
@patch(
|
149
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.requests.get"
|
150
|
-
)
|
151
|
-
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
152
|
-
"""test the download_arxiv_paper tool for no PDF URL found in XML response."""
|
153
|
-
# Dummy config.
|
154
|
-
dummy_cfg = MagicMock()
|
155
|
-
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy.arxiv.org/api"
|
156
|
-
dummy_cfg.tools.download_arxiv_paper.request_timeout = 10
|
157
|
-
mock_compose.return_value = dummy_cfg
|
158
|
-
mock_initialize.return_value.__enter__.return_value = None
|
159
|
-
|
160
|
-
# Set up XML with an entry that does not contain a pdf link.
|
161
|
-
arxiv_id = "1234.56789"
|
162
|
-
dummy_xml = """<?xml version="1.0" encoding="UTF-8"?>
|
163
|
-
<feed xmlns="http://www.w3.org/2005/Atom">
|
164
|
-
<entry>
|
165
|
-
<title>Sample Paper Title</title>
|
166
|
-
<author>
|
167
|
-
<name>Author One</name>
|
168
|
-
</author>
|
169
|
-
<summary>This is a sample abstract.</summary>
|
170
|
-
<published>2020-01-01T00:00:00Z</published>
|
171
|
-
<!-- Missing pdf link -->
|
172
|
-
</entry>
|
173
|
-
</feed>
|
174
|
-
"""
|
175
|
-
dummy_response = MagicMock()
|
176
|
-
dummy_response.text = dummy_xml
|
177
|
-
dummy_response.raise_for_status = MagicMock()
|
178
|
-
mock_get.return_value = dummy_response
|
179
|
-
|
180
|
-
tool_call_id = "test_tool_id"
|
181
|
-
tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
|
182
|
-
with self.assertRaises(RuntimeError) as context:
|
183
|
-
download_arxiv_paper.run(tool_input)
|
184
|
-
self.assertEqual(
|
185
|
-
str(context.exception), f"Could not find PDF URL for arXiv ID {arxiv_id}"
|
186
|
-
)
|
187
|
-
|
188
|
-
@patch(
|
189
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.extract_metadata"
|
190
|
-
)
|
191
|
-
@patch(
|
192
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_"
|
193
|
-
"arxiv_input.fetch_arxiv_metadata"
|
194
|
-
)
|
195
|
-
@patch(
|
196
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
|
197
|
-
)
|
198
|
-
@patch(
|
199
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
|
200
|
-
)
|
201
|
-
def test_summary_multiple_papers(
|
202
|
-
self, mock_initialize, mock_compose, _mock_fetch, mock_extract
|
203
|
-
):
|
204
|
-
"""Test summary includes '...and N more papers.' when more than 3 papers."""
|
205
|
-
# Dummy config
|
206
|
-
dummy_cfg = MagicMock()
|
207
|
-
dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy"
|
208
|
-
dummy_cfg.tools.download_arxiv_paper.request_timeout = 5
|
209
|
-
mock_compose.return_value = dummy_cfg
|
210
|
-
mock_initialize.return_value.__enter__.return_value = None
|
211
|
-
|
212
|
-
# Simulate metadata extraction for multiple papers
|
213
|
-
def dummy_meta(_entry, _ns, aid):
|
214
|
-
"""dummy metadata extraction function."""
|
215
|
-
return {
|
216
|
-
"Title": f"T{aid}",
|
217
|
-
"Publication Date": "2020-01-01T00:00:00Z",
|
218
|
-
"URL": f"u{aid}v1",
|
219
|
-
}
|
220
|
-
|
221
|
-
mock_extract.side_effect = dummy_meta
|
222
|
-
# Prepare 5 paper IDs
|
223
|
-
ids = [str(i) for i in range(5)]
|
224
|
-
tool_input = {"arxiv_ids": ids, "tool_call_id": "tid"}
|
225
|
-
result = download_arxiv_paper.run(tool_input)
|
226
|
-
summary = result.update["messages"][0].content
|
227
|
-
# Should report total count of 5 and list only top 3 without ellipsis
|
228
|
-
assert "Number of papers found: 5" in summary
|
229
|
-
assert "Top 3 papers:" in summary
|
230
|
-
# Entries for first three IDs should include URL and no ellipsis
|
231
|
-
assert "1. T0 (2020-01-01T00:00:00Z)" in summary
|
232
|
-
assert " View PDF: u0v1" in summary
|
233
|
-
assert "3. T2 (2020-01-01T00:00:00Z)" in summary
|
234
|
-
assert "...and" not in summary
|
235
|
-
|
236
|
-
|
237
|
-
@pytest.mark.parametrize(
|
238
|
-
"input_text,expected",
|
239
|
-
[
|
240
|
-
("", ""),
|
241
|
-
("N/A", ""),
|
242
|
-
("Just one sentence", "Just one sentence."),
|
243
|
-
("First. Second", "First. Second."),
|
244
|
-
("Hello. World.", "Hello. World."),
|
245
|
-
],
|
246
|
-
)
|
247
|
-
def test_get_snippet_various(input_text, expected):
|
248
|
-
"""Test _get_snippet behavior for various abstracts."""
|
249
|
-
assert _get_snippet(input_text) == expected
|