aiagents4pharma 1.37.0__py3-none-any.whl → 1.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +12 -4
  2. aiagents4pharma/talk2scholars/configs/config.yaml +2 -0
  3. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +3 -0
  4. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +3 -0
  5. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -0
  6. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +33 -7
  7. aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +151 -0
  8. aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +151 -0
  9. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +59 -3
  10. aiagents4pharma/talk2scholars/tests/test_read_helper_utils.py +110 -0
  11. aiagents4pharma/talk2scholars/tests/test_s2_display.py +20 -1
  12. aiagents4pharma/talk2scholars/tests/test_s2_query.py +17 -0
  13. aiagents4pharma/talk2scholars/tests/test_state.py +25 -1
  14. aiagents4pharma/talk2scholars/tests/test_zotero_pdf_downloader_utils.py +46 -0
  15. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +35 -40
  16. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +4 -1
  17. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +112 -0
  18. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +112 -0
  19. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +82 -41
  20. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +6 -2
  21. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +2 -1
  22. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +7 -3
  23. aiagents4pharma/talk2scholars/tools/s2/search.py +2 -1
  24. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +2 -1
  25. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +2 -0
  26. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +2 -0
  27. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +2 -0
  28. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +79 -136
  29. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +147 -0
  30. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +42 -9
  31. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/METADATA +1 -1
  32. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/RECORD +35 -26
  33. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/WHEEL +1 -1
  34. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/licenses/LICENSE +0 -0
  35. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/top_level.txt +0 -0
@@ -52,7 +52,9 @@ class TestS2Tools:
52
52
  raised_error,
53
53
  match="No papers found. A search/rec needs to be performed first.",
54
54
  ):
55
- display_dataframe.invoke({"state": initial_state, "tool_call_id": "test123"})
55
+ display_dataframe.invoke(
56
+ {"state": initial_state, "tool_call_id": "test123"}
57
+ )
56
58
 
57
59
  def test_display_dataframe_shows_papers(self, initial_state):
58
60
  """Verifies display_dataframe tool correctly returns papers from state"""
@@ -72,3 +74,20 @@ class TestS2Tools:
72
74
  "1 papers found. Papers are attached as an artifact."
73
75
  in result.update["messages"][0].content
74
76
  )
77
+
78
def test_display_dataframe_direct_mapping(self, initial_state):
    """Verifies display_dataframe handles direct dict mapping in last_displayed_papers."""
    # Work on a copy of the fixture and store the paper mapping itself under the key
    seeded_state = initial_state.copy()
    seeded_state["last_displayed_papers"] = MOCK_STATE_PAPER

    command = display_dataframe.invoke(
        {"state": seeded_state, "tool_call_id": "test123"}
    )
    assert isinstance(command, Command)

    # Exactly one message is expected in the state update
    tool_messages = command.update.get("messages", [])
    assert len(tool_messages) == 1
    only_message = tool_messages[0]

    # The artifact must be the mapping that was stored, and the text must report its size
    assert only_message.artifact == MOCK_STATE_PAPER
    assert "1 papers found" in only_message.content
@@ -76,3 +76,20 @@ class TestS2Tools:
76
76
 
77
77
  assert isinstance(result, str) # Ensure output is a string
78
78
  assert result == "Mocked response" # Validate the expected response
79
+
80
@patch(
    "aiagents4pharma.talk2scholars.tools.s2.query_dataframe.create_pandas_dataframe_agent"
)
def test_query_dataframe_direct_mapping(self, mock_create_agent, initial_state):
    """Tests query_dataframe when last_displayed_papers is a direct dict mapping."""
    # Stand-in agent whose invoke() yields a fixed answer
    fake_agent = MagicMock()
    fake_agent.invoke.return_value = {"output": "Direct mapping response"}
    mock_create_agent.return_value = fake_agent

    # Copy the fixture state and store the paper mapping itself under the key
    seeded_state = initial_state.copy()
    seeded_state["last_displayed_papers"] = MOCK_STATE_PAPER

    answer = query_dataframe.invoke(
        {"question": "Filter papers", "state": seeded_state}
    )

    # The tool is expected to hand back the agent's "output" text as a plain string
    assert isinstance(answer, str)
    assert answer == "Direct mapping response"
@@ -2,7 +2,7 @@
2
2
  Tests for state management functionality.
3
3
  """
4
4
 
5
- from ..state.state_talk2scholars import replace_dict
5
+ from ..state.state_talk2scholars import merge_dict, replace_dict
6
6
 
7
7
 
8
8
  def test_state_replace_dict():
@@ -12,3 +12,27 @@ def test_state_replace_dict():
12
12
  result = replace_dict(existing, new)
13
13
  assert result == new
14
14
  assert isinstance(result, dict)
15
+
16
+
17
def test_state_merge_dict():
    """Verifies state dictionary merging works correctly"""
    base = {"a": 1, "b": 2}
    incoming = {"b": 3, "c": 4}

    merged = merge_dict(base, incoming)

    # Keys from both inputs are present; on a clash the incoming value wins
    assert merged == {"a": 1, "b": 3, "c": 4}
    assert isinstance(merged, dict)
    # The first argument must not have been mutated by the merge
    assert base == {"a": 1, "b": 2}
27
+
28
+
29
def test_replace_dict_non_mapping():
    """Verifies replace_dict returns non-mapping values directly"""
    current = {"key": "value"}
    replacement = "not_a_dict"

    outcome = replace_dict(current, replacement)

    # A non-dict replacement is expected to come back as-is
    assert outcome == replacement
    # The existing mapping must be left untouched
    assert current == {"key": "value"}
@@ -0,0 +1,46 @@
1
+ """
2
+ Unit tests for Zotero PDF downloader utilities.
3
+ """
4
+
5
+ import os
6
+ import unittest
7
+ from unittest.mock import MagicMock, patch
8
+
9
+ import requests
10
+
11
+ from aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_pdf_downloader import (
12
+ download_pdfs_in_parallel,
13
+ download_zotero_pdf,
14
+ )
15
+
16
+
17
class TestZoteroPDFDownloaderUtils(unittest.TestCase):
    """Tests for zotero_pdf_downloader module."""

    @patch("requests.Session.get")
    def test_download_zotero_pdf_default_filename(self, mock_get):
        """Test download_zotero_pdf returns default filename when header has no filename."""
        # Response stub: status check passes, body is one chunk, and the headers
        # are empty (so there is no Content-Disposition filename to pick up).
        mock_response = MagicMock()
        mock_response.raise_for_status = lambda: None
        mock_response.iter_content = lambda chunk_size: [b"fakepdf"]
        mock_response.headers = {}
        mock_get.return_value = mock_response

        session = requests.Session()
        result = download_zotero_pdf(session, "user123", "apikey", "attach123")
        # Should return a tuple (file_path, filename)
        self.assertIsNotNone(result)
        file_path, filename = result
        # File should exist
        self.assertTrue(os.path.isfile(file_path))
        # Schedule removal as soon as the file is known to exist, so a failing
        # assertion below cannot leave the temporary file behind.
        self.addCleanup(os.remove, file_path)
        # Filename should default to 'downloaded.pdf'
        self.assertEqual(filename, "downloaded.pdf")

    def test_download_pdfs_in_parallel_empty(self):
        """Test that download_pdfs_in_parallel returns empty dict on empty input."""
        session = requests.Session()
        result = download_pdfs_in_parallel(session, "user123", "apikey", {})
        self.assertEqual(result, {})
@@ -2,14 +2,20 @@
2
2
  Unit tests for Zotero search tool in zotero_read.py.
3
3
  """
4
4
 
5
- from types import SimpleNamespace
6
5
  import unittest
7
- from unittest.mock import patch, MagicMock
6
+ from types import SimpleNamespace
7
+ from unittest.mock import MagicMock, patch
8
+
9
+ import requests
8
10
  from langgraph.types import Command
9
- from aiagents4pharma.talk2scholars.tools.zotero.zotero_read import zotero_read
11
+
10
12
  from aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper import (
11
13
  ZoteroSearchData,
12
14
  )
15
+ from aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_pdf_downloader import (
16
+ download_zotero_pdf,
17
+ )
18
+ from aiagents4pharma.talk2scholars.tools.zotero.zotero_read import zotero_read
13
19
 
14
20
  # pylint: disable=protected-access
15
21
  # pylint: disable=protected-access, too-many-arguments, too-many-positional-arguments
@@ -22,7 +28,6 @@ dummy_zotero_read_config = SimpleNamespace(
22
28
  zotero=SimpleNamespace(
23
29
  max_limit=5,
24
30
  filter_item_types=["journalArticle", "conferencePaper"],
25
- filter_excluded_types=["attachment", "note"],
26
31
  ),
27
32
  )
28
33
  dummy_cfg = SimpleNamespace(tools=SimpleNamespace(zotero_read=dummy_zotero_read_config))
@@ -204,8 +209,7 @@ class TestZoteroSearchTool(unittest.TestCase):
204
209
  "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.hydra.initialize"
205
210
  )
206
211
  @patch(
207
- "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper."
208
- "ZoteroSearchData._download_pdfs_in_parallel"
212
+ "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.download_pdfs_in_parallel"
209
213
  )
210
214
  def test_filtering_no_matching_papers(
211
215
  self,
@@ -260,6 +264,7 @@ class TestZoteroSearchTool(unittest.TestCase):
260
264
  "only_articles": False,
261
265
  "tool_call_id": "test_id_4",
262
266
  "limit": 2,
267
+ "download_pdfs": True,
263
268
  }
264
269
 
265
270
  result = zotero_read.run(tool_input)
@@ -514,6 +519,7 @@ class TestZoteroSearchTool(unittest.TestCase):
514
519
  "only_articles": True,
515
520
  "tool_call_id": "test_pdf_success",
516
521
  "limit": 1,
522
+ "download_pdfs": True,
517
523
  }
518
524
 
519
525
  result = zotero_read.run(tool_input)
@@ -713,39 +719,26 @@ class TestZoteroSearchTool(unittest.TestCase):
713
719
  self.assertNotIn("filename", filtered_papers["paper1"])
714
720
  self.assertNotIn("attachment_key", filtered_papers["paper1"])
715
721
 
716
- @patch("aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.requests.get")
717
722
  @patch(
718
- "aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_path.get_item_collections"
723
+ "aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_pdf_downloader."
724
+ "requests.Session.get"
719
725
  )
720
- @patch("aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.zotero.Zotero")
721
- @patch("aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.hydra.compose")
722
- @patch(
723
- "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.hydra.initialize"
724
- )
725
- def test_download_zotero_pdf_exception(
726
- self,
727
- mock_hydra_init,
728
- mock_hydra_compose,
729
- mock_zotero_class,
730
- mock_get_item_collections,
731
- mock_requests_get,
732
- ):
733
- """Test that _download_zotero_pdf returns None and logs error on request exception."""
734
- # Setup mocks for config and Zotero client
735
- mock_hydra_compose.return_value = dummy_cfg
736
- mock_hydra_init.return_value.__enter__.return_value = None
737
- mock_zotero_class.return_value = MagicMock()
738
- mock_get_item_collections.return_value = {}
739
-
740
- # Simulate a request exception during PDF download
741
- mock_requests_get.side_effect = Exception("Simulated download failure")
742
-
743
- zotero_search = ZoteroSearchData(
744
- query="test", only_articles=False, limit=1, tool_call_id="test123"
726
+ def test_download_zotero_pdf_exception(self, mock_session_get):
727
+ """Test that download_zotero_pdf returns None and logs error on request exception."""
728
+ # Simulate a session.get exception during PDF download
729
+ mock_session_get.side_effect = requests.exceptions.RequestException(
730
+ "Simulated download failure"
745
731
  )
746
-
747
- result = zotero_search._download_zotero_pdf("FAKE_ATTACHMENT_KEY")
748
-
732
+ # Create a session for testing
733
+ session = requests.Session()
734
+ # Call the module-level download function
735
+ result = download_zotero_pdf(
736
+ session,
737
+ dummy_cfg.tools.zotero_read.user_id,
738
+ dummy_cfg.tools.zotero_read.api_key,
739
+ "FAKE_ATTACHMENT_KEY",
740
+ )
741
+ # Should return None on failure
749
742
  self.assertIsNone(result)
750
743
 
751
744
  @patch(
@@ -791,12 +784,14 @@ class TestZoteroSearchTool(unittest.TestCase):
791
784
  mock_zotero_class.return_value = fake_zot
792
785
  mock_get_item_collections.return_value = {"paper1": ["/Fake Collection"]}
793
786
 
794
- # Patch just the internal _download_zotero_pdf to raise an exception
787
+ # Patch the module-level download_zotero_pdf to raise an exception
795
788
  with patch(
796
- "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper."
797
- "ZoteroSearchData._download_zotero_pdf"
789
+ "aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_pdf_downloader."
790
+ "download_zotero_pdf"
798
791
  ) as mock_download_pdf:
799
- mock_download_pdf.side_effect = Exception("Simulated download error")
792
+ mock_download_pdf.side_effect = requests.exceptions.RequestException(
793
+ "Simulated download error"
794
+ )
800
795
 
801
796
  search = ZoteroSearchData(
802
797
  query="failure test",
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- This package provides modules for fetching and downloading academic papers from arXiv.
3
+ This package provides modules for fetching and downloading academic papers from arXiv,
4
+ biorxiv and medrxiv.
4
5
  """
5
6
 
6
7
  # Import modules
@@ -8,4 +9,6 @@ from . import download_arxiv_input
8
9
 
9
10
  __all__ = [
10
11
  "download_arxiv_input",
12
+ "download_biorxiv_input",
13
+ "download_medrxiv_input",
11
14
  ]
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tool for downloading bioRxiv paper metadata and retrieving the PDF URL.
4
+ """
5
+
6
+ import logging
7
+ from typing import Annotated, Any
8
+
9
+ import hydra
10
+ import requests
11
+ from langchain_core.messages import ToolMessage
12
+ from langchain_core.tools import tool
13
+ from langchain_core.tools.base import InjectedToolCallId
14
+ from langgraph.types import Command
15
+ from pydantic import BaseModel, Field
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DownloadBiorxivPaperInput(BaseModel):
    """Input schema for the bioRxiv paper download tool."""

    # DOI of the paper to look up. The description is runtime text and is kept as written.
    doi: str = Field(description=
        """The bioRxiv DOI, from search_helper or multi_helper or single_helper,
        used to retrieve the paper details and PDF URL."""
    )
    # NOTE: a logger.info("DOI Received: %s", doi) call used to sit here. In a class
    # body it runs once, when the class is defined, and `doi` is then the Field(...)
    # object rather than a caller-supplied DOI, so the log line was misleading.
    # The tool function logs the real DOI when it is invoked.

    # Identifier of the tool call, annotated with InjectedToolCallId.
    tool_call_id: Annotated[str, InjectedToolCallId]
31
+
32
def fetch_biorxiv_metadata(doi: str, api_url: str, request_timeout: int) -> dict:
    """
    Fetch the raw bioRxiv API record for a paper identified by its DOI.

    Parameters:
        doi (str): The DOI of the bioRxiv paper; a trailing version marker
            such as "v1" is removed before the lookup.
        api_url (str): Base URL of the metadata endpoint. The cleaned DOI is
            appended to it without any separator.
        request_timeout (int): Timeout forwarded to ``requests.get``.

    Returns:
        dict: The first entry of the "collection" list in the JSON response.

    Raises:
        requests.HTTPError: If ``raise_for_status`` rejects the response.
        ValueError: If the response has no "collection" or it is empty.
    """
    # Remove only a trailing version marker ("...v2"). Cutting at the first "v"
    # would truncate any identifier that merely contains that letter.
    stem, marker, version = doi.rpartition("v")
    clean_doi = stem if marker and version.isdigit() else doi

    # Use a separate local so the api_url argument is not rebound.
    request_url = f"{api_url}{clean_doi}"
    logger.info("Fetching metadata from api url: %s", request_url)
    response = requests.get(request_url, timeout=request_timeout)
    response.raise_for_status()

    # Decode the body once and reuse it for both the check and the result.
    data = response.json()
    if not data.get("collection"):
        raise ValueError(f"No metadata found for DOI: {doi}")

    return data["collection"][0]
57
+
58
def extract_metadata(paper: dict, doi: str) -> dict:
    """
    Assemble the article record for a bioRxiv paper from its API entry.

    Parameters:
        paper (dict): One API entry; the "title", "authors", "abstract",
            "date" and "doi" keys are read, each defaulting to "".
        doi (str): The DOI as supplied by the caller, stored under "biorxiv_id".

    Returns:
        dict: The copied display fields plus the derived PDF link and file name.
    """
    # Keep only what follows the "10.1101/" prefix of the entry's own DOI
    suffix = paper.get("doi", "").split("10.1101/")[-1]
    pdf_link = f"https://www.biorxiv.org/content/10.1101/{suffix}.full.pdf"
    logger.info("PDF URL: %s", pdf_link)

    # Fields copied straight from the entry, in output order
    record = {
        label: paper.get(key, "")
        for label, key in (
            ("Title", "title"),
            ("Authors", "authors"),
            ("Abstract", "abstract"),
            ("Publication Date", "date"),
        )
    }
    # Derived fields; the same link is exposed under both "URL" and "pdf_url"
    record.update(
        {
            "URL": pdf_link,
            "pdf_url": pdf_link,
            "filename": f"{suffix}.pdf",
            "source": "biorxiv",
            "biorxiv_id": doi,
        }
    )
    return record
80
+
81
@tool(args_schema=DownloadBiorxivPaperInput, parse_docstring=True)
def download_biorxiv_paper(
    doi: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command[Any]:
    """
    Get metadata and PDF URL for a bioRxiv paper using its DOI.
    """
    # The docstring above is kept verbatim: the decorator is asked to parse it,
    # so its text is part of the tool's runtime behaviour.
    logger.info("Fetching metadata from bioRxiv for DOI: %s", doi)

    # Compose the Hydra config with this tool's defaults and keep only its section
    with hydra.initialize(version_base=None, config_path="../../configs"):
        tool_cfg = hydra.compose(
            config_name="config",
            overrides=["tools/download_biorxiv_paper=default"],
        ).tools.download_biorxiv_paper
    api_url = tool_cfg.api_url
    request_timeout = tool_cfg.request_timeout
    logger.info("API URL: %s", api_url)
    logger.info("Request Timeout: %s", request_timeout)

    # Look the paper up, then reshape the entry into the record stored in state
    paper_entry = fetch_biorxiv_metadata(doi, api_url, request_timeout)
    record = extract_metadata(paper_entry, doi)
    summary = f"Successfully retrieved metadata and PDF URL for bioRxiv DOI {doi}"

    # The state update keys the record by the DOI exactly as it was passed in
    return Command(
        update={
            "article_data": {doi: record},
            "messages": [ToolMessage(content=summary, tool_call_id=tool_call_id)],
        }
    )
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tool for downloading medRxiv paper metadata and retrieving the PDF URL.
4
+ """
5
+
6
+ import logging
7
+ from typing import Annotated, Any
8
+
9
+ import hydra
10
+ import requests
11
+ from langchain_core.messages import ToolMessage
12
+ from langchain_core.tools import tool
13
+ from langchain_core.tools.base import InjectedToolCallId
14
+ from langgraph.types import Command
15
+ from pydantic import BaseModel, Field
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DownloadMedrxivPaperInput(BaseModel):
    """Input schema for the medRxiv paper download tool."""

    # DOI of the paper to look up. The description is runtime text and is kept as written.
    doi: str = Field(description=
        """The medRxiv DOI, from search_helper or multi_helper or single_helper,
        used to retrieve the paper details and PDF URL."""
    )
    # NOTE: a logger.info("DOI Received: %s", doi) call used to sit here. In a class
    # body it runs once, when the class is defined, and `doi` is then the Field(...)
    # object rather than a caller-supplied DOI, so the log line was misleading.
    # The tool function logs the real DOI when it is invoked.

    # Identifier of the tool call, annotated with InjectedToolCallId.
    tool_call_id: Annotated[str, InjectedToolCallId]
31
+
32
# Fetching raw metadata from medRxiv API for a given DOI
def fetch_medrxiv_metadata(doi: str, api_url: str, request_timeout: int) -> dict:
    """
    Fetch the raw medRxiv API record for a paper identified by its DOI.

    Parameters:
        doi (str): The DOI of the medRxiv paper; a trailing version marker
            such as "v1" is removed before the lookup.
        api_url (str): Base URL of the metadata endpoint. The cleaned DOI is
            appended to it without any separator.
        request_timeout (int): Timeout forwarded to ``requests.get``.

    Returns:
        dict: The first entry of the "collection" list in the JSON response.

    Raises:
        requests.HTTPError: If ``raise_for_status`` rejects the response.
        ValueError: If the response has no "collection" or it is empty.
    """
    # Remove only a trailing version marker ("...v2") before querying medRxiv.
    # Cutting at the first "v" would truncate any identifier that merely
    # contains that letter.
    stem, marker, version = doi.rpartition("v")
    clean_doi = stem if marker and version.isdigit() else doi

    # Use a separate local so the api_url argument is not rebound.
    request_url = f"{api_url}{clean_doi}"
    logger.info("Fetching metadata from api url: %s", request_url)
    response = requests.get(request_url, timeout=request_timeout)
    response.raise_for_status()

    data = response.json()
    if not data.get("collection"):
        raise ValueError(f"No entry found for medRxiv ID {doi}")

    return data["collection"][0]
56
+
57
# Reshape a raw medRxiv API entry into the record kept for the article
def extract_metadata(paper: dict, doi: str) -> dict:
    """
    Assemble the article record for a medRxiv paper from its API entry.

    Parameters:
        paper (dict): One API entry; the "title", "authors", "abstract",
            "date" and "doi" keys are read, each defaulting to "".
        doi (str): The DOI as supplied by the caller, stored under "medrxiv_id".

    Returns:
        dict: The copied display fields plus the derived PDF link and file name.
    """
    # Keep only what follows the "10.1101/" prefix of the entry's own DOI
    suffix = paper.get("doi", "").split("10.1101/")[-1]
    pdf_link = f"https://www.medrxiv.org/content/10.1101/{suffix}.full.pdf"
    logger.info("PDF URL: %s", pdf_link)

    # Fields copied straight from the entry, in output order
    record = {
        label: paper.get(key, "")
        for label, key in (
            ("Title", "title"),
            ("Authors", "authors"),
            ("Abstract", "abstract"),
            ("Publication Date", "date"),
        )
    }
    # Derived fields; the same link is exposed under both "URL" and "pdf_url"
    record.update(
        {
            "URL": pdf_link,
            "pdf_url": pdf_link,
            "filename": f"{suffix}.pdf",
            "source": "medrxiv",
            "medrxiv_id": doi,
        }
    )
    return record
80
+
81
# Tool entry point: look up a medRxiv paper and publish its record to state
@tool(args_schema=DownloadMedrxivPaperInput, parse_docstring=True)
def download_medrxiv_paper(
    doi: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command[Any]:
    """
    Get metadata and PDF URL for a medRxiv paper using its doi or medrxiv id.
    """
    # The docstring above is kept verbatim: the decorator is asked to parse it,
    # so its text is part of the tool's runtime behaviour.
    logger.info("Fetching metadata from medRxiv for DOI: %s", doi)

    # Compose the Hydra config with this tool's defaults and keep only its section
    with hydra.initialize(version_base=None, config_path="../../configs"):
        tool_cfg = hydra.compose(
            config_name="config",
            overrides=["tools/download_medrxiv_paper=default"],
        ).tools.download_medrxiv_paper
    api_url = tool_cfg.api_url
    request_timeout = tool_cfg.request_timeout
    logger.info("API URL: %s", api_url)

    # Look the paper up, then reshape the entry into the record stored in state
    paper_entry = fetch_medrxiv_metadata(doi, api_url, request_timeout)
    record = extract_metadata(paper_entry, doi)
    summary = f"Successfully retrieved metadata and PDF URL for medRxiv DOI {doi}"

    # The state update keys the record by the DOI exactly as it was passed in
    return Command(
        update={
            "article_data": {doi: record},
            "messages": [ToolMessage(content=summary, tool_call_id=tool_call_id)],
        }
    )