aiagents4pharma 1.38.0__py3-none-any.whl → 1.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,8 @@ from langgraph.prebuilt.tool_node import ToolNode
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
  from ..state.state_talk2scholars import Talk2Scholars
16
16
  from ..tools.paper_download.download_arxiv_input import download_arxiv_paper
17
+ from ..tools.paper_download.download_medrxiv_input import download_medrxiv_paper
18
+ from ..tools.paper_download.download_biorxiv_input import download_biorxiv_paper
17
19
 
18
20
  # Initialize logger
19
21
  logging.basicConfig(level=logging.INFO)
@@ -24,14 +26,20 @@ def get_app(uniq_id, llm_model: BaseChatModel):
24
26
  """
25
27
  Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
26
28
 
29
+ This agent supports downloading scientific papers from multiple preprint servers, including
30
+ arXiv, BioRxiv, and MedRxiv. It can intelligently handle user queries by extracting or resolving
31
+ necessary identifiers (e.g., arXiv ID or DOI) from the paper title and routing the request to
32
+ the appropriate download tool.
33
+
27
34
  Args:
28
35
  uniq_id (str): A unique identifier for tracking the current session.
29
36
  llm_model (BaseChatModel, optional): The language model to be used by the agent.
30
- Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
37
+ Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
31
38
 
32
39
  Returns:
33
40
  StateGraph: A compiled LangGraph application that enables the paper download agent to
34
- process user queries and retrieve arXiv papers.
41
+ process user queries and retrieve research papers from arXiv (using arXiv ID),
42
+ BioRxiv and MedRxiv (using DOI resolved from the paper title or provided directly).
35
43
  """
36
44
 
37
45
  # Load Hydra configuration
@@ -44,7 +52,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
44
52
  cfg = cfg.agents.talk2scholars.paper_download_agent
45
53
 
46
54
  # Define tools properly
47
- tools = ToolNode([download_arxiv_paper])
55
+ tools = ToolNode([download_arxiv_paper, download_medrxiv_paper, download_biorxiv_paper])
48
56
 
49
57
  # Define the model
50
58
  logger.info("Using OpenAI model %s", llm_model)
@@ -58,7 +66,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
58
66
 
59
67
  def paper_download_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
60
68
  """
61
- Processes the current state to fetch the arXiv paper.
69
+ Processes the current state to fetch the research paper from arXiv, BioRxiv, or MedRxiv.
62
70
  """
63
71
  logger.info("Creating paper download agent node with thread_id: %s", uniq_id)
64
72
  result = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
@@ -8,6 +8,8 @@ defaults:
8
8
  - agents/talk2scholars/pdf_agent: default
9
9
  - tools/search: default
10
10
  - tools/download_arxiv_paper: default
11
+ - tools/download_biorxiv_paper: default
12
+ - tools/download_medrxiv_paper: default
11
13
  - tools/single_paper_recommendation: default
12
14
  - tools/multi_paper_recommendation: default
13
15
  - tools/retrieve_semantic_scholar_paper_id: default
@@ -0,0 +1,3 @@
1
+ """
2
+ Import all the modules in the package
3
+ """
@@ -0,0 +1,3 @@
1
+ """
2
+ Import all the modules in the package
3
+ """
@@ -0,0 +1,151 @@
1
+ """
2
+ Unit tests for bioRxiv paper downloading functionality, including:
3
+ - download_biorxiv_paper tool function.
4
+ """
5
+
6
+ import unittest
7
+ from unittest.mock import MagicMock, patch
8
+ from langchain_core.messages import ToolMessage
9
+
10
+ from aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input import (
11
+ download_biorxiv_paper,
12
+ )
13
+
14
+
15
class TestDownloadBiorxivPaper(unittest.TestCase):
    """Tests for the download_biorxiv_paper tool."""

    # Module whose `hydra` and `requests` attributes are patched for every test.
    MODULE = "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input"
    # DOI used as tool input by all tests.
    DOI = "10.1101/2025.05.13.653102"

    def setUp(self):
        """Patch Hydra and requests in the tool module and install a dummy config."""
        self.mock_initialize = self._start_patch("hydra.initialize")
        self.mock_compose = self._start_patch("hydra.compose")
        self.mock_get = self._start_patch("requests.get")

        dummy_cfg = MagicMock()
        dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
        dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
        self.mock_compose.return_value = dummy_cfg
        self.mock_initialize.return_value.__enter__.return_value = None

    def _start_patch(self, attribute):
        """Start a patch on MODULE.<attribute>, undo it after the test, return the mock."""
        patcher = patch(f"{self.MODULE}.{attribute}")
        self.addCleanup(patcher.stop)
        return patcher.start()

    def _stub_api_response(self, payload):
        """Make the patched requests.get return a 200 response whose JSON body is payload."""
        dummy_response = MagicMock()
        dummy_response.status_code = 200
        dummy_response.raise_for_status = MagicMock()
        dummy_response.json.return_value = payload
        self.mock_get.return_value = dummy_response

    def _run_tool(self):
        """Invoke the tool with the shared DOI and a fixed tool call id."""
        return download_biorxiv_paper.run({"doi": self.DOI, "tool_call_id": "test_tool_id"})

    def test_download_biorxiv_paper_success(self):
        """Test successful metadata and PDF URL retrieval."""
        doi = self.DOI
        self._stub_api_response(
            {
                "collection": [
                    {
                        "title": "Sample BioRxiv Paper",
                        "authors": "Author One; Author Two",
                        "abstract": "This is a bioRxiv abstract.",
                        "date": "2025-04-25",
                        "doi": doi,
                        "link": f"https://www.biorxiv.org/content/{doi}.full.pdf",
                    }
                ]
            }
        )

        update = self._run_tool().update

        self.assertIn("article_data", update)
        self.assertIn(doi, update["article_data"])
        metadata = update["article_data"][doi]
        self.assertEqual(metadata["Title"], "Sample BioRxiv Paper")
        self.assertEqual(metadata["Authors"], "Author One; Author Two")
        self.assertEqual(metadata["Abstract"], "This is a bioRxiv abstract.")
        self.assertEqual(metadata["Publication Date"], "2025-04-25")
        self.assertEqual(metadata["URL"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
        self.assertEqual(metadata["pdf_url"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
        self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
        self.assertEqual(metadata["source"], "biorxiv")
        self.assertEqual(metadata["biorxiv_id"], doi)

        # assertGreaterEqual reports both operands on failure, unlike assertTrue(len(...) >= 1).
        self.assertGreaterEqual(len(update["messages"]), 1)
        self.assertIsInstance(update["messages"][0], ToolMessage)
        self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)

    def test_no_entry_found(self):
        """Test that a response without a 'collection' key raises ValueError."""
        self._stub_api_response({})  # No 'collection' in the response body

        with self.assertRaises(ValueError) as context:
            self._run_tool()

        self.assertEqual(str(context.exception), f"No metadata found for DOI: {self.DOI}")

    def test_no_pdf_url_found(self):
        """Test that the PDF URL is built from the DOI when the entry has no 'link'."""
        doi = self.DOI
        self._stub_api_response(
            {
                "collection": [
                    {
                        "title": "Sample Biorxiv Paper",
                        "authors": "Author One; Author Two",
                        "abstract": "This is a BioRxiv abstract.",
                        "date": "2025-04-25",
                        "doi": doi,
                        # 'link' is intentionally omitted
                    }
                ]
            }
        )

        metadata = self._run_tool().update["article_data"][doi]

        # Assert that the PDF URL was constructed from DOI
        expected_suffix = doi.rsplit("/", maxsplit=1)[-1]
        expected_url = f"https://www.biorxiv.org/content/10.1101/{expected_suffix}.full.pdf"

        self.assertEqual(metadata["pdf_url"], expected_url)
@@ -0,0 +1,151 @@
1
+ """
2
+ Unit tests for medRxiv paper downloading functionality, including:
3
+ - download_medrxiv_paper tool function.
4
+ """
5
+
6
+ import unittest
7
+ from unittest.mock import MagicMock, patch
8
+ from langchain_core.messages import ToolMessage
9
+
10
+ from aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input import (
11
+ download_medrxiv_paper,
12
+ )
13
+
14
+
15
class TestDownloadMedrxivPaper(unittest.TestCase):
    """Tests for the download_medrxiv_paper tool."""

    # Module whose `hydra` and `requests` attributes are patched for every test.
    MODULE = "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input"
    # DOI used as tool input by all tests.
    DOI = "10.1101/2025.04.25.25326432"

    def setUp(self):
        """Patch Hydra and requests in the tool module and install a dummy config."""
        self.mock_initialize = self._start_patch("hydra.initialize")
        self.mock_compose = self._start_patch("hydra.compose")
        self.mock_get = self._start_patch("requests.get")

        dummy_cfg = MagicMock()
        dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
        dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
        self.mock_compose.return_value = dummy_cfg
        self.mock_initialize.return_value.__enter__.return_value = None

    def _start_patch(self, attribute):
        """Start a patch on MODULE.<attribute>, undo it after the test, return the mock."""
        patcher = patch(f"{self.MODULE}.{attribute}")
        self.addCleanup(patcher.stop)
        return patcher.start()

    def _stub_api_response(self, payload):
        """Make the patched requests.get return a 200 response whose JSON body is payload."""
        dummy_response = MagicMock()
        dummy_response.status_code = 200
        dummy_response.raise_for_status = MagicMock()
        dummy_response.json.return_value = payload
        self.mock_get.return_value = dummy_response

    def _run_tool(self):
        """Invoke the tool with the shared DOI and a fixed tool call id."""
        return download_medrxiv_paper.run({"doi": self.DOI, "tool_call_id": "test_tool_id"})

    def test_download_medrxiv_paper_success(self):
        """Test successful metadata and PDF URL retrieval."""
        doi = self.DOI
        self._stub_api_response(
            {
                "collection": [
                    {
                        "title": "Sample Medrxiv Paper",
                        "authors": "Author One; Author Two",
                        "abstract": "This is a medRxiv abstract.",
                        "date": "2025-04-25",
                        "doi": doi,
                        "link": f"https://www.medrxiv.org/content/{doi}.full.pdf",
                    }
                ]
            }
        )

        update = self._run_tool().update

        self.assertIn("article_data", update)
        self.assertIn(doi, update["article_data"])
        metadata = update["article_data"][doi]
        self.assertEqual(metadata["Title"], "Sample Medrxiv Paper")
        self.assertEqual(metadata["Authors"], "Author One; Author Two")
        self.assertEqual(metadata["Abstract"], "This is a medRxiv abstract.")
        self.assertEqual(metadata["Publication Date"], "2025-04-25")
        self.assertEqual(metadata["URL"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
        self.assertEqual(metadata["pdf_url"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
        self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
        self.assertEqual(metadata["source"], "medrxiv")
        self.assertEqual(metadata["medrxiv_id"], doi)

        # assertGreaterEqual reports both operands on failure, unlike assertTrue(len(...) >= 1).
        self.assertGreaterEqual(len(update["messages"]), 1)
        self.assertIsInstance(update["messages"][0], ToolMessage)
        self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)

    def test_no_entry_found(self):
        """Test that a response without a 'collection' key raises ValueError."""
        self._stub_api_response({})  # No 'collection' in the response body

        with self.assertRaises(ValueError) as context:
            self._run_tool()

        self.assertEqual(str(context.exception), f"No entry found for medRxiv ID {self.DOI}")

    def test_no_pdf_url_found(self):
        """Test that the PDF URL is built from the DOI when the entry has no 'link'."""
        doi = self.DOI
        self._stub_api_response(
            {
                "collection": [
                    {
                        "title": "Sample Medrxiv Paper",
                        "authors": "Author One; Author Two",
                        "abstract": "This is a medRxiv abstract.",
                        "date": "2025-04-25",
                        "doi": doi,
                        # 'link' is intentionally omitted
                    }
                ]
            }
        )

        metadata = self._run_tool().update["article_data"][doi]

        # Assert that the PDF URL was constructed from DOI
        expected_suffix = doi.rsplit("/", maxsplit=1)[-1]
        expected_url = f"https://www.medrxiv.org/content/10.1101/{expected_suffix}.full.pdf"

        self.assertEqual(metadata["pdf_url"], expected_url)
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- This package provides modules for fetching and downloading academic papers from arXiv.
3
+ This package provides modules for fetching and downloading academic papers from arXiv,
4
+ biorxiv and medrxiv.
4
5
  """
5
6
 
6
7
  # Import modules
@@ -8,4 +9,6 @@ from . import download_arxiv_input
8
9
 
9
10
  __all__ = [
10
11
  "download_arxiv_input",
12
+ "download_biorxiv_input",
13
+ "download_medrxiv_input",
11
14
  ]
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tool for downloading bioRxiv paper metadata and retrieving the PDF URL.
4
+ """
5
+
6
+ import logging
7
+ from typing import Annotated, Any
8
+
9
+ import hydra
10
+ import requests
11
+ from langchain_core.messages import ToolMessage
12
+ from langchain_core.tools import tool
13
+ from langchain_core.tools.base import InjectedToolCallId
14
+ from langgraph.types import Command
15
+ from pydantic import BaseModel, Field
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DownloadBiorxivPaperInput(BaseModel):
    """Input schema for the bioRxiv paper download tool."""

    # NOTE: a logger.info("DOI Received: %s", doi) call used to sit in this class
    # body. It ran once, when the class was defined, and `doi` there is the object
    # returned by Field(...), not a DOI supplied by a caller, so it was removed.
    # download_biorxiv_paper already logs the DOI it receives.
    doi: str = Field(
        description="""The bioRxiv DOI, from search_helper or multi_helper or single_helper,
    used to retrieve the paper details and PDF URL."""
    )
    tool_call_id: Annotated[str, InjectedToolCallId]
31
+
32
def fetch_biorxiv_metadata(doi: str, api_url: str, request_timeout: int) -> dict:
    """
    Fetch the raw bioRxiv API record for a paper identified by its DOI.

    Parameters:
        doi (str): The DOI of the bioRxiv paper. A trailing version suffix such as
            "v1" is removed before the lookup.
        api_url (str): Base URL of the API; the cleaned DOI is appended to it as-is.
        request_timeout (int): Timeout value handed to ``requests.get``.

    Returns:
        dict: The first entry of the ``collection`` list in the JSON response.

    Raises:
        requests.HTTPError: Propagated from ``response.raise_for_status()``.
        ValueError: If the response has no ``collection`` key or it is empty.
    """
    # Strip only a trailing version marker ("v" followed by digits). Splitting on the
    # first "v" anywhere would truncate any DOI that merely contains that letter.
    base, marker, version = doi.rpartition("v")
    clean_doi = base if marker and version.isdigit() else doi

    request_url = f"{api_url}{clean_doi}"
    logger.info("Fetching metadata from api url: %s", request_url)
    response = requests.get(request_url, timeout=request_timeout)
    response.raise_for_status()

    # Parse the body once and reuse it for both the check and the result.
    data = response.json()
    if not data.get("collection"):
        raise ValueError(f"No metadata found for DOI: {doi}")

    return data["collection"][0]
57
+
58
def extract_metadata(paper: dict, doi: str) -> dict:
    """
    Build the article record for a bioRxiv paper from its raw API entry.

    Parameters:
        paper (dict): Raw entry as returned by ``fetch_biorxiv_metadata``; the keys
            ``title``, ``authors``, ``abstract``, ``date`` and ``doi`` are read.
        doi (str): DOI the caller asked for. Stored as ``biorxiv_id`` and used as a
            fallback when the entry carries no ``doi`` of its own.

    Returns:
        dict: Record with ``Title``, ``Authors``, ``Abstract``, ``Publication Date``,
        ``URL``, ``pdf_url``, ``filename``, ``source`` and ``biorxiv_id``. Missing
        text fields default to an empty string.
    """
    # Prefer the DOI reported by the API. Fall back to the requested DOI so an entry
    # without a "doi" field does not produce ".../10.1101/.full.pdf" and ".pdf".
    doi_suffix = (paper.get("doi") or doi).split("10.1101/")[-1]
    pdf_url = f"https://www.biorxiv.org/content/10.1101/{doi_suffix}.full.pdf"
    logger.info("PDF URL: %s", pdf_url)
    return {
        "Title": paper.get("title", ""),
        "Authors": paper.get("authors", ""),
        "Abstract": paper.get("abstract", ""),
        "Publication Date": paper.get("date", ""),
        # The same constructed PDF link is stored under both keys.
        "URL": pdf_url,
        "pdf_url": pdf_url,
        "filename": f"{doi_suffix}.pdf",
        "source": "biorxiv",
        "biorxiv_id": doi,
    }
80
+
81
@tool(args_schema=DownloadBiorxivPaperInput, parse_docstring=True)
def download_biorxiv_paper(
    doi: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command[Any]:
    """
    Get metadata and PDF URL for a bioRxiv paper using its DOI.
    """
    # NOTE(review): the docstring is kept verbatim because the decorator is given
    # parse_docstring=True, so its text is presumably consumed at runtime.
    logger.info("Fetching metadata from bioRxiv for DOI: %s", doi)

    # Compose the Hydra configuration and keep only this tool's section.
    with hydra.initialize(version_base=None, config_path="../../configs"):
        tool_cfg = hydra.compose(
            config_name="config",
            overrides=["tools/download_biorxiv_paper=default"],
        ).tools.download_biorxiv_paper
    api_url = tool_cfg.api_url
    request_timeout = tool_cfg.request_timeout
    logger.info("API URL: %s", api_url)
    logger.info("Request Timeout: %s", request_timeout)

    # Query the API, then reshape the raw entry into the stored article record.
    paper_record = extract_metadata(
        fetch_biorxiv_metadata(doi, api_url, request_timeout),
        doi,
    )
    success_message = ToolMessage(
        content=f"Successfully retrieved metadata and PDF URL for bioRxiv DOI {doi}",
        tool_call_id=tool_call_id,
    )

    # The record is keyed by the DOI exactly as it was passed in.
    return Command(
        update={
            "article_data": {doi: paper_record},
            "messages": [success_message],
        }
    )
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tool for downloading medRxiv paper metadata and retrieving the PDF URL.
4
+ """
5
+
6
+ import logging
7
+ from typing import Annotated, Any
8
+
9
+ import hydra
10
+ import requests
11
+ from langchain_core.messages import ToolMessage
12
+ from langchain_core.tools import tool
13
+ from langchain_core.tools.base import InjectedToolCallId
14
+ from langgraph.types import Command
15
+ from pydantic import BaseModel, Field
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DownloadMedrxivPaperInput(BaseModel):
    """Input schema for the medRxiv paper download tool."""

    # NOTE: a logger.info("DOI Received: %s", doi) call used to sit in this class
    # body. It ran once, when the class was defined, and `doi` there is the object
    # returned by Field(...), not a DOI supplied by a caller, so it was removed.
    # download_medrxiv_paper already logs the DOI it receives.
    doi: str = Field(
        description="""The medRxiv DOI, from search_helper or multi_helper or single_helper,
    used to retrieve the paper details and PDF URL."""
    )
    tool_call_id: Annotated[str, InjectedToolCallId]
31
+
32
# Fetching raw metadata from medRxiv API for a given DOI
def fetch_medrxiv_metadata(doi: str, api_url: str, request_timeout: int) -> dict:
    """
    Fetch the raw medRxiv API record for a paper identified by its DOI.

    Parameters:
        doi (str): The DOI of the medRxiv paper. A trailing version suffix such as
            "v1" is removed before the lookup.
        api_url (str): Base URL of the API; the cleaned DOI is appended to it as-is.
        request_timeout (int): Timeout value handed to ``requests.get``.

    Returns:
        dict: The first entry of the ``collection`` list in the JSON response.

    Raises:
        requests.HTTPError: Propagated from ``response.raise_for_status()``.
        ValueError: If the response has no ``collection`` key or it is empty.
    """
    # Strip only a trailing version marker ("v" followed by digits) before querying
    # medRxiv. Splitting on the first "v" anywhere would truncate any DOI that
    # merely contains that letter.
    base, marker, version = doi.rpartition("v")
    clean_doi = base if marker and version.isdigit() else doi

    request_url = f"{api_url}{clean_doi}"
    logger.info("Fetching metadata from api url: %s", request_url)
    response = requests.get(request_url, timeout=request_timeout)
    response.raise_for_status()

    data = response.json()
    if not data.get("collection"):
        raise ValueError(f"No entry found for medRxiv ID {doi}")

    return data["collection"][0]
56
+
57
# Extracting relevant metadata fields from the raw data
def extract_metadata(paper: dict, doi: str) -> dict:
    """
    Build the article record for a medRxiv paper from its raw API entry.

    Parameters:
        paper (dict): Raw entry as returned by ``fetch_medrxiv_metadata``; the keys
            ``title``, ``authors``, ``abstract``, ``date`` and ``doi`` are read.
        doi (str): DOI the caller asked for. Stored as ``medrxiv_id`` and used as a
            fallback when the entry carries no ``doi`` of its own.

    Returns:
        dict: Record with ``Title``, ``Authors``, ``Abstract``, ``Publication Date``,
        ``URL``, ``pdf_url``, ``filename``, ``source`` and ``medrxiv_id``. Missing
        text fields default to an empty string.
    """
    # Prefer the DOI reported by the API. Fall back to the requested DOI so an entry
    # without a "doi" field does not produce ".../10.1101/.full.pdf" and ".pdf".
    doi_suffix = (paper.get("doi") or doi).split("10.1101/")[-1]
    pdf_url = f"https://www.medrxiv.org/content/10.1101/{doi_suffix}.full.pdf"
    logger.info("PDF URL: %s", pdf_url)
    return {
        "Title": paper.get("title", ""),
        "Authors": paper.get("authors", ""),
        "Abstract": paper.get("abstract", ""),
        "Publication Date": paper.get("date", ""),
        # The same constructed PDF link is stored under both keys.
        "URL": pdf_url,
        "pdf_url": pdf_url,
        "filename": f"{doi_suffix}.pdf",
        "source": "medrxiv",
        "medrxiv_id": doi,
    }
80
+
81
# Tool to download medRxiv paper metadata and PDF URL
@tool(args_schema=DownloadMedrxivPaperInput, parse_docstring=True)
def download_medrxiv_paper(
    doi: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command[Any]:
    """
    Get metadata and PDF URL for a medRxiv paper using its doi or medrxiv id.
    """
    # NOTE(review): the docstring is kept verbatim because the decorator is given
    # parse_docstring=True, so its text is presumably consumed at runtime.
    logger.info("Fetching metadata from medRxiv for DOI: %s", doi)

    # Compose the Hydra configuration and keep only this tool's section.
    with hydra.initialize(version_base=None, config_path="../../configs"):
        tool_cfg = hydra.compose(
            config_name="config",
            overrides=["tools/download_medrxiv_paper=default"],
        ).tools.download_medrxiv_paper
    api_url = tool_cfg.api_url
    request_timeout = tool_cfg.request_timeout
    logger.info("API URL: %s", api_url)

    # Query the API, then reshape the raw entry into the stored article record.
    paper_record = extract_metadata(
        fetch_medrxiv_metadata(doi, api_url, request_timeout),
        doi,
    )
    success_message = ToolMessage(
        content=f"Successfully retrieved metadata and PDF URL for medRxiv DOI {doi}",
        tool_call_id=tool_call_id,
    )

    # The record is keyed by the DOI exactly as it was passed in.
    return Command(
        update={
            "article_data": {doi: paper_record},
            "messages": [success_message],
        }
    )
@@ -500,8 +500,27 @@ def question_and_answer(
500
500
  if isinstance(paper, dict)
501
501
  )
502
502
 
503
+ has_biorxiv_papers = any(
504
+ paper.get("source") == "biorxiv"
505
+ for paper in article_data.values()
506
+ if isinstance(paper, dict)
507
+ )
508
+
509
+ has_medrxiv_papers = any(
510
+ paper.get("source") == "medrxiv"
511
+ for paper in article_data.values()
512
+ if isinstance(paper, dict)
513
+ )
514
+
503
515
  # Choose papers to use
504
516
  selected_paper_ids = []
517
+ has_combimed_papers = (
518
+ has_uploaded_papers
519
+ or has_zotero_papers
520
+ or has_arxiv_papers
521
+ or has_biorxiv_papers
522
+ or has_medrxiv_papers
523
+ )
505
524
 
506
525
  if paper_ids:
507
526
  # Use explicitly specified papers
@@ -515,7 +534,7 @@ def question_and_answer(
515
534
  "%s: None of the provided paper_ids %s were found", call_id, paper_ids
516
535
  )
517
536
 
518
- elif use_all_papers or has_uploaded_papers or has_zotero_papers or has_arxiv_papers:
537
+ elif use_all_papers or has_combimed_papers:
519
538
  # Use all available papers if explicitly requested or if we have papers from any source
520
539
  selected_paper_ids = list(article_data.keys())
521
540
  logger.info(
@@ -143,6 +143,7 @@ class MultiPaperRecData:
143
143
  ],
144
144
  "URL": paper.get("url", "N/A"),
145
145
  "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
146
+ "doi": paper.get("externalIds", {}).get("DOI", "N/A"),
146
147
  }
147
148
  for paper in self.recommendations
148
149
  if paper.get("title") and paper.get("authors")
@@ -158,6 +159,7 @@ class MultiPaperRecData:
158
159
  f"{i+1}. {paper['Title']} ({paper['Year']}; "
159
160
  f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
160
161
  f"arXiv ID: {paper['arxiv_id']})"
162
+ f"doi: {paper['doi']})"
161
163
  for i, paper in enumerate(top_papers)
162
164
  ]
163
165
  )
@@ -125,6 +125,7 @@ class SearchData:
125
125
  ],
126
126
  "URL": paper.get("url", "N/A"),
127
127
  "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
128
+ "doi": paper.get("externalIds", {}).get("DOI", "N/A"),
128
129
  }
129
130
  for paper in self.papers
130
131
  if paper.get("title") and paper.get("authors")
@@ -140,6 +141,7 @@ class SearchData:
140
141
  f"{i+1}. {paper['Title']} ({paper['Year']}; "
141
142
  f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
142
143
  f"arXiv ID: {paper['arxiv_id']})"
144
+ f"doi: {paper['doi']})"
143
145
  for i, paper in enumerate(top_papers)
144
146
  ]
145
147
  )
@@ -136,6 +136,7 @@ class SinglePaperRecData:
136
136
  ],
137
137
  "URL": paper.get("url", "N/A"),
138
138
  "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
139
+ "doi": paper.get("externalIds", {}).get("DOI", "N/A"),
139
140
  }
140
141
  for paper in self.recommendations
141
142
  if paper.get("title") and paper.get("authors")
@@ -151,6 +152,7 @@ class SinglePaperRecData:
151
152
  f"{i+1}. {paper['Title']} ({paper['Year']}; "
152
153
  f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
153
154
  f"arXiv ID: {paper['arxiv_id']})"
155
+ f"doi: {paper['doi']})"
154
156
  for i, paper in enumerate(top_papers)
155
157
  ]
156
158
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiagents4pharma
3
- Version: 1.38.0
3
+ Version: 1.39.0
4
4
  Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -147,12 +147,12 @@ aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I
147
147
  aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
148
148
  aiagents4pharma/talk2scholars/agents/__init__.py,sha256=c_0Pk85bt-RfK5RMyALM3MXo3qXVMoYS7BOqM9wuFME,317
149
149
  aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=oCSWPj3TUgTIERmYbBTYipNrU1g956LXJEUx-7-KAQ0,3354
150
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=pYHW3R7VQjRA3PhgWGQYI3ErfdILYQ0FM1WGXii3r1k,2996
150
+ aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=J_kEl8joQfM80211xlNLZA9RkN52fY58dbCisuiEft8,3687
151
151
  aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=GEXzJMQxIeZ7zLP-AlnTMU-n_KXZ7g22Qd9L3USIc_4,3626
152
152
  aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=oui0CMSyXmBGBJ7LnYq8Ce0V8Qc3BS6GgH5Qx5wI6oM,4565
153
153
  aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=NAmEURIhH-sjXGO-dqAigUA10m-Re9Qe1hY8db4CIP0,4370
154
154
  aiagents4pharma/talk2scholars/configs/__init__.py,sha256=Y9-4PxsNCMoxyyQgDSbPByJnO9wnyem5SYL3eOZt1HY,189
155
- aiagents4pharma/talk2scholars/configs/config.yaml,sha256=-8X0_gTmjEuXAeIrnppw3Npy8HICelHZOvTKEScI-rs,596
155
+ aiagents4pharma/talk2scholars/configs/config.yaml,sha256=F7BCgmcnhfkyKT6qFL11E_iwTYPmF8W_0b1n4KAaSho,680
156
156
  aiagents4pharma/talk2scholars/configs/agents/__init__.py,sha256=plv5Iw34gvbGZbRyJapvoOiiFXekRQIwjV_yy5AR_SI,104
157
157
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=D94LW4cXLmJe4dNl5qoR9QN0JnBqGLbQDgDLqhCNUE0,213
158
158
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
@@ -168,6 +168,8 @@ aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py,sha256=fqQQ-GlRcb
168
168
  aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml,sha256=A6nYjrgzEyRv5JYsGN7oqNX4-tufMBZ6mg-A7bMX6V4,906
169
169
  aiagents4pharma/talk2scholars/configs/tools/__init__.py,sha256=6pHPF0ZGY78SD6KPMukd_xrfO1ocVXcyrsrB-kz-OnI,402
170
170
  aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
171
+ aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
172
+ aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
171
173
  aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
172
174
  aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml,sha256=comNgL9hRpH--IWuEsrN6hV5WdrJmh-ZsRh7hbryVhg,631
173
175
  aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
@@ -187,6 +189,8 @@ aiagents4pharma/talk2scholars/tests/__init__.py,sha256=U3PsTiUZaUBD1IZanFGkDIOdF
187
189
  aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py,sha256=FBRqS06IKJYFOudQEHQr-9oJ4tftkH-gTCowTAqwWSg,3686
188
190
  aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=IZYSocYVwqPil2lF6L07mKm8PUq7vjopmqNiCm6IJEA,6876
189
191
  aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=gKSQp-sw62FplNnGYW0wv2ZIUEefh3o0tFWbRzy9yLs,5068
192
+ aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py,sha256=gosuW4VBXyorQXbf0TpgAIT2hQjEeuvTTnT1jnoBYqM,6405
193
+ aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py,sha256=iNq9vEIVapmnUZTRJXCv_UoaWThGapW7Vt_2BmZG9NE,6414
190
194
  aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=3mycLeEgH5XkwxuoXfTpQb8c8xFtIX2HjVnACPrSf60,7141
191
195
  aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=scGCTgka2JuoUhzZwzDn0OgIYihOLhXbwb5uGFR02aI,4302
192
196
  aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=KR4GjjGgBjWXwEVzSh4ZpYjcWPq-EaZTT_fzRheb0uY,37286
@@ -207,10 +211,12 @@ aiagents4pharma/talk2scholars/tests/test_zotero_pdf_downloader_utils.py,sha256=N
207
211
  aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=E7ncgspEzhJTvmZuKplugZJPPWsoiFU_xLUg-oz6qkI,29100
208
212
  aiagents4pharma/talk2scholars/tests/test_zotero_write.py,sha256=qWlO0XoZJ6vxUxgisjYv9Np87CoTEDxiQBEOhdj9foo,6111
209
213
  aiagents4pharma/talk2scholars/tools/__init__.py,sha256=c8pYHDqR9P0Frz2jWjbvyizfSTBMlMFzGsiQzx2KC9c,189
210
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=tNTLSPNdir4XSKRF0HjXI_tBGBXXXwDhWRI5VnwbZpM,214
214
+ aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=Lu5FmBxDH8mIIYE41G8_BKYXUf-vHIYVwujidbeydl4,295
211
215
  aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py,sha256=WTWvXbh0C96OoMoPf8Bgu0AgorsdkWslac_WqlHc4bo,3900
216
+ aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py,sha256=R92OaR4Omilj-v-rT0Me_BhxN8-AF0sbDwhUxNCUTm4,3718
217
+ aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py,sha256=UaHsdZXseUMQfiIovD0kS8r9DZ6KJpRGtTZyOCTRYVs,3786
212
218
  aiagents4pharma/talk2scholars/tools/pdf/__init__.py,sha256=DPpOfON3AySko5EBBAe_3udOoSaAdQWNyGeNvJyV5R8,138
213
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=_G5M39iHWPMOBbnmF6z46_JkaF9sCV8CQiD8vQYVmVY,22956
219
+ aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=pzJhSOdchyS3J4Tzoh7aFMALJFCqEk4Xh4LCDa-5I1I,23406
214
220
  aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=w_eiw0pG8HNp79F9O_icXs_Yl_4odsmagYNKDTjIsvk,428
215
221
  aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py,sha256=wOZ7UJq4b8vl7NU9mU3BW_nRmCIkeBvc6nbGGegysek,3181
216
222
  aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py,sha256=N7-6dzRI71bK7MG3-A4G505YnNvAMJW_Qjjtcoo4JYw,2799
@@ -219,9 +225,9 @@ aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha
219
225
  aiagents4pharma/talk2scholars/tools/s2/search.py,sha256=p86RLy_9bMxm3KTDL2L0Ilb3yeF4K6IIkZCgbt4CsiE,2529
220
226
  aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py,sha256=rnl6Bb7mKXg_lsProAYaSEJNIzWgNVZuDHqD-dDe9EI,2763
221
227
  aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py,sha256=wBTPVgiXbmIJUMouOQRwojgk5PJXeEinDJzHzEToZbU,229
222
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py,sha256=rrR0DRNeGHpYcONZS7oS-VCSWBL5zNALU4m7IF6Yxng,7268
223
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py,sha256=_eP7q4ZTSWisEF4Stffe-IpR2MD9WrQ0u3jbbeJBRLU,6363
224
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py,sha256=ahTDT0lp5VRZS5hLL3-hsHx4wB3LUVY2OBTCTEJyR3Y,6983
228
+ aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py,sha256=kjzZ90Cd23hXBQ861Z2BEjE1VvI02zxc1mIj2S7YWFo,7379
229
+ aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py,sha256=AembYVndEOwgcDz_n1VWAydfL8ufQ5pEokTKkrx47jA,6474
230
+ aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py,sha256=zLENnFSyQIpXqmJKow1XHS9pWbf27tsSUEvzydNCj9I,7094
225
231
  aiagents4pharma/talk2scholars/tools/zotero/__init__.py,sha256=wXiQILLq-utV35PkDUpm_F074mG9yRMyGQAFlr9UAOw,197
226
232
  aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=Fgv7PIkIlRqfl8EprcXqr1S4wtbSG8itv7x-3nMf3Rc,3990
227
233
  aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py,sha256=iqwpolg7GWAjXizubLrPaAsgOpsOhKz-tFRyLOiBvC0,6325
@@ -232,8 +238,8 @@ aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9y
232
238
  aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
233
239
  aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
234
240
  aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py,sha256=ERBha8afU6Q1EaRBe9qB8tchOzZ4_KfFgDW6EElOJoU,4816
235
- aiagents4pharma-1.38.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
236
- aiagents4pharma-1.38.0.dist-info/METADATA,sha256=pHbuNJmxv1gfi0JcUnBxOMTcniqA4rMFEJfqgzwLFjw,16788
237
- aiagents4pharma-1.38.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
238
- aiagents4pharma-1.38.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
239
- aiagents4pharma-1.38.0.dist-info/RECORD,,
241
+ aiagents4pharma-1.39.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
242
+ aiagents4pharma-1.39.0.dist-info/METADATA,sha256=ITwj9yujMnDVZtQM3n09ZxDv4ueGCGDlG2JZOvU3n7k,16788
243
+ aiagents4pharma-1.39.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
244
+ aiagents4pharma-1.39.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
245
+ aiagents4pharma-1.39.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5