aiagents4pharma 1.29.0__py3-none-any.whl → 1.30.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
@@ -4,5 +4,6 @@ This file is used to import all the modules in the package.
  
  from . import main_agent
  from . import s2_agent
+ from . import paper_download_agent
  from . import zotero_agent
  from . import pdf_agent
@@ -0,0 +1,86 @@
+ #!/usr/bin/env python3
+ """
+ This module defines the paper download agent that connects to the arXiv API to fetch
+ paper details and PDFs. It is part of the Talk2Scholars project.
+ """
+
+ import logging
+ from typing import Any, Dict
+ import hydra
+ from langchain_core.language_models.chat_models import BaseChatModel
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt.chat_agent_executor import create_react_agent
+ from langgraph.prebuilt.tool_node import ToolNode
+ from langgraph.checkpoint.memory import MemorySaver
+ from ..state.state_talk2scholars import Talk2Scholars
+ from ..tools.paper_download import download_arxiv_paper
+ from ..tools.s2.query_results import query_results
+
+ # Initialize logger
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ def get_app(uniq_id, llm_model: BaseChatModel):
+     """
+     Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
+
+     Args:
+         uniq_id (str): A unique identifier for tracking the current session.
+         llm_model (BaseChatModel, optional): The language model to be used by the agent.
+             Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
+
+     Returns:
+         StateGraph: A compiled LangGraph application that enables the paper download agent to
+             process user queries and retrieve arXiv papers.
+     """
+
+     # Load Hydra configuration
+     logger.info("Loading Hydra configuration for Talk2Scholars paper download agent")
+     with hydra.initialize(version_base=None, config_path="../configs"):
+         cfg = hydra.compose(
+             config_name="config",
+             overrides=["agents/talk2scholars/paper_download_agent=default"]
+         )
+         cfg = cfg.agents.talk2scholars.paper_download_agent
+
+     # Define tools properly
+     tools = ToolNode(
+         [download_arxiv_paper, query_results]
+     )
+
+     # Define the model
+     logger.info("Using OpenAI model %s", llm_model)
+     model = create_react_agent(
+         llm_model,
+         tools=tools,
+         state_schema=Talk2Scholars,
+         prompt=cfg.prompt,
+         checkpointer=MemorySaver(),
+     )
+
+     def paper_download_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
+         """
+         Processes the current state to fetch the arXiv paper.
+         """
+         logger.info("Creating paper download agent node with thread_id: %s", uniq_id)
+         result = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+         return result
+
+     # Define new graph
+     workflow = StateGraph(Talk2Scholars)
+
+     # Adding node for paper download agent
+     workflow.add_node("paper_download_agent", paper_download_agent_node)
+
+     # Entering into the agent
+     workflow.add_edge(START, "paper_download_agent")
+
+     # Memory management for states between graph runs
+     checkpointer = MemorySaver()
+
+     # Compile the graph
+     app = workflow.compile(checkpointer=checkpointer)
+
+     # Logging the information and returning the app
+     logger.info("Compiled the graph")
+     return app
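
A minimal usage sketch of the get_app entry point above (hedged: the chat model below is illustrative — any BaseChatModel works — and it assumes an OpenAI API key is configured for ChatOpenAI):

    # Sketch only: drive the compiled paper download agent directly.
    from langchain_core.messages import HumanMessage
    from langchain_openai import ChatOpenAI  # assumed available; any BaseChatModel works

    from aiagents4pharma.talk2scholars.agents.paper_download_agent import get_app

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.5)  # model name is illustrative
    app = get_app("session-001", llm)

    result = app.invoke(
        {"messages": [HumanMessage(content="Download arXiv paper 1234.5678")]},
        config={"configurable": {"thread_id": "session-001"}},
    )
    print(result["messages"][-1].content)      # agent's reply
    print(result.get("pdf_data", {}).keys())   # PDF payload, if the tool ran
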
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -2,10 +2,12 @@ defaults:
  - _self_
  - agents/talk2scholars/main_agent: default
  - agents/talk2scholars/s2_agent: default
+ - agents/talk2scholars/paper_download_agent: default
  - agents/talk2scholars/zotero_agent: default
  - app/frontend: default
  - agents/talk2scholars/pdf_agent: default
  - tools/search: default
+ - tools/download_arxiv_paper: default
  - tools/single_paper_recommendation: default
  - tools/multi_paper_recommendation: default
  - tools/retrieve_semantic_scholar_paper_id: default
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -63,3 +63,4 @@ class Talk2Scholars(AgentState):
      pdf_data: Annotated[Dict[str, Any], replace_dict]
      zotero_read: Annotated[Dict[str, Any], replace_dict]
      llm_model: BaseChatModel
+     pdf_data: Annotated[Dict[str, Any], replace_dict]
@@ -0,0 +1,142 @@
+ """Unit tests for the paper download agent in Talk2Scholars."""
+
+ from unittest import mock
+ import pytest
+ from langchain_core.messages import HumanMessage, AIMessage
+ from langchain_core.language_models.chat_models import BaseChatModel
+ from ..agents.paper_download_agent import get_app
+ from ..state.state_talk2scholars import Talk2Scholars
+
+
+ @pytest.fixture(autouse=True)
+ def mock_hydra_fixture():
+     """Mocks Hydra configuration for tests."""
+     with mock.patch("hydra.initialize"), mock.patch("hydra.compose") as mock_compose:
+         cfg_mock = mock.MagicMock()
+         cfg_mock.agents.talk2scholars.s2_agent.temperature = 0
+         cfg_mock.agents.talk2scholars.paper_download_agent.prompt = "Test prompt"
+         mock_compose.return_value = cfg_mock
+         yield mock_compose
+
+
+ @pytest.fixture
+ def mock_tools_fixture():
+     """Mocks paper download tools to prevent real HTTP calls."""
+     with (
+         mock.patch(
+             "aiagents4pharma.talk2scholars.tools.paper_download."
+             "download_arxiv_input.download_arxiv_paper"
+         ) as mock_download_arxiv_paper,
+         mock.patch(
+             "aiagents4pharma.talk2scholars.tools.s2.query_results.query_results"
+         ) as mock_query_results,
+     ):
+         mock_download_arxiv_paper.return_value = {
+             "pdf_data": {"dummy_key": "dummy_value"}
+         }
+         mock_query_results.return_value = {
+             "result": "Mocked Query Result"
+         }
+         yield [mock_download_arxiv_paper, mock_query_results]
+
+ @pytest.mark.usefixtures("mock_hydra_fixture")
+ def test_paper_download_agent_initialization():
+     """Ensures the paper download agent initializes properly with a prompt."""
+     thread_id = "test_thread_paper_dl"
+     llm_mock = mock.Mock(spec=BaseChatModel)  # Mock LLM
+
+     with mock.patch(
+         "aiagents4pharma.talk2scholars.agents.paper_download_agent.create_react_agent"
+     ) as mock_create_agent:
+         mock_create_agent.return_value = mock.Mock()
+
+         app = get_app(thread_id, llm_mock)
+         assert app is not None, "The agent app should be successfully created."
+         assert mock_create_agent.called
+
+ def test_paper_download_agent_invocation():
+     """Verifies agent processes queries and updates state correctly."""
+     _ = mock_tools_fixture  # Prevents unused-argument warning
+     thread_id = "test_thread_paper_dl"
+     mock_state = Talk2Scholars(
+         messages=[HumanMessage(content="Download paper 1234.5678")]
+     )
+     llm_mock = mock.Mock(spec=BaseChatModel)
+
+     with mock.patch(
+         "aiagents4pharma.talk2scholars.agents.paper_download_agent.create_react_agent"
+     ) as mock_create_agent:
+         mock_agent = mock.Mock()
+         mock_create_agent.return_value = mock_agent
+         mock_agent.invoke.return_value = {
+             "messages": [AIMessage(content="Here is the paper")],
+             "pdf_data": {"file_bytes": b"FAKE_PDF_CONTENTS"},
+         }
+
+
+         app = get_app(thread_id, llm_mock)
+         result = app.invoke(
+             mock_state,
+             config={
+                 "configurable": {
+                     "thread_id": thread_id,
+                     "checkpoint_ns": "test_ns",
+                     "checkpoint_id": "test_checkpoint",
+                 }
+             },
+         )
+
+         assert "messages" in result
+         assert "pdf_data" in result
+
+
+ def test_paper_download_agent_tools_assignment(request):  # Keep fixture name
+     """Checks correct tool assignment (download_arxiv_paper, query_results)."""
+     thread_id = "test_thread_paper_dl"
+     mock_tools = request.getfixturevalue("mock_tools_fixture")
+     llm_mock = mock.Mock(spec=BaseChatModel)
+
+     with (
+         mock.patch(
+             "aiagents4pharma.talk2scholars.agents.paper_download_agent.create_react_agent"
+         ) as mock_create_agent,
+         mock.patch(
+             "aiagents4pharma.talk2scholars.agents.paper_download_agent.ToolNode"
+         ) as mock_toolnode,
+     ):
+         mock_agent = mock.Mock()
+         mock_create_agent.return_value = mock_agent
+         mock_tool_instance = mock.Mock()
+         mock_tool_instance.tools = mock_tools
+         mock_toolnode.return_value = mock_tool_instance
+
+         get_app(thread_id, llm_mock)
+         assert mock_toolnode.called
+         assert len(mock_tool_instance.tools) == 2
+
+
+ def test_paper_download_agent_hydra_failure():
+     """Confirms the agent gracefully handles exceptions if Hydra fails."""
+     thread_id = "test_thread_paper_dl"
+     llm_mock = mock.Mock(spec=BaseChatModel)
+
+     with mock.patch("hydra.initialize", side_effect=Exception("Mock Hydra failure")):
+         with pytest.raises(Exception) as exc_info:
+             get_app(thread_id, llm_mock)
+         assert "Mock Hydra failure" in str(exc_info.value)
+
+
+ def test_paper_download_agent_model_failure():
+     """Ensures agent handles model-related failures gracefully."""
+     thread_id = "test_thread_paper_dl"
+     llm_mock = mock.Mock(spec=BaseChatModel)
+
+     with mock.patch(
+         "aiagents4pharma.talk2scholars.agents.paper_download_agent.create_react_agent",
+         side_effect=Exception("Mock model failure"),
+     ):
+         with pytest.raises(Exception) as exc_info:
+             get_app(thread_id, llm_mock)
+         assert "Mock model failure" in str(exc_info.value), (
+             "Model initialization failure should raise an exception."
+         )
@@ -0,0 +1,154 @@
+ """
+ Unit tests for arXiv paper downloading functionality, including:
+ - AbstractPaperDownloader (base class)
+ - ArxivPaperDownloader (arXiv-specific implementation)
+ - download_arxiv_paper tool function.
+ """
+
+ from unittest.mock import patch, MagicMock
+ import pytest
+ import requests
+ from requests.exceptions import HTTPError
+ from langgraph.types import Command
+ from langchain_core.messages import ToolMessage
+
+ # Import the classes and function under test
+ from aiagents4pharma.talk2scholars.tools.paper_download.abstract_downloader import (
+     AbstractPaperDownloader,
+ )
+ from aiagents4pharma.talk2scholars.tools.paper_download.arxiv_downloader import (
+     ArxivPaperDownloader,
+ )
+ from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
+     download_arxiv_paper,
+ )
+
+ @pytest.mark.parametrize("class_obj", [AbstractPaperDownloader])
+
+ def test_abstract_downloader_cannot_be_instantiated(class_obj):
+     """
+     Validates that AbstractPaperDownloader is indeed abstract and raises TypeError
+     if anyone attempts to instantiate it directly.
+     """
+     with pytest.raises(TypeError):
+         class_obj()
+
+
+ @pytest.fixture(name="arxiv_downloader_fixture")
+ @pytest.mark.usefixtures("mock_hydra_config_setup")
+ def fixture_arxiv_downloader():
+     """
+     Provides an ArxivPaperDownloader instance with a mocked Hydra config.
+     """
+     return ArxivPaperDownloader()
+
+
+ def test_fetch_metadata_success(arxiv_downloader_fixture,):
+     """
+     Ensures fetch_metadata retrieves XML data correctly, given a successful HTTP response.
+     """
+     mock_response = MagicMock()
+     mock_response.text = "<xml>Mock ArXiv Metadata</xml>"
+     mock_response.raise_for_status = MagicMock()
+
+     with patch.object(requests, "get", return_value=mock_response) as mock_get:
+         paper_id = "1234.5678"
+         result = arxiv_downloader_fixture.fetch_metadata(paper_id)
+         mock_get.assert_called_once_with(
+             "http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1",
+             timeout=10,
+         )
+         assert result["xml"] == "<xml>Mock ArXiv Metadata</xml>"
+
+
+ def test_fetch_metadata_http_error(arxiv_downloader_fixture):
+     """
+     Validates that fetch_metadata raises HTTPError when the response indicates a failure.
+     """
+     mock_response = MagicMock()
+     mock_response.raise_for_status.side_effect = HTTPError("Mocked HTTP failure")
+
+     with patch.object(requests, "get", return_value=mock_response):
+         with pytest.raises(HTTPError):
+             arxiv_downloader_fixture.fetch_metadata("invalid_id")
+
+
+ def test_download_pdf_success(arxiv_downloader_fixture):
+     """
+     Tests that download_pdf fetches the PDF link from metadata and successfully
+     retrieves the binary content.
+     """
+     mock_metadata = {
+         "xml": """
+         <feed xmlns="http://www.w3.org/2005/Atom">
+             <entry>
+                 <link title="pdf" href="http://test.arxiv.org/pdf/1234.5678v1.pdf"/>
+             </entry>
+         </feed>
+         """
+     }
+
+     mock_pdf_response = MagicMock()
+     mock_pdf_response.raise_for_status = MagicMock()
+     mock_pdf_response.iter_content = lambda chunk_size: [b"FAKE_PDF_CONTENT"]
+
+     with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
+         with patch.object(requests, "get", return_value=mock_pdf_response) as mock_get:
+             result = arxiv_downloader_fixture.download_pdf("1234.5678")
+             assert result["pdf_object"] == b"FAKE_PDF_CONTENT"
+             assert result["pdf_url"] == "http://test.arxiv.org/pdf/1234.5678v1.pdf"
+             assert result["arxiv_id"] == "1234.5678"
+             mock_get.assert_called_once_with(
+                 "http://test.arxiv.org/pdf/1234.5678v1.pdf",
+                 stream=True,
+                 timeout=10,
+             )
+
+
+ def test_download_pdf_no_pdf_link(arxiv_downloader_fixture):
+     """
+     Ensures a RuntimeError is raised if no <link> with title="pdf" is found in the XML.
+     """
+     mock_metadata = {"xml": "<feed></feed>"}
+
+     with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
+         with pytest.raises(RuntimeError, match="Failed to download PDF"):
+             arxiv_downloader_fixture.download_pdf("1234.5678")
+
+
+ def test_download_arxiv_paper_tool_success(arxiv_downloader_fixture):
+     """
+     Validates download_arxiv_paper orchestrates the ArxivPaperDownloader correctly,
+     returning a Command with PDF data and success messages.
+     """
+     mock_metadata = {"xml": "<mockxml></mockxml>"}
+     mock_pdf_response = {
+         "pdf_object": b"FAKE_PDF_CONTENT",
+         "pdf_url": "http://test.arxiv.org/mock.pdf",
+         "arxiv_id": "9999.8888",
+     }
+
+     with patch(
+         "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input."
+         "ArxivPaperDownloader",
+         return_value=arxiv_downloader_fixture,
+     ):
+         with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
+             with patch.object(
+                 arxiv_downloader_fixture,
+                 "download_pdf",
+                 return_value=mock_pdf_response,
+             ):
+                 command_result = download_arxiv_paper.invoke(
+                     {"arxiv_id": "9999.8888", "tool_call_id": "test_tool_call"}
+                 )
+
+                 assert isinstance(command_result, Command)
+                 assert "pdf_data" in command_result.update
+                 assert command_result.update["pdf_data"] == mock_pdf_response
+
+                 messages = command_result.update.get("messages", [])
+                 assert len(messages) == 1
+                 assert isinstance(messages[0], ToolMessage)
+                 assert "Successfully downloaded PDF" in messages[0].content
+                 assert "9999.8888" in messages[0].content
@@ -0,0 +1,17 @@
+ #!/usr/bin/env python3
+ """
+ This package provides modules for fetching and downloading academic papers from arXiv.
+ """
+
+ # Import modules
+ from . import abstract_downloader
+ from . import arxiv_downloader
+ from . import download_arxiv_input
+ from .download_arxiv_input import download_arxiv_paper
+
+ __all__ = [
+     "abstract_downloader",
+     "arxiv_downloader",
+     "download_arxiv_input",
+     "download_arxiv_paper",
+ ]
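
Because the package __init__ above re-exports the tool, downstream code can import it from either level; a small sketch (assuming the wheel is installed):

    # Both names resolve to the same LangChain tool object.
    from aiagents4pharma.talk2scholars.tools.paper_download import download_arxiv_paper
    from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
        download_arxiv_paper as download_arxiv_paper_direct,
    )

    assert download_arxiv_paper is download_arxiv_paper_direct
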
@@ -0,0 +1,43 @@
+ """
+ Abstract Base Class for Paper Downloaders.
+
+ This module defines the `AbstractPaperDownloader` class, which serves as a
+ base class for downloading scholarly papers from different sources
+ (e.g., arXiv, PubMed, IEEE Xplore). Any specific downloader should
+ inherit from this class and implement its methods.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict
+ class AbstractPaperDownloader(ABC):
+     """
+     Abstract base class for scholarly paper downloaders.
+
+     This is designed to be extended for different paper sources
+     like arXiv, PubMed, IEEE Xplore, etc. Each implementation
+     must define methods for fetching metadata and downloading PDFs.
+     """
+
+     @abstractmethod
+     def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
+         """
+         Fetch metadata for a given paper ID.
+
+         Args:
+             paper_id (str): The unique identifier for the paper.
+
+         Returns:
+             Dict[str, Any]: The metadata dictionary (format depends on the data source).
+         """
+
+     @abstractmethod
+     def download_pdf(self, paper_id: str) -> bytes:
+         """
+         Download the PDF for a given paper ID.
+
+         Args:
+             paper_id (str): The unique identifier for the paper.
+
+         Returns:
+             bytes: The binary content of the downloaded PDF.
+         """
@@ -0,0 +1,108 @@
+ """
+ Arxiv Paper Downloader
+
+ This module provides an implementation of `AbstractPaperDownloader` for arXiv.
+ It connects to the arXiv API, retrieves metadata for a research paper, and
+ downloads the corresponding PDF.
+
+ By using an abstract base class, this implementation is extendable to other
+ APIs like PubMed, IEEE Xplore, etc.
+ """
+ import xml.etree.ElementTree as ET
+ from typing import Any, Dict
+ import logging
+ import hydra
+ import requests
+ from .abstract_downloader import AbstractPaperDownloader
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ class ArxivPaperDownloader(AbstractPaperDownloader):
+     """
+     Downloader class for arXiv.
+
+     This class interfaces with the arXiv API to fetch metadata
+     and retrieve PDFs of academic papers based on their arXiv IDs.
+     """
+
+     def __init__(self):
+         """
+         Initializes the arXiv paper downloader.
+
+         Uses Hydra for configuration management to retrieve API details.
+         """
+         with hydra.initialize(version_base=None, config_path="../../configs"):
+             cfg = hydra.compose(
+                 config_name="config",
+                 overrides=["tools/download_arxiv_paper=default"]
+             )
+             self.api_url = cfg.tools.download_arxiv_paper.api_url
+             self.request_timeout = cfg.tools.download_arxiv_paper.request_timeout
+             self.chunk_size = cfg.tools.download_arxiv_paper.chunk_size
+             self.pdf_base_url = cfg.tools.download_arxiv_paper.pdf_base_url
+     def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
+         """
+         Fetch metadata from arXiv for a given paper ID.
+
+         Args:
+             paper_id (str): The arXiv ID of the paper.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing metadata, including the XML response.
+         """
+         logger.info("Fetching metadata from arXiv for paper ID: %s", paper_id)
+         api_url = f"{self.api_url}?search_query=id:{paper_id}&start=0&max_results=1"
+         response = requests.get(api_url, timeout=self.request_timeout)
+         response.raise_for_status()
+         return {"xml": response.text}
+
+     def download_pdf(self, paper_id: str) -> Dict[str, Any]:
+         """
+         Download the PDF of a paper from arXiv.
+
+         This function first retrieves the paper's metadata to locate the PDF link
+         before downloading the file.
+
+         Args:
+             paper_id (str): The arXiv ID of the paper.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing:
+                 - `pdf_object`: The binary content of the downloaded PDF.
+                 - `pdf_url`: The URL from which the PDF was fetched.
+                 - `arxiv_id`: The arXiv ID of the downloaded paper.
+         """
+         metadata = self.fetch_metadata(paper_id)
+
+         # Parse the XML response to locate the PDF link.
+         root = ET.fromstring(metadata["xml"])
+         ns = {"atom": "http://www.w3.org/2005/Atom"}
+         pdf_url = next(
+             (
+                 link.attrib.get("href")
+                 for entry in root.findall("atom:entry", ns)
+                 for link in entry.findall("atom:link", ns)
+                 if link.attrib.get("title") == "pdf"
+             ),
+             None,
+         )
+
+         if not pdf_url:
+             raise RuntimeError(f"Failed to download PDF for arXiv ID {paper_id}.")
+
+         logger.info("Downloading PDF from: %s", pdf_url)
+         pdf_response = requests.get(pdf_url, stream=True, timeout=self.request_timeout)
+         pdf_response.raise_for_status()
+
+         # Combine the PDF data from chunks.
+         pdf_object = b"".join(
+             chunk for chunk in pdf_response.iter_content(chunk_size=self.chunk_size) if chunk
+         )
+
+         return {
+             "pdf_object": pdf_object,
+             "pdf_url": pdf_url,
+             "arxiv_id": paper_id,
+         }
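
A short standalone sketch of the downloader above (it assumes the packaged Hydra config under configs/tools/download_arxiv_paper resolves, since __init__ composes it; the arXiv ID is just an example):

    from aiagents4pharma.talk2scholars.tools.paper_download.arxiv_downloader import (
        ArxivPaperDownloader,
    )

    downloader = ArxivPaperDownloader()        # reads api_url, timeouts, etc. via Hydra
    result = downloader.download_pdf("1234.5678")

    print(result["pdf_url"], result["arxiv_id"])
    with open(f"{result['arxiv_id']}.pdf", "wb") as fh:
        fh.write(result["pdf_object"])         # raw PDF bytes
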
@@ -0,0 +1,60 @@
+ # File: aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py
+ """
+ This module defines the `download_arxiv_paper` tool, which leverages the
+ `ArxivPaperDownloader` class to fetch and download academic papers from arXiv
+ based on their unique arXiv ID.
+ """
+ from typing import Annotated, Any
+ from pydantic import BaseModel, Field
+ from langchain_core.tools import tool
+ from langchain_core.messages import ToolMessage
+ from langchain_core.tools.base import InjectedToolCallId
+ from langgraph.types import Command
+
+ # Local import from the same package:
+ from .arxiv_downloader import ArxivPaperDownloader
+
+ class DownloadArxivPaperInput(BaseModel):
+     """
+     Input schema for the arXiv paper download tool.
+     (Optional: if you decide to keep Pydantic validation in the future)
+     """
+     arxiv_id: str = Field(
+         description="The arXiv paper ID used to retrieve the paper details and PDF."
+     )
+     tool_call_id: Annotated[str, InjectedToolCallId]
+
+ @tool(args_schema=DownloadArxivPaperInput, parse_docstring=True)
+ def download_arxiv_paper(
+     arxiv_id: str,
+     tool_call_id: Annotated[str, InjectedToolCallId],
+ ) -> Command[Any]:
+     """
+     Download an arXiv paper's PDF using its unique arXiv ID.
+
+     This function:
+     1. Creates an `ArxivPaperDownloader` instance.
+     2. Fetches metadata from arXiv using the provided `arxiv_id`.
+     3. Downloads the PDF from the returned link.
+     4. Returns a `Command` object containing the PDF data and a success message.
+
+     Args:
+         arxiv_id (str): The unique arXiv paper ID.
+         tool_call_id (InjectedToolCallId): A unique identifier for tracking this tool call.
+
+     Returns:
+         Command[Any]: Contains metadata and messages about the success of the operation.
+     """
+     downloader = ArxivPaperDownloader()
+
+     # If the downloader fails or the arxiv_id is invalid, this might raise an error
+     pdf_data = downloader.download_pdf(arxiv_id)
+
+     content = f"Successfully downloaded PDF for arXiv ID {arxiv_id}"
+
+     return Command(
+         update={
+             "pdf_data": pdf_data,
+             "messages": [ToolMessage(content=content, tool_call_id=tool_call_id)],
+         }
+     )
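
Outside of an agent run, the tool can also be invoked directly, as the unit tests earlier in this diff do; a sketch (the arXiv ID and tool_call_id are illustrative):

    from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
        download_arxiv_paper,
    )

    command = download_arxiv_paper.invoke(
        {"arxiv_id": "1234.5678", "tool_call_id": "manual-call-1"}
    )

    pdf_data = command.update["pdf_data"]          # pdf_object, pdf_url, arxiv_id
    print(command.update["messages"][0].content)   # "Successfully downloaded PDF for arXiv ID ..."
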
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: aiagents4pharma
- Version: 1.29.0
+ Version: 1.30.0
  Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
@@ -135,17 +135,19 @@ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtfzKhB8GuOBD47XRi0NprwEXkOzwNl5eeu-hDTI,86
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
  aiagents4pharma/talk2scholars/__init__.py,sha256=gphERyVKZHvOnMQsml7TIHlaIshHJ75R1J3FKExkfuY,120
- aiagents4pharma/talk2scholars/agents/__init__.py,sha256=ZwFiHOlDGJk1601J5xEZDy0btPzqiOk2UCocKxohde8,168
+ aiagents4pharma/talk2scholars/agents/__init__.py,sha256=inLJpRDlT80RNSi3OFNi2lpbbTisQgzNkMYTvnhFjVY,203
  aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=TABzGSOg7I0_fJ0qybBVqZDdrU8YCjyG_m-kasO4WgE,2854
+ aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=3GxxNhA_VGf3QOozIjr5cEY2te5n6rQSdZpdFajZttA,3006
  aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=c9-_z5qp5Zkgh6piEIlgI4uo4OMXD3janZNmfYwnFCg,3729
  aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=ua1bjKE2HBKZuLnDn8me5fuV1lSvdZbwAlo3Yp27TT4,4659
  aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=5jfIJiLsRdlCJjkF7BQMkP5PsEY_Gr7SfztWKozbUGo,4223
  aiagents4pharma/talk2scholars/configs/__init__.py,sha256=tf2gz8n7M4ko6xLdX_C925ELVIxoP6SgkPcbeh59ad4,151
- aiagents4pharma/talk2scholars/configs/config.yaml,sha256=dQIMg3jLGYAudkc1Zz85qqvFf-HdVXPfewUfAfPNNzU,501
+ aiagents4pharma/talk2scholars/configs/config.yaml,sha256=-8X0_gTmjEuXAeIrnppw3Npy8HICelHZOvTKEScI-rs,596
  aiagents4pharma/talk2scholars/configs/agents/__init__.py,sha256=yyh7PB2oY_JulnpSQCWS4wwCH_uzIdt47O2Ay48x_oU,75
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=64GEWAoKOd_YHLi27eSOcOC5eSLK0IG_FNra3ZBt02Y,146
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=rZfZ_dJArjlznHzusjxCnOjhptLTyejFiB0euV5R13c,662
+ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=sn6vX6r-P0CR7UWS63ZqCmMKKn4As8pZoITRWx8sdoo,1151
@@ -155,6 +157,7 @@ aiagents4pharma/talk2scholars/configs/app/__init__.py,sha256=JoSZV6N669kGMv5zLDs
  aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml,sha256=wsELBdRLv6UqZ9QZfwpS7K4xfMj5s-a99-aXqIs6WEI,868
  aiagents4pharma/talk2scholars/configs/tools/__init__.py,sha256=GwpgnRrfjyZDVsangewSVTG3H3GBYM6s_YaQd9-zI10,238
+ aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml,sha256=QV7HrG7NdjBEjTMszh27MbGBYMbf_78V3sCGftdTtvo,442
  aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
@@ -168,10 +171,12 @@ aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py,sha256=fqQQ-
  aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml,sha256=6ZvZdCsnudPeVjnatv78Z0QfMwsHZuliE2RCIRCW05Y,1221
  aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/state/__init__.py,sha256=S6SxlszIMZSIMJehjevPF9sKyR-PAwWb5TEdo6xWXE8,103
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=E0epqlBurzNcMzN4WV6nh--YkVAesbxQTuBBlJhESVA,2436
+ aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=0dFSdsGiiilNIuuHQFEjpjQmcZXlK0JQwMV_GCiAsuU,2490
  aiagents4pharma/talk2scholars/tests/__init__.py,sha256=U3PsTiUZaUBD1IZanFGkDIOdFieDVJtGKQ5-woYUo8c,45
  aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py,sha256=SAMG-Kb2S9sei8Us5vUWCUJikTKXPZVKQ6aJJPEhJsc,1880
  aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=5QnOPKNrQCd5GdYU-vVF3bUrmitOsUcazZA7BsXeomo,5947
+ aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=CP4fKFU_JYP_AXvTptnwpjaVar1d5lVKV5vxYgH_1j4,5309
+ aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=_bGuoo4b6zD_vwLa7jGziWDT5qRtavsf02Jiaa7JIRU,5817
  aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=TN4Sq5-SCxv-9VfFyq7sOlBlxbekmnWuB7-qh4MrhkA,4656
  aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=TpCDiGfsC2y6bOkm0ZTXjT1Vp8D-Po25wiEH5aDT_DA,6491
  aiagents4pharma/talk2scholars/tests/test_routing_logic.py,sha256=AZrvaEBDk51KL6edrZY3GpQ_N6VbrlADqXFeg_jxDoQ,2284
@@ -188,6 +193,10 @@ aiagents4pharma/talk2scholars/tests/test_zotero_path.py,sha256=XeXYqTlSkJgZ02tCz
  aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=vLAPAFeL8MjDju_HlsLnio-9HxzN1RqOApr9jyemYBk,14951
  aiagents4pharma/talk2scholars/tests/test_zotero_write.py,sha256=76V7ezb6Xw-BEEwdJQvJs78JPGRYpAsijHIi3bTGsW8,23206
  aiagents4pharma/talk2scholars/tools/__init__.py,sha256=UtGutYNNaRcr2nOmT_XqbTiaJpgVYKo3KVGVPFVrX2Y,107
+ aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=0XmPLEqCply536Y1uWksmHYjlgNWcmcMpZx63XvGEFI,413
+ aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py,sha256=UgJOu9o9RAjlzMahUgPWV6iCGC6n7atDOa0VEp8bGx0,1325
+ aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py,sha256=kP5tyLc92zlkF5EPA7zVYSjpVk724pCsjHFgOntb_Tw,3869
+ aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py,sha256=EJBr9RSSog8tFa7BIFIDZ-Qn7qjqJIAuRb_hF4wZ49Q,2181
  aiagents4pharma/talk2scholars/tools/pdf/__init__.py,sha256=WOm-o-fFzyjFZBaHg658Gjzdiu1Kt-h9xvzvw0hR7aE,103
  aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=22JvT7F0rY11TF40pBfe9Cn2Y-6Tx73NfWDt4NJv700,6639
  aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=wytqCmGm8Fbl8y5qLdIkxhhG8VHLYMifCGjbH_LK2Fc,258
@@ -202,8 +211,8 @@ aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=eRqdQCyWws8q6iC
  aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py,sha256=dqYc5HWMK3vz77psHYUosMLE63NYg9Nk6xbWy8TOrU4,9246
  aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py,sha256=Ll8YQZj9sYJpXmoGxj_0ZcuEHDj06_CUqdDlTlevGL4,53
  aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=nHmYe3kcrygNOslHki4YeMztfnmRDPul4gZvXl_XsV0,1954
- aiagents4pharma-1.29.0.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
- aiagents4pharma-1.29.0.dist-info/METADATA,sha256=qYpzGvw6Raduy-RwlrnMNTElHqb4HP9n3LslreZaNl0,13245
- aiagents4pharma-1.29.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- aiagents4pharma-1.29.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
- aiagents4pharma-1.29.0.dist-info/RECORD,,
+ aiagents4pharma-1.30.0.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
+ aiagents4pharma-1.30.0.dist-info/METADATA,sha256=411N0HHxJVGSKxY07zzYQ4Z60aIJRN7fd4cMaSa7uVc,13245
+ aiagents4pharma-1.30.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ aiagents4pharma-1.30.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
+ aiagents4pharma-1.30.0.dist-info/RECORD,,