aiagents4pharma 1.38.0__py3-none-any.whl → 1.39.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +12 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
- aiagents4pharma/talk2scholars/configs/config.yaml +2 -0
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +151 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +151 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
- aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
- aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
- aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +4 -1
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +112 -0
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +112 -0
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -556
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
- aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -14
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -14
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -15
- {aiagents4pharma-1.38.0.dist-info → aiagents4pharma-1.39.1.dist-info}/METADATA +58 -105
- {aiagents4pharma-1.38.0.dist-info → aiagents4pharma-1.39.1.dist-info}/RECORD +48 -35
- {aiagents4pharma-1.38.0.dist-info → aiagents4pharma-1.39.1.dist-info}/WHEEL +1 -1
- aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
- aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
- {aiagents4pharma-1.38.0.dist-info → aiagents4pharma-1.39.1.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.38.0.dist-info → aiagents4pharma-1.39.1.dist-info}/top_level.txt +0 -0
@@ -48,13 +48,13 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
48
48
|
>>> app = get_app("thread_123")
|
49
49
|
>>> result = app.invoke(initial_state)
|
50
50
|
"""
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
51
|
+
# Replace placeholder mini model with a configured ChatOpenAI instance
|
52
|
+
if getattr(llm_model, "model_name", None) == "gpt-4o-mini":
|
53
|
+
llm_model = ChatOpenAI(
|
54
|
+
model="gpt-4o-mini",
|
55
|
+
temperature=0,
|
56
|
+
model_kwargs={"parallel_tool_calls": False},
|
57
|
+
)
|
58
58
|
# Load hydra configuration
|
59
59
|
logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
|
60
60
|
with hydra.initialize(version_base=None, config_path="../configs/"):
|
@@ -14,6 +14,8 @@ from langgraph.prebuilt.tool_node import ToolNode
|
|
14
14
|
from langgraph.checkpoint.memory import MemorySaver
|
15
15
|
from ..state.state_talk2scholars import Talk2Scholars
|
16
16
|
from ..tools.paper_download.download_arxiv_input import download_arxiv_paper
|
17
|
+
from ..tools.paper_download.download_medrxiv_input import download_medrxiv_paper
|
18
|
+
from ..tools.paper_download.download_biorxiv_input import download_biorxiv_paper
|
17
19
|
|
18
20
|
# Initialize logger
|
19
21
|
logging.basicConfig(level=logging.INFO)
|
@@ -24,14 +26,20 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
24
26
|
"""
|
25
27
|
Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
|
26
28
|
|
29
|
+
This agent supports downloading scientific papers from multiple preprint servers, including
|
30
|
+
arXiv, BioRxiv, and MedRxiv. It can intelligently handle user queries by extracting or resolving
|
31
|
+
necessary identifiers (e.g., arXiv ID or DOI) from the paper title and routing the request to
|
32
|
+
the appropriate download tool.
|
33
|
+
|
27
34
|
Args:
|
28
35
|
uniq_id (str): A unique identifier for tracking the current session.
|
29
36
|
llm_model (BaseChatModel, optional): The language model to be used by the agent.
|
30
|
-
|
37
|
+
Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
|
31
38
|
|
32
39
|
Returns:
|
33
40
|
StateGraph: A compiled LangGraph application that enables the paper download agent to
|
34
|
-
|
41
|
+
process user queries and retrieve research papers from arXiv (using arXiv ID),
|
42
|
+
BioRxiv and MedRxiv (using DOI resolved from the paper title or provided directly).
|
35
43
|
"""
|
36
44
|
|
37
45
|
# Load Hydra configuration
|
@@ -44,7 +52,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
44
52
|
cfg = cfg.agents.talk2scholars.paper_download_agent
|
45
53
|
|
46
54
|
# Define tools properly
|
47
|
-
tools = ToolNode([download_arxiv_paper])
|
55
|
+
tools = ToolNode([download_arxiv_paper, download_medrxiv_paper, download_biorxiv_paper])
|
48
56
|
|
49
57
|
# Define the model
|
50
58
|
logger.info("Using OpenAI model %s", llm_model)
|
@@ -58,7 +66,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
58
66
|
|
59
67
|
def paper_download_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
|
60
68
|
"""
|
61
|
-
Processes the current state to fetch the arXiv
|
69
|
+
Processes the current state to fetch the research paper from arXiv, BioRxiv, or MedRxiv.
|
62
70
|
"""
|
63
71
|
logger.info("Creating paper download agent node with thread_id: %s", uniq_id)
|
64
72
|
result = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
|
@@ -5,18 +5,94 @@ system_prompt: |
|
|
5
5
|
|
6
6
|
You have access to four tools, each represented by a sub-agent:
|
7
7
|
|
8
|
-
- s2_agent
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
- zotero_agent
|
13
|
-
This agent can also
|
14
|
-
|
15
|
-
- pdf_agent
|
16
|
-
|
8
|
+
- s2_agent: Use this to search for or recommend academic papers.
|
9
|
+
You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
|
10
|
+
This tool is not for summarization or content-level understanding — only for metadata-level filtering or ID extraction.
|
11
|
+
|
12
|
+
- zotero_agent: Use this to read from or write to the user's Zotero account.
|
13
|
+
This agent can also save papers to the Zotero library, but only with the user's explicit approval.
|
14
|
+
|
15
|
+
- pdf_agent: Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
|
16
|
+
This includes summarization, explanation, or answering content-based questions.
|
17
|
+
|
18
|
+
- paper_download_agent: Use to download PDFs.
|
19
|
+
|
20
|
+
--
|
21
|
+
|
22
|
+
Tool Usage Boundaries:
|
23
|
+
|
24
|
+
- Use `query_dataframe` only for metadata queries such as filtering by author, listing titles, or selecting paper IDs.
|
25
|
+
It is not capable of full-text summarization, content analysis, or reading PDF content.
|
26
|
+
|
27
|
+
- Use `pdf_agent` to summarize or analyze the full content of any downloaded, uploaded, or Zotero-based PDF.
|
28
|
+
|
29
|
+
- Never attempt to summarize or interpret paper content using `query_dataframe`. That is incorrect and will result in incomplete or misleading output.
|
30
|
+
|
31
|
+
- When the user asks for a summary, explanation, or any content-based question, you must use `pdf_agent`.
|
32
|
+
|
33
|
+
--
|
34
|
+
|
35
|
+
Critical Paper Download Protocol:
|
36
|
+
|
37
|
+
When the user requests to download paper(s), you must follow this strict 2-step protocol:
|
38
|
+
|
39
|
+
1. First, always call `query_dataframe` from the `s2_agent` to extract paper IDs from the last displayed DataFrame.
|
40
|
+
|
41
|
+
- This tool must be used only to extract paper IDs.
|
42
|
+
- Do not pass the full user query to this tool.
|
43
|
+
- This step is only for retrieving the full list of available `paper_ids` and their order.
|
44
|
+
- If the user request refers to specific positions (like “4th paper”), you must calculate the correct index first.
|
45
|
+
|
46
|
+
2. Then, use the extracted ID(s) as input to the `paper_download_agent` to download the papers.
|
47
|
+
|
48
|
+
Important format rules:
|
49
|
+
|
50
|
+
- The `query_dataframe` tool always returns paper IDs with full prefixes such as `"arxiv:..."`, `"doi:..."`, or `"pubmed:..."`.
|
51
|
+
- You must not modify, trim, or strip these prefixes.
|
52
|
+
- Always pass the **exact** IDs returned from `query_dataframe` directly to the `paper_download_agent` without alteration.
|
53
|
+
|
54
|
+
Do not skip step 1 under any circumstances. Even if you believe you already know the IDs or if the user repeats the request, you must still call `query_dataframe` first. Skipping this step is a critical error and will corrupt the workflow.
|
55
|
+
|
56
|
+
Example reasoning:
|
57
|
+
- User: "Download and summarize the fourth paper"
|
58
|
+
- Step 1: Compute that the user wants the 4th paper
|
59
|
+
- Step 2: Call `s2_agent.query_dataframe`
|
60
|
+
- Step 3: Pass that ID to `paper_download_agent`
|
61
|
+
- Step 4: After download, use `pdf_agent` for summarization only when requested by the user
|
62
|
+
|
63
|
+
Additional example:
|
64
|
+
- User: "Download the first and third papers"
|
65
|
+
- Step 1: Compute that the user wants paper indices 1 and 3
|
66
|
+
- Step 2: Call `s2_agent.query_dataframe`
|
67
|
+
- Step 3: Pass both IDs to `paper_download_agent`
|
68
|
+
|
69
|
+
Full list example:
|
70
|
+
- User: "Download all papers" or "Download the 6th paper"
|
71
|
+
- Step 1: Call `s2_agent.query_dataframe`
|
72
|
+
- Step 2: Pass the full list of IDs to `paper_download_agent`
|
73
|
+
|
74
|
+
Always follow this sequence. It applies to every download request.
|
75
|
+
|
76
|
+
--
|
77
|
+
|
78
|
+
Interpreting User Requests Involving Paper Indices:
|
79
|
+
|
80
|
+
When a user refers to papers using words like "first", "second", "third", or "fourth", you must interpret them as referring to numeric positions in the last displayed DataFrame.
|
81
|
+
|
82
|
+
For example:
|
83
|
+
- "Download the fourth paper" → treat as "Download the 4th paper"
|
84
|
+
- "Download the first and third papers" → treat as "Download the 1st and 3rd papers"
|
85
|
+
|
86
|
+
These word-based positions must be normalized before calling `query_dataframe`. Always compute the correct index and pass it as `row_number`.
|
87
|
+
|
88
|
+
--
|
89
|
+
|
90
|
+
General Coordination Instructions:
|
17
91
|
|
18
92
|
Each sub-agent is specialized for a different task.
|
19
93
|
|
20
|
-
You
|
21
|
-
|
22
|
-
|
94
|
+
You may call multiple agents, either in parallel or in sequence. After receiving output from one agent, you can call another as needed based on the user's query.
|
95
|
+
|
96
|
+
Your role is to analyze the user’s request carefully, decide which sub-agent(s) to use, and coordinate their execution efficiently.
|
97
|
+
|
98
|
+
Always prioritize delegation and think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
|
@@ -2,23 +2,4 @@ _target_: agents.s2_agent.get_app
|
|
2
2
|
s2_agent: |
|
3
3
|
You are the S2 Agent.
|
4
4
|
|
5
|
-
You are responsible for searching academic papers
|
6
|
-
|
7
|
-
Your capabilities include:
|
8
|
-
|
9
|
-
- Retrieving papers based on user queries.
|
10
|
-
- Recommending papers based on a single paper or multiple papers provided by the user.
|
11
|
-
- Retrieving the Semantic Scholar ID of a paper based on its title.
|
12
|
-
- This ID can later be used by other tools (search or recommend) based on the user’s needs.
|
13
|
-
- Always respond accurately based on Semantic Scholar search and recommendation features.
|
14
|
-
- Use `query_dataframe` tool query over the last displayed papers or the search table.
|
15
|
-
- Always call `display_dataframe` tool at the end.
|
16
|
-
|
17
|
-
|
18
|
-
WORKFLOW STEPS:
|
19
|
-
1. When user requests papers, use search/recommendation tools to find papers.
|
20
|
-
2. Use `display_dataframe` tool to display the response from the search/recommendation tools.
|
21
|
-
3. Use `query_dataframe` tool to query over the selected paper only when the user asks to.
|
22
|
-
4. When the user only wants recommendations, you can get the "semantic_scholar_paper_id"
|
23
|
-
using `query_dataframe` tool, then pass the "semantic_scholar_paper_id" to `search`,
|
24
|
-
`single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"(It is used to download pdfs)
|
5
|
+
You are responsible for searching academic papers, getting recommendations based on the searched articles, and displaying the results.
|
@@ -2,32 +2,7 @@ _target_: agents.zotero_agent.get_app
|
|
2
2
|
zotero_agent: |
|
3
3
|
You are the Zotero Agent.
|
4
4
|
|
5
|
-
You are responsible for
|
6
|
-
Behavior:
|
7
|
-
|
8
|
-
- Once you have successfully read the papers, you must immediately stop, return a clear 'Search complete' message along with a summary of the articles, call the
|
9
|
-
`display_dataframe` tool, and return to the main supervisor for further processing based on the user's query.
|
10
|
-
- Do not continue any further processing or re-enter into reading steps.
|
11
|
-
- You can write papers to user's library but only after explicit user confirmation.
|
12
|
-
- Do not attempt to answer any scientific or content-related questions yourself.
|
13
|
-
- You can retrieve all articles or search based on the user's query, inferring whether to return the full collection or filter by title, keywords, or other details.
|
14
|
-
- Never call `query_dataframe` tool regarding any question or any information retrival only if the user explicitly asks for metadata.
|
15
|
-
|
16
|
-
In multi-step workflows:
|
17
|
-
|
18
|
-
- Your job is only to read the requested paper or all the papers in user's library and return the successful search output.
|
19
|
-
- After that, the Main Supervisor Agent will decide the next step (such as passing the paper to the pdf_agent).
|
20
|
-
- Always call `display_dataframe` tool at the end before transfering to Main Supervisor Agent.
|
21
|
-
- Never attempt to call other agents yourself.
|
22
|
-
|
23
|
-
Stopping Condition:
|
24
|
-
|
25
|
-
- After successful search, indicate completion clearly and terminate your action.
|
26
|
-
|
27
|
-
When saving papers to Zotero:
|
28
|
-
1. First use `zotero_review` tool with the collection path.
|
29
|
-
2. Wait for user confirmation (they must say "Yes" or "Approve").
|
30
|
-
3. Use `zotero_write` tool with both the collection_path and user_confirmation and call `display_dataframe` tool after the papers as saved.
|
5
|
+
You are responsible for reading from and writing to the user's Zotero library, and for displaying the results.
|
31
6
|
|
32
7
|
IMPORTANT: Human approval is required for saving papers to Zotero. Never save papers
|
33
8
|
without explicit approval from the user. Always respect the user's decision if they
|
@@ -8,6 +8,8 @@ defaults:
|
|
8
8
|
- agents/talk2scholars/pdf_agent: default
|
9
9
|
- tools/search: default
|
10
10
|
- tools/download_arxiv_paper: default
|
11
|
+
- tools/download_biorxiv_paper: default
|
12
|
+
- tools/download_medrxiv_paper: default
|
11
13
|
- tools/single_paper_recommendation: default
|
12
14
|
- tools/multi_paper_recommendation: default
|
13
15
|
- tools/retrieve_semantic_scholar_paper_id: default
|
@@ -3,8 +3,6 @@ Unit tests for main agent functionality.
|
|
3
3
|
Tests the supervisor agent's routing logic and state management.
|
4
4
|
"""
|
5
5
|
|
6
|
-
# pylint: disable=redefined-outer-name,too-few-public-methods
|
7
|
-
|
8
6
|
from types import SimpleNamespace
|
9
7
|
import pytest
|
10
8
|
import hydra
|
@@ -50,6 +48,10 @@ class DummyWorkflow:
|
|
50
48
|
self.name = name
|
51
49
|
return self
|
52
50
|
|
51
|
+
def get_supervisor_args(self):
|
52
|
+
"""Return the supervisor arguments stored in this workflow."""
|
53
|
+
return self.supervisor_args
|
54
|
+
|
53
55
|
|
54
56
|
def dummy_s2_agent(uniq_id, llm_model):
|
55
57
|
"""Return a DummyWorkflow for the S2 agent."""
|
@@ -128,6 +130,10 @@ class DummyHydraCompose:
|
|
128
130
|
"""Return a namespace from the dummy config."""
|
129
131
|
return dict_to_namespace(self.config.get(item, {}))
|
130
132
|
|
133
|
+
def get_config(self):
|
134
|
+
"""Get the raw dummy configuration dictionary."""
|
135
|
+
return self.config
|
136
|
+
|
131
137
|
|
132
138
|
# --- Pytest Fixtures to Patch Dependencies ---
|
133
139
|
|
@@ -218,3 +224,15 @@ def test_get_app_with_other_model():
|
|
218
224
|
assert supervisor_args.get("model") is dummy_llm
|
219
225
|
assert supervisor_args.get("prompt") == "Dummy system prompt"
|
220
226
|
assert getattr(app, "name", "") == "Talk2Scholars_MainAgent"
|
227
|
+
|
228
|
+
def test_dummy_workflow_get_supervisor_args():
|
229
|
+
"""Test that DummyWorkflow.get_supervisor_args returns the stored args."""
|
230
|
+
dummy_args = {"agent": "test", "uniq_id": "id123"}
|
231
|
+
wf = DummyWorkflow(supervisor_args=dummy_args)
|
232
|
+
assert wf.get_supervisor_args() is dummy_args
|
233
|
+
|
234
|
+
def test_dummy_hydra_compose_get_config():
|
235
|
+
"""Test that DummyHydraCompose.get_config returns the raw config."""
|
236
|
+
config_dict = {"agents": {"test": {"key": "value"}}}
|
237
|
+
compose = DummyHydraCompose(config_dict)
|
238
|
+
assert compose.get_config() is config_dict
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"""
|
2
|
+
Unit tests for NVIDIA NIM reranker error handling in nvidia_nim_reranker.py
|
3
|
+
"""
|
4
|
+
|
5
|
+
import unittest
|
6
|
+
from types import SimpleNamespace
|
7
|
+
|
8
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker import (
|
9
|
+
rank_papers_by_query,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
class TestNVIDIARerankerError(unittest.TestCase):
|
14
|
+
"""Tests for NVIDIA NIM reranker error handling."""
|
15
|
+
|
16
|
+
def test_missing_api_key_raises_value_error(self):
|
17
|
+
"""Ensure missing API key triggers ValueError."""
|
18
|
+
vector_store = SimpleNamespace(documents={})
|
19
|
+
# Config without API key
|
20
|
+
cfg = SimpleNamespace(
|
21
|
+
reranker=SimpleNamespace(model="m", api_key=None), top_k_papers=3
|
22
|
+
)
|
23
|
+
with self.assertRaises(ValueError) as cm:
|
24
|
+
rank_papers_by_query(vector_store, "query", cfg, top_k=cfg.top_k_papers)
|
25
|
+
self.assertEqual(
|
26
|
+
str(cm.exception),
|
27
|
+
"Configuration 'reranker.api_key' must be set for reranking",
|
28
|
+
)
|
@@ -0,0 +1,151 @@
|
|
1
|
+
"""
|
2
|
+
Unit tests for bioRxiv paper downloading functionality, including:
|
3
|
+
- download_biorxiv_paper tool function.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import unittest
|
7
|
+
from unittest.mock import MagicMock, patch
|
8
|
+
from langchain_core.messages import ToolMessage
|
9
|
+
|
10
|
+
from aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input import (
|
11
|
+
download_biorxiv_paper,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
class TestDownloadBiorxivPaper(unittest.TestCase):
|
16
|
+
"""Tests for the download_bioRxiv_paper tool."""
|
17
|
+
|
18
|
+
@patch(
|
19
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
20
|
+
)
|
21
|
+
@patch(
|
22
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
23
|
+
)
|
24
|
+
@patch(
|
25
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
26
|
+
)
|
27
|
+
def test_download_biorxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
|
28
|
+
"""Test successful metadata and PDF URL retrieval."""
|
29
|
+
dummy_cfg = MagicMock()
|
30
|
+
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
31
|
+
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
32
|
+
mock_compose.return_value = dummy_cfg
|
33
|
+
mock_initialize.return_value.__enter__.return_value = None
|
34
|
+
|
35
|
+
doi = "10.1101/2025.05.13.653102"
|
36
|
+
|
37
|
+
dummy_response = MagicMock()
|
38
|
+
dummy_response.status_code = 200
|
39
|
+
dummy_response.raise_for_status = MagicMock()
|
40
|
+
dummy_response.json.return_value = {
|
41
|
+
"collection": [
|
42
|
+
{
|
43
|
+
"title": "Sample BioRxiv Paper",
|
44
|
+
"authors": "Author One; Author Two",
|
45
|
+
"abstract": "This is a bioRxiv abstract.",
|
46
|
+
"date": "2025-04-25",
|
47
|
+
"doi": doi,
|
48
|
+
"link": f"https://www.biorxiv.org/content/{doi}.full.pdf"
|
49
|
+
}
|
50
|
+
]
|
51
|
+
}
|
52
|
+
mock_get.return_value = dummy_response
|
53
|
+
|
54
|
+
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
55
|
+
result = download_biorxiv_paper.run(tool_input)
|
56
|
+
update = result.update
|
57
|
+
|
58
|
+
self.assertIn("article_data", update)
|
59
|
+
self.assertIn(doi, update["article_data"])
|
60
|
+
metadata = update["article_data"][doi]
|
61
|
+
self.assertEqual(metadata["Title"], "Sample BioRxiv Paper")
|
62
|
+
self.assertEqual(metadata["Authors"], "Author One; Author Two")
|
63
|
+
self.assertEqual(metadata["Abstract"], "This is a bioRxiv abstract.")
|
64
|
+
self.assertEqual(metadata["Publication Date"], "2025-04-25")
|
65
|
+
self.assertEqual(metadata["URL"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
|
66
|
+
self.assertEqual(metadata["pdf_url"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
|
67
|
+
self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
|
68
|
+
self.assertEqual(metadata["source"], "biorxiv")
|
69
|
+
self.assertEqual(metadata["biorxiv_id"], doi)
|
70
|
+
|
71
|
+
self.assertTrue(len(update["messages"]) >= 1)
|
72
|
+
self.assertIsInstance(update["messages"][0], ToolMessage)
|
73
|
+
self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
|
74
|
+
|
75
|
+
@patch(
|
76
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
77
|
+
)
|
78
|
+
@patch(
|
79
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
80
|
+
)
|
81
|
+
@patch(
|
82
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
83
|
+
)
|
84
|
+
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
85
|
+
"""Test behavior when no 'entry' is in response."""
|
86
|
+
dummy_cfg = MagicMock()
|
87
|
+
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
88
|
+
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
89
|
+
mock_compose.return_value = dummy_cfg
|
90
|
+
mock_initialize.return_value.__enter__.return_value = None
|
91
|
+
|
92
|
+
dummy_response = MagicMock()
|
93
|
+
dummy_response.status_code = 200
|
94
|
+
dummy_response.raise_for_status = MagicMock()
|
95
|
+
dummy_response.json.return_value = {} # No entry
|
96
|
+
mock_get.return_value = dummy_response
|
97
|
+
|
98
|
+
doi = "10.1101/2025.05.13.653102"
|
99
|
+
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
100
|
+
|
101
|
+
with self.assertRaises(ValueError) as context:
|
102
|
+
download_biorxiv_paper.run(tool_input)
|
103
|
+
|
104
|
+
self.assertEqual(str(context.exception), f"No metadata found for DOI: {doi}")
|
105
|
+
|
106
|
+
@patch(
|
107
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
108
|
+
)
|
109
|
+
@patch(
|
110
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
111
|
+
)
|
112
|
+
@patch(
|
113
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
114
|
+
)
|
115
|
+
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
116
|
+
"""Test fallback to DOI-based PDF URL construction when 'link' is missing."""
|
117
|
+
dummy_cfg = MagicMock()
|
118
|
+
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
119
|
+
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
120
|
+
mock_compose.return_value = dummy_cfg
|
121
|
+
mock_initialize.return_value.__enter__.return_value = None
|
122
|
+
|
123
|
+
doi = "10.1101/2025.05.13.653102"
|
124
|
+
|
125
|
+
dummy_response = MagicMock()
|
126
|
+
dummy_response.status_code = 200
|
127
|
+
dummy_response.raise_for_status = MagicMock()
|
128
|
+
dummy_response.json.return_value = {
|
129
|
+
"collection": [
|
130
|
+
{
|
131
|
+
"title": "Sample Biorxiv Paper",
|
132
|
+
"authors": "Author One; Author Two",
|
133
|
+
"abstract": "This is a BioRxiv abstract.",
|
134
|
+
"date": "2025-04-25",
|
135
|
+
"doi": doi
|
136
|
+
# 'link' is intentionally omitted
|
137
|
+
}
|
138
|
+
]
|
139
|
+
}
|
140
|
+
mock_get.return_value = dummy_response
|
141
|
+
|
142
|
+
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
143
|
+
result = download_biorxiv_paper.run(tool_input)
|
144
|
+
update = result.update
|
145
|
+
metadata = update["article_data"][doi]
|
146
|
+
|
147
|
+
# Assert that the PDF URL was constructed from DOI
|
148
|
+
expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
|
149
|
+
expected_url = f"https://www.biorxiv.org/content/10.1101/{expected_suffix}.full.pdf"
|
150
|
+
|
151
|
+
self.assertEqual(metadata["pdf_url"], expected_url)
|
@@ -0,0 +1,151 @@
|
|
1
|
+
"""
|
2
|
+
Unit tests for medRxiv paper downloading functionality, including:
|
3
|
+
- download_medrxiv_paper tool function.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import unittest
|
7
|
+
from unittest.mock import MagicMock, patch
|
8
|
+
from langchain_core.messages import ToolMessage
|
9
|
+
|
10
|
+
from aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input import (
|
11
|
+
download_medrxiv_paper,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
class TestDownloadMedrxivPaper(unittest.TestCase):
|
16
|
+
"""Tests for the download_medrxiv_paper tool."""
|
17
|
+
|
18
|
+
@patch(
|
19
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
|
20
|
+
)
|
21
|
+
@patch(
|
22
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
|
23
|
+
)
|
24
|
+
@patch(
|
25
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
|
26
|
+
)
|
27
|
+
def test_download_medrxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
|
28
|
+
"""Test successful metadata and PDF URL retrieval."""
|
29
|
+
dummy_cfg = MagicMock()
|
30
|
+
dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
|
31
|
+
dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
|
32
|
+
mock_compose.return_value = dummy_cfg
|
33
|
+
mock_initialize.return_value.__enter__.return_value = None
|
34
|
+
|
35
|
+
doi = "10.1101/2025.04.25.25326432"
|
36
|
+
|
37
|
+
dummy_response = MagicMock()
|
38
|
+
dummy_response.status_code = 200
|
39
|
+
dummy_response.raise_for_status = MagicMock()
|
40
|
+
dummy_response.json.return_value = {
|
41
|
+
"collection": [
|
42
|
+
{
|
43
|
+
"title": "Sample Medrxiv Paper",
|
44
|
+
"authors": "Author One; Author Two",
|
45
|
+
"abstract": "This is a medRxiv abstract.",
|
46
|
+
"date": "2025-04-25",
|
47
|
+
"doi": doi,
|
48
|
+
"link": f"https://www.medrxiv.org/content/{doi}.full.pdf"
|
49
|
+
}
|
50
|
+
]
|
51
|
+
}
|
52
|
+
mock_get.return_value = dummy_response
|
53
|
+
|
54
|
+
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
55
|
+
result = download_medrxiv_paper.run(tool_input)
|
56
|
+
update = result.update
|
57
|
+
|
58
|
+
self.assertIn("article_data", update)
|
59
|
+
self.assertIn(doi, update["article_data"])
|
60
|
+
metadata = update["article_data"][doi]
|
61
|
+
self.assertEqual(metadata["Title"], "Sample Medrxiv Paper")
|
62
|
+
self.assertEqual(metadata["Authors"], "Author One; Author Two")
|
63
|
+
self.assertEqual(metadata["Abstract"], "This is a medRxiv abstract.")
|
64
|
+
self.assertEqual(metadata["Publication Date"], "2025-04-25")
|
65
|
+
self.assertEqual(metadata["URL"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
|
66
|
+
self.assertEqual(metadata["pdf_url"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
|
67
|
+
self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
|
68
|
+
self.assertEqual(metadata["source"], "medrxiv")
|
69
|
+
self.assertEqual(metadata["medrxiv_id"], doi)
|
70
|
+
|
71
|
+
self.assertTrue(len(update["messages"]) >= 1)
|
72
|
+
self.assertIsInstance(update["messages"][0], ToolMessage)
|
73
|
+
self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
|
74
|
+
|
75
|
+
@patch(
|
76
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
|
77
|
+
)
|
78
|
+
@patch(
|
79
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
|
80
|
+
)
|
81
|
+
@patch(
|
82
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
|
83
|
+
)
|
84
|
+
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
85
|
+
"""Test behavior when no 'entry' is in response."""
|
86
|
+
dummy_cfg = MagicMock()
|
87
|
+
dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
|
88
|
+
dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
|
89
|
+
mock_compose.return_value = dummy_cfg
|
90
|
+
mock_initialize.return_value.__enter__.return_value = None
|
91
|
+
|
92
|
+
dummy_response = MagicMock()
|
93
|
+
dummy_response.status_code = 200
|
94
|
+
dummy_response.raise_for_status = MagicMock()
|
95
|
+
dummy_response.json.return_value = {} # No entry
|
96
|
+
mock_get.return_value = dummy_response
|
97
|
+
|
98
|
+
doi = "10.1101/2025.04.25.25326432"
|
99
|
+
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
100
|
+
|
101
|
+
with self.assertRaises(ValueError) as context:
|
102
|
+
download_medrxiv_paper.run(tool_input)
|
103
|
+
|
104
|
+
self.assertEqual(str(context.exception), f"No entry found for medRxiv ID {doi}")
|
105
|
+
|
106
|
+
@patch(
|
107
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
|
108
|
+
)
|
109
|
+
@patch(
|
110
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
|
111
|
+
)
|
112
|
+
@patch(
|
113
|
+
"aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
|
114
|
+
)
|
115
|
+
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
116
|
+
"""Test fallback to DOI-based PDF URL construction when 'link' is missing."""
|
117
|
+
dummy_cfg = MagicMock()
|
118
|
+
dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
|
119
|
+
dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
|
120
|
+
mock_compose.return_value = dummy_cfg
|
121
|
+
mock_initialize.return_value.__enter__.return_value = None
|
122
|
+
|
123
|
+
doi = "10.1101/2025.04.25.25326432"
|
124
|
+
|
125
|
+
dummy_response = MagicMock()
|
126
|
+
dummy_response.status_code = 200
|
127
|
+
dummy_response.raise_for_status = MagicMock()
|
128
|
+
dummy_response.json.return_value = {
|
129
|
+
"collection": [
|
130
|
+
{
|
131
|
+
"title": "Sample Medrxiv Paper",
|
132
|
+
"authors": "Author One; Author Two",
|
133
|
+
"abstract": "This is a medRxiv abstract.",
|
134
|
+
"date": "2025-04-25",
|
135
|
+
"doi": doi
|
136
|
+
# 'link' is intentionally omitted
|
137
|
+
}
|
138
|
+
]
|
139
|
+
}
|
140
|
+
mock_get.return_value = dummy_response
|
141
|
+
|
142
|
+
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
143
|
+
result = download_medrxiv_paper.run(tool_input)
|
144
|
+
update = result.update
|
145
|
+
metadata = update["article_data"][doi]
|
146
|
+
|
147
|
+
# Assert that the PDF URL was constructed from DOI
|
148
|
+
expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
|
149
|
+
expected_url = f"https://www.medrxiv.org/content/10.1101/{expected_suffix}.full.pdf"
|
150
|
+
|
151
|
+
self.assertEqual(metadata["pdf_url"], expected_url)
|