aiagents4pharma 1.42.0__py3-none-any.whl → 1.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +7 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +49 -95
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +15 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +16 -2
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +40 -5
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +15 -5
- aiagents4pharma/talk2scholars/configs/config.yaml +1 -3
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py +478 -0
- aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py +620 -0
- aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py +697 -0
- aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py +534 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +22 -12
- aiagents4pharma/talk2scholars/tests/test_paper_downloader.py +545 -0
- aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py +1067 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +2 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +457 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +20 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +209 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +343 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +321 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +198 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +337 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +97 -45
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +47 -29
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.43.0.dist-info}/METADATA +3 -1
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.43.0.dist-info}/RECORD +31 -28
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +0 -4
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +0 -249
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +0 -177
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +0 -114
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +0 -114
- /aiagents4pharma/talk2scholars/configs/tools/{download_arxiv_paper → paper_download}/__init__.py +0 -0
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.43.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.43.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.42.0.dist-info → aiagents4pharma-1.43.0.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,31 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
|
-
"""
|
4
|
-
Tool for querying the metadata table of the last displayed papers.
|
5
3
|
|
6
|
-
This tool loads the most recently displayed papers into a pandas DataFrame and uses an
|
7
|
-
LLM-driven pandas agent to answer metadata-level questions (e.g., filter by author, list titles).
|
8
|
-
It is intended for metadata exploration only, and does not perform content-based retrieval
|
9
|
-
or summarization. For PDF-level question answering, use the 'question_and_answer_agent'.
|
10
4
|
"""
|
5
|
+
Query the metadata table of the most recently displayed papers.
|
6
|
+
|
7
|
+
This tool loads `state['last_displayed_papers']` into a pandas DataFrame and uses an
|
8
|
+
LLM-driven DataFrame agent to execute metadata-level queries. It supports both
|
9
|
+
natural-language prompts (e.g., “list titles by author X”) and direct Python expressions
|
10
|
+
over the DataFrame.
|
11
|
+
|
12
|
+
Capabilities
|
13
|
+
- Filter, sort, and aggregate rows using metadata columns (e.g., Title, Authors, Venue, Year).
|
14
|
+
- Extract paper identifiers from a designated column (default: 'paper_ids'),
|
15
|
+
optionally for a single row.
|
16
|
+
- Return the DataFrame agent’s textual result as a ToolMessage.
|
17
|
+
|
18
|
+
Requirements
|
19
|
+
- `state['llm_model']`: model used to instantiate the DataFrame agent.
|
20
|
+
- `state['last_displayed_papers']`: dictionary mapping row keys → metadata records.
|
21
|
+
|
22
|
+
Notes
|
23
|
+
- Operates strictly on the metadata table; it does not parse or read PDF content.
|
24
|
+
- When `extract_ids=True`, the tool constructs a Python expression for the agent to evaluate
|
25
|
+
and return identifiers from `id_column`. If `row_number` is provided (1-based), only that row’s
|
26
|
+
first identifier is returned; otherwise a list is returned from all rows that have values.
|
27
|
+
"""
|
28
|
+
|
11
29
|
|
12
30
|
import logging
|
13
31
|
from typing import Annotated, Optional, Any
|
@@ -32,23 +50,32 @@ class NoPapersFoundError(Exception):
|
|
32
50
|
|
33
51
|
class QueryDataFrameInput(BaseModel):
|
34
52
|
"""
|
35
|
-
|
53
|
+
Input schema for querying the last displayed papers metadata DataFrame.
|
36
54
|
|
37
55
|
Fields:
|
38
|
-
question:
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
56
|
+
question (str):
|
57
|
+
The query to execute. Accepts natural language (e.g., "List titles from 2024")
|
58
|
+
or a Python expression over the DataFrame (e.g., "df['Title'].tolist()").
|
59
|
+
|
60
|
+
extract_ids (bool, default=False):
|
61
|
+
When True, the tool prepares a Python expression for the DataFrame agent to extract
|
62
|
+
identifiers from `id_column`. Use to obtain IDs from the metadata table.
|
63
|
+
|
64
|
+
id_column (str, default="paper_ids"):
|
65
|
+
Name of the column that contains per-row lists of identifiers (e.g., ["arxiv:2301.12345"]).
|
66
|
+
Used only when `extract_ids=True`.
|
67
|
+
|
68
|
+
row_number (int | None, default=None):
|
69
|
+
1-based row index. When provided with `extract_ids=True`, returns only that row’s first
|
70
|
+
identifier. When omitted, returns a list of first identifiers from each applicable row.
|
71
|
+
|
72
|
+
tool_call_id (InjectedToolCallId):
|
73
|
+
Internal identifier for tracing the tool invocation.
|
74
|
+
|
75
|
+
state (dict):
|
76
|
+
Agent state containing:
|
77
|
+
- 'last_displayed_papers': dict with the current results table (rows → metadata)
|
78
|
+
- 'llm_model': model object or reference for the DataFrame agent
|
52
79
|
"""
|
53
80
|
|
54
81
|
question: str = Field(
|
@@ -95,30 +122,55 @@ def query_dataframe(
|
|
95
122
|
**kwargs: Any,
|
96
123
|
) -> Command:
|
97
124
|
"""
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
125
|
+
Execute a metadata query against the DataFrame built from `last_displayed_papers`.
|
126
|
+
|
127
|
+
Behavior
|
128
|
+
- Builds a pandas DataFrame from `state['last_displayed_papers']`.
|
129
|
+
- Instantiates a pandas DataFrame agent with `state['llm_model']`.
|
130
|
+
- Runs either:
|
131
|
+
• the provided natural-language prompt, or
|
132
|
+
• a constructed Python expression when `extract_ids=True`
|
133
|
+
(optionally scoped to `row_number`, 1-based).
|
134
|
+
- Returns the DataFrame agent’s output text in a ToolMessage.
|
135
|
+
|
136
|
+
Parameters
|
137
|
+
question (str):
|
138
|
+
Natural-language query or Python expression to run on the DataFrame.
|
139
|
+
state (dict):
|
140
|
+
Must provide 'llm_model' and 'last_displayed_papers'.
|
141
|
+
tool_call_id (str):
|
142
|
+
Internal identifier for the tool call.
|
143
|
+
**kwargs:
|
144
|
+
extract_ids (bool): Enable ID extraction from `id_column`.
|
145
|
+
id_column (str): Column containing lists of identifiers (default: "paper_ids").
|
146
|
+
row_number (int | None): 1-based index for a single-row extraction.
|
147
|
+
|
148
|
+
Returns
|
149
|
+
Command:
|
150
|
+
update = {
|
151
|
+
"messages": [
|
152
|
+
ToolMessage(
|
153
|
+
content=<text result from the DataFrame agent>,
|
154
|
+
tool_call_id=<tool_call_id>
|
155
|
+
)
|
156
|
+
]
|
157
|
+
}
|
158
|
+
|
159
|
+
Errors
|
160
|
+
- Raises `ValueError` if 'llm_model' is missing in `state`.
|
161
|
+
- Raises `NoPapersFoundError` if `state['last_displayed_papers']` is missing or empty.
|
162
|
+
- Raises `ValueError` if a required argument for the chosen mode is invalid
|
163
|
+
(e.g., no `id_column` when `extract_ids=True`).
|
164
|
+
|
165
|
+
Examples
|
166
|
+
- Natural language:
|
167
|
+
question="List titles where Year >= 2023"
|
168
|
+
- Python list of titles:
|
169
|
+
question="df.query('Year >= 2023')['Title'].tolist()"
|
170
|
+
- Extract first ID from row 1:
|
171
|
+
extract_ids=True, row_number=1
|
172
|
+
- Extract first IDs from all rows:
|
173
|
+
extract_ids=True
|
122
174
|
"""
|
123
175
|
logger.info("Querying last displayed papers with question: %s", question)
|
124
176
|
llm_model = state.get("llm_model")
|
@@ -1,12 +1,11 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
"""
|
4
|
-
|
4
|
+
Resolve a paper title to a Semantic Scholar paperId.
|
5
5
|
|
6
|
-
This tool queries the Semantic Scholar API for the best match
|
7
|
-
|
8
|
-
|
9
|
-
use this tool for broad literature search; use the `search` tool instead.
|
6
|
+
This module provides a tool that queries the Semantic Scholar API for the best match to a
|
7
|
+
given paper title (full or partial) and returns the corresponding `paperId` string.
|
8
|
+
Configuration is loaded via Hydra and the top ranked result is returned.
|
10
9
|
"""
|
11
10
|
|
12
11
|
import logging
|
@@ -27,11 +26,14 @@ logger = logging.getLogger(__name__)
|
|
27
26
|
|
28
27
|
class RetrieveSemanticScholarPaperIdInput(BaseModel):
|
29
28
|
"""
|
30
|
-
|
31
|
-
|
32
|
-
Fields
|
33
|
-
|
34
|
-
|
29
|
+
Input schema for title→paperId resolution.
|
30
|
+
|
31
|
+
Fields
|
32
|
+
-------
|
33
|
+
paper_title : str
|
34
|
+
Paper title to search. Accepts full titles or informative partial titles.
|
35
|
+
tool_call_id : InjectedToolCallId
|
36
|
+
Runtime-injected identifier for tracing the tool invocation.
|
35
37
|
"""
|
36
38
|
|
37
39
|
paper_title: str = Field(
|
@@ -50,27 +52,43 @@ def retrieve_semantic_scholar_paper_id(
|
|
50
52
|
tool_call_id: str,
|
51
53
|
) -> Command[Any]:
|
52
54
|
"""
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
55
|
+
Look up a Semantic Scholar paperId from a paper title.
|
56
|
+
|
57
|
+
Behavior
|
58
|
+
--------
|
59
|
+
- Loads Hydra config from `tools.retrieve_semantic_scholar_paper_id`.
|
60
|
+
- Sends a search request with `query=<paper_title>`, `limit=1`, and requested fields.
|
61
|
+
- Parses the top hit and returns its `paperId` as the ToolMessage content (plain string).
|
62
|
+
|
63
|
+
Parameters
|
64
|
+
----------
|
65
|
+
paper_title : str
|
66
|
+
Title or informative partial title to resolve.
|
67
|
+
tool_call_id : str
|
68
|
+
Runtime-injected identifier for the tool call.
|
69
|
+
|
70
|
+
Returns
|
71
|
+
-------
|
72
|
+
Command
|
73
|
+
update = {
|
74
|
+
"messages": [
|
75
|
+
ToolMessage(
|
76
|
+
content="<paperId>", # Semantic Scholar paperId string
|
77
|
+
tool_call_id=<tool_call_id>
|
78
|
+
)
|
79
|
+
]
|
80
|
+
}
|
66
81
|
|
67
|
-
|
68
|
-
|
69
|
-
|
82
|
+
Exceptions
|
83
|
+
----------
|
84
|
+
ValueError
|
85
|
+
Raised when no match is found for the provided title.
|
86
|
+
requests.RequestException
|
87
|
+
Raised on network/HTTP errors (timeout, connection issues, etc.).
|
70
88
|
|
71
|
-
|
72
|
-
|
73
|
-
|
89
|
+
Examples
|
90
|
+
--------
|
91
|
+
>>> retrieve_semantic_scholar_paper_id("Attention Is All You Need", "tc_123")
|
74
92
|
"""
|
75
93
|
# Load hydra configuration
|
76
94
|
with hydra.initialize(version_base=None, config_path="../../configs"):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: aiagents4pharma
|
3
|
-
Version: 1.42.0
|
3
|
+
Version: 1.43.0
|
4
4
|
Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -72,6 +72,7 @@ Dynamic: license-file
|
|
72
72
|
<!-- Project Info -->
|
73
73
|
|
74
74
|

|
75
|
+

|
75
76
|

|
76
77
|
|
77
78
|
<!-- Deployment Workflows -->
|
@@ -79,6 +80,7 @@ Dynamic: license-file
|
|
79
80
|
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/pages/pages-build-deployment)
|
80
81
|
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/mkdocs-deploy.yml)
|
81
82
|
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/docker_build.yml)
|
83
|
+
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/docker_compose_release.yml)
|
82
84
|
|
83
85
|
<!-- Tests -->
|
84
86
|
|
@@ -157,36 +157,32 @@ aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I
|
|
157
157
|
aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
|
158
158
|
aiagents4pharma/talk2scholars/agents/__init__.py,sha256=c_0Pk85bt-RfK5RMyALM3MXo3qXVMoYS7BOqM9wuFME,317
|
159
159
|
aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=oQqa1z4nvfUvPWCX-SUHGs9jOCJKtzjw86jXJZ68gCk,3382
|
160
|
-
aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=
|
160
|
+
aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=gIaaUcT4JPuQSJ5C-PPtY92oskWKh69zl99WzqPO9AA,3501
|
161
161
|
aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=GEXzJMQxIeZ7zLP-AlnTMU-n_KXZ7g22Qd9L3USIc_4,3626
|
162
162
|
aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=oui0CMSyXmBGBJ7LnYq8Ce0V8Qc3BS6GgH5Qx5wI6oM,4565
|
163
163
|
aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=NAmEURIhH-sjXGO-dqAigUA10m-Re9Qe1hY8db4CIP0,4370
|
164
164
|
aiagents4pharma/talk2scholars/configs/__init__.py,sha256=Y9-4PxsNCMoxyyQgDSbPByJnO9wnyem5SYL3eOZt1HY,189
|
165
|
-
aiagents4pharma/talk2scholars/configs/config.yaml,sha256=
|
165
|
+
aiagents4pharma/talk2scholars/configs/config.yaml,sha256=GyL69HPuoccv93Gcw7S44xiWG-iPxfKU-p27VWC12iY,590
|
166
166
|
aiagents4pharma/talk2scholars/configs/agents/__init__.py,sha256=plv5Iw34gvbGZbRyJapvoOiiFXekRQIwjV_yy5AR_SI,104
|
167
167
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=D94LW4cXLmJe4dNl5qoR9QN0JnBqGLbQDgDLqhCNUE0,213
|
168
168
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
169
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=
|
169
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=vSxteufVdgZeXBnU_LhduoBNBVz17s3mpqrjFw-QPUI,3677
|
170
170
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
171
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml,sha256=
|
171
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml,sha256=S1cmJaX6Xp4i_ixExZfP0qgD-tXQVFbpWpXvrh7ZVeQ,1173
|
172
172
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
173
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml,sha256=
|
173
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml,sha256=MYC9REeARVfTOCWyV_4cInvWHDksKMz193Q1LirKtZw,832
|
174
174
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
175
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=
|
175
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=X88qKa0DgqCIvpnJgO_qEqAhhT9ymDr1B8HkJLTkB0U,2718
|
176
176
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
177
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml,sha256=
|
177
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml,sha256=d_38r0JtOPW0wM3J7LTB17aW3eBRyA6nFhB8MJd6lOw,1016
|
178
178
|
aiagents4pharma/talk2scholars/configs/app/__init__.py,sha256=tXpOW3R4eAfNoqvoaHfabSG-DcMHmUGSTg_4zH_vlgw,94
|
179
179
|
aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
180
180
|
aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml,sha256=A6nYjrgzEyRv5JYsGN7oqNX4-tufMBZ6mg-A7bMX6V4,906
|
181
181
|
aiagents4pharma/talk2scholars/configs/tools/__init__.py,sha256=6pHPF0ZGY78SD6KPMukd_xrfO1ocVXcyrsrB-kz-OnI,402
|
182
|
-
aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
183
|
-
aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml,sha256=VT3f-E6QGtqUjLEX0eaw9b_7f1Fp83cnnMOpqOufK4I,120
|
184
|
-
aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
185
|
-
aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml,sha256=d5_dl1_FYWseBGTVGkvC41jfJMsMUaDMOmC_av3aL4Q,72
|
186
|
-
aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
187
|
-
aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml,sha256=NaWmyVZ71rIF3ZRUm912wQgRL0cgDTWA-hVKeu60rtg,70
|
188
182
|
aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
189
183
|
aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml,sha256=comNgL9hRpH--IWuEsrN6hV5WdrJmh-ZsRh7hbryVhg,631
|
184
|
+
aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
185
|
+
aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml,sha256=KaHKlCf-Y1Ob1wnEKhfhKEaLye_L9Qs_BT3PhLlDM6Y,3571
|
190
186
|
aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
191
187
|
aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml,sha256=Pa3JVyA9tabXZ4Bk3n5dAml7P-nXUcT7HgkA8Kr_sXk,2238
|
192
188
|
aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
@@ -202,12 +198,14 @@ aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml,sha256=gB7
|
|
202
198
|
aiagents4pharma/talk2scholars/state/__init__.py,sha256=ReScKLpEvedq4P6ww52NRQS0Xr6SSQV7hqoQ83Mt75U,138
|
203
199
|
aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=Z2zV-SXB2SMnn8PnjWjmK-OD5KjUwMTChBpXBAcl2hg,3885
|
204
200
|
aiagents4pharma/talk2scholars/tests/__init__.py,sha256=U3PsTiUZaUBD1IZanFGkDIOdFieDVJtGKQ5-woYUo8c,45
|
201
|
+
aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py,sha256=JvqMLUeNVnmkhGo9fjDr73r76kwWFiuBbJHMAdo7Jko,18772
|
202
|
+
aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py,sha256=Rtp05inMVbDg4Yt7HlmXFt3_9xOXUighQsxishmABSk,25107
|
203
|
+
aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py,sha256=qvNDcJTpqV7L5bZ3r40wtBehNcrDNtlOJLkzhSNh6q0,27690
|
205
204
|
aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=4Z3xLq8MGlayGhQE5qKOirYotwJrlf7fk8rqAaORorg,7617
|
205
|
+
aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py,sha256=aoLB64tw7HXMv0xIIOUZ5CFt9fd8n61kQMmKpznh2M0,20848
|
206
206
|
aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py,sha256=ftfn4VenufZ-zt2nSOyOoCjfSbwFOW9CS1DLzPtBEaM,4410
|
207
|
-
aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=
|
208
|
-
aiagents4pharma/talk2scholars/tests/
|
209
|
-
aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py,sha256=iNq9vEIVapmnUZTRJXCv_UoaWThGapW7Vt_2BmZG9NE,6414
|
210
|
-
aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=lGXbHl3lEXDjMHAX9uCgrREBOUuOHWv9TsYEshiG_tc,10421
|
207
|
+
aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=1QeIf9SrBM712DOPC5qotL-vDo7Ax-9l9LDDEtTJNdI,5344
|
208
|
+
aiagents4pharma/talk2scholars/tests/test_paper_downloader.py,sha256=8r_OZOJkHZ8mhrSNuSZ3aLyIA6Ngb-S23S30XmYRZ-c,22822
|
211
209
|
aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=9Kr0FcyFWmUDTasYh6ZdS-OWQqy37mH9K3p5Y0dqQHw,4283
|
212
210
|
aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py,sha256=a1a_z1M9sOQ_SFo-gHM3xA_f5MoJJmEoW8Tc3AX9vL0,2239
|
213
211
|
aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py,sha256=e8KQJ80nbOmCH4mgDnIXXjlrRk0zf0g-5Odt2jsCU0s,3251
|
@@ -221,6 +219,7 @@ aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py,sha256=2cjbCrf86
|
|
221
219
|
aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py,sha256=gOk8L9wNRPUnAiB89n1a4pQQPI7UtonRi7-IcCip94k,5628
|
222
220
|
aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py,sha256=pu9I3tromjToIN5r4S8sWanaLBVhhk71UPP5zTTGwZY,3928
|
223
221
|
aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py,sha256=-7CUiPLT4mOBVkNV2qlF7t4yU_mRArM0uAcUAKgEY4k,15644
|
222
|
+
aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py,sha256=FpuyL2NhlnX6tkI8UwU7rxYJe94TdHlwJWZan5rbXcg,44972
|
224
223
|
aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=UKSD7wLJ1cZunw8a1BHM7E-NUML6blxpqoDroCYuvQs,4245
|
225
224
|
aiagents4pharma/talk2scholars/tests/test_read_helper_utils.py,sha256=yTT1aLpTydDSdGcRZur5cMktwYZbFK5NEUgOBvltcWg,3819
|
226
225
|
aiagents4pharma/talk2scholars/tests/test_s2_agent.py,sha256=TsdNlZ6vHz18bbX6Vto28nbBLRDI94wSFt5-1acDK64,7768
|
@@ -240,10 +239,14 @@ aiagents4pharma/talk2scholars/tests/test_zotero_pdf_downloader_utils.py,sha256=N
|
|
240
239
|
aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=qkudWMjxjjTYKJ1zvpWs0EJXCIvFx-iNKyKs_Tv1CSI,29061
|
241
240
|
aiagents4pharma/talk2scholars/tests/test_zotero_write.py,sha256=qWlO0XoZJ6vxUxgisjYv9Np87CoTEDxiQBEOhdj9foo,6111
|
242
241
|
aiagents4pharma/talk2scholars/tools/__init__.py,sha256=c8pYHDqR9P0Frz2jWjbvyizfSTBMlMFzGsiQzx2KC9c,189
|
243
|
-
aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=
|
244
|
-
aiagents4pharma/talk2scholars/tools/paper_download/
|
245
|
-
aiagents4pharma/talk2scholars/tools/paper_download/
|
246
|
-
aiagents4pharma/talk2scholars/tools/paper_download/
|
242
|
+
aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=PzJTrcXBHB3e2rgOvJ3Q8JJjvZAXyUCtGcGdANk1svE,227
|
243
|
+
aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py,sha256=YJuuRhkxFZIHyQTE3Hh4rkJF1hYCFNgORd_9dyp9-5E,16908
|
244
|
+
aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py,sha256=mbtb0IK6fW6ASQyqMhkaCHFBXQUguR7G6hZHrTJwZXc,467
|
245
|
+
aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py,sha256=xWb4YPbDPZbgBDS1INDCSrOVeNiDRNC2KYhiOQjJV3o,7029
|
246
|
+
aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py,sha256=pIt-BFgt7mh4XPXzcfRy5LY8xzmhADF6j8-TOyiaa1Q,11350
|
247
|
+
aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py,sha256=sz4lc1jQEKzYk9kX2QJotgjbNWXpLfmi8jfY-zVW8Wg,11652
|
248
|
+
aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py,sha256=9jKbh6lW9IBI9RJhHzqV1ktz3LfvoD3B5ayeXk15Eg4,6638
|
249
|
+
aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py,sha256=Md9bWaiy7_88NIue_3NkPrmXyNZQS8zLcK4xaFQmIjU,12177
|
247
250
|
aiagents4pharma/talk2scholars/tools/pdf/__init__.py,sha256=DPpOfON3AySko5EBBAe_3udOoSaAdQWNyGeNvJyV5R8,138
|
248
251
|
aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=_HLO04bzTSlnEEWwkJ3rC9Gjz8MPeHVglqovoTDOSp4,5844
|
249
252
|
aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py,sha256=AlvoJY0hI4MCS4zHO3EtFkFJpKqjO7ZxWkoE3QvHi88,820
|
@@ -265,8 +268,8 @@ aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py,sha256=X9OGwV3RHZI
|
|
265
268
|
aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=w_eiw0pG8HNp79F9O_icXs_Yl_4odsmagYNKDTjIsvk,428
|
266
269
|
aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py,sha256=qnY7AQDnAs0SrmV7AZ9pWm10HEmPlO7EBfzYvpb3jvs,3965
|
267
270
|
aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py,sha256=TEt4jOX1u8v3w_u10sGx-Ghwhe4yjuaYmUjD62nJQJM,3886
|
268
|
-
aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py,sha256=
|
269
|
-
aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha256=
|
271
|
+
aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py,sha256=bZiSe7-hf0IHIoZhITtgOSDauyd9NAtim5ukWi1vyuA,8599
|
272
|
+
aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha256=zwm4LGfFWxJLIPRc2TO5zQ48kCKe7EmkwY1SyTVCTAg,4014
|
270
273
|
aiagents4pharma/talk2scholars/tools/s2/search.py,sha256=SUAN32x1d9dNikFKitcXZZ0BhFfsGMdLDk0z0DpJXuA,3334
|
271
274
|
aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py,sha256=JTggOB4sqhTF1kP81Gl_54RjpbHIfqtJEoUTxlZ82N8,3630
|
272
275
|
aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py,sha256=wBTPVgiXbmIJUMouOQRwojgk5PJXeEinDJzHzEToZbU,229
|
@@ -283,8 +286,8 @@ aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9y
|
|
283
286
|
aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
|
284
287
|
aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
|
285
288
|
aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py,sha256=ERBha8afU6Q1EaRBe9qB8tchOzZ4_KfFgDW6EElOJoU,4816
|
286
|
-
aiagents4pharma-1.
|
287
|
-
aiagents4pharma-1.
|
288
|
-
aiagents4pharma-1.
|
289
|
-
aiagents4pharma-1.
|
290
|
-
aiagents4pharma-1.
|
289
|
+
aiagents4pharma-1.43.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
|
290
|
+
aiagents4pharma-1.43.0.dist-info/METADATA,sha256=9gum-z3uXsxDqDhQjnjD8tHEoNpfSFgxeusKXRC3WJ0,13281
|
291
|
+
aiagents4pharma-1.43.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
292
|
+
aiagents4pharma-1.43.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
|
293
|
+
aiagents4pharma-1.43.0.dist-info/RECORD,,
|
@@ -1,151 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Unit tests for bioRxiv paper downloading functionality, including:
|
3
|
-
- download_bioRxiv_paper tool function.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import unittest
|
7
|
-
from unittest.mock import MagicMock, patch
|
8
|
-
from langchain_core.messages import ToolMessage
|
9
|
-
|
10
|
-
from aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input import (
|
11
|
-
download_biorxiv_paper,
|
12
|
-
)
|
13
|
-
|
14
|
-
|
15
|
-
class TestDownloadBiorxivPaper(unittest.TestCase):
|
16
|
-
"""Tests for the download_bioRxiv_paper tool."""
|
17
|
-
|
18
|
-
@patch(
|
19
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
20
|
-
)
|
21
|
-
@patch(
|
22
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
23
|
-
)
|
24
|
-
@patch(
|
25
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
26
|
-
)
|
27
|
-
def test_download_biorxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
|
28
|
-
"""Test successful metadata and PDF URL retrieval."""
|
29
|
-
dummy_cfg = MagicMock()
|
30
|
-
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
31
|
-
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
32
|
-
mock_compose.return_value = dummy_cfg
|
33
|
-
mock_initialize.return_value.__enter__.return_value = None
|
34
|
-
|
35
|
-
doi = "10.1101/2025.05.13.653102"
|
36
|
-
|
37
|
-
dummy_response = MagicMock()
|
38
|
-
dummy_response.status_code = 200
|
39
|
-
dummy_response.raise_for_status = MagicMock()
|
40
|
-
dummy_response.json.return_value = {
|
41
|
-
"collection": [
|
42
|
-
{
|
43
|
-
"title": "Sample BioRxiv Paper",
|
44
|
-
"authors": "Author One; Author Two",
|
45
|
-
"abstract": "This is a bioRxiv abstract.",
|
46
|
-
"date": "2025-04-25",
|
47
|
-
"doi": doi,
|
48
|
-
"link": f"https://www.biorxiv.org/content/{doi}.full.pdf"
|
49
|
-
}
|
50
|
-
]
|
51
|
-
}
|
52
|
-
mock_get.return_value = dummy_response
|
53
|
-
|
54
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
55
|
-
result = download_biorxiv_paper.run(tool_input)
|
56
|
-
update = result.update
|
57
|
-
|
58
|
-
self.assertIn("article_data", update)
|
59
|
-
self.assertIn(doi, update["article_data"])
|
60
|
-
metadata = update["article_data"][doi]
|
61
|
-
self.assertEqual(metadata["Title"], "Sample BioRxiv Paper")
|
62
|
-
self.assertEqual(metadata["Authors"], "Author One; Author Two")
|
63
|
-
self.assertEqual(metadata["Abstract"], "This is a bioRxiv abstract.")
|
64
|
-
self.assertEqual(metadata["Publication Date"], "2025-04-25")
|
65
|
-
self.assertEqual(metadata["URL"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
|
66
|
-
self.assertEqual(metadata["pdf_url"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
|
67
|
-
self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
|
68
|
-
self.assertEqual(metadata["source"], "biorxiv")
|
69
|
-
self.assertEqual(metadata["biorxiv_id"], doi)
|
70
|
-
|
71
|
-
self.assertTrue(len(update["messages"]) >= 1)
|
72
|
-
self.assertIsInstance(update["messages"][0], ToolMessage)
|
73
|
-
self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
|
74
|
-
|
75
|
-
@patch(
|
76
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
77
|
-
)
|
78
|
-
@patch(
|
79
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
80
|
-
)
|
81
|
-
@patch(
|
82
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
83
|
-
)
|
84
|
-
def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
|
85
|
-
"""Test behavior when no 'entry' is in response."""
|
86
|
-
dummy_cfg = MagicMock()
|
87
|
-
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
88
|
-
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
89
|
-
mock_compose.return_value = dummy_cfg
|
90
|
-
mock_initialize.return_value.__enter__.return_value = None
|
91
|
-
|
92
|
-
dummy_response = MagicMock()
|
93
|
-
dummy_response.status_code = 200
|
94
|
-
dummy_response.raise_for_status = MagicMock()
|
95
|
-
dummy_response.json.return_value = {} # No entry
|
96
|
-
mock_get.return_value = dummy_response
|
97
|
-
|
98
|
-
doi = "10.1101/2025.05.13.653102"
|
99
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
100
|
-
|
101
|
-
with self.assertRaises(ValueError) as context:
|
102
|
-
download_biorxiv_paper.run(tool_input)
|
103
|
-
|
104
|
-
self.assertEqual(str(context.exception), f"No metadata found for DOI: {doi}")
|
105
|
-
|
106
|
-
@patch(
|
107
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
|
108
|
-
)
|
109
|
-
@patch(
|
110
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
|
111
|
-
)
|
112
|
-
@patch(
|
113
|
-
"aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
|
114
|
-
)
|
115
|
-
def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
|
116
|
-
"""Test fallback to DOI-based PDF URL construction when 'link' is missing."""
|
117
|
-
dummy_cfg = MagicMock()
|
118
|
-
dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
|
119
|
-
dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
|
120
|
-
mock_compose.return_value = dummy_cfg
|
121
|
-
mock_initialize.return_value.__enter__.return_value = None
|
122
|
-
|
123
|
-
doi = "10.1101/2025.05.13.653102"
|
124
|
-
|
125
|
-
dummy_response = MagicMock()
|
126
|
-
dummy_response.status_code = 200
|
127
|
-
dummy_response.raise_for_status = MagicMock()
|
128
|
-
dummy_response.json.return_value = {
|
129
|
-
"collection": [
|
130
|
-
{
|
131
|
-
"title": "Sample Biorxiv Paper",
|
132
|
-
"authors": "Author One; Author Two",
|
133
|
-
"abstract": "This is a BioRxiv abstract.",
|
134
|
-
"date": "2025-04-25",
|
135
|
-
"doi": doi
|
136
|
-
# 'link' is intentionally omitted
|
137
|
-
}
|
138
|
-
]
|
139
|
-
}
|
140
|
-
mock_get.return_value = dummy_response
|
141
|
-
|
142
|
-
tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
|
143
|
-
result = download_biorxiv_paper.run(tool_input)
|
144
|
-
update = result.update
|
145
|
-
metadata = update["article_data"][doi]
|
146
|
-
|
147
|
-
# Assert that the PDF URL was constructed from DOI
|
148
|
-
expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
|
149
|
-
expected_url = f"https://www.biorxiv.org/content/10.1101/{expected_suffix}.full.pdf"
|
150
|
-
|
151
|
-
self.assertEqual(metadata["pdf_url"], expected_url)
|