aiagents4pharma 1.30.4__py3-none-any.whl → 1.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
- aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
- aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
- aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
- aiagents4pharma-1.32.0.dist-info/METADATA +364 -0
- {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/RECORD +33 -35
- {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/WHEEL +1 -1
- aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
- aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
- aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
- aiagents4pharma-1.30.4.dist-info/METADATA +0 -334
- {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: aiagents4pharma
|
3
|
+
Version: 1.32.0
|
4
|
+
Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
|
5
|
+
Classifier: Programming Language :: Python :: 3
|
6
|
+
Classifier: License :: OSI Approved :: MIT License
|
7
|
+
Classifier: Operating System :: OS Independent
|
8
|
+
Requires-Python: >=3.12
|
9
|
+
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: copasi_basico==0.78
|
12
|
+
Requires-Dist: coverage==7.6.4
|
13
|
+
Requires-Dist: einops==0.8.0
|
14
|
+
Requires-Dist: gdown==5.2.0
|
15
|
+
Requires-Dist: gravis==0.1.0
|
16
|
+
Requires-Dist: huggingface_hub==0.26.5
|
17
|
+
Requires-Dist: hydra-core==1.3.2
|
18
|
+
Requires-Dist: joblib==1.4.2
|
19
|
+
Requires-Dist: langchain==0.3.7
|
20
|
+
Requires-Dist: langchain-community==0.3.5
|
21
|
+
Requires-Dist: langchain-core==0.3.40
|
22
|
+
Requires-Dist: langchain-experimental==0.3.3
|
23
|
+
Requires-Dist: langchain-nvidia-ai-endpoints==0.3.9
|
24
|
+
Requires-Dist: langchain-openai==0.2.5
|
25
|
+
Requires-Dist: langchain_ollama==0.2.3
|
26
|
+
Requires-Dist: langgraph_supervisor==0.0.9
|
27
|
+
Requires-Dist: matplotlib==3.9.2
|
28
|
+
Requires-Dist: openai==1.59.4
|
29
|
+
Requires-Dist: ollama==0.4.7
|
30
|
+
Requires-Dist: pandas==2.2.3
|
31
|
+
Requires-Dist: pcst_fast==1.0.10
|
32
|
+
Requires-Dist: plotly==5.24.1
|
33
|
+
Requires-Dist: pubchempy==1.0.4
|
34
|
+
Requires-Dist: pydantic==2.9.2
|
35
|
+
Requires-Dist: pylint==3.3.1
|
36
|
+
Requires-Dist: pypdf==5.2.0
|
37
|
+
Requires-Dist: pytest==8.3.3
|
38
|
+
Requires-Dist: pytest-asyncio==0.25.2
|
39
|
+
Requires-Dist: pyzotero==1.6.9
|
40
|
+
Requires-Dist: streamlit==1.39.0
|
41
|
+
Requires-Dist: sentence_transformers==3.3.1
|
42
|
+
Requires-Dist: tabulate==0.9.0
|
43
|
+
Requires-Dist: torch==2.2.2
|
44
|
+
Requires-Dist: torch_geometric==2.6.1
|
45
|
+
Requires-Dist: transformers==4.48.0
|
46
|
+
Requires-Dist: mkdocs==1.6.1
|
47
|
+
Requires-Dist: mkdocs-jupyter==0.25.1
|
48
|
+
Requires-Dist: mkdocs-material==9.5.47
|
49
|
+
Requires-Dist: mkdocstrings-python==1.12.2
|
50
|
+
Requires-Dist: mkdocs-include-markdown-plugin==7.1.2
|
51
|
+
Requires-Dist: mkdocstrings==0.27.0
|
52
|
+
Requires-Dist: streamlit-feedback
|
53
|
+
Requires-Dist: anndata==0.11.3
|
54
|
+
Requires-Dist: h5py==3.13.0
|
55
|
+
Requires-Dist: igraph==0.11.8
|
56
|
+
Requires-Dist: ipykernel==6.29.5
|
57
|
+
Requires-Dist: ipython==8.32.0
|
58
|
+
Requires-Dist: nbformat==5.10.4
|
59
|
+
Requires-Dist: scipy==1.15.2
|
60
|
+
Requires-Dist: tqdm==4.67.1
|
61
|
+
Requires-Dist: umap-learn==0.5.7
|
62
|
+
Requires-Dist: plotly-express==0.4.1
|
63
|
+
Requires-Dist: seaborn==0.13.2
|
64
|
+
Requires-Dist: scanpy==1.11.0
|
65
|
+
Dynamic: license-file
|
66
|
+
|
67
|
+
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2biomodels.yml)
|
68
|
+
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2cells.yml)
|
69
|
+
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2knowledgegraphs.yml)
|
70
|
+
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2scholars.yml)
|
71
|
+
[](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2aiagents4pharma.yml)
|
72
|
+

|
73
|
+

|
74
|
+

|
75
|
+

|
76
|
+

|
77
|
+

|
78
|
+
|
79
|
+
## Introduction
|
80
|
+
|
81
|
+
Welcome to **AIAgents4Pharma** – an open-source project by [Team VPE](https://bmedx.com/research-teams/artificial-intelligence/team-vpe/) that brings together AI-driven tools to help researchers and pharma interact seamlessly with complex biological data.
|
82
|
+
|
83
|
+
Our toolkit currently consists of the following agents:
|
84
|
+
|
85
|
+
- **Talk2BioModels** _(v1 released; v2 in progress)_: Engage directly with mathematical models in systems biology.
|
86
|
+
- **Talk2KnowledgeGraphs** _(v1 in progress)_: Access and explore complex biological knowledge graphs for insightful data connections.
|
87
|
+
- **Talk2Scholars** _(v1 in progress)_: Get recommendations for articles related to your choice. Download, query, and write/retrieve them to your reference manager (currently supporting Zotero).
|
88
|
+
- **Talk2Cells** _(v1 in progress)_: Query and analyze sequencing data with ease.
|
89
|
+
- **Talk2AIAgents4Pharma** _(v1 in progress)_: Converse with all the agents above (currently supports T2B and T2KG).
|
90
|
+
|
91
|
+

|
92
|
+
|
93
|
+
## Getting Started
|
94
|
+
|
95
|
+
### Installation
|
96
|
+
|
97
|
+
#### Option 1: Docker (stable-release)
|
98
|
+
|
99
|
+
_We now have all the agents available on Docker Hub._
|
100
|
+
|
101
|
+
##### **To run Talk2AIAgents4Pharma / Talk2KnowledgeGraphs**
|
102
|
+
|
103
|
+
Both agents require [Ollama](https://ollama.com/) to run embedding models like `nomic-embed-text`. We use a **single startup script** that automatically detects your hardware (NVIDIA, AMD, or CPU) and handles container startup, model loading, and service orchestration.
|
104
|
+
|
105
|
+
##### **1. Clone the repository and navigate to the agent directory**
|
106
|
+
|
107
|
+
```sh
|
108
|
+
git clone https://github.com/VirtualPatientEngine/AIAgents4Pharma
|
109
|
+
|
110
|
+
cd AIAgents4Pharma/aiagents4pharma/<agent>
|
111
|
+
```
|
112
|
+
|
113
|
+
Replace `<agent>` with either:
|
114
|
+
|
115
|
+
- `talk2aiagents4pharma`
|
116
|
+
- `talk2knowledgegraphs`
|
117
|
+
|
118
|
+
##### **2. Setup environment variables**
|
119
|
+
|
120
|
+
Copy and configure your `.env` file:
|
121
|
+
|
122
|
+
```sh
|
123
|
+
cp .env.example .env
|
124
|
+
```
|
125
|
+
|
126
|
+
Then edit `.env` and add your API keys:
|
127
|
+
|
128
|
+
```env
|
129
|
+
OPENAI_API_KEY=... # Required for both agents
|
130
|
+
NVIDIA_API_KEY=... # Required for both agents
|
131
|
+
OLLAMA_HOST=http://ollama:11434 # Required for AA4P / T2KG
|
132
|
+
LANGCHAIN_TRACING_V2=true # Optional for both agents
|
133
|
+
LANGCHAIN_API_KEY=... # Optional for both agents
|
134
|
+
```
|
135
|
+
|
136
|
+
To use **Talk2AIAgents4Pharma** or **Talk2KnowledgeGraphs**, you need a free **NVIDIA API key**. Create an account and apply for free credits [here](https://build.nvidia.com/explore/discover).
|
137
|
+
|
138
|
+
###### Notes for Windows Users
|
139
|
+
|
140
|
+
If you are using Windows, it is recommended to install **Git Bash** for a smoother experience when running the bash commands in this guide.
|
141
|
+
|
142
|
+
- For applications that use **Docker Compose**, Git Bash is **required**.
|
143
|
+
- For applications that use **docker run** manually, Git Bash is **optional**, but recommended for consistency.
|
144
|
+
|
145
|
+
You can download Git Bash here: [Git for Windows](https://git-scm.com/downloads).
|
146
|
+
|
147
|
+
When using Docker on Windows, make sure you **run Docker with administrative privileges** if you face permission issues.
|
148
|
+
|
149
|
+
To resolve permission issues, you can:
|
150
|
+
|
151
|
+
- Review the official Docker documentation on [Windows permission requirements](https://docs.docker.com/desktop/setup/install/windows-permission-requirements/).
|
152
|
+
- Alternatively, follow the community discussion and solutions on [Docker Community Forums](https://forums.docker.com/t/error-when-trying-to-run-windows-containers-docker-client-must-be-run-with-elevated-privileges/136619).
|
153
|
+
|
154
|
+
**LangSmith** support is optional. To enable it, create an API key [here](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key).
|
155
|
+
|
156
|
+
##### **3. Start the application**
|
157
|
+
|
158
|
+
Run the startup script. It will:
|
159
|
+
|
160
|
+
- Detect your hardware configuration (NVIDIA GPU, AMD GPU, or CPU). Apple Metal is unavailable inside Docker, and Intel SIMD optimizations are automatically handled without special configuration.
|
161
|
+
- Choose the correct Ollama image (`latest` or `rocm`)
|
162
|
+
- Launch the Ollama container with appropriate runtime settings
|
163
|
+
- Pull the required embedding model (`nomic-embed-text`)
|
164
|
+
- Start the agent **after the model is available**
|
165
|
+
|
166
|
+
```sh
|
167
|
+
chmod +x startup.sh
|
168
|
+
./startup.sh # Add --cpu flag to force CPU mode if needed
|
169
|
+
```
|
170
|
+
|
171
|
+
##### **4. Access the Web UI**
|
172
|
+
|
173
|
+
Once started, the agent is available at:
|
174
|
+
|
175
|
+
```
|
176
|
+
http://localhost:8501
|
177
|
+
```
|
178
|
+
|
179
|
+
##### **To run Talk2BioModels / Talk2Scholars**
|
180
|
+
|
181
|
+
1. **Run the containers**
|
182
|
+
|
183
|
+
###### Talk2Biomodels
|
184
|
+
|
185
|
+
```docker
|
186
|
+
docker run -d \
|
187
|
+
--name talk2biomodels \
|
188
|
+
-e OPENAI_API_KEY=<your_openai_api_key> \
|
189
|
+
-e NVIDIA_API_KEY=<your_nvidia_api_key> \
|
190
|
+
-p 8501:8501 \
|
191
|
+
virtualpatientengine/talk2biomodels
|
192
|
+
```
|
193
|
+
|
194
|
+
###### Talk2Scholars
|
195
|
+
|
196
|
+
```docker
|
197
|
+
docker run -d \
|
198
|
+
--name talk2scholars \
|
199
|
+
-e OPENAI_API_KEY=<your_openai_api_key> \
|
200
|
+
-e ZOTERO_API_KEY=<your_zotero_api_key> \
|
201
|
+
-e ZOTERO_USER_ID=<your_zotero_user_id> \
|
202
|
+
-e NVIDIA_API_KEY=<your_nvidia_api_key> \
|
203
|
+
-p 8501:8501 \
|
204
|
+
virtualpatientengine/talk2scholars
|
205
|
+
```
|
206
|
+
|
207
|
+
2. **Access the Web App**
|
208
|
+
Open your browser and go to:
|
209
|
+
|
210
|
+
```
|
211
|
+
http://localhost:8501
|
212
|
+
```
|
213
|
+
|
214
|
+
To use **Talk2BioModels** or **Talk2Scholars**, you need a free **NVIDIA API key**. Create an account and apply for free credits [here](https://build.nvidia.com/explore/discover).
|
215
|
+
|
216
|
+
Only for **Talk2Scholars**, you also need a **Zotero API key**, which you can generate [here](https://www.zotero.org/user/login#applications). _(For all other agents, the Zotero key is not required.)_
|
217
|
+
|
218
|
+
If you are using Docker on Windows, please follow these [Windows Setup Notes](#notes-for-windows-users).
|
219
|
+
|
220
|
+
**LangSmith** support is optional. To enable it, create an API key [here](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key).
|
221
|
+
|
222
|
+
#### Notes
|
223
|
+
|
224
|
+
- Be sure to **replace the placeholder values** with your actual credentials before running any container:
|
225
|
+
|
226
|
+
- `<your_openai_api_key>`
|
227
|
+
- `<your_nvidia_api_key>`
|
228
|
+
- `<your_zotero_api_key>`
|
229
|
+
- `<your_zotero_user_id>`
|
230
|
+
|
231
|
+
- All agents default to **port `8501`**. If you plan to run multiple agents simultaneously, make sure to assign **different ports** to avoid conflicts.
|
232
|
+
|
233
|
+
Example (Talk2Scholars on port `8502`):
|
234
|
+
|
235
|
+
```docker
|
236
|
+
docker run -d \
|
237
|
+
--name talk2scholars \
|
238
|
+
-e OPENAI_API_KEY=<your_openai_api_key> \
|
239
|
+
-e ZOTERO_API_KEY=<your_zotero_api_key> \
|
240
|
+
-e ZOTERO_USER_ID=<your_zotero_user_id> \
|
241
|
+
-e NVIDIA_API_KEY=<your_nvidia_api_key> \
|
242
|
+
-p 8502:8501 \
|
243
|
+
virtualpatientengine/talk2scholars
|
244
|
+
```
|
245
|
+
|
246
|
+
Then access the app at: [http://localhost:8502](http://localhost:8502)
|
247
|
+
|
248
|
+
#### Option 2: git (for developers and contributors)
|
249
|
+
|
250
|
+

|
251
|
+
|
252
|
+
1. **Clone the repository:**
|
253
|
+
```sh
|
254
|
+
git clone https://github.com/VirtualPatientEngine/AIAgents4Pharma
|
255
|
+
cd AIAgents4Pharma
|
256
|
+
```
|
257
|
+
2. **Install dependencies:**
|
258
|
+
|
259
|
+
```sh
|
260
|
+
pip install -r requirements.txt
|
261
|
+
```
|
262
|
+
|
263
|
+
3. **Initialize API Keys**
|
264
|
+
|
265
|
+
```env
|
266
|
+
export OPENAI_API_KEY=.... # Required for all agents
|
267
|
+
export NVIDIA_API_KEY=.... # Required for all agents
|
268
|
+
export ZOTERO_API_KEY=.... # Required for T2S
|
269
|
+
export ZOTERO_USER_ID=.... # Required for T2S
|
270
|
+
export LANGCHAIN_TRACING_V2=true # Optional for all agents
|
271
|
+
export LANGCHAIN_API_KEY=... # Optional for all agents
|
272
|
+
```
|
273
|
+
|
274
|
+
To use **Talk2AIAgents4Pharma**, **Talk2BioModels**, **Talk2KnowledgeGraphs**, or **Talk2Scholars**, you need a free **NVIDIA API key**. Create an account and apply for free credits [here](https://build.nvidia.com/explore/discover).
|
275
|
+
|
276
|
+
Only for **Talk2Scholars**, you also need a **Zotero API key**, which you can generate [here](https://www.zotero.org/user/login#applications). _(For all other agents, the Zotero key is not required.)_
|
277
|
+
|
278
|
+
To use **Talk2Scholars**, you must have **FAISS** installed through **Conda**. Follow installation instructions for your OS [here](https://github.com/VirtualPatientEngine/AIAgents4Pharma/tree/main/aiagents4pharma/talk2scholars/install.md).
|
279
|
+
|
280
|
+
To use **Talk2AIAgents4Pharma** or **Talk2KnowledgeGraphs**, you must have **Ollama** installed. Follow installation instructions for your OS [here](https://ollama.com/download).
|
281
|
+
|
282
|
+
After installing, pull the `nomic-embed-text` model and start the server by running:
|
283
|
+
|
284
|
+
```sh
|
285
|
+
ollama pull nomic-embed-text && ollama serve
|
286
|
+
```
|
287
|
+
|
288
|
+
More details about the model are available [here](https://ollama.com/library/nomic-embed-text).
|
289
|
+
|
290
|
+
Additionally on **Windows**, the `pcst_fast 1.0.10` library requires **Microsoft Visual C++ 14.0 or greater**.
|
291
|
+
You can download the **Microsoft C++ Build Tools** [here](https://visualstudio.microsoft.com/visual-cpp-build-tools/).
|
292
|
+
|
293
|
+
**LangSmith** support is optional. To enable it, create an API key [here](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key).
|
294
|
+
|
295
|
+
_Please note that this will create a new tracing project in your LangSmith
|
296
|
+
account with the name `T2X-xxxx`, where `X` can be `AA4P` (Main Agent),
|
297
|
+
`B` (Biomodels), `S` (Scholars), `KG` (KnowledgeGraphs), or `C` (Cells).
|
298
|
+
If you skip the previous step, it will default to the name `default`.
|
299
|
+
`xxxx` will be the 4-digit ID created for the session._
|
300
|
+
|
301
|
+
4. **Launch the app:**
|
302
|
+
```sh
|
303
|
+
streamlit run app/frontend/streamlit_app_<agent>.py
|
304
|
+
```
|
305
|
+
_Replace `<agent>` with the name of the agent you want to launch:_
|
306
|
+
|
307
|
+
- `talk2aiagents4pharma`
|
308
|
+
- `talk2biomodels`
|
309
|
+
- `talk2knowledgegraphs`
|
310
|
+
- `talk2scholars`
|
311
|
+
- `talk2cells`
|
312
|
+
|
313
|
+
For detailed instructions on each agent, please refer to their respective modules.
|
314
|
+
|
315
|
+
#### Option 3: pip (beta-release)
|
316
|
+
|
317
|
+

|
318
|
+
|
319
|
+
```sh
|
320
|
+
pip install aiagents4pharma
|
321
|
+
```
|
322
|
+
|
323
|
+
Check out the tutorials on each agent for detailed instructions.
|
324
|
+
|
325
|
+
## Contributing
|
326
|
+
|
327
|
+
We welcome your support to make **AIAgents4Pharma** even better.
|
328
|
+
All types of contributions are appreciated — whether you're fixing bugs, adding features, improving documentation, or helping with testing, every contribution is valuable.
|
329
|
+
|
330
|
+
#### How to contribute
|
331
|
+
|
332
|
+
1. Star this repository to show your support.
|
333
|
+
2. Fork the repository.
|
334
|
+
3. Create a new branch for your work:
|
335
|
+
```sh
|
336
|
+
git checkout -b feat/your-feature-name
|
337
|
+
```
|
338
|
+
4. Make your changes and commit them:
|
339
|
+
```sh
|
340
|
+
git commit -m "feat: add a brief description of your change"
|
341
|
+
```
|
342
|
+
5. Push your branch:
|
343
|
+
```sh
|
344
|
+
git push origin feat/your-feature-name
|
345
|
+
```
|
346
|
+
6. Open a Pull Request.
|
347
|
+
|
348
|
+
#### Areas where you can help
|
349
|
+
|
350
|
+
- Beta testing for Talk2BioModels and Talk2Scholars.
|
351
|
+
- Development work related to Python, bioinformatics, or knowledge graphs.
|
352
|
+
|
353
|
+
#### Contacts for contributions
|
354
|
+
|
355
|
+
- **Talk2Biomodels / Talk2Cells**: [@gurdeep330](https://github.com/gurdeep330), [@lilijap](https://github.com/lilijap), [@dmccloskey](https://github.com/dmccloskey)
|
356
|
+
- **Talk2KnowledgeGraphs**: [@awmulyadi](https://github.com/awmulyadi), [@dmccloskey](https://github.com/dmccloskey)
|
357
|
+
- **Talk2Scholars**: [@ansh-info](https://github.com/ansh-info), [@gurdeep330](https://github.com/gurdeep330), [@dmccloskey](https://github.com/dmccloskey)
|
358
|
+
|
359
|
+
Please refer to our [CONTRIBUTING.md](CONTRIBUTING.md) for more detailed contribution guidelines.
|
360
|
+
|
361
|
+
## Feedback
|
362
|
+
|
363
|
+
If you have questions, bug reports, feature requests, comments, or suggestions, we would love to hear from you.
|
364
|
+
Please open an `issue` or start a `discussion`.
|
@@ -136,23 +136,23 @@ aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtf
|
|
136
136
|
aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
|
137
137
|
aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
|
138
138
|
aiagents4pharma/talk2scholars/agents/__init__.py,sha256=c_0Pk85bt-RfK5RMyALM3MXo3qXVMoYS7BOqM9wuFME,317
|
139
|
-
aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=
|
140
|
-
aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=
|
141
|
-
aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=
|
142
|
-
aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=
|
143
|
-
aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=
|
139
|
+
aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=oCSWPj3TUgTIERmYbBTYipNrU1g956LXJEUx-7-KAQ0,3354
|
140
|
+
aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=pYHW3R7VQjRA3PhgWGQYI3ErfdILYQ0FM1WGXii3r1k,2996
|
141
|
+
aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=GEXzJMQxIeZ7zLP-AlnTMU-n_KXZ7g22Qd9L3USIc_4,3626
|
142
|
+
aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=oui0CMSyXmBGBJ7LnYq8Ce0V8Qc3BS6GgH5Qx5wI6oM,4565
|
143
|
+
aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=NAmEURIhH-sjXGO-dqAigUA10m-Re9Qe1hY8db4CIP0,4370
|
144
144
|
aiagents4pharma/talk2scholars/configs/__init__.py,sha256=Y9-4PxsNCMoxyyQgDSbPByJnO9wnyem5SYL3eOZt1HY,189
|
145
145
|
aiagents4pharma/talk2scholars/configs/config.yaml,sha256=-8X0_gTmjEuXAeIrnppw3Npy8HICelHZOvTKEScI-rs,596
|
146
146
|
aiagents4pharma/talk2scholars/configs/agents/__init__.py,sha256=plv5Iw34gvbGZbRyJapvoOiiFXekRQIwjV_yy5AR_SI,104
|
147
147
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=D94LW4cXLmJe4dNl5qoR9QN0JnBqGLbQDgDLqhCNUE0,213
|
148
148
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
149
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=
|
149
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=EmUAxeQSnH4U5Op5_XOzCbcexDCp-Rpz3z0yVPRtQUg,1315
|
150
150
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
151
151
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
152
152
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
153
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=
|
153
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=_sSt2jCgSILwrXkywDAxkXONCZn896owLBaf46iFI0I,1323
|
154
154
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
155
|
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml,sha256=
|
155
|
+
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml,sha256=SOdjRiGUxjW9JeCRDd_U1RjCclItkoPODrj5RpIrxSY,2030
|
156
156
|
aiagents4pharma/talk2scholars/configs/app/__init__.py,sha256=tXpOW3R4eAfNoqvoaHfabSG-DcMHmUGSTg_4zH_vlgw,94
|
157
157
|
aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
158
158
|
aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml,sha256=A6nYjrgzEyRv5JYsGN7oqNX4-tufMBZ6mg-A7bMX6V4,906
|
@@ -172,39 +172,37 @@ aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml,sha256=ifOt
|
|
172
172
|
aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
|
173
173
|
aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml,sha256=gB7y7pznviQUzu49Eu4ONNkjQjT8wPKNSw6S_vfd9kI,1222
|
174
174
|
aiagents4pharma/talk2scholars/state/__init__.py,sha256=ReScKLpEvedq4P6ww52NRQS0Xr6SSQV7hqoQ83Mt75U,138
|
175
|
-
aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=
|
175
|
+
aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=MGB-rWjbOpLN-pK3nY9YKAuskcjeR62rjXbZl1Ppjas,2836
|
176
176
|
aiagents4pharma/talk2scholars/tests/__init__.py,sha256=U3PsTiUZaUBD1IZanFGkDIOdFieDVJtGKQ5-woYUo8c,45
|
177
177
|
aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py,sha256=FBRqS06IKJYFOudQEHQr-9oJ4tftkH-gTCowTAqwWSg,3686
|
178
|
-
aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=
|
179
|
-
aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=
|
180
|
-
aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=
|
181
|
-
aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=
|
182
|
-
aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=
|
178
|
+
aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=IZYSocYVwqPil2lF6L07mKm8PUq7vjopmqNiCm6IJEA,6876
|
179
|
+
aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=gKSQp-sw62FplNnGYW0wv2ZIUEefh3o0tFWbRzy9yLs,5068
|
180
|
+
aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=3mycLeEgH5XkwxuoXfTpQb8c8xFtIX2HjVnACPrSf60,7141
|
181
|
+
aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=scGCTgka2JuoUhzZwzDn0OgIYihOLhXbwb5uGFR02aI,4302
|
182
|
+
aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=_zzg4_XVVEuvYDsJ5la0kFLf9dT45P67-UnUZWDUkhY,34874
|
183
183
|
aiagents4pharma/talk2scholars/tests/test_routing_logic.py,sha256=g79tG68ZrUOL3-duCCJwvFK6OieR5KedRf3yTUDqIFk,2784
|
184
|
-
aiagents4pharma/talk2scholars/tests/test_s2_agent.py,sha256=
|
185
|
-
aiagents4pharma/talk2scholars/tests/test_s2_display.py,sha256=
|
184
|
+
aiagents4pharma/talk2scholars/tests/test_s2_agent.py,sha256=xvlPU4Lz_DdQLTpdtoHW9l_AMvFrzC-FXE5royGbtLM,7806
|
185
|
+
aiagents4pharma/talk2scholars/tests/test_s2_display.py,sha256=TfJE74KsocAHLbitMLjVrfUwAwyIYpzEvkdrQMBzM2g,2263
|
186
186
|
aiagents4pharma/talk2scholars/tests/test_s2_multi.py,sha256=VCTfexhtX7FgWOBS0YtSm1zghbByZnni1NBLGVTJVGI,11166
|
187
|
-
aiagents4pharma/talk2scholars/tests/test_s2_query.py,sha256=
|
187
|
+
aiagents4pharma/talk2scholars/tests/test_s2_query.py,sha256=_pDVolOmhrjZnh37Ig97-LcDHUe0lm3GvTWjNDKgMkc,2461
|
188
188
|
aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py,sha256=YtA2nbPRtoSR7mPqEjqLF5ERGVzTfeULztsNoCI48X8,2003
|
189
189
|
aiagents4pharma/talk2scholars/tests/test_s2_search.py,sha256=mCGpoCYVn0SJ9BPcEjTz2MLy_K2XJIxvPngwsMoKijA,9945
|
190
190
|
aiagents4pharma/talk2scholars/tests/test_s2_single.py,sha256=KjSh7V2cl1IuO_M9O6dj0vnMHr13H-xKxia_ZgT4qag,10313
|
191
191
|
aiagents4pharma/talk2scholars/tests/test_state.py,sha256=_iHXvoZnU_eruf8l1sQKBSCIVnxNkH_9VzkVtZZA6bY,384
|
192
|
-
aiagents4pharma/talk2scholars/tests/test_zotero_agent.py,sha256=
|
192
|
+
aiagents4pharma/talk2scholars/tests/test_zotero_agent.py,sha256=jFEtfQVEwEQ6v3kq7A1_p2MKCu5wbtX47V4bE-fKD6M,6158
|
193
193
|
aiagents4pharma/talk2scholars/tests/test_zotero_human_in_the_loop.py,sha256=YelLQu9Y_r1SNQsC1xoLHJoJ3soIZtBt1MFbbNhY-Dg,10744
|
194
|
-
aiagents4pharma/talk2scholars/tests/test_zotero_path.py,sha256=
|
195
|
-
aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=
|
194
|
+
aiagents4pharma/talk2scholars/tests/test_zotero_path.py,sha256=Ko0HyXCrpm-vs8Bkf-syxp3MfL1IvZwXXgPExyQy_F8,18618
|
195
|
+
aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=yQTksJhqW036Scs7pnc_bBC23N210mcjaZ6sJZl8QnM,29492
|
196
196
|
aiagents4pharma/talk2scholars/tests/test_zotero_write.py,sha256=qWlO0XoZJ6vxUxgisjYv9Np87CoTEDxiQBEOhdj9foo,6111
|
197
197
|
aiagents4pharma/talk2scholars/tools/__init__.py,sha256=c8pYHDqR9P0Frz2jWjbvyizfSTBMlMFzGsiQzx2KC9c,189
|
198
|
-
aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=
|
199
|
-
aiagents4pharma/talk2scholars/tools/paper_download/
|
200
|
-
aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py,sha256=hM9fdbwtOxuW1mpAfmfbILTI7kSVALgrGpjC2vMsvf8,3970
|
201
|
-
aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py,sha256=zndAnNFRBztuBK-tpW9UyYsGL8tB3gFjYhiTq6nzZu4,2203
|
198
|
+
aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=tNTLSPNdir4XSKRF0HjXI_tBGBXXXwDhWRI5VnwbZpM,214
|
199
|
+
aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py,sha256=WTWvXbh0C96OoMoPf8Bgu0AgorsdkWslac_WqlHc4bo,3900
|
202
200
|
aiagents4pharma/talk2scholars/tools/pdf/__init__.py,sha256=DPpOfON3AySko5EBBAe_3udOoSaAdQWNyGeNvJyV5R8,138
|
203
|
-
aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=
|
204
|
-
aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=
|
205
|
-
aiagents4pharma/talk2scholars/tools/s2/
|
201
|
+
aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=RfICBn4VorvpTrb_GunFFAi6fnzUlees_k0poQm0VKc,21853
|
202
|
+
aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=w_eiw0pG8HNp79F9O_icXs_Yl_4odsmagYNKDTjIsvk,428
|
203
|
+
aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py,sha256=YtnCrI0c3Fhi68R6ndPUnVM3E5u7CuBB_myIzLN6nXg,3040
|
206
204
|
aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py,sha256=N6GwG3oCQFEcntpjTQObAELzM5OpZq0u9J9-gUWU2kc,2716
|
207
|
-
aiagents4pharma/talk2scholars/tools/s2/
|
205
|
+
aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py,sha256=inaWWctaylJAJsXinQA63qPs5n-gn7axJz8ijj66Jmw,2746
|
208
206
|
aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha256=llzMMnEQKeYVamJbF4_DTMx-BgVe79vwDcUIFGLrmUY,2615
|
209
207
|
aiagents4pharma/talk2scholars/tools/s2/search.py,sha256=NGzo1rF5VJJuZJbSLDwy2f220wSh7DWEw6xT1qQA2V0,2452
|
210
208
|
aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py,sha256=7VivBGHcmaJZN7v7gYwddC-rfrDHaZo74pSNBYlJ2xU,2673
|
@@ -213,16 +211,16 @@ aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py,sha256=rrR0DRNeGHpY
|
|
213
211
|
aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py,sha256=_eP7q4ZTSWisEF4Stffe-IpR2MD9WrQ0u3jbbeJBRLU,6363
|
214
212
|
aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py,sha256=ahTDT0lp5VRZS5hLL3-hsHx4wB3LUVY2OBTCTEJyR3Y,6983
|
215
213
|
aiagents4pharma/talk2scholars/tools/zotero/__init__.py,sha256=wXiQILLq-utV35PkDUpm_F074mG9yRMyGQAFlr9UAOw,197
|
216
|
-
aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=
|
214
|
+
aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=RqFXP2DXmkHLhVLirrTnmSk-E8Jipi4ue_Zw65npbnM,2263
|
217
215
|
aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py,sha256=iqwpolg7GWAjXizubLrPaAsgOpsOhKz-tFRyLOiBvC0,6325
|
218
216
|
aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py,sha256=KnDcnUBB0lwMcxNpC3hsVnICWkj23MDAePdHlK-Kekk,3024
|
219
217
|
aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py,sha256=uIyKZSFB07-zd3vjS9ABL0r6fdBX9JHw60j8oUfxHQs,209
|
220
|
-
aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=
|
218
|
+
aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=lyrfpx8NHYiAN1qQSJWqPka7cML5BASwRXaI66fb-u8,13662
|
221
219
|
aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9yrBYaDnRe7sR6PrpwR82OBJbA2P_Tc6RbxAbM,2748
|
222
220
|
aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
|
223
221
|
aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
|
224
|
-
aiagents4pharma-1.
|
225
|
-
aiagents4pharma-1.
|
226
|
-
aiagents4pharma-1.
|
227
|
-
aiagents4pharma-1.
|
228
|
-
aiagents4pharma-1.
|
222
|
+
aiagents4pharma-1.32.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
|
223
|
+
aiagents4pharma-1.32.0.dist-info/METADATA,sha256=o3uHTzLfOPglRJUlK6_vKAuJ0qwKhGt7BeiQHeF4U_o,16043
|
224
|
+
aiagents4pharma-1.32.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
225
|
+
aiagents4pharma-1.32.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
|
226
|
+
aiagents4pharma-1.32.0.dist-info/RECORD,,
|
@@ -1,45 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Abstract Base Class for Paper Downloaders.
|
3
|
-
|
4
|
-
This module defines the `AbstractPaperDownloader` class, which serves as a
|
5
|
-
base class for downloading scholarly papers from different sources
|
6
|
-
(e.g., arXiv, PubMed, IEEE Xplore). Any specific downloader should
|
7
|
-
inherit from this class and implement its methods.
|
8
|
-
"""
|
9
|
-
|
10
|
-
from abc import ABC, abstractmethod
|
11
|
-
from typing import Any, Dict
|
12
|
-
|
13
|
-
|
14
|
-
class AbstractPaperDownloader(ABC):
|
15
|
-
"""
|
16
|
-
Abstract base class for scholarly paper downloaders.
|
17
|
-
|
18
|
-
This is designed to be extended for different paper sources
|
19
|
-
like arXiv, PubMed, IEEE Xplore, etc. Each implementation
|
20
|
-
must define methods for fetching metadata and downloading PDFs.
|
21
|
-
"""
|
22
|
-
|
23
|
-
@abstractmethod
|
24
|
-
def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
|
25
|
-
"""
|
26
|
-
Fetch metadata for a given paper ID.
|
27
|
-
|
28
|
-
Args:
|
29
|
-
paper_id (str): The unique identifier for the paper.
|
30
|
-
|
31
|
-
Returns:
|
32
|
-
Dict[str, Any]: The metadata dictionary (format depends on the data source).
|
33
|
-
"""
|
34
|
-
|
35
|
-
@abstractmethod
|
36
|
-
def download_pdf(self, paper_id: str) -> bytes:
|
37
|
-
"""
|
38
|
-
Download the PDF for a given paper ID.
|
39
|
-
|
40
|
-
Args:
|
41
|
-
paper_id (str): The unique identifier for the paper.
|
42
|
-
|
43
|
-
Returns:
|
44
|
-
bytes: The binary content of the downloaded PDF.
|
45
|
-
"""
|
@@ -1,115 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Arxiv Paper Downloader
|
3
|
-
|
4
|
-
This module provides an implementation of `AbstractPaperDownloader` for arXiv.
|
5
|
-
It connects to the arXiv API, retrieves metadata for a research paper, and
|
6
|
-
downloads the corresponding PDF.
|
7
|
-
|
8
|
-
By using an abstract base class, this implementation is extendable to other
|
9
|
-
APIs like PubMed, IEEE Xplore, etc.
|
10
|
-
"""
|
11
|
-
|
12
|
-
import xml.etree.ElementTree as ET
|
13
|
-
from typing import Any, Dict
|
14
|
-
import logging
|
15
|
-
import hydra
|
16
|
-
import requests
|
17
|
-
from .abstract_downloader import AbstractPaperDownloader
|
18
|
-
|
19
|
-
# Configure logging
|
20
|
-
logging.basicConfig(level=logging.INFO)
|
21
|
-
logger = logging.getLogger(__name__)
|
22
|
-
|
23
|
-
|
24
|
-
class ArxivPaperDownloader(AbstractPaperDownloader):
|
25
|
-
"""
|
26
|
-
Downloader class for arXiv.
|
27
|
-
|
28
|
-
This class interfaces with the arXiv API to fetch metadata
|
29
|
-
and retrieve PDFs of academic papers based on their arXiv IDs.
|
30
|
-
"""
|
31
|
-
|
32
|
-
def __init__(self):
|
33
|
-
"""
|
34
|
-
Initializes the arXiv paper downloader.
|
35
|
-
|
36
|
-
Uses Hydra for configuration management to retrieve API details.
|
37
|
-
"""
|
38
|
-
with hydra.initialize(version_base=None, config_path="../../configs"):
|
39
|
-
cfg = hydra.compose(
|
40
|
-
config_name="config", overrides=["tools/download_arxiv_paper=default"]
|
41
|
-
)
|
42
|
-
self.api_url = cfg.tools.download_arxiv_paper.api_url
|
43
|
-
self.request_timeout = cfg.tools.download_arxiv_paper.request_timeout
|
44
|
-
self.chunk_size = cfg.tools.download_arxiv_paper.chunk_size
|
45
|
-
self.pdf_base_url = cfg.tools.download_arxiv_paper.pdf_base_url
|
46
|
-
|
47
|
-
def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
|
48
|
-
"""
|
49
|
-
Fetch metadata from arXiv for a given paper ID.
|
50
|
-
|
51
|
-
Args:
|
52
|
-
paper_id (str): The arXiv ID of the paper.
|
53
|
-
|
54
|
-
Returns:
|
55
|
-
Dict[str, Any]: A dictionary containing metadata, including the XML response.
|
56
|
-
"""
|
57
|
-
logger.info("Fetching metadata from arXiv for paper ID: %s", paper_id)
|
58
|
-
api_url = f"{self.api_url}?search_query=id:{paper_id}&start=0&max_results=1"
|
59
|
-
response = requests.get(api_url, timeout=self.request_timeout)
|
60
|
-
response.raise_for_status()
|
61
|
-
return {"xml": response.text}
|
62
|
-
|
63
|
-
def download_pdf(self, paper_id: str) -> Dict[str, Any]:
|
64
|
-
"""
|
65
|
-
Download the PDF of a paper from arXiv.
|
66
|
-
|
67
|
-
This function first retrieves the paper's metadata to locate the PDF link
|
68
|
-
before downloading the file.
|
69
|
-
|
70
|
-
Args:
|
71
|
-
paper_id (str): The arXiv ID of the paper.
|
72
|
-
|
73
|
-
Returns:
|
74
|
-
Dict[str, Any]: A dictionary containing:
|
75
|
-
- `pdf_object`: The binary content of the downloaded PDF.
|
76
|
-
- `pdf_url`: The URL from which the PDF was fetched.
|
77
|
-
- `arxiv_id`: The arXiv ID of the downloaded paper.
|
78
|
-
"""
|
79
|
-
metadata = self.fetch_metadata(paper_id)
|
80
|
-
|
81
|
-
# Parse the XML response to locate the PDF link.
|
82
|
-
root = ET.fromstring(metadata["xml"])
|
83
|
-
ns = {"atom": "http://www.w3.org/2005/Atom"}
|
84
|
-
pdf_url = next(
|
85
|
-
(
|
86
|
-
link.attrib.get("href")
|
87
|
-
for entry in root.findall("atom:entry", ns)
|
88
|
-
for link in entry.findall("atom:link", ns)
|
89
|
-
if link.attrib.get("title") == "pdf"
|
90
|
-
),
|
91
|
-
None,
|
92
|
-
)
|
93
|
-
|
94
|
-
if not pdf_url:
|
95
|
-
raise RuntimeError(f"Failed to download PDF for arXiv ID {paper_id}.")
|
96
|
-
|
97
|
-
logger.info("Downloading PDF from: %s", pdf_url)
|
98
|
-
pdf_response = requests.get(pdf_url, stream=True, timeout=self.request_timeout)
|
99
|
-
pdf_response.raise_for_status()
|
100
|
-
# print (pdf_response)
|
101
|
-
|
102
|
-
# Combine the PDF data from chunks.
|
103
|
-
pdf_object = b"".join(
|
104
|
-
chunk
|
105
|
-
for chunk in pdf_response.iter_content(chunk_size=self.chunk_size)
|
106
|
-
if chunk
|
107
|
-
)
|
108
|
-
# print (pdf_object)
|
109
|
-
print("PDF_URL", pdf_url)
|
110
|
-
|
111
|
-
return {
|
112
|
-
"pdf_object": pdf_object,
|
113
|
-
"pdf_url": pdf_url,
|
114
|
-
"arxiv_id": paper_id,
|
115
|
-
}
|