aiagents4pharma 1.30.4__py3-none-any.whl → 1.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
  2. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
  3. aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
  4. aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
  5. aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
  7. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
  8. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
  9. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
  10. aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
  11. aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
  12. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
  13. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
  14. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
  15. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
  16. aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
  17. aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
  18. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
  19. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
  20. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
  21. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
  22. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
  23. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
  24. aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
  25. aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
  26. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
  27. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
  28. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
  29. aiagents4pharma-1.32.0.dist-info/METADATA +364 -0
  30. {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/RECORD +33 -35
  31. {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/WHEEL +1 -1
  32. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
  33. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
  34. aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
  35. aiagents4pharma-1.30.4.dist-info/METADATA +0 -334
  36. {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/licenses/LICENSE +0 -0
  37. {aiagents4pharma-1.30.4.dist-info → aiagents4pharma-1.32.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
1
+ Metadata-Version: 2.4
2
+ Name: aiagents4pharma
3
+ Version: 1.32.0
4
+ Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
5
+ Classifier: Programming Language :: Python :: 3
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.12
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: copasi_basico==0.78
12
+ Requires-Dist: coverage==7.6.4
13
+ Requires-Dist: einops==0.8.0
14
+ Requires-Dist: gdown==5.2.0
15
+ Requires-Dist: gravis==0.1.0
16
+ Requires-Dist: huggingface_hub==0.26.5
17
+ Requires-Dist: hydra-core==1.3.2
18
+ Requires-Dist: joblib==1.4.2
19
+ Requires-Dist: langchain==0.3.7
20
+ Requires-Dist: langchain-community==0.3.5
21
+ Requires-Dist: langchain-core==0.3.40
22
+ Requires-Dist: langchain-experimental==0.3.3
23
+ Requires-Dist: langchain-nvidia-ai-endpoints==0.3.9
24
+ Requires-Dist: langchain-openai==0.2.5
25
+ Requires-Dist: langchain_ollama==0.2.3
26
+ Requires-Dist: langgraph_supervisor==0.0.9
27
+ Requires-Dist: matplotlib==3.9.2
28
+ Requires-Dist: openai==1.59.4
29
+ Requires-Dist: ollama==0.4.7
30
+ Requires-Dist: pandas==2.2.3
31
+ Requires-Dist: pcst_fast==1.0.10
32
+ Requires-Dist: plotly==5.24.1
33
+ Requires-Dist: pubchempy==1.0.4
34
+ Requires-Dist: pydantic==2.9.2
35
+ Requires-Dist: pylint==3.3.1
36
+ Requires-Dist: pypdf==5.2.0
37
+ Requires-Dist: pytest==8.3.3
38
+ Requires-Dist: pytest-asyncio==0.25.2
39
+ Requires-Dist: pyzotero==1.6.9
40
+ Requires-Dist: streamlit==1.39.0
41
+ Requires-Dist: sentence_transformers==3.3.1
42
+ Requires-Dist: tabulate==0.9.0
43
+ Requires-Dist: torch==2.2.2
44
+ Requires-Dist: torch_geometric==2.6.1
45
+ Requires-Dist: transformers==4.48.0
46
+ Requires-Dist: mkdocs==1.6.1
47
+ Requires-Dist: mkdocs-jupyter==0.25.1
48
+ Requires-Dist: mkdocs-material==9.5.47
49
+ Requires-Dist: mkdocstrings-python==1.12.2
50
+ Requires-Dist: mkdocs-include-markdown-plugin==7.1.2
51
+ Requires-Dist: mkdocstrings==0.27.0
52
+ Requires-Dist: streamlit-feedback
53
+ Requires-Dist: anndata==0.11.3
54
+ Requires-Dist: h5py==3.13.0
55
+ Requires-Dist: igraph==0.11.8
56
+ Requires-Dist: ipykernel==6.29.5
57
+ Requires-Dist: ipython==8.32.0
58
+ Requires-Dist: nbformat==5.10.4
59
+ Requires-Dist: scipy==1.15.2
60
+ Requires-Dist: tqdm==4.67.1
61
+ Requires-Dist: umap-learn==0.5.7
62
+ Requires-Dist: plotly-express==0.4.1
63
+ Requires-Dist: seaborn==0.13.2
64
+ Requires-Dist: scanpy==1.11.0
65
+ Dynamic: license-file
66
+
67
+ [![Talk2BioModels](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2biomodels.yml/badge.svg)](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2biomodels.yml)
68
+ [![Talk2Cells](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2cells.yml/badge.svg)](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2cells.yml)
69
+ [![Talk2KnowledgeGraphs](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2knowledgegraphs.yml/badge.svg)](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2knowledgegraphs.yml)
70
+ [![TESTS Talk2Scholars](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2scholars.yml/badge.svg)](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2scholars.yml)
71
+ [![TESTS Talk2AIAgents4Pharma](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2aiagents4pharma.yml/badge.svg)](https://github.com/VirtualPatientEngine/AIAgents4Pharma/actions/workflows/tests_talk2aiagents4pharma.yml)
72
+ ![GitHub Release](https://img.shields.io/github/v/release/VirtualPatientEngine/AIAgents4Pharma)
73
+ ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2FVirtualPatientEngine%2FAIAgents4Pharma%2Frefs%2Fheads%2Fmain%2Fpyproject.toml)
74
+ ![Talk2AIAgents4Pharma Pulls](https://img.shields.io/docker/pulls/virtualpatientengine/talk2aiagents4pharma?label=Talk2AIAgents4Pharma%20Pulls&color=blue&logo=docker&style=flat-square)
75
+ ![Talk2Scholars Pulls](https://img.shields.io/docker/pulls/virtualpatientengine/talk2scholars?label=Talk2Scholars%20Pulls&color=blue&logo=docker&style=flat-square)
76
+ ![Talk2BioModels Pulls](https://img.shields.io/docker/pulls/virtualpatientengine/talk2biomodels?label=Talk2BioModels%20Pulls&color=blue&logo=docker&style=flat-square)
77
+ ![Talk2KnowledgeGraphs Pulls](https://img.shields.io/docker/pulls/virtualpatientengine/talk2knowledgegraphs?label=Talk2KnowledgeGraphs%20Pulls&color=blue&logo=docker&style=flat-square)
78
+
79
+ ## Introduction
80
+
81
+ Welcome to **AIAgents4Pharma** – an open-source project by [Team VPE](https://bmedx.com/research-teams/artificial-intelligence/team-vpe/) that brings together AI-driven tools to help researchers and pharma interact seamlessly with complex biological data.
82
+
83
+ Our toolkit currently consists of the following agents:
84
+
85
+ - **Talk2BioModels** _(v1 released; v2 in progress)_: Engage directly with mathematical models in systems biology.
86
+ - **Talk2KnowledgeGraphs** _(v1 in progress)_: Access and explore complex biological knowledge graphs for insightful data connections.
87
+ - **Talk2Scholars** _(v1 in progress)_: Get recommendations for articles related to your choice. Download, query, and write/retrieve them to your reference manager (currently supporting Zotero).
88
+ - **Talk2Cells** _(v1 in progress)_: Query and analyze sequencing data with ease.
89
+ - **Talk2AIAgents4Pharma** _(v1 in progress)_: Converse with all the agents above (currently supports T2B and T2KG).
90
+
91
+ ![AIAgents4Pharma](docs/assets/AIAgents4Pharma.png)
92
+
93
+ ## Getting Started
94
+
95
+ ### Installation
96
+
97
+ #### Option 1: Docker (stable-release)
98
+
99
+ _We now have all the agents available on Docker Hub._
100
+
101
+ ##### **To run Talk2AIAgents4Pharma / Talk2KnowledgeGraphs**
102
+
103
+ Both agents require [Ollama](https://ollama.com/) to run embedding models like `nomic-embed-text`. We use a **single startup script** that automatically detects your hardware (NVIDIA, AMD, or CPU) and handles container startup, model loading, and service orchestration.
104
+
105
+ ##### **1. Clone the repository and navigate to the agent directory**
106
+
107
+ ```sh
108
+ git clone https://github.com/VirtualPatientEngine/AIAgents4Pharma
109
+
110
+ cd AIAgents4Pharma/aiagents4pharma/<agent>
111
+ ```
112
+
113
+ Replace `<agent>` with either:
114
+
115
+ - `talk2aiagents4pharma`
116
+ - `talk2knowledgegraphs`
117
+
118
+ ##### **2. Setup environment variables**
119
+
120
+ Copy and configure your `.env` file:
121
+
122
+ ```sh
123
+ cp .env.example .env
124
+ ```
125
+
126
+ Then edit `.env` and add your API keys:
127
+
128
+ ```env
129
+ OPENAI_API_KEY=... # Required for both agents
130
+ NVIDIA_API_KEY=... # Required for both agents
131
+ OLLAMA_HOST=http://ollama:11434 # Required for AA4P / T2KG
132
+ LANGCHAIN_TRACING_V2=true # Optional for both agents
133
+ LANGCHAIN_API_KEY=... # Optional for both agents
134
+ ```
135
+
136
+ To use **Talk2AIAgents4Pharma** or **Talk2KnowledgeGraphs**, you need a free **NVIDIA API key**. Create an account and apply for free credits [here](https://build.nvidia.com/explore/discover).
137
+
138
+ ###### Notes for Windows Users
139
+
140
+ If you are using Windows, it is recommended to install **Git Bash** for a smoother experience when running the bash commands in this guide.
141
+
142
+ - For applications that use **Docker Compose**, Git Bash is **required**.
143
+ - For applications that use **docker run** manually, Git Bash is **optional**, but recommended for consistency.
144
+
145
+ You can download Git Bash here: [Git for Windows](https://git-scm.com/downloads).
146
+
147
+ When using Docker on Windows, make sure you **run Docker with administrative privileges** if you face permission issues.
148
+
149
+ To resolve permission issues, you can:
150
+
151
+ - Review the official Docker documentation on [Windows permission requirements](https://docs.docker.com/desktop/setup/install/windows-permission-requirements/).
152
+ - Alternatively, follow the community discussion and solutions on [Docker Community Forums](https://forums.docker.com/t/error-when-trying-to-run-windows-containers-docker-client-must-be-run-with-elevated-privileges/136619).
153
+
154
+ **LangSmith** support is optional. To enable it, create an API key [here](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key).
155
+
156
+ ##### **3. Start the application**
157
+
158
+ Run the startup script. It will:
159
+
160
+ - Detect your hardware configuration (NVIDIA GPU, AMD GPU, or CPU). Apple Metal is unavailable inside Docker, and Intel SIMD optimizations are automatically handled without special configuration.
161
+ - Choose the correct Ollama image (`latest` or `rocm`)
162
+ - Launch the Ollama container with appropriate runtime settings
163
+ - Pull the required embedding model (`nomic-embed-text`)
164
+ - Start the agent **after the model is available**
165
+
166
+ ```sh
167
+ chmod +x startup.sh
168
+ ./startup.sh # Add --cpu flag to force CPU mode if needed
169
+ ```
170
+
171
+ ##### **4. Access the Web UI**
172
+
173
+ Once started, the agent is available at:
174
+
175
+ ```
176
+ http://localhost:8501
177
+ ```
178
+
179
+ ##### **To run Talk2BioModels / Talk2Scholars**
180
+
181
+ 1. **Run the containers**
182
+
183
+ ###### Talk2Biomodels
184
+
185
+ ```docker
186
+ docker run -d \
187
+ --name talk2biomodels \
188
+ -e OPENAI_API_KEY=<your_openai_api_key> \
189
+ -e NVIDIA_API_KEY=<your_nvidia_api_key> \
190
+ -p 8501:8501 \
191
+ virtualpatientengine/talk2biomodels
192
+ ```
193
+
194
+ ###### Talk2Scholars
195
+
196
+ ```docker
197
+ docker run -d \
198
+ --name talk2scholars \
199
+ -e OPENAI_API_KEY=<your_openai_api_key> \
200
+ -e ZOTERO_API_KEY=<your_zotero_api_key> \
201
+ -e ZOTERO_USER_ID=<your_zotero_user_id> \
202
+ -e NVIDIA_API_KEY=<your_nvidia_api_key> \
203
+ -p 8501:8501 \
204
+ virtualpatientengine/talk2scholars
205
+ ```
206
+
207
+ 2. **Access the Web App**
208
+ Open your browser and go to:
209
+
210
+ ```
211
+ http://localhost:8501
212
+ ```
213
+
214
+ To use **Talk2BioModels** or **Talk2Scholars**, you need a free **NVIDIA API key**. Create an account and apply for free credits [here](https://build.nvidia.com/explore/discover).
215
+
216
+ Only for **Talk2Scholars**, you also need a **Zotero API key**, which you can generate [here](https://www.zotero.org/user/login#applications). _(For all other agents, the Zotero key is not required.)_
217
+
218
+ If you are using Docker on Windows, please follow these [Windows Setup Notes](#notes-for-windows-users).
219
+
220
+ **LangSmith** support is optional. To enable it, create an API key [here](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key).
221
+
222
+ #### Notes
223
+
224
+ - Be sure to **replace the placeholder values** with your actual credentials before running any container:
225
+
226
+ - `<your_openai_api_key>`
227
+ - `<your_nvidia_api_key>`
228
+ - `<your_zotero_api_key>`
229
+ - `<your_zotero_user_id>`
230
+
231
+ - All agents default to **port `8501`**. If you plan to run multiple agents simultaneously, make sure to assign **different ports** to avoid conflicts.
232
+
233
+ Example (Talk2Scholars on port `8502`):
234
+
235
+ ```docker
236
+ docker run -d \
237
+ --name talk2scholars \
238
+ -e OPENAI_API_KEY=<your_openai_api_key> \
239
+ -e ZOTERO_API_KEY=<your_zotero_api_key> \
240
+ -e ZOTERO_USER_ID=<your_zotero_user_id> \
241
+ -e NVIDIA_API_KEY=<your_nvidia_api_key> \
242
+ -p 8502:8501 \
243
+ virtualpatientengine/talk2scholars
244
+ ```
245
+
246
+ Then access the app at: [http://localhost:8502](http://localhost:8502)
247
+
248
+ #### Option 2: git (for developers and contributors)
249
+
250
+ ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2FVirtualPatientEngine%2FAIAgents4Pharma%2Frefs%2Fheads%2Fmain%2Fpyproject.toml)
251
+
252
+ 1. **Clone the repository:**
253
+ ```sh
254
+ git clone https://github.com/VirtualPatientEngine/AIAgents4Pharma
255
+ cd AIAgents4Pharma
256
+ ```
257
+ 2. **Install dependencies:**
258
+
259
+ ```sh
260
+ pip install -r requirements.txt
261
+ ```
262
+
263
+ 3. **Initialize API Keys**
264
+
265
+ ```env
266
+ export OPENAI_API_KEY=.... # Required for all agents
267
+ export NVIDIA_API_KEY=.... # Required for all agents
268
+ export ZOTERO_API_KEY=.... # Required for T2S
269
+ export ZOTERO_USER_ID=.... # Required for T2S
270
+ export LANGCHAIN_TRACING_V2=true # Optional for all agents
271
+ export LANGCHAIN_API_KEY=... # Optional for all agents
272
+ ```
273
+
274
+ To use **Talk2AIAgents4Pharma**, **Talk2BioModels**, **Talk2KnowledgeGraphs**, or **Talk2Scholars**, you need a free **NVIDIA API key**. Create an account and apply for free credits [here](https://build.nvidia.com/explore/discover).
275
+
276
+ Only for **Talk2Scholars**, you also need a **Zotero API key**, which you can generate [here](https://www.zotero.org/user/login#applications). _(For all other agents, the Zotero key is not required.)_
277
+
278
+ To use **Talk2Scholars**, you must have **FAISS** installed through **Conda**. Follow installation instructions for your OS [here](https://github.com/VirtualPatientEngine/AIAgents4Pharma/tree/main/aiagents4pharma/talk2scholars/install.md).
279
+
280
+ To use **Talk2AIAgents4Pharma** or **Talk2KnowledgeGraphs**, you must have **Ollama** installed. Follow installation instructions for your OS [here](https://ollama.com/download).
281
+
282
+ After installing, pull the `nomic-embed-text` model and start the server by running:
283
+
284
+ ```sh
285
+ ollama pull nomic-embed-text && ollama serve
286
+ ```
287
+
288
+ More details about the model are available [here](https://ollama.com/library/nomic-embed-text).
289
+
290
+ Additionally on **Windows**, the `pcst_fast 1.0.10` library requires **Microsoft Visual C++ 14.0 or greater**.
291
+ You can download the **Microsoft C++ Build Tools** [here](https://visualstudio.microsoft.com/visual-cpp-build-tools/).
292
+
293
+ **LangSmith** support is optional. To enable it, create an API key [here](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key).
294
+
295
+ _Please note that this will create a new tracing project in your LangSmith
296
+ account with the name `T2X-xxxx`, where `X` can be `AA4P` (Main Agent),
297
+ `B` (Biomodels), `S` (Scholars), `KG` (KnowledgeGraphs), or `C` (Cells).
298
+ If you skip the previous step, it will default to the name `default`.
299
+ `xxxx` will be the 4-digit ID created for the session._
300
+
301
+ 4. **Launch the app:**
302
+ ```sh
303
+ streamlit run app/frontend/streamlit_app_<agent>.py
304
+ ```
305
+ _Replace `<agent>` with the name of the agent you want to launch:_
306
+
307
+ - `talk2aiagents4pharma`
308
+ - `talk2biomodels`
309
+ - `talk2knowledgegraphs`
310
+ - `talk2scholars`
311
+ - `talk2cells`
312
+
313
+ For detailed instructions on each agent, please refer to their respective modules.
314
+
315
+ #### Option 3: pip (beta-release)
316
+
317
+ ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2FVirtualPatientEngine%2FAIAgents4Pharma%2Frefs%2Fheads%2Fmain%2Fpyproject.toml)
318
+
319
+ ```sh
320
+ pip install aiagents4pharma
321
+ ```
322
+
323
+ Check out the tutorials on each agent for detailed instructions.
324
+
325
+ ## Contributing
326
+
327
+ We welcome your support to make **AIAgents4Pharma** even better.
328
+ All types of contributions are appreciated — whether you're fixing bugs, adding features, improving documentation, or helping with testing, every contribution is valuable.
329
+
330
+ #### How to contribute
331
+
332
+ 1. Star this repository to show your support.
333
+ 2. Fork the repository.
334
+ 3. Create a new branch for your work:
335
+ ```sh
336
+ git checkout -b feat/your-feature-name
337
+ ```
338
+ 4. Make your changes and commit them:
339
+ ```sh
340
+ git commit -m "feat: add a brief description of your change"
341
+ ```
342
+ 5. Push your branch:
343
+ ```sh
344
+ git push origin feat/your-feature-name
345
+ ```
346
+ 6. Open a Pull Request.
347
+
348
+ #### Areas where you can help
349
+
350
+ - Beta testing for Talk2BioModels and Talk2Scholars.
351
+ - Development work related to Python, bioinformatics, or knowledge graphs.
352
+
353
+ #### Contacts for contributions
354
+
355
+ - **Talk2Biomodels / Talk2Cells**: [@gurdeep330](https://github.com/gurdeep330), [@lilijap](https://github.com/lilijap), [@dmccloskey](https://github.com/dmccloskey)
356
+ - **Talk2KnowledgeGraphs**: [@awmulyadi](https://github.com/awmulyadi), [@dmccloskey](https://github.com/dmccloskey)
357
+ - **Talk2Scholars**: [@ansh-info](https://github.com/ansh-info), [@gurdeep330](https://github.com/gurdeep330), [@dmccloskey](https://github.com/dmccloskey)
358
+
359
+ Please refer to our [CONTRIBUTING.md](CONTRIBUTING.md) for more detailed contribution guidelines.
360
+
361
+ ## Feedback
362
+
363
+ If you have questions, bug reports, feature requests, comments, or suggestions, we would love to hear from you.
364
+ Please open an `issue` or start a `discussion`.
@@ -136,23 +136,23 @@ aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtf
136
136
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
137
137
  aiagents4pharma/talk2scholars/__init__.py,sha256=NOZxTklAH1j1ggu97Ib8Xn9LCKudEWt-8dx8w7yxVD8,180
138
138
  aiagents4pharma/talk2scholars/agents/__init__.py,sha256=c_0Pk85bt-RfK5RMyALM3MXo3qXVMoYS7BOqM9wuFME,317
139
- aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=KdKbnc-5zxktLUkzEZHC3bvn8_iKa8Kk4So90i48cdE,3275
140
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=wrK9CPy5evH56fyOZ2BlkBfY5aEj6tefc4jSDPVzYvs,3041
141
- aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=xt_bgCTVJ6jOCkhc_rHh8pngq4uS8kuNOevuP3eC-sA,3702
142
- aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=ua1bjKE2HBKZuLnDn8me5fuV1lSvdZbwAlo3Yp27TT4,4659
143
- aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=f11kNqtyZnpBiIf3Fe9gU0WqQ0-ohhngTKl-o0u997Q,4442
139
+ aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=oCSWPj3TUgTIERmYbBTYipNrU1g956LXJEUx-7-KAQ0,3354
140
+ aiagents4pharma/talk2scholars/agents/paper_download_agent.py,sha256=pYHW3R7VQjRA3PhgWGQYI3ErfdILYQ0FM1WGXii3r1k,2996
141
+ aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=GEXzJMQxIeZ7zLP-AlnTMU-n_KXZ7g22Qd9L3USIc_4,3626
142
+ aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=oui0CMSyXmBGBJ7LnYq8Ce0V8Qc3BS6GgH5Qx5wI6oM,4565
143
+ aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=NAmEURIhH-sjXGO-dqAigUA10m-Re9Qe1hY8db4CIP0,4370
144
144
  aiagents4pharma/talk2scholars/configs/__init__.py,sha256=Y9-4PxsNCMoxyyQgDSbPByJnO9wnyem5SYL3eOZt1HY,189
145
145
  aiagents4pharma/talk2scholars/configs/config.yaml,sha256=-8X0_gTmjEuXAeIrnppw3Npy8HICelHZOvTKEScI-rs,596
146
146
  aiagents4pharma/talk2scholars/configs/agents/__init__.py,sha256=plv5Iw34gvbGZbRyJapvoOiiFXekRQIwjV_yy5AR_SI,104
147
147
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=D94LW4cXLmJe4dNl5qoR9QN0JnBqGLbQDgDLqhCNUE0,213
148
148
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
149
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=GZRqZoUy8eAWXyd9GJDh-A4mYSJOhnkid6TaIJTGBeU,1192
149
+ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=EmUAxeQSnH4U5Op5_XOzCbcexDCp-Rpz3z0yVPRtQUg,1315
150
150
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
151
151
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
152
152
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
153
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=UIYkr060IpoLHMXVPxGAjrkCJSjX7H0DzcFSasyW6sE,1185
153
+ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=_sSt2jCgSILwrXkywDAxkXONCZn896owLBaf46iFI0I,1323
154
154
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
155
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml,sha256=09nMAlI4CTLlss03trR0ZaEeLdABQfkwUUE6ZCK4WzY,718
155
+ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml,sha256=SOdjRiGUxjW9JeCRDd_U1RjCclItkoPODrj5RpIrxSY,2030
156
156
  aiagents4pharma/talk2scholars/configs/app/__init__.py,sha256=tXpOW3R4eAfNoqvoaHfabSG-DcMHmUGSTg_4zH_vlgw,94
157
157
  aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
158
158
  aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml,sha256=A6nYjrgzEyRv5JYsGN7oqNX4-tufMBZ6mg-A7bMX6V4,906
@@ -172,39 +172,37 @@ aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml,sha256=ifOt
172
172
  aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
173
173
  aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml,sha256=gB7y7pznviQUzu49Eu4ONNkjQjT8wPKNSw6S_vfd9kI,1222
174
174
  aiagents4pharma/talk2scholars/state/__init__.py,sha256=ReScKLpEvedq4P6ww52NRQS0Xr6SSQV7hqoQ83Mt75U,138
175
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=8ZFfv55tXO3LU-2DzKC5JjH4_ryodgu-H1ieKBmVFvw,2844
175
+ aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=MGB-rWjbOpLN-pK3nY9YKAuskcjeR62rjXbZl1Ppjas,2836
176
176
  aiagents4pharma/talk2scholars/tests/__init__.py,sha256=U3PsTiUZaUBD1IZanFGkDIOdFieDVJtGKQ5-woYUo8c,45
177
177
  aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py,sha256=FBRqS06IKJYFOudQEHQr-9oJ4tftkH-gTCowTAqwWSg,3686
178
- aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=hgKgMXAGGqGJ6EXbjfsdZ5t1IWo2W67tc_F7vK747Qg,6844
179
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=CP4fKFU_JYP_AXvTptnwpjaVar1d5lVKV5vxYgH_1j4,5309
180
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=_bGuoo4b6zD_vwLa7jGziWDT5qRtavsf02Jiaa7JIRU,5817
181
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=TN4Sq5-SCxv-9VfFyq7sOlBlxbekmnWuB7-qh4MrhkA,4656
182
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=te6w1tPvuNSNCTihIwaCT083BzUCowjOQPwuCodXR4k,8723
178
+ aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=IZYSocYVwqPil2lF6L07mKm8PUq7vjopmqNiCm6IJEA,6876
179
+ aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py,sha256=gKSQp-sw62FplNnGYW0wv2ZIUEefh3o0tFWbRzy9yLs,5068
180
+ aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py,sha256=3mycLeEgH5XkwxuoXfTpQb8c8xFtIX2HjVnACPrSf60,7141
181
+ aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=scGCTgka2JuoUhzZwzDn0OgIYihOLhXbwb5uGFR02aI,4302
182
+ aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=_zzg4_XVVEuvYDsJ5la0kFLf9dT45P67-UnUZWDUkhY,34874
183
183
  aiagents4pharma/talk2scholars/tests/test_routing_logic.py,sha256=g79tG68ZrUOL3-duCCJwvFK6OieR5KedRf3yTUDqIFk,2784
184
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py,sha256=BhW1wGc-wUPS4fwNBQRtBXJaJ_i7L6t_G9Bq57fK7rI,7784
185
- aiagents4pharma/talk2scholars/tests/test_s2_display.py,sha256=w1TqgEdl9WpW_A2Ud1slfI5fkRFkKtKadAlkEfSLOZk,2247
184
+ aiagents4pharma/talk2scholars/tests/test_s2_agent.py,sha256=xvlPU4Lz_DdQLTpdtoHW9l_AMvFrzC-FXE5royGbtLM,7806
185
+ aiagents4pharma/talk2scholars/tests/test_s2_display.py,sha256=TfJE74KsocAHLbitMLjVrfUwAwyIYpzEvkdrQMBzM2g,2263
186
186
  aiagents4pharma/talk2scholars/tests/test_s2_multi.py,sha256=VCTfexhtX7FgWOBS0YtSm1zghbByZnni1NBLGVTJVGI,11166
187
- aiagents4pharma/talk2scholars/tests/test_s2_query.py,sha256=hEcBt142nn_bKV9lor__Yk4LusgE1tN5dA-qpT606Bc,2443
187
+ aiagents4pharma/talk2scholars/tests/test_s2_query.py,sha256=_pDVolOmhrjZnh37Ig97-LcDHUe0lm3GvTWjNDKgMkc,2461
188
188
  aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py,sha256=YtA2nbPRtoSR7mPqEjqLF5ERGVzTfeULztsNoCI48X8,2003
189
189
  aiagents4pharma/talk2scholars/tests/test_s2_search.py,sha256=mCGpoCYVn0SJ9BPcEjTz2MLy_K2XJIxvPngwsMoKijA,9945
190
190
  aiagents4pharma/talk2scholars/tests/test_s2_single.py,sha256=KjSh7V2cl1IuO_M9O6dj0vnMHr13H-xKxia_ZgT4qag,10313
191
191
  aiagents4pharma/talk2scholars/tests/test_state.py,sha256=_iHXvoZnU_eruf8l1sQKBSCIVnxNkH_9VzkVtZZA6bY,384
192
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py,sha256=fQDQj28uFNC1TyMPzNaNfDJacuw1_DqwGiX6IgliR3Y,6130
192
+ aiagents4pharma/talk2scholars/tests/test_zotero_agent.py,sha256=jFEtfQVEwEQ6v3kq7A1_p2MKCu5wbtX47V4bE-fKD6M,6158
193
193
  aiagents4pharma/talk2scholars/tests/test_zotero_human_in_the_loop.py,sha256=YelLQu9Y_r1SNQsC1xoLHJoJ3soIZtBt1MFbbNhY-Dg,10744
194
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py,sha256=i1bS-5uSv25z4UFrc_W5rKZ0bwOg6ZK5cwLHMkjWgt8,18592
195
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=ecu9C-RWaLNgmAFqGN8s7dzCrtODqXOSZDievzjtAfQ,15511
194
+ aiagents4pharma/talk2scholars/tests/test_zotero_path.py,sha256=Ko0HyXCrpm-vs8Bkf-syxp3MfL1IvZwXXgPExyQy_F8,18618
195
+ aiagents4pharma/talk2scholars/tests/test_zotero_read.py,sha256=yQTksJhqW036Scs7pnc_bBC23N210mcjaZ6sJZl8QnM,29492
196
196
  aiagents4pharma/talk2scholars/tests/test_zotero_write.py,sha256=qWlO0XoZJ6vxUxgisjYv9Np87CoTEDxiQBEOhdj9foo,6111
197
197
  aiagents4pharma/talk2scholars/tools/__init__.py,sha256=c8pYHDqR9P0Frz2jWjbvyizfSTBMlMFzGsiQzx2KC9c,189
198
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=0XmPLEqCply536Y1uWksmHYjlgNWcmcMpZx63XvGEFI,413
199
- aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py,sha256=nwVhRUqkdta3WLgd9roAWpx-bhJm3aAgJLx4RSYSJXQ,1327
200
- aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py,sha256=hM9fdbwtOxuW1mpAfmfbILTI7kSVALgrGpjC2vMsvf8,3970
201
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py,sha256=zndAnNFRBztuBK-tpW9UyYsGL8tB3gFjYhiTq6nzZu4,2203
198
+ aiagents4pharma/talk2scholars/tools/paper_download/__init__.py,sha256=tNTLSPNdir4XSKRF0HjXI_tBGBXXXwDhWRI5VnwbZpM,214
199
+ aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py,sha256=WTWvXbh0C96OoMoPf8Bgu0AgorsdkWslac_WqlHc4bo,3900
202
200
  aiagents4pharma/talk2scholars/tools/pdf/__init__.py,sha256=DPpOfON3AySko5EBBAe_3udOoSaAdQWNyGeNvJyV5R8,138
203
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=tNv0frCr0dxA0lfbwf5yudKRyWtbuRGMwqW5mk9u4eE,8797
204
- aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=u_qh1bpDhVdyTr_S2wVfPwQ_lMz77NQ-ZDWtOP_PWzo,420
205
- aiagents4pharma/talk2scholars/tools/s2/display_results.py,sha256=UR0PtEHGDpOhPH0Di5HT8-Fip2RkEMTJgzROsChb1gc,2959
201
+ aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=RfICBn4VorvpTrb_GunFFAi6fnzUlees_k0poQm0VKc,21853
202
+ aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=w_eiw0pG8HNp79F9O_icXs_Yl_4odsmagYNKDTjIsvk,428
203
+ aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py,sha256=YtnCrI0c3Fhi68R6ndPUnVM3E5u7CuBB_myIzLN6nXg,3040
206
204
  aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py,sha256=N6GwG3oCQFEcntpjTQObAELzM5OpZq0u9J9-gUWU2kc,2716
207
- aiagents4pharma/talk2scholars/tools/s2/query_results.py,sha256=5yXuHqz5UKO9BbovEUnqgjcMvqVG4vp9VJO8Zaz5N1w,2029
205
+ aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py,sha256=inaWWctaylJAJsXinQA63qPs5n-gn7axJz8ijj66Jmw,2746
208
206
  aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha256=llzMMnEQKeYVamJbF4_DTMx-BgVe79vwDcUIFGLrmUY,2615
209
207
  aiagents4pharma/talk2scholars/tools/s2/search.py,sha256=NGzo1rF5VJJuZJbSLDwy2f220wSh7DWEw6xT1qQA2V0,2452
210
208
  aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py,sha256=7VivBGHcmaJZN7v7gYwddC-rfrDHaZo74pSNBYlJ2xU,2673
@@ -213,16 +211,16 @@ aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py,sha256=rrR0DRNeGHpY
213
211
  aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py,sha256=_eP7q4ZTSWisEF4Stffe-IpR2MD9WrQ0u3jbbeJBRLU,6363
214
212
  aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py,sha256=ahTDT0lp5VRZS5hLL3-hsHx4wB3LUVY2OBTCTEJyR3Y,6983
215
213
  aiagents4pharma/talk2scholars/tools/zotero/__init__.py,sha256=wXiQILLq-utV35PkDUpm_F074mG9yRMyGQAFlr9UAOw,197
216
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=B21TOdB1nR--2Ug6Zx7nYjAKfCbndMF4L-cWcHPBWII,2267
214
+ aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=RqFXP2DXmkHLhVLirrTnmSk-E8Jipi4ue_Zw65npbnM,2263
217
215
  aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py,sha256=iqwpolg7GWAjXizubLrPaAsgOpsOhKz-tFRyLOiBvC0,6325
218
216
  aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py,sha256=KnDcnUBB0lwMcxNpC3hsVnICWkj23MDAePdHlK-Kekk,3024
219
217
  aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py,sha256=uIyKZSFB07-zd3vjS9ABL0r6fdBX9JHw60j8oUfxHQs,209
220
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=6f2yu5admNZp7xw5VW2TFGF4wh_oyVn9aLaQF47FLMc,6038
218
+ aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py,sha256=lyrfpx8NHYiAN1qQSJWqPka7cML5BASwRXaI66fb-u8,13662
221
219
  aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py,sha256=IPD1V9yrBYaDnRe7sR6PrpwR82OBJbA2P_Tc6RbxAbM,2748
222
220
  aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py,sha256=ALwLecy1QVebbsmXJiDj1GhGmyhq2R2tZlAyEl1vfhw,7410
223
221
  aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py,sha256=oIrfbOySgts50ksHKyjcWjRkPRIS88g3Lc0v9mBkU8w,6375
224
- aiagents4pharma-1.30.4.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
225
- aiagents4pharma-1.30.4.dist-info/METADATA,sha256=0Sr-ukEvXz5sLl0PdoHYJ9bqFHjdScBvP75GSvxeUHc,13267
226
- aiagents4pharma-1.30.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
227
- aiagents4pharma-1.30.4.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
228
- aiagents4pharma-1.30.4.dist-info/RECORD,,
222
+ aiagents4pharma-1.32.0.dist-info/licenses/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
223
+ aiagents4pharma-1.32.0.dist-info/METADATA,sha256=o3uHTzLfOPglRJUlK6_vKAuJ0qwKhGt7BeiQHeF4U_o,16043
224
+ aiagents4pharma-1.32.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
225
+ aiagents4pharma-1.32.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
226
+ aiagents4pharma-1.32.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,45 +0,0 @@
1
- """
2
- Abstract Base Class for Paper Downloaders.
3
-
4
- This module defines the `AbstractPaperDownloader` class, which serves as a
5
- base class for downloading scholarly papers from different sources
6
- (e.g., arXiv, PubMed, IEEE Xplore). Any specific downloader should
7
- inherit from this class and implement its methods.
8
- """
9
-
10
- from abc import ABC, abstractmethod
11
- from typing import Any, Dict
12
-
13
-
14
- class AbstractPaperDownloader(ABC):
15
- """
16
- Abstract base class for scholarly paper downloaders.
17
-
18
- This is designed to be extended for different paper sources
19
- like arXiv, PubMed, IEEE Xplore, etc. Each implementation
20
- must define methods for fetching metadata and downloading PDFs.
21
- """
22
-
23
- @abstractmethod
24
- def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
25
- """
26
- Fetch metadata for a given paper ID.
27
-
28
- Args:
29
- paper_id (str): The unique identifier for the paper.
30
-
31
- Returns:
32
- Dict[str, Any]: The metadata dictionary (format depends on the data source).
33
- """
34
-
35
- @abstractmethod
36
- def download_pdf(self, paper_id: str) -> bytes:
37
- """
38
- Download the PDF for a given paper ID.
39
-
40
- Args:
41
- paper_id (str): The unique identifier for the paper.
42
-
43
- Returns:
44
- bytes: The binary content of the downloaded PDF.
45
- """
@@ -1,115 +0,0 @@
1
- """
2
- Arxiv Paper Downloader
3
-
4
- This module provides an implementation of `AbstractPaperDownloader` for arXiv.
5
- It connects to the arXiv API, retrieves metadata for a research paper, and
6
- downloads the corresponding PDF.
7
-
8
- By using an abstract base class, this implementation is extendable to other
9
- APIs like PubMed, IEEE Xplore, etc.
10
- """
11
-
12
- import xml.etree.ElementTree as ET
13
- from typing import Any, Dict
14
- import logging
15
- import hydra
16
- import requests
17
- from .abstract_downloader import AbstractPaperDownloader
18
-
19
- # Configure logging
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class ArxivPaperDownloader(AbstractPaperDownloader):
25
- """
26
- Downloader class for arXiv.
27
-
28
- This class interfaces with the arXiv API to fetch metadata
29
- and retrieve PDFs of academic papers based on their arXiv IDs.
30
- """
31
-
32
- def __init__(self):
33
- """
34
- Initializes the arXiv paper downloader.
35
-
36
- Uses Hydra for configuration management to retrieve API details.
37
- """
38
- with hydra.initialize(version_base=None, config_path="../../configs"):
39
- cfg = hydra.compose(
40
- config_name="config", overrides=["tools/download_arxiv_paper=default"]
41
- )
42
- self.api_url = cfg.tools.download_arxiv_paper.api_url
43
- self.request_timeout = cfg.tools.download_arxiv_paper.request_timeout
44
- self.chunk_size = cfg.tools.download_arxiv_paper.chunk_size
45
- self.pdf_base_url = cfg.tools.download_arxiv_paper.pdf_base_url
46
-
47
- def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
48
- """
49
- Fetch metadata from arXiv for a given paper ID.
50
-
51
- Args:
52
- paper_id (str): The arXiv ID of the paper.
53
-
54
- Returns:
55
- Dict[str, Any]: A dictionary containing metadata, including the XML response.
56
- """
57
- logger.info("Fetching metadata from arXiv for paper ID: %s", paper_id)
58
- api_url = f"{self.api_url}?search_query=id:{paper_id}&start=0&max_results=1"
59
- response = requests.get(api_url, timeout=self.request_timeout)
60
- response.raise_for_status()
61
- return {"xml": response.text}
62
-
63
- def download_pdf(self, paper_id: str) -> Dict[str, Any]:
64
- """
65
- Download the PDF of a paper from arXiv.
66
-
67
- This function first retrieves the paper's metadata to locate the PDF link
68
- before downloading the file.
69
-
70
- Args:
71
- paper_id (str): The arXiv ID of the paper.
72
-
73
- Returns:
74
- Dict[str, Any]: A dictionary containing:
75
- - `pdf_object`: The binary content of the downloaded PDF.
76
- - `pdf_url`: The URL from which the PDF was fetched.
77
- - `arxiv_id`: The arXiv ID of the downloaded paper.
78
- """
79
- metadata = self.fetch_metadata(paper_id)
80
-
81
- # Parse the XML response to locate the PDF link.
82
- root = ET.fromstring(metadata["xml"])
83
- ns = {"atom": "http://www.w3.org/2005/Atom"}
84
- pdf_url = next(
85
- (
86
- link.attrib.get("href")
87
- for entry in root.findall("atom:entry", ns)
88
- for link in entry.findall("atom:link", ns)
89
- if link.attrib.get("title") == "pdf"
90
- ),
91
- None,
92
- )
93
-
94
- if not pdf_url:
95
- raise RuntimeError(f"Failed to download PDF for arXiv ID {paper_id}.")
96
-
97
- logger.info("Downloading PDF from: %s", pdf_url)
98
- pdf_response = requests.get(pdf_url, stream=True, timeout=self.request_timeout)
99
- pdf_response.raise_for_status()
100
- # print (pdf_response)
101
-
102
- # Combine the PDF data from chunks.
103
- pdf_object = b"".join(
104
- chunk
105
- for chunk in pdf_response.iter_content(chunk_size=self.chunk_size)
106
- if chunk
107
- )
108
- # print (pdf_object)
109
- print("PDF_URL", pdf_url)
110
-
111
- return {
112
- "pdf_object": pdf_object,
113
- "pdf_url": pdf_url,
114
- "arxiv_id": paper_id,
115
- }