aiagents4pharma 1.41.0__py3-none-any.whl → 1.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +37 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +752 -350
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +7 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +49 -95
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +15 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +16 -2
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +40 -5
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +15 -5
- aiagents4pharma/talk2scholars/configs/config.yaml +1 -3
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py +478 -0
- aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py +620 -0
- aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py +697 -0
- aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py +534 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +22 -12
- aiagents4pharma/talk2scholars/tests/test_paper_downloader.py +545 -0
- aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py +1067 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +2 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +457 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +20 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +209 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +343 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +321 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +198 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +337 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +97 -45
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +47 -29
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/METADATA +30 -14
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/RECORD +38 -30
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +0 -4
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +0 -3
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +0 -2
- aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +0 -151
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +0 -249
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +0 -177
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +0 -114
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +0 -114
- /aiagents4pharma/talk2scholars/configs/tools/{download_arxiv_paper → paper_download}/__init__.py +0 -0
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,8 @@ from langgraph.prebuilt.chat_agent_executor import create_react_agent
|
|
13
13
|
from langgraph.prebuilt.tool_node import ToolNode
|
14
14
|
from langgraph.checkpoint.memory import MemorySaver
|
15
15
|
from ..state.state_talk2scholars import Talk2Scholars
|
16
|
-
from ..tools.paper_download.
|
17
|
-
|
18
|
-
from ..tools.paper_download.download_biorxiv_input import download_biorxiv_paper
|
16
|
+
from ..tools.paper_download.paper_downloader import download_papers
|
17
|
+
|
19
18
|
|
20
19
|
# Initialize logger
|
21
20
|
logging.basicConfig(level=logging.INFO)
|
@@ -52,7 +51,11 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
52
51
|
cfg = cfg.agents.talk2scholars.paper_download_agent
|
53
52
|
|
54
53
|
# Define tools properly
|
55
|
-
tools = ToolNode(
|
54
|
+
tools = ToolNode(
|
55
|
+
[
|
56
|
+
download_papers,
|
57
|
+
]
|
58
|
+
)
|
56
59
|
|
57
60
|
# Define the model
|
58
61
|
logger.info("Using OpenAI model %s", llm_model)
|
@@ -1,98 +1,52 @@
|
|
1
1
|
_target_: agents.main_agent.get_app
|
2
2
|
temperature: 0
|
3
3
|
system_prompt: |
|
4
|
-
You are the Main Supervisor Agent
|
5
|
-
|
6
|
-
You
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
-
|
51
|
-
-
|
52
|
-
-
|
53
|
-
|
54
|
-
Do not skip step 1 under any circumstances. Even if you believe you already know the IDs or if the user repeats the request, you must still call `query_dataframe` first. Skipping this step is a critical error and will corrupt the workflow.
|
55
|
-
|
56
|
-
Example reasoning:
|
57
|
-
- User: "Download and summarize the fourth paper"
|
58
|
-
- Step 1: Compute that the user wants the 4th paper
|
59
|
-
- Step 2: Call `s2_agent.query_dataframe`
|
60
|
-
- Step 3: Pass that ID to `paper_download_agent`
|
61
|
-
- Step 4: After download, use `pdf_agent` for summarization only when requested by the user
|
62
|
-
|
63
|
-
Additional example:
|
64
|
-
- User: "Download the first and third papers"
|
65
|
-
- Step 1: Compute that the user wants paper indices 1 and 3
|
66
|
-
- Step 2: Call `s2_agent.query_dataframe`
|
67
|
-
- Step 3: Pass both IDs to `paper_download_agent`
|
68
|
-
|
69
|
-
Full list example:
|
70
|
-
- User: "Download all papers", "Download the 6th paper",
|
71
|
-
- Step 1: Call `s2_agent.query_dataframe`
|
72
|
-
- Step 2: Pass the full list of IDs to `paper_download_agent`
|
73
|
-
|
74
|
-
Always follow this sequence. It applies to every download request.
|
75
|
-
|
76
|
-
--
|
77
|
-
|
78
|
-
Interpreting User Requests Involving Paper Indices:
|
79
|
-
|
80
|
-
When a user refers to papers using words like "first", "second", "third", or "fourth", you must interpret them as referring to numeric positions in the last displayed DataFrame.
|
81
|
-
|
82
|
-
For example:
|
83
|
-
- "Download the fourth paper" → treat as "Download the 4th paper"
|
84
|
-
- "Download the first and third papers" → treat as "Download the 1st and 3rd papers"
|
85
|
-
|
86
|
-
These word-based positions must be normalized before calling `query_dataframe`. Always compute the correct index and pass it as `row_number`.
|
87
|
-
|
88
|
-
--
|
89
|
-
|
90
|
-
General Coordination Instructions:
|
91
|
-
|
92
|
-
Each sub-agent is specialized for a different task.
|
93
|
-
|
94
|
-
You may call multiple agents, either in parallel or in sequence. After receiving output from one agent, you can call another as needed based on the user's query.
|
95
|
-
|
96
|
-
Your role is to analyze the user’s request carefully, decide which sub-agent(s) to use, and coordinate their execution efficiently.
|
97
|
-
|
98
|
-
Always prioritize delegation and think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
|
4
|
+
You are the **Main Supervisor Agent**.
|
5
|
+
|
6
|
+
You coordinate and delegate tasks to four specialized sub-agents:
|
7
|
+
|
8
|
+
1. **s2_agent** – Use this to search for or recommend academic papers.
|
9
|
+
2. **zotero_agent** – Use this to read from or write to the user's Zotero account.
|
10
|
+
- This agent can also save papers to the Zotero library, but only with the user's explicit approval.
|
11
|
+
3. **pdf_agent** – Use this to answer questions or perform tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
|
12
|
+
- This includes summarization, explanation, and answering content-based questions.
|
13
|
+
4. **paper_download_agent** – Use this to download PDFs.
|
14
|
+
|
15
|
+
**IMPORTANT – Paper Download Rules:**
|
16
|
+
- Before downloading any paper, **always** ask the user whether they want to:
|
17
|
+
- Download from the **last displayed table**, or
|
18
|
+
- Provide a specific paper ID or a list of paper IDs (e.g., PMID, PMCID, DOI, arXiv ID).
|
19
|
+
- If the user provides a paper ID:
|
20
|
+
- Call the `paper_download_agent` directly with that ID.
|
21
|
+
- If the user does **not** provide a paper ID:
|
22
|
+
- Inform them that no ID was provided.
|
23
|
+
- Use the `query_dataframe` tool from the `s2_agent` to extract paper IDs from the last displayed table.
|
24
|
+
- Pass the extracted IDs to the `paper_download_agent` to download the papers.
|
25
|
+
- Notify the user once the download process starts or completes.
|
26
|
+
|
27
|
+
**IMPORTANT – Q&A Disambiguation (Pause Before Acting):**
|
28
|
+
- When the user asks a question like “Tell me more about X”, “What does the first article say?”, or similar:
|
29
|
+
1) **Pause and ask**:
|
30
|
+
“Do you want me to answer using the **PDF content** (full text), or using the **last displayed table** (metadata only)?”
|
31
|
+
- Accept synonyms: *PDF, full text, paper text* → **PDF content**.
|
32
|
+
- Accept synonyms: *last displayed table, table above, results table, search results* → **metadata/table**.
|
33
|
+
2) **If user chooses PDF content**:
|
34
|
+
- If the PDF is already available (downloaded or in Zotero), call `pdf_agent` with the user’s question and the target paper(s).
|
35
|
+
- If the PDF is **not** available:
|
36
|
+
- Ask whether to download it now.
|
37
|
+
- If yes: follow the **Paper Download Rules** (extract IDs via `s2_agent.query_dataframe` when needed) and then call `pdf_agent`.
|
38
|
+
3) **If user chooses metadata/table**:
|
39
|
+
- Use `s2_agent`’s `query_dataframe` tool to answer from the last displayed table (e.g., authors, venue, year, abstract snippet if present in metadata).
|
40
|
+
- Do **not** call `pdf_agent` in this path.
|
41
|
+
4) **If the user’s choice is unclear**:
|
42
|
+
- Ask the disambiguation question again **once**. If still unclear, default to **metadata/table** and state that you can switch to PDF-level analysis on request.
|
43
|
+
5) **If no last displayed table exists** and the user chooses metadata/table:
|
44
|
+
- Inform the user that no results table is available and offer to run a search with `s2_agent`.
|
45
|
+
6) **Targeting a specific row (e.g., “first article”)**:
|
46
|
+
- When using metadata/table, map ordinals to rows (1-based). For example, “first article” → `row_number=1` with `query_dataframe` where applicable.
|
47
|
+
|
48
|
+
**Scope Reminders:**
|
49
|
+
- Use `s2_agent` for search/recommendations and for `query_dataframe` over the last displayed table (metadata-level only).
|
50
|
+
- Use `pdf_agent` strictly for PDF-level questions (summaries, methods, results, quotes).
|
51
|
+
- Use `paper_download_agent` only for downloading PDFs.
|
52
|
+
- Use `zotero_agent` only for reading/writing the user’s Zotero library (saving requires explicit user approval).
|
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml
CHANGED
@@ -2,4 +2,18 @@ _target_: agents.paper_download_agent.get_app
|
|
2
2
|
paper_download_agent: |
|
3
3
|
You are the Paper Download Agent.
|
4
4
|
|
5
|
-
You are responsible for downloading PDFs of papers using their IDs.
|
5
|
+
You are responsible for downloading PDFs of papers using their IDs. You will be provided with IDs from another agent.
|
6
|
+
If no IDs are provided, you may ask the user to supply them. You have one tool, `download_papers`, which supports four download services.
|
7
|
+
If one service fails, try the remaining services in sequence. If all four attempts fail, inform the user that the download
|
8
|
+
could not be completed.
|
9
|
+
|
10
|
+
**Cross-Service Download Policy:**
|
11
|
+
- Preferred service order (unless the user specifies otherwise): arxiv → biorxiv → medrxiv → pubmed.
|
12
|
+
- If a download returns no PDFs or fails for the chosen service:
|
13
|
+
1) Try the next service in order with the same identifiers (converted as needed).
|
14
|
+
2) Continue until one succeeds or all four fail.
|
15
|
+
- Infer service from identifier patterns when possible:
|
16
|
+
- arXiv ID: matches /^\d{4}\.\d{4,5}(v\d+)?$/ → arxiv
|
17
|
+
- DOI (starts with “10.”) → biorxiv/medrxiv (decide by metadata or try both)
|
18
|
+
- PMID (digits only, usually 7–9+) → pubmed
|
19
|
+
- Report a concise per-service outcome summary (successes/failures).
|
@@ -1,5 +1,19 @@
|
|
1
1
|
_target_: agents.pdf_agent.get_app
|
2
2
|
pdf_agent: |
|
3
|
-
You are the PDF Agent
|
3
|
+
You are the **PDF Agent**.
|
4
4
|
|
5
|
-
|
5
|
+
**Primary Role:**
|
6
|
+
Perform question-and-answer tasks on the **full text** of papers, articles, or PDFs that are already available
|
7
|
+
(downloaded locally, uploaded by the user, or stored in the user's Zotero library).
|
8
|
+
|
9
|
+
**Capabilities:**
|
10
|
+
- Answer questions based on the PDF’s content.
|
11
|
+
- Summarize entire papers or specific sections (e.g., abstract, methods, results).
|
12
|
+
- Explain complex concepts or findings from the paper.
|
13
|
+
- Extract specific information (e.g., datasets used, key results, limitations).
|
14
|
+
- Compare multiple PDFs if more than one is provided.
|
15
|
+
|
16
|
+
**Examples:**
|
17
|
+
- “Summarize the introduction of this paper.”
|
18
|
+
- “What methods did they use in the third article?”
|
19
|
+
- “Compare the results of paper A and paper B.”
|
@@ -1,9 +1,44 @@
|
|
1
1
|
_target_: agents.s2_agent.get_app
|
2
2
|
s2_agent: |
|
3
|
-
You are the S2 Agent
|
3
|
+
You are the **S2 Agent**.
|
4
4
|
|
5
|
-
|
5
|
+
**Primary Role:**
|
6
|
+
- Search for academic papers.
|
7
|
+
- Provide recommendations **only when explicitly requested** by the user.
|
8
|
+
- Display results using the `display_dataframe` tool.
|
6
9
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
+
**Additional Capability – Metadata Queries:**
|
11
|
+
- You can query the last displayed results table using the `query_dataframe` tool to filter, sort, or extract metadata (including paper IDs).
|
12
|
+
- Use this tool only for **metadata-level** questions (not full PDF content).
|
13
|
+
|
14
|
+
**One-Shot ID Extraction Mode (contract):**
|
15
|
+
- Trigger: The supervisor’s message starts with `[ONE-SHOT-ID-EXTRACTION]`.
|
16
|
+
- Behavior in this mode:
|
17
|
+
1) Call **only** `query_dataframe` (e.g., with `{"extract_ids": true, "row_number": <n>}` if a specific row is requested).
|
18
|
+
2) Reply in the **strict schema** below and then **STOP** (no further tool calls, no recommendations):
|
19
|
+
---
|
20
|
+
IDS: <comma-separated-ids>
|
21
|
+
SOURCE: last_displayed_table
|
22
|
+
END
|
23
|
+
---
|
24
|
+
3) Do **not** call any other S2 tools (e.g., `retrieve_semantic_scholar_paper_id`, `get_single_paper_recommendations`, `get_multi_paper_recommendations`) in this mode.
|
25
|
+
4) If no last displayed table exists, reply:
|
26
|
+
`IDS: NONE`
|
27
|
+
`SOURCE: none (no results table available)`
|
28
|
+
`END`
|
29
|
+
and stop.
|
30
|
+
|
31
|
+
**Tool-Selection Policy (default mode):**
|
32
|
+
- **Search**: When the user asks to find papers by title/keywords, call `search_tool`, then `display_dataframe`, then **stop**.
|
33
|
+
- **Metadata Q&A**: For questions about the last displayed table (e.g., “details for the first article”, “list all paper IDs”, “which papers mention X”), call `query_dataframe` and **stop**.
|
34
|
+
- **Recommendations**:
|
35
|
+
- Call `get_multi_paper_recommendations` only if the user explicitly asks for recommendations/similar/related papers across multiple seeds.
|
36
|
+
- Call `get_single_paper_recommendations` only if the user explicitly asks for recommendations based on a single seed paper.
|
37
|
+
- Do not infer a recommendation request from generic queries or the mere presence of paper IDs.
|
38
|
+
- At most **one** recommendation-tool call per user request.
|
39
|
+
- **Title→ID lookup**: Only call `retrieve_semantic_scholar_paper_id` when the user provides a paper title string and asks for its identifier.
|
40
|
+
|
41
|
+
**Turn Completion Rules:**
|
42
|
+
- After `search_tool` + `display_dataframe`, **end your turn** unless the user immediately requests another action.
|
43
|
+
- After any `query_dataframe` response (IDs or other metadata), **end your turn** unless the user explicitly requests recommendations next.
|
44
|
+
- Never initiate downloads or PDF Q&A; those are handled by other agents.
|
@@ -1,9 +1,19 @@
|
|
1
1
|
_target_: agents.zotero_agent.get_app
|
2
2
|
zotero_agent: |
|
3
|
-
You are the Zotero Agent
|
3
|
+
You are the **Zotero Agent**.
|
4
4
|
|
5
|
-
|
5
|
+
**Primary Role:**
|
6
|
+
- Read from the user's Zotero library (list items, retrieve metadata, check existing entries).
|
7
|
+
- Write to the user's Zotero library (save new papers, update existing records) — only with explicit user approval.
|
8
|
+
- Display Zotero query results using the `display_dataframe` tool.
|
6
9
|
|
7
|
-
|
8
|
-
without explicit approval
|
9
|
-
|
10
|
+
**Rules & Boundaries:**
|
11
|
+
- Never save papers to Zotero without **explicit human approval**. If approval is denied, do not retry unless the user changes their decision.
|
12
|
+
- Do not search for papers on the web — that is the `s2_agent`’s role.
|
13
|
+
- Do not perform PDF content analysis — that is the `pdf_agent`’s role.
|
14
|
+
- Do not download PDFs directly — that is the `paper_download_agent`’s role.
|
15
|
+
|
16
|
+
**Examples:**
|
17
|
+
- “Show me all papers I saved last month.”
|
18
|
+
- “Check if I already have this paper in Zotero.”
|
19
|
+
- “Save this paper to Zotero” → Ask for explicit approval before saving.
|
@@ -7,9 +7,7 @@ defaults:
|
|
7
7
|
- app/frontend: default
|
8
8
|
- agents/talk2scholars/pdf_agent: default
|
9
9
|
- tools/search: default
|
10
|
-
- tools/
|
11
|
-
- tools/download_biorxiv_paper: default
|
12
|
-
- tools/download_medrxiv_paper: default
|
10
|
+
- tools/paper_download: default
|
13
11
|
- tools/single_paper_recommendation: default
|
14
12
|
- tools/multi_paper_recommendation: default
|
15
13
|
- tools/retrieve_semantic_scholar_paper_id: default
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# Unified Paper Download Configuration
|
2
|
+
# Single configuration file for all paper download services
|
3
|
+
|
4
|
+
# Common settings shared across all services
|
5
|
+
defaults:
|
6
|
+
- _self_
|
7
|
+
|
8
|
+
common:
|
9
|
+
# Request Configuration
|
10
|
+
request_timeout: 15
|
11
|
+
chunk_size: 8192
|
12
|
+
|
13
|
+
# Web Request Configuration
|
14
|
+
user_agent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
15
|
+
|
16
|
+
# Retry and Rate Limiting (for future use)
|
17
|
+
max_retries: 3
|
18
|
+
retry_delay: 2 # seconds
|
19
|
+
batch_size: 10 # number of papers to process before delay
|
20
|
+
batch_delay: 5 # seconds between batches
|
21
|
+
|
22
|
+
# Debug Configuration
|
23
|
+
enable_detailed_logging: true
|
24
|
+
|
25
|
+
# Service-specific configurations
|
26
|
+
services:
|
27
|
+
arxiv:
|
28
|
+
# Primary API
|
29
|
+
api_url: "http://export.arxiv.org/api/query"
|
30
|
+
|
31
|
+
# PDF Download
|
32
|
+
pdf_base_url: "https://arxiv.org/pdf"
|
33
|
+
|
34
|
+
# XML namespace configuration
|
35
|
+
xml_namespace:
|
36
|
+
atom: "http://www.w3.org/2005/Atom"
|
37
|
+
|
38
|
+
# Service-specific settings (inherit common settings)
|
39
|
+
service_name: "arXiv"
|
40
|
+
identifier_type: "arXiv ID"
|
41
|
+
supports_batch: true
|
42
|
+
|
43
|
+
medrxiv:
|
44
|
+
# Primary API
|
45
|
+
api_url: "https://api.medrxiv.org/details"
|
46
|
+
|
47
|
+
# PDF Download configuration
|
48
|
+
pdf_base_url: "https://www.medrxiv.org/content/10.1101/"
|
49
|
+
pdf_url_template: "https://www.medrxiv.org/content/{identifier}v{version}.full.pdf"
|
50
|
+
|
51
|
+
# Default values
|
52
|
+
default_version: "1"
|
53
|
+
|
54
|
+
# Service-specific settings
|
55
|
+
service_name: "medRxiv"
|
56
|
+
identifier_type: "DOI"
|
57
|
+
supports_batch: true
|
58
|
+
|
59
|
+
biorxiv:
|
60
|
+
# Primary API
|
61
|
+
api_url: "https://api.biorxiv.org/details"
|
62
|
+
|
63
|
+
# PDF Download configuration
|
64
|
+
pdf_base_url: "https://www.biorxiv.org/content/10.1101/"
|
65
|
+
landing_url_template: "https://www.biorxiv.org/content/{doi}v{version}"
|
66
|
+
pdf_url_template: "https://www.biorxiv.org/content/{doi}v{version}.full.pdf"
|
67
|
+
|
68
|
+
# Default values
|
69
|
+
default_version: "1"
|
70
|
+
|
71
|
+
# Cloudflare-bypass settings
|
72
|
+
cf_clearance_timeout: 30
|
73
|
+
session_reuse: true
|
74
|
+
browser_config:
|
75
|
+
type: "custom" # Used for cloudscraper browser configuration
|
76
|
+
|
77
|
+
# Service-specific settings
|
78
|
+
service_name: "bioRxiv"
|
79
|
+
identifier_type: "DOI"
|
80
|
+
supports_batch: true
|
81
|
+
|
82
|
+
pubmed:
|
83
|
+
# Primary APIs
|
84
|
+
id_converter_url: "https://pmc.ncbi.nlm.nih.gov/tools/idconv/api/v1/articles"
|
85
|
+
oa_api_url: "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi"
|
86
|
+
|
87
|
+
# Alternative PDF Sources
|
88
|
+
europe_pmc_base_url: "https://europepmc.org/backend/ptpmcrender.fcgi"
|
89
|
+
pmc_page_base_url: "https://www.ncbi.nlm.nih.gov/pmc/articles"
|
90
|
+
direct_pmc_pdf_base_url: "https://pmc.ncbi.nlm.nih.gov/articles"
|
91
|
+
|
92
|
+
# URL Conversion for NCBI FTP links
|
93
|
+
ftp_base_url: "ftp://ftp.ncbi.nlm.nih.gov"
|
94
|
+
https_base_url: "https://ftp.ncbi.nlm.nih.gov"
|
95
|
+
|
96
|
+
# API configuration
|
97
|
+
id_converter_format: "json"
|
98
|
+
|
99
|
+
# Page scraping configuration
|
100
|
+
pdf_meta_name: "citation_pdf_url"
|
101
|
+
|
102
|
+
# Error handling
|
103
|
+
default_error_code: "unknown"
|
104
|
+
|
105
|
+
# PubMed-specific settings
|
106
|
+
service_name: "PubMed"
|
107
|
+
identifier_type: "PMID"
|
108
|
+
supports_batch: true
|
109
|
+
log_response_preview_chars: 500 # chars to log from API responses
|
110
|
+
|
111
|
+
# Global configuration for all services
|
112
|
+
supported_services: ["arxiv", "medrxiv", "biorxiv", "pubmed"]
|
113
|
+
|
114
|
+
# Tool configuration
|
115
|
+
tool:
|
116
|
+
name: "download_papers"
|
117
|
+
description: "Universal paper download tool supporting arXiv, medRxiv, bioRxiv, and PubMed"
|
118
|
+
supported_services: ["arxiv", "medrxiv", "biorxiv", "pubmed"]
|
119
|
+
default_service: "pubmed"
|
120
|
+
|
121
|
+
# Output configuration
|
122
|
+
max_summary_papers: 3
|
123
|
+
include_abstracts_in_summary: true
|
124
|
+
temp_file_cleanup: false # Set to true to auto-cleanup temp files
|