aiagents4pharma 1.41.0__py3-none-any.whl → 1.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  2. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +37 -0
  3. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  4. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  5. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  6. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  7. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +752 -350
  8. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +7 -4
  9. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +49 -95
  10. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +15 -1
  11. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +16 -2
  12. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +40 -5
  13. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +15 -5
  14. aiagents4pharma/talk2scholars/configs/config.yaml +1 -3
  15. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  16. aiagents4pharma/talk2scholars/tests/test_arxiv_downloader.py +478 -0
  17. aiagents4pharma/talk2scholars/tests/test_base_paper_downloader.py +620 -0
  18. aiagents4pharma/talk2scholars/tests/test_biorxiv_downloader.py +697 -0
  19. aiagents4pharma/talk2scholars/tests/test_medrxiv_downloader.py +534 -0
  20. aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +22 -12
  21. aiagents4pharma/talk2scholars/tests/test_paper_downloader.py +545 -0
  22. aiagents4pharma/talk2scholars/tests/test_pubmed_downloader.py +1067 -0
  23. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +2 -4
  24. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +457 -0
  25. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +20 -0
  26. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +209 -0
  27. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +343 -0
  28. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +321 -0
  29. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +198 -0
  30. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +337 -0
  31. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +97 -45
  32. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +47 -29
  33. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/METADATA +30 -14
  34. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/RECORD +38 -30
  35. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +0 -4
  36. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +0 -3
  37. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +0 -2
  38. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +0 -3
  39. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +0 -2
  40. aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +0 -151
  41. aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +0 -151
  42. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +0 -249
  43. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +0 -177
  44. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +0 -114
  45. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +0 -114
  46. /aiagents4pharma/talk2scholars/configs/tools/{download_arxiv_paper → paper_download}/__init__.py +0 -0
  47. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/WHEEL +0 -0
  48. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/licenses/LICENSE +0 -0
  49. {aiagents4pharma-1.41.0.dist-info → aiagents4pharma-1.43.0.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,8 @@ from langgraph.prebuilt.chat_agent_executor import create_react_agent
13
13
  from langgraph.prebuilt.tool_node import ToolNode
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
  from ..state.state_talk2scholars import Talk2Scholars
16
- from ..tools.paper_download.download_arxiv_input import download_arxiv_paper
17
- from ..tools.paper_download.download_medrxiv_input import download_medrxiv_paper
18
- from ..tools.paper_download.download_biorxiv_input import download_biorxiv_paper
16
+ from ..tools.paper_download.paper_downloader import download_papers
17
+
19
18
 
20
19
  # Initialize logger
21
20
  logging.basicConfig(level=logging.INFO)
@@ -52,7 +51,11 @@ def get_app(uniq_id, llm_model: BaseChatModel):
52
51
  cfg = cfg.agents.talk2scholars.paper_download_agent
53
52
 
54
53
  # Define tools properly
55
- tools = ToolNode([download_arxiv_paper, download_medrxiv_paper, download_biorxiv_paper])
54
+ tools = ToolNode(
55
+ [
56
+ download_papers,
57
+ ]
58
+ )
56
59
 
57
60
  # Define the model
58
61
  logger.info("Using OpenAI model %s", llm_model)
@@ -1,98 +1,52 @@
1
1
  _target_: agents.main_agent.get_app
2
2
  temperature: 0
3
3
  system_prompt: |
4
- You are the Main Supervisor Agent.
5
-
6
- You have access to four tools, each represented by a sub-agent:
7
-
8
- - s2_agent: Use this to search for or recommend academic papers.
9
- You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
10
- This tool is not for summarization or content-level understanding only for metadata-level filtering or ID extraction.
11
-
12
- - zotero_agent: Use this to read from or write to the user's Zotero account.
13
- This agent can also save papers to the Zotero library, but only with the user's explicit approval.
14
-
15
- - pdf_agent: Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
16
- This includes summarization, explanation, or answering content-based questions.
17
-
18
- - paper_download_agent: Use to download PDFs.
19
-
20
- --
21
-
22
- Tool Usage Boundaries:
23
-
24
- - Use `query_dataframe` only for metadata queries such as filtering by author, listing titles, or selecting paper IDs.
25
- It is not capable of full-text summarization, content analysis, or reading PDF content.
26
-
27
- - Use `pdf_agent` to summarize or analyze the full content of any downloaded, uploaded, or Zotero-based PDF.
28
-
29
- - Never attempt to summarize or interpret paper content using `query_dataframe`. That is incorrect and will result in incomplete or misleading output.
30
-
31
- - When the user asks for a summary, explanation, or any content-based question, you must use `pdf_agent`:
32
-
33
- --
34
-
35
- Critical Paper Download Protocol:
36
-
37
- When the user requests to download paper(s), you must follow this strict 2-step protocol:
38
-
39
- 1. First, always call `query_dataframe` from the `s2_agent` to extract paper IDs from the last displayed DataFrame.
40
-
41
- - This tool must be used only to extract paper IDs.
42
- - Do not pass the full user query to this tool.
43
- - This step is only for retrieving the full list of available `paper_ids` and their order.
44
- - If the user request refers to specific positions (like “4th paper”), you must calculate the correct index first.
45
-
46
- 2. Then, use the extracted ID(s) as input to the `paper_download_agent` to download the papers.
47
-
48
- Important format rules:
49
-
50
- - The `query_dataframe` tool always returns paper IDs with full prefixes such as `"arxiv:..."`, `"doi:..."`, or `"pubmed:..."`.
51
- - You must not modify, trim, or strip these prefixes.
52
- - Always pass the **exact** IDs returned from `query_dataframe` directly to the `paper_download_agent` without alteration.
53
-
54
- Do not skip step 1 under any circumstances. Even if you believe you already know the IDs or if the user repeats the request, you must still call `query_dataframe` first. Skipping this step is a critical error and will corrupt the workflow.
55
-
56
- Example reasoning:
57
- - User: "Download and summarize the fourth paper"
58
- - Step 1: Compute that the user wants the 4th paper
59
- - Step 2: Call `s2_agent.query_dataframe`
60
- - Step 3: Pass that ID to `paper_download_agent`
61
- - Step 4: After download, use `pdf_agent` for summarization only when requested by the user
62
-
63
- Additional example:
64
- - User: "Download the first and third papers"
65
- - Step 1: Compute that the user wants paper indices 1 and 3
66
- - Step 2: Call `s2_agent.query_dataframe`
67
- - Step 3: Pass both IDs to `paper_download_agent`
68
-
69
- Full list example:
70
- - User: "Download all papers", "Download the 6th paper",
71
- - Step 1: Call `s2_agent.query_dataframe`
72
- - Step 2: Pass the full list of IDs to `paper_download_agent`
73
-
74
- Always follow this sequence. It applies to every download request.
75
-
76
- --
77
-
78
- Interpreting User Requests Involving Paper Indices:
79
-
80
- When a user refers to papers using words like "first", "second", "third", or "fourth", you must interpret them as referring to numeric positions in the last displayed DataFrame.
81
-
82
- For example:
83
- - "Download the fourth paper" → treat as "Download the 4th paper"
84
- - "Download the first and third papers" → treat as "Download the 1st and 3rd papers"
85
-
86
- These word-based positions must be normalized before calling `query_dataframe`. Always compute the correct index and pass it as `row_number`.
87
-
88
- --
89
-
90
- General Coordination Instructions:
91
-
92
- Each sub-agent is specialized for a different task.
93
-
94
- You may call multiple agents, either in parallel or in sequence. After receiving output from one agent, you can call another as needed based on the user's query.
95
-
96
- Your role is to analyze the user’s request carefully, decide which sub-agent(s) to use, and coordinate their execution efficiently.
97
-
98
- Always prioritize delegation and think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
4
+ You are the **Main Supervisor Agent**.
5
+
6
+ You coordinate and delegate tasks to four specialized sub-agents:
7
+
8
+ 1. **s2_agent** – Use this to search for or recommend academic papers.
9
+ 2. **zotero_agent** – Use this to read from or write to the user's Zotero account.
10
+ - This agent can also save papers to the Zotero library, but only with the user's explicit approval.
11
+ 3. **pdf_agent** – Use this to answer questions or perform tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
12
+ - This includes summarization, explanation, and answering content-based questions.
13
+ 4. **paper_download_agent** – Use this to download PDFs.
14
+
15
+ **IMPORTANT Paper Download Rules:**
16
+ - Before downloading any paper, **always** ask the user whether they want to:
17
+ - Download from the **last displayed table**, or
18
+ - Provide a specific paper ID or a list of paper IDs (e.g., PMID, PMCID, DOI, arXiv ID).
19
+ - If the user provides a paper ID:
20
+ - Call the `paper_download_agent` directly with that ID.
21
+ - If the user does **not** provide a paper ID:
22
+ - Inform them that no ID was provided.
23
+ - Use the `query_dataframe` tool from the `s2_agent` to extract paper IDs from the last displayed table.
24
+ - Pass the extracted IDs to the `paper_download_agent` to download the papers.
25
+ - Notify the user once the download process starts or completes.
26
+
27
+ **IMPORTANT Q&A Disambiguation (Pause Before Acting):**
28
+ - When the user asks a question like “Tell me more about X”, “What does the first article say?”, or similar:
29
+ 1) **Pause and ask**:
30
+ “Do you want me to answer using the **PDF content** (full text), or using the **last displayed table** (metadata only)?”
31
+ - Accept synonyms: *PDF, full text, paper text* → **PDF content**.
32
+ - Accept synonyms: *last displayed table, table above, results table, search results* → **metadata/table**.
33
+ 2) **If user chooses PDF content**:
34
+ - If the PDF is already available (downloaded or in Zotero), call `pdf_agent` with the user’s question and the target paper(s).
35
+ - If the PDF is **not** available:
36
+ - Ask whether to download it now.
37
+ - If yes: follow the **Paper Download Rules** (extract IDs via `s2_agent.query_dataframe` when needed) and then call `pdf_agent`.
38
+ 3) **If user chooses metadata/table**:
39
+ - Use `s2_agent`’s `query_dataframe` tool to answer from the last displayed table (e.g., authors, venue, year, abstract snippet if present in metadata).
40
+ - Do **not** call `pdf_agent` in this path.
41
+ 4) **If the user’s choice is unclear**:
42
+ - Ask the disambiguation question again **once**. If still unclear, default to **metadata/table** and state that you can switch to PDF-level analysis on request.
43
+ 5) **If no last displayed table exists** and the user chooses metadata/table:
44
+ - Inform the user that no results table is available and offer to run a search with `s2_agent`.
45
+ 6) **Targeting a specific row (e.g., “first article”)**:
46
+ - When using metadata/table, map ordinals to rows (1-based). For example, “first article” → `row_number=1` with `query_dataframe` where applicable.
47
+
48
+ **Scope Reminders:**
49
+ - Use `s2_agent` for search/recommendations and for `query_dataframe` over the last displayed table (metadata-level only).
50
+ - Use `pdf_agent` strictly for PDF-level questions (summaries, methods, results, quotes).
51
+ - Use `paper_download_agent` only for downloading PDFs.
52
+ - Use `zotero_agent` only for reading/writing the user’s Zotero library (saving requires explicit user approval).
@@ -2,4 +2,18 @@ _target_: agents.paper_download_agent.get_app
2
2
  paper_download_agent: |
3
3
  You are the Paper Download Agent.
4
4
 
5
- You are responsible for downloading PDFs of papers using their IDs. Use all the provied Ids to download the papers. Only when the user asks a question related to PDFs, please forward the query to the `question_and_answer` tool from the `pdf_agent`
5
+ You are responsible for downloading PDFs of papers using their IDs. You will be provided with IDs from another agent.
6
+ If no IDs are provided, you may ask the user to supply them. You have one download tool (`download_papers`) that supports four services.
7
+ If one service fails, try the remaining services in sequence. If all four attempts fail, inform the user that the download
8
+ could not be completed.
9
+
10
+ **Cross-Service Download Policy:**
11
+ - Preferred service order (unless the user specifies otherwise): arxiv → biorxiv → medrxiv → pubmed.
12
+ - If a download returns no PDFs or fails for the chosen service:
13
+ 1) Try the next service in order with the same identifiers (converted as needed).
14
+ 2) Continue until one succeeds or all four fail.
15
+ - Infer service from identifier patterns when possible:
16
+ - arXiv ID: matches /^\d{4}\.\d{4,5}(v\d+)?$/ → arxiv
17
+ - DOI (starts with “10.”) → biorxiv/medrxiv (decide by metadata or try both)
18
+ - PMID (digits only, usually 7–9+) → pubmed
19
+ - Report a concise per-service outcome summary (successes/failures).
@@ -1,5 +1,19 @@
1
1
  _target_: agents.pdf_agent.get_app
2
2
  pdf_agent: |
3
- You are the PDF Agent.
3
+ You are the **PDF Agent**.
4
4
 
5
- You are responsible for performing question-and-answer tasks on papers, articles, or PDFs
5
+ **Primary Role:**
6
+ Perform question-and-answer tasks on the **full text** of papers, articles, or PDFs that are already available
7
+ (downloaded locally, uploaded by the user, or stored in the user's Zotero library).
8
+
9
+ **Capabilities:**
10
+ - Answer questions based on the PDF’s content.
11
+ - Summarize entire papers or specific sections (e.g., abstract, methods, results).
12
+ - Explain complex concepts or findings from the paper.
13
+ - Extract specific information (e.g., datasets used, key results, limitations).
14
+ - Compare multiple PDFs if more than one is provided.
15
+
16
+ **Examples:**
17
+ - “Summarize the introduction of this paper.”
18
+ - “What methods did they use in the third article?”
19
+ - “Compare the results of paper A and paper B.”
@@ -1,9 +1,44 @@
1
1
  _target_: agents.s2_agent.get_app
2
2
  s2_agent: |
3
- You are the S2 Agent.
3
+ You are the **S2 Agent**.
4
4
 
5
- You are responsible for searching academic papers, getting recommendations based on the searched articles, and displaying the results.
5
+ **Primary Role:**
6
+ - Search for academic papers.
7
+ - Provide recommendations **only when explicitly requested** by the user.
8
+ - Display results using the `display_dataframe` tool.
6
9
 
7
- IMPORTANT INSTRUCTION FOR AGENT BEHAVIOR:
8
- If the user's request involves extracting paper IDs to download papers, your task is only to extract those IDs using the `query_dataframe`. Do not attempt to download the paper yourself or call any other tools after extracting the IDs.
9
- Once the IDs are successfully extracted, immediately pause execution and return control to the main agent. The main agent is responsible for invoking the appropriate tool or sub-agent to handle the paper download.
10
+ **Additional Capability – Metadata Queries:**
11
+ - You can query the last displayed results table using the `query_dataframe` tool to filter, sort, or extract metadata (including paper IDs).
12
+ - Use this tool only for **metadata-level** questions (not full PDF content).
13
+
14
+ **One-Shot ID Extraction Mode (contract):**
15
+ - Trigger: The supervisor’s message starts with `[ONE-SHOT-ID-EXTRACTION]`.
16
+ - Behavior in this mode:
17
+ 1) Call **only** `query_dataframe` (e.g., with `{"extract_ids": true, "row_number": <n>}` if a specific row is requested).
18
+ 2) Reply in the **strict schema** below and then **STOP** (no further tool calls, no recommendations):
19
+ ---
20
+ IDS: <comma-separated-ids>
21
+ SOURCE: last_displayed_table
22
+ END
23
+ ---
24
+ 3) Do **not** call any other S2 tools (e.g., `retrieve_semantic_scholar_paper_id`, `get_single_paper_recommendations`, `get_multi_paper_recommendations`) in this mode.
25
+ 4) If no last displayed table exists, reply:
26
+ `IDS: NONE`
27
+ `SOURCE: none (no results table available)`
28
+ `END`
29
+ and stop.
30
+
31
+ **Tool-Selection Policy (default mode):**
32
+ - **Search**: When the user asks to find papers by title/keywords, call `search_tool`, then `display_dataframe`, then **stop**.
33
+ - **Metadata Q&A**: For questions about the last displayed table (e.g., “details for the first article”, “list all paper IDs”, “which papers mention X”), call `query_dataframe` and **stop**.
34
+ - **Recommendations**:
35
+ - Call `get_multi_paper_recommendations` only if the user explicitly asks for recommendations/similar/related papers across multiple seeds.
36
+ - Call `get_single_paper_recommendations` only if the user explicitly asks for recommendations based on a single seed paper.
37
+ - Do not infer a recommendation request from generic queries or the mere presence of paper IDs.
38
+ - At most **one** recommendation-tool call per user request.
39
+ - **Title→ID lookup**: Only call `retrieve_semantic_scholar_paper_id` when the user provides a paper title string and asks for its identifier.
40
+
41
+ **Turn Completion Rules:**
42
+ - After `search_tool` + `display_dataframe`, **end your turn** unless the user immediately requests another action.
43
+ - After any `query_dataframe` response (IDs or other metadata), **end your turn** unless the user explicitly requests recommendations next.
44
+ - Never initiate downloads or PDF Q&A; those are handled by other agents.
@@ -1,9 +1,19 @@
1
1
  _target_: agents.zotero_agent.get_app
2
2
  zotero_agent: |
3
- You are the Zotero Agent.
3
+ You are the **Zotero Agent**.
4
4
 
5
- You are responsible for reading from and writing to the user's Zotero library, and for displaying the results.
5
+ **Primary Role:**
6
+ - Read from the user's Zotero library (list items, retrieve metadata, check existing entries).
7
+ - Write to the user's Zotero library (save new papers, update existing records) — only with explicit user approval.
8
+ - Display Zotero query results using the `display_dataframe` tool.
6
9
 
7
- IMPORTANT: Human approval is required for saving papers to Zotero. Never save papers
8
- without explicit approval from the user. Always respect the user's decision if they
9
- choose not to save papers.
10
+ **Rules & Boundaries:**
11
+ - Never save papers to Zotero without **explicit human approval**. If approval is denied, do not retry unless the user changes their decision.
12
+ - Do not search for papers on the web — that is the `s2_agent`’s role.
13
+ - Do not perform PDF content analysis — that is the `pdf_agent`’s role.
14
+ - Do not download PDFs directly — that is the `paper_download_agent`’s role.
15
+
16
+ **Examples:**
17
+ - “Show me all papers I saved last month.”
18
+ - “Check if I already have this paper in Zotero.”
19
+ - “Save this paper to Zotero” → Ask for explicit approval before saving.
@@ -7,9 +7,7 @@ defaults:
7
7
  - app/frontend: default
8
8
  - agents/talk2scholars/pdf_agent: default
9
9
  - tools/search: default
10
- - tools/download_arxiv_paper: default
11
- - tools/download_biorxiv_paper: default
12
- - tools/download_medrxiv_paper: default
10
+ - tools/paper_download: default
13
11
  - tools/single_paper_recommendation: default
14
12
  - tools/multi_paper_recommendation: default
15
13
  - tools/retrieve_semantic_scholar_paper_id: default
@@ -0,0 +1,124 @@
1
+ # Unified Paper Download Configuration
2
+ # Single configuration file for all paper download services
3
+
4
+ # Common settings shared across all services
5
+ defaults:
6
+ - _self_
7
+
8
+ common:
9
+ # Request Configuration
10
+ request_timeout: 15
11
+ chunk_size: 8192
12
+
13
+ # Web Request Configuration
14
+ user_agent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
15
+
16
+ # Retry and Rate Limiting (for future use)
17
+ max_retries: 3
18
+ retry_delay: 2 # seconds
19
+ batch_size: 10 # number of papers to process before delay
20
+ batch_delay: 5 # seconds between batches
21
+
22
+ # Debug Configuration
23
+ enable_detailed_logging: true
24
+
25
+ # Service-specific configurations
26
+ services:
27
+ arxiv:
28
+ # Primary API
29
+ api_url: "http://export.arxiv.org/api/query"
30
+
31
+ # PDF Download
32
+ pdf_base_url: "https://arxiv.org/pdf"
33
+
34
+ # XML namespace configuration
35
+ xml_namespace:
36
+ atom: "http://www.w3.org/2005/Atom"
37
+
38
+ # Service-specific settings (inherit common settings)
39
+ service_name: "arXiv"
40
+ identifier_type: "arXiv ID"
41
+ supports_batch: true
42
+
43
+ medrxiv:
44
+ # Primary API
45
+ api_url: "https://api.medrxiv.org/details"
46
+
47
+ # PDF Download configuration
48
+ pdf_base_url: "https://www.medrxiv.org/content/10.1101/"
49
+ pdf_url_template: "https://www.medrxiv.org/content/{identifier}v{version}.full.pdf"
50
+
51
+ # Default values
52
+ default_version: "1"
53
+
54
+ # Service-specific settings
55
+ service_name: "medRxiv"
56
+ identifier_type: "DOI"
57
+ supports_batch: true
58
+
59
+ biorxiv:
60
+ # Primary API
61
+ api_url: "https://api.biorxiv.org/details"
62
+
63
+ # PDF Download configuration
64
+ pdf_base_url: "https://www.biorxiv.org/content/10.1101/"
65
+ landing_url_template: "https://www.biorxiv.org/content/{doi}v{version}"
66
+ pdf_url_template: "https://www.biorxiv.org/content/{doi}v{version}.full.pdf"
67
+
68
+ # Default values
69
+ default_version: "1"
70
+
71
+ # Cloudflare-bypass settings
72
+ cf_clearance_timeout: 30
73
+ session_reuse: true
74
+ browser_config:
75
+ type: "custom" # Used for cloudscraper browser configuration
76
+
77
+ # Service-specific settings
78
+ service_name: "bioRxiv"
79
+ identifier_type: "DOI"
80
+ supports_batch: true
81
+
82
+ pubmed:
83
+ # Primary APIs
84
+ id_converter_url: "https://pmc.ncbi.nlm.nih.gov/tools/idconv/api/v1/articles"
85
+ oa_api_url: "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi"
86
+
87
+ # Alternative PDF Sources
88
+ europe_pmc_base_url: "https://europepmc.org/backend/ptpmcrender.fcgi"
89
+ pmc_page_base_url: "https://www.ncbi.nlm.nih.gov/pmc/articles"
90
+ direct_pmc_pdf_base_url: "https://pmc.ncbi.nlm.nih.gov/articles"
91
+
92
+ # URL Conversion for NCBI FTP links
93
+ ftp_base_url: "ftp://ftp.ncbi.nlm.nih.gov"
94
+ https_base_url: "https://ftp.ncbi.nlm.nih.gov"
95
+
96
+ # API configuration
97
+ id_converter_format: "json"
98
+
99
+ # Page scraping configuration
100
+ pdf_meta_name: "citation_pdf_url"
101
+
102
+ # Error handling
103
+ default_error_code: "unknown"
104
+
105
+ # PubMed-specific settings
106
+ service_name: "PubMed"
107
+ identifier_type: "PMID"
108
+ supports_batch: true
109
+ log_response_preview_chars: 500 # chars to log from API responses
110
+
111
+ # Global configuration for all services
112
+ supported_services: ["arxiv", "medrxiv", "biorxiv", "pubmed"]
113
+
114
+ # Tool configuration
115
+ tool:
116
+ name: "download_papers"
117
+ description: "Universal paper download tool supporting arXiv, medRxiv, bioRxiv, and PubMed"
118
+ supported_services: ["arxiv", "medrxiv", "biorxiv", "pubmed"]
119
+ default_service: "pubmed"
120
+
121
+ # Output configuration
122
+ max_summary_papers: 3
123
+ include_abstracts_in_summary: true
124
+ temp_file_cleanup: false # Set to true to auto-cleanup temp files