paper-search-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/.env.example +165 -0
  2. package/LICENSE +21 -0
  3. package/README-sc.md +642 -0
  4. package/README.md +642 -0
  5. package/dist/cli.d.ts +3 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +637 -0
  8. package/dist/cli.js.map +1 -0
  9. package/dist/config/ConfigService.d.ts +26 -0
  10. package/dist/config/ConfigService.d.ts.map +1 -0
  11. package/dist/config/ConfigService.js +145 -0
  12. package/dist/config/ConfigService.js.map +1 -0
  13. package/dist/config/constants.d.ts +140 -0
  14. package/dist/config/constants.d.ts.map +1 -0
  15. package/dist/config/constants.js +93 -0
  16. package/dist/config/constants.js.map +1 -0
  17. package/dist/core/diagnostics.d.ts +43 -0
  18. package/dist/core/diagnostics.d.ts.map +1 -0
  19. package/dist/core/diagnostics.js +544 -0
  20. package/dist/core/diagnostics.js.map +1 -0
  21. package/dist/core/handleToolCall.d.ts +8 -0
  22. package/dist/core/handleToolCall.d.ts.map +1 -0
  23. package/dist/core/handleToolCall.js +440 -0
  24. package/dist/core/handleToolCall.js.map +1 -0
  25. package/dist/core/schemas.d.ts +454 -0
  26. package/dist/core/schemas.d.ts.map +1 -0
  27. package/dist/core/schemas.js +322 -0
  28. package/dist/core/schemas.js.map +1 -0
  29. package/dist/core/searchers.d.ts +45 -0
  30. package/dist/core/searchers.d.ts.map +1 -0
  31. package/dist/core/searchers.js +73 -0
  32. package/dist/core/searchers.js.map +1 -0
  33. package/dist/core/tools.d.ts +7 -0
  34. package/dist/core/tools.d.ts.map +1 -0
  35. package/dist/core/tools.js +640 -0
  36. package/dist/core/tools.js.map +1 -0
  37. package/dist/models/Paper.d.ts +64 -0
  38. package/dist/models/Paper.d.ts.map +1 -0
  39. package/dist/models/Paper.js +70 -0
  40. package/dist/models/Paper.js.map +1 -0
  41. package/dist/platforms/ArxivSearcher.d.ts +64 -0
  42. package/dist/platforms/ArxivSearcher.d.ts.map +1 -0
  43. package/dist/platforms/ArxivSearcher.js +531 -0
  44. package/dist/platforms/ArxivSearcher.js.map +1 -0
  45. package/dist/platforms/BioRxivSearcher.d.ts +47 -0
  46. package/dist/platforms/BioRxivSearcher.d.ts.map +1 -0
  47. package/dist/platforms/BioRxivSearcher.js +196 -0
  48. package/dist/platforms/BioRxivSearcher.js.map +1 -0
  49. package/dist/platforms/CORESearcher.d.ts +16 -0
  50. package/dist/platforms/CORESearcher.d.ts.map +1 -0
  51. package/dist/platforms/CORESearcher.js +148 -0
  52. package/dist/platforms/CORESearcher.js.map +1 -0
  53. package/dist/platforms/CrossrefSearcher.d.ts +34 -0
  54. package/dist/platforms/CrossrefSearcher.d.ts.map +1 -0
  55. package/dist/platforms/CrossrefSearcher.js +339 -0
  56. package/dist/platforms/CrossrefSearcher.js.map +1 -0
  57. package/dist/platforms/EuropePMCSearcher.d.ts +20 -0
  58. package/dist/platforms/EuropePMCSearcher.d.ts.map +1 -0
  59. package/dist/platforms/EuropePMCSearcher.js +173 -0
  60. package/dist/platforms/EuropePMCSearcher.js.map +1 -0
  61. package/dist/platforms/GoogleScholarSearcher.d.ts +77 -0
  62. package/dist/platforms/GoogleScholarSearcher.d.ts.map +1 -0
  63. package/dist/platforms/GoogleScholarSearcher.js +262 -0
  64. package/dist/platforms/GoogleScholarSearcher.js.map +1 -0
  65. package/dist/platforms/IACRSearcher.d.ts +51 -0
  66. package/dist/platforms/IACRSearcher.d.ts.map +1 -0
  67. package/dist/platforms/IACRSearcher.js +339 -0
  68. package/dist/platforms/IACRSearcher.js.map +1 -0
  69. package/dist/platforms/OpenAIRESearcher.d.ts +22 -0
  70. package/dist/platforms/OpenAIRESearcher.d.ts.map +1 -0
  71. package/dist/platforms/OpenAIRESearcher.js +223 -0
  72. package/dist/platforms/OpenAIRESearcher.js.map +1 -0
  73. package/dist/platforms/OpenAlexSearcher.d.ts +14 -0
  74. package/dist/platforms/OpenAlexSearcher.d.ts.map +1 -0
  75. package/dist/platforms/OpenAlexSearcher.js +114 -0
  76. package/dist/platforms/OpenAlexSearcher.js.map +1 -0
  77. package/dist/platforms/PMCSearcher.d.ts +20 -0
  78. package/dist/platforms/PMCSearcher.d.ts.map +1 -0
  79. package/dist/platforms/PMCSearcher.js +177 -0
  80. package/dist/platforms/PMCSearcher.js.map +1 -0
  81. package/dist/platforms/PaperSource.d.ts +143 -0
  82. package/dist/platforms/PaperSource.d.ts.map +1 -0
  83. package/dist/platforms/PaperSource.js +125 -0
  84. package/dist/platforms/PaperSource.js.map +1 -0
  85. package/dist/platforms/PubMedSearcher.d.ts +104 -0
  86. package/dist/platforms/PubMedSearcher.d.ts.map +1 -0
  87. package/dist/platforms/PubMedSearcher.js +422 -0
  88. package/dist/platforms/PubMedSearcher.js.map +1 -0
  89. package/dist/platforms/SciHubSearcher.d.ts +66 -0
  90. package/dist/platforms/SciHubSearcher.d.ts.map +1 -0
  91. package/dist/platforms/SciHubSearcher.js +398 -0
  92. package/dist/platforms/SciHubSearcher.js.map +1 -0
  93. package/dist/platforms/ScienceDirectSearcher.d.ts +42 -0
  94. package/dist/platforms/ScienceDirectSearcher.d.ts.map +1 -0
  95. package/dist/platforms/ScienceDirectSearcher.js +326 -0
  96. package/dist/platforms/ScienceDirectSearcher.js.map +1 -0
  97. package/dist/platforms/ScopusSearcher.d.ts +43 -0
  98. package/dist/platforms/ScopusSearcher.d.ts.map +1 -0
  99. package/dist/platforms/ScopusSearcher.js +364 -0
  100. package/dist/platforms/ScopusSearcher.js.map +1 -0
  101. package/dist/platforms/SemanticScholarSearcher.d.ts +96 -0
  102. package/dist/platforms/SemanticScholarSearcher.d.ts.map +1 -0
  103. package/dist/platforms/SemanticScholarSearcher.js +419 -0
  104. package/dist/platforms/SemanticScholarSearcher.js.map +1 -0
  105. package/dist/platforms/SpringerSearcher.d.ts +54 -0
  106. package/dist/platforms/SpringerSearcher.d.ts.map +1 -0
  107. package/dist/platforms/SpringerSearcher.js +407 -0
  108. package/dist/platforms/SpringerSearcher.js.map +1 -0
  109. package/dist/platforms/UnpaywallSearcher.d.ts +18 -0
  110. package/dist/platforms/UnpaywallSearcher.d.ts.map +1 -0
  111. package/dist/platforms/UnpaywallSearcher.js +115 -0
  112. package/dist/platforms/UnpaywallSearcher.js.map +1 -0
  113. package/dist/platforms/WebOfScienceSearcher.d.ts +111 -0
  114. package/dist/platforms/WebOfScienceSearcher.d.ts.map +1 -0
  115. package/dist/platforms/WebOfScienceSearcher.js +500 -0
  116. package/dist/platforms/WebOfScienceSearcher.js.map +1 -0
  117. package/dist/platforms/WileySearcher.d.ts +44 -0
  118. package/dist/platforms/WileySearcher.d.ts.map +1 -0
  119. package/dist/platforms/WileySearcher.js +148 -0
  120. package/dist/platforms/WileySearcher.js.map +1 -0
  121. package/dist/services/CitationService.d.ts +66 -0
  122. package/dist/services/CitationService.d.ts.map +1 -0
  123. package/dist/services/CitationService.js +237 -0
  124. package/dist/services/CitationService.js.map +1 -0
  125. package/dist/services/MultiSourceSearchService.d.ts +19 -0
  126. package/dist/services/MultiSourceSearchService.d.ts.map +1 -0
  127. package/dist/services/MultiSourceSearchService.js +96 -0
  128. package/dist/services/MultiSourceSearchService.js.map +1 -0
  129. package/dist/services/OpenAccessFallbackService.d.ts +20 -0
  130. package/dist/services/OpenAccessFallbackService.d.ts.map +1 -0
  131. package/dist/services/OpenAccessFallbackService.js +124 -0
  132. package/dist/services/OpenAccessFallbackService.js.map +1 -0
  133. package/dist/utils/ErrorHandler.d.ts +99 -0
  134. package/dist/utils/ErrorHandler.d.ts.map +1 -0
  135. package/dist/utils/ErrorHandler.js +266 -0
  136. package/dist/utils/ErrorHandler.js.map +1 -0
  137. package/dist/utils/Logger.d.ts +6 -0
  138. package/dist/utils/Logger.d.ts.map +1 -0
  139. package/dist/utils/Logger.js +26 -0
  140. package/dist/utils/Logger.js.map +1 -0
  141. package/dist/utils/PDFExtractor.d.ts +34 -0
  142. package/dist/utils/PDFExtractor.d.ts.map +1 -0
  143. package/dist/utils/PDFExtractor.js +130 -0
  144. package/dist/utils/PDFExtractor.js.map +1 -0
  145. package/dist/utils/PdfDownload.d.ts +7 -0
  146. package/dist/utils/PdfDownload.d.ts.map +1 -0
  147. package/dist/utils/PdfDownload.js +52 -0
  148. package/dist/utils/PdfDownload.js.map +1 -0
  149. package/dist/utils/QuotaManager.d.ts +32 -0
  150. package/dist/utils/QuotaManager.d.ts.map +1 -0
  151. package/dist/utils/QuotaManager.js +95 -0
  152. package/dist/utils/QuotaManager.js.map +1 -0
  153. package/dist/utils/RateLimiter.d.ts +50 -0
  154. package/dist/utils/RateLimiter.d.ts.map +1 -0
  155. package/dist/utils/RateLimiter.js +121 -0
  156. package/dist/utils/RateLimiter.js.map +1 -0
  157. package/dist/utils/RequestCache.d.ts +26 -0
  158. package/dist/utils/RequestCache.d.ts.map +1 -0
  159. package/dist/utils/RequestCache.js +66 -0
  160. package/dist/utils/RequestCache.js.map +1 -0
  161. package/dist/utils/SecurityUtils.d.ts +80 -0
  162. package/dist/utils/SecurityUtils.d.ts.map +1 -0
  163. package/dist/utils/SecurityUtils.js +357 -0
  164. package/dist/utils/SecurityUtils.js.map +1 -0
  165. package/package.json +111 -0
  166. package/skills/paper-search/SKILL.md +192 -0
package/README.md ADDED
@@ -0,0 +1,642 @@
1
+ # Paper Search CLI
2
+
3
+ [中文](README-sc.md)
4
+
5
+ Paper Search CLI is a standalone Node.js command line tool for searching, validating, and downloading academic papers from multiple scholarly sources. It is designed for direct terminal use, automation scripts, and agent workflows that need a stable command surface with predictable JSON output.
6
+
7
+ It keeps the broad platform coverage, unified paper model, and detailed capability descriptions of the earlier Paper Search implementation, but runs as a normal CLI process. There is no long-running background service to configure, start, or keep alive.
8
+
9
+ ![Node.js](https://img.shields.io/badge/node.js->=18.0.0-green.svg)
10
+ ![TypeScript](https://img.shields.io/badge/typescript-^5.5.3-blue.svg)
11
+ ![License](https://img.shields.io/badge/license-MIT-blue.svg)
12
+ ![Platforms](https://img.shields.io/badge/platforms-20-brightgreen.svg)
13
+ ![Version](https://img.shields.io/badge/version-0.1.0-blue.svg)
14
+
15
+ [Quick Start](#quick-start) · [Configuration](#configuration) · [Agent Skill](#agent-skill) · [Supported Platforms](#supported-platforms) · [Commands](#commands) · [Tool Reference](#tool-reference) · [Troubleshooting](#troubleshooting)
16
+
17
+ ## Design Goals
18
+
19
+ - **Free-first retrieval**: prefer public metadata and open-access full-text routes before restricted or fragile sources.
20
+ - **One command surface**: keep search, status, download, and precise tool calls behind the same executable.
21
+ - **Agent-safe output**: produce predictable JSON that can be parsed without scraping terminal text.
22
+ - **Transparent source behavior**: document which platforms provide metadata only, which can download PDFs, and which need API keys.
23
+ - **No hidden background process**: each command starts, returns a result, and exits.
24
+
25
+ ## Key Features
26
+
27
+ - **20 academic sources/platforms**: Crossref, OpenAlex, PubMed, PubMed Central, Europe PMC, arXiv, bioRxiv, medRxiv, Semantic Scholar, CORE, OpenAIRE, Web of Science, Google Scholar, IACR ePrint, Sci-Hub, ScienceDirect, Springer Nature, Wiley, Scopus, and Unpaywall.
28
+ - **Single command interface**: install once, then call `paper-search` from terminal, scripts, or agents.
29
+ - **JSON-first output**: stdout is machine-readable JSON by default; stderr is reserved for human-readable diagnostics.
30
+ - **Unified paper model**: normalized title, authors, DOI, source, dates, abstract, PDF URL, citation count, and provider-specific metadata where available.
31
+ - **Multi-source search with dedupe**: query selected sources with `--sources crossref,openalex,pmc`, or use the curated `--platform all` fan-out; duplicates are merged by DOI and by title/author keys.
32
+ - **Semantic Scholar body-snippet search**: `search_semantic_snippets` searches Semantic Scholar's Open Access snippet index for body-text snippets, which is useful for finding methodological details. It requires `SEMANTIC_SCHOLAR_API_KEY`.
33
+ - **Open-access-first fallback download**: `download_with_fallback` tries the native source download first, then discovered PDF URLs, PMC/Europe PMC/CORE/OpenAIRE, and Unpaywall DOI resolution, and finally Sci-Hub, but only when it is explicitly enabled.
34
+ - **Rate limits and retry logic**: platform-specific rate limiting and retryable API error handling.
35
+ - **PDF download support**: download from supported sources such as arXiv, bioRxiv, medRxiv, Semantic Scholar, IACR, Sci-Hub, Springer open access, and Wiley DOI-based access.
36
+ - **Agent-friendly commands**: `tools`, `status`, `search`, `download`, and `run` cover both simple use and precise advanced calls.
37
+
38
+ ## Quick Start
39
+
40
+ ### Install
41
+
42
+ Requires Node.js >= 18.0.0 and npm.
43
+
44
+ ```bash
45
+ npm install -g paper-search-cli
46
+ paper-search setup
47
+ paper-search search "machine learning" --platform crossref --max-results 3 --pretty
48
+ ```
49
+
50
+ Run `paper-search setup` after installation to write optional API keys and emails into the user config.
51
+
52
+ If this repository is still private or the npm package has not been published yet, install from an authenticated checkout instead:
53
+
54
+ ```bash
55
+ git clone git@github.com:dr-dumpling/paper-search-cli.git
56
+ cd paper-search-cli
57
+ npm install
58
+ npm run build
59
+ npm install -g .
60
+ ```
61
+
62
+ ### Common Checks
63
+
64
+ ```bash
65
+ paper-search status --pretty
66
+ paper-search tools --pretty
67
+ paper-search config doctor --pretty
68
+ ```
69
+
70
+ ## Supported Platforms
71
+
72
+ | Platform | Search | Download | Full Text | Citations | API Key | Special Features |
73
+ | --- | --- | --- | --- | --- | --- | --- |
74
+ | Crossref | ✅ | ❌ | ❌ | ✅ | ❌ | Default search platform, broad metadata coverage |
75
+ | OpenAlex | ✅ | 🟡 Conditional | ❌ | ✅ | ❌ | Broad free metadata; can feed fallback downloads when records include OA links |
76
+ | arXiv | ✅ | ✅ | ✅ | ❌ | ❌ | Physics, CS, math, and related preprints |
77
+ | Web of Science | ✅ | ❌ | ❌ | ✅ | ✅ Required | Citation database, date sorting, year ranges |
78
+ | PubMed | ✅ | ❌ | ❌ | ❌ | 🟡 Optional | Biomedical literature through NCBI E-utilities |
79
+ | PubMed Central | ✅ | ✅ | ✅ | ❌ | ❌ | Open biomedical full text and PMC PDFs |
80
+ | Europe PMC | ✅ | ✅ | ✅ | ❌ | ❌ | Biomedical metadata plus open full-text links |
81
+ | Google Scholar | ✅ | ❌ | ❌ | ✅ | ❌ | Broad academic discovery, scrape-based |
82
+ | bioRxiv | ✅ | ✅ | ✅ | ❌ | ❌ | Biology preprints |
83
+ | medRxiv | ✅ | ✅ | ✅ | ❌ | ❌ | Medical preprints |
84
+ | Semantic Scholar | ✅ | ✅ | ✅ Body snippets | ✅ | 🟡 Optional* | AI semantic search + OA body snippets |
85
+ | CORE | ✅ | 🟡 Conditional | 🟡 Conditional | ❌ | 🟡 Optional | Downloads work when records include PDF or full-text links |
86
+ | OpenAIRE | ✅ | 🟡 Conditional | ❌ | ❌ | 🟡 Optional | Can feed fallback downloads when records include open links |
87
+ | Unpaywall | 🟡 Conditional | 🟡 Conditional | ❌ | ❌ | ✅ Required | DOI-only lookup; requires an email; downloads work when an OA PDF is found |
88
+ | IACR ePrint | ✅ | ✅ | ✅ | ❌ | ❌ | Cryptography papers |
89
+ | Sci-Hub | ✅ | ✅ | ❌ | ❌ | ❌ | DOI-based paper lookup and PDF retrieval |
90
+ | ScienceDirect | ✅ | ❌ | ❌ | ✅ | ✅ Required | Elsevier metadata and abstracts |
91
+ | Springer Nature | ✅ | 🟡 Conditional | ❌ | ❌ | ✅ Required | Open-access records can be downloaded; metadata API requires a key |
92
+ | Wiley | ❌ Keyword search | ✅ | ✅ | ❌ | ✅ Required | TDM API, DOI-based PDF download only |
93
+ | Scopus | ✅ | ❌ | ❌ | ✅ | ✅ Required | Abstract and citation database |
94
+
95
+ Notes:
96
+
97
+ - In capability columns, `✅` means directly supported, `❌` means unsupported, and `🟡 Conditional` means support depends on record content or provider constraints, such as DOI-only lookup, available PDF/OA links, or open-access-only downloads.
98
+ - In the API Key column, `❌` means no configuration is needed, `🟡 Optional` means configuration improves limits or stability, and `✅ Required` means the key is required only when you use that platform, not that every new installation should configure it. Unpaywall requires an email rather than a traditional API key.
99
+ - Wiley does not support keyword search through the Wiley TDM API. Use `search_crossref` to find Wiley articles and then use `download_paper` with `platform=wiley` and the DOI.
100
+ - `platform=all` uses a curated fan-out across the more stable free/open/API sources: Crossref, OpenAlex, PubMed, PMC, Europe PMC, arXiv, bioRxiv, medRxiv, IACR, CORE, and OpenAIRE. It intentionally excludes Google Scholar, Sci-Hub, paid-key sources, DOI-only Unpaywall, and rate-limit-prone Semantic Scholar unless requested explicitly.
101
+ - `--sources` accepts a comma-separated source list, for example `--sources crossref,openalex,pmc`.
102
+ - `🟡 Optional*` for Semantic Scholar means optional for regular search; `search_semantic_snippets` body-snippet search requires `SEMANTIC_SCHOLAR_API_KEY`.
103
+
104
+ ## Configuration
105
+
106
+ Most free metadata sources work without configuration. For API keys and emails, prefer the user-level config file so the CLI works from any directory:
107
+
108
+ ```bash
109
+ paper-search setup
110
+ paper-search config set SEMANTIC_SCHOLAR_API_KEY your_semantic_scholar_api_key_here
111
+ paper-search config set PAPER_SEARCH_UNPAYWALL_EMAIL you@example.com
112
+ paper-search config list --pretty
113
+ paper-search config doctor --pretty
114
+ paper-search diagnostics --pretty
115
+ ```
116
+
117
+ The default config path is:
118
+
119
+ ```text
120
+ ~/.config/paper-search-cli/config.json
121
+ ```
122
+
123
+ The file is written with `0600` permissions. `config list` and `config doctor` mask secrets.
124
+
125
+ `paper-search setup` is the guided setup command. By default it asks only for the recommended credentials: the Semantic Scholar API key, the Unpaywall email, the Crossref email, and the CORE API key. Use `paper-search setup --all` to walk through every supported configuration key, or `paper-search setup --keys SEMANTIC_SCHOLAR_API_KEY,CORE_API_KEY` to configure a specific subset.
126
+
127
+ `paper-search diagnostics --pretty` lists every API-key or email-backed capability, the related config keys, whether the required keys are configured, common failure modes, and suggested next checks. Search commands also add a `diagnostic` field when a key-backed platform returns zero results or an auth/permission/rate-limit error.
128
+
129
+ ### API Key Recommendation
130
+
131
+ `paper-search setup` asks only for the credentials that are most useful for ordinary new users. `✅ Required` in the platform table means "required for that platform", not "recommended for every installation".
132
+
133
+ | Level | Config keys | Recommended for new users | Notes |
134
+ | --- | --- | --- | --- |
135
+ | Default recommended | `SEMANTIC_SCHOLAR_API_KEY` | Yes | Enables Semantic Scholar body-snippet search for methodology details and improves request stability. |
136
+ | Default recommended | `PAPER_SEARCH_UNPAYWALL_EMAIL` or `UNPAYWALL_EMAIL` | Yes | Finds open-access PDFs from DOI records; this only needs an email, not an API key. |
137
+ | Default recommended | `CROSSREF_MAILTO` | Yes | Puts Crossref requests in the polite pool, which is better for long-running or frequent searches. |
138
+ | Default recommended | `CORE_API_KEY` or `PAPER_SEARCH_CORE_API_KEY` | Yes | CORE anonymous access is often rate-limited; a key makes open repository search more reliable. |
139
+ | Biomedical-heavy use | `PUBMED_API_KEY`, `NCBI_EMAIL`, `NCBI_TOOL` | Recommended if you use PubMed heavily | Raises NCBI E-utilities limits and identifies the client. |
140
+ | Institution entitlement | `WOS_API_KEY` | Configure only with Web of Science API access | Enables Web of Science search and citation data; requires Clarivate API entitlement. |
141
+ | Institution entitlement | `ELSEVIER_API_KEY` | Configure only with Scopus or ScienceDirect API access | One Elsevier key does not automatically grant both products; Scopus and ScienceDirect need separate entitlements. |
142
+ | Institution entitlement | `SPRINGER_API_KEY`, `SPRINGER_OPENACCESS_API_KEY` | Configure only when you need Springer | Used for Springer metadata and open-access records; 401 usually means an invalid key or missing product access. |
143
+ | Institution entitlement | `WILEY_TDM_TOKEN` | Configure only with Wiley TDM/institutional full-text access | DOI-based download only; availability depends on the token and institutional subscription. |
144
+ | Usually unnecessary | `PAPER_SEARCH_OPENAIRE_API_KEY` or `OPENAIRE_API_KEY` | Not recommended by default | OpenAIRE public search usually works without a key; configure only for account or quota requirements. |
145
+
146
+ You can also import an existing `.env`:
147
+
148
+ ```bash
149
+ paper-search config import-env .env --pretty
150
+ ```
151
+
152
+ Config priority is:
153
+
154
+ 1. Shell environment variables.
155
+ 2. Current working directory `.env`.
156
+ 3. User config file.
157
+ 4. Built-in defaults for free sources.
158
+
159
+ For repo-local development, copying `.env.example` still works:
160
+
161
+ ```bash
162
+ cp .env.example .env
163
+ ```
164
+
165
+ ### Environment Variables
166
+
167
+ ```bash
168
+ # Web of Science, required for Web of Science search
169
+ WOS_API_KEY=your_web_of_science_api_key_here
170
+ WOS_API_VERSION=v1
171
+
172
+ # PubMed, optional; increases rate limit from 3 requests/sec to 10 requests/sec
173
+ PUBMED_API_KEY=your_ncbi_api_key_here
174
+ NCBI_EMAIL=you@example.com
175
+ NCBI_TOOL=paper-search-cli
176
+
177
+ # Semantic Scholar, required for body-snippet search and useful for higher request limits
178
+ SEMANTIC_SCHOLAR_API_KEY=your_semantic_scholar_api_key_here
179
+
180
+ # Elsevier, required for Scopus and ScienceDirect; each product still needs separate entitlement
181
+ ELSEVIER_API_KEY=your_elsevier_api_key_here
182
+
183
+ # Springer Nature, required for Springer search and open access download
184
+ SPRINGER_API_KEY=your_springer_api_key_here
185
+ SPRINGER_OPENACCESS_API_KEY=your_openaccess_api_key_here
186
+
187
+ # Wiley TDM, required for Wiley DOI-based PDF download
188
+ WILEY_TDM_TOKEN=your_wiley_tdm_token_here
189
+
190
+ # Crossref polite pool, optional but recommended
191
+ CROSSREF_MAILTO=you@example.com
192
+
193
+ # Unpaywall, required for DOI-based OA resolution
194
+ PAPER_SEARCH_UNPAYWALL_EMAIL=you@example.com
195
+ UNPAYWALL_EMAIL=you@example.com
196
+
197
+ # CORE, optional but recommended; anonymous access is often heavily rate-limited
198
+ PAPER_SEARCH_CORE_API_KEY=your_core_api_key_here
199
+ CORE_API_KEY=your_core_api_key_here
200
+
201
+ # OpenAIRE, optional; public search works without a key
202
+ PAPER_SEARCH_OPENAIRE_API_KEY=your_openaire_api_key_here
203
+ OPENAIRE_API_KEY=your_openaire_api_key_here
204
+ ```
205
+
206
+ ### API Key Sources
207
+
208
+ - Web of Science: [Clarivate Developer Portal](https://developer.clarivate.com/apis)
209
+ - PubMed: [NCBI API Keys](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/)
210
+ - Semantic Scholar: [Semantic Scholar API](https://www.semanticscholar.org/product/api)
211
+ - Elsevier: [Elsevier Developer Portal](https://dev.elsevier.com/apikey/manage)
212
+ - Springer Nature: [Springer Nature Developers](https://dev.springernature.com/)
213
+ - Wiley TDM: [Wiley Text and Data Mining](https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining)
214
+ - Unpaywall: [Unpaywall Data Format and API](https://unpaywall.org/products/api)
215
+ - CORE: [CORE API](https://core.ac.uk/services/api)
216
+ - OpenAIRE: [OpenAIRE APIs](https://develop.openaire.eu/)
217
+
218
+ `.env` is ignored by git. Do not commit API keys or tokens.
219
+
220
+ ## Agent Skill
221
+
222
+ This repository includes an optional agent skill at `skills/paper-search/SKILL.md`. Install it into your agent's skill directory if your agent supports skills.
223
+
224
+ For example:
225
+
226
+ ```bash
227
+ mkdir -p ~/.agents/skills/paper-search
228
+ cp skills/paper-search/SKILL.md ~/.agents/skills/paper-search/SKILL.md
229
+ ```
230
+
231
+ The skill only teaches the agent how to call the `paper-search` CLI. API keys are still configured through `paper-search setup`, `paper-search config`, `.env`, or shell environment variables. Do not store secrets in the skill file.
232
+
233
+ ## Output Contract
234
+
235
+ By default, every command writes JSON to stdout.
236
+
237
+ ```json
238
+ {
239
+ "ok": true,
240
+ "tool": "search_papers",
241
+ "message": "Found 1 papers.",
242
+ "data": []
243
+ }
244
+ ```
245
+
246
+ Use `--pretty` for formatted JSON:
247
+
248
+ ```bash
249
+ paper-search search "machine learning" --platform crossref --max-results 1 --pretty
250
+ ```
251
+
252
+ Use `--format text` if you need the raw text response:
253
+
254
+ ```bash
255
+ paper-search search "machine learning" --platform crossref --max-results 1 --format text
256
+ ```
257
+
258
+ Use `--include-text` to keep the raw response text alongside parsed JSON:
259
+
260
+ ```bash
261
+ paper-search run search_crossref --arg query="machine learning" --arg maxResults=3 --include-text --pretty
262
+ ```
263
+
264
+ ## Commands
265
+
266
+ ### `paper-search search`
267
+
268
+ Unified search entrypoint.
269
+
270
+ ```bash
271
+ paper-search search <query> [options]
272
+ ```
273
+
274
+ Examples:
275
+
276
+ ```bash
277
+ paper-search search "machine learning" --platform crossref --max-results 10 --pretty
278
+ paper-search search "machine learning" --sources crossref,openalex --max-results 2 --pretty
279
+ paper-search search "cancer immunotherapy" --platform all --max-results 2 --pretty
280
+ paper-search search "transformer neural networks" --platform arxiv --category cs.AI --year 2023 --pretty
281
+ paper-search search "COVID-19 vaccine efficacy" --platform pubmed --max-results 20 --year 2023 --pretty
282
+ paper-search search "CRISPR gene editing" --platform webofscience --journal Nature --max-results 15 --pretty
283
+ ```
284
+
285
+ Common options:
286
+
287
+ | Option | Description |
288
+ | --- | --- |
289
+ | `--platform` | Source platform. Default: `crossref` |
290
+ | `--sources` | Comma-separated source list for multi-source search, e.g. `crossref,openalex,pmc` |
291
+ | `--max-results` | Maximum result count |
292
+ | `--year` | Year filter, e.g. `2024`, `2020-2024`, `2020-` |
293
+ | `--author` | Author name filter |
294
+ | `--journal` | Journal name filter |
295
+ | `--category` | Category filter, mainly arXiv/bioRxiv/medRxiv |
296
+ | `--days` | Days back for bioRxiv/medRxiv |
297
+ | `--sort-by` | `relevance`, `date`, or `citations` |
298
+ | `--sort-order` | `asc` or `desc` |
299
+
300
+ ### `paper-search run`
301
+
302
+ Run a specific internal tool by name. This is the most precise command for agent workflows.
303
+
304
+ ```bash
305
+ paper-search run <tool-name> --arg key=value --arg key=value
306
+ paper-search run <tool-name> --json-args '{"key":"value"}'
307
+ paper-search run <tool-name> --json-args @args.json
308
+ ```
309
+
310
+ Examples:
311
+
312
+ ```bash
313
+ paper-search run search_crossref --arg query="machine learning" --arg maxResults=5 --pretty
314
+ paper-search run search_papers --json-args '{"query":"machine learning","sources":"crossref,openalex","maxResults":2}' --pretty
315
+ paper-search run search_pubmed --json-args '{"query":"osteoarthritis","maxResults":5,"sortBy":"date"}' --pretty
316
+ paper-search run get_paper_by_doi --arg doi="10.1038/nature12373" --pretty
317
+ ```
318
+
319
+ ### `paper-search tools`
320
+
321
+ List all available tool names, descriptions, and input schemas.
322
+
323
+ ```bash
324
+ paper-search tools --pretty
325
+ ```
326
+
327
+ ### `paper-search status`
328
+
329
+ Show platform capabilities and API key status. Secrets are never printed.
330
+
331
+ ```bash
332
+ paper-search status --pretty
333
+ paper-search status --validate --pretty
334
+ ```
335
+
336
+ `--validate` may send live requests to providers. Use it only when you intentionally want to validate credentials.
337
+
338
+ ### `paper-search diagnostics`
339
+
340
+ Show API-key-backed capabilities and troubleshooting guidance. This does not print secrets.
341
+
342
+ ```bash
343
+ paper-search diagnostics --pretty
344
+ ```
345
+
346
+ When a command returns zero results from a configured key-backed source, or fails with HTTP 400, 401, 403, or 429, the JSON output includes a `diagnostic` field with likely causes and next actions.
347
+
348
+ ### `paper-search config`
349
+
350
+ Manage the user-level config file.
351
+
352
+ ```bash
353
+ paper-search config init --pretty
354
+ paper-search config set SEMANTIC_SCHOLAR_API_KEY your_key --pretty
355
+ paper-search config set PAPER_SEARCH_UNPAYWALL_EMAIL you@example.com --pretty
356
+ paper-search config import-env .env --pretty
357
+ paper-search config list --pretty
358
+ paper-search config doctor --pretty
359
+ paper-search config path --pretty
360
+ paper-search config keys --pretty
361
+ ```
362
+
363
+ ### `paper-search download`
364
+
365
+ Download a paper PDF through a platform that supports downloads.
366
+
367
+ ```bash
368
+ paper-search download <paper-id-or-doi> --platform <platform> [--save-path ./downloads]
369
+ ```
370
+
371
+ Examples:
372
+
373
+ ```bash
374
+ paper-search download 2301.00001 --platform arxiv --save-path ./downloads
375
+ paper-search download 10.1000/example --platform scihub --save-path ./downloads
376
+ paper-search download 10.1111/jtsb.12390 --platform wiley --save-path ./downloads
377
+ paper-search run download_with_fallback --arg source=arxiv --arg paperId=1201.0490 --arg doi=10.48550/arxiv.1201.0490 --arg savePath=./downloads --pretty
378
+ ```
379
+
380
+ ## Tool Reference
381
+
382
+ These names can be used with `paper-search run`.
383
+
384
+ ### `search_papers`
385
+
386
+ Search a single platform (`platform`) or several sources at once (`sources`) through the unified dispatcher.
387
+
388
+ ```bash
389
+ paper-search run search_papers --json-args '{"query":"machine learning","platform":"crossref","maxResults":10,"year":"2023","sortBy":"date"}' --pretty
390
+ ```
391
+
392
+ Supported platforms:
393
+
394
+ ```text
395
+ crossref, arxiv, webofscience, wos, pubmed, biorxiv, medrxiv, semantic,
396
+ iacr, googlescholar, scholar, scihub, sciencedirect, springer, scopus,
397
+ openalex, unpaywall, pmc, europepmc, core, openaire, all
398
+ ```
399
+
400
+ For multi-source search, pass `sources`:
401
+
402
+ ```bash
403
+ paper-search run search_papers --json-args '{"query":"machine learning","sources":"crossref,openalex,pmc","maxResults":2}' --pretty
404
+ ```
405
+
406
+ ### `search_crossref`
407
+
408
+ Search Crossref, the default free metadata source.
409
+
410
+ ```bash
411
+ paper-search run search_crossref --arg query="machine learning" --arg maxResults=10 --arg year=2023 --arg sortBy=relevance --arg sortOrder=desc --pretty
412
+ ```
413
+
414
+ ### `search_arxiv`
415
+
416
+ Search arXiv preprints.
417
+
418
+ ```bash
419
+ paper-search run search_arxiv --arg query="transformer neural networks" --arg maxResults=10 --arg category=cs.AI --arg year=2023 --arg sortBy=date --arg sortOrder=desc --pretty
420
+ ```
421
+
422
+ ### `search_pubmed`
423
+
424
+ Search PubMed/MEDLINE biomedical literature.
425
+
426
+ ```bash
427
+ paper-search run search_pubmed --json-args '{"query":"COVID-19 vaccine efficacy","maxResults":20,"year":"2023","journal":"New England Journal of Medicine","publicationType":["Journal Article","Clinical Trial"],"sortBy":"date"}' --pretty
428
+ ```
429
+
430
+ ### Open Metadata And Full-Text Sources
431
+
432
+ Use these commands for open metadata search, open full-text discovery, and fallback PDF lookup:
433
+
434
+ ```bash
435
+ paper-search run search_openalex --arg query="machine learning" --arg maxResults=3 --pretty
436
+ paper-search run search_unpaywall --arg query="10.48550/arxiv.1201.0490" --pretty
437
+ paper-search run search_pmc --arg query="cancer immunotherapy" --arg maxResults=3 --pretty
438
+ paper-search run search_europepmc --arg query="cancer genomics" --arg maxResults=3 --pretty
439
+ paper-search run search_core --arg query="machine learning" --arg maxResults=3 --pretty
440
+ paper-search run search_openaire --arg query="machine learning" --arg maxResults=3 --pretty
441
+ ```
442
+
443
+ Unpaywall is DOI-only and requires an email. CORE public access may return zero results or rate-limit quickly without an API key.
444
+
445
+ ### `search_webofscience`
446
+
447
+ Search Web of Science. Requires `WOS_API_KEY`.
448
+
449
+ ```bash
450
+ paper-search run search_webofscience --arg query="CRISPR gene editing" --arg maxResults=15 --arg year=2022 --arg journal=Nature --pretty
451
+ ```
452
+
453
+ ### `search_google_scholar`
454
+
455
+ Search Google Scholar.
456
+
457
+ ```bash
458
+ paper-search run search_google_scholar --arg query="deep learning" --arg maxResults=10 --arg yearLow=2020 --arg yearHigh=2024 --pretty
459
+ ```
460
+
461
+ ### `search_biorxiv` and `search_medrxiv`
462
+
463
+ Search the bioRxiv or medRxiv preprint server within a recent window of days (`days`), optionally filtered by category.
464
+
465
+ ```bash
466
+ paper-search run search_biorxiv --arg query="genomics" --arg maxResults=10 --arg days=30 --pretty
467
+ paper-search run search_medrxiv --arg query="epidemiology" --arg maxResults=10 --arg days=60 --pretty
468
+ ```
469
+
470
+ ### `search_semantic_scholar`
471
+
472
+ Search Semantic Scholar with optional field filters.
473
+
474
+ ```bash
475
+ paper-search run search_semantic_scholar --json-args '{"query":"graph neural networks","maxResults":10,"fieldsOfStudy":["Computer Science"]}' --pretty
476
+ ```
477
+
478
+ ### `search_semantic_snippets`
479
+
480
+ Search Semantic Scholar's Open Access snippet index for body-text snippets that can help locate methodological details. Requires `SEMANTIC_SCHOLAR_API_KEY`.
481
+
482
+ ```bash
483
+ paper-search run search_semantic_snippets --arg query="CMAverse mediation bootstrap confidence interval" --arg limit=5 --arg fieldsOfStudy=Medicine --pretty
484
+ ```
485
+
486
+ ### `search_iacr`
487
+
488
+ Search IACR ePrint Archive.
489
+
490
+ ```bash
491
+ paper-search run search_iacr --arg query="zero knowledge proof" --arg maxResults=10 --arg fetchDetails=true --pretty
492
+ ```
493
+
494
+ ### `search_sciencedirect`
495
+
496
+ Search ScienceDirect. Requires `ELSEVIER_API_KEY`.
497
+
498
+ ```bash
499
+ paper-search run search_sciencedirect --arg query="materials science" --arg maxResults=10 --arg openAccess=true --pretty
500
+ ```
501
+
502
+ ### `search_scopus`
503
+
504
+ Search Scopus. Requires `ELSEVIER_API_KEY`.
505
+
506
+ ```bash
507
+ paper-search run search_scopus --arg query="citation analysis" --arg maxResults=10 --arg documentType=ar --pretty
508
+ ```
509
+
510
+ ### `search_springer`
511
+
512
+ Search Springer Nature. Requires `SPRINGER_API_KEY`.
513
+
514
+ ```bash
515
+ paper-search run search_springer --arg query="machine learning" --arg maxResults=10 --arg type=Journal --arg openAccess=true --pretty
516
+ ```
517
+
518
+ ### `search_scihub`
519
+
520
+ Look up a DOI or article URL through Sci-Hub and optionally download a PDF.
521
+
522
+ ```bash
523
+ paper-search run search_scihub --arg doiOrUrl="10.1038/nature12373" --arg downloadPdf=false --pretty
524
+ paper-search run search_scihub --arg doiOrUrl="10.1038/nature12373" --arg downloadPdf=true --arg savePath=./downloads --pretty
525
+ ```
526
+
527
+ ### `check_scihub_mirrors`
528
+
529
+ Show Sci-Hub mirror health.
530
+
531
+ ```bash
532
+ paper-search run check_scihub_mirrors --pretty
533
+ paper-search run check_scihub_mirrors --arg forceCheck=true --pretty
534
+ ```
535
+
536
+ ### `get_paper_by_doi`
537
+
538
+ Look up metadata by DOI.
539
+
540
+ ```bash
541
+ paper-search run get_paper_by_doi --arg doi="10.1038/nature12373" --arg platform=all --pretty
542
+ paper-search run get_paper_by_doi --arg doi="10.1038/nature12373" --arg platform=arxiv --pretty
543
+ ```
544
+
545
+ ### `download_paper`
546
+
547
+ Download PDF files from supported platforms.
548
+
549
+ ```bash
550
+ paper-search run download_paper --arg paperId="2301.00001" --arg platform=arxiv --arg savePath=./downloads --pretty
551
+ ```
552
+
553
+ Supported download platforms:
554
+
555
+ ```text
556
+ arxiv, biorxiv, medrxiv, semantic, iacr, scihub, springer, wiley,
557
+ pmc, europepmc, core
558
+ ```
559
+
560
+ ### `download_with_fallback`
561
+
562
+ Try open-access routes before optional last-resort sources:
563
+
564
+ ```bash
565
+ paper-search run download_with_fallback --arg source=arxiv --arg paperId=1201.0490 --arg doi=10.48550/arxiv.1201.0490 --arg savePath=./downloads --pretty
566
+ paper-search run download_with_fallback --arg source=crossref --arg paperId="10.1038/nature12373" --arg doi="10.1038/nature12373" --arg savePath=./downloads --arg useSciHub=false --pretty
567
+ ```
568
+
569
+ `useSciHub` defaults to `false`; set it to `true` only when you explicitly choose that final fallback.
570
+
571
+ ### `search_wiley`
572
+
573
+ Wiley keyword search is not supported by the Wiley TDM API. Use Crossref first, then download by DOI:
574
+
575
+ ```bash
576
+ paper-search run search_crossref --arg query="site:wiley.com machine learning" --arg maxResults=10 --pretty
577
+ paper-search run download_paper --arg paperId="10.1111/example" --arg platform=wiley --pretty
578
+ ```
579
+
580
+ ### `get_platform_status`
581
+
582
+ Same as `paper-search status`.
583
+
584
+ ```bash
585
+ paper-search run get_platform_status --pretty
586
+ paper-search run get_platform_status --arg validate=true --pretty
587
+ ```
588
+
589
+ ## Troubleshooting
590
+
591
+ ### Command Not Found
592
+
593
+ Run the CLI directly from the project directory:
594
+
595
+ ```bash
596
+ node dist/cli.js status --pretty
597
+ ```
598
+
599
+ Or register the local command:
600
+
601
+ ```bash
602
+ npm link
603
+ paper-search status --pretty
604
+ ```
605
+
606
+ ### Missing API Key
607
+
608
+ Run:
609
+
610
+ ```bash
611
+ paper-search status --pretty
612
+ ```
613
+
614
+ If a provider shows `missing`, add the relevant key through `paper-search setup`, user config, or `.env`, then rerun the command.
615
+
616
+ For global installs, prefer user config:
617
+
618
+ ```bash
619
+ paper-search setup
620
+ paper-search config set SEMANTIC_SCHOLAR_API_KEY your_key
621
+ paper-search config doctor --pretty
622
+ ```
623
+
624
+ ### Provider Rate Limits
625
+
626
+ Reduce `--max-results`, avoid repeated live validation, and prefer sources with official APIs. PubMed, Semantic Scholar, and CORE support optional keys for better limits. CORE anonymous access can return HTTP 429; configure `PAPER_SEARCH_CORE_API_KEY` when you rely on it.
627
+
628
+ ### JSON Parsing In Scripts
629
+
630
+ Use default JSON output and parse stdout. Human diagnostics are written to stderr.
631
+
632
+ ## Usage Boundaries
633
+
634
+ Some sources may be subject to platform terms, institutional subscriptions, or local law. Use restricted integrations only when you have the appropriate access rights and permission.
635
+
636
+ ## Project Origin
637
+
638
+ This project is a standalone CLI adaptation inspired by [openags/paper-search-mcp](https://github.com/openags/paper-search-mcp). It keeps the paper-search workflow focused on a one-command terminal tool and does not require an MCP runtime.
639
+
640
+ ## License
641
+
642
+ MIT
package/dist/cli.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}