scholarcli 1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. scholarcli-1.6/LICENSE +21 -0
  2. scholarcli-1.6/PKG-INFO +346 -0
  3. scholarcli-1.6/README.md +309 -0
  4. scholarcli-1.6/pyproject.toml +67 -0
  5. scholarcli-1.6/setup.cfg +4 -0
  6. scholarcli-1.6/src/scholar/__init__.py +21 -0
  7. scholarcli-1.6/src/scholar/__main__.py +6 -0
  8. scholarcli-1.6/src/scholar/cache.py +141 -0
  9. scholarcli-1.6/src/scholar/cli.py +2231 -0
  10. scholarcli-1.6/src/scholar/enrich.py +361 -0
  11. scholarcli-1.6/src/scholar/llm_review.py +1933 -0
  12. scholarcli-1.6/src/scholar/notes.py +591 -0
  13. scholarcli-1.6/src/scholar/pdf.py +482 -0
  14. scholarcli-1.6/src/scholar/providers.py +2539 -0
  15. scholarcli-1.6/src/scholar/review.py +1459 -0
  16. scholarcli-1.6/src/scholar/scholar.py +712 -0
  17. scholarcli-1.6/src/scholar/tui.py +1859 -0
  18. scholarcli-1.6/src/scholar/utils.py +72 -0
  19. scholarcli-1.6/src/scholarcli.egg-info/PKG-INFO +346 -0
  20. scholarcli-1.6/src/scholarcli.egg-info/SOURCES.txt +74 -0
  21. scholarcli-1.6/src/scholarcli.egg-info/dependency_links.txt +1 -0
  22. scholarcli-1.6/src/scholarcli.egg-info/entry_points.txt +2 -0
  23. scholarcli-1.6/src/scholarcli.egg-info/requires.txt +26 -0
  24. scholarcli-1.6/src/scholarcli.egg-info/top_level.txt +3 -0
  25. scholarcli-1.6/src/snowball/__init__.py +195 -0
  26. scholarcli-1.6/src/snowball/apis/__init__.py +0 -0
  27. scholarcli-1.6/src/snowball/apis/aggregator.py +296 -0
  28. scholarcli-1.6/src/snowball/apis/arxiv.py +233 -0
  29. scholarcli-1.6/src/snowball/apis/base.py +84 -0
  30. scholarcli-1.6/src/snowball/apis/crossref.py +203 -0
  31. scholarcli-1.6/src/snowball/apis/google_scholar.py +259 -0
  32. scholarcli-1.6/src/snowball/apis/openalex.py +299 -0
  33. scholarcli-1.6/src/snowball/apis/opencitations.py +288 -0
  34. scholarcli-1.6/src/snowball/apis/semantic_scholar.py +320 -0
  35. scholarcli-1.6/src/snowball/cli.py +1127 -0
  36. scholarcli-1.6/src/snowball/exporters/__init__.py +0 -0
  37. scholarcli-1.6/src/snowball/exporters/bibtex.py +153 -0
  38. scholarcli-1.6/src/snowball/exporters/csv_exporter.py +139 -0
  39. scholarcli-1.6/src/snowball/exporters/tikz.py +224 -0
  40. scholarcli-1.6/src/snowball/filters/__init__.py +0 -0
  41. scholarcli-1.6/src/snowball/filters/filter_engine.py +166 -0
  42. scholarcli-1.6/src/snowball/models.py +152 -0
  43. scholarcli-1.6/src/snowball/paper_utils.py +612 -0
  44. scholarcli-1.6/src/snowball/parsers/__init__.py +0 -0
  45. scholarcli-1.6/src/snowball/parsers/pdf_parser.py +371 -0
  46. scholarcli-1.6/src/snowball/scoring/__init__.py +30 -0
  47. scholarcli-1.6/src/snowball/scoring/base.py +47 -0
  48. scholarcli-1.6/src/snowball/scoring/llm_scorer.py +170 -0
  49. scholarcli-1.6/src/snowball/scoring/tfidf_scorer.py +147 -0
  50. scholarcli-1.6/src/snowball/snowballing.py +623 -0
  51. scholarcli-1.6/src/snowball/storage/__init__.py +0 -0
  52. scholarcli-1.6/src/snowball/storage/json_storage.py +287 -0
  53. scholarcli-1.6/src/snowball/tui/__init__.py +0 -0
  54. scholarcli-1.6/src/snowball/tui/app.py +1925 -0
  55. scholarcli-1.6/src/snowball/visualization.py +290 -0
  56. scholarcli-1.6/src/tuxedo/__init__.py +136 -0
  57. scholarcli-1.6/src/tuxedo/analysis.py +265 -0
  58. scholarcli-1.6/src/tuxedo/cli.py +1402 -0
  59. scholarcli-1.6/src/tuxedo/clustering.py +703 -0
  60. scholarcli-1.6/src/tuxedo/database.py +650 -0
  61. scholarcli-1.6/src/tuxedo/grobid.py +519 -0
  62. scholarcli-1.6/src/tuxedo/logging.py +118 -0
  63. scholarcli-1.6/src/tuxedo/models.py +127 -0
  64. scholarcli-1.6/src/tuxedo/project.py +288 -0
  65. scholarcli-1.6/src/tuxedo/tui.py +2836 -0
  66. scholarcli-1.6/tests/test_cache.py +98 -0
  67. scholarcli-1.6/tests/test_cli.py +455 -0
  68. scholarcli-1.6/tests/test_enrich.py +378 -0
  69. scholarcli-1.6/tests/test_llm_review.py +1018 -0
  70. scholarcli-1.6/tests/test_notes.py +450 -0
  71. scholarcli-1.6/tests/test_pdf.py +279 -0
  72. scholarcli-1.6/tests/test_providers.py +2084 -0
  73. scholarcli-1.6/tests/test_review.py +1122 -0
  74. scholarcli-1.6/tests/test_scholar.py +696 -0
  75. scholarcli-1.6/tests/test_tui.py +11 -0
  76. scholarcli-1.6/tests/test_utils.py +75 -0
scholarcli-1.6/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025--2026 Daniel Bosk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,346 @@
1
+ Metadata-Version: 2.4
2
+ Name: scholarcli
3
+ Version: 1.6
4
+ Summary: A tool for structured literature searches across bibliographic databases
5
+ Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: requests>=2.32.5
11
+ Requires-Dist: typer>=0.21.0
12
+ Requires-Dist: rich>=14.2.0
13
+ Requires-Dist: pyalex>=0.19
14
+ Requires-Dist: arxiv>=2.1.0
15
+ Requires-Dist: cachetools>=6.2.4
16
+ Requires-Dist: platformdirs>=4.5.1
17
+ Requires-Dist: textual>=6.11.0
18
+ Requires-Dist: pypandoc>=1.14
19
+ Requires-Dist: click>=8.0.0
20
+ Requires-Dist: llm>=0.19
21
+ Requires-Dist: llm-openai-plugin>=0.7
22
+ Requires-Dist: llm-gpt4all>=0.4
23
+ Requires-Dist: llm-azure>=2.1
24
+ Requires-Dist: llm-anthropic>=0.23
25
+ Requires-Dist: llm-gemini>=0.28.2
26
+ Requires-Dist: pypdfium2>=4.26.0
27
+ Requires-Dist: bibtexparser>=1.4.0
28
+ Requires-Dist: pandas>=2.0.0
29
+ Requires-Dist: pydantic>=2.0.0
30
+ Requires-Dist: python-dotenv>=1.0.0
31
+ Requires-Dist: httpx>=0.25.0
32
+ Requires-Dist: grobid-client-python>=0.1.0
33
+ Requires-Dist: scholarly>=1.7.0
34
+ Requires-Dist: openai>=1.0.0
35
+ Requires-Dist: pyyaml>=6.0
36
+ Dynamic: license-file
37
+
38
+ # Scholar
39
+
40
+ A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
41
+
42
+ ## Features
43
+
44
+ ### Multi-Database Search
45
+
46
+ Search across six academic databases with a single query:
47
+
48
+ - **Semantic Scholar** - AI-powered research database with 200M+ papers
49
+ - **OpenAlex** - Open catalog of 250M+ scholarly works
50
+ - **DBLP** - Computer science bibliography
51
+ - **Web of Science** - Comprehensive citation index (requires API key)
52
+ - **IEEE Xplore** - IEEE technical literature (requires API key)
53
+ - **arXiv** - Preprints (no API key)
54
+
55
+ ```bash
56
+ # Search specific providers
57
+ scholar search "federated learning" -p semantic_scholar -p openalex
58
+
59
+ # Start from a research question (LLM generates provider-specific queries)
60
+ scholar rq "How can privacy-preserving ML be evaluated?" \
61
+ --provider openalex --provider dblp \
62
+ --count 20
63
+ ```
64
+
65
+ ### Interactive Review Interface
66
+
67
+ Review search results in a terminal-based interface with vim-style navigation:
68
+
69
+ ```bash
70
+ scholar search "neural networks" --review
71
+ ```
72
+
73
+ The TUI supports:
74
+ - **Keep/Discard decisions** with mandatory motivations for discards
75
+ - **Theme tagging** for organizing kept papers
76
+ - **Note-taking** with your preferred editor
77
+ - **PDF viewing** with automatic download and caching
78
+ - **Abstract enrichment** for papers missing abstracts
79
+ - **LLM-assisted classification** to help review large result sets
80
+ - **Sorting and filtering** by various criteria
81
+
82
+ ### Output Formats
83
+
84
+ Export results in multiple formats:
85
+
86
+ ```bash
87
+ # Pretty table (default for terminal)
88
+ scholar search "query"
89
+
90
+ # Machine-readable formats
91
+ scholar search "query" -f json
92
+ scholar search "query" -f csv
93
+ scholar search "query" -f bibtex
94
+ ```
95
+
96
+ ### Session Management
97
+
98
+ Save and resume review sessions:
99
+
100
+ ```bash
101
+ # List saved sessions
102
+ scholar sessions list
103
+
104
+ # Resume a session
105
+ scholar sessions resume "machine learning"
106
+
107
+ # Export session to reports
108
+ scholar sessions export "machine learning" -f all
109
+ ```
110
+
111
+ ### Paper Notes
112
+
113
+ Manage notes across all reviewed papers:
114
+
115
+ ```bash
116
+ # Browse papers with notes
117
+ scholar notes
118
+
119
+ # List papers with notes
120
+ scholar notes list
121
+
122
+ # Export/import notes
123
+ scholar notes export notes.json
124
+ scholar notes import notes.json
125
+ ```
126
+
127
+ ### Caching
128
+
129
+ Search results are cached to avoid redundant API calls:
130
+
131
+ ```bash
132
+ scholar cache info # Show cache statistics
133
+ scholar cache clear # Delete cached results
134
+ scholar cache path # Print cache directory
135
+ ```
136
+
137
+ PDF downloads are also cached for offline viewing.
138
+
139
+ ## Quickstart
140
+
141
+ ### Install
142
+
143
+ ```bash
144
+ pipx install scholarcli
145
+ ```
146
+
147
+ ### Configure LLM access (optional, for `scholar rq` and LLM-assisted review)
148
+
149
+ Scholar uses the [`llm`](https://llm.datasette.io/) package for model selection
150
+ and API key configuration.
151
+
152
+ If you want to configure it via the `llm` CLI, install it as well (or install
153
+ `scholarcli` with `pipx --include-deps` so the dependency CLIs are exposed):
154
+
155
+ ```bash
156
+ pipx install llm
157
+ # Or: pipx install --include-deps scholarcli
158
+ ```
159
+
160
+ Then configure at least one provider (examples):
161
+
162
+ ```bash
163
+ llm install llm-openai-plugin
164
+ llm keys set openai
165
+
166
+ # Or:
167
+ llm install llm-anthropic
168
+ llm keys set anthropic
169
+ ```
170
+
171
+ Set a default model for Scholar to use:
172
+
173
+ ```bash
174
+ llm models
175
+ llm models default gpt-4o-mini
176
+ ```
177
+
178
+ ### First run
179
+
180
+ ```bash
181
+ # Search directly
182
+ scholar search "machine learning privacy"
183
+
184
+ # Start from a research question (LLM generates provider-specific queries)
185
+ scholar rq "How do LLMs support novice programming?" --count 20
186
+ ```
187
+
188
+ ## Installation
189
+
190
+ If you don't use `pipx`, you can install with `pip`:
191
+
192
+ ```bash
193
+ pip install scholarcli
194
+ ```
195
+
196
+ Or with [uv](https://github.com/astral-sh/uv):
197
+
198
+ ```bash
199
+ uv pip install scholarcli
200
+ ```
201
+
202
+ ## Configuration
203
+
204
+ Some providers require an API key and others accept optional settings, all supplied as environment variables (arXiv needs no configuration):
205
+
206
+ | Provider | Environment Variable | Required | How to Get |
207
+ |----------|---------------------|----------|------------|
208
+ | Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
209
+ | OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
210
+ | DBLP | - | No | No key needed |
211
+ | Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
212
+ | IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
213
+
214
+ View provider status:
215
+
216
+ ```bash
217
+ scholar providers
218
+ ```
219
+
220
+ ## Usage Examples
221
+
222
+ ### Basic Search
223
+
224
+ ```bash
225
+ # Search with default providers (Semantic Scholar, OpenAlex, DBLP)
226
+ scholar search "differential privacy"
227
+
228
+ # Limit results per provider (default: 1000)
229
+ scholar search "blockchain" -l 50
230
+
231
+ # Unlimited results per provider
232
+ scholar search "blockchain" -l 0
233
+ ```
234
+
235
+ ### Systematic Review Workflow
236
+
237
+ ```bash
238
+ # 1. Search and review interactively
239
+ scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
240
+
241
+ # 2. Add more searches to the same session
242
+ scholar search "federated learning privacy" --review --name "privacy-ml-review"
243
+
244
+ # 3. Resume reviewing later
245
+ scholar sessions resume "privacy-ml-review"
246
+
247
+ # 4. Generate reports
248
+ scholar sessions export "privacy-ml-review" -f all
249
+ ```
250
+
251
+ ### Enriching Results
252
+
253
+ Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
254
+
255
+ ```bash
256
+ # Enrich during search
257
+ scholar search "query" --enrich
258
+
259
+ # Enrich an existing session
260
+ scholar enrich "session-name"
261
+ ```
262
+
263
+ ### PDF Management
264
+
265
+ ```bash
266
+ # Download and open a PDF
267
+ scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
268
+
269
+ # Inspect or clear the PDF cache
270
+ scholar pdf info
271
+ scholar pdf clear
272
+ ```
273
+
274
+ ## Keybindings (Review TUI)
275
+
276
+ | Key | Action |
277
+ |-----|--------|
278
+ | `j`/`k` | Navigate down/up |
279
+ | `Enter` | View paper details |
280
+ | `K` | Keep paper (quick) |
281
+ | `T` | Keep with themes |
282
+ | `d` | Discard (requires motivation) |
283
+ | `n` | Edit notes |
284
+ | `p` | Open PDF |
285
+ | `e` | Enrich (fetch abstract) |
286
+ | `L` | LLM-assisted classification |
287
+ | `s` | Sort papers |
288
+ | `f` | Filter by status |
289
+ | `q` | Quit |
290
+
291
+ ## LLM-Assisted Review
292
+
293
+ For large result sets, Scholar can use LLMs to assist with paper classification:
294
+
295
+ ```bash
296
+ # In the TUI, press 'L' to invoke LLM classification
297
+ # Or use the CLI command directly
298
+ scholar llm-review "session-name" --count 10
299
+ ```
300
+
301
+ ### How It Works
302
+
303
+ 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
304
+
305
+ 2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
306
+
307
+ 3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
308
+
309
+ 4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept the correct ones and fix the wrong ones.
310
+
311
+ 5. **Iterate** - Corrections become training examples for the next round.
312
+
313
+ ### Requirements
314
+
315
+ Install and configure the `llm` command (Scholar uses `llm`'s configuration and
316
+ default model):
317
+
318
+ ```bash
319
+ pipx install llm
320
+
321
+ llm install llm-openai-plugin
322
+ llm keys set openai
323
+
324
+ # Pick a default model (used by `scholar rq` and `scholar llm-review`)
325
+ llm models
326
+ llm models default gpt-4o-mini
327
+ ```
328
+
329
+ If you installed Scholar with `pipx install scholarcli` and want the `llm` CLI
330
+ available from that same environment, you can alternatively install Scholar
331
+ with `pipx install --include-deps scholarcli`.
332
+
333
+ The LLM integration supports models available through Simon Willison's `llm`
334
+ package (OpenAI, Anthropic, local models, etc.).
335
+
336
+ Note: `scholar llm-review` learns from your existing labeled examples (typically
337
+ ~5 tagged papers). `scholar rq` can start without examples by using the research
338
+ question as context.
339
+
340
+ ## Documentation
341
+
342
+ Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
343
+
344
+ ## License
345
+
346
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,309 @@
1
+ # Scholar
2
+
3
+ A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
4
+
5
+ ## Features
6
+
7
+ ### Multi-Database Search
8
+
9
+ Search across six academic databases with a single query:
10
+
11
+ - **Semantic Scholar** - AI-powered research database with 200M+ papers
12
+ - **OpenAlex** - Open catalog of 250M+ scholarly works
13
+ - **DBLP** - Computer science bibliography
14
+ - **Web of Science** - Comprehensive citation index (requires API key)
15
+ - **IEEE Xplore** - IEEE technical literature (requires API key)
16
+ - **arXiv** - Preprints (no API key)
17
+
18
+ ```bash
19
+ # Search specific providers
20
+ scholar search "federated learning" -p semantic_scholar -p openalex
21
+
22
+ # Start from a research question (LLM generates provider-specific queries)
23
+ scholar rq "How can privacy-preserving ML be evaluated?" \
24
+ --provider openalex --provider dblp \
25
+ --count 20
26
+ ```
27
+
28
+ ### Interactive Review Interface
29
+
30
+ Review search results in a terminal-based interface with vim-style navigation:
31
+
32
+ ```bash
33
+ scholar search "neural networks" --review
34
+ ```
35
+
36
+ The TUI supports:
37
+ - **Keep/Discard decisions** with mandatory motivations for discards
38
+ - **Theme tagging** for organizing kept papers
39
+ - **Note-taking** with your preferred editor
40
+ - **PDF viewing** with automatic download and caching
41
+ - **Abstract enrichment** for papers missing abstracts
42
+ - **LLM-assisted classification** to help review large result sets
43
+ - **Sorting and filtering** by various criteria
44
+
45
+ ### Output Formats
46
+
47
+ Export results in multiple formats:
48
+
49
+ ```bash
50
+ # Pretty table (default for terminal)
51
+ scholar search "query"
52
+
53
+ # Machine-readable formats
54
+ scholar search "query" -f json
55
+ scholar search "query" -f csv
56
+ scholar search "query" -f bibtex
57
+ ```
58
+
59
+ ### Session Management
60
+
61
+ Save and resume review sessions:
62
+
63
+ ```bash
64
+ # List saved sessions
65
+ scholar sessions list
66
+
67
+ # Resume a session
68
+ scholar sessions resume "machine learning"
69
+
70
+ # Export session to reports
71
+ scholar sessions export "machine learning" -f all
72
+ ```
73
+
74
+ ### Paper Notes
75
+
76
+ Manage notes across all reviewed papers:
77
+
78
+ ```bash
79
+ # Browse papers with notes
80
+ scholar notes
81
+
82
+ # List papers with notes
83
+ scholar notes list
84
+
85
+ # Export/import notes
86
+ scholar notes export notes.json
87
+ scholar notes import notes.json
88
+ ```
89
+
90
+ ### Caching
91
+
92
+ Search results are cached to avoid redundant API calls:
93
+
94
+ ```bash
95
+ scholar cache info # Show cache statistics
96
+ scholar cache clear # Delete cached results
97
+ scholar cache path # Print cache directory
98
+ ```
99
+
100
+ PDF downloads are also cached for offline viewing.
101
+
102
+ ## Quickstart
103
+
104
+ ### Install
105
+
106
+ ```bash
107
+ pipx install scholarcli
108
+ ```
109
+
110
+ ### Configure LLM access (optional, for `scholar rq` and LLM-assisted review)
111
+
112
+ Scholar uses the [`llm`](https://llm.datasette.io/) package for model selection
113
+ and API key configuration.
114
+
115
+ If you want to configure it via the `llm` CLI, install it as well (or install
116
+ `scholarcli` with `pipx --include-deps` so the dependency CLIs are exposed):
117
+
118
+ ```bash
119
+ pipx install llm
120
+ # Or: pipx install --include-deps scholarcli
121
+ ```
122
+
123
+ Then configure at least one provider (examples):
124
+
125
+ ```bash
126
+ llm install llm-openai-plugin
127
+ llm keys set openai
128
+
129
+ # Or:
130
+ llm install llm-anthropic
131
+ llm keys set anthropic
132
+ ```
133
+
134
+ Set a default model for Scholar to use:
135
+
136
+ ```bash
137
+ llm models
138
+ llm models default gpt-4o-mini
139
+ ```
140
+
141
+ ### First run
142
+
143
+ ```bash
144
+ # Search directly
145
+ scholar search "machine learning privacy"
146
+
147
+ # Start from a research question (LLM generates provider-specific queries)
148
+ scholar rq "How do LLMs support novice programming?" --count 20
149
+ ```
150
+
151
+ ## Installation
152
+
153
+ If you don't use `pipx`, you can install with `pip`:
154
+
155
+ ```bash
156
+ pip install scholarcli
157
+ ```
158
+
159
+ Or with [uv](https://github.com/astral-sh/uv):
160
+
161
+ ```bash
162
+ uv pip install scholarcli
163
+ ```
164
+
165
+ ## Configuration
166
+
167
+ Some providers require an API key and others accept optional settings, all supplied as environment variables (arXiv needs no configuration):
168
+
169
+ | Provider | Environment Variable | Required | How to Get |
170
+ |----------|---------------------|----------|------------|
171
+ | Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
172
+ | OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
173
+ | DBLP | - | No | No key needed |
174
+ | Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
175
+ | IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
176
+
177
+ View provider status:
178
+
179
+ ```bash
180
+ scholar providers
181
+ ```
182
+
183
+ ## Usage Examples
184
+
185
+ ### Basic Search
186
+
187
+ ```bash
188
+ # Search with default providers (Semantic Scholar, OpenAlex, DBLP)
189
+ scholar search "differential privacy"
190
+
191
+ # Limit results per provider (default: 1000)
192
+ scholar search "blockchain" -l 50
193
+
194
+ # Unlimited results per provider
195
+ scholar search "blockchain" -l 0
196
+ ```
197
+
198
+ ### Systematic Review Workflow
199
+
200
+ ```bash
201
+ # 1. Search and review interactively
202
+ scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
203
+
204
+ # 2. Add more searches to the same session
205
+ scholar search "federated learning privacy" --review --name "privacy-ml-review"
206
+
207
+ # 3. Resume reviewing later
208
+ scholar sessions resume "privacy-ml-review"
209
+
210
+ # 4. Generate reports
211
+ scholar sessions export "privacy-ml-review" -f all
212
+ ```
213
+
214
+ ### Enriching Results
215
+
216
+ Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
217
+
218
+ ```bash
219
+ # Enrich during search
220
+ scholar search "query" --enrich
221
+
222
+ # Enrich an existing session
223
+ scholar enrich "session-name"
224
+ ```
225
+
226
+ ### PDF Management
227
+
228
+ ```bash
229
+ # Download and open a PDF
230
+ scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
231
+
232
+ # Inspect or clear the PDF cache
233
+ scholar pdf info
234
+ scholar pdf clear
235
+ ```
236
+
237
+ ## Keybindings (Review TUI)
238
+
239
+ | Key | Action |
240
+ |-----|--------|
241
+ | `j`/`k` | Navigate down/up |
242
+ | `Enter` | View paper details |
243
+ | `K` | Keep paper (quick) |
244
+ | `T` | Keep with themes |
245
+ | `d` | Discard (requires motivation) |
246
+ | `n` | Edit notes |
247
+ | `p` | Open PDF |
248
+ | `e` | Enrich (fetch abstract) |
249
+ | `L` | LLM-assisted classification |
250
+ | `s` | Sort papers |
251
+ | `f` | Filter by status |
252
+ | `q` | Quit |
253
+
254
+ ## LLM-Assisted Review
255
+
256
+ For large result sets, Scholar can use LLMs to assist with paper classification:
257
+
258
+ ```bash
259
+ # In the TUI, press 'L' to invoke LLM classification
260
+ # Or use the CLI command directly
261
+ scholar llm-review "session-name" --count 10
262
+ ```
263
+
264
+ ### How It Works
265
+
266
+ 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
267
+
268
+ 2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
269
+
270
+ 3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
271
+
272
+ 4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept the correct ones and fix the wrong ones.
273
+
274
+ 5. **Iterate** - Corrections become training examples for the next round.
275
+
276
+ ### Requirements
277
+
278
+ Install and configure the `llm` command (Scholar uses `llm`'s configuration and
279
+ default model):
280
+
281
+ ```bash
282
+ pipx install llm
283
+
284
+ llm install llm-openai-plugin
285
+ llm keys set openai
286
+
287
+ # Pick a default model (used by `scholar rq` and `scholar llm-review`)
288
+ llm models
289
+ llm models default gpt-4o-mini
290
+ ```
291
+
292
+ If you installed Scholar with `pipx install scholarcli` and want the `llm` CLI
293
+ available from that same environment, you can alternatively install Scholar
294
+ with `pipx install --include-deps scholarcli`.
295
+
296
+ The LLM integration supports models available through Simon Willison's `llm`
297
+ package (OpenAI, Anthropic, local models, etc.).
298
+
299
+ Note: `scholar llm-review` learns from your existing labeled examples (typically
300
+ ~5 tagged papers). `scholar rq` can start without examples by using the research
301
+ question as context.
302
+
303
+ ## Documentation
304
+
305
+ Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
306
+
307
+ ## License
308
+
309
+ MIT License - see [LICENSE](LICENSE) for details.