scholarcli 1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scholarcli-1.6/LICENSE +21 -0
- scholarcli-1.6/PKG-INFO +346 -0
- scholarcli-1.6/README.md +309 -0
- scholarcli-1.6/pyproject.toml +67 -0
- scholarcli-1.6/setup.cfg +4 -0
- scholarcli-1.6/src/scholar/__init__.py +21 -0
- scholarcli-1.6/src/scholar/__main__.py +6 -0
- scholarcli-1.6/src/scholar/cache.py +141 -0
- scholarcli-1.6/src/scholar/cli.py +2231 -0
- scholarcli-1.6/src/scholar/enrich.py +361 -0
- scholarcli-1.6/src/scholar/llm_review.py +1933 -0
- scholarcli-1.6/src/scholar/notes.py +591 -0
- scholarcli-1.6/src/scholar/pdf.py +482 -0
- scholarcli-1.6/src/scholar/providers.py +2539 -0
- scholarcli-1.6/src/scholar/review.py +1459 -0
- scholarcli-1.6/src/scholar/scholar.py +712 -0
- scholarcli-1.6/src/scholar/tui.py +1859 -0
- scholarcli-1.6/src/scholar/utils.py +72 -0
- scholarcli-1.6/src/scholarcli.egg-info/PKG-INFO +346 -0
- scholarcli-1.6/src/scholarcli.egg-info/SOURCES.txt +74 -0
- scholarcli-1.6/src/scholarcli.egg-info/dependency_links.txt +1 -0
- scholarcli-1.6/src/scholarcli.egg-info/entry_points.txt +2 -0
- scholarcli-1.6/src/scholarcli.egg-info/requires.txt +26 -0
- scholarcli-1.6/src/scholarcli.egg-info/top_level.txt +3 -0
- scholarcli-1.6/src/snowball/__init__.py +195 -0
- scholarcli-1.6/src/snowball/apis/__init__.py +0 -0
- scholarcli-1.6/src/snowball/apis/aggregator.py +296 -0
- scholarcli-1.6/src/snowball/apis/arxiv.py +233 -0
- scholarcli-1.6/src/snowball/apis/base.py +84 -0
- scholarcli-1.6/src/snowball/apis/crossref.py +203 -0
- scholarcli-1.6/src/snowball/apis/google_scholar.py +259 -0
- scholarcli-1.6/src/snowball/apis/openalex.py +299 -0
- scholarcli-1.6/src/snowball/apis/opencitations.py +288 -0
- scholarcli-1.6/src/snowball/apis/semantic_scholar.py +320 -0
- scholarcli-1.6/src/snowball/cli.py +1127 -0
- scholarcli-1.6/src/snowball/exporters/__init__.py +0 -0
- scholarcli-1.6/src/snowball/exporters/bibtex.py +153 -0
- scholarcli-1.6/src/snowball/exporters/csv_exporter.py +139 -0
- scholarcli-1.6/src/snowball/exporters/tikz.py +224 -0
- scholarcli-1.6/src/snowball/filters/__init__.py +0 -0
- scholarcli-1.6/src/snowball/filters/filter_engine.py +166 -0
- scholarcli-1.6/src/snowball/models.py +152 -0
- scholarcli-1.6/src/snowball/paper_utils.py +612 -0
- scholarcli-1.6/src/snowball/parsers/__init__.py +0 -0
- scholarcli-1.6/src/snowball/parsers/pdf_parser.py +371 -0
- scholarcli-1.6/src/snowball/scoring/__init__.py +30 -0
- scholarcli-1.6/src/snowball/scoring/base.py +47 -0
- scholarcli-1.6/src/snowball/scoring/llm_scorer.py +170 -0
- scholarcli-1.6/src/snowball/scoring/tfidf_scorer.py +147 -0
- scholarcli-1.6/src/snowball/snowballing.py +623 -0
- scholarcli-1.6/src/snowball/storage/__init__.py +0 -0
- scholarcli-1.6/src/snowball/storage/json_storage.py +287 -0
- scholarcli-1.6/src/snowball/tui/__init__.py +0 -0
- scholarcli-1.6/src/snowball/tui/app.py +1925 -0
- scholarcli-1.6/src/snowball/visualization.py +290 -0
- scholarcli-1.6/src/tuxedo/__init__.py +136 -0
- scholarcli-1.6/src/tuxedo/analysis.py +265 -0
- scholarcli-1.6/src/tuxedo/cli.py +1402 -0
- scholarcli-1.6/src/tuxedo/clustering.py +703 -0
- scholarcli-1.6/src/tuxedo/database.py +650 -0
- scholarcli-1.6/src/tuxedo/grobid.py +519 -0
- scholarcli-1.6/src/tuxedo/logging.py +118 -0
- scholarcli-1.6/src/tuxedo/models.py +127 -0
- scholarcli-1.6/src/tuxedo/project.py +288 -0
- scholarcli-1.6/src/tuxedo/tui.py +2836 -0
- scholarcli-1.6/tests/test_cache.py +98 -0
- scholarcli-1.6/tests/test_cli.py +455 -0
- scholarcli-1.6/tests/test_enrich.py +378 -0
- scholarcli-1.6/tests/test_llm_review.py +1018 -0
- scholarcli-1.6/tests/test_notes.py +450 -0
- scholarcli-1.6/tests/test_pdf.py +279 -0
- scholarcli-1.6/tests/test_providers.py +2084 -0
- scholarcli-1.6/tests/test_review.py +1122 -0
- scholarcli-1.6/tests/test_scholar.py +696 -0
- scholarcli-1.6/tests/test_tui.py +11 -0
- scholarcli-1.6/tests/test_utils.py +75 -0
scholarcli-1.6/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025--2026 Daniel Bosk
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
scholarcli-1.6/PKG-INFO
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scholarcli
|
|
3
|
+
Version: 1.6
|
|
4
|
+
Summary: A tool for structured literature searches across bibliographic databases
|
|
5
|
+
Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: requests>=2.32.5
|
|
11
|
+
Requires-Dist: typer>=0.21.0
|
|
12
|
+
Requires-Dist: rich>=14.2.0
|
|
13
|
+
Requires-Dist: pyalex>=0.19
|
|
14
|
+
Requires-Dist: arxiv>=2.1.0
|
|
15
|
+
Requires-Dist: cachetools>=6.2.4
|
|
16
|
+
Requires-Dist: platformdirs>=4.5.1
|
|
17
|
+
Requires-Dist: textual>=6.11.0
|
|
18
|
+
Requires-Dist: pypandoc>=1.14
|
|
19
|
+
Requires-Dist: click>=8.0.0
|
|
20
|
+
Requires-Dist: llm>=0.19
|
|
21
|
+
Requires-Dist: llm-openai-plugin>=0.7
|
|
22
|
+
Requires-Dist: llm-gpt4all>=0.4
|
|
23
|
+
Requires-Dist: llm-azure>=2.1
|
|
24
|
+
Requires-Dist: llm-anthropic>=0.23
|
|
25
|
+
Requires-Dist: llm-gemini>=0.28.2
|
|
26
|
+
Requires-Dist: pypdfium2>=4.26.0
|
|
27
|
+
Requires-Dist: bibtexparser>=1.4.0
|
|
28
|
+
Requires-Dist: pandas>=2.0.0
|
|
29
|
+
Requires-Dist: pydantic>=2.0.0
|
|
30
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
31
|
+
Requires-Dist: httpx>=0.25.0
|
|
32
|
+
Requires-Dist: grobid-client-python>=0.1.0
|
|
33
|
+
Requires-Dist: scholarly>=1.7.0
|
|
34
|
+
Requires-Dist: openai>=1.0.0
|
|
35
|
+
Requires-Dist: pyyaml>=6.0
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# Scholar
|
|
39
|
+
|
|
40
|
+
A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
|
|
41
|
+
|
|
42
|
+
## Features
|
|
43
|
+
|
|
44
|
+
### Multi-Database Search
|
|
45
|
+
|
|
46
|
+
Search across six academic databases with a single query:
|
|
47
|
+
|
|
48
|
+
- **Semantic Scholar** - AI-powered research database with 200M+ papers
|
|
49
|
+
- **OpenAlex** - Open catalog of 250M+ scholarly works
|
|
50
|
+
- **DBLP** - Computer science bibliography
|
|
51
|
+
- **Web of Science** - Comprehensive citation index (requires API key)
|
|
52
|
+
- **IEEE Xplore** - IEEE technical literature (requires API key)
|
|
53
|
+
- **arXiv** - Preprints (no API key)
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Search specific providers
|
|
57
|
+
scholar search "federated learning" -p semantic_scholar -p openalex
|
|
58
|
+
|
|
59
|
+
# Start from a research question (LLM generates provider-specific queries)
|
|
60
|
+
scholar rq "How can privacy-preserving ML be evaluated?" \
|
|
61
|
+
--provider openalex --provider dblp \
|
|
62
|
+
--count 20
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Interactive Review Interface
|
|
66
|
+
|
|
67
|
+
Review search results in a terminal-based interface with vim-style navigation:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
scholar search "neural networks" --review
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The TUI supports:
|
|
74
|
+
- **Keep/Discard decisions** with mandatory motivations for discards
|
|
75
|
+
- **Theme tagging** for organizing kept papers
|
|
76
|
+
- **Note-taking** with your preferred editor
|
|
77
|
+
- **PDF viewing** with automatic download and caching
|
|
78
|
+
- **Abstract enrichment** for papers missing abstracts
|
|
79
|
+
- **LLM-assisted classification** to help review large result sets
|
|
80
|
+
- **Sorting and filtering** by various criteria
|
|
81
|
+
|
|
82
|
+
### Output Formats
|
|
83
|
+
|
|
84
|
+
Export results in multiple formats:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# Pretty table (default for terminal)
|
|
88
|
+
scholar search "query"
|
|
89
|
+
|
|
90
|
+
# Machine-readable formats
|
|
91
|
+
scholar search "query" -f json
|
|
92
|
+
scholar search "query" -f csv
|
|
93
|
+
scholar search "query" -f bibtex
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Session Management
|
|
97
|
+
|
|
98
|
+
Save and resume review sessions:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# List saved sessions
|
|
102
|
+
scholar sessions list
|
|
103
|
+
|
|
104
|
+
# Resume a session
|
|
105
|
+
scholar sessions resume "machine learning"
|
|
106
|
+
|
|
107
|
+
# Export session to reports
|
|
108
|
+
scholar sessions export "machine learning" -f all
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Paper Notes
|
|
112
|
+
|
|
113
|
+
Manage notes across all reviewed papers:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# Browse papers with notes
|
|
117
|
+
scholar notes
|
|
118
|
+
|
|
119
|
+
# List papers with notes
|
|
120
|
+
scholar notes list
|
|
121
|
+
|
|
122
|
+
# Export/import notes
|
|
123
|
+
scholar notes export notes.json
|
|
124
|
+
scholar notes import notes.json
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Caching
|
|
128
|
+
|
|
129
|
+
Search results are cached to avoid redundant API calls:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
scholar cache info # Show cache statistics
|
|
133
|
+
scholar cache clear # Delete cached results
|
|
134
|
+
scholar cache path # Print cache directory
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
PDF downloads are also cached for offline viewing.
|
|
138
|
+
|
|
139
|
+
## Quickstart
|
|
140
|
+
|
|
141
|
+
### Install
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
pipx install scholarcli
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Configure LLM access (optional, for `scholar rq` and LLM-assisted review)
|
|
148
|
+
|
|
149
|
+
Scholar uses the [`llm`](https://llm.datasette.io/) package for model selection
|
|
150
|
+
and API key configuration.
|
|
151
|
+
|
|
152
|
+
If you want to configure it via the `llm` CLI, install it as well (or install
|
|
153
|
+
`scholarcli` with `pipx install --include-deps` so the dependency CLIs are exposed):
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
pipx install llm
|
|
157
|
+
# Or: pipx install --include-deps scholarcli
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Then configure at least one provider (examples):
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
llm install llm-openai-plugin
|
|
164
|
+
llm keys set openai
|
|
165
|
+
|
|
166
|
+
# Or:
|
|
167
|
+
llm install llm-anthropic
|
|
168
|
+
llm keys set anthropic
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Set a default model for Scholar to use:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
llm models
|
|
175
|
+
llm models default gpt-4o-mini
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### First run
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Search directly
|
|
182
|
+
scholar search "machine learning privacy"
|
|
183
|
+
|
|
184
|
+
# Start from a research question (LLM generates provider-specific queries)
|
|
185
|
+
scholar rq "How do LLMs support novice programming?" --count 20
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Installation
|
|
189
|
+
|
|
190
|
+
If you don't use `pipx`, you can install with `pip`:
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
pip install scholarcli
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Or with [uv](https://github.com/astral-sh/uv):
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
uv pip install scholarcli
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Configuration
|
|
203
|
+
|
|
204
|
+
Some providers require API keys set as environment variables:
|
|
205
|
+
|
|
206
|
+
| Provider | Environment Variable | Required | How to Get |
|
|
207
|
+
|----------|---------------------|----------|------------|
|
|
208
|
+
| Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
|
|
209
|
+
| OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
|
|
210
|
+
| DBLP | - | No | No key needed |
|
|
211
|
+
| Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
|
|
212
|
+
| IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
|
|
213
|
+
|
|
214
|
+
View provider status:
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
scholar providers
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Usage Examples
|
|
221
|
+
|
|
222
|
+
### Basic Search
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
# Search with default providers (Semantic Scholar, OpenAlex, DBLP)
|
|
226
|
+
scholar search "differential privacy"
|
|
227
|
+
|
|
228
|
+
# Limit results per provider (default: 1000)
|
|
229
|
+
scholar search "blockchain" -l 50
|
|
230
|
+
|
|
231
|
+
# Unlimited results per provider
|
|
232
|
+
scholar search "blockchain" -l 0
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Systematic Review Workflow
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
# 1. Search and review interactively
|
|
239
|
+
scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
|
|
240
|
+
|
|
241
|
+
# 2. Add more searches to the same session
|
|
242
|
+
scholar search "federated learning privacy" --review --name "privacy-ml-review"
|
|
243
|
+
|
|
244
|
+
# 3. Resume reviewing later
|
|
245
|
+
scholar sessions resume "privacy-ml-review"
|
|
246
|
+
|
|
247
|
+
# 4. Generate reports
|
|
248
|
+
scholar sessions export "privacy-ml-review" -f all
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Enriching Results
|
|
252
|
+
|
|
253
|
+
Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
# Enrich during search
|
|
257
|
+
scholar search "query" --enrich
|
|
258
|
+
|
|
259
|
+
# Enrich an existing session
|
|
260
|
+
scholar enrich "session-name"
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### PDF Management
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
# Download and open a PDF
|
|
267
|
+
scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
|
|
268
|
+
|
|
269
|
+
# Inspect or clear the PDF cache
|
|
270
|
+
scholar pdf info
|
|
271
|
+
scholar pdf clear
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## Keybindings (Review TUI)
|
|
275
|
+
|
|
276
|
+
| Key | Action |
|
|
277
|
+
|-----|--------|
|
|
278
|
+
| `j`/`k` | Navigate down/up |
|
|
279
|
+
| `Enter` | View paper details |
|
|
280
|
+
| `K` | Keep paper (quick) |
|
|
281
|
+
| `T` | Keep with themes |
|
|
282
|
+
| `d` | Discard (requires motivation) |
|
|
283
|
+
| `n` | Edit notes |
|
|
284
|
+
| `p` | Open PDF |
|
|
285
|
+
| `e` | Enrich (fetch abstract) |
|
|
286
|
+
| `L` | LLM-assisted classification |
|
|
287
|
+
| `s` | Sort papers |
|
|
288
|
+
| `f` | Filter by status |
|
|
289
|
+
| `q` | Quit |
|
|
290
|
+
|
|
291
|
+
## LLM-Assisted Review
|
|
292
|
+
|
|
293
|
+
For large result sets, Scholar can use LLMs to assist with paper classification:
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
# In the TUI, press 'L' to invoke LLM classification
|
|
297
|
+
# Or use the CLI command directly
|
|
298
|
+
scholar llm-review "session-name" --count 10
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### How It Works
|
|
302
|
+
|
|
303
|
+
1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
|
|
304
|
+
|
|
305
|
+
2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
|
|
306
|
+
|
|
307
|
+
3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
|
|
308
|
+
|
|
309
|
+
4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept correct ones, correct wrong ones.
|
|
310
|
+
|
|
311
|
+
5. **Iterate** - Corrections become training examples for the next round.
|
|
312
|
+
|
|
313
|
+
### Requirements
|
|
314
|
+
|
|
315
|
+
Install and configure the `llm` command (Scholar uses `llm`'s configuration and
|
|
316
|
+
default model):
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
pipx install llm
|
|
320
|
+
|
|
321
|
+
llm install llm-openai-plugin
|
|
322
|
+
llm keys set openai
|
|
323
|
+
|
|
324
|
+
# Pick a default model (used by `scholar rq` and `scholar llm-review`)
|
|
325
|
+
llm models
|
|
326
|
+
llm models default gpt-4o-mini
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
If you installed Scholar with `pipx install scholarcli` and want the `llm` CLI
|
|
330
|
+
available from that same environment, you can alternatively install Scholar
|
|
331
|
+
with `pipx install --include-deps scholarcli`.
|
|
332
|
+
|
|
333
|
+
The LLM integration supports models available through Simon Willison's `llm`
|
|
334
|
+
package (OpenAI, Anthropic, local models, etc.).
|
|
335
|
+
|
|
336
|
+
Note: `scholar llm-review` learns from your existing labeled examples (typically
|
|
337
|
+
~5 tagged papers). `scholar rq` can start without examples by using the research
|
|
338
|
+
question as context.
|
|
339
|
+
|
|
340
|
+
## Documentation
|
|
341
|
+
|
|
342
|
+
Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
|
|
343
|
+
|
|
344
|
+
## License
|
|
345
|
+
|
|
346
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
scholarcli-1.6/README.md
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
# Scholar
|
|
2
|
+
|
|
3
|
+
A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
### Multi-Database Search
|
|
8
|
+
|
|
9
|
+
Search across six academic databases with a single query:
|
|
10
|
+
|
|
11
|
+
- **Semantic Scholar** - AI-powered research database with 200M+ papers
|
|
12
|
+
- **OpenAlex** - Open catalog of 250M+ scholarly works
|
|
13
|
+
- **DBLP** - Computer science bibliography
|
|
14
|
+
- **Web of Science** - Comprehensive citation index (requires API key)
|
|
15
|
+
- **IEEE Xplore** - IEEE technical literature (requires API key)
|
|
16
|
+
- **arXiv** - Preprints (no API key)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Search specific providers
|
|
20
|
+
scholar search "federated learning" -p semantic_scholar -p openalex
|
|
21
|
+
|
|
22
|
+
# Start from a research question (LLM generates provider-specific queries)
|
|
23
|
+
scholar rq "How can privacy-preserving ML be evaluated?" \
|
|
24
|
+
--provider openalex --provider dblp \
|
|
25
|
+
--count 20
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Interactive Review Interface
|
|
29
|
+
|
|
30
|
+
Review search results in a terminal-based interface with vim-style navigation:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
scholar search "neural networks" --review
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
The TUI supports:
|
|
37
|
+
- **Keep/Discard decisions** with mandatory motivations for discards
|
|
38
|
+
- **Theme tagging** for organizing kept papers
|
|
39
|
+
- **Note-taking** with your preferred editor
|
|
40
|
+
- **PDF viewing** with automatic download and caching
|
|
41
|
+
- **Abstract enrichment** for papers missing abstracts
|
|
42
|
+
- **LLM-assisted classification** to help review large result sets
|
|
43
|
+
- **Sorting and filtering** by various criteria
|
|
44
|
+
|
|
45
|
+
### Output Formats
|
|
46
|
+
|
|
47
|
+
Export results in multiple formats:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Pretty table (default for terminal)
|
|
51
|
+
scholar search "query"
|
|
52
|
+
|
|
53
|
+
# Machine-readable formats
|
|
54
|
+
scholar search "query" -f json
|
|
55
|
+
scholar search "query" -f csv
|
|
56
|
+
scholar search "query" -f bibtex
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Session Management
|
|
60
|
+
|
|
61
|
+
Save and resume review sessions:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# List saved sessions
|
|
65
|
+
scholar sessions list
|
|
66
|
+
|
|
67
|
+
# Resume a session
|
|
68
|
+
scholar sessions resume "machine learning"
|
|
69
|
+
|
|
70
|
+
# Export session to reports
|
|
71
|
+
scholar sessions export "machine learning" -f all
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Paper Notes
|
|
75
|
+
|
|
76
|
+
Manage notes across all reviewed papers:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Browse papers with notes
|
|
80
|
+
scholar notes
|
|
81
|
+
|
|
82
|
+
# List papers with notes
|
|
83
|
+
scholar notes list
|
|
84
|
+
|
|
85
|
+
# Export/import notes
|
|
86
|
+
scholar notes export notes.json
|
|
87
|
+
scholar notes import notes.json
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Caching
|
|
91
|
+
|
|
92
|
+
Search results are cached to avoid redundant API calls:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
scholar cache info # Show cache statistics
|
|
96
|
+
scholar cache clear # Delete cached results
|
|
97
|
+
scholar cache path # Print cache directory
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
PDF downloads are also cached for offline viewing.
|
|
101
|
+
|
|
102
|
+
## Quickstart
|
|
103
|
+
|
|
104
|
+
### Install
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pipx install scholarcli
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Configure LLM access (optional, for `scholar rq` and LLM-assisted review)
|
|
111
|
+
|
|
112
|
+
Scholar uses the [`llm`](https://llm.datasette.io/) package for model selection
|
|
113
|
+
and API key configuration.
|
|
114
|
+
|
|
115
|
+
If you want to configure it via the `llm` CLI, install it as well (or install
|
|
116
|
+
`scholarcli` with `pipx install --include-deps` so the dependency CLIs are exposed):
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
pipx install llm
|
|
120
|
+
# Or: pipx install --include-deps scholarcli
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Then configure at least one provider (examples):
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
llm install llm-openai-plugin
|
|
127
|
+
llm keys set openai
|
|
128
|
+
|
|
129
|
+
# Or:
|
|
130
|
+
llm install llm-anthropic
|
|
131
|
+
llm keys set anthropic
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Set a default model for Scholar to use:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
llm models
|
|
138
|
+
llm models default gpt-4o-mini
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### First run
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
# Search directly
|
|
145
|
+
scholar search "machine learning privacy"
|
|
146
|
+
|
|
147
|
+
# Start from a research question (LLM generates provider-specific queries)
|
|
148
|
+
scholar rq "How do LLMs support novice programming?" --count 20
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Installation
|
|
152
|
+
|
|
153
|
+
If you don't use `pipx`, you can install with `pip`:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
pip install scholarcli
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Or with [uv](https://github.com/astral-sh/uv):
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
uv pip install scholarcli
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Configuration
|
|
166
|
+
|
|
167
|
+
Some providers require API keys set as environment variables:
|
|
168
|
+
|
|
169
|
+
| Provider | Environment Variable | Required | How to Get |
|
|
170
|
+
|----------|---------------------|----------|------------|
|
|
171
|
+
| Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
|
|
172
|
+
| OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
|
|
173
|
+
| DBLP | - | No | No key needed |
|
|
174
|
+
| Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
|
|
175
|
+
| IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
|
|
176
|
+
|
|
177
|
+
View provider status:
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
scholar providers
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Usage Examples
|
|
184
|
+
|
|
185
|
+
### Basic Search
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
# Search with default providers (Semantic Scholar, OpenAlex, DBLP)
|
|
189
|
+
scholar search "differential privacy"
|
|
190
|
+
|
|
191
|
+
# Limit results per provider (default: 1000)
|
|
192
|
+
scholar search "blockchain" -l 50
|
|
193
|
+
|
|
194
|
+
# Unlimited results per provider
|
|
195
|
+
scholar search "blockchain" -l 0
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Systematic Review Workflow
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
# 1. Search and review interactively
|
|
202
|
+
scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
|
|
203
|
+
|
|
204
|
+
# 2. Add more searches to the same session
|
|
205
|
+
scholar search "federated learning privacy" --review --name "privacy-ml-review"
|
|
206
|
+
|
|
207
|
+
# 3. Resume reviewing later
|
|
208
|
+
scholar sessions resume "privacy-ml-review"
|
|
209
|
+
|
|
210
|
+
# 4. Generate reports
|
|
211
|
+
scholar sessions export "privacy-ml-review" -f all
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Enriching Results
|
|
215
|
+
|
|
216
|
+
Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
# Enrich during search
|
|
220
|
+
scholar search "query" --enrich
|
|
221
|
+
|
|
222
|
+
# Enrich an existing session
|
|
223
|
+
scholar enrich "session-name"
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### PDF Management
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# Download and open a PDF
|
|
230
|
+
scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
|
|
231
|
+
|
|
232
|
+
# Inspect or clear the PDF cache
|
|
233
|
+
scholar pdf info
|
|
234
|
+
scholar pdf clear
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Keybindings (Review TUI)
|
|
238
|
+
|
|
239
|
+
| Key | Action |
|
|
240
|
+
|-----|--------|
|
|
241
|
+
| `j`/`k` | Navigate down/up |
|
|
242
|
+
| `Enter` | View paper details |
|
|
243
|
+
| `K` | Keep paper (quick) |
|
|
244
|
+
| `T` | Keep with themes |
|
|
245
|
+
| `d` | Discard (requires motivation) |
|
|
246
|
+
| `n` | Edit notes |
|
|
247
|
+
| `p` | Open PDF |
|
|
248
|
+
| `e` | Enrich (fetch abstract) |
|
|
249
|
+
| `L` | LLM-assisted classification |
|
|
250
|
+
| `s` | Sort papers |
|
|
251
|
+
| `f` | Filter by status |
|
|
252
|
+
| `q` | Quit |
|
|
253
|
+
|
|
254
|
+
## LLM-Assisted Review
|
|
255
|
+
|
|
256
|
+
For large result sets, Scholar can use LLMs to assist with paper classification:
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
# In the TUI, press 'L' to invoke LLM classification
|
|
260
|
+
# Or use the CLI command directly
|
|
261
|
+
scholar llm-review "session-name" --count 10
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### How It Works
|
|
265
|
+
|
|
266
|
+
1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
|
|
267
|
+
|
|
268
|
+
2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
|
|
269
|
+
|
|
270
|
+
3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
|
|
271
|
+
|
|
272
|
+
4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept correct ones, correct wrong ones.
|
|
273
|
+
|
|
274
|
+
5. **Iterate** - Corrections become training examples for the next round.
|
|
275
|
+
|
|
276
|
+
### Requirements
|
|
277
|
+
|
|
278
|
+
Install and configure the `llm` command (Scholar uses `llm`'s configuration and
|
|
279
|
+
default model):
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
pipx install llm
|
|
283
|
+
|
|
284
|
+
llm install llm-openai-plugin
|
|
285
|
+
llm keys set openai
|
|
286
|
+
|
|
287
|
+
# Pick a default model (used by `scholar rq` and `scholar llm-review`)
|
|
288
|
+
llm models
|
|
289
|
+
llm models default gpt-4o-mini
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
If you installed Scholar with `pipx install scholarcli` and want the `llm` CLI
|
|
293
|
+
available from that same environment, you can alternatively install Scholar
|
|
294
|
+
with `pipx install --include-deps scholarcli`.
|
|
295
|
+
|
|
296
|
+
The LLM integration supports models available through Simon Willison's `llm`
|
|
297
|
+
package (OpenAI, Anthropic, local models, etc.).
|
|
298
|
+
|
|
299
|
+
Note: `scholar llm-review` learns from your existing labeled examples (typically
|
|
300
|
+
~5 tagged papers). `scholar rq` can start without examples by using the research
|
|
301
|
+
question as context.
|
|
302
|
+
|
|
303
|
+
## Documentation
|
|
304
|
+
|
|
305
|
+
Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
|
|
306
|
+
|
|
307
|
+
## License
|
|
308
|
+
|
|
309
|
+
MIT License - see [LICENSE](LICENSE) for details.
|