scholarcli 1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. scholarcli-1.0/LICENSE +21 -0
  2. scholarcli-1.0/PKG-INFO +280 -0
  3. scholarcli-1.0/README.md +236 -0
  4. scholarcli-1.0/pyproject.toml +80 -0
  5. scholarcli-1.0/setup.cfg +4 -0
  6. scholarcli-1.0/src/scholar/__init__.py +21 -0
  7. scholarcli-1.0/src/scholar/__main__.py +6 -0
  8. scholarcli-1.0/src/scholar/cache.py +141 -0
  9. scholarcli-1.0/src/scholar/cli.py +1783 -0
  10. scholarcli-1.0/src/scholar/enrich.py +361 -0
  11. scholarcli-1.0/src/scholar/llm_review.py +804 -0
  12. scholarcli-1.0/src/scholar/notes.py +591 -0
  13. scholarcli-1.0/src/scholar/pdf.py +482 -0
  14. scholarcli-1.0/src/scholar/providers.py +2537 -0
  15. scholarcli-1.0/src/scholar/review.py +1438 -0
  16. scholarcli-1.0/src/scholar/scholar.py +712 -0
  17. scholarcli-1.0/src/scholar/tui.py +1859 -0
  18. scholarcli-1.0/src/scholar/utils.py +72 -0
  19. scholarcli-1.0/src/scholarcli.egg-info/PKG-INFO +280 -0
  20. scholarcli-1.0/src/scholarcli.egg-info/SOURCES.txt +74 -0
  21. scholarcli-1.0/src/scholarcli.egg-info/dependency_links.txt +1 -0
  22. scholarcli-1.0/src/scholarcli.egg-info/entry_points.txt +2 -0
  23. scholarcli-1.0/src/scholarcli.egg-info/requires.txt +36 -0
  24. scholarcli-1.0/src/scholarcli.egg-info/top_level.txt +3 -0
  25. scholarcli-1.0/src/snowball/__init__.py +195 -0
  26. scholarcli-1.0/src/snowball/apis/__init__.py +0 -0
  27. scholarcli-1.0/src/snowball/apis/aggregator.py +296 -0
  28. scholarcli-1.0/src/snowball/apis/arxiv.py +233 -0
  29. scholarcli-1.0/src/snowball/apis/base.py +84 -0
  30. scholarcli-1.0/src/snowball/apis/crossref.py +203 -0
  31. scholarcli-1.0/src/snowball/apis/google_scholar.py +259 -0
  32. scholarcli-1.0/src/snowball/apis/openalex.py +299 -0
  33. scholarcli-1.0/src/snowball/apis/opencitations.py +288 -0
  34. scholarcli-1.0/src/snowball/apis/semantic_scholar.py +320 -0
  35. scholarcli-1.0/src/snowball/cli.py +1127 -0
  36. scholarcli-1.0/src/snowball/exporters/__init__.py +0 -0
  37. scholarcli-1.0/src/snowball/exporters/bibtex.py +153 -0
  38. scholarcli-1.0/src/snowball/exporters/csv_exporter.py +139 -0
  39. scholarcli-1.0/src/snowball/exporters/tikz.py +224 -0
  40. scholarcli-1.0/src/snowball/filters/__init__.py +0 -0
  41. scholarcli-1.0/src/snowball/filters/filter_engine.py +166 -0
  42. scholarcli-1.0/src/snowball/models.py +152 -0
  43. scholarcli-1.0/src/snowball/paper_utils.py +612 -0
  44. scholarcli-1.0/src/snowball/parsers/__init__.py +0 -0
  45. scholarcli-1.0/src/snowball/parsers/pdf_parser.py +371 -0
  46. scholarcli-1.0/src/snowball/scoring/__init__.py +30 -0
  47. scholarcli-1.0/src/snowball/scoring/base.py +47 -0
  48. scholarcli-1.0/src/snowball/scoring/llm_scorer.py +170 -0
  49. scholarcli-1.0/src/snowball/scoring/tfidf_scorer.py +147 -0
  50. scholarcli-1.0/src/snowball/snowballing.py +623 -0
  51. scholarcli-1.0/src/snowball/storage/__init__.py +0 -0
  52. scholarcli-1.0/src/snowball/storage/json_storage.py +287 -0
  53. scholarcli-1.0/src/snowball/tui/__init__.py +0 -0
  54. scholarcli-1.0/src/snowball/tui/app.py +1925 -0
  55. scholarcli-1.0/src/snowball/visualization.py +290 -0
  56. scholarcli-1.0/src/tuxedo/__init__.py +136 -0
  57. scholarcli-1.0/src/tuxedo/analysis.py +265 -0
  58. scholarcli-1.0/src/tuxedo/cli.py +1402 -0
  59. scholarcli-1.0/src/tuxedo/clustering.py +703 -0
  60. scholarcli-1.0/src/tuxedo/database.py +650 -0
  61. scholarcli-1.0/src/tuxedo/grobid.py +519 -0
  62. scholarcli-1.0/src/tuxedo/logging.py +118 -0
  63. scholarcli-1.0/src/tuxedo/models.py +127 -0
  64. scholarcli-1.0/src/tuxedo/project.py +288 -0
  65. scholarcli-1.0/src/tuxedo/tui.py +2836 -0
  66. scholarcli-1.0/tests/test_cache.py +98 -0
  67. scholarcli-1.0/tests/test_cli.py +332 -0
  68. scholarcli-1.0/tests/test_enrich.py +378 -0
  69. scholarcli-1.0/tests/test_llm_review.py +589 -0
  70. scholarcli-1.0/tests/test_notes.py +450 -0
  71. scholarcli-1.0/tests/test_pdf.py +279 -0
  72. scholarcli-1.0/tests/test_providers.py +2077 -0
  73. scholarcli-1.0/tests/test_review.py +1056 -0
  74. scholarcli-1.0/tests/test_scholar.py +696 -0
  75. scholarcli-1.0/tests/test_tui.py +11 -0
  76. scholarcli-1.0/tests/test_utils.py +75 -0
scholarcli-1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025--2026 Daniel Bosk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,280 @@
1
+ Metadata-Version: 2.4
2
+ Name: scholarcli
3
+ Version: 1.0
4
+ Summary: A tool for structured literature searches across bibliographic databases
5
+ Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: requests>=2.32.5
11
+ Requires-Dist: typer>=0.21.0
12
+ Requires-Dist: rich>=14.2.0
13
+ Requires-Dist: pyalex>=0.19
14
+ Requires-Dist: arxiv>=2.1.0
15
+ Requires-Dist: cachetools>=6.2.4
16
+ Requires-Dist: platformdirs>=4.5.1
17
+ Requires-Dist: textual>=6.11.0
18
+ Requires-Dist: pypandoc>=1.14
19
+ Requires-Dist: click>=8.0.0
20
+ Requires-Dist: llm>=0.19
21
+ Requires-Dist: llm-openai-plugin>=0.7
22
+ Requires-Dist: llm-gpt4all>=0.4
23
+ Requires-Dist: llm-azure>=2.1
24
+ Requires-Dist: llm-anthropic>=0.23
25
+ Requires-Dist: llm-gemini>=0.28.2
26
+ Provides-Extra: snowball
27
+ Requires-Dist: pypdfium2>=4.26.0; extra == "snowball"
28
+ Requires-Dist: bibtexparser>=1.4.0; extra == "snowball"
29
+ Requires-Dist: pandas>=2.0.0; extra == "snowball"
30
+ Requires-Dist: pydantic>=2.0.0; extra == "snowball"
31
+ Requires-Dist: python-dotenv>=1.0.0; extra == "snowball"
32
+ Requires-Dist: httpx>=0.25.0; extra == "snowball"
33
+ Requires-Dist: grobid-client-python>=0.1.0; extra == "snowball"
34
+ Requires-Dist: scholarly>=1.7.0; extra == "snowball"
35
+ Provides-Extra: tuxedo
36
+ Requires-Dist: httpx>=0.27.0; extra == "tuxedo"
37
+ Requires-Dist: openai>=1.0.0; extra == "tuxedo"
38
+ Requires-Dist: pydantic>=2.0.0; extra == "tuxedo"
39
+ Requires-Dist: pyyaml>=6.0; extra == "tuxedo"
40
+ Provides-Extra: all
41
+ Requires-Dist: scholarcli[snowball]; extra == "all"
42
+ Requires-Dist: scholarcli[tuxedo]; extra == "all"
43
+ Dynamic: license-file
44
+
45
+ # Scholar
46
+
47
+ A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
48
+
49
+ ## Features
50
+
51
+ ### Multi-Database Search
52
+
53
+ Search across five academic databases with a single query:
54
+
55
+ - **Semantic Scholar** - AI-powered research database with 200M+ papers
56
+ - **OpenAlex** - Open catalog of 250M+ scholarly works
57
+ - **DBLP** - Computer science bibliography
58
+ - **Web of Science** - Comprehensive citation index (requires API key)
59
+ - **IEEE Xplore** - IEEE technical literature (requires API key)
60
+
61
+ ```bash
62
+ # Search all available providers
63
+ scholar search "machine learning privacy"
64
+
65
+ # Search specific providers
66
+ scholar search "federated learning" -p semantic_scholar -p openalex
67
+ ```
68
+
69
+ ### Interactive Review Interface
70
+
71
+ Review search results in a terminal-based interface with vim-style navigation:
72
+
73
+ ```bash
74
+ scholar search "neural networks" --review
75
+ ```
76
+
77
+ The TUI supports:
78
+ - **Keep/Discard decisions** with mandatory motivations for discards
79
+ - **Theme tagging** for organizing kept papers
80
+ - **Note-taking** with your preferred editor
81
+ - **PDF viewing** with automatic download and caching
82
+ - **Abstract enrichment** for papers missing abstracts
83
+ - **LLM-assisted classification** to help review large result sets
84
+ - **Sorting and filtering** by various criteria
85
+
86
+ ### Output Formats
87
+
88
+ Export results in multiple formats:
89
+
90
+ ```bash
91
+ # Pretty table (default for terminal)
92
+ scholar search "query"
93
+
94
+ # Machine-readable formats
95
+ scholar search "query" -f json
96
+ scholar search "query" -f csv
97
+ scholar search "query" -f bibtex
98
+ ```
99
+
100
+ ### Session Management
101
+
102
+ Save and resume review sessions:
103
+
104
+ ```bash
105
+ # List saved sessions
106
+ scholar sessions list
107
+
108
+ # Resume a session
109
+ scholar sessions resume "machine learning"
110
+
111
+ # Export session to reports
112
+ scholar sessions export "machine learning" -f all
113
+ ```
114
+
115
+ ### Paper Notes
116
+
117
+ Manage notes across all reviewed papers:
118
+
119
+ ```bash
120
+ # Browse papers with notes
121
+ scholar notes
122
+
123
+ # List papers with notes
124
+ scholar notes list
125
+
126
+ # Export/import notes
127
+ scholar notes export notes.json
128
+ scholar notes import notes.json
129
+ ```
130
+
131
+ ### Caching
132
+
133
+ Search results are cached to avoid redundant API calls:
134
+
135
+ ```bash
136
+ scholar cache info # Show cache statistics
137
+ scholar cache clear # Delete cached results
138
+ scholar cache path # Print cache directory
139
+ ```
140
+
141
+ PDF downloads are also cached for offline viewing.
142
+
143
+ ## Installation
144
+
145
+ ```bash
146
+ pip install scholarcli
147
+ ```
148
+
149
+ Or with [uv](https://github.com/astral-sh/uv):
150
+
151
+ ```bash
152
+ uv pip install scholarcli
153
+ ```
154
+
155
+ ## Configuration
156
+
157
+ Some providers require API keys set as environment variables:
158
+
159
+ | Provider | Environment Variable | Required | How to Get |
160
+ |----------|---------------------|----------|------------|
161
+ | Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
162
+ | OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
163
+ | DBLP | - | No | No key needed |
164
+ | Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
165
+ | IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
166
+
167
+ View provider status:
168
+
169
+ ```bash
170
+ scholar providers
171
+ ```
172
+
173
+ ## Usage Examples
174
+
175
+ ### Basic Search
176
+
177
+ ```bash
178
+ # Search with default providers (Semantic Scholar, OpenAlex, DBLP)
179
+ scholar search "differential privacy"
180
+
181
+ # Limit results per provider
182
+ scholar search "blockchain" -l 50
183
+ ```
184
+
185
+ ### Systematic Review Workflow
186
+
187
+ ```bash
188
+ # 1. Search and review interactively
189
+ scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
190
+
191
+ # 2. Add more searches to the same session
192
+ scholar search "federated learning privacy" --review --name "privacy-ml-review"
193
+
194
+ # 3. Resume reviewing later
195
+ scholar sessions resume "privacy-ml-review"
196
+
197
+ # 4. Generate reports
198
+ scholar sessions export "privacy-ml-review" -f all
199
+ ```
200
+
201
+ ### Enriching Results
202
+
203
+ Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
204
+
205
+ ```bash
206
+ # Enrich during search
207
+ scholar search "query" --enrich
208
+
209
+ # Enrich an existing session
210
+ scholar enrich "session-name"
211
+ ```
212
+
213
+ ### PDF Management
214
+
215
+ ```bash
216
+ # Download and open a PDF
217
+ scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
218
+
219
+ # View PDF cache
220
+ scholar pdf info
221
+ scholar pdf clear
222
+ ```
223
+
224
+ ## Keybindings (Review TUI)
225
+
226
+ | Key | Action |
227
+ |-----|--------|
228
+ | `j`/`k` | Navigate down/up |
229
+ | `Enter` | View paper details |
230
+ | `K` | Keep paper (quick) |
231
+ | `T` | Keep with themes |
232
+ | `d` | Discard (requires motivation) |
233
+ | `n` | Edit notes |
234
+ | `p` | Open PDF |
235
+ | `e` | Enrich (fetch abstract) |
236
+ | `L` | LLM-assisted classification |
237
+ | `s` | Sort papers |
238
+ | `f` | Filter by status |
239
+ | `q` | Quit |
240
+
241
+ ## LLM-Assisted Review
242
+
243
+ For large result sets, Scholar can use LLMs to assist with paper classification:
244
+
245
+ ```bash
246
+ # In the TUI, press 'L' to invoke LLM classification
247
+ # Or use the CLI command directly
248
+ scholar llm-review "session-name" --count 10
249
+ ```
250
+
251
+ ### How It Works
252
+
253
+ 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
254
+
255
+ 2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
256
+
257
+ 3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
258
+
259
+ 4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept correct ones, correct wrong ones.
260
+
261
+ 5. **Iterate** - Corrections become training examples for the next round.
262
+
263
+ ### Requirements
264
+
265
+ Install the `llm` package and configure a model:
266
+
267
+ ```bash
268
+ pip install llm
269
+ llm keys set openai # Or configure another provider
270
+ ```
271
+
272
+ The LLM module supports any model available through Simon Willison's `llm` package (OpenAI, Anthropic, local models, etc.).
273
+
274
+ ## Documentation
275
+
276
+ Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
277
+
278
+ ## License
279
+
280
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,236 @@
1
+ # Scholar
2
+
3
+ A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
4
+
5
+ ## Features
6
+
7
+ ### Multi-Database Search
8
+
9
+ Search across five academic databases with a single query:
10
+
11
+ - **Semantic Scholar** - AI-powered research database with 200M+ papers
12
+ - **OpenAlex** - Open catalog of 250M+ scholarly works
13
+ - **DBLP** - Computer science bibliography
14
+ - **Web of Science** - Comprehensive citation index (requires API key)
15
+ - **IEEE Xplore** - IEEE technical literature (requires API key)
16
+
17
+ ```bash
18
+ # Search all available providers
19
+ scholar search "machine learning privacy"
20
+
21
+ # Search specific providers
22
+ scholar search "federated learning" -p semantic_scholar -p openalex
23
+ ```
24
+
25
+ ### Interactive Review Interface
26
+
27
+ Review search results in a terminal-based interface with vim-style navigation:
28
+
29
+ ```bash
30
+ scholar search "neural networks" --review
31
+ ```
32
+
33
+ The TUI supports:
34
+ - **Keep/Discard decisions** with mandatory motivations for discards
35
+ - **Theme tagging** for organizing kept papers
36
+ - **Note-taking** with your preferred editor
37
+ - **PDF viewing** with automatic download and caching
38
+ - **Abstract enrichment** for papers missing abstracts
39
+ - **LLM-assisted classification** to help review large result sets
40
+ - **Sorting and filtering** by various criteria
41
+
42
+ ### Output Formats
43
+
44
+ Export results in multiple formats:
45
+
46
+ ```bash
47
+ # Pretty table (default for terminal)
48
+ scholar search "query"
49
+
50
+ # Machine-readable formats
51
+ scholar search "query" -f json
52
+ scholar search "query" -f csv
53
+ scholar search "query" -f bibtex
54
+ ```
55
+
56
+ ### Session Management
57
+
58
+ Save and resume review sessions:
59
+
60
+ ```bash
61
+ # List saved sessions
62
+ scholar sessions list
63
+
64
+ # Resume a session
65
+ scholar sessions resume "machine learning"
66
+
67
+ # Export session to reports
68
+ scholar sessions export "machine learning" -f all
69
+ ```
70
+
71
+ ### Paper Notes
72
+
73
+ Manage notes across all reviewed papers:
74
+
75
+ ```bash
76
+ # Browse papers with notes
77
+ scholar notes
78
+
79
+ # List papers with notes
80
+ scholar notes list
81
+
82
+ # Export/import notes
83
+ scholar notes export notes.json
84
+ scholar notes import notes.json
85
+ ```
86
+
87
+ ### Caching
88
+
89
+ Search results are cached to avoid redundant API calls:
90
+
91
+ ```bash
92
+ scholar cache info # Show cache statistics
93
+ scholar cache clear # Delete cached results
94
+ scholar cache path # Print cache directory
95
+ ```
96
+
97
+ PDF downloads are also cached for offline viewing.
98
+
99
+ ## Installation
100
+
101
+ ```bash
102
+ pip install scholarcli
103
+ ```
104
+
105
+ Or with [uv](https://github.com/astral-sh/uv):
106
+
107
+ ```bash
108
+ uv pip install scholarcli
109
+ ```
110
+
111
+ ## Configuration
112
+
113
+ Some providers require API keys set as environment variables:
114
+
115
+ | Provider | Environment Variable | Required | How to Get |
116
+ |----------|---------------------|----------|------------|
117
+ | Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
118
+ | OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
119
+ | DBLP | - | No | No key needed |
120
+ | Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
121
+ | IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
122
+
123
+ View provider status:
124
+
125
+ ```bash
126
+ scholar providers
127
+ ```
128
+
129
+ ## Usage Examples
130
+
131
+ ### Basic Search
132
+
133
+ ```bash
134
+ # Search with default providers (Semantic Scholar, OpenAlex, DBLP)
135
+ scholar search "differential privacy"
136
+
137
+ # Limit results per provider
138
+ scholar search "blockchain" -l 50
139
+ ```
140
+
141
+ ### Systematic Review Workflow
142
+
143
+ ```bash
144
+ # 1. Search and review interactively
145
+ scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
146
+
147
+ # 2. Add more searches to the same session
148
+ scholar search "federated learning privacy" --review --name "privacy-ml-review"
149
+
150
+ # 3. Resume reviewing later
151
+ scholar sessions resume "privacy-ml-review"
152
+
153
+ # 4. Generate reports
154
+ scholar sessions export "privacy-ml-review" -f all
155
+ ```
156
+
157
+ ### Enriching Results
158
+
159
+ Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
160
+
161
+ ```bash
162
+ # Enrich during search
163
+ scholar search "query" --enrich
164
+
165
+ # Enrich an existing session
166
+ scholar enrich "session-name"
167
+ ```
168
+
169
+ ### PDF Management
170
+
171
+ ```bash
172
+ # Download and open a PDF
173
+ scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
174
+
175
+ # View PDF cache
176
+ scholar pdf info
177
+ scholar pdf clear
178
+ ```
179
+
180
+ ## Keybindings (Review TUI)
181
+
182
+ | Key | Action |
183
+ |-----|--------|
184
+ | `j`/`k` | Navigate down/up |
185
+ | `Enter` | View paper details |
186
+ | `K` | Keep paper (quick) |
187
+ | `T` | Keep with themes |
188
+ | `d` | Discard (requires motivation) |
189
+ | `n` | Edit notes |
190
+ | `p` | Open PDF |
191
+ | `e` | Enrich (fetch abstract) |
192
+ | `L` | LLM-assisted classification |
193
+ | `s` | Sort papers |
194
+ | `f` | Filter by status |
195
+ | `q` | Quit |
196
+
197
+ ## LLM-Assisted Review
198
+
199
+ For large result sets, Scholar can use LLMs to assist with paper classification:
200
+
201
+ ```bash
202
+ # In the TUI, press 'L' to invoke LLM classification
203
+ # Or use the CLI command directly
204
+ scholar llm-review "session-name" --count 10
205
+ ```
206
+
207
+ ### How It Works
208
+
209
+ 1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
210
+
211
+ 2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
212
+
213
+ 3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
214
+
215
+ 4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept correct ones, correct wrong ones.
216
+
217
+ 5. **Iterate** - Corrections become training examples for the next round.
218
+
219
+ ### Requirements
220
+
221
+ Install the `llm` package and configure a model:
222
+
223
+ ```bash
224
+ pip install llm
225
+ llm keys set openai # Or configure another provider
226
+ ```
227
+
228
+ The LLM module supports any model available through Simon Willison's `llm` package (OpenAI, Anthropic, local models, etc.).
229
+
230
+ ## Documentation
231
+
232
+ Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
233
+
234
+ ## License
235
+
236
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,80 @@
1
+ [project]
2
+ name = "scholarcli"
3
+ version = "1.0"
4
+ description = "A tool for structured literature searches across bibliographic databases"
5
+ authors = [{ name = "Daniel Bosk", email = "dbosk@kth.se" },
6
+ { name = "Ric Glassey", email = "glassey@kth.se" }]
7
+ readme = "README.md"
8
+ license = "MIT"
9
+ requires-python = ">= 3.12"
10
+ dependencies = [
11
+ "requests>=2.32.5",
12
+ "typer>=0.21.0",
13
+ "rich>=14.2.0",
14
+ "pyalex>=0.19",
15
+ "arxiv>=2.1.0",
16
+ "cachetools>=6.2.4",
17
+ "platformdirs>=4.5.1",
18
+ "textual>=6.11.0",
19
+ "pypandoc>=1.14",
20
+ "click>=8.0.0",
21
+ "llm>=0.19",
22
+ "llm-openai-plugin>=0.7",
23
+ "llm-gpt4all>=0.4",
24
+ "llm-azure>=2.1",
25
+ "llm-anthropic>=0.23",
26
+ "llm-gemini>=0.28.2",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ # Snowball dependencies (for scholar snowball subcommand)
31
+ snowball = [
32
+ "pypdfium2>=4.26.0",
33
+ "bibtexparser>=1.4.0",
34
+ "pandas>=2.0.0",
35
+ "pydantic>=2.0.0",
36
+ "python-dotenv>=1.0.0",
37
+ "httpx>=0.25.0",
38
+ "grobid-client-python>=0.1.0",
39
+ "scholarly>=1.7.0",
40
+ ]
41
+ # Tuxedo dependencies (for scholar tuxedo subcommand)
42
+ tuxedo = [
43
+ "httpx>=0.27.0",
44
+ "openai>=1.0.0",
45
+ "pydantic>=2.0.0",
46
+ "pyyaml>=6.0",
47
+ ]
48
+ # All subcommand dependencies
49
+ all = [
50
+ "scholarcli[snowball]",
51
+ "scholarcli[tuxedo]",
52
+ ]
53
+
54
+ [project.scripts]
55
+ scholar = "scholar.cli:main"
56
+
57
+ [build-system]
58
+ requires = ["setuptools>=61.0"]
59
+ build-backend = "setuptools.build_meta"
60
+
61
+ [tool.setuptools.packages.find]
62
+ where = ["src"]
63
+
64
+ [tool.pytest.ini_options]
65
+ testpaths = ["tests"]
66
+ python_files = ["test_*.py"]
67
+ markers = [
68
+ "integration: marks tests as integration tests (make real API calls)",
69
+ ]
70
+
71
+ [tool.black]
72
+ line-length = 78
73
+
74
+ [dependency-groups]
75
+ dev = [
76
+ "pytest (>=9.0.2,<10.0.0)",
77
+ "pytest-cov (>=7.0.0,<8.0.0)",
78
+ "black (>=25.12.0,<26.0.0)",
79
+ "mypy (>=1.19.1,<2.0.0)"
80
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,21 @@
1
+ """
2
+ Scholar package for structured literature searches.
3
+ """
4
+
5
+ from .scholar import Search, SearchResult, Paper, SearchFilters
6
+ from .scholar import search, filter_papers
7
+ from .scholar import get_registry, isolated_registry
8
+ from .utils import safe_get_nested, ensure_list
9
+
10
+ __all__ = [
11
+ "Search",
12
+ "SearchResult",
13
+ "Paper",
14
+ "SearchFilters",
15
+ "search",
16
+ "filter_papers",
17
+ "get_registry",
18
+ "isolated_registry",
19
+ "safe_get_nested",
20
+ "ensure_list",
21
+ ]
@@ -0,0 +1,6 @@
1
+ """Allow running Scholar as a module: python -m scholar"""
2
+
3
+ from scholar.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()