academic-refchecker 2.0.18__tar.gz → 2.0.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. academic_refchecker-2.0.20/PKG-INFO +405 -0
  2. academic_refchecker-2.0.20/README.md +333 -0
  3. academic_refchecker-2.0.20/academic_refchecker.egg-info/PKG-INFO +405 -0
  4. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/__version__.py +1 -1
  5. academic_refchecker-2.0.18/PKG-INFO +0 -877
  6. academic_refchecker-2.0.18/README.md +0 -805
  7. academic_refchecker-2.0.18/academic_refchecker.egg-info/PKG-INFO +0 -877
  8. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/LICENSE +0 -0
  9. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/MANIFEST.in +0 -0
  10. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/academic_refchecker.egg-info/SOURCES.txt +0 -0
  11. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/academic_refchecker.egg-info/dependency_links.txt +0 -0
  12. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/academic_refchecker.egg-info/entry_points.txt +0 -0
  13. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/academic_refchecker.egg-info/requires.txt +0 -0
  14. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/academic_refchecker.egg-info/top_level.txt +0 -0
  15. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/__init__.py +0 -0
  16. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/__main__.py +0 -0
  17. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/cli.py +0 -0
  18. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/concurrency.py +0 -0
  19. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/database.py +0 -0
  20. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/main.py +0 -0
  21. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/models.py +0 -0
  22. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/refchecker_wrapper.py +0 -0
  23. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/assets/index-2P6L_39v.css +0 -0
  24. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/assets/index-B92lKsA8.js +0 -0
  25. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/assets/index-BuguAhjS.css +0 -0
  26. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/assets/index-DMZJNrR0.js +0 -0
  27. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/assets/index-hk21nqxR.js +0 -0
  28. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/favicon.svg +0 -0
  29. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/index.html +0 -0
  30. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/static/vite.svg +0 -0
  31. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/thumbnail.py +0 -0
  32. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/backend/websocket_manager.py +0 -0
  33. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/pyproject.toml +0 -0
  34. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/requirements.txt +0 -0
  35. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/scripts/download_db.py +0 -0
  36. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/scripts/run_tests.py +0 -0
  37. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/scripts/start_vllm_server.py +0 -0
  38. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/setup.cfg +0 -0
  39. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/__init__.py +0 -0
  40. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/__main__.py +0 -0
  41. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/__init__.py +0 -0
  42. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/arxiv_citation.py +0 -0
  43. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/crossref.py +0 -0
  44. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/enhanced_hybrid_checker.py +0 -0
  45. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/github_checker.py +0 -0
  46. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/local_semantic_scholar.py +0 -0
  47. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/openalex.py +0 -0
  48. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/openreview_checker.py +0 -0
  49. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/pdf_paper_checker.py +0 -0
  50. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/semantic_scholar.py +0 -0
  51. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/checkers/webpage_checker.py +0 -0
  52. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/config/__init__.py +0 -0
  53. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/config/logging.conf +0 -0
  54. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/config/settings.py +0 -0
  55. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/core/__init__.py +0 -0
  56. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/core/db_connection_pool.py +0 -0
  57. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/core/parallel_processor.py +0 -0
  58. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/core/refchecker.py +0 -0
  59. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/database/__init__.py +0 -0
  60. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/database/download_semantic_scholar_db.py +0 -0
  61. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/llm/__init__.py +0 -0
  62. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/llm/base.py +0 -0
  63. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/llm/providers.py +0 -0
  64. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/scripts/__init__.py +0 -0
  65. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/scripts/start_vllm_server.py +0 -0
  66. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/services/__init__.py +0 -0
  67. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/services/pdf_processor.py +0 -0
  68. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/__init__.py +0 -0
  69. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/arxiv_rate_limiter.py +0 -0
  70. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/arxiv_utils.py +0 -0
  71. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/author_utils.py +0 -0
  72. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/biblatex_parser.py +0 -0
  73. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/bibliography_utils.py +0 -0
  74. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/bibtex_parser.py +0 -0
  75. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/config_validator.py +0 -0
  76. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/db_utils.py +0 -0
  77. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/doi_utils.py +0 -0
  78. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/error_utils.py +0 -0
  79. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/mock_objects.py +0 -0
  80. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/text_utils.py +0 -0
  81. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/unicode_utils.py +0 -0
  82. {academic_refchecker-2.0.18 → academic_refchecker-2.0.20}/src/refchecker/utils/url_utils.py +0 -0
@@ -0,0 +1,405 @@
1
+ Metadata-Version: 2.4
2
+ Name: academic-refchecker
3
+ Version: 2.0.20
4
+ Summary: A comprehensive tool for validating reference accuracy in academic papers
5
+ Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/markrussinovich/refchecker
8
+ Project-URL: Repository, https://github.com/markrussinovich/refchecker
9
+ Project-URL: Bug Tracker, https://github.com/markrussinovich/refchecker/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.7
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Operating System :: OS Independent
20
+ Requires-Python: >=3.7
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: requests>=2.25.0
24
+ Requires-Dist: beautifulsoup4>=4.9.0
25
+ Requires-Dist: pypdf>=5.0.0
26
+ Requires-Dist: arxiv>=1.4.0
27
+ Requires-Dist: python-dateutil>=2.8.0
28
+ Requires-Dist: tqdm>=4.60.0
29
+ Requires-Dist: colorama>=0.4.4
30
+ Requires-Dist: fuzzywuzzy>=0.18.0
31
+ Requires-Dist: python-Levenshtein>=0.12.0
32
+ Requires-Dist: pandas<2.4.0,>=1.3.0
33
+ Requires-Dist: numpy<2.0.0,>=1.22.4
34
+ Requires-Dist: pdfplumber>=0.6.0
35
+ Requires-Dist: bibtexparser>=1.4.0
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest>=6.0.0; extra == "dev"
38
+ Requires-Dist: pytest-cov>=2.0.0; extra == "dev"
39
+ Requires-Dist: black>=21.0.0; extra == "dev"
40
+ Requires-Dist: isort>=5.0.0; extra == "dev"
41
+ Requires-Dist: flake8>=3.9.0; extra == "dev"
42
+ Requires-Dist: mypy>=0.910; extra == "dev"
43
+ Provides-Extra: docs
44
+ Requires-Dist: sphinx>=4.0.0; extra == "docs"
45
+ Requires-Dist: sphinx-rtd-theme>=0.5.0; extra == "docs"
46
+ Provides-Extra: llm
47
+ Requires-Dist: openai>=1.0.0; extra == "llm"
48
+ Requires-Dist: anthropic>=0.7.0; extra == "llm"
49
+ Requires-Dist: google-generativeai>=0.3.0; extra == "llm"
50
+ Provides-Extra: optional
51
+ Requires-Dist: lxml>=4.6.0; extra == "optional"
52
+ Requires-Dist: selenium>=4.0.0; extra == "optional"
53
+ Requires-Dist: pikepdf>=5.0.0; extra == "optional"
54
+ Requires-Dist: nltk>=3.6.0; extra == "optional"
55
+ Requires-Dist: scikit-learn>=1.0.0; extra == "optional"
56
+ Requires-Dist: joblib>=1.1.0; extra == "optional"
57
+ Provides-Extra: vllm
58
+ Requires-Dist: vllm>=0.3.0; extra == "vllm"
59
+ Requires-Dist: huggingface_hub>=0.17.0; extra == "vllm"
60
+ Requires-Dist: torch>=2.0.0; extra == "vllm"
61
+ Provides-Extra: webui
62
+ Requires-Dist: fastapi>=0.100.0; extra == "webui"
63
+ Requires-Dist: uvicorn[standard]>=0.22.0; extra == "webui"
64
+ Requires-Dist: pydantic>=2.0.0; extra == "webui"
65
+ Requires-Dist: aiosqlite>=0.19.0; extra == "webui"
66
+ Requires-Dist: httpx>=0.24.0; extra == "webui"
67
+ Requires-Dist: cryptography>=42.0.0; extra == "webui"
68
+ Requires-Dist: pymupdf>=1.23.0; extra == "webui"
69
+ Requires-Dist: Pillow>=9.0.0; extra == "webui"
70
+ Requires-Dist: python-multipart>=0.0.6; extra == "webui"
71
+ Dynamic: license-file
72
+
73
+ # RefChecker
74
+
75
+ Validate reference accuracy in academic papers. Useful for authors checking bibliographies and reviewers ensuring citations are authentic. RefChecker verifies citations against Semantic Scholar, OpenAlex, and CrossRef.
76
+
77
+ *Built by Mark Russinovich with AI assistants (Cursor, GitHub Copilot, Claude Code). [Watch the deep dive video](https://www.youtube.com/watch?v=n929Alz-fjo).*
78
+
79
+ ## Contents
80
+
81
+ - [Quick Start](#quick-start)
82
+ - [Features](#features)
83
+ - [Sample Output](#sample-output)
84
+ - [Install](#install)
85
+ - [Run](#run)
86
+ - [Output](#output)
87
+ - [Configure](#configure)
88
+ - [Local Database](#local-database)
89
+ - [Testing](#testing)
90
+ - [License](#license)
91
+
92
+ ## Quick Start
93
+
94
+ ### Web UI (Docker)
95
+
96
+ ```bash
97
+ docker run -p 8000:8000 ghcr.io/markrussinovich/refchecker:latest
98
+ ```
99
+
100
+ Open **http://localhost:8000** in your browser.
101
+
102
+ ### Web UI (pip)
103
+
104
+ ```bash
105
+ pip install "academic-refchecker[llm,webui]"
106
+ refchecker-webui
107
+ ```
108
+
109
+ ### CLI (pip)
110
+
111
+ ```bash
112
+ pip install "academic-refchecker[llm]"
113
+ academic-refchecker --paper 1706.03762
114
+ academic-refchecker --paper /path/to/paper.pdf
115
+ ```
116
+
117
+ > **Performance**: Set `SEMANTIC_SCHOLAR_API_KEY` to verify each reference in about 1-2s, versus 5-10s without a key.
118
+
119
+ ## Features
120
+
121
+ - **Multiple formats**: ArXiv papers, PDFs, LaTeX, text files
122
+ - **LLM-powered extraction**: OpenAI, Anthropic, Google, Azure, vLLM
123
+ - **Multi-source verification**: Semantic Scholar, OpenAlex, CrossRef
124
+ - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
125
+ - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
126
+ - **Detailed reports**: Errors, warnings, corrected references
127
+
128
+ ## Sample Output
129
+
130
+ **Web UI**
131
+
132
+ ![RefChecker Web UI](assets/webui.png)
133
+
134
+ **CLI**
135
+
136
+ ```
137
+ 📄 Processing: Attention Is All You Need
138
+ URL: https://arxiv.org/abs/1706.03762
139
+
140
+ [1/45] Neural machine translation in linear time
141
+ Nal Kalchbrenner et al. | 2017
142
+ ⚠️ Warning: Year mismatch: cited '2017', actual '2016'
143
+
144
+ [2/45] Effective approaches to attention-based neural machine translation
145
+ Minh-Thang Luong et al. | 2015
146
+ ❌ Error: First author mismatch: cited 'Minh-Thang Luong', actual 'Thang Luong'
147
+
148
+ [3/45] Deep Residual Learning for Image Recognition
149
+ Kaiming He et al. | 2016 | https://doi.org/10.1109/CVPR.2016.91
150
+ ❌ Error: DOI mismatch: cited '10.1109/CVPR.2016.91', actual '10.1109/CVPR.2016.90'
151
+
152
+ ============================================================
153
+ 📋 SUMMARY
154
+ 📚 Total references processed: 68
155
+ ❌ Total errors: 55 ⚠️ Total warnings: 16 ❓ Unverified: 15
156
+ ```
157
+
158
+ ## Install
159
+
160
+ ### PyPI (Recommended)
161
+
162
+ ```bash
163
+ pip install "academic-refchecker[llm,webui]" # Web UI + CLI + LLM providers
164
+ pip install academic-refchecker # CLI only
165
+ ```
166
+
167
+ ### From Source (Development)
168
+
169
+ ```bash
170
+ git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
171
+ python -m venv .venv && source .venv/bin/activate
172
+ pip install -e ".[llm,webui]"
173
+ ```
174
+
175
+ **Requirements:** Python 3.7+ (3.10+ recommended). Node.js 18+ is only needed for Web UI development.
176
+
177
+ ## Run
178
+
179
+ ### Web UI
180
+
181
+ The Web UI shows live progress and check history, and can export results (including corrected values).
182
+
183
+ ```bash
184
+ refchecker-webui --port 8000
185
+ ```
186
+
187
+ #### Development (frontend)
188
+
189
+ ```bash
190
+ cd web-ui
191
+ npm install
192
+ npm start
193
+ ```
194
+
195
+ Open **http://localhost:5173**.
196
+
197
+ Alternative (separate servers):
198
+
199
+ ```bash
200
+ # Terminal 1
201
+ python -m uvicorn backend.main:app --reload --port 8000
202
+
203
+ # Terminal 2
204
+ cd web-ui
205
+ npm run dev
206
+ ```
207
+
208
+ Verify the backend is running:
209
+
210
+ ```bash
211
+ curl http://localhost:8000/
212
+ ```
213
+
214
+ Web UI documentation: see [web-ui/README.md](web-ui/README.md).
215
+
216
+ ### Docker
217
+
218
+ Pre-built multi-architecture images are published to GitHub Container Registry on every release.
219
+
220
+ #### Quick Start
221
+
222
+ ```bash
223
+ docker run -p 8000:8000 ghcr.io/markrussinovich/refchecker:latest
224
+ ```
225
+
226
+ Open **http://localhost:8000** in your browser.
227
+
228
+ #### With LLM API Key
229
+
230
+ Pass your API key for LLM-powered reference extraction (recommended):
231
+
232
+ ```bash
233
+ # Anthropic Claude (recommended)
234
+ docker run -p 8000:8000 -e ANTHROPIC_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
235
+
236
+ # OpenAI
237
+ docker run -p 8000:8000 -e OPENAI_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
238
+
239
+ # Google Gemini
240
+ docker run -p 8000:8000 -e GOOGLE_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
241
+ ```
242
+
243
+ #### Persistent Data
244
+
245
+ Mount a volume to persist check history and settings between restarts:
246
+
247
+ ```bash
248
+ docker run -p 8000:8000 \
249
+ -e ANTHROPIC_API_KEY=your_key \
250
+ -v refchecker-data:/app/data \
251
+ ghcr.io/markrussinovich/refchecker:latest
252
+ ```
253
+
254
+ #### Docker Compose
255
+
256
+ For easier configuration with a `.env` file:
257
+
258
+ ```bash
259
+ git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
260
+ cp .env.example .env # Add your API keys
261
+ docker compose up -d
262
+ ```
263
+
264
+ Common commands:
265
+
266
+ ```bash
267
+ docker compose logs -f # View logs
268
+ docker compose down # Stop
269
+ docker compose pull # Update to latest
270
+ ```
271
+
272
+ #### Available Tags
273
+
274
+ | Tag | Description | Arch | Size |
275
+ |-----|-------------|------|------|
276
+ | `latest` | Latest stable release | amd64, arm64 | ~800MB |
277
+ | `X.Y.Z` | Specific version (e.g., `2.0.18`) | amd64, arm64 | ~800MB |
278
+
279
+ ### CLI
280
+
281
+ ```bash
282
+ # ArXiv (ID or URL)
283
+ academic-refchecker --paper 1706.03762
284
+ academic-refchecker --paper https://arxiv.org/abs/1706.03762
285
+
286
+ # Local files
287
+ academic-refchecker --paper paper.pdf
288
+ academic-refchecker --paper paper.tex
289
+ academic-refchecker --paper paper.txt
290
+ academic-refchecker --paper refs.bib
291
+
292
+ # Faster/offline verification (local DB)
293
+ academic-refchecker --paper paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
294
+
295
+ # Save results
296
+ academic-refchecker --paper 1706.03762 --output-file errors.txt
297
+ ```
298
+
299
+ ## Output
300
+
301
+ RefChecker reports these result types:
302
+
303
+ | Type | Description | Examples |
304
+ |------|-------------|----------|
305
+ | ❌ **Error** | Critical issues needing correction | Author/title/DOI mismatches, incorrect ArXiv IDs |
306
+ | ⚠️ **Warning** | Minor issues to review | Year differences, venue variations |
307
+ | ℹ️ **Suggestion** | Recommended improvements | Add missing ArXiv/DOI URLs, small metadata fixes |
308
+ | ❓ **Unverified** | Could not verify against any source | Rare publications, preprints |
309
+
310
+ Verified references include discovered URLs (Semantic Scholar, ArXiv, DOI). Suggestions are non-blocking improvements.
311
+
312
+ <details>
313
+ <summary>Detailed examples</summary>
314
+
315
+ ```
316
+ ❌ Error: First author mismatch: cited 'T. Xie', actual 'Zhao Xu'
317
+ ❌ Error: DOI mismatch: cited '10.5555/3295222.3295349', actual '10.48550/arXiv.1706.03762'
318
+ ⚠️ Warning: Year mismatch: cited '2024', actual '2023'
319
+ ℹ️ Suggestion: Add ArXiv URL https://arxiv.org/abs/1706.03762
320
+ ❓ Could not verify: Llama guard (M. A. Research, 2024)
321
+ ```
322
+
323
+ </details>
324
+
325
+ ## Configure
326
+
327
+ ### LLM
328
+
329
+ LLM-powered extraction improves accuracy with complex bibliographies. Claude Sonnet 4 performs best; GPT-4o may hallucinate DOIs.
330
+
331
+ | Provider | Env Variable | Example Model |
332
+ |----------|--------------|---------------|
333
+ | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
334
+ | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
335
+ | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
336
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
337
+ | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
338
+
339
+ ```bash
340
+ export ANTHROPIC_API_KEY=your_key
341
+ academic-refchecker --paper 1706.03762 --llm-provider anthropic
342
+
343
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
344
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
345
+ ```
346
+
347
+ #### Local models (vLLM)
348
+
349
+ There is no separate “GPU Docker image”. For local inference, install the vLLM extra and run an OpenAI-compatible vLLM server:
350
+
351
+ ```bash
352
+ pip install "academic-refchecker[vllm]"
353
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
354
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
355
+ ```
356
+
357
+ ### Command Line
358
+
359
+ ```bash
360
+ --paper PAPER # ArXiv ID, URL, or file path
361
+ --llm-provider PROVIDER # openai, anthropic, google, azure, vllm
362
+ --llm-model MODEL # Override default model
363
+ --db-path PATH # Local database for offline verification
364
+ --output-file [PATH] # Save results (default: reference_errors.txt)
365
+ --debug # Verbose output
366
+ ```
367
+
368
+ ### Environment Variables
369
+
370
+ ```bash
371
+ # LLM
372
+ export REFCHECKER_LLM_PROVIDER=anthropic
373
+ export ANTHROPIC_API_KEY=your_key # Also: OPENAI_API_KEY, GOOGLE_API_KEY
374
+
375
+ # Performance
376
+ export SEMANTIC_SCHOLAR_API_KEY=your_key # Higher rate limits / faster verification
377
+ ```
378
+
379
+ ## Local Database
380
+
381
+ For offline verification or faster processing:
382
+
383
+ ```bash
384
+ python scripts/download_db.py \
385
+ --field "computer science" \
386
+ --start-year 2020 --end-year 2024
387
+
388
+ academic-refchecker --paper paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
389
+ ```
390
+
391
+ ## Testing
392
+
393
+ 490+ tests covering unit, integration, and end-to-end scenarios.
394
+
395
+ ```bash
396
+ pytest tests/ # All tests
397
+ pytest tests/unit/ # Unit only
398
+ pytest --cov=src tests/ # With coverage
399
+ ```
400
+
401
+ See [tests/README.md](tests/README.md) for details.
402
+
403
+ ## License
404
+
405
+ MIT License - see [LICENSE](LICENSE).