academic-refchecker 2.0.19__tar.gz → 2.0.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {academic_refchecker-2.0.19/academic_refchecker.egg-info → academic_refchecker-2.0.21}/PKG-INFO +74 -32
  2. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/README.md +73 -31
  3. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21/academic_refchecker.egg-info}/PKG-INFO +74 -32
  4. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/database.py +126 -5
  5. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/main.py +450 -3
  6. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/refchecker_wrapper.py +109 -19
  7. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/__version__.py +1 -1
  8. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/semantic_scholar.py +2 -2
  9. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/core/refchecker.py +46 -0
  10. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/services/pdf_processor.py +156 -1
  11. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/text_utils.py +3 -1
  12. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/LICENSE +0 -0
  13. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/MANIFEST.in +0 -0
  14. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/SOURCES.txt +0 -0
  15. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/dependency_links.txt +0 -0
  16. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/entry_points.txt +0 -0
  17. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/requires.txt +0 -0
  18. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/top_level.txt +0 -0
  19. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/__init__.py +0 -0
  20. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/__main__.py +0 -0
  21. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/cli.py +0 -0
  22. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/concurrency.py +0 -0
  23. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/models.py +0 -0
  24. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/assets/index-2P6L_39v.css +0 -0
  25. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/assets/index-B92lKsA8.js +0 -0
  26. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/assets/index-BuguAhjS.css +0 -0
  27. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/assets/index-DMZJNrR0.js +0 -0
  28. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/assets/index-hk21nqxR.js +0 -0
  29. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/favicon.svg +0 -0
  30. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/index.html +0 -0
  31. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/static/vite.svg +0 -0
  32. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/thumbnail.py +0 -0
  33. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/backend/websocket_manager.py +0 -0
  34. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/pyproject.toml +0 -0
  35. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/requirements.txt +0 -0
  36. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/scripts/download_db.py +0 -0
  37. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/scripts/run_tests.py +0 -0
  38. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/scripts/start_vllm_server.py +0 -0
  39. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/setup.cfg +0 -0
  40. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/__init__.py +0 -0
  41. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/__main__.py +0 -0
  42. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/__init__.py +0 -0
  43. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/arxiv_citation.py +0 -0
  44. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/crossref.py +0 -0
  45. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/enhanced_hybrid_checker.py +0 -0
  46. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/github_checker.py +0 -0
  47. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/local_semantic_scholar.py +0 -0
  48. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/openalex.py +0 -0
  49. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/openreview_checker.py +0 -0
  50. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/pdf_paper_checker.py +0 -0
  51. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/checkers/webpage_checker.py +0 -0
  52. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/config/__init__.py +0 -0
  53. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/config/logging.conf +0 -0
  54. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/config/settings.py +0 -0
  55. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/core/__init__.py +0 -0
  56. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/core/db_connection_pool.py +0 -0
  57. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/core/parallel_processor.py +0 -0
  58. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/database/__init__.py +0 -0
  59. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/database/download_semantic_scholar_db.py +0 -0
  60. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/llm/__init__.py +0 -0
  61. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/llm/base.py +0 -0
  62. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/llm/providers.py +0 -0
  63. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/scripts/__init__.py +0 -0
  64. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/scripts/start_vllm_server.py +0 -0
  65. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/services/__init__.py +0 -0
  66. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/__init__.py +0 -0
  67. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/arxiv_rate_limiter.py +0 -0
  68. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/arxiv_utils.py +0 -0
  69. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/author_utils.py +0 -0
  70. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/biblatex_parser.py +0 -0
  71. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/bibliography_utils.py +0 -0
  72. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/bibtex_parser.py +0 -0
  73. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/config_validator.py +0 -0
  74. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/db_utils.py +0 -0
  75. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/doi_utils.py +0 -0
  76. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/error_utils.py +0 -0
  77. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/mock_objects.py +0 -0
  78. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/unicode_utils.py +0 -0
  79. {academic_refchecker-2.0.19 → academic_refchecker-2.0.21}/src/refchecker/utils/url_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 2.0.19
3
+ Version: 2.0.21
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -85,7 +85,6 @@ Validate reference accuracy in academic papers. Useful for authors checking bibl
85
85
  - [Run](#run)
86
86
  - [Output](#output)
87
87
  - [Configure](#configure)
88
- - [Docker](#docker)
89
88
  - [Local Database](#local-database)
90
89
  - [Testing](#testing)
91
90
  - [License](#license)
@@ -125,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
125
124
  - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
126
125
  - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
127
126
  - **Detailed reports**: Errors, warnings, corrected references
127
+ - **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
128
128
 
129
129
  ## Sample Output
130
130
 
@@ -185,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
185
185
  refchecker-webui --port 8000
186
186
  ```
187
187
 
188
+ *Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
189
+
188
190
  #### Development (frontend)
189
191
 
190
192
  ```bash
@@ -214,6 +216,69 @@ curl http://localhost:8000/
214
216
 
215
217
  Web UI documentation: see [web-ui/README.md](web-ui/README.md).
216
218
 
219
+ ### Docker
220
+
221
+ Pre-built multi-architecture images are published to GitHub Container Registry on every release.
222
+
223
+ #### Quick Start
224
+
225
+ ```bash
226
+ docker run -p 8000:8000 ghcr.io/markrussinovich/refchecker:latest
227
+ ```
228
+
229
+ Open **http://localhost:8000** in your browser.
230
+
231
+ #### With LLM API Key
232
+
233
+ Pass your API key for LLM-powered reference extraction (recommended):
234
+
235
+ ```bash
236
+ # Anthropic Claude (recommended)
237
+ docker run -p 8000:8000 -e ANTHROPIC_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
238
+
239
+ # OpenAI
240
+ docker run -p 8000:8000 -e OPENAI_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
241
+
242
+ # Google Gemini
243
+ docker run -p 8000:8000 -e GOOGLE_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
244
+ ```
245
+
246
+ #### Persistent Data
247
+
248
+ Mount a volume to persist check history and settings between restarts:
249
+
250
+ ```bash
251
+ docker run -p 8000:8000 \
252
+ -e ANTHROPIC_API_KEY=your_key \
253
+ -v refchecker-data:/app/data \
254
+ ghcr.io/markrussinovich/refchecker:latest
255
+ ```
256
+
257
+ #### Docker Compose
258
+
259
+ For easier configuration with a `.env` file:
260
+
261
+ ```bash
262
+ git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
263
+ cp .env.example .env # Add your API keys
264
+ docker compose up -d
265
+ ```
266
+
267
+ Common commands:
268
+
269
+ ```bash
270
+ docker compose logs -f # View logs
271
+ docker compose down # Stop
272
+ docker compose pull # Update to latest
273
+ ```
274
+
275
+ #### Available Tags
276
+
277
+ | Tag | Description | Arch | Size |
278
+ |-----|-------------|------|------|
279
+ | `latest` | Latest stable release | amd64, arm64 | ~800MB |
280
+ | `X.Y.Z` | Specific version (e.g., `2.0.18`) | amd64, arm64 | ~800MB |
281
+
217
282
  ### CLI
218
283
 
219
284
  ```bash
@@ -269,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
269
334
  | Provider | Env Variable | Example Model |
270
335
  |----------|--------------|---------------|
271
336
  | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
272
- | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
273
- | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
274
- | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
275
- | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
337
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
338
+ | Google | `GOOGLE_API_KEY` | `gemini-3` |
339
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
340
+ | vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
276
341
 
277
342
  ```bash
278
343
  export ANTHROPIC_API_KEY=your_key
279
344
  academic-refchecker --paper 1706.03762 --llm-provider anthropic
280
345
 
281
- academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
282
- academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
346
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
347
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
283
348
  ```
284
349
 
285
350
  #### Local models (vLLM)
@@ -288,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
288
353
 
289
354
  ```bash
290
355
  pip install "academic-refchecker[vllm]"
291
- python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
356
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
292
357
  academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
293
358
  ```
294
359
 
@@ -314,29 +379,6 @@ export ANTHROPIC_API_KEY=your_key # Also: OPENAI_API_KEY, GOOGLE_API_K
314
379
  export SEMANTIC_SCHOLAR_API_KEY=your_key # Higher rate limits / faster verification
315
380
  ```
316
381
 
317
- ## Docker
318
-
319
- Pre-built images are published to GitHub Container Registry.
320
-
321
- ```bash
322
- docker run -p 8000:8000 \
323
- -e ANTHROPIC_API_KEY=your_key \
324
- -v refchecker-data:/app/data \
325
- ghcr.io/markrussinovich/refchecker:latest
326
- ```
327
-
328
- Docker Compose:
329
-
330
- ```bash
331
- git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
332
- cp .env.example .env # Add your API keys
333
- docker compose up -d
334
- ```
335
-
336
- | Tag | Description | Arch | Size |
337
- |-----|-------------|------|------|
338
- | `latest` | RefChecker (Web UI + API-based LLM support) | amd64, arm64 | ~800MB |
339
-
340
382
  ## Local Database
341
383
 
342
384
  For offline verification or faster processing:
@@ -13,7 +13,6 @@ Validate reference accuracy in academic papers. Useful for authors checking bibl
13
13
  - [Run](#run)
14
14
  - [Output](#output)
15
15
  - [Configure](#configure)
16
- - [Docker](#docker)
17
16
  - [Local Database](#local-database)
18
17
  - [Testing](#testing)
19
18
  - [License](#license)
@@ -53,6 +52,7 @@ academic-refchecker --paper /path/to/paper.pdf
53
52
  - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
54
53
  - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
55
54
  - **Detailed reports**: Errors, warnings, corrected references
55
+ - **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
56
56
 
57
57
  ## Sample Output
58
58
 
@@ -113,6 +113,8 @@ The Web UI shows live progress, history, and export (including corrected values)
113
113
  refchecker-webui --port 8000
114
114
  ```
115
115
 
116
+ *Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
117
+
116
118
  #### Development (frontend)
117
119
 
118
120
  ```bash
@@ -142,6 +144,69 @@ curl http://localhost:8000/
142
144
 
143
145
  Web UI documentation: see [web-ui/README.md](web-ui/README.md).
144
146
 
147
+ ### Docker
148
+
149
+ Pre-built multi-architecture images are published to GitHub Container Registry on every release.
150
+
151
+ #### Quick Start
152
+
153
+ ```bash
154
+ docker run -p 8000:8000 ghcr.io/markrussinovich/refchecker:latest
155
+ ```
156
+
157
+ Open **http://localhost:8000** in your browser.
158
+
159
+ #### With LLM API Key
160
+
161
+ Pass your API key for LLM-powered reference extraction (recommended):
162
+
163
+ ```bash
164
+ # Anthropic Claude (recommended)
165
+ docker run -p 8000:8000 -e ANTHROPIC_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
166
+
167
+ # OpenAI
168
+ docker run -p 8000:8000 -e OPENAI_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
169
+
170
+ # Google Gemini
171
+ docker run -p 8000:8000 -e GOOGLE_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
172
+ ```
173
+
174
+ #### Persistent Data
175
+
176
+ Mount a volume to persist check history and settings between restarts:
177
+
178
+ ```bash
179
+ docker run -p 8000:8000 \
180
+ -e ANTHROPIC_API_KEY=your_key \
181
+ -v refchecker-data:/app/data \
182
+ ghcr.io/markrussinovich/refchecker:latest
183
+ ```
184
+
185
+ #### Docker Compose
186
+
187
+ For easier configuration with a `.env` file:
188
+
189
+ ```bash
190
+ git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
191
+ cp .env.example .env # Add your API keys
192
+ docker compose up -d
193
+ ```
194
+
195
+ Common commands:
196
+
197
+ ```bash
198
+ docker compose logs -f # View logs
199
+ docker compose down # Stop
200
+ docker compose pull # Update to latest
201
+ ```
202
+
203
+ #### Available Tags
204
+
205
+ | Tag | Description | Arch | Size |
206
+ |-----|-------------|------|------|
207
+ | `latest` | Latest stable release | amd64, arm64 | ~800MB |
208
+ | `X.Y.Z` | Specific version (e.g., `2.0.18`) | amd64, arm64 | ~800MB |
209
+
145
210
  ### CLI
146
211
 
147
212
  ```bash
@@ -197,17 +262,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
197
262
  | Provider | Env Variable | Example Model |
198
263
  |----------|--------------|---------------|
199
264
  | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
200
- | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
201
- | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
202
- | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
203
- | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
265
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
266
+ | Google | `GOOGLE_API_KEY` | `gemini-3` |
267
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
268
+ | vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
204
269
 
205
270
  ```bash
206
271
  export ANTHROPIC_API_KEY=your_key
207
272
  academic-refchecker --paper 1706.03762 --llm-provider anthropic
208
273
 
209
- academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
210
- academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
274
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
275
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
211
276
  ```
212
277
 
213
278
  #### Local models (vLLM)
@@ -216,7 +281,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
216
281
 
217
282
  ```bash
218
283
  pip install "academic-refchecker[vllm]"
219
- python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
284
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
220
285
  academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
221
286
  ```
222
287
 
@@ -242,29 +307,6 @@ export ANTHROPIC_API_KEY=your_key # Also: OPENAI_API_KEY, GOOGLE_API_K
242
307
  export SEMANTIC_SCHOLAR_API_KEY=your_key # Higher rate limits / faster verification
243
308
  ```
244
309
 
245
- ## Docker
246
-
247
- Pre-built images are published to GitHub Container Registry.
248
-
249
- ```bash
250
- docker run -p 8000:8000 \
251
- -e ANTHROPIC_API_KEY=your_key \
252
- -v refchecker-data:/app/data \
253
- ghcr.io/markrussinovich/refchecker:latest
254
- ```
255
-
256
- Docker Compose:
257
-
258
- ```bash
259
- git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
260
- cp .env.example .env # Add your API keys
261
- docker compose up -d
262
- ```
263
-
264
- | Tag | Description | Arch | Size |
265
- |-----|-------------|------|------|
266
- | `latest` | RefChecker (Web UI + API-based LLM support) | amd64, arm64 | ~800MB |
267
-
268
310
  ## Local Database
269
311
 
270
312
  For offline verification or faster processing:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 2.0.19
3
+ Version: 2.0.21
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -85,7 +85,6 @@ Validate reference accuracy in academic papers. Useful for authors checking bibl
85
85
  - [Run](#run)
86
86
  - [Output](#output)
87
87
  - [Configure](#configure)
88
- - [Docker](#docker)
89
88
  - [Local Database](#local-database)
90
89
  - [Testing](#testing)
91
90
  - [License](#license)
@@ -125,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
125
124
  - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
126
125
  - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
127
126
  - **Detailed reports**: Errors, warnings, corrected references
127
+ - **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
128
128
 
129
129
  ## Sample Output
130
130
 
@@ -185,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
185
185
  refchecker-webui --port 8000
186
186
  ```
187
187
 
188
+ *Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
189
+
188
190
  #### Development (frontend)
189
191
 
190
192
  ```bash
@@ -214,6 +216,69 @@ curl http://localhost:8000/
214
216
 
215
217
  Web UI documentation: see [web-ui/README.md](web-ui/README.md).
216
218
 
219
+ ### Docker
220
+
221
+ Pre-built multi-architecture images are published to GitHub Container Registry on every release.
222
+
223
+ #### Quick Start
224
+
225
+ ```bash
226
+ docker run -p 8000:8000 ghcr.io/markrussinovich/refchecker:latest
227
+ ```
228
+
229
+ Open **http://localhost:8000** in your browser.
230
+
231
+ #### With LLM API Key
232
+
233
+ Pass your API key for LLM-powered reference extraction (recommended):
234
+
235
+ ```bash
236
+ # Anthropic Claude (recommended)
237
+ docker run -p 8000:8000 -e ANTHROPIC_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
238
+
239
+ # OpenAI
240
+ docker run -p 8000:8000 -e OPENAI_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
241
+
242
+ # Google Gemini
243
+ docker run -p 8000:8000 -e GOOGLE_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
244
+ ```
245
+
246
+ #### Persistent Data
247
+
248
+ Mount a volume to persist check history and settings between restarts:
249
+
250
+ ```bash
251
+ docker run -p 8000:8000 \
252
+ -e ANTHROPIC_API_KEY=your_key \
253
+ -v refchecker-data:/app/data \
254
+ ghcr.io/markrussinovich/refchecker:latest
255
+ ```
256
+
257
+ #### Docker Compose
258
+
259
+ For easier configuration with a `.env` file:
260
+
261
+ ```bash
262
+ git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
263
+ cp .env.example .env # Add your API keys
264
+ docker compose up -d
265
+ ```
266
+
267
+ Common commands:
268
+
269
+ ```bash
270
+ docker compose logs -f # View logs
271
+ docker compose down # Stop
272
+ docker compose pull # Update to latest
273
+ ```
274
+
275
+ #### Available Tags
276
+
277
+ | Tag | Description | Arch | Size |
278
+ |-----|-------------|------|------|
279
+ | `latest` | Latest stable release | amd64, arm64 | ~800MB |
280
+ | `X.Y.Z` | Specific version (e.g., `2.0.18`) | amd64, arm64 | ~800MB |
281
+
217
282
  ### CLI
218
283
 
219
284
  ```bash
@@ -269,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
269
334
  | Provider | Env Variable | Example Model |
270
335
  |----------|--------------|---------------|
271
336
  | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
272
- | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
273
- | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
274
- | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
275
- | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
337
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
338
+ | Google | `GOOGLE_API_KEY` | `gemini-3` |
339
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
340
+ | vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
276
341
 
277
342
  ```bash
278
343
  export ANTHROPIC_API_KEY=your_key
279
344
  academic-refchecker --paper 1706.03762 --llm-provider anthropic
280
345
 
281
- academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
282
- academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
346
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
347
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
283
348
  ```
284
349
 
285
350
  #### Local models (vLLM)
@@ -288,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
288
353
 
289
354
  ```bash
290
355
  pip install "academic-refchecker[vllm]"
291
- python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
356
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
292
357
  academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
293
358
  ```
294
359
 
@@ -314,29 +379,6 @@ export ANTHROPIC_API_KEY=your_key # Also: OPENAI_API_KEY, GOOGLE_API_K
314
379
  export SEMANTIC_SCHOLAR_API_KEY=your_key # Higher rate limits / faster verification
315
380
  ```
316
381
 
317
- ## Docker
318
-
319
- Pre-built images are published to GitHub Container Registry.
320
-
321
- ```bash
322
- docker run -p 8000:8000 \
323
- -e ANTHROPIC_API_KEY=your_key \
324
- -v refchecker-data:/app/data \
325
- ghcr.io/markrussinovich/refchecker:latest
326
- ```
327
-
328
- Docker Compose:
329
-
330
- ```bash
331
- git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
332
- cp .env.example .env # Add your API keys
333
- docker compose up -d
334
- ```
335
-
336
- | Tag | Description | Arch | Size |
337
- |-----|-------------|------|------|
338
- | `latest` | RefChecker (Web UI + API-based LLM support) | amd64, arm64 | ~800MB |
339
-
340
382
  ## Local Database
341
383
 
342
384
  For offline verification or faster processing:
@@ -144,6 +144,12 @@ class Database:
144
144
  """)
145
145
 
146
146
  await self._ensure_columns(db)
147
+
148
+ # Create index for batch queries
149
+ await db.execute("""
150
+ CREATE INDEX IF NOT EXISTS idx_check_history_batch_id
151
+ ON check_history(batch_id)
152
+ """)
147
153
  await db.commit()
148
154
 
149
155
  async def _ensure_columns(self, db: aiosqlite.Connection):
@@ -168,6 +174,12 @@ class Database:
168
174
  await db.execute("ALTER TABLE check_history ADD COLUMN thumbnail_path TEXT")
169
175
  if "bibliography_source_path" not in columns:
170
176
  await db.execute("ALTER TABLE check_history ADD COLUMN bibliography_source_path TEXT")
177
+ if "batch_id" not in columns:
178
+ await db.execute("ALTER TABLE check_history ADD COLUMN batch_id TEXT")
179
+ if "batch_label" not in columns:
180
+ await db.execute("ALTER TABLE check_history ADD COLUMN batch_label TEXT")
181
+ if "original_filename" not in columns:
182
+ await db.execute("ALTER TABLE check_history ADD COLUMN original_filename TEXT")
171
183
 
172
184
  async def save_check(self,
173
185
  paper_title: str,
@@ -222,7 +234,8 @@ class Database:
222
234
  SELECT id, paper_title, paper_source, custom_label, timestamp,
223
235
  total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
224
236
  refs_with_errors, refs_with_warnings_only, refs_verified,
225
- llm_provider, llm_model, status, source_type
237
+ llm_provider, llm_model, status, source_type, batch_id, batch_label,
238
+ original_filename
226
239
  FROM check_history
227
240
  ORDER BY timestamp DESC
228
241
  LIMIT ?
@@ -282,20 +295,27 @@ class Database:
282
295
  paper_source: str,
283
296
  source_type: str,
284
297
  llm_provider: Optional[str] = None,
285
- llm_model: Optional[str] = None) -> int:
298
+ llm_model: Optional[str] = None,
299
+ batch_id: Optional[str] = None,
300
+ batch_label: Optional[str] = None,
301
+ original_filename: Optional[str] = None) -> int:
286
302
  """Create a pending check entry before verification starts"""
287
303
  async with aiosqlite.connect(self.db_path) as db:
288
304
  cursor = await db.execute("""
289
305
  INSERT INTO check_history
290
306
  (paper_title, paper_source, source_type, total_refs, errors_count, warnings_count,
291
- suggestions_count, unverified_count, results_json, llm_provider, llm_model, status)
292
- VALUES (?, ?, ?, 0, 0, 0, 0, 0, '[]', ?, ?, 'in_progress')
307
+ suggestions_count, unverified_count, results_json, llm_provider, llm_model, status,
308
+ batch_id, batch_label, original_filename)
309
+ VALUES (?, ?, ?, 0, 0, 0, 0, 0, '[]', ?, ?, 'in_progress', ?, ?, ?)
293
310
  """, (
294
311
  paper_title,
295
312
  paper_source,
296
313
  source_type,
297
314
  llm_provider,
298
- llm_model
315
+ llm_model,
316
+ batch_id,
317
+ batch_label,
318
+ original_filename
299
319
  ))
300
320
  await db.commit()
301
321
  return cursor.lastrowid
@@ -667,16 +687,35 @@ class Database:
667
687
 
668
688
  Returns the cached result if found, None otherwise.
669
689
  """
690
+ import time
691
+ import tempfile
692
+ from pathlib import Path
693
+
694
+ debug_file = Path(tempfile.gettempdir()) / "refchecker_debug.log"
695
+
696
+ start = time.time()
670
697
  cache_key = self._compute_reference_cache_key(reference)
698
+ key_time = time.time() - start
671
699
 
700
+ connect_start = time.time()
672
701
  async with aiosqlite.connect(self.db_path) as db:
702
+ connect_time = time.time() - connect_start
673
703
  await db.execute("PRAGMA busy_timeout=5000")
674
704
  db.row_factory = aiosqlite.Row
705
+
706
+ query_start = time.time()
675
707
  async with db.execute(
676
708
  "SELECT result_json FROM verification_cache WHERE cache_key = ?",
677
709
  (cache_key,)
678
710
  ) as cursor:
679
711
  row = await cursor.fetchone()
712
+ query_time = time.time() - query_start
713
+
714
+ total_time = time.time() - start
715
+ if total_time > 0.05:
716
+ with open(debug_file, "a") as f:
717
+ f.write(f"[TIMING] Cache lookup: total={total_time:.3f}s, key={key_time:.3f}s, connect={connect_time:.3f}s, query={query_time:.3f}s\n")
718
+
680
719
  if row and row['result_json']:
681
720
  try:
682
721
  return json.loads(row['result_json'])
@@ -716,6 +755,88 @@ class Database:
716
755
  await db.commit()
717
756
  return cursor.rowcount
718
757
 
758
+ # Batch operations
759
+
760
+ async def get_batch_checks(self, batch_id: str) -> List[Dict[str, Any]]:
761
+ """Get all checks belonging to a batch"""
762
+ async with aiosqlite.connect(self.db_path) as db:
763
+ await db.execute("PRAGMA busy_timeout=5000")
764
+ db.row_factory = aiosqlite.Row
765
+ async with db.execute("""
766
+ SELECT id, paper_title, paper_source, custom_label, timestamp,
767
+ total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
768
+ refs_with_errors, refs_with_warnings_only, refs_verified,
769
+ llm_provider, llm_model, status, source_type, batch_id, batch_label
770
+ FROM check_history
771
+ WHERE batch_id = ?
772
+ ORDER BY timestamp ASC
773
+ """, (batch_id,)) as cursor:
774
+ rows = await cursor.fetchall()
775
+ return [dict(row) for row in rows]
776
+
777
+ async def get_batch_summary(self, batch_id: str) -> Optional[Dict[str, Any]]:
778
+ """Get aggregated summary for a batch"""
779
+ async with aiosqlite.connect(self.db_path) as db:
780
+ await db.execute("PRAGMA busy_timeout=5000")
781
+ db.row_factory = aiosqlite.Row
782
+ async with db.execute("""
783
+ SELECT
784
+ batch_id,
785
+ batch_label,
786
+ COUNT(*) as total_papers,
787
+ SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_papers,
788
+ SUM(CASE WHEN status = 'in_progress' THEN 1 ELSE 0 END) as in_progress_papers,
789
+ SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error_papers,
790
+ SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) as cancelled_papers,
791
+ SUM(total_refs) as total_refs,
792
+ SUM(errors_count) as total_errors,
793
+ SUM(warnings_count) as total_warnings,
794
+ SUM(suggestions_count) as total_suggestions,
795
+ SUM(unverified_count) as total_unverified,
796
+ MIN(timestamp) as started_at
797
+ FROM check_history
798
+ WHERE batch_id = ?
799
+ GROUP BY batch_id
800
+ """, (batch_id,)) as cursor:
801
+ row = await cursor.fetchone()
802
+ if row:
803
+ return dict(row)
804
+ return None
805
+
806
+ async def cancel_batch(self, batch_id: str) -> int:
807
+ """Cancel all in-progress checks in a batch. Returns count of cancelled checks."""
808
+ async with aiosqlite.connect(self.db_path) as db:
809
+ await db.execute("PRAGMA busy_timeout=5000")
810
+ cursor = await db.execute("""
811
+ UPDATE check_history
812
+ SET status = 'cancelled'
813
+ WHERE batch_id = ? AND status = 'in_progress'
814
+ """, (batch_id,))
815
+ await db.commit()
816
+ return cursor.rowcount
817
+
818
+ async def delete_batch(self, batch_id: str) -> int:
819
+ """Delete all checks in a batch. Returns count of deleted checks."""
820
+ async with aiosqlite.connect(self.db_path) as db:
821
+ await db.execute("PRAGMA busy_timeout=5000")
822
+ cursor = await db.execute(
823
+ "DELETE FROM check_history WHERE batch_id = ?",
824
+ (batch_id,)
825
+ )
826
+ await db.commit()
827
+ return cursor.rowcount
828
+
829
+ async def update_batch_label(self, batch_id: str, label: str) -> bool:
830
+ """Update the label for all checks in a batch"""
831
+ async with aiosqlite.connect(self.db_path) as db:
832
+ await db.execute("PRAGMA busy_timeout=5000")
833
+ await db.execute(
834
+ "UPDATE check_history SET batch_label = ? WHERE batch_id = ?",
835
+ (label, batch_id)
836
+ )
837
+ await db.commit()
838
+ return True
839
+
719
840
 
720
841
  # Global database instance
721
842
  db = Database()