academic-refchecker 2.0.19__py3-none-any.whl → 2.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-2.0.19.dist-info → academic_refchecker-2.0.21.dist-info}/METADATA +74 -32
- {academic_refchecker-2.0.19.dist-info → academic_refchecker-2.0.21.dist-info}/RECORD +14 -14
- backend/database.py +126 -5
- backend/main.py +450 -3
- backend/refchecker_wrapper.py +109 -19
- refchecker/__version__.py +1 -1
- refchecker/checkers/semantic_scholar.py +2 -2
- refchecker/core/refchecker.py +46 -0
- refchecker/services/pdf_processor.py +156 -1
- refchecker/utils/text_utils.py +3 -1
- {academic_refchecker-2.0.19.dist-info → academic_refchecker-2.0.21.dist-info}/WHEEL +0 -0
- {academic_refchecker-2.0.19.dist-info → academic_refchecker-2.0.21.dist-info}/entry_points.txt +0 -0
- {academic_refchecker-2.0.19.dist-info → academic_refchecker-2.0.21.dist-info}/licenses/LICENSE +0 -0
- {academic_refchecker-2.0.19.dist-info → academic_refchecker-2.0.21.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 2.0.19
|
|
3
|
+
Version: 2.0.21
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -85,7 +85,6 @@ Validate reference accuracy in academic papers. Useful for authors checking bibl
|
|
|
85
85
|
- [Run](#run)
|
|
86
86
|
- [Output](#output)
|
|
87
87
|
- [Configure](#configure)
|
|
88
|
-
- [Docker](#docker)
|
|
89
88
|
- [Local Database](#local-database)
|
|
90
89
|
- [Testing](#testing)
|
|
91
90
|
- [License](#license)
|
|
@@ -125,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
|
|
|
125
124
|
- **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
|
|
126
125
|
- **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
|
|
127
126
|
- **Detailed reports**: Errors, warnings, corrected references
|
|
127
|
+
- **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
|
|
128
128
|
|
|
129
129
|
## Sample Output
|
|
130
130
|
|
|
@@ -185,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
|
|
|
185
185
|
refchecker-webui --port 8000
|
|
186
186
|
```
|
|
187
187
|
|
|
188
|
+
*Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
|
|
189
|
+
|
|
188
190
|
#### Development (frontend)
|
|
189
191
|
|
|
190
192
|
```bash
|
|
@@ -214,6 +216,69 @@ curl http://localhost:8000/
|
|
|
214
216
|
|
|
215
217
|
Web UI documentation: see [web-ui/README.md](web-ui/README.md).
|
|
216
218
|
|
|
219
|
+
### Docker
|
|
220
|
+
|
|
221
|
+
Pre-built multi-architecture images are published to GitHub Container Registry on every release.
|
|
222
|
+
|
|
223
|
+
#### Quick Start
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
docker run -p 8000:8000 ghcr.io/markrussinovich/refchecker:latest
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Open **http://localhost:8000** in your browser.
|
|
230
|
+
|
|
231
|
+
#### With LLM API Key
|
|
232
|
+
|
|
233
|
+
Pass your API key for LLM-powered reference extraction (recommended):
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Anthropic Claude (recommended)
|
|
237
|
+
docker run -p 8000:8000 -e ANTHROPIC_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
|
|
238
|
+
|
|
239
|
+
# OpenAI
|
|
240
|
+
docker run -p 8000:8000 -e OPENAI_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
|
|
241
|
+
|
|
242
|
+
# Google Gemini
|
|
243
|
+
docker run -p 8000:8000 -e GOOGLE_API_KEY=your_key ghcr.io/markrussinovich/refchecker:latest
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
#### Persistent Data
|
|
247
|
+
|
|
248
|
+
Mount a volume to persist check history and settings between restarts:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
docker run -p 8000:8000 \
|
|
252
|
+
-e ANTHROPIC_API_KEY=your_key \
|
|
253
|
+
-v refchecker-data:/app/data \
|
|
254
|
+
ghcr.io/markrussinovich/refchecker:latest
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
#### Docker Compose
|
|
258
|
+
|
|
259
|
+
For easier configuration with an `.env` file:
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
|
|
263
|
+
cp .env.example .env # Add your API keys
|
|
264
|
+
docker compose up -d
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
Common commands:
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
docker compose logs -f # View logs
|
|
271
|
+
docker compose down # Stop
|
|
272
|
+
docker compose pull # Update to latest
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
#### Available Tags
|
|
276
|
+
|
|
277
|
+
| Tag | Description | Arch | Size |
|
|
278
|
+
|-----|-------------|------|------|
|
|
279
|
+
| `latest` | Latest stable release | amd64, arm64 | ~800MB |
|
|
280
|
+
| `X.Y.Z` | Specific version (e.g., `2.0.18`) | amd64, arm64 | ~800MB |
|
|
281
|
+
|
|
217
282
|
### CLI
|
|
218
283
|
|
|
219
284
|
```bash
|
|
@@ -269,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
|
|
|
269
334
|
| Provider | Env Variable | Example Model |
|
|
270
335
|
|----------|--------------|---------------|
|
|
271
336
|
| Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
|
|
272
|
-
| OpenAI | `OPENAI_API_KEY` | `gpt-
|
|
273
|
-
| Google | `GOOGLE_API_KEY` | `gemini-
|
|
274
|
-
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-
|
|
275
|
-
| vLLM | (local) | `meta-llama/Llama-3.
|
|
337
|
+
| OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
|
|
338
|
+
| Google | `GOOGLE_API_KEY` | `gemini-3` |
|
|
339
|
+
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
|
|
340
|
+
| vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
|
|
276
341
|
|
|
277
342
|
```bash
|
|
278
343
|
export ANTHROPIC_API_KEY=your_key
|
|
279
344
|
academic-refchecker --paper 1706.03762 --llm-provider anthropic
|
|
280
345
|
|
|
281
|
-
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-
|
|
282
|
-
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.
|
|
346
|
+
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
|
|
347
|
+
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
|
|
283
348
|
```
|
|
284
349
|
|
|
285
350
|
#### Local models (vLLM)
|
|
@@ -288,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
|
|
|
288
353
|
|
|
289
354
|
```bash
|
|
290
355
|
pip install "academic-refchecker[vllm]"
|
|
291
|
-
python scripts/start_vllm_server.py --model meta-llama/Llama-3.
|
|
356
|
+
python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
|
|
292
357
|
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
|
|
293
358
|
```
|
|
294
359
|
|
|
@@ -314,29 +379,6 @@ export ANTHROPIC_API_KEY=your_key # Also: OPENAI_API_KEY, GOOGLE_API_K
|
|
|
314
379
|
export SEMANTIC_SCHOLAR_API_KEY=your_key # Higher rate limits / faster verification
|
|
315
380
|
```
|
|
316
381
|
|
|
317
|
-
## Docker
|
|
318
|
-
|
|
319
|
-
Pre-built images are published to GitHub Container Registry.
|
|
320
|
-
|
|
321
|
-
```bash
|
|
322
|
-
docker run -p 8000:8000 \
|
|
323
|
-
-e ANTHROPIC_API_KEY=your_key \
|
|
324
|
-
-v refchecker-data:/app/data \
|
|
325
|
-
ghcr.io/markrussinovich/refchecker:latest
|
|
326
|
-
```
|
|
327
|
-
|
|
328
|
-
Docker Compose:
|
|
329
|
-
|
|
330
|
-
```bash
|
|
331
|
-
git clone https://github.com/markrussinovich/refchecker.git && cd refchecker
|
|
332
|
-
cp .env.example .env # Add your API keys
|
|
333
|
-
docker compose up -d
|
|
334
|
-
```
|
|
335
|
-
|
|
336
|
-
| Tag | Description | Arch | Size |
|
|
337
|
-
|-----|-------------|------|------|
|
|
338
|
-
| `latest` | RefChecker (Web UI + API-based LLM support) | amd64, arm64 | ~800MB |
|
|
339
|
-
|
|
340
382
|
## Local Database
|
|
341
383
|
|
|
342
384
|
For offline verification or faster processing:
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
academic_refchecker-2.0.19.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
|
|
1
|
+
academic_refchecker-2.0.21.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
|
|
2
2
|
backend/__init__.py,sha256=TFVkOx5tSp3abty15RzUbaSwQ9ZD0kfUn7PDh63xkYY,521
|
|
3
3
|
backend/__main__.py,sha256=74V7yUMsRSZaaRyXYm-rZVc3TVUcUgwsoTQTUbV5EqM,211
|
|
4
4
|
backend/cli.py,sha256=xV3l9M5OdNQQYOcrzj2d_7RmCgj7CXP_1oi0TPe6zNo,1672
|
|
5
5
|
backend/concurrency.py,sha256=2KY9I_8dDkyl_HTGx27ZxU4rFXx2vqbGOlo5RrRbPjA,3223
|
|
6
|
-
backend/database.py,sha256=
|
|
7
|
-
backend/main.py,sha256=
|
|
6
|
+
backend/database.py,sha256=mxDLr4v2FxzVwRPyzfm5ijoGSHMhJlsztxbrcYA2CCs,36750
|
|
7
|
+
backend/main.py,sha256=Vhr8FkgaGLeJGQsRsMkoabixHBWcoqglRr82RVKl7s4,72593
|
|
8
8
|
backend/models.py,sha256=El2F-RTHgxQ7-WODmiYCpjsTFDpjwF9PBt-JDa_XipE,2591
|
|
9
|
-
backend/refchecker_wrapper.py,sha256=
|
|
9
|
+
backend/refchecker_wrapper.py,sha256=PsId_FKF6pi-8EbeKcVpGeVIgzTz6fjpPkeKWxxr6Wo,60540
|
|
10
10
|
backend/thumbnail.py,sha256=zw6wLMyv9g4p83yqICh2ZHOAWK0WR6E8HMV6o-ocPmc,22251
|
|
11
11
|
backend/websocket_manager.py,sha256=l-Wou-rKV6n7t6Gcf5fR6s_4G-mssSrba0davNnYS70,4247
|
|
12
12
|
backend/static/favicon.svg,sha256=R0oQauh16Uy0D7JlT27k-zdjJtrvfPKOe9La5vKYwuM,395
|
|
@@ -19,7 +19,7 @@ backend/static/assets/index-DMZJNrR0.js,sha256=UhK5CQ8IufZmx6FTvXUCtkRxTqpGK7czS
|
|
|
19
19
|
backend/static/assets/index-hk21nqxR.js,sha256=z2agP8ZFYw4AfYi-GJ5E_8_k-lPF-frXOJtPk-I0hDs,369533
|
|
20
20
|
refchecker/__init__.py,sha256=Pg5MrtLxDBRcNYcI02N-bv3tzURVd1S3nQ8IyF7Zw7E,322
|
|
21
21
|
refchecker/__main__.py,sha256=agBbT9iKN0g2xXtRNCoh29Nr7z2n5vU-r0MCVJKi4tI,232
|
|
22
|
-
refchecker/__version__.py,sha256=
|
|
22
|
+
refchecker/__version__.py,sha256=XyBLo7S1kdSvGrt9yRtdTYCZwfWFfzPLcvw8uZes7kM,66
|
|
23
23
|
refchecker/checkers/__init__.py,sha256=-dR7HX0bfPq9YMXrnODoYbfNWFLqu706xoVsUdWHYRI,611
|
|
24
24
|
refchecker/checkers/arxiv_citation.py,sha256=j_waQmQSP3iuZdVuBE92ghtiOdGFTCx09s6f4mHik6o,27777
|
|
25
25
|
refchecker/checkers/crossref.py,sha256=88moAyTudBqf9SKqTQkNAq1yyuRe95f8r4EpmJznupQ,20937
|
|
@@ -29,7 +29,7 @@ refchecker/checkers/local_semantic_scholar.py,sha256=c-KUTh99s-Di71h-pzdrwlPgoST
|
|
|
29
29
|
refchecker/checkers/openalex.py,sha256=WEjEppQMbutPs8kWOSorCIoXWqpJ9o1CXUicThHSWYU,20120
|
|
30
30
|
refchecker/checkers/openreview_checker.py,sha256=0IHZe4Nscy8fle28rmhy1hhsofR5g0FFSakk8FFH_0A,40540
|
|
31
31
|
refchecker/checkers/pdf_paper_checker.py,sha256=lrg09poNJBz9FNMrUoEjQ6CJbdYZAVANw0bCaTSb5oo,19904
|
|
32
|
-
refchecker/checkers/semantic_scholar.py,sha256=
|
|
32
|
+
refchecker/checkers/semantic_scholar.py,sha256=nyadTRdCoGIkErKcG1F55jxM0bN-fwRSPi_AMWlDvno,48622
|
|
33
33
|
refchecker/checkers/webpage_checker.py,sha256=A_d5kg3OOsyliC00OVq_l0J-RJ4Ln7hUoURk21aO2fs,43653
|
|
34
34
|
refchecker/config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
|
|
35
35
|
refchecker/config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
|
|
@@ -37,7 +37,7 @@ refchecker/config/settings.py,sha256=O8PETl_O7uyUl1r_spWhOMHbIaiBM-golfdIN82eigI
|
|
|
37
37
|
refchecker/core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
|
|
38
38
|
refchecker/core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
|
|
39
39
|
refchecker/core/parallel_processor.py,sha256=HpVFEMwPBiP2FRjvGqlaXpjV5S0qP-hxdB_Wdl_lACo,17704
|
|
40
|
-
refchecker/core/refchecker.py,sha256=
|
|
40
|
+
refchecker/core/refchecker.py,sha256=8PczWJeVSrakmWiMsfvo7SASestFBnHhT19YX-wmN5o,290460
|
|
41
41
|
refchecker/database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
|
|
42
42
|
refchecker/database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
|
|
43
43
|
refchecker/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -46,7 +46,7 @@ refchecker/llm/providers.py,sha256=2pOEre_OH_shgm0b9m3_nVIxyoY-MxhFM5KAP_qKo_Q,3
|
|
|
46
46
|
refchecker/scripts/__init__.py,sha256=xJwo6afG8s7S888BK2Bxw2d7FX8aLkbl0l_ZoJOFibE,37
|
|
47
47
|
refchecker/scripts/start_vllm_server.py,sha256=ZepWp2y2cKFW0Kgsoima2RbmF02fTU29UFcLLpsBhFU,4213
|
|
48
48
|
refchecker/services/__init__.py,sha256=jGi9S74Msak3YR-C4Qb68VU7HB4oLaX9o1rlVAFpOFI,187
|
|
49
|
-
refchecker/services/pdf_processor.py,sha256=
|
|
49
|
+
refchecker/services/pdf_processor.py,sha256=cu7gMJcG_uzdk5X8jRoiHp5RTLKldGL1yk-NMRorEPg,16794
|
|
50
50
|
refchecker/utils/__init__.py,sha256=SKTEQeKpLOFFMIzZiakzctsW9zGe_J7LDNJlygWV6RY,1221
|
|
51
51
|
refchecker/utils/arxiv_rate_limiter.py,sha256=axOv84Ge6q_mJ69lcyAFsCmHx9qXvV1aX71oSaxhnjE,4119
|
|
52
52
|
refchecker/utils/arxiv_utils.py,sha256=C7wqoCy9FZUQpoF92vLeJyrK1-6XoMmmL6u_hfDV3ro,18031
|
|
@@ -59,11 +59,11 @@ refchecker/utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,
|
|
|
59
59
|
refchecker/utils/doi_utils.py,sha256=_7YvQ0DTOQBMIujUE0SdJicjPiAR3VETLU668GIji24,6094
|
|
60
60
|
refchecker/utils/error_utils.py,sha256=8TcfRUD6phZ7viPJrezQ4jKf_vE65lqEXZq5707eU6s,15425
|
|
61
61
|
refchecker/utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
|
|
62
|
-
refchecker/utils/text_utils.py,sha256=
|
|
62
|
+
refchecker/utils/text_utils.py,sha256=4sT6YKYqINLGCrRwKOkzrZ2t2cJorHgAXT5Gd3_hKCM,235856
|
|
63
63
|
refchecker/utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
|
|
64
64
|
refchecker/utils/url_utils.py,sha256=7b0rWCQJSajzqOvD7ghsBZPejiq6mUIz6SGhvU_WGDs,9441
|
|
65
|
-
academic_refchecker-2.0.
|
|
66
|
-
academic_refchecker-2.0.
|
|
67
|
-
academic_refchecker-2.0.
|
|
68
|
-
academic_refchecker-2.0.
|
|
69
|
-
academic_refchecker-2.0.
|
|
65
|
+
academic_refchecker-2.0.21.dist-info/METADATA,sha256=D2y73gwXBF9-kvb2OHeE0bMNpcxx-Y1VksqkkUcrpyI,12443
|
|
66
|
+
academic_refchecker-2.0.21.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
67
|
+
academic_refchecker-2.0.21.dist-info/entry_points.txt,sha256=9cREsaKwlp05Ql0CBIjKrNHk5IG2cHY5LvJPsV2-SxA,108
|
|
68
|
+
academic_refchecker-2.0.21.dist-info/top_level.txt,sha256=FfNvrvpj25gfpUBjW0epvz7Qrdejhups5Za_DBiSRu4,19
|
|
69
|
+
academic_refchecker-2.0.21.dist-info/RECORD,,
|
backend/database.py
CHANGED
|
@@ -144,6 +144,12 @@ class Database:
|
|
|
144
144
|
""")
|
|
145
145
|
|
|
146
146
|
await self._ensure_columns(db)
|
|
147
|
+
|
|
148
|
+
# Create index for batch queries
|
|
149
|
+
await db.execute("""
|
|
150
|
+
CREATE INDEX IF NOT EXISTS idx_check_history_batch_id
|
|
151
|
+
ON check_history(batch_id)
|
|
152
|
+
""")
|
|
147
153
|
await db.commit()
|
|
148
154
|
|
|
149
155
|
async def _ensure_columns(self, db: aiosqlite.Connection):
|
|
@@ -168,6 +174,12 @@ class Database:
|
|
|
168
174
|
await db.execute("ALTER TABLE check_history ADD COLUMN thumbnail_path TEXT")
|
|
169
175
|
if "bibliography_source_path" not in columns:
|
|
170
176
|
await db.execute("ALTER TABLE check_history ADD COLUMN bibliography_source_path TEXT")
|
|
177
|
+
if "batch_id" not in columns:
|
|
178
|
+
await db.execute("ALTER TABLE check_history ADD COLUMN batch_id TEXT")
|
|
179
|
+
if "batch_label" not in columns:
|
|
180
|
+
await db.execute("ALTER TABLE check_history ADD COLUMN batch_label TEXT")
|
|
181
|
+
if "original_filename" not in columns:
|
|
182
|
+
await db.execute("ALTER TABLE check_history ADD COLUMN original_filename TEXT")
|
|
171
183
|
|
|
172
184
|
async def save_check(self,
|
|
173
185
|
paper_title: str,
|
|
@@ -222,7 +234,8 @@ class Database:
|
|
|
222
234
|
SELECT id, paper_title, paper_source, custom_label, timestamp,
|
|
223
235
|
total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
|
|
224
236
|
refs_with_errors, refs_with_warnings_only, refs_verified,
|
|
225
|
-
llm_provider, llm_model, status, source_type
|
|
237
|
+
llm_provider, llm_model, status, source_type, batch_id, batch_label,
|
|
238
|
+
original_filename
|
|
226
239
|
FROM check_history
|
|
227
240
|
ORDER BY timestamp DESC
|
|
228
241
|
LIMIT ?
|
|
@@ -282,20 +295,27 @@ class Database:
|
|
|
282
295
|
paper_source: str,
|
|
283
296
|
source_type: str,
|
|
284
297
|
llm_provider: Optional[str] = None,
|
|
285
|
-
llm_model: Optional[str] = None
|
|
298
|
+
llm_model: Optional[str] = None,
|
|
299
|
+
batch_id: Optional[str] = None,
|
|
300
|
+
batch_label: Optional[str] = None,
|
|
301
|
+
original_filename: Optional[str] = None) -> int:
|
|
286
302
|
"""Create a pending check entry before verification starts"""
|
|
287
303
|
async with aiosqlite.connect(self.db_path) as db:
|
|
288
304
|
cursor = await db.execute("""
|
|
289
305
|
INSERT INTO check_history
|
|
290
306
|
(paper_title, paper_source, source_type, total_refs, errors_count, warnings_count,
|
|
291
|
-
suggestions_count, unverified_count, results_json, llm_provider, llm_model, status
|
|
292
|
-
|
|
307
|
+
suggestions_count, unverified_count, results_json, llm_provider, llm_model, status,
|
|
308
|
+
batch_id, batch_label, original_filename)
|
|
309
|
+
VALUES (?, ?, ?, 0, 0, 0, 0, 0, '[]', ?, ?, 'in_progress', ?, ?, ?)
|
|
293
310
|
""", (
|
|
294
311
|
paper_title,
|
|
295
312
|
paper_source,
|
|
296
313
|
source_type,
|
|
297
314
|
llm_provider,
|
|
298
|
-
llm_model
|
|
315
|
+
llm_model,
|
|
316
|
+
batch_id,
|
|
317
|
+
batch_label,
|
|
318
|
+
original_filename
|
|
299
319
|
))
|
|
300
320
|
await db.commit()
|
|
301
321
|
return cursor.lastrowid
|
|
@@ -667,16 +687,35 @@ class Database:
|
|
|
667
687
|
|
|
668
688
|
Returns the cached result if found, None otherwise.
|
|
669
689
|
"""
|
|
690
|
+
import time
|
|
691
|
+
import tempfile
|
|
692
|
+
from pathlib import Path
|
|
693
|
+
|
|
694
|
+
debug_file = Path(tempfile.gettempdir()) / "refchecker_debug.log"
|
|
695
|
+
|
|
696
|
+
start = time.time()
|
|
670
697
|
cache_key = self._compute_reference_cache_key(reference)
|
|
698
|
+
key_time = time.time() - start
|
|
671
699
|
|
|
700
|
+
connect_start = time.time()
|
|
672
701
|
async with aiosqlite.connect(self.db_path) as db:
|
|
702
|
+
connect_time = time.time() - connect_start
|
|
673
703
|
await db.execute("PRAGMA busy_timeout=5000")
|
|
674
704
|
db.row_factory = aiosqlite.Row
|
|
705
|
+
|
|
706
|
+
query_start = time.time()
|
|
675
707
|
async with db.execute(
|
|
676
708
|
"SELECT result_json FROM verification_cache WHERE cache_key = ?",
|
|
677
709
|
(cache_key,)
|
|
678
710
|
) as cursor:
|
|
679
711
|
row = await cursor.fetchone()
|
|
712
|
+
query_time = time.time() - query_start
|
|
713
|
+
|
|
714
|
+
total_time = time.time() - start
|
|
715
|
+
if total_time > 0.05:
|
|
716
|
+
with open(debug_file, "a") as f:
|
|
717
|
+
f.write(f"[TIMING] Cache lookup: total={total_time:.3f}s, key={key_time:.3f}s, connect={connect_time:.3f}s, query={query_time:.3f}s\n")
|
|
718
|
+
|
|
680
719
|
if row and row['result_json']:
|
|
681
720
|
try:
|
|
682
721
|
return json.loads(row['result_json'])
|
|
@@ -716,6 +755,88 @@ class Database:
|
|
|
716
755
|
await db.commit()
|
|
717
756
|
return cursor.rowcount
|
|
718
757
|
|
|
758
|
+
# Batch operations
|
|
759
|
+
|
|
760
|
+
async def get_batch_checks(self, batch_id: str) -> List[Dict[str, Any]]:
|
|
761
|
+
"""Get all checks belonging to a batch"""
|
|
762
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
763
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
764
|
+
db.row_factory = aiosqlite.Row
|
|
765
|
+
async with db.execute("""
|
|
766
|
+
SELECT id, paper_title, paper_source, custom_label, timestamp,
|
|
767
|
+
total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
|
|
768
|
+
refs_with_errors, refs_with_warnings_only, refs_verified,
|
|
769
|
+
llm_provider, llm_model, status, source_type, batch_id, batch_label
|
|
770
|
+
FROM check_history
|
|
771
|
+
WHERE batch_id = ?
|
|
772
|
+
ORDER BY timestamp ASC
|
|
773
|
+
""", (batch_id,)) as cursor:
|
|
774
|
+
rows = await cursor.fetchall()
|
|
775
|
+
return [dict(row) for row in rows]
|
|
776
|
+
|
|
777
|
+
async def get_batch_summary(self, batch_id: str) -> Optional[Dict[str, Any]]:
|
|
778
|
+
"""Get aggregated summary for a batch"""
|
|
779
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
780
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
781
|
+
db.row_factory = aiosqlite.Row
|
|
782
|
+
async with db.execute("""
|
|
783
|
+
SELECT
|
|
784
|
+
batch_id,
|
|
785
|
+
batch_label,
|
|
786
|
+
COUNT(*) as total_papers,
|
|
787
|
+
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_papers,
|
|
788
|
+
SUM(CASE WHEN status = 'in_progress' THEN 1 ELSE 0 END) as in_progress_papers,
|
|
789
|
+
SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error_papers,
|
|
790
|
+
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) as cancelled_papers,
|
|
791
|
+
SUM(total_refs) as total_refs,
|
|
792
|
+
SUM(errors_count) as total_errors,
|
|
793
|
+
SUM(warnings_count) as total_warnings,
|
|
794
|
+
SUM(suggestions_count) as total_suggestions,
|
|
795
|
+
SUM(unverified_count) as total_unverified,
|
|
796
|
+
MIN(timestamp) as started_at
|
|
797
|
+
FROM check_history
|
|
798
|
+
WHERE batch_id = ?
|
|
799
|
+
GROUP BY batch_id
|
|
800
|
+
""", (batch_id,)) as cursor:
|
|
801
|
+
row = await cursor.fetchone()
|
|
802
|
+
if row:
|
|
803
|
+
return dict(row)
|
|
804
|
+
return None
|
|
805
|
+
|
|
806
|
+
async def cancel_batch(self, batch_id: str) -> int:
|
|
807
|
+
"""Cancel all in-progress checks in a batch. Returns count of cancelled checks."""
|
|
808
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
809
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
810
|
+
cursor = await db.execute("""
|
|
811
|
+
UPDATE check_history
|
|
812
|
+
SET status = 'cancelled'
|
|
813
|
+
WHERE batch_id = ? AND status = 'in_progress'
|
|
814
|
+
""", (batch_id,))
|
|
815
|
+
await db.commit()
|
|
816
|
+
return cursor.rowcount
|
|
817
|
+
|
|
818
|
+
async def delete_batch(self, batch_id: str) -> int:
|
|
819
|
+
"""Delete all checks in a batch. Returns count of deleted checks."""
|
|
820
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
821
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
822
|
+
cursor = await db.execute(
|
|
823
|
+
"DELETE FROM check_history WHERE batch_id = ?",
|
|
824
|
+
(batch_id,)
|
|
825
|
+
)
|
|
826
|
+
await db.commit()
|
|
827
|
+
return cursor.rowcount
|
|
828
|
+
|
|
829
|
+
async def update_batch_label(self, batch_id: str, label: str) -> bool:
|
|
830
|
+
"""Update the label for all checks in a batch"""
|
|
831
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
832
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
833
|
+
await db.execute(
|
|
834
|
+
"UPDATE check_history SET batch_label = ? WHERE batch_id = ?",
|
|
835
|
+
(label, batch_id)
|
|
836
|
+
)
|
|
837
|
+
await db.commit()
|
|
838
|
+
return True
|
|
839
|
+
|
|
719
840
|
|
|
720
841
|
# Global database instance
|
|
721
842
|
db = Database()
|