academic-refchecker 2.0.20__tar.gz → 2.0.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-2.0.20/academic_refchecker.egg-info → academic_refchecker-2.0.21}/PKG-INFO +11 -8
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/README.md +10 -7
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21/academic_refchecker.egg-info}/PKG-INFO +11 -8
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/database.py +126 -5
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/main.py +450 -3
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/refchecker_wrapper.py +109 -19
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/__version__.py +1 -1
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/semantic_scholar.py +2 -2
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/core/refchecker.py +46 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/services/pdf_processor.py +156 -1
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/text_utils.py +3 -1
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/LICENSE +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/MANIFEST.in +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/SOURCES.txt +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/entry_points.txt +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/academic_refchecker.egg-info/top_level.txt +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/__main__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/cli.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/concurrency.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/models.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/assets/index-2P6L_39v.css +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/assets/index-B92lKsA8.js +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/assets/index-BuguAhjS.css +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/assets/index-DMZJNrR0.js +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/assets/index-hk21nqxR.js +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/favicon.svg +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/index.html +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/static/vite.svg +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/thumbnail.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/websocket_manager.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/pyproject.toml +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/requirements.txt +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/scripts/download_db.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/scripts/run_tests.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/setup.cfg +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/__main__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/arxiv_citation.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/crossref.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/enhanced_hybrid_checker.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/github_checker.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/local_semantic_scholar.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/openalex.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/openreview_checker.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/pdf_paper_checker.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/checkers/webpage_checker.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/config/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/config/logging.conf +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/config/settings.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/core/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/core/db_connection_pool.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/core/parallel_processor.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/database/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/llm/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/llm/base.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/llm/providers.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/scripts/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/services/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/__init__.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/arxiv_rate_limiter.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/arxiv_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/author_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/biblatex_parser.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/bibliography_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/bibtex_parser.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/config_validator.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/db_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/doi_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/error_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/mock_objects.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/unicode_utils.py +0 -0
- {academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/src/refchecker/utils/url_utils.py +0 -0
{academic_refchecker-2.0.20/academic_refchecker.egg-info → academic_refchecker-2.0.21}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 2.0.20
|
|
3
|
+
Version: 2.0.21
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -124,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
|
|
|
124
124
|
- **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
|
|
125
125
|
- **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
|
|
126
126
|
- **Detailed reports**: Errors, warnings, corrected references
|
|
127
|
+
- **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
|
|
127
128
|
|
|
128
129
|
## Sample Output
|
|
129
130
|
|
|
@@ -184,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
|
|
|
184
185
|
refchecker-webui --port 8000
|
|
185
186
|
```
|
|
186
187
|
|
|
188
|
+
*Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
|
|
189
|
+
|
|
187
190
|
#### Development (frontend)
|
|
188
191
|
|
|
189
192
|
```bash
|
|
@@ -331,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
|
|
|
331
334
|
| Provider | Env Variable | Example Model |
|
|
332
335
|
|----------|--------------|---------------|
|
|
333
336
|
| Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
|
|
334
|
-
| OpenAI | `OPENAI_API_KEY` | `gpt-
|
|
335
|
-
| Google | `GOOGLE_API_KEY` | `gemini-
|
|
336
|
-
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-
|
|
337
|
-
| vLLM | (local) | `meta-llama/Llama-3.
|
|
337
|
+
| OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
|
|
338
|
+
| Google | `GOOGLE_API_KEY` | `gemini-3` |
|
|
339
|
+
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
|
|
340
|
+
| vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
|
|
338
341
|
|
|
339
342
|
```bash
|
|
340
343
|
export ANTHROPIC_API_KEY=your_key
|
|
341
344
|
academic-refchecker --paper 1706.03762 --llm-provider anthropic
|
|
342
345
|
|
|
343
|
-
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-
|
|
344
|
-
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.
|
|
346
|
+
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
|
|
347
|
+
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
|
|
345
348
|
```
|
|
346
349
|
|
|
347
350
|
#### Local models (vLLM)
|
|
@@ -350,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
|
|
|
350
353
|
|
|
351
354
|
```bash
|
|
352
355
|
pip install "academic-refchecker[vllm]"
|
|
353
|
-
python scripts/start_vllm_server.py --model meta-llama/Llama-3.
|
|
356
|
+
python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
|
|
354
357
|
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
|
|
355
358
|
```
|
|
356
359
|
|
|
{academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/README.md
RENAMED
|
@@ -52,6 +52,7 @@ academic-refchecker --paper /path/to/paper.pdf
|
|
|
52
52
|
- **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
|
|
53
53
|
- **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
|
|
54
54
|
- **Detailed reports**: Errors, warnings, corrected references
|
|
55
|
+
- **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
|
|
55
56
|
|
|
56
57
|
## Sample Output
|
|
57
58
|
|
|
@@ -112,6 +113,8 @@ The Web UI shows live progress, history, and export (including corrected values)
|
|
|
112
113
|
refchecker-webui --port 8000
|
|
113
114
|
```
|
|
114
115
|
|
|
116
|
+
*Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
|
|
117
|
+
|
|
115
118
|
#### Development (frontend)
|
|
116
119
|
|
|
117
120
|
```bash
|
|
@@ -259,17 +262,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
|
|
|
259
262
|
| Provider | Env Variable | Example Model |
|
|
260
263
|
|----------|--------------|---------------|
|
|
261
264
|
| Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
|
|
262
|
-
| OpenAI | `OPENAI_API_KEY` | `gpt-
|
|
263
|
-
| Google | `GOOGLE_API_KEY` | `gemini-
|
|
264
|
-
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-
|
|
265
|
-
| vLLM | (local) | `meta-llama/Llama-3.
|
|
265
|
+
| OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
|
|
266
|
+
| Google | `GOOGLE_API_KEY` | `gemini-3` |
|
|
267
|
+
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
|
|
268
|
+
| vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
|
|
266
269
|
|
|
267
270
|
```bash
|
|
268
271
|
export ANTHROPIC_API_KEY=your_key
|
|
269
272
|
academic-refchecker --paper 1706.03762 --llm-provider anthropic
|
|
270
273
|
|
|
271
|
-
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-
|
|
272
|
-
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.
|
|
274
|
+
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
|
|
275
|
+
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
|
|
273
276
|
```
|
|
274
277
|
|
|
275
278
|
#### Local models (vLLM)
|
|
@@ -278,7 +281,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
|
|
|
278
281
|
|
|
279
282
|
```bash
|
|
280
283
|
pip install "academic-refchecker[vllm]"
|
|
281
|
-
python scripts/start_vllm_server.py --model meta-llama/Llama-3.
|
|
284
|
+
python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
|
|
282
285
|
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
|
|
283
286
|
```
|
|
284
287
|
|
{academic_refchecker-2.0.20 → academic_refchecker-2.0.21/academic_refchecker.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 2.0.20
|
|
3
|
+
Version: 2.0.21
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -124,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
|
|
|
124
124
|
- **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
|
|
125
125
|
- **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
|
|
126
126
|
- **Detailed reports**: Errors, warnings, corrected references
|
|
127
|
+
- **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
|
|
127
128
|
|
|
128
129
|
## Sample Output
|
|
129
130
|
|
|
@@ -184,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
|
|
|
184
185
|
refchecker-webui --port 8000
|
|
185
186
|
```
|
|
186
187
|
|
|
188
|
+
*Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
|
|
189
|
+
|
|
187
190
|
#### Development (frontend)
|
|
188
191
|
|
|
189
192
|
```bash
|
|
@@ -331,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
|
|
|
331
334
|
| Provider | Env Variable | Example Model |
|
|
332
335
|
|----------|--------------|---------------|
|
|
333
336
|
| Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
|
|
334
|
-
| OpenAI | `OPENAI_API_KEY` | `gpt-
|
|
335
|
-
| Google | `GOOGLE_API_KEY` | `gemini-
|
|
336
|
-
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-
|
|
337
|
-
| vLLM | (local) | `meta-llama/Llama-3.
|
|
337
|
+
| OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
|
|
338
|
+
| Google | `GOOGLE_API_KEY` | `gemini-3` |
|
|
339
|
+
| Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
|
|
340
|
+
| vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
|
|
338
341
|
|
|
339
342
|
```bash
|
|
340
343
|
export ANTHROPIC_API_KEY=your_key
|
|
341
344
|
academic-refchecker --paper 1706.03762 --llm-provider anthropic
|
|
342
345
|
|
|
343
|
-
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-
|
|
344
|
-
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.
|
|
346
|
+
academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
|
|
347
|
+
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
|
|
345
348
|
```
|
|
346
349
|
|
|
347
350
|
#### Local models (vLLM)
|
|
@@ -350,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
|
|
|
350
353
|
|
|
351
354
|
```bash
|
|
352
355
|
pip install "academic-refchecker[vllm]"
|
|
353
|
-
python scripts/start_vllm_server.py --model meta-llama/Llama-3.
|
|
356
|
+
python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
|
|
354
357
|
academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
|
|
355
358
|
```
|
|
356
359
|
|
|
{academic_refchecker-2.0.20 → academic_refchecker-2.0.21}/backend/database.py
RENAMED
|
@@ -144,6 +144,12 @@ class Database:
|
|
|
144
144
|
""")
|
|
145
145
|
|
|
146
146
|
await self._ensure_columns(db)
|
|
147
|
+
|
|
148
|
+
# Create index for batch queries
|
|
149
|
+
await db.execute("""
|
|
150
|
+
CREATE INDEX IF NOT EXISTS idx_check_history_batch_id
|
|
151
|
+
ON check_history(batch_id)
|
|
152
|
+
""")
|
|
147
153
|
await db.commit()
|
|
148
154
|
|
|
149
155
|
async def _ensure_columns(self, db: aiosqlite.Connection):
|
|
@@ -168,6 +174,12 @@ class Database:
|
|
|
168
174
|
await db.execute("ALTER TABLE check_history ADD COLUMN thumbnail_path TEXT")
|
|
169
175
|
if "bibliography_source_path" not in columns:
|
|
170
176
|
await db.execute("ALTER TABLE check_history ADD COLUMN bibliography_source_path TEXT")
|
|
177
|
+
if "batch_id" not in columns:
|
|
178
|
+
await db.execute("ALTER TABLE check_history ADD COLUMN batch_id TEXT")
|
|
179
|
+
if "batch_label" not in columns:
|
|
180
|
+
await db.execute("ALTER TABLE check_history ADD COLUMN batch_label TEXT")
|
|
181
|
+
if "original_filename" not in columns:
|
|
182
|
+
await db.execute("ALTER TABLE check_history ADD COLUMN original_filename TEXT")
|
|
171
183
|
|
|
172
184
|
async def save_check(self,
|
|
173
185
|
paper_title: str,
|
|
@@ -222,7 +234,8 @@ class Database:
|
|
|
222
234
|
SELECT id, paper_title, paper_source, custom_label, timestamp,
|
|
223
235
|
total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
|
|
224
236
|
refs_with_errors, refs_with_warnings_only, refs_verified,
|
|
225
|
-
llm_provider, llm_model, status, source_type
|
|
237
|
+
llm_provider, llm_model, status, source_type, batch_id, batch_label,
|
|
238
|
+
original_filename
|
|
226
239
|
FROM check_history
|
|
227
240
|
ORDER BY timestamp DESC
|
|
228
241
|
LIMIT ?
|
|
@@ -282,20 +295,27 @@ class Database:
|
|
|
282
295
|
paper_source: str,
|
|
283
296
|
source_type: str,
|
|
284
297
|
llm_provider: Optional[str] = None,
|
|
285
|
-
llm_model: Optional[str] = None
|
|
298
|
+
llm_model: Optional[str] = None,
|
|
299
|
+
batch_id: Optional[str] = None,
|
|
300
|
+
batch_label: Optional[str] = None,
|
|
301
|
+
original_filename: Optional[str] = None) -> int:
|
|
286
302
|
"""Create a pending check entry before verification starts"""
|
|
287
303
|
async with aiosqlite.connect(self.db_path) as db:
|
|
288
304
|
cursor = await db.execute("""
|
|
289
305
|
INSERT INTO check_history
|
|
290
306
|
(paper_title, paper_source, source_type, total_refs, errors_count, warnings_count,
|
|
291
|
-
suggestions_count, unverified_count, results_json, llm_provider, llm_model, status
|
|
292
|
-
|
|
307
|
+
suggestions_count, unverified_count, results_json, llm_provider, llm_model, status,
|
|
308
|
+
batch_id, batch_label, original_filename)
|
|
309
|
+
VALUES (?, ?, ?, 0, 0, 0, 0, 0, '[]', ?, ?, 'in_progress', ?, ?, ?)
|
|
293
310
|
""", (
|
|
294
311
|
paper_title,
|
|
295
312
|
paper_source,
|
|
296
313
|
source_type,
|
|
297
314
|
llm_provider,
|
|
298
|
-
llm_model
|
|
315
|
+
llm_model,
|
|
316
|
+
batch_id,
|
|
317
|
+
batch_label,
|
|
318
|
+
original_filename
|
|
299
319
|
))
|
|
300
320
|
await db.commit()
|
|
301
321
|
return cursor.lastrowid
|
|
@@ -667,16 +687,35 @@ class Database:
|
|
|
667
687
|
|
|
668
688
|
Returns the cached result if found, None otherwise.
|
|
669
689
|
"""
|
|
690
|
+
import time
|
|
691
|
+
import tempfile
|
|
692
|
+
from pathlib import Path
|
|
693
|
+
|
|
694
|
+
debug_file = Path(tempfile.gettempdir()) / "refchecker_debug.log"
|
|
695
|
+
|
|
696
|
+
start = time.time()
|
|
670
697
|
cache_key = self._compute_reference_cache_key(reference)
|
|
698
|
+
key_time = time.time() - start
|
|
671
699
|
|
|
700
|
+
connect_start = time.time()
|
|
672
701
|
async with aiosqlite.connect(self.db_path) as db:
|
|
702
|
+
connect_time = time.time() - connect_start
|
|
673
703
|
await db.execute("PRAGMA busy_timeout=5000")
|
|
674
704
|
db.row_factory = aiosqlite.Row
|
|
705
|
+
|
|
706
|
+
query_start = time.time()
|
|
675
707
|
async with db.execute(
|
|
676
708
|
"SELECT result_json FROM verification_cache WHERE cache_key = ?",
|
|
677
709
|
(cache_key,)
|
|
678
710
|
) as cursor:
|
|
679
711
|
row = await cursor.fetchone()
|
|
712
|
+
query_time = time.time() - query_start
|
|
713
|
+
|
|
714
|
+
total_time = time.time() - start
|
|
715
|
+
if total_time > 0.05:
|
|
716
|
+
with open(debug_file, "a") as f:
|
|
717
|
+
f.write(f"[TIMING] Cache lookup: total={total_time:.3f}s, key={key_time:.3f}s, connect={connect_time:.3f}s, query={query_time:.3f}s\n")
|
|
718
|
+
|
|
680
719
|
if row and row['result_json']:
|
|
681
720
|
try:
|
|
682
721
|
return json.loads(row['result_json'])
|
|
@@ -716,6 +755,88 @@ class Database:
|
|
|
716
755
|
await db.commit()
|
|
717
756
|
return cursor.rowcount
|
|
718
757
|
|
|
758
|
+
# Batch operations
|
|
759
|
+
|
|
760
|
+
async def get_batch_checks(self, batch_id: str) -> List[Dict[str, Any]]:
|
|
761
|
+
"""Get all checks belonging to a batch"""
|
|
762
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
763
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
764
|
+
db.row_factory = aiosqlite.Row
|
|
765
|
+
async with db.execute("""
|
|
766
|
+
SELECT id, paper_title, paper_source, custom_label, timestamp,
|
|
767
|
+
total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
|
|
768
|
+
refs_with_errors, refs_with_warnings_only, refs_verified,
|
|
769
|
+
llm_provider, llm_model, status, source_type, batch_id, batch_label
|
|
770
|
+
FROM check_history
|
|
771
|
+
WHERE batch_id = ?
|
|
772
|
+
ORDER BY timestamp ASC
|
|
773
|
+
""", (batch_id,)) as cursor:
|
|
774
|
+
rows = await cursor.fetchall()
|
|
775
|
+
return [dict(row) for row in rows]
|
|
776
|
+
|
|
777
|
+
async def get_batch_summary(self, batch_id: str) -> Optional[Dict[str, Any]]:
|
|
778
|
+
"""Get aggregated summary for a batch"""
|
|
779
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
780
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
781
|
+
db.row_factory = aiosqlite.Row
|
|
782
|
+
async with db.execute("""
|
|
783
|
+
SELECT
|
|
784
|
+
batch_id,
|
|
785
|
+
batch_label,
|
|
786
|
+
COUNT(*) as total_papers,
|
|
787
|
+
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_papers,
|
|
788
|
+
SUM(CASE WHEN status = 'in_progress' THEN 1 ELSE 0 END) as in_progress_papers,
|
|
789
|
+
SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error_papers,
|
|
790
|
+
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) as cancelled_papers,
|
|
791
|
+
SUM(total_refs) as total_refs,
|
|
792
|
+
SUM(errors_count) as total_errors,
|
|
793
|
+
SUM(warnings_count) as total_warnings,
|
|
794
|
+
SUM(suggestions_count) as total_suggestions,
|
|
795
|
+
SUM(unverified_count) as total_unverified,
|
|
796
|
+
MIN(timestamp) as started_at
|
|
797
|
+
FROM check_history
|
|
798
|
+
WHERE batch_id = ?
|
|
799
|
+
GROUP BY batch_id
|
|
800
|
+
""", (batch_id,)) as cursor:
|
|
801
|
+
row = await cursor.fetchone()
|
|
802
|
+
if row:
|
|
803
|
+
return dict(row)
|
|
804
|
+
return None
|
|
805
|
+
|
|
806
|
+
async def cancel_batch(self, batch_id: str) -> int:
|
|
807
|
+
"""Cancel all in-progress checks in a batch. Returns count of cancelled checks."""
|
|
808
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
809
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
810
|
+
cursor = await db.execute("""
|
|
811
|
+
UPDATE check_history
|
|
812
|
+
SET status = 'cancelled'
|
|
813
|
+
WHERE batch_id = ? AND status = 'in_progress'
|
|
814
|
+
""", (batch_id,))
|
|
815
|
+
await db.commit()
|
|
816
|
+
return cursor.rowcount
|
|
817
|
+
|
|
818
|
+
async def delete_batch(self, batch_id: str) -> int:
|
|
819
|
+
"""Delete all checks in a batch. Returns count of deleted checks."""
|
|
820
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
821
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
822
|
+
cursor = await db.execute(
|
|
823
|
+
"DELETE FROM check_history WHERE batch_id = ?",
|
|
824
|
+
(batch_id,)
|
|
825
|
+
)
|
|
826
|
+
await db.commit()
|
|
827
|
+
return cursor.rowcount
|
|
828
|
+
|
|
829
|
+
async def update_batch_label(self, batch_id: str, label: str) -> bool:
|
|
830
|
+
"""Update the label for all checks in a batch"""
|
|
831
|
+
async with aiosqlite.connect(self.db_path) as db:
|
|
832
|
+
await db.execute("PRAGMA busy_timeout=5000")
|
|
833
|
+
await db.execute(
|
|
834
|
+
"UPDATE check_history SET batch_label = ? WHERE batch_id = ?",
|
|
835
|
+
(label, batch_id)
|
|
836
|
+
)
|
|
837
|
+
await db.commit()
|
|
838
|
+
return True
|
|
839
|
+
|
|
719
840
|
|
|
720
841
|
# Global database instance
|
|
721
842
|
db = Database()
|