academic-refchecker 2.0.20__tar.gz → 2.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {academic_refchecker-2.0.20/academic_refchecker.egg-info → academic_refchecker-2.0.22}/PKG-INFO +11 -8
  2. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/README.md +10 -7
  3. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22/academic_refchecker.egg-info}/PKG-INFO +11 -8
  4. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/database.py +126 -5
  5. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/main.py +450 -3
  6. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/refchecker_wrapper.py +109 -19
  7. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/__version__.py +1 -1
  8. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/semantic_scholar.py +2 -2
  9. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/core/refchecker.py +46 -0
  10. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/services/pdf_processor.py +156 -1
  11. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/text_utils.py +25 -1
  12. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/LICENSE +0 -0
  13. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/MANIFEST.in +0 -0
  14. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/SOURCES.txt +0 -0
  15. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/dependency_links.txt +0 -0
  16. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/entry_points.txt +0 -0
  17. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/requires.txt +0 -0
  18. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/top_level.txt +0 -0
  19. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/__init__.py +0 -0
  20. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/__main__.py +0 -0
  21. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/cli.py +0 -0
  22. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/concurrency.py +0 -0
  23. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/models.py +0 -0
  24. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/assets/index-2P6L_39v.css +0 -0
  25. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/assets/index-B92lKsA8.js +0 -0
  26. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/assets/index-BuguAhjS.css +0 -0
  27. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/assets/index-DMZJNrR0.js +0 -0
  28. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/assets/index-hk21nqxR.js +0 -0
  29. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/favicon.svg +0 -0
  30. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/index.html +0 -0
  31. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/static/vite.svg +0 -0
  32. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/thumbnail.py +0 -0
  33. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/backend/websocket_manager.py +0 -0
  34. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/pyproject.toml +0 -0
  35. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/requirements.txt +0 -0
  36. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/scripts/download_db.py +0 -0
  37. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/scripts/run_tests.py +0 -0
  38. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/scripts/start_vllm_server.py +0 -0
  39. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/setup.cfg +0 -0
  40. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/__init__.py +0 -0
  41. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/__main__.py +0 -0
  42. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/__init__.py +0 -0
  43. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/arxiv_citation.py +0 -0
  44. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/crossref.py +0 -0
  45. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/enhanced_hybrid_checker.py +0 -0
  46. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/github_checker.py +0 -0
  47. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/local_semantic_scholar.py +0 -0
  48. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/openalex.py +0 -0
  49. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/openreview_checker.py +0 -0
  50. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/pdf_paper_checker.py +0 -0
  51. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/checkers/webpage_checker.py +0 -0
  52. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/config/__init__.py +0 -0
  53. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/config/logging.conf +0 -0
  54. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/config/settings.py +0 -0
  55. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/core/__init__.py +0 -0
  56. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/core/db_connection_pool.py +0 -0
  57. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/core/parallel_processor.py +0 -0
  58. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/database/__init__.py +0 -0
  59. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/database/download_semantic_scholar_db.py +0 -0
  60. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/llm/__init__.py +0 -0
  61. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/llm/base.py +0 -0
  62. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/llm/providers.py +0 -0
  63. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/scripts/__init__.py +0 -0
  64. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/scripts/start_vllm_server.py +0 -0
  65. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/services/__init__.py +0 -0
  66. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/__init__.py +0 -0
  67. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/arxiv_rate_limiter.py +0 -0
  68. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/arxiv_utils.py +0 -0
  69. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/author_utils.py +0 -0
  70. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/biblatex_parser.py +0 -0
  71. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/bibliography_utils.py +0 -0
  72. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/bibtex_parser.py +0 -0
  73. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/config_validator.py +0 -0
  74. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/db_utils.py +0 -0
  75. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/doi_utils.py +0 -0
  76. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/error_utils.py +0 -0
  77. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/mock_objects.py +0 -0
  78. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/unicode_utils.py +0 -0
  79. {academic_refchecker-2.0.20 → academic_refchecker-2.0.22}/src/refchecker/utils/url_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 2.0.20
3
+ Version: 2.0.22
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -124,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
124
124
  - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
125
125
  - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
126
126
  - **Detailed reports**: Errors, warnings, corrected references
127
+ - **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
127
128
 
128
129
  ## Sample Output
129
130
 
@@ -184,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
184
185
  refchecker-webui --port 8000
185
186
  ```
186
187
 
188
+ *Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
189
+
187
190
  #### Development (frontend)
188
191
 
189
192
  ```bash
@@ -331,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
331
334
  | Provider | Env Variable | Example Model |
332
335
  |----------|--------------|---------------|
333
336
  | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
334
- | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
335
- | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
336
- | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
337
- | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
337
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
338
+ | Google | `GOOGLE_API_KEY` | `gemini-3` |
339
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
340
+ | vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
338
341
 
339
342
  ```bash
340
343
  export ANTHROPIC_API_KEY=your_key
341
344
  academic-refchecker --paper 1706.03762 --llm-provider anthropic
342
345
 
343
- academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
344
- academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
346
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
347
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
345
348
  ```
346
349
 
347
350
  #### Local models (vLLM)
@@ -350,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
350
353
 
351
354
  ```bash
352
355
  pip install "academic-refchecker[vllm]"
353
- python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
356
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
354
357
  academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
355
358
  ```
356
359
 
@@ -52,6 +52,7 @@ academic-refchecker --paper /path/to/paper.pdf
52
52
  - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
53
53
  - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
54
54
  - **Detailed reports**: Errors, warnings, corrected references
55
+ - **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
55
56
 
56
57
  ## Sample Output
57
58
 
@@ -112,6 +113,8 @@ The Web UI shows live progress, history, and export (including corrected values)
112
113
  refchecker-webui --port 8000
113
114
  ```
114
115
 
116
+ *Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
117
+
115
118
  #### Development (frontend)
116
119
 
117
120
  ```bash
@@ -259,17 +262,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
259
262
  | Provider | Env Variable | Example Model |
260
263
  |----------|--------------|---------------|
261
264
  | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
262
- | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
263
- | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
264
- | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
265
- | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
265
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
266
+ | Google | `GOOGLE_API_KEY` | `gemini-3` |
267
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
268
+ | vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
266
269
 
267
270
  ```bash
268
271
  export ANTHROPIC_API_KEY=your_key
269
272
  academic-refchecker --paper 1706.03762 --llm-provider anthropic
270
273
 
271
- academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
272
- academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
274
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
275
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
273
276
  ```
274
277
 
275
278
  #### Local models (vLLM)
@@ -278,7 +281,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
278
281
 
279
282
  ```bash
280
283
  pip install "academic-refchecker[vllm]"
281
- python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
284
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
282
285
  academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
283
286
  ```
284
287
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 2.0.20
3
+ Version: 2.0.22
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -124,6 +124,7 @@ academic-refchecker --paper /path/to/paper.pdf
124
124
  - **Comprehensive checks**: Titles, authors, years, venues, DOIs, ArXiv IDs
125
125
  - **Smart matching**: Handles formatting variations (BERT vs B-ERT, pre-trained vs pretrained)
126
126
  - **Detailed reports**: Errors, warnings, corrected references
127
+ - **Bulk web checks**: Upload multiple files or a ZIP in the Web UI to validate many papers at once
127
128
 
128
129
  ## Sample Output
129
130
 
@@ -184,6 +185,8 @@ The Web UI shows live progress, history, and export (including corrected values)
184
185
  refchecker-webui --port 8000
185
186
  ```
186
187
 
188
+ *Tip: You can bulk-check multiple papers by selecting several files or a single ZIP; the Web UI will group them into a batch in the history sidebar.*
189
+
187
190
  #### Development (frontend)
188
191
 
189
192
  ```bash
@@ -331,17 +334,17 @@ LLM-powered extraction improves accuracy with complex bibliographies. Claude Son
331
334
  | Provider | Env Variable | Example Model |
332
335
  |----------|--------------|---------------|
333
336
  | Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-20250514` |
334
- | OpenAI | `OPENAI_API_KEY` | `gpt-4o` |
335
- | Google | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
336
- | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4` |
337
- | vLLM | (local) | `meta-llama/Llama-3.1-8B-Instruct` |
337
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.2-mini` |
338
+ | Google | `GOOGLE_API_KEY` | `gemini-3` |
339
+ | Azure | `AZURE_OPENAI_API_KEY` | `gpt-4o` |
340
+ | vLLM | (local) | `meta-llama/Llama-3.3-70B-Instruct` |
338
341
 
339
342
  ```bash
340
343
  export ANTHROPIC_API_KEY=your_key
341
344
  academic-refchecker --paper 1706.03762 --llm-provider anthropic
342
345
 
343
- academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-4o
344
- academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.1-8B-Instruct
346
+ academic-refchecker --paper paper.pdf --llm-provider openai --llm-model gpt-5.2-mini
347
+ academic-refchecker --paper paper.pdf --llm-provider vllm --llm-model meta-llama/Llama-3.3-70B-Instruct
345
348
  ```
346
349
 
347
350
  #### Local models (vLLM)
@@ -350,7 +353,7 @@ There is no separate “GPU Docker image”. For local inference, install the vL
350
353
 
351
354
  ```bash
352
355
  pip install "academic-refchecker[vllm]"
353
- python scripts/start_vllm_server.py --model meta-llama/Llama-3.1-8B-Instruct --port 8001
356
+ python scripts/start_vllm_server.py --model meta-llama/Llama-3.3-70B-Instruct --port 8001
354
357
  academic-refchecker --paper paper.pdf --llm-provider vllm --llm-endpoint http://localhost:8001/v1
355
358
  ```
356
359
 
@@ -144,6 +144,12 @@ class Database:
144
144
  """)
145
145
 
146
146
  await self._ensure_columns(db)
147
+
148
+ # Create index for batch queries
149
+ await db.execute("""
150
+ CREATE INDEX IF NOT EXISTS idx_check_history_batch_id
151
+ ON check_history(batch_id)
152
+ """)
147
153
  await db.commit()
148
154
 
149
155
  async def _ensure_columns(self, db: aiosqlite.Connection):
@@ -168,6 +174,12 @@ class Database:
168
174
  await db.execute("ALTER TABLE check_history ADD COLUMN thumbnail_path TEXT")
169
175
  if "bibliography_source_path" not in columns:
170
176
  await db.execute("ALTER TABLE check_history ADD COLUMN bibliography_source_path TEXT")
177
+ if "batch_id" not in columns:
178
+ await db.execute("ALTER TABLE check_history ADD COLUMN batch_id TEXT")
179
+ if "batch_label" not in columns:
180
+ await db.execute("ALTER TABLE check_history ADD COLUMN batch_label TEXT")
181
+ if "original_filename" not in columns:
182
+ await db.execute("ALTER TABLE check_history ADD COLUMN original_filename TEXT")
171
183
 
172
184
  async def save_check(self,
173
185
  paper_title: str,
@@ -222,7 +234,8 @@ class Database:
222
234
  SELECT id, paper_title, paper_source, custom_label, timestamp,
223
235
  total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
224
236
  refs_with_errors, refs_with_warnings_only, refs_verified,
225
- llm_provider, llm_model, status, source_type
237
+ llm_provider, llm_model, status, source_type, batch_id, batch_label,
238
+ original_filename
226
239
  FROM check_history
227
240
  ORDER BY timestamp DESC
228
241
  LIMIT ?
@@ -282,20 +295,27 @@ class Database:
282
295
  paper_source: str,
283
296
  source_type: str,
284
297
  llm_provider: Optional[str] = None,
285
- llm_model: Optional[str] = None) -> int:
298
+ llm_model: Optional[str] = None,
299
+ batch_id: Optional[str] = None,
300
+ batch_label: Optional[str] = None,
301
+ original_filename: Optional[str] = None) -> int:
286
302
  """Create a pending check entry before verification starts"""
287
303
  async with aiosqlite.connect(self.db_path) as db:
288
304
  cursor = await db.execute("""
289
305
  INSERT INTO check_history
290
306
  (paper_title, paper_source, source_type, total_refs, errors_count, warnings_count,
291
- suggestions_count, unverified_count, results_json, llm_provider, llm_model, status)
292
- VALUES (?, ?, ?, 0, 0, 0, 0, 0, '[]', ?, ?, 'in_progress')
307
+ suggestions_count, unverified_count, results_json, llm_provider, llm_model, status,
308
+ batch_id, batch_label, original_filename)
309
+ VALUES (?, ?, ?, 0, 0, 0, 0, 0, '[]', ?, ?, 'in_progress', ?, ?, ?)
293
310
  """, (
294
311
  paper_title,
295
312
  paper_source,
296
313
  source_type,
297
314
  llm_provider,
298
- llm_model
315
+ llm_model,
316
+ batch_id,
317
+ batch_label,
318
+ original_filename
299
319
  ))
300
320
  await db.commit()
301
321
  return cursor.lastrowid
@@ -667,16 +687,35 @@ class Database:
667
687
 
668
688
  Returns the cached result if found, None otherwise.
669
689
  """
690
+ import time
691
+ import tempfile
692
+ from pathlib import Path
693
+
694
+ debug_file = Path(tempfile.gettempdir()) / "refchecker_debug.log"
695
+
696
+ start = time.time()
670
697
  cache_key = self._compute_reference_cache_key(reference)
698
+ key_time = time.time() - start
671
699
 
700
+ connect_start = time.time()
672
701
  async with aiosqlite.connect(self.db_path) as db:
702
+ connect_time = time.time() - connect_start
673
703
  await db.execute("PRAGMA busy_timeout=5000")
674
704
  db.row_factory = aiosqlite.Row
705
+
706
+ query_start = time.time()
675
707
  async with db.execute(
676
708
  "SELECT result_json FROM verification_cache WHERE cache_key = ?",
677
709
  (cache_key,)
678
710
  ) as cursor:
679
711
  row = await cursor.fetchone()
712
+ query_time = time.time() - query_start
713
+
714
+ total_time = time.time() - start
715
+ if total_time > 0.05:
716
+ with open(debug_file, "a") as f:
717
+ f.write(f"[TIMING] Cache lookup: total={total_time:.3f}s, key={key_time:.3f}s, connect={connect_time:.3f}s, query={query_time:.3f}s\n")
718
+
680
719
  if row and row['result_json']:
681
720
  try:
682
721
  return json.loads(row['result_json'])
@@ -716,6 +755,88 @@ class Database:
716
755
  await db.commit()
717
756
  return cursor.rowcount
718
757
 
758
+ # Batch operations
759
+
760
async def get_batch_checks(self, batch_id: str) -> List[Dict[str, Any]]:
    """Return every check that belongs to one batch, oldest first.

    The selected columns mirror the history listing query, including
    ``original_filename``, so batch rows and history rows share one shape.

    Args:
        batch_id: Value to match against ``check_history.batch_id``.

    Returns:
        One dict per matching row (column name -> value), ordered by
        ``timestamp`` ascending. Empty list when no row matches.
    """
    async with aiosqlite.connect(self.db_path) as db:
        # Wait up to 5000 ms for a lock instead of failing immediately.
        await db.execute("PRAGMA busy_timeout=5000")
        # Name-addressable rows are required for the dict(row) conversion.
        db.row_factory = aiosqlite.Row
        async with db.execute("""
            SELECT id, paper_title, paper_source, custom_label, timestamp,
                   total_refs, errors_count, warnings_count, suggestions_count, unverified_count,
                   refs_with_errors, refs_with_warnings_only, refs_verified,
                   llm_provider, llm_model, status, source_type, batch_id, batch_label,
                   original_filename
            FROM check_history
            WHERE batch_id = ?
            ORDER BY timestamp ASC
        """, (batch_id,)) as cursor:
            rows = await cursor.fetchall()
            return [dict(row) for row in rows]
776
+
777
async def get_batch_summary(self, batch_id: str) -> Optional[Dict[str, Any]]:
    """Aggregate the checks of one batch into a single summary record.

    Args:
        batch_id: Value to match against ``check_history.batch_id``.

    Returns:
        A dict with the batch id and label, the number of papers in total
        and per status (completed / in_progress / error / cancelled), the
        summed reference, error, warning, suggestion and unverified counts,
        and the earliest timestamp as ``started_at``. ``None`` when the
        query yields no row.
    """
    summary_sql = """
        SELECT
            batch_id,
            batch_label,
            COUNT(*) as total_papers,
            SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_papers,
            SUM(CASE WHEN status = 'in_progress' THEN 1 ELSE 0 END) as in_progress_papers,
            SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error_papers,
            SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) as cancelled_papers,
            SUM(total_refs) as total_refs,
            SUM(errors_count) as total_errors,
            SUM(warnings_count) as total_warnings,
            SUM(suggestions_count) as total_suggestions,
            SUM(unverified_count) as total_unverified,
            MIN(timestamp) as started_at
        FROM check_history
        WHERE batch_id = ?
        GROUP BY batch_id
        """
    async with aiosqlite.connect(self.db_path) as conn:
        await conn.execute("PRAGMA busy_timeout=5000")
        # Rows must be addressable by column name for dict() below.
        conn.row_factory = aiosqlite.Row
        async with conn.execute(summary_sql, (batch_id,)) as cur:
            summary_row = await cur.fetchone()
            return dict(summary_row) if summary_row else None
805
+
806
async def cancel_batch(self, batch_id: str) -> int:
    """Flag the still-running checks of a batch as cancelled.

    Only rows whose status is ``'in_progress'`` are updated; rows in any
    other state are left as they are.

    Args:
        batch_id: Value to match against ``check_history.batch_id``.

    Returns:
        Number of rows the UPDATE changed.
    """
    cancel_sql = """
        UPDATE check_history
        SET status = 'cancelled'
        WHERE batch_id = ? AND status = 'in_progress'
        """
    async with aiosqlite.connect(self.db_path) as conn:
        await conn.execute("PRAGMA busy_timeout=5000")
        update_cursor = await conn.execute(cancel_sql, (batch_id,))
        await conn.commit()
        cancelled_count = update_cursor.rowcount
        return cancelled_count
817
+
818
async def delete_batch(self, batch_id: str) -> int:
    """Remove every check row that belongs to a batch.

    Args:
        batch_id: Value to match against ``check_history.batch_id``.

    Returns:
        Number of rows the DELETE removed.
    """
    delete_sql = "DELETE FROM check_history WHERE batch_id = ?"
    async with aiosqlite.connect(self.db_path) as conn:
        await conn.execute("PRAGMA busy_timeout=5000")
        delete_cursor = await conn.execute(delete_sql, (batch_id,))
        await conn.commit()
        removed_count = delete_cursor.rowcount
        return removed_count
828
+
829
async def update_batch_label(self, batch_id: str, label: str) -> bool:
    """Set the same label on every check row of a batch.

    Args:
        batch_id: Value to match against ``check_history.batch_id``.
        label: New text for the ``batch_label`` column.

    Returns:
        Always ``True`` once the statement has been committed; the result
        does not indicate whether any row actually matched ``batch_id``.
    """
    relabel_sql = "UPDATE check_history SET batch_label = ? WHERE batch_id = ?"
    params = (label, batch_id)
    async with aiosqlite.connect(self.db_path) as conn:
        await conn.execute("PRAGMA busy_timeout=5000")
        await conn.execute(relabel_sql, params)
        await conn.commit()
        return True
839
+
719
840
 
720
841
  # Global database instance
721
842
  db = Database()