academic-refchecker 1.2.65__py3-none-any.whl → 1.2.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.65.dist-info → academic_refchecker-1.2.66.dist-info}/METADATA +72 -7
- {academic_refchecker-1.2.65.dist-info → academic_refchecker-1.2.66.dist-info}/RECORD +28 -18
- {academic_refchecker-1.2.65.dist-info → academic_refchecker-1.2.66.dist-info}/entry_points.txt +1 -0
- {academic_refchecker-1.2.65.dist-info → academic_refchecker-1.2.66.dist-info}/top_level.txt +1 -0
- backend/__init__.py +21 -0
- backend/__main__.py +11 -0
- backend/cli.py +56 -0
- backend/concurrency.py +100 -0
- backend/database.py +686 -0
- backend/main.py +1266 -0
- backend/models.py +99 -0
- backend/refchecker_wrapper.py +1126 -0
- backend/thumbnail.py +517 -0
- backend/websocket_manager.py +104 -0
- refchecker/__version__.py +2 -2
- refchecker/checkers/crossref.py +15 -6
- refchecker/checkers/enhanced_hybrid_checker.py +18 -4
- refchecker/checkers/local_semantic_scholar.py +2 -2
- refchecker/checkers/openalex.py +15 -6
- refchecker/checkers/semantic_scholar.py +15 -6
- refchecker/core/refchecker.py +17 -6
- refchecker/utils/__init__.py +2 -1
- refchecker/utils/arxiv_utils.py +18 -60
- refchecker/utils/doi_utils.py +32 -1
- refchecker/utils/error_utils.py +20 -9
- refchecker/utils/text_utils.py +143 -27
- {academic_refchecker-1.2.65.dist-info → academic_refchecker-1.2.66.dist-info}/WHEEL +0 -0
- {academic_refchecker-1.2.65.dist-info → academic_refchecker-1.2.66.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.65
|
|
3
|
+
Version: 1.2.66
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -57,6 +57,16 @@ Provides-Extra: vllm
|
|
|
57
57
|
Requires-Dist: vllm>=0.3.0; extra == "vllm"
|
|
58
58
|
Requires-Dist: huggingface_hub>=0.17.0; extra == "vllm"
|
|
59
59
|
Requires-Dist: torch>=2.0.0; extra == "vllm"
|
|
60
|
+
Provides-Extra: webui
|
|
61
|
+
Requires-Dist: fastapi>=0.100.0; extra == "webui"
|
|
62
|
+
Requires-Dist: uvicorn[standard]>=0.22.0; extra == "webui"
|
|
63
|
+
Requires-Dist: pydantic>=2.0.0; extra == "webui"
|
|
64
|
+
Requires-Dist: aiosqlite>=0.19.0; extra == "webui"
|
|
65
|
+
Requires-Dist: httpx>=0.24.0; extra == "webui"
|
|
66
|
+
Requires-Dist: cryptography>=42.0.0; extra == "webui"
|
|
67
|
+
Requires-Dist: pymupdf>=1.23.0; extra == "webui"
|
|
68
|
+
Requires-Dist: Pillow>=9.0.0; extra == "webui"
|
|
69
|
+
Requires-Dist: python-multipart>=0.0.6; extra == "webui"
|
|
60
70
|
Dynamic: license-file
|
|
61
71
|
|
|
62
72
|
# 📚 Academic Paper Reference Checker
|
|
@@ -176,9 +186,18 @@ Learn about RefChecker's design philosophy and development process in this detai
|
|
|
176
186
|
|
|
177
187
|
RefChecker also includes a modern web interface with real-time progress updates, check history, and export options.
|
|
178
188
|
|
|
189
|
+
### Prerequisites
|
|
190
|
+
|
|
191
|
+
- **Python 3.8+** with RefChecker installed (`pip install academic-refchecker[webui]`)
|
|
192
|
+
- **Node.js 18+** and npm
|
|
193
|
+
|
|
179
194
|
### Quick Start (Web UI)
|
|
180
195
|
|
|
181
196
|
```bash
|
|
197
|
+
# Install Python dependencies (if not already done)
|
|
198
|
+
pip install academic-refchecker[llm,webui]
|
|
199
|
+
|
|
200
|
+
# Install Node.js dependencies
|
|
182
201
|
cd web-ui
|
|
183
202
|
npm install # First time only
|
|
184
203
|
npm start # Starts both backend and frontend
|
|
@@ -186,6 +205,20 @@ npm start # Starts both backend and frontend
|
|
|
186
205
|
|
|
187
206
|
Then open **http://localhost:5173** in your browser.
|
|
188
207
|
|
|
208
|
+
### Alternative: Start Servers Separately
|
|
209
|
+
|
|
210
|
+
**Terminal 1 - Backend:**
|
|
211
|
+
```bash
|
|
212
|
+
refchecker-webui --port 8000
|
|
213
|
+
# Or: python -m uvicorn backend.main:app --port 8000
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
**Terminal 2 - Frontend:**
|
|
217
|
+
```bash
|
|
218
|
+
cd web-ui
|
|
219
|
+
npm run dev
|
|
220
|
+
```
|
|
221
|
+
|
|
189
222
|
### Features
|
|
190
223
|
|
|
191
224
|
- ✨ Real-time validation with live progress updates
|
|
@@ -284,24 +317,33 @@ You can debug vllm server issues by running refchecker with the `--debug` flag.
|
|
|
284
317
|
|
|
285
318
|
## 📦 Installation
|
|
286
319
|
|
|
320
|
+
### Prerequisites
|
|
321
|
+
|
|
322
|
+
- **Python 3.8+** (3.10+ recommended)
|
|
323
|
+
- **Node.js 18+** and npm (only required for Web UI)
|
|
324
|
+
|
|
287
325
|
### Option 1: Install from PyPI (Recommended)
|
|
288
326
|
|
|
289
327
|
For the latest stable release with all features:
|
|
290
328
|
|
|
291
329
|
```bash
|
|
292
|
-
pip install academic-refchecker[llm,
|
|
330
|
+
pip install academic-refchecker[llm,webui]
|
|
293
331
|
```
|
|
294
332
|
|
|
295
333
|
This installs RefChecker with:
|
|
296
334
|
- **llm**: Support for OpenAI, Anthropic, Google, Azure, and vLLM providers
|
|
297
|
-
- **
|
|
298
|
-
- **optional**: Enhanced features (lxml, selenium, pikepdf, nltk, scikit-learn)
|
|
335
|
+
- **webui**: Web interface dependencies (FastAPI, uvicorn, etc.)
|
|
299
336
|
|
|
300
|
-
For a minimal installation:
|
|
337
|
+
For a minimal installation (CLI only, no LLM or Web UI):
|
|
301
338
|
```bash
|
|
302
339
|
pip install academic-refchecker
|
|
303
340
|
```
|
|
304
341
|
|
|
342
|
+
Other optional extras:
|
|
343
|
+
- **dev**: Development tools (pytest, black, flake8, mypy)
|
|
344
|
+
- **optional**: Enhanced features (lxml, selenium, pikepdf, nltk, scikit-learn)
|
|
345
|
+
- **vllm**: Local model inference with vLLM
|
|
346
|
+
|
|
305
347
|
### Option 2: Install from Source
|
|
306
348
|
|
|
307
349
|
#### 1. Clone the Repository
|
|
@@ -311,13 +353,27 @@ git clone https://github.com/markrussinovich/refchecker.git
|
|
|
311
353
|
cd refchecker
|
|
312
354
|
```
|
|
313
355
|
|
|
314
|
-
#### 2. Install Dependencies
|
|
356
|
+
#### 2. Create and Activate Virtual Environment (Recommended)
|
|
357
|
+
|
|
358
|
+
```bash
|
|
359
|
+
python -m venv .venv
|
|
360
|
+
# On Windows:
|
|
361
|
+
.venv\Scripts\activate
|
|
362
|
+
# On macOS/Linux:
|
|
363
|
+
source .venv/bin/activate
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
#### 3. Install Dependencies
|
|
315
367
|
|
|
316
368
|
```bash
|
|
369
|
+
# Install all dependencies including LLM and Web UI support
|
|
370
|
+
pip install -e ".[llm,webui]"
|
|
371
|
+
|
|
372
|
+
# Or install from requirements.txt
|
|
317
373
|
pip install -r requirements.txt
|
|
318
374
|
```
|
|
319
375
|
|
|
320
|
-
#### 3. (Optional) Install Additional Dependencies
|
|
376
|
+
#### 4. (Optional) Install Additional Dependencies
|
|
321
377
|
|
|
322
378
|
For enhanced performance and LLM support, you can install optional dependencies:
|
|
323
379
|
|
|
@@ -337,6 +393,15 @@ pip install selenium
|
|
|
337
393
|
pip install pikepdf
|
|
338
394
|
```
|
|
339
395
|
|
|
396
|
+
### Web UI Installation
|
|
397
|
+
|
|
398
|
+
The Web UI requires Node.js 18+ in addition to the Python dependencies:
|
|
399
|
+
|
|
400
|
+
```bash
|
|
401
|
+
cd web-ui
|
|
402
|
+
npm install
|
|
403
|
+
```
|
|
404
|
+
|
|
340
405
|
## 📖 Usage
|
|
341
406
|
|
|
342
407
|
Check papers in various formats and online locations:
|
|
@@ -1,16 +1,26 @@
|
|
|
1
|
-
academic_refchecker-1.2.
|
|
1
|
+
academic_refchecker-1.2.66.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
|
|
2
|
+
backend/__init__.py,sha256=TFVkOx5tSp3abty15RzUbaSwQ9ZD0kfUn7PDh63xkYY,521
|
|
3
|
+
backend/__main__.py,sha256=74V7yUMsRSZaaRyXYm-rZVc3TVUcUgwsoTQTUbV5EqM,211
|
|
4
|
+
backend/cli.py,sha256=SCwaLObaO9F-tiDCbPJF6DXwiamSsdDz50E1OX-6T08,1379
|
|
5
|
+
backend/concurrency.py,sha256=2KY9I_8dDkyl_HTGx27ZxU4rFXx2vqbGOlo5RrRbPjA,3223
|
|
6
|
+
backend/database.py,sha256=TP5wRt4wzQC8mk5CyTSmCVJQza9kUwNUuWpQarPYmbc,29794
|
|
7
|
+
backend/main.py,sha256=hcY1lV82f1B-8Nu_IO3Y7Fqhb_4bH64n8FfGAtDv9x4,49594
|
|
8
|
+
backend/models.py,sha256=El2F-RTHgxQ7-WODmiYCpjsTFDpjwF9PBt-JDa_XipE,2591
|
|
9
|
+
backend/refchecker_wrapper.py,sha256=B8oERiF81Pbrv0bS9CWUiFIzUQyfXCv8k4dz_jojaYk,51935
|
|
10
|
+
backend/thumbnail.py,sha256=wPFXp3RlmcL9jVKZmSBRB7Pfy9Ti7nCnzNtL4osfNtM,17618
|
|
11
|
+
backend/websocket_manager.py,sha256=l-Wou-rKV6n7t6Gcf5fR6s_4G-mssSrba0davNnYS70,4247
|
|
2
12
|
refchecker/__init__.py,sha256=Pg5MrtLxDBRcNYcI02N-bv3tzURVd1S3nQ8IyF7Zw7E,322
|
|
3
13
|
refchecker/__main__.py,sha256=agBbT9iKN0g2xXtRNCoh29Nr7z2n5vU-r0MCVJKi4tI,232
|
|
4
|
-
refchecker/__version__.py,sha256
|
|
14
|
+
refchecker/__version__.py,sha256=riI0sV8UuNdPwTFz-z5QxcMQEIeBO61ZM-kvhsyG3-Y,89
|
|
5
15
|
refchecker/checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
|
|
6
|
-
refchecker/checkers/crossref.py,sha256=
|
|
7
|
-
refchecker/checkers/enhanced_hybrid_checker.py,sha256=
|
|
16
|
+
refchecker/checkers/crossref.py,sha256=88moAyTudBqf9SKqTQkNAq1yyuRe95f8r4EpmJznupQ,20937
|
|
17
|
+
refchecker/checkers/enhanced_hybrid_checker.py,sha256=2jIeUX7hankPok3M4de9o2bsJZ17ZomuLkdfdr9EV0s,28671
|
|
8
18
|
refchecker/checkers/github_checker.py,sha256=YJ2sLj22qezw3uWjA0jhtDO0fOW4HUwcVbv2DQ4LjR0,14277
|
|
9
|
-
refchecker/checkers/local_semantic_scholar.py,sha256=
|
|
10
|
-
refchecker/checkers/openalex.py,sha256=
|
|
19
|
+
refchecker/checkers/local_semantic_scholar.py,sha256=wJtMwyu_PgLF0CGsyip42auTnRyObTRxCRiv_N8l78Q,21024
|
|
20
|
+
refchecker/checkers/openalex.py,sha256=WEjEppQMbutPs8kWOSorCIoXWqpJ9o1CXUicThHSWYU,20120
|
|
11
21
|
refchecker/checkers/openreview_checker.py,sha256=0IHZe4Nscy8fle28rmhy1hhsofR5g0FFSakk8FFH_0A,40540
|
|
12
22
|
refchecker/checkers/pdf_paper_checker.py,sha256=lrg09poNJBz9FNMrUoEjQ6CJbdYZAVANw0bCaTSb5oo,19904
|
|
13
|
-
refchecker/checkers/semantic_scholar.py,sha256=
|
|
23
|
+
refchecker/checkers/semantic_scholar.py,sha256=T8PSJfyYP1BlbW9_hhZTlxVOeBMyGHJap989PMpDrEE,36012
|
|
14
24
|
refchecker/checkers/webpage_checker.py,sha256=A_d5kg3OOsyliC00OVq_l0J-RJ4Ln7hUoURk21aO2fs,43653
|
|
15
25
|
refchecker/config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
|
|
16
26
|
refchecker/config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
|
|
@@ -18,7 +28,7 @@ refchecker/config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c
|
|
|
18
28
|
refchecker/core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
|
|
19
29
|
refchecker/core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
|
|
20
30
|
refchecker/core/parallel_processor.py,sha256=HpVFEMwPBiP2FRjvGqlaXpjV5S0qP-hxdB_Wdl_lACo,17704
|
|
21
|
-
refchecker/core/refchecker.py,sha256=
|
|
31
|
+
refchecker/core/refchecker.py,sha256=rP5_-9eRhn6rxtoC7LCJyaOdNNGX5AS5K1S7ZNN3bo0,287343
|
|
22
32
|
refchecker/database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
|
|
23
33
|
refchecker/database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
|
|
24
34
|
refchecker/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -28,22 +38,22 @@ refchecker/scripts/__init__.py,sha256=xJwo6afG8s7S888BK2Bxw2d7FX8aLkbl0l_ZoJOFib
|
|
|
28
38
|
refchecker/scripts/start_vllm_server.py,sha256=ZepWp2y2cKFW0Kgsoima2RbmF02fTU29UFcLLpsBhFU,4213
|
|
29
39
|
refchecker/services/__init__.py,sha256=jGi9S74Msak3YR-C4Qb68VU7HB4oLaX9o1rlVAFpOFI,187
|
|
30
40
|
refchecker/services/pdf_processor.py,sha256=7i5x043qfnyzE5EQmytfy_uPjbeCJp4Ka5OPyH-bwOE,10577
|
|
31
|
-
refchecker/utils/__init__.py,sha256=
|
|
32
|
-
refchecker/utils/arxiv_utils.py,sha256=
|
|
41
|
+
refchecker/utils/__init__.py,sha256=SKTEQeKpLOFFMIzZiakzctsW9zGe_J7LDNJlygWV6RY,1221
|
|
42
|
+
refchecker/utils/arxiv_utils.py,sha256=C7wqoCy9FZUQpoF92vLeJyrK1-6XoMmmL6u_hfDV3ro,18031
|
|
33
43
|
refchecker/utils/author_utils.py,sha256=DLTo1xsxef2wxoe4s_MWrh36maj4fgnvFlsDLpDE-qQ,5507
|
|
34
44
|
refchecker/utils/biblatex_parser.py,sha256=IKRUMtRsjdXIktyk9XGArt_ms0asmqP549uhFvvumuE,25581
|
|
35
45
|
refchecker/utils/bibliography_utils.py,sha256=d6kqDOQou_PX6WQkOzrGyN5GpzaOjhu54w9wGfBRQZw,11760
|
|
36
46
|
refchecker/utils/bibtex_parser.py,sha256=xY0dEqT8lBZF-W21YRpG28lp_F2ikLan7nK70WiCU2o,15286
|
|
37
47
|
refchecker/utils/config_validator.py,sha256=rxf7K3DYmJ-BNPsmtaCNipY2BTVT-pJZ7wN-M9Y3GC8,11167
|
|
38
48
|
refchecker/utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
|
|
39
|
-
refchecker/utils/doi_utils.py,sha256=
|
|
40
|
-
refchecker/utils/error_utils.py,sha256=
|
|
49
|
+
refchecker/utils/doi_utils.py,sha256=_7YvQ0DTOQBMIujUE0SdJicjPiAR3VETLU668GIji24,6094
|
|
50
|
+
refchecker/utils/error_utils.py,sha256=PbB9HP2eoiWTIvRqzyh1SDhEgCEc7ecN5hVB-dUFP5Q,13093
|
|
41
51
|
refchecker/utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
|
|
42
|
-
refchecker/utils/text_utils.py,sha256=
|
|
52
|
+
refchecker/utils/text_utils.py,sha256=v5beDt_fyx4ETfTXLYrDMp3CuUGoDoLs7-d1H2GdySE,228585
|
|
43
53
|
refchecker/utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
|
|
44
54
|
refchecker/utils/url_utils.py,sha256=7b0rWCQJSajzqOvD7ghsBZPejiq6mUIz6SGhvU_WGDs,9441
|
|
45
|
-
academic_refchecker-1.2.
|
|
46
|
-
academic_refchecker-1.2.
|
|
47
|
-
academic_refchecker-1.2.
|
|
48
|
-
academic_refchecker-1.2.
|
|
49
|
-
academic_refchecker-1.2.
|
|
55
|
+
academic_refchecker-1.2.66.dist-info/METADATA,sha256=Qb0y3NPV9VPdNDEICSvkfLHoYE3n6Hl4kBXbN4RO3O8,25778
|
|
56
|
+
academic_refchecker-1.2.66.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
57
|
+
academic_refchecker-1.2.66.dist-info/entry_points.txt,sha256=9cREsaKwlp05Ql0CBIjKrNHk5IG2cHY5LvJPsV2-SxA,108
|
|
58
|
+
academic_refchecker-1.2.66.dist-info/top_level.txt,sha256=FfNvrvpj25gfpUBjW0epvz7Qrdejhups5Za_DBiSRu4,19
|
|
59
|
+
academic_refchecker-1.2.66.dist-info/RECORD,,
|
backend/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Backend package for RefChecker Web UI
|
|
2
|
+
"""
|
|
3
|
+
RefChecker Web UI Backend
|
|
4
|
+
|
|
5
|
+
This package provides the FastAPI backend for the RefChecker Web UI,
|
|
6
|
+
including WebSocket support for real-time progress updates.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
# As a command line tool (after pip install):
|
|
10
|
+
refchecker-webui --host 0.0.0.0 --port 8000
|
|
11
|
+
|
|
12
|
+
# As a Python module:
|
|
13
|
+
python -m backend --host 0.0.0.0 --port 8000
|
|
14
|
+
|
|
15
|
+
# With uvicorn directly:
|
|
16
|
+
uvicorn backend.main:app --host 0.0.0.0 --port 8000
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from .main import app
|
|
20
|
+
|
|
21
|
+
__all__ = ["app"]
|
backend/__main__.py
ADDED
backend/cli.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CLI entry point for RefChecker Web UI backend server.
|
|
4
|
+
|
|
5
|
+
This module provides the console script entry point for the refchecker-webui command.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
import argparse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main(argv=None):
    """Main entry point for the refchecker-webui command.

    Parses the command-line options and starts uvicorn serving
    ``backend.main:app``.

    Args:
        argv: List of command-line arguments, excluding the program name.
            When ``None`` (the default, i.e. a plain ``main()`` call),
            argparse reads ``sys.argv[1:]``. Passing a list lets callers and
            tests drive the command without touching ``sys.argv``.

    Raises:
        SystemExit: with status 1 when uvicorn cannot be imported; argparse
            itself exits with status 2 on invalid options and 0 after
            ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Start the RefChecker Web UI backend server"
    )
    # NOTE(review): the default host 0.0.0.0 listens on every network
    # interface, not just loopback. Kept as-is for compatibility; confirm this
    # exposure is intended for a development server.
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)"
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port to listen on (default: 8000)"
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development"
    )

    args = parser.parse_args(argv)

    # uvicorn ships with the optional "webui" extra, so import it lazily and
    # turn a missing install into an actionable message instead of a traceback.
    try:
        import uvicorn
    except ImportError:
        # Diagnostics belong on stderr so they are not mixed into piped output.
        print("Error: uvicorn is not installed.", file=sys.stderr)
        print(
            "Install it with: pip install 'academic-refchecker[webui]'",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Starting RefChecker Web UI backend on http://{args.host}:{args.port}")
    print("Make sure to start the frontend separately (cd web-ui && npm run dev)")
    print()

    # The app is passed as an import string (not an object), which uvicorn
    # requires for --reload to be able to re-import it.
    uvicorn.run(
        "backend.main:app",
        host=args.host,
        port=args.port,
        reload=args.reload
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
if __name__ == "__main__":
|
|
56
|
+
main()
|
backend/concurrency.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Global concurrency limiter for reference checking across all papers.
|
|
3
|
+
|
|
4
|
+
This module provides a system-wide semaphore that limits the total number
|
|
5
|
+
of concurrent reference checks, regardless of how many papers are being
|
|
6
|
+
checked simultaneously.
|
|
7
|
+
"""
|
|
8
|
+
import asyncio
|
|
9
|
+
from typing import Optional
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)

# Default max concurrent reference checks
DEFAULT_MAX_CONCURRENT = 6


class GlobalConcurrencyLimiter:
    """
    System-wide concurrency limiter for reference checks.

    Uses a single asyncio semaphore to limit total concurrent operations
    across all paper checks. The limit can be changed while operations are in
    flight: the same semaphore object is kept for the lifetime of the limiter
    so that tasks already waiting on it are still woken by later releases.

    Usable as an async context manager::

        async with limiter:
            ...  # at most ``max_concurrent`` bodies run at once
    """

    def __init__(self, max_concurrent: int = DEFAULT_MAX_CONCURRENT):
        """Create a limiter allowing ``max_concurrent`` simultaneous holders."""
        self._max_concurrent = max_concurrent
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._active_count = 0
        # Serialises concurrent set_max_concurrent() calls.
        self._lock = asyncio.Lock()
        # Number of permits that still have to be withdrawn after the limit
        # was lowered while they were held; release() retires them one by one.
        self._pending_reduction = 0

    @property
    def max_concurrent(self) -> int:
        """The currently configured limit."""
        return self._max_concurrent

    @property
    def active_count(self) -> int:
        """Number of slots currently held (acquired and not yet released)."""
        return self._active_count

    async def set_max_concurrent(self, value: int):
        """
        Update the max concurrent limit. Values are clamped to 1..50.

        The semaphore is resized in place rather than replaced, so it is safe
        to call while operations are in progress:

        - Raising the limit makes the extra slots available immediately.
        - Lowering the limit withdraws idle slots immediately; slots that are
          currently held are withdrawn as they are released, so the number of
          running operations may temporarily exceed the new value.
        """
        if value < 1:
            value = 1
        if value > 50:
            value = 50

        async with self._lock:
            old_value = self._max_concurrent
            self._max_concurrent = value
            delta = value - old_value

            if delta > 0:
                # First cancel out reductions that have not been applied yet,
                # then hand out whatever capacity is genuinely new.
                cancelled = min(delta, self._pending_reduction)
                self._pending_reduction -= cancelled
                for _ in range(delta - cancelled):
                    self._semaphore.release()
            elif delta < 0:
                self._pending_reduction += -delta
                # Absorb idle permits right away. acquire() does not suspend
                # while the semaphore is unlocked, so this cannot block.
                while self._pending_reduction > 0 and not self._semaphore.locked():
                    await self._semaphore.acquire()
                    self._pending_reduction -= 1

            logger.info(f"Global concurrency limit changed from {old_value} to {value}")

    async def acquire(self):
        """Acquire a slot in the concurrency pool, waiting until one is free."""
        await self._semaphore.acquire()
        # No await between taking the permit and counting it: a cancellation
        # cannot land in between and leak the permit, and release() updates
        # the counter the same lock-free way.
        self._active_count += 1
        logger.debug(f"Acquired slot, active: {self._active_count}/{self._max_concurrent}")

    def release(self):
        """Release a slot back to the concurrency pool."""
        if self._pending_reduction > 0:
            # The limit was lowered while this slot was held: retire the
            # permit instead of returning it to the semaphore.
            self._pending_reduction -= 1
        else:
            self._semaphore.release()
        # Clamp at zero so an unmatched release() cannot drive the count negative.
        self._active_count = max(0, self._active_count - 1)
        logger.debug(f"Released slot, active: {self._active_count}/{self._max_concurrent}")

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.release()
        # Never suppress an exception raised inside the ``async with`` body.
        return False
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Global singleton instance
|
|
82
|
+
_limiter: Optional[GlobalConcurrencyLimiter] = None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_limiter() -> GlobalConcurrencyLimiter:
    """Return the module-wide limiter, creating it with defaults on first use."""
    global _limiter
    if _limiter is not None:
        return _limiter
    # First call: build the singleton with the default concurrency limit.
    _limiter = GlobalConcurrencyLimiter()
    return _limiter
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def init_limiter(max_concurrent: int = DEFAULT_MAX_CONCURRENT) -> GlobalConcurrencyLimiter:
    """Create the global limiter with ``max_concurrent``, or re-limit the existing one.

    Returns the module-wide limiter instance in both cases.
    """
    global _limiter
    if _limiter is not None:
        # A limiter already exists: keep the instance and just change its limit.
        await _limiter.set_max_concurrent(max_concurrent)
        return _limiter
    _limiter = GlobalConcurrencyLimiter(max_concurrent)
    return _limiter
|