academic-refchecker 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. academic_refchecker-2.0.7.dist-info/METADATA +738 -0
  2. academic_refchecker-2.0.7.dist-info/RECORD +64 -0
  3. academic_refchecker-2.0.7.dist-info/WHEEL +5 -0
  4. academic_refchecker-2.0.7.dist-info/entry_points.txt +3 -0
  5. academic_refchecker-2.0.7.dist-info/licenses/LICENSE +21 -0
  6. academic_refchecker-2.0.7.dist-info/top_level.txt +2 -0
  7. backend/__init__.py +21 -0
  8. backend/__main__.py +11 -0
  9. backend/cli.py +64 -0
  10. backend/concurrency.py +100 -0
  11. backend/database.py +711 -0
  12. backend/main.py +1367 -0
  13. backend/models.py +99 -0
  14. backend/refchecker_wrapper.py +1126 -0
  15. backend/static/assets/index-2P6L_39v.css +1 -0
  16. backend/static/assets/index-hk21nqxR.js +25 -0
  17. backend/static/favicon.svg +6 -0
  18. backend/static/index.html +15 -0
  19. backend/static/vite.svg +1 -0
  20. backend/thumbnail.py +517 -0
  21. backend/websocket_manager.py +104 -0
  22. refchecker/__init__.py +13 -0
  23. refchecker/__main__.py +11 -0
  24. refchecker/__version__.py +3 -0
  25. refchecker/checkers/__init__.py +17 -0
  26. refchecker/checkers/crossref.py +541 -0
  27. refchecker/checkers/enhanced_hybrid_checker.py +563 -0
  28. refchecker/checkers/github_checker.py +326 -0
  29. refchecker/checkers/local_semantic_scholar.py +540 -0
  30. refchecker/checkers/openalex.py +513 -0
  31. refchecker/checkers/openreview_checker.py +984 -0
  32. refchecker/checkers/pdf_paper_checker.py +493 -0
  33. refchecker/checkers/semantic_scholar.py +764 -0
  34. refchecker/checkers/webpage_checker.py +938 -0
  35. refchecker/config/__init__.py +1 -0
  36. refchecker/config/logging.conf +36 -0
  37. refchecker/config/settings.py +170 -0
  38. refchecker/core/__init__.py +7 -0
  39. refchecker/core/db_connection_pool.py +141 -0
  40. refchecker/core/parallel_processor.py +415 -0
  41. refchecker/core/refchecker.py +5838 -0
  42. refchecker/database/__init__.py +6 -0
  43. refchecker/database/download_semantic_scholar_db.py +1725 -0
  44. refchecker/llm/__init__.py +0 -0
  45. refchecker/llm/base.py +376 -0
  46. refchecker/llm/providers.py +911 -0
  47. refchecker/scripts/__init__.py +1 -0
  48. refchecker/scripts/start_vllm_server.py +121 -0
  49. refchecker/services/__init__.py +8 -0
  50. refchecker/services/pdf_processor.py +268 -0
  51. refchecker/utils/__init__.py +27 -0
  52. refchecker/utils/arxiv_utils.py +462 -0
  53. refchecker/utils/author_utils.py +179 -0
  54. refchecker/utils/biblatex_parser.py +584 -0
  55. refchecker/utils/bibliography_utils.py +332 -0
  56. refchecker/utils/bibtex_parser.py +411 -0
  57. refchecker/utils/config_validator.py +262 -0
  58. refchecker/utils/db_utils.py +210 -0
  59. refchecker/utils/doi_utils.py +190 -0
  60. refchecker/utils/error_utils.py +482 -0
  61. refchecker/utils/mock_objects.py +211 -0
  62. refchecker/utils/text_utils.py +5057 -0
  63. refchecker/utils/unicode_utils.py +335 -0
  64. refchecker/utils/url_utils.py +307 -0
@@ -0,0 +1,64 @@
1
+ academic_refchecker-2.0.7.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
2
+ backend/__init__.py,sha256=TFVkOx5tSp3abty15RzUbaSwQ9ZD0kfUn7PDh63xkYY,521
3
+ backend/__main__.py,sha256=74V7yUMsRSZaaRyXYm-rZVc3TVUcUgwsoTQTUbV5EqM,211
4
+ backend/cli.py,sha256=xV3l9M5OdNQQYOcrzj2d_7RmCgj7CXP_1oi0TPe6zNo,1672
5
+ backend/concurrency.py,sha256=2KY9I_8dDkyl_HTGx27ZxU4rFXx2vqbGOlo5RrRbPjA,3223
6
+ backend/database.py,sha256=1jLP1m9vNk5sEs4bh_xmX0T5ilZkUTX1c7nOVz5XnNc,30681
7
+ backend/main.py,sha256=ntz5PbEfG65ENFTHVQlY-c8hP5UPM_hdFjl60YMNh78,54371
8
+ backend/models.py,sha256=El2F-RTHgxQ7-WODmiYCpjsTFDpjwF9PBt-JDa_XipE,2591
9
+ backend/refchecker_wrapper.py,sha256=cgJpPFNGAKHaNKxnLY3r3RxlJVT-yRyr3rJFeMxoxAo,51873
10
+ backend/thumbnail.py,sha256=wPFXp3RlmcL9jVKZmSBRB7Pfy9Ti7nCnzNtL4osfNtM,17618
11
+ backend/websocket_manager.py,sha256=l-Wou-rKV6n7t6Gcf5fR6s_4G-mssSrba0davNnYS70,4247
12
+ backend/static/favicon.svg,sha256=R0oQauh16Uy0D7JlT27k-zdjJtrvfPKOe9La5vKYwuM,395
13
+ backend/static/index.html,sha256=eJDL5t98ZJOl85d1_kJNNSUhmgGft_PCKcgbdG0UvCw,598
14
+ backend/static/vite.svg,sha256=SnSK_UQ5GLsWWRyDTEAdrjPoeGGrXbrQgRw6O0qSFPs,1497
15
+ backend/static/assets/index-2P6L_39v.css,sha256=KC3Wa6jfD1qwmEoVpqTovlzf8fsn5oHYz3NKta6BJxw,27469
16
+ backend/static/assets/index-hk21nqxR.js,sha256=z2agP8ZFYw4AfYi-GJ5E_8_k-lPF-frXOJtPk-I0hDs,369533
17
+ refchecker/__init__.py,sha256=Pg5MrtLxDBRcNYcI02N-bv3tzURVd1S3nQ8IyF7Zw7E,322
18
+ refchecker/__main__.py,sha256=agBbT9iKN0g2xXtRNCoh29Nr7z2n5vU-r0MCVJKi4tI,232
19
+ refchecker/__version__.py,sha256=xuLub_DO4RmCl3zDtU20SXDlj6XT8dJRz8nB_qv8LRo,65
20
+ refchecker/checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
21
+ refchecker/checkers/crossref.py,sha256=88moAyTudBqf9SKqTQkNAq1yyuRe95f8r4EpmJznupQ,20937
22
+ refchecker/checkers/enhanced_hybrid_checker.py,sha256=2jIeUX7hankPok3M4de9o2bsJZ17ZomuLkdfdr9EV0s,28671
23
+ refchecker/checkers/github_checker.py,sha256=YJ2sLj22qezw3uWjA0jhtDO0fOW4HUwcVbv2DQ4LjR0,14277
24
+ refchecker/checkers/local_semantic_scholar.py,sha256=c-KUTh99s-Di71h-pzdrwlPgoSTwB-tgVAZnCrMFXmw,21011
25
+ refchecker/checkers/openalex.py,sha256=WEjEppQMbutPs8kWOSorCIoXWqpJ9o1CXUicThHSWYU,20120
26
+ refchecker/checkers/openreview_checker.py,sha256=0IHZe4Nscy8fle28rmhy1hhsofR5g0FFSakk8FFH_0A,40540
27
+ refchecker/checkers/pdf_paper_checker.py,sha256=lrg09poNJBz9FNMrUoEjQ6CJbdYZAVANw0bCaTSb5oo,19904
28
+ refchecker/checkers/semantic_scholar.py,sha256=yvatQM5fXdW0qagqrTUpgotd0RbT7N_pqaRNGfmQjJs,35613
29
+ refchecker/checkers/webpage_checker.py,sha256=A_d5kg3OOsyliC00OVq_l0J-RJ4Ln7hUoURk21aO2fs,43653
30
+ refchecker/config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
31
+ refchecker/config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
32
+ refchecker/config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c,6164
33
+ refchecker/core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
34
+ refchecker/core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
35
+ refchecker/core/parallel_processor.py,sha256=HpVFEMwPBiP2FRjvGqlaXpjV5S0qP-hxdB_Wdl_lACo,17704
36
+ refchecker/core/refchecker.py,sha256=nX8guDXFL1ZdT-K6KUJT_3iZjuoYsWj4e0rKrqd5VZA,287117
37
+ refchecker/database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
38
+ refchecker/database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
39
+ refchecker/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
+ refchecker/llm/base.py,sha256=uMF-KOqZ9ZQ7rccOQLpKJiW9sEMMxr7ePXBSF0yYDJY,16782
41
+ refchecker/llm/providers.py,sha256=RhsYbUqHV5YznJcJ8vTa6M-nUKltdREeG5mYrLdBS2c,40992
42
+ refchecker/scripts/__init__.py,sha256=xJwo6afG8s7S888BK2Bxw2d7FX8aLkbl0l_ZoJOFibE,37
43
+ refchecker/scripts/start_vllm_server.py,sha256=ZepWp2y2cKFW0Kgsoima2RbmF02fTU29UFcLLpsBhFU,4213
44
+ refchecker/services/__init__.py,sha256=jGi9S74Msak3YR-C4Qb68VU7HB4oLaX9o1rlVAFpOFI,187
45
+ refchecker/services/pdf_processor.py,sha256=7i5x043qfnyzE5EQmytfy_uPjbeCJp4Ka5OPyH-bwOE,10577
46
+ refchecker/utils/__init__.py,sha256=SKTEQeKpLOFFMIzZiakzctsW9zGe_J7LDNJlygWV6RY,1221
47
+ refchecker/utils/arxiv_utils.py,sha256=C7wqoCy9FZUQpoF92vLeJyrK1-6XoMmmL6u_hfDV3ro,18031
48
+ refchecker/utils/author_utils.py,sha256=DLTo1xsxef2wxoe4s_MWrh36maj4fgnvFlsDLpDE-qQ,5507
49
+ refchecker/utils/biblatex_parser.py,sha256=IKRUMtRsjdXIktyk9XGArt_ms0asmqP549uhFvvumuE,25581
50
+ refchecker/utils/bibliography_utils.py,sha256=d6kqDOQou_PX6WQkOzrGyN5GpzaOjhu54w9wGfBRQZw,11760
51
+ refchecker/utils/bibtex_parser.py,sha256=xY0dEqT8lBZF-W21YRpG28lp_F2ikLan7nK70WiCU2o,15286
52
+ refchecker/utils/config_validator.py,sha256=rxf7K3DYmJ-BNPsmtaCNipY2BTVT-pJZ7wN-M9Y3GC8,11167
53
+ refchecker/utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
54
+ refchecker/utils/doi_utils.py,sha256=_7YvQ0DTOQBMIujUE0SdJicjPiAR3VETLU668GIji24,6094
55
+ refchecker/utils/error_utils.py,sha256=8TcfRUD6phZ7viPJrezQ4jKf_vE65lqEXZq5707eU6s,15425
56
+ refchecker/utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
57
+ refchecker/utils/text_utils.py,sha256=v5beDt_fyx4ETfTXLYrDMp3CuUGoDoLs7-d1H2GdySE,228585
58
+ refchecker/utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
59
+ refchecker/utils/url_utils.py,sha256=7b0rWCQJSajzqOvD7ghsBZPejiq6mUIz6SGhvU_WGDs,9441
60
+ academic_refchecker-2.0.7.dist-info/METADATA,sha256=weaQGma4FfzxrEhRbcbJYz9OcAdygsrDHGVYmkf4uc4,26477
61
+ academic_refchecker-2.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
+ academic_refchecker-2.0.7.dist-info/entry_points.txt,sha256=9cREsaKwlp05Ql0CBIjKrNHk5IG2cHY5LvJPsV2-SxA,108
63
+ academic_refchecker-2.0.7.dist-info/top_level.txt,sha256=FfNvrvpj25gfpUBjW0epvz7Qrdejhups5Za_DBiSRu4,19
64
+ academic_refchecker-2.0.7.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ academic-refchecker = refchecker.core.refchecker:main
3
+ refchecker-webui = backend.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 RefChecker
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ backend
2
+ refchecker
backend/__init__.py ADDED
@@ -0,0 +1,21 @@
1
# Backend package for RefChecker Web UI
"""
FastAPI backend for the RefChecker Web UI.

The package exposes the ASGI application object ``app`` and supports
WebSocket connections for real-time progress updates.

Ways to start the server:
    # Console script (available after pip install):
    refchecker-webui --host 0.0.0.0 --port 8000

    # Module execution:
    python -m backend --host 0.0.0.0 --port 8000

    # Directly through uvicorn:
    uvicorn backend.main:app --host 0.0.0.0 --port 8000
"""

from .main import app

__all__ = ["app"]
backend/__main__.py ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env python3
"""
Module entry point: lets the backend be started with ``python -m backend``.

All work is delegated to the CLI's ``main`` function.
"""

from .cli import main

if __name__ == "__main__":
    main()
backend/cli.py ADDED
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI entry point for RefChecker Web UI backend server.
4
+
5
+ This module provides the console script entry point for the refchecker-webui command.
6
+ """
7
+
8
+ import sys
9
+ import argparse
10
+ from pathlib import Path
11
+
12
+
13
def main(argv=None):
    """Main entry point for the refchecker-webui command.

    Parses the command line, verifies that uvicorn can be imported, prints
    where the server will be reachable and then runs ``backend.main:app``
    under uvicorn.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default, and what the console-script
            entry point uses) argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: With code 1 when uvicorn is not installed, or with
            argparse's own exit code when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(
        description="Start the RefChecker Web UI server"
    )
    # NOTE: the default host binds to every network interface.
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port to listen on (default: 8000)",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )

    args = parser.parse_args(argv)

    # uvicorn is imported lazily so a missing optional dependency yields an
    # actionable message instead of a traceback.
    try:
        import uvicorn
    except ImportError:
        # Diagnostics belong on stderr so they are not mixed with normal output.
        print("Error: uvicorn is not installed.", file=sys.stderr)
        print(
            "Install it with: pip install 'academic-refchecker[webui]'",
            file=sys.stderr,
        )
        sys.exit(1)

    # Check if static frontend is bundled
    static_dir = Path(__file__).parent / "static"
    has_frontend = static_dir.exists() and (static_dir / "index.html").exists()

    print(f"Starting RefChecker Web UI on http://{args.host}:{args.port}")
    if has_frontend:
        print(f"Open http://localhost:{args.port} in your browser")
    else:
        print("Note: Frontend not bundled. Start it separately: cd web-ui && npm run dev")
    print()

    # The app is passed as an import string rather than as an object.
    uvicorn.run(
        "backend.main:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
    )


if __name__ == "__main__":
    main()
backend/concurrency.py ADDED
@@ -0,0 +1,100 @@
1
+ """
2
+ Global concurrency limiter for reference checking across all papers.
3
+
4
+ This module provides a system-wide semaphore that limits the total number
5
+ of concurrent reference checks, regardless of how many papers are being
6
+ checked simultaneously.
7
+ """
8
+ import asyncio
9
+ from typing import Optional
10
+ import logging
11
+
12
logger = logging.getLogger(__name__)

# Default max concurrent reference checks
DEFAULT_MAX_CONCURRENT = 6


class GlobalConcurrencyLimiter:
    """
    System-wide concurrency limiter for reference checks.

    Uses a single semaphore to limit total concurrent operations across
    all paper checks. The limit can be changed at runtime: the semaphore
    object is never replaced, so tasks already waiting on it keep working
    and permits released by earlier holders are accounted for correctly.

    Can be used as an async context manager (``async with limiter: ...``).
    """

    # Every requested limit is clamped into this inclusive range.
    MIN_CONCURRENT = 1
    MAX_CONCURRENT = 50

    def __init__(self, max_concurrent: int = DEFAULT_MAX_CONCURRENT):
        """Create a limiter allowing ``max_concurrent`` simultaneous holders.

        The value is clamped to ``MIN_CONCURRENT``..``MAX_CONCURRENT``, the
        same bounds ``set_max_concurrent`` applies.
        """
        max_concurrent = self._clamp(max_concurrent)
        self._max_concurrent = max_concurrent
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._active_count = 0
        # Serialises concurrent calls to set_max_concurrent.
        self._lock = asyncio.Lock()
        # Number of permits that still have to be withdrawn from circulation
        # after the limit was lowered.
        self._pending_reduction = 0

    @classmethod
    def _clamp(cls, value: int) -> int:
        """Return ``value`` limited to the supported range."""
        return max(cls.MIN_CONCURRENT, min(cls.MAX_CONCURRENT, value))

    @property
    def max_concurrent(self) -> int:
        """The currently configured limit."""
        return self._max_concurrent

    @property
    def active_count(self) -> int:
        """Number of slots currently held (acquired and not yet released)."""
        return self._active_count

    async def set_max_concurrent(self, value: int):
        """
        Update the max concurrent limit.

        The value is clamped to ``MIN_CONCURRENT``..``MAX_CONCURRENT``.
        Raising the limit adds permits immediately, waking waiting tasks.
        Lowering it never interrupts running operations: surplus permits
        are retired as they are released or picked up, so the number of
        active operations may temporarily exceed the new value.
        """
        value = self._clamp(value)

        async with self._lock:
            old_value = self._max_concurrent
            self._max_concurrent = value
            delta = value - old_value
            if delta > 0:
                # First cancel reductions that have not been carried out
                # yet, then hand out whatever increase is left.
                forgiven = min(delta, self._pending_reduction)
                self._pending_reduction -= forgiven
                for _ in range(delta - forgiven):
                    self._semaphore.release()
            elif delta < 0:
                self._pending_reduction += -delta
            logger.info(
                "Global concurrency limit changed from %s to %s", old_value, value
            )

    async def acquire(self):
        """Acquire a slot in the concurrency pool, waiting if none is free."""
        while True:
            await self._semaphore.acquire()
            if self._pending_reduction > 0:
                # The limit was lowered: this permit must disappear rather
                # than be used, so drop it and wait for another one.
                self._pending_reduction -= 1
                continue
            break
        # No await between obtaining the permit and counting it, so a
        # cancellation cannot strand an acquired permit.
        self._active_count += 1
        logger.debug(f"Acquired slot, active: {self._active_count}/{self._max_concurrent}")

    def release(self):
        """Release a slot back to the concurrency pool."""
        if self._pending_reduction > 0:
            # Retire the permit instead of returning it to the semaphore.
            self._pending_reduction -= 1
        else:
            self._semaphore.release()
        self._active_count = max(0, self._active_count - 1)
        logger.debug(f"Released slot, active: {self._active_count}/{self._max_concurrent}")

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.release()
        # Never suppress exceptions raised inside the ``async with`` body.
        return False
79
+
80
+
81
# Module-wide singleton, created lazily on first request
_limiter: Optional[GlobalConcurrencyLimiter] = None


def get_limiter() -> GlobalConcurrencyLimiter:
    """Return the shared limiter, creating it with default settings if needed."""
    global _limiter
    if _limiter is not None:
        return _limiter
    _limiter = GlobalConcurrencyLimiter()
    return _limiter
91
+
92
+
93
async def init_limiter(max_concurrent: int = DEFAULT_MAX_CONCURRENT):
    """Create the global limiter with the given limit, or resize the existing one.

    Returns the global limiter instance.
    """
    global _limiter
    if _limiter is not None:
        # An instance already exists: adjust its limit instead of replacing it.
        await _limiter.set_max_concurrent(max_concurrent)
    else:
        _limiter = GlobalConcurrencyLimiter(max_concurrent)
    return _limiter