academic-refchecker 1.2.44__tar.gz → 1.2.46__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.44/src/academic_refchecker.egg-info → academic_refchecker-1.2.46}/PKG-INFO +25 -9
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/README.md +24 -8
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/__version__.py +1 -1
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46/src/academic_refchecker.egg-info}/PKG-INFO +25 -9
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/crossref.py +6 -7
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/github_checker.py +13 -3
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/openalex.py +6 -7
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/openreview_checker.py +7 -4
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/semantic_scholar.py +44 -42
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/webpage_checker.py +19 -4
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/core/parallel_processor.py +5 -2
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/core/refchecker.py +52 -43
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/arxiv_utils.py +16 -19
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/doi_utils.py +6 -12
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/error_utils.py +145 -3
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/text_utils.py +116 -8
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/LICENSE +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/MANIFEST.in +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/pyproject.toml +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/requirements.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/scripts/download_db.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/scripts/run_tests.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/setup.cfg +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/academic_refchecker.egg-info/top_level.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/enhanced_hybrid_checker.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/local_semantic_scholar.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/config/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/config/logging.conf +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/config/settings.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/core/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/core/db_connection_pool.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/database/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/llm/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/llm/base.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/llm/providers.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/scripts/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/services/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/services/pdf_processor.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/author_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/biblatex_parser.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/bibliography_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/bibtex_parser.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/config_validator.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/db_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/mock_objects.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/unicode_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/utils/url_utils.py +0 -0
{academic_refchecker-1.2.44/src/academic_refchecker.egg-info → academic_refchecker-1.2.46}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.44
|
|
3
|
+
Version: 1.2.46
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -78,7 +78,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
78
78
|
Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
|
|
79
79
|
ArXiv URL: https://arxiv.org/abs/1610.10099
|
|
80
80
|
DOI URL: https://doi.org/10.48550/arxiv.1610.10099
|
|
81
|
-
⚠️ Warning: Year mismatch:
|
|
81
|
+
⚠️ Warning: Year mismatch:
|
|
82
|
+
cited: '2017'
|
|
83
|
+
actual: '2016'
|
|
82
84
|
|
|
83
85
|
[2/45] Effective approaches to attention-based neural machine translation
|
|
84
86
|
Minh-Thang Luong, Hieu Pham, Christopher D. Manning
|
|
@@ -87,7 +89,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
87
89
|
Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
|
|
88
90
|
ArXiv URL: https://arxiv.org/abs/1508.04025
|
|
89
91
|
DOI URL: https://doi.org/10.18653/v1/d15-1166
|
|
90
|
-
❌ Error: First author mismatch:
|
|
92
|
+
❌ Error: First author mismatch:
|
|
93
|
+
cited: 'Minh-Thang Luong'
|
|
94
|
+
actual: 'Thang Luong'
|
|
91
95
|
|
|
92
96
|
[3/45] Deep Residual Learning for Image Recognition
|
|
93
97
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
|
@@ -98,7 +102,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
98
102
|
Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
|
|
99
103
|
ArXiv URL: https://arxiv.org/abs/1512.03385
|
|
100
104
|
DOI URL: https://doi.org/10.1109/CVPR.2016.90
|
|
101
|
-
❌ Error: DOI mismatch:
|
|
105
|
+
❌ Error: DOI mismatch:
|
|
106
|
+
cited: '10.1109/CVPR.2016.91'
|
|
107
|
+
actual: '10.1109/CVPR.2016.90'
|
|
102
108
|
|
|
103
109
|
============================================================
|
|
104
110
|
📋 SUMMARY
|
|
@@ -382,7 +388,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
382
388
|
Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
|
|
383
389
|
ArXiv URL: https://arxiv.org/abs/2312.02119
|
|
384
390
|
DOI URL: https://doi.org/10.48550/arxiv.2312.02119
|
|
385
|
-
❌ Error: First author mismatch:
|
|
391
|
+
❌ Error: First author mismatch:
|
|
392
|
+
cited: 'T. Xie'
|
|
393
|
+
actual: 'Zhao Xu'
|
|
386
394
|
```
|
|
387
395
|
- `title`: Title discrepancies
|
|
388
396
|
```
|
|
@@ -392,7 +400,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
392
400
|
Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
|
|
393
401
|
ArXiv URL: https://arxiv.org/abs/1810.04805
|
|
394
402
|
DOI URL: https://doi.org/10.18653/v1/n19-1423
|
|
395
|
-
❌ Error: Title mismatch:
|
|
403
|
+
❌ Error: Title mismatch:
|
|
404
|
+
cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
|
|
405
|
+
actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
|
|
396
406
|
```
|
|
397
407
|
- `arxiv_id`: Incorrect URLs or arXiv IDs
|
|
398
408
|
```
|
|
@@ -415,7 +425,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
415
425
|
Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
|
|
416
426
|
ArXiv URL: https://arxiv.org/abs/1706.03762
|
|
417
427
|
DOI URL: https://doi.org/10.48550/arXiv.1706.03762
|
|
418
|
-
❌ Error: DOI mismatch:
|
|
428
|
+
❌ Error: DOI mismatch:
|
|
429
|
+
cited: '10.5555/3295222.3295349'
|
|
430
|
+
actual: '10.48550/arXiv.1706.03762'
|
|
419
431
|
```
|
|
420
432
|
|
|
421
433
|
- **⚠️ Warnings**: Minor issues that may need attention
|
|
@@ -428,7 +440,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
428
440
|
Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
|
|
429
441
|
ArXiv URL: https://arxiv.org/abs/2310.03684
|
|
430
442
|
DOI URL: https://doi.org/10.48550/arxiv.2310.03684
|
|
431
|
-
⚠️ Warning: Year mismatch:
|
|
443
|
+
⚠️ Warning: Year mismatch:
|
|
444
|
+
cited: '2024'
|
|
445
|
+
actual: '2023'
|
|
432
446
|
```
|
|
433
447
|
- `venue`: Venue format variations
|
|
434
448
|
```
|
|
@@ -439,7 +453,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
439
453
|
Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
|
|
440
454
|
ArXiv URL: https://arxiv.org/abs/2403.02151
|
|
441
455
|
DOI URL: https://doi.org/10.48550/arxiv.2403.02151
|
|
442
|
-
⚠️ Warning: Venue mismatch:
|
|
456
|
+
⚠️ Warning: Venue mismatch:
|
|
457
|
+
cited: 'arXiv, 2024'
|
|
458
|
+
actual: 'Neural Information Processing Systems'
|
|
443
459
|
```
|
|
444
460
|
|
|
445
461
|
- **❓ Unverified**: References that couldn't be verified with any of the checker APIs
|
|
@@ -17,7 +17,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
17
17
|
Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
|
|
18
18
|
ArXiv URL: https://arxiv.org/abs/1610.10099
|
|
19
19
|
DOI URL: https://doi.org/10.48550/arxiv.1610.10099
|
|
20
|
-
⚠️ Warning: Year mismatch:
|
|
20
|
+
⚠️ Warning: Year mismatch:
|
|
21
|
+
cited: '2017'
|
|
22
|
+
actual: '2016'
|
|
21
23
|
|
|
22
24
|
[2/45] Effective approaches to attention-based neural machine translation
|
|
23
25
|
Minh-Thang Luong, Hieu Pham, Christopher D. Manning
|
|
@@ -26,7 +28,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
26
28
|
Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
|
|
27
29
|
ArXiv URL: https://arxiv.org/abs/1508.04025
|
|
28
30
|
DOI URL: https://doi.org/10.18653/v1/d15-1166
|
|
29
|
-
❌ Error: First author mismatch:
|
|
31
|
+
❌ Error: First author mismatch:
|
|
32
|
+
cited: 'Minh-Thang Luong'
|
|
33
|
+
actual: 'Thang Luong'
|
|
30
34
|
|
|
31
35
|
[3/45] Deep Residual Learning for Image Recognition
|
|
32
36
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
|
@@ -37,7 +41,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
37
41
|
Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
|
|
38
42
|
ArXiv URL: https://arxiv.org/abs/1512.03385
|
|
39
43
|
DOI URL: https://doi.org/10.1109/CVPR.2016.90
|
|
40
|
-
❌ Error: DOI mismatch:
|
|
44
|
+
❌ Error: DOI mismatch:
|
|
45
|
+
cited: '10.1109/CVPR.2016.91'
|
|
46
|
+
actual: '10.1109/CVPR.2016.90'
|
|
41
47
|
|
|
42
48
|
============================================================
|
|
43
49
|
📋 SUMMARY
|
|
@@ -321,7 +327,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
321
327
|
Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
|
|
322
328
|
ArXiv URL: https://arxiv.org/abs/2312.02119
|
|
323
329
|
DOI URL: https://doi.org/10.48550/arxiv.2312.02119
|
|
324
|
-
❌ Error: First author mismatch:
|
|
330
|
+
❌ Error: First author mismatch:
|
|
331
|
+
cited: 'T. Xie'
|
|
332
|
+
actual: 'Zhao Xu'
|
|
325
333
|
```
|
|
326
334
|
- `title`: Title discrepancies
|
|
327
335
|
```
|
|
@@ -331,7 +339,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
331
339
|
Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
|
|
332
340
|
ArXiv URL: https://arxiv.org/abs/1810.04805
|
|
333
341
|
DOI URL: https://doi.org/10.18653/v1/n19-1423
|
|
334
|
-
❌ Error: Title mismatch:
|
|
342
|
+
❌ Error: Title mismatch:
|
|
343
|
+
cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
|
|
344
|
+
actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
|
|
335
345
|
```
|
|
336
346
|
- `arxiv_id`: Incorrect URLs or arXiv IDs
|
|
337
347
|
```
|
|
@@ -354,7 +364,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
354
364
|
Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
|
|
355
365
|
ArXiv URL: https://arxiv.org/abs/1706.03762
|
|
356
366
|
DOI URL: https://doi.org/10.48550/arXiv.1706.03762
|
|
357
|
-
❌ Error: DOI mismatch:
|
|
367
|
+
❌ Error: DOI mismatch:
|
|
368
|
+
cited: '10.5555/3295222.3295349'
|
|
369
|
+
actual: '10.48550/arXiv.1706.03762'
|
|
358
370
|
```
|
|
359
371
|
|
|
360
372
|
- **⚠️ Warnings**: Minor issues that may need attention
|
|
@@ -367,7 +379,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
367
379
|
Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
|
|
368
380
|
ArXiv URL: https://arxiv.org/abs/2310.03684
|
|
369
381
|
DOI URL: https://doi.org/10.48550/arxiv.2310.03684
|
|
370
|
-
⚠️ Warning: Year mismatch:
|
|
382
|
+
⚠️ Warning: Year mismatch:
|
|
383
|
+
cited: '2024'
|
|
384
|
+
actual: '2023'
|
|
371
385
|
```
|
|
372
386
|
- `venue`: Venue format variations
|
|
373
387
|
```
|
|
@@ -378,7 +392,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
378
392
|
Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
|
|
379
393
|
ArXiv URL: https://arxiv.org/abs/2403.02151
|
|
380
394
|
DOI URL: https://doi.org/10.48550/arxiv.2403.02151
|
|
381
|
-
⚠️ Warning: Venue mismatch:
|
|
395
|
+
⚠️ Warning: Venue mismatch:
|
|
396
|
+
cited: 'arXiv, 2024'
|
|
397
|
+
actual: 'Neural Information Processing Systems'
|
|
382
398
|
```
|
|
383
399
|
|
|
384
400
|
- **❓ Unverified**: References that couldn't be verified with any of the checker APIs
|
{academic_refchecker-1.2.44 → academic_refchecker-1.2.46/src/academic_refchecker.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.44
|
|
3
|
+
Version: 1.2.46
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -78,7 +78,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
78
78
|
Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
|
|
79
79
|
ArXiv URL: https://arxiv.org/abs/1610.10099
|
|
80
80
|
DOI URL: https://doi.org/10.48550/arxiv.1610.10099
|
|
81
|
-
⚠️ Warning: Year mismatch:
|
|
81
|
+
⚠️ Warning: Year mismatch:
|
|
82
|
+
cited: '2017'
|
|
83
|
+
actual: '2016'
|
|
82
84
|
|
|
83
85
|
[2/45] Effective approaches to attention-based neural machine translation
|
|
84
86
|
Minh-Thang Luong, Hieu Pham, Christopher D. Manning
|
|
@@ -87,7 +89,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
87
89
|
Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
|
|
88
90
|
ArXiv URL: https://arxiv.org/abs/1508.04025
|
|
89
91
|
DOI URL: https://doi.org/10.18653/v1/d15-1166
|
|
90
|
-
❌ Error: First author mismatch:
|
|
92
|
+
❌ Error: First author mismatch:
|
|
93
|
+
cited: 'Minh-Thang Luong'
|
|
94
|
+
actual: 'Thang Luong'
|
|
91
95
|
|
|
92
96
|
[3/45] Deep Residual Learning for Image Recognition
|
|
93
97
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
|
@@ -98,7 +102,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
98
102
|
Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
|
|
99
103
|
ArXiv URL: https://arxiv.org/abs/1512.03385
|
|
100
104
|
DOI URL: https://doi.org/10.1109/CVPR.2016.90
|
|
101
|
-
❌ Error: DOI mismatch:
|
|
105
|
+
❌ Error: DOI mismatch:
|
|
106
|
+
cited: '10.1109/CVPR.2016.91'
|
|
107
|
+
actual: '10.1109/CVPR.2016.90'
|
|
102
108
|
|
|
103
109
|
============================================================
|
|
104
110
|
📋 SUMMARY
|
|
@@ -382,7 +388,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
382
388
|
Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
|
|
383
389
|
ArXiv URL: https://arxiv.org/abs/2312.02119
|
|
384
390
|
DOI URL: https://doi.org/10.48550/arxiv.2312.02119
|
|
385
|
-
❌ Error: First author mismatch:
|
|
391
|
+
❌ Error: First author mismatch:
|
|
392
|
+
cited: 'T. Xie'
|
|
393
|
+
actual: 'Zhao Xu'
|
|
386
394
|
```
|
|
387
395
|
- `title`: Title discrepancies
|
|
388
396
|
```
|
|
@@ -392,7 +400,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
392
400
|
Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
|
|
393
401
|
ArXiv URL: https://arxiv.org/abs/1810.04805
|
|
394
402
|
DOI URL: https://doi.org/10.18653/v1/n19-1423
|
|
395
|
-
❌ Error: Title mismatch:
|
|
403
|
+
❌ Error: Title mismatch:
|
|
404
|
+
cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
|
|
405
|
+
actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
|
|
396
406
|
```
|
|
397
407
|
- `arxiv_id`: Incorrect URLs or arXiv IDs
|
|
398
408
|
```
|
|
@@ -415,7 +425,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
415
425
|
Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
|
|
416
426
|
ArXiv URL: https://arxiv.org/abs/1706.03762
|
|
417
427
|
DOI URL: https://doi.org/10.48550/arXiv.1706.03762
|
|
418
|
-
❌ Error: DOI mismatch:
|
|
428
|
+
❌ Error: DOI mismatch:
|
|
429
|
+
cited: '10.5555/3295222.3295349'
|
|
430
|
+
actual: '10.48550/arXiv.1706.03762'
|
|
419
431
|
```
|
|
420
432
|
|
|
421
433
|
- **⚠️ Warnings**: Minor issues that may need attention
|
|
@@ -428,7 +440,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
428
440
|
Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
|
|
429
441
|
ArXiv URL: https://arxiv.org/abs/2310.03684
|
|
430
442
|
DOI URL: https://doi.org/10.48550/arxiv.2310.03684
|
|
431
|
-
⚠️ Warning: Year mismatch:
|
|
443
|
+
⚠️ Warning: Year mismatch:
|
|
444
|
+
cited: '2024'
|
|
445
|
+
actual: '2023'
|
|
432
446
|
```
|
|
433
447
|
- `venue`: Venue format variations
|
|
434
448
|
```
|
|
@@ -439,7 +453,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
439
453
|
Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
|
|
440
454
|
ArXiv URL: https://arxiv.org/abs/2403.02151
|
|
441
455
|
DOI URL: https://doi.org/10.48550/arxiv.2403.02151
|
|
442
|
-
⚠️ Warning: Venue mismatch:
|
|
456
|
+
⚠️ Warning: Venue mismatch:
|
|
457
|
+
cited: 'arXiv, 2024'
|
|
458
|
+
actual: 'Neural Information Processing Systems'
|
|
443
459
|
```
|
|
444
460
|
|
|
445
461
|
- **❓ Unverified**: References that couldn't be verified with any of the checker APIs
|
|
@@ -31,6 +31,7 @@ import re
|
|
|
31
31
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
32
32
|
from urllib.parse import quote_plus
|
|
33
33
|
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
|
|
34
|
+
from utils.error_utils import format_year_mismatch, format_doi_mismatch
|
|
34
35
|
from config.settings import get_config
|
|
35
36
|
|
|
36
37
|
# Set up logging
|
|
@@ -478,21 +479,19 @@ class CrossRefReferenceChecker:
|
|
|
478
479
|
if year and work_year and year != work_year:
|
|
479
480
|
errors.append({
|
|
480
481
|
'warning_type': 'year',
|
|
481
|
-
'warning_details':
|
|
482
|
+
'warning_details': format_year_mismatch(year, work_year),
|
|
482
483
|
'ref_year_correct': work_year
|
|
483
484
|
})
|
|
484
485
|
|
|
485
486
|
# Verify DOI
|
|
486
487
|
work_doi = work_data.get('DOI')
|
|
487
488
|
if doi and work_doi:
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
if cited_doi_clean.lower() != work_doi_clean.lower():
|
|
489
|
+
# Compare DOIs using the proper comparison function
|
|
490
|
+
from utils.doi_utils import compare_dois
|
|
491
|
+
if not compare_dois(doi, work_doi):
|
|
493
492
|
errors.append({
|
|
494
493
|
'error_type': 'doi',
|
|
495
|
-
'error_details':
|
|
494
|
+
'error_details': format_doi_mismatch(doi, work_doi),
|
|
496
495
|
'ref_doi_correct': work_doi
|
|
497
496
|
})
|
|
498
497
|
|
|
@@ -169,9 +169,14 @@ class GitHubChecker:
|
|
|
169
169
|
if cited_title:
|
|
170
170
|
title_match = self._check_title_match(cited_title, actual_name, actual_description)
|
|
171
171
|
if not title_match:
|
|
172
|
+
from utils.error_utils import format_title_mismatch
|
|
173
|
+
details = format_title_mismatch(cited_title, actual_name)
|
|
174
|
+
if actual_description:
|
|
175
|
+
snippet = actual_description[:100] + ('...' if len(actual_description) > 100 else '')
|
|
176
|
+
details += f" ({snippet})"
|
|
172
177
|
errors.append({
|
|
173
178
|
"warning_type": "title",
|
|
174
|
-
"warning_details":
|
|
179
|
+
"warning_details": details
|
|
175
180
|
})
|
|
176
181
|
|
|
177
182
|
# Verify authors
|
|
@@ -180,9 +185,13 @@ class GitHubChecker:
|
|
|
180
185
|
author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
|
|
181
186
|
author_match = self._check_author_match(author_str, actual_owner, actual_owner_name)
|
|
182
187
|
if not author_match:
|
|
188
|
+
from utils.error_utils import format_three_line_mismatch
|
|
189
|
+
left = author_str
|
|
190
|
+
right = f"{actual_owner} ({actual_owner_name})" if actual_owner_name else actual_owner
|
|
191
|
+
details = format_three_line_mismatch("Author mismatch", left, right)
|
|
183
192
|
errors.append({
|
|
184
193
|
"warning_type": "author",
|
|
185
|
-
"warning_details":
|
|
194
|
+
"warning_details": details
|
|
186
195
|
})
|
|
187
196
|
|
|
188
197
|
# Verify year
|
|
@@ -191,9 +200,10 @@ class GitHubChecker:
|
|
|
191
200
|
try:
|
|
192
201
|
cited_year_int = int(cited_year)
|
|
193
202
|
if cited_year_int < creation_year:
|
|
203
|
+
from utils.error_utils import format_year_mismatch
|
|
194
204
|
errors.append({
|
|
195
205
|
"warning_type": "year",
|
|
196
|
-
"warning_details":
|
|
206
|
+
"warning_details": format_year_mismatch(cited_year, creation_year),
|
|
197
207
|
"ref_year_correct": str(creation_year)
|
|
198
208
|
})
|
|
199
209
|
except (ValueError, TypeError):
|
|
@@ -33,6 +33,7 @@ import re
|
|
|
33
33
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
34
34
|
from urllib.parse import quote_plus
|
|
35
35
|
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
|
|
36
|
+
from utils.error_utils import format_year_mismatch, format_doi_mismatch
|
|
36
37
|
from config.settings import get_config
|
|
37
38
|
|
|
38
39
|
# Set up logging
|
|
@@ -448,7 +449,7 @@ class OpenAlexReferenceChecker:
|
|
|
448
449
|
if year and work_year and year != work_year:
|
|
449
450
|
errors.append({
|
|
450
451
|
'warning_type': 'year',
|
|
451
|
-
'warning_details':
|
|
452
|
+
'warning_details': format_year_mismatch(year, work_year),
|
|
452
453
|
'ref_year_correct': work_year
|
|
453
454
|
})
|
|
454
455
|
|
|
@@ -458,14 +459,12 @@ class OpenAlexReferenceChecker:
|
|
|
458
459
|
work_doi = work_data['ids']['doi']
|
|
459
460
|
|
|
460
461
|
if doi and work_doi:
|
|
461
|
-
#
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
if cited_doi_clean.lower() != work_doi_clean.lower():
|
|
462
|
+
# Compare DOIs using the proper comparison function
|
|
463
|
+
from utils.doi_utils import compare_dois
|
|
464
|
+
if not compare_dois(doi, work_doi):
|
|
466
465
|
errors.append({
|
|
467
466
|
'error_type': 'doi',
|
|
468
|
-
'error_details':
|
|
467
|
+
'error_details': format_doi_mismatch(doi, work_doi),
|
|
469
468
|
'ref_doi_correct': work_doi
|
|
470
469
|
})
|
|
471
470
|
|
{academic_refchecker-1.2.44 → academic_refchecker-1.2.46}/src/checkers/openreview_checker.py
RENAMED
|
@@ -425,9 +425,11 @@ class OpenReviewReferenceChecker:
|
|
|
425
425
|
if cited_title and paper_title:
|
|
426
426
|
similarity = calculate_title_similarity(cited_title, paper_title)
|
|
427
427
|
if similarity < 0.7: # Using a reasonable threshold
|
|
428
|
+
from utils.error_utils import format_title_mismatch
|
|
429
|
+
details = format_title_mismatch(cited_title, paper_title) + f" (similarity: {similarity:.2f})"
|
|
428
430
|
errors.append({
|
|
429
431
|
"warning_type": "title",
|
|
430
|
-
"warning_details":
|
|
432
|
+
"warning_details": details
|
|
431
433
|
})
|
|
432
434
|
|
|
433
435
|
# Check authors
|
|
@@ -460,9 +462,10 @@ class OpenReviewReferenceChecker:
|
|
|
460
462
|
|
|
461
463
|
is_different, year_message = is_year_substantially_different(cited_year_int, paper_year_int)
|
|
462
464
|
if is_different and year_message:
|
|
465
|
+
from utils.error_utils import format_year_mismatch
|
|
463
466
|
errors.append({
|
|
464
467
|
"warning_type": "year",
|
|
465
|
-
"warning_details":
|
|
468
|
+
"warning_details": format_year_mismatch(cited_year_int, paper_year_int)
|
|
466
469
|
})
|
|
467
470
|
except (ValueError, TypeError):
|
|
468
471
|
pass # Skip year validation if conversion fails
|
|
@@ -473,10 +476,10 @@ class OpenReviewReferenceChecker:
|
|
|
473
476
|
|
|
474
477
|
if cited_venue and paper_venue:
|
|
475
478
|
if are_venues_substantially_different(cited_venue, paper_venue):
|
|
476
|
-
from utils.error_utils import
|
|
479
|
+
from utils.error_utils import format_venue_mismatch
|
|
477
480
|
errors.append({
|
|
478
481
|
"warning_type": "venue",
|
|
479
|
-
"warning_details":
|
|
482
|
+
"warning_details": format_venue_mismatch(cited_venue, paper_venue)
|
|
480
483
|
})
|
|
481
484
|
|
|
482
485
|
# Create verified data structure
|
|
@@ -29,6 +29,7 @@ import logging
|
|
|
29
29
|
import re
|
|
30
30
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
31
31
|
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, are_venues_substantially_different, calculate_title_similarity, compare_authors, clean_title_for_search
|
|
32
|
+
from utils.error_utils import format_title_mismatch
|
|
32
33
|
from config.settings import get_config
|
|
33
34
|
|
|
34
35
|
# Set up logging
|
|
@@ -471,7 +472,7 @@ class NonArxivReferenceChecker:
|
|
|
471
472
|
if found_title and title_similarity < SIMILARITY_THRESHOLD:
|
|
472
473
|
errors.append({
|
|
473
474
|
'error_type': 'title',
|
|
474
|
-
'error_details':
|
|
475
|
+
'error_details': format_title_mismatch(title, found_title),
|
|
475
476
|
'ref_title_correct': paper_data.get('title', '')
|
|
476
477
|
})
|
|
477
478
|
|
|
@@ -525,9 +526,10 @@ class NonArxivReferenceChecker:
|
|
|
525
526
|
is_different, warning_message = is_year_substantially_different(year, paper_year, context)
|
|
526
527
|
|
|
527
528
|
if is_different and warning_message:
|
|
529
|
+
from utils.error_utils import format_year_mismatch
|
|
528
530
|
errors.append({
|
|
529
531
|
'warning_type': 'year',
|
|
530
|
-
'warning_details':
|
|
532
|
+
'warning_details': format_year_mismatch(year, paper_year),
|
|
531
533
|
'ref_year_correct': paper_year
|
|
532
534
|
})
|
|
533
535
|
|
|
@@ -541,49 +543,50 @@ class NonArxivReferenceChecker:
|
|
|
541
543
|
elif paper_venue and not isinstance(paper_venue, str):
|
|
542
544
|
paper_venue = str(paper_venue)
|
|
543
545
|
|
|
546
|
+
# Check venue mismatches
|
|
544
547
|
if cited_venue and paper_venue:
|
|
545
548
|
# Use the utility function to check if venues are substantially different
|
|
546
549
|
if are_venues_substantially_different(cited_venue, paper_venue):
|
|
547
550
|
from utils.error_utils import create_venue_warning
|
|
548
551
|
errors.append(create_venue_warning(cited_venue, paper_venue))
|
|
549
552
|
elif not cited_venue and paper_venue:
|
|
550
|
-
#
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
# Check if the reference already includes this ArXiv URL or equivalent DOI
|
|
559
|
-
reference_url = reference.get('url', '')
|
|
560
|
-
|
|
561
|
-
# Check for direct arXiv URL match
|
|
562
|
-
has_arxiv_url = arxiv_url in reference_url
|
|
563
|
-
|
|
564
|
-
# Also check for arXiv DOI URL (e.g., https://doi.org/10.48550/arxiv.2505.11595)
|
|
565
|
-
arxiv_doi_url = f"https://doi.org/10.48550/arxiv.{arxiv_id}"
|
|
566
|
-
has_arxiv_doi = arxiv_doi_url.lower() in reference_url.lower()
|
|
567
|
-
|
|
568
|
-
if not (has_arxiv_url or has_arxiv_doi):
|
|
553
|
+
# Original reference has the venue in raw text but not parsed correctly
|
|
554
|
+
raw_text = reference.get('raw_text', '')
|
|
555
|
+
if raw_text and '#' in raw_text:
|
|
556
|
+
# Check if venue might be in the raw text format (author#title#venue#year#url)
|
|
557
|
+
parts = raw_text.split('#')
|
|
558
|
+
if len(parts) >= 3 and parts[2].strip():
|
|
559
|
+
# Venue is present in raw text but missing from parsed reference
|
|
569
560
|
errors.append({
|
|
570
561
|
'warning_type': 'venue',
|
|
571
|
-
'warning_details': f"
|
|
572
|
-
'
|
|
562
|
+
'warning_details': f"Venue missing: should include '{paper_venue}'",
|
|
563
|
+
'ref_venue_correct': paper_venue
|
|
573
564
|
})
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
565
|
+
|
|
566
|
+
# Always check for missing arXiv URLs when paper has arXiv ID
|
|
567
|
+
external_ids = paper_data.get('externalIds', {})
|
|
568
|
+
arxiv_id = external_ids.get('ArXiv') if external_ids else None
|
|
569
|
+
|
|
570
|
+
if arxiv_id:
|
|
571
|
+
# For arXiv papers, check if reference includes the arXiv URL
|
|
572
|
+
arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
|
|
573
|
+
|
|
574
|
+
# Check if the reference already includes this ArXiv URL or equivalent DOI
|
|
575
|
+
reference_url = reference.get('url', '')
|
|
576
|
+
|
|
577
|
+
# Check for direct arXiv URL match
|
|
578
|
+
has_arxiv_url = arxiv_url in reference_url
|
|
579
|
+
|
|
580
|
+
# Also check for arXiv DOI URL (e.g., https://doi.org/10.48550/arxiv.2505.11595)
|
|
581
|
+
arxiv_doi_url = f"https://doi.org/10.48550/arxiv.{arxiv_id}"
|
|
582
|
+
has_arxiv_doi = arxiv_doi_url.lower() in reference_url.lower()
|
|
583
|
+
|
|
584
|
+
if not (has_arxiv_url or has_arxiv_doi):
|
|
585
|
+
errors.append({
|
|
586
|
+
'warning_type': 'url',
|
|
587
|
+
'warning_details': f"Reference could include arXiv URL: {arxiv_url}",
|
|
588
|
+
'ref_url_correct': arxiv_url
|
|
589
|
+
})
|
|
587
590
|
|
|
588
591
|
# Verify DOI
|
|
589
592
|
paper_doi = None
|
|
@@ -591,14 +594,13 @@ class NonArxivReferenceChecker:
|
|
|
591
594
|
if external_ids and 'DOI' in external_ids:
|
|
592
595
|
paper_doi = external_ids['DOI']
|
|
593
596
|
|
|
594
|
-
# Compare DOIs
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
if cited_doi_clean and paper_doi_clean and cited_doi_clean.lower() != paper_doi_clean.lower():
|
|
597
|
+
# Compare DOIs using the proper comparison function
|
|
598
|
+
from utils.doi_utils import compare_dois
|
|
599
|
+
if doi and paper_doi and not compare_dois(doi, paper_doi):
|
|
600
|
+
from utils.error_utils import format_doi_mismatch
|
|
599
601
|
errors.append({
|
|
600
602
|
'error_type': 'doi',
|
|
601
|
-
'error_details':
|
|
603
|
+
'error_details': format_doi_mismatch(doi, paper_doi),
|
|
602
604
|
'ref_doi_correct': paper_doi
|
|
603
605
|
})
|
|
604
606
|
|
|
@@ -71,7 +71,8 @@ class WebPageChecker:
|
|
|
71
71
|
doc_indicators = [
|
|
72
72
|
'docs', 'documentation', 'readthedocs.io', 'help', 'guide', 'tutorial',
|
|
73
73
|
'reference', 'manual', 'wiki', 'blog', 'api', 'developer', 'platform',
|
|
74
|
-
'index', 'research', 'news', 'insights', 'whitepaper', 'brief', 'develop'
|
|
74
|
+
'index', 'research', 'news', 'insights', 'whitepaper', 'brief', 'develop',
|
|
75
|
+
'posts' # For blog posts and forum posts like LessWrong
|
|
75
76
|
]
|
|
76
77
|
|
|
77
78
|
return any(indicator in url.lower() for indicator in doc_indicators) or self._is_likely_webpage(url)
|
|
@@ -84,7 +85,8 @@ class WebPageChecker:
|
|
|
84
85
|
doc_domains = [
|
|
85
86
|
'pytorch.org', 'tensorflow.org', 'readthedocs.io', 'onnxruntime.ai',
|
|
86
87
|
'deepspeed.ai', 'huggingface.co', 'openai.com', 'microsoft.com',
|
|
87
|
-
'google.com', 'nvidia.com', 'intel.com', 'langchain.com'
|
|
88
|
+
'google.com', 'nvidia.com', 'intel.com', 'langchain.com',
|
|
89
|
+
'lesswrong.com' # LessWrong rationality and AI safety blog platform
|
|
88
90
|
]
|
|
89
91
|
|
|
90
92
|
return any(domain in parsed.netloc for domain in doc_domains)
|
|
@@ -182,9 +184,10 @@ class WebPageChecker:
|
|
|
182
184
|
# Check title match
|
|
183
185
|
if cited_title and page_title:
|
|
184
186
|
if not self._check_title_match(cited_title, page_title, page_description):
|
|
187
|
+
from utils.error_utils import format_title_mismatch
|
|
185
188
|
errors.append({
|
|
186
189
|
"warning_type": "title",
|
|
187
|
-
"warning_details":
|
|
190
|
+
"warning_details": format_title_mismatch(cited_title, page_title)
|
|
188
191
|
})
|
|
189
192
|
|
|
190
193
|
# Check if this is a documentation page for the cited topic
|
|
@@ -201,9 +204,13 @@ class WebPageChecker:
|
|
|
201
204
|
if cited_authors:
|
|
202
205
|
author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
|
|
203
206
|
if not self._check_author_match(author_str, site_info, web_url):
|
|
207
|
+
from utils.error_utils import format_three_line_mismatch
|
|
208
|
+
left = author_str
|
|
209
|
+
right = site_info.get('organization', 'unknown')
|
|
210
|
+
details = format_three_line_mismatch("Author/organization mismatch", left, right)
|
|
204
211
|
errors.append({
|
|
205
212
|
"warning_type": "author",
|
|
206
|
-
"warning_details":
|
|
213
|
+
"warning_details": details
|
|
207
214
|
})
|
|
208
215
|
|
|
209
216
|
logger.debug(f"Web page verification completed for: {web_url}")
|
|
@@ -390,6 +397,14 @@ class WebPageChecker:
|
|
|
390
397
|
organization = site_info.get('organization', '').lower()
|
|
391
398
|
domain = site_info.get('domain', '').lower()
|
|
392
399
|
|
|
400
|
+
# Accept generic web resource terms - these are valid for any web URL
|
|
401
|
+
generic_web_terms = [
|
|
402
|
+
'web resource', 'web site', 'website', 'online resource',
|
|
403
|
+
'online', 'web', 'internet resource', 'web page', 'webpage'
|
|
404
|
+
]
|
|
405
|
+
if cited_lower in generic_web_terms:
|
|
406
|
+
return True
|
|
407
|
+
|
|
393
408
|
# Direct matches
|
|
394
409
|
if cited_lower in organization or organization in cited_lower:
|
|
395
410
|
return True
|