academic-refchecker 1.2.44__tar.gz → 1.2.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.44/src/academic_refchecker.egg-info → academic_refchecker-1.2.45}/PKG-INFO +25 -9
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/README.md +24 -8
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/__version__.py +1 -1
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45/src/academic_refchecker.egg-info}/PKG-INFO +25 -9
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/crossref.py +6 -7
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/github_checker.py +13 -3
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/openalex.py +6 -7
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/openreview_checker.py +7 -4
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/semantic_scholar.py +9 -8
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/webpage_checker.py +7 -2
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/core/parallel_processor.py +5 -2
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/core/refchecker.py +52 -43
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/doi_utils.py +6 -12
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/error_utils.py +145 -3
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/text_utils.py +114 -8
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/LICENSE +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/MANIFEST.in +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/pyproject.toml +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/requirements.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/scripts/download_db.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/scripts/run_tests.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/setup.cfg +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/top_level.txt +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/enhanced_hybrid_checker.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/local_semantic_scholar.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/config/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/config/logging.conf +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/config/settings.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/core/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/core/db_connection_pool.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/database/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/llm/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/llm/base.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/llm/providers.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/scripts/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/services/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/services/pdf_processor.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/__init__.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/arxiv_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/author_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/biblatex_parser.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/bibliography_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/bibtex_parser.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/config_validator.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/db_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/mock_objects.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/unicode_utils.py +0 -0
- {academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/utils/url_utils.py +0 -0
{academic_refchecker-1.2.44/src/academic_refchecker.egg-info → academic_refchecker-1.2.45}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.44
|
|
3
|
+
Version: 1.2.45
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -78,7 +78,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
78
78
|
Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
|
|
79
79
|
ArXiv URL: https://arxiv.org/abs/1610.10099
|
|
80
80
|
DOI URL: https://doi.org/10.48550/arxiv.1610.10099
|
|
81
|
-
⚠️ Warning: Year mismatch:
|
|
81
|
+
⚠️ Warning: Year mismatch:
|
|
82
|
+
cited: '2017'
|
|
83
|
+
actual: '2016'
|
|
82
84
|
|
|
83
85
|
[2/45] Effective approaches to attention-based neural machine translation
|
|
84
86
|
Minh-Thang Luong, Hieu Pham, Christopher D. Manning
|
|
@@ -87,7 +89,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
87
89
|
Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
|
|
88
90
|
ArXiv URL: https://arxiv.org/abs/1508.04025
|
|
89
91
|
DOI URL: https://doi.org/10.18653/v1/d15-1166
|
|
90
|
-
❌ Error: First author mismatch:
|
|
92
|
+
❌ Error: First author mismatch:
|
|
93
|
+
cited: 'Minh-Thang Luong'
|
|
94
|
+
actual: 'Thang Luong'
|
|
91
95
|
|
|
92
96
|
[3/45] Deep Residual Learning for Image Recognition
|
|
93
97
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
|
@@ -98,7 +102,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
98
102
|
Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
|
|
99
103
|
ArXiv URL: https://arxiv.org/abs/1512.03385
|
|
100
104
|
DOI URL: https://doi.org/10.1109/CVPR.2016.90
|
|
101
|
-
❌ Error: DOI mismatch:
|
|
105
|
+
❌ Error: DOI mismatch:
|
|
106
|
+
cited: '10.1109/CVPR.2016.91'
|
|
107
|
+
actual: '10.1109/CVPR.2016.90'
|
|
102
108
|
|
|
103
109
|
============================================================
|
|
104
110
|
📋 SUMMARY
|
|
@@ -382,7 +388,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
382
388
|
Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
|
|
383
389
|
ArXiv URL: https://arxiv.org/abs/2312.02119
|
|
384
390
|
DOI URL: https://doi.org/10.48550/arxiv.2312.02119
|
|
385
|
-
❌ Error: First author mismatch:
|
|
391
|
+
❌ Error: First author mismatch:
|
|
392
|
+
cited: 'T. Xie'
|
|
393
|
+
actual: 'Zhao Xu'
|
|
386
394
|
```
|
|
387
395
|
- `title`: Title discrepancies
|
|
388
396
|
```
|
|
@@ -392,7 +400,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
392
400
|
Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
|
|
393
401
|
ArXiv URL: https://arxiv.org/abs/1810.04805
|
|
394
402
|
DOI URL: https://doi.org/10.18653/v1/n19-1423
|
|
395
|
-
❌ Error: Title mismatch:
|
|
403
|
+
❌ Error: Title mismatch:
|
|
404
|
+
cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
|
|
405
|
+
actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
|
|
396
406
|
```
|
|
397
407
|
- `arxiv_id`: Incorrect URLs or arXiv IDs
|
|
398
408
|
```
|
|
@@ -415,7 +425,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
415
425
|
Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
|
|
416
426
|
ArXiv URL: https://arxiv.org/abs/1706.03762
|
|
417
427
|
DOI URL: https://doi.org/10.48550/arXiv.1706.03762
|
|
418
|
-
❌ Error: DOI mismatch:
|
|
428
|
+
❌ Error: DOI mismatch:
|
|
429
|
+
cited: '10.5555/3295222.3295349'
|
|
430
|
+
actual: '10.48550/arXiv.1706.03762'
|
|
419
431
|
```
|
|
420
432
|
|
|
421
433
|
- **⚠️ Warnings**: Minor issues that may need attention
|
|
@@ -428,7 +440,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
428
440
|
Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
|
|
429
441
|
ArXiv URL: https://arxiv.org/abs/2310.03684
|
|
430
442
|
DOI URL: https://doi.org/10.48550/arxiv.2310.03684
|
|
431
|
-
⚠️ Warning: Year mismatch:
|
|
443
|
+
⚠️ Warning: Year mismatch:
|
|
444
|
+
cited: '2024'
|
|
445
|
+
actual: '2023'
|
|
432
446
|
```
|
|
433
447
|
- `venue`: Venue format variations
|
|
434
448
|
```
|
|
@@ -439,7 +453,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
439
453
|
Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
|
|
440
454
|
ArXiv URL: https://arxiv.org/abs/2403.02151
|
|
441
455
|
DOI URL: https://doi.org/10.48550/arxiv.2403.02151
|
|
442
|
-
⚠️ Warning: Venue mismatch:
|
|
456
|
+
⚠️ Warning: Venue mismatch:
|
|
457
|
+
cited: 'arXiv, 2024'
|
|
458
|
+
actual: 'Neural Information Processing Systems'
|
|
443
459
|
```
|
|
444
460
|
|
|
445
461
|
- **❓ Unverified**: References that couldn't be verified with any of the checker APIs
|
|
@@ -17,7 +17,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
17
17
|
Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
|
|
18
18
|
ArXiv URL: https://arxiv.org/abs/1610.10099
|
|
19
19
|
DOI URL: https://doi.org/10.48550/arxiv.1610.10099
|
|
20
|
-
⚠️ Warning: Year mismatch:
|
|
20
|
+
⚠️ Warning: Year mismatch:
|
|
21
|
+
cited: '2017'
|
|
22
|
+
actual: '2016'
|
|
21
23
|
|
|
22
24
|
[2/45] Effective approaches to attention-based neural machine translation
|
|
23
25
|
Minh-Thang Luong, Hieu Pham, Christopher D. Manning
|
|
@@ -26,7 +28,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
26
28
|
Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
|
|
27
29
|
ArXiv URL: https://arxiv.org/abs/1508.04025
|
|
28
30
|
DOI URL: https://doi.org/10.18653/v1/d15-1166
|
|
29
|
-
❌ Error: First author mismatch:
|
|
31
|
+
❌ Error: First author mismatch:
|
|
32
|
+
cited: 'Minh-Thang Luong'
|
|
33
|
+
actual: 'Thang Luong'
|
|
30
34
|
|
|
31
35
|
[3/45] Deep Residual Learning for Image Recognition
|
|
32
36
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
|
@@ -37,7 +41,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
37
41
|
Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
|
|
38
42
|
ArXiv URL: https://arxiv.org/abs/1512.03385
|
|
39
43
|
DOI URL: https://doi.org/10.1109/CVPR.2016.90
|
|
40
|
-
❌ Error: DOI mismatch:
|
|
44
|
+
❌ Error: DOI mismatch:
|
|
45
|
+
cited: '10.1109/CVPR.2016.91'
|
|
46
|
+
actual: '10.1109/CVPR.2016.90'
|
|
41
47
|
|
|
42
48
|
============================================================
|
|
43
49
|
📋 SUMMARY
|
|
@@ -321,7 +327,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
321
327
|
Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
|
|
322
328
|
ArXiv URL: https://arxiv.org/abs/2312.02119
|
|
323
329
|
DOI URL: https://doi.org/10.48550/arxiv.2312.02119
|
|
324
|
-
❌ Error: First author mismatch:
|
|
330
|
+
❌ Error: First author mismatch:
|
|
331
|
+
cited: 'T. Xie'
|
|
332
|
+
actual: 'Zhao Xu'
|
|
325
333
|
```
|
|
326
334
|
- `title`: Title discrepancies
|
|
327
335
|
```
|
|
@@ -331,7 +339,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
331
339
|
Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
|
|
332
340
|
ArXiv URL: https://arxiv.org/abs/1810.04805
|
|
333
341
|
DOI URL: https://doi.org/10.18653/v1/n19-1423
|
|
334
|
-
❌ Error: Title mismatch:
|
|
342
|
+
❌ Error: Title mismatch:
|
|
343
|
+
cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
|
|
344
|
+
actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
|
|
335
345
|
```
|
|
336
346
|
- `arxiv_id`: Incorrect URLs or arXiv IDs
|
|
337
347
|
```
|
|
@@ -354,7 +364,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
354
364
|
Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
|
|
355
365
|
ArXiv URL: https://arxiv.org/abs/1706.03762
|
|
356
366
|
DOI URL: https://doi.org/10.48550/arXiv.1706.03762
|
|
357
|
-
❌ Error: DOI mismatch:
|
|
367
|
+
❌ Error: DOI mismatch:
|
|
368
|
+
cited: '10.5555/3295222.3295349'
|
|
369
|
+
actual: '10.48550/arXiv.1706.03762'
|
|
358
370
|
```
|
|
359
371
|
|
|
360
372
|
- **⚠️ Warnings**: Minor issues that may need attention
|
|
@@ -367,7 +379,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
367
379
|
Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
|
|
368
380
|
ArXiv URL: https://arxiv.org/abs/2310.03684
|
|
369
381
|
DOI URL: https://doi.org/10.48550/arxiv.2310.03684
|
|
370
|
-
⚠️ Warning: Year mismatch:
|
|
382
|
+
⚠️ Warning: Year mismatch:
|
|
383
|
+
cited: '2024'
|
|
384
|
+
actual: '2023'
|
|
371
385
|
```
|
|
372
386
|
- `venue`: Venue format variations
|
|
373
387
|
```
|
|
@@ -378,7 +392,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
378
392
|
Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
|
|
379
393
|
ArXiv URL: https://arxiv.org/abs/2403.02151
|
|
380
394
|
DOI URL: https://doi.org/10.48550/arxiv.2403.02151
|
|
381
|
-
⚠️ Warning: Venue mismatch:
|
|
395
|
+
⚠️ Warning: Venue mismatch:
|
|
396
|
+
cited: 'arXiv, 2024'
|
|
397
|
+
actual: 'Neural Information Processing Systems'
|
|
382
398
|
```
|
|
383
399
|
|
|
384
400
|
- **❓ Unverified**: References that couldn't be verified with any of the checker APIs
|
{academic_refchecker-1.2.44 → academic_refchecker-1.2.45/src/academic_refchecker.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.44
|
|
3
|
+
Version: 1.2.45
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -78,7 +78,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
78
78
|
Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
|
|
79
79
|
ArXiv URL: https://arxiv.org/abs/1610.10099
|
|
80
80
|
DOI URL: https://doi.org/10.48550/arxiv.1610.10099
|
|
81
|
-
⚠️ Warning: Year mismatch:
|
|
81
|
+
⚠️ Warning: Year mismatch:
|
|
82
|
+
cited: '2017'
|
|
83
|
+
actual: '2016'
|
|
82
84
|
|
|
83
85
|
[2/45] Effective approaches to attention-based neural machine translation
|
|
84
86
|
Minh-Thang Luong, Hieu Pham, Christopher D. Manning
|
|
@@ -87,7 +89,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
87
89
|
Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
|
|
88
90
|
ArXiv URL: https://arxiv.org/abs/1508.04025
|
|
89
91
|
DOI URL: https://doi.org/10.18653/v1/d15-1166
|
|
90
|
-
❌ Error: First author mismatch:
|
|
92
|
+
❌ Error: First author mismatch:
|
|
93
|
+
cited: 'Minh-Thang Luong'
|
|
94
|
+
actual: 'Thang Luong'
|
|
91
95
|
|
|
92
96
|
[3/45] Deep Residual Learning for Image Recognition
|
|
93
97
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
|
@@ -98,7 +102,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
|
|
|
98
102
|
Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
|
|
99
103
|
ArXiv URL: https://arxiv.org/abs/1512.03385
|
|
100
104
|
DOI URL: https://doi.org/10.1109/CVPR.2016.90
|
|
101
|
-
❌ Error: DOI mismatch:
|
|
105
|
+
❌ Error: DOI mismatch:
|
|
106
|
+
cited: '10.1109/CVPR.2016.91'
|
|
107
|
+
actual: '10.1109/CVPR.2016.90'
|
|
102
108
|
|
|
103
109
|
============================================================
|
|
104
110
|
📋 SUMMARY
|
|
@@ -382,7 +388,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
382
388
|
Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
|
|
383
389
|
ArXiv URL: https://arxiv.org/abs/2312.02119
|
|
384
390
|
DOI URL: https://doi.org/10.48550/arxiv.2312.02119
|
|
385
|
-
❌ Error: First author mismatch:
|
|
391
|
+
❌ Error: First author mismatch:
|
|
392
|
+
cited: 'T. Xie'
|
|
393
|
+
actual: 'Zhao Xu'
|
|
386
394
|
```
|
|
387
395
|
- `title`: Title discrepancies
|
|
388
396
|
```
|
|
@@ -392,7 +400,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
392
400
|
Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
|
|
393
401
|
ArXiv URL: https://arxiv.org/abs/1810.04805
|
|
394
402
|
DOI URL: https://doi.org/10.18653/v1/n19-1423
|
|
395
|
-
❌ Error: Title mismatch:
|
|
403
|
+
❌ Error: Title mismatch:
|
|
404
|
+
cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
|
|
405
|
+
actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
|
|
396
406
|
```
|
|
397
407
|
- `arxiv_id`: Incorrect URLs or arXiv IDs
|
|
398
408
|
```
|
|
@@ -415,7 +425,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
415
425
|
Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
|
|
416
426
|
ArXiv URL: https://arxiv.org/abs/1706.03762
|
|
417
427
|
DOI URL: https://doi.org/10.48550/arXiv.1706.03762
|
|
418
|
-
❌ Error: DOI mismatch:
|
|
428
|
+
❌ Error: DOI mismatch:
|
|
429
|
+
cited: '10.5555/3295222.3295349'
|
|
430
|
+
actual: '10.48550/arXiv.1706.03762'
|
|
419
431
|
```
|
|
420
432
|
|
|
421
433
|
- **⚠️ Warnings**: Minor issues that may need attention
|
|
@@ -428,7 +440,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
428
440
|
Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
|
|
429
441
|
ArXiv URL: https://arxiv.org/abs/2310.03684
|
|
430
442
|
DOI URL: https://doi.org/10.48550/arxiv.2310.03684
|
|
431
|
-
⚠️ Warning: Year mismatch:
|
|
443
|
+
⚠️ Warning: Year mismatch:
|
|
444
|
+
cited: '2024'
|
|
445
|
+
actual: '2023'
|
|
432
446
|
```
|
|
433
447
|
- `venue`: Venue format variations
|
|
434
448
|
```
|
|
@@ -439,7 +453,9 @@ This enhanced URL display helps users access multiple authoritative sources for
|
|
|
439
453
|
Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
|
|
440
454
|
ArXiv URL: https://arxiv.org/abs/2403.02151
|
|
441
455
|
DOI URL: https://doi.org/10.48550/arxiv.2403.02151
|
|
442
|
-
⚠️ Warning: Venue mismatch:
|
|
456
|
+
⚠️ Warning: Venue mismatch:
|
|
457
|
+
cited: 'arXiv, 2024'
|
|
458
|
+
actual: 'Neural Information Processing Systems'
|
|
443
459
|
```
|
|
444
460
|
|
|
445
461
|
- **❓ Unverified**: References that couldn't be verified with any of the checker APIs
|
|
@@ -31,6 +31,7 @@ import re
|
|
|
31
31
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
32
32
|
from urllib.parse import quote_plus
|
|
33
33
|
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
|
|
34
|
+
from utils.error_utils import format_year_mismatch, format_doi_mismatch
|
|
34
35
|
from config.settings import get_config
|
|
35
36
|
|
|
36
37
|
# Set up logging
|
|
@@ -478,21 +479,19 @@ class CrossRefReferenceChecker:
|
|
|
478
479
|
if year and work_year and year != work_year:
|
|
479
480
|
errors.append({
|
|
480
481
|
'warning_type': 'year',
|
|
481
|
-
'warning_details':
|
|
482
|
+
'warning_details': format_year_mismatch(year, work_year),
|
|
482
483
|
'ref_year_correct': work_year
|
|
483
484
|
})
|
|
484
485
|
|
|
485
486
|
# Verify DOI
|
|
486
487
|
work_doi = work_data.get('DOI')
|
|
487
488
|
if doi and work_doi:
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
if cited_doi_clean.lower() != work_doi_clean.lower():
|
|
489
|
+
# Compare DOIs using the proper comparison function
|
|
490
|
+
from utils.doi_utils import compare_dois
|
|
491
|
+
if not compare_dois(doi, work_doi):
|
|
493
492
|
errors.append({
|
|
494
493
|
'error_type': 'doi',
|
|
495
|
-
'error_details':
|
|
494
|
+
'error_details': format_doi_mismatch(doi, work_doi),
|
|
496
495
|
'ref_doi_correct': work_doi
|
|
497
496
|
})
|
|
498
497
|
|
|
@@ -169,9 +169,14 @@ class GitHubChecker:
|
|
|
169
169
|
if cited_title:
|
|
170
170
|
title_match = self._check_title_match(cited_title, actual_name, actual_description)
|
|
171
171
|
if not title_match:
|
|
172
|
+
from utils.error_utils import format_title_mismatch
|
|
173
|
+
details = format_title_mismatch(cited_title, actual_name)
|
|
174
|
+
if actual_description:
|
|
175
|
+
snippet = actual_description[:100] + ('...' if len(actual_description) > 100 else '')
|
|
176
|
+
details += f" ({snippet})"
|
|
172
177
|
errors.append({
|
|
173
178
|
"warning_type": "title",
|
|
174
|
-
"warning_details":
|
|
179
|
+
"warning_details": details
|
|
175
180
|
})
|
|
176
181
|
|
|
177
182
|
# Verify authors
|
|
@@ -180,9 +185,13 @@ class GitHubChecker:
|
|
|
180
185
|
author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
|
|
181
186
|
author_match = self._check_author_match(author_str, actual_owner, actual_owner_name)
|
|
182
187
|
if not author_match:
|
|
188
|
+
from utils.error_utils import format_three_line_mismatch
|
|
189
|
+
left = author_str
|
|
190
|
+
right = f"{actual_owner} ({actual_owner_name})" if actual_owner_name else actual_owner
|
|
191
|
+
details = format_three_line_mismatch("Author mismatch", left, right)
|
|
183
192
|
errors.append({
|
|
184
193
|
"warning_type": "author",
|
|
185
|
-
"warning_details":
|
|
194
|
+
"warning_details": details
|
|
186
195
|
})
|
|
187
196
|
|
|
188
197
|
# Verify year
|
|
@@ -191,9 +200,10 @@ class GitHubChecker:
|
|
|
191
200
|
try:
|
|
192
201
|
cited_year_int = int(cited_year)
|
|
193
202
|
if cited_year_int < creation_year:
|
|
203
|
+
from utils.error_utils import format_year_mismatch
|
|
194
204
|
errors.append({
|
|
195
205
|
"warning_type": "year",
|
|
196
|
-
"warning_details":
|
|
206
|
+
"warning_details": format_year_mismatch(cited_year, creation_year),
|
|
197
207
|
"ref_year_correct": str(creation_year)
|
|
198
208
|
})
|
|
199
209
|
except (ValueError, TypeError):
|
|
@@ -33,6 +33,7 @@ import re
|
|
|
33
33
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
34
34
|
from urllib.parse import quote_plus
|
|
35
35
|
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
|
|
36
|
+
from utils.error_utils import format_year_mismatch, format_doi_mismatch
|
|
36
37
|
from config.settings import get_config
|
|
37
38
|
|
|
38
39
|
# Set up logging
|
|
@@ -448,7 +449,7 @@ class OpenAlexReferenceChecker:
|
|
|
448
449
|
if year and work_year and year != work_year:
|
|
449
450
|
errors.append({
|
|
450
451
|
'warning_type': 'year',
|
|
451
|
-
'warning_details':
|
|
452
|
+
'warning_details': format_year_mismatch(year, work_year),
|
|
452
453
|
'ref_year_correct': work_year
|
|
453
454
|
})
|
|
454
455
|
|
|
@@ -458,14 +459,12 @@ class OpenAlexReferenceChecker:
|
|
|
458
459
|
work_doi = work_data['ids']['doi']
|
|
459
460
|
|
|
460
461
|
if doi and work_doi:
|
|
461
|
-
#
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
if cited_doi_clean.lower() != work_doi_clean.lower():
|
|
462
|
+
# Compare DOIs using the proper comparison function
|
|
463
|
+
from utils.doi_utils import compare_dois
|
|
464
|
+
if not compare_dois(doi, work_doi):
|
|
466
465
|
errors.append({
|
|
467
466
|
'error_type': 'doi',
|
|
468
|
-
'error_details':
|
|
467
|
+
'error_details': format_doi_mismatch(doi, work_doi),
|
|
469
468
|
'ref_doi_correct': work_doi
|
|
470
469
|
})
|
|
471
470
|
|
{academic_refchecker-1.2.44 → academic_refchecker-1.2.45}/src/checkers/openreview_checker.py
RENAMED
|
@@ -425,9 +425,11 @@ class OpenReviewReferenceChecker:
|
|
|
425
425
|
if cited_title and paper_title:
|
|
426
426
|
similarity = calculate_title_similarity(cited_title, paper_title)
|
|
427
427
|
if similarity < 0.7: # Using a reasonable threshold
|
|
428
|
+
from utils.error_utils import format_title_mismatch
|
|
429
|
+
details = format_title_mismatch(cited_title, paper_title) + f" (similarity: {similarity:.2f})"
|
|
428
430
|
errors.append({
|
|
429
431
|
"warning_type": "title",
|
|
430
|
-
"warning_details":
|
|
432
|
+
"warning_details": details
|
|
431
433
|
})
|
|
432
434
|
|
|
433
435
|
# Check authors
|
|
@@ -460,9 +462,10 @@ class OpenReviewReferenceChecker:
|
|
|
460
462
|
|
|
461
463
|
is_different, year_message = is_year_substantially_different(cited_year_int, paper_year_int)
|
|
462
464
|
if is_different and year_message:
|
|
465
|
+
from utils.error_utils import format_year_mismatch
|
|
463
466
|
errors.append({
|
|
464
467
|
"warning_type": "year",
|
|
465
|
-
"warning_details":
|
|
468
|
+
"warning_details": format_year_mismatch(cited_year_int, paper_year_int)
|
|
466
469
|
})
|
|
467
470
|
except (ValueError, TypeError):
|
|
468
471
|
pass # Skip year validation if conversion fails
|
|
@@ -473,10 +476,10 @@ class OpenReviewReferenceChecker:
|
|
|
473
476
|
|
|
474
477
|
if cited_venue and paper_venue:
|
|
475
478
|
if are_venues_substantially_different(cited_venue, paper_venue):
|
|
476
|
-
from utils.error_utils import
|
|
479
|
+
from utils.error_utils import format_venue_mismatch
|
|
477
480
|
errors.append({
|
|
478
481
|
"warning_type": "venue",
|
|
479
|
-
"warning_details":
|
|
482
|
+
"warning_details": format_venue_mismatch(cited_venue, paper_venue)
|
|
480
483
|
})
|
|
481
484
|
|
|
482
485
|
# Create verified data structure
|
|
@@ -29,6 +29,7 @@ import logging
|
|
|
29
29
|
import re
|
|
30
30
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
31
31
|
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, are_venues_substantially_different, calculate_title_similarity, compare_authors, clean_title_for_search
|
|
32
|
+
from utils.error_utils import format_title_mismatch
|
|
32
33
|
from config.settings import get_config
|
|
33
34
|
|
|
34
35
|
# Set up logging
|
|
@@ -471,7 +472,7 @@ class NonArxivReferenceChecker:
|
|
|
471
472
|
if found_title and title_similarity < SIMILARITY_THRESHOLD:
|
|
472
473
|
errors.append({
|
|
473
474
|
'error_type': 'title',
|
|
474
|
-
'error_details':
|
|
475
|
+
'error_details': format_title_mismatch(title, found_title),
|
|
475
476
|
'ref_title_correct': paper_data.get('title', '')
|
|
476
477
|
})
|
|
477
478
|
|
|
@@ -525,9 +526,10 @@ class NonArxivReferenceChecker:
|
|
|
525
526
|
is_different, warning_message = is_year_substantially_different(year, paper_year, context)
|
|
526
527
|
|
|
527
528
|
if is_different and warning_message:
|
|
529
|
+
from utils.error_utils import format_year_mismatch
|
|
528
530
|
errors.append({
|
|
529
531
|
'warning_type': 'year',
|
|
530
|
-
'warning_details':
|
|
532
|
+
'warning_details': format_year_mismatch(year, paper_year),
|
|
531
533
|
'ref_year_correct': paper_year
|
|
532
534
|
})
|
|
533
535
|
|
|
@@ -591,14 +593,13 @@ class NonArxivReferenceChecker:
|
|
|
591
593
|
if external_ids and 'DOI' in external_ids:
|
|
592
594
|
paper_doi = external_ids['DOI']
|
|
593
595
|
|
|
594
|
-
# Compare DOIs
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
if cited_doi_clean and paper_doi_clean and cited_doi_clean.lower() != paper_doi_clean.lower():
|
|
596
|
+
# Compare DOIs using the proper comparison function
|
|
597
|
+
from utils.doi_utils import compare_dois
|
|
598
|
+
if doi and paper_doi and not compare_dois(doi, paper_doi):
|
|
599
|
+
from utils.error_utils import format_doi_mismatch
|
|
599
600
|
errors.append({
|
|
600
601
|
'error_type': 'doi',
|
|
601
|
-
'error_details':
|
|
602
|
+
'error_details': format_doi_mismatch(doi, paper_doi),
|
|
602
603
|
'ref_doi_correct': paper_doi
|
|
603
604
|
})
|
|
604
605
|
|
|
@@ -182,9 +182,10 @@ class WebPageChecker:
|
|
|
182
182
|
# Check title match
|
|
183
183
|
if cited_title and page_title:
|
|
184
184
|
if not self._check_title_match(cited_title, page_title, page_description):
|
|
185
|
+
from utils.error_utils import format_title_mismatch
|
|
185
186
|
errors.append({
|
|
186
187
|
"warning_type": "title",
|
|
187
|
-
"warning_details":
|
|
188
|
+
"warning_details": format_title_mismatch(cited_title, page_title)
|
|
188
189
|
})
|
|
189
190
|
|
|
190
191
|
# Check if this is a documentation page for the cited topic
|
|
@@ -201,9 +202,13 @@ class WebPageChecker:
|
|
|
201
202
|
if cited_authors:
|
|
202
203
|
author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
|
|
203
204
|
if not self._check_author_match(author_str, site_info, web_url):
|
|
205
|
+
from utils.error_utils import format_three_line_mismatch
|
|
206
|
+
left = author_str
|
|
207
|
+
right = site_info.get('organization', 'unknown')
|
|
208
|
+
details = format_three_line_mismatch("Author/organization mismatch", left, right)
|
|
204
209
|
errors.append({
|
|
205
210
|
"warning_type": "author",
|
|
206
|
-
"warning_details":
|
|
211
|
+
"warning_details": details
|
|
207
212
|
})
|
|
208
213
|
|
|
209
214
|
logger.debug(f"Web page verification completed for: {web_url}")
|
|
@@ -352,12 +352,15 @@ class ParallelReferenceProcessor:
|
|
|
352
352
|
error_type = error.get('error_type') or error.get('warning_type')
|
|
353
353
|
error_details = error.get('error_details') or error.get('warning_details', 'Unknown error')
|
|
354
354
|
|
|
355
|
+
from utils.error_utils import print_labeled_multiline
|
|
356
|
+
|
|
355
357
|
if error_type == 'arxiv_id':
|
|
358
|
+
# Keep existing style for arXiv ID errors
|
|
356
359
|
print(f" ❌ {error_details}")
|
|
357
360
|
elif 'error_type' in error:
|
|
358
|
-
|
|
361
|
+
print_labeled_multiline("❌ Error", error_details)
|
|
359
362
|
else:
|
|
360
|
-
|
|
363
|
+
print_labeled_multiline("⚠️ Warning", error_details)
|
|
361
364
|
|
|
362
365
|
# Show timing info for slow references
|
|
363
366
|
if result.processing_time > 5.0:
|