academic-refchecker 1.2.43__tar.gz → 1.2.45__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {academic_refchecker-1.2.43/src/academic_refchecker.egg-info → academic_refchecker-1.2.45}/PKG-INFO +25 -9
  2. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/README.md +24 -8
  3. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/__version__.py +1 -1
  4. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45/src/academic_refchecker.egg-info}/PKG-INFO +25 -9
  5. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/crossref.py +6 -7
  6. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/github_checker.py +13 -3
  7. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/openalex.py +6 -7
  8. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/openreview_checker.py +7 -4
  9. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/semantic_scholar.py +9 -8
  10. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/webpage_checker.py +7 -2
  11. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/core/parallel_processor.py +5 -2
  12. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/core/refchecker.py +53 -44
  13. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/doi_utils.py +6 -12
  14. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/error_utils.py +145 -3
  15. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/text_utils.py +115 -9
  16. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/url_utils.py +17 -0
  17. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/LICENSE +0 -0
  18. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/MANIFEST.in +0 -0
  19. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/pyproject.toml +0 -0
  20. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/requirements.txt +0 -0
  21. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/scripts/download_db.py +0 -0
  22. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/scripts/run_tests.py +0 -0
  23. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/scripts/start_vllm_server.py +0 -0
  24. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/setup.cfg +0 -0
  25. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/__init__.py +0 -0
  26. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
  27. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
  28. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
  29. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/requires.txt +0 -0
  30. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/academic_refchecker.egg-info/top_level.txt +0 -0
  31. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/__init__.py +0 -0
  32. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/enhanced_hybrid_checker.py +0 -0
  33. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/checkers/local_semantic_scholar.py +0 -0
  34. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/config/__init__.py +0 -0
  35. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/config/logging.conf +0 -0
  36. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/config/settings.py +0 -0
  37. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/core/__init__.py +0 -0
  38. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/core/db_connection_pool.py +0 -0
  39. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/database/__init__.py +0 -0
  40. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/database/download_semantic_scholar_db.py +0 -0
  41. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/llm/__init__.py +0 -0
  42. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/llm/base.py +0 -0
  43. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/llm/providers.py +0 -0
  44. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/scripts/__init__.py +0 -0
  45. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/scripts/start_vllm_server.py +0 -0
  46. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/services/__init__.py +0 -0
  47. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/services/pdf_processor.py +0 -0
  48. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/__init__.py +0 -0
  49. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/arxiv_utils.py +0 -0
  50. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/author_utils.py +0 -0
  51. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/biblatex_parser.py +0 -0
  52. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/bibliography_utils.py +0 -0
  53. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/bibtex_parser.py +0 -0
  54. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/config_validator.py +0 -0
  55. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/db_utils.py +0 -0
  56. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/mock_objects.py +0 -0
  57. {academic_refchecker-1.2.43 → academic_refchecker-1.2.45}/src/utils/unicode_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.43
3
+ Version: 1.2.45
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -78,7 +78,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
78
78
  Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
79
79
  ArXiv URL: https://arxiv.org/abs/1610.10099
80
80
  DOI URL: https://doi.org/10.48550/arxiv.1610.10099
81
- ⚠️ Warning: Year mismatch: cited as 2017 but actually 2016
81
+ ⚠️ Warning: Year mismatch:
82
+ cited: '2017'
83
+ actual: '2016'
82
84
 
83
85
  [2/45] Effective approaches to attention-based neural machine translation
84
86
  Minh-Thang Luong, Hieu Pham, Christopher D. Manning
@@ -87,7 +89,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
87
89
  Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
88
90
  ArXiv URL: https://arxiv.org/abs/1508.04025
89
91
  DOI URL: https://doi.org/10.18653/v1/d15-1166
90
- ❌ Error: First author mismatch: 'Minh-Thang Luong' vs 'Thang Luong'
92
+ ❌ Error: First author mismatch:
93
+ cited: 'Minh-Thang Luong'
94
+ actual: 'Thang Luong'
91
95
 
92
96
  [3/45] Deep Residual Learning for Image Recognition
93
97
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
@@ -98,7 +102,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
98
102
  Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
99
103
  ArXiv URL: https://arxiv.org/abs/1512.03385
100
104
  DOI URL: https://doi.org/10.1109/CVPR.2016.90
101
- ❌ Error: DOI mismatch: cited as '10.1109/CVPR.2016.91' but actually '10.1109/CVPR.2016.90'
105
+ ❌ Error: DOI mismatch:
106
+ cited: '10.1109/CVPR.2016.91'
107
+ actual: '10.1109/CVPR.2016.90'
102
108
 
103
109
  ============================================================
104
110
  📋 SUMMARY
@@ -382,7 +388,9 @@ This enhanced URL display helps users access multiple authoritative sources for
382
388
  Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
383
389
  ArXiv URL: https://arxiv.org/abs/2312.02119
384
390
  DOI URL: https://doi.org/10.48550/arxiv.2312.02119
385
- ❌ Error: First author mismatch: 'T. Xie' vs 'Zhao Xu'
391
+ ❌ Error: First author mismatch:
392
+ cited: 'T. Xie'
393
+ actual: 'Zhao Xu'
386
394
  ```
387
395
  - `title`: Title discrepancies
388
396
  ```
@@ -392,7 +400,9 @@ This enhanced URL display helps users access multiple authoritative sources for
392
400
  Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
393
401
  ArXiv URL: https://arxiv.org/abs/1810.04805
394
402
  DOI URL: https://doi.org/10.18653/v1/n19-1423
395
- ❌ Error: Title mismatch: cited as 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding' but actually 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
403
+ ❌ Error: Title mismatch:
404
+ cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
405
+ actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
396
406
  ```
397
407
  - `arxiv_id`: Incorrect URLs or arXiv IDs
398
408
  ```
@@ -415,7 +425,9 @@ This enhanced URL display helps users access multiple authoritative sources for
415
425
  Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
416
426
  ArXiv URL: https://arxiv.org/abs/1706.03762
417
427
  DOI URL: https://doi.org/10.48550/arXiv.1706.03762
418
- ❌ Error: DOI mismatch: cited as '10.5555/3295222.3295349' but actually '10.48550/arXiv.1706.03762'
428
+ ❌ Error: DOI mismatch:
429
+ cited: '10.5555/3295222.3295349'
430
+ actual: '10.48550/arXiv.1706.03762'
419
431
  ```
420
432
 
421
433
  - **⚠️ Warnings**: Minor issues that may need attention
@@ -428,7 +440,9 @@ This enhanced URL display helps users access multiple authoritative sources for
428
440
  Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
429
441
  ArXiv URL: https://arxiv.org/abs/2310.03684
430
442
  DOI URL: https://doi.org/10.48550/arxiv.2310.03684
431
- ⚠️ Warning: Year mismatch: cited as 2024 but actually 2023
443
+ ⚠️ Warning: Year mismatch:
444
+ cited: '2024'
445
+ actual: '2023'
432
446
  ```
433
447
  - `venue`: Venue format variations
434
448
  ```
@@ -439,7 +453,9 @@ This enhanced URL display helps users access multiple authoritative sources for
439
453
  Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
440
454
  ArXiv URL: https://arxiv.org/abs/2403.02151
441
455
  DOI URL: https://doi.org/10.48550/arxiv.2403.02151
442
- ⚠️ Warning: Venue mismatch: cited as 'arXiv, 2024' but actually 'Neural Information Processing Systems'
456
+ ⚠️ Warning: Venue mismatch:
457
+ cited: 'arXiv, 2024'
458
+ actual: 'Neural Information Processing Systems'
443
459
  ```
444
460
 
445
461
  - **❓ Unverified**: References that couldn't be verified with any of the checker APIs
@@ -17,7 +17,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
17
17
  Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
18
18
  ArXiv URL: https://arxiv.org/abs/1610.10099
19
19
  DOI URL: https://doi.org/10.48550/arxiv.1610.10099
20
- ⚠️ Warning: Year mismatch: cited as 2017 but actually 2016
20
+ ⚠️ Warning: Year mismatch:
21
+ cited: '2017'
22
+ actual: '2016'
21
23
 
22
24
  [2/45] Effective approaches to attention-based neural machine translation
23
25
  Minh-Thang Luong, Hieu Pham, Christopher D. Manning
@@ -26,7 +28,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
26
28
  Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
27
29
  ArXiv URL: https://arxiv.org/abs/1508.04025
28
30
  DOI URL: https://doi.org/10.18653/v1/d15-1166
29
- ❌ Error: First author mismatch: 'Minh-Thang Luong' vs 'Thang Luong'
31
+ ❌ Error: First author mismatch:
32
+ cited: 'Minh-Thang Luong'
33
+ actual: 'Thang Luong'
30
34
 
31
35
  [3/45] Deep Residual Learning for Image Recognition
32
36
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
@@ -37,7 +41,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
37
41
  Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
38
42
  ArXiv URL: https://arxiv.org/abs/1512.03385
39
43
  DOI URL: https://doi.org/10.1109/CVPR.2016.90
40
- ❌ Error: DOI mismatch: cited as '10.1109/CVPR.2016.91' but actually '10.1109/CVPR.2016.90'
44
+ ❌ Error: DOI mismatch:
45
+ cited: '10.1109/CVPR.2016.91'
46
+ actual: '10.1109/CVPR.2016.90'
41
47
 
42
48
  ============================================================
43
49
  📋 SUMMARY
@@ -321,7 +327,9 @@ This enhanced URL display helps users access multiple authoritative sources for
321
327
  Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
322
328
  ArXiv URL: https://arxiv.org/abs/2312.02119
323
329
  DOI URL: https://doi.org/10.48550/arxiv.2312.02119
324
- ❌ Error: First author mismatch: 'T. Xie' vs 'Zhao Xu'
330
+ ❌ Error: First author mismatch:
331
+ cited: 'T. Xie'
332
+ actual: 'Zhao Xu'
325
333
  ```
326
334
  - `title`: Title discrepancies
327
335
  ```
@@ -331,7 +339,9 @@ This enhanced URL display helps users access multiple authoritative sources for
331
339
  Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
332
340
  ArXiv URL: https://arxiv.org/abs/1810.04805
333
341
  DOI URL: https://doi.org/10.18653/v1/n19-1423
334
- ❌ Error: Title mismatch: cited as 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding' but actually 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
342
+ ❌ Error: Title mismatch:
343
+ cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
344
+ actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
335
345
  ```
336
346
  - `arxiv_id`: Incorrect URLs or arXiv IDs
337
347
  ```
@@ -354,7 +364,9 @@ This enhanced URL display helps users access multiple authoritative sources for
354
364
  Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
355
365
  ArXiv URL: https://arxiv.org/abs/1706.03762
356
366
  DOI URL: https://doi.org/10.48550/arXiv.1706.03762
357
- ❌ Error: DOI mismatch: cited as '10.5555/3295222.3295349' but actually '10.48550/arXiv.1706.03762'
367
+ ❌ Error: DOI mismatch:
368
+ cited: '10.5555/3295222.3295349'
369
+ actual: '10.48550/arXiv.1706.03762'
358
370
  ```
359
371
 
360
372
  - **⚠️ Warnings**: Minor issues that may need attention
@@ -367,7 +379,9 @@ This enhanced URL display helps users access multiple authoritative sources for
367
379
  Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
368
380
  ArXiv URL: https://arxiv.org/abs/2310.03684
369
381
  DOI URL: https://doi.org/10.48550/arxiv.2310.03684
370
- ⚠️ Warning: Year mismatch: cited as 2024 but actually 2023
382
+ ⚠️ Warning: Year mismatch:
383
+ cited: '2024'
384
+ actual: '2023'
371
385
  ```
372
386
  - `venue`: Venue format variations
373
387
  ```
@@ -378,7 +392,9 @@ This enhanced URL display helps users access multiple authoritative sources for
378
392
  Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
379
393
  ArXiv URL: https://arxiv.org/abs/2403.02151
380
394
  DOI URL: https://doi.org/10.48550/arxiv.2403.02151
381
- ⚠️ Warning: Venue mismatch: cited as 'arXiv, 2024' but actually 'Neural Information Processing Systems'
395
+ ⚠️ Warning: Venue mismatch:
396
+ cited: 'arXiv, 2024'
397
+ actual: 'Neural Information Processing Systems'
382
398
  ```
383
399
 
384
400
  - **❓ Unverified**: References that couldn't be verified with any of the checker APIs
@@ -1,3 +1,3 @@
1
1
  """Version information for RefChecker."""
2
2
 
3
- __version__ = "1.2.43"
3
+ __version__ = "1.2.45"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.43
3
+ Version: 1.2.45
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -78,7 +78,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
78
78
  Verified URL: https://www.semanticscholar.org/paper/5f4ac1ac7ca4b17d3db1b52d9aafd9e8b26c0d7
79
79
  ArXiv URL: https://arxiv.org/abs/1610.10099
80
80
  DOI URL: https://doi.org/10.48550/arxiv.1610.10099
81
- ⚠️ Warning: Year mismatch: cited as 2017 but actually 2016
81
+ ⚠️ Warning: Year mismatch:
82
+ cited: '2017'
83
+ actual: '2016'
82
84
 
83
85
  [2/45] Effective approaches to attention-based neural machine translation
84
86
  Minh-Thang Luong, Hieu Pham, Christopher D. Manning
@@ -87,7 +89,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
87
89
  Verified URL: https://www.semanticscholar.org/paper/93499a7c7f699b6630a86fad964536f9423bb6d0
88
90
  ArXiv URL: https://arxiv.org/abs/1508.04025
89
91
  DOI URL: https://doi.org/10.18653/v1/d15-1166
90
- ❌ Error: First author mismatch: 'Minh-Thang Luong' vs 'Thang Luong'
92
+ ❌ Error: First author mismatch:
93
+ cited: 'Minh-Thang Luong'
94
+ actual: 'Thang Luong'
91
95
 
92
96
  [3/45] Deep Residual Learning for Image Recognition
93
97
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
@@ -98,7 +102,9 @@ A comprehensive tool for validating reference accuracy in academic papers, usefu
98
102
  Verified URL: https://www.semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d
99
103
  ArXiv URL: https://arxiv.org/abs/1512.03385
100
104
  DOI URL: https://doi.org/10.1109/CVPR.2016.90
101
- ❌ Error: DOI mismatch: cited as '10.1109/CVPR.2016.91' but actually '10.1109/CVPR.2016.90'
105
+ ❌ Error: DOI mismatch:
106
+ cited: '10.1109/CVPR.2016.91'
107
+ actual: '10.1109/CVPR.2016.90'
102
108
 
103
109
  ============================================================
104
110
  📋 SUMMARY
@@ -382,7 +388,9 @@ This enhanced URL display helps users access multiple authoritative sources for
382
388
  Verified URL: https://www.semanticscholar.org/paper/a1b2c3d4e5f6789012345678901234567890abcd
383
389
  ArXiv URL: https://arxiv.org/abs/2312.02119
384
390
  DOI URL: https://doi.org/10.48550/arxiv.2312.02119
385
- ❌ Error: First author mismatch: 'T. Xie' vs 'Zhao Xu'
391
+ ❌ Error: First author mismatch:
392
+ cited: 'T. Xie'
393
+ actual: 'Zhao Xu'
386
394
  ```
387
395
  - `title`: Title discrepancies
388
396
  ```
@@ -392,7 +400,9 @@ This enhanced URL display helps users access multiple authoritative sources for
392
400
  Verified URL: https://www.semanticscholar.org/paper/df2b0e26d0599ce3e70df8a9da02e51594e0e992
393
401
  ArXiv URL: https://arxiv.org/abs/1810.04805
394
402
  DOI URL: https://doi.org/10.18653/v1/n19-1423
395
- ❌ Error: Title mismatch: cited as 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding' but actually 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
403
+ ❌ Error: Title mismatch:
404
+ cited: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'
405
+ actual: 'BERT: Pre-training of Deep Bidirectional Transformers for Language Comprehension'
396
406
  ```
397
407
  - `arxiv_id`: Incorrect URLs or arXiv IDs
398
408
  ```
@@ -415,7 +425,9 @@ This enhanced URL display helps users access multiple authoritative sources for
415
425
  Verified URL: https://www.semanticscholar.org/paper/204e3073870fae3d05bcbc2f6a8e263d9b72e776
416
426
  ArXiv URL: https://arxiv.org/abs/1706.03762
417
427
  DOI URL: https://doi.org/10.48550/arXiv.1706.03762
418
- ❌ Error: DOI mismatch: cited as '10.5555/3295222.3295349' but actually '10.48550/arXiv.1706.03762'
428
+ ❌ Error: DOI mismatch:
429
+ cited: '10.5555/3295222.3295349'
430
+ actual: '10.48550/arXiv.1706.03762'
419
431
  ```
420
432
 
421
433
  - **⚠️ Warnings**: Minor issues that may need attention
@@ -428,7 +440,9 @@ This enhanced URL display helps users access multiple authoritative sources for
428
440
  Verified URL: https://www.semanticscholar.org/paper/f1a2b3c4d5e6f7890123456789012345678901ab
429
441
  ArXiv URL: https://arxiv.org/abs/2310.03684
430
442
  DOI URL: https://doi.org/10.48550/arxiv.2310.03684
431
- ⚠️ Warning: Year mismatch: cited as 2024 but actually 2023
443
+ ⚠️ Warning: Year mismatch:
444
+ cited: '2024'
445
+ actual: '2023'
432
446
  ```
433
447
  - `venue`: Venue format variations
434
448
  ```
@@ -439,7 +453,9 @@ This enhanced URL display helps users access multiple authoritative sources for
439
453
  Verified URL: https://www.semanticscholar.org/paper/c1d2e3f4a5b6c7d8e9f0123456789012345678ab
440
454
  ArXiv URL: https://arxiv.org/abs/2403.02151
441
455
  DOI URL: https://doi.org/10.48550/arxiv.2403.02151
442
- ⚠️ Warning: Venue mismatch: cited as 'arXiv, 2024' but actually 'Neural Information Processing Systems'
456
+ ⚠️ Warning: Venue mismatch:
457
+ cited: 'arXiv, 2024'
458
+ actual: 'Neural Information Processing Systems'
443
459
  ```
444
460
 
445
461
  - **❓ Unverified**: References that couldn't be verified with any of the checker APIs
@@ -31,6 +31,7 @@ import re
31
31
  from typing import Dict, List, Tuple, Optional, Any, Union
32
32
  from urllib.parse import quote_plus
33
33
  from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
34
+ from utils.error_utils import format_year_mismatch, format_doi_mismatch
34
35
  from config.settings import get_config
35
36
 
36
37
  # Set up logging
@@ -478,21 +479,19 @@ class CrossRefReferenceChecker:
478
479
  if year and work_year and year != work_year:
479
480
  errors.append({
480
481
  'warning_type': 'year',
481
- 'warning_details': f"Year mismatch: cited as {year} but actually {work_year}",
482
+ 'warning_details': format_year_mismatch(year, work_year),
482
483
  'ref_year_correct': work_year
483
484
  })
484
485
 
485
486
  # Verify DOI
486
487
  work_doi = work_data.get('DOI')
487
488
  if doi and work_doi:
488
- # Normalize DOIs for comparison (remove URL prefix and trailing periods)
489
- cited_doi_clean = doi.replace('https://doi.org/', '').replace('http://doi.org/', '').strip().rstrip('.')
490
- work_doi_clean = work_doi.replace('https://doi.org/', '').replace('http://doi.org/', '').strip().rstrip('.')
491
-
492
- if cited_doi_clean.lower() != work_doi_clean.lower():
489
+ # Compare DOIs using the proper comparison function
490
+ from utils.doi_utils import compare_dois
491
+ if not compare_dois(doi, work_doi):
493
492
  errors.append({
494
493
  'error_type': 'doi',
495
- 'error_details': f"DOI mismatch: cited as {doi} but actually {work_doi}",
494
+ 'error_details': format_doi_mismatch(doi, work_doi),
496
495
  'ref_doi_correct': work_doi
497
496
  })
498
497
 
@@ -169,9 +169,14 @@ class GitHubChecker:
169
169
  if cited_title:
170
170
  title_match = self._check_title_match(cited_title, actual_name, actual_description)
171
171
  if not title_match:
172
+ from utils.error_utils import format_title_mismatch
173
+ details = format_title_mismatch(cited_title, actual_name)
174
+ if actual_description:
175
+ snippet = actual_description[:100] + ('...' if len(actual_description) > 100 else '')
176
+ details += f" ({snippet})"
172
177
  errors.append({
173
178
  "warning_type": "title",
174
- "warning_details": f"Title mismatch: cited as '{cited_title}' but repository is '{actual_name}' ({actual_description[:100]}{'...' if len(actual_description) > 100 else ''})"
179
+ "warning_details": details
175
180
  })
176
181
 
177
182
  # Verify authors
@@ -180,9 +185,13 @@ class GitHubChecker:
180
185
  author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
181
186
  author_match = self._check_author_match(author_str, actual_owner, actual_owner_name)
182
187
  if not author_match:
188
+ from utils.error_utils import format_three_line_mismatch
189
+ left = author_str
190
+ right = f"{actual_owner} ({actual_owner_name})" if actual_owner_name else actual_owner
191
+ details = format_three_line_mismatch("Author mismatch", left, right)
183
192
  errors.append({
184
193
  "warning_type": "author",
185
- "warning_details": f"Author mismatch: cited as '{author_str}' but repository owner is '{actual_owner}' ({actual_owner_name})"
194
+ "warning_details": details
186
195
  })
187
196
 
188
197
  # Verify year
@@ -191,9 +200,10 @@ class GitHubChecker:
191
200
  try:
192
201
  cited_year_int = int(cited_year)
193
202
  if cited_year_int < creation_year:
203
+ from utils.error_utils import format_year_mismatch
194
204
  errors.append({
195
205
  "warning_type": "year",
196
- "warning_details": f"Year mismatch: cited as {cited_year} but repository created in {creation_year}",
206
+ "warning_details": format_year_mismatch(cited_year, creation_year),
197
207
  "ref_year_correct": str(creation_year)
198
208
  })
199
209
  except (ValueError, TypeError):
@@ -33,6 +33,7 @@ import re
33
33
  from typing import Dict, List, Tuple, Optional, Any, Union
34
34
  from urllib.parse import quote_plus
35
35
  from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
36
+ from utils.error_utils import format_year_mismatch, format_doi_mismatch
36
37
  from config.settings import get_config
37
38
 
38
39
  # Set up logging
@@ -448,7 +449,7 @@ class OpenAlexReferenceChecker:
448
449
  if year and work_year and year != work_year:
449
450
  errors.append({
450
451
  'warning_type': 'year',
451
- 'warning_details': f"Year mismatch: cited as {year} but actually {work_year}",
452
+ 'warning_details': format_year_mismatch(year, work_year),
452
453
  'ref_year_correct': work_year
453
454
  })
454
455
 
@@ -458,14 +459,12 @@ class OpenAlexReferenceChecker:
458
459
  work_doi = work_data['ids']['doi']
459
460
 
460
461
  if doi and work_doi:
461
- # Normalize DOIs for comparison (remove URL prefix and trailing periods)
462
- cited_doi_clean = doi.replace('https://doi.org/', '').replace('http://doi.org/', '').strip().rstrip('.')
463
- work_doi_clean = work_doi.replace('https://doi.org/', '').replace('http://doi.org/', '').strip().rstrip('.')
464
-
465
- if cited_doi_clean.lower() != work_doi_clean.lower():
462
+ # Compare DOIs using the proper comparison function
463
+ from utils.doi_utils import compare_dois
464
+ if not compare_dois(doi, work_doi):
466
465
  errors.append({
467
466
  'error_type': 'doi',
468
- 'error_details': f"DOI mismatch: cited as {doi} but actually {work_doi}",
467
+ 'error_details': format_doi_mismatch(doi, work_doi),
469
468
  'ref_doi_correct': work_doi
470
469
  })
471
470
 
@@ -425,9 +425,11 @@ class OpenReviewReferenceChecker:
425
425
  if cited_title and paper_title:
426
426
  similarity = calculate_title_similarity(cited_title, paper_title)
427
427
  if similarity < 0.7: # Using a reasonable threshold
428
+ from utils.error_utils import format_title_mismatch
429
+ details = format_title_mismatch(cited_title, paper_title) + f" (similarity: {similarity:.2f})"
428
430
  errors.append({
429
431
  "warning_type": "title",
430
- "warning_details": f"Title mismatch: cited as '{cited_title}' but OpenReview shows '{paper_title}' (similarity: {similarity:.2f})"
432
+ "warning_details": details
431
433
  })
432
434
 
433
435
  # Check authors
@@ -460,9 +462,10 @@ class OpenReviewReferenceChecker:
460
462
 
461
463
  is_different, year_message = is_year_substantially_different(cited_year_int, paper_year_int)
462
464
  if is_different and year_message:
465
+ from utils.error_utils import format_year_mismatch
463
466
  errors.append({
464
467
  "warning_type": "year",
465
- "warning_details": year_message
468
+ "warning_details": format_year_mismatch(cited_year_int, paper_year_int)
466
469
  })
467
470
  except (ValueError, TypeError):
468
471
  pass # Skip year validation if conversion fails
@@ -473,10 +476,10 @@ class OpenReviewReferenceChecker:
473
476
 
474
477
  if cited_venue and paper_venue:
475
478
  if are_venues_substantially_different(cited_venue, paper_venue):
476
- from utils.error_utils import clean_venue_for_comparison
479
+ from utils.error_utils import format_venue_mismatch
477
480
  errors.append({
478
481
  "warning_type": "venue",
479
- "warning_details": f"Venue mismatch: cited as '{clean_venue_for_comparison(cited_venue)}' but OpenReview shows '{clean_venue_for_comparison(paper_venue)}'"
482
+ "warning_details": format_venue_mismatch(cited_venue, paper_venue)
480
483
  })
481
484
 
482
485
  # Create verified data structure
@@ -29,6 +29,7 @@ import logging
29
29
  import re
30
30
  from typing import Dict, List, Tuple, Optional, Any, Union
31
31
  from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, are_venues_substantially_different, calculate_title_similarity, compare_authors, clean_title_for_search
32
+ from utils.error_utils import format_title_mismatch
32
33
  from config.settings import get_config
33
34
 
34
35
  # Set up logging
@@ -471,7 +472,7 @@ class NonArxivReferenceChecker:
471
472
  if found_title and title_similarity < SIMILARITY_THRESHOLD:
472
473
  errors.append({
473
474
  'error_type': 'title',
474
- 'error_details': f"Title mismatch: cited as '{title}' but actually '{found_title}'",
475
+ 'error_details': format_title_mismatch(title, found_title),
475
476
  'ref_title_correct': paper_data.get('title', '')
476
477
  })
477
478
 
@@ -525,9 +526,10 @@ class NonArxivReferenceChecker:
525
526
  is_different, warning_message = is_year_substantially_different(year, paper_year, context)
526
527
 
527
528
  if is_different and warning_message:
529
+ from utils.error_utils import format_year_mismatch
528
530
  errors.append({
529
531
  'warning_type': 'year',
530
- 'warning_details': warning_message,
532
+ 'warning_details': format_year_mismatch(year, paper_year),
531
533
  'ref_year_correct': paper_year
532
534
  })
533
535
 
@@ -591,14 +593,13 @@ class NonArxivReferenceChecker:
591
593
  if external_ids and 'DOI' in external_ids:
592
594
  paper_doi = external_ids['DOI']
593
595
 
594
- # Compare DOIs, but strip hash fragments and trailing periods for comparison
595
- cited_doi_clean = doi.split('#')[0].rstrip('.') if doi else ''
596
- paper_doi_clean = paper_doi.split('#')[0].rstrip('.') if paper_doi else ''
597
-
598
- if cited_doi_clean and paper_doi_clean and cited_doi_clean.lower() != paper_doi_clean.lower():
596
+ # Compare DOIs using the proper comparison function
597
+ from utils.doi_utils import compare_dois
598
+ if doi and paper_doi and not compare_dois(doi, paper_doi):
599
+ from utils.error_utils import format_doi_mismatch
599
600
  errors.append({
600
601
  'error_type': 'doi',
601
- 'error_details': f"DOI mismatch: cited as {doi} but actually {paper_doi}",
602
+ 'error_details': format_doi_mismatch(doi, paper_doi),
602
603
  'ref_doi_correct': paper_doi
603
604
  })
604
605
 
@@ -182,9 +182,10 @@ class WebPageChecker:
182
182
  # Check title match
183
183
  if cited_title and page_title:
184
184
  if not self._check_title_match(cited_title, page_title, page_description):
185
+ from utils.error_utils import format_title_mismatch
185
186
  errors.append({
186
187
  "warning_type": "title",
187
- "warning_details": f"Title mismatch: cited as '{cited_title}' but page title is '{page_title}'"
188
+ "warning_details": format_title_mismatch(cited_title, page_title)
188
189
  })
189
190
 
190
191
  # Check if this is a documentation page for the cited topic
@@ -201,9 +202,13 @@ class WebPageChecker:
201
202
  if cited_authors:
202
203
  author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
203
204
  if not self._check_author_match(author_str, site_info, web_url):
205
+ from utils.error_utils import format_three_line_mismatch
206
+ left = author_str
207
+ right = site_info.get('organization', 'unknown')
208
+ details = format_three_line_mismatch("Author/organization mismatch", left, right)
204
209
  errors.append({
205
210
  "warning_type": "author",
206
- "warning_details": f"Author/organization mismatch: cited as '{author_str}' but page is from '{site_info.get('organization', 'unknown')}'"
211
+ "warning_details": details
207
212
  })
208
213
 
209
214
  logger.debug(f"Web page verification completed for: {web_url}")
@@ -352,12 +352,15 @@ class ParallelReferenceProcessor:
352
352
  error_type = error.get('error_type') or error.get('warning_type')
353
353
  error_details = error.get('error_details') or error.get('warning_details', 'Unknown error')
354
354
 
355
+ from utils.error_utils import print_labeled_multiline
356
+
355
357
  if error_type == 'arxiv_id':
358
+ # Keep existing style for arXiv ID errors
356
359
  print(f" ❌ {error_details}")
357
360
  elif 'error_type' in error:
358
- print(f" ❌ Error: {error_details}")
361
+ print_labeled_multiline("❌ Error", error_details)
359
362
  else:
360
- print(f" ⚠️ Warning: {error_details}")
363
+ print_labeled_multiline("⚠️ Warning", error_details)
361
364
 
362
365
  # Show timing info for slow references
363
366
  if result.processing_time > 5.0: