academic-refchecker 1.2.68__py3-none-any.whl → 1.2.69__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/METADATA +39 -13
- {academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/RECORD +12 -12
- backend/refchecker_wrapper.py +6 -6
- refchecker/__version__.py +1 -3
- refchecker/checkers/local_semantic_scholar.py +19 -15
- refchecker/checkers/semantic_scholar.py +22 -26
- refchecker/core/refchecker.py +18 -19
- refchecker/utils/error_utils.py +63 -2
- {academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/WHEEL +0 -0
- {academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/entry_points.txt +0 -0
- {academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/licenses/LICENSE +0 -0
- {academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.68
|
|
3
|
+
Version: 1.2.69
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -188,18 +188,45 @@ RefChecker also includes a modern web interface with real-time progress updates,
|
|
|
188
188
|
|
|
189
189
|

|
|
190
190
|
|
|
191
|
-
###
|
|
192
|
-
|
|
193
|
-
- **Python 3.8+** with RefChecker installed (`pip install academic-refchecker[webui]`)
|
|
194
|
-
- **Node.js 18+** and npm
|
|
191
|
+
### Option 1: Install from PyPI (Recommended)
|
|
195
192
|
|
|
196
|
-
|
|
193
|
+
The simplest way to run the Web UI is using the pip-installed package:
|
|
197
194
|
|
|
198
195
|
```bash
|
|
199
|
-
# Install
|
|
196
|
+
# Install RefChecker with Web UI support
|
|
200
197
|
pip install academic-refchecker[llm,webui]
|
|
201
198
|
|
|
202
|
-
#
|
|
199
|
+
# Start the web server
|
|
200
|
+
refchecker-webui
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Then open **http://localhost:8000** in your browser.
|
|
204
|
+
|
|
205
|
+
The `refchecker-webui` command starts a complete web server with both the API backend and the pre-built frontend.
|
|
206
|
+
|
|
207
|
+
**Options:**
|
|
208
|
+
```bash
|
|
209
|
+
refchecker-webui --port 8080 # Use a different port
|
|
210
|
+
refchecker-webui --host 0.0.0.0 # Allow external connections
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Option 2: Run from Cloned Repository (Development)
|
|
214
|
+
|
|
215
|
+
If you're developing or modifying the Web UI:
|
|
216
|
+
|
|
217
|
+
**Prerequisites:**
|
|
218
|
+
- **Python 3.8+** with dependencies installed
|
|
219
|
+
- **Node.js 18+** and npm
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
# Clone the repository
|
|
223
|
+
git clone https://github.com/markrussinovich/refchecker.git
|
|
224
|
+
cd refchecker
|
|
225
|
+
|
|
226
|
+
# Install Python dependencies
|
|
227
|
+
pip install -e ".[llm,webui]"
|
|
228
|
+
|
|
229
|
+
# Install and run the frontend development server
|
|
203
230
|
cd web-ui
|
|
204
231
|
npm install # First time only
|
|
205
232
|
npm start # Starts both backend and frontend
|
|
@@ -207,15 +234,14 @@ npm start # Starts both backend and frontend
|
|
|
207
234
|
|
|
208
235
|
Then open **http://localhost:5173** in your browser.
|
|
209
236
|
|
|
210
|
-
|
|
237
|
+
**Alternative: Start Servers Separately**
|
|
211
238
|
|
|
212
|
-
|
|
239
|
+
*Terminal 1 - Backend:*
|
|
213
240
|
```bash
|
|
214
|
-
|
|
215
|
-
# Or: python -m uvicorn backend.main:app --port 8000
|
|
241
|
+
python -m uvicorn backend.main:app --reload --port 8000
|
|
216
242
|
```
|
|
217
243
|
|
|
218
|
-
|
|
244
|
+
*Terminal 2 - Frontend:*
|
|
219
245
|
```bash
|
|
220
246
|
cd web-ui
|
|
221
247
|
npm run dev
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
academic_refchecker-1.2.68.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
|
|
1
|
+
academic_refchecker-1.2.69.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
|
|
2
2
|
backend/__init__.py,sha256=TFVkOx5tSp3abty15RzUbaSwQ9ZD0kfUn7PDh63xkYY,521
|
|
3
3
|
backend/__main__.py,sha256=74V7yUMsRSZaaRyXYm-rZVc3TVUcUgwsoTQTUbV5EqM,211
|
|
4
4
|
backend/cli.py,sha256=xV3l9M5OdNQQYOcrzj2d_7RmCgj7CXP_1oi0TPe6zNo,1672
|
|
@@ -6,7 +6,7 @@ backend/concurrency.py,sha256=2KY9I_8dDkyl_HTGx27ZxU4rFXx2vqbGOlo5RrRbPjA,3223
|
|
|
6
6
|
backend/database.py,sha256=1jLP1m9vNk5sEs4bh_xmX0T5ilZkUTX1c7nOVz5XnNc,30681
|
|
7
7
|
backend/main.py,sha256=ntz5PbEfG65ENFTHVQlY-c8hP5UPM_hdFjl60YMNh78,54371
|
|
8
8
|
backend/models.py,sha256=El2F-RTHgxQ7-WODmiYCpjsTFDpjwF9PBt-JDa_XipE,2591
|
|
9
|
-
backend/refchecker_wrapper.py,sha256=
|
|
9
|
+
backend/refchecker_wrapper.py,sha256=cgJpPFNGAKHaNKxnLY3r3RxlJVT-yRyr3rJFeMxoxAo,51873
|
|
10
10
|
backend/thumbnail.py,sha256=wPFXp3RlmcL9jVKZmSBRB7Pfy9Ti7nCnzNtL4osfNtM,17618
|
|
11
11
|
backend/websocket_manager.py,sha256=l-Wou-rKV6n7t6Gcf5fR6s_4G-mssSrba0davNnYS70,4247
|
|
12
12
|
backend/static/favicon.svg,sha256=R0oQauh16Uy0D7JlT27k-zdjJtrvfPKOe9La5vKYwuM,395
|
|
@@ -16,16 +16,16 @@ backend/static/assets/index-2P6L_39v.css,sha256=KC3Wa6jfD1qwmEoVpqTovlzf8fsn5oHY
|
|
|
16
16
|
backend/static/assets/index-hk21nqxR.js,sha256=z2agP8ZFYw4AfYi-GJ5E_8_k-lPF-frXOJtPk-I0hDs,369533
|
|
17
17
|
refchecker/__init__.py,sha256=Pg5MrtLxDBRcNYcI02N-bv3tzURVd1S3nQ8IyF7Zw7E,322
|
|
18
18
|
refchecker/__main__.py,sha256=agBbT9iKN0g2xXtRNCoh29Nr7z2n5vU-r0MCVJKi4tI,232
|
|
19
|
-
refchecker/__version__.py,sha256=
|
|
19
|
+
refchecker/__version__.py,sha256=cloSFZI6gJtk10frg47j2b9FYBRIoN_5QQ4lFZq639o,65
|
|
20
20
|
refchecker/checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
|
|
21
21
|
refchecker/checkers/crossref.py,sha256=88moAyTudBqf9SKqTQkNAq1yyuRe95f8r4EpmJznupQ,20937
|
|
22
22
|
refchecker/checkers/enhanced_hybrid_checker.py,sha256=2jIeUX7hankPok3M4de9o2bsJZ17ZomuLkdfdr9EV0s,28671
|
|
23
23
|
refchecker/checkers/github_checker.py,sha256=YJ2sLj22qezw3uWjA0jhtDO0fOW4HUwcVbv2DQ4LjR0,14277
|
|
24
|
-
refchecker/checkers/local_semantic_scholar.py,sha256=
|
|
24
|
+
refchecker/checkers/local_semantic_scholar.py,sha256=c-KUTh99s-Di71h-pzdrwlPgoSTwB-tgVAZnCrMFXmw,21011
|
|
25
25
|
refchecker/checkers/openalex.py,sha256=WEjEppQMbutPs8kWOSorCIoXWqpJ9o1CXUicThHSWYU,20120
|
|
26
26
|
refchecker/checkers/openreview_checker.py,sha256=0IHZe4Nscy8fle28rmhy1hhsofR5g0FFSakk8FFH_0A,40540
|
|
27
27
|
refchecker/checkers/pdf_paper_checker.py,sha256=lrg09poNJBz9FNMrUoEjQ6CJbdYZAVANw0bCaTSb5oo,19904
|
|
28
|
-
refchecker/checkers/semantic_scholar.py,sha256=
|
|
28
|
+
refchecker/checkers/semantic_scholar.py,sha256=yvatQM5fXdW0qagqrTUpgotd0RbT7N_pqaRNGfmQjJs,35613
|
|
29
29
|
refchecker/checkers/webpage_checker.py,sha256=A_d5kg3OOsyliC00OVq_l0J-RJ4Ln7hUoURk21aO2fs,43653
|
|
30
30
|
refchecker/config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
|
|
31
31
|
refchecker/config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
|
|
@@ -33,7 +33,7 @@ refchecker/config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c
|
|
|
33
33
|
refchecker/core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
|
|
34
34
|
refchecker/core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
|
|
35
35
|
refchecker/core/parallel_processor.py,sha256=HpVFEMwPBiP2FRjvGqlaXpjV5S0qP-hxdB_Wdl_lACo,17704
|
|
36
|
-
refchecker/core/refchecker.py,sha256=
|
|
36
|
+
refchecker/core/refchecker.py,sha256=nX8guDXFL1ZdT-K6KUJT_3iZjuoYsWj4e0rKrqd5VZA,287117
|
|
37
37
|
refchecker/database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
|
|
38
38
|
refchecker/database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
|
|
39
39
|
refchecker/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -52,13 +52,13 @@ refchecker/utils/bibtex_parser.py,sha256=xY0dEqT8lBZF-W21YRpG28lp_F2ikLan7nK70Wi
|
|
|
52
52
|
refchecker/utils/config_validator.py,sha256=rxf7K3DYmJ-BNPsmtaCNipY2BTVT-pJZ7wN-M9Y3GC8,11167
|
|
53
53
|
refchecker/utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
|
|
54
54
|
refchecker/utils/doi_utils.py,sha256=_7YvQ0DTOQBMIujUE0SdJicjPiAR3VETLU668GIji24,6094
|
|
55
|
-
refchecker/utils/error_utils.py,sha256=
|
|
55
|
+
refchecker/utils/error_utils.py,sha256=8TcfRUD6phZ7viPJrezQ4jKf_vE65lqEXZq5707eU6s,15425
|
|
56
56
|
refchecker/utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
|
|
57
57
|
refchecker/utils/text_utils.py,sha256=v5beDt_fyx4ETfTXLYrDMp3CuUGoDoLs7-d1H2GdySE,228585
|
|
58
58
|
refchecker/utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
|
|
59
59
|
refchecker/utils/url_utils.py,sha256=7b0rWCQJSajzqOvD7ghsBZPejiq6mUIz6SGhvU_WGDs,9441
|
|
60
|
-
academic_refchecker-1.2.68.dist-info/METADATA,sha256=…
|
|
61
|
-
academic_refchecker-1.2.68.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
62
|
-
academic_refchecker-1.2.68.dist-info/entry_points.txt,sha256=9cREsaKwlp05Ql0CBIjKrNHk5IG2cHY5LvJPsV2-SxA,108
|
|
63
|
-
academic_refchecker-1.2.68.dist-info/top_level.txt,sha256=FfNvrvpj25gfpUBjW0epvz7Qrdejhups5Za_DBiSRu4,19
|
|
64
|
-
academic_refchecker-1.2.68.dist-info/RECORD,,
|
|
60
|
+
academic_refchecker-1.2.69.dist-info/METADATA,sha256=vdhMaYS48pa0U9rLKlZiU4U1V9eTKmCOqB511WOkEQQ,26478
|
|
61
|
+
academic_refchecker-1.2.69.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
62
|
+
academic_refchecker-1.2.69.dist-info/entry_points.txt,sha256=9cREsaKwlp05Ql0CBIjKrNHk5IG2cHY5LvJPsV2-SxA,108
|
|
63
|
+
academic_refchecker-1.2.69.dist-info/top_level.txt,sha256=FfNvrvpj25gfpUBjW0epvz7Qrdejhups5Za_DBiSRu4,19
|
|
64
|
+
academic_refchecker-1.2.69.dist-info/RECORD,,
|
backend/refchecker_wrapper.py
CHANGED
|
@@ -171,18 +171,18 @@ class ProgressRefChecker:
|
|
|
171
171
|
"is_warning": is_warning, # Preserve warning_type as warning flag
|
|
172
172
|
})
|
|
173
173
|
|
|
174
|
-
# Determine status - items
|
|
175
|
-
warning_types = ['year', 'venue', 'author']
|
|
174
|
+
# Determine status - items originally from warning_type are warnings, items from error_type are errors
|
|
176
175
|
# Items originally from info_type are suggestions, not errors
|
|
177
176
|
# Items originally from warning_type are warnings, not errors
|
|
177
|
+
# Items with error_type (including year/venue/author when missing) are errors
|
|
178
178
|
has_errors = any(
|
|
179
|
-
e.get('error_type') not in ['unverified', 'info']
|
|
179
|
+
e.get('error_type') not in ['unverified', 'info']
|
|
180
180
|
and not e.get('is_suggestion')
|
|
181
181
|
and not e.get('is_warning')
|
|
182
182
|
for e in sanitized
|
|
183
183
|
)
|
|
184
184
|
has_warnings = any(
|
|
185
|
-
|
|
185
|
+
e.get('is_warning')
|
|
186
186
|
and not e.get('is_suggestion')
|
|
187
187
|
for e in sanitized
|
|
188
188
|
)
|
|
@@ -252,8 +252,8 @@ class ProgressRefChecker:
|
|
|
252
252
|
"suggestion_type": err.get('error_type') or 'info',
|
|
253
253
|
"suggestion_details": err.get('error_details', '')
|
|
254
254
|
})
|
|
255
|
-
elif err.get('is_warning')
|
|
256
|
-
#
|
|
255
|
+
elif err.get('is_warning'):
|
|
256
|
+
# Only items with is_warning flag (originally warning_type) go to warnings
|
|
257
257
|
formatted_warnings.append(err_obj)
|
|
258
258
|
elif err.get('error_type') == 'unverified':
|
|
259
259
|
formatted_errors.append({**err_obj, "error_type": 'unverified'})
|
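refchecker/__version__.py
CHANGED
|
(the +1 -3 hunk for refchecker/__version__.py is missing from this extract)
|
refchecker/checkers/local_semantic_scholar.py
CHANGED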
|
@@ -36,7 +36,7 @@ import os
|
|
|
36
36
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
37
37
|
|
|
38
38
|
from refchecker.utils.doi_utils import extract_doi_from_url, compare_dois, construct_doi_url
|
|
39
|
-
from refchecker.utils.error_utils import create_author_error,
|
|
39
|
+
from refchecker.utils.error_utils import create_author_error, create_doi_error
|
|
40
40
|
from refchecker.utils.text_utils import normalize_author_name, normalize_paper_title, is_name_match, compare_authors, calculate_title_similarity
|
|
41
41
|
from refchecker.utils.url_utils import extract_arxiv_id_from_url, get_best_available_url
|
|
42
42
|
from refchecker.utils.db_utils import process_semantic_scholar_result, process_semantic_scholar_results
|
|
@@ -432,20 +432,24 @@ class LocalNonArxivReferenceChecker:
|
|
|
432
432
|
|
|
433
433
|
# Verify year (with tolerance)
|
|
434
434
|
paper_year = paper_data.get('year')
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
435
|
+
# Get year tolerance from config (default to 1 if not available)
|
|
436
|
+
year_tolerance = 1 # Default tolerance
|
|
437
|
+
try:
|
|
438
|
+
from config.settings import get_config
|
|
439
|
+
config = get_config()
|
|
440
|
+
year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
|
|
441
|
+
except (ImportError, Exception):
|
|
442
|
+
pass # Use default if config not available
|
|
443
|
+
|
|
444
|
+
from refchecker.utils.error_utils import validate_year
|
|
445
|
+
year_warning = validate_year(
|
|
446
|
+
cited_year=year,
|
|
447
|
+
paper_year=paper_year,
|
|
448
|
+
year_tolerance=year_tolerance
|
|
449
|
+
)
|
|
450
|
+
if year_warning:
|
|
451
|
+
logger.debug(f"Local DB: Year issue - {year_warning.get('warning_details', '')}")
|
|
452
|
+
errors.append(year_warning)
|
|
449
453
|
|
|
450
454
|
# Verify DOI
|
|
451
455
|
paper_doi = None
|
|
refchecker/checkers/semantic_scholar.py
CHANGED
|
@@ -511,29 +511,25 @@ class NonArxivReferenceChecker:
|
|
|
511
511
|
|
|
512
512
|
# Verify year using flexible validation
|
|
513
513
|
paper_year = paper_data.get('year')
|
|
514
|
-
if
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
'warning_type': 'year',
|
|
534
|
-
'warning_details': format_year_mismatch(year, paper_year),
|
|
535
|
-
'ref_year_correct': paper_year
|
|
536
|
-
})
|
|
514
|
+
# Check if we have an exact ArXiv ID match for additional context
|
|
515
|
+
arxiv_id_match = False
|
|
516
|
+
if url and 'arxiv.org/abs/' in url:
|
|
517
|
+
arxiv_match = re.search(r'arxiv\.org/abs/([^\s/?#]+)', url)
|
|
518
|
+
if arxiv_match:
|
|
519
|
+
cited_arxiv_id = arxiv_match.group(1)
|
|
520
|
+
external_ids = paper_data.get('externalIds', {})
|
|
521
|
+
found_arxiv_id = external_ids.get('ArXiv')
|
|
522
|
+
arxiv_id_match = (cited_arxiv_id == found_arxiv_id)
|
|
523
|
+
|
|
524
|
+
from refchecker.utils.error_utils import validate_year
|
|
525
|
+
year_warning = validate_year(
|
|
526
|
+
cited_year=year,
|
|
527
|
+
paper_year=paper_year,
|
|
528
|
+
use_flexible_validation=True,
|
|
529
|
+
context={'arxiv_match': arxiv_id_match}
|
|
530
|
+
)
|
|
531
|
+
if year_warning:
|
|
532
|
+
errors.append(year_warning)
|
|
537
533
|
|
|
538
534
|
# Verify venue
|
|
539
535
|
cited_venue = reference.get('journal', '') or reference.get('venue', '')
|
|
@@ -573,10 +569,10 @@ class NonArxivReferenceChecker:
|
|
|
573
569
|
from refchecker.utils.error_utils import create_venue_warning
|
|
574
570
|
errors.append(create_venue_warning(cited_venue, paper_venue))
|
|
575
571
|
elif not cited_venue and paper_venue:
|
|
576
|
-
# Reference has no venue but paper has one -
|
|
572
|
+
# Reference has no venue but paper has one - error for missing venue
|
|
577
573
|
errors.append({
|
|
578
|
-
'
|
|
579
|
-
'
|
|
574
|
+
'error_type': 'venue',
|
|
575
|
+
'error_details': f"Venue missing: should include '{paper_venue}'",
|
|
580
576
|
'ref_venue_correct': paper_venue
|
|
581
577
|
})
|
|
582
578
|
|
refchecker/core/refchecker.py
CHANGED
|
@@ -1928,25 +1928,24 @@ class ArxivReferenceChecker:
|
|
|
1928
1928
|
|
|
1929
1929
|
# Verify year (with tolerance)
|
|
1930
1930
|
paper_year = paper_data.get('year')
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
})
|
|
1931
|
+
# Get year tolerance from config (default to 1 if not available)
|
|
1932
|
+
year_tolerance = 1 # Default tolerance
|
|
1933
|
+
try:
|
|
1934
|
+
from config.settings import get_config
|
|
1935
|
+
config = get_config()
|
|
1936
|
+
year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
|
|
1937
|
+
except (ImportError, Exception):
|
|
1938
|
+
pass # Use default if config not available
|
|
1939
|
+
|
|
1940
|
+
from refchecker.utils.error_utils import validate_year
|
|
1941
|
+
year_warning = validate_year(
|
|
1942
|
+
cited_year=year,
|
|
1943
|
+
paper_year=paper_year,
|
|
1944
|
+
year_tolerance=year_tolerance
|
|
1945
|
+
)
|
|
1946
|
+
if year_warning:
|
|
1947
|
+
logger.debug(f"DB Verification: Year issue - {year_warning.get('warning_details', '')}")
|
|
1948
|
+
errors.append(year_warning)
|
|
1950
1949
|
|
|
1951
1950
|
# Verify DOI
|
|
1952
1951
|
if doi and external_ids.get('DOI'):
|
refchecker/utils/error_utils.py
CHANGED
|
@@ -124,6 +124,67 @@ def create_year_warning(cited_year: int, correct_year: int) -> Dict[str, Any]:
|
|
|
124
124
|
}
|
|
125
125
|
|
|
126
126
|
|
|
127
|
+
def create_year_missing_error(correct_year: int) -> Dict[str, Any]:
|
|
128
|
+
"""
|
|
129
|
+
Create a standardized error for missing year in reference.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
correct_year: Correct year from database
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
Standardized error dictionary
|
|
136
|
+
"""
|
|
137
|
+
return {
|
|
138
|
+
'error_type': 'year',
|
|
139
|
+
'error_details': f"Year missing: should include '{correct_year}'",
|
|
140
|
+
'ref_year_correct': correct_year
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def validate_year(cited_year: Optional[int], paper_year: Optional[int],
|
|
145
|
+
year_tolerance: int = 1, use_flexible_validation: bool = False,
|
|
146
|
+
context: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
|
|
147
|
+
"""
|
|
148
|
+
Validate year field and return appropriate warning if needed.
|
|
149
|
+
|
|
150
|
+
This function handles:
|
|
151
|
+
- Year mismatch (with configurable tolerance)
|
|
152
|
+
- Missing year in reference
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
cited_year: Year as cited in the reference (may be None)
|
|
156
|
+
paper_year: Correct year from database/API (may be None)
|
|
157
|
+
year_tolerance: Maximum allowed difference between years (default 1)
|
|
158
|
+
use_flexible_validation: If True, use is_year_substantially_different for more context-aware checking
|
|
159
|
+
context: Optional context dict for flexible validation (e.g., {'arxiv_match': True})
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
Warning dictionary if year issue found, None otherwise
|
|
163
|
+
"""
|
|
164
|
+
if not paper_year:
|
|
165
|
+
# Can't validate without a known correct year
|
|
166
|
+
return None
|
|
167
|
+
|
|
168
|
+
if cited_year and paper_year:
|
|
169
|
+
if use_flexible_validation:
|
|
170
|
+
# Use the more sophisticated validation from text_utils
|
|
171
|
+
from refchecker.utils.text_utils import is_year_substantially_different
|
|
172
|
+
is_different, warning_message = is_year_substantially_different(
|
|
173
|
+
cited_year, paper_year, context or {}
|
|
174
|
+
)
|
|
175
|
+
if is_different and warning_message:
|
|
176
|
+
return create_year_warning(cited_year, paper_year)
|
|
177
|
+
else:
|
|
178
|
+
# Simple tolerance-based validation
|
|
179
|
+
if abs(cited_year - paper_year) > year_tolerance:
|
|
180
|
+
return create_year_warning(cited_year, paper_year)
|
|
181
|
+
elif not cited_year and paper_year:
|
|
182
|
+
# Reference is missing a year but paper has one
|
|
183
|
+
return create_year_missing_error(paper_year)
|
|
184
|
+
|
|
185
|
+
return None
|
|
186
|
+
|
|
187
|
+
|
|
127
188
|
def create_doi_error(cited_doi: str, correct_doi: str) -> Optional[Dict[str, str]]:
|
|
128
189
|
"""
|
|
129
190
|
Create a standardized DOI error or warning dictionary.
|
|
@@ -220,8 +281,8 @@ def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]
|
|
|
220
281
|
# If cited venue cleans to empty, treat as missing venue instead of mismatch
|
|
221
282
|
if not clean_cited and clean_correct:
|
|
222
283
|
return {
|
|
223
|
-
'
|
|
224
|
-
'
|
|
284
|
+
'error_type': 'venue',
|
|
285
|
+
'error_details': format_missing_venue(clean_correct),
|
|
225
286
|
'ref_venue_correct': correct_venue
|
|
226
287
|
}
|
|
227
288
|
|
|
{academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/WHEEL
RENAMED
|
File without changes
|
{academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
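{academic_refchecker-1.2.68.dist-info → academic_refchecker-1.2.69.dist-info}/top_level.txt
RENAMED
|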
File without changes
|