academic-refchecker 1.2.67__tar.gz → 1.2.69__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {academic_refchecker-1.2.67/academic_refchecker.egg-info → academic_refchecker-1.2.69}/PKG-INFO +40 -12
  2. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/README.md +39 -11
  3. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69/academic_refchecker.egg-info}/PKG-INFO +40 -12
  4. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/refchecker_wrapper.py +6 -6
  5. academic_refchecker-1.2.69/src/refchecker/__version__.py +3 -0
  6. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/local_semantic_scholar.py +19 -15
  7. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/semantic_scholar.py +22 -26
  8. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/core/refchecker.py +18 -19
  9. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/error_utils.py +63 -2
  10. academic_refchecker-1.2.67/src/refchecker/__version__.py +0 -5
  11. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/LICENSE +0 -0
  12. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/MANIFEST.in +0 -0
  13. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/academic_refchecker.egg-info/SOURCES.txt +0 -0
  14. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/academic_refchecker.egg-info/dependency_links.txt +0 -0
  15. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/academic_refchecker.egg-info/entry_points.txt +0 -0
  16. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/academic_refchecker.egg-info/requires.txt +0 -0
  17. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/academic_refchecker.egg-info/top_level.txt +0 -0
  18. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/__init__.py +0 -0
  19. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/__main__.py +0 -0
  20. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/cli.py +0 -0
  21. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/concurrency.py +0 -0
  22. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/database.py +0 -0
  23. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/main.py +0 -0
  24. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/models.py +0 -0
  25. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/static/assets/index-2P6L_39v.css +0 -0
  26. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/static/assets/index-hk21nqxR.js +0 -0
  27. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/static/favicon.svg +0 -0
  28. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/static/index.html +0 -0
  29. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/static/vite.svg +0 -0
  30. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/thumbnail.py +0 -0
  31. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/backend/websocket_manager.py +0 -0
  32. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/pyproject.toml +0 -0
  33. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/requirements.txt +0 -0
  34. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/scripts/download_db.py +0 -0
  35. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/scripts/run_tests.py +0 -0
  36. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/scripts/start_vllm_server.py +0 -0
  37. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/setup.cfg +0 -0
  38. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/__init__.py +0 -0
  39. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/__main__.py +0 -0
  40. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/__init__.py +0 -0
  41. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/crossref.py +0 -0
  42. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/enhanced_hybrid_checker.py +0 -0
  43. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/github_checker.py +0 -0
  44. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/openalex.py +0 -0
  45. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/openreview_checker.py +0 -0
  46. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/pdf_paper_checker.py +0 -0
  47. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/checkers/webpage_checker.py +0 -0
  48. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/config/__init__.py +0 -0
  49. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/config/logging.conf +0 -0
  50. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/config/settings.py +0 -0
  51. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/core/__init__.py +0 -0
  52. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/core/db_connection_pool.py +0 -0
  53. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/core/parallel_processor.py +0 -0
  54. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/database/__init__.py +0 -0
  55. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/database/download_semantic_scholar_db.py +0 -0
  56. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/llm/__init__.py +0 -0
  57. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/llm/base.py +0 -0
  58. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/llm/providers.py +0 -0
  59. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/scripts/__init__.py +0 -0
  60. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/scripts/start_vllm_server.py +0 -0
  61. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/services/__init__.py +0 -0
  62. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/services/pdf_processor.py +0 -0
  63. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/__init__.py +0 -0
  64. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/arxiv_utils.py +0 -0
  65. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/author_utils.py +0 -0
  66. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/biblatex_parser.py +0 -0
  67. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/bibliography_utils.py +0 -0
  68. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/bibtex_parser.py +0 -0
  69. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/config_validator.py +0 -0
  70. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/db_utils.py +0 -0
  71. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/doi_utils.py +0 -0
  72. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/mock_objects.py +0 -0
  73. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/text_utils.py +0 -0
  74. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/unicode_utils.py +0 -0
  75. {academic_refchecker-1.2.67 → academic_refchecker-1.2.69}/src/refchecker/utils/url_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.67
3
+ Version: 1.2.69
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -186,18 +186,47 @@ Learn about RefChecker's design philosophy and development process in this detai
186
186
 
187
187
  RefChecker also includes a modern web interface with real-time progress updates, check history, and export options.
188
188
 
189
- ### Prerequisites
189
+ ![RefChecker Web UI](assets/webui.png)
190
190
 
191
- - **Python 3.8+** with RefChecker installed (`pip install academic-refchecker[webui]`)
192
- - **Node.js 18+** and npm
191
+ ### Option 1: Install from PyPI (Recommended)
193
192
 
194
- ### Quick Start (Web UI)
193
+ The simplest way to run the Web UI is using the pip-installed package:
195
194
 
196
195
  ```bash
197
- # Install Python dependencies (if not already done)
196
+ # Install RefChecker with Web UI support
198
197
  pip install academic-refchecker[llm,webui]
199
198
 
200
- # Install Node.js dependencies
199
+ # Start the web server
200
+ refchecker-webui
201
+ ```
202
+
203
+ Then open **http://localhost:8000** in your browser.
204
+
205
+ The `refchecker-webui` command starts a complete web server with both the API backend and the pre-built frontend.
206
+
207
+ **Options:**
208
+ ```bash
209
+ refchecker-webui --port 8080 # Use a different port
210
+ refchecker-webui --host 0.0.0.0 # Allow external connections
211
+ ```
212
+
213
+ ### Option 2: Run from Cloned Repository (Development)
214
+
215
+ If you're developing or modifying the Web UI:
216
+
217
+ **Prerequisites:**
218
+ - **Python 3.8+** with dependencies installed
219
+ - **Node.js 18+** and npm
220
+
221
+ ```bash
222
+ # Clone the repository
223
+ git clone https://github.com/markrussinovich/refchecker.git
224
+ cd refchecker
225
+
226
+ # Install Python dependencies
227
+ pip install -e ".[llm,webui]"
228
+
229
+ # Install and run the frontend development server
201
230
  cd web-ui
202
231
  npm install # First time only
203
232
  npm start # Starts both backend and frontend
@@ -205,15 +234,14 @@ npm start # Starts both backend and frontend
205
234
 
206
235
  Then open **http://localhost:5173** in your browser.
207
236
 
208
- ### Alternative: Start Servers Separately
237
+ **Alternative: Start Servers Separately**
209
238
 
210
- **Terminal 1 - Backend:**
239
+ *Terminal 1 - Backend:*
211
240
  ```bash
212
- refchecker-webui --port 8000
213
- # Or: python -m uvicorn backend.main:app --port 8000
241
+ python -m uvicorn backend.main:app --reload --port 8000
214
242
  ```
215
243
 
216
- **Terminal 2 - Frontend:**
244
+ *Terminal 2 - Frontend:*
217
245
  ```bash
218
246
  cd web-ui
219
247
  npm run dev
@@ -115,18 +115,47 @@ Learn about RefChecker's design philosophy and development process in this detai
115
115
 
116
116
  RefChecker also includes a modern web interface with real-time progress updates, check history, and export options.
117
117
 
118
- ### Prerequisites
118
+ ![RefChecker Web UI](assets/webui.png)
119
119
 
120
- - **Python 3.8+** with RefChecker installed (`pip install academic-refchecker[webui]`)
121
- - **Node.js 18+** and npm
120
+ ### Option 1: Install from PyPI (Recommended)
122
121
 
123
- ### Quick Start (Web UI)
122
+ The simplest way to run the Web UI is using the pip-installed package:
124
123
 
125
124
  ```bash
126
- # Install Python dependencies (if not already done)
125
+ # Install RefChecker with Web UI support
127
126
  pip install academic-refchecker[llm,webui]
128
127
 
129
- # Install Node.js dependencies
128
+ # Start the web server
129
+ refchecker-webui
130
+ ```
131
+
132
+ Then open **http://localhost:8000** in your browser.
133
+
134
+ The `refchecker-webui` command starts a complete web server with both the API backend and the pre-built frontend.
135
+
136
+ **Options:**
137
+ ```bash
138
+ refchecker-webui --port 8080 # Use a different port
139
+ refchecker-webui --host 0.0.0.0 # Allow external connections
140
+ ```
141
+
142
+ ### Option 2: Run from Cloned Repository (Development)
143
+
144
+ If you're developing or modifying the Web UI:
145
+
146
+ **Prerequisites:**
147
+ - **Python 3.8+** with dependencies installed
148
+ - **Node.js 18+** and npm
149
+
150
+ ```bash
151
+ # Clone the repository
152
+ git clone https://github.com/markrussinovich/refchecker.git
153
+ cd refchecker
154
+
155
+ # Install Python dependencies
156
+ pip install -e ".[llm,webui]"
157
+
158
+ # Install and run the frontend development server
130
159
  cd web-ui
131
160
  npm install # First time only
132
161
  npm start # Starts both backend and frontend
@@ -134,15 +163,14 @@ npm start # Starts both backend and frontend
134
163
 
135
164
  Then open **http://localhost:5173** in your browser.
136
165
 
137
- ### Alternative: Start Servers Separately
166
+ **Alternative: Start Servers Separately**
138
167
 
139
- **Terminal 1 - Backend:**
168
+ *Terminal 1 - Backend:*
140
169
  ```bash
141
- refchecker-webui --port 8000
142
- # Or: python -m uvicorn backend.main:app --port 8000
170
+ python -m uvicorn backend.main:app --reload --port 8000
143
171
  ```
144
172
 
145
- **Terminal 2 - Frontend:**
173
+ *Terminal 2 - Frontend:*
146
174
  ```bash
147
175
  cd web-ui
148
176
  npm run dev
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.67
3
+ Version: 1.2.69
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -186,18 +186,47 @@ Learn about RefChecker's design philosophy and development process in this detai
186
186
 
187
187
  RefChecker also includes a modern web interface with real-time progress updates, check history, and export options.
188
188
 
189
- ### Prerequisites
189
+ ![RefChecker Web UI](assets/webui.png)
190
190
 
191
- - **Python 3.8+** with RefChecker installed (`pip install academic-refchecker[webui]`)
192
- - **Node.js 18+** and npm
191
+ ### Option 1: Install from PyPI (Recommended)
193
192
 
194
- ### Quick Start (Web UI)
193
+ The simplest way to run the Web UI is using the pip-installed package:
195
194
 
196
195
  ```bash
197
- # Install Python dependencies (if not already done)
196
+ # Install RefChecker with Web UI support
198
197
  pip install academic-refchecker[llm,webui]
199
198
 
200
- # Install Node.js dependencies
199
+ # Start the web server
200
+ refchecker-webui
201
+ ```
202
+
203
+ Then open **http://localhost:8000** in your browser.
204
+
205
+ The `refchecker-webui` command starts a complete web server with both the API backend and the pre-built frontend.
206
+
207
+ **Options:**
208
+ ```bash
209
+ refchecker-webui --port 8080 # Use a different port
210
+ refchecker-webui --host 0.0.0.0 # Allow external connections
211
+ ```
212
+
213
+ ### Option 2: Run from Cloned Repository (Development)
214
+
215
+ If you're developing or modifying the Web UI:
216
+
217
+ **Prerequisites:**
218
+ - **Python 3.8+** with dependencies installed
219
+ - **Node.js 18+** and npm
220
+
221
+ ```bash
222
+ # Clone the repository
223
+ git clone https://github.com/markrussinovich/refchecker.git
224
+ cd refchecker
225
+
226
+ # Install Python dependencies
227
+ pip install -e ".[llm,webui]"
228
+
229
+ # Install and run the frontend development server
201
230
  cd web-ui
202
231
  npm install # First time only
203
232
  npm start # Starts both backend and frontend
@@ -205,15 +234,14 @@ npm start # Starts both backend and frontend
205
234
 
206
235
  Then open **http://localhost:5173** in your browser.
207
236
 
208
- ### Alternative: Start Servers Separately
237
+ **Alternative: Start Servers Separately**
209
238
 
210
- **Terminal 1 - Backend:**
239
+ *Terminal 1 - Backend:*
211
240
  ```bash
212
- refchecker-webui --port 8000
213
- # Or: python -m uvicorn backend.main:app --port 8000
241
+ python -m uvicorn backend.main:app --reload --port 8000
214
242
  ```
215
243
 
216
- **Terminal 2 - Frontend:**
244
+ *Terminal 2 - Frontend:*
217
245
  ```bash
218
246
  cd web-ui
219
247
  npm run dev
@@ -171,18 +171,18 @@ class ProgressRefChecker:
171
171
  "is_warning": is_warning, # Preserve warning_type as warning flag
172
172
  })
173
173
 
174
- # Determine status - items with warning_type or certain error types are warnings, not errors
175
- warning_types = ['year', 'venue', 'author']
174
+ # Determine status - items originally from warning_type are warnings, items from error_type are errors
176
175
  # Items originally from info_type are suggestions, not errors
177
176
  # Items originally from warning_type are warnings, not errors
177
+ # Items with error_type (including year/venue/author when missing) are errors
178
178
  has_errors = any(
179
- e.get('error_type') not in ['unverified', 'info'] + warning_types
179
+ e.get('error_type') not in ['unverified', 'info']
180
180
  and not e.get('is_suggestion')
181
181
  and not e.get('is_warning')
182
182
  for e in sanitized
183
183
  )
184
184
  has_warnings = any(
185
- (e.get('error_type') in warning_types or e.get('is_warning'))
185
+ e.get('is_warning')
186
186
  and not e.get('is_suggestion')
187
187
  for e in sanitized
188
188
  )
@@ -252,8 +252,8 @@ class ProgressRefChecker:
252
252
  "suggestion_type": err.get('error_type') or 'info',
253
253
  "suggestion_details": err.get('error_details', '')
254
254
  })
255
- elif err.get('is_warning') or err.get('error_type') in ['year', 'venue', 'author']:
256
- # Items with is_warning flag or known warning types go to warnings
255
+ elif err.get('is_warning'):
256
+ # Only items with is_warning flag (originally warning_type) go to warnings
257
257
  formatted_warnings.append(err_obj)
258
258
  elif err.get('error_type') == 'unverified':
259
259
  formatted_errors.append({**err_obj, "error_type": 'unverified'})
@@ -0,0 +1,3 @@
1
+ """Version information for RefChecker."""
2
+
3
+ __version__ = "1.2.69"
@@ -36,7 +36,7 @@ import os
36
36
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
37
37
 
38
38
  from refchecker.utils.doi_utils import extract_doi_from_url, compare_dois, construct_doi_url
39
- from refchecker.utils.error_utils import create_author_error, create_year_warning, create_doi_error
39
+ from refchecker.utils.error_utils import create_author_error, create_doi_error
40
40
  from refchecker.utils.text_utils import normalize_author_name, normalize_paper_title, is_name_match, compare_authors, calculate_title_similarity
41
41
  from refchecker.utils.url_utils import extract_arxiv_id_from_url, get_best_available_url
42
42
  from refchecker.utils.db_utils import process_semantic_scholar_result, process_semantic_scholar_results
@@ -432,20 +432,24 @@ class LocalNonArxivReferenceChecker:
432
432
 
433
433
  # Verify year (with tolerance)
434
434
  paper_year = paper_data.get('year')
435
- if year and paper_year:
436
- # Get year tolerance from config (default to 1 if not available)
437
- year_tolerance = 1 # Default tolerance
438
- try:
439
- from config.settings import get_config
440
- config = get_config()
441
- year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
442
- except (ImportError, Exception):
443
- pass # Use default if config not available
444
-
445
- # Only flag as mismatch if the difference is greater than tolerance
446
- if abs(year - paper_year) > year_tolerance:
447
- logger.debug(f"Local DB: Year mismatch - cited: {year}, actual: {paper_year}")
448
- errors.append(create_year_warning(year, paper_year))
435
+ # Get year tolerance from config (default to 1 if not available)
436
+ year_tolerance = 1 # Default tolerance
437
+ try:
438
+ from config.settings import get_config
439
+ config = get_config()
440
+ year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
441
+ except (ImportError, Exception):
442
+ pass # Use default if config not available
443
+
444
+ from refchecker.utils.error_utils import validate_year
445
+ year_warning = validate_year(
446
+ cited_year=year,
447
+ paper_year=paper_year,
448
+ year_tolerance=year_tolerance
449
+ )
450
+ if year_warning:
451
+ logger.debug(f"Local DB: Year issue - {year_warning.get('warning_details', '')}")
452
+ errors.append(year_warning)
449
453
 
450
454
  # Verify DOI
451
455
  paper_doi = None
@@ -511,29 +511,25 @@ class NonArxivReferenceChecker:
511
511
 
512
512
  # Verify year using flexible validation
513
513
  paper_year = paper_data.get('year')
514
- if year and paper_year:
515
- # Check if we have an exact ArXiv ID match for additional context
516
- arxiv_id_match = False
517
- if url and 'arxiv.org/abs/' in url:
518
- arxiv_match = re.search(r'arxiv\.org/abs/([^\s/?#]+)', url)
519
- if arxiv_match:
520
- cited_arxiv_id = arxiv_match.group(1)
521
- external_ids = paper_data.get('externalIds', {})
522
- found_arxiv_id = external_ids.get('ArXiv')
523
- arxiv_id_match = (cited_arxiv_id == found_arxiv_id)
524
-
525
- # Use flexible year validation
526
- from refchecker.utils.text_utils import is_year_substantially_different
527
- context = {'arxiv_match': arxiv_id_match}
528
- is_different, warning_message = is_year_substantially_different(year, paper_year, context)
529
-
530
- if is_different and warning_message:
531
- from refchecker.utils.error_utils import format_year_mismatch
532
- errors.append({
533
- 'warning_type': 'year',
534
- 'warning_details': format_year_mismatch(year, paper_year),
535
- 'ref_year_correct': paper_year
536
- })
514
+ # Check if we have an exact ArXiv ID match for additional context
515
+ arxiv_id_match = False
516
+ if url and 'arxiv.org/abs/' in url:
517
+ arxiv_match = re.search(r'arxiv\.org/abs/([^\s/?#]+)', url)
518
+ if arxiv_match:
519
+ cited_arxiv_id = arxiv_match.group(1)
520
+ external_ids = paper_data.get('externalIds', {})
521
+ found_arxiv_id = external_ids.get('ArXiv')
522
+ arxiv_id_match = (cited_arxiv_id == found_arxiv_id)
523
+
524
+ from refchecker.utils.error_utils import validate_year
525
+ year_warning = validate_year(
526
+ cited_year=year,
527
+ paper_year=paper_year,
528
+ use_flexible_validation=True,
529
+ context={'arxiv_match': arxiv_id_match}
530
+ )
531
+ if year_warning:
532
+ errors.append(year_warning)
537
533
 
538
534
  # Verify venue
539
535
  cited_venue = reference.get('journal', '') or reference.get('venue', '')
@@ -573,10 +569,10 @@ class NonArxivReferenceChecker:
573
569
  from refchecker.utils.error_utils import create_venue_warning
574
570
  errors.append(create_venue_warning(cited_venue, paper_venue))
575
571
  elif not cited_venue and paper_venue:
576
- # Reference has no venue but paper has one - always warn about missing venue
572
+ # Reference has no venue but paper has one - error for missing venue
577
573
  errors.append({
578
- 'warning_type': 'venue',
579
- 'warning_details': f"Venue missing: should include '{paper_venue}'",
574
+ 'error_type': 'venue',
575
+ 'error_details': f"Venue missing: should include '{paper_venue}'",
580
576
  'ref_venue_correct': paper_venue
581
577
  })
582
578
 
@@ -1928,25 +1928,24 @@ class ArxivReferenceChecker:
1928
1928
 
1929
1929
  # Verify year (with tolerance)
1930
1930
  paper_year = paper_data.get('year')
1931
- if year and paper_year:
1932
- # Get year tolerance from config (default to 1 if not available)
1933
- year_tolerance = 1 # Default tolerance
1934
- try:
1935
- from config.settings import get_config
1936
- config = get_config()
1937
- year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
1938
- except (ImportError, Exception):
1939
- pass # Use default if config not available
1940
-
1941
- # Only flag as mismatch if the difference is greater than tolerance
1942
- if abs(year - paper_year) > year_tolerance:
1943
- logger.debug(f"DB Verification: Year mismatch - cited: {year}, actual: {paper_year}")
1944
- from refchecker.utils.error_utils import format_year_mismatch
1945
- errors.append({
1946
- 'warning_type': 'year',
1947
- 'warning_details': format_year_mismatch(year, paper_year),
1948
- 'ref_year_correct': paper_year
1949
- })
1931
+ # Get year tolerance from config (default to 1 if not available)
1932
+ year_tolerance = 1 # Default tolerance
1933
+ try:
1934
+ from config.settings import get_config
1935
+ config = get_config()
1936
+ year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
1937
+ except (ImportError, Exception):
1938
+ pass # Use default if config not available
1939
+
1940
+ from refchecker.utils.error_utils import validate_year
1941
+ year_warning = validate_year(
1942
+ cited_year=year,
1943
+ paper_year=paper_year,
1944
+ year_tolerance=year_tolerance
1945
+ )
1946
+ if year_warning:
1947
+ logger.debug(f"DB Verification: Year issue - {year_warning.get('warning_details', '')}")
1948
+ errors.append(year_warning)
1950
1949
 
1951
1950
  # Verify DOI
1952
1951
  if doi and external_ids.get('DOI'):
@@ -124,6 +124,67 @@ def create_year_warning(cited_year: int, correct_year: int) -> Dict[str, Any]:
124
124
  }
125
125
 
126
126
 
127
+ def create_year_missing_error(correct_year: int) -> Dict[str, Any]:
128
+ """
129
+ Create a standardized error for missing year in reference.
130
+
131
+ Args:
132
+ correct_year: Correct year from database
133
+
134
+ Returns:
135
+ Standardized error dictionary
136
+ """
137
+ return {
138
+ 'error_type': 'year',
139
+ 'error_details': f"Year missing: should include '{correct_year}'",
140
+ 'ref_year_correct': correct_year
141
+ }
142
+
143
+
144
+ def validate_year(cited_year: Optional[int], paper_year: Optional[int],
145
+ year_tolerance: int = 1, use_flexible_validation: bool = False,
146
+ context: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
147
+ """
148
+ Validate year field and return an appropriate warning (mismatch) or error (missing year) dict if needed.
149
+
150
+ This function handles:
151
+ - Year mismatch (with configurable tolerance)
152
+ - Missing year in reference
153
+
154
+ Args:
155
+ cited_year: Year as cited in the reference (may be None)
156
+ paper_year: Correct year from database/API (may be None)
157
+ year_tolerance: Maximum allowed difference between years (default 1)
158
+ use_flexible_validation: If True, use is_year_substantially_different for more context-aware checking
159
+ context: Optional context dict for flexible validation (e.g., {'arxiv_match': True})
160
+
161
+ Returns:
162
+ Warning dictionary for a year mismatch, error dictionary for a missing cited year, None otherwise
163
+ """
164
+ if not paper_year:
165
+ # Can't validate without a known correct year
166
+ return None
167
+
168
+ if cited_year and paper_year:
169
+ if use_flexible_validation:
170
+ # Use the more sophisticated validation from text_utils
171
+ from refchecker.utils.text_utils import is_year_substantially_different
172
+ is_different, warning_message = is_year_substantially_different(
173
+ cited_year, paper_year, context or {}
174
+ )
175
+ if is_different and warning_message:
176
+ return create_year_warning(cited_year, paper_year)
177
+ else:
178
+ # Simple tolerance-based validation
179
+ if abs(cited_year - paper_year) > year_tolerance:
180
+ return create_year_warning(cited_year, paper_year)
181
+ elif not cited_year and paper_year:
182
+ # Reference is missing a year but paper has one
183
+ return create_year_missing_error(paper_year)
184
+
185
+ return None
186
+
187
+
127
188
  def create_doi_error(cited_doi: str, correct_doi: str) -> Optional[Dict[str, str]]:
128
189
  """
129
190
  Create a standardized DOI error or warning dictionary.
@@ -220,8 +281,8 @@ def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]
220
281
  # If cited venue cleans to empty, treat as missing venue instead of mismatch
221
282
  if not clean_cited and clean_correct:
222
283
  return {
223
- 'warning_type': 'venue',
224
- 'warning_details': format_missing_venue(clean_correct),
284
+ 'error_type': 'venue',
285
+ 'error_details': format_missing_venue(clean_correct),
225
286
  'ref_venue_correct': correct_venue
226
287
  }
227
288
 
@@ -1,5 +0,0 @@
1
- """Version information for RefChecker."""
2
-
3
- __version__ = "1.2.67"
4
-
5
- __version__ = "1.2.67"