academic-refchecker 1.2.53__py3-none-any.whl → 1.2.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/METADATA +23 -23
  2. academic_refchecker-1.2.55.dist-info/RECORD +49 -0
  3. academic_refchecker-1.2.55.dist-info/entry_points.txt +2 -0
  4. academic_refchecker-1.2.55.dist-info/top_level.txt +1 -0
  5. refchecker/__init__.py +13 -0
  6. refchecker/__main__.py +11 -0
  7. refchecker/__version__.py +5 -0
  8. {checkers → refchecker/checkers}/crossref.py +5 -5
  9. {checkers → refchecker/checkers}/enhanced_hybrid_checker.py +1 -1
  10. {checkers → refchecker/checkers}/github_checker.py +4 -4
  11. {checkers → refchecker/checkers}/local_semantic_scholar.py +7 -7
  12. {checkers → refchecker/checkers}/openalex.py +6 -6
  13. {checkers → refchecker/checkers}/openreview_checker.py +8 -8
  14. {checkers → refchecker/checkers}/pdf_paper_checker.py +1 -1
  15. {checkers → refchecker/checkers}/semantic_scholar.py +10 -10
  16. {checkers → refchecker/checkers}/webpage_checker.py +3 -3
  17. {core → refchecker/core}/parallel_processor.py +6 -6
  18. {core → refchecker/core}/refchecker.py +63 -63
  19. {utils → refchecker/utils}/arxiv_utils.py +3 -3
  20. {utils → refchecker/utils}/biblatex_parser.py +4 -4
  21. {utils → refchecker/utils}/bibliography_utils.py +5 -5
  22. {utils → refchecker/utils}/bibtex_parser.py +5 -5
  23. {utils → refchecker/utils}/error_utils.py +1 -1
  24. {utils → refchecker/utils}/text_utils.py +62 -13
  25. __version__.py +0 -3
  26. academic_refchecker-1.2.53.dist-info/RECORD +0 -47
  27. academic_refchecker-1.2.53.dist-info/entry_points.txt +0 -2
  28. academic_refchecker-1.2.53.dist-info/top_level.txt +0 -9
  29. {academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/WHEEL +0 -0
  30. {academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/licenses/LICENSE +0 -0
  31. {checkers → refchecker/checkers}/__init__.py +0 -0
  32. {config → refchecker/config}/__init__.py +0 -0
  33. {config → refchecker/config}/logging.conf +0 -0
  34. {config → refchecker/config}/settings.py +0 -0
  35. {core → refchecker/core}/__init__.py +0 -0
  36. {core → refchecker/core}/db_connection_pool.py +0 -0
  37. {database → refchecker/database}/__init__.py +0 -0
  38. {database → refchecker/database}/download_semantic_scholar_db.py +0 -0
  39. {llm → refchecker/llm}/__init__.py +0 -0
  40. {llm → refchecker/llm}/base.py +0 -0
  41. {llm → refchecker/llm}/providers.py +0 -0
  42. {scripts → refchecker/scripts}/__init__.py +0 -0
  43. {scripts → refchecker/scripts}/start_vllm_server.py +0 -0
  44. {services → refchecker/services}/__init__.py +0 -0
  45. {services → refchecker/services}/pdf_processor.py +0 -0
  46. {utils → refchecker/utils}/__init__.py +0 -0
  47. {utils → refchecker/utils}/author_utils.py +0 -0
  48. {utils → refchecker/utils}/config_validator.py +0 -0
  49. {utils → refchecker/utils}/db_utils.py +0 -0
  50. {utils → refchecker/utils}/doi_utils.py +0 -0
  51. {utils → refchecker/utils}/mock_objects.py +0 -0
  52. {utils → refchecker/utils}/unicode_utils.py +0 -0
  53. {utils → refchecker/utils}/url_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.53
3
+ Version: 1.2.55
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -156,17 +156,17 @@ Learn about RefChecker's design philosophy and development process in this detai
156
156
 
157
157
  1. **Check a famous paper:**
158
158
  ```bash
159
- python refchecker.py --paper 1706.03762
159
+ python run_refchecker.py --paper 1706.03762
160
160
  ```
161
161
 
162
162
  2. **Check your own PDF:**
163
163
  ```bash
164
- python refchecker.py --paper /path/to/your/paper.pdf
164
+ python run_refchecker.py --paper /path/to/your/paper.pdf
165
165
  ```
166
166
 
167
167
  3. **For faster processing with local database** (see [Local Database Setup](#local-database-setup)):
168
168
  ```bash
169
- python refchecker.py --paper 1706.03762 --db-path semantic_scholar_db/semantic_scholar.db
169
+ python run_refchecker.py --paper 1706.03762 --db-path semantic_scholar_db/semantic_scholar.db
170
170
  ```
171
171
 
172
172
  > **⚡ Performance Tip**: Reference verification takes 5-10 seconds per reference without a Semantic Scholar API key due to rate limiting. With an API key, verification speeds up to 1-2 seconds per reference. Set `SEMANTIC_SCHOLAR_API_KEY` environment variable or use `--semantic-scholar-api-key` for faster processing.
@@ -192,13 +192,13 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
192
192
  export REFCHECKER_LLM_PROVIDER=anthropic
193
193
  export ANTHROPIC_API_KEY=your_api_key_here
194
194
 
195
- python refchecker.py --paper 1706.03762
195
+ python run_refchecker.py --paper 1706.03762
196
196
  ```
197
197
 
198
198
  2. **Using Command Line Arguments**:
199
199
  ```bash
200
200
  # Enable LLM with specific provider and model
201
- python refchecker.py --paper 1706.03762 \
201
+ python run_refchecker.py --paper 1706.03762 \
202
202
  --llm-provider anthropic \
203
203
  --llm-model claude-sonnet-4-20250514 \
204
204
  ```
@@ -211,7 +211,7 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
211
211
  With `OPENAI_API_KEY` environment variable:
212
212
 
213
213
  ```bash
214
- python refchecker.py --paper /path/to/paper.pdf \
214
+ python run_refchecker.py --paper /path/to/paper.pdf \
215
215
  --llm-provider openai \
216
216
  --llm-model gpt-4o \
217
217
  ```
@@ -221,7 +221,7 @@ python refchecker.py --paper /path/to/paper.pdf \
221
221
  With `ANTHROPIC_API_KEY` environment variable:
222
222
 
223
223
  ```bash
224
- python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
224
+ python run_refchecker.py --paper https://arxiv.org/abs/1706.03762 \
225
225
  --llm-provider anthropic \
226
226
  --llm-model claude-sonnet-4-20250514 \
227
227
  ```
@@ -229,7 +229,7 @@ python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
229
229
  #### Google Gemini
230
230
 
231
231
  ```bash
232
- python refchecker.py --paper paper.tex \
232
+ python run_refchecker.py --paper paper.tex \
233
233
  --llm-provider google \
234
234
  --llm-model gemini-2.5-flash
235
235
  ```
@@ -237,7 +237,7 @@ python refchecker.py --paper paper.tex \
237
237
  #### Azure OpenAI
238
238
 
239
239
  ```bash
240
- python refchecker.py --paper paper.txt \
240
+ python run_refchecker.py --paper paper.txt \
241
241
  --llm-provider azure \
242
242
  --llm-model gpt-4 \
243
243
  --llm-endpoint https://your-resource.openai.azure.com/
@@ -249,7 +249,7 @@ For running models locally:
249
249
 
250
250
  ```bash
251
251
  # automatic Huggingface model download with VLLM server launch
252
- python refchecker.py --paper paper.pdf \
252
+ python run_refchecker.py --paper paper.pdf \
253
253
  --llm-provider vllm \
254
254
  --llm-model meta-llama/Llama-3.1-8B-Instruct
255
255
  ```
@@ -319,43 +319,43 @@ Check papers in various formats and online locations:
319
319
 
320
320
  ```bash
321
321
  # Check a specific ArXiv paper by ID
322
- python refchecker.py --paper 1706.03762
322
+ python run_refchecker.py --paper 1706.03762
323
323
 
324
324
  # Check by ArXiv URL
325
- python refchecker.py --paper https://arxiv.org/abs/1706.03762
325
+ python run_refchecker.py --paper https://arxiv.org/abs/1706.03762
326
326
 
327
327
  # Check by ArXiv PDF URL
328
- python refchecker.py --paper https://arxiv.org/pdf/1706.03762.pdf
328
+ python run_refchecker.py --paper https://arxiv.org/pdf/1706.03762.pdf
329
329
  ```
330
330
 
331
331
  #### Local PDF Files
332
332
 
333
333
  ```bash
334
334
  # Check a local PDF file
335
- python refchecker.py --paper /path/to/your/paper.pdf
335
+ python run_refchecker.py --paper /path/to/your/paper.pdf
336
336
 
337
337
  # Check with offline database for faster processing
338
- python refchecker.py --paper /path/to/your/paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
338
+ python run_refchecker.py --paper /path/to/your/paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
339
339
  ```
340
340
 
341
341
  #### LaTeX Files
342
342
 
343
343
  ```bash
344
344
  # Check a LaTeX document
345
- python refchecker.py --paper /path/to/your/paper.tex
345
+ python run_refchecker.py --paper /path/to/your/paper.tex
346
346
 
347
347
  # Check with debug mode for detailed processing info
348
- python refchecker.py --paper /path/to/your/paper.tex --debug
348
+ python run_refchecker.py --paper /path/to/your/paper.tex --debug
349
349
  ```
350
350
 
351
351
  #### Text Files
352
352
 
353
353
  ```bash
354
354
  # Check a plain text file containing paper content
355
- python refchecker.py --paper /path/to/your/paper.txt
355
+ python run_refchecker.py --paper /path/to/your/paper.txt
356
356
 
357
357
  # Combine with local database for offline verification
358
- python refchecker.py --paper /path/to/your/paper.txt --db-path semantic_scholar_db/semantic_scholar.db
358
+ python run_refchecker.py --paper /path/to/your/paper.txt --db-path semantic_scholar_db/semantic_scholar.db
359
359
  ```
360
360
 
361
361
 
@@ -367,10 +367,10 @@ By default, no files are generated. To save detailed results, use the `--output-
367
367
 
368
368
  ```bash
369
369
  # Save to default filename (reference_errors.txt)
370
- python refchecker.py --paper 1706.03762 --output-file
370
+ python run_refchecker.py --paper 1706.03762 --output-file
371
371
 
372
372
  # Save to custom filename
373
- python refchecker.py --paper 1706.03762 --output-file my_errors.txt
373
+ python run_refchecker.py --paper 1706.03762 --output-file my_errors.txt
374
374
  ```
375
375
 
376
376
  The output file contains a detailed report of references with errors and warnings, including corrected references.
@@ -574,7 +574,7 @@ python download_semantic_scholar_db.py \
574
574
 
575
575
  ## 🧪 Testing
576
576
 
577
- RefChecker includes a comprehensive test suite with 124 tests covering unit, integration, and end-to-end scenarios. The tests ensure reliability across all components and provide examples of how to use the system.
577
+ RefChecker includes a comprehensive test suite with **490+ tests** covering unit, integration, and end-to-end scenarios. The tests ensure reliability across all components and provide examples of how to use the system.
578
578
 
579
579
  ### Quick Test Run
580
580
 
@@ -0,0 +1,49 @@
1
+ academic_refchecker-1.2.55.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
2
+ refchecker/__init__.py,sha256=Pg5MrtLxDBRcNYcI02N-bv3tzURVd1S3nQ8IyF7Zw7E,322
3
+ refchecker/__main__.py,sha256=agBbT9iKN0g2xXtRNCoh29Nr7z2n5vU-r0MCVJKi4tI,232
4
+ refchecker/__version__.py,sha256=Xg3VGFPggqe_vQC3vz8fNW_FXpXAhzc4wLE7rwOBHjw,89
5
+ refchecker/checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
6
+ refchecker/checkers/crossref.py,sha256=5BeSCK8K_S_-iwgQaNAbxZGNsxaxOyBzUQ3AD0Rc6nU,20433
7
+ refchecker/checkers/enhanced_hybrid_checker.py,sha256=c5I_h8w6xD7XkBNkbneffeAnrO8B-uXH99edWBJvDMo,27788
8
+ refchecker/checkers/github_checker.py,sha256=YJ2sLj22qezw3uWjA0jhtDO0fOW4HUwcVbv2DQ4LjR0,14277
9
+ refchecker/checkers/local_semantic_scholar.py,sha256=nrAJhm0VNEl4RwJWAEOGNCRE31h7CneLc9zSqY5zrHY,21092
10
+ refchecker/checkers/openalex.py,sha256=omMQbZOnkDndMJSl9SQVtiETzpv1w1pt93YjlFTq8WA,19616
11
+ refchecker/checkers/openreview_checker.py,sha256=0IHZe4Nscy8fle28rmhy1hhsofR5g0FFSakk8FFH_0A,40540
12
+ refchecker/checkers/pdf_paper_checker.py,sha256=lrg09poNJBz9FNMrUoEjQ6CJbdYZAVANw0bCaTSb5oo,19904
13
+ refchecker/checkers/semantic_scholar.py,sha256=CCrOMdOCfazX8bkikU209dz0xsV_xkeeYcmxO-K9-6I,35072
14
+ refchecker/checkers/webpage_checker.py,sha256=A_d5kg3OOsyliC00OVq_l0J-RJ4Ln7hUoURk21aO2fs,43653
15
+ refchecker/config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
16
+ refchecker/config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
17
+ refchecker/config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c,6164
18
+ refchecker/core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
19
+ refchecker/core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
20
+ refchecker/core/parallel_processor.py,sha256=HpVFEMwPBiP2FRjvGqlaXpjV5S0qP-hxdB_Wdl_lACo,17704
21
+ refchecker/core/refchecker.py,sha256=IAxetrSC0Z7EzVR5coIL2g8MqhWlsZzQiDnceDE4_uc,287102
22
+ refchecker/database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
23
+ refchecker/database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
24
+ refchecker/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ refchecker/llm/base.py,sha256=uMF-KOqZ9ZQ7rccOQLpKJiW9sEMMxr7ePXBSF0yYDJY,16782
26
+ refchecker/llm/providers.py,sha256=A0usJpprCO5D-VX0hqaQzBfi4DG3rdjA39vu02XJsGw,40092
27
+ refchecker/scripts/__init__.py,sha256=xJwo6afG8s7S888BK2Bxw2d7FX8aLkbl0l_ZoJOFibE,37
28
+ refchecker/scripts/start_vllm_server.py,sha256=ZepWp2y2cKFW0Kgsoima2RbmF02fTU29UFcLLpsBhFU,4213
29
+ refchecker/services/__init__.py,sha256=jGi9S74Msak3YR-C4Qb68VU7HB4oLaX9o1rlVAFpOFI,187
30
+ refchecker/services/pdf_processor.py,sha256=vu_JnhFGZY6jFVbDbPvG-mlQojvB-3Dzc8_946KVV2E,9427
31
+ refchecker/utils/__init__.py,sha256=1RrGoIIn1_gVzxd56b6a7HeAS-wu7uDP-nxLbR3fJ-8,1199
32
+ refchecker/utils/arxiv_utils.py,sha256=idlCzkTApYwH-kdTiH9nrfo4GMmwdtUAv7cAGtoEG-0,19799
33
+ refchecker/utils/author_utils.py,sha256=DLTo1xsxef2wxoe4s_MWrh36maj4fgnvFlsDLpDE-qQ,5507
34
+ refchecker/utils/biblatex_parser.py,sha256=IKRUMtRsjdXIktyk9XGArt_ms0asmqP549uhFvvumuE,25581
35
+ refchecker/utils/bibliography_utils.py,sha256=d6kqDOQou_PX6WQkOzrGyN5GpzaOjhu54w9wGfBRQZw,11760
36
+ refchecker/utils/bibtex_parser.py,sha256=xY0dEqT8lBZF-W21YRpG28lp_F2ikLan7nK70WiCU2o,15286
37
+ refchecker/utils/config_validator.py,sha256=rxf7K3DYmJ-BNPsmtaCNipY2BTVT-pJZ7wN-M9Y3GC8,11167
38
+ refchecker/utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
39
+ refchecker/utils/doi_utils.py,sha256=ezUiRnYRpoO0U_Rqgxv1FxqmeTwPh6X8gLgSDbqg5sY,4874
40
+ refchecker/utils/error_utils.py,sha256=Mm4ZqP_4FCRT9x4J_2IKSIAKRWaGLym-wbZqhj1wYzc,12512
41
+ refchecker/utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
42
+ refchecker/utils/text_utils.py,sha256=d_X4r1nVvkL7i0DhxfLaVK3CzbMP2oZvqX3kxfDudQw,220978
43
+ refchecker/utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
44
+ refchecker/utils/url_utils.py,sha256=HdxIO8QvciP6Jp8Wd4sTSrS8JQrOMwgM7pxdUC8RJb4,9176
45
+ academic_refchecker-1.2.55.dist-info/METADATA,sha256=q0soTYffNjsEJBWQVvIXMGqqVeQn-1ayHP4EOhTFDvk,23345
46
+ academic_refchecker-1.2.55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ academic_refchecker-1.2.55.dist-info/entry_points.txt,sha256=kG6k2JwFIRvmKe0oZTr2RYStyfl79BirJxyaO6kjIxA,72
48
+ academic_refchecker-1.2.55.dist-info/top_level.txt,sha256=ZdIg_PFHiATpVT5Uvp4L17Q0d8mk8ZBsINXKf1tE0bo,11
49
+ academic_refchecker-1.2.55.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ academic-refchecker = refchecker.core.refchecker:main
@@ -0,0 +1 @@
1
+ refchecker
refchecker/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """
2
+ RefChecker - Academic Paper Reference Validation Tool
3
+
4
+ A comprehensive tool for validating reference accuracy in academic papers.
5
+ """
6
+
7
+ __version__ = "1.2.1"
8
+ __author__ = "RefChecker Team"
9
+ __email__ = "markrussinovich@hotmail.com"
10
+
11
+ from .core.refchecker import ArxivReferenceChecker
12
+
13
+ __all__ = ["ArxivReferenceChecker"]
refchecker/__main__.py ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Allow running refchecker as a module: python -m refchecker
4
+
5
+ This provides a clean entry point without import collision issues.
6
+ """
7
+
8
+ from .core.refchecker import main
9
+
10
+ if __name__ == "__main__":
11
+ main()
@@ -0,0 +1,5 @@
1
+ """Version information for RefChecker."""
2
+
3
+ __version__ = "1.2.55"
4
+
5
+ __version__ = "1.2.55"
@@ -30,9 +30,9 @@ import logging
30
30
  import re
31
31
  from typing import Dict, List, Tuple, Optional, Any, Union
32
32
  from urllib.parse import quote_plus
33
- from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
34
- from utils.error_utils import format_year_mismatch, format_doi_mismatch
35
- from config.settings import get_config
33
+ from refchecker.utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
34
+ from refchecker.utils.error_utils import format_year_mismatch, format_doi_mismatch
35
+ from refchecker.config.settings import get_config
36
36
 
37
37
  # Set up logging
38
38
  logger = logging.getLogger(__name__)
@@ -358,7 +358,7 @@ class CrossRefReferenceChecker:
358
358
  # Check for DOI
359
359
  doi = work_data.get('DOI')
360
360
  if doi:
361
- from utils.doi_utils import construct_doi_url
361
+ from refchecker.utils.doi_utils import construct_doi_url
362
362
  doi_url = construct_doi_url(doi)
363
363
  logger.debug(f"Generated DOI URL: {doi_url}")
364
364
  return doi_url
@@ -487,7 +487,7 @@ class CrossRefReferenceChecker:
487
487
  work_doi = work_data.get('DOI')
488
488
  if doi and work_doi:
489
489
  # Compare DOIs using the proper comparison function
490
- from utils.doi_utils import compare_dois
490
+ from refchecker.utils.doi_utils import compare_dois
491
491
  if not compare_dois(doi, work_doi):
492
492
  errors.append({
493
493
  'error_type': 'doi',
@@ -542,7 +542,7 @@ class EnhancedHybridReferenceChecker:
542
542
  """
543
543
  Compare author lists (delegates to shared utility)
544
544
  """
545
- from utils.text_utils import compare_authors
545
+ from refchecker.utils.text_utils import compare_authors
546
546
  return compare_authors(cited_authors, correct_authors)
547
547
 
548
548
  # Backward compatibility alias
@@ -5,7 +5,7 @@ import re
5
5
  import logging
6
6
  from urllib.parse import urlparse
7
7
  from typing import Dict, Optional, Tuple, List, Any
8
- from utils.text_utils import strip_latex_commands
8
+ from refchecker.utils.text_utils import strip_latex_commands
9
9
 
10
10
  logger = logging.getLogger(__name__)
11
11
 
@@ -170,7 +170,7 @@ class GitHubChecker:
170
170
  if cited_title:
171
171
  title_match = self._check_title_match(cited_title, actual_name, actual_description)
172
172
  if not title_match:
173
- from utils.error_utils import format_title_mismatch
173
+ from refchecker.utils.error_utils import format_title_mismatch
174
174
  # Clean the cited title for display (remove LaTeX commands like {LLM}s -> LLMs)
175
175
  clean_cited_title = strip_latex_commands(cited_title)
176
176
  details = format_title_mismatch(clean_cited_title, actual_name)
@@ -188,7 +188,7 @@ class GitHubChecker:
188
188
  author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
189
189
  author_match = self._check_author_match(author_str, actual_owner, actual_owner_name)
190
190
  if not author_match:
191
- from utils.error_utils import format_three_line_mismatch
191
+ from refchecker.utils.error_utils import format_three_line_mismatch
192
192
  left = author_str
193
193
  right = f"{actual_owner} ({actual_owner_name})" if actual_owner_name else actual_owner
194
194
  details = format_three_line_mismatch("Author mismatch", left, right)
@@ -203,7 +203,7 @@ class GitHubChecker:
203
203
  try:
204
204
  cited_year_int = int(cited_year)
205
205
  if cited_year_int < creation_year:
206
- from utils.error_utils import format_year_mismatch
206
+ from refchecker.utils.error_utils import format_year_mismatch
207
207
  errors.append({
208
208
  "warning_type": "year",
209
209
  "warning_details": format_year_mismatch(cited_year, creation_year),
@@ -35,12 +35,12 @@ import sys
35
35
  import os
36
36
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
37
37
 
38
- from utils.doi_utils import extract_doi_from_url, compare_dois, construct_doi_url
39
- from utils.error_utils import create_author_error, create_year_warning, create_doi_error
40
- from utils.text_utils import normalize_author_name, normalize_paper_title, is_name_match, compare_authors, calculate_title_similarity, extract_arxiv_id_from_url
41
- from utils.db_utils import process_semantic_scholar_result, process_semantic_scholar_results
42
- from utils.url_utils import get_best_available_url
43
- from config.settings import get_config
38
+ from refchecker.utils.doi_utils import extract_doi_from_url, compare_dois, construct_doi_url
39
+ from refchecker.utils.error_utils import create_author_error, create_year_warning, create_doi_error
40
+ from refchecker.utils.text_utils import normalize_author_name, normalize_paper_title, is_name_match, compare_authors, calculate_title_similarity, extract_arxiv_id_from_url
41
+ from refchecker.utils.db_utils import process_semantic_scholar_result, process_semantic_scholar_results
42
+ from refchecker.utils.url_utils import get_best_available_url
43
+ from refchecker.config.settings import get_config
44
44
 
45
45
  # Set up logging
46
46
  logger = logging.getLogger(__name__)
@@ -471,7 +471,7 @@ class LocalNonArxivReferenceChecker:
471
471
 
472
472
  # First try to get the Semantic Scholar URL since that's what we used for verification
473
473
  if external_ids.get('CorpusId'):
474
- from utils.url_utils import construct_semantic_scholar_url
474
+ from refchecker.utils.url_utils import construct_semantic_scholar_url
475
475
  paper_url = construct_semantic_scholar_url(external_ids['CorpusId'])
476
476
  logger.debug(f"Using Semantic Scholar URL for verification: {paper_url}")
477
477
  else:
@@ -32,9 +32,9 @@ import logging
32
32
  import re
33
33
  from typing import Dict, List, Tuple, Optional, Any, Union
34
34
  from urllib.parse import quote_plus
35
- from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
36
- from utils.error_utils import format_year_mismatch, format_doi_mismatch
37
- from config.settings import get_config
35
+ from refchecker.utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
36
+ from refchecker.utils.error_utils import format_year_mismatch, format_doi_mismatch
37
+ from refchecker.config.settings import get_config
38
38
 
39
39
  # Set up logging
40
40
  logger = logging.getLogger(__name__)
@@ -343,7 +343,7 @@ class OpenAlexReferenceChecker:
343
343
  # Fall back to DOI URL
344
344
  doi = work_data.get('doi')
345
345
  if doi:
346
- from utils.doi_utils import construct_doi_url
346
+ from refchecker.utils.doi_utils import construct_doi_url
347
347
  doi_url = construct_doi_url(doi)
348
348
  logger.debug(f"Generated DOI URL: {doi_url}")
349
349
  return doi_url
@@ -351,7 +351,7 @@ class OpenAlexReferenceChecker:
351
351
  # Check ids for other identifiers
352
352
  ids = work_data.get('ids', {})
353
353
  if ids.get('doi'):
354
- from utils.doi_utils import construct_doi_url
354
+ from refchecker.utils.doi_utils import construct_doi_url
355
355
  doi_url = construct_doi_url(ids['doi'])
356
356
  logger.debug(f"Generated DOI URL from ids: {doi_url}")
357
357
  return doi_url
@@ -460,7 +460,7 @@ class OpenAlexReferenceChecker:
460
460
 
461
461
  if doi and work_doi:
462
462
  # Compare DOIs using the proper comparison function
463
- from utils.doi_utils import compare_dois
463
+ from refchecker.utils.doi_utils import compare_dois
464
464
  if not compare_dois(doi, work_doi):
465
465
  errors.append({
466
466
  'error_type': 'doi',
@@ -32,7 +32,7 @@ import json
32
32
  from typing import Dict, List, Tuple, Optional, Any, Union
33
33
  from urllib.parse import urlparse, parse_qs
34
34
  from bs4 import BeautifulSoup
35
- from utils.text_utils import (
35
+ from refchecker.utils.text_utils import (
36
36
  normalize_text, clean_title_basic, is_name_match,
37
37
  calculate_title_similarity, compare_authors,
38
38
  clean_title_for_search, are_venues_substantially_different,
@@ -426,7 +426,7 @@ class OpenReviewReferenceChecker:
426
426
  if cited_title and paper_title:
427
427
  similarity = compare_titles_with_latex_cleaning(cited_title, paper_title)
428
428
  if similarity < 0.7: # Using a reasonable threshold
429
- from utils.error_utils import format_title_mismatch
429
+ from refchecker.utils.error_utils import format_title_mismatch
430
430
  # Clean the cited title for display (remove LaTeX commands like {LLM}s -> LLMs)
431
431
  clean_cited_title = strip_latex_commands(cited_title)
432
432
  details = format_title_mismatch(clean_cited_title, paper_title) + f" (similarity: {similarity:.2f})"
@@ -465,7 +465,7 @@ class OpenReviewReferenceChecker:
465
465
 
466
466
  is_different, year_message = is_year_substantially_different(cited_year_int, paper_year_int)
467
467
  if is_different and year_message:
468
- from utils.error_utils import format_year_mismatch
468
+ from refchecker.utils.error_utils import format_year_mismatch
469
469
  errors.append({
470
470
  "warning_type": "year",
471
471
  "warning_details": format_year_mismatch(cited_year_int, paper_year_int)
@@ -479,7 +479,7 @@ class OpenReviewReferenceChecker:
479
479
 
480
480
  if cited_venue and paper_venue:
481
481
  if are_venues_substantially_different(cited_venue, paper_venue):
482
- from utils.error_utils import format_venue_mismatch
482
+ from refchecker.utils.error_utils import format_venue_mismatch
483
483
  errors.append({
484
484
  "warning_type": "venue",
485
485
  "warning_details": format_venue_mismatch(cited_venue, paper_venue)
@@ -552,7 +552,7 @@ class OpenReviewReferenceChecker:
552
552
  if cited_title and paper_title:
553
553
  similarity = compare_titles_with_latex_cleaning(cited_title, paper_title)
554
554
  if similarity < 0.8: # Slightly higher threshold for search results
555
- from utils.error_utils import format_title_mismatch
555
+ from refchecker.utils.error_utils import format_title_mismatch
556
556
  # Clean the cited title for display (remove LaTeX commands like {LLM}s -> LLMs)
557
557
  clean_cited_title = strip_latex_commands(cited_title)
558
558
  details = format_title_mismatch(clean_cited_title, paper_title) + f" (similarity: {similarity:.2f})"
@@ -591,7 +591,7 @@ class OpenReviewReferenceChecker:
591
591
 
592
592
  is_different, year_message = is_year_substantially_different(cited_year_int, paper_year_int)
593
593
  if is_different and year_message:
594
- from utils.error_utils import format_year_mismatch
594
+ from refchecker.utils.error_utils import format_year_mismatch
595
595
  errors.append({
596
596
  "warning_type": "year",
597
597
  "warning_details": format_year_mismatch(cited_year_int, paper_year_int)
@@ -605,7 +605,7 @@ class OpenReviewReferenceChecker:
605
605
 
606
606
  if cited_venue and paper_venue:
607
607
  if are_venues_substantially_different(cited_venue, paper_venue):
608
- from utils.error_utils import format_venue_mismatch
608
+ from refchecker.utils.error_utils import format_venue_mismatch
609
609
  errors.append({
610
610
  "warning_type": "venue",
611
611
  "warning_details": format_venue_mismatch(cited_venue, paper_venue)
@@ -931,7 +931,7 @@ class OpenReviewReferenceChecker:
931
931
 
932
932
  # Use similarity calculation from text_utils
933
933
  try:
934
- from utils.text_utils import calculate_title_similarity
934
+ from refchecker.utils.text_utils import calculate_title_similarity
935
935
  similarity = calculate_title_similarity(search_title, found_title)
936
936
  return similarity >= threshold
937
937
  except ImportError:
@@ -15,7 +15,7 @@ from pypdf import PdfReader
15
15
  from fuzzywuzzy import fuzz
16
16
  from bs4 import BeautifulSoup
17
17
 
18
- from utils.text_utils import normalize_text, calculate_title_similarity
18
+ from refchecker.utils.text_utils import normalize_text, calculate_title_similarity
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
 
@@ -28,9 +28,9 @@ import time
28
28
  import logging
29
29
  import re
30
30
  from typing import Dict, List, Tuple, Optional, Any, Union
31
- from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, are_venues_substantially_different, calculate_title_similarity, compare_authors, clean_title_for_search, strip_latex_commands, compare_titles_with_latex_cleaning
32
- from utils.error_utils import format_title_mismatch
33
- from config.settings import get_config
31
+ from refchecker.utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, are_venues_substantially_different, calculate_title_similarity, compare_authors, clean_title_for_search, strip_latex_commands, compare_titles_with_latex_cleaning
32
+ from refchecker.utils.error_utils import format_title_mismatch
33
+ from refchecker.config.settings import get_config
34
34
 
35
35
  # Set up logging
36
36
  logger = logging.getLogger(__name__)
@@ -523,12 +523,12 @@ class NonArxivReferenceChecker:
523
523
  arxiv_id_match = (cited_arxiv_id == found_arxiv_id)
524
524
 
525
525
  # Use flexible year validation
526
- from utils.text_utils import is_year_substantially_different
526
+ from refchecker.utils.text_utils import is_year_substantially_different
527
527
  context = {'arxiv_match': arxiv_id_match}
528
528
  is_different, warning_message = is_year_substantially_different(year, paper_year, context)
529
529
 
530
530
  if is_different and warning_message:
531
- from utils.error_utils import format_year_mismatch
531
+ from refchecker.utils.error_utils import format_year_mismatch
532
532
  errors.append({
533
533
  'warning_type': 'year',
534
534
  'warning_details': format_year_mismatch(year, paper_year),
@@ -549,7 +549,7 @@ class NonArxivReferenceChecker:
549
549
  if cited_venue and paper_venue:
550
550
  # Use the utility function to check if venues are substantially different
551
551
  if are_venues_substantially_different(cited_venue, paper_venue):
552
- from utils.error_utils import create_venue_warning
552
+ from refchecker.utils.error_utils import create_venue_warning
553
553
  errors.append(create_venue_warning(cited_venue, paper_venue))
554
554
  elif not cited_venue and paper_venue:
555
555
  # Original reference has the venue in raw text but not parsed correctly
@@ -597,9 +597,9 @@ class NonArxivReferenceChecker:
597
597
  paper_doi = external_ids['DOI']
598
598
 
599
599
  # Compare DOIs using the proper comparison function
600
- from utils.doi_utils import compare_dois
600
+ from refchecker.utils.doi_utils import compare_dois
601
601
  if doi and paper_doi and not compare_dois(doi, paper_doi):
602
- from utils.error_utils import format_doi_mismatch
602
+ from refchecker.utils.error_utils import format_doi_mismatch
603
603
  errors.append({
604
604
  'error_type': 'doi',
605
605
  'error_details': format_doi_mismatch(doi, paper_doi),
@@ -614,13 +614,13 @@ class NonArxivReferenceChecker:
614
614
  # Return the Semantic Scholar URL that was actually used for verification
615
615
  # First priority: Semantic Scholar URL since that's what we used for verification
616
616
  if external_ids.get('CorpusId'):
617
- from utils.url_utils import construct_semantic_scholar_url
617
+ from refchecker.utils.url_utils import construct_semantic_scholar_url
618
618
  paper_url = construct_semantic_scholar_url(external_ids['CorpusId'])
619
619
  logger.debug(f"Using Semantic Scholar URL for verification: {paper_url}")
620
620
 
621
621
  # Second priority: DOI URL (if this was verified through DOI)
622
622
  elif external_ids.get('DOI'):
623
- from utils.doi_utils import construct_doi_url
623
+ from refchecker.utils.doi_utils import construct_doi_url
624
624
  paper_url = construct_doi_url(external_ids['DOI'])
625
625
  logger.debug(f"Using DOI URL for verification: {paper_url}")
626
626
 
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, urljoin
7
7
  from typing import Dict, Optional, Tuple, List, Any
8
8
  from bs4 import BeautifulSoup
9
9
  import time
10
- from utils.text_utils import strip_latex_commands
10
+ from refchecker.utils.text_utils import strip_latex_commands
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -185,7 +185,7 @@ class WebPageChecker:
185
185
  # Check title match
186
186
  if cited_title and page_title:
187
187
  if not self._check_title_match(cited_title, page_title, page_description):
188
- from utils.error_utils import format_title_mismatch
188
+ from refchecker.utils.error_utils import format_title_mismatch
189
189
  # Clean the cited title for display (remove LaTeX commands like {LLM}s -> LLMs)
190
190
  clean_cited_title = strip_latex_commands(cited_title)
191
191
  errors.append({
@@ -207,7 +207,7 @@ class WebPageChecker:
207
207
  if cited_authors:
208
208
  author_str = ', '.join(cited_authors) if isinstance(cited_authors, list) else str(cited_authors)
209
209
  if not self._check_author_match(author_str, site_info, web_url):
210
- from utils.error_utils import format_three_line_mismatch
210
+ from refchecker.utils.error_utils import format_three_line_mismatch
211
211
  left = author_str
212
212
  right = site_info.get('organization', 'unknown')
213
213
  details = format_three_line_mismatch("Author/organization mismatch", left, right)
@@ -13,7 +13,7 @@ from threading import Thread, Lock
13
13
  from concurrent.futures import ThreadPoolExecutor, as_completed
14
14
  from dataclasses import dataclass
15
15
  from typing import List, Dict, Any, Optional, Tuple, Callable
16
- from utils.text_utils import deduplicate_urls
16
+ from refchecker.utils.text_utils import deduplicate_urls
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -277,15 +277,15 @@ class ParallelReferenceProcessor:
277
277
  # Print reference info in the same format as sequential mode
278
278
  raw_title = reference.get('title', 'Untitled')
279
279
  # Clean LaTeX commands from title for display
280
- from utils.text_utils import strip_latex_commands
280
+ from refchecker.utils.text_utils import strip_latex_commands
281
281
  title = strip_latex_commands(raw_title)
282
- from utils.text_utils import format_authors_for_display
282
+ from refchecker.utils.text_utils import format_authors_for_display
283
283
  authors = format_authors_for_display(reference.get('authors', []))
284
284
  year = reference.get('year', '')
285
285
  # Get venue from either 'venue' or 'journal' field and clean it up
286
286
  venue = reference.get('venue', '') or reference.get('journal', '')
287
287
  if venue:
288
- from utils.error_utils import clean_venue_for_comparison
288
+ from refchecker.utils.error_utils import clean_venue_for_comparison
289
289
  venue = clean_venue_for_comparison(venue)
290
290
  url = reference.get('url', '')
291
291
  doi = reference.get('doi', '')
@@ -331,7 +331,7 @@ class ParallelReferenceProcessor:
331
331
 
332
332
  # Show DOI URL if available and different from what's already shown
333
333
  if external_ids.get('DOI'):
334
- from utils.doi_utils import construct_doi_url
334
+ from refchecker.utils.doi_utils import construct_doi_url
335
335
  doi_url = construct_doi_url(external_ids['DOI'])
336
336
  if doi_url != verified_url_to_show and doi_url != url:
337
337
  print(f" DOI URL: {doi_url}")
@@ -355,7 +355,7 @@ class ParallelReferenceProcessor:
355
355
  error_type = error.get('error_type') or error.get('warning_type') or error.get('info_type')
356
356
  error_details = error.get('error_details') or error.get('warning_details') or error.get('info_details', 'Unknown error')
357
357
 
358
- from utils.error_utils import print_labeled_multiline
358
+ from refchecker.utils.error_utils import print_labeled_multiline
359
359
 
360
360
  if error_type == 'arxiv_id':
361
361
  # Keep existing style for arXiv ID errors