academic-refchecker 1.2.61__py3-none-any.whl → 1.2.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.61
3
+ Version: 1.2.63
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -1,7 +1,7 @@
1
- academic_refchecker-1.2.61.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
1
+ academic_refchecker-1.2.63.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
2
2
  refchecker/__init__.py,sha256=Pg5MrtLxDBRcNYcI02N-bv3tzURVd1S3nQ8IyF7Zw7E,322
3
3
  refchecker/__main__.py,sha256=agBbT9iKN0g2xXtRNCoh29Nr7z2n5vU-r0MCVJKi4tI,232
4
- refchecker/__version__.py,sha256=I3ssVxU3mQcgN9942sdRIF9leZqHevhg5PhexBz1zls,89
4
+ refchecker/__version__.py,sha256=g9WrCPVOJI3IjWoSMiuD4H5VZRA970ynr6TSkEmLHrw,89
5
5
  refchecker/checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
6
6
  refchecker/checkers/crossref.py,sha256=5BeSCK8K_S_-iwgQaNAbxZGNsxaxOyBzUQ3AD0Rc6nU,20433
7
7
  refchecker/checkers/enhanced_hybrid_checker.py,sha256=c5I_h8w6xD7XkBNkbneffeAnrO8B-uXH99edWBJvDMo,27788
@@ -10,7 +10,7 @@ refchecker/checkers/local_semantic_scholar.py,sha256=KuVL3LTtlN84t88nFuMSPBj3bUa
10
10
  refchecker/checkers/openalex.py,sha256=omMQbZOnkDndMJSl9SQVtiETzpv1w1pt93YjlFTq8WA,19616
11
11
  refchecker/checkers/openreview_checker.py,sha256=0IHZe4Nscy8fle28rmhy1hhsofR5g0FFSakk8FFH_0A,40540
12
12
  refchecker/checkers/pdf_paper_checker.py,sha256=lrg09poNJBz9FNMrUoEjQ6CJbdYZAVANw0bCaTSb5oo,19904
13
- refchecker/checkers/semantic_scholar.py,sha256=BXwSYcx0XCrAvZPHMCB1Fz_Hum2WwufEO85mbU0PaWc,35470
13
+ refchecker/checkers/semantic_scholar.py,sha256=kCNn1hu2R9vQhUEn2wqQ9qgVfIRRQGzk7Z5sB1VyWlE,35506
14
14
  refchecker/checkers/webpage_checker.py,sha256=A_d5kg3OOsyliC00OVq_l0J-RJ4Ln7hUoURk21aO2fs,43653
15
15
  refchecker/config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
16
16
  refchecker/config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
@@ -29,7 +29,7 @@ refchecker/scripts/start_vllm_server.py,sha256=ZepWp2y2cKFW0Kgsoima2RbmF02fTU29U
29
29
  refchecker/services/__init__.py,sha256=jGi9S74Msak3YR-C4Qb68VU7HB4oLaX9o1rlVAFpOFI,187
30
30
  refchecker/services/pdf_processor.py,sha256=7i5x043qfnyzE5EQmytfy_uPjbeCJp4Ka5OPyH-bwOE,10577
31
31
  refchecker/utils/__init__.py,sha256=1RrGoIIn1_gVzxd56b6a7HeAS-wu7uDP-nxLbR3fJ-8,1199
32
- refchecker/utils/arxiv_utils.py,sha256=idlCzkTApYwH-kdTiH9nrfo4GMmwdtUAv7cAGtoEG-0,19799
32
+ refchecker/utils/arxiv_utils.py,sha256=PvMBJGubvDvo9_WxrXUH4OWMNrEAD7z7NCcRl0la2iI,20601
33
33
  refchecker/utils/author_utils.py,sha256=DLTo1xsxef2wxoe4s_MWrh36maj4fgnvFlsDLpDE-qQ,5507
34
34
  refchecker/utils/biblatex_parser.py,sha256=IKRUMtRsjdXIktyk9XGArt_ms0asmqP549uhFvvumuE,25581
35
35
  refchecker/utils/bibliography_utils.py,sha256=d6kqDOQou_PX6WQkOzrGyN5GpzaOjhu54w9wGfBRQZw,11760
@@ -42,8 +42,8 @@ refchecker/utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLH
42
42
  refchecker/utils/text_utils.py,sha256=d_X4r1nVvkL7i0DhxfLaVK3CzbMP2oZvqX3kxfDudQw,220978
43
43
  refchecker/utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
44
44
  refchecker/utils/url_utils.py,sha256=7b0rWCQJSajzqOvD7ghsBZPejiq6mUIz6SGhvU_WGDs,9441
45
- academic_refchecker-1.2.61.dist-info/METADATA,sha256=B9ysTnRaEeuHG61zpB42FfhIVkv217TTEedEHkdmhQQ,24104
46
- academic_refchecker-1.2.61.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- academic_refchecker-1.2.61.dist-info/entry_points.txt,sha256=kG6k2JwFIRvmKe0oZTr2RYStyfl79BirJxyaO6kjIxA,72
48
- academic_refchecker-1.2.61.dist-info/top_level.txt,sha256=ZdIg_PFHiATpVT5Uvp4L17Q0d8mk8ZBsINXKf1tE0bo,11
49
- academic_refchecker-1.2.61.dist-info/RECORD,,
45
+ academic_refchecker-1.2.63.dist-info/METADATA,sha256=0DDmZe79Q8p67vLjNqCialsfmbULKriNNuQ3v79C-cQ,24104
46
+ academic_refchecker-1.2.63.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ academic_refchecker-1.2.63.dist-info/entry_points.txt,sha256=kG6k2JwFIRvmKe0oZTr2RYStyfl79BirJxyaO6kjIxA,72
48
+ academic_refchecker-1.2.63.dist-info/top_level.txt,sha256=ZdIg_PFHiATpVT5Uvp4L17Q0d8mk8ZBsINXKf1tE0bo,11
49
+ academic_refchecker-1.2.63.dist-info/RECORD,,
refchecker/__version__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """Version information for RefChecker."""
2
2
 
3
- __version__ = "1.2.61"
3
+ __version__ = "1.2.63"
4
4
 
5
- __version__ = "1.2.61"
5
+ __version__ = "1.2.63"
@@ -95,7 +95,7 @@ class NonArxivReferenceChecker:
95
95
  # Make the request with retries and backoff
96
96
  for attempt in range(max_retries_for_this_query):
97
97
  try:
98
- response = requests.get(endpoint, headers=self.headers, params=params)
98
+ response = requests.get(endpoint, headers=self.headers, params=params, timeout=30)
99
99
 
100
100
  # Check for rate limiting
101
101
  if response.status_code == 429:
@@ -141,7 +141,7 @@ class NonArxivReferenceChecker:
141
141
  # Make the request with retries and backoff
142
142
  for attempt in range(self.max_retries):
143
143
  try:
144
- response = requests.get(endpoint, headers=self.headers, params=params)
144
+ response = requests.get(endpoint, headers=self.headers, params=params, timeout=30)
145
145
 
146
146
  # Check for rate limiting
147
147
  if response.status_code == 429:
@@ -264,7 +264,7 @@ class NonArxivReferenceChecker:
264
264
 
265
265
  for attempt in range(self.max_retries):
266
266
  try:
267
- response = requests.get(endpoint, headers=self.headers, params=params)
267
+ response = requests.get(endpoint, headers=self.headers, params=params, timeout=30)
268
268
 
269
269
  if response.status_code == 429:
270
270
  wait_time = self.request_delay * (self.backoff_factor ** attempt)
@@ -458,7 +458,16 @@ def get_bibtex_content(paper):
458
458
 
459
459
  logger.debug(f"Bibliography comparison: .bbl has {bbl_entry_count} entries, .bib has {bib_entry_count} entries")
460
460
 
461
- if uses_bibtex and bib_entry_count > 0:
461
+ # IMPORTANT: Prefer .bbl when .bib is excessively large (e.g., includes full ACL Anthology)
462
+ # The .bbl file contains only the actually-cited references, while .bib may contain
463
+ # entire bibliography databases. Parsing 80k+ entries would cause the tool to hang.
464
+ # Use .bbl if: (1) .bbl has entries AND (2) .bib has either >1000 entries or >10x more entries than .bbl
465
+ excessive_bib = bib_entry_count > 1000 or (bbl_entry_count > 0 and bib_entry_count > bbl_entry_count * 10)
466
+
467
+ if bbl_entry_count > 0 and excessive_bib:
468
+ logger.info(f"Using .bbl files from ArXiv source (.bib has {bib_entry_count} entries which is excessive, .bbl has {bbl_entry_count})")
469
+ return bbl_content
470
+ elif uses_bibtex and bib_entry_count > 0 and not excessive_bib:
462
471
  logger.info(f"Using .bib files from ArXiv source (main TeX uses \\bibliography{{...}})")
463
472
  return bib_content
464
473
  elif bbl_entry_count > 0: