academic-refchecker 1.2.43__py3-none-any.whl → 1.2.44__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version information for RefChecker."""
2
2
 
3
- __version__ = "1.2.43"
3
+ __version__ = "1.2.44"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.43
3
+ Version: 1.2.44
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- __version__.py,sha256=JbybFux4Juuafz1jN0cgsedPmzBO8U9DJ874tJu2saA,65
2
- academic_refchecker-1.2.43.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
1
+ __version__.py,sha256=k3lYUlcZL-yL2e_2u3UPBtgwqMqZJ11x7KVMZOotlE8,65
2
+ academic_refchecker-1.2.44.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
3
3
  checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
4
4
  checkers/crossref.py,sha256=Hzq4dlf1CSn0aZWU8CMOnLxIvaSivTabLoepIOkgkmY,20585
5
5
  checkers/enhanced_hybrid_checker.py,sha256=6yf5tV4jLSVzjX1xR_kQq0NOgQIst-z_WmkiqqMc8hQ,23469
@@ -15,7 +15,7 @@ config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c,6164
15
15
  core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
16
16
  core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
17
17
  core/parallel_processor.py,sha256=5V2iJDBFwwryMCnCNU_oRt2u5he1wpy-_9qapC_6f00,17043
18
- core/refchecker.py,sha256=sVRg3PUzrs2vLFlEBoi4bxUy-TpO5iQHCkokGas-ygQ,273616
18
+ core/refchecker.py,sha256=w3KNWyyaZZVL3ghFhEfro8SPs4xXEUjmCJERfZ7Du6A,273648
19
19
  database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
20
20
  database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
21
21
  llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -36,11 +36,11 @@ utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
36
36
  utils/doi_utils.py,sha256=8f3iE4RdSNkzeqa9wJfoKcVEiBVse3_uf643biLudmw,4134
37
37
  utils/error_utils.py,sha256=JqnRg4z-O9GcJ1eJGeTMzmOQwPWbWo2Lf6Duwj-ymHQ,6258
38
38
  utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
39
- utils/text_utils.py,sha256=jPgCOBTVboLRJyypoOtL-dg1wBDQrKBux2ImvC6wL58,206296
39
+ utils/text_utils.py,sha256=uEwKasw3aTVgIDHbDJDSOcTUbPwfiivIdhKwmxQJy0U,206378
40
40
  utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
41
- utils/url_utils.py,sha256=aq1hSYEA888bOKuBOGWRclgTFIjw32rpFdsBO_Ja8ZM,8402
42
- academic_refchecker-1.2.43.dist-info/METADATA,sha256=ZsJhIw1n7Yjoug6mpV4zpAPf-eSW5xSMdd3Dl_WTOlI,22298
43
- academic_refchecker-1.2.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
- academic_refchecker-1.2.43.dist-info/entry_points.txt,sha256=WdI89tYkIfz-M628PiboOfOLzTBWZAqvlF29qCVCkek,61
45
- academic_refchecker-1.2.43.dist-info/top_level.txt,sha256=6RlcQEA0kHb7-ndbKMFMZnYnJQVohgsU6BBkbEvJvEs,69
46
- academic_refchecker-1.2.43.dist-info/RECORD,,
41
+ utils/url_utils.py,sha256=HdxIO8QvciP6Jp8Wd4sTSrS8JQrOMwgM7pxdUC8RJb4,9176
42
+ academic_refchecker-1.2.44.dist-info/METADATA,sha256=ueA0mwKqmiqhR9WBLyPy2W40wfJc4JRiWSTbrQHKU14,22298
43
+ academic_refchecker-1.2.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
+ academic_refchecker-1.2.44.dist-info/entry_points.txt,sha256=WdI89tYkIfz-M628PiboOfOLzTBWZAqvlF29qCVCkek,61
45
+ academic_refchecker-1.2.44.dist-info/top_level.txt,sha256=6RlcQEA0kHb7-ndbKMFMZnYnJQVohgsU6BBkbEvJvEs,69
46
+ academic_refchecker-1.2.44.dist-info/RECORD,,
core/refchecker.py CHANGED
@@ -5181,7 +5181,7 @@ class ArxivReferenceChecker:
5181
5181
  from utils.text_utils import format_authors_for_display
5182
5182
  authors = format_authors_for_display(reference.get('authors', []))
5183
5183
  year = reference.get('year', '')
5184
- venue = reference.get('venue', '')
5184
+ venue = reference.get('venue', '') or reference.get('journal', '')
5185
5185
  url = reference.get('url', '')
5186
5186
  doi = reference.get('doi', '')
5187
5187
  # Extract actual reference number from raw text for accurate display
utils/text_utils.py CHANGED
@@ -4594,7 +4594,7 @@ def normalize_venue_for_display(venue: str) -> str:
4594
4594
  prefixes_to_remove = [
4595
4595
  r'^\d{4}\s+\d+(st|nd|rd|th)\s+', # "2012 IEEE/RSJ"
4596
4596
  r'^\d{4}\s+', # "2024 "
4597
- r'^proceedings\s+(of\s+)?(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Proceedings of the IEEE"
4597
+ r'^proceedings\s+(of\s+)?(the\s+)?((acm|ieee|usenix|aaai|sigcomm|sigkdd|sigmod|sigops|vldb|osdi|sosp|eurosys)\s+)*(\d+(st|nd|rd|th)\s+)?', # "Proceedings of the [ORG] [ORG] 29th"
4598
4598
  r'^proc\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Proc. of the IEEE" (require "of")
4599
4599
  r'^procs\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Procs. of the IEEE" (require "of")
4600
4600
  r'^in\s+',
utils/url_utils.py CHANGED
@@ -214,6 +214,7 @@ def clean_url(url: str) -> str:
214
214
  This function handles:
215
215
  - Whitespace trimming
216
216
  - Malformed LaTeX URL wrappers like \\url{https://...}
217
+ - Markdown-style links like [text](url)
217
218
  - Trailing punctuation from academic references
218
219
  - DOI URL query parameter cleanup
219
220
 
@@ -237,6 +238,14 @@ def clean_url(url: str) -> str:
237
238
  if url_match:
238
239
  url = url_match.group(1)
239
240
 
241
+ # Handle markdown-style links like [text](url) or [url](url)
242
+ # e.g., "[https://example.com](https://example.com)" -> "https://example.com"
243
+ markdown_pattern = r'\[([^\]]*)\]\((https?://[^)]+)\)'
244
+ markdown_match = re.search(markdown_pattern, url)
245
+ if markdown_match:
246
+ # Use the URL from parentheses
247
+ url = markdown_match.group(2)
248
+
240
249
  # Remove trailing punctuation that's commonly part of sentence structure
241
250
  # but preserve legitimate URL characters
242
251
  url = url.rstrip('.,;!?)')
@@ -280,6 +289,14 @@ def clean_url_punctuation(url: str) -> str:
280
289
  if url_match:
281
290
  url = url_match.group(1)
282
291
 
292
+ # Handle markdown-style links like [text](url) or [url](url)
293
+ # e.g., "[https://example.com](https://example.com)" -> "https://example.com"
294
+ markdown_pattern = r'\[([^\]]*)\]\((https?://[^)]+)\)'
295
+ markdown_match = re.search(markdown_pattern, url)
296
+ if markdown_match:
297
+ # Use the URL from parentheses
298
+ url = markdown_match.group(2)
299
+
283
300
  # Remove trailing punctuation that's commonly part of sentence structure
284
301
  # but preserve legitimate URL characters
285
302
  url = url.rstrip('.,;!?)')