academic-refchecker 2.0.21__tar.gz → 2.0.22__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {academic_refchecker-2.0.21/academic_refchecker.egg-info → academic_refchecker-2.0.22}/PKG-INFO +1 -1
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22/academic_refchecker.egg-info}/PKG-INFO +1 -1
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/__version__.py +1 -1
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/text_utils.py +26 -4
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/LICENSE +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/MANIFEST.in +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/README.md +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/SOURCES.txt +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/entry_points.txt +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/top_level.txt +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/__main__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/cli.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/concurrency.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/database.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/main.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/models.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/refchecker_wrapper.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-2P6L_39v.css +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-B92lKsA8.js +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-BuguAhjS.css +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-DMZJNrR0.js +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-hk21nqxR.js +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/favicon.svg +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/index.html +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/vite.svg +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/thumbnail.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/websocket_manager.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/pyproject.toml +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/requirements.txt +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/scripts/download_db.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/scripts/run_tests.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/setup.cfg +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/__main__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/arxiv_citation.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/crossref.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/enhanced_hybrid_checker.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/github_checker.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/local_semantic_scholar.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/openalex.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/openreview_checker.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/pdf_paper_checker.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/semantic_scholar.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/webpage_checker.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/config/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/config/logging.conf +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/config/settings.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/core/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/core/db_connection_pool.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/core/parallel_processor.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/core/refchecker.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/database/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/llm/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/llm/base.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/llm/providers.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/scripts/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/services/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/services/pdf_processor.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/__init__.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/arxiv_rate_limiter.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/arxiv_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/author_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/biblatex_parser.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/bibliography_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/bibtex_parser.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/config_validator.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/db_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/doi_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/error_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/mock_objects.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/unicode_utils.py +0 -0
- {academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/url_utils.py +0 -0
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/text_utils.py
RENAMED
|
@@ -6,7 +6,6 @@ Text processing utilities for ArXiv Reference Checker
|
|
|
6
6
|
import re
|
|
7
7
|
import logging
|
|
8
8
|
import unicodedata
|
|
9
|
-
import html
|
|
10
9
|
from typing import List
|
|
11
10
|
|
|
12
11
|
logger = logging.getLogger(__name__)
|
|
@@ -1373,6 +1372,15 @@ def is_name_match(name1: str, name2: str) -> bool:
|
|
|
1373
1372
|
first_initial == first_name[0] and
|
|
1374
1373
|
middle_initial == middle_name[0]):
|
|
1375
1374
|
return True
|
|
1375
|
+
else:
|
|
1376
|
+
# Simple last name case: "W. R. Weimer" vs "Westley Weimer"
|
|
1377
|
+
# The cited name has an extra middle initial that the actual name doesn't have
|
|
1378
|
+
# Allow match if first initial and last name match (tolerate extra middle initial)
|
|
1379
|
+
# BUT: Exclude cases where first_name is just concatenated initials (like "gv")
|
|
1380
|
+
# which should require exact initial matching, not tolerance
|
|
1381
|
+
is_real_first_name = len(first_name) > 2 # "Westley" yes, "gv" no
|
|
1382
|
+
if is_real_first_name and last_name == compound_last and first_initial == first_name[0]:
|
|
1383
|
+
return True
|
|
1376
1384
|
|
|
1377
1385
|
elif len(init_parts) == 3 and len(name_parts) == 3:
|
|
1378
1386
|
# Check for "Last, First Middle" vs "First Middle Last" format
|
|
@@ -4290,6 +4298,7 @@ def are_venues_substantially_different(venue1: str, venue2: str) -> bool:
|
|
|
4290
4298
|
# Handle specific multi-word patterns and well-known acronyms
|
|
4291
4299
|
'proc. natl. acad. sci.': 'proceedings of the national academy of sciences',
|
|
4292
4300
|
'pnas': 'proceedings of the national academy of sciences',
|
|
4301
|
+
'cacm': 'communications of the acm',
|
|
4293
4302
|
# Special cases that don't follow standard acronym patterns
|
|
4294
4303
|
'neurips': 'neural information processing systems', # Special case
|
|
4295
4304
|
'nips': 'neural information processing systems', # old name for neurips
|
|
@@ -4426,6 +4435,8 @@ def are_venues_substantially_different(venue1: str, venue2: str) -> bool:
|
|
|
4426
4435
|
'neurips': 'neural information processing systems', # Special case: doesn't follow standard acronym rules
|
|
4427
4436
|
'nips': 'neural information processing systems', # old name for neurips
|
|
4428
4437
|
'nsdi': 'networked systems design and implementation', # USENIX NSDI
|
|
4438
|
+
'cacm': 'communications of the acm',
|
|
4439
|
+
'communications of the': 'communications of the acm',
|
|
4429
4440
|
}
|
|
4430
4441
|
|
|
4431
4442
|
# Apply abbreviation expansion - handle multi-word phrases first
|
|
@@ -5089,8 +5100,18 @@ def normalize_venue_for_display(venue: str) -> str:
|
|
|
5089
5100
|
|
|
5090
5101
|
return text_lower
|
|
5091
5102
|
|
|
5092
|
-
|
|
5093
|
-
|
|
5103
|
+
venue_text = venue.strip()
|
|
5104
|
+
|
|
5105
|
+
# Fix common truncated venues that lose their organization suffix during PDF extraction
|
|
5106
|
+
truncated_aliases = {
|
|
5107
|
+
"communications of the": "Communications of the ACM",
|
|
5108
|
+
}
|
|
5109
|
+
|
|
5110
|
+
# Allow trailing punctuation/whitespace while matching truncated forms
|
|
5111
|
+
normalized_candidate = re.sub(r"[\s.,;:]+$", "", venue_text, flags=re.IGNORECASE)
|
|
5112
|
+
alias = truncated_aliases.get(normalized_candidate.lower())
|
|
5113
|
+
if alias:
|
|
5114
|
+
return alias
|
|
5094
5115
|
|
|
5095
5116
|
# Strip leading editor name lists like "..., editors, Venue ..." or "..., eds., Venue ..."
|
|
5096
5117
|
# This prevents author/editor lists from being treated as venue
|
|
@@ -5152,7 +5173,8 @@ def normalize_venue_for_display(venue: str) -> str:
|
|
|
5152
5173
|
if not re.match(r'ieee\s+transactions', venue_text, re.IGNORECASE):
|
|
5153
5174
|
venue_text = re.sub(r'^(ieee|acm|aaai|usenix|sigcomm|sigkdd|sigmod|vldb|osdi|sosp|eurosys)\s+', '', venue_text, flags=re.IGNORECASE) # Remove org prefixes
|
|
5154
5175
|
venue_text = re.sub(r'^ieee/\w+\s+', '', venue_text, flags=re.IGNORECASE) # Remove "IEEE/RSJ " etc
|
|
5155
|
-
|
|
5176
|
+
# Remove org suffixes, but NOT when preceded by "of the" (e.g., "Communications of the ACM", "Journal of the ACM")
|
|
5177
|
+
venue_text = re.sub(r'(?<!of the)\s+(ieee|acm|aaai|usenix)\s*$', '', venue_text, flags=re.IGNORECASE) # Remove org suffixes
|
|
5156
5178
|
venue_text = re.sub(r'/\w+\s+', ' ', venue_text) # Remove "/ACM " style org separators
|
|
5157
5179
|
|
|
5158
5180
|
# IMPORTANT: Don't remove "Conference on" or "International" - they're needed for display
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/requires.txt
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/academic_refchecker.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-2P6L_39v.css
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-B92lKsA8.js
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-BuguAhjS.css
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-DMZJNrR0.js
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/backend/static/assets/index-hk21nqxR.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/__init__.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/arxiv_citation.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/crossref.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/github_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/openalex.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/checkers/webpage_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/config/logging.conf
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/core/db_connection_pool.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/core/parallel_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/database/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/scripts/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/services/__init__.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/services/pdf_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/arxiv_rate_limiter.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/arxiv_utils.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/author_utils.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/biblatex_parser.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/bibliography_utils.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/bibtex_parser.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/config_validator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/error_utils.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/mock_objects.py
RENAMED
|
File without changes
|
{academic_refchecker-2.0.21 → academic_refchecker-2.0.22}/src/refchecker/utils/unicode_utils.py
RENAMED
|
File without changes
|
|
File without changes
|