academic-refchecker 1.2.46__tar.gz → 1.2.48__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.46/src/academic_refchecker.egg-info → academic_refchecker-1.2.48}/PKG-INFO +1 -1
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/__version__.py +1 -1
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48/src/academic_refchecker.egg-info}/PKG-INFO +1 -1
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/local_semantic_scholar.py +15 -4
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/refchecker.py +70 -18
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/doi_utils.py +23 -5
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/error_utils.py +17 -1
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/text_utils.py +25 -4
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/LICENSE +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/MANIFEST.in +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/README.md +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/pyproject.toml +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/requirements.txt +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/scripts/download_db.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/scripts/run_tests.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/setup.cfg +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/top_level.txt +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/crossref.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/enhanced_hybrid_checker.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/github_checker.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/openalex.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/openreview_checker.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/semantic_scholar.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/webpage_checker.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/config/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/config/logging.conf +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/config/settings.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/db_connection_pool.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/parallel_processor.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/database/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/llm/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/llm/base.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/llm/providers.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/scripts/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/services/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/services/pdf_processor.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/__init__.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/arxiv_utils.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/author_utils.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/biblatex_parser.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/bibliography_utils.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/bibtex_parser.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/config_validator.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/db_utils.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/mock_objects.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/unicode_utils.py +0 -0
- {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/url_utils.py +0 -0
{academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/local_semantic_scholar.py
RENAMED
|
@@ -430,11 +430,22 @@ class LocalNonArxivReferenceChecker:
|
|
|
430
430
|
logger.debug(f"Local DB: Author mismatch - {author_error}")
|
|
431
431
|
errors.append(create_author_error(author_error, paper_data.get('authors', [])))
|
|
432
432
|
|
|
433
|
-
# Verify year
|
|
433
|
+
# Verify year (with tolerance)
|
|
434
434
|
paper_year = paper_data.get('year')
|
|
435
|
-
if year and paper_year
|
|
436
|
-
|
|
437
|
-
|
|
435
|
+
if year and paper_year:
|
|
436
|
+
# Get year tolerance from config (default to 1 if not available)
|
|
437
|
+
year_tolerance = 1 # Default tolerance
|
|
438
|
+
try:
|
|
439
|
+
from config.settings import get_config
|
|
440
|
+
config = get_config()
|
|
441
|
+
year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
|
|
442
|
+
except (ImportError, Exception):
|
|
443
|
+
pass # Use default if config not available
|
|
444
|
+
|
|
445
|
+
# Only flag as mismatch if the difference is greater than tolerance
|
|
446
|
+
if abs(year - paper_year) > year_tolerance:
|
|
447
|
+
logger.debug(f"Local DB: Year mismatch - cited: {year}, actual: {paper_year}")
|
|
448
|
+
errors.append(create_year_warning(year, paper_year))
|
|
438
449
|
|
|
439
450
|
# Verify DOI
|
|
440
451
|
paper_doi = None
|
|
@@ -1922,16 +1922,27 @@ class ArxivReferenceChecker:
|
|
|
1922
1922
|
'ref_authors_correct': ', '.join(correct_names)
|
|
1923
1923
|
})
|
|
1924
1924
|
|
|
1925
|
-
# Verify year
|
|
1925
|
+
# Verify year (with tolerance)
|
|
1926
1926
|
paper_year = paper_data.get('year')
|
|
1927
|
-
if year and paper_year
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
'
|
|
1934
|
-
|
|
1927
|
+
if year and paper_year:
|
|
1928
|
+
# Get year tolerance from config (default to 1 if not available)
|
|
1929
|
+
year_tolerance = 1 # Default tolerance
|
|
1930
|
+
try:
|
|
1931
|
+
from config.settings import get_config
|
|
1932
|
+
config = get_config()
|
|
1933
|
+
year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
|
|
1934
|
+
except (ImportError, Exception):
|
|
1935
|
+
pass # Use default if config not available
|
|
1936
|
+
|
|
1937
|
+
# Only flag as mismatch if the difference is greater than tolerance
|
|
1938
|
+
if abs(year - paper_year) > year_tolerance:
|
|
1939
|
+
logger.debug(f"DB Verification: Year mismatch - cited: {year}, actual: {paper_year}")
|
|
1940
|
+
from utils.error_utils import format_year_mismatch
|
|
1941
|
+
errors.append({
|
|
1942
|
+
'warning_type': 'year',
|
|
1943
|
+
'warning_details': format_year_mismatch(year, paper_year),
|
|
1944
|
+
'ref_year_correct': paper_year
|
|
1945
|
+
})
|
|
1935
1946
|
|
|
1936
1947
|
# Verify DOI
|
|
1937
1948
|
if doi and external_ids.get('DOI'):
|
|
@@ -2626,9 +2637,19 @@ class ArxivReferenceChecker:
|
|
|
2626
2637
|
|
|
2627
2638
|
# Generate corrected reference using all available corrections
|
|
2628
2639
|
corrected_data = self._extract_corrected_data_from_error(consolidated_entry, verified_data)
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2640
|
+
|
|
2641
|
+
# Generate all three formats for user convenience
|
|
2642
|
+
from utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
|
|
2643
|
+
plaintext_format = format_corrected_plaintext(reference, corrected_data, consolidated_entry)
|
|
2644
|
+
bibtex_format = format_corrected_bibtex(reference, corrected_data, consolidated_entry)
|
|
2645
|
+
bibitem_format = format_corrected_bibitem(reference, corrected_data, consolidated_entry)
|
|
2646
|
+
|
|
2647
|
+
if plaintext_format:
|
|
2648
|
+
consolidated_entry['ref_corrected_plaintext'] = plaintext_format
|
|
2649
|
+
if bibtex_format:
|
|
2650
|
+
consolidated_entry['ref_corrected_bibtex'] = bibtex_format
|
|
2651
|
+
if bibitem_format:
|
|
2652
|
+
consolidated_entry['ref_corrected_bibitem'] = bibitem_format
|
|
2632
2653
|
|
|
2633
2654
|
# Store the consolidated entry (write to file at end of run)
|
|
2634
2655
|
self.errors.append(consolidated_entry)
|
|
@@ -2685,11 +2706,21 @@ class ArxivReferenceChecker:
|
|
|
2685
2706
|
if error_type != 'unverified':
|
|
2686
2707
|
error_entry['ref_standard_format'] = self.format_standard_reference(error)
|
|
2687
2708
|
|
|
2688
|
-
# Generate corrected reference in
|
|
2709
|
+
# Generate corrected reference in all formats for user convenience
|
|
2689
2710
|
corrected_data = self._extract_corrected_data_from_error(error, verified_data)
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2711
|
+
|
|
2712
|
+
# Generate all three formats
|
|
2713
|
+
from utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
|
|
2714
|
+
plaintext_format = format_corrected_plaintext(reference, corrected_data, error_entry)
|
|
2715
|
+
bibtex_format = format_corrected_bibtex(reference, corrected_data, error_entry)
|
|
2716
|
+
bibitem_format = format_corrected_bibitem(reference, corrected_data, error_entry)
|
|
2717
|
+
|
|
2718
|
+
if plaintext_format:
|
|
2719
|
+
error_entry['ref_corrected_plaintext'] = plaintext_format
|
|
2720
|
+
if bibtex_format:
|
|
2721
|
+
error_entry['ref_corrected_bibtex'] = bibtex_format
|
|
2722
|
+
if bibitem_format:
|
|
2723
|
+
error_entry['ref_corrected_bibitem'] = bibitem_format
|
|
2693
2724
|
else:
|
|
2694
2725
|
error_entry['ref_standard_format'] = None
|
|
2695
2726
|
|
|
@@ -2761,8 +2792,29 @@ class ArxivReferenceChecker:
|
|
|
2761
2792
|
f.write(f" {error_entry['ref_verified_url']}\n")
|
|
2762
2793
|
f.write("\n")
|
|
2763
2794
|
|
|
2764
|
-
# Show corrected reference in
|
|
2765
|
-
|
|
2795
|
+
# Show corrected reference in all formats if available
|
|
2796
|
+
formats_written = False
|
|
2797
|
+
|
|
2798
|
+
# Plain text format
|
|
2799
|
+
if error_entry.get('ref_corrected_plaintext'):
|
|
2800
|
+
f.write("CORRECTED REFERENCE (Plain Text):\n")
|
|
2801
|
+
f.write(f"{error_entry['ref_corrected_plaintext']}\n\n")
|
|
2802
|
+
formats_written = True
|
|
2803
|
+
|
|
2804
|
+
# BibTeX format
|
|
2805
|
+
if error_entry.get('ref_corrected_bibtex'):
|
|
2806
|
+
f.write("CORRECTED REFERENCE (BibTeX):\n")
|
|
2807
|
+
f.write(f"{error_entry['ref_corrected_bibtex']}\n\n")
|
|
2808
|
+
formats_written = True
|
|
2809
|
+
|
|
2810
|
+
# Bibitem/LaTeX format
|
|
2811
|
+
if error_entry.get('ref_corrected_bibitem'):
|
|
2812
|
+
f.write("CORRECTED REFERENCE (LaTeX/Biblatex):\n")
|
|
2813
|
+
f.write(f"{error_entry['ref_corrected_bibitem']}\n\n")
|
|
2814
|
+
formats_written = True
|
|
2815
|
+
|
|
2816
|
+
# Fallback to legacy format if no new formats available
|
|
2817
|
+
if not formats_written and error_entry.get('ref_corrected_format'):
|
|
2766
2818
|
f.write("CORRECTED REFERENCE:\n")
|
|
2767
2819
|
f.write(f"{error_entry['ref_corrected_format']}\n\n")
|
|
2768
2820
|
|
|
@@ -99,9 +99,8 @@ def compare_dois(doi1: str, doi2: str) -> bool:
|
|
|
99
99
|
"""
|
|
100
100
|
Compare two DOIs for equality, handling different formats and prefixes.
|
|
101
101
|
|
|
102
|
-
This function performs exact matching after normalization,
|
|
103
|
-
|
|
104
|
-
prefixes, case differences, and punctuation.
|
|
102
|
+
This function performs exact matching after normalization, with support
|
|
103
|
+
for partial DOI citations where a shorter DOI is a valid prefix of a longer one.
|
|
105
104
|
|
|
106
105
|
Args:
|
|
107
106
|
doi1: First DOI to compare
|
|
@@ -117,8 +116,27 @@ def compare_dois(doi1: str, doi2: str) -> bool:
|
|
|
117
116
|
norm_doi1 = normalize_doi(doi1)
|
|
118
117
|
norm_doi2 = normalize_doi(doi2)
|
|
119
118
|
|
|
120
|
-
#
|
|
121
|
-
|
|
119
|
+
# First try exact match
|
|
120
|
+
if norm_doi1 == norm_doi2:
|
|
121
|
+
return True
|
|
122
|
+
|
|
123
|
+
# Handle partial DOI citations - if one DOI is a prefix of the other, consider it a match
|
|
124
|
+
# This handles cases like "10.1007" being cited instead of the full "10.1007/s10458-025-09691-y"
|
|
125
|
+
if len(norm_doi1) != len(norm_doi2):
|
|
126
|
+
shorter_doi = norm_doi1 if len(norm_doi1) < len(norm_doi2) else norm_doi2
|
|
127
|
+
longer_doi = norm_doi2 if len(norm_doi1) < len(norm_doi2) else norm_doi1
|
|
128
|
+
|
|
129
|
+
# Only consider it a valid partial match if:
|
|
130
|
+
# 1. The shorter DOI is at least 7 characters (e.g., "10.1007")
|
|
131
|
+
# 2. The longer DOI starts with the shorter DOI
|
|
132
|
+
# 3. The next character in the longer DOI is '/' or '.' (valid DOI separators)
|
|
133
|
+
if (len(shorter_doi) >= 7 and
|
|
134
|
+
longer_doi.startswith(shorter_doi) and
|
|
135
|
+
len(longer_doi) > len(shorter_doi) and
|
|
136
|
+
longer_doi[len(shorter_doi)] in ['/', '.']):
|
|
137
|
+
return True
|
|
138
|
+
|
|
139
|
+
return False
|
|
122
140
|
|
|
123
141
|
|
|
124
142
|
def construct_doi_url(doi: str) -> str:
|
|
@@ -183,6 +183,14 @@ def clean_venue_for_comparison(venue: str) -> str:
|
|
|
183
183
|
return normalize_venue_for_display(venue)
|
|
184
184
|
|
|
185
185
|
|
|
186
|
+
def format_missing_venue(correct_venue: str) -> str:
|
|
187
|
+
"""
|
|
188
|
+
Format a missing venue message with only the actual value.
|
|
189
|
+
"""
|
|
190
|
+
# Only show the actual venue; omit the empty cited line
|
|
191
|
+
return f"Missing venue: '{correct_venue}'"
|
|
192
|
+
|
|
193
|
+
|
|
186
194
|
def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]:
|
|
187
195
|
"""
|
|
188
196
|
Create a standardized venue warning dictionary.
|
|
@@ -197,7 +205,15 @@ def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]
|
|
|
197
205
|
# Clean both venues for display in the warning
|
|
198
206
|
clean_cited = clean_venue_for_comparison(cited_venue)
|
|
199
207
|
clean_correct = clean_venue_for_comparison(correct_venue)
|
|
200
|
-
|
|
208
|
+
|
|
209
|
+
# If cited venue cleans to empty, treat as missing venue instead of mismatch
|
|
210
|
+
if not clean_cited and clean_correct:
|
|
211
|
+
return {
|
|
212
|
+
'warning_type': 'venue',
|
|
213
|
+
'warning_details': format_missing_venue(clean_correct),
|
|
214
|
+
'ref_venue_correct': correct_venue
|
|
215
|
+
}
|
|
216
|
+
|
|
201
217
|
return {
|
|
202
218
|
'warning_type': 'venue',
|
|
203
219
|
'warning_details': format_three_line_mismatch("Venue mismatch", clean_cited, clean_correct),
|
|
@@ -506,8 +506,10 @@ def clean_author_name(author):
|
|
|
506
506
|
# Fix spacing around periods in initials (e.g., "Y . Li" -> "Y. Li")
|
|
507
507
|
author = re.sub(r'(\w)\s+\.', r'\1.', author)
|
|
508
508
|
|
|
509
|
-
# Remove common prefixes
|
|
510
|
-
|
|
509
|
+
# Remove common honorific prefixes only when they are standalone at the start (require trailing whitespace)
|
|
510
|
+
# Previous pattern falsely removed the leading "Mr" from names like "Mrinmaya" due to optional whitespace.
|
|
511
|
+
# Anchor to start and require at least one space after the title to avoid stripping inside longer names.
|
|
512
|
+
author = re.sub(r'^(?:Dr|Prof|Professor|Mr|Ms|Mrs)\.?\s+', '', author, flags=re.IGNORECASE)
|
|
511
513
|
|
|
512
514
|
# Remove email addresses
|
|
513
515
|
author = re.sub(r'\S+@\S+\.\S+', '', author)
|
|
@@ -2100,7 +2102,7 @@ def compare_authors(cited_authors: list, correct_authors: list, normalize_func=N
|
|
|
2100
2102
|
# Use standardized three-line formatting for author mismatch
|
|
2101
2103
|
cited_display = format_author_for_display(cited_author)
|
|
2102
2104
|
full_author_list = ', '.join(correct_names)
|
|
2103
|
-
error_msg = format_author_mismatch(i+1, f"{cited_display} (not found in author list - et al case)", f"
|
|
2105
|
+
error_msg = format_author_mismatch(i+1, f"{cited_display} (not found in author list - et al case)", f"{full_author_list}")
|
|
2104
2106
|
return False, error_msg
|
|
2105
2107
|
|
|
2106
2108
|
return True, f"Authors match (verified {len(cleaned_cited)} of {len(correct_names)} with et al)"
|
|
@@ -3588,6 +3590,12 @@ def calculate_title_similarity(title1: str, title2: str) -> float:
|
|
|
3588
3590
|
# Normalize titles for comparison
|
|
3589
3591
|
t1 = title1.lower().strip()
|
|
3590
3592
|
t2 = title2.lower().strip()
|
|
3593
|
+
|
|
3594
|
+
# Remove trailing year suffixes like ", 2024" or " 2024" for robust matching
|
|
3595
|
+
def strip_trailing_year(s: str) -> str:
|
|
3596
|
+
return re.sub(r"[,\s]*\b(19|20)\d{2}\b\s*$", "", s).strip()
|
|
3597
|
+
t1 = strip_trailing_year(t1)
|
|
3598
|
+
t2 = strip_trailing_year(t2)
|
|
3591
3599
|
|
|
3592
3600
|
# Exact match
|
|
3593
3601
|
if t1 == t2:
|
|
@@ -4676,6 +4684,13 @@ def normalize_venue_for_display(venue: str) -> str:
|
|
|
4676
4684
|
|
|
4677
4685
|
venue_text = venue.strip()
|
|
4678
4686
|
|
|
4687
|
+
# Strip leading editor name lists like "..., editors, Venue ..." or "..., eds., Venue ..."
|
|
4688
|
+
# This prevents author/editor lists from being treated as venue
|
|
4689
|
+
# Match 'editors,' 'editor,' or 'eds.,' possibly after a comma; capture the remainder as venue
|
|
4690
|
+
editors_match = re.search(r"(?:^|,)\s*(?:editors?|eds?\.?|editor)\s*,\s*(.+)$", venue_text, re.IGNORECASE)
|
|
4691
|
+
if editors_match:
|
|
4692
|
+
venue_text = editors_match.group(1).strip()
|
|
4693
|
+
|
|
4679
4694
|
# Extract venue from complex editor strings (e.g. "In Smith, J.; and Doe, K., eds., Conference Name, volume 1")
|
|
4680
4695
|
# This handles patterns like "In [authors], eds., [venue], [optional metadata]" (case-insensitive)
|
|
4681
4696
|
editor_match = re.search(r'in\s+[^,]+(?:,\s*[^,]*)*,\s*eds?\.,\s*(.+?)(?:,\s*volume\s*\d+|,\s*pp?\.|$)', venue_text, re.IGNORECASE)
|
|
@@ -4702,7 +4717,9 @@ def normalize_venue_for_display(venue: str) -> str:
|
|
|
4702
4717
|
prefixes_to_remove = [
|
|
4703
4718
|
r'^\d{4}\s+\d+(st|nd|rd|th)\s+', # "2012 IEEE/RSJ"
|
|
4704
4719
|
r'^\d{4}\s+', # "2024 "
|
|
4705
|
-
|
|
4720
|
+
# Remove 'Proceedings of [the] [ORG]* [ordinal]*' only when followed by at least one word
|
|
4721
|
+
# This avoids cutting a venue down to just 'Proceedings of the'
|
|
4722
|
+
r'^proceedings\s+of\s+(?!the\s*$)(?:the\s+)?(?:(?:acm|ieee|usenix|aaai|sigcomm|sigkdd|sigmod|sigops|vldb|osdi|sosp|eurosys)\s+)*(?:\d+(?:st|nd|rd|th)\s+)?',
|
|
4706
4723
|
r'^proc\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Proc. of the IEEE" (require "of")
|
|
4707
4724
|
r'^procs\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Procs. of the IEEE" (require "of")
|
|
4708
4725
|
r'^in\s+',
|
|
@@ -4741,4 +4758,8 @@ def normalize_venue_for_display(venue: str) -> str:
|
|
|
4741
4758
|
venue_text = re.sub(r'\s+', ' ', venue_text) # Normalize whitespace
|
|
4742
4759
|
venue_text = venue_text.strip()
|
|
4743
4760
|
|
|
4761
|
+
# If what's left is too generic (e.g., just 'Proceedings of the'), treat as no venue
|
|
4762
|
+
if venue_text.lower() in {"proceedings of the", "proceedings of"}:
|
|
4763
|
+
return ""
|
|
4764
|
+
|
|
4744
4765
|
return venue_text
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/enhanced_hybrid_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/openreview_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|