academic-refchecker 1.2.53-py3-none-any.whl → 1.2.55-py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/METADATA +23 -23
- academic_refchecker-1.2.55.dist-info/RECORD +49 -0
- academic_refchecker-1.2.55.dist-info/entry_points.txt +2 -0
- academic_refchecker-1.2.55.dist-info/top_level.txt +1 -0
- refchecker/__init__.py +13 -0
- refchecker/__main__.py +11 -0
- refchecker/__version__.py +5 -0
- {checkers → refchecker/checkers}/crossref.py +5 -5
- {checkers → refchecker/checkers}/enhanced_hybrid_checker.py +1 -1
- {checkers → refchecker/checkers}/github_checker.py +4 -4
- {checkers → refchecker/checkers}/local_semantic_scholar.py +7 -7
- {checkers → refchecker/checkers}/openalex.py +6 -6
- {checkers → refchecker/checkers}/openreview_checker.py +8 -8
- {checkers → refchecker/checkers}/pdf_paper_checker.py +1 -1
- {checkers → refchecker/checkers}/semantic_scholar.py +10 -10
- {checkers → refchecker/checkers}/webpage_checker.py +3 -3
- {core → refchecker/core}/parallel_processor.py +6 -6
- {core → refchecker/core}/refchecker.py +63 -63
- {utils → refchecker/utils}/arxiv_utils.py +3 -3
- {utils → refchecker/utils}/biblatex_parser.py +4 -4
- {utils → refchecker/utils}/bibliography_utils.py +5 -5
- {utils → refchecker/utils}/bibtex_parser.py +5 -5
- {utils → refchecker/utils}/error_utils.py +1 -1
- {utils → refchecker/utils}/text_utils.py +62 -13
- __version__.py +0 -3
- academic_refchecker-1.2.53.dist-info/RECORD +0 -47
- academic_refchecker-1.2.53.dist-info/entry_points.txt +0 -2
- academic_refchecker-1.2.53.dist-info/top_level.txt +0 -9
- {academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/WHEEL +0 -0
- {academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/licenses/LICENSE +0 -0
- {checkers → refchecker/checkers}/__init__.py +0 -0
- {config → refchecker/config}/__init__.py +0 -0
- {config → refchecker/config}/logging.conf +0 -0
- {config → refchecker/config}/settings.py +0 -0
- {core → refchecker/core}/__init__.py +0 -0
- {core → refchecker/core}/db_connection_pool.py +0 -0
- {database → refchecker/database}/__init__.py +0 -0
- {database → refchecker/database}/download_semantic_scholar_db.py +0 -0
- {llm → refchecker/llm}/__init__.py +0 -0
- {llm → refchecker/llm}/base.py +0 -0
- {llm → refchecker/llm}/providers.py +0 -0
- {scripts → refchecker/scripts}/__init__.py +0 -0
- {scripts → refchecker/scripts}/start_vllm_server.py +0 -0
- {services → refchecker/services}/__init__.py +0 -0
- {services → refchecker/services}/pdf_processor.py +0 -0
- {utils → refchecker/utils}/__init__.py +0 -0
- {utils → refchecker/utils}/author_utils.py +0 -0
- {utils → refchecker/utils}/config_validator.py +0 -0
- {utils → refchecker/utils}/db_utils.py +0 -0
- {utils → refchecker/utils}/doi_utils.py +0 -0
- {utils → refchecker/utils}/mock_objects.py +0 -0
- {utils → refchecker/utils}/unicode_utils.py +0 -0
- {utils → refchecker/utils}/url_utils.py +0 -0
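Taken together, the file list describes one structural change: the nine top-level modules shipped by 1.2.53 (`checkers`, `config`, `core`, `database`, `llm`, `scripts`, `services`, `utils`, plus the stray top-level `__version__.py`) now live under a single `refchecker` package, with new package-level `__init__.py`, `__main__.py`, and `__version__.py` files, and `top_level.txt` shrinking from nine entries to one. For any code that imported these modules directly, the paths change as in this minimal before/after sketch (module paths taken from the hunks below):

```python
# 1.2.53: modules sat at the top level of site-packages
from utils.doi_utils import construct_doi_url
from checkers.github_checker import GitHubChecker

# 1.2.55: everything is namespaced under the refchecker package
from refchecker.utils.doi_utils import construct_doi_url
from refchecker.checkers.github_checker import GitHubChecker
```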
{core → refchecker/core}/refchecker.py

```diff
@@ -11,7 +11,7 @@ For arXiv references, it uses the arXiv API to verify metadata.
 For non-arXiv references, it uses the local Semantic Scholar database for verification.
 
 Usage:
-    python
+    python run_refchecker.py --paper PAPER_SPEC [--db-path PATH] [--output-file [PATH]] [--debug]
 
 Options:
     --paper PAPER_SPEC  Validate a specific paper by:
@@ -44,23 +44,23 @@ import argparse
 import sys
 import json
 import random
-from checkers.local_semantic_scholar import LocalNonArxivReferenceChecker
-from utils.text_utils import (clean_author_name, clean_title, clean_title_basic,
+from refchecker.checkers.local_semantic_scholar import LocalNonArxivReferenceChecker
+from refchecker.utils.text_utils import (clean_author_name, clean_title, clean_title_basic,
                               extract_arxiv_id_from_url, normalize_text as common_normalize_text,
                               detect_latex_bibliography_format, extract_latex_references,
                               detect_standard_acm_natbib_format, strip_latex_commands,
                               format_corrected_reference, is_name_match, enhanced_name_match,
                               calculate_title_similarity, normalize_arxiv_url, deduplicate_urls,
                               compare_authors)
-from utils.config_validator import ConfigValidator
-from services.pdf_processor import PDFProcessor
-from checkers.enhanced_hybrid_checker import EnhancedHybridReferenceChecker
-from core.parallel_processor import ParallelReferenceProcessor
-from core.db_connection_pool import ThreadSafeLocalChecker
+from refchecker.utils.config_validator import ConfigValidator
+from refchecker.services.pdf_processor import PDFProcessor
+from refchecker.checkers.enhanced_hybrid_checker import EnhancedHybridReferenceChecker
+from refchecker.core.parallel_processor import ParallelReferenceProcessor
+from refchecker.core.db_connection_pool import ThreadSafeLocalChecker
 
 # Import version
-from __version__ import __version__
-from llm.base import create_llm_provider, ReferenceExtractor
+from refchecker.__version__ import __version__
+from refchecker.llm.base import create_llm_provider, ReferenceExtractor
 
 def get_llm_api_key_interactive(provider: str) -> str:
     """
@@ -453,7 +453,7 @@ class ArxivReferenceChecker:
     def extract_arxiv_id_from_url(self, url):
         """
         Extract ArXiv ID from a URL or text containing ArXiv reference.
-        Uses the common extraction function from utils.url_utils.
+        Uses the common extraction function from refchecker.utils.url_utils.
         """
         return extract_arxiv_id_from_url(url)
 
@@ -1189,7 +1189,7 @@ class ArxivReferenceChecker:
             last_author = and_parts[1].strip()
 
             # Split the main list by commas, handling initials properly
-            from utils.text_utils import parse_authors_with_initials
+            from refchecker.utils.text_utils import parse_authors_with_initials
             authors = parse_authors_with_initials(main_list)
 
             # Add the last author
@@ -1197,7 +1197,7 @@ class ArxivReferenceChecker:
                 authors.append(last_author)
         else:
            # No "and" found, use smart comma parsing for initials
-            from utils.text_utils import parse_authors_with_initials
+            from refchecker.utils.text_utils import parse_authors_with_initials
             authors = parse_authors_with_initials(authors_text)
 
         # Clean up each author name
@@ -1679,7 +1679,7 @@ class ArxivReferenceChecker:
         if not title and not authors_text:
             # Try to detect a list of names
             if re.match(r'^[A-Z][a-zA-Z\-\.]+(,\s*[A-Z][a-zA-Z\-\.]+)+$', cleaned_ref):
-                from utils.text_utils import parse_authors_with_initials
+                from refchecker.utils.text_utils import parse_authors_with_initials
                 authors = parse_authors_with_initials(cleaned_ref)
                 return authors, ""
 
@@ -1693,7 +1693,7 @@ class ArxivReferenceChecker:
 
         # Final fallback: if the reference is just a list of names, return as authors
         if not title and cleaned_ref and re.match(r'^[A-Z][a-zA-Z\-\.]+(,\s*[A-Z][a-zA-Z\-\.]+)+$', cleaned_ref):
-            from utils.text_utils import parse_authors_with_initials
+            from refchecker.utils.text_utils import parse_authors_with_initials
             authors = parse_authors_with_initials(cleaned_ref)
             return authors, ""
 
@@ -1901,7 +1901,7 @@ class ArxivReferenceChecker:
         db_title = self.non_arxiv_checker.normalize_paper_title(paper_data.get('title'))
 
         if normalized_title != db_title:
-            from utils.error_utils import format_title_mismatch
+            from refchecker.utils.error_utils import format_title_mismatch
             # Clean the title for display (remove LaTeX commands like {LLM}s -> LLMs)
             clean_cited_title = strip_latex_commands(title)
             logger.debug(f"DB Verification: Title mismatch - cited: '{title}', actual: '{paper_data.get('title')}'")
@@ -1940,7 +1940,7 @@ class ArxivReferenceChecker:
         # Only flag as mismatch if the difference is greater than tolerance
         if abs(year - paper_year) > year_tolerance:
             logger.debug(f"DB Verification: Year mismatch - cited: {year}, actual: {paper_year}")
-            from utils.error_utils import format_year_mismatch
+            from refchecker.utils.error_utils import format_year_mismatch
             errors.append({
                 'warning_type': 'year',
                 'warning_details': format_year_mismatch(year, paper_year),
@@ -1949,7 +1949,7 @@ class ArxivReferenceChecker:
 
         # Verify DOI
         if doi and external_ids.get('DOI'):
-            from utils.doi_utils import compare_dois, normalize_doi
+            from refchecker.utils.doi_utils import compare_dois, normalize_doi
 
             # Use proper DOI comparison first
             if not compare_dois(doi, external_ids['DOI']):
@@ -1962,7 +1962,7 @@ class ArxivReferenceChecker:
                 # Only flag as error if it's not a reasonable partial match
                 if not actual_doi_normalized.startswith(cited_doi_normalized.rstrip('.')):
                     logger.debug(f"DB Verification: DOI mismatch - cited: {doi}, actual: {external_ids['DOI']}")
-                    from utils.error_utils import format_doi_mismatch
+                    from refchecker.utils.error_utils import format_doi_mismatch
                     errors.append({
                         'error_type': 'doi',
                         'error_details': format_doi_mismatch(doi, external_ids['DOI']),
@@ -2058,7 +2058,7 @@ class ArxivReferenceChecker:
         elif error.get('error_type') == 'year' or error.get('warning_type') == 'year':
             formatted_error['ref_year_correct'] = error.get('ref_year_correct', '')
         elif error.get('error_type') == 'doi':
-            from utils.doi_utils import construct_doi_url
+            from refchecker.utils.doi_utils import construct_doi_url
             formatted_error['ref_url_correct'] = construct_doi_url(error.get('ref_doi_correct', ''))
         elif error.get('info_type') == 'url':
             formatted_error['ref_url_correct'] = error.get('ref_url_correct', '')
@@ -2091,7 +2091,7 @@ class ArxivReferenceChecker:
             # Use the CORRECT paper's Semantic Scholar URL
             correct_external_ids = correct_paper_data.get('externalIds', {})
             if correct_external_ids.get('CorpusId'):
-                from utils.url_utils import construct_semantic_scholar_url
+                from refchecker.utils.url_utils import construct_semantic_scholar_url
                 correct_paper_url = construct_semantic_scholar_url(correct_external_ids['CorpusId'])
                 paper_url = correct_paper_url  # Update the main URL
                 logger.debug(f"Database mode: Using correct paper's Semantic Scholar URL for ArXiv ID mismatch: {paper_url}")
@@ -2118,7 +2118,7 @@ class ArxivReferenceChecker:
 
         # Fallback to wrong paper's URL if we couldn't find the correct one
         if not correct_paper_data and verified_data and verified_data.get('externalIds', {}).get('CorpusId'):
-            from utils.url_utils import construct_semantic_scholar_url
+            from refchecker.utils.url_utils import construct_semantic_scholar_url
             paper_url = construct_semantic_scholar_url(verified_data['externalIds']['CorpusId'])
             logger.debug(f"Database mode: Fallback to wrong paper's Semantic Scholar URL: {paper_url}")
         elif not correct_paper_data:
@@ -2184,7 +2184,7 @@ class ArxivReferenceChecker:
             logger.debug(f"Detected GitHub URL, using GitHub verification: {github_url}")
 
             # Import and use GitHub checker
-            from checkers.github_checker import GitHubChecker
+            from refchecker.checkers.github_checker import GitHubChecker
             github_checker = GitHubChecker()
             verified_data, errors, paper_url = github_checker.verify_reference(reference)
 
@@ -2244,7 +2244,7 @@ class ArxivReferenceChecker:
             return None  # No URL to check
 
         # Import and use web page checker
-        from checkers.webpage_checker import WebPageChecker
+        from refchecker.checkers.webpage_checker import WebPageChecker
         webpage_checker = WebPageChecker()
 
         if not webpage_checker.is_web_page_url(web_url):
@@ -2308,7 +2308,7 @@ class ArxivReferenceChecker:
             return None, [{"error_type": "unverified", "error_details": "Reference could not be verified"}], None
 
         # First try PDF paper checker if URL appears to be a PDF
-        from checkers.pdf_paper_checker import PDFPaperChecker
+        from refchecker.checkers.pdf_paper_checker import PDFPaperChecker
         pdf_checker = PDFPaperChecker()
 
         if pdf_checker.can_check_reference(reference):
@@ -2325,7 +2325,7 @@ class ArxivReferenceChecker:
             logger.debug(f"PDF verification error, falling back to web page verification")
 
         # Fall back to web page checker
-        from checkers.pdf_paper_checker import PDFPaperChecker
+        from refchecker.checkers.pdf_paper_checker import PDFPaperChecker
         pdf_checker = PDFPaperChecker()
 
         if pdf_checker.can_check_reference(reference):
@@ -2342,7 +2342,7 @@ class ArxivReferenceChecker:
             logger.debug(f"PDF verification error, falling back to web page verification")
 
         # Fall back to web page checker
-        from checkers.webpage_checker import WebPageChecker
+        from refchecker.checkers.webpage_checker import WebPageChecker
         webpage_checker = WebPageChecker()
 
         try:
@@ -2463,7 +2463,7 @@ class ArxivReferenceChecker:
             elif error.get('error_type') == 'year' or error.get('warning_type') == 'year':
                 formatted_error['ref_year_correct'] = error.get('ref_year_correct', '')
             elif error.get('error_type') == 'doi':
-                from utils.doi_utils import construct_doi_url
+                from refchecker.utils.doi_utils import construct_doi_url
                 formatted_error['ref_url_correct'] = construct_doi_url(error.get('ref_doi_correct', ''))
 
             formatted_errors.append(formatted_error)
@@ -2753,7 +2753,7 @@ class ArxivReferenceChecker:
             corrected_data = self._extract_corrected_data_from_error(consolidated_entry, verified_data)
 
             # Generate all three formats for user convenience
-            from utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
+            from refchecker.utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
             plaintext_format = format_corrected_plaintext(reference, corrected_data, consolidated_entry)
             bibtex_format = format_corrected_bibtex(reference, corrected_data, consolidated_entry)
             bibitem_format = format_corrected_bibitem(reference, corrected_data, consolidated_entry)
@@ -2824,7 +2824,7 @@ class ArxivReferenceChecker:
             corrected_data = self._extract_corrected_data_from_error(error, verified_data)
 
             # Generate all three formats
-            from utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
+            from refchecker.utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
             plaintext_format = format_corrected_plaintext(reference, corrected_data, error_entry)
             bibtex_format = format_corrected_bibtex(reference, corrected_data, error_entry)
             bibitem_format = format_corrected_bibitem(reference, corrected_data, error_entry)
@@ -3326,7 +3326,7 @@ class ArxivReferenceChecker:
 
         if authors:
             # Limit to first 3 authors for readability
-            from utils.text_utils import parse_authors_with_initials
+            from refchecker.utils.text_utils import parse_authors_with_initials
             author_list = parse_authors_with_initials(authors)
             if len(author_list) > 3:
                 formatted += ", ".join(author_list[:3]) + " et al."
@@ -3568,7 +3568,7 @@ class ArxivReferenceChecker:
             return self._parse_standard_acm_natbib_references(bibliography_text)
 
         # Check if this is BibTeX format
-        from utils.bibtex_parser import detect_bibtex_format
+        from refchecker.utils.bibtex_parser import detect_bibtex_format
         if detect_bibtex_format(bibliography_text):
             logger.info("Detected BibTeX format, using BibTeX parser")
             self.used_regex_extraction = True
@@ -3576,7 +3576,7 @@ class ArxivReferenceChecker:
             return self._parse_bibtex_references(bibliography_text)
 
         # Check if this is biblatex format
-        from utils.biblatex_parser import detect_biblatex_format
+        from refchecker.utils.biblatex_parser import detect_biblatex_format
         if detect_biblatex_format(bibliography_text):
             logger.debug("Detected biblatex format")
             self.used_regex_extraction = True
@@ -3686,7 +3686,7 @@ class ArxivReferenceChecker:
         if author_field_match:
             author_content = author_field_match.group(1)
             # Find all \bibinfo{person}{Name} entries using balanced brace extraction
-            from utils.text_utils import extract_bibinfo_person_content
+            from refchecker.utils.text_utils import extract_bibinfo_person_content
             person_matches = extract_bibinfo_person_content(author_content)
             if person_matches:
                 authors = []
@@ -3700,7 +3700,7 @@ class ArxivReferenceChecker:
                 ref['authors'] = authors
 
         # Import balanced brace extraction function
-        from utils.text_utils import extract_bibinfo_field_content
+        from refchecker.utils.text_utils import extract_bibinfo_field_content
 
         # Extract title from \bibinfo{title}{Title} using balanced brace extraction
         title_content = extract_bibinfo_field_content(content, 'title')
@@ -3758,7 +3758,7 @@ class ArxivReferenceChecker:
             author_part_clean = strip_latex_commands(author_part).strip()
             if author_part_clean and not author_part_clean.startswith('\\'):
                 # Parse author names using the robust author parsing function
-                from utils.text_utils import parse_authors_with_initials
+                from refchecker.utils.text_utils import parse_authors_with_initials
                 author_names = parse_authors_with_initials(author_part_clean)
 
                 # Clean up author names
@@ -3812,14 +3812,14 @@ class ArxivReferenceChecker:
         self.used_regex_extraction = True
 
         # Check if this is BibTeX format first
-        from utils.bibtex_parser import detect_bibtex_format
+        from refchecker.utils.bibtex_parser import detect_bibtex_format
         if detect_bibtex_format(bibliography_text):
             logger.debug("Detected BibTeX format, using BibTeX-specific parsing")
             # BibTeX parsing is robust, so we don't set used_unreliable_extraction
             return self._parse_bibtex_references(bibliography_text)
 
         # Check if this is biblatex format
-        from utils.biblatex_parser import detect_biblatex_format
+        from refchecker.utils.biblatex_parser import detect_biblatex_format
         if detect_biblatex_format(bibliography_text):
             logger.debug("Detected biblatex format, using biblatex-specific parsing")
             # biblatex parsing is also robust, so we don't set used_unreliable_extraction
@@ -4105,7 +4105,7 @@ class ArxivReferenceChecker:
         if doi_match:
             doi = clean_doi(doi_match.group(1))
             if doi:
-                from utils.doi_utils import construct_doi_url
+                from refchecker.utils.doi_utils import construct_doi_url
                 url = construct_doi_url(doi)
             else:
                 url = ''
@@ -4265,7 +4265,7 @@ class ArxivReferenceChecker:
             List of structured reference dictionaries
         """
         # Use the dedicated BibTeX parser
-        from utils.bibtex_parser import parse_bibtex_references
+        from refchecker.utils.bibtex_parser import parse_bibtex_references
 
         # Extract references using the BibTeX parser
         references = parse_bibtex_references(bibliography_text)
@@ -4284,7 +4284,7 @@ class ArxivReferenceChecker:
             List of structured reference dictionaries
         """
         # Use the dedicated biblatex parser
-        from utils.biblatex_parser import parse_biblatex_references
+        from refchecker.utils.biblatex_parser import parse_biblatex_references
 
         # Extract references using the biblatex parser
         references = parse_biblatex_references(bibliography_text)
@@ -4484,7 +4484,7 @@ class ArxivReferenceChecker:
             return True
 
         # Also check if authors have significant overlap (at least 50% of the shorter author list)
-        from utils.text_utils import parse_authors_with_initials
+        from refchecker.utils.text_utils import parse_authors_with_initials
 
         if '*' in seg1['author']:
             author1_parts = seg1['author'].split('*')
@@ -4553,7 +4553,7 @@ class ArxivReferenceChecker:
             parsed_authors = []
             for author in raw_authors:
                 # Clean up the author entry and strip LaTeX commands
-                from utils.text_utils import strip_latex_commands
+                from refchecker.utils.text_utils import strip_latex_commands
                 author_cleaned = strip_latex_commands(author.rstrip('.'))
 
                 # Skip special indicators like "others", "et al", etc.
@@ -4571,14 +4571,14 @@ class ArxivReferenceChecker:
             return parsed_authors
         else:
             # Fallback to original logic for backward compatibility
-            from utils.text_utils import parse_authors_with_initials
+            from refchecker.utils.text_utils import parse_authors_with_initials
 
             cleaned_text = author_text.rstrip('.')
             authors = parse_authors_with_initials(cleaned_text)
             authors = [a.rstrip('.').strip() for a in authors if a.strip()]
 
             # Handle "others" and similar indicators in fallback logic too
-            from utils.text_utils import strip_latex_commands
+            from refchecker.utils.text_utils import strip_latex_commands
             processed_authors = []
             for author in authors:
                 # Apply LaTeX cleaning to each author
@@ -4706,7 +4706,7 @@ class ArxivReferenceChecker:
                 if '*' in doi:
                     doi = doi.split('*')[0]
 
-                from utils.doi_utils import construct_doi_url
+                from refchecker.utils.doi_utils import construct_doi_url
                 url = construct_doi_url(doi)
                 break
 
@@ -4714,7 +4714,7 @@ class ArxivReferenceChecker:
         if not url and not arxiv_url:
             url_match = re.search(r'https?://(?!arxiv\.org)[^\s,]+', ref_text)
             if url_match:
-                from utils.url_utils import clean_url_punctuation
+                from refchecker.utils.url_utils import clean_url_punctuation
                 url = clean_url_punctuation(url_match.group(0))
 
         # Extract year - will be determined from structured parts below
@@ -4808,7 +4808,7 @@ class ArxivReferenceChecker:
                 if 'arxiv' in url_part.lower():
                     arxiv_url = url_part
                 else:
-                    from utils.url_utils import clean_url_punctuation
+                    from refchecker.utils.url_utils import clean_url_punctuation
                     url = clean_url_punctuation(url_part)
             else:
                 # Fallback for other formats or malformed input
@@ -4829,7 +4829,7 @@ class ArxivReferenceChecker:
                 if 'arxiv' in url_part.lower():
                     arxiv_url = url_part
                 else:
-                    from utils.url_utils import clean_url_punctuation
+                    from refchecker.utils.url_utils import clean_url_punctuation
                     url = clean_url_punctuation(url_part)
                 if len(parts) > 5:
                     # For cases with more than 5 parts, combine the remaining parts as additional info
@@ -4966,7 +4966,7 @@ class ArxivReferenceChecker:
                 if '*' in doi:
                     doi = doi.split('*')[0]
 
-                from utils.doi_utils import construct_doi_url
+                from refchecker.utils.doi_utils import construct_doi_url
                 url = construct_doi_url(doi)
                 break
 
@@ -4974,7 +4974,7 @@ class ArxivReferenceChecker:
         if not url and not arxiv_url:
             url_match = re.search(r'https?://(?!arxiv\.org)[^\s,\)]+', ref_text)
             if url_match:
-                from utils.url_utils import clean_url_punctuation
+                from refchecker.utils.url_utils import clean_url_punctuation
                 url = clean_url_punctuation(url_match.group(0))
 
         # Extract year
@@ -5023,7 +5023,7 @@ class ArxivReferenceChecker:
         logger.debug(f"Extracting bibliography for paper {paper_id}: {paper.title}")
 
         # Check if we can get BibTeX content for this paper (ArXiv or other sources)
-        from utils.arxiv_utils import get_bibtex_content
+        from refchecker.utils.arxiv_utils import get_bibtex_content
         bibtex_content = get_bibtex_content(paper)
         if bibtex_content:
             logger.debug(f"Found BibTeX content for {paper_id}, using structured bibliography")
@@ -5047,7 +5047,7 @@ class ArxivReferenceChecker:
             references = extract_latex_references(bibtex_content, None)
 
             # Validate the parsed references and fallback to LLM if needed
-            from utils.text_utils import validate_parsed_references
+            from refchecker.utils.text_utils import validate_parsed_references
             validation = validate_parsed_references(references)
 
             if not validation['is_valid']:
@@ -5372,9 +5372,9 @@ class ArxivReferenceChecker:
         # Print reference info in non-debug mode (improved formatting)
         raw_title = reference.get('title', 'Untitled')
         # Clean LaTeX commands from title for display
-        from utils.text_utils import strip_latex_commands
+        from refchecker.utils.text_utils import strip_latex_commands
         title = strip_latex_commands(raw_title)
-        from utils.text_utils import format_authors_for_display
+        from refchecker.utils.text_utils import format_authors_for_display
        authors = format_authors_for_display(reference.get('authors', []))
         year = reference.get('year', '')
         venue = reference.get('venue', '') or reference.get('journal', '')
@@ -5424,7 +5424,7 @@ class ArxivReferenceChecker:
 
         # Show DOI URL if available and different from what's already shown
        if external_ids.get('DOI'):
-            from utils.doi_utils import construct_doi_url
+            from refchecker.utils.doi_utils import construct_doi_url
             doi_url = construct_doi_url(external_ids['DOI'])
             if doi_url != verified_url_to_show and doi_url != url:
                 print(f" DOI URL: {doi_url}")
@@ -5523,19 +5523,19 @@ class ArxivReferenceChecker:
 
         # Second priority: Semantic Scholar URL from CorpusId (if no direct URL available)
         if verified_data and verified_data.get('externalIds', {}).get('CorpusId'):
-            from utils.url_utils import construct_semantic_scholar_url
+            from refchecker.utils.url_utils import construct_semantic_scholar_url
             return construct_semantic_scholar_url(verified_data['externalIds']['CorpusId'])
 
         # Third priority: DOI URL from verified data (more reliable than potentially wrong ArXiv URLs)
         if verified_data and verified_data.get('externalIds', {}).get('DOI'):
-            from utils.doi_utils import construct_doi_url
+            from refchecker.utils.doi_utils import construct_doi_url
             return construct_doi_url(verified_data['externalIds']['DOI'])
 
         # Fourth priority: ArXiv URL from verified data (but only if there's no ArXiv ID error)
         if verified_data and verified_data.get('externalIds', {}).get('ArXiv'):
             # Only show ArXiv URL as verified URL if there's no ArXiv ID mismatch
             if not self._has_arxiv_id_error(errors):
-                from utils.url_utils import construct_arxiv_url
+                from refchecker.utils.url_utils import construct_arxiv_url
                 correct_arxiv_id = verified_data['externalIds']['ArXiv']
                 return construct_arxiv_url(correct_arxiv_id)
 
@@ -5556,7 +5556,7 @@ class ArxivReferenceChecker:
         external_ids = verified_data.get('externalIds', {})
         if external_ids.get('ArXiv'):
             # Extract ArXiv ID from the URL using shared utility
-            from utils.url_utils import extract_arxiv_id_from_url
+            from refchecker.utils.url_utils import extract_arxiv_id_from_url
             url_arxiv_id = extract_arxiv_id_from_url(reference_url)
             if url_arxiv_id:
                 correct_arxiv_id = external_ids['ArXiv']
@@ -5579,10 +5579,10 @@ class ArxivReferenceChecker:
     def _get_fallback_url(self, external_ids):
         """Get fallback URL from external IDs (Semantic Scholar or DOI)"""
         if external_ids.get('CorpusId'):
-            from utils.url_utils import construct_semantic_scholar_url
+            from refchecker.utils.url_utils import construct_semantic_scholar_url
             return construct_semantic_scholar_url(external_ids['CorpusId'])
         elif external_ids.get('DOI'):
-            from utils.doi_utils import construct_doi_url
+            from refchecker.utils.doi_utils import construct_doi_url
             return construct_doi_url(external_ids['DOI'])
         return None
 
@@ -5660,7 +5660,7 @@ class ArxivReferenceChecker:
        error_type = error.get('error_type') or error.get('warning_type') or error.get('info_type')
         error_details = error.get('error_details') or error.get('warning_details') or error.get('info_details', 'Unknown error')
 
-        from utils.error_utils import print_labeled_multiline
+        from refchecker.utils.error_utils import print_labeled_multiline
 
         if error_type == 'arxiv_id':
             print(f" ❌ {error_details}")
```
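Every refchecker.py hunk above is the same mechanical rewrite: a function-level `from utils.X import Y` (or `checkers.X`, `core.X`) becomes `from refchecker.utils.X import Y`, leaving the surrounding logic untouched. When applying a migration like this to your own project, a small scan can confirm that no stale top-level imports slipped through; a minimal sketch (the helper below is illustrative, not part of this package):

```python
import re
from pathlib import Path

# Hypothetical helper (not shipped with academic-refchecker): flag imports
# that still reference the old top-level module names instead of refchecker.*.
OLD_TOP_LEVEL = ("checkers", "config", "core", "database",
                 "llm", "scripts", "services", "utils")
PATTERN = re.compile(
    r"^[ \t]*(?:from|import)\s+(?:" + "|".join(OLD_TOP_LEVEL) + r")(?:[. \t]|$).*",
    re.MULTILINE,
)

def find_stale_imports(root: str) -> list[tuple[str, str]]:
    """Return (file, offending line) pairs for imports not yet migrated."""
    hits = []
    for path in Path(root).rglob("*.py"):
        for match in PATTERN.finditer(path.read_text(encoding="utf-8")):
            hits.append((str(path), match.group(0).strip()))
    return hits

if __name__ == "__main__":
    for filename, line in find_stale_imports("."):
        print(f"{filename}: {line}")
```

Run from the repository root, it should print nothing once the migration is complete.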
{utils → refchecker/utils}/arxiv_utils.py

```diff
@@ -32,7 +32,7 @@ def extract_arxiv_id_from_paper(paper):
 
     if hasattr(paper, 'pdf_url') and paper.pdf_url:
         # Try to extract ArXiv ID from the PDF URL
-        from utils.url_utils import extract_arxiv_id_from_url
+        from refchecker.utils.url_utils import extract_arxiv_id_from_url
         arxiv_id = extract_arxiv_id_from_url(paper.pdf_url)
     elif hasattr(paper, 'get_short_id'):
         # Check if the paper ID itself is an ArXiv ID
@@ -316,7 +316,7 @@ def filter_bibtex_by_citations(bib_content, tex_files, main_tex_content):
         return bib_content
 
     # Parse BibTeX entries and filter
-    from utils.bibtex_parser import parse_bibtex_entries
+    from refchecker.utils.bibtex_parser import parse_bibtex_entries
     entries = parse_bibtex_entries(bib_content)
 
     # Filter entries to only cited ones and remove duplicates
@@ -481,7 +481,7 @@ def get_bibtex_content(paper):
 
     elif tex_content:
         # Check for embedded bibliography in LaTeX
-        from utils.text_utils import detect_latex_bibliography_format
+        from refchecker.utils.text_utils import detect_latex_bibliography_format
         latex_format = detect_latex_bibliography_format(tex_content)
         if latex_format['is_latex'] and ('\\bibitem' in tex_content or '@' in tex_content):
             logger.info(f"Found embedded bibliography in ArXiv LaTeX source, but skipping due to formatting incompatibility")
```
{utils → refchecker/utils}/biblatex_parser.py

```diff
@@ -200,8 +200,8 @@ def parse_biblatex_references(text: str) -> List[Dict[str, Any]]:
         List of structured reference dictionaries, or empty list if
         parsing quality is poor (to trigger LLM fallback)
     """
-    from utils.text_utils import parse_authors_with_initials, clean_title
-    from utils.doi_utils import construct_doi_url, is_valid_doi_format
+    from refchecker.utils.text_utils import parse_authors_with_initials, clean_title
+    from refchecker.utils.doi_utils import construct_doi_url, is_valid_doi_format
 
     if not text or not detect_biblatex_format(text):
         return []
@@ -300,8 +300,8 @@ def parse_biblatex_entry_content(entry_num: str, content: str) -> Dict[str, Any]:
     Returns:
         Dictionary with parsed entry data
     """
-    from utils.text_utils import parse_authors_with_initials, clean_title
-    from utils.doi_utils import construct_doi_url, is_valid_doi_format
+    from refchecker.utils.text_utils import parse_authors_with_initials, clean_title
+    from refchecker.utils.doi_utils import construct_doi_url, is_valid_doi_format
 
     # Initialize default values
     title = ""
```
{utils → refchecker/utils}/bibliography_utils.py

```diff
@@ -164,7 +164,7 @@ def _parse_bibtex_references(bibliography_text):
     Returns:
         List of reference dictionaries
     """
-    from utils.bibtex_parser import parse_bibtex_entries
+    from refchecker.utils.bibtex_parser import parse_bibtex_entries
     return parse_bibtex_entries(bibliography_text)
 
 
@@ -178,7 +178,7 @@ def _parse_biblatex_references(bibliography_text):
     Returns:
         List of reference dictionaries
     """
-    from utils.text_utils import extract_latex_references
+    from refchecker.utils.text_utils import extract_latex_references
     return extract_latex_references(bibliography_text)
 
 
@@ -186,7 +186,7 @@ def _parse_standard_acm_natbib_references(bibliography_text):
     """
     Parse references using regex for standard ACM/natbib format (both ACM Reference Format and simple natbib)
     """
-    from utils.text_utils import detect_standard_acm_natbib_format
+    from refchecker.utils.text_utils import detect_standard_acm_natbib_format
 
     references = []
 
@@ -230,7 +230,7 @@ def _parse_simple_natbib_format(ref_num, content, label):
     Returns:
         Dictionary containing parsed reference information
     """
-    from utils.text_utils import extract_url_from_reference, extract_year_from_reference
+    from refchecker.utils.text_utils import extract_url_from_reference, extract_year_from_reference
 
     # Basic parsing - this could be enhanced with more sophisticated NLP
     reference = {
@@ -288,7 +288,7 @@ def _parse_references_regex(bibliography_text):
     }
 
     # Basic information extraction
-    from utils.text_utils import extract_url_from_reference, extract_year_from_reference
+    from refchecker.utils.text_utils import extract_url_from_reference, extract_year_from_reference
 
     url = extract_url_from_reference(ref_content)
     if url:
```
{utils → refchecker/utils}/bibtex_parser.py

```diff
@@ -214,8 +214,8 @@ def parse_bibtex_references(bibliography_text: str) -> List[Dict[str, Any]]:
     Returns:
         List of structured reference dictionaries
     """
-    from utils.text_utils import parse_authors_with_initials, clean_title
-    from utils.doi_utils import construct_doi_url, is_valid_doi_format
+    from refchecker.utils.text_utils import parse_authors_with_initials, clean_title
+    from refchecker.utils.doi_utils import construct_doi_url, is_valid_doi_format
 
     entries = parse_bibtex_entries(bibliography_text)
     references = []
@@ -291,7 +291,7 @@ def parse_bibtex_references(bibliography_text: str) -> List[Dict[str, Any]]:
         # Extract other URLs
         url = fields.get('url', '')
         if url:
-            from utils.url_utils import clean_url
+            from refchecker.utils.url_utils import clean_url
             url = clean_url(url)
 
         # Handle special @misc entries with only howpublished field
@@ -318,7 +318,7 @@ def parse_bibtex_references(bibliography_text: str) -> List[Dict[str, Any]]:
                 url = howpublished
 
             # Clean the reconstructed URL
-            from utils.url_utils import clean_url
+            from refchecker.utils.url_utils import clean_url
             url = clean_url(url)
 
             # Generate title from domain/path
@@ -350,7 +350,7 @@ def parse_bibtex_references(bibliography_text: str) -> List[Dict[str, Any]]:
 
         # Clean any URL we extracted
         if url:
-            from utils.url_utils import clean_url
+            from refchecker.utils.url_utils import clean_url
             url = clean_url(url)
 
         # Construct ArXiv URL from eprint field if no URL present
```
{utils → refchecker/utils}/error_utils.py

```diff
@@ -179,7 +179,7 @@ def clean_venue_for_comparison(venue: str) -> str:
     Returns:
         Cleaned venue name suitable for display
     """
-    from utils.text_utils import normalize_venue_for_display
+    from refchecker.utils.text_utils import normalize_venue_for_display
     return normalize_venue_for_display(venue)
 
 
```
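Two of the added files hint at how the restructured package is meant to be launched: `refchecker/__main__.py` (+11) and the regenerated `entry_points.txt`. A conventional `__main__.py` would make the usage string above also work as `python -m refchecker --paper PAPER_SPEC ...`. The real file's contents are not shown in this diff, so the following is only a plausible sketch:

```python
# Hypothetical reconstruction of refchecker/__main__.py; only its +11 line
# count appears in this diff, and the entry-point name is an assumption.
from refchecker.core.refchecker import main  # assumed entry point

if __name__ == "__main__":
    main()
```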