academic-refchecker 1.2.40__tar.gz → 1.2.41__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.40/src/academic_refchecker.egg-info → academic_refchecker-1.2.41}/PKG-INFO +1 -1
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/__version__.py +1 -1
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41/src/academic_refchecker.egg-info}/PKG-INFO +1 -1
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/core/parallel_processor.py +2 -1
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/core/refchecker.py +4 -3
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/text_utils.py +3 -1
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/LICENSE +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/MANIFEST.in +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/README.md +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/pyproject.toml +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/requirements.txt +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/scripts/download_db.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/scripts/run_tests.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/setup.cfg +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/academic_refchecker.egg-info/top_level.txt +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/crossref.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/enhanced_hybrid_checker.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/github_checker.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/local_semantic_scholar.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/openalex.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/openreview_checker.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/semantic_scholar.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/webpage_checker.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/config/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/config/logging.conf +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/config/settings.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/core/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/core/db_connection_pool.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/database/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/llm/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/llm/base.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/llm/providers.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/scripts/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/services/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/services/pdf_processor.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/__init__.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/arxiv_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/author_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/biblatex_parser.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/bibliography_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/bibtex_parser.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/config_validator.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/db_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/doi_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/error_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/mock_objects.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/unicode_utils.py +0 -0
- {academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/utils/url_utils.py +0 -0
|
@@ -279,7 +279,8 @@ class ParallelReferenceProcessor:
|
|
|
279
279
|
from utils.text_utils import format_authors_for_display
|
|
280
280
|
authors = format_authors_for_display(reference.get('authors', []))
|
|
281
281
|
year = reference.get('year', '')
|
|
282
|
-
venue
|
|
282
|
+
# Get venue from either 'venue' or 'journal' field
|
|
283
|
+
venue = reference.get('venue', '') or reference.get('journal', '')
|
|
283
284
|
url = reference.get('url', '')
|
|
284
285
|
doi = reference.get('doi', '')
|
|
285
286
|
|
|
@@ -3383,7 +3383,7 @@ class ArxivReferenceChecker:
|
|
|
3383
3383
|
# Check if this is biblatex format
|
|
3384
3384
|
from utils.biblatex_parser import detect_biblatex_format
|
|
3385
3385
|
if detect_biblatex_format(bibliography_text):
|
|
3386
|
-
logger.
|
|
3386
|
+
logger.debug("Detected biblatex format")
|
|
3387
3387
|
self.used_regex_extraction = True
|
|
3388
3388
|
# Note: biblatex parsing is also robust, so we don't set used_unreliable_extraction
|
|
3389
3389
|
biblatex_refs = self._parse_biblatex_references(bibliography_text)
|
|
@@ -3391,7 +3391,7 @@ class ArxivReferenceChecker:
|
|
|
3391
3391
|
# If biblatex parsing returned empty results (due to quality validation),
|
|
3392
3392
|
# fallback to LLM if available
|
|
3393
3393
|
if not biblatex_refs and self.llm_extractor:
|
|
3394
|
-
logger.debug("Biblatex
|
|
3394
|
+
logger.debug("Biblatex is incompatible with parser")
|
|
3395
3395
|
try:
|
|
3396
3396
|
references = self.llm_extractor.extract_references(bibliography_text)
|
|
3397
3397
|
if references:
|
|
@@ -3403,7 +3403,7 @@ class ArxivReferenceChecker:
|
|
|
3403
3403
|
except Exception as e:
|
|
3404
3404
|
logger.error(f"LLM fallback failed: {e}")
|
|
3405
3405
|
return []
|
|
3406
|
-
|
|
3406
|
+
logger.debug("Using biblatex file")
|
|
3407
3407
|
return biblatex_refs
|
|
3408
3408
|
|
|
3409
3409
|
# For non-standard formats, try LLM-based extraction if available
|
|
@@ -3634,6 +3634,7 @@ class ArxivReferenceChecker:
|
|
|
3634
3634
|
# we'll continue with the unreliable fallback regex parsing
|
|
3635
3635
|
if not biblatex_refs:
|
|
3636
3636
|
logger.debug("Biblatex parser returned no results due to quality validation, falling back to regex parsing")
|
|
3637
|
+
print(f"⚠️ Biblatex parser found no valid references (failed quality validation) - falling back to regex parsing")
|
|
3637
3638
|
else:
|
|
3638
3639
|
return biblatex_refs
|
|
3639
3640
|
|
|
@@ -3006,7 +3006,9 @@ def extract_latex_references(text, file_path=None): # pylint: disable=unused-ar
|
|
|
3006
3006
|
if ref['year']:
|
|
3007
3007
|
venue_clean = re.sub(rf'\b{ref["year"]}\b.*', '', venue_clean)
|
|
3008
3008
|
venue_clean = venue_clean.rstrip(',. ')
|
|
3009
|
-
|
|
3009
|
+
# Filter out common non-venue patterns that shouldn't be treated as venues
|
|
3010
|
+
non_venue_patterns = ['URL', 'url', 'http:', 'https:', 'DOI', 'doi:', 'ArXiv', 'arxiv:']
|
|
3011
|
+
if venue_clean and not any(pattern in venue_clean for pattern in non_venue_patterns):
|
|
3010
3012
|
ref['journal'] = venue_clean
|
|
3011
3013
|
|
|
3012
3014
|
# Extract URL if present
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/enhanced_hybrid_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/local_semantic_scholar.py
RENAMED
|
File without changes
|
|
File without changes
|
{academic_refchecker-1.2.40 → academic_refchecker-1.2.41}/src/checkers/openreview_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|