PyPI - academic-refchecker - Version diff - 2.0.12.tar.gz → 2.0.14.tar.gz - Mend

academic-refchecker 2.0.12.tar.gz → 2.0.14.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76)

{academic_refchecker-2.0.12/academic_refchecker.egg-info → academic_refchecker-2.0.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academic-refchecker
-Version: 2.0.12
+Version: 2.0.14
 Summary: A comprehensive tool for validating reference accuracy in academic papers
 Author-email: Mark Russinovich <markrussinovich@hotmail.com>
 License-Expression: MIT

{academic_refchecker-2.0.12 → academic_refchecker-2.0.14/academic_refchecker.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academic-refchecker
-Version: 2.0.12
+Version: 2.0.14
 Summary: A comprehensive tool for validating reference accuracy in academic papers
 Author-email: Mark Russinovich <markrussinovich@hotmail.com>
 License-Expression: MIT

{academic_refchecker-2.0.12 → academic_refchecker-2.0.14}/backend/main.py RENAMED Viewed

@@ -27,6 +27,7 @@ from .thumbnail import (
     generate_pdf_thumbnail_async,
     generate_pdf_preview_async,
     get_text_thumbnail_async,
+    get_text_preview_async,
     get_thumbnail_cache_path,
     get_preview_cache_path
 )
@@ -220,12 +221,15 @@ async def start_check(
         elif source_type == "text":
             if not source_text:
                 raise HTTPException(status_code=400, detail="No text provided")
+            # Normalize line endings - remove all \r to prevent double carriage returns
+            # Browser may send \r\n, and Windows file writing can add extra \r
+            normalized_text = source_text.replace('\r\n', '\n').replace('\r', '\n')
             # Save pasted text to a file for later retrieval and thumbnail generation
             text_dir = Path(tempfile.gettempdir()) / "refchecker_texts"
             text_dir.mkdir(parents=True, exist_ok=True)
             text_file_path = text_dir / f"pasted_{session_id}.txt"
-            with open(text_file_path, "w", encoding="utf-8") as f:
-                f.write(source_text)
+            with open(text_file_path, "w", encoding="utf-8", newline='\n') as f:
+                f.write(normalized_text)
             paper_source = str(text_file_path)
             paper_title = "Pasted Text"
         elif source_type == "url":
@@ -646,9 +650,33 @@ async def get_preview(check_id: int):
                 media_type="image/png",
                 headers={"Cache-Control": "public, max-age=86400"}  # Cache for 1 day
             )
-        else:
-            # Fall back to thumbnail if preview can't be generated
-            raise HTTPException(status_code=404, detail="Could not generate preview")
+        # For text sources, generate a high-resolution text preview for overlay display
+        if source_type == 'text':
+            logger.info(f"Generating text preview for check {check_id}")
+            preview_path = await get_text_preview_async(check_id, "", paper_source)
+            if preview_path and os.path.exists(preview_path):
+                return FileResponse(
+                    preview_path,
+                    media_type="image/png",
+                    headers={"Cache-Control": "public, max-age=86400"}
+                )
+        # For non-PDF file uploads, also generate a text preview
+        if source_type == 'file' and not paper_source.lower().endswith('.pdf'):
+            logger.info(f"Generating text preview for uploaded file check {check_id}")
+            if os.path.exists(paper_source):
+                preview_path = await get_text_preview_async(check_id, "", paper_source)
+            else:
+                preview_path = await get_text_preview_async(check_id, "Uploaded file")
+            if preview_path and os.path.exists(preview_path):
+                return FileResponse(
+                    preview_path,
+                    media_type="image/png",
+                    headers={"Cache-Control": "public, max-age=86400"}
+                )
+        raise HTTPException(status_code=404, detail="Could not generate preview")
     except HTTPException:
         raise

{academic_refchecker-2.0.12 → academic_refchecker-2.0.14}/backend/refchecker_wrapper.py RENAMED Viewed

@@ -3,6 +3,7 @@ Wrapper around refchecker library with progress callbacks for real-time updates
 """
 import sys
 import os
+import re
 import asyncio
 import logging
 import tempfile
@@ -238,6 +239,18 @@ class ProgressRefChecker:
                 if not any(u.get('url') == doi_url for u in authoritative_urls):
                     authoritative_urls.append({"type": "doi", "url": doi_url})
+            # Add Semantic Scholar URL if available
+            s2_paper_id = external_ids.get('S2PaperId')
+            if s2_paper_id:
+                s2_url = f"https://www.semanticscholar.org/paper/{s2_paper_id}"
+                if not any(u.get('url') == s2_url for u in authoritative_urls):
+                    authoritative_urls.append({"type": "semantic_scholar", "url": s2_url})
+            # Also check for inline S2 URL (from merged data)
+            s2_inline_url = verified_data.get('_semantic_scholar_url')
+            if s2_inline_url and not any(u.get('url') == s2_inline_url for u in authoritative_urls):
+                authoritative_urls.append({"type": "semantic_scholar", "url": s2_inline_url})
         # Format errors, warnings, and suggestions
         formatted_errors = []
         formatted_warnings = []
@@ -462,11 +475,20 @@ class ProgressRefChecker:
                         raise ValueError("PDF extraction requires an LLM to be configured. Please configure an LLM provider in settings.")
                     pdf_processor = PDFProcessor()
                     paper_text = await asyncio.to_thread(pdf_processor.extract_text_from_pdf, paper_source)
-                elif paper_source.lower().endswith(('.tex', '.txt')):
+                elif paper_source.lower().endswith(('.tex', '.txt', '.bib')):
                     def read_file():
                         with open(paper_source, 'r', encoding='utf-8') as f:
                             return f.read()
                     paper_text = await asyncio.to_thread(read_file)
+                    # For .bib files, extract references directly using BibTeX parser
+                    if paper_source.lower().endswith('.bib'):
+                        logger.info("Processing uploaded .bib file as BibTeX")
+                        refs_result = await self._extract_references_from_bibtex(paper_text)
+                        if refs_result and refs_result[0]:
+                            arxiv_source_references = refs_result[0]
+                            extraction_method = 'bib'
+                            logger.info(f"Extracted {len(arxiv_source_references)} references from .bib file")
                 else:
                     raise ValueError(f"Unsupported file type: {paper_source}")
             elif source_type == "text":
@@ -494,6 +516,25 @@ class ProgressRefChecker:
                         arxiv_source_references = refs_result[0]
                         extraction_method = 'bbl'  # Mark as bbl extraction
                         logger.info(f"Extracted {len(arxiv_source_references)} references from pasted .bbl content")
+                # Check if the pasted text is BibTeX format (@article, @misc, @inproceedings, etc.)
+                elif re.search(r'@\s*(article|book|inproceedings|incollection|misc|techreport|phdthesis|mastersthesis|conference|inbook|proceedings)\s*\{', paper_text, re.IGNORECASE):
+                    logger.info("Detected BibTeX format in pasted text")
+                    refs_result = await self._extract_references_from_bibtex(paper_text)
+                    if refs_result and refs_result[0]:
+                        arxiv_source_references = refs_result[0]
+                        extraction_method = 'bib'  # Mark as bib extraction
+                        logger.info(f"Extracted {len(arxiv_source_references)} references from pasted BibTeX content")
+                # Fallback: Try BibTeX parsing anyway for partial/malformed content
+                # This handles cases like incomplete paste, or BibTeX-like content without standard entry types
+                elif any(marker in paper_text for marker in ['title={', 'author={', 'year={', 'eprint={', '@']):
+                    logger.info("Detected possible BibTeX-like content, attempting parse")
+                    refs_result = await self._extract_references_from_bibtex(paper_text)
+                    if refs_result and refs_result[0]:
+                        arxiv_source_references = refs_result[0]
+                        extraction_method = 'bib'
+                        logger.info(f"Extracted {len(arxiv_source_references)} references from partial BibTeX content")
+                    else:
+                        logger.warning("BibTeX-like content detected but parsing failed, will try LLM extraction")
                 # Don't update title for pasted text - keep the placeholder
             else:
                 raise ValueError(f"Unsupported source type: {source_type}")

{academic_refchecker-2.0.12 → academic_refchecker-2.0.14}/backend/thumbnail.py RENAMED Viewed

@@ -416,6 +416,13 @@ def get_text_thumbnail(check_id: int, text_preview: str = "", text_file_path: st
             except Exception as e:
                 logger.warning(f"Could not read text file: {e}")
+        # Clean up text content - remove excessive blank lines that cause rendering issues
+        if text_content:
+            # Normalize line endings and remove consecutive blank lines
+            lines = text_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
+            # Keep only non-empty lines
+            text_content = '\n'.join(line for line in lines if line.strip())
         # Create a document-like image with actual text content
         doc = fitz.open()
         page = doc.new_page(width=THUMBNAIL_WIDTH, height=int(THUMBNAIL_WIDTH * 1.4))
@@ -483,6 +490,116 @@ def get_text_thumbnail(check_id: int, text_preview: str = "", text_file_path: st
         return None
+def get_text_preview(check_id: int, text_preview: str = "", text_file_path: str = "") -> Optional[str]:
+    """
+    Generate a high-resolution preview for pasted text showing actual content.
+    Creates a larger image (similar to PDF previews) with the text content.
+    Args:
+        check_id: Check ID for naming
+        text_preview: Optional first few lines of text to display
+        text_file_path: Optional path to the text file to read content from
+    Returns:
+        Path to the generated preview, or None if generation failed
+    """
+    try:
+        import fitz
+        output_path = get_preview_cache_path(f"text_{check_id}", check_id)
+        if output_path.exists():
+            return str(output_path)
+        # Try to read text content from file
+        text_content = text_preview
+        if text_file_path and os.path.exists(text_file_path):
+            try:
+                with open(text_file_path, 'r', encoding='utf-8') as f:
+                    text_content = f.read()
+            except Exception as e:
+                logger.warning(f"Could not read text file: {e}")
+        # Clean up text content - remove excessive blank lines that cause rendering issues
+        if text_content:
+            # Normalize line endings and remove consecutive blank lines
+            lines = text_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
+            # Keep only non-empty lines
+            text_content = '\n'.join(line for line in lines if line.strip())
+        # Create a document-like image with actual text content at high resolution
+        doc = fitz.open()
+        page = doc.new_page(width=PREVIEW_WIDTH, height=int(PREVIEW_WIDTH * 1.4))
+        # Fill with white/off-white background
+        page.draw_rect(page.rect, color=(0.9, 0.9, 0.9), fill=(0.98, 0.98, 0.98))
+        # Draw border
+        page.draw_rect(page.rect, color=(0.7, 0.7, 0.7), width=2)
+        # Draw actual text content if available
+        margin = 40
+        if text_content:
+            # Create a text box for the content
+            text_rect = fitz.Rect(margin, margin, PREVIEW_WIDTH - margin, int(PREVIEW_WIDTH * 1.4) - margin)
+            # Truncate to first ~4000 chars for preview
+            display_text = text_content[:4000]
+            if len(text_content) > 4000:
+                display_text += "\n\n..."
+            # Insert text with readable font size
+            page.insert_textbox(
+                text_rect,
+                display_text,
+                fontsize=14,
+                color=(0.15, 0.15, 0.15),
+                fontname="helv"
+            )
+        else:
+            # Fallback: Draw placeholder
+            header_rect = fitz.Rect(margin, margin, PREVIEW_WIDTH - margin, margin + 60)
+            page.insert_textbox(header_rect, "Pasted Text", fontsize=36, color=(0.3, 0.3, 0.5))
+            # Draw placeholder lines
+            line_height = 24
+            y = margin + 100
+            for i in range(20):
+                line_width = PREVIEW_WIDTH - 2 * margin
+                if i % 3 == 2:
+                    line_width = line_width * 0.7
+                page.draw_line(
+                    fitz.Point(margin, y),
+                    fitz.Point(margin + line_width, y),
+                    color=(0.7, 0.7, 0.7),
+                    width=3
+                )
+                y += line_height
+        # Render to pixmap and save
+        pix = page.get_pixmap(alpha=False)
+        pix.save(str(output_path))
+        doc.close()
+        logger.info(f"Generated text preview: {output_path}")
+        return str(output_path)
+    except ImportError:
+        logger.error("PyMuPDF (fitz) is not installed")
+        return None
+    except Exception as e:
+        logger.error(f"Error generating text preview: {e}")
+        return None
+async def get_text_preview_async(check_id: int, text_preview: str = "", text_file_path: str = "") -> Optional[str]:
+    """Async wrapper for text preview generation."""
+    return await asyncio.to_thread(get_text_preview, check_id, text_preview, text_file_path)
 async def get_text_thumbnail_async(check_id: int, text_preview: str = "", text_file_path: str = "") -> Optional[str]:
     """Async wrapper for text thumbnail generation."""
     return await asyncio.to_thread(get_text_thumbnail, check_id, text_preview, text_file_path)

{academic_refchecker-2.0.12 → academic_refchecker-2.0.14}/src/refchecker/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """Version information for RefChecker."""
-__version__ = "2.0.12"
+__version__ = "2.0.14"

{academic_refchecker-2.0.12 → academic_refchecker-2.0.14}/src/refchecker/checkers/arxiv_citation.py RENAMED Viewed

@@ -8,8 +8,8 @@ for papers found on ArXiv, as it reflects the author-submitted metadata.
 Key features:
 - Fetches official BibTeX from https://arxiv.org/bibtex/{arxiv_id}
-- Always uses the latest version metadata (strips version suffixes)
-- Logs warnings when cited version differs from latest version
+- Checks reference against all historical versions when latest doesn't match
+- Annotates errors with version info when reference matches an older version
 - Parses BibTeX to extract normalized metadata matching refchecker schema
 Usage:
@@ -30,6 +30,7 @@ Usage:
 import re
 import logging
 import requests
+import html
 from typing import Dict, List, Tuple, Optional, Any
 import bibtexparser
@@ -88,6 +89,8 @@ class ArXivCitationChecker:
             # export.arxiv.org URLs
             r'export\.arxiv\.org/abs/([0-9]{4}\.[0-9]{4,5})(v\d+)?',
             r'export\.arxiv\.org/pdf/([0-9]{4}\.[0-9]{4,5})(v\d+)?',
+            # DOI format
+            r"(?:arxiv[:./])(\d{4}\.\d{4,5})(v\d+)?"
         ]
     def extract_arxiv_id(self, reference: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
@@ -107,6 +110,8 @@ class ArXivCitationChecker:
             reference.get('cited_url', ''),
             reference.get('raw_text', ''),
             reference.get('eprint', ''),  # BibTeX field
+            reference.get('journal', ''),
+            reference.get('doi', ''),  # DOI field (may contain arXiv ID)
         ]
         for source in sources:
@@ -324,35 +329,133 @@ class ArXivCitationChecker:
         return None
-    def get_latest_version_info(self, arxiv_id: str) -> Optional[str]:
+    def is_arxiv_reference(self, reference: Dict[str, Any]) -> bool:
         """
-        Get the latest version number for an ArXiv paper.
+        Check if a reference is an ArXiv paper.
-        Note: This requires fetching the abstract page, so it's optional.
-        For now, we rely on the BibTeX always returning latest version metadata.
+        Args:
+            reference: Reference dictionary
+        Returns:
+            True if reference appears to be an ArXiv paper
+        """
+        arxiv_id, _ = self.extract_arxiv_id(reference)
+        return arxiv_id is not None
+    def _fetch_version_metadata_from_html(self, arxiv_id: str, version_num: int) -> Optional[Dict[str, Any]]:
+        """
+        Fetch and parse metadata for a specific version using HTML scraping.
         Args:
             arxiv_id: ArXiv ID without version
+            version_num: Version number to fetch (1, 2, 3, etc.)
         Returns:
-            Latest version string (e.g., "v3") or None if couldn't determine
+            Dictionary with version metadata or None if version doesn't exist
         """
-        # The BibTeX endpoint always returns the latest version's metadata,
-        # so we don't need to explicitly fetch version info
-        return None
-    def is_arxiv_reference(self, reference: Dict[str, Any]) -> bool:
+        version_str = f"v{version_num}"
+        url = f"{self.abs_url}/{arxiv_id}{version_str}"
+        self.rate_limiter.wait()
+        try:
+            logger.debug(f"Checking historical version: {url}")
+            response = requests.get(url, timeout=self.timeout)
+            if response.status_code == 404:
+                return None  # Version does not exist
+            response.raise_for_status()
+            html_content = response.text
+            # Parse meta tags for metadata
+            # Title
+            title_match = re.search(r'<meta name="citation_title" content="(.*?)"', html_content)
+            title = html.unescape(title_match.group(1)).strip() if title_match else ""
+            # Authors
+            authors = []
+            for auth in re.findall(r'<meta name="citation_author" content="(.*?)"', html_content):
+                authors.append(html.unescape(auth).strip())
+            # Date/Year
+            date_match = re.search(r'<meta name="citation_date" content="(.*?)"', html_content)
+            year = None
+            if date_match:
+                ym = re.search(r'^(\d{4})', date_match.group(1))
+                if ym:
+                    year = int(ym.group(1))
+            return {
+                'version': version_str,
+                'version_num': version_num,
+                'title': title,
+                'authors': [{'name': a} for a in authors],
+                'year': year,
+                'url': url,
+            }
+        except Exception as e:
+            logger.warning(f"Failed to fetch history {version_str}: {e}")
+            return None
+    def _get_latest_version_number(self, arxiv_id: str) -> Optional[int]:
         """
-        Check if a reference is an ArXiv paper.
+        Get the latest version number by fetching the abstract page.
         Args:
-            reference: Reference dictionary
+            arxiv_id: ArXiv ID without version
         Returns:
-            True if reference appears to be an ArXiv paper
+            Latest version number as integer, or None if couldn't determine
         """
-        arxiv_id, _ = self.extract_arxiv_id(reference)
-        return arxiv_id is not None
+        url = f"{self.abs_url}/{arxiv_id}"
+        self.rate_limiter.wait()
+        try:
+            response = requests.get(url, timeout=self.timeout)
+            response.raise_for_status()
+            # Look for version links like "[v1]", "[v2]", etc.
+            versions = re.findall(r'\[v(\d+)\]', response.text)
+            if versions:
+                return max(int(v) for v in versions)
+            return None
+        except Exception as e:
+            logger.warning(f"Failed to get latest version for {arxiv_id}: {e}")
+            return None
+    def _compare_info_match(
+            self, cited_title: str, cited_authors: List[str], cited_year: Optional[int],
+            authoritative_title: str, authoritative_authors: List[str], authoritative_year: Optional[int]) -> bool:
+        """
+        Compare the information of a cited paper with the authoritative information.
+        Args:
+            cited_title: Title from the reference
+            cited_authors: Authors from the reference
+            cited_year: Year from the reference
+            authoritative_title: Title from ArXiv version
+            authoritative_authors: Authors from ArXiv version
+            authoritative_year: Year from ArXiv version
+        Returns:
+            True if the information matches, False otherwise.
+        """
+        # Compare title
+        if cited_title and authoritative_title:
+            title_similarity = compare_titles_with_latex_cleaning(cited_title, authoritative_title)
+            if title_similarity < SIMILARITY_THRESHOLD:
+                return False
+        # Compare authors
+        if cited_authors and authoritative_authors:
+            authors_match, _ = compare_authors(cited_authors, authoritative_authors)
+            if not authors_match:
+                return False
+        # Compare year
+        if cited_year and authoritative_year:
+            if cited_year != authoritative_year:
+                return False
+        return True
     def verify_reference(self, reference: Dict[str, Any]) -> Tuple[Optional[Dict[str, Any]], List[Dict[str, Any]], Optional[str]]:
         """
@@ -360,10 +463,10 @@ class ArXivCitationChecker:
         This method:
         1. Extracts the ArXiv ID from the reference
-        2. Fetches the official BibTeX from ArXiv (always latest version)
-        3. Parses the BibTeX to get authoritative metadata
-        4. Compares cited metadata against authoritative source
-        5. Logs warnings for version mismatches
+        2. Fetches the official BibTeX from ArXiv (latest version)
+        3. Compares cited metadata against latest version
+        4. If errors found, checks historical versions to find a match
+        5. Annotates errors with version info if reference matches an older version
         Args:
             reference: Reference dictionary with title, authors, year, url, etc.
@@ -385,34 +488,26 @@ class ArXivCitationChecker:
         logger.debug(f"ArXivCitationChecker: Verifying ArXiv paper {arxiv_id}")
-        # Fetch authoritative BibTeX
+        # Extract information from reference for comparison
+        cited_title = reference.get('title', '').strip()
+        cited_authors = reference.get('authors', [])
+        cited_year = reference.get('year')
+        # Fetch authoritative BibTeX (latest version)
         bibtex_content = self.fetch_bibtex(arxiv_id)
         if not bibtex_content:
             logger.debug(f"ArXivCitationChecker: Could not fetch BibTeX for {arxiv_id}")
             return None, [{"error_type": "api_failure", "error_details": f"Could not fetch ArXiv BibTeX for {arxiv_id}"}], None
-        # Parse BibTeX
-        verified_data = self.parse_bibtex(bibtex_content)
+        latest_data = self.parse_bibtex(bibtex_content)
-        if not verified_data:
+        if not latest_data:
             logger.debug(f"ArXivCitationChecker: Could not parse BibTeX for {arxiv_id}")
             return None, [], None
-        # Log version mismatch warning if cited version differs from latest
-        if cited_version:
-            # ArXiv BibTeX always returns latest version metadata
-            # We don't know the actual latest version number without additional API call,
-            # but we can warn that a specific version was cited
-            errors.append({
-                'warning_type': 'version',
-                'warning_details': f"Reference cites ArXiv version {cited_version}, verified against latest version metadata",
-            })
-            logger.debug(f"ArXivCitationChecker: Cited version {cited_version} for {arxiv_id}")
-        # Compare title
-        cited_title = reference.get('title', '').strip()
-        authoritative_title = verified_data.get('title', '').strip()
+        # Compare against latest version
+        authoritative_title = latest_data.get('title', '').strip()
         if cited_title and authoritative_title:
             title_similarity = compare_titles_with_latex_cleaning(cited_title, authoritative_title)
@@ -426,9 +521,8 @@ class ArXivCitationChecker:
                 })
         # Compare authors
-        cited_authors = reference.get('authors', [])
         if cited_authors:
-            authoritative_authors = verified_data.get('authors', [])
+            authoritative_authors = latest_data.get('authors', [])
             authors_match, author_error = compare_authors(cited_authors, authoritative_authors)
             if not authors_match:
@@ -440,9 +534,7 @@ class ArXivCitationChecker:
                 })
         # Compare year
-        cited_year = reference.get('year')
-        authoritative_year = verified_data.get('year')
+        authoritative_year = latest_data.get('year')
         year_warning = validate_year(
             cited_year=cited_year,
             paper_year=authoritative_year,
@@ -451,10 +543,50 @@ class ArXivCitationChecker:
         )
         if year_warning:
             errors.append(year_warning)
-        # Build URL
         paper_url = f"https://arxiv.org/abs/{arxiv_id}"
-        logger.debug(f"ArXivCitationChecker: Verified {arxiv_id} with {len(errors)} errors/warnings")
+        # If no errors against latest version, we're done
+        if len(errors) == 0:
+            logger.debug(f"ArXivCitationChecker: Verified {arxiv_id} with no errors")
+            return latest_data, errors, paper_url
+        # Check if reference matches a historical version
+        # Get latest version number first
+        latest_version_num = self._get_latest_version_number(arxiv_id)
+        if latest_version_num and latest_version_num > 1:
+            # Check historical versions (1 to latest-1)
+            for version_num in range(1, latest_version_num):
+                version_data = self._fetch_version_metadata_from_html(arxiv_id, version_num)
+                if not version_data:
+                    continue
+                # Check if reference matches this historical version
+                if self._compare_info_match(
+                        cited_title, cited_authors, cited_year,
+                        version_data['title'], version_data['authors'], version_data['year']):
+                    logger.debug(f"ArXivCitationChecker: Reference matches historical version v{version_num}")
+                    # Convert errors to warnings with version update info
+                    # Version update issues are informational, not errors - the citation was correct for its time
+                    version_suffix = f" (v{version_num} vs v{latest_version_num} update)"
+                    warnings = []
+                    for error in errors:
+                        warning = {
+                            'warning_type': error.get('error_type', 'unknown') + version_suffix,
+                            'warning_details': error.get('error_details', ''),
+                        }
+                        # Preserve correction hints
+                        for key in ['ref_title_correct', 'ref_authors_correct', 'ref_year_correct']:
+                            if key in error:
+                                warning[key] = error[key]
+                        warnings.append(warning)
+                    # Return with warnings instead of errors - URL points to the matched version
+                    matched_url = f"https://arxiv.org/abs/{arxiv_id}v{version_num}"
+                    return latest_data, warnings, matched_url
-        return verified_data, errors, paper_url
+        logger.debug(f"ArXivCitationChecker: Verified {arxiv_id} with {len(errors)} errors/warnings")
+        return latest_data, errors, paper_url

academic-refchecker 2.0.12.tar.gz → 2.0.14.tar.gz

academic-refchecker 2.0.12.tar.gz → 2.0.14.tar.gz