academic-refchecker 1.2.50-py3-none-any.whl → 1.2.52-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __version__.py +1 -1
- {academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/METADATA +10 -1
- {academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/RECORD +15 -14
- checkers/github_checker.py +4 -1
- checkers/openreview_checker.py +10 -5
- checkers/pdf_paper_checker.py +493 -0
- checkers/semantic_scholar.py +8 -6
- checkers/webpage_checker.py +428 -2
- core/parallel_processor.py +4 -1
- core/refchecker.py +172 -75
- utils/text_utils.py +134 -13
- {academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/WHEEL +0 -0
- {academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/entry_points.txt +0 -0
- {academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/licenses/LICENSE +0 -0
- {academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/top_level.txt +0 -0
core/refchecker.py
CHANGED
@@ -50,7 +50,8 @@ from utils.text_utils import (clean_author_name, clean_title, clean_title_basic,
     detect_latex_bibliography_format, extract_latex_references,
     detect_standard_acm_natbib_format, strip_latex_commands,
     format_corrected_reference, is_name_match, enhanced_name_match,
-    calculate_title_similarity, normalize_arxiv_url, deduplicate_urls)
+    calculate_title_similarity, normalize_arxiv_url, deduplicate_urls,
+    compare_authors)
 from utils.config_validator import ConfigValidator
 from services.pdf_processor import PDFProcessor
 from checkers.enhanced_hybrid_checker import EnhancedHybridReferenceChecker
@@ -1789,7 +1790,7 @@ class ArxivReferenceChecker:
             if authors:
                 db_authors = [author.get('name', '') for author in check_paper_data['authors']]
 
-                authors_match, author_error = self.compare_authors(authors, db_authors)
+                authors_match, author_error = compare_authors(authors, db_authors)
                 if authors_match:
                     paper_data = check_paper_data
                     search_strategy = "Normalized title with author match"
@@ -1901,10 +1902,12 @@ class ArxivReferenceChecker:
 
         if normalized_title != db_title:
             from utils.error_utils import format_title_mismatch
+            # Clean the title for display (remove LaTeX commands like {LLM}s -> LLMs)
+            clean_cited_title = strip_latex_commands(title)
             logger.debug(f"DB Verification: Title mismatch - cited: '{title}', actual: '{paper_data.get('title')}'")
             errors.append({
                 'error_type': 'title',
-                'error_details': format_title_mismatch(title, paper_data.get('title')),
+                'error_details': format_title_mismatch(clean_cited_title, paper_data.get('title')),
                 'ref_title_correct': paper_data.get('title')
             })
 
@@ -1912,7 +1915,7 @@ class ArxivReferenceChecker:
         if authors and paper_data.get('authors'):
             # Extract author names from database data
             correct_names = [author.get('name', '') for author in paper_data['authors']]
-            authors_match, author_error = self.compare_authors(authors, correct_names)
+            authors_match, author_error = compare_authors(authors, correct_names)
 
             if not authors_match:
                 logger.debug(f"DB Verification: Author mismatch - {author_error}")
@@ -2018,8 +2021,20 @@ class ArxivReferenceChecker:
             logger.debug(f"Database mode: Initial paper_url from database checker: {paper_url}")
 
             if not verified_data:
-                # Mark as unverified but
-                return [{"error_type": "unverified", "error_details": "Reference could not be verified in database"}], paper_url, None
+                # Mark as unverified but check URL for more specific reason or verification
+                if reference.get('url', '').strip():
+                    # Use raw URL verifier to check if it can be verified or get specific reason
+                    url_verified_data, url_errors, url_checked = self.verify_raw_url_reference(reference)
+                    if url_verified_data:
+                        # URL verification succeeded - return as verified
+                        logger.debug(f"Database mode: URL verification succeeded for unverified reference")
+                        return None, url_checked, url_verified_data
+                    else:
+                        # URL verification failed - use specific error reason
+                        url_error_details = url_errors[0].get('error_details', 'Reference could not be verified in database') if url_errors else 'Reference could not be verified in database'
+                        return [{"error_type": "unverified", "error_details": url_error_details}], paper_url, None
+                else:
+                    return [{"error_type": "unverified", "error_details": "Reference could not be verified in database"}], paper_url, None
 
             # Convert database errors to our format
             formatted_errors = []
@@ -2115,7 +2130,29 @@ class ArxivReferenceChecker:
             return [{"error_type": "unverified", "error_details": "Database connection not available"}], None, None
 
         # For non-database mode, use the standard reference verification
-        return self.verify_reference_standard(source_paper, reference)
+        errors, paper_url, verified_data = self.verify_reference_standard(source_paper, reference)
+
+        # If standard verification failed and the reference has a URL, try raw URL verification
+        if errors and verified_data is None:
+            # Check if there's an unverified error
+            unverified_errors = [e for e in errors if e.get('error_type') == 'unverified']
+            if unverified_errors and reference.get('url', '').strip():
+                # Use raw URL verifier to check if it can be verified or get specific reason
+                url_verified_data, url_errors, url_checked = self.verify_raw_url_reference(reference)
+                if url_verified_data:
+                    # URL verification succeeded - return as verified
+                    logger.debug(f"Non-database mode: URL verification succeeded for unverified reference")
+                    return None, url_checked, url_verified_data
+                else:
+                    # URL verification failed - use specific error reason
+                    url_error_details = url_errors[0].get('error_details', 'Reference could not be verified') if url_errors else 'Reference could not be verified'
+                    # Update the unverified error with the specific reason
+                    for error in errors:
+                        if error.get('error_type') == 'unverified':
+                            error['error_details'] = url_error_details
+                            break
+
+        return errors, paper_url, verified_data
 
 
     def verify_github_reference(self, reference):
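
Both the database-mode and non-database-mode call sites share one fallback rule. A standalone sketch of it (hypothetical helper name, mirroring the diff's logic rather than calling the package):

    def apply_url_fallback(errors, reference, verify_raw_url):
        """Mirror of the new fallback: rescue or sharpen 'unverified' results."""
        # Only fires when verification failed outright and the entry carries a URL
        if not errors or not reference.get('url', '').strip():
            return errors, None
        if not any(e.get('error_type') == 'unverified' for e in errors):
            return errors, None
        verified, url_errors, _url = verify_raw_url(reference)
        if verified:
            return None, verified  # the URL check rescued the reference
        # Otherwise replace the generic message with the URL-specific reason
        detail = (url_errors[0].get('error_details', 'Reference could not be verified')
                  if url_errors else 'Reference could not be verified')
        for e in errors:
            if e.get('error_type') == 'unverified':
                e['error_details'] = detail
                break
        return errors, None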
@@ -2250,6 +2287,55 @@ class ArxivReferenceChecker:
             formatted_errors.append(formatted_error)
         return formatted_errors if formatted_errors else [{"error_type": "unverified", "error_details": "Web page could not be verified"}], page_url, None
 
+    def verify_raw_url_reference(self, reference):
+        """
+        Verify a raw URL from an unverified reference - can return verified data if appropriate
+
+        Args:
+            reference: The reference to verify (already determined to be unverified by paper validators)
+
+        Returns:
+            Tuple of (verified_data, errors, url) where:
+            - verified_data: Dict with verified data if URL should be considered verified, None otherwise
+            - errors: List of error dictionaries
+            - url: The URL that was checked
+        """
+        logger.debug(f"Checking raw URL for unverified reference: {reference.get('title', 'Untitled')}")
+
+        # Extract URL from reference
+        web_url = reference.get('url', '').strip()
+        if not web_url:
+            return None, [{"error_type": "unverified", "error_details": "Reference could not be verified"}], None
+
+        # First try PDF paper checker if URL appears to be a PDF
+        from checkers.pdf_paper_checker import PDFPaperChecker
+        pdf_checker = PDFPaperChecker()
+
+        if pdf_checker.can_check_reference(reference):
+            logger.debug(f"URL appears to be PDF, trying PDF verification: {web_url}")
+            try:
+                verified_data, errors, url = pdf_checker.verify_reference(reference)
+                if verified_data:
+                    logger.debug(f"PDF verification successful for: {reference.get('title', 'Untitled')}")
+                    return verified_data, errors, url
+                else:
+                    logger.debug(f"PDF verification failed, falling back to web page verification")
+            except Exception as e:
+                logger.error(f"Error in PDF verification: {e}")
+                logger.debug(f"PDF verification error, falling back to web page verification")
+
+        # Fall back to web page checker
+        from checkers.webpage_checker import WebPageChecker
+        webpage_checker = WebPageChecker()
+
+        try:
+            verified_data, errors, url = webpage_checker.verify_raw_url_for_unverified_reference(reference)
+            logger.debug(f"Raw URL verification result: verified_data={verified_data is not None}, errors={len(errors)}, url={url}")
+            return verified_data, errors, url
+        except Exception as e:
+            logger.error(f"Error checking raw URL: {e}")
+            return None, [{"error_type": "unverified", "error_details": "Reference could not be verified"}], web_url
+
     def verify_reference_standard(self, source_paper, reference):
         """
         Verify if a reference is accurate using GitHub, Semantic Scholar, or other checkers
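
One design note: verify_raw_url_reference returns (verified_data, errors, url), the reverse of the (errors, paper_url, verified_data) triple its callers ultimately return, which is why the call sites unpack it as url_verified_data, url_errors, url_checked and re-order before returning. The imports of PDFPaperChecker and WebPageChecker are kept local to the method, so the new checkers/pdf_paper_checker.py module is only loaded when a raw URL actually needs checking.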
@@ -2271,11 +2357,6 @@ class ArxivReferenceChecker:
         if github_result:
             return github_result
 
-        # Next, check if this is a web page reference
-        webpage_result = self.verify_webpage_reference(reference)
-        if webpage_result:
-            return webpage_result
-
         # Use the Semantic Scholar client to verify the reference
         verified_data, errors, paper_url = self.non_arxiv_checker.verify_reference(reference)
 
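
With this removal, verify_reference_standard no longer consults web-page URLs up front; they are now reached only through the raw-URL fallback above, after the GitHub and Semantic Scholar checkers have failed to verify the reference.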
@@ -3054,6 +3135,13 @@ class ArxivReferenceChecker:
         try:
             # Extract bibliography
             bibliography = self.extract_bibliography(paper, debug_mode)
+
+            # Apply deduplication to all bibliography sources (not just LLM-extracted)
+            if len(bibliography) > 1:  # Only deduplicate if we have multiple references
+                original_count = len(bibliography)
+                bibliography = self._deduplicate_bibliography_entries(bibliography)
+                if len(bibliography) < original_count:
+                    logger.debug(f"Deduplicated {original_count} references to {len(bibliography)} unique references")
 
             # Update statistics
             self.total_papers_processed += 1
@@ -3493,8 +3581,9 @@ class ArxivReferenceChecker:
                 except Exception as e:
                     logger.error(f"LLM fallback failed: {e}")
                     return []
-
-
+            if len(biblatex_refs) > 0:
+                logger.debug("Using biblatex file")
+                return biblatex_refs
 
         # For non-standard formats, try LLM-based extraction if available
         if self.llm_extractor:
@@ -4284,9 +4373,9 @@ class ArxivReferenceChecker:
             # If either has no title, can't reliably determine if duplicate
             return False
 
-        # If titles match exactly, consider them duplicates
-        # This handles the case where the same paper appears multiple times
-        if seg1['title'] == seg2['title']:
+        # If titles match exactly (case-insensitive), consider them duplicates
+        # This handles the case where the same paper appears multiple times with different capitalization
+        if seg1['title'].lower() == seg2['title'].lower():
             return True
 
         # Special case: Check if one title is an arXiv identifier and the other is a real title
@@ -4299,16 +4388,54 @@ class ArxivReferenceChecker:
         author1 = seg1['author']
         author2 = seg2['author']
 
-        if author1 and author2 and author1 == author2:
+        if author1 and author2 and author1.lower() == author2.lower():
             # Same authors - check if one title is substring of other or significant similarity
-            title1 = seg1['title']
-            title2 = seg2['title']
+            title1 = seg1['title'].lower()
+            title2 = seg2['title'].lower()
 
             if (title1 in title2 or title2 in title1):
                 return True
 
         return False
 
+    def _deduplicate_bibliography_entries(self, bibliography):
+        """
+        Deduplicate bibliography entries using title and author comparison.
+
+        This works with structured reference dictionaries from BibTeX/LaTeX parsing,
+        as opposed to _deduplicate_references_with_segment_matching which works with raw text.
+
+        Args:
+            bibliography: List of reference dictionaries with 'title', 'authors', etc.
+
+        Returns:
+            List of unique reference dictionaries
+        """
+        if len(bibliography) <= 1:
+            return bibliography
+
+        unique_refs = []
+        seen_titles = set()
+
+        for ref in bibliography:
+            title = ref.get('title', '').strip()
+            if not title:
+                # Keep references without titles (they can't be deduplicated)
+                unique_refs.append(ref)
+                continue
+
+            # Normalize title for comparison (case-insensitive, basic cleanup)
+            normalized_title = title.lower().strip()
+
+            # Check if we've seen this title before (case-insensitive)
+            if normalized_title in seen_titles:
+                logger.debug(f"Skipping duplicate reference: '{title}'")
+            else:
+                unique_refs.append(ref)
+                seen_titles.add(normalized_title)
+
+        return unique_refs
+
     def _is_arxiv_identifier_title_mismatch(self, seg1, seg2):
         """
         Check if one reference has an arXiv identifier as title while the other has a real title,
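
Note that, despite the docstring's mention of author comparison, the helper keys on the normalized title alone. A minimal standalone sketch of the rule it applies (not the package API):

    def dedupe_by_title(bibliography):
        seen, unique = set(), []
        for ref in bibliography:
            key = ref.get('title', '').strip().lower()
            if not key:
                unique.append(ref)  # untitled entries are always kept
            elif key not in seen:
                unique.append(ref)
                seen.add(key)
        return unique

    refs = [{'title': 'Attention Is All You Need'},
            {'title': 'ATTENTION IS ALL YOU NEED'},
            {'title': ''}]
    print(len(dedupe_by_title(refs)))  # 2: the case-variant duplicate is dropped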
@@ -5087,60 +5214,6 @@ class ArxivReferenceChecker:
 
         return references
 
-    def compare_authors(self, cited_authors, correct_authors):
-        """
-        Compare author lists to check if they match using improved name matching.
-        Uses the utility function is_name_match for robust author name comparison.
-        """
-        # Clean up author names
-        cleaned_cited = []
-        for author in cited_authors:
-            # Remove reference numbers (e.g., "[1]")
-            author = re.sub(r'^\[\d+\]', '', author)
-            # Remove line breaks
-            author = author.replace('\n', ' ')
-
-            # Handle "et al" cases properly
-            author_clean = author.strip()
-            if author_clean.lower() == 'et al':
-                # Skip pure "et al" entries
-                continue
-            elif 'et al' in author_clean.lower():
-                # Remove "et al" from the author name (e.g., "S. M. Lundberg et al" -> "S. M. Lundberg")
-                author_clean = re.sub(r'\s+et\s+al\.?', '', author_clean, flags=re.IGNORECASE).strip()
-                if author_clean:  # Only add if something remains
-                    cleaned_cited.append(author_clean)
-            else:
-                cleaned_cited.append(author_clean)
-
-        if not cleaned_cited:
-            return True, "No authors to compare"
-
-        # Handle "et al" cases and length mismatches
-        has_et_al = any('et al' in a.lower() for a in cited_authors)
-
-        if len(cleaned_cited) < len(correct_authors) and (has_et_al or len(cleaned_cited) <= 3):
-            # Only compare the authors that are listed
-            correct_authors = correct_authors[:len(cleaned_cited)]
-        elif len(cleaned_cited) > len(correct_authors) and len(correct_authors) >= 3:
-            # Use available correct authors
-            cleaned_cited = cleaned_cited[:len(correct_authors)]
-
-        # If there's a big count mismatch and no "et al", it's likely an error
-        if abs(len(cleaned_cited) - len(correct_authors)) > 3 and not has_et_al:
-            return False, "Author count mismatch"
-
-        # Compare first author (most important) using the improved utility function
-        if cleaned_cited and correct_authors:
-            # Use raw names for comparison (is_name_match handles normalization internally)
-            cited_first = cleaned_cited[0]
-            correct_first = correct_authors[0]
-
-            if not enhanced_name_match(cited_first, correct_first):
-                from utils.error_utils import format_first_author_mismatch
-                return False, format_first_author_mismatch(cited_first, correct_first)
-
-        return True, "Authors match"
 
     def normalize_text(self, text):
         """
@@ -5251,6 +5324,19 @@ class ArxivReferenceChecker:
             return False
         return True
 
+    def compare_authors(self, authors1, authors2):
+        """
+        Compare authors using the text_utils compare_authors function.
+
+        Args:
+            authors1: First list of authors
+            authors2: Second list of authors
+
+        Returns:
+            Tuple of (match_result, error_message)
+        """
+        return compare_authors(authors1, authors2)
+
     def _verify_references_sequential(self, paper, bibliography, paper_errors, error_types, unverified_count, debug_mode):
         """
         Sequential reference verification (original implementation)
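
The class method is now a thin delegate, so matching behavior is defined entirely by utils.text_utils.compare_authors. A usage sketch, assuming the wheel's top-level modules are importable as in the diff's own imports; the exact message text may differ:

    from utils.text_utils import compare_authors

    match, detail = compare_authors(["S. M. Lundberg et al"],
                                    ["Scott M. Lundberg", "Su-In Lee"])
    # "et al" handling should let this match on the first author alone
    print(match, detail)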
@@ -5267,7 +5353,10 @@ class ArxivReferenceChecker:
                 ref_id = self.extract_arxiv_id_from_url(reference['url'])
 
             # Print reference info in non-debug mode (improved formatting)
-            title = reference.get('title', 'Untitled')
+            raw_title = reference.get('title', 'Untitled')
+            # Clean LaTeX commands from title for display
+            from utils.text_utils import strip_latex_commands
+            title = strip_latex_commands(raw_title)
             from utils.text_utils import format_authors_for_display
             authors = format_authors_for_display(reference.get('authors', []))
             year = reference.get('year', '')
@@ -5504,6 +5593,14 @@ class ArxivReferenceChecker:
         """Categorize the unverified error into checker error or not found"""
         error_details_lower = error_details.lower()
 
+        # New specific URL-based unverified reasons
+        if error_details_lower == "non-existent web page":
+            return "Non-existent web page"
+        elif error_details_lower == "paper not found and url doesn't reference it":
+            return "Paper not found and URL doesn't reference it"
+        elif error_details_lower == "paper not verified but url references paper":
+            return "Paper not verified but URL references paper"
+
         # Checker/API errors
         api_error_patterns = [
             'api error', 'rate limit', 'http error', 'network error',
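
These three early returns match the whole lowercased error string exactly, so any drift in the wording emitted by the URL checkers would fall through to the generic pattern lists below. A table-driven sketch of the same mapping:

    SPECIFIC_URL_REASONS = {
        "non-existent web page": "Non-existent web page",
        "paper not found and url doesn't reference it": "Paper not found and URL doesn't reference it",
        "paper not verified but url references paper": "Paper not verified but URL references paper",
    }

    def categorize(error_details):
        return SPECIFIC_URL_REASONS.get(error_details.lower())  # None -> fall through

    print(categorize("Non-existent web page"))  # -> Non-existent web page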
utils/text_utils.py
CHANGED
@@ -580,6 +580,9 @@ def clean_title_for_search(title):
     if not isinstance(title, str):
         return str(title) if title is not None else ''
 
+    # Strip LaTeX commands to handle math formatting and other LaTeX markup
+    title = strip_latex_commands(title)
+
     # Clean up newlines and normalize whitespace (but preserve other structure)
     title = title.replace('\n', ' ').strip()
     title = re.sub(r'\s+', ' ', title)  # Normalize whitespace only
@@ -753,8 +756,11 @@ def normalize_paper_title(title: str) -> str:
     if not title:
         return ""
 
+    # Strip LaTeX commands first to handle math formatting consistently
+    normalized = strip_latex_commands(title)
+
     # Convert to lowercase
-    normalized = title.lower()
+    normalized = normalized.lower()
 
     # Remove common prefixes that don't affect the actual title content
     prefixes_to_remove = [
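
With LaTeX stripped before lowercasing, brace-protected acronyms should normalize the same as their plain forms. A quick check (a sketch, assuming the wheel's utils package is importable):

    from utils.text_utils import normalize_paper_title

    print(normalize_paper_title("Evaluating {LLM}s") ==
          normalize_paper_title("Evaluating LLMs"))  # expected: True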
@@ -2107,21 +2113,37 @@ def compare_authors(cited_authors: list, correct_authors: list, normalize_func=None):
 
         return True, f"Authors match (verified {len(cleaned_cited)} of {len(correct_names)} with et al)"
 
+    # Detect if cited authors look like parsing fragments
+    # (many short single-word entries that might be first/last name fragments)
+    def looks_like_fragments(authors_list):
+        if len(authors_list) < 4:  # Need at least 4 to detect fragment pattern
+            return False
+        single_word_count = sum(1 for author in authors_list if len(author.strip().split()) == 1)
+        return single_word_count >= len(authors_list) * 0.7  # 70% or more are single words
+
     # Normal case without "et al" - compare all authors
     if len(cleaned_cited) != len(correct_names):
-
-        #
-        if
+
+        # Check if cited authors look like parsing fragments
+        if looks_like_fragments(cleaned_cited):
             from utils.error_utils import format_author_count_mismatch
-            # Convert cited names to display format (First Last) before showing in error
             display_cited = [format_author_for_display(author) for author in cleaned_cited]
             error_msg = format_author_count_mismatch(len(cleaned_cited), len(correct_names), display_cited, correct_names)
             return False, error_msg
 
-        #
-
-
-
+        # For all count mismatches, show the count mismatch error
+        if len(cleaned_cited) < len(correct_names):
+            from utils.error_utils import format_author_count_mismatch
+            display_cited = [format_author_for_display(author) for author in cleaned_cited]
+            error_msg = format_author_count_mismatch(len(cleaned_cited), len(correct_names), display_cited, correct_names)
+            return False, error_msg
+
+        # For cases where cited > correct, also show count mismatch
+        elif len(cleaned_cited) > len(correct_names):
+            from utils.error_utils import format_author_count_mismatch
+            display_cited = [format_author_for_display(author) for author in cleaned_cited]
+            error_msg = format_author_count_mismatch(len(cleaned_cited), len(correct_names), display_cited, correct_names)
+            return False, error_msg
     else:
         comparison_cited = cleaned_cited
         comparison_correct = correct_names
@@ -2484,8 +2506,64 @@ def strip_latex_commands(text):
     # Remove font size commands
     text = re.sub(r'\\(tiny|scriptsize|footnotesize|small|normalsize|large|Large|LARGE|huge|Huge)\b', '', text)
 
-    #
-
+    # Handle complex math mode patterns first
+    # Pattern like $\{$$\mu$second-scale$\}$ should become μsecond-scale
+    def process_complex_math(match):
+        content = match.group(1)
+        # Handle common Greek letters
+        content = re.sub(r'\\mu\b', 'μ', content)  # \mu -> μ
+        content = re.sub(r'\\alpha\b', 'α', content)  # \alpha -> α
+        content = re.sub(r'\\beta\b', 'β', content)  # \beta -> β
+        content = re.sub(r'\\gamma\b', 'γ', content)  # \gamma -> γ
+        content = re.sub(r'\\delta\b', 'δ', content)  # \delta -> δ
+        content = re.sub(r'\\epsilon\b', 'ε', content)  # \epsilon -> ε
+        content = re.sub(r'\\lambda\b', 'λ', content)  # \lambda -> λ
+        content = re.sub(r'\\pi\b', 'π', content)  # \pi -> π
+        content = re.sub(r'\\sigma\b', 'σ', content)  # \sigma -> σ
+        content = re.sub(r'\\theta\b', 'θ', content)  # \theta -> θ
+        # Remove any remaining LaTeX commands and braces from inside math
+        content = re.sub(r'\\[a-zA-Z]+\b', '', content)
+        content = re.sub(r'[{}]', '', content)
+        # Clean up any remaining $ signs
+        content = re.sub(r'\$+', '', content)
+        return content
+
+    # Handle complex nested math patterns first
+    # Pattern like $\{$$\mu$second-scale$\}$ should become μsecond-scale
+    def process_nested_math_specifically(match):
+        content = match.group(0)
+        # Handle the specific pattern: $\{$$\mu$second-scale$\}$
+        # Extract the meaningful parts
+        if r'\mu' in content:
+            # Replace \mu with μ and extract the surrounding text
+            content = re.sub(r'\\mu\b', 'μ', content)
+        # Remove all LaTeX math markup
+        content = re.sub(r'[\$\{\}\\]+', '', content)
+        return content
+
+    # Handle the specific problematic pattern
+    text = re.sub(r'\$\\\{[^}]*\\\}\$', process_nested_math_specifically, text)
+
+    # Handle Greek letters in math mode before removing delimiters
+    def process_standard_math(match):
+        content = match.group(1)
+        # Handle common Greek letters - content has single backslashes
+        content = re.sub(r'\\mu\b', 'μ', content)
+        content = re.sub(r'\\alpha\b', 'α', content)
+        content = re.sub(r'\\beta\b', 'β', content)
+        content = re.sub(r'\\gamma\b', 'γ', content)
+        content = re.sub(r'\\delta\b', 'δ', content)
+        content = re.sub(r'\\epsilon\b', 'ε', content)
+        content = re.sub(r'\\lambda\b', 'λ', content)
+        content = re.sub(r'\\pi\b', 'π', content)
+        content = re.sub(r'\\sigma\b', 'σ', content)
+        content = re.sub(r'\\theta\b', 'θ', content)
+        # Remove any remaining LaTeX commands
+        content = re.sub(r'\\[a-zA-Z]+\b', '', content)
+        return content
+
+    # Remove standard math mode delimiters with Greek letter processing
+    text = re.sub(r'\$([^$]*)\$', process_standard_math, text)
     text = re.sub(r'\\begin\{equation\}.*?\\end\{equation\}', '', text, flags=re.DOTALL)
     text = re.sub(r'\\begin\{align\}.*?\\end\{align\}', '', text, flags=re.DOTALL)
 
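
The motivating case named in the comments is $\mu$second-scale, which previously survived as raw math markup. Note that process_complex_math is defined here but never passed to a re.sub call in this hunk; only the nested-specific and standard handlers are wired up. A quick check of the intended behavior (expected output taken from the diff's own comments, not re-verified here):

    from utils.text_utils import strip_latex_commands

    print(strip_latex_commands(r"Achieving $\mu$second-scale tail latency"))
    # expected: Achieving μsecond-scale tail latency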
@@ -3369,7 +3447,18 @@ def _extract_corrected_reference_data(error_entry: dict, corrected_data: dict) -
     """
     # Get the corrected information
     correct_title = error_entry.get('ref_title_correct') or corrected_data.get('title', '')
-    correct_authors = error_entry.get('ref_authors_correct') or corrected_data.get('authors', '')
+
+    # Handle authors - can be string or list of dicts from API
+    authors_raw = error_entry.get('ref_authors_correct') or corrected_data.get('authors', '')
+    if isinstance(authors_raw, list):
+        # Convert list of author dicts to comma-separated string
+        if authors_raw and isinstance(authors_raw[0], dict):
+            correct_authors = ', '.join([author.get('name', '') for author in authors_raw])
+        else:
+            correct_authors = ', '.join(authors_raw)
+    else:
+        correct_authors = str(authors_raw) if authors_raw else ''
+
     correct_year = error_entry.get('ref_year_correct') or corrected_data.get('year', '')
 
     # Prioritize the verified URL that was actually used for verification
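
The branch accepts the three author shapes that show up in practice: API-style lists of dicts, plain lists of name strings, and a pre-joined string. A standalone mirror (not the package API):

    def authors_to_string(authors_raw):
        if isinstance(authors_raw, list):
            if authors_raw and isinstance(authors_raw[0], dict):
                # API-style: [{'name': ...}, ...]
                return ', '.join(a.get('name', '') for a in authors_raw)
            return ', '.join(authors_raw)  # plain list of name strings
        return str(authors_raw) if authors_raw else ''

    print(authors_to_string([{'name': 'Ada Lovelace'}, {'name': 'Alan Turing'}]))
    # -> Ada Lovelace, Alan Turing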
@@ -3573,7 +3662,39 @@ def format_corrected_plaintext(original_reference, corrected_data, error_entry):
     if correct_url:
         citation_parts.append(f"{correct_url}")
 
-    return '. '.join(citation_parts) + '.'
+    citation_text = '. '.join(citation_parts) + '.'
+
+    # Add citation key information if available (for easy copying)
+    citation_key = original_reference.get('bibtex_key') or original_reference.get('bibitem_key')
+    if citation_key and citation_key != 'unknown':
+        bibtex_type = original_reference.get('bibtex_type', 'misc')
+        citation_text += f"\n\n% Citation key for BibTeX: @{bibtex_type}{{{citation_key}, ...}}"
+
+    return citation_text
+
+
+def compare_titles_with_latex_cleaning(cited_title: str, database_title: str) -> float:
+    """
+    Compare two titles with proper LaTeX cleaning for accurate similarity scoring.
+
+    This function ensures both titles are cleaned of LaTeX commands before comparison
+    to avoid false mismatches due to formatting differences like {LLM}s vs LLMs.
+
+    Args:
+        cited_title: Title from cited reference (may contain LaTeX)
+        database_title: Title from database (usually already clean)
+
+    Returns:
+        Similarity score between 0 and 1
+    """
+    if not cited_title or not database_title:
+        return 0.0
+
+    # Clean LaTeX commands from cited title to match database format
+    clean_cited = strip_latex_commands(cited_title)
+
+    # Calculate similarity using cleaned titles
+    return calculate_title_similarity(clean_cited, database_title)
 
 
 def calculate_title_similarity(title1: str, title2: str) -> float:
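
A usage sketch for the new helper; the exact score depends on calculate_title_similarity, but titles identical after LaTeX cleaning should score at or near 1.0:

    from utils.text_utils import compare_titles_with_latex_cleaning

    score = compare_titles_with_latex_cleaning("Evaluating {LLM}s at Scale",
                                               "Evaluating LLMs at Scale")
    print(score)  # expected: ~1.0 once the braces are stripped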
{academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/WHEEL
RENAMED
File without changes
{academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/entry_points.txt
RENAMED
File without changes
{academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/licenses/LICENSE
RENAMED
File without changes
{academic_refchecker-1.2.50.dist-info → academic_refchecker-1.2.52.dist-info}/top_level.txt
RENAMED
File without changes