PyPI - academic-refchecker - Versions diffs - 1.2.40__py3-none-any.whl → 1.2.42__py3-none-any.whl - Mend

academic-refchecker 1.2.40__py3-none-any.whl → 1.2.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information for RefChecker."""
-__version__ = "1.2.40"
+__version__ = "1.2.42"

{academic_refchecker-1.2.40.dist-info → academic_refchecker-1.2.42.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academic-refchecker
-Version: 1.2.40
+Version: 1.2.42
 Summary: A comprehensive tool for validating reference accuracy in academic papers
 Author-email: Mark Russinovich <markrussinovich@hotmail.com>
 License-Expression: MIT

{academic_refchecker-1.2.40.dist-info → academic_refchecker-1.2.42.dist-info}/RECORD RENAMED Viewed

@@ -1,21 +1,21 @@
-__version__.py,sha256=soqcePD866a0kbKzHnJaobWHcBuIA5atngYu2ZaYJ0o,65
-academic_refchecker-1.2.40.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
+__version__.py,sha256=jrP5O1rb9OpfyEnz9IJjKo7ZhdOr-9_yzLGwvjDTLWA,65
+academic_refchecker-1.2.42.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
 checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
 checkers/crossref.py,sha256=Hzq4dlf1CSn0aZWU8CMOnLxIvaSivTabLoepIOkgkmY,20585
 checkers/enhanced_hybrid_checker.py,sha256=6yf5tV4jLSVzjX1xR_kQq0NOgQIst-z_WmkiqqMc8hQ,23469
 checkers/github_checker.py,sha256=54K6_YJW5w2GtzodnSOLfK5d1ErFJxbTOIIV5P_kFX0,13543
 checkers/local_semantic_scholar.py,sha256=DgGMjmR_w_saz2UTMatEhfXbhUED9tUmDG3vlZAIzc4,20428
 checkers/openalex.py,sha256=GxYUH9GZ0AyF-WFKgXiFHqkalrSnySgFSkiM1PsK0VI,19757
-checkers/openreview_checker.py,sha256=QRQXUk1Ws-e-wETSeLgq06WmHQrjUk17my_Zj4rrwmY,20303
-checkers/semantic_scholar.py,sha256=YHR9nWaT7aieyczVMRKCPHr3k_Hl8g1rzd0k4f3bDTs,35022
+checkers/openreview_checker.py,sha256=FLh21F0Zr7Gj3BI0u-gE6IwGNOZiRcViirDBeNvUp94,20432
+checkers/semantic_scholar.py,sha256=BelhyIJ-W8navRdqEGpk12CIXYWmVL2Cq8HHZR7ynJs,34905
 checkers/webpage_checker.py,sha256=BvNwOqukTX9IeQUpUfIrI_5Gr2w9VLBt5x_PB-hKUIo,21616
 config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
 config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
 config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c,6164
 core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
 core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
-core/parallel_processor.py,sha256=2S1cAPhtWH3glvtiJrt9JkZzk2iJkPKXsc-F3lg0X6U,16795
-core/refchecker.py,sha256=24c2zEciTneKcIkHkAZYNak_DjGW_wCuhYVkTdc2SDc,274522
+core/parallel_processor.py,sha256=5V2iJDBFwwryMCnCNU_oRt2u5he1wpy-_9qapC_6f00,17043
+core/refchecker.py,sha256=ElXgD1iPI-rDDFZmCPMZpkIP4UeX3nPAJVCfsVPNgcw,274640
 database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
 database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
 llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -34,13 +34,13 @@ utils/bibtex_parser.py,sha256=jsQ87lkzmBmJO3VEN3itw22CJ1Hesei4IvM2sfsaFKI,12867
 utils/config_validator.py,sha256=rxf7K3DYmJ-BNPsmtaCNipY2BTVT-pJZ7wN-M9Y3GC8,11167
 utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
 utils/doi_utils.py,sha256=8f3iE4RdSNkzeqa9wJfoKcVEiBVse3_uf643biLudmw,4134
-utils/error_utils.py,sha256=2qdRM3Bv4GvE3mlXgXp9jiQBfvB08qeg8vTgNVivcgk,5706
+utils/error_utils.py,sha256=JqnRg4z-O9GcJ1eJGeTMzmOQwPWbWo2Lf6Duwj-ymHQ,6258
 utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
-utils/text_utils.py,sha256=8luQsOBfcEBv3O16d3LlQmCuoEB0dEF0aQWGey-s3us,190502
+utils/text_utils.py,sha256=F5o-37KUkkr-ie4sg6ld5om3-uDpAxPUSjDFxY0fsL4,203063
 utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
 utils/url_utils.py,sha256=n0m5rXKV0-UrE8lI85VEx23KmfGwky57sI6gFPuu78I,7358
-academic_refchecker-1.2.40.dist-info/METADATA,sha256=pH6qGXfz-0oNvKvLRRG5PRlUsD3HT5wowupyEIEXeo4,22298
-academic_refchecker-1.2.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-academic_refchecker-1.2.40.dist-info/entry_points.txt,sha256=WdI89tYkIfz-M628PiboOfOLzTBWZAqvlF29qCVCkek,61
-academic_refchecker-1.2.40.dist-info/top_level.txt,sha256=6RlcQEA0kHb7-ndbKMFMZnYnJQVohgsU6BBkbEvJvEs,69
-academic_refchecker-1.2.40.dist-info/RECORD,,
+academic_refchecker-1.2.42.dist-info/METADATA,sha256=k7fzk4fhb-kz-CdJE-gaeU2I5xM16D1rNNeEuer_9Hk,22298
+academic_refchecker-1.2.42.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+academic_refchecker-1.2.42.dist-info/entry_points.txt,sha256=WdI89tYkIfz-M628PiboOfOLzTBWZAqvlF29qCVCkek,61
+academic_refchecker-1.2.42.dist-info/top_level.txt,sha256=6RlcQEA0kHb7-ndbKMFMZnYnJQVohgsU6BBkbEvJvEs,69
+academic_refchecker-1.2.42.dist-info/RECORD,,

checkers/openreview_checker.py CHANGED Viewed

@@ -473,9 +473,10 @@ class OpenReviewReferenceChecker:
         if cited_venue and paper_venue:
             if are_venues_substantially_different(cited_venue, paper_venue):
+                from utils.error_utils import clean_venue_for_comparison
                 errors.append({
                     "warning_type": "venue",
-                    "warning_details": f"Venue mismatch: cited as '{cited_venue}' but OpenReview shows '{paper_venue}'"
+                    "warning_details": f"Venue mismatch: cited as '{clean_venue_for_comparison(cited_venue)}' but OpenReview shows '{clean_venue_for_comparison(paper_venue)}'"
                 })
         # Create verified data structure

checkers/semantic_scholar.py CHANGED Viewed

@@ -544,11 +544,8 @@ class NonArxivReferenceChecker:
         if cited_venue and paper_venue:
             # Use the utility function to check if venues are substantially different
             if are_venues_substantially_different(cited_venue, paper_venue):
-                errors.append({
-                    'warning_type': 'venue',
-                    'warning_details': f"Venue mismatch: cited as '{cited_venue}' but actually '{paper_venue}'",
-                    'ref_venue_correct': paper_venue
-                })
+                from utils.error_utils import create_venue_warning
+                errors.append(create_venue_warning(cited_venue, paper_venue))
         elif not cited_venue and paper_venue:
             # Check if this is an arXiv paper first
             external_ids = paper_data.get('externalIds', {})

core/parallel_processor.py CHANGED Viewed

@@ -279,7 +279,11 @@ class ParallelReferenceProcessor:
         from utils.text_utils import format_authors_for_display
         authors = format_authors_for_display(reference.get('authors', []))
         year = reference.get('year', '')
-        venue = reference.get('venue', '')
+        # Get venue from either 'venue' or 'journal' field and clean it up
+        venue = reference.get('venue', '') or reference.get('journal', '')
+        if venue:
+            from utils.error_utils import clean_venue_for_comparison
+            venue = clean_venue_for_comparison(venue)
         url = reference.get('url', '')
         doi = reference.get('doi', '')

core/refchecker.py CHANGED Viewed

@@ -3383,7 +3383,7 @@ class ArxivReferenceChecker:
         # Check if this is biblatex format
         from utils.biblatex_parser import detect_biblatex_format
         if detect_biblatex_format(bibliography_text):
-            logger.info("Detected biblatex format, using biblatex parser")
+            logger.debug("Detected biblatex format")
             self.used_regex_extraction = True
             # Note: biblatex parsing is also robust, so we don't set used_unreliable_extraction
             biblatex_refs = self._parse_biblatex_references(bibliography_text)
@@ -3391,7 +3391,7 @@ class ArxivReferenceChecker:
             # If biblatex parsing returned empty results (due to quality validation),
             # fallback to LLM if available
             if not biblatex_refs and self.llm_extractor:
-                logger.debug("Biblatex parser returned no results due to quality validation, trying LLM fallback")
+                logger.debug("Biblatex is incompatible with parser")
                 try:
                     references = self.llm_extractor.extract_references(bibliography_text)
                     if references:
@@ -3403,7 +3403,7 @@ class ArxivReferenceChecker:
                 except Exception as e:
                     logger.error(f"LLM fallback failed: {e}")
                     return []
+            logger.debug("Using biblatex file")
             return biblatex_refs
         # For non-standard formats, try LLM-based extraction if available
@@ -3634,6 +3634,7 @@ class ArxivReferenceChecker:
             # we'll continue with the unreliable fallback regex parsing
             if not biblatex_refs:
                 logger.debug("Biblatex parser returned no results due to quality validation, falling back to regex parsing")
+                print(f"⚠️  Biblatex parser found no valid references (failed quality validation) - falling back to regex parsing")
             else:
                 return biblatex_refs

utils/error_utils.py CHANGED Viewed

@@ -89,6 +89,20 @@ def create_title_error(error_details: str, correct_title: str) -> Dict[str, str]
     }
+def clean_venue_for_comparison(venue: str) -> str:
+    """
+    Clean venue name for display in warnings using the shared normalization logic.
+    Args:
+        venue: Raw venue string
+    Returns:
+        Cleaned venue name suitable for display
+    """
+    from utils.text_utils import normalize_venue_for_display
+    return normalize_venue_for_display(venue)
 def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]:
     """
     Create a standardized venue warning dictionary.
@@ -100,9 +114,13 @@ def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]
     Returns:
         Standardized warning dictionary
     """
+    # Clean both venues for display in the warning
+    clean_cited = clean_venue_for_comparison(cited_venue)
+    clean_correct = clean_venue_for_comparison(correct_venue)
     return {
         'warning_type': 'venue',
-        'warning_details': f"Venue mismatch: cited as '{cited_venue}' but actually '{correct_venue}'",
+        'warning_details': f"Venue mismatch: cited as '{clean_cited}' but actually '{clean_correct}'",
         'ref_venue_correct': correct_venue
     }

utils/text_utils.py CHANGED Viewed

@@ -2255,8 +2255,13 @@ def format_author_for_display(author_name):
     if not author_name:
         return author_name
+    # Clean up any stray punctuation that might have been attached during parsing
+    author_name = author_name.strip()
+    # Remove trailing semicolons that sometimes get attached during bibliographic parsing
+    author_name = re.sub(r'[;,]\s*$', '', author_name)
     # Normalize apostrophes for consistent display
-    author_name = normalize_apostrophes(author_name.strip())
+    author_name = normalize_apostrophes(author_name)
     # Check if it's in "Lastname, Firstname" format
     if ',' in author_name:
@@ -3006,7 +3011,9 @@ def extract_latex_references(text, file_path=None):  # pylint: disable=unused-ar
                         if ref['year']:
                             venue_clean = re.sub(rf'\b{ref["year"]}\b.*', '', venue_clean)
                         venue_clean = venue_clean.rstrip(',. ')
-                        if venue_clean:
+                        # Filter out common non-venue patterns that shouldn't be treated as venues
+                        non_venue_patterns = ['URL', 'url', 'http:', 'https:', 'DOI', 'doi:', 'ArXiv', 'arxiv:']
+                        if venue_clean and not any(pattern in venue_clean for pattern in non_venue_patterns):
                             ref['journal'] = venue_clean
                 # Extract URL if present
@@ -3665,8 +3672,77 @@ def are_venues_substantially_different(venue1: str, venue2: str) -> bool:
         return bool(venue1 != venue2)
     # Clean LaTeX commands from both venues first
-    venue1 = strip_latex_commands(venue1)
-    venue2 = strip_latex_commands(venue2)
+    venue1_latex_cleaned = strip_latex_commands(venue1)
+    venue2_latex_cleaned = strip_latex_commands(venue2)
+    # For comparison, we need lowercase normalized versions
+    def normalize_for_comparison(venue_text):
+        # Get the cleaned display version first
+        cleaned = normalize_venue_for_display(venue_text)
+        # Then normalize for comparison: lowercase, expand abbreviations, remove punctuation
+        venue_lower = cleaned.lower()
+        # Handle LaTeX penalty commands before abbreviation expansion
+        venue_lower = re.sub(r'\\penalty\d+\s*', ' ', venue_lower)  # Remove \\penalty0 etc
+        venue_lower = re.sub(r'\s+', ' ', venue_lower).strip()  # Clean up extra spaces
+        # Expand abbreviations for comparison
+        def expand_abbreviations(text):
+            common_abbrevs = {
+                # IEEE specific abbreviations (only expand with periods, not full words)
+                'robot.': 'robotics', 'autom.': 'automation', 'lett.': 'letters',
+                'trans.': 'transactions', 'syst.': 'systems', 'netw.': 'networks',
+                'learn.': 'learning', 'ind.': 'industrial', 'electron.': 'electronics',
+                'mechatron.': 'mechatronics', 'intell.': 'intelligence',
+                'transp.': 'transportation', 'contr.': 'control', 'mag.': 'magazine',
+                # General academic abbreviations (only expand with periods)
+                'int.': 'international', 'intl.': 'international', 'conf.': 'conference',
+                'j.': 'journal', 'proc.': 'proceedings', 'assoc.': 'association',
+                'comput.': 'computing', 'sci.': 'science', 'eng.': 'engineering',
+                'tech.': 'technology', 'artif.': 'artificial', 'mach.': 'machine',
+                'stat.': 'statistics', 'math.': 'mathematics', 'phys.': 'physics',
+                'chem.': 'chemistry', 'bio.': 'biology', 'med.': 'medicine',
+                'adv.': 'advances', 'ann.': 'annual', 'symp.': 'symposium',
+                'workshop': 'workshop', 'worksh.': 'workshop',
+                'natl.': 'national', 'acad.': 'academy', 'rev.': 'review',
+                # Physics journal abbreviations
+                'phys.': 'physics', 'phys. rev.': 'physical review',
+                'phys. rev. lett.': 'physical review letters',
+                'phys. rev. a': 'physical review a', 'phys. rev. b': 'physical review b',
+                'phys. rev. c': 'physical review c', 'phys. rev. d': 'physical review d',
+                'phys. rev. e': 'physical review e', 'phys. lett.': 'physics letters',
+                'phys. lett. b': 'physics letters b', 'nucl. phys.': 'nuclear physics',
+                'nucl. phys. a': 'nuclear physics a', 'nucl. phys. b': 'nuclear physics b',
+                'j. phys.': 'journal of physics', 'ann. phys.': 'annals of physics',
+                'mod. phys. lett.': 'modern physics letters', 'eur. phys. j.': 'european physical journal',
+                # Nature journals
+                'nature phys.': 'nature physics', 'sci. adv.': 'science advances',
+                # Handle specific multi-word patterns and well-known acronyms
+                'proc. natl. acad. sci.': 'proceedings of the national academy of sciences',
+                'pnas': 'proceedings of the national academy of sciences',
+            }
+            # Sort by length (longest first) to ensure longer matches take precedence
+            for abbrev, expansion in sorted(common_abbrevs.items(), key=lambda x: len(x[0]), reverse=True):
+                # For abbreviations ending in period, use word boundary at start only
+                if abbrev.endswith('.'):
+                    pattern = r'\b' + re.escape(abbrev)
+                else:
+                    pattern = r'\b' + re.escape(abbrev) + r'\b'
+                text = re.sub(pattern, expansion, text)
+            return text
+        venue_lower = expand_abbreviations(venue_lower)
+        # Remove punctuation and normalize spacing for comparison
+        venue_lower = re.sub(r'[.,;:]', '', venue_lower)  # Remove punctuation
+        venue_lower = re.sub(r'\\s+on\\s+', ' ', venue_lower)  # Remove \"on\" preposition
+        venue_lower = re.sub(r'\\s+for\\s+', ' ', venue_lower)  # Remove \"for\" preposition
+        venue_lower = re.sub(r'\\s+', ' ', venue_lower).strip()  # Normalize whitespace
+        return venue_lower
+    normalized_venue1 = normalize_for_comparison(venue1_latex_cleaned)
+    normalized_venue2 = normalize_for_comparison(venue2_latex_cleaned)
     def expand_abbreviations(text):
         """Generic abbreviation expansion using common academic patterns"""
@@ -3983,8 +4059,8 @@ def are_venues_substantially_different(venue1: str, venue2: str) -> bool:
             if not acronym or not full_text:
                 return False
-            # Normalize the full text
-            normalized_full = normalize_venue(full_text)
+            # Use the internal comparison normalization function
+            normalized_full = normalize_for_comparison(full_text)
             # Generate all possible acronyms from the full text
             possible_acronyms = []
@@ -4098,9 +4174,9 @@ def are_venues_substantially_different(venue1: str, venue2: str) -> bool:
         if (arxiv1 == 'arxiv' and arxiv2.startswith('https://arxiv.org')) or (arxiv2 == 'arxiv' and arxiv1.startswith('https://arxiv.org')):
             return False
-    # Normalize both venues first
-    norm1 = normalize_venue(venue1)
-    norm2 = normalize_venue(venue2)
+    # Use normalized venues from shared function
+    norm1 = normalized_venue1
+    norm2 = normalized_venue2
     # Direct match after normalization (highest priority)
     if norm1 == norm2:
@@ -4354,4 +4430,144 @@ def is_year_substantially_different(cited_year: int, correct_year: int, context:
     # Any year difference should be flagged as a warning for manual review
     warning_msg = f"Year mismatch: cited as {cited_year} but actually {correct_year}"
-    return True, warning_msg
+    return True, warning_msg
+def normalize_venue_for_display(venue: str) -> str:
+    """
+    Normalize venue names for consistent display and comparison.
+    This function is used both for display in warnings and for venue comparison
+    to ensure consistent normalization across the system.
+    Args:
+        venue: Raw venue string
+    Returns:
+        Normalized venue string with prefixes removed and abbreviations expanded
+    """
+    if not venue:
+        return ""
+    def expand_abbreviations(text):
+        """Generic abbreviation expansion using common academic patterns"""
+        # Common academic abbreviations mapping
+        common_abbrevs = {
+            # IEEE specific abbreviations (only expand with periods, not full words)
+            'robot.': 'robotics',
+            'autom.': 'automation',
+            'lett.': 'letters',
+            'trans.': 'transactions',
+            'syst.': 'systems',
+            'netw.': 'networks',
+            'learn.': 'learning',
+            'ind.': 'industrial',
+            'electron.': 'electronics',
+            'mechatron.': 'mechatronics',
+            'intell.': 'intelligence',
+            'transp.': 'transportation',
+            'contr.': 'control',
+            'mag.': 'magazine',
+            # General academic abbreviations (only expand with periods)
+            'int.': 'international',
+            'intl.': 'international',
+            'conf.': 'conference',
+            'j.': 'journal',
+            'proc.': 'proceedings',
+            'assoc.': 'association',
+            'comput.': 'computing',
+            'sci.': 'science',
+            'eng.': 'engineering',
+            'tech.': 'technology',
+            'artif.': 'artificial',
+            'mach.': 'machine',
+            'stat.': 'statistics',
+            'math.': 'mathematics',
+            'phys.': 'physics',
+            'chem.': 'chemistry',
+            'bio.': 'biology',
+            'med.': 'medicine',
+            'adv.': 'advances',
+            'ann.': 'annual',
+            'symp.': 'symposium',
+            'workshop': 'workshop',
+            'worksh.': 'workshop',
+        }
+        text_lower = text.lower()
+        for abbrev, expansion in common_abbrevs.items():
+            # Only replace if it's a word boundary to avoid partial replacements
+            pattern = r'\b' + re.escape(abbrev) + r'\b'
+            text_lower = re.sub(pattern, expansion, text_lower)
+        return text_lower
+    venue_text = venue.strip()
+    # Extract venue from complex editor strings (e.g. "In Smith, J.; and Doe, K., eds., Conference Name, volume 1")
+    # This handles patterns like "In [authors], eds., [venue], [optional metadata]" (case-insensitive)
+    editor_match = re.search(r'in\s+[^,]+(?:,\s*[^,]*)*,\s*eds?\.,\s*(.+?)(?:,\s*volume\s*\d+|,\s*pp?\.|$)', venue_text, re.IGNORECASE)
+    if editor_match:
+        # Extract the venue part from editor string (preserve original case)
+        venue_text = editor_match.group(1).strip()
+        # Clean up any remaining metadata like "volume X of Proceedings..." (case-insensitive)
+        venue_text = re.sub(r',\s*volume\s+\d+.*$', '', venue_text, flags=re.IGNORECASE)
+        venue_text = re.sub(r'\s+of\s+proceedings.*$', '', venue_text, flags=re.IGNORECASE)
+    # Remove years, volumes, pages, and other citation metadata
+    # But preserve arXiv IDs (don't remove digits after arXiv:)
+    if not re.match(r'arxiv:', venue_text, re.IGNORECASE):
+        venue_text = re.sub(r',?\s*\d{4}[a-z]?\s*$', '', venue_text)  # Years like "2024" or "2024b"
+        venue_text = re.sub(r',?\s*\(\d{4}\)$', '', venue_text)  # Years in parentheses
+        venue_text = re.sub(r"'\d{2}$", '', venue_text)  # Year suffixes like 'CVPR'16'
+    venue_text = re.sub(r',?\s*(vol\.?\s*|volume\s*)\d+.*$', '', venue_text, flags=re.IGNORECASE)  # Volume info
+    venue_text = re.sub(r',?\s*\d+\s*\([^)]*\).*$', '', venue_text)  # Issue info with optional spaces
+    venue_text = re.sub(r',?\s*pp?\.\s*\d+.*$', '', venue_text, flags=re.IGNORECASE)  # Page info
+    venue_text = re.sub(r'\s*\(print\).*$', '', venue_text, flags=re.IGNORECASE)  # Print designation
+    venue_text = re.sub(r'\s*\(\d{4}\.\s*print\).*$', '', venue_text, flags=re.IGNORECASE)  # Year.Print
+    # Remove procedural prefixes (case-insensitive)
+    prefixes_to_remove = [
+        r'^\d{4}\s+\d+(st|nd|rd|th)\s+',  # "2012 IEEE/RSJ"
+        r'^\d{4}\s+',                     # "2024 "
+        r'^proceedings\s+(of\s+)?(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?',  # "Proceedings of the IEEE"
+        r'^proc\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?',        # "Proc. of the IEEE" (require "of")
+        r'^procs\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?',       # "Procs. of the IEEE" (require "of")
+        r'^in\s+',
+        r'^advances\s+in\s+',             # "Advances in Neural Information Processing Systems"
+        r'^adv\.\s+',                     # "Adv. Neural Information Processing Systems"
+        # Handle ordinal prefixes: "The Twelfth", "The Ninth", etc.
+        r'^the\s+(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth)\s+',
+        # Handle numeric ordinals: "The 41st", "The 12th", etc.
+        r'^the\s+\d+(st|nd|rd|th)\s+',
+        # Handle standalone "The" prefix
+        r'^the\s+',
+    ]
+    for prefix_pattern in prefixes_to_remove:
+        venue_text = re.sub(prefix_pattern, '', venue_text, flags=re.IGNORECASE)
+    # Note: For display purposes, we preserve case and don't expand abbreviations
+    # Only do minimal cleaning needed for proper display
+    # Remove organization prefixes/suffixes that don't affect identity (case-insensitive)
+    # But preserve IEEE when it's part of a journal name like \"IEEE Transactions\"
+    if not re.match(r'ieee\s+transactions', venue_text, re.IGNORECASE):
+        venue_text = re.sub(r'^(ieee|acm|aaai|usenix|sigcomm|sigkdd|sigmod|vldb|osdi|sosp|eurosys)\s+', '', venue_text, flags=re.IGNORECASE)  # Remove org prefixes
+    venue_text = re.sub(r'^ieee/\w+\s+', '', venue_text, flags=re.IGNORECASE)  # Remove "IEEE/RSJ " etc
+    venue_text = re.sub(r'\s+(ieee|acm|aaai|usenix)\s*$', '', venue_text, flags=re.IGNORECASE)  # Remove org suffixes
+    venue_text = re.sub(r'/\w+\s+', ' ', venue_text)  # Remove "/ACM " style org separators
+    # IMPORTANT: Don't remove "Conference on" or "International" - they're needed for display
+    # Only remove specific org-prefixed conference patterns where the org is clear
+    venue_text = re.sub(r'^(ieee|acm|aaai|nips)(/\w+)?\s+conference\s+on\s+', '', venue_text, flags=re.IGNORECASE)
+    # Note: Don't remove "Conference on" as it's often part of the actual venue name
+    # Only remove it if it's clearly a procedural prefix (handled in prefixes_to_remove above)
+    # Clean up spacing (preserve punctuation and case for display)
+    venue_text = re.sub(r'\s+', ' ', venue_text)     # Normalize whitespace
+    venue_text = venue_text.strip()
+    return venue_text

{academic_refchecker-1.2.40.dist-info → academic_refchecker-1.2.42.dist-info}/WHEEL RENAMED Viewed

File without changes

{academic_refchecker-1.2.40.dist-info → academic_refchecker-1.2.42.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{academic_refchecker-1.2.40.dist-info → academic_refchecker-1.2.42.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{academic_refchecker-1.2.40.dist-info → academic_refchecker-1.2.42.dist-info}/top_level.txt RENAMED Viewed

File without changes

academic-refchecker 1.2.40__py3-none-any.whl → 1.2.42__py3-none-any.whl

academic-refchecker 1.2.40__py3-none-any.whl → 1.2.42__py3-none-any.whl