PyPI - academic-refchecker - Versions diffs - 1.2.53__py3-none-any.whl → 1.2.55__py3-none-any.whl - Mend

academic-refchecker 1.2.53__py3-none-any.whl → 1.2.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

{utils → refchecker/utils}/text_utils.py RENAMED Viewed

@@ -173,6 +173,11 @@ def parse_authors_with_initials(authors_text):
     if stripped_text in ['others', 'and others', 'et al', 'et al.']:
         return []
+    # Clean LaTeX commands early to prevent parsing issues
+    # This fixes cases like "Hochreiter, Sepp and Schmidhuber, J{\"u}rgen"
+    # which should parse as 2 authors, not get split incorrectly due to LaTeX braces
+    authors_text = strip_latex_commands(authors_text)
     # Fix spacing around periods in initials (e.g., "Y . Li" -> "Y. Li") before parsing
     authors_text = re.sub(r'(\w)\s+\.', r'\1.', authors_text)
@@ -300,9 +305,9 @@ def parse_authors_with_initials(authors_text):
                     comma_parts = [p.strip() for p in part.split(',')]
                     if len(comma_parts) == 2:
                         lastname, firstname = comma_parts
-                        # Both parts should contain only letters, spaces, hyphens, apostrophes, and periods
-                        if (re.match(r'^[A-Za-z\s\-\'.]+$', lastname) and
-                            re.match(r'^[A-Za-z\s\-\'.]+$', firstname) and
+                        # Both parts should contain only letters (including Unicode), spaces, hyphens, apostrophes, and periods
+                        if (re.match(r'^[\w\s\-\'.]+$', lastname, re.UNICODE) and
+                            re.match(r'^[\w\s\-\'.]+$', firstname, re.UNICODE) and
                             lastname and firstname):
                             valid_author_parts.append(part)
@@ -314,6 +319,50 @@ def parse_authors_with_initials(authors_text):
     # Split on commas first for other formats
     parts = [part.strip() for part in authors_text.split(',') if part.strip()]
+    # Handle single author with "Lastname, Firstname" format (exactly 2 parts)
+    if len(parts) == 2:
+        lastname, firstname = parts
+        # Pattern for surnames: capitalized word(s), possibly hyphenated or compound
+        # But exclude common patterns that suggest multiple authors like "Other Author"
+        surname_pattern = r'^[A-Z][a-zA-Z\-\']+$'  # Single surname word (no spaces to avoid "Other Author")
+        # Pattern for first names or initials: either full names or initials with periods
+        # Accept both full names like "David R" and initials like "A. C"
+        firstname_pattern = r'^[A-Z]([a-zA-Z\s\-\'.]*|\.(\s+[A-Z]\.?)*\s*)$'  # Full names or initials
+        # Additional check: if the "firstname" part looks like "Other Author" or similar,
+        # it's likely multiple authors, not a single "Lastname, Firstname" pattern
+        # We need to distinguish between:
+        # - "David R" (first name + middle initial - single author)
+        # - "Other Author" (two separate names - multiple authors)
+        if ' ' in firstname:
+            firstname_parts = firstname.split()
+            if len(firstname_parts) == 2:
+                first_part, second_part = firstname_parts
+                # Pattern 1: "David R" - first name + single letter (middle initial)
+                is_name_plus_initial = (
+                    len(first_part) >= 2 and first_part[0].isupper() and first_part[1:].islower() and
+                    len(second_part) <= 2 and second_part.replace('.', '').isalpha()  # Initial like "R" or "R."
+                )
+                # Pattern 2: "Other Author" - two full capitalized words suggesting separate authors
+                looks_like_separate_authors = (
+                    len(first_part) >= 3 and first_part[0].isupper() and first_part[1:].islower() and
+                    len(second_part) >= 3 and second_part[0].isupper() and second_part[1:].islower()
+                )
+                looks_like_multiple_authors = looks_like_separate_authors and not is_name_plus_initial
+            else:
+                # More than 2 parts with spaces likely indicates multiple authors
+                looks_like_multiple_authors = len(firstname_parts) > 2
+        else:
+            looks_like_multiple_authors = False
+        # Check if this looks like a single author in "Lastname, Firstname" format
+        if (re.match(surname_pattern, lastname) and
+            re.match(firstname_pattern, firstname) and
+            len(lastname) >= 2 and len(firstname) >= 1 and
+            not looks_like_multiple_authors):
+            # This is a single author, return as "Lastname, Firstname"
+            return [f"{lastname}, {firstname}"]
     # Check if this is BibTeX comma-separated format: "Surname, Given, Surname, Given"
     # Enhanced heuristic: even number of parts >= 6, alternating proper surname/given pattern
     # Distinguish between initials (should remain as "Surname, Initial") and full names
@@ -640,7 +689,7 @@ def extract_arxiv_id_from_url(url):
     Returns:
         ArXiv ID or None if not found
     """
-    from utils.url_utils import extract_arxiv_id_from_url as common_extract
+    from refchecker.utils.url_utils import extract_arxiv_id_from_url as common_extract
     return common_extract(url)
 def extract_year_from_text(text):
@@ -2092,7 +2141,7 @@ def compare_authors(cited_authors: list, correct_authors: list, normalize_func=N
     # and not penalize for the authoritative source having more authors
     if has_et_al:
         # Import here to avoid circular imports
-        from utils.error_utils import format_author_mismatch
+        from refchecker.utils.error_utils import format_author_mismatch
         # For et al cases, check if each cited author matches ANY author in the correct list
         # rather than comparing positionally, since author order can vary
         for i, cited_author in enumerate(cleaned_cited):
@@ -2126,21 +2175,21 @@ def compare_authors(cited_authors: list, correct_authors: list, normalize_func=N
         # Check if cited authors look like parsing fragments
         if looks_like_fragments(cleaned_cited):
-            from utils.error_utils import format_author_count_mismatch
+            from refchecker.utils.error_utils import format_author_count_mismatch
             display_cited = [format_author_for_display(author) for author in cleaned_cited]
             error_msg = format_author_count_mismatch(len(cleaned_cited), len(correct_names), display_cited, correct_names)
             return False, error_msg
         # For all count mismatches, show the count mismatch error
         if len(cleaned_cited) < len(correct_names):
-            from utils.error_utils import format_author_count_mismatch
+            from refchecker.utils.error_utils import format_author_count_mismatch
             display_cited = [format_author_for_display(author) for author in cleaned_cited]
             error_msg = format_author_count_mismatch(len(cleaned_cited), len(correct_names), display_cited, correct_names)
             return False, error_msg
         # For cases where cited > correct, also show count mismatch
         elif len(cleaned_cited) > len(correct_names):
-            from utils.error_utils import format_author_count_mismatch
+            from refchecker.utils.error_utils import format_author_count_mismatch
             display_cited = [format_author_for_display(author) for author in cleaned_cited]
             error_msg = format_author_count_mismatch(len(cleaned_cited), len(correct_names), display_cited, correct_names)
             return False, error_msg
@@ -2149,7 +2198,7 @@ def compare_authors(cited_authors: list, correct_authors: list, normalize_func=N
         comparison_correct = correct_names
     # Use shared three-line formatter (imported lazily to avoid circular imports)
-    from utils.error_utils import format_first_author_mismatch, format_author_mismatch
+    from refchecker.utils.error_utils import format_first_author_mismatch, format_author_mismatch
     # Compare first author (most important) using the enhanced name matching
     if comparison_cited and comparison_correct:
@@ -2757,7 +2806,7 @@ def filter_bibtex_by_cited_keys(bib_content, cited_keys):
         return bib_content
     # Parse entries and filter
-    from utils.bibtex_parser import parse_bibtex_entries
+    from refchecker.utils.bibtex_parser import parse_bibtex_entries
     entries = parse_bibtex_entries(bib_content)
     filtered_entries = []
@@ -3069,7 +3118,7 @@ def extract_latex_references(text, file_path=None):  # pylint: disable=unused-ar
     if format_info['format_type'] == 'bibtex':
         # Use the dedicated BibTeX parser for consistent results
-        from utils.bibtex_parser import parse_bibtex_references
+        from refchecker.utils.bibtex_parser import parse_bibtex_references
         return parse_bibtex_references(text)
     elif format_info['format_type'] == 'thebibliography':
@@ -3273,7 +3322,7 @@ def extract_latex_references(text, file_path=None):  # pylint: disable=unused-ar
                 # Extract URL if present
                 url_match = re.search(r'\\url\{([^}]+)\}', content)
                 if url_match:
-                    from utils.url_utils import clean_url_punctuation
+                    from refchecker.utils.url_utils import clean_url_punctuation
                     ref['url'] = clean_url_punctuation(url_match.group(1))
             # Extract title from \showarticletitle{} or \bibinfo{title}{}
@@ -3335,7 +3384,7 @@ def extract_latex_references(text, file_path=None):  # pylint: disable=unused-ar
             if not ref['url']:
                 url_match = re.search(r'\\url\{([^}]+)\}', content)
                 if url_match:
-                    from utils.url_utils import clean_url_punctuation
+                    from refchecker.utils.url_utils import clean_url_punctuation
                     ref['url'] = clean_url_punctuation(url_match.group(1))
             # Extract DOI from \href{https://doi.org/...}

__version__.py DELETED Viewed

@@ -1,3 +0,0 @@
-"""Version information for RefChecker."""
-__version__ = "1.2.53"

academic_refchecker-1.2.53.dist-info/RECORD DELETED Viewed

@@ -1,47 +0,0 @@
-__version__.py,sha256=iH7i3qnj4nR1gSXECRVUGvJH5oBPWtb7Lb8H9ODFTVc,65
-academic_refchecker-1.2.53.dist-info/licenses/LICENSE,sha256=Kwrx3fePVCeEFDCZvCW4OuoTNBiSoYbpGBI6qzGhWF0,1067
-checkers/__init__.py,sha256=T0PAHTFt6UiGvn-WGoJU8CdhXNmf6zaHmcGVoWHhmJQ,533
-checkers/crossref.py,sha256=cLYmSzE8ehJ5sNko_R3fEiGBGiPH5_HxLhFM-pCfDRM,20378
-checkers/enhanced_hybrid_checker.py,sha256=rbXkzpNkd0bn4e2OooX-CcdGTwwYpgmVaFvX_xCAFsA,27777
-checkers/github_checker.py,sha256=BXJaBC3AloKze04j8EcQz0a79EhtVoi9_871ilV7t60,14233
-checkers/local_semantic_scholar.py,sha256=D8py8-yMCgN1lvhXCiMUOEA4wBkH7AQvrkM4-3LCDsU,21015
-checkers/openalex.py,sha256=Fbc7iscZzmXjAZxH32PDX2r2Nwo9b5Ku-Sh1Ut9KpLA,19550
-checkers/openreview_checker.py,sha256=3ckn6U7TN5nQBjqPacr8W8mm2uMo6aWWB6gsxTDNCPk,40452
-checkers/pdf_paper_checker.py,sha256=L5HRHd3xpo0xDltZGTAA-Wk_arIS9bQV8ITeuxW0bNc,19893
-checkers/semantic_scholar.py,sha256=wk6e8DkYJM_O2nWsi-6EfJT53PzfL8KCmX1rS562KKc,34962
-checkers/webpage_checker.py,sha256=REOotx7Qka86_xbOIMeYj5YVb9D1RVMb4Ye311-28cA,43620
-config/__init__.py,sha256=r7sONsX2-ITviUJRU1KEz76uAuTRqZlzU-TVkvFRGYY,15
-config/logging.conf,sha256=r1tP0ApLHtlz7rV-oKS1MVO7oXJOgahbZFTtYmKnf9U,687
-config/settings.py,sha256=-vODFoXbWbGPUElpmchE5zbCj_n4Vtxr8HU1hQDFp_c,6164
-core/__init__.py,sha256=1T2MSQyDk0u_PupbHvm4CvNNN--dxsw78fqKUrqoYrM,157
-core/db_connection_pool.py,sha256=XRiOdehikkSz3obH4WKgf8woa3694if50Q15rBT-4XQ,4697
-core/parallel_processor.py,sha256=cq_WfzXrF2EI6IKOtJd6_QcwvM1xT3J6a13teg-wSbM,17638
-core/refchecker.py,sha256=-QIT5eUQaPCuQy7S80sXCvtrmcjdH5lf5wdZvsPQO9w,286416
-database/__init__.py,sha256=mEuVHlEBuS44t_2ZT_JnvQQrlRCjo1SJq1NmaJ6r8OY,125
-database/download_semantic_scholar_db.py,sha256=waN4I97KC_36YMiPbiBDUUmgfzu1nub5yeKdAsIR2aw,75276
-llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-llm/base.py,sha256=uMF-KOqZ9ZQ7rccOQLpKJiW9sEMMxr7ePXBSF0yYDJY,16782
-llm/providers.py,sha256=A0usJpprCO5D-VX0hqaQzBfi4DG3rdjA39vu02XJsGw,40092
-scripts/__init__.py,sha256=xJwo6afG8s7S888BK2Bxw2d7FX8aLkbl0l_ZoJOFibE,37
-scripts/start_vllm_server.py,sha256=ZepWp2y2cKFW0Kgsoima2RbmF02fTU29UFcLLpsBhFU,4213
-services/__init__.py,sha256=jGi9S74Msak3YR-C4Qb68VU7HB4oLaX9o1rlVAFpOFI,187
-services/pdf_processor.py,sha256=vu_JnhFGZY6jFVbDbPvG-mlQojvB-3Dzc8_946KVV2E,9427
-utils/__init__.py,sha256=1RrGoIIn1_gVzxd56b6a7HeAS-wu7uDP-nxLbR3fJ-8,1199
-utils/arxiv_utils.py,sha256=EzH1PhEAW0df5mmSP-kKHmuwqd4u2CSotRNwQ5IMJx8,19766
-utils/author_utils.py,sha256=DLTo1xsxef2wxoe4s_MWrh36maj4fgnvFlsDLpDE-qQ,5507
-utils/biblatex_parser.py,sha256=OkHXQcjiBrEDuhBfEk0RtmAYxufu5lAxAjb8__DzMjI,25537
-utils/bibliography_utils.py,sha256=mpmdAklzAs1CT3gqrOcjujGhouL95OuliCx0LE9Pg90,11705
-utils/bibtex_parser.py,sha256=a89NLy_q2kwED4QFJgxWFgPQOJBV73bIUL3RS_Urmro,15231
-utils/config_validator.py,sha256=rxf7K3DYmJ-BNPsmtaCNipY2BTVT-pJZ7wN-M9Y3GC8,11167
-utils/db_utils.py,sha256=_wSupfBlm0ILFvntQTvoj7tLDCbrYPRQrp9NDvphF_E,6281
-utils/doi_utils.py,sha256=ezUiRnYRpoO0U_Rqgxv1FxqmeTwPh6X8gLgSDbqg5sY,4874
-utils/error_utils.py,sha256=UJOH7Bp-rPV2JDY_XN38I2pSkqqPdnQoviKa4s4nK_A,12501
-utils/mock_objects.py,sha256=QxU-UXyHSY27IZYN8Sb8ei0JtNkpGSdMXoErrRLHXvE,6437
-utils/text_utils.py,sha256=T3PiiG9-BMPTbdCftG2zypyIeZJl6snuMCKQ0nEOQv0,217834
-utils/unicode_utils.py,sha256=-WBKarXO756p7fd7gCeNsMag4ztDNURwFX5IVniOtwY,10366
-utils/url_utils.py,sha256=HdxIO8QvciP6Jp8Wd4sTSrS8JQrOMwgM7pxdUC8RJb4,9176
-academic_refchecker-1.2.53.dist-info/METADATA,sha256=6j1G-R74oa1900hERaRnJFkV5u4zTuVyLC6YamhXxq4,23256
-academic_refchecker-1.2.53.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-academic_refchecker-1.2.53.dist-info/entry_points.txt,sha256=WdI89tYkIfz-M628PiboOfOLzTBWZAqvlF29qCVCkek,61
-academic_refchecker-1.2.53.dist-info/top_level.txt,sha256=6RlcQEA0kHb7-ndbKMFMZnYnJQVohgsU6BBkbEvJvEs,69
-academic_refchecker-1.2.53.dist-info/RECORD,,

academic_refchecker-1.2.53.dist-info/entry_points.txt DELETED Viewed

@@ -1,2 +0,0 @@
-[console_scripts]
-academic-refchecker = core.refchecker:main

academic_refchecker-1.2.53.dist-info/top_level.txt DELETED Viewed

@@ -1,9 +0,0 @@
-__version__
-checkers
-config
-core
-database
-llm
-scripts
-services
-utils

{academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/WHEEL RENAMED Viewed

File without changes

{academic_refchecker-1.2.53.dist-info → academic_refchecker-1.2.55.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{checkers → refchecker/checkers}/__init__.py RENAMED Viewed

File without changes

{config → refchecker/config}/__init__.py RENAMED Viewed

File without changes

{config → refchecker/config}/logging.conf RENAMED Viewed

File without changes

{config → refchecker/config}/settings.py RENAMED Viewed

File without changes

{core → refchecker/core}/__init__.py RENAMED Viewed

File without changes

{core → refchecker/core}/db_connection_pool.py RENAMED Viewed

File without changes

{database → refchecker/database}/__init__.py RENAMED Viewed

File without changes

{database → refchecker/database}/download_semantic_scholar_db.py RENAMED Viewed

File without changes

{llm → refchecker/llm}/__init__.py RENAMED Viewed

File without changes

{llm → refchecker/llm}/base.py RENAMED Viewed

File without changes

{llm → refchecker/llm}/providers.py RENAMED Viewed

File without changes

{scripts → refchecker/scripts}/__init__.py RENAMED Viewed

File without changes

{scripts → refchecker/scripts}/start_vllm_server.py RENAMED Viewed

File without changes

{services → refchecker/services}/__init__.py RENAMED Viewed

File without changes

{services → refchecker/services}/pdf_processor.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/__init__.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/author_utils.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/config_validator.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/db_utils.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/doi_utils.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/mock_objects.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/unicode_utils.py RENAMED Viewed

File without changes

{utils → refchecker/utils}/url_utils.py RENAMED Viewed

File without changes

academic-refchecker 1.2.53__py3-none-any.whl → 1.2.55__py3-none-any.whl

academic-refchecker 1.2.53__py3-none-any.whl → 1.2.55__py3-none-any.whl