academic-refchecker 1.2.53__tar.gz → 1.2.54__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {academic_refchecker-1.2.53/src/academic_refchecker.egg-info → academic_refchecker-1.2.54}/PKG-INFO +1 -1
  2. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/__version__.py +1 -1
  3. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54/src/academic_refchecker.egg-info}/PKG-INFO +1 -1
  4. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/text_utils.py +52 -3
  5. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/LICENSE +0 -0
  6. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/MANIFEST.in +0 -0
  7. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/README.md +0 -0
  8. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/pyproject.toml +0 -0
  9. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/requirements.txt +0 -0
  10. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/scripts/download_db.py +0 -0
  11. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/scripts/run_tests.py +0 -0
  12. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/scripts/start_vllm_server.py +0 -0
  13. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/setup.cfg +0 -0
  14. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/__init__.py +0 -0
  15. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
  16. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
  17. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
  18. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/academic_refchecker.egg-info/requires.txt +0 -0
  19. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/academic_refchecker.egg-info/top_level.txt +0 -0
  20. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/__init__.py +0 -0
  21. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/crossref.py +0 -0
  22. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/enhanced_hybrid_checker.py +0 -0
  23. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/github_checker.py +0 -0
  24. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/local_semantic_scholar.py +0 -0
  25. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/openalex.py +0 -0
  26. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/openreview_checker.py +0 -0
  27. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/pdf_paper_checker.py +0 -0
  28. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/semantic_scholar.py +0 -0
  29. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/checkers/webpage_checker.py +0 -0
  30. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/config/__init__.py +0 -0
  31. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/config/logging.conf +0 -0
  32. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/config/settings.py +0 -0
  33. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/core/__init__.py +0 -0
  34. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/core/db_connection_pool.py +0 -0
  35. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/core/parallel_processor.py +0 -0
  36. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/core/refchecker.py +0 -0
  37. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/database/__init__.py +0 -0
  38. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/database/download_semantic_scholar_db.py +0 -0
  39. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/llm/__init__.py +0 -0
  40. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/llm/base.py +0 -0
  41. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/llm/providers.py +0 -0
  42. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/scripts/__init__.py +0 -0
  43. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/scripts/start_vllm_server.py +0 -0
  44. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/services/__init__.py +0 -0
  45. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/services/pdf_processor.py +0 -0
  46. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/__init__.py +0 -0
  47. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/arxiv_utils.py +0 -0
  48. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/author_utils.py +0 -0
  49. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/biblatex_parser.py +0 -0
  50. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/bibliography_utils.py +0 -0
  51. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/bibtex_parser.py +0 -0
  52. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/config_validator.py +0 -0
  53. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/db_utils.py +0 -0
  54. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/doi_utils.py +0 -0
  55. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/error_utils.py +0 -0
  56. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/mock_objects.py +0 -0
  57. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/unicode_utils.py +0 -0
  58. {academic_refchecker-1.2.53 → academic_refchecker-1.2.54}/src/utils/url_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.53
3
+ Version: 1.2.54
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """Version information for RefChecker."""
2
2
 
3
- __version__ = "1.2.53"
3
+ __version__ = "1.2.54"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.53
3
+ Version: 1.2.54
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -173,6 +173,11 @@ def parse_authors_with_initials(authors_text):
173
173
  if stripped_text in ['others', 'and others', 'et al', 'et al.']:
174
174
  return []
175
175
 
176
+ # Clean LaTeX commands early to prevent parsing issues
177
+ # This fixes cases like "Hochreiter, Sepp and Schmidhuber, J{\"u}rgen"
178
+ # which should parse as 2 authors, not get split incorrectly due to LaTeX braces
179
+ authors_text = strip_latex_commands(authors_text)
180
+
176
181
  # Fix spacing around periods in initials (e.g., "Y . Li" -> "Y. Li") before parsing
177
182
  authors_text = re.sub(r'(\w)\s+\.', r'\1.', authors_text)
178
183
 
@@ -300,9 +305,9 @@ def parse_authors_with_initials(authors_text):
300
305
  comma_parts = [p.strip() for p in part.split(',')]
301
306
  if len(comma_parts) == 2:
302
307
  lastname, firstname = comma_parts
303
- # Both parts should contain only letters, spaces, hyphens, apostrophes, and periods
304
- if (re.match(r'^[A-Za-z\s\-\'.]+$', lastname) and
305
- re.match(r'^[A-Za-z\s\-\'.]+$', firstname) and
308
+ # Both parts should contain only letters (including Unicode), spaces, hyphens, apostrophes, and periods
309
+ if (re.match(r'^[\w\s\-\'.]+$', lastname, re.UNICODE) and
310
+ re.match(r'^[\w\s\-\'.]+$', firstname, re.UNICODE) and
306
311
  lastname and firstname):
307
312
  valid_author_parts.append(part)
308
313
 
@@ -314,6 +319,50 @@ def parse_authors_with_initials(authors_text):
314
319
  # Split on commas first for other formats
315
320
  parts = [part.strip() for part in authors_text.split(',') if part.strip()]
316
321
 
322
+ # Handle single author with "Lastname, Firstname" format (exactly 2 parts)
323
+ if len(parts) == 2:
324
+ lastname, firstname = parts
325
+ # Pattern for surnames: a single capitalized word, possibly hyphenated or containing an apostrophe
326
+ # But exclude common patterns that suggest multiple authors like "Other Author"
327
+ surname_pattern = r'^[A-Z][a-zA-Z\-\']+$' # Single surname word (no spaces to avoid "Other Author")
328
+ # Pattern for first names or initials: either full names or initials with periods
329
+ # Accept both full names like "David R" and initials like "A. C"
330
+ firstname_pattern = r'^[A-Z]([a-zA-Z\s\-\'.]*|\.(\s+[A-Z]\.?)*\s*)$' # Full names or initials
331
+
332
+ # Additional check: if the "firstname" part looks like "Other Author" or similar,
333
+ # it's likely multiple authors, not a single "Lastname, Firstname" pattern
334
+ # We need to distinguish between:
335
+ # - "David R" (first name + middle initial - single author)
336
+ # - "Other Author" (two separate names - multiple authors)
337
+ if ' ' in firstname:
338
+ firstname_parts = firstname.split()
339
+ if len(firstname_parts) == 2:
340
+ first_part, second_part = firstname_parts
341
+ # Pattern 1: "David R" - first name + single letter (middle initial)
342
+ is_name_plus_initial = (
343
+ len(first_part) >= 2 and first_part[0].isupper() and first_part[1:].islower() and
344
+ len(second_part) <= 2 and second_part.replace('.', '').isalpha() # Initial like "R" or "R."
345
+ )
346
+ # Pattern 2: "Other Author" - two full capitalized words suggesting separate authors
347
+ looks_like_separate_authors = (
348
+ len(first_part) >= 3 and first_part[0].isupper() and first_part[1:].islower() and
349
+ len(second_part) >= 3 and second_part[0].isupper() and second_part[1:].islower()
350
+ )
351
+ looks_like_multiple_authors = looks_like_separate_authors and not is_name_plus_initial
352
+ else:
353
+ # More than 2 space-separated parts likely indicates multiple authors
354
+ looks_like_multiple_authors = len(firstname_parts) > 2
355
+ else:
356
+ looks_like_multiple_authors = False
357
+
358
+ # Check if this looks like a single author in "Lastname, Firstname" format
359
+ if (re.match(surname_pattern, lastname) and
360
+ re.match(firstname_pattern, firstname) and
361
+ len(lastname) >= 2 and len(firstname) >= 1 and
362
+ not looks_like_multiple_authors):
363
+ # This is a single author, return as "Lastname, Firstname"
364
+ return [f"{lastname}, {firstname}"]
365
+
317
366
  # Check if this is BibTeX comma-separated format: "Surname, Given, Surname, Given"
318
367
  # Enhanced heuristic: even number of parts >= 6, alternating proper surname/given pattern
319
368
  # Distinguish between initials (should remain as "Surname, Initial") and full names