academic-refchecker 1.2.46__tar.gz → 1.2.48__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {academic_refchecker-1.2.46/src/academic_refchecker.egg-info → academic_refchecker-1.2.48}/PKG-INFO +1 -1
  2. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/__version__.py +1 -1
  3. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48/src/academic_refchecker.egg-info}/PKG-INFO +1 -1
  4. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/local_semantic_scholar.py +15 -4
  5. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/refchecker.py +70 -18
  6. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/doi_utils.py +23 -5
  7. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/error_utils.py +17 -1
  8. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/text_utils.py +25 -4
  9. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/LICENSE +0 -0
  10. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/MANIFEST.in +0 -0
  11. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/README.md +0 -0
  12. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/pyproject.toml +0 -0
  13. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/requirements.txt +0 -0
  14. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/scripts/download_db.py +0 -0
  15. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/scripts/run_tests.py +0 -0
  16. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/scripts/start_vllm_server.py +0 -0
  17. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/setup.cfg +0 -0
  18. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/__init__.py +0 -0
  19. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
  20. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
  21. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
  22. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/requires.txt +0 -0
  23. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/academic_refchecker.egg-info/top_level.txt +0 -0
  24. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/__init__.py +0 -0
  25. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/crossref.py +0 -0
  26. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/enhanced_hybrid_checker.py +0 -0
  27. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/github_checker.py +0 -0
  28. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/openalex.py +0 -0
  29. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/openreview_checker.py +0 -0
  30. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/semantic_scholar.py +0 -0
  31. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/checkers/webpage_checker.py +0 -0
  32. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/config/__init__.py +0 -0
  33. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/config/logging.conf +0 -0
  34. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/config/settings.py +0 -0
  35. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/__init__.py +0 -0
  36. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/db_connection_pool.py +0 -0
  37. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/core/parallel_processor.py +0 -0
  38. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/database/__init__.py +0 -0
  39. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/database/download_semantic_scholar_db.py +0 -0
  40. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/llm/__init__.py +0 -0
  41. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/llm/base.py +0 -0
  42. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/llm/providers.py +0 -0
  43. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/scripts/__init__.py +0 -0
  44. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/scripts/start_vllm_server.py +0 -0
  45. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/services/__init__.py +0 -0
  46. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/services/pdf_processor.py +0 -0
  47. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/__init__.py +0 -0
  48. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/arxiv_utils.py +0 -0
  49. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/author_utils.py +0 -0
  50. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/biblatex_parser.py +0 -0
  51. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/bibliography_utils.py +0 -0
  52. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/bibtex_parser.py +0 -0
  53. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/config_validator.py +0 -0
  54. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/db_utils.py +0 -0
  55. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/mock_objects.py +0 -0
  56. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/unicode_utils.py +0 -0
  57. {academic_refchecker-1.2.46 → academic_refchecker-1.2.48}/src/utils/url_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.46
3
+ Version: 1.2.48
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """Version information for RefChecker."""
2
2
 
3
- __version__ = "1.2.46"
3
+ __version__ = "1.2.48"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.46
3
+ Version: 1.2.48
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -430,11 +430,22 @@ class LocalNonArxivReferenceChecker:
430
430
  logger.debug(f"Local DB: Author mismatch - {author_error}")
431
431
  errors.append(create_author_error(author_error, paper_data.get('authors', [])))
432
432
 
433
- # Verify year
433
+ # Verify year (with tolerance)
434
434
  paper_year = paper_data.get('year')
435
- if year and paper_year and year != paper_year:
436
- logger.debug(f"Local DB: Year mismatch - cited: {year}, actual: {paper_year}")
437
- errors.append(create_year_warning(year, paper_year))
435
+ if year and paper_year:
436
+ # Get year tolerance from config (default to 1 if not available)
437
+ year_tolerance = 1 # Default tolerance
438
+ try:
439
+ from config.settings import get_config
440
+ config = get_config()
441
+ year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
442
+ except (ImportError, Exception):
443
+ pass # Use default if config not available
444
+
445
+ # Only flag as mismatch if the difference is greater than tolerance
446
+ if abs(year - paper_year) > year_tolerance:
447
+ logger.debug(f"Local DB: Year mismatch - cited: {year}, actual: {paper_year}")
448
+ errors.append(create_year_warning(year, paper_year))
438
449
 
439
450
  # Verify DOI
440
451
  paper_doi = None
@@ -1922,16 +1922,27 @@ class ArxivReferenceChecker:
1922
1922
  'ref_authors_correct': ', '.join(correct_names)
1923
1923
  })
1924
1924
 
1925
- # Verify year
1925
+ # Verify year (with tolerance)
1926
1926
  paper_year = paper_data.get('year')
1927
- if year and paper_year and year != paper_year:
1928
- logger.debug(f"DB Verification: Year mismatch - cited: {year}, actual: {paper_year}")
1929
- from utils.error_utils import format_year_mismatch
1930
- errors.append({
1931
- 'warning_type': 'year',
1932
- 'warning_details': format_year_mismatch(year, paper_year),
1933
- 'ref_year_correct': paper_year
1934
- })
1927
+ if year and paper_year:
1928
+ # Get year tolerance from config (default to 1 if not available)
1929
+ year_tolerance = 1 # Default tolerance
1930
+ try:
1931
+ from config.settings import get_config
1932
+ config = get_config()
1933
+ year_tolerance = config.get('text_processing', {}).get('year_tolerance', 1)
1934
+ except (ImportError, Exception):
1935
+ pass # Use default if config not available
1936
+
1937
+ # Only flag as mismatch if the difference is greater than tolerance
1938
+ if abs(year - paper_year) > year_tolerance:
1939
+ logger.debug(f"DB Verification: Year mismatch - cited: {year}, actual: {paper_year}")
1940
+ from utils.error_utils import format_year_mismatch
1941
+ errors.append({
1942
+ 'warning_type': 'year',
1943
+ 'warning_details': format_year_mismatch(year, paper_year),
1944
+ 'ref_year_correct': paper_year
1945
+ })
1935
1946
 
1936
1947
  # Verify DOI
1937
1948
  if doi and external_ids.get('DOI'):
@@ -2626,9 +2637,19 @@ class ArxivReferenceChecker:
2626
2637
 
2627
2638
  # Generate corrected reference using all available corrections
2628
2639
  corrected_data = self._extract_corrected_data_from_error(consolidated_entry, verified_data)
2629
- corrected_format = format_corrected_reference(reference, corrected_data, consolidated_entry)
2630
- if corrected_format:
2631
- consolidated_entry['ref_corrected_format'] = corrected_format
2640
+
2641
+ # Generate all three formats for user convenience
2642
+ from utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
2643
+ plaintext_format = format_corrected_plaintext(reference, corrected_data, consolidated_entry)
2644
+ bibtex_format = format_corrected_bibtex(reference, corrected_data, consolidated_entry)
2645
+ bibitem_format = format_corrected_bibitem(reference, corrected_data, consolidated_entry)
2646
+
2647
+ if plaintext_format:
2648
+ consolidated_entry['ref_corrected_plaintext'] = plaintext_format
2649
+ if bibtex_format:
2650
+ consolidated_entry['ref_corrected_bibtex'] = bibtex_format
2651
+ if bibitem_format:
2652
+ consolidated_entry['ref_corrected_bibitem'] = bibitem_format
2632
2653
 
2633
2654
  # Store the consolidated entry (write to file at end of run)
2634
2655
  self.errors.append(consolidated_entry)
@@ -2685,11 +2706,21 @@ class ArxivReferenceChecker:
2685
2706
  if error_type != 'unverified':
2686
2707
  error_entry['ref_standard_format'] = self.format_standard_reference(error)
2687
2708
 
2688
- # Generate corrected reference in original format
2709
+ # Generate corrected reference in all formats for user convenience
2689
2710
  corrected_data = self._extract_corrected_data_from_error(error, verified_data)
2690
- corrected_format = format_corrected_reference(reference, corrected_data, error_entry)
2691
- if corrected_format:
2692
- error_entry['ref_corrected_format'] = corrected_format
2711
+
2712
+ # Generate all three formats
2713
+ from utils.text_utils import format_corrected_plaintext, format_corrected_bibtex, format_corrected_bibitem
2714
+ plaintext_format = format_corrected_plaintext(reference, corrected_data, error_entry)
2715
+ bibtex_format = format_corrected_bibtex(reference, corrected_data, error_entry)
2716
+ bibitem_format = format_corrected_bibitem(reference, corrected_data, error_entry)
2717
+
2718
+ if plaintext_format:
2719
+ error_entry['ref_corrected_plaintext'] = plaintext_format
2720
+ if bibtex_format:
2721
+ error_entry['ref_corrected_bibtex'] = bibtex_format
2722
+ if bibitem_format:
2723
+ error_entry['ref_corrected_bibitem'] = bibitem_format
2693
2724
  else:
2694
2725
  error_entry['ref_standard_format'] = None
2695
2726
 
@@ -2761,8 +2792,29 @@ class ArxivReferenceChecker:
2761
2792
  f.write(f" {error_entry['ref_verified_url']}\n")
2762
2793
  f.write("\n")
2763
2794
 
2764
- # Show corrected reference in original format if available
2765
- if error_entry.get('ref_corrected_format'):
2795
+ # Show corrected reference in all formats if available
2796
+ formats_written = False
2797
+
2798
+ # Plain text format
2799
+ if error_entry.get('ref_corrected_plaintext'):
2800
+ f.write("CORRECTED REFERENCE (Plain Text):\n")
2801
+ f.write(f"{error_entry['ref_corrected_plaintext']}\n\n")
2802
+ formats_written = True
2803
+
2804
+ # BibTeX format
2805
+ if error_entry.get('ref_corrected_bibtex'):
2806
+ f.write("CORRECTED REFERENCE (BibTeX):\n")
2807
+ f.write(f"{error_entry['ref_corrected_bibtex']}\n\n")
2808
+ formats_written = True
2809
+
2810
+ # Bibitem/LaTeX format
2811
+ if error_entry.get('ref_corrected_bibitem'):
2812
+ f.write("CORRECTED REFERENCE (LaTeX/Biblatex):\n")
2813
+ f.write(f"{error_entry['ref_corrected_bibitem']}\n\n")
2814
+ formats_written = True
2815
+
2816
+ # Fallback to legacy format if no new formats available
2817
+ if not formats_written and error_entry.get('ref_corrected_format'):
2766
2818
  f.write("CORRECTED REFERENCE:\n")
2767
2819
  f.write(f"{error_entry['ref_corrected_format']}\n\n")
2768
2820
 
@@ -99,9 +99,8 @@ def compare_dois(doi1: str, doi2: str) -> bool:
99
99
  """
100
100
  Compare two DOIs for equality, handling different formats and prefixes.
101
101
 
102
- This function performs exact matching after normalization, which means
103
- DOIs are only considered equal if they are identical after removing
104
- prefixes, case differences, and punctuation.
102
+ This function performs exact matching after normalization, with support
103
+ for partial DOI citations where a shorter DOI is a valid prefix of a longer one.
105
104
 
106
105
  Args:
107
106
  doi1: First DOI to compare
@@ -117,8 +116,27 @@ def compare_dois(doi1: str, doi2: str) -> bool:
117
116
  norm_doi1 = normalize_doi(doi1)
118
117
  norm_doi2 = normalize_doi(doi2)
119
118
 
120
- # DOIs must be exactly identical after normalization
121
- return norm_doi1 == norm_doi2
119
+ # First try exact match
120
+ if norm_doi1 == norm_doi2:
121
+ return True
122
+
123
+ # Handle partial DOI citations - if one DOI is a prefix of the other, consider it a match
124
+ # This handles cases like "10.1007" being cited instead of the full "10.1007/s10458-025-09691-y"
125
+ if len(norm_doi1) != len(norm_doi2):
126
+ shorter_doi = norm_doi1 if len(norm_doi1) < len(norm_doi2) else norm_doi2
127
+ longer_doi = norm_doi2 if len(norm_doi1) < len(norm_doi2) else norm_doi1
128
+
129
+ # Only consider it a valid partial match if:
130
+ # 1. The shorter DOI is at least 7 characters (e.g., "10.1007")
131
+ # 2. The longer DOI starts with the shorter DOI
132
+ # 3. The next character in the longer DOI is '/' or '.' (valid DOI separators)
133
+ if (len(shorter_doi) >= 7 and
134
+ longer_doi.startswith(shorter_doi) and
135
+ len(longer_doi) > len(shorter_doi) and
136
+ longer_doi[len(shorter_doi)] in ['/', '.']):
137
+ return True
138
+
139
+ return False
122
140
 
123
141
 
124
142
  def construct_doi_url(doi: str) -> str:
@@ -183,6 +183,14 @@ def clean_venue_for_comparison(venue: str) -> str:
183
183
  return normalize_venue_for_display(venue)
184
184
 
185
185
 
186
+ def format_missing_venue(correct_venue: str) -> str:
187
+ """
188
+ Format a missing venue message with only the actual value.
189
+ """
190
+ # Only show the actual venue; omit the empty cited line
191
+ return f"Missing venue: '{correct_venue}'"
192
+
193
+
186
194
  def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]:
187
195
  """
188
196
  Create a standardized venue warning dictionary.
@@ -197,7 +205,15 @@ def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]
197
205
  # Clean both venues for display in the warning
198
206
  clean_cited = clean_venue_for_comparison(cited_venue)
199
207
  clean_correct = clean_venue_for_comparison(correct_venue)
200
-
208
+
209
+ # If cited venue cleans to empty, treat as missing venue instead of mismatch
210
+ if not clean_cited and clean_correct:
211
+ return {
212
+ 'warning_type': 'venue',
213
+ 'warning_details': format_missing_venue(clean_correct),
214
+ 'ref_venue_correct': correct_venue
215
+ }
216
+
201
217
  return {
202
218
  'warning_type': 'venue',
203
219
  'warning_details': format_three_line_mismatch("Venue mismatch", clean_cited, clean_correct),
@@ -506,8 +506,10 @@ def clean_author_name(author):
506
506
  # Fix spacing around periods in initials (e.g., "Y . Li" -> "Y. Li")
507
507
  author = re.sub(r'(\w)\s+\.', r'\1.', author)
508
508
 
509
- # Remove common prefixes/suffixes
510
- author = re.sub(r'\b(Dr\.?|Prof\.?|Professor|Mr\.?|Ms\.?|Mrs\.?)\s*', '', author, flags=re.IGNORECASE)
509
+ # Remove common honorific prefixes only when they are standalone at the start (require trailing whitespace)
510
+ # Previous pattern falsely removed the leading "Mr" from names like "Mrinmaya" due to optional whitespace.
511
+ # Anchor to start and require at least one space after the title to avoid stripping inside longer names.
512
+ author = re.sub(r'^(?:Dr|Prof|Professor|Mr|Ms|Mrs)\.?\s+', '', author, flags=re.IGNORECASE)
511
513
 
512
514
  # Remove email addresses
513
515
  author = re.sub(r'\S+@\S+\.\S+', '', author)
@@ -2100,7 +2102,7 @@ def compare_authors(cited_authors: list, correct_authors: list, normalize_func=N
2100
2102
  # Use standardized three-line formatting for author mismatch
2101
2103
  cited_display = format_author_for_display(cited_author)
2102
2104
  full_author_list = ', '.join(correct_names)
2103
- error_msg = format_author_mismatch(i+1, f"{cited_display} (not found in author list - et al case)", f"Correct authors: {full_author_list}")
2105
+ error_msg = format_author_mismatch(i+1, f"{cited_display} (not found in author list - et al case)", f"{full_author_list}")
2104
2106
  return False, error_msg
2105
2107
 
2106
2108
  return True, f"Authors match (verified {len(cleaned_cited)} of {len(correct_names)} with et al)"
@@ -3588,6 +3590,12 @@ def calculate_title_similarity(title1: str, title2: str) -> float:
3588
3590
  # Normalize titles for comparison
3589
3591
  t1 = title1.lower().strip()
3590
3592
  t2 = title2.lower().strip()
3593
+
3594
+ # Remove trailing year suffixes like ", 2024" or " 2024" for robust matching
3595
+ def strip_trailing_year(s: str) -> str:
3596
+ return re.sub(r"[,\s]*\b(19|20)\d{2}\b\s*$", "", s).strip()
3597
+ t1 = strip_trailing_year(t1)
3598
+ t2 = strip_trailing_year(t2)
3591
3599
 
3592
3600
  # Exact match
3593
3601
  if t1 == t2:
@@ -4676,6 +4684,13 @@ def normalize_venue_for_display(venue: str) -> str:
4676
4684
 
4677
4685
  venue_text = venue.strip()
4678
4686
 
4687
+ # Strip leading editor name lists like "..., editors, Venue ..." or "..., eds., Venue ..."
4688
+ # This prevents author/editor lists from being treated as venue
4689
+ # Match 'editors,' 'editor,' or 'eds.,' possibly after a comma; capture the remainder as venue
4690
+ editors_match = re.search(r"(?:^|,)\s*(?:editors?|eds?\.?|editor)\s*,\s*(.+)$", venue_text, re.IGNORECASE)
4691
+ if editors_match:
4692
+ venue_text = editors_match.group(1).strip()
4693
+
4679
4694
  # Extract venue from complex editor strings (e.g. "In Smith, J.; and Doe, K., eds., Conference Name, volume 1")
4680
4695
  # This handles patterns like "In [authors], eds., [venue], [optional metadata]" (case-insensitive)
4681
4696
  editor_match = re.search(r'in\s+[^,]+(?:,\s*[^,]*)*,\s*eds?\.,\s*(.+?)(?:,\s*volume\s*\d+|,\s*pp?\.|$)', venue_text, re.IGNORECASE)
@@ -4702,7 +4717,9 @@ def normalize_venue_for_display(venue: str) -> str:
4702
4717
  prefixes_to_remove = [
4703
4718
  r'^\d{4}\s+\d+(st|nd|rd|th)\s+', # "2012 IEEE/RSJ"
4704
4719
  r'^\d{4}\s+', # "2024 "
4705
- r'^proceedings\s+(of\s+)?(the\s+)?((acm|ieee|usenix|aaai|sigcomm|sigkdd|sigmod|sigops|vldb|osdi|sosp|eurosys)\s+)*(\d+(st|nd|rd|th)\s+)?', # "Proceedings of the [ORG] [ORG] 29th"
4720
+ # Remove 'Proceedings of [the] [ORG]* [ordinal]*' only when followed by at least one word
4721
+ # This avoids cutting a venue down to just 'Proceedings of the'
4722
+ r'^proceedings\s+of\s+(?!the\s*$)(?:the\s+)?(?:(?:acm|ieee|usenix|aaai|sigcomm|sigkdd|sigmod|sigops|vldb|osdi|sosp|eurosys)\s+)*(?:\d+(?:st|nd|rd|th)\s+)?',
4706
4723
  r'^proc\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Proc. of the IEEE" (require "of")
4707
4724
  r'^procs\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Procs. of the IEEE" (require "of")
4708
4725
  r'^in\s+',
@@ -4741,4 +4758,8 @@ def normalize_venue_for_display(venue: str) -> str:
4741
4758
  venue_text = re.sub(r'\s+', ' ', venue_text) # Normalize whitespace
4742
4759
  venue_text = venue_text.strip()
4743
4760
 
4761
+ # If what's left is too generic (e.g., just 'Proceedings of the'), treat as no venue
4762
+ if venue_text.lower() in {"proceedings of the", "proceedings of"}:
4763
+ return ""
4764
+
4744
4765
  return venue_text