academic-refchecker 1.2.43__tar.gz → 1.2.44__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (57)
  1. {academic_refchecker-1.2.43/src/academic_refchecker.egg-info → academic_refchecker-1.2.44}/PKG-INFO +1 -1
  2. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/__version__.py +1 -1
  3. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44/src/academic_refchecker.egg-info}/PKG-INFO +1 -1
  4. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/core/refchecker.py +1 -1
  5. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/text_utils.py +1 -1
  6. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/url_utils.py +17 -0
  7. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/LICENSE +0 -0
  8. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/MANIFEST.in +0 -0
  9. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/README.md +0 -0
  10. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/pyproject.toml +0 -0
  11. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/requirements.txt +0 -0
  12. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/scripts/download_db.py +0 -0
  13. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/scripts/run_tests.py +0 -0
  14. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/scripts/start_vllm_server.py +0 -0
  15. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/setup.cfg +0 -0
  16. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/__init__.py +0 -0
  17. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/academic_refchecker.egg-info/SOURCES.txt +0 -0
  18. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
  19. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/academic_refchecker.egg-info/entry_points.txt +0 -0
  20. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/academic_refchecker.egg-info/requires.txt +0 -0
  21. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/academic_refchecker.egg-info/top_level.txt +0 -0
  22. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/__init__.py +0 -0
  23. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/crossref.py +0 -0
  24. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/enhanced_hybrid_checker.py +0 -0
  25. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/github_checker.py +0 -0
  26. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/local_semantic_scholar.py +0 -0
  27. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/openalex.py +0 -0
  28. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/openreview_checker.py +0 -0
  29. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/semantic_scholar.py +0 -0
  30. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/checkers/webpage_checker.py +0 -0
  31. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/config/__init__.py +0 -0
  32. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/config/logging.conf +0 -0
  33. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/config/settings.py +0 -0
  34. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/core/__init__.py +0 -0
  35. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/core/db_connection_pool.py +0 -0
  36. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/core/parallel_processor.py +0 -0
  37. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/database/__init__.py +0 -0
  38. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/database/download_semantic_scholar_db.py +0 -0
  39. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/llm/__init__.py +0 -0
  40. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/llm/base.py +0 -0
  41. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/llm/providers.py +0 -0
  42. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/scripts/__init__.py +0 -0
  43. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/scripts/start_vllm_server.py +0 -0
  44. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/services/__init__.py +0 -0
  45. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/services/pdf_processor.py +0 -0
  46. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/__init__.py +0 -0
  47. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/arxiv_utils.py +0 -0
  48. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/author_utils.py +0 -0
  49. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/biblatex_parser.py +0 -0
  50. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/bibliography_utils.py +0 -0
  51. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/bibtex_parser.py +0 -0
  52. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/config_validator.py +0 -0
  53. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/db_utils.py +0 -0
  54. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/doi_utils.py +0 -0
  55. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/error_utils.py +0 -0
  56. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/mock_objects.py +0 -0
  57. {academic_refchecker-1.2.43 → academic_refchecker-1.2.44}/src/utils/unicode_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.43
3
+ Version: 1.2.44
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """Version information for RefChecker."""
2
2
 
3
- __version__ = "1.2.43"
3
+ __version__ = "1.2.44"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academic-refchecker
3
- Version: 1.2.43
3
+ Version: 1.2.44
4
4
  Summary: A comprehensive tool for validating reference accuracy in academic papers
5
5
  Author-email: Mark Russinovich <markrussinovich@hotmail.com>
6
6
  License-Expression: MIT
@@ -5181,7 +5181,7 @@ class ArxivReferenceChecker:
5181
5181
  from utils.text_utils import format_authors_for_display
5182
5182
  authors = format_authors_for_display(reference.get('authors', []))
5183
5183
  year = reference.get('year', '')
5184
- venue = reference.get('venue', '')
5184
+ venue = reference.get('venue', '') or reference.get('journal', '')
5185
5185
  url = reference.get('url', '')
5186
5186
  doi = reference.get('doi', '')
5187
5187
  # Extract actual reference number from raw text for accurate display
@@ -4594,7 +4594,7 @@ def normalize_venue_for_display(venue: str) -> str:
4594
4594
  prefixes_to_remove = [
4595
4595
  r'^\d{4}\s+\d+(st|nd|rd|th)\s+', # "2012 IEEE/RSJ"
4596
4596
  r'^\d{4}\s+', # "2024 "
4597
- r'^proceedings\s+(of\s+)?(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Proceedings of the IEEE"
4597
+ r'^proceedings\s+(of\s+)?(the\s+)?((acm|ieee|usenix|aaai|sigcomm|sigkdd|sigmod|sigops|vldb|osdi|sosp|eurosys)\s+)*(\d+(st|nd|rd|th)\s+)?', # "Proceedings of the [ORG] [ORG] 29th"
4598
4598
  r'^proc\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Proc. of the IEEE" (require "of")
4599
4599
  r'^procs\.\s+of\s+(the\s+)?(\d+(st|nd|rd|th)\s+)?(ieee\s+)?', # "Procs. of the IEEE" (require "of")
4600
4600
  r'^in\s+',
@@ -214,6 +214,7 @@ def clean_url(url: str) -> str:
214
214
  This function handles:
215
215
  - Whitespace trimming
216
216
  - Malformed LaTeX URL wrappers like \\url{https://...}
217
+ - Markdown-style links like [text](url)
217
218
  - Trailing punctuation from academic references
218
219
  - DOI URL query parameter cleanup
219
220
 
@@ -237,6 +238,14 @@ def clean_url(url: str) -> str:
237
238
  if url_match:
238
239
  url = url_match.group(1)
239
240
 
241
+ # Handle markdown-style links like [text](url) or [url](url)
242
+ # e.g., "[https://example.com](https://example.com)" -> "https://example.com"
243
+ markdown_pattern = r'\[([^\]]*)\]\((https?://[^)]+)\)'
244
+ markdown_match = re.search(markdown_pattern, url)
245
+ if markdown_match:
246
+ # Use the URL from parentheses
247
+ url = markdown_match.group(2)
248
+
240
249
  # Remove trailing punctuation that's commonly part of sentence structure
241
250
  # but preserve legitimate URL characters
242
251
  url = url.rstrip('.,;!?)')
@@ -280,6 +289,14 @@ def clean_url_punctuation(url: str) -> str:
280
289
  if url_match:
281
290
  url = url_match.group(1)
282
291
 
292
+ # Handle markdown-style links like [text](url) or [url](url)
293
+ # e.g., "[https://example.com](https://example.com)" -> "https://example.com"
294
+ markdown_pattern = r'\[([^\]]*)\]\((https?://[^)]+)\)'
295
+ markdown_match = re.search(markdown_pattern, url)
296
+ if markdown_match:
297
+ # Use the URL from parentheses
298
+ url = markdown_match.group(2)
299
+
283
300
  # Remove trailing punctuation that's commonly part of sentence structure
284
301
  # but preserve legitimate URL characters
285
302
  url = url.rstrip('.,;!?)')