academic-refchecker 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. academic_refchecker-2.0.7.dist-info/METADATA +738 -0
  2. academic_refchecker-2.0.7.dist-info/RECORD +64 -0
  3. academic_refchecker-2.0.7.dist-info/WHEEL +5 -0
  4. academic_refchecker-2.0.7.dist-info/entry_points.txt +3 -0
  5. academic_refchecker-2.0.7.dist-info/licenses/LICENSE +21 -0
  6. academic_refchecker-2.0.7.dist-info/top_level.txt +2 -0
  7. backend/__init__.py +21 -0
  8. backend/__main__.py +11 -0
  9. backend/cli.py +64 -0
  10. backend/concurrency.py +100 -0
  11. backend/database.py +711 -0
  12. backend/main.py +1367 -0
  13. backend/models.py +99 -0
  14. backend/refchecker_wrapper.py +1126 -0
  15. backend/static/assets/index-2P6L_39v.css +1 -0
  16. backend/static/assets/index-hk21nqxR.js +25 -0
  17. backend/static/favicon.svg +6 -0
  18. backend/static/index.html +15 -0
  19. backend/static/vite.svg +1 -0
  20. backend/thumbnail.py +517 -0
  21. backend/websocket_manager.py +104 -0
  22. refchecker/__init__.py +13 -0
  23. refchecker/__main__.py +11 -0
  24. refchecker/__version__.py +3 -0
  25. refchecker/checkers/__init__.py +17 -0
  26. refchecker/checkers/crossref.py +541 -0
  27. refchecker/checkers/enhanced_hybrid_checker.py +563 -0
  28. refchecker/checkers/github_checker.py +326 -0
  29. refchecker/checkers/local_semantic_scholar.py +540 -0
  30. refchecker/checkers/openalex.py +513 -0
  31. refchecker/checkers/openreview_checker.py +984 -0
  32. refchecker/checkers/pdf_paper_checker.py +493 -0
  33. refchecker/checkers/semantic_scholar.py +764 -0
  34. refchecker/checkers/webpage_checker.py +938 -0
  35. refchecker/config/__init__.py +1 -0
  36. refchecker/config/logging.conf +36 -0
  37. refchecker/config/settings.py +170 -0
  38. refchecker/core/__init__.py +7 -0
  39. refchecker/core/db_connection_pool.py +141 -0
  40. refchecker/core/parallel_processor.py +415 -0
  41. refchecker/core/refchecker.py +5838 -0
  42. refchecker/database/__init__.py +6 -0
  43. refchecker/database/download_semantic_scholar_db.py +1725 -0
  44. refchecker/llm/__init__.py +0 -0
  45. refchecker/llm/base.py +376 -0
  46. refchecker/llm/providers.py +911 -0
  47. refchecker/scripts/__init__.py +1 -0
  48. refchecker/scripts/start_vllm_server.py +121 -0
  49. refchecker/services/__init__.py +8 -0
  50. refchecker/services/pdf_processor.py +268 -0
  51. refchecker/utils/__init__.py +27 -0
  52. refchecker/utils/arxiv_utils.py +462 -0
  53. refchecker/utils/author_utils.py +179 -0
  54. refchecker/utils/biblatex_parser.py +584 -0
  55. refchecker/utils/bibliography_utils.py +332 -0
  56. refchecker/utils/bibtex_parser.py +411 -0
  57. refchecker/utils/config_validator.py +262 -0
  58. refchecker/utils/db_utils.py +210 -0
  59. refchecker/utils/doi_utils.py +190 -0
  60. refchecker/utils/error_utils.py +482 -0
  61. refchecker/utils/mock_objects.py +211 -0
  62. refchecker/utils/text_utils.py +5057 -0
  63. refchecker/utils/unicode_utils.py +335 -0
  64. refchecker/utils/url_utils.py +307 -0
@@ -0,0 +1,482 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Error Utilities for Reference Checking
4
+
5
+ This module provides standardized error and warning creation functions
6
+ for reference checkers.
7
+ """
8
+
9
+ from typing import Dict, List, Any, Optional
10
+
11
+
12
def print_labeled_multiline(label: str, text: str) -> None:
    """
    Print a possibly multi-line message underneath a single label.

    The first line of ``text`` is printed after the label prefix; every
    following line is printed behind a constant run of spaces so that the
    continuation indent does not depend on the rendered width of the label
    (emoji labels vary in width).

    Args:
        label: The label (e.g., "❌ Error", "⚠️ Warning")
        text: The message to print; ``None`` or an empty string prints the
            label with an empty message
    """
    # A missing/empty text still yields exactly one (empty) line
    first_line, *continuation = (text or "").splitlines() or [""]

    print(f" {label}: " + first_line)

    # Continuation lines use a constant 15-space indent, independent of the label
    continuation_indent = " " * 15
    for extra_line in continuation:
        print(continuation_indent + extra_line)
34
+
35
+
36
def format_three_line_mismatch(mismatch_type: str, left: str, right: str) -> str:
    """
    Format a three-line mismatch message with fixed indentation.

    The first line is the mismatch label terminated by exactly one colon;
    the next two lines carry the cited and the actual value, each behind a
    fixed seven-space indent.

    Example:
        Title mismatch:
               cited: 'Cited Title'
               actual: 'Correct Title'

    Args:
        mismatch_type: The type of mismatch (e.g., "Author 2 mismatch", "Title mismatch");
            a trailing colon and trailing whitespace are tolerated
        left: The cited/incorrect value
        right: The correct value

    Returns:
        Three-line formatted mismatch message
    """
    # Strip trailing whitespace BEFORE testing for the colon; otherwise a label
    # such as "Title mismatch: " would end up as "Title mismatch::"
    header = mismatch_type.rstrip()
    if not header.endswith(":"):
        header += ":"

    # Fixed seven-space indent for the two value lines
    label_indent = " " * 7

    return f"{header}\n{label_indent}cited: {left}\n{label_indent}actual: {right}"
64
+
65
+
66
def format_title_mismatch(cited_title: str, verified_title: str) -> str:
    """
    Build the three-line mismatch message for a title.

    Delegates to format_three_line_mismatch, so the result is a
    "Title mismatch:" header followed by one "cited:" line and one
    "actual:" line.

    Args:
        cited_title: Title as cited in the reference
        verified_title: Title found by verification

    Returns:
        Three-line formatted title mismatch message
    """
    return format_three_line_mismatch(
        mismatch_type="Title mismatch",
        left=cited_title,
        right=verified_title,
    )
76
+
77
+
78
def format_year_mismatch(cited_year: int | str, correct_year: int | str) -> str:
    """
    Build the three-line mismatch message for a publication year.

    Both years are converted with ``str()`` before formatting, so ints and
    strings are accepted alike.

    Args:
        cited_year: Year as cited in the reference
        correct_year: Year regarded as correct

    Returns:
        Three-line formatted year mismatch message
    """
    cited_text, correct_text = str(cited_year), str(correct_year)
    return format_three_line_mismatch("Year mismatch", cited_text, correct_text)
83
+
84
+
85
def format_doi_mismatch(cited_doi: str, correct_doi: str) -> str:
    """
    Build the three-line mismatch message for a DOI.

    Both values are passed through ``str()`` before formatting.

    Args:
        cited_doi: DOI as cited in the reference
        correct_doi: DOI regarded as correct

    Returns:
        Three-line formatted DOI mismatch message
    """
    cited_text, correct_text = str(cited_doi), str(correct_doi)
    return format_three_line_mismatch("DOI mismatch", cited_text, correct_text)
90
+
91
def create_author_error(error_details: str, correct_authors: List[Dict[str, str]]) -> Dict[str, str]:
    """
    Build the standard error dictionary for an author problem.

    Args:
        error_details: Description of the author error
        correct_authors: Author dictionaries; the 'name' entry of each is used
            (an entry without 'name' contributes an empty string)

    Returns:
        Dictionary with 'error_type', 'error_details' and
        'ref_authors_correct' (the names joined with ", ")
    """
    author_names = [entry.get('name', '') for entry in correct_authors]
    joined_names = ', '.join(author_names)

    return {
        'error_type': 'author',
        'error_details': error_details,
        'ref_authors_correct': joined_names,
    }
107
+
108
+
109
def create_year_warning(cited_year: int, correct_year: int) -> Dict[str, Any]:
    """
    Build the standard warning dictionary for a year mismatch.

    Args:
        cited_year: Year as cited in the reference
        correct_year: Year regarded as correct

    Returns:
        Dictionary with 'warning_type', 'warning_details' (the formatted
        year mismatch message) and 'ref_year_correct'
    """
    details = format_year_mismatch(cited_year, correct_year)
    return {
        'warning_type': 'year',
        'warning_details': details,
        'ref_year_correct': correct_year,
    }
125
+
126
+
127
def create_year_missing_error(correct_year: int) -> Dict[str, Any]:
    """
    Build the standard error dictionary for a reference that lacks a year.

    Args:
        correct_year: Year the reference should have included

    Returns:
        Dictionary with 'error_type', 'error_details' and 'ref_year_correct'
    """
    message = f"Year missing: should include '{correct_year}'"
    return {
        'error_type': 'year',
        'error_details': message,
        'ref_year_correct': correct_year,
    }
142
+
143
+
144
def validate_year(cited_year: Optional[int], paper_year: Optional[int],
                  year_tolerance: int = 1, use_flexible_validation: bool = False,
                  context: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
    """
    Check a cited year against the known year and report any problem.

    Outcomes:
    - known year unavailable (falsy): nothing can be checked, returns None
    - cited year missing (falsy): returns the "year missing" error dictionary
    - both present: returns a year warning dictionary when the years are
      judged different, otherwise None

    Args:
        cited_year: Year as cited in the reference (may be None)
        paper_year: Correct year from database/API (may be None)
        year_tolerance: Largest absolute difference still accepted by the
            simple check (default 1); not used by the flexible check
        use_flexible_validation: If True, defer the decision to
            text_utils.is_year_substantially_different
        context: Optional context dict handed to the flexible check
            (an empty dict is passed when omitted)

    Returns:
        Warning or error dictionary if a year issue was found, None otherwise
    """
    # Without a known correct year there is nothing to compare against
    if not paper_year:
        return None

    # The reference carries no year although the paper has one
    if not cited_year:
        return create_year_missing_error(paper_year)

    if use_flexible_validation:
        # Imported lazily, only when the flexible path is requested
        from refchecker.utils.text_utils import is_year_substantially_different
        differs, message = is_year_substantially_different(
            cited_year, paper_year, context or {}
        )
        # Only flag when the helper reports a difference AND supplies a message
        flagged = bool(differs and message)
    else:
        # Plain tolerance check on the absolute difference
        flagged = abs(cited_year - paper_year) > year_tolerance

    if flagged:
        return create_year_warning(cited_year, paper_year)
    return None
186
+
187
+
188
def create_doi_error(cited_doi: str, correct_doi: str) -> Optional[Dict[str, str]]:
    """
    Build the standard DOI error or warning dictionary.

    Nothing is reported when compare_dois considers the two DOIs equal.
    Otherwise the severity depends on validate_doi_resolves(cited_doi):
    a truthy result is reported as a warning (a paper can have several
    valid DOIs, e.g. arXiv DOI vs conference DOI), a falsy one as an error.

    Args:
        cited_doi: DOI as cited in the reference
        correct_doi: Correct DOI from database

    Returns:
        Warning dictionary ('warning_type'/'warning_details') or error
        dictionary ('error_type'/'error_details'), each with
        'ref_doi_correct'; None if the DOIs compare equal
    """
    from refchecker.utils.doi_utils import validate_doi_resolves, compare_dois

    # compare_dois is relied on for DOI normalization before comparing
    if compare_dois(cited_doi, correct_doi):
        return None

    # A cited DOI that resolves is probably a valid alternate DOI -> warning only
    cited_resolves = validate_doi_resolves(cited_doi)
    details = format_doi_mismatch(cited_doi, correct_doi)

    if cited_resolves:
        type_key, details_key = 'warning_type', 'warning_details'
    else:
        type_key, details_key = 'error_type', 'error_details'

    return {
        type_key: 'doi',
        details_key: details,
        'ref_doi_correct': correct_doi,
    }
223
+
224
+
225
def create_title_error(error_details: str, correct_title: str) -> Dict[str, str]:
    """
    Build the standard error dictionary for a title problem.

    Args:
        error_details: Description of the title error
        correct_title: Title regarded as correct

    Returns:
        Dictionary with 'error_type', 'error_details' and 'ref_title_correct'
    """
    title_error: Dict[str, str] = {'error_type': 'title'}
    title_error['error_details'] = error_details
    title_error['ref_title_correct'] = correct_title
    return title_error
241
+
242
+
243
def clean_venue_for_comparison(venue: str) -> str:
    """
    Return the display form of a venue name.

    Thin wrapper around text_utils.normalize_venue_for_display, which is
    imported lazily on each call.

    Args:
        venue: Raw venue string

    Returns:
        Whatever normalize_venue_for_display returns for ``venue``
    """
    from refchecker.utils.text_utils import normalize_venue_for_display

    display_name = normalize_venue_for_display(venue)
    return display_name
255
+
256
+
257
def format_missing_venue(correct_venue: str) -> str:
    """
    Format a missing venue message that shows only the actual value.

    Example:
        Missing venue:
               actual: Journal of AI Research

    Args:
        correct_venue: Venue the reference should have included

    Returns:
        Two-line message: a "Missing venue:" header followed by the
        indented "actual:" line
    """
    # Fixed seven-space indent for the value line
    label_indent = " " * 7
    return f"Missing venue:\n{label_indent}actual: {correct_venue}"
264
+
265
+
266
def create_venue_warning(cited_venue: str, correct_venue: str) -> Dict[str, str]:
    """
    Build the standard venue warning (or missing-venue error) dictionary.

    Both venues are cleaned for display first. If the cited venue cleans
    to an empty value while the correct one does not, the result is a
    missing-venue *error* dictionary; in every other case it is a venue
    mismatch *warning* dictionary.

    Args:
        cited_venue: Venue as cited in the reference
        correct_venue: Correct venue from database

    Returns:
        Dictionary with either 'error_type'/'error_details' or
        'warning_type'/'warning_details', plus 'ref_venue_correct'
        (the uncleaned correct venue)
    """
    cited_display = clean_venue_for_comparison(cited_venue)
    correct_display = clean_venue_for_comparison(correct_venue)

    # Nothing usable was cited -> report the venue as missing rather than wrong
    venue_is_missing = not cited_display and correct_display
    if venue_is_missing:
        return {
            'error_type': 'venue',
            'error_details': format_missing_venue(correct_display),
            'ref_venue_correct': correct_venue,
        }

    mismatch_text = format_three_line_mismatch("Venue mismatch", cited_display, correct_display)
    return {
        'warning_type': 'venue',
        'warning_details': mismatch_text,
        'ref_venue_correct': correct_venue,
    }
294
+
295
+
296
def format_venue_mismatch(cited_venue: str, verified_venue: str) -> str:
    """
    Build the three-line mismatch message for a venue.

    Both venue names are cleaned with clean_venue_for_comparison before
    they are placed in the message.

    Args:
        cited_venue: Venue as cited in the reference
        verified_venue: Venue found by verification

    Returns:
        Three-line formatted venue mismatch message
    """
    cited_display, verified_display = (
        clean_venue_for_comparison(cited_venue),
        clean_venue_for_comparison(verified_venue),
    )
    return format_three_line_mismatch("Venue mismatch", cited_display, verified_display)
303
+
304
+
305
def create_url_error(error_details: str, correct_url: Optional[str] = None) -> Dict[str, str]:
    """
    Build the standard error dictionary for a URL problem.

    Args:
        error_details: Description of the URL error
        correct_url: Correct URL, if known; omitted from the result when
            falsy (None or empty)

    Returns:
        Dictionary with 'error_type' and 'error_details', plus
        'ref_url_correct' when a correct URL was supplied
    """
    url_error: Dict[str, str] = dict(error_type='url', error_details=error_details)

    # No known replacement URL: the two base fields are the whole result
    if not correct_url:
        return url_error

    url_error['ref_url_correct'] = correct_url
    return url_error
325
+
326
+
327
def create_generic_error(error_type: str, error_details: str, **kwargs) -> Dict[str, Any]:
    """
    Build an error dictionary with arbitrary extra fields.

    Args:
        error_type: Type of error (e.g., 'author', 'doi', 'title')
        error_details: Description of the error
        **kwargs: Extra key/value pairs merged into the dictionary after
            the two standard fields

    Returns:
        Dictionary with 'error_type', 'error_details' and every extra field
    """
    return {
        'error_type': error_type,
        'error_details': error_details,
        **kwargs,
    }
346
+
347
+
348
def create_generic_warning(warning_type: str, warning_details: str, **kwargs) -> Dict[str, Any]:
    """
    Build a warning dictionary with arbitrary extra fields.

    Args:
        warning_type: Type of warning (e.g., 'year', 'venue')
        warning_details: Description of the warning
        **kwargs: Extra key/value pairs merged into the dictionary after
            the two standard fields

    Returns:
        Dictionary with 'warning_type', 'warning_details' and every extra field
    """
    return {
        'warning_type': warning_type,
        'warning_details': warning_details,
        **kwargs,
    }
367
+
368
+
369
def create_generic_info(info_type: str, info_details: str, **kwargs) -> Dict[str, Any]:
    """
    Build an info dictionary with arbitrary extra fields.

    Args:
        info_type: Type of info (e.g., 'url')
        info_details: Description of the information
        **kwargs: Extra key/value pairs merged into the dictionary after
            the two standard fields

    Returns:
        Dictionary with 'info_type', 'info_details' and every extra field
    """
    return {
        'info_type': info_type,
        'info_details': info_details,
        **kwargs,
    }
388
+
389
+
390
def create_info_message(reference, reason, arxiv_url=None):
    """
    Build the standard "arXiv URL available" info message.

    Args:
        reference: The reference the message is about (stored as given)
        reason: Why the message is being issued (stored as given)
        arxiv_url: Optional arXiv URL; included only when truthy

    Returns:
        Dictionary with 'info_type' set to 'arxiv_url_available',
        'reference' and 'reason', plus 'arxiv_url' when one was supplied
    """
    message = dict(info_type='arxiv_url_available', reference=reference, reason=reason)
    if not arxiv_url:
        return message

    message['arxiv_url'] = arxiv_url
    return message
400
+
401
+
402
def format_author_mismatch(author_number: int, cited_author: str, correct_author: str) -> str:
    """
    Build the three-line mismatch message for one author position.

    Args:
        author_number: The author position (1-based)
        cited_author: The cited author name
        correct_author: The correct author name

    Returns:
        Three-line message whose header reads "Author <n> mismatch:"
    """
    header = f"Author {author_number} mismatch"
    return format_three_line_mismatch(header, cited_author, correct_author)
415
+
416
+
417
def format_first_author_mismatch(cited_author: str, correct_author: str) -> str:
    """
    Build the three-line mismatch message for the first author.

    Args:
        cited_author: The cited first author name
        correct_author: The correct first author name

    Returns:
        Three-line message whose header reads "First author mismatch:"
    """
    return format_three_line_mismatch(
        mismatch_type="First author mismatch",
        left=cited_author,
        right=correct_author,
    )
429
+
430
+
431
def format_author_count_mismatch(cited_count: int, correct_count: int, cited_authors: list, correct_authors: list) -> str:
    """
    Build the mismatch message for a differing number of authors.

    The header states both counts; the "cited" and "actual" lines list the
    author names joined with ", ", or the word "None" for an empty list.

    Args:
        cited_count: Number of cited authors
        correct_count: Number of correct authors
        cited_authors: List of cited author names (strings)
        correct_authors: List of correct author names (strings)

    Returns:
        Formatted multi-line author count mismatch message
    """
    def _render(names: list) -> str:
        # An empty or missing list is shown as the literal word "None"
        if not names:
            return "None"
        return ", ".join(names)

    cited_text = _render(cited_authors)
    correct_text = _render(correct_authors)
    count_header = f"Author count mismatch: {cited_count} cited vs {correct_count} correct"

    # Same three-line layout as every other mismatch message
    return format_three_line_mismatch(count_header, cited_text, correct_text)
453
+
454
+
455
def format_authors_list(authors: List[Dict[str, str]]) -> str:
    """
    Join the names of a list of author dictionaries into one string.

    Args:
        authors: List of author data dictionaries; the 'name' entry of each
            is used (an entry without 'name' contributes an empty string)

    Returns:
        Names joined with ", "; an empty string when ``authors`` is empty or None
    """
    if authors:
        return ', '.join(entry.get('name', '') for entry in authors)
    return ""
469
+
470
+
471
def validate_error_dict(error_dict: Dict[str, Any], required_fields: List[str]) -> bool:
    """
    Check that an error dictionary has every required key.

    Only key presence is tested; the stored values are not inspected.

    Args:
        error_dict: Error dictionary to validate
        required_fields: List of required field names

    Returns:
        True if all required fields are present (also when no field is
        required), False otherwise
    """
    for field_name in required_fields:
        if field_name not in error_dict:
            return False
    return True
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Mock objects and test utilities for ArXiv Reference Checker
4
+ Provides shared mock objects for testing and development
5
+ """
6
+
7
+ from typing import Dict, Any, List, Optional
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass
class MockPaper:
    """Stand-in paper record for tests; only title and authors are required."""
    title: str
    authors: List[str]
    abstract: str = ""
    year: Optional[int] = None
    venue: str = ""
    url: str = ""
    doi: str = ""
    arxiv_id: str = ""
    pdf_path: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary, in declaration order.

        Values are placed in the dictionary as-is (no copying).
        """
        exported = (
            'title', 'authors', 'abstract', 'year', 'venue',
            'url', 'doi', 'arxiv_id', 'pdf_path',
        )
        return {name: getattr(self, name) for name in exported}
37
+
38
+
39
@dataclass
class MockReference:
    """Stand-in bibliography reference for tests; only raw_text is required."""
    raw_text: str
    title: str = ""
    authors: List[str] = None
    venue: str = ""
    year: Optional[int] = None
    url: str = ""
    doi: str = ""
    arxiv_id: str = ""

    def __post_init__(self):
        # None (the default) is replaced by a fresh list for each instance
        self.authors = [] if self.authors is None else self.authors

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary, in declaration order.

        Values are placed in the dictionary as-is (no copying).
        """
        exported = (
            'raw_text', 'title', 'authors', 'venue',
            'year', 'url', 'doi', 'arxiv_id',
        )
        return {name: getattr(self, name) for name in exported}
67
+
68
+
69
class MockLLMProvider:
    """Scripted LLM provider for tests: replays canned responses in order."""

    def __init__(self, config: Dict[str, Any] = None):
        # A missing or empty config becomes a new empty dict
        self.config = config if config else {}
        self.responses = []
        self.call_count = 0

    def set_responses(self, responses: List[List[str]]):
        """Install the canned responses, one list of references per call."""
        self.responses = responses

    def extract_references(self, bibliography_text: str) -> List[str]:
        """Return the next canned response, or an empty list once exhausted.

        ``bibliography_text`` is ignored. The call counter advances only
        when a canned response is actually returned.
        """
        position = self.call_count
        if position >= len(self.responses):
            return []

        self.call_count = position + 1
        return self.responses[position]

    def is_available(self) -> bool:
        """Report the mock as always available."""
        return True
92
+
93
+
94
class MockSemanticScholarAPI:
    """Scripted Semantic Scholar API for tests, backed by a lookup table."""

    def __init__(self):
        self.responses = {}
        self.call_count = 0

    def set_response(self, query: str, response: Dict[str, Any]):
        """Register the response to return for ``query`` (search text or paper id)."""
        self.responses[query] = response

    def search_papers(self, query: str) -> Dict[str, Any]:
        """Count the call and return the registered response for ``query``.

        An unregistered query yields an empty result set, ``{'data': []}``.
        """
        self.call_count += 1
        empty_result = {'data': []}
        return self.responses.get(query, empty_result)

    def get_paper_details(self, paper_id: str) -> Dict[str, Any]:
        """Count the call and return the registered response for ``paper_id``.

        An unregistered id yields an empty dictionary.
        """
        self.call_count += 1
        no_details = {}
        return self.responses.get(paper_id, no_details)
114
+
115
+
116
class MockArxivAPI:
    """Scripted ArXiv API for tests, backed by a lookup table keyed by ArXiv ID."""

    def __init__(self):
        self.responses = {}
        self.call_count = 0

    def set_response(self, arxiv_id: str, response: Dict[str, Any]):
        """Register the metadata to return for ``arxiv_id``."""
        self.responses[arxiv_id] = response

    def get_paper_metadata(self, arxiv_id: str) -> Dict[str, Any]:
        """Count the call and return the registered metadata for ``arxiv_id``.

        An unregistered ID yields an empty dictionary.
        """
        self.call_count += 1
        no_metadata = {}
        return self.responses.get(arxiv_id, no_metadata)
131
+
132
+
133
def create_mock_config() -> Dict[str, Any]:
    """Return a fresh configuration dictionary for tests.

    The result has three sections: 'llm', 'processing' and 'apis'
    (the latter with 'semantic_scholar' and 'arxiv' entries).
    """
    llm_section = {
        'provider': 'mock',
        'model': 'test-model',
        'max_tokens': 1000,
        'temperature': 0.1,
        'timeout': 30,
    }
    processing_section = {
        'max_concurrent_requests': 5,
        'request_delay': 0.1,
        'retry_attempts': 3,
    }
    api_section = {
        'semantic_scholar': {
            'base_url': 'https://api.semanticscholar.org',
            'timeout': 30,
        },
        'arxiv': {
            'base_url': 'https://arxiv.org/api',
            'timeout': 30,
        },
    }
    return {
        'llm': llm_section,
        'processing': processing_section,
        'apis': api_section,
    }
159
+
160
+
161
def create_mock_paper(title: str = "Test Paper", authors: List[str] = None) -> MockPaper:
    """Return a MockPaper populated with fixed sample values.

    Args:
        title: Paper title (default "Test Paper")
        authors: Author names; when None, a single "Test Author" is used

    Returns:
        MockPaper with sample abstract, year, venue, URL, DOI and arXiv ID
    """
    author_names = ["Test Author"] if authors is None else authors

    sample_fields = dict(
        abstract="This is a test abstract.",
        year=2023,
        venue="Test Conference",
        url="https://example.com/paper",
        doi="10.1000/test",
        arxiv_id="2023.12345",
    )
    return MockPaper(title=title, authors=author_names, **sample_fields)
176
+
177
+
178
def create_mock_reference(raw_text: str = "Test Reference") -> MockReference:
    """Return a MockReference populated with fixed sample values.

    Args:
        raw_text: Raw reference text (default "Test Reference")

    Returns:
        MockReference with sample title, authors, venue, year, URL and DOI
    """
    sample_fields = dict(
        title="Test Reference Title",
        authors=["Test Author"],
        venue="Test Journal",
        year=2023,
        url="https://example.com/reference",
        doi="10.1000/test-ref",
    )
    return MockReference(raw_text=raw_text, **sample_fields)
189
+
190
+
191
def create_mock_bibliography() -> str:
    """Return a sample bibliography with four numbered entries for tests.

    The text starts and ends with a newline and separates entries with a
    blank line.
    """
    bibliography_text = """
[1] Smith, J., & Doe, J. (2023). A comprehensive study of machine learning. Journal of AI Research, 15(3), 123-145.

[2] Johnson, A. (2022). Deep learning fundamentals. In Proceedings of the International Conference on Neural Networks (pp. 67-89).

[3] Brown, M., Davis, K., & Wilson, L. (2023). Natural language processing advances. arXiv preprint arXiv:2023.45678.

[4] Taylor, R. (2021). Computer vision applications. IEEE Transactions on Pattern Analysis, 43(7), 1456-1478.
"""
    return bibliography_text
202
+
203
+
204
def create_mock_extracted_references() -> List[str]:
    """Return the four sample references as individual strings for tests.

    A new list is built on every call.
    """
    sample_entries = (
        "Smith, J., & Doe, J. (2023). A comprehensive study of machine learning. Journal of AI Research, 15(3), 123-145.",
        "Johnson, A. (2022). Deep learning fundamentals. In Proceedings of the International Conference on Neural Networks (pp. 67-89).",
        "Brown, M., Davis, K., & Wilson, L. (2023). Natural language processing advances. arXiv preprint arXiv:2023.45678.",
        "Taylor, R. (2021). Computer vision applications. IEEE Transactions on Pattern Analysis, 43(7), 1456-1478.",
    )
    return list(sample_entries)