academic-refchecker 1.2.65__py3-none-any.whl → 1.2.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
backend/main.py ADDED
@@ -0,0 +1,1266 @@
1
+ """
2
+ FastAPI application for RefChecker Web UI
3
+ """
4
+ import asyncio
5
+ import uuid
6
+ import os
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Optional
10
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, UploadFile, File, Form, HTTPException, Body
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from fastapi.responses import FileResponse
13
+ from pydantic import BaseModel
14
+ import logging
15
+
16
+ import aiosqlite
17
+ from .database import db
18
+ from .websocket_manager import manager
19
+ from .refchecker_wrapper import ProgressRefChecker
20
+ from .models import CheckRequest, CheckHistoryItem
21
+ from .concurrency import init_limiter, get_limiter, DEFAULT_MAX_CONCURRENT
22
+ from .thumbnail import (
23
+ generate_arxiv_thumbnail_async,
24
+ generate_arxiv_preview_async,
25
+ generate_pdf_thumbnail_async,
26
+ generate_pdf_preview_async,
27
+ get_text_thumbnail_async,
28
+ get_thumbnail_cache_path,
29
+ get_preview_cache_path
30
+ )
31
+
32
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by every handler and background task below.
logger = logging.getLogger(__name__)
38
+
39
+
40
# Pydantic models for requests
class LLMConfigCreate(BaseModel):
    """Request body for creating a saved LLM configuration."""
    name: str  # display name for the configuration
    provider: str  # LLM provider identifier (e.g. "anthropic")
    model: Optional[str] = None  # specific model; provider default when None
    api_key: Optional[str] = None  # stored server-side; not echoed back by GET /api/llm-configs
    endpoint: Optional[str] = None  # custom endpoint URL, e.g. for self-hosted providers
47
+
48
+
49
class LLMConfigUpdate(BaseModel):
    """Request body for updating a saved LLM configuration.

    All fields are optional; only the fields supplied are meant to change.
    """
    name: Optional[str] = None
    provider: Optional[str] = None
    model: Optional[str] = None
    api_key: Optional[str] = None
    endpoint: Optional[str] = None
55
+
56
+
57
class LLMConfigValidate(BaseModel):
    """Model for validating LLM config without requiring name"""
    provider: str  # provider to validate credentials against
    model: Optional[str] = None
    api_key: Optional[str] = None
    endpoint: Optional[str] = None
63
+
64
+
65
class CheckLabelUpdate(BaseModel):
    """Request body for setting a user-defined label on a check."""
    custom_label: str
67
+
68
+
69
# Create FastAPI app
app = FastAPI(title="RefChecker Web UI API", version="1.0.0")

# Configure CORS for local development
# NOTE(review): the allowed origins cover local Vite dev-server ports only;
# production origins would need to be added here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173", "http://localhost:5174", "http://localhost:5175", "http://127.0.0.1:5174", "http://127.0.0.1:5175"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
80
+
81
+ # Track active check sessions
82
+ active_checks = {}
83
+
84
+
85
+ def _session_id_for_check(check_id: int) -> Optional[str]:
86
+ """Helper to find the session_id for an in-progress check."""
87
+ for session_id, meta in active_checks.items():
88
+ if meta.get("check_id") == check_id:
89
+ return session_id
90
+ return None
91
+
92
+
93
@app.on_event("startup")
async def startup_event():
    """Prepare the application on startup.

    Initializes the database, configures the global concurrency limiter from
    the persisted setting, and cancels any checks left in_progress by a
    previous process.
    """
    await db.init_db()

    # Configure the limiter from the saved setting, falling back to the
    # default when the setting is missing or unreadable.
    try:
        saved = await db.get_setting("max_concurrent_checks")
        max_concurrent = int(saved) if saved else DEFAULT_MAX_CONCURRENT
        await init_limiter(max_concurrent)
        logger.info(f"Initialized global concurrency limiter with max={max_concurrent}")
    except Exception as e:
        logger.warning(f"Failed to load concurrency setting, using default: {e}")
        await init_limiter(DEFAULT_MAX_CONCURRENT)

    # Mark any previously in-progress checks as cancelled (e.g., after restart)
    # so the UI does not show phantom running work.
    try:
        stale = await db.cancel_stale_in_progress()
        if stale:
            logger.info(f"Cancelled {stale} stale in-progress checks on startup")
    except Exception as e:
        logger.error(f"Failed to cancel stale checks: {e}")
    logger.info("Database initialized")
116
+
117
+
118
@app.get("/")
async def root():
    """Liveness endpoint at the API root."""
    # A static payload so deployments and humans can probe the service.
    payload = {"status": "ok", "message": "RefChecker Web UI API"}
    return payload
122
+
123
+
124
@app.get("/api/health")
async def health():
    """Health probe used by clients to confirm the API is reachable."""
    return dict(status="healthy")
128
+
129
+
130
@app.websocket("/api/ws/{session_id}")
async def websocket_endpoint(websocket: WebSocket, session_id: str):
    """WebSocket endpoint for real-time updates.

    Registers the connection with the shared manager under ``session_id`` so
    background checks can push progress events, then blocks reading incoming
    frames until the client disconnects.
    """
    await manager.connect(websocket, session_id)
    try:
        # Keep connection alive and handle incoming messages
        while True:
            data = await websocket.receive_text()
            # Incoming frames are currently only logged; the server does not
            # act on client commands. Echo back or handle commands if needed
            logger.debug(f"Received WebSocket message: {data}")
    except WebSocketDisconnect:
        # Deregister so the manager stops sending to a closed socket.
        manager.disconnect(websocket, session_id)
        logger.info(f"WebSocket disconnected: {session_id}")
143
+
144
+
145
@app.post("/api/check")
async def start_check(
    source_type: str = Form(...),
    source_value: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    source_text: Optional[str] = Form(None),
    llm_config_id: Optional[int] = Form(None),
    llm_provider: str = Form("anthropic"),
    llm_model: Optional[str] = Form(None),
    use_llm: bool = Form(True)
):
    """
    Start a new reference check

    Args:
        source_type: 'url', 'file', or 'text'
        source_value: URL or ArXiv ID (for url type)
        file: Uploaded file (for file type)
        source_text: Pasted bibliography text (for text type)
        llm_config_id: ID of the LLM config to use (for retrieving API key)
        llm_provider: LLM provider to use
        llm_model: Specific model to use
        use_llm: Whether to use LLM for extraction

    Returns:
        Session ID for tracking progress via WebSocket

    Raises:
        HTTPException: 400 when no usable source was supplied,
            500 on unexpected failures.
    """
    try:
        # Generate session ID
        session_id = str(uuid.uuid4())

        # Retrieve API key from config if config_id provided
        api_key = None
        if llm_config_id and use_llm:
            config = await db.get_llm_config_by_id(llm_config_id)
            if config:
                api_key = config.get('api_key')
                llm_provider = config.get('provider', llm_provider)
                llm_model = config.get('model') or llm_model
                logger.info(f"Using LLM config {llm_config_id}: {llm_provider}/{llm_model}")
            else:
                logger.warning(f"LLM config {llm_config_id} not found")

        # Handle file upload or pasted text
        paper_source = source_value
        paper_title = "Processing..."  # Placeholder title until we parse the paper
        if source_type == "file" and file:
            # Save uploaded file to permanent uploads directory
            uploads_dir = Path(__file__).parent / "uploads"
            uploads_dir.mkdir(parents=True, exist_ok=True)
            # Use check-specific naming to avoid conflicts; strip path
            # separators so a crafted filename cannot escape uploads_dir.
            safe_filename = file.filename.replace("/", "_").replace("\\", "_")
            file_path = uploads_dir / f"{session_id}_{safe_filename}"
            with open(file_path, "wb") as f:
                content = await file.read()
                f.write(content)
            paper_source = str(file_path)
            paper_title = file.filename
        elif source_type == "text":
            if not source_text:
                raise HTTPException(status_code=400, detail="No text provided")
            # Save pasted text to a file for later retrieval and thumbnail generation
            text_dir = Path(tempfile.gettempdir()) / "refchecker_texts"
            text_dir.mkdir(parents=True, exist_ok=True)
            text_file_path = text_dir / f"pasted_{session_id}.txt"
            with open(text_file_path, "w", encoding="utf-8") as f:
                f.write(source_text)
            paper_source = str(text_file_path)
            paper_title = "Pasted Text"
        elif source_type == "url":
            paper_title = source_value

        if not paper_source:
            raise HTTPException(status_code=400, detail="No source provided")

        # Create history entry immediately (in_progress status)
        check_id = await db.create_pending_check(
            paper_title=paper_title,
            paper_source=paper_source,
            source_type=source_type,
            llm_provider=llm_provider if use_llm else None,
            llm_model=llm_model if use_llm else None
        )
        logger.info(f"Created pending check with ID {check_id}")

        # Start check in background
        cancel_event = asyncio.Event()
        task = asyncio.create_task(
            run_check(session_id, check_id, paper_source, source_type, llm_provider, llm_model, api_key, use_llm, cancel_event)
        )
        active_checks[session_id] = {"task": task, "cancel_event": cancel_event, "check_id": check_id}

        return {
            "session_id": session_id,
            "check_id": check_id,
            "message": "Check started",
            "source": paper_source
        }

    except HTTPException:
        # BUGFIX: re-raise deliberate 4xx responses unchanged. Previously the
        # generic handler below caught them (HTTPException subclasses
        # Exception) and rewrapped the 400s as 500 Internal Server Error.
        # This also matches the error-handling style of the other endpoints.
        raise
    except Exception as e:
        logger.error(f"Error starting check: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
246
+
247
+
248
async def run_check(
    session_id: str,
    check_id: int,
    paper_source: str,
    source_type: str,
    llm_provider: str,
    llm_model: Optional[str],
    api_key: Optional[str],
    use_llm: bool,
    cancel_event: asyncio.Event
):
    """
    Run reference check in background and emit progress updates

    Progress events are streamed over the session's WebSocket and
    periodically persisted so partial results survive a page refresh.

    Args:
        session_id: Unique session ID
        check_id: Database ID for this check
        paper_source: Paper URL, ArXiv ID, or file path
        source_type: 'url', 'file', or 'text'
        llm_provider: LLM provider
        llm_model: Specific model
        api_key: API key for the LLM provider
        use_llm: Whether to use LLM
        cancel_event: Set externally to request cooperative cancellation
    """
    try:
        # Wait for WebSocket to connect (give client time to establish connection)
        logger.info(f"Waiting for WebSocket connection for session {session_id}...")
        for _ in range(30):  # Wait up to 3 seconds
            if session_id in manager.active_connections:
                logger.info(f"WebSocket connected for session {session_id}")
                break
            await asyncio.sleep(0.1)
        else:
            # for/else: only reached when the loop was never broken out of.
            logger.warning(f"WebSocket not connected after 3s for session {session_id}, proceeding anyway")

        # Track accumulated results for incremental saving
        accumulated_results = []
        last_save_count = 0  # Track when we last saved to reduce lock contention

        # Create progress callback that also saves to DB
        async def progress_callback(event_type: str, data: dict):
            nonlocal accumulated_results, last_save_count
            await manager.send_message(session_id, event_type, data)

            # Save reference results to DB as they come in
            if event_type == "reference_result":
                accumulated_results.append(data)

            # Save progress to DB every 3 references to reduce lock contention
            if event_type == "summary_update":
                current_count = len(accumulated_results)
                # Save every 3 references, or on first result
                if current_count - last_save_count >= 3 or (current_count == 1 and last_save_count == 0):
                    try:
                        await db.update_check_progress(
                            check_id=check_id,
                            total_refs=data.get("total_refs", 0),
                            errors_count=data.get("errors_count", 0),
                            warnings_count=data.get("warnings_count", 0),
                            suggestions_count=data.get("suggestions_count", 0),
                            unverified_count=data.get("unverified_count", 0),
                            refs_with_errors=data.get("refs_with_errors", 0),
                            refs_with_warnings_only=data.get("refs_with_warnings_only", 0),
                            refs_verified=data.get("refs_verified", 0),
                            results=accumulated_results
                        )
                        last_save_count = current_count
                    except Exception as e:
                        # Incremental saves are best-effort; the final
                        # update_check_results below is authoritative.
                        logger.warning(f"Failed to save progress: {e}")

        # Create title update callback
        async def title_update_callback(check_id: int, paper_title: str):
            await db.update_check_title(check_id, paper_title)
            logger.info(f"Updated paper title for check {check_id}: {paper_title}")

        # Create bibliography source callback to save bbl/bib content
        async def bibliography_source_callback(check_id: int, content: str, arxiv_id: str):
            try:
                # Save the bibliography content to a file
                bib_dir = Path(__file__).parent / "uploads" / "bibliography"
                bib_dir.mkdir(parents=True, exist_ok=True)
                bib_path = bib_dir / f"{check_id}_{arxiv_id}_bibliography.txt"
                with open(bib_path, "w", encoding="utf-8") as f:
                    f.write(content)
                # Update the database with the path
                await db.update_check_bibliography_source(check_id, str(bib_path))
                logger.info(f"Saved bibliography source for check {check_id}: {bib_path}")
            except Exception as e:
                # Bibliography source is optional extra data; never fail the
                # whole check because it could not be saved.
                logger.warning(f"Failed to save bibliography source: {e}")

        # Create checker with progress callback
        checker = ProgressRefChecker(
            llm_provider=llm_provider,
            llm_model=llm_model,
            api_key=api_key,
            use_llm=use_llm,
            progress_callback=progress_callback,
            cancel_event=cancel_event,
            check_id=check_id,
            title_update_callback=title_update_callback,
            bibliography_source_callback=bibliography_source_callback
        )

        # Run the check
        result = await checker.check_paper(paper_source, source_type)

        # For file uploads, don't overwrite the original filename with "Unknown Paper"
        # The correct title was already set in the database when the check was created
        result_title = result["paper_title"]
        if source_type == "file" and result_title == "Unknown Paper":
            result_title = None  # Don't update title

        # Update the existing check entry with results
        await db.update_check_results(
            check_id=check_id,
            paper_title=result_title,
            total_refs=result["summary"]["total_refs"],
            errors_count=result["summary"]["errors_count"],
            warnings_count=result["summary"]["warnings_count"],
            suggestions_count=result["summary"].get("suggestions_count", 0),
            unverified_count=result["summary"]["unverified_count"],
            refs_with_errors=result["summary"].get("refs_with_errors", 0),
            refs_with_warnings_only=result["summary"].get("refs_with_warnings_only", 0),
            refs_verified=result["summary"].get("refs_verified", 0),
            results=result["references"],
            status='completed',
            extraction_method=result.get("extraction_method")
        )

        # Generate thumbnail for file uploads
        if source_type == "file":
            try:
                # Generate and cache thumbnail
                if paper_source.lower().endswith('.pdf'):
                    thumbnail_path = await generate_pdf_thumbnail_async(paper_source)
                else:
                    thumbnail_path = await get_text_thumbnail_async(check_id, "", paper_source)
                if thumbnail_path:
                    await db.update_check_thumbnail(check_id, thumbnail_path)
                    logger.info(f"Generated thumbnail for check {check_id}: {thumbnail_path}")
            except Exception as thumb_error:
                # Thumbnails are cosmetic; never fail a completed check here.
                logger.warning(f"Failed to generate thumbnail for check {check_id}: {thumb_error}")

        # Note: We keep uploaded files for later access via /api/file/{check_id}

    except asyncio.CancelledError:
        logger.info(f"Check cancelled: {session_id}")
        await db.update_check_status(check_id, 'cancelled')
        await manager.send_message(session_id, "cancelled", {"message": "Check cancelled", "check_id": check_id})
    except Exception as e:
        logger.error(f"Error in run_check: {e}", exc_info=True)
        await db.update_check_status(check_id, 'error')
        await manager.send_message(session_id, "error", {
            "message": f"Check failed: {str(e)}",
            "details": type(e).__name__,
            "check_id": check_id
        })
    finally:
        # Always drop the session from the active-check registry.
        active_checks.pop(session_id, None)
407
+
408
+
409
@app.get("/api/history")
async def get_history(limit: int = 50):
    """Return the most recent checks, attaching live session ids where known."""
    try:
        items = await db.get_history(limit)

        for entry in items:
            # In-progress checks may still have a running task in this
            # process; expose its session_id so the client can re-attach.
            if entry.get("status") != "in_progress":
                continue
            sid = _session_id_for_check(entry["id"])
            if sid:
                entry["session_id"] = sid

        return items  # Return array directly
    except Exception as e:
        logger.error(f"Error getting history: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
427
+
428
+
429
@app.get("/api/history/{check_id}")
async def get_check_detail(check_id: int):
    """Return the stored record for one check, plus its live session if running."""
    try:
        record = await db.get_check_by_id(check_id)
        if not record:
            raise HTTPException(status_code=404, detail="Check not found")

        if record.get("status") == "in_progress":
            sid = _session_id_for_check(check_id)
            if sid:
                record["session_id"] = sid
        return record
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting check detail: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
447
+
448
+
449
@app.get("/api/thumbnail/{check_id}")
async def get_thumbnail(check_id: int):
    """
    Get or generate a thumbnail for a check.

    Returns the thumbnail image file if available, or generates one on-demand.
    For ArXiv papers, downloads the PDF and generates a thumbnail of the first page.
    For uploaded PDFs, generates a thumbnail from the file.
    For pasted text, returns a placeholder thumbnail.
    """
    try:
        check = await db.get_check_by_id(check_id)
        if not check:
            raise HTTPException(status_code=404, detail="Check not found")

        # Check if we already have a cached thumbnail path
        thumbnail_path = check.get('thumbnail_path')
        if thumbnail_path and os.path.exists(thumbnail_path):
            return FileResponse(
                thumbnail_path,
                media_type="image/png",
                headers={"Cache-Control": "public, max-age=86400"} # Cache for 1 day
            )

        # Generate thumbnail based on source type
        paper_source = check.get('paper_source', '')
        source_type = check.get('source_type', 'url')

        # Try to extract ArXiv ID (new-style IDs like 2301.12345, optional version)
        import re
        arxiv_id_pattern = r'(\d{4}\.\d{4,5})(v\d+)?'
        arxiv_match = re.search(arxiv_id_pattern, paper_source)

        # Check if this is a direct PDF URL (not ArXiv)
        is_direct_pdf_url = (
            source_type == 'url' and
            paper_source.lower().endswith('.pdf') and
            'arxiv.org' not in paper_source.lower()
        )

        if is_direct_pdf_url:
            # Generate thumbnail from direct PDF URL
            logger.info(f"Generating thumbnail from PDF URL: {paper_source}")
            import hashlib
            import tempfile
            import urllib.request

            # md5 is used only as a cache-key fingerprint, not for security.
            pdf_hash = hashlib.md5(paper_source.encode()).hexdigest()[:12]
            pdf_path = os.path.join(tempfile.gettempdir(), f"refchecker_pdf_{pdf_hash}.pdf")

            # Download PDF if not already cached
            if not os.path.exists(pdf_path):
                try:
                    # urlretrieve blocks, so run it off the event loop.
                    await asyncio.to_thread(lambda: urllib.request.urlretrieve(paper_source, pdf_path))
                except Exception as e:
                    # Fall back to a placeholder thumbnail on download failure.
                    logger.error(f"Failed to download PDF for thumbnail: {e}")
                    thumbnail_path = await get_text_thumbnail_async(check_id, "PDF")
                    pdf_path = None

            if pdf_path and os.path.exists(pdf_path):
                thumbnail_path = await generate_pdf_thumbnail_async(pdf_path)
            else:
                thumbnail_path = await get_text_thumbnail_async(check_id, "PDF")
        elif arxiv_match:
            # Generate thumbnail from ArXiv paper
            arxiv_id = arxiv_match.group(1)
            logger.info(f"Generating thumbnail for ArXiv paper: {arxiv_id}")
            thumbnail_path = await generate_arxiv_thumbnail_async(arxiv_id, check_id)
        elif source_type == 'file' and paper_source.lower().endswith('.pdf'):
            # Generate thumbnail from uploaded PDF
            if os.path.exists(paper_source):
                logger.info(f"Generating thumbnail from PDF: {paper_source}")
                thumbnail_path = await generate_pdf_thumbnail_async(paper_source)
            else:
                # PDF file no longer exists, use placeholder
                thumbnail_path = await get_text_thumbnail_async(check_id, "PDF")
        elif source_type == 'file':
            # For non-PDF file uploads, generate thumbnail with file content
            logger.info(f"Generating text content thumbnail for uploaded file check {check_id}")
            if os.path.exists(paper_source):
                thumbnail_path = await get_text_thumbnail_async(check_id, "", paper_source)
            else:
                thumbnail_path = await get_text_thumbnail_async(check_id, "Uploaded file")
        elif source_type == 'text':
            # Generate thumbnail with actual text content for pasted text
            logger.info(f"Generating text content thumbnail for check {check_id}")
            # paper_source is now a file path for text sources
            thumbnail_path = await get_text_thumbnail_async(check_id, "", paper_source)
        else:
            # Default placeholder for other sources
            thumbnail_path = await get_text_thumbnail_async(check_id, source_type)

        if thumbnail_path and os.path.exists(thumbnail_path):
            # Cache the thumbnail path in the database so the next request
            # hits the fast path at the top of this handler.
            await db.update_check_thumbnail(check_id, thumbnail_path)

            return FileResponse(
                thumbnail_path,
                media_type="image/png",
                headers={"Cache-Control": "public, max-age=86400"}
            )
        else:
            raise HTTPException(status_code=404, detail="Could not generate thumbnail")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting thumbnail: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
558
+
559
+
560
@app.get("/api/preview/{check_id}")
async def get_preview(check_id: int):
    """
    Get or generate a high-resolution preview for a check.

    Returns a larger preview image suitable for overlay display.
    For ArXiv papers, downloads the PDF and generates a preview of the first page.
    For uploaded PDFs, generates a preview from the file.
    Unlike the thumbnail endpoint, the preview path is not cached in the DB.
    """
    try:
        check = await db.get_check_by_id(check_id)
        if not check:
            raise HTTPException(status_code=404, detail="Check not found")

        # Generate preview based on source type
        paper_source = check.get('paper_source', '')
        source_type = check.get('source_type', 'url')

        # Try to extract ArXiv ID (new-style IDs like 2301.12345, optional version)
        import re
        arxiv_id_pattern = r'(\d{4}\.\d{4,5})(v\d+)?'
        arxiv_match = re.search(arxiv_id_pattern, paper_source)

        # Check if this is a direct PDF URL (not ArXiv)
        is_direct_pdf_url = (
            source_type == 'url' and
            paper_source.lower().endswith('.pdf') and
            'arxiv.org' not in paper_source.lower()
        )

        preview_path = None

        if is_direct_pdf_url:
            # Generate preview from direct PDF URL
            logger.info(f"Generating preview from PDF URL: {paper_source}")
            import hashlib
            import tempfile
            import urllib.request

            # md5 is used only as a cache-key fingerprint, not for security;
            # same key scheme as the thumbnail endpoint so downloads are shared.
            pdf_hash = hashlib.md5(paper_source.encode()).hexdigest()[:12]
            pdf_path = os.path.join(tempfile.gettempdir(), f"refchecker_pdf_{pdf_hash}.pdf")

            # Download PDF if not already cached
            if not os.path.exists(pdf_path):
                try:
                    # urlretrieve blocks, so run it off the event loop.
                    await asyncio.to_thread(lambda: urllib.request.urlretrieve(paper_source, pdf_path))
                except Exception as e:
                    logger.error(f"Failed to download PDF for preview: {e}")
                    pdf_path = None

            if pdf_path and os.path.exists(pdf_path):
                preview_path = await generate_pdf_preview_async(pdf_path)
        elif arxiv_match:
            # Generate preview from ArXiv paper
            arxiv_id = arxiv_match.group(1)
            logger.info(f"Generating preview for ArXiv paper: {arxiv_id}")
            preview_path = await generate_arxiv_preview_async(arxiv_id, check_id)
        elif source_type == 'file' and paper_source.lower().endswith('.pdf'):
            # Generate preview from uploaded PDF
            if os.path.exists(paper_source):
                logger.info(f"Generating preview from PDF: {paper_source}")
                preview_path = await generate_pdf_preview_async(paper_source)

        if preview_path and os.path.exists(preview_path):
            return FileResponse(
                preview_path,
                media_type="image/png",
                headers={"Cache-Control": "public, max-age=86400"} # Cache for 1 day
            )
        else:
            # Fall back to thumbnail if preview can't be generated
            # (the client is expected to handle this 404 by showing the thumbnail).
            raise HTTPException(status_code=404, detail="Could not generate preview")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting preview: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
638
+
639
+
640
@app.get("/api/text/{check_id}")
async def get_pasted_text(check_id: int):
    """Serve the pasted-text content of a check as plain text for viewing."""
    try:
        record = await db.get_check_by_id(check_id)
        if not record:
            raise HTTPException(status_code=404, detail="Check not found")

        if record.get('source_type', '') != 'text':
            raise HTTPException(status_code=400, detail="This check is not from pasted text")

        source = record.get('paper_source', '')

        # For text checks, paper_source normally points at a saved file.
        if os.path.exists(source):
            return FileResponse(
                source,
                media_type="text/plain; charset=utf-8",
                filename="pasted_bibliography.txt",
                headers={
                    "Content-Type": "text/plain; charset=utf-8",
                    "Cache-Control": "public, max-age=3600"
                }
            )

        # Legacy rows stored the raw text itself in paper_source; serve it inline.
        from fastapi.responses import PlainTextResponse
        return PlainTextResponse(
            source,
            headers={"Cache-Control": "public, max-age=3600"}
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting pasted text: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
682
+
683
+
684
@app.get("/api/file/{check_id}")
async def get_uploaded_file(check_id: int):
    """Serve the originally uploaded file for a check (view/download)."""
    try:
        record = await db.get_check_by_id(check_id)
        if not record:
            raise HTTPException(status_code=404, detail="Check not found")

        path = record.get('paper_source', '')
        filename = record.get('paper_title', 'uploaded_file')

        if record.get('source_type', '') != 'file':
            raise HTTPException(status_code=400, detail="This check is not from an uploaded file")

        if not os.path.exists(path):
            raise HTTPException(status_code=404, detail="File no longer exists")

        # Map known extensions to a media type; anything unrecognized is
        # served as a generic binary download.
        media_type = "application/octet-stream"
        lowered = path.lower()
        for suffix, mtype in (
            ('.pdf', "application/pdf"),
            ('.txt', "text/plain; charset=utf-8"),
            ('.bib', "text/plain; charset=utf-8"),
            ('.tex', "text/plain; charset=utf-8"),
        ):
            if lowered.endswith(suffix):
                media_type = mtype
                break

        return FileResponse(
            path,
            media_type=media_type,
            filename=filename,
            headers={"Cache-Control": "public, max-age=3600"}
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting uploaded file: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
729
+
730
+
731
@app.get("/api/bibliography/{check_id}")
async def get_bibliography_source(check_id: int):
    """Serve the bbl/bib source file that references were extracted from.

    Prefers the bibliography file saved during the check (from ArXiv source);
    falls back to the pasted text itself when that was a bbl/bib file.
    """
    try:
        record = await db.get_check_by_id(check_id)
        if not record:
            raise HTTPException(status_code=404, detail="Check not found")

        saved_path = record.get('bibliography_source_path', '')
        method = record.get('extraction_method', '')
        src_type = record.get('source_type', '')
        src = record.get('paper_source', '')

        plain_headers = {
            "Content-Type": "text/plain; charset=utf-8",
            "Cache-Control": "public, max-age=3600"
        }

        # Preferred: the bibliography file persisted while the check ran.
        if saved_path and os.path.exists(saved_path):
            return FileResponse(
                saved_path,
                media_type="text/plain; charset=utf-8",
                filename=f"bibliography_{check_id}.{method or 'txt'}",
                headers=plain_headers
            )

        # Fallback: pasted text that was itself bbl/bib content.
        if src_type == 'text' and method in ['bbl', 'bib'] and os.path.exists(src):
            return FileResponse(
                src,
                media_type="text/plain; charset=utf-8",
                filename=f"bibliography_{check_id}.{method}",
                headers=plain_headers
            )

        raise HTTPException(status_code=404, detail="Bibliography source not available for this check")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting bibliography source: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
780
+
781
+
782
@app.post("/api/recheck/{check_id}")
async def recheck(check_id: int):
    """Re-run a previous check.

    Creates a fresh pending history entry and launches the check in the
    background using the original source and LLM settings.

    Raises:
        HTTPException: 404 when the original check does not exist,
            500 on unexpected failures.
    """
    try:
        # Get original check
        original = await db.get_check_by_id(check_id)
        if not original:
            raise HTTPException(status_code=404, detail="Check not found")

        # Generate new session ID
        session_id = str(uuid.uuid4())

        # Determine source type
        source = original["paper_source"]
        source_type = original.get("source_type") or (
            "url" if source.startswith("http") or "arxiv" in source.lower() else "file"
        )

        # BUGFIX: rows created without an LLM store llm_provider as NULL, and
        # dict.get's default only applies when the key is *missing* — so
        # .get("llm_provider", "anthropic") could yield None. Use `or` so a
        # stored None still falls back to the default provider.
        llm_provider = original.get("llm_provider") or "anthropic"
        llm_model = original.get("llm_model")

        # Create history entry immediately
        new_check_id = await db.create_pending_check(
            paper_title=original.get("paper_title", "Re-checking..."),
            paper_source=source,
            source_type=source_type,
            llm_provider=llm_provider,
            llm_model=llm_model
        )

        # Start check in background
        cancel_event = asyncio.Event()
        task = asyncio.create_task(
            run_check(
                session_id,
                new_check_id,
                source,
                source_type,
                llm_provider,
                llm_model,
                None,  # API key will need to be retrieved separately
                True,
                cancel_event
            )
        )
        active_checks[session_id] = {"task": task, "cancel_event": cancel_event, "check_id": new_check_id}

        return {
            "session_id": session_id,
            "check_id": new_check_id,
            "message": "Re-check started",
            "original_id": check_id
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error rechecking: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
841
+
842
+
843
@app.post("/api/cancel/{session_id}")
async def cancel_check(session_id: str):
    """Request cancellation of a check that is still running."""
    entry = active_checks.get(session_id)
    if not entry:
        raise HTTPException(status_code=404, detail="Active check not found")
    # Signal cooperative cancellation first, then cancel the task outright.
    entry["cancel_event"].set()
    entry["task"].cancel()
    return {"message": "Cancellation requested"}
852
+
853
+
854
+ @app.delete("/api/history/{check_id}")
855
+ async def delete_check(check_id: int):
856
+ """Delete a check from history"""
857
+ try:
858
+ success = await db.delete_check(check_id)
859
+ if success:
860
+ return {"message": "Check deleted successfully"}
861
+ else:
862
+ raise HTTPException(status_code=404, detail="Check not found")
863
+ except HTTPException:
864
+ raise
865
+ except Exception as e:
866
+ logger.error(f"Error deleting check: {e}", exc_info=True)
867
+ raise HTTPException(status_code=500, detail=str(e))
868
+
869
+
870
+ @app.patch("/api/history/{check_id}")
871
+ async def update_check_label(check_id: int, update: CheckLabelUpdate):
872
+ """Update the custom label for a check"""
873
+ try:
874
+ success = await db.update_check_label(check_id, update.custom_label)
875
+ if success:
876
+ return {"message": "Label updated successfully"}
877
+ else:
878
+ raise HTTPException(status_code=404, detail="Check not found")
879
+ except HTTPException:
880
+ raise
881
+ except Exception as e:
882
+ logger.error(f"Error updating label: {e}", exc_info=True)
883
+ raise HTTPException(status_code=500, detail=str(e))
884
+
885
+
886
+ # LLM Configuration endpoints
887
+
888
+ @app.get("/api/llm-configs")
889
+ async def get_llm_configs():
890
+ """Get all LLM configurations (API keys are not returned)"""
891
+ try:
892
+ configs = await db.get_llm_configs()
893
+ return configs
894
+ except Exception as e:
895
+ logger.error(f"Error getting LLM configs: {e}", exc_info=True)
896
+ raise HTTPException(status_code=500, detail=str(e))
897
+
898
+
899
+ @app.post("/api/llm-configs")
900
+ async def create_llm_config(config: LLMConfigCreate):
901
+ """Create a new LLM configuration"""
902
+ try:
903
+ config_id = await db.create_llm_config(
904
+ name=config.name,
905
+ provider=config.provider,
906
+ model=config.model,
907
+ api_key=config.api_key,
908
+ endpoint=config.endpoint
909
+ )
910
+ # Return the created config (without API key)
911
+ return {
912
+ "id": config_id,
913
+ "name": config.name,
914
+ "provider": config.provider,
915
+ "model": config.model,
916
+ "endpoint": config.endpoint,
917
+ "is_default": False
918
+ }
919
+ except Exception as e:
920
+ logger.error(f"Error creating LLM config: {e}", exc_info=True)
921
+ raise HTTPException(status_code=500, detail=str(e))
922
+
923
+
924
+ @app.put("/api/llm-configs/{config_id}")
925
+ async def update_llm_config(config_id: int, config: LLMConfigUpdate):
926
+ """Update an existing LLM configuration"""
927
+ try:
928
+ success = await db.update_llm_config(
929
+ config_id=config_id,
930
+ name=config.name,
931
+ provider=config.provider,
932
+ model=config.model,
933
+ api_key=config.api_key,
934
+ endpoint=config.endpoint
935
+ )
936
+ if success:
937
+ # Get updated config
938
+ updated = await db.get_llm_configs()
939
+ updated_config = next((c for c in updated if c["id"] == config_id), None)
940
+ return updated_config or {"id": config_id, "message": "Updated"}
941
+ else:
942
+ raise HTTPException(status_code=404, detail="Config not found")
943
+ except HTTPException:
944
+ raise
945
+ except Exception as e:
946
+ logger.error(f"Error updating LLM config: {e}", exc_info=True)
947
+ raise HTTPException(status_code=500, detail=str(e))
948
+
949
+
950
+ @app.delete("/api/llm-configs/{config_id}")
951
+ async def delete_llm_config(config_id: int):
952
+ """Delete an LLM configuration"""
953
+ try:
954
+ success = await db.delete_llm_config(config_id)
955
+ if success:
956
+ return {"message": "Config deleted successfully"}
957
+ else:
958
+ raise HTTPException(status_code=404, detail="Config not found")
959
+ except HTTPException:
960
+ raise
961
+ except Exception as e:
962
+ logger.error(f"Error deleting LLM config: {e}", exc_info=True)
963
+ raise HTTPException(status_code=500, detail=str(e))
964
+
965
+
966
+ @app.post("/api/llm-configs/{config_id}/set-default")
967
+ async def set_default_llm_config(config_id: int):
968
+ """Set an LLM configuration as the default"""
969
+ try:
970
+ success = await db.set_default_llm_config(config_id)
971
+ if success:
972
+ return {"message": "Default config set successfully"}
973
+ else:
974
+ raise HTTPException(status_code=404, detail="Config not found")
975
+ except HTTPException:
976
+ raise
977
+ except Exception as e:
978
+ logger.error(f"Error setting default config: {e}", exc_info=True)
979
+ raise HTTPException(status_code=500, detail=str(e))
980
+
981
+
982
+ @app.post("/api/llm-configs/validate")
983
+ async def validate_llm_config(config: LLMConfigValidate):
984
+ """
985
+ Validate an LLM configuration by making a test API call.
986
+ Returns success or error message.
987
+ """
988
+ try:
989
+ import sys
990
+ from pathlib import Path
991
+ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
992
+ from refchecker.llm.base import create_llm_provider
993
+
994
+ # Build config
995
+ llm_config = {}
996
+ if config.model:
997
+ llm_config['model'] = config.model
998
+ if config.api_key:
999
+ llm_config['api_key'] = config.api_key
1000
+ if config.endpoint:
1001
+ llm_config['endpoint'] = config.endpoint
1002
+
1003
+ # Try to create provider
1004
+ provider = create_llm_provider(config.provider, llm_config)
1005
+ if not provider:
1006
+ raise HTTPException(status_code=400, detail=f"Failed to create {config.provider} provider")
1007
+
1008
+ # Make a simple test call using _call_llm
1009
+ test_response = provider._call_llm("Say 'ok' if you can hear me.")
1010
+
1011
+ if test_response:
1012
+ return {"valid": True, "message": "Connection successful"}
1013
+ else:
1014
+ raise HTTPException(status_code=400, detail="Provider returned empty response")
1015
+
1016
+ except HTTPException:
1017
+ raise
1018
+ except Exception as e:
1019
+ error_msg = str(e)
1020
+ logger.error(f"LLM validation failed: {error_msg}")
1021
+ # Extract useful error message
1022
+ if "404" in error_msg and "model" in error_msg.lower():
1023
+ raise HTTPException(status_code=400, detail=f"Invalid model name. The model '{config.model}' was not found.")
1024
+ elif "401" in error_msg or "unauthorized" in error_msg.lower():
1025
+ raise HTTPException(status_code=400, detail="Invalid API key")
1026
+ elif "rate" in error_msg.lower():
1027
+ raise HTTPException(status_code=400, detail="Rate limited - but API key is valid")
1028
+ else:
1029
+ raise HTTPException(status_code=400, detail=f"Validation failed: {error_msg}")
1030
+
1031
+
1032
+ # Semantic Scholar API Key endpoints
1033
+
1034
class SemanticScholarKeyUpdate(BaseModel):
    """Request body for storing or replacing the Semantic Scholar API key."""
    # Raw key value; persisted server-side and never returned by any endpoint
    api_key: str
1036
+
1037
+
1038
class SemanticScholarKeyValidate(BaseModel):
    """Request body for test-validating a Semantic Scholar API key (not stored)."""
    # Candidate key to verify against the Semantic Scholar API
    api_key: str
1040
+
1041
+
1042
+ @app.post("/api/settings/semantic-scholar/validate")
1043
+ async def validate_semantic_scholar_key(data: SemanticScholarKeyValidate):
1044
+ """
1045
+ Validate a Semantic Scholar API key by making a test API call.
1046
+ Returns success or error message.
1047
+ """
1048
+ import httpx
1049
+
1050
+ try:
1051
+ if not data.api_key or not data.api_key.strip():
1052
+ raise HTTPException(status_code=400, detail="API key cannot be empty")
1053
+
1054
+ api_key = data.api_key.strip()
1055
+
1056
+ # Test the API key by making a simple search query
1057
+ # Using the paper search endpoint with a minimal query
1058
+ url = "https://api.semanticscholar.org/graph/v1/paper/search"
1059
+ headers = {
1060
+ "Accept": "application/json",
1061
+ "x-api-key": api_key
1062
+ }
1063
+ params = {
1064
+ "query": "test",
1065
+ "limit": 1,
1066
+ "fields": "title"
1067
+ }
1068
+
1069
+ async with httpx.AsyncClient(timeout=10.0) as client:
1070
+ response = await client.get(url, headers=headers, params=params)
1071
+
1072
+ if response.status_code == 200:
1073
+ return {"valid": True, "message": "API key is valid"}
1074
+ elif response.status_code == 401 or response.status_code == 403:
1075
+ raise HTTPException(status_code=400, detail="Invalid API key")
1076
+ elif response.status_code == 429:
1077
+ # Rate limited but key is valid
1078
+ return {"valid": True, "message": "API key is valid (rate limited)"}
1079
+ else:
1080
+ raise HTTPException(
1081
+ status_code=400,
1082
+ detail=f"API validation failed with status {response.status_code}"
1083
+ )
1084
+
1085
+ except HTTPException:
1086
+ raise
1087
+ except httpx.TimeoutException:
1088
+ raise HTTPException(status_code=400, detail="Connection timed out. Please try again.")
1089
+ except httpx.RequestError as e:
1090
+ logger.error(f"Semantic Scholar validation request error: {e}")
1091
+ raise HTTPException(status_code=400, detail=f"Connection error: {str(e)}")
1092
+ except Exception as e:
1093
+ logger.error(f"Semantic Scholar validation failed: {e}", exc_info=True)
1094
+ raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
1095
+
1096
+
1097
+ @app.get("/api/settings/semantic-scholar")
1098
+ async def get_semantic_scholar_key_status():
1099
+ """Check if Semantic Scholar API key is configured (does not return the key)"""
1100
+ try:
1101
+ has_key = await db.has_setting("semantic_scholar_api_key")
1102
+ return {"has_key": has_key}
1103
+ except Exception as e:
1104
+ logger.error(f"Error checking Semantic Scholar key: {e}", exc_info=True)
1105
+ raise HTTPException(status_code=500, detail=str(e))
1106
+
1107
+
1108
+ @app.put("/api/settings/semantic-scholar")
1109
+ async def set_semantic_scholar_key(data: SemanticScholarKeyUpdate):
1110
+ """Set or update the Semantic Scholar API key"""
1111
+ try:
1112
+ if not data.api_key or not data.api_key.strip():
1113
+ raise HTTPException(status_code=400, detail="API key cannot be empty")
1114
+
1115
+ await db.set_setting("semantic_scholar_api_key", data.api_key.strip())
1116
+ logger.info("Semantic Scholar API key updated")
1117
+ return {"message": "Semantic Scholar API key saved", "has_key": True}
1118
+ except HTTPException:
1119
+ raise
1120
+ except Exception as e:
1121
+ logger.error(f"Error saving Semantic Scholar key: {e}", exc_info=True)
1122
+ raise HTTPException(status_code=500, detail=str(e))
1123
+
1124
+
1125
+ @app.delete("/api/settings/semantic-scholar")
1126
+ async def delete_semantic_scholar_key():
1127
+ """Delete the Semantic Scholar API key"""
1128
+ try:
1129
+ await db.delete_setting("semantic_scholar_api_key")
1130
+ logger.info("Semantic Scholar API key deleted")
1131
+ return {"message": "Semantic Scholar API key deleted", "has_key": False}
1132
+ except Exception as e:
1133
+ logger.error(f"Error deleting Semantic Scholar key: {e}", exc_info=True)
1134
+ raise HTTPException(status_code=500, detail=str(e))
1135
+
1136
+
1137
+ # General Settings endpoints
1138
+
1139
class SettingUpdate(BaseModel):
    """Request body for PUT /api/settings/{setting_key}."""
    # New value; always transported as a string and validated per setting key
    value: str
1141
+
1142
+
1143
+ @app.get("/api/settings")
1144
+ async def get_all_settings():
1145
+ """Get all application settings"""
1146
+ try:
1147
+ # Define all settings with their defaults and metadata
1148
+ settings_config = {
1149
+ "max_concurrent_checks": {
1150
+ "default": str(DEFAULT_MAX_CONCURRENT),
1151
+ "type": "number",
1152
+ "label": "Max Concurrent Checks",
1153
+ "description": "Maximum number of references to check simultaneously across all papers",
1154
+ "min": 1,
1155
+ "max": 20,
1156
+ "section": "Performance"
1157
+ }
1158
+ }
1159
+
1160
+ # Get current values from database
1161
+ settings = {}
1162
+ for key, config in settings_config.items():
1163
+ value = await db.get_setting(key)
1164
+ settings[key] = {
1165
+ "value": value if value is not None else config["default"],
1166
+ "default": config["default"],
1167
+ "type": config["type"],
1168
+ "label": config["label"],
1169
+ "description": config["description"],
1170
+ "section": config["section"]
1171
+ }
1172
+ # Include extra metadata for number types
1173
+ if config["type"] == "number":
1174
+ settings[key]["min"] = config.get("min")
1175
+ settings[key]["max"] = config.get("max")
1176
+
1177
+ return settings
1178
+ except Exception as e:
1179
+ logger.error(f"Error getting settings: {e}", exc_info=True)
1180
+ raise HTTPException(status_code=500, detail=str(e))
1181
+
1182
+
1183
+ @app.put("/api/settings/{setting_key}")
1184
+ async def update_setting(setting_key: str, update: SettingUpdate):
1185
+ """Update a specific setting"""
1186
+ try:
1187
+ # Validate the setting key
1188
+ valid_keys = {"max_concurrent_checks"}
1189
+ if setting_key not in valid_keys:
1190
+ raise HTTPException(status_code=400, detail=f"Unknown setting: {setting_key}")
1191
+
1192
+ # Apply setting-specific validation
1193
+ if setting_key == "max_concurrent_checks":
1194
+ try:
1195
+ value = int(update.value)
1196
+ if value < 1:
1197
+ value = 1
1198
+ if value > 50:
1199
+ value = 50
1200
+
1201
+ # Update the global limiter immediately
1202
+ limiter = get_limiter()
1203
+ await limiter.set_max_concurrent(value)
1204
+ logger.info(f"Updated global concurrency limit to {value}")
1205
+
1206
+ # Store the validated value
1207
+ await db.set_setting(setting_key, str(value))
1208
+
1209
+ return {"key": setting_key, "value": str(value), "message": "Setting updated"}
1210
+ except ValueError:
1211
+ raise HTTPException(status_code=400, detail="max_concurrent_checks must be a number")
1212
+
1213
+ # For other settings, just store the value
1214
+ await db.set_setting(setting_key, update.value)
1215
+ return {"key": setting_key, "value": update.value, "message": "Setting updated"}
1216
+
1217
+ except HTTPException:
1218
+ raise
1219
+ except Exception as e:
1220
+ logger.error(f"Error updating setting: {e}", exc_info=True)
1221
+ raise HTTPException(status_code=500, detail=str(e))
1222
+
1223
+
1224
+ # Debug/Admin endpoints
1225
+
1226
+ @app.delete("/api/admin/cache")
1227
+ async def clear_verification_cache():
1228
+ """Clear the verification cache"""
1229
+ try:
1230
+ count = await db.clear_verification_cache()
1231
+ logger.info(f"Cleared {count} entries from verification cache")
1232
+ return {"message": f"Cleared {count} cached verification results", "count": count}
1233
+ except Exception as e:
1234
+ logger.error(f"Error clearing cache: {e}", exc_info=True)
1235
+ raise HTTPException(status_code=500, detail=str(e))
1236
+
1237
+
1238
+ @app.delete("/api/admin/database")
1239
+ async def clear_database():
1240
+ """Clear all data (cache + history) but keep settings and LLM configs"""
1241
+ try:
1242
+ # Clear verification cache
1243
+ cache_count = await db.clear_verification_cache()
1244
+
1245
+ # Clear check history
1246
+ async with aiosqlite.connect(db.db_path) as conn:
1247
+ await conn.execute("DELETE FROM check_history")
1248
+ await conn.commit()
1249
+ cursor = await conn.execute("SELECT changes()")
1250
+ row = await cursor.fetchone()
1251
+ history_count = row[0] if row else 0
1252
+
1253
+ logger.info(f"Cleared database: {cache_count} cache entries, {history_count} history entries")
1254
+ return {
1255
+ "message": f"Cleared {cache_count} cache entries and {history_count} history entries",
1256
+ "cache_count": cache_count,
1257
+ "history_count": history_count
1258
+ }
1259
+ except Exception as e:
1260
+ logger.error(f"Error clearing database: {e}", exc_info=True)
1261
+ raise HTTPException(status_code=500, detail=str(e))
1262
+
1263
+
1264
+ if __name__ == "__main__":
1265
+ import uvicorn
1266
+ uvicorn.run(app, host="0.0.0.0", port=8000)