arionxiv-1.0.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arionxiv/__init__.py +40 -0
- arionxiv/__main__.py +10 -0
- arionxiv/arxiv_operations/__init__.py +0 -0
- arionxiv/arxiv_operations/client.py +225 -0
- arionxiv/arxiv_operations/fetcher.py +173 -0
- arionxiv/arxiv_operations/searcher.py +122 -0
- arionxiv/arxiv_operations/utils.py +293 -0
- arionxiv/cli/__init__.py +4 -0
- arionxiv/cli/commands/__init__.py +1 -0
- arionxiv/cli/commands/analyze.py +587 -0
- arionxiv/cli/commands/auth.py +365 -0
- arionxiv/cli/commands/chat.py +714 -0
- arionxiv/cli/commands/daily.py +482 -0
- arionxiv/cli/commands/fetch.py +217 -0
- arionxiv/cli/commands/library.py +295 -0
- arionxiv/cli/commands/preferences.py +426 -0
- arionxiv/cli/commands/search.py +254 -0
- arionxiv/cli/commands/settings_unified.py +1407 -0
- arionxiv/cli/commands/trending.py +41 -0
- arionxiv/cli/commands/welcome.py +168 -0
- arionxiv/cli/main.py +407 -0
- arionxiv/cli/ui/__init__.py +1 -0
- arionxiv/cli/ui/global_theme_manager.py +173 -0
- arionxiv/cli/ui/logo.py +127 -0
- arionxiv/cli/ui/splash.py +89 -0
- arionxiv/cli/ui/theme.py +32 -0
- arionxiv/cli/ui/theme_system.py +391 -0
- arionxiv/cli/utils/__init__.py +54 -0
- arionxiv/cli/utils/animations.py +522 -0
- arionxiv/cli/utils/api_client.py +583 -0
- arionxiv/cli/utils/api_config.py +505 -0
- arionxiv/cli/utils/command_suggestions.py +147 -0
- arionxiv/cli/utils/db_config_manager.py +254 -0
- arionxiv/github_actions_runner.py +206 -0
- arionxiv/main.py +23 -0
- arionxiv/prompts/__init__.py +9 -0
- arionxiv/prompts/prompts.py +247 -0
- arionxiv/rag_techniques/__init__.py +8 -0
- arionxiv/rag_techniques/basic_rag.py +1531 -0
- arionxiv/scheduler_daemon.py +139 -0
- arionxiv/server.py +1000 -0
- arionxiv/server_main.py +24 -0
- arionxiv/services/__init__.py +73 -0
- arionxiv/services/llm_client.py +30 -0
- arionxiv/services/llm_inference/__init__.py +58 -0
- arionxiv/services/llm_inference/groq_client.py +469 -0
- arionxiv/services/llm_inference/llm_utils.py +250 -0
- arionxiv/services/llm_inference/openrouter_client.py +564 -0
- arionxiv/services/unified_analysis_service.py +872 -0
- arionxiv/services/unified_auth_service.py +457 -0
- arionxiv/services/unified_config_service.py +456 -0
- arionxiv/services/unified_daily_dose_service.py +823 -0
- arionxiv/services/unified_database_service.py +1633 -0
- arionxiv/services/unified_llm_service.py +366 -0
- arionxiv/services/unified_paper_service.py +604 -0
- arionxiv/services/unified_pdf_service.py +522 -0
- arionxiv/services/unified_prompt_service.py +344 -0
- arionxiv/services/unified_scheduler_service.py +589 -0
- arionxiv/services/unified_user_service.py +954 -0
- arionxiv/utils/__init__.py +51 -0
- arionxiv/utils/api_helpers.py +200 -0
- arionxiv/utils/file_cleanup.py +150 -0
- arionxiv/utils/ip_helper.py +96 -0
- arionxiv-1.0.32.dist-info/METADATA +336 -0
- arionxiv-1.0.32.dist-info/RECORD +69 -0
- arionxiv-1.0.32.dist-info/WHEEL +5 -0
- arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
- arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
- arionxiv-1.0.32.dist-info/top_level.txt +1 -0
@@ -0,0 +1,564 @@
+# OpenRouter LLM client for AI-powered paper analysis
+# Uses free models like moonshotai/kimi-k2:free
+
+from typing import Dict, Any, List, Optional, Tuple
+from collections import OrderedDict
+import logging
+import json
+import asyncio
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+import time
+import httpx
+from rich.console import Console
+from dotenv import load_dotenv
+
+from .llm_utils import parse_json_response, generate_cache_key
+
+# Load .env from current directory first
+load_dotenv()
+# Also try to load from ~/.arionxiv/.env if it exists
+arionxiv_env = Path.home() / ".arionxiv" / ".env"
+if arionxiv_env.exists():
+    load_dotenv(arionxiv_env)
+
+# ============================================================================
+# LOGGER CONFIGURATION
+# ============================================================================
+
+logger = logging.getLogger(__name__)
+
+# ============================================================================
+# OPENROUTER CLIENT DEFINITION
+# ============================================================================
+
+class OpenRouterClient:
+    """
+    Production-ready client for LLM-based paper analysis using OpenRouter
+
+    Features:
+    - Access to multiple free AI models (Kimi K2, DeepSeek, etc.)
+    - Rate limiting and concurrency control with async safety
+    - Connection pooling and timeout handling
+    - Thread-safe LRU caching with TTL support
+    - Retry logic with exponential backoff
+    - Token usage tracking and monitoring
+    - Structured JSON response handling
+    - Async context manager support for proper resource cleanup
+    """
+
+    # Base URL for OpenRouter API (OpenAI-compatible)
+    BASE_URL = "https://openrouter.ai/api/v1"
+
+    # Free model configurations with context limits
+    # Free models on OpenRouter have ":free" suffix
+    MODEL_CONFIGS = {
+        "openai/gpt-oss-20b:free": {
+            "max_tokens": 131072,
+            "optimal_completion": 8000,
+            "rpm": 20,
+            "description": "OpenAI GPT-OSS 20B - Free open-source model"
+        },
+        "openai/gpt-oss-120b:free": {
+            "max_tokens": 131072,
+            "optimal_completion": 8000,
+            "rpm": 20,
+            "description": "OpenAI GPT-OSS 120B - Free open-source model"
+        },
+        "meta-llama/llama-3.3-70b-instruct:free": {
+            "max_tokens": 8192,
+            "optimal_completion": 4000,
+            "rpm": 20,
+            "description": "Meta Llama 3.3 70B - Free, reliable and fast"
+        },
+        "google/gemma-3-27b-it:free": {
+            "max_tokens": 8192,
+            "optimal_completion": 4000,
+            "rpm": 20,
+            "description": "Google Gemma 3 27B - Free"
+        },
+        "qwen/qwen3-32b:free": {
+            "max_tokens": 40000,
+            "optimal_completion": 8000,
+            "rpm": 20,
+            "description": "Qwen 3 32B - Free"
+        },
+        "meta-llama/llama-3.2-3b-instruct:free": {
+            "max_tokens": 8192,
+            "optimal_completion": 2000,
+            "rpm": 30,
+            "description": "Meta Llama 3.2 3B - Free, fast fallback"
+        },
+    }
+
+    DEFAULT_MODEL = "meta-llama/llama-3.3-70b-instruct:free"
+
+    # Verified free models from OpenRouter API (2026-01-03)
+    FALLBACK_MODELS = [
+        "openai/gpt-oss-20b:free",
+        "openai/gpt-oss-120b:free",
+        "google/gemma-3-27b-it:free",
+        "google/gemma-3-12b-it:free",
+        "mistralai/mistral-small-3.1-24b-instruct:free",
+        "moonshotai/kimi-k2:free",
+        "meta-llama/llama-3.2-3b-instruct:free",
+        "google/gemini-2.0-flash-exp:free",
+    ]
+
+    def __init__(
+        self,
+        max_concurrent_requests: int = 3,
+        enable_cache: bool = True,
+        cache_ttl_hours: int = 24,
+        console: Console = None,
+        model: str = None
+    ):
+        """
+        Initialize OpenRouter client with production-ready configuration
+
+        Args:
+            max_concurrent_requests: Maximum concurrent API requests
+            enable_cache: Enable LRU caching for repeated analyses
+            cache_ttl_hours: Time-to-live for cache entries in hours
+            console: Rich console for output (optional)
+            model: Model to use (default: the OPENROUTER_MODEL environment variable if set, otherwise DEFAULT_MODEL)
+        """
+        # API configuration - lazy loaded
+        self._api_key = None
+        self._api_key_checked = False
+        self.model = model or os.getenv("OPENROUTER_MODEL", self.DEFAULT_MODEL)
+        self.timeout = 120  # Longer timeout for free models
+        self._console = console or Console()
+
+        # App identification for OpenRouter rankings
+        self.site_url = os.getenv("OPENROUTER_SITE_URL", "https://github.com/ArionDas/ArionXiv")
+        self.site_name = os.getenv("OPENROUTER_SITE_NAME", "ArionXiv")
+
+        # Concurrency control
+        self.semaphore = asyncio.Semaphore(max_concurrent_requests)
+        self.max_retries = 3
+
+        # Thread-safe caching with TTL
+        self.enable_cache = enable_cache
+        self.cache: OrderedDict[str, Tuple[Any, datetime]] = OrderedDict()
+        self.cache_max_size = 100
+        self.cache_ttl = timedelta(hours=cache_ttl_hours)
+        self.cache_lock = asyncio.Lock()
+
+        # Monitoring and metrics
+        self.total_tokens_used = 0
+        self.total_requests = 0
+        self.total_cache_hits = 0
+        self.total_errors = 0
+
+        # HTTP client for API calls
+        self._http_client: Optional[httpx.AsyncClient] = None
+
+    @property
+    def api_key(self):
+        """Lazy load API key - re-checks if not found previously"""
+        # Always re-check if key was not found, in case it was loaded later
+        if not self._api_key_checked or self._api_key is None:
+            self._api_key = os.getenv("OPENROUTER_API_KEY")
+            self._api_key_checked = True
+        return self._api_key
+
+    def refresh_api_key(self):
+        """Force refresh the API key and model from environment"""
+        self._api_key = os.getenv("OPENROUTER_API_KEY")
+        self._api_key_checked = True
+        # Also refresh the model in case it was set later
+        self.model = os.getenv("OPENROUTER_MODEL", self.DEFAULT_MODEL)
+        return self._api_key is not None
+
+    @property
+    def is_available(self) -> bool:
+        """Check if the client is properly configured"""
+        # Re-check environment if not found - .env may have been loaded after initialization
+        if self._api_key is None:
+            load_dotenv()  # Try loading .env again
+            arionxiv_env = Path.home() / ".arionxiv" / ".env"
+            if arionxiv_env.exists():
+                load_dotenv(arionxiv_env)
+            self.refresh_api_key()
+        return self.api_key is not None
+
+    def get_model_name(self) -> str:
+        """Get the current model name"""
+        return self.model
+
+    def get_model_display_name(self) -> str:
+        """Get a user-friendly model display name"""
+        model_name = self.model
+
+        # Special handling for common model names
+        display_names = {
+            "openai/gpt-oss-20b:free": "OpenAI GPT-OSS 20B",
+            "openai/gpt-oss-120b:free": "OpenAI GPT-OSS 120B",
+            "meta-llama/llama-3.3-70b-instruct:free": "Llama 3.3 70B Instruct",
+            "google/gemma-3-27b-it:free": "Gemma 3 27B",
+            "google/gemma-3-12b-it:free": "Gemma 3 12B",
+            "qwen/qwen3-32b:free": "Qwen 3 32B",
+            "moonshotai/kimi-k2:free": "Kimi K2",
+            "mistralai/mistral-small-3.1-24b-instruct:free": "Mistral Small 3.1 24B",
+        }
+
+        if model_name in display_names:
+            return display_names[model_name]
+
+        # Fallback: parse the model name
+        if "/" in model_name:
+            model_name = model_name.split("/")[-1]
+        if ":free" in model_name:
+            model_name = model_name.replace(":free", "")
+        return model_name.replace("-", " ").title()
+
+    async def _get_http_client(self) -> httpx.AsyncClient:
+        """Get or create HTTP client with connection pooling"""
+        if self._http_client is None or self._http_client.is_closed:
+            self._http_client = httpx.AsyncClient(
+                base_url=self.BASE_URL,
+                timeout=httpx.Timeout(self.timeout, connect=10.0),
+                headers={
+                    "Authorization": f"Bearer {self.api_key}",
+                    "HTTP-Referer": self.site_url,
+                    "X-Title": self.site_name,
+                    "Content-Type": "application/json"
+                }
+            )
+        return self._http_client
+
+    async def __aenter__(self):
+        """Async context manager entry"""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit with proper resource cleanup"""
+        try:
+            if self._http_client and not self._http_client.is_closed:
+                await self._http_client.aclose()
+            async with self.cache_lock:
+                self.cache.clear()
+            logger.info("OpenRouter client closed and resources cleaned up")
+        except Exception as e:
+            logger.error(f"Error during client cleanup: {str(e)}")
+
+    async def close(self):
+        """Explicitly close the client"""
+        await self.__aexit__(None, None, None)
+
+    def _generate_cache_key(self, content: str, prompt_type: str) -> str:
+        """Generate cache key - delegates to shared utility"""
+        return generate_cache_key(content, prompt_type, self.model)
+
+    async def _get_from_cache(self, cache_key: str) -> Optional[Any]:
+        """Retrieve result from cache if available and not expired"""
+        if not self.enable_cache:
+            return None
+
+        async with self.cache_lock:
+            if cache_key not in self.cache:
+                return None
+
+            result, timestamp = self.cache[cache_key]
+
+            if datetime.now() - timestamp > self.cache_ttl:
+                del self.cache[cache_key]
+                return None
+
+            self.cache.move_to_end(cache_key)
+            self.total_cache_hits += 1
+            return result
+
+    async def _add_to_cache(self, cache_key: str, result: Any) -> None:
+        """Add result to cache with TTL and LRU eviction"""
+        if not self.enable_cache:
+            return
+
+        async with self.cache_lock:
+            if cache_key in self.cache:
+                self.cache[cache_key] = (result, datetime.now())
+                self.cache.move_to_end(cache_key)
+            else:
+                if len(self.cache) >= self.cache_max_size:
+                    oldest_key = next(iter(self.cache))
+                    self.cache.pop(oldest_key)
+
+                self.cache[cache_key] = (result, datetime.now())
+
+    def _parse_json_response(self, response_content: str, max_retries: int = 3) -> Dict[str, Any]:
+        """Parse JSON response - delegates to shared utility"""
+        return parse_json_response(response_content, max_retries)
+
+    async def _api_call_with_retry(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float = 0.3,
+        max_tokens: int = 8000,
+        response_format: Optional[Dict[str, str]] = None
+    ) -> Dict[str, Any]:
+        """Make API call with retry logic, exponential backoff, and model fallback"""
+        if not self.api_key:
+            raise ValueError("OpenRouter API key not configured. Set OPENROUTER_API_KEY environment variable.")
+
+        # Build list of models to try: current model + fallbacks
+        models_to_try = [self.model]
+        for fallback in self.FALLBACK_MODELS:
+            if fallback != self.model and fallback not in models_to_try:
+                models_to_try.append(fallback)
+
+        client = await self._get_http_client()
+        last_error = None
+
+        logger.debug(f"Will try models in order: {models_to_try}")
+
+        for model in models_to_try:
+            model_config = self.MODEL_CONFIGS.get(model, {
+                "max_tokens": 8192,
+                "optimal_completion": 4000
+            })
+            model_max_tokens = min(max_tokens, model_config.get("optimal_completion", 4000))
+
+            for attempt in range(self.max_retries):
+                try:
+                    async with self.semaphore:
+                        payload = {
+                            "model": model,
+                            "messages": messages,
+                            "temperature": temperature,
+                            "max_tokens": model_max_tokens
+                        }
+
+                        if response_format:
+                            payload["response_format"] = response_format
+
+                        logger.debug(f"Trying {model} (attempt {attempt + 1}/{self.max_retries})")
+                        response = await client.post("/chat/completions", json=payload)
+
+                        if response.status_code == 200:
+                            result = response.json()
+                            self.total_requests += 1
+
+                            if "usage" in result:
+                                self.total_tokens_used += result["usage"].get("total_tokens", 0)
+
+                            # If we switched models, update for future calls
+                            if model != self.model:
+                                logger.info(f"Switched from {self.model} to {model} due to failures")
+                                self.model = model
+
+                            return result
+
+                        elif response.status_code == 429:
+                            last_error = f"Rate limited for model {model}"
+                            wait_time = (2 ** attempt) * 2
+                            logger.debug(f"Rate limited, waiting {wait_time}s")
+                            await asyncio.sleep(wait_time)
+                            continue
+
+                        elif response.status_code >= 500:
+                            last_error = f"Server error {response.status_code} for model {model}"
+                            wait_time = (2 ** attempt) * 1
+                            logger.debug(f"Server error, waiting {wait_time}s")
+                            await asyncio.sleep(wait_time)
+                            continue
+
+                        else:
+                            error_detail = response.text
+                            last_error = f"API error: {response.status_code} - {error_detail}"
+                            logger.debug(f"Model {model} failed: {last_error}")
+                            break  # Try next model
+
+                except httpx.TimeoutException:
+                    self.total_errors += 1
+                    last_error = f"Timeout for model {model}"
+                    wait_time = (2 ** attempt) * 2
+                    await asyncio.sleep(wait_time)
+                    continue
+
+                except Exception as e:
+                    last_error = str(e)
+                    if attempt == self.max_retries - 1:
+                        logger.debug(f"Model {model} exhausted retries: {last_error}")
+                        break  # Try next model
+
+                    wait_time = (2 ** attempt) * 1
+                    await asyncio.sleep(wait_time)
+
+            # If we got here, this model failed - try the next one
+            logger.debug(f"Model {model} failed with: {last_error}. Trying next fallback...")
+
+        # All models failed
+        self.total_errors += 1
+        raise Exception(f"API call failed after trying all models. Last error: {last_error}")
+
+    async def get_completion(
+        self,
+        prompt: str,
+        system_message: str = None,
+        temperature: float = 0.3,
+        max_tokens: int = 8000
+    ) -> str:
+        """Get a simple text completion from the model"""
+        messages = []
+
+        if system_message:
+            messages.append({"role": "system", "content": system_message})
+
+        messages.append({"role": "user", "content": prompt})
+
+        response = await self._api_call_with_retry(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        return response["choices"][0]["message"]["content"]
+
+    async def get_json_completion(
+        self,
+        prompt: str,
+        system_message: str = None,
+        temperature: float = 0.2,
+        max_tokens: int = 8000
+    ) -> Dict[str, Any]:
+        """Get a JSON-formatted completion from the model"""
+        json_system = (system_message or "") + "\n\nIMPORTANT: Respond with valid JSON only. No markdown, no explanations, just the JSON object."
+
+        messages = [
+            {"role": "system", "content": json_system.strip()},
+            {"role": "user", "content": prompt}
+        ]
+
+        response = await self._api_call_with_retry(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            response_format={"type": "json_object"}
+        )
+
+        content = response["choices"][0]["message"]["content"]
+        return self._parse_json_response(content)
+
+    async def analyze_paper(self, content: str, cache_key: str = None) -> Dict[str, Any]:
+        """Analyze a research paper using the configured model"""
+        if cache_key:
+            cached = await self._get_from_cache(cache_key)
+            if cached:
+                return cached
+
+        from ...prompts import format_prompt
+        prompt = format_prompt("comprehensive_paper_analysis", content=content)
+
+        system_message = """You are an expert research analyst specializing in academic papers.
+Provide thorough, accurate analysis with specific details from the paper. Always try to answer the user given question accurately using the content provided.
+Always respond with valid JSON in the exact format requested."""
+
+        result = await self.get_json_completion(
+            prompt=prompt,
+            system_message=system_message,
+            temperature=0.2,
+            max_tokens=8000
+        )
+
+        result["_model"] = self.model
+        result["_model_display"] = self.get_model_display_name()
+
+        if cache_key:
+            await self._add_to_cache(cache_key, result)
+
+        return result
+
+    async def chat(
+        self,
+        message: str,
+        context: str = "",
+        history: List[Dict[str, str]] = None,
+        system_message: str = None,
+        paper_title: str = "",
+        paper_authors: str = "",
+        paper_published: str = ""
+    ) -> Dict[str, Any]:
+        """Have a conversation with context (for RAG chat)
+
+        Args:
+            message: User's question
+            context: Relevant text chunks from the paper
+            history: Conversation history
+            system_message: Optional custom system message
+            paper_title: Title of the paper being discussed
+            paper_authors: Authors of the paper (formatted string)
+            paper_published: Publication date on arXiv
+        """
+        from ...prompts import format_prompt
+
+        history_text = ""
+        if history:
+            for msg in history[-10:]:
+                role = msg.get("type", msg.get("role", "user"))
+                content = msg.get("content", "")
+                history_text += f"{role.upper()}: {content}\n"
+
+        prompt = format_prompt(
+            "rag_chat",
+            context=context,
+            history=history_text,
+            message=message,
+            paper_title=paper_title or "Unknown Paper",
+            paper_authors=paper_authors or "Unknown",
+            paper_published=paper_published or "Unknown"
+        )
+
+        default_system = """You are ArionXiv, an AI research assistant specializing in academic papers.
+Provide accurate, helpful answers based on the paper content provided.
+Be conversational but maintain technical accuracy."""
+
+        response_text = await self.get_completion(
+            prompt=prompt,
+            system_message=system_message or default_system,
+            temperature=0.4,
+            max_tokens=8192
+        )
+
+        return {
+            "success": True,
+            "response": response_text,
+            "model": self.model,
+            "model_display": self.get_model_display_name()
+        }
+
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get client usage metrics"""
+        return {
+            "total_requests": self.total_requests,
+            "total_tokens": self.total_tokens_used,
+            "cache_hits": self.total_cache_hits,
+            "errors": self.total_errors,
+            "model": self.model,
+            "cache_size": len(self.cache)
+        }
+
+
+# ============================================================================
+# SINGLETON INSTANCE
+# ============================================================================
+
+_default_client: Optional[OpenRouterClient] = None
+
+def get_openrouter_client(console: Console = None) -> OpenRouterClient:
+    """Get or create the default OpenRouter client instance"""
+    global _default_client
+    if _default_client is None:
+        _default_client = OpenRouterClient(console=console)
+    return _default_client
+
+# Create default singleton
+openrouter_client = get_openrouter_client()
+
+async def close_openrouter_client():
+    """Close the default OpenRouter client"""
+    global _default_client
+    if _default_client:
+        await _default_client.close()
+        _default_client = None