arionxiv 1.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. arionxiv/__init__.py +40 -0
  2. arionxiv/__main__.py +10 -0
  3. arionxiv/arxiv_operations/__init__.py +0 -0
  4. arionxiv/arxiv_operations/client.py +225 -0
  5. arionxiv/arxiv_operations/fetcher.py +173 -0
  6. arionxiv/arxiv_operations/searcher.py +122 -0
  7. arionxiv/arxiv_operations/utils.py +293 -0
  8. arionxiv/cli/__init__.py +4 -0
  9. arionxiv/cli/commands/__init__.py +1 -0
  10. arionxiv/cli/commands/analyze.py +587 -0
  11. arionxiv/cli/commands/auth.py +365 -0
  12. arionxiv/cli/commands/chat.py +714 -0
  13. arionxiv/cli/commands/daily.py +482 -0
  14. arionxiv/cli/commands/fetch.py +217 -0
  15. arionxiv/cli/commands/library.py +295 -0
  16. arionxiv/cli/commands/preferences.py +426 -0
  17. arionxiv/cli/commands/search.py +254 -0
  18. arionxiv/cli/commands/settings_unified.py +1407 -0
  19. arionxiv/cli/commands/trending.py +41 -0
  20. arionxiv/cli/commands/welcome.py +168 -0
  21. arionxiv/cli/main.py +407 -0
  22. arionxiv/cli/ui/__init__.py +1 -0
  23. arionxiv/cli/ui/global_theme_manager.py +173 -0
  24. arionxiv/cli/ui/logo.py +127 -0
  25. arionxiv/cli/ui/splash.py +89 -0
  26. arionxiv/cli/ui/theme.py +32 -0
  27. arionxiv/cli/ui/theme_system.py +391 -0
  28. arionxiv/cli/utils/__init__.py +54 -0
  29. arionxiv/cli/utils/animations.py +522 -0
  30. arionxiv/cli/utils/api_client.py +583 -0
  31. arionxiv/cli/utils/api_config.py +505 -0
  32. arionxiv/cli/utils/command_suggestions.py +147 -0
  33. arionxiv/cli/utils/db_config_manager.py +254 -0
  34. arionxiv/github_actions_runner.py +206 -0
  35. arionxiv/main.py +23 -0
  36. arionxiv/prompts/__init__.py +9 -0
  37. arionxiv/prompts/prompts.py +247 -0
  38. arionxiv/rag_techniques/__init__.py +8 -0
  39. arionxiv/rag_techniques/basic_rag.py +1531 -0
  40. arionxiv/scheduler_daemon.py +139 -0
  41. arionxiv/server.py +1000 -0
  42. arionxiv/server_main.py +24 -0
  43. arionxiv/services/__init__.py +73 -0
  44. arionxiv/services/llm_client.py +30 -0
  45. arionxiv/services/llm_inference/__init__.py +58 -0
  46. arionxiv/services/llm_inference/groq_client.py +469 -0
  47. arionxiv/services/llm_inference/llm_utils.py +250 -0
  48. arionxiv/services/llm_inference/openrouter_client.py +564 -0
  49. arionxiv/services/unified_analysis_service.py +872 -0
  50. arionxiv/services/unified_auth_service.py +457 -0
  51. arionxiv/services/unified_config_service.py +456 -0
  52. arionxiv/services/unified_daily_dose_service.py +823 -0
  53. arionxiv/services/unified_database_service.py +1633 -0
  54. arionxiv/services/unified_llm_service.py +366 -0
  55. arionxiv/services/unified_paper_service.py +604 -0
  56. arionxiv/services/unified_pdf_service.py +522 -0
  57. arionxiv/services/unified_prompt_service.py +344 -0
  58. arionxiv/services/unified_scheduler_service.py +589 -0
  59. arionxiv/services/unified_user_service.py +954 -0
  60. arionxiv/utils/__init__.py +51 -0
  61. arionxiv/utils/api_helpers.py +200 -0
  62. arionxiv/utils/file_cleanup.py +150 -0
  63. arionxiv/utils/ip_helper.py +96 -0
  64. arionxiv-1.0.32.dist-info/METADATA +336 -0
  65. arionxiv-1.0.32.dist-info/RECORD +69 -0
  66. arionxiv-1.0.32.dist-info/WHEEL +5 -0
  67. arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
  68. arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
  69. arionxiv-1.0.32.dist-info/top_level.txt +1 -0
@@ -0,0 +1,564 @@
+# OpenRouter LLM client for AI-powered paper analysis
+# Uses free models like moonshotai/kimi-k2:free
+
+from typing import Dict, Any, List, Optional, Tuple
+from collections import OrderedDict
+import logging
+import json
+import asyncio
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+import time
+import httpx
+from rich.console import Console
+from dotenv import load_dotenv
+
+from .llm_utils import parse_json_response, generate_cache_key
+
+# Load .env from current directory first
+load_dotenv()
+# Also try to load from ~/.arionxiv/.env if it exists
+arionxiv_env = Path.home() / ".arionxiv" / ".env"
+if arionxiv_env.exists():
+    load_dotenv(arionxiv_env)
+
+# ============================================================================
+# LOGGER CONFIGURATION
+# ============================================================================
+
+logger = logging.getLogger(__name__)
+
+# ============================================================================
+# OPENROUTER CLIENT DEFINITION
+# ============================================================================
+
+class OpenRouterClient:
+    """
+    Production-ready client for LLM-based paper analysis using OpenRouter
+
+    Features:
+    - Access to multiple free AI models (Kimi K2, DeepSeek, etc.)
+    - Rate limiting and concurrency control with async safety
+    - Connection pooling and timeout handling
+    - Thread-safe LRU caching with TTL support
+    - Retry logic with exponential backoff
+    - Token usage tracking and monitoring
+    - Structured JSON response handling
+    - Async context manager support for proper resource cleanup
+    """
+
+    # Base URL for OpenRouter API (OpenAI-compatible)
+    BASE_URL = "https://openrouter.ai/api/v1"
+
+    # Free model configurations with context limits
+    # Free models on OpenRouter have ":free" suffix
+    MODEL_CONFIGS = {
+        "openai/gpt-oss-20b:free": {
+            "max_tokens": 131072,
+            "optimal_completion": 8000,
+            "rpm": 20,
+            "description": "OpenAI GPT-OSS 20B - Free open-source model"
+        },
+        "openai/gpt-oss-120b:free": {
+            "max_tokens": 131072,
+            "optimal_completion": 8000,
+            "rpm": 20,
+            "description": "OpenAI GPT-OSS 120B - Free open-source model"
+        },
+        "meta-llama/llama-3.3-70b-instruct:free": {
+            "max_tokens": 8192,
+            "optimal_completion": 4000,
+            "rpm": 20,
+            "description": "Meta Llama 3.3 70B - Free, reliable and fast"
+        },
+        "google/gemma-3-27b-it:free": {
+            "max_tokens": 8192,
+            "optimal_completion": 4000,
+            "rpm": 20,
+            "description": "Google Gemma 3 27B - Free"
+        },
+        "qwen/qwen3-32b:free": {
+            "max_tokens": 40000,
+            "optimal_completion": 8000,
+            "rpm": 20,
+            "description": "Qwen 3 32B - Free"
+        },
+        "meta-llama/llama-3.2-3b-instruct:free": {
+            "max_tokens": 8192,
+            "optimal_completion": 2000,
+            "rpm": 30,
+            "description": "Meta Llama 3.2 3B - Free, fast fallback"
+        },
+    }
+
+    DEFAULT_MODEL = "meta-llama/llama-3.3-70b-instruct:free"
+
+    # Verified free models from OpenRouter API (2026-01-03)
+    FALLBACK_MODELS = [
+        "openai/gpt-oss-20b:free",
+        "openai/gpt-oss-120b:free",
+        "google/gemma-3-27b-it:free",
+        "google/gemma-3-12b-it:free",
+        "mistralai/mistral-small-3.1-24b-instruct:free",
+        "moonshotai/kimi-k2:free",
+        "meta-llama/llama-3.2-3b-instruct:free",
+        "google/gemini-2.0-flash-exp:free",
+    ]
+
+    def __init__(
+        self,
+        max_concurrent_requests: int = 3,
+        enable_cache: bool = True,
+        cache_ttl_hours: int = 24,
+        console: Console = None,
+        model: str = None
+    ):
+        """
+        Initialize OpenRouter client with production-ready configuration
+
+        Args:
+            max_concurrent_requests: Maximum concurrent API requests
+            enable_cache: Enable LRU caching for repeated analyses
+            cache_ttl_hours: Time-to-live for cache entries in hours
+            console: Rich console for output (optional)
+            model: Model to use (defaults to DEFAULT_MODEL)
+        """
+        # API configuration - lazy loaded
+        self._api_key = None
+        self._api_key_checked = False
+        self.model = model or os.getenv("OPENROUTER_MODEL", self.DEFAULT_MODEL)
+        self.timeout = 120  # Longer timeout for free models
+        self._console = console or Console()
+
+        # App identification for OpenRouter rankings
+        self.site_url = os.getenv("OPENROUTER_SITE_URL", "https://github.com/ArionDas/ArionXiv")
+        self.site_name = os.getenv("OPENROUTER_SITE_NAME", "ArionXiv")
+
+        # Concurrency control
+        self.semaphore = asyncio.Semaphore(max_concurrent_requests)
+        self.max_retries = 3
+
+        # Thread-safe caching with TTL
+        self.enable_cache = enable_cache
+        self.cache: OrderedDict[str, Tuple[Any, datetime]] = OrderedDict()
+        self.cache_max_size = 100
+        self.cache_ttl = timedelta(hours=cache_ttl_hours)
+        self.cache_lock = asyncio.Lock()
+
+        # Monitoring and metrics
+        self.total_tokens_used = 0
+        self.total_requests = 0
+        self.total_cache_hits = 0
+        self.total_errors = 0
+
+        # HTTP client for API calls
+        self._http_client: Optional[httpx.AsyncClient] = None
+
+    @property
+    def api_key(self):
+        """Lazy load API key - re-checks if not found previously"""
+        # Always re-check if key was not found, in case it was loaded later
+        if not self._api_key_checked or self._api_key is None:
+            self._api_key = os.getenv("OPENROUTER_API_KEY")
+            self._api_key_checked = True
+        return self._api_key
+
+    def refresh_api_key(self):
+        """Force refresh the API key and model from environment"""
+        self._api_key = os.getenv("OPENROUTER_API_KEY")
+        self._api_key_checked = True
+        # Also refresh the model in case it was set later
+        self.model = os.getenv("OPENROUTER_MODEL", self.DEFAULT_MODEL)
+        return self._api_key is not None
+
+    @property
+    def is_available(self) -> bool:
+        """Check if the client is properly configured"""
+        # Re-check environment if not found - .env may have been loaded after initialization
+        if self._api_key is None:
+            load_dotenv()  # Try loading .env again
+            arionxiv_env = Path.home() / ".arionxiv" / ".env"
+            if arionxiv_env.exists():
+                load_dotenv(arionxiv_env)
+            self.refresh_api_key()
+        return self.api_key is not None
+
+    def get_model_name(self) -> str:
+        """Get the current model name"""
+        return self.model
+
+    def get_model_display_name(self) -> str:
+        """Get a user-friendly model display name"""
+        model_name = self.model
+
+        # Special handling for common model names
+        display_names = {
+            "openai/gpt-oss-20b:free": "OpenAI GPT-OSS 20B",
+            "openai/gpt-oss-120b:free": "OpenAI GPT-OSS 120B",
+            "meta-llama/llama-3.3-70b-instruct:free": "Llama 3.3 70B Instruct",
+            "google/gemma-3-27b-it:free": "Gemma 3 27B",
+            "google/gemma-3-12b-it:free": "Gemma 3 12B",
+            "qwen/qwen3-32b:free": "Qwen 3 32B",
+            "moonshotai/kimi-k2:free": "Kimi K2",
+            "mistralai/mistral-small-3.1-24b-instruct:free": "Mistral Small 3.1 24B",
+        }
+
+        if model_name in display_names:
+            return display_names[model_name]
+
+        # Fallback: parse the model name
+        if "/" in model_name:
+            model_name = model_name.split("/")[-1]
+        if ":free" in model_name:
+            model_name = model_name.replace(":free", "")
+        return model_name.replace("-", " ").title()
+
+    async def _get_http_client(self) -> httpx.AsyncClient:
+        """Get or create HTTP client with connection pooling"""
+        if self._http_client is None or self._http_client.is_closed:
+            self._http_client = httpx.AsyncClient(
+                base_url=self.BASE_URL,
+                timeout=httpx.Timeout(self.timeout, connect=10.0),
+                headers={
+                    "Authorization": f"Bearer {self.api_key}",
+                    "HTTP-Referer": self.site_url,
+                    "X-Title": self.site_name,
+                    "Content-Type": "application/json"
+                }
+            )
+        return self._http_client
+
+    async def __aenter__(self):
+        """Async context manager entry"""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit with proper resource cleanup"""
+        try:
+            if self._http_client and not self._http_client.is_closed:
+                await self._http_client.aclose()
+            async with self.cache_lock:
+                self.cache.clear()
+            logger.info("OpenRouter client closed and resources cleaned up")
+        except Exception as e:
+            logger.error(f"Error during client cleanup: {str(e)}")
+
+    async def close(self):
+        """Explicitly close the client"""
+        await self.__aexit__(None, None, None)
+
+    def _generate_cache_key(self, content: str, prompt_type: str) -> str:
+        """Generate cache key - delegates to shared utility"""
+        return generate_cache_key(content, prompt_type, self.model)
+
+    async def _get_from_cache(self, cache_key: str) -> Optional[Any]:
+        """Retrieve result from cache if available and not expired"""
+        if not self.enable_cache:
+            return None
+
+        async with self.cache_lock:
+            if cache_key not in self.cache:
+                return None
+
+            result, timestamp = self.cache[cache_key]
+
+            if datetime.now() - timestamp > self.cache_ttl:
+                del self.cache[cache_key]
+                return None
+
+            self.cache.move_to_end(cache_key)
+            self.total_cache_hits += 1
+            return result
+
+    async def _add_to_cache(self, cache_key: str, result: Any) -> None:
+        """Add result to cache with TTL and LRU eviction"""
+        if not self.enable_cache:
+            return
+
+        async with self.cache_lock:
+            if cache_key in self.cache:
+                self.cache[cache_key] = (result, datetime.now())
+                self.cache.move_to_end(cache_key)
+            else:
+                if len(self.cache) >= self.cache_max_size:
+                    oldest_key = next(iter(self.cache))
+                    self.cache.pop(oldest_key)
+
+                self.cache[cache_key] = (result, datetime.now())
+
+    def _parse_json_response(self, response_content: str, max_retries: int = 3) -> Dict[str, Any]:
+        """Parse JSON response - delegates to shared utility"""
+        return parse_json_response(response_content, max_retries)
+
+    async def _api_call_with_retry(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float = 0.3,
+        max_tokens: int = 8000,
+        response_format: Optional[Dict[str, str]] = None
+    ) -> Dict[str, Any]:
+        """Make API call with retry logic, exponential backoff, and model fallback"""
+        if not self.api_key:
+            raise ValueError("OpenRouter API key not configured. Set OPENROUTER_API_KEY environment variable.")
+
+        # Build list of models to try: current model + fallbacks
+        models_to_try = [self.model]
+        for fallback in self.FALLBACK_MODELS:
+            if fallback != self.model and fallback not in models_to_try:
+                models_to_try.append(fallback)
+
+        client = await self._get_http_client()
+        last_error = None
+
+        logger.debug(f"Will try models in order: {models_to_try}")
+
+        for model in models_to_try:
+            model_config = self.MODEL_CONFIGS.get(model, {
+                "max_tokens": 8192,
+                "optimal_completion": 4000
+            })
+            model_max_tokens = min(max_tokens, model_config.get("optimal_completion", 4000))
+
+            for attempt in range(self.max_retries):
+                try:
+                    async with self.semaphore:
+                        payload = {
+                            "model": model,
+                            "messages": messages,
+                            "temperature": temperature,
+                            "max_tokens": model_max_tokens
+                        }
+
+                        if response_format:
+                            payload["response_format"] = response_format
+
+                        logger.debug(f"Trying {model} (attempt {attempt + 1}/{self.max_retries})")
+                        response = await client.post("/chat/completions", json=payload)
+
+                        if response.status_code == 200:
+                            result = response.json()
+                            self.total_requests += 1
+
+                            if "usage" in result:
+                                self.total_tokens_used += result["usage"].get("total_tokens", 0)
+
+                            # If we switched models, update for future calls
+                            if model != self.model:
+                                logger.info(f"Switched from {self.model} to {model} due to failures")
+                                self.model = model
+
+                            return result
+
+                        elif response.status_code == 429:
+                            last_error = f"Rate limited for model {model}"
+                            wait_time = (2 ** attempt) * 2
+                            logger.debug(f"Rate limited, waiting {wait_time}s")
+                            await asyncio.sleep(wait_time)
+                            continue
+
+                        elif response.status_code >= 500:
+                            last_error = f"Server error {response.status_code} for model {model}"
+                            wait_time = (2 ** attempt) * 1
+                            logger.debug(f"Server error, waiting {wait_time}s")
+                            await asyncio.sleep(wait_time)
+                            continue
+
+                        else:
+                            error_detail = response.text
+                            last_error = f"API error: {response.status_code} - {error_detail}"
+                            logger.debug(f"Model {model} failed: {last_error}")
+                            break  # Try next model
+
+                except httpx.TimeoutException:
+                    self.total_errors += 1
+                    last_error = f"Timeout for model {model}"
+                    wait_time = (2 ** attempt) * 2
+                    await asyncio.sleep(wait_time)
+                    continue
+
+                except Exception as e:
+                    last_error = str(e)
+                    if attempt == self.max_retries - 1:
+                        logger.debug(f"Model {model} exhausted retries: {last_error}")
+                        break  # Try next model
+
+                    wait_time = (2 ** attempt) * 1
+                    await asyncio.sleep(wait_time)
+
+            # If we got here, this model failed - try the next one
+            logger.debug(f"Model {model} failed with: {last_error}. Trying next fallback...")
+
+        # All models failed
+        self.total_errors += 1
+        raise Exception(f"API call failed after trying all models. Last error: {last_error}")
+
+    async def get_completion(
+        self,
+        prompt: str,
+        system_message: str = None,
+        temperature: float = 0.3,
+        max_tokens: int = 8000
+    ) -> str:
+        """Get a simple text completion from the model"""
+        messages = []
+
+        if system_message:
+            messages.append({"role": "system", "content": system_message})
+
+        messages.append({"role": "user", "content": prompt})
+
+        response = await self._api_call_with_retry(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        return response["choices"][0]["message"]["content"]
+
+    async def get_json_completion(
+        self,
+        prompt: str,
+        system_message: str = None,
+        temperature: float = 0.2,
+        max_tokens: int = 8000
+    ) -> Dict[str, Any]:
+        """Get a JSON-formatted completion from the model"""
+        json_system = (system_message or "") + "\n\nIMPORTANT: Respond with valid JSON only. No markdown, no explanations, just the JSON object."
+
+        messages = [
+            {"role": "system", "content": json_system.strip()},
+            {"role": "user", "content": prompt}
+        ]
+
+        response = await self._api_call_with_retry(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            response_format={"type": "json_object"}
+        )
+
+        content = response["choices"][0]["message"]["content"]
+        return self._parse_json_response(content)
+
+    async def analyze_paper(self, content: str, cache_key: str = None) -> Dict[str, Any]:
+        """Analyze a research paper using the configured model"""
+        if cache_key:
+            cached = await self._get_from_cache(cache_key)
+            if cached:
+                return cached
+
+        from ...prompts import format_prompt
+        prompt = format_prompt("comprehensive_paper_analysis", content=content)
+
+        system_message = """You are an expert research analyst specializing in academic papers.
+        Provide thorough, accurate analysis with specific details from the paper. Always try to answer the user given question accurately using the content provided.
+        Always respond with valid JSON in the exact format requested."""
+
+        result = await self.get_json_completion(
+            prompt=prompt,
+            system_message=system_message,
+            temperature=0.2,
+            max_tokens=8000
+        )
+
+        result["_model"] = self.model
+        result["_model_display"] = self.get_model_display_name()
+
+        if cache_key:
+            await self._add_to_cache(cache_key, result)
+
+        return result
+
+    async def chat(
+        self,
+        message: str,
+        context: str = "",
+        history: List[Dict[str, str]] = None,
+        system_message: str = None,
+        paper_title: str = "",
+        paper_authors: str = "",
+        paper_published: str = ""
+    ) -> Dict[str, Any]:
+        """Have a conversation with context (for RAG chat)
+
+        Args:
+            message: User's question
+            context: Relevant text chunks from the paper
+            history: Conversation history
+            system_message: Optional custom system message
+            paper_title: Title of the paper being discussed
+            paper_authors: Authors of the paper (formatted string)
+            paper_published: Publication date on arXiv
+        """
+        from ...prompts import format_prompt
+
+        history_text = ""
+        if history:
+            for msg in history[-10:]:
+                role = msg.get("type", msg.get("role", "user"))
+                content = msg.get("content", "")
+                history_text += f"{role.upper()}: {content}\n"
+
+        prompt = format_prompt(
+            "rag_chat",
+            context=context,
+            history=history_text,
+            message=message,
+            paper_title=paper_title or "Unknown Paper",
+            paper_authors=paper_authors or "Unknown",
+            paper_published=paper_published or "Unknown"
+        )
+
+        default_system = """You are ArionXiv, an AI research assistant specializing in academic papers.
+        Provide accurate, helpful answers based on the paper content provided.
+        Be conversational but maintain technical accuracy."""
+
+        response_text = await self.get_completion(
+            prompt=prompt,
+            system_message=system_message or default_system,
+            temperature=0.4,
+            max_tokens=8192
+        )
+
+        return {
+            "success": True,
+            "response": response_text,
+            "model": self.model,
+            "model_display": self.get_model_display_name()
+        }
+
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get client usage metrics"""
+        return {
+            "total_requests": self.total_requests,
+            "total_tokens": self.total_tokens_used,
+            "cache_hits": self.total_cache_hits,
+            "errors": self.total_errors,
+            "model": self.model,
+            "cache_size": len(self.cache)
+        }
+
+
+# ============================================================================
+# SINGLETON INSTANCE
+# ============================================================================
+
+_default_client: Optional[OpenRouterClient] = None
+
+def get_openrouter_client(console: Console = None) -> OpenRouterClient:
+    """Get or create the default OpenRouter client instance"""
+    global _default_client
+    if _default_client is None:
+        _default_client = OpenRouterClient(console=console)
+    return _default_client
+
+# Create default singleton
+openrouter_client = get_openrouter_client()
+
+async def close_openrouter_client():
+    """Close the default OpenRouter client"""
+    global _default_client
+    if _default_client:
+        await _default_client.close()
+        _default_client = None