arionxiv 1.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arionxiv/__init__.py +40 -0
- arionxiv/__main__.py +10 -0
- arionxiv/arxiv_operations/__init__.py +0 -0
- arionxiv/arxiv_operations/client.py +225 -0
- arionxiv/arxiv_operations/fetcher.py +173 -0
- arionxiv/arxiv_operations/searcher.py +122 -0
- arionxiv/arxiv_operations/utils.py +293 -0
- arionxiv/cli/__init__.py +4 -0
- arionxiv/cli/commands/__init__.py +1 -0
- arionxiv/cli/commands/analyze.py +587 -0
- arionxiv/cli/commands/auth.py +365 -0
- arionxiv/cli/commands/chat.py +714 -0
- arionxiv/cli/commands/daily.py +482 -0
- arionxiv/cli/commands/fetch.py +217 -0
- arionxiv/cli/commands/library.py +295 -0
- arionxiv/cli/commands/preferences.py +426 -0
- arionxiv/cli/commands/search.py +254 -0
- arionxiv/cli/commands/settings_unified.py +1407 -0
- arionxiv/cli/commands/trending.py +41 -0
- arionxiv/cli/commands/welcome.py +168 -0
- arionxiv/cli/main.py +407 -0
- arionxiv/cli/ui/__init__.py +1 -0
- arionxiv/cli/ui/global_theme_manager.py +173 -0
- arionxiv/cli/ui/logo.py +127 -0
- arionxiv/cli/ui/splash.py +89 -0
- arionxiv/cli/ui/theme.py +32 -0
- arionxiv/cli/ui/theme_system.py +391 -0
- arionxiv/cli/utils/__init__.py +54 -0
- arionxiv/cli/utils/animations.py +522 -0
- arionxiv/cli/utils/api_client.py +583 -0
- arionxiv/cli/utils/api_config.py +505 -0
- arionxiv/cli/utils/command_suggestions.py +147 -0
- arionxiv/cli/utils/db_config_manager.py +254 -0
- arionxiv/github_actions_runner.py +206 -0
- arionxiv/main.py +23 -0
- arionxiv/prompts/__init__.py +9 -0
- arionxiv/prompts/prompts.py +247 -0
- arionxiv/rag_techniques/__init__.py +8 -0
- arionxiv/rag_techniques/basic_rag.py +1531 -0
- arionxiv/scheduler_daemon.py +139 -0
- arionxiv/server.py +1000 -0
- arionxiv/server_main.py +24 -0
- arionxiv/services/__init__.py +73 -0
- arionxiv/services/llm_client.py +30 -0
- arionxiv/services/llm_inference/__init__.py +58 -0
- arionxiv/services/llm_inference/groq_client.py +469 -0
- arionxiv/services/llm_inference/llm_utils.py +250 -0
- arionxiv/services/llm_inference/openrouter_client.py +564 -0
- arionxiv/services/unified_analysis_service.py +872 -0
- arionxiv/services/unified_auth_service.py +457 -0
- arionxiv/services/unified_config_service.py +456 -0
- arionxiv/services/unified_daily_dose_service.py +823 -0
- arionxiv/services/unified_database_service.py +1633 -0
- arionxiv/services/unified_llm_service.py +366 -0
- arionxiv/services/unified_paper_service.py +604 -0
- arionxiv/services/unified_pdf_service.py +522 -0
- arionxiv/services/unified_prompt_service.py +344 -0
- arionxiv/services/unified_scheduler_service.py +589 -0
- arionxiv/services/unified_user_service.py +954 -0
- arionxiv/utils/__init__.py +51 -0
- arionxiv/utils/api_helpers.py +200 -0
- arionxiv/utils/file_cleanup.py +150 -0
- arionxiv/utils/ip_helper.py +96 -0
- arionxiv-1.0.32.dist-info/METADATA +336 -0
- arionxiv-1.0.32.dist-info/RECORD +69 -0
- arionxiv-1.0.32.dist-info/WHEEL +5 -0
- arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
- arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
- arionxiv-1.0.32.dist-info/top_level.txt +1 -0
arionxiv/server_main.py
ADDED
@@ -0,0 +1,24 @@
"""
Server entry point for ArionXiv package
"""

import sys
import asyncio
import logging

logger = logging.getLogger(__name__)

def main():
    """Main entry point for the arionxiv-server command"""
    try:
        from .server import main as server_main
        asyncio.run(server_main())
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Server error: {e}", exc_info=True)
        sys.exit(1)

if __name__ == "__main__":
    main()
arionxiv/services/__init__.py
ADDED
@@ -0,0 +1,73 @@
"""
Services module for ArionXiv

This module contains all the core service classes for paper analysis,
database operations, configuration management, and more.

NOTE: Services are lazily imported to avoid requiring fastapi for CLI usage.
The auth_service requires fastapi and is only needed for server/API functionality.
"""

# Core services that don't require fastapi - import directly
from .unified_config_service import config
from .unified_database_service import database_service
from .unified_paper_service import paper_service
from .unified_pdf_service import pdf_service
from .unified_prompt_service import prompt_service

# LLM Inference clients (new organized location)
from .llm_inference import groq_client, GroqClient, create_groq_client
from .llm_inference import OPENROUTER_AVAILABLE

if OPENROUTER_AVAILABLE:
    from .llm_inference import openrouter_client, OpenRouterClient, get_openrouter_client
else:
    openrouter_client = None
    OpenRouterClient = None
    get_openrouter_client = None

# Backward compatibility
from .llm_client import llm_client, LLMClient, create_llm_client

# Lazy imports for services that have heavy dependencies (fastapi, etc.)
# These are only loaded when actually accessed
_lazy_imports = {
    "auth_service": ".unified_auth_service",
    "llm_service": ".unified_llm_service",
    "analysis_service": ".unified_analysis_service",
    "trigger_user_daily_dose": ".unified_scheduler_service",
    "unified_scheduler": ".unified_scheduler_service",
}

def __getattr__(name):
    """Lazy import of services with heavy dependencies."""
    if name in _lazy_imports:
        module_path = _lazy_imports[name]
        import importlib
        module = importlib.import_module(module_path, package=__name__)
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

__all__ = [
    "config",
    "database_service",
    "paper_service",
    "analysis_service",
    "pdf_service",
    "auth_service",
    "llm_service",
    "trigger_user_daily_dose",
    "unified_scheduler",
    "prompt_service",
    # LLM clients
    "groq_client",
    "GroqClient",
    "create_groq_client",
    "openrouter_client",
    "OpenRouterClient",
    "get_openrouter_client",
    # Backward compatibility
    "llm_client",
    "LLMClient",
    "create_llm_client",
]
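
The `_lazy_imports` table together with the module-level `__getattr__` above uses PEP 562 lazy attribute resolution: the fastapi-dependent services are only imported the first time they are accessed, so plain CLI usage never pays that import cost. A minimal usage sketch, assuming the package and its dependencies are installed:

# config is bound eagerly at package import time; analysis_service resolves lazily
# through __getattr__ -> importlib.import_module(".unified_analysis_service").
from arionxiv import services

print(services.config)
print(services.analysis_service)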
arionxiv/services/llm_client.py
ADDED
@@ -0,0 +1,30 @@
# Backward compatibility shim - imports from new location
# The LLM clients have been moved to arionxiv/services/llm_inference/

"""
DEPRECATED: This module is kept for backward compatibility only.
Please import from arionxiv.services.llm_inference instead:

    from arionxiv.services.llm_inference import groq_client, GroqClient
    from arionxiv.services.llm_inference import openrouter_client, OpenRouterClient
"""

from .llm_inference.groq_client import (
    GroqClient,
    create_groq_client,
    groq_client,
    # Backward compatibility aliases
    LLMClient,
    llm_client,
    create_llm_client,
)

# Re-export for backward compatibility
__all__ = [
    'GroqClient',
    'create_groq_client',
    'groq_client',
    'LLMClient',
    'llm_client',
    'create_llm_client',
]
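
Because the shim only re-imports the aliases defined in groq_client.py, the deprecated and the new import paths resolve to the same singleton. A quick sketch of what that means for existing call sites:

# Deprecated path (via the shim) and preferred path point at the same object.
from arionxiv.services.llm_client import llm_client        # old import, still works
from arionxiv.services.llm_inference import groq_client    # preferred import

assert llm_client is groq_client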
arionxiv/services/llm_inference/__init__.py
ADDED
@@ -0,0 +1,58 @@
"""
LLM Inference Module for ArionXiv

Provides unified access to multiple LLM providers:
- Groq (fast inference, Llama models)
- OpenRouter (access to free models like Kimi K2, DeepSeek, etc.)

Usage:
    from arionxiv.services.llm_inference import groq_client, openrouter_client

    # Use Groq for fast inference
    result = await groq_client.get_completion(prompt)

    # Use OpenRouter for free models
    result = await openrouter_client.chat(message, context)
"""

# Shared utilities
from .llm_utils import (
    parse_json_response,
    generate_cache_key,
    generate_paper_cache_key,
    format_paper_metadata,
    AsyncLRUCache,
    sanitize_arxiv_id,
)

# Groq client (primary LLM provider)
from .groq_client import GroqClient, create_groq_client, groq_client

# OpenRouter client (free models)
try:
    from .openrouter_client import OpenRouterClient, get_openrouter_client, openrouter_client
    OPENROUTER_AVAILABLE = True
except ImportError:
    OPENROUTER_AVAILABLE = False
    OpenRouterClient = None
    get_openrouter_client = None
    openrouter_client = None

__all__ = [
    # Shared utilities
    'parse_json_response',
    'generate_cache_key',
    'generate_paper_cache_key',
    'format_paper_metadata',
    'AsyncLRUCache',
    'sanitize_arxiv_id',
    # Groq
    'GroqClient',
    'create_groq_client',
    'groq_client',
    # OpenRouter
    'OpenRouterClient',
    'get_openrouter_client',
    'openrouter_client',
    'OPENROUTER_AVAILABLE',
]
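
The try/except ImportError block makes OpenRouter an optional dependency; callers are expected to branch on the exported OPENROUTER_AVAILABLE flag rather than catching ImportError themselves. A small sketch of that pattern:

# Prefer OpenRouter when its optional dependency is installed, otherwise fall back to Groq.
from arionxiv.services.llm_inference import OPENROUTER_AVAILABLE, groq_client

def pick_client():
    if OPENROUTER_AVAILABLE:
        from arionxiv.services.llm_inference import openrouter_client
        return openrouter_client
    return groq_client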
arionxiv/services/llm_inference/groq_client.py
ADDED
@@ -0,0 +1,469 @@
# Groq LLM client for AI-powered paper analysis
from typing import Dict, Any, List, Optional, Tuple
from collections import OrderedDict
import logging
import json
import asyncio
import os
from datetime import datetime, timedelta
from functools import lru_cache
import time
from groq import AsyncGroq
from dotenv import load_dotenv
import httpx
from rich.console import Console

## File imports
from ...prompts import format_prompt
from .llm_utils import parse_json_response, generate_cache_key, generate_paper_cache_key, format_paper_metadata

load_dotenv()

# ============================================================================
# LOGGER CONFIGURATION
# ============================================================================

logger = logging.getLogger(__name__)

# ============================================================================
# GROQ CLIENT DEFINITION
# ============================================================================

class GroqClient:
    """
    Production-ready client for LLM-based paper analysis using Groq

    Features:
    - Rate limiting and concurrency control with async safety
    - Connection pooling and timeout handling
    - Thread-safe LRU caching with TTL support
    - Retry logic with exponential backoff and rate limit handling
    - Token usage tracking and monitoring
    - Structured logging
    - Async context manager support for proper resource cleanup
    """

    # Model configuration with context limits
    MODEL_LIMITS = {
        "llama-3.3-70b-versatile": {"max_tokens": 32768, "optimal_completion": 8000, "rpm": 30, "tpm": 14400},
        "llama-3.1-70b-versatile": {"max_tokens": 32768, "optimal_completion": 8000, "rpm": 30, "tpm": 14400},
        "llama-3.1-8b-instant": {"max_tokens": 8192, "optimal_completion": 4000, "rpm": 30, "tpm": 14400},
    }

    def __init__(self, max_concurrent_requests: int = 5, enable_cache: bool = True, cache_ttl_hours: int = 24, console: Console = None):
        """
        Initialize Groq client with production-ready configuration

        Args:
            max_concurrent_requests: Maximum number of concurrent API requests (consider model RPM limits)
            enable_cache: Enable LRU caching for repeated analyses
            cache_ttl_hours: Time-to-live for cache entries in hours (default: 24)
            console: Rich console for output (optional)
        """
        # API configuration - lazy loaded
        self._api_key = None
        self._api_key_checked = False
        self.model = os.getenv("DEFAULT_ANALYSIS_MODEL", "llama-3.3-70b-versatile")
        self.timeout = 60
        self._console = console or Console()

        # Concurrency control
        self._max_concurrent_requests = max_concurrent_requests
        self.semaphore = asyncio.Semaphore(max_concurrent_requests)
        self.max_retries = 3

        # Thread-safe caching with TTL
        self.enable_cache = enable_cache
        self.cache: OrderedDict[str, Tuple[Any, datetime]] = OrderedDict()
        self.cache_max_size = 100
        self.cache_ttl = timedelta(hours=cache_ttl_hours)
        self.cache_lock = asyncio.Lock()

        # Monitoring and metrics
        self.total_tokens_used = 0
        self.total_requests = 0
        self.total_cache_hits = 0
        self.total_errors = 0

        # Client initialization - lazy loaded
        self._client = None
        self._client_initialized = False

    @property
    def api_key(self):
        """Lazy load API key"""
        if not self._api_key_checked:
            self._api_key = os.getenv("GROQ_API_KEY")
            self._api_key_checked = True
        return self._api_key

    @property
    def client(self):
        """Lazy initialize Groq client"""
        if not self._client_initialized:
            self._client_initialized = True
            if self.api_key:
                try:
                    self._client = AsyncGroq(
                        api_key=self.api_key,
                        max_retries=2,
                        timeout=httpx.Timeout(60.0, connect=5.0)
                    )
                    logger.debug("Groq client initialized", extra={"model": self.model})
                except Exception as e:
                    logger.error("Failed to initialize Groq client", extra={"error": str(e)})
                    self._client = None
        return self._client

    @property
    def is_available(self) -> bool:
        """Check if the client is properly configured"""
        return self.client is not None

    def get_model_name(self) -> str:
        """Get the current model name"""
        return self.model

    def get_model_display_name(self) -> str:
        """Get a user-friendly model display name"""
        # Extract model name
        model_name = self.model
        if "-" in model_name:
            parts = model_name.split("-")
            # e.g., "llama-3.3-70b-versatile" -> "Llama 3.3 70B"
            if len(parts) >= 3:
                return f"{parts[0].title()} {parts[1]} {parts[2].upper()}"
        return model_name.title()

    async def __aenter__(self):
        """Async context manager entry"""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit with proper resource cleanup"""
        try:
            if self.client and hasattr(self.client, 'aclose'):
                await self.client.aclose()
            async with self.cache_lock:
                self.cache.clear()
            logger.info("Groq client closed and resources cleaned up")
        except Exception as e:
            logger.error(f"Error during client cleanup: {str(e)}")

    async def close(self):
        """Explicitly close the client"""
        await self.__aexit__(None, None, None)

    def _generate_cache_key(self, content: str, prompt_type: str) -> str:
        """Generate cache key from content and prompt type - delegates to shared utility"""
        return generate_cache_key(content, prompt_type)

    def _generate_paper_cache_key(self, paper: Dict[str, Any]) -> str:
        """Generate unique cache key for a paper - delegates to shared utility"""
        return generate_paper_cache_key(paper)

    def _format_paper_metadata(self, paper: Dict[str, Any], index: Optional[int] = None) -> str:
        """Format paper metadata - delegates to shared utility"""
        return format_paper_metadata(paper, index)

    async def _get_from_cache(self, cache_key: str) -> Optional[Any]:
        """Retrieve result from cache if available and not expired"""
        if not self.enable_cache:
            return None

        async with self.cache_lock:
            if cache_key not in self.cache:
                return None

            result, timestamp = self.cache[cache_key]

            if datetime.now() - timestamp > self.cache_ttl:
                del self.cache[cache_key]
                return None

            self.cache.move_to_end(cache_key)
            self.total_cache_hits += 1
            return result

    async def _add_to_cache(self, cache_key: str, result: Any) -> None:
        """Add result to cache with TTL and LRU eviction"""
        if not self.enable_cache:
            return

        async with self.cache_lock:
            if cache_key in self.cache:
                self.cache[cache_key] = (result, datetime.now())
                self.cache.move_to_end(cache_key)
            else:
                if len(self.cache) >= self.cache_max_size:
                    oldest_key = next(iter(self.cache))
                    self.cache.pop(oldest_key)

                self.cache[cache_key] = (result, datetime.now())

    def _parse_json_response(self, response_content: str, max_retries: int = 3) -> Dict[str, Any]:
        """Parse JSON response - delegates to shared utility"""
        return parse_json_response(response_content, max_retries)

    async def _api_call_with_retry(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.2,
        max_tokens: int = 8000
    ) -> Any:
        """Make API call with retry logic and exponential backoff"""
        if not self.client:
            raise ValueError("Service temporarily unavailable. Please try again later.")

        model_limits = self.MODEL_LIMITS.get(self.model, {"optimal_completion": 4000, "max_tokens": 32768})
        max_tokens = min(max_tokens, model_limits["optimal_completion"])

        for attempt in range(self.max_retries):
            try:
                async with self.semaphore:
                    response = await asyncio.wait_for(
                        self.client.chat.completions.create(
                            model=self.model,
                            messages=messages,
                            temperature=temperature,
                            max_tokens=max_tokens
                        ),
                        timeout=self.timeout
                    )

                self.total_requests += 1
                if hasattr(response, 'usage'):
                    self.total_tokens_used += response.usage.total_tokens

                return response

            except asyncio.TimeoutError:
                logger.error(f"Request timed out (attempt {attempt + 1})")
                if attempt == self.max_retries - 1:
                    self.total_errors += 1
                    raise
                await asyncio.sleep(2 ** attempt)

            except Exception as e:
                # Check for rate limit error (429)
                if hasattr(e, 'status_code') and e.status_code == 429:
                    retry_after = getattr(e, 'headers', {}).get('retry-after', 2 ** attempt)
                    try:
                        retry_after = float(retry_after)
                    except (ValueError, TypeError):
                        retry_after = 2 ** attempt

                    await asyncio.sleep(retry_after)
                    continue

                if attempt == self.max_retries - 1:
                    self.total_errors += 1
                    raise
                await asyncio.sleep(2 ** attempt)

        raise Exception("Max retries exceeded")

    async def get_completion(self, prompt: str, temperature: float = 0.7, max_tokens: int = 8000) -> str:
        """
        Get a completion from the LLM for a given prompt

        Args:
            prompt: Input prompt string
            temperature: Sampling temperature
            max_tokens: Maximum tokens for completion

        Returns:
            Completion text string
        """
        try:
            if not self.client:
                return "Service temporarily unavailable"

            response = await self._api_call_with_retry(
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=max_tokens
            )

            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Groq completion failed: {str(e)}")
            return "Sorry, I encountered an error while processing your request. Please try again."

    async def get_json_completion(
        self,
        prompt: str,
        system_message: str = None,
        temperature: float = 0.2,
        max_tokens: int = 8000
    ) -> Dict[str, Any]:
        """Get a JSON-formatted completion from the model"""
        messages = []

        if system_message:
            messages.append({"role": "system", "content": system_message})

        messages.append({"role": "user", "content": prompt})

        response = await self._api_call_with_retry(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )

        content = response.choices[0].message.content
        return self._parse_json_response(content)

    async def analyze_paper(self, content: str) -> Dict[str, Any]:
        """Analyze a single paper using Groq LLM"""
        start_time = time.time()

        try:
            if not content.strip():
                return {"analysis": "No content provided for analysis."}

            if not self.client:
                return {"error": "Service temporarily unavailable", "success": False}

            cache_key = self._generate_cache_key(content, "paper_analysis")
            cached_result = await self._get_from_cache(cache_key)
            if cached_result:
                return cached_result

            prompt = format_prompt("comprehensive_paper_analysis", content=content)

            response = await self._api_call_with_retry(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2,
                max_tokens=8000
            )

            response_content = response.choices[0].message.content
            analysis = self._parse_json_response(response_content)

            await self._add_to_cache(cache_key, analysis)

            return analysis

        except Exception as e:
            logger.error(f"Paper analysis failed: {str(e)}")
            raise

    async def generate_insights(self, papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate cross-paper insights"""
        try:
            if not papers:
                return {"message": "No papers provided for insight generation"}

            if not self.client:
                return {"error": "Service temporarily unavailable", "success": False}

            papers_to_analyze = papers[:10]
            papers_summary = []

            for i, paper in enumerate(papers_to_analyze):
                metadata = self._format_paper_metadata(paper, index=i+1)
                papers_summary.append(metadata)

            papers_data = f"Papers analyzed ({len(papers)} total):\n\n{chr(10).join(papers_summary)}"
            prompt = format_prompt("trend_analysis", papers_data=papers_data)

            response = await self._api_call_with_retry(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.4,
                max_tokens=8000
            )

            return self._parse_json_response(response.choices[0].message.content)

        except Exception as e:
            logger.error(f"Insight generation failed: {str(e)}")
            raise

    async def summarize_collection(self, papers: List[Dict[str, Any]]) -> str:
        """Generate a concise summary for a collection of papers"""
        try:
            if not papers:
                return "No papers provided for summarization"

            if not self.client:
                return "Service temporarily unavailable"

            papers_to_summarize = papers[:15]
            papers_info = []

            for paper in papers_to_summarize:
                title = paper.get('title', 'Unknown')
                abstract = paper.get('abstract', 'No abstract')
                papers_info.append(f"- {title}: {abstract[:150]}...")

            papers_data = chr(10).join(papers_info)
            prompt = format_prompt("paper_summary", papers_data=papers_data)

            response = await self._api_call_with_retry(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=8000
            )

            return response.choices[0].message.content.strip()

        except Exception as e:
            logger.error(f"Collection summarization failed: {str(e)}")
            return f"Collection of {len(papers)} papers covering diverse topics."

    def get_status(self) -> Dict[str, Any]:
        """Get client status and metrics"""
        return {
            "configured": self.client is not None,
            "model": self.model,
            "api_service": "Groq",
            "timeout": self.timeout,
            "metrics": {
                "total_requests": self.total_requests,
                "total_tokens_used": self.total_tokens_used,
                "cache_hits": self.total_cache_hits,
                "total_errors": self.total_errors,
                "cache_size": len(self.cache)
            }
        }

    def get_metrics(self) -> Dict[str, Any]:
        """Get detailed performance metrics"""
        total_operations = self.total_requests + self.total_cache_hits
        cache_hit_rate = (self.total_cache_hits / total_operations * 100) if total_operations > 0 else 0.0

        return {
            "total_requests": self.total_requests,
            "total_tokens_used": self.total_tokens_used,
            "cache_hits": self.total_cache_hits,
            "cache_hit_rate_percent": round(cache_hit_rate, 2),
            "total_errors": self.total_errors,
            "model": self.model
        }


# ============================================================================
# FACTORY AND SINGLETON
# ============================================================================

def create_groq_client(
    max_concurrent_requests: int = 5,
    enable_cache: bool = True,
    cache_ttl_hours: int = 24
) -> GroqClient:
    """Factory function to create Groq client instances"""
    return GroqClient(
        max_concurrent_requests=max_concurrent_requests,
        enable_cache=enable_cache,
        cache_ttl_hours=cache_ttl_hours
    )

# Global singleton instance
groq_client = create_groq_client()

# Backward compatibility alias
LLMClient = GroqClient
llm_client = groq_client
create_llm_client = create_groq_client