arionxiv-1.0.32-py3-none-any.whl
This diff shows the content of package versions publicly released to one of the supported registries, as they appear in those registries. It is provided for informational purposes only.
- arionxiv/__init__.py +40 -0
- arionxiv/__main__.py +10 -0
- arionxiv/arxiv_operations/__init__.py +0 -0
- arionxiv/arxiv_operations/client.py +225 -0
- arionxiv/arxiv_operations/fetcher.py +173 -0
- arionxiv/arxiv_operations/searcher.py +122 -0
- arionxiv/arxiv_operations/utils.py +293 -0
- arionxiv/cli/__init__.py +4 -0
- arionxiv/cli/commands/__init__.py +1 -0
- arionxiv/cli/commands/analyze.py +587 -0
- arionxiv/cli/commands/auth.py +365 -0
- arionxiv/cli/commands/chat.py +714 -0
- arionxiv/cli/commands/daily.py +482 -0
- arionxiv/cli/commands/fetch.py +217 -0
- arionxiv/cli/commands/library.py +295 -0
- arionxiv/cli/commands/preferences.py +426 -0
- arionxiv/cli/commands/search.py +254 -0
- arionxiv/cli/commands/settings_unified.py +1407 -0
- arionxiv/cli/commands/trending.py +41 -0
- arionxiv/cli/commands/welcome.py +168 -0
- arionxiv/cli/main.py +407 -0
- arionxiv/cli/ui/__init__.py +1 -0
- arionxiv/cli/ui/global_theme_manager.py +173 -0
- arionxiv/cli/ui/logo.py +127 -0
- arionxiv/cli/ui/splash.py +89 -0
- arionxiv/cli/ui/theme.py +32 -0
- arionxiv/cli/ui/theme_system.py +391 -0
- arionxiv/cli/utils/__init__.py +54 -0
- arionxiv/cli/utils/animations.py +522 -0
- arionxiv/cli/utils/api_client.py +583 -0
- arionxiv/cli/utils/api_config.py +505 -0
- arionxiv/cli/utils/command_suggestions.py +147 -0
- arionxiv/cli/utils/db_config_manager.py +254 -0
- arionxiv/github_actions_runner.py +206 -0
- arionxiv/main.py +23 -0
- arionxiv/prompts/__init__.py +9 -0
- arionxiv/prompts/prompts.py +247 -0
- arionxiv/rag_techniques/__init__.py +8 -0
- arionxiv/rag_techniques/basic_rag.py +1531 -0
- arionxiv/scheduler_daemon.py +139 -0
- arionxiv/server.py +1000 -0
- arionxiv/server_main.py +24 -0
- arionxiv/services/__init__.py +73 -0
- arionxiv/services/llm_client.py +30 -0
- arionxiv/services/llm_inference/__init__.py +58 -0
- arionxiv/services/llm_inference/groq_client.py +469 -0
- arionxiv/services/llm_inference/llm_utils.py +250 -0
- arionxiv/services/llm_inference/openrouter_client.py +564 -0
- arionxiv/services/unified_analysis_service.py +872 -0
- arionxiv/services/unified_auth_service.py +457 -0
- arionxiv/services/unified_config_service.py +456 -0
- arionxiv/services/unified_daily_dose_service.py +823 -0
- arionxiv/services/unified_database_service.py +1633 -0
- arionxiv/services/unified_llm_service.py +366 -0
- arionxiv/services/unified_paper_service.py +604 -0
- arionxiv/services/unified_pdf_service.py +522 -0
- arionxiv/services/unified_prompt_service.py +344 -0
- arionxiv/services/unified_scheduler_service.py +589 -0
- arionxiv/services/unified_user_service.py +954 -0
- arionxiv/utils/__init__.py +51 -0
- arionxiv/utils/api_helpers.py +200 -0
- arionxiv/utils/file_cleanup.py +150 -0
- arionxiv/utils/ip_helper.py +96 -0
- arionxiv-1.0.32.dist-info/METADATA +336 -0
- arionxiv-1.0.32.dist-info/RECORD +69 -0
- arionxiv-1.0.32.dist-info/WHEEL +5 -0
- arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
- arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
- arionxiv-1.0.32.dist-info/top_level.txt +1 -0
arionxiv/services/llm_inference/llm_utils.py
@@ -0,0 +1,250 @@
"""
Shared utilities for LLM inference clients
Consolidates common functionality to avoid code duplication
"""

import json
import re
import hashlib
import logging
from typing import Dict, Any, Optional, List, Tuple
from datetime import datetime, timedelta
from collections import OrderedDict
import asyncio

logger = logging.getLogger(__name__)

def parse_json_response(response_content: str, max_retries: int = 3) -> Dict[str, Any]:
    """
    Parse JSON response with retry logic and fallback handling.

    Shared utility for all LLM clients to parse JSON from potentially
    markdown-wrapped or malformed responses.

    Args:
        response_content: Raw response from LLM
        max_retries: Number of parsing attempts

    Returns:
        Parsed JSON as dictionary, or fallback response on failure
    """
    original_content = response_content

    for attempt in range(max_retries):
        try:
            clean_content = response_content.strip()

            # Remove markdown code blocks
            if clean_content.startswith("```"):
                lines = clean_content.split("\n")
                start_idx = 0
                end_idx = len(lines)
                for i, line in enumerate(lines):
                    if line.strip().startswith("```") and i == 0:
                        start_idx = 1
                    elif line.strip() == "```":
                        end_idx = i
                        break
                clean_content = "\n".join(lines[start_idx:end_idx]).strip()
                if clean_content.startswith("json"):
                    clean_content = clean_content[4:].strip()

            try:
                return json.loads(clean_content)
            except json.JSONDecodeError:
                # Try to extract JSON object
                json_match = re.search(r'\{[\s\S]*\}', clean_content)
                if json_match:
                    clean_content = json_match.group(0)
                return json.loads(clean_content)

        except json.JSONDecodeError as e:
            logger.warning(f"JSON parsing attempt {attempt + 1} failed: {str(e)}")

            if attempt < max_retries - 1:
                # Try to find nested JSON structure
                nested_match = re.search(r'\{["\'](?:summary|analysis)["\'][\s]*:', original_content)
                if nested_match:
                    start = nested_match.start()
                    brace_count = 0
                    for i, char in enumerate(original_content[start:]):
                        if char == '{':
                            brace_count += 1
                        elif char == '}':
                            brace_count -= 1
                            if brace_count == 0:
                                response_content = original_content[start:start + i + 1]
                                break
                else:
                    response_content = original_content.strip().strip('`').strip()
                continue

    # Fallback response
    logger.error("JSON parsing failed after all retries")
    raw_text = original_content.strip().replace("```json", "").replace("```", "").strip()

    return {
        "summary": raw_text[:1000] if len(raw_text) > 100 else "Analysis completed but could not be formatted properly.",
        "raw_response": original_content[:2000],
        "error": "JSON decode failed - displaying raw analysis",
        "key_findings": ["See summary for analysis details"],
        "methodology": "",
        "strengths": [],
        "limitations": [],
        "confidence_score": 0.5
    }
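Usage sketch (editor's annotation, not part of the package): the strings below are invented to show the two paths through parse_json_response, the fence-stripping happy path and the fallback dict.

    # Hypothetical LLM output wrapped in a markdown fence.
    raw = '```json\n{"summary": "A sparse-attention variant", "confidence_score": 0.9}\n```'
    result = parse_json_response(raw)
    assert result["confidence_score"] == 0.9  # fence (and any "json" tag) stripped, parsed cleanly

    # Input that never parses falls through to the fallback dict after max_retries.
    fallback = parse_json_response("not json at all")
    assert fallback["error"] == "JSON decode failed - displaying raw analysis"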
def generate_cache_key(content: str, prompt_type: str, model: str = "") -> str:
    """Generate cache key from content, prompt type, and optionally model"""
    cache_input = f"{prompt_type}:{model}:{content[:500]}" if model else f"{prompt_type}:{content[:500]}"
    return hashlib.md5(cache_input.encode()).hexdigest()
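Usage sketch (not from the package; the model string and content variable are placeholders). Note that only the first 500 characters of content enter the hash, so two texts with an identical opening 500 characters map to the same key.

    text = "We propose a method for ..."  # placeholder paper content
    key = generate_cache_key(text, "summary", model="some-model-id")
    # key is a 32-character hex MD5 digest, suitable as a dict/cache key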
def generate_paper_cache_key(paper: Dict[str, Any]) -> str:
    """Generate unique cache key for a paper using stable identifiers"""
    paper_id = paper.get('arxiv_id') or paper.get('doi') or paper.get('id')

    if not paper_id:
        title = paper.get('title', 'Unknown')
        authors = paper.get('authors', [])
        if authors and len(authors) > 0:
            first_author = authors[0] if isinstance(authors, list) else str(authors)
            paper_id = f"{title}:{first_author}"
        else:
            paper_id = title

    return str(paper_id)
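Illustrative inputs (invented paper dicts) showing the identifier fallback chain:

    generate_paper_cache_key({"arxiv_id": "2301.00001", "title": "T"})  # -> "2301.00001"
    generate_paper_cache_key({"title": "T", "authors": ["A. Author"]})  # -> "T:A. Author"
    generate_paper_cache_key({"title": "T"})                            # -> "T"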
def format_paper_metadata(paper: Dict[str, Any], index: Optional[int] = None) -> str:
    """Format paper metadata into a standardized string"""
    title = paper.get('title', 'Unknown')
    abstract = paper.get('abstract', 'No abstract available')
    categories = paper.get('categories', [])
    authors = paper.get('authors', [])

    cat_str = ', '.join(categories[:3]) if categories else 'N/A'
    author_count = len(authors) if isinstance(authors, list) else 0

    prefix = f"Paper {index}: " if index is not None else ""

    return (
        f"{prefix}{title}\n"
        f"Categories: {cat_str}\n"
        f"Authors: {author_count} author(s)\n"
        f"Abstract: {abstract}"
    )
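For a made-up paper dict, the formatted string comes out as follows; only the first three categories are kept:

    paper = {
        "title": "An Example Paper",
        "abstract": "We study X.",
        "categories": ["cs.CL", "cs.LG", "cs.AI", "stat.ML"],
        "authors": ["A", "B"],
    }
    print(format_paper_metadata(paper, index=1))
    # Paper 1: An Example Paper
    # Categories: cs.CL, cs.LG, cs.AI
    # Authors: 2 author(s)
    # Abstract: We study X.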
class AsyncLRUCache:
    """
    Async-safe LRU cache with TTL support.
    Shared utility for all LLM clients.
    """

    def __init__(self, max_size: int = 100, ttl_hours: float = 1.0):
        self.cache: OrderedDict[str, Tuple[Any, datetime]] = OrderedDict()
        self.max_size = max_size
        self.ttl = timedelta(hours=ttl_hours)
        self.lock = asyncio.Lock()
        self.hits = 0
        self.misses = 0

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache if exists and not expired"""
        async with self.lock:
            if key not in self.cache:
                self.misses += 1
                return None

            value, timestamp = self.cache[key]

            if datetime.now() - timestamp > self.ttl:
                del self.cache[key]
                self.misses += 1
                return None

            # Move to end (most recently used)
            self.cache.move_to_end(key)
            self.hits += 1
            return value

    async def set(self, key: str, value: Any) -> None:
        """Add value to cache with eviction if needed"""
        async with self.lock:
            # Evict oldest if at capacity
            if len(self.cache) >= self.max_size:
                self.cache.popitem(last=False)

            self.cache[key] = (value, datetime.now())

    async def clear(self) -> None:
        """Clear all cache entries"""
        async with self.lock:
            self.cache.clear()

    @property
    def hit_rate(self) -> float:
        """Calculate cache hit rate"""
        total = self.hits + self.misses
        return self.hits / total if total > 0 else 0.0
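Minimal self-contained usage sketch (key names and stored values are arbitrary, not from the package):

    import asyncio

    async def demo() -> None:
        cache = AsyncLRUCache(max_size=2, ttl_hours=1.0)
        await cache.set("paper:2301.00001", {"summary": "..."})
        hit = await cache.get("paper:2301.00001")   # returns the stored dict
        miss = await cache.get("paper:9999.99999")  # returns None, counted as a miss
        print(cache.hit_rate)                       # 0.5 after one hit and one miss

    asyncio.run(demo())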
def sanitize_arxiv_id(arxiv_id: str, remove_version: bool = False) -> str:
    """
    Sanitize and normalize arXiv ID.

    Consolidates duplicate implementations from:
    - arxiv_operations/utils.py
    - utils/api_helpers.py

    Args:
        arxiv_id: Raw arXiv ID or URL
        remove_version: If True, strips version suffix (v1, v2, etc.)

    Returns:
        Cleaned arXiv ID
    """
    if not arxiv_id:
        return arxiv_id

    arxiv_id = arxiv_id.strip()

    # Remove common URL prefixes
    prefixes = [
        "https://arxiv.org/abs/",
        "http://arxiv.org/abs/",
        "https://arxiv.org/pdf/",
        "http://arxiv.org/pdf/",
        "arxiv:",
        "arXiv:",
    ]
    for prefix in prefixes:
        if arxiv_id.startswith(prefix):
            arxiv_id = arxiv_id[len(prefix):]

    # Remove .pdf extension if present
    if arxiv_id.endswith(".pdf"):
        arxiv_id = arxiv_id[:-4]

    # Extract just the ID part if there's a path
    if "/" in arxiv_id:
        arxiv_id = arxiv_id.split("/")[-1]

    # Optionally remove version suffix
    if remove_version:
        arxiv_id = re.sub(r'v\d+$', '', arxiv_id)

    return arxiv_id.strip()
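The transformations compose as shown (IDs invented):

    sanitize_arxiv_id("https://arxiv.org/abs/2301.00001v2")       # -> "2301.00001v2"
    sanitize_arxiv_id("https://arxiv.org/pdf/2301.00001v2.pdf")   # -> "2301.00001v2"
    sanitize_arxiv_id("arXiv:2301.00001v2", remove_version=True)  # -> "2301.00001"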
__all__ = [
    'parse_json_response',
    'generate_cache_key',
    'generate_paper_cache_key',
    'format_paper_metadata',
    'AsyncLRUCache',
    'sanitize_arxiv_id',
]