arionxiv-1.0.32-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arionxiv/__init__.py +40 -0
- arionxiv/__main__.py +10 -0
- arionxiv/arxiv_operations/__init__.py +0 -0
- arionxiv/arxiv_operations/client.py +225 -0
- arionxiv/arxiv_operations/fetcher.py +173 -0
- arionxiv/arxiv_operations/searcher.py +122 -0
- arionxiv/arxiv_operations/utils.py +293 -0
- arionxiv/cli/__init__.py +4 -0
- arionxiv/cli/commands/__init__.py +1 -0
- arionxiv/cli/commands/analyze.py +587 -0
- arionxiv/cli/commands/auth.py +365 -0
- arionxiv/cli/commands/chat.py +714 -0
- arionxiv/cli/commands/daily.py +482 -0
- arionxiv/cli/commands/fetch.py +217 -0
- arionxiv/cli/commands/library.py +295 -0
- arionxiv/cli/commands/preferences.py +426 -0
- arionxiv/cli/commands/search.py +254 -0
- arionxiv/cli/commands/settings_unified.py +1407 -0
- arionxiv/cli/commands/trending.py +41 -0
- arionxiv/cli/commands/welcome.py +168 -0
- arionxiv/cli/main.py +407 -0
- arionxiv/cli/ui/__init__.py +1 -0
- arionxiv/cli/ui/global_theme_manager.py +173 -0
- arionxiv/cli/ui/logo.py +127 -0
- arionxiv/cli/ui/splash.py +89 -0
- arionxiv/cli/ui/theme.py +32 -0
- arionxiv/cli/ui/theme_system.py +391 -0
- arionxiv/cli/utils/__init__.py +54 -0
- arionxiv/cli/utils/animations.py +522 -0
- arionxiv/cli/utils/api_client.py +583 -0
- arionxiv/cli/utils/api_config.py +505 -0
- arionxiv/cli/utils/command_suggestions.py +147 -0
- arionxiv/cli/utils/db_config_manager.py +254 -0
- arionxiv/github_actions_runner.py +206 -0
- arionxiv/main.py +23 -0
- arionxiv/prompts/__init__.py +9 -0
- arionxiv/prompts/prompts.py +247 -0
- arionxiv/rag_techniques/__init__.py +8 -0
- arionxiv/rag_techniques/basic_rag.py +1531 -0
- arionxiv/scheduler_daemon.py +139 -0
- arionxiv/server.py +1000 -0
- arionxiv/server_main.py +24 -0
- arionxiv/services/__init__.py +73 -0
- arionxiv/services/llm_client.py +30 -0
- arionxiv/services/llm_inference/__init__.py +58 -0
- arionxiv/services/llm_inference/groq_client.py +469 -0
- arionxiv/services/llm_inference/llm_utils.py +250 -0
- arionxiv/services/llm_inference/openrouter_client.py +564 -0
- arionxiv/services/unified_analysis_service.py +872 -0
- arionxiv/services/unified_auth_service.py +457 -0
- arionxiv/services/unified_config_service.py +456 -0
- arionxiv/services/unified_daily_dose_service.py +823 -0
- arionxiv/services/unified_database_service.py +1633 -0
- arionxiv/services/unified_llm_service.py +366 -0
- arionxiv/services/unified_paper_service.py +604 -0
- arionxiv/services/unified_pdf_service.py +522 -0
- arionxiv/services/unified_prompt_service.py +344 -0
- arionxiv/services/unified_scheduler_service.py +589 -0
- arionxiv/services/unified_user_service.py +954 -0
- arionxiv/utils/__init__.py +51 -0
- arionxiv/utils/api_helpers.py +200 -0
- arionxiv/utils/file_cleanup.py +150 -0
- arionxiv/utils/ip_helper.py +96 -0
- arionxiv-1.0.32.dist-info/METADATA +336 -0
- arionxiv-1.0.32.dist-info/RECORD +69 -0
- arionxiv-1.0.32.dist-info/WHEEL +5 -0
- arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
- arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
- arionxiv-1.0.32.dist-info/top_level.txt +1 -0
arionxiv/arxiv_operations/utils.py
ADDED

@@ -0,0 +1,293 @@
# Utility functions for Arxiv operations
import re
import hashlib
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime

# Import shared utility
from ..services.llm_inference.llm_utils import sanitize_arxiv_id as _sanitize_arxiv_id

logger = logging.getLogger(__name__)


class ArxivUtils:
    """Utility functions for Arxiv operations"""

    @staticmethod
    def normalize_arxiv_id(arxiv_id: str) -> str:
        """
        Normalize Arxiv ID by removing version numbers.
        This ensures consistent ID format across the application.
        Examples:
            '2502.03095v1' -> '2502.03095'
            '2502.03095v7' -> '2502.03095'
            '2502.03095'   -> '2502.03095'
        """
        # Use shared utility with remove_version=True
        return _sanitize_arxiv_id(arxiv_id, remove_version=True) if arxiv_id else arxiv_id

    @staticmethod
    def clean_arxiv_id(arxiv_id: str) -> str:
        """Clean and normalize Arxiv ID - delegates to shared utility"""
        return _sanitize_arxiv_id(arxiv_id) if arxiv_id else arxiv_id

    @staticmethod
    def extract_arxiv_id_from_url(url: str) -> Optional[str]:
        """Extract Arxiv ID from various URL formats"""
        try:
            # Common patterns for Arxiv URLs
            patterns = [
                r"arxiv\.org/abs/([^/?]+)",
                r"arxiv\.org/pdf/([^/?]+)",
                r"arxiv:([^/?]+)",
                r"/([0-9]{4}\.[0-9]{4,5}(?:v[0-9]+)?)",
                r"/([a-z-]+/[0-9]{7}(?:v[0-9]+)?)"
            ]

            for pattern in patterns:
                match = re.search(pattern, url, re.IGNORECASE)
                if match:
                    return ArxivUtils.clean_arxiv_id(match.group(1))

            return None
        except Exception as e:
            logger.error(f"Error extracting arxiv ID from URL {url}: {str(e)}")
            return None

    @staticmethod
    def validate_arxiv_id(arxiv_id: str) -> bool:
        """Validate if a string is a valid Arxiv ID"""
        try:
            cleaned_id = ArxivUtils.clean_arxiv_id(arxiv_id)

            # New format: YYMM.NNNN[vN]
            new_format = re.match(r"^[0-9]{4}\.[0-9]{4,5}(?:v[0-9]+)?$", cleaned_id)

            # Old format: subject-class/YYMMnnn[vN]
            old_format = re.match(r"^[a-z-]+/[0-9]{7}(?:v[0-9]+)?$", cleaned_id)

            return bool(new_format or old_format)
        except Exception as e:
            logger.error(f"Error validating arxiv ID {arxiv_id}: {str(e)}")
            return False

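    # Expected behavior of the two helpers above (hypothetical REPL notes,
    # not part of the shipped file; assumes clean_arxiv_id passes IDs that
    # are already clean through unchanged):
    #   ArxivUtils.extract_arxiv_id_from_url("https://arxiv.org/abs/2502.03095v2")
    #       -> "2502.03095v2"   (matched by the abs/ pattern, then cleaned)
    #   ArxivUtils.validate_arxiv_id("2502.03095")   -> True   (new format)
    #   ArxivUtils.validate_arxiv_id("cs/0309136")   -> True   (old format)
    #   ArxivUtils.validate_arxiv_id("not-an-id")    -> False
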
    @staticmethod
    def generate_paper_hash(paper_data: Dict[str, Any]) -> str:
        """Generate a unique hash for a paper"""
        try:
            # Use arxiv_id, title, and first author for the hash.
            # MD5 is acceptable here: the digest serves as a dedup
            # fingerprint, not a security measure.
            hash_string = ""
            hash_string += paper_data.get("arxiv_id", "")
            hash_string += paper_data.get("title", "")

            authors = paper_data.get("authors", [])
            if authors:
                hash_string += authors[0]

            return hashlib.md5(hash_string.encode()).hexdigest()
        except Exception as e:
            logger.error(f"Error generating paper hash: {str(e)}")
            # Fall back to hashing the whole record's repr
            return hashlib.md5(str(paper_data).encode()).hexdigest()

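    # e.g. ArxivUtils.generate_paper_hash({"arxiv_id": "2502.03095",
    #          "title": "T", "authors": ["A. Author"]})
    # returns a 32-character hex digest that is stable for the same
    # (arxiv_id, title, first-author) triple (illustrative call, not
    # part of the shipped file).
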
    @staticmethod
    def parse_categories(categories: List[str]) -> Dict[str, Any]:
        """Parse and categorize Arxiv categories"""
        try:
            category_info = {
                "primary": categories[0] if categories else None,
                "all_categories": categories,
                "subject_areas": [],
                "is_cs": False,
                "is_math": False,
                "is_physics": False,
                "is_stat": False
            }

            # Map categories to subject areas
            subject_mapping = {
                "cs": "Computer Science",
                "math": "Mathematics",
                "physics": "Physics",
                "stat": "Statistics",
                "q-bio": "Quantitative Biology",
                "q-fin": "Quantitative Finance",
                "econ": "Economics",
                "eess": "Electrical Engineering"
            }

            for category in categories:
                # Take the archive prefix ("cs.LG" -> "cs"); bare archive
                # names such as "q-bio" pass through intact and still hit
                # the mapping.
                subject = category.split(".")[0]

                if subject in subject_mapping:
                    subject_area = subject_mapping[subject]
                    if subject_area not in category_info["subject_areas"]:
                        category_info["subject_areas"].append(subject_area)

                # Set flags
                if category.startswith("cs."):
                    category_info["is_cs"] = True
                elif category.startswith("math."):
                    category_info["is_math"] = True
                elif category.startswith("physics.") or category.startswith("astro-ph.") or category.startswith("cond-mat."):
                    category_info["is_physics"] = True
                elif category.startswith("stat."):
                    category_info["is_stat"] = True

            return category_info
        except Exception as e:
            logger.error(f"Error parsing categories: {str(e)}")
            # Keep the same shape as the success path so callers can rely
            # on the flag keys being present.
            return {
                "primary": None,
                "all_categories": categories,
                "subject_areas": [],
                "is_cs": False,
                "is_math": False,
                "is_physics": False,
                "is_stat": False
            }

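    # Sketch of the structure parse_categories returns (hypothetical input,
    # not part of the shipped file):
    #   ArxivUtils.parse_categories(["cs.LG", "stat.ML"])
    #   -> {"primary": "cs.LG",
    #       "all_categories": ["cs.LG", "stat.ML"],
    #       "subject_areas": ["Computer Science", "Statistics"],
    #       "is_cs": True, "is_math": False,
    #       "is_physics": False, "is_stat": True}
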
    @staticmethod
    def extract_keywords_from_text(text: str, max_keywords: int = 10) -> List[str]:
        """Extract potential keywords from text"""
        try:
            if not text:
                return []

            # Simple keyword extraction:
            # drop common stop words, then rank by frequency
            stop_words = {
                "the", "a", "an", "and", "or", "but", "in", "on", "at", "to",
                "for", "of", "with", "by", "this", "that", "these", "those",
                "is", "are", "was", "were", "be", "been", "being", "have",
                "has", "had", "do", "does", "did", "will", "would", "could",
                "should", "may", "might", "can", "we", "our", "us", "they",
                "their", "them", "it", "its", "he", "his", "him", "she",
                "her", "hers", "you", "your", "yours", "i", "my", "mine"
            }

            # Extract words (3+ letters)
            words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())

            # Filter out stop words and count frequency
            word_freq = {}
            for word in words:
                if word not in stop_words and len(word) >= 3:
                    word_freq[word] = word_freq.get(word, 0) + 1

            # Sort by frequency and return the top keywords
            sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
            keywords = [word for word, freq in sorted_words[:max_keywords]]

            return keywords
        except Exception as e:
            logger.error(f"Error extracting keywords: {str(e)}")
            return []

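    # e.g. ArxivUtils.extract_keywords_from_text(
    #          "transformers improve transformers for vision", max_keywords=2)
    #      -> ["transformers", "improve"]
    # (frequency-ranked; ties keep first-seen order because the sort is
    #  stable. Illustrative call, not part of the shipped file.)
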
    @staticmethod
    def format_paper_for_display(paper_data: Dict[str, Any]) -> Dict[str, Any]:
        """Format paper data for display"""
        try:
            abstract = paper_data.get("abstract", "")
            formatted = {
                "id": paper_data.get("arxiv_id", ""),
                "title": paper_data.get("title", "Untitled"),
                "authors": paper_data.get("authors", []),
                # Truncate long abstracts for display
                "abstract": abstract[:500] + "..." if len(abstract) > 500 else abstract,
                "categories": paper_data.get("categories", []),
                "published": paper_data.get("published", ""),
                "pdf_url": paper_data.get("pdf_url", ""),
                "entry_id": paper_data.get("entry_id", "")
            }

            # Format date
            if formatted["published"]:
                try:
                    pub_date = datetime.fromisoformat(formatted["published"].replace('Z', '+00:00'))
                    formatted["published_formatted"] = pub_date.strftime("%Y-%m-%d")
                except (TypeError, ValueError):
                    formatted["published_formatted"] = formatted["published"]
            else:
                formatted["published_formatted"] = "Unknown"

            # Format authors
            if len(formatted["authors"]) > 3:
                formatted["authors_display"] = ", ".join(formatted["authors"][:3]) + f" et al. ({len(formatted['authors'])} total)"
            else:
                formatted["authors_display"] = ", ".join(formatted["authors"])

            # Extract primary category
            if formatted["categories"]:
                formatted["primary_category"] = formatted["categories"][0]
            else:
                formatted["primary_category"] = "Unknown"

            return formatted
        except Exception as e:
            logger.error(f"Error formatting paper: {str(e)}")
            return paper_data

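    # e.g. ArxivUtils.format_paper_for_display(
    #          {"title": "T", "authors": ["A"], "published": "2025-02-05T00:00:00Z"}
    #      )["published_formatted"]  -> "2025-02-05"
    # (illustrative call, not part of the shipped file)
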
    @staticmethod
    def create_paper_summary(paper_data: Dict[str, Any]) -> str:
        """Create a brief summary of a paper"""
        try:
            title = paper_data.get("title", "")
            authors = paper_data.get("authors", [])
            categories = paper_data.get("categories", [])

            summary_parts = []

            if title:
                summary_parts.append(f"Title: {title}")

            if authors:
                if len(authors) <= 3:
                    author_str = ", ".join(authors)
                else:
                    author_str = f"{', '.join(authors[:3])} et al."
                summary_parts.append(f"Authors: {author_str}")

            if categories:
                summary_parts.append(f"Categories: {', '.join(categories[:3])}")

            return " | ".join(summary_parts)
        except Exception as e:
            logger.error(f"Error creating paper summary: {str(e)}")
            return "Summary unavailable"

    @staticmethod
    def batch_validate_papers(papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Validate a batch of papers"""
        try:
            validation_results = {
                "total_papers": len(papers),
                "valid_papers": 0,
                "invalid_papers": 0,
                "validation_errors": []
            }

            for i, paper in enumerate(papers):
                errors = []

                # Check required fields
                if not paper.get("arxiv_id"):
                    errors.append("Missing arxiv_id")
                elif not ArxivUtils.validate_arxiv_id(paper["arxiv_id"]):
                    errors.append("Invalid arxiv_id format")

                if not paper.get("title"):
                    errors.append("Missing title")

                if not paper.get("abstract"):
                    errors.append("Missing abstract")

                if not paper.get("authors"):
                    errors.append("Missing authors")

                if errors:
                    validation_results["invalid_papers"] += 1
                    validation_results["validation_errors"].append({
                        "paper_index": i,
                        "arxiv_id": paper.get("arxiv_id", "Unknown"),
                        "errors": errors
                    })
                else:
                    validation_results["valid_papers"] += 1

            return validation_results
        except Exception as e:
            logger.error(f"Error validating papers batch: {str(e)}")
            return {"error": str(e)}

# Global instance
arxiv_utils = ArxivUtils()

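A minimal end-to-end sketch of how these utilities might be exercised (hypothetical script; the sample paper dict is an assumption, and it presumes the package and its relative imports resolve cleanly):

from arionxiv.arxiv_operations.utils import arxiv_utils

paper = {
    "arxiv_id": "2502.03095v1",
    "title": "An Example Paper",
    "authors": ["A. Author", "B. Author", "C. Author", "D. Author"],
    "abstract": "We study an example problem.",
    "categories": ["cs.LG", "stat.ML"],
    "published": "2025-02-05T00:00:00Z",
}

print(arxiv_utils.normalize_arxiv_id(paper["arxiv_id"]))  # 2502.03095
print(arxiv_utils.create_paper_summary(paper))
# Title: An Example Paper | Authors: A. Author, B. Author, C. Author et al. | Categories: cs.LG, stat.ML

report = arxiv_utils.batch_validate_papers([paper, {}])
print(report["valid_papers"], report["invalid_papers"])   # 1 1
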
arionxiv/cli/__init__.py
ADDED
@@ -0,0 +1 @@
"""Commands Package"""