arionxiv 1.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. arionxiv/__init__.py +40 -0
  2. arionxiv/__main__.py +10 -0
  3. arionxiv/arxiv_operations/__init__.py +0 -0
  4. arionxiv/arxiv_operations/client.py +225 -0
  5. arionxiv/arxiv_operations/fetcher.py +173 -0
  6. arionxiv/arxiv_operations/searcher.py +122 -0
  7. arionxiv/arxiv_operations/utils.py +293 -0
  8. arionxiv/cli/__init__.py +4 -0
  9. arionxiv/cli/commands/__init__.py +1 -0
  10. arionxiv/cli/commands/analyze.py +587 -0
  11. arionxiv/cli/commands/auth.py +365 -0
  12. arionxiv/cli/commands/chat.py +714 -0
  13. arionxiv/cli/commands/daily.py +482 -0
  14. arionxiv/cli/commands/fetch.py +217 -0
  15. arionxiv/cli/commands/library.py +295 -0
  16. arionxiv/cli/commands/preferences.py +426 -0
  17. arionxiv/cli/commands/search.py +254 -0
  18. arionxiv/cli/commands/settings_unified.py +1407 -0
  19. arionxiv/cli/commands/trending.py +41 -0
  20. arionxiv/cli/commands/welcome.py +168 -0
  21. arionxiv/cli/main.py +407 -0
  22. arionxiv/cli/ui/__init__.py +1 -0
  23. arionxiv/cli/ui/global_theme_manager.py +173 -0
  24. arionxiv/cli/ui/logo.py +127 -0
  25. arionxiv/cli/ui/splash.py +89 -0
  26. arionxiv/cli/ui/theme.py +32 -0
  27. arionxiv/cli/ui/theme_system.py +391 -0
  28. arionxiv/cli/utils/__init__.py +54 -0
  29. arionxiv/cli/utils/animations.py +522 -0
  30. arionxiv/cli/utils/api_client.py +583 -0
  31. arionxiv/cli/utils/api_config.py +505 -0
  32. arionxiv/cli/utils/command_suggestions.py +147 -0
  33. arionxiv/cli/utils/db_config_manager.py +254 -0
  34. arionxiv/github_actions_runner.py +206 -0
  35. arionxiv/main.py +23 -0
  36. arionxiv/prompts/__init__.py +9 -0
  37. arionxiv/prompts/prompts.py +247 -0
  38. arionxiv/rag_techniques/__init__.py +8 -0
  39. arionxiv/rag_techniques/basic_rag.py +1531 -0
  40. arionxiv/scheduler_daemon.py +139 -0
  41. arionxiv/server.py +1000 -0
  42. arionxiv/server_main.py +24 -0
  43. arionxiv/services/__init__.py +73 -0
  44. arionxiv/services/llm_client.py +30 -0
  45. arionxiv/services/llm_inference/__init__.py +58 -0
  46. arionxiv/services/llm_inference/groq_client.py +469 -0
  47. arionxiv/services/llm_inference/llm_utils.py +250 -0
  48. arionxiv/services/llm_inference/openrouter_client.py +564 -0
  49. arionxiv/services/unified_analysis_service.py +872 -0
  50. arionxiv/services/unified_auth_service.py +457 -0
  51. arionxiv/services/unified_config_service.py +456 -0
  52. arionxiv/services/unified_daily_dose_service.py +823 -0
  53. arionxiv/services/unified_database_service.py +1633 -0
  54. arionxiv/services/unified_llm_service.py +366 -0
  55. arionxiv/services/unified_paper_service.py +604 -0
  56. arionxiv/services/unified_pdf_service.py +522 -0
  57. arionxiv/services/unified_prompt_service.py +344 -0
  58. arionxiv/services/unified_scheduler_service.py +589 -0
  59. arionxiv/services/unified_user_service.py +954 -0
  60. arionxiv/utils/__init__.py +51 -0
  61. arionxiv/utils/api_helpers.py +200 -0
  62. arionxiv/utils/file_cleanup.py +150 -0
  63. arionxiv/utils/ip_helper.py +96 -0
  64. arionxiv-1.0.32.dist-info/METADATA +336 -0
  65. arionxiv-1.0.32.dist-info/RECORD +69 -0
  66. arionxiv-1.0.32.dist-info/WHEEL +5 -0
  67. arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
  68. arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
  69. arionxiv-1.0.32.dist-info/top_level.txt +1 -0
@@ -0,0 +1,293 @@
1
+ # Utility functions for Arxiv operations
2
+ import re
3
+ import hashlib
4
+ import logging
5
+ from typing import Dict, Any, List, Optional
6
+ from datetime import datetime
7
+
8
+ # Import shared utility
9
+ from ..services.llm_inference.llm_utils import sanitize_arxiv_id as _sanitize_arxiv_id
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class ArxivUtils:
    """Stateless utility helpers for Arxiv operations.

    Every method is a ``@staticmethod``; the module-level ``arxiv_utils``
    instance exists only as an import convenience. All methods are
    defensive: they log and return a safe fallback instead of raising.
    """

    @staticmethod
    def normalize_arxiv_id(arxiv_id: str) -> str:
        """
        Normalize Arxiv ID by removing version numbers.

        This ensures consistent ID format across the application.

        Examples:
            '2502.03095v1' -> '2502.03095'
            '2502.03095v7' -> '2502.03095'
            '2502.03095' -> '2502.03095'
        """
        # Delegate to the shared sanitizer with remove_version=True.
        # Falsy input (None / "") is passed straight back so callers can
        # forward optional IDs without pre-checking.
        return _sanitize_arxiv_id(arxiv_id, remove_version=True) if arxiv_id else arxiv_id

    @staticmethod
    def clean_arxiv_id(arxiv_id: str) -> str:
        """Clean and normalize Arxiv ID - delegates to shared utility.

        Unlike :meth:`normalize_arxiv_id`, this keeps any version suffix
        (the shared sanitizer's default behavior).
        """
        return _sanitize_arxiv_id(arxiv_id) if arxiv_id else arxiv_id

    @staticmethod
    def extract_arxiv_id_from_url(url: str) -> Optional[str]:
        """Extract an Arxiv ID from various URL formats.

        Handles abs/pdf URLs, ``arxiv:`` prefixes, and bare new-style
        (``YYMM.NNNNN``) or legacy (``subject/NNNNNNN``) IDs embedded in
        a path. Returns the cleaned ID, or ``None`` when nothing matches
        or an unexpected error occurs.
        """
        try:
            # Ordered from most to least specific; the first match wins.
            patterns = [
                r"arxiv\.org/abs/([^/?]+)",
                r"arxiv\.org/pdf/([^/?]+)",
                r"arxiv:([^/?]+)",
                r"/([0-9]{4}\.[0-9]{4,5}(?:v[0-9]+)?)",
                r"/([a-z-]+/[0-9]{7}(?:v[0-9]+)?)"
            ]

            for pattern in patterns:
                match = re.search(pattern, url, re.IGNORECASE)
                if match:
                    return ArxivUtils.clean_arxiv_id(match.group(1))

            return None
        except Exception as e:
            logger.error(f"Error extracting arxiv ID from URL {url}: {str(e)}")
            return None

    @staticmethod
    def validate_arxiv_id(arxiv_id: str) -> bool:
        """Validate whether a string is a well-formed Arxiv ID.

        Accepts both the new format (``YYMM.NNNN[vN]``) and the legacy
        format (``subject-class/YYMMNNN[vN]``). Returns ``False`` on any
        error (including non-string input).
        """
        try:
            cleaned_id = ArxivUtils.clean_arxiv_id(arxiv_id)

            # New format: YYMM.NNNN[vN]
            new_format = re.match(r"^[0-9]{4}\.[0-9]{4,5}(?:v[0-9]+)?$", cleaned_id)

            # Old format: subject-class/YYMMnnn[vN]
            old_format = re.match(r"^[a-z-]+/[0-9]{7}(?:v[0-9]+)?$", cleaned_id)

            return bool(new_format or old_format)
        except Exception as e:
            logger.error(f"Error validating arxiv ID {arxiv_id}: {str(e)}")
            return False

    @staticmethod
    def generate_paper_hash(paper_data: Dict[str, Any]) -> str:
        """Generate a deterministic fingerprint hash for a paper.

        The digest is built from arxiv_id + title + first author.
        MD5 is used as a dedup fingerprint only -- NOT for security.
        Falls back to hashing ``str(paper_data)`` if field access fails
        (e.g. ``authors[0]`` is not a string).
        """
        try:
            hash_string = paper_data.get("arxiv_id", "") + paper_data.get("title", "")

            authors = paper_data.get("authors", [])
            if authors:
                # First author only; full author lists can differ in
                # ordering/completeness between fetches.
                hash_string += authors[0]

            return hashlib.md5(hash_string.encode()).hexdigest()
        except Exception as e:
            logger.error(f"Error generating paper hash: {str(e)}")
            return hashlib.md5(str(paper_data).encode()).hexdigest()

    @staticmethod
    def parse_categories(categories: List[str]) -> Dict[str, Any]:
        """Parse and categorize Arxiv categories.

        Returns a dict with the primary category, the full list,
        human-readable subject areas, and boolean subject flags.
        On error, returns a minimal dict without the flag keys.
        """
        try:
            category_info = {
                "primary": categories[0] if categories else None,
                "all_categories": categories,
                "subject_areas": [],
                "is_cs": False,
                "is_math": False,
                "is_physics": False,
                "is_stat": False
            }

            # Map top-level archive prefixes to subject areas.
            subject_mapping = {
                "cs": "Computer Science",
                "math": "Mathematics",
                "physics": "Physics",
                "stat": "Statistics",
                "q-bio": "Quantitative Biology",
                "q-fin": "Quantitative Finance",
                "econ": "Economics",
                "eess": "Electrical Engineering"
            }

            for category in categories:
                # The archive prefix is everything before the first dot
                # (e.g. "q-bio.NC" -> "q-bio", bare "q-bio" -> "q-bio").
                # Fix: the old '-'-split fallback could never match the
                # hyphenated mapping keys ("q-bio", "q-fin").
                subject = category.split(".")[0]

                if subject in subject_mapping:
                    subject_area = subject_mapping[subject]
                    if subject_area not in category_info["subject_areas"]:
                        category_info["subject_areas"].append(subject_area)

                # Set flags
                if category.startswith("cs."):
                    category_info["is_cs"] = True
                elif category.startswith("math."):
                    category_info["is_math"] = True
                elif category.startswith("physics.") or category.startswith("astro-ph.") or category.startswith("cond-mat."):
                    category_info["is_physics"] = True
                elif category.startswith("stat."):
                    category_info["is_stat"] = True

            return category_info
        except Exception as e:
            logger.error(f"Error parsing categories: {str(e)}")
            return {"primary": None, "all_categories": categories, "subject_areas": []}

    @staticmethod
    def extract_keywords_from_text(text: str, max_keywords: int = 10) -> List[str]:
        """Extract up to ``max_keywords`` frequent keywords from text.

        Simple frequency-based extraction: lowercased alphabetic words of
        3+ characters, minus a small English stop-word set, sorted by
        descending count (ties keep first-seen order, as ``sorted`` is
        stable). Returns ``[]`` for empty input or on error.
        """
        try:
            if not text:
                return []

            # Common English stop words to exclude.
            stop_words = {
                "the", "a", "an", "and", "or", "but", "in", "on", "at", "to",
                "for", "of", "with", "by", "this", "that", "these", "those",
                "is", "are", "was", "were", "be", "been", "being", "have",
                "has", "had", "do", "does", "did", "will", "would", "could",
                "should", "may", "might", "can", "we", "our", "us", "they",
                "their", "them", "it", "its", "he", "his", "him", "she",
                "her", "hers", "you", "your", "yours", "i", "my", "mine"
            }

            # The regex already enforces a minimum word length of 3, so
            # no separate length re-check is needed.
            words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())

            # Count frequencies, ignoring stop words.
            word_freq = {}
            for word in words:
                if word not in stop_words:
                    word_freq[word] = word_freq.get(word, 0) + 1

            # Sort by frequency (descending) and keep the top N.
            sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
            return [word for word, freq in sorted_words[:max_keywords]]
        except Exception as e:
            logger.error(f"Error extracting keywords: {str(e)}")
            return []

    @staticmethod
    def format_paper_for_display(paper_data: Dict[str, Any]) -> Dict[str, Any]:
        """Format paper data for display.

        Produces a new dict with the abstract truncated to 500 chars,
        a ``YYYY-MM-DD`` formatted date, a compact author string
        ("A, B, C et al. (N total)" past 3 authors), and the primary
        category. Returns the raw input dict on unexpected errors.
        """
        try:
            abstract = paper_data.get("abstract", "")  # read once, used thrice below
            formatted = {
                "id": paper_data.get("arxiv_id", ""),
                "title": paper_data.get("title", "Untitled"),
                "authors": paper_data.get("authors", []),
                "abstract": abstract[:500] + "..." if len(abstract) > 500 else abstract,
                "categories": paper_data.get("categories", []),
                "published": paper_data.get("published", ""),
                "pdf_url": paper_data.get("pdf_url", ""),
                "entry_id": paper_data.get("entry_id", "")
            }

            # Format date: ISO 8601 with a trailing 'Z' normalized to
            # '+00:00' so fromisoformat accepts it.
            if formatted["published"]:
                try:
                    pub_date = datetime.fromisoformat(formatted["published"].replace('Z', '+00:00'))
                    formatted["published_formatted"] = pub_date.strftime("%Y-%m-%d")
                except (ValueError, TypeError):
                    # Fix: was a bare `except:`. fromisoformat raises
                    # ValueError (bad format) or TypeError (non-string);
                    # fall back to the raw value.
                    formatted["published_formatted"] = formatted["published"]
            else:
                formatted["published_formatted"] = "Unknown"

            # Format authors: cap the display list at three names.
            if len(formatted["authors"]) > 3:
                formatted["authors_display"] = ", ".join(formatted["authors"][:3]) + f" et al. ({len(formatted['authors'])} total)"
            else:
                formatted["authors_display"] = ", ".join(formatted["authors"])

            # Extract primary category
            if formatted["categories"]:
                formatted["primary_category"] = formatted["categories"][0]
            else:
                formatted["primary_category"] = "Unknown"

            return formatted
        except Exception as e:
            logger.error(f"Error formatting paper: {str(e)}")
            return paper_data

    @staticmethod
    def create_paper_summary(paper_data: Dict[str, Any]) -> str:
        """Create a brief one-line summary of a paper.

        Joins "Title", "Authors" (truncated to three + "et al."), and up
        to three "Categories" with " | ". Missing fields are skipped.
        """
        try:
            title = paper_data.get("title", "")
            authors = paper_data.get("authors", [])
            categories = paper_data.get("categories", [])

            summary_parts = []

            if title:
                summary_parts.append(f"Title: {title}")

            if authors:
                if len(authors) <= 3:
                    author_str = ", ".join(authors)
                else:
                    author_str = f"{', '.join(authors[:3])} et al."
                summary_parts.append(f"Authors: {author_str}")

            if categories:
                summary_parts.append(f"Categories: {', '.join(categories[:3])}")

            return " | ".join(summary_parts)
        except Exception as e:
            logger.error(f"Error creating paper summary: {str(e)}")
            return "Summary unavailable"

    @staticmethod
    def batch_validate_papers(papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Validate a batch of papers.

        Each paper must have arxiv_id (well-formed), title, abstract,
        and authors. Returns counts plus a per-paper error list; on an
        unexpected failure returns ``{"error": message}``.
        """
        try:
            validation_results = {
                "total_papers": len(papers),
                "valid_papers": 0,
                "invalid_papers": 0,
                "validation_errors": []
            }

            for i, paper in enumerate(papers):
                errors = []

                # Check required fields
                if not paper.get("arxiv_id"):
                    errors.append("Missing arxiv_id")
                elif not ArxivUtils.validate_arxiv_id(paper["arxiv_id"]):
                    errors.append("Invalid arxiv_id format")

                if not paper.get("title"):
                    errors.append("Missing title")

                if not paper.get("abstract"):
                    errors.append("Missing abstract")

                if not paper.get("authors"):
                    errors.append("Missing authors")

                if errors:
                    validation_results["invalid_papers"] += 1
                    validation_results["validation_errors"].append({
                        "paper_index": i,
                        "arxiv_id": paper.get("arxiv_id", "Unknown"),
                        "errors": errors
                    })
                else:
                    validation_results["valid_papers"] += 1

            return validation_results
        except Exception as e:
            logger.error(f"Error validating papers batch: {str(e)}")
            return {"error": str(e)}
291
+
292
# Global instance
# Shared singleton for import convenience; ArxivUtils is stateless
# (all methods are @staticmethod), so a single instance is safe to share.
arxiv_utils = ArxivUtils()
@@ -0,0 +1,4 @@
1
+ """CLI Package Initialization"""
2
+
3
+ __version__ = "1.0.0"
4
+ __author__ = "ArionXiv Team"
@@ -0,0 +1 @@
1
+ """Commands Package"""