arionxiv-1.0.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. arionxiv/__init__.py +40 -0
  2. arionxiv/__main__.py +10 -0
  3. arionxiv/arxiv_operations/__init__.py +0 -0
  4. arionxiv/arxiv_operations/client.py +225 -0
  5. arionxiv/arxiv_operations/fetcher.py +173 -0
  6. arionxiv/arxiv_operations/searcher.py +122 -0
  7. arionxiv/arxiv_operations/utils.py +293 -0
  8. arionxiv/cli/__init__.py +4 -0
  9. arionxiv/cli/commands/__init__.py +1 -0
  10. arionxiv/cli/commands/analyze.py +587 -0
  11. arionxiv/cli/commands/auth.py +365 -0
  12. arionxiv/cli/commands/chat.py +714 -0
  13. arionxiv/cli/commands/daily.py +482 -0
  14. arionxiv/cli/commands/fetch.py +217 -0
  15. arionxiv/cli/commands/library.py +295 -0
  16. arionxiv/cli/commands/preferences.py +426 -0
  17. arionxiv/cli/commands/search.py +254 -0
  18. arionxiv/cli/commands/settings_unified.py +1407 -0
  19. arionxiv/cli/commands/trending.py +41 -0
  20. arionxiv/cli/commands/welcome.py +168 -0
  21. arionxiv/cli/main.py +407 -0
  22. arionxiv/cli/ui/__init__.py +1 -0
  23. arionxiv/cli/ui/global_theme_manager.py +173 -0
  24. arionxiv/cli/ui/logo.py +127 -0
  25. arionxiv/cli/ui/splash.py +89 -0
  26. arionxiv/cli/ui/theme.py +32 -0
  27. arionxiv/cli/ui/theme_system.py +391 -0
  28. arionxiv/cli/utils/__init__.py +54 -0
  29. arionxiv/cli/utils/animations.py +522 -0
  30. arionxiv/cli/utils/api_client.py +583 -0
  31. arionxiv/cli/utils/api_config.py +505 -0
  32. arionxiv/cli/utils/command_suggestions.py +147 -0
  33. arionxiv/cli/utils/db_config_manager.py +254 -0
  34. arionxiv/github_actions_runner.py +206 -0
  35. arionxiv/main.py +23 -0
  36. arionxiv/prompts/__init__.py +9 -0
  37. arionxiv/prompts/prompts.py +247 -0
  38. arionxiv/rag_techniques/__init__.py +8 -0
  39. arionxiv/rag_techniques/basic_rag.py +1531 -0
  40. arionxiv/scheduler_daemon.py +139 -0
  41. arionxiv/server.py +1000 -0
  42. arionxiv/server_main.py +24 -0
  43. arionxiv/services/__init__.py +73 -0
  44. arionxiv/services/llm_client.py +30 -0
  45. arionxiv/services/llm_inference/__init__.py +58 -0
  46. arionxiv/services/llm_inference/groq_client.py +469 -0
  47. arionxiv/services/llm_inference/llm_utils.py +250 -0
  48. arionxiv/services/llm_inference/openrouter_client.py +564 -0
  49. arionxiv/services/unified_analysis_service.py +872 -0
  50. arionxiv/services/unified_auth_service.py +457 -0
  51. arionxiv/services/unified_config_service.py +456 -0
  52. arionxiv/services/unified_daily_dose_service.py +823 -0
  53. arionxiv/services/unified_database_service.py +1633 -0
  54. arionxiv/services/unified_llm_service.py +366 -0
  55. arionxiv/services/unified_paper_service.py +604 -0
  56. arionxiv/services/unified_pdf_service.py +522 -0
  57. arionxiv/services/unified_prompt_service.py +344 -0
  58. arionxiv/services/unified_scheduler_service.py +589 -0
  59. arionxiv/services/unified_user_service.py +954 -0
  60. arionxiv/utils/__init__.py +51 -0
  61. arionxiv/utils/api_helpers.py +200 -0
  62. arionxiv/utils/file_cleanup.py +150 -0
  63. arionxiv/utils/ip_helper.py +96 -0
  64. arionxiv-1.0.32.dist-info/METADATA +336 -0
  65. arionxiv-1.0.32.dist-info/RECORD +69 -0
  66. arionxiv-1.0.32.dist-info/WHEEL +5 -0
  67. arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
  68. arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
  69. arionxiv-1.0.32.dist-info/top_level.txt +1 -0
arionxiv/__init__.py ADDED
@@ -0,0 +1,40 @@
+ """
+ ArionXiv - AI-Powered Research Paper Analysis and Management
+
+ A comprehensive tool for discovering, analyzing, and managing research papers
+ from arXiv with AI-powered insights and organizational features.
+ """
+
+ __version__ = "1.0.29"
+ __author__ = "Arion Das"
+ __email__ = "ariondasad@gmail.com"
+ __description__ = "AI-Powered Research Paper Analysis and Management"
+
+ # Lazy imports to avoid requiring fastapi for CLI/GitHub Actions usage
+ # Services are imported on-demand when accessed
+ def __getattr__(name):
+     """Lazy import of services to avoid loading fastapi for CLI usage."""
+     if name == "config":
+         from .services.unified_config_service import config
+         return config
+     elif name == "database_service":
+         from .services.unified_database_service import database_service
+         return database_service
+     elif name == "paper_service":
+         from .services.unified_paper_service import paper_service
+         return paper_service
+     elif name == "analysis_service":
+         from .services.unified_analysis_service import analysis_service
+         return analysis_service
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+ __all__ = [
+     "__version__",
+     "__author__",
+     "__email__",
+     "__description__",
+     "config",
+     "database_service",
+     "paper_service",
+     "analysis_service"
+ ]
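The `__getattr__` hook above uses PEP 562 module-level lazy imports: plain metadata attributes resolve normally, while the four service names only trigger their (fastapi-dependent) imports on first access. A minimal usage sketch, assuming the package and its service dependencies are installed:

    import arionxiv

    print(arionxiv.__version__)       # ordinary attribute, no service import
    cfg = arionxiv.config             # first access runs the lazy import
    db = arionxiv.database_service    # each service is imported on demand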
arionxiv/__main__.py ADDED
@@ -0,0 +1,10 @@
+ #!/usr/bin/env python3
+ """
+ ArionXiv package main entry point
+ """
+
+ import sys
+ from .cli.main import cli
+
+ if __name__ == "__main__":
+     cli()
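This makes the package runnable as `python -m arionxiv`, alongside the console scripts declared in `entry_points.txt`. An illustrative programmatic equivalent, for sketch purposes only:

    # Programmatic equivalent of `python -m arionxiv` (illustrative sketch)
    import runpy
    runpy.run_module("arionxiv", run_name="__main__")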
arionxiv/arxiv_operations/__init__.py ADDED
File without changes
arionxiv/arxiv_operations/client.py ADDED
@@ -0,0 +1,225 @@
+ # arXiv API client for fetching papers
+ import arxiv
+ import logging
+ from typing import List, Dict, Any, Optional
+ from datetime import datetime, timedelta
+
+ logger = logging.getLogger(__name__)
+
+ class ArxivClient:
+     """Client for interacting with the arXiv API"""
+
+     def __init__(self):
+         self.client = arxiv.Client()
+         self.default_page_size = 100
+         self.max_results = 100
+
+     # Short words to skip in title searches (arXiv doesn't index these well)
+     SKIP_WORDS = {'a', 'an', 'the', 'is', 'are', 'be', 'to', 'of', 'in', 'on',
+                   'at', 'by', 'for', 'and', 'or', 'but', 'not', 'all', 'you',
+                   'it', 'its', 'as', 'so', 'if', 'do', 'no', 'up', 'we', 'my'}
+
+     def search_papers(self, query: str, max_results: Optional[int] = None, sort_by=arxiv.SortCriterion.Relevance) -> List[Dict[str, Any]]:
+         """Search for papers on arXiv with relevance scoring"""
+         try:
+             max_results = max_results or self.default_page_size
+
+             # If query already contains arXiv operators (cat:, au:, ti:, abs:, AND, OR), use as-is
+             has_operators = any(op in query for op in ['cat:', 'au:', 'ti:', 'abs:', ' AND ', ' OR '])
+
+             if has_operators:
+                 # Query already formatted with operators
+                 search_query = query
+             else:
+                 # Build title search - skip short common words that arXiv doesn't handle well
+                 words = [w.strip() for w in query.split() if w.strip()]
+                 content_words = [w for w in words if w.lower() not in self.SKIP_WORDS]
+
+                 if content_words:
+                     title_parts = [f"ti:{word.title()}" for word in content_words]
+                     search_query = " AND ".join(title_parts)
+                 else:
+                     # All words were skipped, use plain query
+                     search_query = query
+
+             # Fetch more results than requested so we can filter/sort better
+             fetch_count = min(max_results * 3, self.max_results) if not has_operators else max_results
+
+             search = arxiv.Search(
+                 query=search_query,
+                 max_results=min(fetch_count, self.max_results),
+                 sort_by=sort_by
+             )
+
+             papers = []
+             for result in self.client.results(search):
+                 paper_data = {
+                     "arxiv_id": result.entry_id.split('/')[-1],
+                     "title": result.title,
+                     "abstract": result.summary,
+                     "authors": [str(author) for author in result.authors],
+                     "published": result.published.isoformat() if result.published else None,
+                     "updated": result.updated.isoformat() if result.updated else None,
+                     "categories": result.categories,
+                     "primary_category": result.primary_category,
+                     "pdf_url": result.pdf_url,
+                     "entry_id": result.entry_id,
+                     "doi": result.doi,
+                     "journal_ref": result.journal_ref,
+                     "comment": result.comment,
+                     "links": [{"href": link.href, "title": link.title, "rel": link.rel} for link in result.links]
+                 }
+                 papers.append(paper_data)
+
+             # Re-score and sort papers by title match quality, then limit to requested count
+             if not has_operators and papers:
+                 papers = self._score_and_sort_papers(papers, query)[:max_results]
+
+             logger.info(f"Found {len(papers)} papers for query: {query}")
+             return papers
+         except Exception as e:
+             logger.error(f"Error searching papers: {str(e)}")
+             return []
+
+     def _score_and_sort_papers(self, papers: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
+         """Score papers by how well their title matches the query and sort by score"""
+         query_lower = query.lower().strip()
+         query_words = set(query_lower.split())
+
+         scored_papers = []
+         for paper in papers:
+             title_lower = paper['title'].lower()
+             score = 0
+
+             # Exact title match (highest priority)
+             if title_lower == query_lower:
+                 score += 1000
+             # Title contains the exact query phrase
+             elif query_lower in title_lower:
+                 score += 500
+
+             # Count matching words in title
+             title_words = set(title_lower.split())
+             matching_words = query_words & title_words
+             score += len(matching_words) * 50
+
+             # Bonus for shorter titles (more likely to be exact match)
+             if len(title_words) <= len(query_words) + 2:
+                 score += 100
+
+             # Bonus for title starting with the first query word
+             if title_lower.startswith(query_lower.split()[0]):
+                 score += 75
+
+             scored_papers.append((score, paper))
+
+         # Sort by score descending
+         scored_papers.sort(key=lambda x: x[0], reverse=True)
+         return [paper for score, paper in scored_papers]
+
+     def get_paper_by_id(self, arxiv_id: str) -> Optional[Dict[str, Any]]:
+         """Get a specific paper by its arXiv ID"""
+         try:
+             search = arxiv.Search(id_list=[arxiv_id])
+
+             for result in self.client.results(search):
+                 paper_data = {
+                     "arxiv_id": result.entry_id.split('/')[-1],
+                     "title": result.title,
+                     "abstract": result.summary,
+                     "authors": [str(author) for author in result.authors],
+                     "published": result.published.isoformat() if result.published else None,
+                     "updated": result.updated.isoformat() if result.updated else None,
+                     "categories": result.categories,
+                     "primary_category": result.primary_category,
+                     "pdf_url": result.pdf_url,
+                     "entry_id": result.entry_id,
+                     "doi": result.doi,
+                     "journal_ref": result.journal_ref,
+                     "comment": result.comment,
+                     "links": [{"href": link.href, "title": link.title, "rel": link.rel} for link in result.links]
+                 }
+                 return paper_data
+
+             return None
+         except Exception as e:
+             logger.error(f"Error fetching paper {arxiv_id}: {str(e)}")
+             return None
+
+     def get_recent_papers(self, category: Optional[str] = None, days: int = 7, max_results: int = 50) -> List[Dict[str, Any]]:
+         """Get recent papers from the last N days"""
+         try:
+             # Build query for recent papers
+             query_parts = []
+
+             if category:
+                 query_parts.append(f"cat:{category}")
+
+             # Date filter (arXiv doesn't support date ranges directly, so we'll filter results)
+             cutoff_date = datetime.now() - timedelta(days=days)
+
+             query = " AND ".join(query_parts) if query_parts else "all:machine learning"
+
+             search = arxiv.Search(
+                 query=query,
+                 max_results=max_results,
+                 sort_by=arxiv.SortCriterion.SubmittedDate
+             )
+
+             papers = []
+             for result in self.client.results(search):
+                 # Filter by date
+                 if result.published and result.published.replace(tzinfo=None) >= cutoff_date:
+                     paper_data = {
+                         "arxiv_id": result.entry_id.split('/')[-1],
+                         "title": result.title,
+                         "abstract": result.summary,
+                         "authors": [str(author) for author in result.authors],
+                         "published": result.published.isoformat() if result.published else None,
+                         "updated": result.updated.isoformat() if result.updated else None,
+                         "categories": result.categories,
+                         "primary_category": result.primary_category,
+                         "pdf_url": result.pdf_url,
+                         "entry_id": result.entry_id,
+                         "doi": result.doi,
+                         "journal_ref": result.journal_ref,
+                         "comment": result.comment
+                     }
+                     papers.append(paper_data)
+
+             logger.info(f"Found {len(papers)} recent papers in category: {category}")
+             return papers
+         except Exception as e:
+             logger.error(f"Error fetching recent papers: {str(e)}")
+             return []
+
+     def get_papers_by_category(self, category: str, max_results: int = 20) -> List[Dict[str, Any]]:
+         """Get papers by category"""
+         try:
+             query = f"cat:{category}"
+             return self.search_papers(query, max_results)
+         except Exception as e:
+             logger.error(f"Error fetching papers by category {category}: {str(e)}")
+             return []
+
+     def get_papers_by_author(self, author: str, max_results: int = 20) -> List[Dict[str, Any]]:
+         """Get papers by author"""
+         try:
+             query = f"au:{author}"
+             return self.search_papers(query, max_results)
+         except Exception as e:
+             logger.error(f"Error fetching papers by author {author}: {str(e)}")
+             return []
+
+     def get_trending_papers(self, category: Optional[str] = None, days: int = 30) -> List[Dict[str, Any]]:
+         """Get trending papers (most recent with high engagement indicators)"""
+         try:
+             # For now, we'll use recent papers as a proxy for trending
+             # In a full implementation, this could consider download counts, citations, etc.
+             return self.get_recent_papers(category=category, days=days, max_results=30)
+         except Exception as e:
+             logger.error(f"Error fetching trending papers: {str(e)}")
+             return []
+
+ # Global instance
+ arxiv_client = ArxivClient()
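A short usage sketch for the client (illustrative queries; assumes the `arxiv` dependency is installed and the network is reachable):

    from arionxiv.arxiv_operations.client import arxiv_client

    # Plain text: stopwords are dropped and the rest becomes a
    # "ti:... AND ti:..." title query, re-ranked by _score_and_sort_papers.
    papers = arxiv_client.search_papers("attention is all you need", max_results=5)
    for p in papers:
        print(p["arxiv_id"], p["title"])

    # Queries containing field operators are passed through unchanged.
    ml = arxiv_client.search_papers("cat:cs.LG AND ti:transformer", max_results=5)

    # Single-paper lookup returns a dict, or None if nothing matched.
    paper = arxiv_client.get_paper_by_id("1706.03762")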
arionxiv/arxiv_operations/fetcher.py ADDED
@@ -0,0 +1,173 @@
+ import requests
+ import aiohttp
+ import asyncio
+ import os
+ import logging
+ from pathlib import Path
+ from typing import Optional, Dict, Any
+
+ from ..services.unified_pdf_service import pdf_processor
+
+ logger = logging.getLogger(__name__)
+
+ class ArxivFetcher:
+     """Fetches and processes papers from arXiv"""
+
+     def __init__(self):
+         self.session = None
+         self.download_dir = "downloads"
+         os.makedirs(self.download_dir, exist_ok=True)
+
+     async def _get_session(self):
+         """Get or create aiohttp session"""
+         if self.session is None:
+             self.session = aiohttp.ClientSession()
+         return self.session
+
+     async def fetch_paper_pdf(self, arxiv_id: str, pdf_url: str) -> Optional[str]:
+         """Fetch PDF for a paper"""
+         try:
+             session = await self._get_session()
+
+             # Clean arxiv_id for filename
+             safe_id = arxiv_id.replace('/', '_').replace(':', '_')
+             pdf_path = os.path.join(self.download_dir, f"{safe_id}.pdf")
+
+             # Check if already downloaded
+             if os.path.exists(pdf_path):
+                 logger.info(f"PDF already exists: {pdf_path}")
+                 return pdf_path
+
+             # Download PDF
+             async with session.get(pdf_url) as response:
+                 if response.status == 200:
+                     content = await response.read()
+                     with open(pdf_path, 'wb') as f:
+                         f.write(content)
+                     logger.info(f"Downloaded PDF: {pdf_path}")
+                     return pdf_path
+                 else:
+                     logger.error(f"Failed to download PDF: {response.status}")
+                     return None
+         except Exception as e:
+             logger.error(f"Error fetching PDF for {arxiv_id}: {str(e)}")
+             return None
+
+     async def fetch_and_process_paper(self, paper_data: Dict[str, Any]) -> Dict[str, Any]:
+         """Fetch and process a complete paper"""
+         try:
+             arxiv_id = paper_data.get("arxiv_id")
+             pdf_url = paper_data.get("pdf_url")
+
+             logger.info(f"Processing paper: {arxiv_id}")
+
+             if not arxiv_id or not pdf_url:
+                 logger.warning("Missing arxiv_id or pdf_url for paper")
+                 return {"error": "Missing arxiv_id or pdf_url"}
+
+             # Fetch PDF
+             logger.debug(f"Fetching PDF from: {pdf_url}")
+             pdf_path = await self.fetch_paper_pdf(arxiv_id, pdf_url)
+             if not pdf_path:
+                 logger.error(f"Failed to download PDF for {arxiv_id}")
+                 return {"error": "Failed to download PDF"}
+
+             # Process PDF
+             logger.debug(f"Processing PDF: {pdf_path}")
+             processing_result = await pdf_processor.process_pdf(pdf_path)
+
+             logger.info(f"Successfully processed paper: {arxiv_id}")
+
+             # Combine paper metadata with processed content
+             result = {
+                 **paper_data,
+                 "pdf_path": pdf_path,
+                 "processed_content": processing_result,
+                 "fetch_timestamp": asyncio.get_event_loop().time()
+             }
+
+             return result
+         except Exception as e:
+             logger.error(f"Error processing paper: {str(e)}")
+             return {"error": str(e)}
+
+     async def batch_fetch_papers(self, papers: list) -> list:
+         """Fetch multiple papers concurrently"""
+         try:
+             logger.info(f"Starting batch fetch for {len(papers)} papers")
+
+             # Limit concurrent downloads
+             semaphore = asyncio.Semaphore(3)
+
+             async def bounded_fetch(paper):
+                 async with semaphore:
+                     return await self.fetch_and_process_paper(paper)
+
+             bounded_tasks = [bounded_fetch(paper) for paper in papers]
+             results = await asyncio.gather(*bounded_tasks, return_exceptions=True)
+
+             # Filter out exceptions
+             successful_results = []
+             for result in results:
+                 if isinstance(result, Exception):
+                     logger.error(f"Batch fetch error: {str(result)}", exc_info=True)
+                 else:
+                     successful_results.append(result)
+
+             logger.info(f"Batch fetch completed: {len(successful_results)}/{len(papers)} successful")
+             return successful_results
+         except Exception as e:
+             logger.error(f"Batch fetch error: {str(e)}", exc_info=True)
+             return []
+
+     def fetch_paper_sync(self, arxiv_id: str, pdf_url: str) -> Optional[str]:
+         """Synchronous version of PDF fetch"""
+         try:
+             safe_id = arxiv_id.replace('/', '_').replace(':', '_')
+             pdf_path = os.path.join(self.download_dir, f"{safe_id}.pdf")
+
+             if os.path.exists(pdf_path):
+                 return pdf_path
+
+             response = requests.get(pdf_url, timeout=30)
+             if response.status_code == 200:
+                 with open(pdf_path, 'wb') as f:
+                     f.write(response.content)
+                 logger.info(f"Downloaded PDF: {pdf_path}")
+                 return pdf_path
+             else:
+                 logger.error(f"Failed to download PDF: {response.status_code}")
+                 return None
+         except Exception as e:
+             logger.error(f"Error fetching PDF sync for {arxiv_id}: {str(e)}")
+             return None
+
+     async def cleanup_downloads(self, max_age_days: int = 7):
+         """Clean up old downloaded files"""
+         try:
+             import time
+             current_time = time.time()
+             max_age_seconds = max_age_days * 24 * 60 * 60
+
+             for filename in os.listdir(self.download_dir):
+                 file_path = os.path.join(self.download_dir, filename)
+                 if os.path.isfile(file_path):
+                     file_age = current_time - os.path.getmtime(file_path)
+                     if file_age > max_age_seconds:
+                         os.remove(file_path)
+                         logger.info(f"Cleaned up old file: {filename}")
+         except Exception as e:
+             logger.error(f"Error during cleanup: {str(e)}")
+
+     async def close(self):
+         """Close the session"""
+         if self.session:
+             await self.session.close()
+             self.session = None
+
+ # Global instance
+ arxiv_fetcher = ArxivFetcher()
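A minimal async driver for the fetcher, as a sketch (assumes the unified PDF service's dependencies are installed; the paper dicts are the ones produced by `ArxivClient`):

    import asyncio
    from arionxiv.arxiv_operations.client import arxiv_client
    from arionxiv.arxiv_operations.fetcher import arxiv_fetcher

    async def main():
        papers = arxiv_client.search_papers("cat:cs.CL", max_results=3)
        try:
            # The internal semaphore caps concurrent downloads at 3.
            results = await arxiv_fetcher.batch_fetch_papers(papers)
            print(f"processed {len(results)} papers")
        finally:
            await arxiv_fetcher.close()  # release the shared aiohttp session

    asyncio.run(main())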
arionxiv/arxiv_operations/searcher.py ADDED
@@ -0,0 +1,122 @@
+ # Simple text-based search for arXiv papers
+ from typing import List, Dict, Any, Optional
+ import logging
+
+ from .client import arxiv_client
+
+ logger = logging.getLogger(__name__)
+
+
+ class ArxivSearcher:
+     """Simple text-based search for arXiv papers"""
+
+     def __init__(self):
+         self.client = arxiv_client
+
+         # Common categories for reference
+         self.categories = {
+             "cs.AI": "Artificial Intelligence",
+             "cs.LG": "Machine Learning",
+             "cs.CV": "Computer Vision",
+             "cs.CL": "Computation and Language",
+             "cs.RO": "Robotics",
+             "stat.ML": "Machine Learning (Statistics)",
+             "cs.DC": "Distributed Computing",
+             "cs.DB": "Databases",
+             "cs.IR": "Information Retrieval",
+             "math.OC": "Optimization and Control",
+         }
+
+     async def search(self, query: str, max_results: int = 10) -> Dict[str, Any]:
+         """
+         Simple text search that returns the closest matching papers.
+
+         Args:
+             query: Search text
+             max_results: Number of results to return (default 10)
+
+         Returns:
+             Dict with success status and list of papers
+         """
+         try:
+             logger.info(f"Searching arXiv: query='{query}', max_results={max_results}")
+             # Direct search via arXiv API (uses relevance sorting by default)
+             papers = self.client.search_papers(query=query, max_results=max_results)
+
+             logger.info(f"Search completed: found {len(papers)} papers")
+             return {
+                 "success": True,
+                 "papers": papers,
+                 "count": len(papers),
+                 "query": query
+             }
+
+         except Exception as e:
+             logger.error(f"Search failed: {str(e)}", exc_info=True)
+             return {"success": False, "error": str(e), "papers": []}
+
+     async def search_by_category(self, query: str, category: str, max_results: int = 10) -> Dict[str, Any]:
+         """
+         Search within a specific category.
+
+         Args:
+             query: Search text
+             category: arXiv category (e.g., cs.LG, cs.AI)
+             max_results: Number of results to return
+
+         Returns:
+             Dict with success status and list of papers
+         """
+         try:
+             # Combine query with category filter
+             full_query = f"{query} AND cat:{category}" if query else f"cat:{category}"
+             logger.info(f"Searching by category: query='{full_query}', category={category}")
+             papers = self.client.search_papers(query=full_query, max_results=max_results)
+
+             logger.info(f"Category search completed: found {len(papers)} papers in {category}")
+             return {
+                 "success": True,
+                 "papers": papers,
+                 "count": len(papers),
+                 "query": query,
+                 "category": category
+             }
+
+         except Exception as e:
+             logger.error(f"Category search failed: {str(e)}", exc_info=True)
+             return {"success": False, "error": str(e), "papers": []}
+
+     async def search_by_author(self, author: str, max_results: int = 10) -> Dict[str, Any]:
+         """
+         Search papers by author name.
+
+         Args:
+             author: Author name
+             max_results: Number of results to return
+
+         Returns:
+             Dict with success status and list of papers
+         """
+         try:
+             logger.info(f"Searching by author: author='{author}', max_results={max_results}")
+             papers = self.client.get_papers_by_author(author=author, max_results=max_results)
+
+             logger.info(f"Author search completed: found {len(papers)} papers by {author}")
+             return {
+                 "success": True,
+                 "papers": papers,
+                 "count": len(papers),
+                 "author": author
+             }
+
+         except Exception as e:
+             logger.error(f"Author search failed: {str(e)}", exc_info=True)
+             return {"success": False, "error": str(e), "papers": []}
+
+     def get_available_categories(self) -> Dict[str, str]:
+         """Get available paper categories"""
+         return self.categories.copy()
+
+
+ # Global instance
+ arxiv_searcher = ArxivSearcher()
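The searcher's coroutines simply wrap the synchronous client, so they can be driven with `asyncio.run`. A usage sketch (assumes network access):

    import asyncio
    from arionxiv.arxiv_operations.searcher import arxiv_searcher

    async def main():
        result = await arxiv_searcher.search("diffusion models", max_results=5)
        if result["success"]:
            for p in result["papers"]:
                print(p["title"])

        scoped = await arxiv_searcher.search_by_category("transformer", "cs.LG")
        print(scoped["count"], "papers found in cs.LG")

    asyncio.run(main())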