arionxiv-1.0.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. arionxiv/__init__.py +40 -0
  2. arionxiv/__main__.py +10 -0
  3. arionxiv/arxiv_operations/__init__.py +0 -0
  4. arionxiv/arxiv_operations/client.py +225 -0
  5. arionxiv/arxiv_operations/fetcher.py +173 -0
  6. arionxiv/arxiv_operations/searcher.py +122 -0
  7. arionxiv/arxiv_operations/utils.py +293 -0
  8. arionxiv/cli/__init__.py +4 -0
  9. arionxiv/cli/commands/__init__.py +1 -0
  10. arionxiv/cli/commands/analyze.py +587 -0
  11. arionxiv/cli/commands/auth.py +365 -0
  12. arionxiv/cli/commands/chat.py +714 -0
  13. arionxiv/cli/commands/daily.py +482 -0
  14. arionxiv/cli/commands/fetch.py +217 -0
  15. arionxiv/cli/commands/library.py +295 -0
  16. arionxiv/cli/commands/preferences.py +426 -0
  17. arionxiv/cli/commands/search.py +254 -0
  18. arionxiv/cli/commands/settings_unified.py +1407 -0
  19. arionxiv/cli/commands/trending.py +41 -0
  20. arionxiv/cli/commands/welcome.py +168 -0
  21. arionxiv/cli/main.py +407 -0
  22. arionxiv/cli/ui/__init__.py +1 -0
  23. arionxiv/cli/ui/global_theme_manager.py +173 -0
  24. arionxiv/cli/ui/logo.py +127 -0
  25. arionxiv/cli/ui/splash.py +89 -0
  26. arionxiv/cli/ui/theme.py +32 -0
  27. arionxiv/cli/ui/theme_system.py +391 -0
  28. arionxiv/cli/utils/__init__.py +54 -0
  29. arionxiv/cli/utils/animations.py +522 -0
  30. arionxiv/cli/utils/api_client.py +583 -0
  31. arionxiv/cli/utils/api_config.py +505 -0
  32. arionxiv/cli/utils/command_suggestions.py +147 -0
  33. arionxiv/cli/utils/db_config_manager.py +254 -0
  34. arionxiv/github_actions_runner.py +206 -0
  35. arionxiv/main.py +23 -0
  36. arionxiv/prompts/__init__.py +9 -0
  37. arionxiv/prompts/prompts.py +247 -0
  38. arionxiv/rag_techniques/__init__.py +8 -0
  39. arionxiv/rag_techniques/basic_rag.py +1531 -0
  40. arionxiv/scheduler_daemon.py +139 -0
  41. arionxiv/server.py +1000 -0
  42. arionxiv/server_main.py +24 -0
  43. arionxiv/services/__init__.py +73 -0
  44. arionxiv/services/llm_client.py +30 -0
  45. arionxiv/services/llm_inference/__init__.py +58 -0
  46. arionxiv/services/llm_inference/groq_client.py +469 -0
  47. arionxiv/services/llm_inference/llm_utils.py +250 -0
  48. arionxiv/services/llm_inference/openrouter_client.py +564 -0
  49. arionxiv/services/unified_analysis_service.py +872 -0
  50. arionxiv/services/unified_auth_service.py +457 -0
  51. arionxiv/services/unified_config_service.py +456 -0
  52. arionxiv/services/unified_daily_dose_service.py +823 -0
  53. arionxiv/services/unified_database_service.py +1633 -0
  54. arionxiv/services/unified_llm_service.py +366 -0
  55. arionxiv/services/unified_paper_service.py +604 -0
  56. arionxiv/services/unified_pdf_service.py +522 -0
  57. arionxiv/services/unified_prompt_service.py +344 -0
  58. arionxiv/services/unified_scheduler_service.py +589 -0
  59. arionxiv/services/unified_user_service.py +954 -0
  60. arionxiv/utils/__init__.py +51 -0
  61. arionxiv/utils/api_helpers.py +200 -0
  62. arionxiv/utils/file_cleanup.py +150 -0
  63. arionxiv/utils/ip_helper.py +96 -0
  64. arionxiv-1.0.32.dist-info/METADATA +336 -0
  65. arionxiv-1.0.32.dist-info/RECORD +69 -0
  66. arionxiv-1.0.32.dist-info/WHEEL +5 -0
  67. arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
  68. arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
  69. arionxiv-1.0.32.dist-info/top_level.txt +1 -0
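
The diff below shows the contents of arionxiv/services/unified_analysis_service.py (entry 49 above, +872 lines) as added in this release.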
@@ -0,0 +1,872 @@
+ """
+ Unified Analysis Service for ArionXiv
+ Consolidates rag_system.py, rag_chat_system.py, analysis_service.py, analysis_orchestrator.py, and embedding_service.py
+ Provides comprehensive text analysis, RAG capabilities, chat functionality, orchestration, and embedding services
+ """
+
+ import asyncio
+ import sys
+ import json
+ import os
+ from pathlib import Path
+ from datetime import datetime, timedelta
+ from typing import List, Dict, Any, Optional, Tuple, Union
+ from abc import ABC, abstractmethod
+ import hashlib
+ import secrets
+ import logging
+ from pymongo import IndexModel
+ from dotenv import load_dotenv
+
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich.prompt import Prompt
+ from rich.text import Text
+ from rich.markdown import Markdown
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
+
+ try:
+     import numpy as np
+     from sentence_transformers import SentenceTransformer
+     ML_DEPENDENCIES_AVAILABLE = True
+ except ImportError:
+     ML_DEPENDENCIES_AVAILABLE = False
+     np = None
+     SentenceTransformer = None
+
+ from .unified_database_service import unified_database_service
+ from .unified_config_service import unified_config_service
+ from .unified_paper_service import unified_paper_service
+ from .unified_user_service import unified_user_service
+ from ..rag_techniques.basic_rag import BasicRAG
+ from ..prompts import format_prompt
+
+ # Import LLM clients from new organized location
+ from .llm_inference import groq_client, GroqClient
+ from .llm_inference import OPENROUTER_AVAILABLE
+ if OPENROUTER_AVAILABLE:
+     from .llm_inference import openrouter_client, OpenRouterClient, get_openrouter_client
+ else:
+     openrouter_client = None
+     OpenRouterClient = None
+     get_openrouter_client = None
+
+ # Backward compatibility alias
+ llm_client = groq_client
+ LLMClient = GroqClient
+
+ load_dotenv()
+
+ logger = logging.getLogger(__name__)
+
+
+ class UnifiedAnalysisService:
+     """
+     Comprehensive analysis service that combines:
+     1. RAG (Retrieval-Augmented Generation) system
+     2. Paper analysis and processing
+     3. Interactive chat system for papers
+     4. Analysis orchestration and workflow management
+     5. Embedding services with multiple providers
+     """
+
+     def __init__(self):
+         # Lazy initialization flags
+         self._rag = None
+         self._rag_initialized = False
+         self._openrouter_client = None
+         self._openrouter_checked = False
+         self._console = None
+         self._get_theme_colors = None
+
+         # Analysis orchestrator functionality
+         self.analysis_orchestrator_enabled = True
+
+         logger.info("UnifiedAnalysisService initialized (lazy loading enabled)")
+
+     @property
+     def analysis_config(self):
+         """Lazy load analysis config"""
+         return unified_config_service.get_analysis_config()
+
+     @property
+     def embedding_config(self):
+         """Lazy load embedding config"""
+         return unified_config_service.get_embedding_config()
+
+     @property
+     def rag_config(self):
+         """Lazy load RAG config"""
+         return unified_config_service.get_rag_config()
+
+     @property
+     def batch_size(self):
+         """Get batch size from config"""
+         return self.analysis_config["batch_size"]
+
+     @property
+     def timeout_seconds(self):
+         """Get timeout from config"""
+         return self.analysis_config["timeout_seconds"]
+
+     @property
+     def openrouter_client(self):
+         """Lazy initialize OpenRouter client"""
+         if not self._openrouter_checked:
+             self._openrouter_checked = True
+             if OPENROUTER_AVAILABLE:
+                 try:
+                     self._openrouter_client = get_openrouter_client()
+                     if self._openrouter_client and self._openrouter_client.is_available:
+                         logger.info(f"OpenRouter client initialized with model: {self._openrouter_client.get_model_name()}")
+                     else:
+                         logger.info("OpenRouter client not configured (no API key)")
+                         self._openrouter_client = None
+                 except Exception as e:
+                     logger.warning(f"Failed to initialize OpenRouter client: {str(e)}")
+                     self._openrouter_client = None
+         return self._openrouter_client
+
+     @openrouter_client.setter
+     def openrouter_client(self, value):
+         """Allow setting OpenRouter client"""
+         self._openrouter_client = value
+         self._openrouter_checked = True
+
+     @property
+     def rag(self):
+         """Lazy initialize RAG system"""
+         if not self._rag_initialized:
+             self._rag_initialized = True
+             self._rag = BasicRAG(
+                 database_service=unified_database_service,
+                 config_service=unified_config_service,
+                 llm_client=llm_client,
+                 openrouter_client=self.openrouter_client
+             )
+             logger.info(f"BasicRAG initialized (ML available: {ML_DEPENDENCIES_AVAILABLE})")
+         return self._rag
+
+     @property
+     def console(self):
+         """Lazy initialize console"""
+         if self._console is None:
+             try:
+                 # Relative import: this module lives in arionxiv/services/, and
+                 # 'arionxiv' is the only top-level package in the wheel
+                 from ..cli.ui.theme_system import create_themed_console, get_theme_colors
+                 self._console = create_themed_console()
+                 self._get_theme_colors = get_theme_colors
+             except ImportError:
+                 self._console = Console()
+                 self._get_theme_colors = lambda: {'primary': 'blue', 'secondary': 'cyan'}
+         return self._console
+
+
+     # ====================
+     # EMBEDDING SERVICE METHODS (delegated to RAG)
+     # ====================
+
+     async def get_embeddings(self, texts: Union[str, List[str]]) -> List[List[float]]:
+         """Get embeddings with automatic fallback"""
+         return await self.rag.get_embeddings(texts)
+
+     async def get_single_embedding(self, text: str) -> List[float]:
+         """Get embedding for a single text"""
+         return await self.rag.get_single_embedding(text)
+
+     def get_embedding_dimension(self) -> int:
+         """Get embedding dimension"""
+         return self.rag.get_embedding_dimension()
+
+     def get_embedding_provider_name(self) -> str:
+         """Get current provider name"""
+         return self.rag.get_embedding_provider_name()
+
+     async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
+         """Compute cosine similarity between two embeddings"""
+         return await self.rag.compute_similarity(embedding1, embedding2)
+
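
A minimal sketch of how these delegating wrappers might be called, assuming the package is installed and an embedding provider is available (the texts are illustrative):

    import asyncio
    from arionxiv.services.unified_analysis_service import unified_analysis_service

    async def demo():
        # Both calls route through the lazily constructed BasicRAG instance
        vecs = await unified_analysis_service.get_embeddings(["retrieval", "generation"])
        score = await unified_analysis_service.compute_similarity(vecs[0], vecs[1])
        print(unified_analysis_service.get_embedding_provider_name(), score)

    asyncio.run(demo())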
+     # ====================
+     # PAPER ANALYSIS
+     # ====================
+
+     async def analyze_papers_for_user(self, user_id: str, papers: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Analyze papers for a specific user"""
+         try:
+             if not papers:
+                 logger.warning(f"No papers to analyze for user {user_id}")
+                 return []
+
+             logger.info(f"Starting analysis of {len(papers)} papers for user {user_id}")
+
+             # Process papers in batches to avoid overwhelming the LLM service
+             analyzed_papers = []
+
+             for i in range(0, len(papers), self.batch_size):
+                 batch = papers[i:i + self.batch_size]
+                 logger.info(f"Processing batch {i//self.batch_size + 1}/{(len(papers) + self.batch_size - 1)//self.batch_size}")
+
+                 batch_results = await self._analyze_batch(user_id, batch)
+                 analyzed_papers.extend(batch_results)
+
+                 # Small delay between batches to be respectful to the API
+                 if i + self.batch_size < len(papers):
+                     await asyncio.sleep(1)
+
+             logger.info(f"Completed analysis of {len(analyzed_papers)} papers for user {user_id}")
+             return analyzed_papers
+
+         except Exception as e:
+             logger.error(f"Failed to analyze papers for user {user_id}: {e}", exc_info=True)
+             return []
+
+     async def _analyze_batch(self, user_id: str, papers: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Analyze a batch of papers"""
+         analyzed_papers = []
+
+         for paper in papers:
+             try:
+                 analysis_result = await self.analyze_single_paper(user_id, paper)
+                 if analysis_result:
+                     analyzed_papers.append(analysis_result)
+             except Exception as e:
+                 logger.error(f"Failed to analyze paper {paper.get('id', 'unknown')}: {e}", exc_info=True)
+                 # Continue with other papers even if one fails
+                 continue
+
+         return analyzed_papers
+
+     async def analyze_single_paper(self, user_id: str, paper: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+         """Analyze a single paper using LLM"""
+         try:
+             paper_id = paper.get('id', paper.get('arxiv_id', 'unknown'))
+             logger.info(f"Analyzing paper {paper_id}")
+
+             # Prepare paper content for analysis
+             content = self._prepare_paper_content(paper)
+
+             # Get analysis prompt
+             analysis_prompt = format_prompt("paper_analysis",
+                 title=paper.get("title", ""),
+                 abstract=paper.get("abstract", ""),
+                 categories=paper.get("categories", [])
+             )
+
+             # Call LLM for analysis
+             response = await asyncio.wait_for(
+                 llm_client.get_completion(analysis_prompt),
+                 timeout=self.timeout_seconds
+             )
+
+             if not response or response.startswith("Error"):
+                 logger.error(f"LLM analysis failed for paper {paper_id}")
+                 return None
+
+             # Parse and structure the analysis
+             analysis = self._parse_analysis_response(response)
+
+             # Create analysis document
+             analysis_doc = {
+                 'paper_id': paper_id,
+                 'user_id': user_id,
+                 'title': paper.get('title', ''),
+                 'authors': paper.get('authors', []),
+                 'abstract': paper.get('abstract', ''),
+                 'categories': paper.get('categories', []),
+                 'analysis': analysis,
+                 'analyzed_at': datetime.utcnow(),
+                 'analysis_version': '1.0'
+             }
+
+             # Store analysis in database
+             await unified_database_service.insert_one('paper_analyses', analysis_doc)
+
+             logger.info(f"Successfully analyzed paper {paper_id}")
+             return analysis_doc
+
+         except asyncio.TimeoutError:
+             logger.error(f"Analysis timeout for paper {paper.get('id', 'unknown')}")
+             return None
+         except Exception as e:
+             logger.error(f"Analysis failed for paper {paper.get('id', 'unknown')}: {e}", exc_info=True)
+             return None
+
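
For orientation, a hedged sketch of driving analyze_single_paper directly; the paper dict and user id below are made up. The call stores its result in the paper_analyses collection and returns the document, or None on error or timeout:

    import asyncio
    from arionxiv.services.unified_analysis_service import unified_analysis_service

    paper = {
        "id": "2401.00001",          # illustrative arXiv id
        "title": "An Example Paper",
        "abstract": "We study ...",
        "authors": ["A. Author"],
        "categories": ["cs.AI"],
    }
    doc = asyncio.run(unified_analysis_service.analyze_single_paper("user-123", paper))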
+     def _prepare_paper_content(self, paper: Dict[str, Any]) -> str:
+         """Prepare paper content for analysis"""
+         content_parts = []
+
+         if paper.get('title'):
+             content_parts.append(f"Title: {paper['title']}")
+
+         if paper.get('authors'):
+             authors = ', '.join(paper['authors'])
+             content_parts.append(f"Authors: {authors}")
+
+         if paper.get('abstract'):
+             content_parts.append(f"Abstract: {paper['abstract']}")
+
+         if paper.get('categories'):
+             categories = ', '.join(paper['categories'])
+             content_parts.append(f"Categories: {categories}")
+
+         return '\n\n'.join(content_parts)
+
+     def _parse_analysis_response(self, response: str) -> Dict[str, Any]:
+         """Parse LLM analysis response into structured format"""
+         try:
+             # Try to parse as JSON first
+             return json.loads(response)
+         except json.JSONDecodeError:
+             # If not JSON, create structured response from text
+             return {
+                 'summary': response[:500] + '...' if len(response) > 500 else response,
+                 'key_points': [],
+                 'methodology': '',
+                 'results': '',
+                 'significance': '',
+                 'limitations': '',
+                 'relevance_score': 5  # Default relevance
+             }
+
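
The two branches are easy to see with hypothetical inputs (svc aliases the module-level instance defined at the bottom of the file): valid JSON comes back as-is, anything else is wrapped in the default structure with relevance_score 5 and a summary truncated to 500 characters:

    from arionxiv.services.unified_analysis_service import unified_analysis_service as svc

    svc._parse_analysis_response('{"summary": "ok"}')        # valid JSON -> returned unchanged
    svc._parse_analysis_response('plain prose from the LLM') # -> fallback dict, relevance_score == 5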
+     # ====================
+     # RAG SYSTEM
+     # ====================
+
+     async def add_document_to_index(self, doc_id: str, text: str, metadata: Dict[str, Any] = None) -> bool:
+         """Add document to vector index"""
+         return await self.rag.add_document_to_index(doc_id, text, metadata)
+
+     async def search_similar_documents(self, query: str, filters: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+         """Search for similar documents using vector search"""
+         return await self.rag.search_similar_documents(query, filters)
+
+     # ====================
+     # CHAT SYSTEM
+     # ====================
+
+     async def start_chat_session(self, papers: Union[List[Dict[str, Any]], Dict[str, Any]], user_id: str = "default"):
+         """Start interactive chat session with papers"""
+         if not isinstance(papers, list):
+             papers = [papers]
+
+         await self.rag.start_chat_session(papers, user_id)
+
+     async def continue_chat_session(self, session: Dict[str, Any], paper_info: Dict[str, Any]):
+         """Continue a previous chat session"""
+         await self.rag.continue_chat_session(session, paper_info)
+
+     async def chat(self, user_name: str, paper_id: str, message: str, session_id: str = None) -> Dict[str, Any]:
+         """Process a chat message using the RAG system"""
+         return await self.rag._chat_with_session(session_id, message)
+
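
A sketch of opening the interactive chat loop through the module-level convenience export (a single paper dict is normalized to a list; the session loop itself lives in BasicRAG; the paper fields are illustrative):

    import asyncio
    from arionxiv.services.unified_analysis_service import start_chat_session

    paper = {"id": "2401.00001", "title": "An Example Paper", "abstract": "..."}
    asyncio.run(start_chat_session(paper, user_id="alice"))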
+     # ====================
+     # CLEANUP AND MAINTENANCE
+     # ====================
+
+     async def cleanup_expired_data(self):
+         """Clean up expired embeddings and chat sessions"""
+         await self.rag.cleanup_expired_data()
+
+     # ========== ANALYSIS ORCHESTRATION METHODS ==========
+
+     async def analyze_papers(self, user_id: str, query: Optional[str] = None,
+                              categories: Optional[List[str]] = None,
+                              max_papers: int = 10,
+                              analysis_type: str = "comprehensive") -> Dict[str, Any]:
+         """
+         Orchestrate comprehensive paper analysis workflow.
+
+         Args:
+             user_id: User ID for the analysis
+             query: Optional search query for papers
+             categories: Optional ArXiv categories to filter by
+             max_papers: Maximum number of papers to analyze
+             analysis_type: Type of analysis ('quick', 'comprehensive', 'research')
+
+         Returns:
+             Dictionary containing analysis results and metrics
+         """
+         start_time = datetime.utcnow()
+         analysis_id = f"analysis_{user_id}_{int(start_time.timestamp())}"
+
+         try:
+             # Touch self.console first: the lazy property is what sets
+             # self._get_theme_colors, which would otherwise still be None here
+             console = self.console
+             colors = self._get_theme_colors()
+             console.print(Panel(
+                 f"[bold {colors['primary']}]Starting Paper Analysis[/bold {colors['primary']}]\n"
+                 f"Analysis ID: {analysis_id}\n"
+                 f"Type: {analysis_type}\n"
+                 f"Max Papers: {max_papers}",
+                 title="Analysis Orchestrator",
+                 border_style=colors['primary']
+             ))
+
+             # Step 1: Fetch papers based on criteria
+             if query:
+                 papers = await unified_paper_service.search_papers(query, max_results=max_papers)
+             else:
+                 # Get recent papers from specified categories or user preferences
+                 user_prefs = await unified_user_service.get_user_preferences(user_id)
+                 target_categories = categories or user_prefs.get('preferred_categories', ['cs.AI', 'cs.LG'])
+
+                 papers = []
+                 for category in target_categories:
+                     category_papers = await unified_paper_service.fetch_recent_papers(
+                         category=category,
+                         max_results=max_papers // len(target_categories)
+                     )
+                     papers.extend(category_papers)
+
+             if not papers:
+                 return {
+                     'analysis_id': analysis_id,
+                     'status': 'no_papers',
+                     'message': 'No papers found for analysis',
+                     'duration': 0
+                 }
+
+             # Step 2: Analyze each paper
+             analysis_results = []
+             progress = Progress(
+                 SpinnerColumn(),
+                 TextColumn("[progress.description]{task.description}"),
+                 BarColumn(),
+                 MofNCompleteColumn(),
+                 console=self.console
+             )
+
+             with progress:
+                 task = progress.add_task("Analyzing papers...", total=len(papers))
+
+                 for paper in papers:
+                     try:
+                         # Generate analysis based on type
+                         if analysis_type == "quick":
+                             analysis = await self._quick_paper_analysis(paper, user_id)
+                         elif analysis_type == "research":
+                             analysis = await self._research_paper_analysis(paper, user_id)
+                         else:  # comprehensive
+                             analysis = await self._comprehensive_paper_analysis(paper, user_id)
+
+                         if analysis:
+                             analysis_results.append({
+                                 'paper_id': paper.get('id'),
+                                 'title': paper.get('title'),
+                                 'analysis': analysis,
+                                 'timestamp': datetime.utcnow().isoformat()
+                             })
+
+                         progress.advance(task)
+
+                     except Exception as e:
+                         logger.error(f"Error analyzing paper {paper.get('id', 'unknown')}: {e}", exc_info=True)
+                         continue
+
+             # Step 3: Generate insights and recommendations
+             insights = await self._generate_analysis_insights(analysis_results, user_id)
+
+             # Step 4: Store orchestration results
+             orchestration_doc = {
+                 'analysis_id': analysis_id,
+                 'user_id': user_id,
+                 'type': analysis_type,
+                 'query': query,
+                 'categories': categories,
+                 'papers_analyzed': len(analysis_results),
+                 'results': analysis_results,
+                 'insights': insights,
+                 'start_time': start_time.isoformat(),
+                 'end_time': datetime.utcnow().isoformat(),
+                 'duration': (datetime.utcnow() - start_time).total_seconds()
+             }
+
+             await unified_database_service.insert_one('analysis_orchestrations', orchestration_doc)
+
+             # Update user statistics
+             await self._update_user_analysis_stats(user_id, len(analysis_results))
+
+             self.console.print(Panel(
+                 f"[bold green]Analysis Complete![/bold green]\n"
+                 f"Papers Analyzed: {len(analysis_results)}\n"
+                 f"Duration: {orchestration_doc['duration']:.1f}s\n"
+                 f"Key Insights: {len(insights.get('key_themes', []))}"
+             ))
+
+             return {
+                 'analysis_id': analysis_id,
+                 'status': 'success',
+                 'papers_analyzed': len(analysis_results),
+                 'insights': insights,
+                 'duration': orchestration_doc['duration'],
+                 'results': analysis_results
+             }
+
+         except Exception as e:
+             logger.error(f"Error in analysis orchestration: {e}", exc_info=True)
+             return {
+                 'analysis_id': analysis_id,
+                 'status': 'error',
+                 'error': str(e),
+                 'duration': (datetime.utcnow() - start_time).total_seconds()
+             }
+
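
A hedged invocation sketch for the orchestrator (query and ids are illustrative): with a query it searches, and without one it falls back to the user's preferred categories:

    import asyncio
    from arionxiv.services.unified_analysis_service import unified_analysis_service

    result = asyncio.run(unified_analysis_service.analyze_papers(
        user_id="user-123",
        query="diffusion models",
        max_papers=5,
        analysis_type="quick",
    ))
    print(result["status"], result.get("papers_analyzed"))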
+     async def run_daily_analysis(self, user_id: str) -> Dict[str, Any]:
+         """
+         Run daily analysis workflow for a user.
+
+         Args:
+             user_id: User ID for the daily analysis
+
+         Returns:
+             Dictionary containing daily analysis results
+         """
+         try:
+             # Get user preferences
+             user_prefs = await unified_user_service.get_user_preferences(user_id)
+             categories = user_prefs.get('preferred_categories', ['cs.AI', 'cs.LG', 'cs.CV'])
+
+             # Fetch yesterday's papers
+             yesterday = datetime.utcnow() - timedelta(days=1)
+             papers = []
+
+             for category in categories:
+                 category_papers = await unified_paper_service.fetch_papers_by_date(
+                     category=category,
+                     date=yesterday,
+                     max_results=5
+                 )
+                 papers.extend(category_papers)
+
+             if not papers:
+                 return {
+                     'status': 'no_new_papers',
+                     'message': 'No new papers found for yesterday',
+                     'date': yesterday.date().isoformat()
+                 }
+
+             # Run analysis
+             analysis_result = await self.analyze_papers(
+                 user_id=user_id,
+                 max_papers=len(papers),
+                 analysis_type="quick"
+             )
+
+             # Generate daily summary
+             summary = await self._generate_daily_summary(analysis_result, user_id)
+
+             # Store daily analysis
+             daily_doc = {
+                 'user_id': user_id,
+                 'date': yesterday.date().isoformat(),
+                 'papers_count': len(papers),
+                 'analysis_id': analysis_result.get('analysis_id'),
+                 'summary': summary,
+                 'timestamp': datetime.utcnow().isoformat()
+             }
+
+             await unified_database_service.insert_one('daily_analyses', daily_doc)
+
+             return {
+                 'status': 'success',
+                 'date': yesterday.date().isoformat(),
+                 'papers_analyzed': len(papers),
+                 'summary': summary,
+                 'analysis_id': analysis_result.get('analysis_id')
+             }
+
+         except Exception as e:
+             logger.error(f"Error in daily analysis: {e}", exc_info=True)
+             return {'status': 'error', 'error': str(e)}
+
+     async def get_weekly_insights(self, user_id: str) -> Dict[str, Any]:
+         """
+         Generate weekly insights from user's analysis history.
+
+         Args:
+             user_id: User ID for insights generation
+
+         Returns:
+             Dictionary containing weekly insights
+         """
+         try:
+             # Get analyses from the past week
+             week_ago = datetime.utcnow() - timedelta(days=7)
+
+             pipeline = [
+                 {
+                     '$match': {
+                         'user_id': user_id,
+                         'start_time': {'$gte': week_ago.isoformat()}
+                     }
+                 },
+                 {
+                     '$sort': {'start_time': -1}
+                 }
+             ]
+
+             analyses = await unified_database_service.aggregate('analysis_orchestrations', pipeline)
+
+             if not analyses:
+                 return {
+                     'status': 'no_data',
+                     'message': 'No analyses found in the past week'
+                 }
+
+             # Generate insights
+             insights = {
+                 'period': f"{week_ago.date()} to {datetime.utcnow().date()}",
+                 'total_analyses': len(analyses),
+                 'total_papers': sum(a.get('papers_analyzed', 0) for a in analyses),
+                 'average_papers_per_analysis': sum(a.get('papers_analyzed', 0) for a in analyses) / len(analyses),
+                 'most_active_day': self._find_most_active_day(analyses),
+                 'trending_topics': await self._extract_trending_topics(analyses),
+                 'research_patterns': self._analyze_research_patterns(analyses)
+             }
+
+             return {
+                 'status': 'success',
+                 'insights': insights,
+                 'generated_at': datetime.utcnow().isoformat()
+             }
+
+         except Exception as e:
+             logger.error(f"Error generating weekly insights: {e}", exc_info=True)
+             return {'status': 'error', 'error': str(e)}
+
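
And a sketch of reading the weekly rollup built from the stored analysis_orchestrations documents (user id illustrative):

    import asyncio
    from arionxiv.services.unified_analysis_service import unified_analysis_service

    report = asyncio.run(unified_analysis_service.get_weekly_insights("user-123"))
    if report["status"] == "success":
        print(report["insights"]["total_papers"], report["insights"]["most_active_day"])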
+     # ========== PRIVATE ORCHESTRATION HELPER METHODS ==========
+
+     async def _quick_paper_analysis(self, paper: Dict[str, Any], user_id: str) -> Optional[Dict[str, Any]]:
+         """Generate quick analysis of a paper."""
+         try:
+             prompt = format_prompt("quick_analysis",
+                 title=paper.get("title", ""),
+                 abstract=paper.get("abstract", "")
+             )
+
+             # Use the module-level client and the same get_completion API used in
+             # analyze_single_paper; self.llm_client is not defined on this class
+             analysis_text = await llm_client.get_completion(prompt)
+
+             return {
+                 'type': 'quick',
+                 'summary': analysis_text[:500],
+                 'key_points': self._extract_key_points(analysis_text),
+                 'relevance_score': await unified_user_service.calculate_paper_relevance(paper, user_id)
+             }
+
+         except Exception as e:
+             logger.error(f"Error in quick analysis: {e}", exc_info=True)
+             return None
+
+     async def _comprehensive_paper_analysis(self, paper: Dict[str, Any], user_id: str) -> Optional[Dict[str, Any]]:
+         """Generate comprehensive analysis of a paper."""
+         try:
+             # Delegate to the full single-paper analysis defined above
+             return await self.analyze_single_paper(user_id, paper)
+
+         except Exception as e:
+             logger.error(f"Error in comprehensive analysis: {e}", exc_info=True)
+             return None
+
+     async def _research_paper_analysis(self, paper: Dict[str, Any], user_id: str) -> Optional[Dict[str, Any]]:
+         """Generate research-focused analysis of a paper."""
+         try:
+             prompt = format_prompt("research_analysis",
+                 title=paper.get("title", ""),
+                 abstract=paper.get("abstract", ""),
+                 categories=paper.get("categories", [])
+             )
+
+             analysis_text = await llm_client.get_completion(prompt)
+
+             # Find related papers using embeddings (top 3 hits from the
+             # vector search defined above)
+             if paper.get('abstract'):
+                 similar_papers = (await self.search_similar_documents(paper.get('abstract')))[:3]
+             else:
+                 similar_papers = []
+
+             return {
+                 'type': 'research',
+                 'analysis': analysis_text,
+                 'methodology_assessment': self._assess_methodology(analysis_text),
+                 'novelty_score': self._calculate_novelty_score(analysis_text),
+                 'related_papers': similar_papers,
+                 'research_implications': self._extract_implications(analysis_text)
+             }
+
+         except Exception as e:
+             logger.error(f"Error in research analysis: {e}", exc_info=True)
+             return None
+
+     async def _generate_analysis_insights(self, results: List[Dict[str, Any]], user_id: str) -> Dict[str, Any]:
+         """Generate insights from analysis results."""
+         try:
+             # Extract themes and patterns
+             all_analyses = [r.get('analysis', {}) for r in results]
+
+             insights = {
+                 'key_themes': self._extract_common_themes(all_analyses),
+                 'methodology_trends': self._analyze_methodology_trends(all_analyses),
+                 'relevance_distribution': self._analyze_relevance_distribution(all_analyses),
+                 'recommendation_score': self._calculate_recommendation_score(all_analyses, user_id)
+             }
+
+             return insights
+
+         except Exception as e:
+             logger.error(f"Error generating insights: {e}", exc_info=True)
+             return {}
+
+     async def _generate_daily_summary(self, analysis_result: Dict[str, Any], user_id: str) -> Dict[str, Any]:
+         """Generate daily summary from analysis results."""
+         try:
+             results = analysis_result.get('results', [])
+             insights = analysis_result.get('insights', {})
+
+             summary = {
+                 'papers_reviewed': len(results),
+                 'top_papers': self._get_top_papers(results, limit=3),
+                 'key_themes': insights.get('key_themes', [])[:5],
+                 'research_highlights': self._extract_research_highlights(results),
+                 'personal_recommendations': await self._generate_personal_recommendations(results, user_id)
+             }
+
+             return summary
+
+         except Exception as e:
+             logger.error(f"Error generating daily summary: {e}", exc_info=True)
+             return {}
+
+     async def _update_user_analysis_stats(self, user_id: str, papers_count: int) -> None:
+         """Update user's analysis statistics."""
+         try:
+             stats_update = {
+                 '$inc': {
+                     'total_analyses': 1,
+                     'total_papers_analyzed': papers_count
+                 },
+                 '$set': {
+                     'last_analysis': datetime.utcnow().isoformat()
+                 }
+             }
+
+             await unified_database_service.update_one(
+                 'user_stats',
+                 {'user_id': user_id},
+                 stats_update,
+                 upsert=True
+             )
+
+         except Exception as e:
+             logger.error(f"Error updating user stats: {e}", exc_info=True)
+
+     def _extract_key_points(self, text: str) -> List[str]:
+         """Extract key points from analysis text."""
+         # Simple extraction - can be enhanced with NLP
+         sentences = text.split('.')
+         return [s.strip() for s in sentences if len(s.strip()) > 50][:3]
+
+     def _assess_methodology(self, text: str) -> Dict[str, Any]:
+         """Assess methodology from analysis text."""
+         # Placeholder for methodology assessment
+         return {
+             'rigor_score': 0.8,
+             'reproducibility': 'high',
+             'data_quality': 'good'
+         }
+
+     def _calculate_novelty_score(self, text: str) -> float:
+         """Calculate novelty score from analysis."""
+         # Placeholder for novelty scoring
+         return 0.75
+
+     def _extract_implications(self, text: str) -> List[str]:
+         """Extract research implications."""
+         # Placeholder for implication extraction
+         return ["Advances the field", "Practical applications", "Future research directions"]
+
+     def _extract_common_themes(self, analyses: List[Dict[str, Any]]) -> List[str]:
+         """Extract common themes from analyses."""
+         # Placeholder for theme extraction
+         return ["Machine Learning", "Deep Learning", "Computer Vision"]
+
+     def _analyze_methodology_trends(self, analyses: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Analyze methodology trends."""
+         return {'trending_methods': ['Transformers', 'GANs', 'Reinforcement Learning']}
+
+     def _analyze_relevance_distribution(self, analyses: List[Dict[str, Any]]) -> Dict[str, float]:
+         """Analyze relevance score distribution."""
+         return {'high_relevance': 0.3, 'medium_relevance': 0.5, 'low_relevance': 0.2}
+
+     def _calculate_recommendation_score(self, analyses: List[Dict[str, Any]], user_id: str) -> float:
+         """Calculate overall recommendation score."""
+         return 0.85
+
+     def _get_top_papers(self, results: List[Dict[str, Any]], limit: int = 3) -> List[Dict[str, Any]]:
+         """Get top papers from results."""
+         sorted_results = sorted(
+             results,
+             key=lambda x: x.get('analysis', {}).get('relevance_score', 0),
+             reverse=True
+         )
+         return sorted_results[:limit]
+
+     def _extract_research_highlights(self, results: List[Dict[str, Any]]) -> List[str]:
+         """Extract research highlights."""
+         return ["Novel approach to X", "Significant improvement in Y", "New dataset for Z"]
+
+     async def _generate_personal_recommendations(self, results: List[Dict[str, Any]], user_id: str) -> List[str]:
+         """Generate personal recommendations."""
+         return ["Read papers on topic X", "Explore methodology Y", "Consider collaboration on Z"]
+
+     def _find_most_active_day(self, analyses: List[Dict[str, Any]]) -> str:
+         """Find the most active analysis day."""
+         day_counts = {}
+         for analysis in analyses:
+             day = analysis.get('start_time', '').split('T')[0]
+             day_counts[day] = day_counts.get(day, 0) + 1
+
+         if day_counts:
+             return max(day_counts, key=day_counts.get)
+         return "No data"
+
+     async def _extract_trending_topics(self, analyses: List[Dict[str, Any]]) -> List[str]:
+         """Extract trending topics from analyses."""
+         # Placeholder for topic extraction
+         return ["Large Language Models", "Computer Vision", "Robotics"]
+
+     def _analyze_research_patterns(self, analyses: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Analyze research patterns."""
+         return {
+             'peak_analysis_time': '14:00',
+             'average_papers_per_session': 5.2,
+             'most_analyzed_categories': ['cs.AI', 'cs.LG']
+         }
+
+
+ # Global instance
+ unified_analysis_service = UnifiedAnalysisService()
+
+ # Backwards compatibility
+ rag_system = unified_analysis_service
+ rag_chat_system = unified_analysis_service
+ analysis_service = unified_analysis_service
+
+ # Export commonly used functions
+ analyze_papers_for_user = unified_analysis_service.analyze_papers_for_user
+ analyze_single_paper = unified_analysis_service.analyze_single_paper
+ search_similar_documents = unified_analysis_service.search_similar_documents
+ start_chat_session = unified_analysis_service.start_chat_session
+ continue_chat_session = unified_analysis_service.continue_chat_session
+
+ __all__ = [
+     'UnifiedAnalysisService',
+     'unified_analysis_service',
+     'rag_system',
+     'rag_chat_system',
+     'analysis_service',
+     'analyze_papers_for_user',
+     'analyze_single_paper',
+     'search_similar_documents',
+     'start_chat_session',
+     'continue_chat_session'
+ ]
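
Consumers are expected to import the module-level singleton rather than instantiate UnifiedAnalysisService themselves. A final sketch of the daily workflow wired to that instance (user id illustrative):

    import asyncio
    from arionxiv.services.unified_analysis_service import unified_analysis_service

    async def daily_job(user_id: str) -> None:
        # Fetches yesterday's papers for the user's preferred categories,
        # runs a quick pass, and stores a summary in 'daily_analyses'
        report = await unified_analysis_service.run_daily_analysis(user_id)
        if report["status"] == "success":
            print(f"{report['papers_analyzed']} papers summarized for {report['date']}")

    asyncio.run(daily_job("user-123"))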