arionxiv-1.0.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. arionxiv/__init__.py +40 -0
  2. arionxiv/__main__.py +10 -0
  3. arionxiv/arxiv_operations/__init__.py +0 -0
  4. arionxiv/arxiv_operations/client.py +225 -0
  5. arionxiv/arxiv_operations/fetcher.py +173 -0
  6. arionxiv/arxiv_operations/searcher.py +122 -0
  7. arionxiv/arxiv_operations/utils.py +293 -0
  8. arionxiv/cli/__init__.py +4 -0
  9. arionxiv/cli/commands/__init__.py +1 -0
  10. arionxiv/cli/commands/analyze.py +587 -0
  11. arionxiv/cli/commands/auth.py +365 -0
  12. arionxiv/cli/commands/chat.py +714 -0
  13. arionxiv/cli/commands/daily.py +482 -0
  14. arionxiv/cli/commands/fetch.py +217 -0
  15. arionxiv/cli/commands/library.py +295 -0
  16. arionxiv/cli/commands/preferences.py +426 -0
  17. arionxiv/cli/commands/search.py +254 -0
  18. arionxiv/cli/commands/settings_unified.py +1407 -0
  19. arionxiv/cli/commands/trending.py +41 -0
  20. arionxiv/cli/commands/welcome.py +168 -0
  21. arionxiv/cli/main.py +407 -0
  22. arionxiv/cli/ui/__init__.py +1 -0
  23. arionxiv/cli/ui/global_theme_manager.py +173 -0
  24. arionxiv/cli/ui/logo.py +127 -0
  25. arionxiv/cli/ui/splash.py +89 -0
  26. arionxiv/cli/ui/theme.py +32 -0
  27. arionxiv/cli/ui/theme_system.py +391 -0
  28. arionxiv/cli/utils/__init__.py +54 -0
  29. arionxiv/cli/utils/animations.py +522 -0
  30. arionxiv/cli/utils/api_client.py +583 -0
  31. arionxiv/cli/utils/api_config.py +505 -0
  32. arionxiv/cli/utils/command_suggestions.py +147 -0
  33. arionxiv/cli/utils/db_config_manager.py +254 -0
  34. arionxiv/github_actions_runner.py +206 -0
  35. arionxiv/main.py +23 -0
  36. arionxiv/prompts/__init__.py +9 -0
  37. arionxiv/prompts/prompts.py +247 -0
  38. arionxiv/rag_techniques/__init__.py +8 -0
  39. arionxiv/rag_techniques/basic_rag.py +1531 -0
  40. arionxiv/scheduler_daemon.py +139 -0
  41. arionxiv/server.py +1000 -0
  42. arionxiv/server_main.py +24 -0
  43. arionxiv/services/__init__.py +73 -0
  44. arionxiv/services/llm_client.py +30 -0
  45. arionxiv/services/llm_inference/__init__.py +58 -0
  46. arionxiv/services/llm_inference/groq_client.py +469 -0
  47. arionxiv/services/llm_inference/llm_utils.py +250 -0
  48. arionxiv/services/llm_inference/openrouter_client.py +564 -0
  49. arionxiv/services/unified_analysis_service.py +872 -0
  50. arionxiv/services/unified_auth_service.py +457 -0
  51. arionxiv/services/unified_config_service.py +456 -0
  52. arionxiv/services/unified_daily_dose_service.py +823 -0
  53. arionxiv/services/unified_database_service.py +1633 -0
  54. arionxiv/services/unified_llm_service.py +366 -0
  55. arionxiv/services/unified_paper_service.py +604 -0
  56. arionxiv/services/unified_pdf_service.py +522 -0
  57. arionxiv/services/unified_prompt_service.py +344 -0
  58. arionxiv/services/unified_scheduler_service.py +589 -0
  59. arionxiv/services/unified_user_service.py +954 -0
  60. arionxiv/utils/__init__.py +51 -0
  61. arionxiv/utils/api_helpers.py +200 -0
  62. arionxiv/utils/file_cleanup.py +150 -0
  63. arionxiv/utils/ip_helper.py +96 -0
  64. arionxiv-1.0.32.dist-info/METADATA +336 -0
  65. arionxiv-1.0.32.dist-info/RECORD +69 -0
  66. arionxiv-1.0.32.dist-info/WHEEL +5 -0
  67. arionxiv-1.0.32.dist-info/entry_points.txt +4 -0
  68. arionxiv-1.0.32.dist-info/licenses/LICENSE +21 -0
  69. arionxiv-1.0.32.dist-info/top_level.txt +1 -0
@@ -0,0 +1,456 @@
1
+ """
2
+ Unified Config Service for ArionXiv
3
+ Consolidates config.py and logging_config.py
4
+ Provides comprehensive configuration management and logging setup
5
+ """
6
+
7
+ import os
8
+ import structlog
9
+ import logging
10
+ from typing import List, Dict, Any, Optional
11
+ from datetime import time
12
+ from dotenv import load_dotenv
13
+
14
+ # Load environment variables from .env file
15
+ load_dotenv()
16
+
17
class UnifiedConfigService:
    """
    Comprehensive configuration service that handles:
    1. System configuration management (config.py functionality)
    2. Logging configuration and verbosity control (logging_config.py functionality)

    Defaults are stored as upper-case instance attributes. The ``get_*``
    accessors re-read the environment on every call, so an environment
    variable always wins over the stored attribute (including values changed
    through ``update_config``).
    """

    # Loggers silenced (raised to CRITICAL) whenever debug mode is off:
    # HTTP clients, model loading, the embedding provider and all arionxiv
    # internals (CLI users see the rich UI instead).
    _SUPPRESSED_LOGGERS = (
        "httpx",
        "aiohttp",
        "transformers",
        "sentence_transformers",
        "embedding_service",
        "arionxiv",
    )

    # update_config() key -> attribute name, per configuration section.
    _UPDATABLE_SETTINGS = {
        "arxiv": {
            "max_results_per_query": "ARXIV_MAX_RESULTS_PER_QUERY",
            "search_days_back": "ARXIV_SEARCH_DAYS_BACK",
            "default_categories": "ARXIV_DEFAULT_CATEGORIES",
        },
        "analysis": {
            "batch_size": "ANALYSIS_BATCH_SIZE",
            "timeout_seconds": "ANALYSIS_TIMEOUT_SECONDS",
            "llm_model": "LLM_MODEL",
            "max_tokens": "LLM_MAX_TOKENS",
            "temperature": "LLM_TEMPERATURE",
        },
    }

    def __init__(self, debug_mode: bool = False, quiet: bool = False):
        """
        Load defaults, apply environment overrides and configure logging.

        Args:
            debug_mode: Force verbose logging; also enabled when the
                ARIONXIV_DEBUG environment variable equals "true".
            quiet: Force error-only logging; also enabled when the
                ARIONXIV_QUIET environment variable equals "true".
        """
        # Configuration constants - MongoDB URI for local development only.
        # End users use the hosted Vercel API, so this is not required and a
        # missing value is deliberately not reported here.
        self.MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv("MONGODB_URL")
        self.DATABASE_NAME = os.getenv("DATABASE_NAME", "arionxiv")

        # MongoDB Connection Settings (timeouts are in milliseconds; they are
        # passed on as the *MS options in get_mongodb_connection_config)
        self.MONGODB_CONNECT_TIMEOUT = self._env_int("MONGODB_CONNECT_TIMEOUT", 30000)
        self.MONGODB_SERVER_SELECTION_TIMEOUT = self._env_int("MONGODB_SERVER_SELECTION_TIMEOUT", 30000)
        self.MONGODB_SOCKET_TIMEOUT = self._env_int("MONGODB_SOCKET_TIMEOUT", 30000)
        self.MONGODB_MAX_POOL_SIZE = self._env_int("MONGODB_MAX_POOL_SIZE", 10)
        self.MONGODB_RETRY_WRITES = self._env_flag("MONGODB_RETRY_WRITES", True)
        self.MONGODB_RETRY_READS = self._env_flag("MONGODB_RETRY_READS", True)

        # All Collections
        self.USERS_COLLECTION = "users"
        self.PAPERS_COLLECTION = "papers"
        self.DAILY_ANALYSIS_COLLECTION = "daily_analysis"
        self.ANALYSIS_RESULTS_COLLECTION = "analysis_results"
        self.USER_PAPERS_COLLECTION = "user_papers"
        self.CRON_JOBS_COLLECTION = "cron_jobs"
        self.RAG_VECTOR_COLLECTION = "paper_embeddings"
        self.RAG_CHAT_COLLECTION = "chat_sessions"

        # ArXiv Configuration
        self.ARXIV_MAX_RESULTS_PER_QUERY = 50
        self.ARXIV_SEARCH_DAYS_BACK = 7
        self.ARXIV_DEFAULT_CATEGORIES = [
            "cs.CL", "cs.LG", "cs.AI", "stat.ML", "cs.CV", "cs.NE"
        ]

        # Daily Cron Job Configuration
        self.DAILY_CRON_HOUR = 6
        self.DAILY_CRON_MINUTE = 0
        self.TIMEZONE = "UTC"

        # Analysis Configuration
        self.ANALYSIS_BATCH_SIZE = 5
        self.ANALYSIS_TIMEOUT_SECONDS = 60

        # LLM Configuration
        self.LLM_MODEL = "gpt-3.5-turbo"
        self.LLM_MAX_TOKENS = 4000
        self.LLM_TEMPERATURE = 0.3

        # Embedding Model Configuration
        # Primary: Google Gemini (FREE API, requires GEMINI_API_KEY)
        # Fallbacks: Small HuggingFace models that run locally
        self.EMBEDDING_PRIMARY_MODEL = "gemini"  # Use Gemini embedding-001 (FREE)
        self.EMBEDDING_FALLBACK_1 = "ibm-granite/granite-embedding-30m-english"  # ~120MB
        self.EMBEDDING_FALLBACK_2 = "all-MiniLM-L6-v2"  # ~80MB, fast
        self.EMBEDDING_DIMENSION_DEFAULT = 768  # Gemini dimension
        self.EMBEDDING_ENABLE_GEMINI = True
        self.EMBEDDING_ENABLE_HUGGINGFACE = True
        self.EMBEDDING_BATCH_SIZE = 10
        self.EMBEDDING_CACHE_ENABLED = True

        # RAG System Configuration (collection names are set with the other
        # collections above)
        self.RAG_CHUNK_SIZE = 1000
        self.RAG_CHUNK_OVERLAP = 400
        self.RAG_TOP_K_RESULTS = 10  # Increased from 5 for richer context
        self.RAG_TTL_HOURS = 24

        # User Preferences Default Categories
        self.DEFAULT_USER_CATEGORIES = ["cs.LG", "cs.AI"]

        # Paper Storage Configuration
        self.PAPER_PDF_STORAGE_DAYS = 30

        self.debug_mode = debug_mode or self._env_flag("ARIONXIV_DEBUG", False)
        self.quiet_mode = quiet or self._env_flag("ARIONXIV_QUIET", False)

        self.setup_logging()

        logging.getLogger(__name__).info("UnifiedConfigService initialized")

    # ============================================================
    # ENVIRONMENT PARSING HELPERS
    # ============================================================

    @staticmethod
    def _env_int(name: str, default: Any) -> int:
        """Return env var *name* as an int, falling back to *default* when unset, blank or malformed."""
        raw = os.getenv(name)
        if raw is None or not raw.strip():
            return int(default)
        try:
            return int(raw)
        except ValueError:
            # A typo in the environment must not crash the import of this
            # module (the shared instance is created at import time).
            logging.getLogger(__name__).warning(
                "Ignoring invalid integer %r for %s; using default %r", raw, name, default
            )
            return int(default)

    @staticmethod
    def _env_float(name: str, default: Any) -> float:
        """Return env var *name* as a float, falling back to *default* when unset, blank or malformed."""
        raw = os.getenv(name)
        if raw is None or not raw.strip():
            return float(default)
        try:
            return float(raw)
        except ValueError:
            logging.getLogger(__name__).warning(
                "Ignoring invalid number %r for %s; using default %r", raw, name, default
            )
            return float(default)

    @staticmethod
    def _env_flag(name: str, default: Any) -> bool:
        """Return True only when env var *name* (or str(*default*) if unset) equals "true", case-insensitively."""
        return os.getenv(name, str(default)).lower() == "true"

    # ============================================================
    # CONFIGURATION MANAGEMENT (from config.py)
    # ============================================================

    def get_mongodb_uri(self) -> Optional[str]:
        """
        Get MongoDB URI with environment variable override

        Purpose: Retrieve the MongoDB connection URI, preferring the current
        MONGODB_URI / MONGODB_URL environment variables over the value
        captured at construction time.

        Returns:
            The URI, or None when none of the sources provides one.
        """
        return os.getenv("MONGODB_URI") or os.getenv("MONGODB_URL") or self.MONGODB_URI

    def get_database_name(self) -> str:
        """
        Get Database Name with environment variable override
        """
        return os.getenv("DATABASE_NAME", self.DATABASE_NAME)

    def get_mongodb_connection_config(self) -> Dict[str, Any]:
        """
        Get MongoDB connection configuration options

        Purpose: Provide a dictionary of MongoDB connection options built
        from the values captured at construction time. The write concern
        ("majority") and auth source ("admin") are fixed.
        """
        return {
            "connectTimeoutMS": self.MONGODB_CONNECT_TIMEOUT,
            "serverSelectionTimeoutMS": self.MONGODB_SERVER_SELECTION_TIMEOUT,
            "socketTimeoutMS": self.MONGODB_SOCKET_TIMEOUT,
            "maxPoolSize": self.MONGODB_MAX_POOL_SIZE,
            "retryWrites": self.MONGODB_RETRY_WRITES,
            "retryReads": self.MONGODB_RETRY_READS,
            "w": "majority",
            "authSource": "admin"
        }

    def get_groq_api_key(self) -> str:
        """
        Get Groq API key from environment variables

        Returns:
            The value of GROQ_API_KEY, or an empty string when unset.
        """
        return os.getenv("GROQ_API_KEY", "")

    def get_gemini_api_key(self) -> str:
        """
        Get Gemini API key from environment variables

        Returns:
            The value of GEMINI_API_KEY, or an empty string when unset.
        """
        return os.getenv("GEMINI_API_KEY", "")

    def get_openai_api_key(self) -> str:
        """
        Get OpenAI API key from environment variables

        Returns:
            The value of OPENAI_API_KEY, or an empty string when unset.
        """
        return os.getenv("OPENAI_API_KEY", "")

    def get_cron_schedule(self) -> Dict[str, Any]:
        """
        Get daily cron job schedule configuration

        Purpose: Provide the hour, minute and timezone of the daily job.
        DAILY_CRON_HOUR, DAILY_CRON_MINUTE and TIMEZONE environment
        variables override the stored defaults.
        """
        return {
            "hour": self._env_int("DAILY_CRON_HOUR", self.DAILY_CRON_HOUR),
            "minute": self._env_int("DAILY_CRON_MINUTE", self.DAILY_CRON_MINUTE),
            "timezone": os.getenv("TIMEZONE", self.TIMEZONE)
        }

    def get_arxiv_config(self) -> Dict[str, Any]:
        """
        Get ArXiv configuration

        Purpose: Provide maximum results per query, search window in days
        and default categories. ARXIV_MAX_RESULTS and ARXIV_SEARCH_DAYS
        environment variables override the stored defaults.
        """
        return {
            "max_results_per_query": self._env_int("ARXIV_MAX_RESULTS", self.ARXIV_MAX_RESULTS_PER_QUERY),
            "search_days_back": self._env_int("ARXIV_SEARCH_DAYS", self.ARXIV_SEARCH_DAYS_BACK),
            "default_categories": self.ARXIV_DEFAULT_CATEGORIES
        }

    def get_analysis_config(self) -> Dict[str, Any]:
        """
        Get analysis configuration

        Purpose: Provide batch size, timeout, LLM model, max tokens and
        temperature, each overridable through its environment variable.
        """
        return {
            "batch_size": self._env_int("ANALYSIS_BATCH_SIZE", self.ANALYSIS_BATCH_SIZE),
            "timeout_seconds": self._env_int("ANALYSIS_TIMEOUT", self.ANALYSIS_TIMEOUT_SECONDS),
            "llm_model": os.getenv("LLM_MODEL", self.LLM_MODEL),
            "max_tokens": self._env_int("LLM_MAX_TOKENS", self.LLM_MAX_TOKENS),
            "temperature": self._env_float("LLM_TEMPERATURE", self.LLM_TEMPERATURE)
        }

    def get_embedding_config(self) -> Dict[str, Any]:
        """
        Get embedding model configuration

        Purpose: Provide primary and fallback model names, default
        dimension, provider switches, batch size and cache flag, each
        overridable through its environment variable.
        """
        return {
            "primary_model": os.getenv("EMBEDDING_PRIMARY_MODEL", self.EMBEDDING_PRIMARY_MODEL),
            "fallback_1": os.getenv("EMBEDDING_FALLBACK_1", self.EMBEDDING_FALLBACK_1),
            "fallback_2": os.getenv("EMBEDDING_FALLBACK_2", self.EMBEDDING_FALLBACK_2),
            "dimension_default": self._env_int("EMBEDDING_DIMENSION_DEFAULT", self.EMBEDDING_DIMENSION_DEFAULT),
            "enable_gemini": self._env_flag("EMBEDDING_ENABLE_GEMINI", self.EMBEDDING_ENABLE_GEMINI),
            "enable_huggingface": self._env_flag("EMBEDDING_ENABLE_HUGGINGFACE", self.EMBEDDING_ENABLE_HUGGINGFACE),
            "batch_size": self._env_int("EMBEDDING_BATCH_SIZE", self.EMBEDDING_BATCH_SIZE),
            "cache_enabled": self._env_flag("EMBEDDING_CACHE_ENABLED", self.EMBEDDING_CACHE_ENABLED)
        }

    def get_rag_config(self) -> Dict[str, Any]:
        """
        Get RAG system configuration

        Purpose: Provide chunk size, chunk overlap, top-k results, TTL in
        hours and the two RAG collection names, each overridable through
        its environment variable.
        """
        return {
            "chunk_size": self._env_int("RAG_CHUNK_SIZE", self.RAG_CHUNK_SIZE),
            "chunk_overlap": self._env_int("RAG_CHUNK_OVERLAP", self.RAG_CHUNK_OVERLAP),
            "top_k_results": self._env_int("RAG_TOP_K_RESULTS", self.RAG_TOP_K_RESULTS),
            "ttl_hours": self._env_int("RAG_TTL_HOURS", self.RAG_TTL_HOURS),
            "vector_collection": os.getenv("RAG_VECTOR_COLLECTION", self.RAG_VECTOR_COLLECTION),
            "chat_collection": os.getenv("RAG_CHAT_COLLECTION", self.RAG_CHAT_COLLECTION)
        }

    # ====================
    # LOGGING CONFIGURATION (from logging_config.py)
    # ====================

    def setup_logging(self):
        """
        Configure structured logging with appropriate verbosity

        Purpose: Configure structlog and the stdlib root logger from the
        current ``quiet_mode`` / ``debug_mode`` flags. Quiet mode takes
        precedence for the level (ERROR), then debug (DEBUG), otherwise
        WARNING. Safe to call repeatedly; each call re-applies the levels.
        """
        if self.quiet_mode:
            log_level = logging.ERROR  # Only show errors in quiet mode
        elif self.debug_mode:
            log_level = logging.DEBUG
        else:
            log_level = logging.WARNING  # Only show warnings and errors in normal mode

        # NOTE(review): debug mode selects the JSON renderer and normal mode the
        # console renderer; kept as in the original - confirm this is intended.
        # NOTE(review): with cache_logger_on_first_use=True, structlog loggers
        # that were already used may keep their earlier configuration after a
        # later reconfigure - confirm against the structlog documentation.
        structlog.configure(
            processors=[
                structlog.stdlib.filter_by_level,
                structlog.stdlib.add_logger_name,
                structlog.stdlib.add_log_level,
                structlog.stdlib.PositionalArgumentsFormatter(),
                structlog.processors.StackInfoRenderer(),
                structlog.processors.format_exc_info,
                structlog.processors.UnicodeDecoder(),
                structlog.processors.JSONRenderer() if self.debug_mode else structlog.dev.ConsoleRenderer(),
            ],
            context_class=dict,
            logger_factory=structlog.stdlib.LoggerFactory(),
            wrapper_class=structlog.stdlib.BoundLogger,
            cache_logger_on_first_use=True,
        )

        # basicConfig installs the root handler on the first call only; once
        # the root logger has handlers it does nothing, so the level is set
        # explicitly to make later mode switches take effect.
        logging.basicConfig(level=log_level)
        logging.getLogger().setLevel(log_level)

        if self.debug_mode:
            # Undo an earlier suppression so these loggers inherit the root
            # level again. Only loggers sitting exactly at CRITICAL are reset,
            # which leaves levels chosen elsewhere untouched.
            for logger_name in self._SUPPRESSED_LOGGERS:
                noisy_logger = logging.getLogger(logger_name)
                if noisy_logger.level == logging.CRITICAL:
                    noisy_logger.setLevel(logging.NOTSET)
        else:
            # Raise noisy loggers to CRITICAL so that everything below that
            # level is hidden from CLI users.
            for logger_name in self._SUPPRESSED_LOGGERS:
                logging.getLogger(logger_name).setLevel(logging.CRITICAL)

    def get_logger(self, name: str) -> logging.Logger:
        """
        Get a configured logger

        Returns:
            The standard-library logger registered under *name* (not a
            structlog bound logger).
        """
        return logging.getLogger(name)

    def is_debug(self) -> bool:
        """Check if debug mode is enabled"""
        return self.debug_mode

    def enable_debug_mode(self):
        """Enable debug mode for verbose logging (turns quiet mode off)"""
        self.debug_mode = True
        self.quiet_mode = False
        self.setup_logging()

    def enable_quiet_mode(self):
        """Enable quiet mode - minimal logging (turns debug mode off)"""
        self.debug_mode = False
        self.quiet_mode = True
        self.setup_logging()

    # ============================================================
    # ENHANCED CONFIGURATION METHODS
    # ============================================================

    def get_all_config(self) -> Dict[str, Any]:
        """
        Complete configuration dictionary

        Purpose: Provide one nested dictionary with the database, ArXiv,
        analysis, embedding, RAG, cron and logging settings.

        NOTE: the "database" section carries the MongoDB URI as-is; it may
        embed credentials, so avoid logging or displaying the result verbatim.
        """
        return {
            "database": {
                "uri": self.get_mongodb_uri(),
                "name": self.get_database_name(),
                "collections": {
                    "users": self.USERS_COLLECTION,
                    "papers": self.PAPERS_COLLECTION,
                    "daily_analysis": self.DAILY_ANALYSIS_COLLECTION,
                    "analysis_results": self.ANALYSIS_RESULTS_COLLECTION,
                    "user_papers": self.USER_PAPERS_COLLECTION,
                    "cron_jobs": self.CRON_JOBS_COLLECTION
                }
            },
            "arxiv": self.get_arxiv_config(),
            "analysis": self.get_analysis_config(),
            "embedding": self.get_embedding_config(),
            "rag": self.get_rag_config(),
            "cron": self.get_cron_schedule(),
            "logging": {
                "debug_mode": self.debug_mode,
                "quiet_mode": self.quiet_mode
            }
        }

    def update_config(self, config_section: str, updates: Dict[str, Any]):
        """
        Update configuration settings dynamically

        Args:
            config_section: "arxiv", "analysis" or "logging". Any other
                value is ignored (a warning is logged).
            updates: Keys to change. Unknown keys inside a known section
                are ignored. For "logging" the recognised keys are
                "debug_mode" and "quiet_mode"; when both are truthy, quiet
                mode is applied last and therefore wins.
        """
        if config_section in self._UPDATABLE_SETTINGS:
            for key, attribute in self._UPDATABLE_SETTINGS[config_section].items():
                if key in updates:
                    setattr(self, attribute, updates[key])

        elif config_section == "logging":
            if "debug_mode" in updates:
                if updates["debug_mode"]:
                    self.enable_debug_mode()
                else:
                    # Turning the flag off must also re-apply the log levels.
                    self.debug_mode = updates["debug_mode"]
                    self.setup_logging()
            if "quiet_mode" in updates:
                if updates["quiet_mode"]:
                    self.enable_quiet_mode()
                else:
                    self.quiet_mode = updates["quiet_mode"]
                    self.setup_logging()

        else:
            logging.getLogger(__name__).warning(
                "Ignoring update for unknown config section %r", config_section
            )

    def validate_config(self) -> Dict[str, Any]:
        """
        Validate current configuration and return validation results

        Returns:
            A dict with "valid" (bool), "warnings" (out-of-range numeric
            attributes) and "errors" (missing MongoDB URI or database name).
            Only errors make the configuration invalid. Range checks look at
            the stored attributes, not at environment overrides.
        """
        validation_results = {
            "valid": True,
            "warnings": [],
            "errors": []
        }

        # (attribute, inclusive minimum, inclusive maximum)
        range_checks = (
            ("ARXIV_MAX_RESULTS_PER_QUERY", 1, 1000),
            ("ARXIV_SEARCH_DAYS_BACK", 1, 365),
            ("ANALYSIS_BATCH_SIZE", 1, 50),
            ("LLM_MAX_TOKENS", 100, 8000),
            ("LLM_TEMPERATURE", 0, 2),
        )
        for attribute, minimum, maximum in range_checks:
            value = getattr(self, attribute)
            if value < minimum or value > maximum:
                validation_results["warnings"].append(
                    f"{attribute} should be between {minimum} and {maximum}"
                )

        # Check for critical errors
        if not self.get_mongodb_uri():
            validation_results["errors"].append("MongoDB URI is required")
            validation_results["valid"] = False

        if not self.get_database_name():
            validation_results["errors"].append("Database name is required")
            validation_results["valid"] = False

        return validation_results
425
+
426
+
427
# Shared service instance, created once when this module is imported.
unified_config_service = UnifiedConfigService()

# Backwards compatibility: the names of the former config and logging_config
# objects both point at the shared instance.
config = logging_config = unified_config_service

# Bound methods of the shared instance, exported as module-level functions.
get_mongodb_uri = unified_config_service.get_mongodb_uri
get_database_name = unified_config_service.get_database_name
get_arxiv_config = unified_config_service.get_arxiv_config
get_analysis_config = unified_config_service.get_analysis_config
get_embedding_config = unified_config_service.get_embedding_config
get_rag_config = unified_config_service.get_rag_config
get_logger = unified_config_service.get_logger
is_debug_mode = unified_config_service.is_debug

# Public API of this module.
__all__ = [
    "UnifiedConfigService",
    "unified_config_service",
    "config",
    "logging_config",
    "get_mongodb_uri",
    "get_database_name",
    "get_arxiv_config",
    "get_analysis_config",
    "get_embedding_config",
    "get_rag_config",
    "get_logger",
    "is_debug_mode",
]