rnsr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/llm.py ADDED
@@ -0,0 +1,733 @@
"""
LLM Configuration - Multi-Provider LLM and Embedding Support

Supports:
- OpenAI (GPT-4, text-embedding-3-small)
- Anthropic (Claude)
- Google Gemini (gemini-pro, text-embedding-004)

Features:
- Automatic rate limit handling with exponential backoff
- Cross-provider fallback on 429/quota errors
- Provider priority chain for resilience

Usage:
    from rnsr.llm import get_llm, get_embed_model, LLMProvider

    # Auto-detect based on environment variables
    llm = get_llm()
    embed = get_embed_model()

    # Or specify provider explicitly
    llm = get_llm(provider=LLMProvider.GEMINI)
    embed = get_embed_model(provider=LLMProvider.GEMINI)
"""

from __future__ import annotations

import os
import time
from enum import Enum
from pathlib import Path
from typing import Any, Callable, TypeVar, Union

import structlog
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
    before_sleep_log,
)

T = TypeVar("T")

# Load .env file if it exists
try:
    from dotenv import load_dotenv

    # Look for .env in the project root (parent of rnsr package)
    env_path = Path(__file__).parent.parent / ".env"
    if env_path.exists():
        load_dotenv(env_path)
except ImportError:
    pass  # dotenv not installed, rely on system environment

logger = structlog.get_logger(__name__)


class LLMProvider(str, Enum):
    """Supported LLM providers."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GEMINI = "gemini"
    AUTO = "auto"  # Auto-detect from environment


# Default models per provider (updated February 2026)
DEFAULT_MODELS = {
    LLMProvider.OPENAI: {
        "llm": "gpt-5-mini",  # Fast, affordable - use "gpt-5.2" for latest
        "embed": "text-embedding-3-small",
    },
    LLMProvider.ANTHROPIC: {
        "llm": "claude-sonnet-4-5",  # Smart model for agents/coding (alias for claude-sonnet-4-5-20250929)
        "embed": None,  # Anthropic doesn't have embeddings, fall back to OpenAI/Gemini
    },
    LLMProvider.GEMINI: {
        "llm": "gemini-2.5-flash",  # Stable model. Use "gemini-3-flash-preview" for latest.
        "embed": "text-embedding-004",
    },
}

# Fallback chain when a provider hits rate limits
PROVIDER_FALLBACK_CHAIN = {
    LLMProvider.GEMINI: [LLMProvider.OPENAI, LLMProvider.ANTHROPIC],
    LLMProvider.OPENAI: [LLMProvider.ANTHROPIC, LLMProvider.GEMINI],
    LLMProvider.ANTHROPIC: [LLMProvider.OPENAI, LLMProvider.GEMINI],
}
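
# Example (illustrative): get_available_fallback_providers() below filters this
# chain by the API keys actually present. With GOOGLE_API_KEY and OPENAI_API_KEY
# set but no ANTHROPIC_API_KEY:
#   get_available_fallback_providers(LLMProvider.GEMINI) -> [LLMProvider.OPENAI]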


def is_rate_limit_error(error: Exception) -> bool:
    """Check if an error is a rate limit/quota error that should trigger fallback."""
    error_str = str(error).lower()

    # Check for common rate limit indicators
    rate_limit_indicators = [
        "429",
        "rate limit",
        "rate_limit",
        "quota exceeded",
        "quota_exceeded",
        "resource exhausted",
        "resourceexhausted",
        "too many requests",
        "overloaded",
    ]

    for indicator in rate_limit_indicators:
        if indicator in error_str:
            return True

    # Check for specific exception types
    try:
        from google.api_core import exceptions as google_exceptions
        if isinstance(error, (
            google_exceptions.ResourceExhausted,
            google_exceptions.TooManyRequests,
        )):
            return True
    except ImportError:
        pass

    return False
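
# Example (illustrative): is_rate_limit_error() matches both message text and
# typed exceptions:
#   is_rate_limit_error(RuntimeError("429 Too Many Requests")) -> True
#   is_rate_limit_error(ValueError("bad input")) -> False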


def get_available_fallback_providers(primary: LLMProvider) -> list[LLMProvider]:
    """Get list of available fallback providers for a given primary provider."""
    fallbacks = []
    for provider in PROVIDER_FALLBACK_CHAIN.get(primary, []):
        if validate_provider(provider):
            fallbacks.append(provider)
    return fallbacks


def detect_provider() -> LLMProvider:
    """
    Auto-detect LLM provider from environment variables.

    Checks for API keys in order:
    1. GOOGLE_API_KEY -> Gemini
    2. ANTHROPIC_API_KEY -> Anthropic
    3. OPENAI_API_KEY -> OpenAI

    Returns:
        Detected LLMProvider.

    Raises:
        ValueError: If no API key is found.
    """
    if os.getenv("GOOGLE_API_KEY"):
        logger.info("provider_detected", provider="gemini")
        return LLMProvider.GEMINI

    if os.getenv("ANTHROPIC_API_KEY"):
        logger.info("provider_detected", provider="anthropic")
        return LLMProvider.ANTHROPIC

    if os.getenv("OPENAI_API_KEY"):
        logger.info("provider_detected", provider="openai")
        return LLMProvider.OPENAI

    raise ValueError(
        "No LLM API key found. Set one of: "
        "GOOGLE_API_KEY, ANTHROPIC_API_KEY, or OPENAI_API_KEY"
    )


def get_llm(
    provider: LLMProvider = LLMProvider.AUTO,
    model: str | None = None,
    enable_fallback: bool = True,
    **kwargs: Any,
) -> Any:
    """
    Get an LLM instance for the specified provider.

    Args:
        provider: LLM provider (openai, anthropic, gemini, or auto).
        model: Model name override. Uses default if not specified.
        enable_fallback: If True, enables cross-provider fallback on rate limits.
        **kwargs: Additional arguments passed to the LLM constructor.

    Returns:
        LlamaIndex-compatible LLM instance with fallback support.

    Example:
        llm = get_llm(provider=LLMProvider.GEMINI)
        response = llm.complete("Hello!")
    """
    if provider == LLMProvider.AUTO:
        provider = detect_provider()

    model = model or DEFAULT_MODELS[provider]["llm"]

    # Get primary LLM
    primary_llm = _get_raw_llm(provider, model, **kwargs)

    if not enable_fallback:
        return primary_llm

    # Build fallback chain
    fallback_providers = get_available_fallback_providers(provider)
    if not fallback_providers:
        logger.debug("no_fallback_providers_available", primary=provider.value)
        return primary_llm

    logger.debug(
        "llm_with_fallback_configured",
        primary=provider.value,
        fallbacks=[p.value for p in fallback_providers],
    )

    return ResilientLLMWrapper(
        primary_llm=primary_llm,
        primary_provider=provider,
        fallback_providers=fallback_providers,
        **kwargs,
    )
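
# Example (illustrative): with all three API keys set,
#   get_llm(provider=LLMProvider.OPENAI)
# returns a ResilientLLMWrapper whose fallback order is [anthropic, gemini],
# per PROVIDER_FALLBACK_CHAIN above.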


def _get_raw_llm(provider: LLMProvider, model: str, **kwargs: Any) -> Any:
    """Get a raw LLM instance without fallback wrapper."""
    if provider == LLMProvider.OPENAI:
        return _get_openai_llm(model, **kwargs)
    elif provider == LLMProvider.ANTHROPIC:
        return _get_anthropic_llm(model, **kwargs)
    elif provider == LLMProvider.GEMINI:
        return _get_gemini_llm(model, **kwargs)
    else:
        raise ValueError(f"Unknown provider: {provider}")


def get_embed_model(
    provider: LLMProvider = LLMProvider.AUTO,
    model: str | None = None,
    **kwargs: Any,
) -> Any:
    """
    Get an embedding model for the specified provider.

    Args:
        provider: LLM provider (openai, gemini, or auto).
        model: Model name override. Uses default if not specified.
        **kwargs: Additional arguments passed to the embedding constructor.

    Returns:
        LlamaIndex-compatible embedding model.

    Note:
        Anthropic doesn't have embeddings. Falls back to OpenAI or Gemini.

    Example:
        embed = get_embed_model(provider=LLMProvider.GEMINI)
        vector = embed.get_text_embedding("Hello world")
    """
    if provider == LLMProvider.AUTO:
        provider = detect_provider()

    # Anthropic doesn't have embeddings, fall back
    if provider == LLMProvider.ANTHROPIC:
        if os.getenv("GOOGLE_API_KEY"):
            provider = LLMProvider.GEMINI
            logger.info("anthropic_no_embeddings", fallback="gemini")
        elif os.getenv("OPENAI_API_KEY"):
            provider = LLMProvider.OPENAI
            logger.info("anthropic_no_embeddings", fallback="openai")
        else:
            raise ValueError(
                "Anthropic doesn't provide embeddings. "
                "Set GOOGLE_API_KEY or OPENAI_API_KEY for embeddings."
            )

    model = model or DEFAULT_MODELS[provider]["embed"]

    if provider == LLMProvider.OPENAI:
        return _get_openai_embed(model, **kwargs)
    elif provider == LLMProvider.GEMINI:
        return _get_gemini_embed(model, **kwargs)
    else:
        raise ValueError(f"Unknown embedding provider: {provider}")
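
# Example (illustrative): with ANTHROPIC_API_KEY and OPENAI_API_KEY set (and no
# GOOGLE_API_KEY),
#   get_embed_model(provider=LLMProvider.ANTHROPIC)
# logs anthropic_no_embeddings and returns an OpenAI embedding model instead.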


# =============================================================================
# Resilient LLM Wrapper with Cross-Provider Fallback
# =============================================================================


class ResilientLLMWrapper:
    """
    LLM wrapper that provides cross-provider fallback on rate limits.

    When the primary provider hits a 429/quota error, automatically switches
    to fallback providers in order until one succeeds.
    """

    def __init__(
        self,
        primary_llm: Any,
        primary_provider: LLMProvider,
        fallback_providers: list[LLMProvider],
        max_retries: int = 3,
        retry_delay: float = 2.0,
        **kwargs: Any,
    ):
        self.primary_llm = primary_llm
        self.primary_provider = primary_provider
        self.fallback_providers = fallback_providers
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.kwargs = kwargs

        # Lazily initialized fallback LLMs
        self._fallback_llms: dict[LLMProvider, Any] = {}

        # Track which provider we're currently using
        self._current_provider = primary_provider
        self._rate_limited_until: dict[LLMProvider, float] = {}

    def _get_fallback_llm(self, provider: LLMProvider) -> Any:
        """Get or create a fallback LLM instance."""
        if provider not in self._fallback_llms:
            model = DEFAULT_MODELS[provider]["llm"]
            self._fallback_llms[provider] = _get_raw_llm(provider, model, **self.kwargs)
            logger.info("fallback_llm_initialized", provider=provider.value, model=model)
        return self._fallback_llms[provider]

    def _is_rate_limited(self, provider: LLMProvider) -> bool:
        """Check if a provider is currently rate limited."""
        if provider not in self._rate_limited_until:
            return False
        return time.time() < self._rate_limited_until[provider]

    def _mark_rate_limited(self, provider: LLMProvider, duration: float = 60.0):
        """Mark a provider as rate limited for a duration."""
        self._rate_limited_until[provider] = time.time() + duration
        logger.warning(
            "provider_rate_limited",
            provider=provider.value,
            cooldown_seconds=duration,
        )

    def _get_available_llms(self) -> list[tuple[LLMProvider, Any]]:
        """Get list of available LLMs in priority order."""
        llms = []

        # Primary first (if not rate limited)
        if not self._is_rate_limited(self.primary_provider):
            llms.append((self.primary_provider, self.primary_llm))

        # Then fallbacks
        for provider in self.fallback_providers:
            if not self._is_rate_limited(provider):
                llms.append((provider, self._get_fallback_llm(provider)))

        # If all are rate limited, try primary anyway (it might work now)
        if not llms:
            llms.append((self.primary_provider, self.primary_llm))

        return llms

    def _call_with_fallback(self, method_name: str, *args: Any, **kwargs: Any) -> Any:
        """Call a method with automatic fallback on rate limits."""
        last_error = None

        for provider, llm in self._get_available_llms():
            for attempt in range(self.max_retries):
                try:
                    method = getattr(llm, method_name)
                    result = method(*args, **kwargs)

                    # Success - update current provider
                    if provider != self._current_provider:
                        logger.info(
                            "switched_to_fallback_provider",
                            from_provider=self._current_provider.value,
                            to_provider=provider.value,
                        )
                        self._current_provider = provider

                    return result

                except Exception as e:
                    last_error = e

                    if is_rate_limit_error(e):
                        logger.warning(
                            "rate_limit_hit",
                            provider=provider.value,
                            attempt=attempt + 1,
                            error=str(e)[:200],
                        )

                        # Mark provider as rate limited and try next
                        self._mark_rate_limited(provider, duration=60.0)
                        break  # Move to next provider
                    else:
                        # Non-rate-limit error - retry with exponential backoff
                        if attempt < self.max_retries - 1:
                            delay = self.retry_delay * (2 ** attempt)
                            logger.debug(
                                "retrying_after_error",
                                provider=provider.value,
                                attempt=attempt + 1,
                                delay=delay,
                                error=str(e)[:100],
                            )
                            time.sleep(delay)
                        else:
                            # All retries exhausted for this provider
                            break

        # All providers failed
        logger.error(
            "all_providers_failed",
            primary=self.primary_provider.value,
            fallbacks=[p.value for p in self.fallback_providers],
        )
        raise last_error or RuntimeError("All LLM providers failed")

    def complete(self, prompt: str, **kwargs: Any) -> Any:
        """Complete a prompt with fallback support."""
        return self._call_with_fallback("complete", prompt, **kwargs)

    def chat(self, messages: Any, **kwargs: Any) -> Any:
        """Chat with fallback support."""
        return self._call_with_fallback("chat", messages, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Forward other attributes to the current LLM."""
        return getattr(self.primary_llm, name)
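
# Example (illustrative) of the cooldown behavior above: after a 429 from the
# primary provider, _mark_rate_limited() removes it from _get_available_llms()
# for 60 seconds, so complete()/chat() are served by the first healthy fallback
# (unless every provider is cooling down, in which case the primary is tried
# anyway); once the cooldown lapses the primary is tried again.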


# =============================================================================
# Provider-Specific Implementations
# =============================================================================


def _get_openai_llm(model: str, **kwargs: Any) -> Any:
    """Get OpenAI LLM instance."""
    try:
        from llama_index.llms.openai import OpenAI
    except ImportError:
        raise ImportError(
            "OpenAI LLM not installed. "
            "Install with: pip install llama-index-llms-openai"
        )

    logger.debug("initializing_llm", provider="openai", model=model)
    return OpenAI(model=model, **kwargs)


def _get_anthropic_llm(model: str, **kwargs: Any) -> Any:
    """Get Anthropic LLM instance."""
    try:
        from llama_index.llms.anthropic import Anthropic
    except ImportError:
        raise ImportError(
            "Anthropic LLM not installed. "
            "Install with: pip install llama-index-llms-anthropic"
        )

    logger.debug("initializing_llm", provider="anthropic", model=model)
    return Anthropic(model=model, **kwargs)


def _get_gemini_llm(model: str, **kwargs: Any) -> Any:
    """Get Google Gemini LLM instance using the new google-genai SDK."""
    logger.debug("initializing_llm", provider="gemini", model=model)

    # Try the new google-genai SDK first (recommended)
    try:
        from google import genai
        from google.genai import types

        # Define exceptions to retry on,
        # if google.api_core is available (it usually ships with Google SDKs)
        try:
            from google.api_core import exceptions as google_exceptions
            RETRY_EXCEPTIONS = (
                google_exceptions.ServiceUnavailable,
                google_exceptions.TooManyRequests,
                google_exceptions.InternalServerError,
                google_exceptions.ResourceExhausted,
                google_exceptions.Aborted,
                ConnectionError,
                ConnectionRefusedError,
                TimeoutError,
                OSError,  # Covers [Errno 61] and other socket errors
            )
        except ImportError:
            # Fallback: without google.api_core we cannot name the specific
            # exception types, so retry on any Exception
            RETRY_EXCEPTIONS = (Exception,)

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable not set")

        # Create a wrapper that matches LlamaIndex LLM interface
        class GeminiWrapper:
            """Wrapper for google-genai to match LlamaIndex LLM interface."""

            def __init__(self, model_name: str, api_key: str):
                self.client = genai.Client(api_key=api_key)
                self.model_name = model_name
                self.fallback_model = "gemini-3-flash-preview"

            @retry(
                stop=stop_after_attempt(5),
                wait=wait_exponential(multiplier=1, min=2, max=30),
                retry=retry_if_exception_type(RETRY_EXCEPTIONS),
            )
            def complete(self, prompt: str, **kw: Any) -> str:
                try:
                    # Try primary model first
                    response = self.client.models.generate_content(
                        model=self.model_name,
                        contents=prompt,
                    )
                    return response.text or ""
                except RETRY_EXCEPTIONS as e:
                    # Fallback to preview model on overload/exhaustion
                    logger.warning(
                        "primary_llm_overloaded_using_fallback",
                        primary=self.model_name,
                        fallback=self.fallback_model,
                        error=str(e),
                    )
                    response = self.client.models.generate_content(
                        model=self.fallback_model,
                        contents=prompt,
                    )
                    return response.text or ""

            @retry(
                stop=stop_after_attempt(5),
                wait=wait_exponential(multiplier=1, min=2, max=30),
                retry=retry_if_exception_type(RETRY_EXCEPTIONS),
            )
            def chat(self, messages: list, **kw: Any) -> str:
                # Convert to genai format
                contents = []
                for msg in messages:
                    role = "user" if msg.get("role") == "user" else "model"
                    contents.append({"role": role, "parts": [{"text": msg.get("content", "")}]})

                try:
                    # Try primary model first
                    response = self.client.models.generate_content(
                        model=self.model_name,
                        contents=contents,
                    )
                    return response.text or ""
                except RETRY_EXCEPTIONS as e:
                    # Fallback to preview model
                    logger.warning(
                        "primary_llm_overloaded_using_fallback",
                        primary=self.model_name,
                        fallback=self.fallback_model,
                        error=str(e),
                    )
                    response = self.client.models.generate_content(
                        model=self.fallback_model,
                        contents=contents,
                    )
                    return response.text or ""

        return GeminiWrapper(model, api_key)

    except ImportError:
        # Fall back to llama-index-llms-gemini (deprecated)
        try:
            from llama_index.llms.gemini import Gemini

            # Define exceptions for legacy/llama-index path
            try:
                from google.api_core import exceptions as google_exceptions
                RETRY_EXCEPTIONS_LEGACY = (
                    google_exceptions.ServiceUnavailable,
                    google_exceptions.TooManyRequests,
                    google_exceptions.InternalServerError,
                    google_exceptions.ResourceExhausted,
                    google_exceptions.Aborted,
                    google_exceptions.DeadlineExceeded,
                    ConnectionError,
                    ConnectionRefusedError,
                    TimeoutError,
                    OSError,
                )
            except ImportError:
                RETRY_EXCEPTIONS_LEGACY = (Exception,)

            class LlamaIndexGeminiWrapper:
                """Wrapper for llama-index Gemini to provide fallback logic."""

                def __init__(self, model_name: str, **kwargs):
                    self.model_name = model_name
                    self.primary = Gemini(model=model_name, **kwargs)
                    # Fallback to older stable model or preview
                    self.fallback_model = "models/gemini-3-flash-preview"
                    self.fallback = Gemini(model=self.fallback_model, **kwargs)

                @retry(
                    stop=stop_after_attempt(5),
                    wait=wait_exponential(multiplier=1, min=2, max=30),
                    retry=retry_if_exception_type(RETRY_EXCEPTIONS_LEGACY),
                )
                def complete(self, prompt: str, **kw: Any) -> Any:
                    try:
                        return self.primary.complete(prompt, **kw)
                    except RETRY_EXCEPTIONS_LEGACY as e:
                        logger.warning(
                            "primary_llm_overloaded_using_fallback",
                            primary=self.model_name,
                            fallback=self.fallback_model,
                            error=str(e),
                        )
                        return self.fallback.complete(prompt, **kw)

                @retry(
                    stop=stop_after_attempt(5),
                    wait=wait_exponential(multiplier=1, min=2, max=30),
                    retry=retry_if_exception_type(RETRY_EXCEPTIONS_LEGACY),
                )
                def chat(self, messages: Any, **kw: Any) -> Any:
                    try:
                        return self.primary.chat(messages, **kw)
                    except RETRY_EXCEPTIONS_LEGACY as e:
                        logger.warning(
                            "primary_llm_overloaded_using_fallback",
                            primary=self.model_name,
                            fallback=self.fallback_model,
                            error=str(e),
                        )
                        return self.fallback.chat(messages, **kw)

                def __getattr__(self, name: str) -> Any:
                    return getattr(self.primary, name)

            return LlamaIndexGeminiWrapper(model, **kwargs)
        except ImportError:
            raise ImportError(
                "Neither google-genai nor llama-index-llms-gemini installed. "
                "Install with: pip install google-genai"
            )


def _get_openai_embed(model: str, **kwargs: Any) -> Any:
    """Get OpenAI embedding model."""
    try:
        from llama_index.embeddings.openai import OpenAIEmbedding
    except ImportError:
        raise ImportError(
            "OpenAI embeddings not installed. "
            "Install with: pip install llama-index-embeddings-openai"
        )

    logger.debug("initializing_embed", provider="openai", model=model)
    return OpenAIEmbedding(model=model, **kwargs)


def _get_gemini_embed(model: str, **kwargs: Any) -> Any:
    """Get Google Gemini embedding model."""
    try:
        from llama_index.embeddings.gemini import GeminiEmbedding
    except ImportError:
        raise ImportError(
            "Gemini embeddings not installed. "
            "Install with: pip install llama-index-embeddings-gemini"
        )

    logger.debug("initializing_embed", provider="gemini", model=model)
    return GeminiEmbedding(model_name=f"models/{model}", **kwargs)


# =============================================================================
# Convenience Functions
# =============================================================================


def get_provider_info() -> dict[str, Any]:
    """
    Get information about available providers.

    Returns:
        Dictionary with provider availability and configuration.
    """
    info = {
        "available": [],
        "default_provider": None,
        "models": DEFAULT_MODELS,
    }

    if os.getenv("OPENAI_API_KEY"):
        info["available"].append("openai")
    if os.getenv("ANTHROPIC_API_KEY"):
        info["available"].append("anthropic")
    if os.getenv("GOOGLE_API_KEY"):
        info["available"].append("gemini")

    if info["available"]:
        try:
            info["default_provider"] = detect_provider().value
        except ValueError:
            pass

    return info
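
# Example (illustrative) return value with only GOOGLE_API_KEY set:
#   {"available": ["gemini"], "default_provider": "gemini", "models": DEFAULT_MODELS}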


def validate_provider(provider: LLMProvider) -> bool:
    """
    Check if a provider is available (has API key set).

    Args:
        provider: Provider to check.

    Returns:
        True if provider is available.
    """
    if provider == LLMProvider.OPENAI:
        return bool(os.getenv("OPENAI_API_KEY"))
    elif provider == LLMProvider.ANTHROPIC:
        return bool(os.getenv("ANTHROPIC_API_KEY"))
    elif provider == LLMProvider.GEMINI:
        return bool(os.getenv("GOOGLE_API_KEY"))
    elif provider == LLMProvider.AUTO:
        return any([
            os.getenv("OPENAI_API_KEY"),
            os.getenv("ANTHROPIC_API_KEY"),
            os.getenv("GOOGLE_API_KEY"),
        ])
    return False
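
A minimal usage sketch of the module above (illustrative caller code, not part of the package), assuming GOOGLE_API_KEY is set in the environment:

    from rnsr.llm import get_llm, get_embed_model, get_provider_info

    # Inspect which providers the current environment can serve.
    info = get_provider_info()  # e.g. {"available": ["gemini"], "default_provider": "gemini", ...}

    # Auto-detected LLM. With only one API key set there are no usable
    # fallbacks, so get_llm() returns the raw provider client directly.
    llm = get_llm()
    print(llm.complete("Summarize cross-provider fallback in one sentence."))

    # Embeddings use the same provider detection.
    embed = get_embed_model()
    vector = embed.get_text_embedding("hello world")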