rnsr-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnsr/__init__.py +118 -0
- rnsr/__main__.py +242 -0
- rnsr/agent/__init__.py +218 -0
- rnsr/agent/cross_doc_navigator.py +767 -0
- rnsr/agent/graph.py +1557 -0
- rnsr/agent/llm_cache.py +575 -0
- rnsr/agent/navigator_api.py +497 -0
- rnsr/agent/provenance.py +772 -0
- rnsr/agent/query_clarifier.py +617 -0
- rnsr/agent/reasoning_memory.py +736 -0
- rnsr/agent/repl_env.py +709 -0
- rnsr/agent/rlm_navigator.py +2108 -0
- rnsr/agent/self_reflection.py +602 -0
- rnsr/agent/variable_store.py +308 -0
- rnsr/benchmarks/__init__.py +118 -0
- rnsr/benchmarks/comprehensive_benchmark.py +733 -0
- rnsr/benchmarks/evaluation_suite.py +1210 -0
- rnsr/benchmarks/finance_bench.py +147 -0
- rnsr/benchmarks/pdf_merger.py +178 -0
- rnsr/benchmarks/performance.py +321 -0
- rnsr/benchmarks/quality.py +321 -0
- rnsr/benchmarks/runner.py +298 -0
- rnsr/benchmarks/standard_benchmarks.py +995 -0
- rnsr/client.py +560 -0
- rnsr/document_store.py +394 -0
- rnsr/exceptions.py +74 -0
- rnsr/extraction/__init__.py +172 -0
- rnsr/extraction/candidate_extractor.py +357 -0
- rnsr/extraction/entity_extractor.py +581 -0
- rnsr/extraction/entity_linker.py +825 -0
- rnsr/extraction/grounded_extractor.py +722 -0
- rnsr/extraction/learned_types.py +599 -0
- rnsr/extraction/models.py +232 -0
- rnsr/extraction/relationship_extractor.py +600 -0
- rnsr/extraction/relationship_patterns.py +511 -0
- rnsr/extraction/relationship_validator.py +392 -0
- rnsr/extraction/rlm_extractor.py +589 -0
- rnsr/extraction/rlm_unified_extractor.py +990 -0
- rnsr/extraction/tot_validator.py +610 -0
- rnsr/extraction/unified_extractor.py +342 -0
- rnsr/indexing/__init__.py +60 -0
- rnsr/indexing/knowledge_graph.py +1128 -0
- rnsr/indexing/kv_store.py +313 -0
- rnsr/indexing/persistence.py +323 -0
- rnsr/indexing/semantic_retriever.py +237 -0
- rnsr/indexing/semantic_search.py +320 -0
- rnsr/indexing/skeleton_index.py +395 -0
- rnsr/ingestion/__init__.py +161 -0
- rnsr/ingestion/chart_parser.py +569 -0
- rnsr/ingestion/document_boundary.py +662 -0
- rnsr/ingestion/font_histogram.py +334 -0
- rnsr/ingestion/header_classifier.py +595 -0
- rnsr/ingestion/hierarchical_cluster.py +515 -0
- rnsr/ingestion/layout_detector.py +356 -0
- rnsr/ingestion/layout_model.py +379 -0
- rnsr/ingestion/ocr_fallback.py +177 -0
- rnsr/ingestion/pipeline.py +936 -0
- rnsr/ingestion/semantic_fallback.py +417 -0
- rnsr/ingestion/table_parser.py +799 -0
- rnsr/ingestion/text_builder.py +460 -0
- rnsr/ingestion/tree_builder.py +402 -0
- rnsr/ingestion/vision_retrieval.py +965 -0
- rnsr/ingestion/xy_cut.py +555 -0
- rnsr/llm.py +733 -0
- rnsr/models.py +167 -0
- rnsr/py.typed +2 -0
- rnsr-0.1.0.dist-info/METADATA +592 -0
- rnsr-0.1.0.dist-info/RECORD +72 -0
- rnsr-0.1.0.dist-info/WHEEL +5 -0
- rnsr-0.1.0.dist-info/entry_points.txt +2 -0
- rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
- rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/llm.py
ADDED
@@ -0,0 +1,733 @@
"""
LLM Configuration - Multi-Provider LLM and Embedding Support

Supports:
- OpenAI (GPT-4, text-embedding-3-small)
- Anthropic (Claude)
- Google Gemini (gemini-pro, text-embedding-004)

Features:
- Automatic rate limit handling with exponential backoff
- Cross-provider fallback on 429/quota errors
- Provider priority chain for resilience

Usage:
    from rnsr.llm import get_llm, get_embed_model, LLMProvider

    # Auto-detect based on environment variables
    llm = get_llm()
    embed = get_embed_model()

    # Or specify provider explicitly
    llm = get_llm(provider=LLMProvider.GEMINI)
    embed = get_embed_model(provider=LLMProvider.GEMINI)
"""

from __future__ import annotations

import os
import time
from enum import Enum
from pathlib import Path
from typing import Any, Callable, TypeVar, Union

import structlog
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
    before_sleep_log,
)

T = TypeVar("T")

# Load .env file if it exists
try:
    from dotenv import load_dotenv

    # Look for .env in the project root (parent of rnsr package)
    env_path = Path(__file__).parent.parent / ".env"
    if env_path.exists():
        load_dotenv(env_path)
except ImportError:
    pass  # dotenv not installed, rely on system environment

logger = structlog.get_logger(__name__)


class LLMProvider(str, Enum):
    """Supported LLM providers."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GEMINI = "gemini"
    AUTO = "auto"  # Auto-detect from environment


# Default models per provider (updated February 2026)
DEFAULT_MODELS = {
    LLMProvider.OPENAI: {
        "llm": "gpt-5-mini",  # Fast, affordable - use "gpt-5.2" for latest
        "embed": "text-embedding-3-small",
    },
    LLMProvider.ANTHROPIC: {
        "llm": "claude-sonnet-4-5",  # Smart model for agents/coding (alias for claude-sonnet-4-5-20250929)
        "embed": None,  # Anthropic doesn't have embeddings, fall back to OpenAI/Gemini
    },
    LLMProvider.GEMINI: {
        "llm": "gemini-2.5-flash",  # Stable model. Use "gemini-3-flash-preview" for latest.
        "embed": "text-embedding-004",
    },
}

# Fallback chain when a provider hits rate limits
PROVIDER_FALLBACK_CHAIN = {
    LLMProvider.GEMINI: [LLMProvider.OPENAI, LLMProvider.ANTHROPIC],
    LLMProvider.OPENAI: [LLMProvider.ANTHROPIC, LLMProvider.GEMINI],
    LLMProvider.ANTHROPIC: [LLMProvider.OPENAI, LLMProvider.GEMINI],
}


def is_rate_limit_error(error: Exception) -> bool:
    """Check if an error is a rate limit/quota error that should trigger fallback."""
    error_str = str(error).lower()

    # Check for common rate limit indicators
    rate_limit_indicators = [
        "429",
        "rate limit",
        "rate_limit",
        "quota exceeded",
        "quota_exceeded",
        "resource exhausted",
        "resourceexhausted",
        "too many requests",
        "overloaded",
    ]

    for indicator in rate_limit_indicators:
        if indicator in error_str:
            return True

    # Check for specific exception types
    try:
        from google.api_core import exceptions as google_exceptions
        if isinstance(error, (
            google_exceptions.ResourceExhausted,
            google_exceptions.TooManyRequests,
        )):
            return True
    except ImportError:
        pass

    return False


def get_available_fallback_providers(primary: LLMProvider) -> list[LLMProvider]:
    """Get list of available fallback providers for a given primary provider."""
    fallbacks = []
    for provider in PROVIDER_FALLBACK_CHAIN.get(primary, []):
        if validate_provider(provider):
            fallbacks.append(provider)
    return fallbacks

def detect_provider() -> LLMProvider:
    """
    Auto-detect LLM provider from environment variables.

    Checks for API keys in order:
    1. GOOGLE_API_KEY -> Gemini
    2. ANTHROPIC_API_KEY -> Anthropic
    3. OPENAI_API_KEY -> OpenAI

    Returns:
        Detected LLMProvider.

    Raises:
        ValueError: If no API key is found.
    """
    if os.getenv("GOOGLE_API_KEY"):
        logger.info("provider_detected", provider="gemini")
        return LLMProvider.GEMINI

    if os.getenv("ANTHROPIC_API_KEY"):
        logger.info("provider_detected", provider="anthropic")
        return LLMProvider.ANTHROPIC

    if os.getenv("OPENAI_API_KEY"):
        logger.info("provider_detected", provider="openai")
        return LLMProvider.OPENAI

    raise ValueError(
        "No LLM API key found. Set one of: "
        "GOOGLE_API_KEY, ANTHROPIC_API_KEY, or OPENAI_API_KEY"
    )


def get_llm(
    provider: LLMProvider = LLMProvider.AUTO,
    model: str | None = None,
    enable_fallback: bool = True,
    **kwargs: Any,
) -> Any:
    """
    Get an LLM instance for the specified provider.

    Args:
        provider: LLM provider (openai, anthropic, gemini, or auto).
        model: Model name override. Uses default if not specified.
        enable_fallback: If True, enables cross-provider fallback on rate limits.
        **kwargs: Additional arguments passed to the LLM constructor.

    Returns:
        LlamaIndex-compatible LLM instance with fallback support.

    Example:
        llm = get_llm(provider=LLMProvider.GEMINI)
        response = await llm.acomplete("Hello!")
    """
    if provider == LLMProvider.AUTO:
        provider = detect_provider()

    model = model or DEFAULT_MODELS[provider]["llm"]

    # Get primary LLM
    primary_llm = _get_raw_llm(provider, model, **kwargs)

    if not enable_fallback:
        return primary_llm

    # Build fallback chain
    fallback_providers = get_available_fallback_providers(provider)
    if not fallback_providers:
        logger.debug("no_fallback_providers_available", primary=provider.value)
        return primary_llm

    logger.debug(
        "llm_with_fallback_configured",
        primary=provider.value,
        fallbacks=[p.value for p in fallback_providers],
    )

    return ResilientLLMWrapper(
        primary_llm=primary_llm,
        primary_provider=provider,
        fallback_providers=fallback_providers,
        **kwargs,
    )


def _get_raw_llm(provider: LLMProvider, model: str, **kwargs: Any) -> Any:
    """Get a raw LLM instance without fallback wrapper."""
    if provider == LLMProvider.OPENAI:
        return _get_openai_llm(model, **kwargs)
    elif provider == LLMProvider.ANTHROPIC:
        return _get_anthropic_llm(model, **kwargs)
    elif provider == LLMProvider.GEMINI:
        return _get_gemini_llm(model, **kwargs)
    else:
        raise ValueError(f"Unknown provider: {provider}")


def get_embed_model(
    provider: LLMProvider = LLMProvider.AUTO,
    model: str | None = None,
    **kwargs: Any,
) -> Any:
    """
    Get an embedding model for the specified provider.

    Args:
        provider: LLM provider (openai, gemini, or auto).
        model: Model name override. Uses default if not specified.
        **kwargs: Additional arguments passed to the embedding constructor.

    Returns:
        LlamaIndex-compatible embedding model.

    Note:
        Anthropic doesn't have embeddings. Falls back to OpenAI or Gemini.

    Example:
        embed = get_embed_model(provider=LLMProvider.GEMINI)
        vector = embed.get_text_embedding("Hello world")
    """
    if provider == LLMProvider.AUTO:
        provider = detect_provider()

    # Anthropic doesn't have embeddings, fall back
    if provider == LLMProvider.ANTHROPIC:
        if os.getenv("GOOGLE_API_KEY"):
            provider = LLMProvider.GEMINI
            logger.info("anthropic_no_embeddings", fallback="gemini")
        elif os.getenv("OPENAI_API_KEY"):
            provider = LLMProvider.OPENAI
            logger.info("anthropic_no_embeddings", fallback="openai")
        else:
            raise ValueError(
                "Anthropic doesn't provide embeddings. "
                "Set GOOGLE_API_KEY or OPENAI_API_KEY for embeddings."
            )

    model = model or DEFAULT_MODELS[provider]["embed"]

    if provider == LLMProvider.OPENAI:
        return _get_openai_embed(model, **kwargs)
    elif provider == LLMProvider.GEMINI:
        return _get_gemini_embed(model, **kwargs)
    else:
        raise ValueError(f"Unknown embedding provider: {provider}")


# =============================================================================
# Resilient LLM Wrapper with Cross-Provider Fallback
# =============================================================================

class ResilientLLMWrapper:
    """
    LLM wrapper that provides cross-provider fallback on rate limits.

    When the primary provider hits a 429/quota error, automatically switches
    to fallback providers in order until one succeeds.
    """

    def __init__(
        self,
        primary_llm: Any,
        primary_provider: LLMProvider,
        fallback_providers: list[LLMProvider],
        max_retries: int = 3,
        retry_delay: float = 2.0,
        **kwargs: Any,
    ):
        self.primary_llm = primary_llm
        self.primary_provider = primary_provider
        self.fallback_providers = fallback_providers
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.kwargs = kwargs

        # Lazily initialized fallback LLMs
        self._fallback_llms: dict[LLMProvider, Any] = {}

        # Track which provider we're currently using
        self._current_provider = primary_provider
        self._rate_limited_until: dict[LLMProvider, float] = {}

    def _get_fallback_llm(self, provider: LLMProvider) -> Any:
        """Get or create a fallback LLM instance."""
        if provider not in self._fallback_llms:
            model = DEFAULT_MODELS[provider]["llm"]
            self._fallback_llms[provider] = _get_raw_llm(provider, model, **self.kwargs)
            logger.info("fallback_llm_initialized", provider=provider.value, model=model)
        return self._fallback_llms[provider]

    def _is_rate_limited(self, provider: LLMProvider) -> bool:
        """Check if a provider is currently rate limited."""
        if provider not in self._rate_limited_until:
            return False
        return time.time() < self._rate_limited_until[provider]

    def _mark_rate_limited(self, provider: LLMProvider, duration: float = 60.0):
        """Mark a provider as rate limited for a duration."""
        self._rate_limited_until[provider] = time.time() + duration
        logger.warning(
            "provider_rate_limited",
            provider=provider.value,
            cooldown_seconds=duration,
        )

    def _get_available_llms(self) -> list[tuple[LLMProvider, Any]]:
        """Get list of available LLMs in priority order."""
        llms = []

        # Primary first (if not rate limited)
        if not self._is_rate_limited(self.primary_provider):
            llms.append((self.primary_provider, self.primary_llm))

        # Then fallbacks
        for provider in self.fallback_providers:
            if not self._is_rate_limited(provider):
                llms.append((provider, self._get_fallback_llm(provider)))

        # If all are rate limited, try primary anyway (it might work now)
        if not llms:
            llms.append((self.primary_provider, self.primary_llm))

        return llms

    def _call_with_fallback(self, method_name: str, *args: Any, **kwargs: Any) -> Any:
        """Call a method with automatic fallback on rate limits."""
        last_error = None

        for provider, llm in self._get_available_llms():
            for attempt in range(self.max_retries):
                try:
                    method = getattr(llm, method_name)
                    result = method(*args, **kwargs)

                    # Success - update current provider
                    if provider != self._current_provider:
                        logger.info(
                            "switched_to_fallback_provider",
                            from_provider=self._current_provider.value,
                            to_provider=provider.value,
                        )
                        self._current_provider = provider

                    return result

                except Exception as e:
                    last_error = e

                    if is_rate_limit_error(e):
                        logger.warning(
                            "rate_limit_hit",
                            provider=provider.value,
                            attempt=attempt + 1,
                            error=str(e)[:200],
                        )

                        # Mark provider as rate limited and try next
                        self._mark_rate_limited(provider, duration=60.0)
                        break  # Move to next provider
                    else:
                        # Non-rate-limit error - retry with exponential backoff
                        if attempt < self.max_retries - 1:
                            delay = self.retry_delay * (2 ** attempt)
                            logger.debug(
                                "retrying_after_error",
                                provider=provider.value,
                                attempt=attempt + 1,
                                delay=delay,
                                error=str(e)[:100],
                            )
                            time.sleep(delay)
                        else:
                            # All retries exhausted for this provider
                            break

        # All providers failed
        logger.error(
            "all_providers_failed",
            primary=self.primary_provider.value,
            fallbacks=[p.value for p in self.fallback_providers],
        )
        raise last_error or RuntimeError("All LLM providers failed")

    def complete(self, prompt: str, **kwargs: Any) -> Any:
        """Complete a prompt with fallback support."""
        return self._call_with_fallback("complete", prompt, **kwargs)

    def chat(self, messages: Any, **kwargs: Any) -> Any:
        """Chat with fallback support."""
        return self._call_with_fallback("chat", messages, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Forward other attributes to the current LLM."""
        return getattr(self.primary_llm, name)


# =============================================================================
# Provider-Specific Implementations
# =============================================================================

def _get_openai_llm(model: str, **kwargs: Any) -> Any:
    """Get OpenAI LLM instance."""
    try:
        from llama_index.llms.openai import OpenAI
    except ImportError:
        raise ImportError(
            "OpenAI LLM not installed. "
            "Install with: pip install llama-index-llms-openai"
        )

    logger.debug("initializing_llm", provider="openai", model=model)
    return OpenAI(model=model, **kwargs)


def _get_anthropic_llm(model: str, **kwargs: Any) -> Any:
    """Get Anthropic LLM instance."""
    try:
        from llama_index.llms.anthropic import Anthropic
    except ImportError:
        raise ImportError(
            "Anthropic LLM not installed. "
            "Install with: pip install llama-index-llms-anthropic"
        )

    logger.debug("initializing_llm", provider="anthropic", model=model)
    return Anthropic(model=model, **kwargs)


def _get_gemini_llm(model: str, **kwargs: Any) -> Any:
    """Get Google Gemini LLM instance using the new google-genai SDK."""
    logger.debug("initializing_llm", provider="gemini", model=model)

    # Try the new google-genai SDK first (recommended)
    try:
        from google import genai
        from google.genai import types

        # Define exceptions to retry on
        # If google.api_core is available (usually is with google SDKs)
        try:
            from google.api_core import exceptions as google_exceptions
            RETRY_EXCEPTIONS = (
                google_exceptions.ServiceUnavailable,
                google_exceptions.TooManyRequests,
                google_exceptions.InternalServerError,
                google_exceptions.ResourceExhausted,
                google_exceptions.Aborted,
                ConnectionError,
                ConnectionRefusedError,
                TimeoutError,
                OSError,  # Covers [Errno 61] and other socket errors
            )
        except ImportError:
            # Fallback: Retry on any Exception that mentions overload/503/429
            # But simpler to just retry on Exception if we can't import specific ones
            RETRY_EXCEPTIONS = (Exception,)

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable not set")

        # Create a wrapper that matches LlamaIndex LLM interface
        class GeminiWrapper:
            """Wrapper for google-genai to match LlamaIndex LLM interface."""

            def __init__(self, model_name: str, api_key: str):
                self.client = genai.Client(api_key=api_key)
                self.model_name = model_name
                self.fallback_model = "gemini-3-flash-preview"

            @retry(
                stop=stop_after_attempt(5),
                wait=wait_exponential(multiplier=1, min=2, max=30),
                retry=retry_if_exception_type(RETRY_EXCEPTIONS),
            )
            def complete(self, prompt: str, **kw: Any) -> str:
                try:
                    # Try primary model first
                    response = self.client.models.generate_content(
                        model=self.model_name,
                        contents=prompt,
                    )
                    return response.text or ""
                except RETRY_EXCEPTIONS as e:
                    # Fallback to preview model on overload/exhaustion
                    logger.warning(
                        "primary_llm_overloaded_using_fallback",
                        primary=self.model_name,
                        fallback=self.fallback_model,
                        error=str(e)
                    )
                    response = self.client.models.generate_content(
                        model=self.fallback_model,
                        contents=prompt,
                    )
                    return response.text or ""

            @retry(
                stop=stop_after_attempt(5),
                wait=wait_exponential(multiplier=1, min=2, max=30),
                retry=retry_if_exception_type(RETRY_EXCEPTIONS),
            )
            def chat(self, messages: list, **kw: Any) -> str:
                # Convert to genai format
                contents = []
                for msg in messages:
                    role = "user" if msg.get("role") == "user" else "model"
                    contents.append({"role": role, "parts": [{"text": msg.get("content", "")}]})

                try:
                    # Try primary model first
                    response = self.client.models.generate_content(
                        model=self.model_name,
                        contents=contents,
                    )
                    return response.text or ""
                except RETRY_EXCEPTIONS as e:
                    # Fallback to preview model
                    logger.warning(
                        "primary_llm_overloaded_using_fallback",
                        primary=self.model_name,
                        fallback=self.fallback_model,
                        error=str(e)
                    )
                    response = self.client.models.generate_content(
                        model=self.fallback_model,
                        contents=contents,
                    )
                    return response.text or ""

        return GeminiWrapper(model, api_key)

    except ImportError:
        # Fall back to llama-index-llms-gemini (deprecated)
        try:
            from llama_index.llms.gemini import Gemini

            # Define exceptions for legacy/llama-index path
            try:
                from google.api_core import exceptions as google_exceptions
                RETRY_EXCEPTIONS_LEGACY = (
                    google_exceptions.ServiceUnavailable,
                    google_exceptions.TooManyRequests,
                    google_exceptions.InternalServerError,
                    google_exceptions.ResourceExhausted,
                    google_exceptions.Aborted,
                    google_exceptions.DeadlineExceeded,
                    ConnectionError,
                    ConnectionRefusedError,
                    TimeoutError,
                    OSError,
                )
            except ImportError:
                RETRY_EXCEPTIONS_LEGACY = (Exception,)

            class LlamaIndexGeminiWrapper:
                """Wrapper for llama-index Gemini to provide fallback logic."""

                def __init__(self, model_name: str, **kwargs):
                    self.model_name = model_name
                    self.primary = Gemini(model=model_name, **kwargs)
                    # Fallback to older stable model or preview
                    self.fallback_model = "models/gemini-3-flash-preview"
                    self.fallback = Gemini(model=self.fallback_model, **kwargs)

                @retry(
                    stop=stop_after_attempt(5),
                    wait=wait_exponential(multiplier=1, min=2, max=30),
                    retry=retry_if_exception_type(RETRY_EXCEPTIONS_LEGACY),
                )
                def complete(self, prompt: str, **kw: Any) -> Any:
                    try:
                        return self.primary.complete(prompt, **kw)
                    except RETRY_EXCEPTIONS_LEGACY as e:
                        logger.warning(
                            "primary_llm_overloaded_using_fallback",
                            primary=self.model_name,
                            fallback=self.fallback_model,
                            error=str(e)
                        )
                        return self.fallback.complete(prompt, **kw)

                @retry(
                    stop=stop_after_attempt(5),
                    wait=wait_exponential(multiplier=1, min=2, max=30),
                    retry=retry_if_exception_type(RETRY_EXCEPTIONS_LEGACY),
                )
                def chat(self, messages: Any, **kw: Any) -> Any:
                    try:
                        return self.primary.chat(messages, **kw)
                    except RETRY_EXCEPTIONS_LEGACY as e:
                        logger.warning(
                            "primary_llm_overloaded_using_fallback",
                            primary=self.model_name,
                            fallback=self.fallback_model,
                            error=str(e)
                        )
                        return self.fallback.chat(messages, **kw)

                def __getattr__(self, name: str) -> Any:
                    return getattr(self.primary, name)

            return LlamaIndexGeminiWrapper(model, **kwargs)
        except ImportError:
            raise ImportError(
                "Neither google-genai nor llama-index-llms-gemini installed. "
                "Install with: pip install google-genai"
            )


def _get_openai_embed(model: str, **kwargs: Any) -> Any:
    """Get OpenAI embedding model."""
    try:
        from llama_index.embeddings.openai import OpenAIEmbedding
    except ImportError:
        raise ImportError(
            "OpenAI embeddings not installed. "
            "Install with: pip install llama-index-embeddings-openai"
        )

    logger.debug("initializing_embed", provider="openai", model=model)
    return OpenAIEmbedding(model=model, **kwargs)


def _get_gemini_embed(model: str, **kwargs: Any) -> Any:
    """Get Google Gemini embedding model."""
    try:
        from llama_index.embeddings.gemini import GeminiEmbedding
    except ImportError:
        raise ImportError(
            "Gemini embeddings not installed. "
            "Install with: pip install llama-index-embeddings-gemini"
        )

    logger.debug("initializing_embed", provider="gemini", model=model)
    return GeminiEmbedding(model_name=f"models/{model}", **kwargs)


# =============================================================================
# Convenience Functions
# =============================================================================

def get_provider_info() -> dict[str, Any]:
    """
    Get information about available providers.

    Returns:
        Dictionary with provider availability and configuration.
    """
    info = {
        "available": [],
        "default_provider": None,
        "models": DEFAULT_MODELS,
    }

    if os.getenv("OPENAI_API_KEY"):
        info["available"].append("openai")
    if os.getenv("ANTHROPIC_API_KEY"):
        info["available"].append("anthropic")
    if os.getenv("GOOGLE_API_KEY"):
        info["available"].append("gemini")

    if info["available"]:
        try:
            info["default_provider"] = detect_provider().value
        except ValueError:
            pass

    return info


def validate_provider(provider: LLMProvider) -> bool:
    """
    Check if a provider is available (has API key set).

    Args:
        provider: Provider to check.

    Returns:
        True if provider is available.
    """
    if provider == LLMProvider.OPENAI:
        return bool(os.getenv("OPENAI_API_KEY"))
    elif provider == LLMProvider.ANTHROPIC:
        return bool(os.getenv("ANTHROPIC_API_KEY"))
    elif provider == LLMProvider.GEMINI:
        return bool(os.getenv("GOOGLE_API_KEY"))
    elif provider == LLMProvider.AUTO:
        return any([
            os.getenv("OPENAI_API_KEY"),
            os.getenv("ANTHROPIC_API_KEY"),
            os.getenv("GOOGLE_API_KEY"),
        ])
    return False