rag-core-lib 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rag_core_lib/impl/__init__.py +0 -0
- rag_core_lib/impl/data_types/__init__.py +0 -0
- rag_core_lib/impl/data_types/content_type.py +13 -0
- rag_core_lib/impl/data_types/search_request.py +21 -0
- rag_core_lib/impl/embeddings/__init__.py +13 -0
- rag_core_lib/impl/embeddings/embedder.py +11 -0
- rag_core_lib/impl/embeddings/embedder_type.py +12 -0
- rag_core_lib/impl/embeddings/langchain_community_embedder.py +34 -0
- rag_core_lib/impl/embeddings/stackit_embedder.py +80 -0
- rag_core_lib/impl/langfuse_manager/__init__.py +0 -0
- rag_core_lib/impl/langfuse_manager/langfuse_manager.py +208 -0
- rag_core_lib/impl/llms/__init__.py +0 -0
- rag_core_lib/impl/llms/llm_factory.py +88 -0
- rag_core_lib/impl/llms/llm_type.py +12 -0
- rag_core_lib/impl/settings/__init__.py +0 -0
- rag_core_lib/impl/settings/embedder_class_type_settings.py +18 -0
- rag_core_lib/impl/settings/fake_embedder_settings.py +16 -0
- rag_core_lib/impl/settings/langfuse_settings.py +29 -0
- rag_core_lib/impl/settings/logging_settings.py +23 -0
- rag_core_lib/impl/settings/ollama_embedder_settings.py +17 -0
- rag_core_lib/impl/settings/ollama_llm_settings.py +43 -0
- rag_core_lib/impl/settings/rag_class_types_settings.py +30 -0
- rag_core_lib/impl/settings/retry_decorator_settings.py +78 -0
- rag_core_lib/impl/settings/stackit_embedder_settings.py +36 -0
- rag_core_lib/impl/settings/stackit_vllm_settings.py +36 -0
- rag_core_lib/impl/tracers/__init__.py +0 -0
- rag_core_lib/impl/tracers/langfuse_traced_runnable.py +49 -0
- rag_core_lib/impl/utils/__init__.py +0 -0
- rag_core_lib/impl/utils/async_threadsafe_semaphore.py +71 -0
- rag_core_lib/impl/utils/retry_decorator.py +211 -0
- rag_core_lib/impl/utils/utils.py +71 -0
- rag_core_lib/runnables/__init__.py +0 -0
- rag_core_lib/runnables/async_runnable.py +60 -0
- rag_core_lib/secret_provider/__init__.py +0 -0
- rag_core_lib/secret_provider/secret_provider.py +30 -0
- rag_core_lib/tracers/__init__.py +0 -0
- rag_core_lib/tracers/traced_runnable.py +79 -0
- rag_core_lib-3.2.0.dist-info/METADATA +20 -0
- rag_core_lib-3.2.0.dist-info/RECORD +40 -0
- rag_core_lib-3.2.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Module that contains the settings for the class types, if multiple classes can be selected."""
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
from pydantic_settings import BaseSettings
|
|
5
|
+
|
|
6
|
+
from rag_core_lib.impl.llms.llm_type import LLMType
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RAGClassTypeSettings(BaseSettings):
    """
    Settings that select which implementation is used where several classes are available.

    Values are read from environment variables carrying the ``RAG_CLASS_TYPE_``
    prefix; variable names are matched case-insensitively.

    Attributes
    ----------
    llm_type : LLMType
        The type of language model to use. Defaults to ``LLMType.STACKIT``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config`` that
    # pydantic v2 only supports as a deprecated compatibility shim. A plain dict
    # is equivalent to ``SettingsConfigDict(...)`` at runtime (it is a TypedDict)
    # and keeps this module free of additional imports.
    model_config = {"env_prefix": "RAG_CLASS_TYPE_", "case_sensitive": False}

    llm_type: LLMType = Field(
        default=LLMType.STACKIT,
    )
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Module contains settings for the retry decorator."""
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, PositiveInt, model_validator
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RetryDecoratorSettings(BaseSettings):
    """
    Default tuning knobs for the retry decorator.

    Every value can be supplied through an environment variable carrying the
    ``RETRY_DECORATOR_`` prefix (matched case-insensitively).

    Attributes
    ----------
    max_retries : int (> 0)
        Number of retries performed after the initial attempt.
    retry_base_delay : float (>= 0)
        Delay in seconds used for the first retry.
    retry_max_delay : float (> 0)
        Upper bound in seconds for any single wait.
    backoff_factor : float (>= 1)
        Multiplier applied exponentially between retries.
    attempt_cap : int (>= 0)
        Largest exponent used for the backoff (backoff_factor ** attempt_cap).
    jitter_min : float (>= 0)
        Lower bound in seconds of the jitter added to wait times.
    jitter_max : float (>= jitter_min)
        Upper bound in seconds of the jitter added to wait times.
    """

    model_config = SettingsConfigDict(
        env_prefix="RETRY_DECORATOR_",
        case_sensitive=False,
    )

    max_retries: PositiveInt = Field(
        title="Max Retries",
        description="Total retries, not counting the initial attempt.",
        default=5,
    )
    retry_base_delay: float = Field(
        title="Retry Base Delay",
        description="Base delay in seconds for the first retry.",
        default=0.5,
        ge=0,
    )
    retry_max_delay: float = Field(
        title="Retry Max Delay",
        description="Maximum delay cap in seconds for any single wait.",
        default=600.0,
        gt=0,
    )
    backoff_factor: float = Field(
        title="Backoff Factor",
        description="Exponential backoff factor (>= 1).",
        default=2.0,
        ge=1.0,
    )
    attempt_cap: int = Field(
        title="Attempt Cap",
        description="Cap for exponent growth (backoff_factor ** attempt_cap).",
        default=6,
        ge=0,
    )
    jitter_min: float = Field(
        title="Jitter Min (s)",
        description="Minimum jitter in seconds.",
        default=0.05,
        ge=0.0,
    )
    jitter_max: float = Field(
        title="Jitter Max (s)",
        description="Maximum jitter in seconds.",
        default=0.25,
        ge=0.0,
    )

    @model_validator(mode="after")
    def _check_relations(self) -> "RetryDecoratorSettings":
        """Reject configurations whose jitter range is inverted."""
        jitter_range_is_inverted = self.jitter_min > self.jitter_max
        if jitter_range_is_inverted:
            raise ValueError("jitter_max must be >= jitter_min")
        return self
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Settings regarding the STACKIT embedder."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, PositiveInt, model_validator
|
|
6
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StackitEmbedderSettings(BaseSettings):
    """
    Configuration for the STACKIT embeddings endpoint.

    Values are read from environment variables carrying the ``STACKIT_EMBEDDER_``
    prefix (matched case-insensitively).

    Attributes
    ----------
    model : str
        The embedding model identifier.
    base_url : str
        The base URL of the model serving endpoint.
    api_key : str
        The API key used for authentication (empty by default).
    max_retries : Optional[int] (> 0)
        Retry count override; ``None`` leaves it unset.
    retry_base_delay : Optional[float] (>= 0)
        Base delay override; ``None`` leaves it unset.
    retry_max_delay : Optional[float] (> 0)
        Maximum delay override; ``None`` leaves it unset.
    backoff_factor : Optional[float] (>= 1)
        Backoff factor override; ``None`` leaves it unset.
    attempt_cap : Optional[int] (>= 0)
        Exponent cap override; ``None`` leaves it unset.
    jitter_min : Optional[float] (>= 0)
        Minimum jitter override; ``None`` leaves it unset.
    jitter_max : Optional[float] (>= 0)
        Maximum jitter override; ``None`` leaves it unset.
    """

    model_config = SettingsConfigDict(env_prefix="STACKIT_EMBEDDER_", case_sensitive=False)

    model: str = Field(default="intfloat/e5-mistral-7b-instruct")
    base_url: str = Field(
        default="https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1",
    )
    api_key: str = Field(default="")
    max_retries: Optional[PositiveInt] = Field(default=None)
    retry_base_delay: Optional[float] = Field(default=None, ge=0)
    retry_max_delay: Optional[float] = Field(default=None, gt=0)
    backoff_factor: Optional[float] = Field(default=None, ge=1.0)
    attempt_cap: Optional[int] = Field(default=None, ge=0)
    jitter_min: Optional[float] = Field(default=None, ge=0.0)
    jitter_max: Optional[float] = Field(default=None, ge=0.0)

    @model_validator(mode="after")
    def _check_relations(self) -> "StackitEmbedderSettings":
        """
        Ensure that retry-related ranges are valid.

        The jitter range is only checked when both bounds are set on this object.

        Raises
        ------
        ValueError
            If both jitter bounds are set and ``jitter_max`` is below ``jitter_min``.
        """
        # Compare against None explicitly: 0.0 is a legitimate, explicitly
        # configured bound and must not be mistaken for "unset" (a truthiness
        # test would let e.g. jitter_min=0.5 / jitter_max=0.0 slip through).
        if self.jitter_min is None or self.jitter_max is None:
            return self
        if self.jitter_max < self.jitter_min:
            msg = "jitter_max must be >= jitter_min"
            raise ValueError(msg)
        return self
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Module contains settings regarding the STACKIT vLLM."""
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
from pydantic_settings import BaseSettings
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class StackitVllmSettings(BaseSettings):
    """
    Contains settings regarding the STACKIT vLLM.

    Values are read from environment variables carrying the ``STACKIT_VLLM_``
    prefix; variable names are matched case-insensitively.

    Attributes
    ----------
    model : str
        The model identifier.
    base_url : str
        The base URL for the model serving endpoint.
    api_key : str
        The API key for authentication. It has no default and must be provided.
    top_p : float
        Total probability mass of tokens to consider at each step.
    temperature : float
        What sampling temperature to use.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config`` that
    # pydantic v2 only supports as a deprecated compatibility shim. A plain dict
    # is equivalent to ``SettingsConfigDict(...)`` at runtime (it is a TypedDict)
    # and keeps this module free of additional imports.
    model_config = {"env_prefix": "STACKIT_VLLM_", "case_sensitive": False}

    model: str = Field(default="cortecs/Llama-3.3-70B-Instruct-FP8-Dynamic")
    base_url: str = Field(default="https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1")
    api_key: str

    top_p: float = Field(default=0.1, title="LLM Top P")
    temperature: float = Field(default=0, title="LLM Temperature")
|
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Module for the LangfuseTracedRunnable class."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from langchain_core.runnables import Runnable, RunnableConfig
|
|
6
|
+
from langfuse.langchain import CallbackHandler
|
|
7
|
+
|
|
8
|
+
from rag_core_lib.impl.settings.langfuse_settings import LangfuseSettings
|
|
9
|
+
from rag_core_lib.tracers.traced_runnable import TracedRunnable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LangfuseTracedRunnable(TracedRunnable):
    """A class to trace the execution of a Runnable using Langfuse.

    This class wraps an inner Runnable and attaches a Langfuse callback handler
    to the configuration used for an invocation. The handler is configured from
    the provided settings.

    Attributes
    ----------
    CONFIG_CALLBACK_KEY : str
        The key used to store callbacks in the configuration.
    """

    CONFIG_CALLBACK_KEY = "callbacks"

    def __init__(self, inner_chain: Runnable, settings: LangfuseSettings):
        """
        Initialize the LangfuseTracedRunnable with the given inner chain and settings.

        Parameters
        ----------
        inner_chain : Runnable
            The inner chain to be wrapped by this tracer.
        settings : LangfuseSettings
            The settings to configure the Langfuse tracer.
        """
        super().__init__(inner_chain)
        self._settings = settings

    def _add_tracing_callback(self, config: Optional[RunnableConfig]) -> RunnableConfig:
        """
        Return a configuration that additionally carries a Langfuse callback handler.

        Parameters
        ----------
        config : Optional[RunnableConfig]
            The configuration supplied by the caller, if any. It is not modified.

        Returns
        -------
        RunnableConfig
            A new configuration whose callbacks are the caller's callbacks
            followed by a freshly created Langfuse handler.
        """
        handler = CallbackHandler(
            public_key=self._settings.public_key,
        )
        if not config:
            return RunnableConfig(callbacks=[handler])

        # Work on a shallow copy so a config object that the caller reuses across
        # invocations does not accumulate one more handler on every call.
        traced_config = config.copy()
        current_callbacks = config.get(self.CONFIG_CALLBACK_KEY, [])
        # NOTE(review): this assumes the callbacks entry is a list (or empty);
        # a callback-manager object would not support "+" here - confirm callers.
        traced_config[self.CONFIG_CALLBACK_KEY] = (current_callbacks if current_callbacks else []) + [handler]
        return traced_config
|
|
File without changes
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Module containing the AsyncThreadsafeSemaphore class."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import threading
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
6
|
+
from typing import Optional, Type
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AsyncThreadsafeSemaphore:
    """
    A semaphore backed by ``threading.Semaphore`` that can be used in async and sync contexts.

    Synchronous code acquires the semaphore directly (``with sem:``). Asynchronous
    code (``async with sem:``) performs the blocking acquisition on a worker
    thread of an internal ``ThreadPoolExecutor`` so the event loop is not blocked.
    """

    def __init__(self, value: int = 1) -> None:
        """
        Initialize the AsyncThreadsafeSemaphore.

        Parameters
        ----------
        value : int
            The initial value for the semaphore (default 1).
        """
        self._semaphore = threading.Semaphore(value)
        self._executor = ThreadPoolExecutor()

    # Context manager methods for synchronous usage
    def __enter__(self) -> "AsyncThreadsafeSemaphore":
        """Acquire the semaphore (blocking) and return this object."""
        self._acquire()
        return self

    def __exit__(self, *args: Optional[Type[BaseException]]) -> None:
        """Release the semaphore; exceptions from the ``with`` body are not suppressed."""
        self.release()

    # Async context manager methods for asynchronous usage
    async def __aenter__(self) -> "AsyncThreadsafeSemaphore":
        """Acquire the semaphore without blocking the event loop and return this object."""
        await self.acquire()
        return self

    async def __aexit__(self, *args: Optional[Type[BaseException]]) -> None:
        """Release the semaphore; exceptions from the ``async with`` body are not suppressed."""
        self.release()

    def release(self) -> None:
        """
        Release the semaphore, incrementing the internal counter by one.

        Returns
        -------
        None
        """
        self._semaphore.release()

    async def acquire(self) -> None:
        """
        Asynchronously acquire the semaphore.

        The blocking acquisition runs on the internal executor. If the awaiting
        task is cancelled while the worker thread is still waiting, the permit
        that the worker eventually obtains is released again, so a cancelled
        ``acquire`` never consumes a permit.

        Returns
        -------
        None

        Raises
        ------
        asyncio.CancelledError
            Re-raised when the awaiting task is cancelled.
        """
        loop = asyncio.get_running_loop()
        pending = self._executor.submit(self._acquire)
        try:
            await asyncio.wrap_future(pending, loop=loop)
        except asyncio.CancelledError:
            # cancel() only succeeds while the job has not started. Otherwise the
            # worker is blocked on (or already holds) the semaphore, and nobody
            # would release that permit - give it back once the job finishes.
            if not pending.cancel():
                pending.add_done_callback(self._release_abandoned_permit)
            raise

    def _acquire(self) -> None:
        """Block the calling thread until the semaphore has been acquired."""
        self._semaphore.acquire()

    def _release_abandoned_permit(self, finished) -> None:
        """Release the permit obtained by a worker whose awaiting task was cancelled.

        ``finished`` is the completed executor future of the abandoned acquisition.
        """
        if finished.exception() is None:
            self._semaphore.release()
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Reusable exponential backoff / retry decorator for sync and async functions."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import inspect
|
|
5
|
+
import logging
|
|
6
|
+
import random
|
|
7
|
+
import time
|
|
8
|
+
from functools import wraps
|
|
9
|
+
from typing import Callable, Optional, ParamSpec, TypeVar
|
|
10
|
+
|
|
11
|
+
from pydantic_settings import BaseSettings
|
|
12
|
+
|
|
13
|
+
from rag_core_lib.impl.settings.retry_decorator_settings import RetryDecoratorSettings
|
|
14
|
+
from rag_core_lib.impl.utils.utils import (
|
|
15
|
+
headers_from_exception,
|
|
16
|
+
status_code_from_exception,
|
|
17
|
+
wait_from_rate_limit_headers,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Use ParamSpec and TypeVar for type-safe decorators
|
|
22
|
+
P = ParamSpec("P")
|
|
23
|
+
R = TypeVar("R")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class _RetryEngine:
    """Hold the retry configuration and build retrying wrappers for sync and async callables."""

    def __init__(
        self,
        cfg: RetryDecoratorSettings,
        exceptions: tuple[type[BaseException], ...],
        rate_limit_exceptions: tuple[type[BaseException], ...],
        rate_limit_statuses: tuple[int, ...],
        rate_limit_header_names: tuple[str, ...],
        is_rate_limited: Optional[Callable[[BaseException], bool]],
        logger: Optional[logging.Logger],
    ) -> None:
        """Store the retry settings, the exception filters and the optional logger."""
        self.cfg = cfg
        self.logger = logger
        self.exceptions = exceptions
        self.is_rate_limited_cb = is_rate_limited
        self.rate_limit_exceptions = rate_limit_exceptions
        self.rate_limit_statuses = rate_limit_statuses
        self.rate_limit_header_names = rate_limit_header_names

    def decorate(self, fn: Callable[P, R]) -> Callable[P, R]:
        """Wrap ``fn`` with the async or the sync retry loop, depending on its kind."""
        build = self._decorate_async if inspect.iscoroutinefunction(fn) else self._decorate_sync
        return build(fn)

    def _decorate_async(self, fn: Callable[P, R]) -> Callable[P, R]:
        """Return a coroutine function that awaits ``fn`` and retries on the configured exceptions."""

        @wraps(fn)
        async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            for attempt_no in range(self.cfg.max_retries + 1):
                try:
                    outcome = await fn(*args, **kwargs)
                except self.exceptions as error:  # type: ignore[misc]
                    pause = self._calculate_wait_time(attempt_no, error)
                    if pause is None:
                        raise
                    await asyncio.sleep(pause)
                else:
                    return outcome
            raise AssertionError("Retry loop exited unexpectedly.")

        return async_wrapper

    def _decorate_sync(self, fn: Callable[P, R]) -> Callable[P, R]:
        """Return a function that calls ``fn`` and retries on the configured exceptions."""

        @wraps(fn)
        def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            for attempt_no in range(self.cfg.max_retries + 1):
                try:
                    outcome = fn(*args, **kwargs)
                except self.exceptions as error:  # type: ignore[misc]
                    pause = self._calculate_wait_time(attempt_no, error)
                    if pause is None:
                        raise
                    time.sleep(pause)
                else:
                    return outcome
            raise AssertionError("Retry loop exited unexpectedly.")

        return sync_wrapper

    def _should_rate_limited(self, exc: BaseException) -> bool:
        """Tell whether ``exc`` should be handled as a rate-limit error.

        Checked in order: the user callback, the configured exception types,
        the HTTP status code found on the exception, and finally the message text.
        """
        callback = self.is_rate_limited_cb
        if callback and callback(exc):
            return True
        if isinstance(exc, self.rate_limit_exceptions):
            return True
        if status_code_from_exception(exc) in self.rate_limit_statuses:
            return True
        text = str(exc).lower()
        return any(marker in text for marker in ("rate limit", "ratelimit"))

    def _compute_backoff_wait(self, attempt: int) -> float:
        """Return the exponential backoff delay for ``attempt``, capped at ``retry_max_delay``."""
        exponent = min(attempt, self.cfg.attempt_cap)
        growth = self.cfg.backoff_factor**exponent
        uncapped = self.cfg.retry_base_delay * growth
        return min(uncapped, self.cfg.retry_max_delay)

    def _with_jitter(self, seconds: float) -> float:
        """Add random jitter to ``seconds`` without exceeding ``retry_max_delay``."""
        jitter = random.uniform(self.cfg.jitter_min, self.cfg.jitter_max)  # noqa: S311 non-crypto jitter
        return min(seconds + jitter, self.cfg.retry_max_delay)

    def _calculate_wait_time(self, attempt: int, exc: BaseException) -> Optional[float]:
        """Return the seconds to wait before the next attempt, or None when the caller must re-raise."""
        attempts_allowed = self.cfg.max_retries + 1
        log = self.logger

        # The last permitted attempt has failed: give up.
        if attempt == self.cfg.max_retries:
            if log:
                log.error("Failed after %d attempts: %s", attempts_allowed, exc, exc_info=False)
            return None

        # Ordinary failure: plain exponential backoff.
        if not self._should_rate_limited(exc):
            pause = self._compute_backoff_wait(attempt)
            if log:
                log.warning(
                    "Attempt %d/%d failed: %s. Retrying in %.2fs...",
                    attempt + 1,
                    attempts_allowed,
                    exc,
                    pause,
                    exc_info=False,
                )
            return pause

        # Rate limited: prefer the server-provided reset hint, fall back to backoff.
        headers = headers_from_exception(exc)
        hinted = wait_from_rate_limit_headers(headers, self.rate_limit_header_names)
        base_wait = self._compute_backoff_wait(attempt) if hinted is None else hinted
        pause = self._with_jitter(base_wait)
        if log:
            reported = (
                "x-ratelimit-remaining-requests",
                "x-ratelimit-remaining-tokens",
                "x-ratelimit-reset-requests",
                "x-ratelimit-reset-tokens",
            )
            log.warning(
                "Rate limited. Remaining: req=%s tok=%s. Reset in: req=%s tok=%s. Retrying in %.2fs (attempt %d/%d)...",
                *(headers.get(header, "?") for header in reported),
                pause,
                attempt + 1,
                attempts_allowed,
            )
        return pause
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def retry_with_backoff(
    *,
    settings: RetryDecoratorSettings | None = None,
    exceptions: tuple[type[BaseException], ...] = (Exception,),
    rate_limit_exceptions: tuple[type[BaseException], ...] = (),
    rate_limit_statuses: tuple[int, ...] = (429,),
    rate_limit_header_names: tuple[str, ...] = (
        "x-ratelimit-reset-requests",
        "x-ratelimit-reset-tokens",
    ),
    is_rate_limited: Optional[Callable[[BaseException], bool]] = None,
    logger: Optional[logging.Logger] = None,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Build a decorator that retries a sync or async callable with exponential backoff.

    Parameters
    ----------
    settings : RetryDecoratorSettings | None
        The retry configuration; a default ``RetryDecoratorSettings()`` is created when omitted.
    exceptions : tuple[type[BaseException], ...]
        The exception types that trigger a retry.
    rate_limit_exceptions : tuple[type[BaseException], ...]
        The exception types that are always treated as rate-limit errors.
    rate_limit_statuses : tuple[int, ...]
        The HTTP status codes that are treated as rate-limit errors.
    rate_limit_header_names : tuple[str, ...]
        The response headers consulted for a server-provided wait time.
    is_rate_limited : Optional[Callable[[BaseException], bool]]
        An optional predicate that marks an exception as a rate-limit error.
    logger : Optional[logging.Logger]
        An optional logger for retry and failure messages.

    Returns
    -------
    Callable[[Callable[P, R]], Callable[P, R]]
        The decorator to apply to the target callable.
    """
    if not settings:
        settings = RetryDecoratorSettings()
    return _RetryEngine(
        cfg=settings,
        exceptions=exceptions,
        rate_limit_exceptions=rate_limit_exceptions,
        rate_limit_statuses=rate_limit_statuses,
        rate_limit_header_names=rate_limit_header_names,
        is_rate_limited=is_rate_limited,
        logger=logger,
    ).decorate
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def create_retry_decorator_settings(
    ai_settings: BaseSettings, retry_decorator_settings: RetryDecoratorSettings
) -> RetryDecoratorSettings:
    """Create retry decorator settings based on AI and default settings.

    If the corresponding field in ai_settings is not set (it is ``None`` or the
    settings class does not define it), the value from retry_decorator_settings
    will be used.

    Parameters
    ----------
    ai_settings : BaseSettings
        Those are the AI settings, e.g. Embeddings, Summarizers etc.
    retry_decorator_settings : RetryDecoratorSettings
        Those are the default retry settings.

    Returns
    -------
    RetryDecoratorSettings
        The combined retry settings.

    Raises
    ------
    ValueError
        If the combined ``jitter_max`` is smaller than the combined ``jitter_min``.
    """
    fields = (
        "max_retries",
        "retry_base_delay",
        "retry_max_delay",
        "backoff_factor",
        "attempt_cap",
        "jitter_min",
        "jitter_max",
    )
    settings_kwargs = {}
    for field in fields:
        # A settings class without retry overrides is treated like one whose
        # overrides are all unset instead of failing with AttributeError.
        override = getattr(ai_settings, field, None)
        settings_kwargs[field] = override if override is not None else getattr(retry_decorator_settings, field)

    # Each source may be valid on its own while the combination is not
    # (e.g. jitter_min overridden above the default jitter_max).
    if settings_kwargs["jitter_max"] < settings_kwargs["jitter_min"]:
        raise ValueError("jitter_max must be >= jitter_min")
    return RetryDecoratorSettings(**settings_kwargs)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any, Iterable, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _to_seconds(v: Any) -> Optional[float]:
    """Convert a rate-limit reset value into seconds.

    Supported inputs are plain numbers (interpreted as seconds) and durations
    built from ``h``, ``m``, ``s`` and ``ms`` components such as ``"1h21m55s"``,
    ``"6m0s"`` or ``"120ms"``. Parsing is case-insensitive and ignores
    surrounding whitespace.

    Parameters
    ----------
    v : Any
        The raw value, typically a header string. ``None`` is passed through.

    Returns
    -------
    Optional[float]
        The duration in seconds, or ``None`` when the value is missing or
        cannot be interpreted.
    """
    if v is None:
        return None
    try:
        s = str(v).strip().lower()
        if any(u in s for u in ("h", "m", "s")):
            # "ms" must be tried before the single-letter units, otherwise
            # "120ms" is read as 120 minutes followed by a stray "s".
            parts = re.findall(r"([0-9]+(?:\.[0-9]+)?)(ms|[hms])", s)
            if not parts:
                # Unit letters without any "<number><unit>" component (e.g. an
                # HTTP date or free text) carry no usable duration.
                return None
            total = 0.0
            for val, unit in parts:
                num = float(val)
                if unit == "ms":
                    total += num / 1000
                elif unit == "h":
                    total += num * 3600
                elif unit == "m":
                    total += num * 60
                else:  # "s"
                    total += num
            return total
        # Fallback: plain number interpreted as seconds
        return float(s)
    except Exception:
        # Deliberately best-effort: a malformed value simply means "no hint".
        return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _normalize_dict_items(items: Iterable[Any]) -> dict[str, str]:
    """Build a dict of lowercased string keys and values, skipping pairs that contain None."""
    normalized: dict[str, str] = {}
    for key, value in items:
        if key is None or value is None:
            continue
        normalized[str(key).lower()] = str(value).lower()
    return normalized
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _normalize_headers(raw_headers: Any) -> dict[str, str]:
    """Convert httpx.Headers or another mapping-like object into a lowercased dict[str, str].

    Falsy input yields an empty dict. Objects that cannot be turned into
    key/value pairs are treated as having no headers.
    """
    if not raw_headers:
        return {}

    def _pairs_via_dict() -> list:
        return list(dict(raw_headers).items())

    try:
        # ``items()`` works for dict-like objects and httpx.Headers.
        pairs = list(raw_headers.items()) if hasattr(raw_headers, "items") else _pairs_via_dict()
    except Exception:
        # Second chance through ``dict(...)`` before giving up.
        try:
            pairs = _pairs_via_dict()
        except Exception:
            pairs = []

    return _normalize_dict_items(pairs)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def status_code_from_exception(exc: BaseException) -> Optional[int]:
    """Return ``exc.response.status_code`` when both attributes exist, otherwise None."""
    response = getattr(exc, "response", None)
    if response is None:
        return None
    return getattr(response, "status_code", None)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def headers_from_exception(exc: BaseException) -> dict[str, str]:
    """Return the normalized headers of ``exc.response``, or an empty dict when unavailable."""
    response = getattr(exc, "response", None)
    return _normalize_headers(getattr(response, "headers", None))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def wait_from_rate_limit_headers(
    headers: dict[str, str],
    header_names: Iterable[str] = ("x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"),
) -> Optional[float]:
    """Return the longest wait in seconds announced by the given reset headers.

    Parameters
    ----------
    headers : dict[str, str]
        The headers to inspect; lookups use the names exactly as given.
    header_names : Iterable[str]
        The names of the headers that may carry a reset duration.

    Returns
    -------
    Optional[float]
        The maximum of all parseable durations, or None if there is none.
    """
    parsed = (_to_seconds(headers.get(header)) for header in header_names)
    waits = [seconds for seconds in parsed if seconds is not None]
    return max(waits, default=None)
|
|
File without changes
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Module for the base class of asynchronous chains."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from langchain_core.runnables import Runnable, RunnableConfig
|
|
7
|
+
from langchain_core.runnables.utils import Input, Output
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AsyncRunnable(Runnable[Input, Output], ABC):
    """Abstract base for chains that can only be invoked asynchronously."""

    @abstractmethod
    async def ainvoke(self, chain_input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any) -> Output:
        """Run the chain asynchronously.

        Parameters
        ----------
        chain_input : Input
            The data the chain is invoked with.
        config : Optional[RunnableConfig], optional
            The configuration for this invocation, by default None.
        **kwargs : Any
            Further keyword arguments an implementation may need.

        Returns
        -------
        Output
            The result produced by the chain.
        """

    def invoke(self, chain_input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any) -> Output:
        """
        Reject synchronous invocation.

        The signature mirrors the base class, including the ``Output`` return
        annotation, but nothing is ever returned: the method always raises.

        Notes
        -----
        Do not call this method. It is only present because the base class
        requires an implementation; use ``ainvoke`` instead.

        Parameters
        ----------
        chain_input : Input
            The data the chain would be invoked with.
        config : Optional[RunnableConfig], optional
            The configuration for this invocation, by default None.

        Returns
        -------
        Output
            Never returned.

        Raises
        ------
        NotImplementedError
            Always, because only the asynchronous variant is implemented.
        """
        message = "Please use the async implementation."
        raise NotImplementedError(message)
|
|
File without changes
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Interface for providing API tokens."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SecretProvider(ABC):
    """Abstract contract for objects that supply API tokens."""

    @property
    @abstractmethod
    def provided_key(self) -> str:
        """
        Secret key exposed by the provider; implementations must define this property.

        Returns
        -------
        str
            The secret key supplied by the concrete provider.
        """

    @abstractmethod
    def provide_token(self) -> dict:
        """
        Supply an API token; implementations must define this method.

        Returns
        -------
        dict
            A dictionary that contains the API token.
        """
|
|
File without changes
|