kitkat 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kitkat/__init__.py ADDED
File without changes
@@ -0,0 +1,6 @@
1
+ """Private shared utilities for kitkat internals.
2
+
3
+ Nothing in this package is part of the public API. Imports from
4
+ ``kitkat._internal`` are not covered by stability guarantees and may
5
+ change without notice between any two releases.
6
+ """
@@ -0,0 +1,47 @@
1
+ """Shared httpx async-client configuration.
2
+
3
+ All providers that use httpx directly should build their clients through
4
+ :func:`build_async_client` so connection-pool settings, timeout defaults,
5
+ and the ``User-Agent`` header are consistent across the library.
6
+
7
+ Provider SDKs (anthropic, openai, google-genai) manage their own HTTP
8
+ transport internally, so this factory is primarily useful for custom
9
+ providers or future internal HTTP callers.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from importlib.metadata import PackageNotFoundError, version
15
+
16
+ import httpx
17
+
18
+ try:
19
+ _LIB_VERSION = version("kitkat")
20
+ except PackageNotFoundError:
21
+ _LIB_VERSION = "dev"
22
+
23
+ _USER_AGENT = f"kitkat/{_LIB_VERSION} httpx/{httpx.__version__}"
24
+
25
+
26
def build_async_client(
    base_url: str = "",
    timeout: float = 120.0,
    **kwargs: object,
) -> httpx.AsyncClient:
    """Create a pre-configured :class:`httpx.AsyncClient`.

    The library ``User-Agent`` is installed as a default header. If the
    caller supplies ``headers`` via ``**kwargs`` they are merged on top of
    that default (caller values win, matched case-insensitively) instead of
    colliding with the ``headers=`` argument this factory passes itself.

    Args:
        base_url: Optional base URL prefix applied to all requests.
        timeout: Default request timeout in seconds.
        **kwargs: Additional keyword arguments forwarded to
            :class:`httpx.AsyncClient`. A ``headers`` entry is merged with
            the library defaults rather than forwarded verbatim.

    Returns:
        A ready-to-use async HTTP client with library defaults applied.
    """
    # Forwarding ``headers`` inside **kwargs next to the explicit ``headers=``
    # keyword below would raise ``TypeError`` (duplicate keyword argument),
    # so take it out of kwargs and merge it explicitly.
    caller_headers = kwargs.pop("headers", None)

    headers = httpx.Headers({"User-Agent": _USER_AGENT})
    if caller_headers is not None:
        headers.update(caller_headers)  # type: ignore[arg-type]

    return httpx.AsyncClient(
        base_url=base_url,
        timeout=httpx.Timeout(timeout),
        headers=headers,
        **kwargs,  # type: ignore[arg-type]
    )
@@ -0,0 +1,102 @@
1
+ """Shared retry logic for all provider implementations.
2
+
3
+ Provider subclasses call :func:`execute_with_retry` from within
4
+ ``complete_with_retry()``. This ensures consistent exponential back-off
5
+ behaviour regardless of which provider is in use.
6
+
7
+ Non-retriable errors (auth, token-limit, content-filter) are re-raised
8
+ immediately without sleeping, so callers never wait on deterministic
9
+ failures.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import logging
16
+ from typing import TYPE_CHECKING, TypeVar
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Callable, Coroutine
20
+
21
+ from ..core.models import RetryPolicy
22
+
23
+ from ..core.exceptions import (
24
+ LLMAuthenticationError,
25
+ LLMContentFilterError,
26
+ LLMError,
27
+ LLMRateLimitError,
28
+ LLMTokenLimitError,
29
+ )
30
+
31
+ T = TypeVar("T")
32
+ logger = logging.getLogger(__name__)
33
+
34
+ # Errors that must never be retried
35
+ _NON_RETRIABLE = (LLMAuthenticationError, LLMTokenLimitError, LLMContentFilterError)
36
+
37
+
38
async def execute_with_retry(
    func: Callable[[], Coroutine[None, None, T]],
    policy: RetryPolicy,
    provider_name: str,
) -> T:
    """Execute an async callable with exponential back-off retry.

    Retries on :exc:`~kitkat.core.exceptions.LLMRateLimitError` and
    generic :exc:`~kitkat.core.exceptions.LLMError`. Raises immediately
    on non-retriable errors (authentication, token limit, content filter).

    Args:
        func: Zero-argument async callable that performs one inference attempt.
        policy: Retry configuration (attempts, delays, jitter).
        provider_name: Used in log messages to identify the provider.

    Returns:
        The return value of *func* on a successful attempt.

    Raises:
        ValueError: If ``policy.max_attempts`` is less than 1, so *func*
            would never be called.
        LLMAuthenticationError: Immediately — credentials are invalid.
        LLMTokenLimitError: Immediately — prompt is deterministically too long.
        LLMContentFilterError: Immediately — content policy violation.
        LLMRateLimitError: After all retry attempts are exhausted.
        LLMError: After all retry attempts are exhausted for other errors.
    """
    max_attempts = policy.max_attempts
    # Validate explicitly instead of relying on ``assert``: assertions are
    # stripped under ``python -O``, which would leave a bare ``raise None``.
    if max_attempts < 1:
        raise ValueError(
            f"policy.max_attempts must be >= 1, got {max_attempts!r}"
        )

    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        try:
            return await func()

        except _NON_RETRIABLE:
            raise  # Deterministic failure — skip retries entirely

        except LLMRateLimitError as exc:
            if is_last_attempt:
                # No sleep follows the final attempt, so do not claim one.
                logger.warning(
                    "[%s] Rate limited. Giving up after %d/%d attempts.",
                    provider_name,
                    attempt + 1,
                    max_attempts,
                )
                raise
            # Prefer the server-provided hint; fall back to policy back-off.
            wait = exc.retry_after_s or policy.delay_for_attempt(attempt)
            logger.warning(
                "[%s] Rate limited. Waiting %.1fs (attempt %d/%d).",
                provider_name,
                wait,
                attempt + 1,
                max_attempts,
            )
            await asyncio.sleep(wait)

        except LLMError as exc:
            if is_last_attempt:
                logger.warning(
                    "[%s] Provider error: %s. Giving up after %d/%d attempts.",
                    provider_name,
                    exc,
                    attempt + 1,
                    max_attempts,
                )
                raise
            wait = policy.delay_for_attempt(attempt)
            logger.warning(
                "[%s] Provider error: %s. Waiting %.1fs (attempt %d/%d).",
                provider_name,
                exc,
                wait,
                attempt + 1,
                max_attempts,
            )
            await asyncio.sleep(wait)

    # Every iteration either returns, re-raises, or continues to the next
    # attempt, and the last attempt always returns or raises.
    raise RuntimeError("execute_with_retry exited without an exception set")
@@ -0,0 +1,76 @@
1
+ """Shared token-counting utilities.
2
+
3
+ Provides a tiktoken-based counter with a conservative character-ratio
4
+ fallback for models not yet supported by tiktoken (e.g. Gemini variants).
5
+
6
+ All provider ``count_tokens()`` implementations should delegate here so the
7
+ behaviour is consistent across the library.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Conservative approximation: 4 chars ≈ 1 token (valid for most Latin-script
17
+ # text with GPT-style BPE tokenisers).
18
+ _CHARS_PER_TOKEN: float = 4.0
19
+
20
+ # Sentinel cached under an encoding name after its first failed tiktoken load, so that
21
+ # encoding is not retried in the same process (avoids repeated BPE-download attempts in air-gapped envs).
22
+ _TIKTOKEN_UNAVAILABLE = object()
23
+
24
+ # Cache per encoding name so we only call get_encoding() once.
25
+ _ENCODER_CACHE: dict[str, object] = {}
26
+
27
+
28
def count_tokens_tiktoken(text: str, encoding_name: str = "cl100k_base") -> int:
    """Count tokens using tiktoken, with a char-ratio fallback.

    The encoder for each ``encoding_name`` is loaded at most once per
    process. If loading fails (tiktoken missing, BPE file unavailable, …)
    the failure is cached for that encoding and the character-ratio
    estimate is used instead.

    Args:
        text: The text to tokenise.
        encoding_name: The tiktoken BPE encoding to use.
            ``cl100k_base`` covers GPT-4 / GPT-3.5 and approximates
            Anthropic Claude tokenisation well enough for budgeting.

    Returns:
        Token count (always ≥ 0; 0 for empty input; ≥ 1 for non-empty).
    """
    if not text:
        return 0

    enc = _ENCODER_CACHE.get(encoding_name)
    if enc is None:
        try:
            import tiktoken

            enc = tiktoken.get_encoding(encoding_name)
        except Exception as exc:
            # Deliberate best-effort: any load failure degrades to the
            # character-based estimate rather than breaking the caller.
            logger.warning(
                "tiktoken BPE load failed (%s); falling back to "
                "character-based token estimate (4 chars ≈ 1 token).",
                exc,
            )
            enc = _TIKTOKEN_UNAVAILABLE
        # Cache success and failure alike so the load is attempted only once.
        _ENCODER_CACHE[encoding_name] = enc

    if enc is _TIKTOKEN_UNAVAILABLE:
        return count_tokens_fallback(text)

    # ``disallowed_special=()`` makes special-token literals such as
    # "<|endoftext|>" encode as ordinary text. With tiktoken's default
    # ("all") such input raises ValueError, which would break counting
    # for arbitrary user-supplied text.
    return max(1, len(enc.encode(text, disallowed_special=())))  # type: ignore[union-attr]
63
+
64
+
65
def count_tokens_fallback(text: str) -> int:
    """Estimate a token count from the character length of *text*.

    Used for models without tiktoken support: the length is divided by the
    module-level characters-per-token ratio and rounded.

    Args:
        text: The text to estimate.

    Returns:
        Estimated token count (0 for empty input; ≥ 1 for non-empty).
    """
    if text:
        estimate = round(len(text) / _CHARS_PER_TOKEN)
        # Non-empty text always costs at least one token.
        return estimate if estimate > 1 else 1
    return 0
kitkat/abc/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """kitkat.abc — abstract base classes for the library.
2
+
3
+ The only stable public export is :class:`~kitkat.abc.provider.LLMProvider`.
4
+ Third-party providers should import from here::
5
+
6
+ from kitkat.abc import LLMProvider
7
+ """
8
+
9
+ from .provider import LLMProvider
10
+
11
+ __all__ = ["LLMProvider"]
kitkat/abc/provider.py ADDED
@@ -0,0 +1,374 @@
1
+ """The LLMProvider abstract base class.
2
+
3
+ Every concrete provider (Anthropic, OpenAI, Gemini, or a custom third-party
4
+ provider) must sub-class :class:`LLMProvider` and implement all abstract
5
+ methods. The library's service layer works exclusively with this ABC — it
6
+ never imports concrete provider classes directly.
7
+
8
+ Implementing a custom provider::
9
+
10
+ from kitkat.abc import LLMProvider
11
+ from kitkat.core import (
12
+ LLMRequest, LLMResponse, ProviderCapabilities, ProviderType,
13
+ RetryPolicy, StreamChunk,
14
+ )
15
+ from collections.abc import AsyncIterator
16
+
17
+ class MyProvider(LLMProvider):
18
+ PROVIDER_TYPE = ProviderType.OPENAI # reuse an existing slot …
19
+ DEFAULT_MODEL = "my-model-v1"
20
+ CAPABILITIES = ProviderCapabilities(
21
+ supports_streaming=True,
22
+ supports_thinking=False,
23
+ max_context_tokens=32_768,
24
+ provider_type=ProviderType.OPENAI,
25
+ )
26
+
27
+ async def initialize(self) -> None:
28
+ self._client = MySDKClient(api_key=self._config["api_key"])
29
+ self._initialized = True
30
+
31
+ async def shutdown(self) -> None:
32
+ await self._client.aclose()
33
+ self._initialized = False
34
+
35
+ async def _init_client_only(self) -> None:
36
+ if self._initialized:
37
+ return
38
+ self._client = MySDKClient(api_key=self._config["api_key"])
39
+ self._initialized = True
40
+
41
+ async def complete(self, request: LLMRequest) -> LLMResponse:
42
+ ...
43
+
44
+ async def stream(self, request: LLMRequest) -> AsyncIterator[StreamChunk]:
45
+ ...
46
+
47
+ async def health_check(self) -> bool:
48
+ ...
49
+
50
+ def count_tokens(self, text: str) -> int:
51
+ from kitkat._internal.tokenizers import count_tokens_tiktoken
52
+ return count_tokens_tiktoken(text)
53
+ """
54
+
55
+ from __future__ import annotations
56
+
57
+ import asyncio
58
+ import logging
59
+ import time
60
+ from abc import ABC, abstractmethod
61
+ from typing import TYPE_CHECKING, Any
62
+
63
+ if TYPE_CHECKING:
64
+ from collections.abc import AsyncIterator
65
+
66
+ from ..core.enums import ProviderType
67
+
68
+ from .._internal.retry import execute_with_retry
69
+ from ..core.models import (
70
+ LLMRequest,
71
+ LLMResponse,
72
+ Message,
73
+ ProviderCapabilities,
74
+ RetryPolicy,
75
+ StreamChunk,
76
+ )
77
+
78
+ logger = logging.getLogger(__name__)
79
+
80
+
81
class LLMProvider(ABC):
    """Abstract base class for all LLM provider implementations.

    Concrete providers inherit from this class and implement the seven
    abstract methods below (:meth:`initialize`, :meth:`shutdown`,
    :meth:`_init_client_only`, :meth:`complete`, :meth:`stream`,
    :meth:`health_check`, :meth:`count_tokens`). The shared helpers
    (:meth:`complete_with_retry`, :meth:`run_sync`,
    :meth:`_assert_initialized`, …) are provided here so providers don't
    duplicate boilerplate.

    Lifecycle::

        async with MyProvider(config) as provider:
            response = await provider.complete(request)

    Or explicitly::

        provider = MyProvider(config)
        await provider.initialize()
        try:
            response = await provider.complete(request)
        finally:
            await provider.shutdown()
    """

    # -- Class-level attributes providers MUST declare --------------------

    PROVIDER_TYPE: ProviderType
    """Canonical enum value identifying this provider."""

    DEFAULT_MODEL: str
    """Default model identifier used when :attr:`LLMRequest.model` is empty."""

    CAPABILITIES: ProviderCapabilities
    """Static feature-flag descriptor queried by the service layer."""

    RETRY_POLICY: RetryPolicy = RetryPolicy()
    """Default retry policy; concrete providers may override at class level."""

    # -- Constructor -------------------------------------------------------

    def __init__(self, config: dict[str, Any]) -> None:
        """Create the provider with a raw configuration dictionary.

        Args:
            config: Provider-specific key/value pairs (API key, model, etc.).
                Concrete providers typically accept a typed ``*Config``
                dataclass and call ``super().__init__(config.__dict__)``.
        """
        self._config = config
        self._initialized = False
        logger.debug("%s provider created.", self.__class__.__name__)

    # -- Lifecycle (abstract) ---------------------------------------------

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the provider: create the HTTP client and probe credentials.

        This is the *full* initialization path. Callers using managed keys
        should always prefer this over ``_init_client_only``.

        Raises:
            LLMProviderInitError: If the provider fails to start due to
                configuration or credential errors.
        """

    @abstractmethod
    async def shutdown(self) -> None:
        """Gracefully release all resources associated with the provider."""

    @abstractmethod
    async def _init_client_only(self) -> None:
        """Create the HTTP client *without* running a credential probe.

        This lightweight initialization path is used by
        :class:`~kitkat.service.byok.BYOKLLMService` for BYOK requests.
        Auth failures surface on the first inference call rather than a
        pre-flight probe, avoiding extra latency and billable requests per
        user key.

        Concrete implementations must:

        1. Guard against double-initialization (idempotent — return early if
           ``self._initialized`` is already ``True``).
        2. Instantiate the provider-specific async HTTP client.
        3. Set ``self._initialized = True`` after successful client creation.

        Raises:
            LLMProviderInitError: If the underlying client cannot be created.
        """

    # -- Async context manager support ------------------------------------

    async def __aenter__(self) -> LLMProvider:
        """Initialize the provider upon context entry."""
        await self.initialize()
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Ensure provider shutdown on context manager exit.

        ``async with`` always calls ``__aexit__`` with three positional
        arguments (exception type, value, traceback), so they must be
        accepted here even though they are not inspected. Returning ``None``
        lets any in-flight exception propagate to the caller.

        Args:
            *exc_info: The ``(exc_type, exc_value, traceback)`` triple passed
                by the ``async with`` machinery; ignored.
        """
        await self.shutdown()

    # -- Core inference (abstract) ----------------------------------------

    @abstractmethod
    async def complete(self, request: LLMRequest) -> LLMResponse:
        """Execute a single non-streaming completion attempt.

        This method does **not** apply retry logic. For retry-wrapped
        completion use :meth:`complete_with_retry`.

        Args:
            request: The generation request.

        Returns:
            The completed response from the provider.

        Raises:
            LLMTimeoutError: If the request exceeds the configured timeout.
            LLMRateLimitError: On HTTP 429.
            LLMTokenLimitError: If the prompt exceeds the context window.
            LLMProviderError: On any other provider-side failure.
        """

    @abstractmethod
    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamChunk]:
        """Yield token deltas as an async stream.

        Args:
            request: The streaming generation request.

        Yields:
            :class:`~kitkat.core.models.StreamChunk` objects — one per
            token delta. The final chunk has ``is_final=True`` and
            carries aggregated ``usage``, ``model``, ``provider``,
            ``finish_reason``, and ``latency_ms``.

        Raises:
            LLMTimeoutError: If the stream connection times out.
            LLMRateLimitError: If rate-limited mid-stream.
            LLMTokenLimitError: If the context window is exceeded.
            LLMProviderError: On any other streaming error.
        """
        # The ``yield`` below satisfies the type-checker's requirement that an
        # ``@abstractmethod`` decorated as ``AsyncIterator`` is a generator.
        # Concrete providers should replace the entire body.
        raise NotImplementedError  # pragma: no cover
        yield  # type: ignore[misc]  # makes this an async generator

    # -- Health & introspection (abstract) --------------------------------

    @abstractmethod
    async def health_check(self) -> bool:
        """Perform a lightweight liveness probe.

        Returns:
            ``True`` if the provider is reachable and credentials are valid.
        """

    @abstractmethod
    def count_tokens(self, text: str) -> int:
        """Estimate the token count for a piece of text.

        Providers should delegate to
        :func:`~kitkat._internal.tokenizers.count_tokens_tiktoken`
        or their SDK's native token counter.

        Args:
            text: The text to evaluate.

        Returns:
            Estimated token count (≥ 1 for non-empty input).
        """

    # -- Shared helpers ---------------------------------------------------

    def count_prompt_tokens(self, messages: list[Message]) -> int:
        """Estimate total token count for a list of messages.

        Concatenates all message contents with a single-space separator and
        delegates to :meth:`count_tokens`.

        Args:
            messages: The conversation messages to estimate.

        Returns:
            Estimated token count, or 0 for an empty list.
        """
        if not messages:
            return 0
        return self.count_tokens(" ".join(m.content for m in messages))

    def _assert_initialized(self) -> None:
        """Raise if the provider has not been initialized.

        Raises:
            RuntimeError: If :meth:`initialize` (or :meth:`_init_client_only`)
                has not been successfully called.
        """
        if not self._initialized:
            raise RuntimeError(
                f"{self.__class__.__name__}.initialize() must be called "
                "before making inference requests. Use the async context manager."
            )

    def _build_base_response_kwargs(
        self,
        request: LLMRequest,  # noqa: ARG002 (kept for API compatibility)
        start_time: float,
    ) -> dict[str, Any]:
        """Build common tracing fields for every response.

        Args:
            request: The original :class:`~kitkat.core.models.LLMRequest`.
            start_time: Monotonic clock value recorded before the API call.

        Returns:
            Dict with ``provider`` and ``latency_ms`` keys ready to unpack
            into :class:`~kitkat.core.models.LLMResponse`.
        """
        return {
            "provider": self.PROVIDER_TYPE,
            # time.monotonic() is in seconds; convert the delta to ms.
            "latency_ms": (time.monotonic() - start_time) * 1_000,
        }

    async def complete_with_retry(
        self,
        request: LLMRequest,
        *,
        policy: RetryPolicy | None = None,
    ) -> LLMResponse:
        """Execute a completion request with exponential back-off retry.

        Delegates to :func:`~kitkat._internal.retry.execute_with_retry`,
        which handles non-retriable errors (auth, token limit, content
        filter) by re-raising immediately.

        Args:
            request: The completion request.
            policy: Override the provider's class-level ``RETRY_POLICY``.

        Returns:
            The completed response after a successful attempt.

        Raises:
            LLMTimeoutError: If all retries time out.
            LLMRateLimitError: If all rate-limit retries are exhausted.
            LLMProviderError: On unrecoverable provider errors.
        """
        # RETRY_POLICY always exists (declared on this base class), so read it
        # directly instead of building a throwaway default on every call.
        effective_policy = policy if policy is not None else self.RETRY_POLICY
        return await execute_with_retry(
            func=lambda: self.complete(request),
            policy=effective_policy,
            provider_name=self.__class__.__name__,
        )

    def run_sync(self, request: LLMRequest) -> LLMResponse:
        """Execute a completion synchronously (blocks the calling thread).

        Useful for scripts and tests that do not run inside an asyncio event
        loop. **Do not call from within a running loop** — use
        ``await provider.complete(request)`` instead.

        Args:
            request: The request to send to the provider.

        Returns:
            The provider response.

        Raises:
            RuntimeError: If called from within a running asyncio event loop.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No running loop — safe to proceed
        else:
            raise RuntimeError(
                "run_sync() cannot be called from within a running event loop. "
                "Use 'await provider.complete(request)' instead."
            )
        return asyncio.run(self.complete(request))

    # -- Representation ---------------------------------------------------

    def __repr__(self) -> str:
        """Return a debug representation with provider, model and status."""
        status = "ready" if self._initialized else "uninitialised"
        # getattr: the class-level attributes are only annotated here, so a
        # subclass that has not declared them would otherwise raise.
        provider_type = getattr(self, "PROVIDER_TYPE", "unknown")
        model = getattr(self, "DEFAULT_MODEL", "unknown")
        return (
            f"<{self.__class__.__name__} "
            f"provider={getattr(provider_type, 'value', provider_type)!r} "
            f"model={model!r} "
            f"status={status}>"
        )
@@ -0,0 +1,61 @@
1
+ """Core layer public API.
2
+
3
+ Zero-dependency foundation — no provider SDK imports, no optional extras.
4
+ Every other module in the library imports from here.
5
+
6
+ Usage::
7
+
8
+ from kitkat.core import LLMRequest, LLMResponse, Role
9
+ from kitkat.core import LLMAuthenticationError, LLMRateLimitError
10
+ """
11
+
12
+ from .enums import FinishReason, ProviderType, Role
13
+ from .exceptions import (
14
+ KitkatError,
15
+ LLMAuthenticationError,
16
+ LLMContentFilterError,
17
+ LLMError,
18
+ LLMProviderError,
19
+ LLMProviderInitError,
20
+ LLMRateLimitError,
21
+ LLMTimeoutError,
22
+ LLMTokenLimitError,
23
+ )
24
+ from .models import (
25
+ LLMRequest,
26
+ LLMResponse,
27
+ Message,
28
+ ProviderCapabilities,
29
+ ProviderCapabilitiesModel,
30
+ RetryPolicy,
31
+ StreamChunk,
32
+ ThinkingConfig,
33
+ TokenUsage,
34
+ )
35
+
36
+ __all__ = [
37
+ # Enums
38
+ "Role",
39
+ "FinishReason",
40
+ "ProviderType",
41
+ # Models
42
+ "Message",
43
+ "ThinkingConfig",
44
+ "LLMRequest",
45
+ "LLMResponse",
46
+ "StreamChunk",
47
+ "TokenUsage",
48
+ "RetryPolicy",
49
+ "ProviderCapabilities",
50
+ "ProviderCapabilitiesModel",
51
+ # Exceptions
52
+ "KitkatError",
53
+ "LLMError",
54
+ "LLMProviderError",
55
+ "LLMProviderInitError",
56
+ "LLMAuthenticationError",
57
+ "LLMRateLimitError",
58
+ "LLMTokenLimitError",
59
+ "LLMTimeoutError",
60
+ "LLMContentFilterError",
61
+ ]