kitkat 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kitkat/__init__.py +0 -0
- kitkat/_internal/__init__.py +6 -0
- kitkat/_internal/http.py +47 -0
- kitkat/_internal/retry.py +102 -0
- kitkat/_internal/tokenizers.py +76 -0
- kitkat/abc/__init__.py +11 -0
- kitkat/abc/provider.py +374 -0
- kitkat/core/__init__.py +61 -0
- kitkat/core/enums.py +36 -0
- kitkat/core/exceptions.py +192 -0
- kitkat/core/models.py +217 -0
- kitkat/core/schemas.py +674 -0
- kitkat/exceptions.py +33 -0
- kitkat/providers/__init__.py +13 -0
- kitkat/providers/_registry.py +123 -0
- kitkat/providers/anthropic/__init__.py +33 -0
- kitkat/providers/anthropic/provider.py +818 -0
- kitkat/providers/gemini/__init__.py +36 -0
- kitkat/providers/gemini/provider.py +742 -0
- kitkat/providers/openai/__init__.py +37 -0
- kitkat/providers/openai/provider.py +891 -0
- kitkat/py.typed +0 -0
- kitkat/service/byok.py +215 -0
- kitkat/service/service.py +290 -0
- kitkat-0.1.0.dist-info/METADATA +473 -0
- kitkat-0.1.0.dist-info/RECORD +29 -0
- kitkat-0.1.0.dist-info/WHEEL +4 -0
- kitkat-0.1.0.dist-info/entry_points.txt +4 -0
- kitkat-0.1.0.dist-info/licenses/LICENSE +21 -0
kitkat/__init__.py
ADDED
|
File without changes
|
kitkat/_internal/http.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Shared httpx async-client configuration.
|
|
2
|
+
|
|
3
|
+
All providers that use httpx directly should build their clients through
|
|
4
|
+
:func:`build_async_client` so connection-pool settings, timeout defaults,
|
|
5
|
+
and the ``User-Agent`` header are consistent across the library.
|
|
6
|
+
|
|
7
|
+
Provider SDKs (anthropic, openai, google-genai) manage their own HTTP
|
|
8
|
+
transport internally, so this factory is primarily useful for custom
|
|
9
|
+
providers or future internal HTTP callers.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
_LIB_VERSION = version("kitkat")
|
|
20
|
+
except PackageNotFoundError:
|
|
21
|
+
_LIB_VERSION = "dev"
|
|
22
|
+
|
|
23
|
+
_USER_AGENT = f"kitkat/{_LIB_VERSION} httpx/{httpx.__version__}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def build_async_client(
    base_url: str = "",
    timeout: float = 120.0,
    **kwargs: object,
) -> httpx.AsyncClient:
    """Create a pre-configured :class:`httpx.AsyncClient`.

    The library ``User-Agent`` header is always applied as a default.  If the
    caller also passes ``headers=...`` it is merged on top of that default
    (caller values win, header names compared case-insensitively) instead of
    colliding with the keyword this factory sets itself.

    Args:
        base_url: Optional base URL prefix applied to all requests.
        timeout: Default request timeout in seconds.
        **kwargs: Additional keyword arguments forwarded to
            :class:`httpx.AsyncClient`.  ``headers`` may be any value httpx
            accepts for headers (mapping, sequence of pairs, or
            :class:`httpx.Headers`).

    Returns:
        A ready-to-use async HTTP client with library defaults applied.
    """
    # ``headers`` is set explicitly below, so it must be removed from
    # ``kwargs`` first; forwarding it twice raises
    # "TypeError: got multiple values for keyword argument 'headers'".
    extra_headers = kwargs.pop("headers", None)

    headers = httpx.Headers({"User-Agent": _USER_AGENT})
    if extra_headers is not None:
        # Headers.update() replaces same-named entries, so a caller-supplied
        # User-Agent overrides the library default.
        headers.update(extra_headers)  # type: ignore[arg-type]

    return httpx.AsyncClient(
        base_url=base_url,
        timeout=httpx.Timeout(timeout),
        headers=headers,
        **kwargs,  # type: ignore[arg-type]
    )
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Shared retry logic for all provider implementations.
|
|
2
|
+
|
|
3
|
+
Provider subclasses call :func:`execute_with_retry` from within
|
|
4
|
+
``complete_with_retry()``. This ensures consistent exponential back-off
|
|
5
|
+
behaviour regardless of which provider is in use.
|
|
6
|
+
|
|
7
|
+
Non-retriable errors (auth, token-limit, content-filter) are re-raised
|
|
8
|
+
immediately without sleeping, so callers never wait on deterministic
|
|
9
|
+
failures.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import logging
|
|
16
|
+
from typing import TYPE_CHECKING, TypeVar
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Callable, Coroutine
|
|
20
|
+
|
|
21
|
+
from ..core.models import RetryPolicy
|
|
22
|
+
|
|
23
|
+
from ..core.exceptions import (
|
|
24
|
+
LLMAuthenticationError,
|
|
25
|
+
LLMContentFilterError,
|
|
26
|
+
LLMError,
|
|
27
|
+
LLMRateLimitError,
|
|
28
|
+
LLMTokenLimitError,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
T = TypeVar("T")
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# Errors that must never be retried
|
|
35
|
+
_NON_RETRIABLE = (LLMAuthenticationError, LLMTokenLimitError, LLMContentFilterError)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def execute_with_retry(
    func: Callable[[], Coroutine[None, None, T]],
    policy: RetryPolicy,
    provider_name: str,
) -> T:
    """Execute an async callable with exponential back-off retry.

    Retries on :exc:`~kitkat.core.exceptions.LLMRateLimitError` and
    generic :exc:`~kitkat.core.exceptions.LLMError`.  Raises immediately
    on non-retriable errors (authentication, token limit, content filter).

    Args:
        func: Zero-argument async callable that performs one inference attempt.
        policy: Retry configuration (attempts, delays, jitter).
        provider_name: Used in log messages to identify the provider.

    Returns:
        The return value of *func* on a successful attempt.

    Raises:
        ValueError: If ``policy.max_attempts`` is less than 1, i.e. *func*
            would never be called.
        LLMAuthenticationError: Immediately — credentials are invalid.
        LLMTokenLimitError: Immediately — prompt is deterministically too long.
        LLMContentFilterError: Immediately — content policy violation.
        LLMRateLimitError: After all retry attempts are exhausted.
        LLMError: After all retry attempts are exhausted for other errors.
    """
    max_attempts = policy.max_attempts
    if max_attempts < 1:
        # Explicit validation instead of a trailing ``assert``: asserts are
        # stripped under ``python -O`` and would degrade into ``raise None``.
        raise ValueError(
            f"RetryPolicy.max_attempts must be >= 1, got {max_attempts!r}"
        )

    final_attempt = max_attempts - 1

    for attempt in range(max_attempts):
        try:
            return await func()

        except _NON_RETRIABLE:
            raise  # Deterministic failure — skip retries entirely

        except LLMRateLimitError as exc:
            if attempt >= final_attempt:
                logger.warning(
                    "[%s] Rate limited on final attempt %d/%d; giving up.",
                    provider_name,
                    attempt + 1,
                    max_attempts,
                )
                raise
            # Prefer the server-provided hint; a missing (or zero) hint falls
            # back to the policy's own back-off schedule.
            wait = exc.retry_after_s or policy.delay_for_attempt(attempt)
            logger.warning(
                "[%s] Rate limited. Waiting %.1fs (attempt %d/%d).",
                provider_name,
                wait,
                attempt + 1,
                max_attempts,
            )

        except LLMError as exc:
            if attempt >= final_attempt:
                logger.warning(
                    "[%s] Provider error: %s. Final attempt %d/%d failed; giving up.",
                    provider_name,
                    exc,
                    attempt + 1,
                    max_attempts,
                )
                raise
            wait = policy.delay_for_attempt(attempt)
            logger.warning(
                "[%s] Provider error: %s. Waiting %.1fs (attempt %d/%d).",
                provider_name,
                exc,
                wait,
                attempt + 1,
                max_attempts,
            )

        # Only reached after a retriable failure on a non-final attempt.
        # Sleeping outside the ``except`` block avoids holding the exception
        # (and its traceback) alive for the duration of the back-off.
        await asyncio.sleep(wait)

    # Every iteration either returns or raises on the final attempt.
    raise RuntimeError(  # pragma: no cover
        "execute_with_retry exited its retry loop unexpectedly"
    )
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Shared token-counting utilities.
|
|
2
|
+
|
|
3
|
+
Provides a tiktoken-based counter with a conservative character-ratio
|
|
4
|
+
fallback for models not yet supported by tiktoken (e.g. Gemini variants).
|
|
5
|
+
|
|
6
|
+
All provider ``count_tokens()`` implementations should delegate here so the
|
|
7
|
+
behaviour is consistent across the library.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Conservative approximation: 4 chars ≈ 1 token (valid for most Latin-script
|
|
17
|
+
# text with GPT-style BPE tokenisers).
|
|
18
|
+
_CHARS_PER_TOKEN: float = 4.0
|
|
19
|
+
|
|
20
|
+
# Sentinel that is stored on first failed tiktoken load so we never try again
|
|
21
|
+
# in the same process (avoids repeated BPE-download attempts in air-gapped envs).
|
|
22
|
+
_TIKTOKEN_UNAVAILABLE = object()
|
|
23
|
+
|
|
24
|
+
# Cache per encoding name so we only call get_encoding() once.
|
|
25
|
+
_ENCODER_CACHE: dict[str, object] = {}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def count_tokens_tiktoken(text: str, encoding_name: str = "cl100k_base") -> int:
    """Count tokens using tiktoken, with a char-ratio fallback.

    The encoder for each ``encoding_name`` is loaded at most once per process.
    If loading fails (tiktoken not installed, BPE file unavailable, unknown
    encoding name) the failure is remembered and every later call for that
    encoding goes straight to :func:`count_tokens_fallback`.

    Args:
        text: The text to tokenise.
        encoding_name: The tiktoken BPE encoding to use.
            ``cl100k_base`` covers GPT-4 / GPT-3.5 and approximates
            Anthropic Claude tokenisation well enough for budgeting.

    Returns:
        Token count (always ≥ 0; 0 for empty input; ≥ 1 for non-empty).
    """
    if not text:
        return 0

    enc = _ENCODER_CACHE.get(encoding_name)
    if enc is None:
        try:
            import tiktoken

            enc = tiktoken.get_encoding(encoding_name)
        except Exception as exc:
            # Deliberately broad: any load failure (ImportError, network
            # error, unknown encoding) should degrade to the estimate rather
            # than break token budgeting.
            logger.warning(
                "tiktoken BPE load failed (%s); falling back to "
                "character-based token estimate (4 chars ≈ 1 token).",
                exc,
            )
            enc = _TIKTOKEN_UNAVAILABLE
        # Cache both outcomes so the load is attempted only once.
        _ENCODER_CACHE[encoding_name] = enc

    if enc is _TIKTOKEN_UNAVAILABLE:
        return count_tokens_fallback(text)

    # ``disallowed_special=()`` makes tiktoken treat special-token literals
    # such as "<|endoftext|>" as ordinary text.  With the default
    # (``"all"``) encode() raises ValueError whenever user text happens to
    # contain one, which is wrong for a pure counting utility.
    token_ids = enc.encode(text, disallowed_special=())  # type: ignore[attr-defined]
    return max(1, len(token_ids))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def count_tokens_fallback(text: str) -> int:
    """Estimate a token count from the character length of *text*.

    Used for models without tiktoken support.  The length is divided by the
    module-level ``_CHARS_PER_TOKEN`` ratio and rounded to the nearest
    integer, with a floor of one token for any non-empty text.

    Args:
        text: The text to estimate.

    Returns:
        Estimated token count (0 for empty input; ≥ 1 for non-empty).
    """
    if text:
        estimate = round(len(text) / _CHARS_PER_TOKEN)
        # Non-empty text always costs at least one token.
        return estimate if estimate > 1 else 1
    return 0
|
kitkat/abc/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""kitkat.abc — abstract base classes for the library.
|
|
2
|
+
|
|
3
|
+
The only stable public export is :class:`~kitkat.abc.provider.LLMProvider`.
|
|
4
|
+
Third-party providers should import from here::
|
|
5
|
+
|
|
6
|
+
from kitkat.abc import LLMProvider
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .provider import LLMProvider
|
|
10
|
+
|
|
11
|
+
__all__ = ["LLMProvider"]
|
kitkat/abc/provider.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""The LLMProvider abstract base class.
|
|
2
|
+
|
|
3
|
+
Every concrete provider (Anthropic, OpenAI, Gemini, or a custom third-party
|
|
4
|
+
provider) must sub-class :class:`LLMProvider` and implement all abstract
|
|
5
|
+
methods. The library's service layer works exclusively with this ABC — it
|
|
6
|
+
never imports concrete provider classes directly.
|
|
7
|
+
|
|
8
|
+
Implementing a custom provider::
|
|
9
|
+
|
|
10
|
+
from kitkat.abc import LLMProvider
|
|
11
|
+
from kitkat.core import (
|
|
12
|
+
LLMRequest, LLMResponse, ProviderCapabilities, ProviderType,
|
|
13
|
+
RetryPolicy, StreamChunk,
|
|
14
|
+
)
|
|
15
|
+
from collections.abc import AsyncIterator
|
|
16
|
+
|
|
17
|
+
class MyProvider(LLMProvider):
|
|
18
|
+
PROVIDER_TYPE = ProviderType.OPENAI # reuse an existing slot …
|
|
19
|
+
DEFAULT_MODEL = "my-model-v1"
|
|
20
|
+
CAPABILITIES = ProviderCapabilities(
|
|
21
|
+
supports_streaming=True,
|
|
22
|
+
supports_thinking=False,
|
|
23
|
+
max_context_tokens=32_768,
|
|
24
|
+
provider_type=ProviderType.OPENAI,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
async def initialize(self) -> None:
|
|
28
|
+
self._client = MySDKClient(api_key=self._config["api_key"])
|
|
29
|
+
self._initialized = True
|
|
30
|
+
|
|
31
|
+
async def shutdown(self) -> None:
|
|
32
|
+
await self._client.aclose()
|
|
33
|
+
self._initialized = False
|
|
34
|
+
|
|
35
|
+
async def _init_client_only(self) -> None:
|
|
36
|
+
if self._initialized:
|
|
37
|
+
return
|
|
38
|
+
self._client = MySDKClient(api_key=self._config["api_key"])
|
|
39
|
+
self._initialized = True
|
|
40
|
+
|
|
41
|
+
async def complete(self, request: LLMRequest) -> LLMResponse:
|
|
42
|
+
...
|
|
43
|
+
|
|
44
|
+
async def stream(self, request: LLMRequest) -> AsyncIterator[StreamChunk]:
|
|
45
|
+
...
|
|
46
|
+
|
|
47
|
+
async def health_check(self) -> bool:
|
|
48
|
+
...
|
|
49
|
+
|
|
50
|
+
def count_tokens(self, text: str) -> int:
|
|
51
|
+
from kitkat._internal.tokenizers import count_tokens_tiktoken
|
|
52
|
+
return count_tokens_tiktoken(text)
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
from __future__ import annotations
|
|
56
|
+
|
|
57
|
+
import asyncio
|
|
58
|
+
import logging
|
|
59
|
+
import time
|
|
60
|
+
from abc import ABC, abstractmethod
|
|
61
|
+
from typing import TYPE_CHECKING, Any
|
|
62
|
+
|
|
63
|
+
if TYPE_CHECKING:
|
|
64
|
+
from collections.abc import AsyncIterator
|
|
65
|
+
|
|
66
|
+
from ..core.enums import ProviderType
|
|
67
|
+
|
|
68
|
+
from .._internal.retry import execute_with_retry
|
|
69
|
+
from ..core.models import (
|
|
70
|
+
LLMRequest,
|
|
71
|
+
LLMResponse,
|
|
72
|
+
Message,
|
|
73
|
+
ProviderCapabilities,
|
|
74
|
+
RetryPolicy,
|
|
75
|
+
StreamChunk,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
logger = logging.getLogger(__name__)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class LLMProvider(ABC):
    """Abstract base class for all LLM provider implementations.

    Concrete providers inherit from this class and implement the seven
    abstract methods below (:meth:`initialize`, :meth:`shutdown`,
    :meth:`_init_client_only`, :meth:`complete`, :meth:`stream`,
    :meth:`health_check`, :meth:`count_tokens`).  The shared helpers
    (:meth:`complete_with_retry`, :meth:`run_sync`,
    :meth:`_assert_initialized`, …) are provided here so providers don't
    duplicate boilerplate.

    Lifecycle::

        async with MyProvider(config) as provider:
            response = await provider.complete(request)

    Or explicitly::

        provider = MyProvider(config)
        await provider.initialize()
        try:
            response = await provider.complete(request)
        finally:
            await provider.shutdown()
    """

    # -- Class-level attributes providers MUST declare --------------------

    PROVIDER_TYPE: ProviderType
    """Canonical enum value identifying this provider."""

    DEFAULT_MODEL: str
    """Default model identifier used when :attr:`LLMRequest.model` is empty."""

    CAPABILITIES: ProviderCapabilities
    """Static feature-flag descriptor queried by the service layer."""

    RETRY_POLICY: RetryPolicy = RetryPolicy()
    """Default retry policy; concrete providers may override at class level."""

    # -- Constructor -------------------------------------------------------

    def __init__(self, config: dict[str, Any]) -> None:
        """Create the provider with a raw configuration dictionary.

        Args:
            config: Provider-specific key/value pairs (API key, model, etc.).
                Concrete providers typically accept a typed ``*Config``
                dataclass and call ``super().__init__(config.__dict__)``.
        """
        self._config = config
        self._initialized = False
        logger.debug("%s provider created.", self.__class__.__name__)

    # -- Lifecycle (abstract) ---------------------------------------------

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the provider: create the HTTP client and probe credentials.

        This is the *full* initialization path.  Callers using managed keys
        should always prefer this over ``_init_client_only``.

        Raises:
            LLMProviderInitError: If the provider fails to start due to
                configuration or credential errors.
        """

    @abstractmethod
    async def shutdown(self) -> None:
        """Gracefully release all resources associated with the provider."""

    @abstractmethod
    async def _init_client_only(self) -> None:
        """Create the HTTP client *without* running a credential probe.

        This lightweight initialization path is used by
        :class:`~kitkat.service.byok.BYOKLLMService` for BYOK requests.
        Auth failures surface on the first inference call rather than a
        pre-flight probe, avoiding extra latency and billable requests per
        user key.

        Concrete implementations must:

        1. Guard against double-initialization (idempotent — return early if
           ``self._initialized`` is already ``True``).
        2. Instantiate the provider-specific async HTTP client.
        3. Set ``self._initialized = True`` after successful client creation.

        Raises:
            LLMProviderInitError: If the underlying client cannot be created.
        """

    # -- Async context manager support ------------------------------------

    async def __aenter__(self) -> LLMProvider:
        """Initialize the provider upon context entry."""
        await self.initialize()
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Ensure provider shutdown on context manager exit.

        ``async with`` always calls ``__aexit__(exc_type, exc, tb)``; the
        three values are accepted via ``*exc_info`` and ignored.  Returning
        ``None`` means an exception raised inside the ``async with`` body is
        never suppressed.

        Args:
            *exc_info: The ``(exc_type, exc_value, traceback)`` triple
                supplied by the context-manager protocol (all ``None`` when
                the body completed normally).
        """
        await self.shutdown()

    # -- Core inference (abstract) ----------------------------------------

    @abstractmethod
    async def complete(self, request: LLMRequest) -> LLMResponse:
        """Execute a single non-streaming completion attempt.

        This method does **not** apply retry logic.  For retry-wrapped
        completion use :meth:`complete_with_retry`.

        Args:
            request: The generation request.

        Returns:
            The completed response from the provider.

        Raises:
            LLMTimeoutError: If the request exceeds the configured timeout.
            LLMRateLimitError: On HTTP 429.
            LLMTokenLimitError: If the prompt exceeds the context window.
            LLMProviderError: On any other provider-side failure.
        """

    @abstractmethod
    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamChunk]:
        """Yield token deltas as an async stream.

        Args:
            request: The streaming generation request.

        Yields:
            :class:`~kitkat.core.models.StreamChunk` objects — one per
            token delta.  The final chunk has ``is_final=True`` and
            carries aggregated ``usage``, ``model``, ``provider``,
            ``finish_reason``, and ``latency_ms``.

        Raises:
            LLMTimeoutError: If the stream connection times out.
            LLMRateLimitError: If rate-limited mid-stream.
            LLMTokenLimitError: If the context window is exceeded.
            LLMProviderError: On any other streaming error.
        """
        # The ``yield`` below satisfies the type-checker's requirement that an
        # ``@abstractmethod`` decorated as ``AsyncIterator`` is a generator.
        # Concrete providers should replace the entire body.
        raise NotImplementedError  # pragma: no cover
        yield  # type: ignore[misc]  # makes this an async generator

    # -- Health & introspection (abstract) --------------------------------

    @abstractmethod
    async def health_check(self) -> bool:
        """Perform a lightweight liveness probe.

        Returns:
            ``True`` if the provider is reachable and credentials are valid.
        """

    @abstractmethod
    def count_tokens(self, text: str) -> int:
        """Estimate the token count for a piece of text.

        Providers should delegate to
        :func:`~kitkat._internal.tokenizers.count_tokens_tiktoken`
        or their SDK's native token counter.

        Args:
            text: The text to evaluate.

        Returns:
            Estimated token count (≥ 1 for non-empty input).
        """

    # -- Shared helpers ---------------------------------------------------

    def count_prompt_tokens(self, messages: list[Message]) -> int:
        """Estimate total token count for a list of messages.

        Concatenates all message contents with a single-space separator and
        delegates to :meth:`count_tokens`.

        Args:
            messages: The conversation messages to estimate.

        Returns:
            Estimated token count, or 0 for an empty list.
        """
        if not messages:
            return 0
        return self.count_tokens(" ".join(m.content for m in messages))

    def _assert_initialized(self) -> None:
        """Raise if the provider has not been initialized.

        Raises:
            RuntimeError: If :meth:`initialize` (or :meth:`_init_client_only`)
                has not been successfully called.
        """
        if not self._initialized:
            raise RuntimeError(
                f"{self.__class__.__name__}.initialize() must be called "
                "before making inference requests. Use the async context manager."
            )

    def _build_base_response_kwargs(
        self,
        request: LLMRequest,  # noqa: ARG002  (kept for API compatibility)
        start_time: float,
    ) -> dict[str, Any]:
        """Build common tracing fields for every response.

        Args:
            request: The original :class:`~kitkat.core.models.LLMRequest`.
                Currently unused; retained so existing callers keep working.
            start_time: Monotonic clock value recorded before the API call.

        Returns:
            Dict with ``provider`` and ``latency_ms`` keys ready to unpack
            into :class:`~kitkat.core.models.LLMResponse`.
        """
        return {
            "provider": self.PROVIDER_TYPE,
            # time.monotonic() is in seconds; convert the elapsed span to ms.
            "latency_ms": (time.monotonic() - start_time) * 1_000,
        }

    async def complete_with_retry(
        self,
        request: LLMRequest,
        *,
        policy: RetryPolicy | None = None,
    ) -> LLMResponse:
        """Execute a completion request with exponential back-off retry.

        Delegates to :func:`~kitkat._internal.retry.execute_with_retry`,
        which handles non-retriable errors (auth, token limit, content
        filter) by re-raising immediately.

        Args:
            request: The completion request.
            policy: Override the provider's class-level ``RETRY_POLICY``.

        Returns:
            The completed response after a successful attempt.

        Raises:
            LLMTimeoutError: If all retries time out.
            LLMRateLimitError: If all rate-limit retries are exhausted.
            LLMProviderError: On unrecoverable provider errors.
        """
        # An explicit per-call policy wins; otherwise use the class-level one.
        p = policy or getattr(self, "RETRY_POLICY", RetryPolicy())
        return await execute_with_retry(
            func=lambda: self.complete(request),
            policy=p,
            provider_name=self.__class__.__name__,
        )

    def run_sync(self, request: LLMRequest) -> LLMResponse:
        """Execute a completion synchronously (blocks the calling thread).

        Useful for scripts and tests that do not run inside an asyncio event
        loop.  **Do not call from within a running loop** — use
        ``await provider.complete(request)`` instead.

        Args:
            request: The request to send to the provider.

        Returns:
            The provider response.

        Raises:
            RuntimeError: If called from within a running asyncio event loop.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No running loop — safe to proceed
        else:
            raise RuntimeError(
                "run_sync() cannot be called from within a running event loop. "
                "Use 'await provider.complete(request)' instead."
            )
        return asyncio.run(self.complete(request))

    # -- Representation ---------------------------------------------------

    def __repr__(self) -> str:
        """Return a debug string with provider type, default model and status."""
        status = "ready" if self._initialized else "uninitialised"
        # getattr with a default: subclasses may not have declared these yet.
        provider_type = getattr(self, "PROVIDER_TYPE", "unknown")
        model = getattr(self, "DEFAULT_MODEL", "unknown")
        return (
            f"<{self.__class__.__name__} "
            f"provider={getattr(provider_type, 'value', provider_type)!r} "
            f"model={model!r} "
            f"status={status}>"
        )
|
kitkat/core/__init__.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Core layer public API.
|
|
2
|
+
|
|
3
|
+
Zero-dependency foundation — no provider SDK imports, no optional extras.
|
|
4
|
+
Every other module in the library imports from here.
|
|
5
|
+
|
|
6
|
+
Usage::
|
|
7
|
+
|
|
8
|
+
from kitkat.core import LLMRequest, LLMResponse, Role
|
|
9
|
+
from kitkat.core import LLMAuthenticationError, LLMRateLimitError
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .enums import FinishReason, ProviderType, Role
|
|
13
|
+
from .exceptions import (
|
|
14
|
+
KitkatError,
|
|
15
|
+
LLMAuthenticationError,
|
|
16
|
+
LLMContentFilterError,
|
|
17
|
+
LLMError,
|
|
18
|
+
LLMProviderError,
|
|
19
|
+
LLMProviderInitError,
|
|
20
|
+
LLMRateLimitError,
|
|
21
|
+
LLMTimeoutError,
|
|
22
|
+
LLMTokenLimitError,
|
|
23
|
+
)
|
|
24
|
+
from .models import (
|
|
25
|
+
LLMRequest,
|
|
26
|
+
LLMResponse,
|
|
27
|
+
Message,
|
|
28
|
+
ProviderCapabilities,
|
|
29
|
+
ProviderCapabilitiesModel,
|
|
30
|
+
RetryPolicy,
|
|
31
|
+
StreamChunk,
|
|
32
|
+
ThinkingConfig,
|
|
33
|
+
TokenUsage,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
# Enums
|
|
38
|
+
"Role",
|
|
39
|
+
"FinishReason",
|
|
40
|
+
"ProviderType",
|
|
41
|
+
# Models
|
|
42
|
+
"Message",
|
|
43
|
+
"ThinkingConfig",
|
|
44
|
+
"LLMRequest",
|
|
45
|
+
"LLMResponse",
|
|
46
|
+
"StreamChunk",
|
|
47
|
+
"TokenUsage",
|
|
48
|
+
"RetryPolicy",
|
|
49
|
+
"ProviderCapabilities",
|
|
50
|
+
"ProviderCapabilitiesModel",
|
|
51
|
+
# Exceptions
|
|
52
|
+
"KitkatError",
|
|
53
|
+
"LLMError",
|
|
54
|
+
"LLMProviderError",
|
|
55
|
+
"LLMProviderInitError",
|
|
56
|
+
"LLMAuthenticationError",
|
|
57
|
+
"LLMRateLimitError",
|
|
58
|
+
"LLMTokenLimitError",
|
|
59
|
+
"LLMTimeoutError",
|
|
60
|
+
"LLMContentFilterError",
|
|
61
|
+
]
|