model-library 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
- model_library/base/base.py +139 -62
- model_library/base/delegate_only.py +77 -10
- model_library/base/output.py +43 -0
- model_library/base/utils.py +35 -0
- model_library/config/alibaba_models.yaml +44 -57
- model_library/config/all_models.json +253 -126
- model_library/config/kimi_models.yaml +30 -3
- model_library/config/openai_models.yaml +15 -23
- model_library/config/zai_models.yaml +24 -3
- model_library/exceptions.py +3 -77
- model_library/providers/ai21labs.py +12 -8
- model_library/providers/alibaba.py +17 -8
- model_library/providers/amazon.py +49 -16
- model_library/providers/anthropic.py +93 -40
- model_library/providers/azure.py +22 -10
- model_library/providers/cohere.py +7 -7
- model_library/providers/deepseek.py +8 -8
- model_library/providers/fireworks.py +7 -8
- model_library/providers/google/batch.py +14 -10
- model_library/providers/google/google.py +48 -29
- model_library/providers/inception.py +7 -7
- model_library/providers/kimi.py +18 -8
- model_library/providers/minimax.py +15 -17
- model_library/providers/mistral.py +20 -8
- model_library/providers/openai.py +99 -22
- model_library/providers/openrouter.py +34 -0
- model_library/providers/perplexity.py +7 -7
- model_library/providers/together.py +7 -8
- model_library/providers/vals.py +12 -6
- model_library/providers/xai.py +47 -42
- model_library/providers/zai.py +38 -8
- model_library/registry_utils.py +39 -15
- model_library/retriers/__init__.py +0 -0
- model_library/retriers/backoff.py +73 -0
- model_library/retriers/base.py +225 -0
- model_library/retriers/token.py +427 -0
- model_library/retriers/utils.py +11 -0
- model_library/settings.py +1 -1
- model_library/utils.py +13 -0
- {model_library-0.1.7.dist-info → model_library-0.1.8.dist-info}/METADATA +2 -1
- model_library-0.1.8.dist-info/RECORD +70 -0
- {model_library-0.1.7.dist-info → model_library-0.1.8.dist-info}/WHEEL +1 -1
- model_library-0.1.7.dist-info/RECORD +0 -64
- {model_library-0.1.7.dist-info → model_library-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.7.dist-info → model_library-0.1.8.dist-info}/top_level.txt +0 -0
model_library/base/base.py
CHANGED

@@ -1,9 +1,12 @@
+import hashlib
 import io
 import logging
+import threading
 import time
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import Awaitable
+from math import ceil
 from pprint import pformat
 from typing import (
     Any,
@@ -14,7 +17,7 @@ from typing import (
 )
 
 import tiktoken
-from pydantic import model_serializer
+from pydantic import SecretStr, model_serializer
 from pydantic.main import BaseModel
 from tiktoken.core import Encoding
 from typing_extensions import override
@@ -34,15 +37,15 @@ from model_library.base.output import (
     QueryResult,
     QueryResultCost,
     QueryResultMetadata,
+    RateLimit,
 )
 from model_library.base.utils import (
     get_pretty_input_types,
     serialize_for_tokenizing,
 )
-from model_library.
-
-
-)
+from model_library.retriers.backoff import ExponentialBackoffRetrier
+from model_library.retriers.base import BaseRetrier, R, RetrierType, retry_decorator
+from model_library.retriers.token import TokenRetrier
 from model_library.utils import truncate_str
 
 PydanticT = TypeVar("PydanticT", bound=BaseModel)
@@ -56,11 +59,18 @@ class ProviderConfig(BaseModel):
         return self.__dict__
 
 
-
+class TokenRetryParams(BaseModel):
+    input_modifier: float
+    output_modifier: float
+
+    use_dynamic_estimate: bool = True
+
+    limit: int
+    limit_refresh_seconds: Literal[60] = 60
 
 
 class LLMConfig(BaseModel):
-    max_tokens: int =
+    max_tokens: int | None = None
     temperature: float | None = None
     top_p: float | None = None
     top_k: int | None = None
@@ -75,11 +85,18 @@ class LLMConfig(BaseModel):
     native: bool = True
     provider_config: ProviderConfig | None = None
     registry_key: str | None = None
+    custom_api_key: SecretStr | None = None
+
 
+class DelegateConfig(BaseModel):
+    base_url: str
+    api_key: SecretStr
 
-RetrierType = Callable[[Callable[..., Awaitable[Any]]], Callable[..., Awaitable[Any]]]
 
-
+# shared across all subclasses and instances
+# hash(provider + api_key) -> client
+client_registry_lock = threading.Lock()
+client_registry: dict[tuple[str, str], Any] = {}
 
 
 class LLM(ABC):
@@ -88,6 +105,34 @@ class LLM(ABC):
     LLM call errors should be raised as exceptions
     """
 
+    @abstractmethod
+    def get_client(self, api_key: str | None = None) -> Any:
+        """
+        Returns the cached instance of the appropriate SDK client.
+        Subclasses should implement this method and:
+        - if api_key is provided, initialize their client and call assign_client(client).
+        - else return super().get_client()
+        """
+        global client_registry
+        return client_registry[self._client_registry_key]
+
+    def assign_client(self, client: object) -> None:
+        """Thread-safe assignment to the client registry"""
+        global client_registry
+
+        if self._client_registry_key not in client_registry:
+            with client_registry_lock:
+                if self._client_registry_key not in client_registry:
+                    client_registry[self._client_registry_key] = client
+
+    def has_client(self) -> bool:
+        return self._client_registry_key in client_registry
+
+    @abstractmethod
+    def _get_default_api_key(self) -> str:
+        """Return the api key from model_library.settings"""
+        ...
+
     def __init__(
         self,
         model_name: str,
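The new module-level registry deduplicates SDK clients across every `LLM` instance: keys are `(provider, sha256(api_key))`, so the raw key never lands in the dict, and `assign_client` uses double-checked locking so concurrent constructors don't each build a client. A minimal self-contained sketch of that pattern (the `FakeClient` class and `make_client` factory are hypothetical stand-ins, not part of the library):

```python
import hashlib
import threading
from typing import Any, Callable

# hash of (provider, api_key) -> client, shared across all instances
client_registry: dict[tuple[str, str], Any] = {}
client_registry_lock = threading.Lock()


class FakeClient:
    """Hypothetical stand-in for a provider SDK client."""

    def __init__(self, api_key: str):
        self.api_key = api_key


def assign_client(key: tuple[str, str], make_client: Callable[[], Any]) -> Any:
    # First check without the lock: cheap fast path once the client exists.
    if key not in client_registry:
        with client_registry_lock:
            # Second check under the lock: another thread may have won the race.
            if key not in client_registry:
                client_registry[key] = make_client()
    return client_registry[key]


raw_key = "sk-example"
key = ("openai", hashlib.sha256(raw_key.encode()).hexdigest())
client = assign_client(key, lambda: FakeClient(raw_key))
assert assign_client(key, lambda: FakeClient(raw_key)) is client  # cached
```

The outer membership test keeps the hot path lock-free; the inner one makes the construction race-safe. The remaining base.py hunks follow.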
@@ -103,7 +148,7 @@ class LLM(ABC):
         config = config or LLMConfig()
         self._registry_key = config.registry_key
 
-        self.max_tokens: int = config.max_tokens
+        self.max_tokens: int | None = config.max_tokens
         self.temperature: float | None = config.temperature
         self.top_p: float | None = config.top_p
         self.top_k: int | None = config.top_k
@@ -131,21 +176,33 @@ class LLM(ABC):
         self.logger: logging.Logger = logging.getLogger(
             f"llm.{provider}.{model_name}<instance={self.instance_id}>"
         )
-        self.custom_retrier:
+        self.custom_retrier: RetrierType | None = None
+
+        self.token_retry_params = None
+        # set _client_registry_key after initializing delegate
+        if not self.native:
+            return
+
+        if config.custom_api_key:
+            raw_key = config.custom_api_key.get_secret_value()
+        else:
+            raw_key = self._get_default_api_key()
+
+        key_hash = hashlib.sha256(raw_key.encode()).hexdigest()
+        self._client_registry_key = (self.provider, key_hash)
+        self._client_registry_key_model_specific = (
+            f"{self.provider}.{self.model_name}",
+            key_hash,
+        )
+        self.get_client(api_key=raw_key)
 
     @override
     def __repr__(self):
         attrs = vars(self).copy()
         attrs.pop("logger", None)
         attrs.pop("custom_retrier", None)
-        attrs.pop("_key", None)
         return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2, sort_dicts=False)}\n)"
 
-    @abstractmethod
-    def get_client(self) -> object:
-        """Return the instance of the appropriate SDK client."""
-        ...
-
     @staticmethod
     async def timer_wrapper(func: Callable[[], Awaitable[R]]) -> tuple[R, float]:
         """
@@ -155,43 +212,6 @@ class LLM(ABC):
         result = await func()
         return result, round(time.perf_counter() - start, 4)
 
-    @staticmethod
-    async def immediate_retry_wrapper(
-        func: Callable[[], Awaitable[R]],
-        logger: logging.Logger,
-    ) -> R:
-        """
-        Retry the query immediately
-        """
-        MAX_IMMEDIATE_RETRIES = 10
-        retries = 0
-        while True:
-            try:
-                return await func()
-            except ImmediateRetryException as e:
-                if retries >= MAX_IMMEDIATE_RETRIES:
-                    logger.error(f"Query reached max immediate retries {retries}: {e}")
-                    raise Exception(
-                        f"Query reached max immediate retries {retries}: {e}"
-                    ) from e
-                retries += 1
-
-                logger.warning(
-                    f"Query retried immediately {retries}/{MAX_IMMEDIATE_RETRIES}: {e}"
-                )
-
-    @staticmethod
-    async def backoff_retry_wrapper(
-        func: Callable[..., Awaitable[R]],
-        backoff_retrier: RetrierType | None,
-    ) -> R:
-        """
-        Retry the query with backoff
-        """
-        if not backoff_retrier:
-            return await func()
-        return await backoff_retrier(func)()
-
     async def delegate_query(
         self,
         input: Sequence[InputItem],
@@ -276,15 +296,38 @@
             return await LLM.timer_wrapper(query_func)
 
         async def immediate_retry() -> tuple[QueryResult, float]:
-            return await
-
-        async def
-
-
-
+            return await BaseRetrier.immediate_retry_wrapper(timed_query, query_logger)
+
+        async def default_retry() -> tuple[QueryResult, float]:
+            if self.token_retry_params:
+                (
+                    estimate_input_tokens,
+                    estimate_output_tokens,
+                ) = await self.estimate_query_tokens(
+                    input,
+                    tools=tools,
+                    **kwargs,
+                )
+                retrier = TokenRetrier(
+                    logger=query_logger,
+                    client_registry_key=self._client_registry_key_model_specific,
+                    estimate_input_tokens=estimate_input_tokens,
+                    estimate_output_tokens=estimate_output_tokens,
+                    dynamic_estimate_instance_id=self.instance_id
+                    if self.token_retry_params.use_dynamic_estimate
+                    else None,
+                )
+            else:
+                retrier = ExponentialBackoffRetrier(logger=query_logger)
+            return await retry_decorator(retrier)(immediate_retry)()
+
+        run_with_retry = (
+            default_retry
+            if not self.custom_retrier
+            else self.custom_retrier(immediate_retry)
+        )
 
-        output, duration = await
+        output, duration = await run_with_retry()
         output.metadata.duration_seconds = duration
         output.metadata.cost = await self._calculate_cost(output.metadata)
@@ -293,6 +336,16 @@
 
         return output
 
+    async def init_token_retry(self, token_retry_params: TokenRetryParams) -> None:
+        self.token_retry_params = token_retry_params
+        await TokenRetrier.init_remaining_tokens(
+            client_registry_key=self._client_registry_key_model_specific,
+            limit=self.token_retry_params.limit,
+            limit_refresh_seconds=self.token_retry_params.limit_refresh_seconds,
+            get_rate_limit_func=self.get_rate_limit,
+            logger=self.logger,
+        )
+
     async def _calculate_cost(
         self,
         metadata: QueryResultMetadata,
@@ -438,6 +491,30 @@
         """Upload a file to the model provider"""
         ...
 
+    async def get_rate_limit(self) -> RateLimit | None:
+        """Get the rate limit for the model provider"""
+        return None
+
+    async def estimate_query_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> tuple[int, int]:
+        """Pessimistically estimate the number of tokens required for a query"""
+        assert self.token_retry_params
+
+        # TODO: when passing in images and files, we really need to take that into account when calculating the output tokens!!
+
+        input_tokens = (
+            await self.count_tokens(input, history=[], tools=tools, **kwargs)
+            * self.token_retry_params.input_modifier
+        )
+
+        output_tokens = input_tokens * self.token_retry_params.output_modifier
+        return ceil(input_tokens), ceil(output_tokens)
+
     async def get_encoding(self) -> Encoding:
         """Get the appropriate tokenizer"""
 
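The rewritten retry path picks a `TokenRetrier` when `init_token_retry` has been called and an `ExponentialBackoffRetrier` otherwise. `estimate_query_tokens` feeds the token retrier a pessimistic budget: it scales the tokenizer's count by `input_modifier`, then derives the output estimate from that already-scaled figure via `output_modifier`, rounding both up. The arithmetic in isolation, with illustrative modifier values (no defaults are visible in this diff):

```python
from math import ceil

counted_input = 1_000    # tokens reported by count_tokens()
input_modifier = 1.2     # assumption: 20% safety margin on the input count
output_modifier = 0.5    # assumption: allow output up to half the input estimate

input_estimate = counted_input * input_modifier      # 1200.0
# Note: the output estimate scales the *modified* input, not the raw count.
output_estimate = input_estimate * output_modifier   # 600.0

print(ceil(input_estimate), ceil(output_estimate))   # -> 1200 600
```

The retrier can then hold the query until both estimates fit the per-minute budget declared by `TokenRetryParams.limit`; the `TokenRetrier` internals live in model_library/retriers/token.py, which this diff does not show.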
model_library/base/delegate_only.py
CHANGED

@@ -13,6 +13,7 @@ from model_library.base import (
     QueryResult,
     ToolDefinition,
 )
+from model_library.base.base import DelegateConfig
 
 
 class DelegateOnlyException(Exception):
@@ -21,17 +22,51 @@ class DelegateOnlyException(Exception):
     delegate-only model.
     """
 
-    DEFAULT_MESSAGE: str = "This model
+    DEFAULT_MESSAGE: str = "This model is running in delegate-only mode, certain functionality is not supported."
 
     def __init__(self, message: str | None = None):
         super().__init__(message or DelegateOnlyException.DEFAULT_MESSAGE)
 
 
 class DelegateOnly(LLM):
-
-    def get_client(self) -> None:
+    def _get_default_api_key(self) -> str:
         raise DelegateOnlyException()
 
+    @override
+    def get_client(self, api_key: str | None = None) -> None:
+        assert self.delegate
+        return self.delegate.get_client()
+
+    def init_delegate(
+        self,
+        config: LLMConfig | None,
+        delegate_config: DelegateConfig,
+        delegate_provider: Literal["openai", "anthropic"],
+        use_completions: bool = True,
+    ) -> None:
+        from model_library.providers.anthropic import AnthropicModel
+        from model_library.providers.openai import OpenAIModel
+
+        match delegate_provider:
+            case "openai":
+                self.delegate = OpenAIModel(
+                    model_name=self.model_name,
+                    provider=self.provider,
+                    config=config,
+                    use_completions=use_completions,
+                    delegate_config=delegate_config,
+                )
+            case "anthropic":
+                self.delegate = AnthropicModel(
+                    model_name=self.model_name,
+                    provider=self.provider,
+                    config=config,
+                    delegate_config=delegate_config,
+                )
+        self._client_registry_key_model_specific = (
+            self.delegate._client_registry_key_model_specific
+        )
+
     def __init__(
         self,
         model_name: str,
@@ -42,6 +77,11 @@ class DelegateOnly(LLM):
         config = config or LLMConfig()
         config.native = False
         super().__init__(model_name, provider, config=config)
+        config.native = True
+
+    def _get_extra_body(self) -> dict[str, Any]:
+        """Build extra body parameters for delegate-specific features."""
+        return {}
 
     @override
     async def _query_impl(
@@ -53,9 +93,12 @@ class DelegateOnly(LLM):
         **kwargs: object,
     ) -> QueryResult:
         assert self.delegate
-
         return await self.delegate_query(
-            input,
+            input,
+            tools=tools,
+            query_logger=query_logger,
+            extra_body=self._get_extra_body(),
+            **kwargs,
         )
 
     @override
@@ -66,7 +109,8 @@ class DelegateOnly(LLM):
         tools: list[ToolDefinition],
         **kwargs: object,
     ) -> dict[str, Any]:
-
+        assert self.delegate
+        return await self.delegate.build_body(input, tools=tools, **kwargs)
 
     @override
     async def parse_input(
@@ -74,28 +118,32 @@ class DelegateOnly(LLM):
         input: Sequence[InputItem],
         **kwargs: Any,
     ) -> Any:
-
+        assert self.delegate
+        return await self.delegate.parse_input(input, **kwargs)
 
     @override
     async def parse_image(
         self,
         image: FileInput,
     ) -> Any:
-
+        assert self.delegate
+        return await self.delegate.parse_image(image)
 
     @override
     async def parse_file(
         self,
         file: FileInput,
     ) -> Any:
-
+        assert self.delegate
+        return await self.delegate.parse_file(file)
 
     @override
    async def parse_tools(
         self,
         tools: list[ToolDefinition],
     ) -> Any:
-
+        assert self.delegate
+        return await self.delegate.parse_tools(tools)
 
     @override
     async def upload_file(
@@ -106,3 +154,22 @@ class DelegateOnly(LLM):
         type: Literal["image", "file"] = "file",
     ) -> FileWithId:
         raise DelegateOnlyException()
+
+    @override
+    async def get_rate_limit(self) -> Any:
+        assert self.delegate
+        return await self.delegate.get_rate_limit()
+
+    @override
+    async def count_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        history: Sequence[InputItem] = [],
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> int:
+        assert self.delegate
+        return await self.delegate.count_tokens(
+            input, history=history, tools=tools, **kwargs
+        )
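Every provider-specific hook in `DelegateOnly` now forwards to an inner model chosen by `init_delegate`, guarded by the same `assert self.delegate` precondition so a misconfigured instance fails loudly. A toy sketch of that forwarding pattern (class names here are invented for illustration, not the library's):

```python
from typing import Literal


class ToyDelegate:
    """Stand-in for OpenAIModel/AnthropicModel in the real library."""

    def __init__(self, style: str):
        self.style = style

    def parse_tools(self, tools: list[dict]) -> list[dict]:
        # Each provider shapes tool definitions differently.
        return [{"format": self.style, **t} for t in tools]


class ToyDelegateOnly:
    """Every provider-specific hook forwards to self.delegate."""

    delegate: ToyDelegate | None = None

    def init_delegate(self, provider: Literal["openai", "anthropic"]) -> None:
        match provider:
            case "openai":
                self.delegate = ToyDelegate("openai-tools")
            case "anthropic":
                self.delegate = ToyDelegate("anthropic-tools")

    def parse_tools(self, tools: list[dict]) -> list[dict]:
        assert self.delegate, "init_delegate must run first"
        return self.delegate.parse_tools(tools)


model = ToyDelegateOnly()
model.init_delegate("openai")
print(model.parse_tools([{"name": "search"}]))
```

The diff also copies the delegate's `_client_registry_key_model_specific` onto the wrapper, so token-based rate limiting accounts against the delegate's budget rather than a separate one.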
model_library/base/output.py
CHANGED

@@ -118,6 +118,48 @@ class QueryResultCost(BaseModel):
     )
 
 
+class RateLimit(BaseModel):
+    """Rate limit information"""
+
+    request_limit: int | None = None
+    request_remaining: int | None = None
+
+    token_limit: int | None = None
+    token_limit_input: int | None = None
+    token_limit_output: int | None = None
+
+    token_remaining: int | None = None
+    token_remaining_input: int | None = None
+    token_remaining_output: int | None = None
+
+    unix_timestamp: float
+    raw: Any
+
+    @computed_field
+    @property
+    def token_limit_total(self) -> int:
+        if self.token_limit:
+            return self.token_limit
+        else:
+            return (self.token_limit_input or 0) + (self.token_limit_output or 0)
+
+    @computed_field
+    @property
+    def token_remaining_total(self) -> int:
+        if self.token_remaining:
+            return self.token_remaining
+        else:
+            return (self.token_remaining_input or 0) + (
+                self.token_remaining_output or 0
+            )
+
+    @override
+    def __repr__(self):
+        attrs = vars(self).copy()
+        attrs.pop("raw", None)
+        return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2, sort_dicts=False)}\n)"
+
+
 class QueryResultMetadata(BaseModel):
     """
     Metadata for a query: token usage and timing.
@@ -131,6 +173,7 @@ class QueryResultMetadata(BaseModel):
     reasoning_tokens: int | None = None
     cache_read_tokens: int | None = None
     cache_write_tokens: int | None = None
+    extra: dict[str, Any] = {}
 
     @property
     def default_duration_seconds(self) -> float:
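The computed totals on `RateLimit` normalize the two shapes providers report: a single combined token budget, or separate input/output budgets. A trimmed-down sketch of the same fallback logic, runnable on its own with pydantic v2:

```python
from pydantic import BaseModel, computed_field


class RateLimitSketch(BaseModel):
    """Trimmed-down copy of the diff's RateLimit, for illustration only."""

    token_remaining: int | None = None
    token_remaining_input: int | None = None
    token_remaining_output: int | None = None

    @computed_field
    @property
    def token_remaining_total(self) -> int:
        # Prefer the provider's combined figure; otherwise sum the split ones.
        if self.token_remaining:
            return self.token_remaining
        return (self.token_remaining_input or 0) + (self.token_remaining_output or 0)


# Provider reports one combined budget:
print(RateLimitSketch(token_remaining=9_000).token_remaining_total)  # 9000
# Provider reports split input/output budgets:
print(
    RateLimitSketch(
        token_remaining_input=8_000, token_remaining_output=1_000
    ).token_remaining_total
)  # 9000
```

One subtlety carried over from the diff: the combined field is truthiness-checked, so an explicit `token_remaining=0` falls through to the summed fallback rather than reporting zero.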
model_library/base/utils.py
CHANGED

@@ -1,4 +1,6 @@
 import json
+import re
+from datetime import datetime, timedelta
 from typing import Any, Sequence, TypeVar
 
 from pydantic import BaseModel
@@ -77,3 +79,36 @@ def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False)
 
     processed_items = [f" {process_item(item)}" for item in input]
     return "\n" + "\n".join(processed_items) if processed_items else ""
+
+
+TIME_PATTERN = re.compile(r"^(\d+(?:\.\d+)?)([a-zA-Z]+)$")
+UNIT_TO_SECONDS = {
+    "ms": 0.001,
+    "s": 1,
+    "m": 60,
+    "h": 3600,
+}
+
+
+def to_timestamp(input_str: str, server_now: datetime) -> int:
+    """Converts a header string into a server-relative Unix timestamp in ms."""
+    input_str = input_str.strip()
+
+    # ISO Timestamp (e.g. 2026-01-09T21:58:01Z)
+    if "T" in input_str and "-" in input_str:
+        try:
+            dt = datetime.fromisoformat(input_str.replace("Z", "+00:00"))
+            return int(dt.timestamp() * 1000)
+        except ValueError:
+            pass
+
+    # Duration (e.g. 10s, 6ms)
+    match = TIME_PATTERN.match(input_str)
+    if match:
+        value, unit = match.groups()
+        offset_seconds = float(value) * UNIT_TO_SECONDS.get(unit.lower(), 0)
+        # Add duration to the SERVER'S provided date
+        dt = server_now + timedelta(seconds=offset_seconds)
+        return int(dt.timestamp() * 1000)
+
+    raise ValueError(f"Unsupported time format: {input_str}")
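Rate-limit reset headers arrive as either an absolute ISO instant or a relative duration; `to_timestamp` normalizes both to milliseconds anchored to the server's own clock, so client clock skew doesn't matter. A quick usage sketch (the header values are invented):

```python
from datetime import datetime, timezone

from model_library.base.utils import to_timestamp

# Pretend the server's Date header parsed to this instant.
server_now = datetime(2026, 1, 9, 21, 57, 51, tzinfo=timezone.utc)

# Duration-style reset header ("resets in 10 seconds"):
print(to_timestamp("10s", server_now))                   # server_now + 10s, in ms
# Absolute ISO reset header:
print(to_timestamp("2026-01-09T21:58:01Z", server_now))  # same instant, in ms
```

Here both calls print 1767995881000, since the two formats describe the same reset instant; anything that matches neither branch raises `ValueError`.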
model_library/config/alibaba_models.yaml
CHANGED

@@ -1,17 +1,51 @@
-
-
-
-
+base-config:
+  company: Alibaba
+  open_source: false
+  supports:
+    temperature: true
+  metadata:
+    available_for_everyone: false
+    available_as_evaluator: false
+  default_parameters:
+    temperature: 0.7
+  properties:
+    reasoning_model: false
 
+qwen-3-vl-models:
+  base-config:
     supports:
-
+      images: true
+
+  alibaba/qwen3-vl-plus-2025-09-23:
+    label: Qwen 3 VL Plus
+    open_source: true
+    description: Qwen 3 VL Plus (2025-09-23)
+    release_date: 2025-09-23
     metadata:
-
-      available_as_evaluator: false
-    default_parameters:
-      temperature: 0.7
+      deprecated: true
     properties:
+      context_window: 262_144
+      max_tokens: 32_768
+      training_cutoff: ""
       reasoning_model: false
+    costs_per_million_token:
+      input: 0.2
+      output: 1.6
+
+qwen-3-max-models:
+  base-config:
+    supports:
+      tools: true
+      images: false
+
+  alibaba/qwen3-max-2026-01-23:
+    label: Qwen 3 Max Thinking
+    description: Qwen 3 Max with enhanced reasoning capabilities
+    release_date: 2026-01-23
+    properties:
+      context_window: 256_000
+      max_tokens: 32_000
+      reasoning_model: true
 
   alibaba/qwen3-max-preview:
     label: Qwen 3 Max Preview
@@ -20,15 +54,7 @@ qwen-models:
     properties:
       context_window: 262_144
       max_tokens: 65_536
-
-    costs_per_million_token:
-      input: 1.2
-      output: 6
-    supports:
-      images: false
-      tools: true
-    metadata:
-      available_for_everyone: false
+      reasoning_model: true
 
   alibaba/qwen3-max-2025-09-23:
     label: Qwen 3 Max 2025-09-23
@@ -39,14 +65,6 @@ qwen-models:
       max_tokens: 65_536
      training_cutoff: ""
       reasoning_model: true
-    costs_per_million_token:
-      input: 1.2
-      output: 6
-    supports:
-      images: false
-      tools: true
-    metadata:
-      available_for_everyone: false
 
   alibaba/qwen3-max:
     label: Qwen 3 Max
@@ -57,34 +75,3 @@ qwen-models:
       max_tokens: 65_536
       training_cutoff: ""
       reasoning_model: false
-    costs_per_million_token:
-      input: 1.2
-      output: 6
-      cache:
-        read_discount: 0.8
-        write_markup: 1
-      context:
-        threshold: 32_000
-        input: 2.4
-        output: 12
-    supports:
-      images: false
-      tools: true
-    metadata:
-      available_for_everyone: false
-
-  alibaba/qwen3-vl-plus-2025-09-23:
-    label: Qwen 3 VL Plus
-    open_source: true
-    description: Qwen 3 VL Plus (2025-09-23)
-    release_date: 2025-09-23
-    properties:
-      context_window: 262_144
-      max_tokens: 32_768
-      training_cutoff: ""
-      reasoning_model: false
-    costs_per_million_token:
-      input: 0.2
-      output: 1.6
-    supports:
-      images: true