model-library 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. model_library/base/base.py +141 -62
  2. model_library/base/delegate_only.py +77 -10
  3. model_library/base/output.py +43 -0
  4. model_library/base/utils.py +35 -0
  5. model_library/config/alibaba_models.yaml +49 -57
  6. model_library/config/all_models.json +353 -120
  7. model_library/config/anthropic_models.yaml +2 -1
  8. model_library/config/kimi_models.yaml +30 -3
  9. model_library/config/mistral_models.yaml +2 -0
  10. model_library/config/openai_models.yaml +15 -23
  11. model_library/config/together_models.yaml +2 -0
  12. model_library/config/xiaomi_models.yaml +43 -0
  13. model_library/config/zai_models.yaml +27 -3
  14. model_library/exceptions.py +3 -77
  15. model_library/providers/ai21labs.py +12 -8
  16. model_library/providers/alibaba.py +17 -8
  17. model_library/providers/amazon.py +49 -16
  18. model_library/providers/anthropic.py +128 -48
  19. model_library/providers/azure.py +22 -10
  20. model_library/providers/cohere.py +7 -7
  21. model_library/providers/deepseek.py +8 -8
  22. model_library/providers/fireworks.py +7 -8
  23. model_library/providers/google/batch.py +14 -10
  24. model_library/providers/google/google.py +57 -30
  25. model_library/providers/inception.py +7 -7
  26. model_library/providers/kimi.py +18 -8
  27. model_library/providers/minimax.py +15 -17
  28. model_library/providers/mistral.py +20 -8
  29. model_library/providers/openai.py +99 -22
  30. model_library/providers/openrouter.py +34 -0
  31. model_library/providers/perplexity.py +7 -7
  32. model_library/providers/together.py +7 -8
  33. model_library/providers/vals.py +12 -6
  34. model_library/providers/vercel.py +34 -0
  35. model_library/providers/xai.py +47 -42
  36. model_library/providers/xiaomi.py +34 -0
  37. model_library/providers/zai.py +38 -8
  38. model_library/register_models.py +5 -0
  39. model_library/registry_utils.py +48 -17
  40. model_library/retriers/__init__.py +0 -0
  41. model_library/retriers/backoff.py +73 -0
  42. model_library/retriers/base.py +225 -0
  43. model_library/retriers/token.py +427 -0
  44. model_library/retriers/utils.py +11 -0
  45. model_library/settings.py +1 -1
  46. model_library/utils.py +17 -7
  47. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/METADATA +2 -1
  48. model_library-0.1.9.dist-info/RECORD +73 -0
  49. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/WHEEL +1 -1
  50. model_library-0.1.7.dist-info/RECORD +0 -64
  51. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/licenses/LICENSE +0 -0
  52. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/top_level.txt +0 -0
--- a/model_library/base/base.py
+++ b/model_library/base/base.py
@@ -1,9 +1,12 @@
+import hashlib
 import io
 import logging
+import threading
 import time
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import Awaitable
+from math import ceil
 from pprint import pformat
 from typing import (
     Any,
@@ -14,7 +17,7 @@ from typing import (
 )
 
 import tiktoken
-from pydantic import model_serializer
+from pydantic import SecretStr, model_serializer
 from pydantic.main import BaseModel
 from tiktoken.core import Encoding
 from typing_extensions import override
@@ -34,15 +37,15 @@ from model_library.base.output import (
     QueryResult,
     QueryResultCost,
     QueryResultMetadata,
+    RateLimit,
 )
 from model_library.base.utils import (
     get_pretty_input_types,
     serialize_for_tokenizing,
 )
-from model_library.exceptions import (
-    ImmediateRetryException,
-    retry_llm_call,
-)
+from model_library.retriers.backoff import ExponentialBackoffRetrier
+from model_library.retriers.base import BaseRetrier, R, RetrierType, retry_decorator
+from model_library.retriers.token import TokenRetrier
 from model_library.utils import truncate_str
 
 PydanticT = TypeVar("PydanticT", bound=BaseModel)
@@ -56,16 +59,24 @@ class ProviderConfig(BaseModel):
         return self.__dict__
 
 
-DEFAULT_MAX_TOKENS = 2048
+class TokenRetryParams(BaseModel):
+    input_modifier: float
+    output_modifier: float
+
+    use_dynamic_estimate: bool = True
+
+    limit: int
+    limit_refresh_seconds: Literal[60] = 60
 
 
 class LLMConfig(BaseModel):
-    max_tokens: int = DEFAULT_MAX_TOKENS
+    max_tokens: int | None = None
     temperature: float | None = None
     top_p: float | None = None
     top_k: int | None = None
     reasoning: bool = False
     reasoning_effort: str | bool | None = None
+    compute_effort: str | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -75,11 +86,18 @@ class LLMConfig(BaseModel):
     native: bool = True
     provider_config: ProviderConfig | None = None
     registry_key: str | None = None
+    custom_api_key: SecretStr | None = None
+
 
+class DelegateConfig(BaseModel):
+    base_url: str
+    api_key: SecretStr
 
-RetrierType = Callable[[Callable[..., Awaitable[Any]]], Callable[..., Awaitable[Any]]]
 
-R = TypeVar("R")  # return type
+# shared across all subclasses and instances
+# hash(provider + api_key) -> client
+client_registry_lock = threading.Lock()
+client_registry: dict[tuple[str, str], Any] = {}
 
 
 class LLM(ABC):
@@ -88,6 +106,34 @@ class LLM(ABC):
     LLM call errors should be raised as exceptions
     """
 
+    @abstractmethod
+    def get_client(self, api_key: str | None = None) -> Any:
+        """
+        Returns the cached instance of the appropriate SDK client.
+        Subclasses should implement this method and:
+        - if api_key is provided, initialize their client and call assign_client(client).
+        - else return super().get_client()
+        """
+        global client_registry
+        return client_registry[self._client_registry_key]
+
+    def assign_client(self, client: object) -> None:
+        """Thread-safe assignment to the client registry"""
+        global client_registry
+
+        if self._client_registry_key not in client_registry:
+            with client_registry_lock:
+                if self._client_registry_key not in client_registry:
+                    client_registry[self._client_registry_key] = client
+
+    def has_client(self) -> bool:
+        return self._client_registry_key in client_registry
+
+    @abstractmethod
+    def _get_default_api_key(self) -> str:
+        """Return the api key from model_library.settings"""
+        ...
+
     def __init__(
         self,
         model_name: str,
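The new registry gives every `LLM` subclass one shared SDK client per `(provider, sha256(api_key))` pair: `get_client` looks the client up, and `assign_client` populates the registry with double-checked locking so concurrent constructors race safely. A minimal sketch of the contract the docstring describes, assuming `LLM` is importable from `model_library.base.base`; `FakeClient` and `FakeProviderModel` are hypothetical and the other abstract methods are omitted:

```python
# Hedged sketch of the get_client contract; FakeClient is an illustrative
# stand-in for a real SDK client, not part of model_library.
from typing import Any

from model_library.base.base import LLM


class FakeClient:
    def __init__(self, api_key: str) -> None:
        self.api_key = api_key


class FakeProviderModel(LLM):  # other abstract methods omitted for brevity
    def _get_default_api_key(self) -> str:
        # Real providers read this from model_library.settings
        return "sk-placeholder"

    def get_client(self, api_key: str | None = None) -> Any:
        if api_key is not None and not self.has_client():
            # First caller for this (provider, key-hash) builds the client...
            self.assign_client(FakeClient(api_key=api_key))
        # ...everyone else receives the cached instance from the shared registry.
        return super().get_client()
```

Two instances constructed with the same provider and key therefore share one client, while a different `custom_api_key` hashes to a separate cache entry.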
@@ -103,13 +149,14 @@ class LLM(ABC):
         config = config or LLMConfig()
         self._registry_key = config.registry_key
 
-        self.max_tokens: int = config.max_tokens
+        self.max_tokens: int | None = config.max_tokens
         self.temperature: float | None = config.temperature
         self.top_p: float | None = config.top_p
         self.top_k: int | None = config.top_k
 
         self.reasoning: bool = config.reasoning
         self.reasoning_effort: str | bool | None = config.reasoning_effort
+        self.compute_effort: str | None = config.compute_effort
 
         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
@@ -131,21 +178,33 @@ class LLM(ABC):
         self.logger: logging.Logger = logging.getLogger(
             f"llm.{provider}.{model_name}<instance={self.instance_id}>"
         )
-        self.custom_retrier: Callable[..., RetrierType] | None = retry_llm_call
+        self.custom_retrier: RetrierType | None = None
+
+        self.token_retry_params = None
+        # set _client_registry_key after initializing delegate
+        if not self.native:
+            return
+
+        if config.custom_api_key:
+            raw_key = config.custom_api_key.get_secret_value()
+        else:
+            raw_key = self._get_default_api_key()
+
+        key_hash = hashlib.sha256(raw_key.encode()).hexdigest()
+        self._client_registry_key = (self.provider, key_hash)
+        self._client_registry_key_model_specific = (
+            f"{self.provider}.{self.model_name}",
+            key_hash,
+        )
+        self.get_client(api_key=raw_key)
 
     @override
     def __repr__(self):
         attrs = vars(self).copy()
         attrs.pop("logger", None)
         attrs.pop("custom_retrier", None)
-        attrs.pop("_key", None)
         return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2, sort_dicts=False)}\n)"
 
-    @abstractmethod
-    def get_client(self) -> object:
-        """Return the instance of the appropriate SDK client."""
-        ...
-
     @staticmethod
     async def timer_wrapper(func: Callable[[], Awaitable[R]]) -> tuple[R, float]:
         """
@@ -155,43 +214,6 @@ class LLM(ABC):
         result = await func()
         return result, round(time.perf_counter() - start, 4)
 
-    @staticmethod
-    async def immediate_retry_wrapper(
-        func: Callable[[], Awaitable[R]],
-        logger: logging.Logger,
-    ) -> R:
-        """
-        Retry the query immediately
-        """
-        MAX_IMMEDIATE_RETRIES = 10
-        retries = 0
-        while True:
-            try:
-                return await func()
-            except ImmediateRetryException as e:
-                if retries >= MAX_IMMEDIATE_RETRIES:
-                    logger.error(f"Query reached max immediate retries {retries}: {e}")
-                    raise Exception(
-                        f"Query reached max immediate retries {retries}: {e}"
-                    ) from e
-                retries += 1
-
-                logger.warning(
-                    f"Query retried immediately {retries}/{MAX_IMMEDIATE_RETRIES}: {e}"
-                )
-
-    @staticmethod
-    async def backoff_retry_wrapper(
-        func: Callable[..., Awaitable[R]],
-        backoff_retrier: RetrierType | None,
-    ) -> R:
-        """
-        Retry the query with backoff
-        """
-        if not backoff_retrier:
-            return await func()
-        return await backoff_retrier(func)()
-
     async def delegate_query(
         self,
         input: Sequence[InputItem],
@@ -276,15 +298,38 @@ class LLM(ABC):
             return await LLM.timer_wrapper(query_func)
 
         async def immediate_retry() -> tuple[QueryResult, float]:
-            return await LLM.immediate_retry_wrapper(timed_query, query_logger)
-
-        async def backoff_retry() -> tuple[QueryResult, float]:
-            backoff_retrier = (
-                self.custom_retrier(query_logger) if self.custom_retrier else None
-            )
-            return await LLM.backoff_retry_wrapper(immediate_retry, backoff_retrier)
+            return await BaseRetrier.immediate_retry_wrapper(timed_query, query_logger)
+
+        async def default_retry() -> tuple[QueryResult, float]:
+            if self.token_retry_params:
+                (
+                    estimate_input_tokens,
+                    estimate_output_tokens,
+                ) = await self.estimate_query_tokens(
+                    input,
+                    tools=tools,
+                    **kwargs,
+                )
+                retrier = TokenRetrier(
+                    logger=query_logger,
+                    client_registry_key=self._client_registry_key_model_specific,
+                    estimate_input_tokens=estimate_input_tokens,
+                    estimate_output_tokens=estimate_output_tokens,
+                    dynamic_estimate_instance_id=self.instance_id
+                    if self.token_retry_params.use_dynamic_estimate
+                    else None,
+                )
+            else:
+                retrier = ExponentialBackoffRetrier(logger=query_logger)
+            return await retry_decorator(retrier)(immediate_retry)()
+
+        run_with_retry = (
+            default_retry
+            if not self.custom_retrier
+            else self.custom_retrier(immediate_retry)
+        )
 
-        output, duration = await backoff_retry()
+        output, duration = await run_with_retry()
         output.metadata.duration_seconds = duration
         output.metadata.cost = await self._calculate_cost(output.metadata)
 
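When `custom_retrier` is set it bypasses both built-in retriers entirely: it is applied directly to `immediate_retry` and must match the `RetrierType` shape, a decorator from one async callable to another. A hedged sketch of what a user-supplied retrier could look like; the name and retry policy are illustrative, not part of model_library:

```python
# Illustrative custom retrier: up to 3 attempts with 1s/2s pauses.
import asyncio
import logging
from collections.abc import Awaitable, Callable
from typing import TypeVar

R = TypeVar("R")
logger = logging.getLogger("custom_retrier")


def three_attempts(func: Callable[[], Awaitable[R]]) -> Callable[[], Awaitable[R]]:
    async def wrapped() -> R:
        for attempt in range(3):
            try:
                return await func()
            except Exception as e:  # a real policy would filter retryable errors
                if attempt == 2:
                    raise
                logger.warning("attempt %d failed (%s), retrying", attempt + 1, e)
                await asyncio.sleep(2**attempt)
        raise AssertionError("unreachable")

    return wrapped


# model.custom_retrier = three_attempts  # replaces TokenRetrier/backoff entirely
```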
@@ -293,6 +338,16 @@ class LLM(ABC):
 
         return output
 
+    async def init_token_retry(self, token_retry_params: TokenRetryParams) -> None:
+        self.token_retry_params = token_retry_params
+        await TokenRetrier.init_remaining_tokens(
+            client_registry_key=self._client_registry_key_model_specific,
+            limit=self.token_retry_params.limit,
+            limit_refresh_seconds=self.token_retry_params.limit_refresh_seconds,
+            get_rate_limit_func=self.get_rate_limit,
+            logger=self.logger,
+        )
+
     async def _calculate_cost(
         self,
         metadata: QueryResultMetadata,
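`init_token_retry` is the opt-in switch for the token-budget path above: it stores the params on the instance and seeds `TokenRetrier`'s shared remaining-token state for this model and key. A hedged usage sketch, where the modifier and limit values are illustrative and `model` stands for any initialized non-delegate `LLM`; note that `limit_refresh_seconds` is pinned to 60 by its `Literal[60]` type, so only per-minute budgets are expressible:

```python
# Sketch: opting a model into token-based retries (values are illustrative).
from model_library.base.base import LLM, TokenRetryParams


async def enable_token_retries(model: LLM) -> None:
    params = TokenRetryParams(
        input_modifier=1.2,   # pad the tokenizer's count by 20%
        output_modifier=2.0,  # budget output at 2x the padded input
        limit=2_000_000,      # per-minute token budget for this model + key
    )
    await model.init_token_retry(params)
```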
@@ -438,6 +493,30 @@ class LLM(ABC):
         """Upload a file to the model provider"""
         ...
 
+    async def get_rate_limit(self) -> RateLimit | None:
+        """Get the rate limit for the model provider"""
+        return None
+
+    async def estimate_query_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> tuple[int, int]:
+        """Pessimistically estimate the number of tokens required for a query"""
+        assert self.token_retry_params
+
+        # TODO: when passing in images and files, we really need to take that into account when calculating the output tokens!!
+
+        input_tokens = (
+            await self.count_tokens(input, history=[], tools=tools, **kwargs)
+            * self.token_retry_params.input_modifier
+        )
+
+        output_tokens = input_tokens * self.token_retry_params.output_modifier
+        return ceil(input_tokens), ceil(output_tokens)
+
     async def get_encoding(self) -> Encoding:
         """Get the appropriate tokenizer"""
 
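The estimate itself is just two multipliers and a ceiling, so the reservation is easy to predict. With the illustrative modifiers from the earlier sketch, a prompt counted at 1,000 tokens reserves 1,200 input plus 2,400 output tokens:

```python
from math import ceil

counted = 1000                # what count_tokens returned
input_est = counted * 1.2     # input_modifier  -> 1200.0
output_est = input_est * 2.0  # output_modifier -> 2400.0
assert (ceil(input_est), ceil(output_est)) == (1200, 2400)
```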
--- a/model_library/base/delegate_only.py
+++ b/model_library/base/delegate_only.py
@@ -13,6 +13,7 @@ from model_library.base import (
     QueryResult,
     ToolDefinition,
 )
+from model_library.base.base import DelegateConfig
 
 
 class DelegateOnlyException(Exception):
@@ -21,17 +22,51 @@ class DelegateOnlyException(Exception):
     delegate-only model.
     """
 
-    DEFAULT_MESSAGE: str = "This model supports only delegate-only functionality. Only the query() method should be used."
+    DEFAULT_MESSAGE: str = "This model is running in delegate-only mode, certain functionality is not supported."
 
     def __init__(self, message: str | None = None):
        super().__init__(message or DelegateOnlyException.DEFAULT_MESSAGE)
 
 
 class DelegateOnly(LLM):
-    @override
-    def get_client(self) -> None:
+    def _get_default_api_key(self) -> str:
         raise DelegateOnlyException()
 
+    @override
+    def get_client(self, api_key: str | None = None) -> None:
+        assert self.delegate
+        return self.delegate.get_client()
+
+    def init_delegate(
+        self,
+        config: LLMConfig | None,
+        delegate_config: DelegateConfig,
+        delegate_provider: Literal["openai", "anthropic"],
+        use_completions: bool = True,
+    ) -> None:
+        from model_library.providers.anthropic import AnthropicModel
+        from model_library.providers.openai import OpenAIModel
+
+        match delegate_provider:
+            case "openai":
+                self.delegate = OpenAIModel(
+                    model_name=self.model_name,
+                    provider=self.provider,
+                    config=config,
+                    use_completions=use_completions,
+                    delegate_config=delegate_config,
+                )
+            case "anthropic":
+                self.delegate = AnthropicModel(
+                    model_name=self.model_name,
+                    provider=self.provider,
+                    config=config,
+                    delegate_config=delegate_config,
+                )
+        self._client_registry_key_model_specific = (
+            self.delegate._client_registry_key_model_specific
+        )
+
     def __init__(
         self,
         model_name: str,
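`init_delegate` wires a delegate-only model to an OpenAI- or Anthropic-compatible backend, with `DelegateConfig` carrying the endpoint and the key as a `SecretStr`. A hedged sketch with placeholder URL and key, where `model` stands for any `DelegateOnly` subclass instance:

```python
# Sketch only: the endpoint and key below are placeholders.
from pydantic import SecretStr

from model_library.base.base import DelegateConfig
from model_library.base.delegate_only import DelegateOnly


def wire_delegate(model: DelegateOnly) -> None:
    model.init_delegate(
        config=None,  # a default LLMConfig is created downstream
        delegate_config=DelegateConfig(
            base_url="https://example.invalid/v1",
            api_key=SecretStr("sk-placeholder"),
        ),
        delegate_provider="openai",
        use_completions=True,  # only consulted for the "openai" case
    )
```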
@@ -42,6 +77,11 @@ class DelegateOnly(LLM):
         config = config or LLMConfig()
         config.native = False
         super().__init__(model_name, provider, config=config)
+        config.native = True
+
+    def _get_extra_body(self) -> dict[str, Any]:
+        """Build extra body parameters for delegate-specific features."""
+        return {}
 
     @override
     async def _query_impl(
  async def _query_impl(
@@ -53,9 +93,12 @@ class DelegateOnly(LLM):
53
93
  **kwargs: object,
54
94
  ) -> QueryResult:
55
95
  assert self.delegate
56
-
57
96
  return await self.delegate_query(
58
- input, tools=tools, query_logger=query_logger, **kwargs
97
+ input,
98
+ tools=tools,
99
+ query_logger=query_logger,
100
+ extra_body=self._get_extra_body(),
101
+ **kwargs,
59
102
  )
60
103
 
61
104
  @override
@@ -66,7 +109,8 @@ class DelegateOnly(LLM):
         tools: list[ToolDefinition],
         **kwargs: object,
     ) -> dict[str, Any]:
-        raise DelegateOnlyException()
+        assert self.delegate
+        return await self.delegate.build_body(input, tools=tools, **kwargs)
 
     @override
     async def parse_input(
  async def parse_input(
@@ -74,28 +118,32 @@ class DelegateOnly(LLM):
74
118
  input: Sequence[InputItem],
75
119
  **kwargs: Any,
76
120
  ) -> Any:
77
- raise DelegateOnlyException()
121
+ assert self.delegate
122
+ return await self.delegate.parse_input(input, **kwargs)
78
123
 
79
124
  @override
80
125
  async def parse_image(
81
126
  self,
82
127
  image: FileInput,
83
128
  ) -> Any:
84
- raise DelegateOnlyException()
129
+ assert self.delegate
130
+ return await self.delegate.parse_image(image)
85
131
 
86
132
  @override
87
133
  async def parse_file(
88
134
  self,
89
135
  file: FileInput,
90
136
  ) -> Any:
91
- raise DelegateOnlyException()
137
+ assert self.delegate
138
+ return await self.delegate.parse_file(file)
92
139
 
93
140
  @override
94
141
  async def parse_tools(
95
142
  self,
96
143
  tools: list[ToolDefinition],
97
144
  ) -> Any:
98
- raise DelegateOnlyException()
145
+ assert self.delegate
146
+ return await self.delegate.parse_tools(tools)
99
147
 
100
148
  @override
101
149
  async def upload_file(
@@ -106,3 +154,22 @@ class DelegateOnly(LLM):
         type: Literal["image", "file"] = "file",
     ) -> FileWithId:
         raise DelegateOnlyException()
+
+    @override
+    async def get_rate_limit(self) -> Any:
+        assert self.delegate
+        return await self.delegate.get_rate_limit()
+
+    @override
+    async def count_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        history: Sequence[InputItem] = [],
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> int:
+        assert self.delegate
+        return await self.delegate.count_tokens(
+            input, history=history, tools=tools, **kwargs
+        )
--- a/model_library/base/output.py
+++ b/model_library/base/output.py
@@ -118,6 +118,48 @@ class QueryResultCost(BaseModel):
     )
 
 
+class RateLimit(BaseModel):
+    """Rate limit information"""
+
+    request_limit: int | None = None
+    request_remaining: int | None = None
+
+    token_limit: int | None = None
+    token_limit_input: int | None = None
+    token_limit_output: int | None = None
+
+    token_remaining: int | None = None
+    token_remaining_input: int | None = None
+    token_remaining_output: int | None = None
+
+    unix_timestamp: float
+    raw: Any
+
+    @computed_field
+    @property
+    def token_limit_total(self) -> int:
+        if self.token_limit:
+            return self.token_limit
+        else:
+            return (self.token_limit_input or 0) + (self.token_limit_output or 0)
+
+    @computed_field
+    @property
+    def token_remaining_total(self) -> int:
+        if self.token_remaining:
+            return self.token_remaining
+        else:
+            return (self.token_remaining_input or 0) + (
+                self.token_remaining_output or 0
+            )
+
+    @override
+    def __repr__(self):
+        attrs = vars(self).copy()
+        attrs.pop("raw", None)
+        return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2, sort_dicts=False)}\n)"
+
+
 class QueryResultMetadata(BaseModel):
     """
     Metadata for a query: token usage and timing.
@@ -131,6 +173,7 @@ class QueryResultMetadata(BaseModel):
     reasoning_tokens: int | None = None
     cache_read_tokens: int | None = None
     cache_write_tokens: int | None = None
+    extra: dict[str, Any] = {}
 
     @property
     def default_duration_seconds(self) -> float:
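The computed totals on `RateLimit` let callers ignore whether a provider reports one combined token budget or separate input/output budgets: each `*_total` property prefers the combined field and otherwise sums the split ones. For example:

```python
import time

from model_library.base.output import RateLimit

rl = RateLimit(
    token_remaining_input=150_000,
    token_remaining_output=50_000,
    unix_timestamp=time.time(),
    raw={},  # untouched provider payload; excluded from __repr__
)
assert rl.token_remaining_total == 200_000  # summed from the split fields
assert rl.token_limit_total == 0            # no limit fields were reported
```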
--- a/model_library/base/utils.py
+++ b/model_library/base/utils.py
@@ -1,4 +1,6 @@
 import json
+import re
+from datetime import datetime, timedelta
 from typing import Any, Sequence, TypeVar
 
 from pydantic import BaseModel
@@ -77,3 +79,36 @@ def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False)
 
     processed_items = [f" {process_item(item)}" for item in input]
     return "\n" + "\n".join(processed_items) if processed_items else ""
+
+
+TIME_PATTERN = re.compile(r"^(\d+(?:\.\d+)?)([a-zA-Z]+)$")
+UNIT_TO_SECONDS = {
+    "ms": 0.001,
+    "s": 1,
+    "m": 60,
+    "h": 3600,
+}
+
+
+def to_timestamp(input_str: str, server_now: datetime) -> int:
+    """Converts a header string into a server-relative Unix timestamp in ms."""
+    input_str = input_str.strip()
+
+    # ISO Timestamp (e.g. 2026-01-09T21:58:01Z)
+    if "T" in input_str and "-" in input_str:
+        try:
+            dt = datetime.fromisoformat(input_str.replace("Z", "+00:00"))
+            return int(dt.timestamp() * 1000)
+        except ValueError:
+            pass
+
+    # Duration (e.g. 10s, 6ms)
+    match = TIME_PATTERN.match(input_str)
+    if match:
+        value, unit = match.groups()
+        offset_seconds = float(value) * UNIT_TO_SECONDS.get(unit.lower(), 0)
+        # Add duration to the SERVER'S provided date
+        dt = server_now + timedelta(seconds=offset_seconds)
+        return int(dt.timestamp() * 1000)
+
+    raise ValueError(f"Unsupported time format: {input_str}")
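`to_timestamp` accepts either an absolute ISO timestamp or a relative duration, and anchors durations to the server's clock rather than the local one. For example:

```python
from datetime import datetime, timezone

from model_library.base.utils import to_timestamp

server_now = datetime(2026, 1, 9, 21, 58, 1, tzinfo=timezone.utc)

# Duration form: the offset is added to the server's clock.
assert to_timestamp("30s", server_now) == int((server_now.timestamp() + 30) * 1000)

# ISO form: parsed directly; server_now is not consulted.
assert to_timestamp("2026-01-09T21:58:01Z", server_now) == int(
    server_now.timestamp() * 1000
)
```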
--- a/model_library/config/alibaba_models.yaml
+++ b/model_library/config/alibaba_models.yaml
@@ -1,17 +1,56 @@
-qwen-models:
-  base-config:
-    company: Alibaba
-    open_source: false
+base-config:
+  company: Alibaba
+  open_source: false
+  supports:
+    temperature: true
+  metadata:
+    available_for_everyone: false
+    available_as_evaluator: false
+  default_parameters:
+    temperature: 0.7
+  properties:
+    reasoning_model: false
 
+qwen-3-vl-models:
+  base-config:
     supports:
-      temperature: true
+      images: true
+
+  alibaba/qwen3-vl-plus-2025-09-23:
+    label: Qwen 3 VL Plus
+    open_source: true
+    description: Qwen 3 VL Plus (2025-09-23)
+    release_date: 2025-09-23
     metadata:
-      available_for_everyone: false
-      available_as_evaluator: false
-    default_parameters:
-      temperature: 0.7
+      deprecated: true
     properties:
+      context_window: 262_144
+      max_tokens: 32_768
+      training_cutoff: ""
       reasoning_model: false
+    costs_per_million_token:
+      input: 0.2
+      output: 1.6
+
+qwen-3-max-models:
+  base-config:
+    supports:
+      tools: true
+      images: false
+    # only applies for <32K input tokens
+    # TODO: add thresholds
+    costs_per_million_token:
+      input: 1.2
+      output: 6.0
+
+  alibaba/qwen3-max-2026-01-23:
+    label: Qwen 3 Max Thinking
+    description: Qwen 3 Max with enhanced reasoning capabilities
+    release_date: 2026-01-23
+    properties:
+      context_window: 256_000
+      max_tokens: 32_000
+      reasoning_model: true
 
   alibaba/qwen3-max-preview:
     label: Qwen 3 Max Preview
@@ -20,15 +59,7 @@ qwen-models:
     properties:
       context_window: 262_144
       max_tokens: 65_536
-      training_cutoff: ""
-    costs_per_million_token:
-      input: 1.2
-      output: 6
-    supports:
-      images: false
-      tools: true
-    metadata:
-      available_for_everyone: false
+      reasoning_model: true
 
   alibaba/qwen3-max-2025-09-23:
     label: Qwen 3 Max 2025-09-23
@@ -39,14 +70,6 @@ qwen-models:
       max_tokens: 65_536
       training_cutoff: ""
       reasoning_model: true
-    costs_per_million_token:
-      input: 1.2
-      output: 6
-    supports:
-      images: false
-      tools: true
-    metadata:
-      available_for_everyone: false
 
   alibaba/qwen3-max:
     label: Qwen 3 Max
@@ -57,34 +80,3 @@ qwen-models:
       max_tokens: 65_536
       training_cutoff: ""
       reasoning_model: false
-    costs_per_million_token:
-      input: 1.2
-      output: 6
-      cache:
-        read_discount: 0.8
-        write_markup: 1
-      context:
-        threshold: 32_000
-        input: 2.4
-        output: 12
-    supports:
-      images: false
-      tools: true
-    metadata:
-      available_for_everyone: false
-
-  alibaba/qwen3-vl-plus-2025-09-23:
-    label: Qwen 3 VL Plus
-    open_source: true
-    description: Qwen 3 VL Plus (2025-09-23)
-    release_date: 2025-09-23
-    properties:
-      context_window: 262_144
-      max_tokens: 32_768
-      training_cutoff: ""
-      reasoning_model: false
-    costs_per_million_token:
-      input: 0.2
-      output: 1.6
-    supports:
-      images: true
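The restructured YAML drops per-model repetition in favor of layered defaults: a file-level `base-config`, an optional group-level `base-config`, and the model entry itself, with later layers overriding earlier ones. A hypothetical sketch of that resolution; the real merge logic lives in `model_library/registry_utils.py` and may differ:

```python
# Illustrative only: how layered base-config defaults could resolve.
from typing import Any


def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


file_base = {"company": "Alibaba", "supports": {"temperature": True}}
group_base = {"supports": {"images": True}}  # e.g. qwen-3-vl-models
entry = {"label": "Qwen 3 VL Plus", "open_source": True}

resolved = deep_merge(deep_merge(file_base, group_base), entry)
assert resolved["supports"] == {"temperature": True, "images": True}
assert resolved["company"] == "Alibaba"
```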