model-library 0.1.4__tar.gz → 0.1.6__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (127)
  1. {model_library-0.1.4 → model_library-0.1.6}/Makefile +3 -1
  2. {model_library-0.1.4 → model_library-0.1.6}/PKG-INFO +1 -1
  3. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/base.py +16 -12
  4. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/delegate_only.py +5 -1
  5. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/output.py +24 -10
  6. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/utils.py +27 -5
  7. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/all_models.json +143 -2
  8. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/fireworks_models.yaml +2 -0
  9. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/google_models.yaml +15 -0
  10. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/minimax_models.yaml +18 -0
  11. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/perplexity_models.yaml +2 -0
  12. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/zai_models.yaml +14 -0
  13. {model_library-0.1.4 → model_library-0.1.6}/model_library/exceptions.py +1 -1
  14. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/ai21labs.py +2 -0
  15. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/amazon.py +2 -0
  16. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/anthropic.py +37 -11
  17. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/google/google.py +10 -2
  18. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/minimax.py +10 -10
  19. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/mistral.py +2 -0
  20. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/openai.py +9 -4
  21. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/vals.py +2 -0
  22. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/xai.py +6 -2
  23. {model_library-0.1.4 → model_library-0.1.6}/model_library/register_models.py +1 -1
  24. {model_library-0.1.4 → model_library-0.1.6}/model_library/registry_utils.py +60 -0
  25. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/PKG-INFO +1 -1
  26. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/SOURCES.txt +2 -0
  27. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/providers/test_fireworks_provider.py +1 -0
  28. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_deep_research.py +4 -2
  29. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_perplexity_provider.py +3 -1
  30. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_prompt_caching.py +4 -1
  31. model_library-0.1.6/tests/unit/test_query_logger.py +21 -0
  32. model_library-0.1.6/tests/unit/test_result_metadata.py +206 -0
  33. {model_library-0.1.4 → model_library-0.1.6}/.gitattributes +0 -0
  34. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/publish.yml +0 -0
  35. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/style.yaml +0 -0
  36. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/test.yaml +0 -0
  37. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/typecheck.yml +0 -0
  38. {model_library-0.1.4 → model_library-0.1.6}/.gitignore +0 -0
  39. {model_library-0.1.4 → model_library-0.1.6}/LICENSE +0 -0
  40. {model_library-0.1.4 → model_library-0.1.6}/README.md +0 -0
  41. {model_library-0.1.4 → model_library-0.1.6}/examples/README.md +0 -0
  42. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/batch.py +0 -0
  43. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/custom_retrier.py +0 -0
  44. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/deep_research.py +0 -0
  45. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/stress.py +0 -0
  46. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/structured_output.py +0 -0
  47. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/web_search.py +0 -0
  48. {model_library-0.1.4 → model_library-0.1.6}/examples/basics.py +0 -0
  49. {model_library-0.1.4 → model_library-0.1.6}/examples/data/files.py +0 -0
  50. {model_library-0.1.4 → model_library-0.1.6}/examples/data/images.py +0 -0
  51. {model_library-0.1.4 → model_library-0.1.6}/examples/embeddings.py +0 -0
  52. {model_library-0.1.4 → model_library-0.1.6}/examples/files.py +0 -0
  53. {model_library-0.1.4 → model_library-0.1.6}/examples/images.py +0 -0
  54. {model_library-0.1.4 → model_library-0.1.6}/examples/prompt_caching.py +0 -0
  55. {model_library-0.1.4 → model_library-0.1.6}/examples/setup.py +0 -0
  56. {model_library-0.1.4 → model_library-0.1.6}/examples/tool_calls.py +0 -0
  57. {model_library-0.1.4 → model_library-0.1.6}/model_library/__init__.py +0 -0
  58. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/__init__.py +0 -0
  59. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/batch.py +0 -0
  60. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/input.py +0 -0
  61. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/README.md +0 -0
  62. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/ai21labs_models.yaml +0 -0
  63. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/alibaba_models.yaml +0 -0
  64. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/amazon_models.yaml +0 -0
  65. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/anthropic_models.yaml +0 -0
  66. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/cohere_models.yaml +0 -0
  67. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/deepseek_models.yaml +0 -0
  68. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/dummy_model.yaml +0 -0
  69. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/inception_models.yaml +0 -0
  70. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/kimi_models.yaml +0 -0
  71. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/mistral_models.yaml +0 -0
  72. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/openai_models.yaml +0 -0
  73. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/together_models.yaml +0 -0
  74. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/xai_models.yaml +0 -0
  75. {model_library-0.1.4 → model_library-0.1.6}/model_library/file_utils.py +0 -0
  76. {model_library-0.1.4 → model_library-0.1.6}/model_library/logging.py +0 -0
  77. {model_library-0.1.4 → model_library-0.1.6}/model_library/model_utils.py +0 -0
  78. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/__init__.py +0 -0
  79. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/alibaba.py +0 -0
  80. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/azure.py +0 -0
  81. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/cohere.py +0 -0
  82. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/deepseek.py +0 -0
  83. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/fireworks.py +0 -0
  84. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/google/__init__.py +0 -0
  85. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/google/batch.py +0 -0
  86. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/inception.py +0 -0
  87. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/kimi.py +0 -0
  88. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/perplexity.py +0 -0
  89. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/together.py +0 -0
  90. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/zai.py +0 -0
  91. {model_library-0.1.4 → model_library-0.1.6}/model_library/py.typed +0 -0
  92. {model_library-0.1.4 → model_library-0.1.6}/model_library/settings.py +0 -0
  93. {model_library-0.1.4 → model_library-0.1.6}/model_library/utils.py +0 -0
  94. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/dependency_links.txt +0 -0
  95. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/requires.txt +0 -0
  96. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/top_level.txt +0 -0
  97. {model_library-0.1.4 → model_library-0.1.6}/pyproject.toml +0 -0
  98. {model_library-0.1.4 → model_library-0.1.6}/scripts/browse_models.py +0 -0
  99. {model_library-0.1.4 → model_library-0.1.6}/scripts/config.py +0 -0
  100. {model_library-0.1.4 → model_library-0.1.6}/scripts/publish.py +0 -0
  101. {model_library-0.1.4 → model_library-0.1.6}/scripts/run_models.py +0 -0
  102. {model_library-0.1.4 → model_library-0.1.6}/setup.cfg +0 -0
  103. {model_library-0.1.4 → model_library-0.1.6}/tests/README.md +0 -0
  104. {model_library-0.1.4 → model_library-0.1.6}/tests/__init__.py +0 -0
  105. {model_library-0.1.4 → model_library-0.1.6}/tests/conftest.py +0 -0
  106. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/__init__.py +0 -0
  107. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/conftest.py +0 -0
  108. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_batch.py +0 -0
  109. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_completion.py +0 -0
  110. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_files.py +0 -0
  111. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_reasoning.py +0 -0
  112. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_retry.py +0 -0
  113. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_streaming.py +0 -0
  114. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_structured_output.py +0 -0
  115. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_tools.py +0 -0
  116. {model_library-0.1.4 → model_library-0.1.6}/tests/test_helpers.py +0 -0
  117. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/__init__.py +0 -0
  118. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/conftest.py +0 -0
  119. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/providers/__init__.py +0 -0
  120. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/providers/test_google_provider.py +0 -0
  121. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_batch.py +0 -0
  122. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_context_window.py +0 -0
  123. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_registry.py +0 -0
  124. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_retry.py +0 -0
  125. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_streaming.py +0 -0
  126. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_tools.py +0 -0
  127. {model_library-0.1.4 → model_library-0.1.6}/uv.lock +0 -0
Makefile
@@ -16,8 +16,10 @@ help:
 	@echo " make examples <model> Run all examples with specified model"
 	@echo " make browse_models Interactively browse models and their configurations"
 
+PYTHON_VERSION ?= 3.11
+
 install:
-	uv venv
+	uv venv --python $(PYTHON_VERSION)
 	uv sync --dev
 	@echo "🎉 Done! Run 'source .venv/bin/activate' to activate the environment locally."
 
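Because PYTHON_VERSION is declared with ?=, it can be overridden per invocation, e.g. make install PYTHON_VERSION=3.12.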
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.4
+Version: 0.1.6
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT
model_library/base/base.py
@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
 from collections.abc import Awaitable
 from pprint import pformat
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Literal,
@@ -43,9 +42,6 @@ from model_library.exceptions import (
 )
 from model_library.utils import truncate_str
 
-if TYPE_CHECKING:
-    from model_library.providers.openai import OpenAIModel
-
 PydanticT = TypeVar("PydanticT", bound=BaseModel)
 
 
@@ -66,7 +62,7 @@ class LLMConfig(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning: bool = False
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -110,7 +106,7 @@ class LLM(ABC):
         self.top_k: int | None = config.top_k
 
         self.reasoning: bool = config.reasoning
-        self.reasoning_effort: str | None = config.reasoning_effort
+        self.reasoning_effort: str | bool | None = config.reasoning_effort
 
         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
@@ -120,7 +116,7 @@ class LLM(ABC):
         self.supports_tools: bool = config.supports_tools
 
         self.native: bool = config.native
-        self.delegate: "OpenAIModel | None" = None
+        self.delegate: "LLM | None" = None
         self.batch: LLMBatchMixin | None = None
 
         if config.provider_config:
@@ -198,11 +194,14 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition] = [],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if not self.delegate:
             raise Exception("Delegate not set")
-        return await self.delegate._query_impl(input, tools=tools, **kwargs)  # pyright: ignore[reportPrivateUsage]
+        return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     async def query(
         self,
@@ -213,6 +212,7 @@ class LLM(ABC):
         # for backwards compatibility
         files: list[FileInput] = [],
         images: list[FileInput] = [],
+        query_logger: logging.Logger | None = None,
         **kwargs: object,
     ) -> QueryResult:
         """
@@ -256,15 +256,18 @@ class LLM(ABC):
         input = [*history, *input]
 
         # unique logger for the query
-        query_id = uuid.uuid4().hex[:14]
-        query_logger = self.logger.getChild(f"query={query_id}")
+        if not query_logger:
+            query_id = uuid.uuid4().hex[:14]
+            query_logger = self.logger.getChild(f"query={query_id}")
 
         query_logger.info(
             "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
         )
 
         async def query_func() -> QueryResult:
-            return await self._query_impl(input, tools=tools, **kwargs)
+            return await self._query_impl(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         async def timed_query() -> tuple[QueryResult, float]:
             return await LLM.timer_wrapper(query_func)
@@ -361,7 +364,8 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
-        **kwargs: object,  # TODO: pass in query logger
+        query_logger: logging.Logger,
+        **kwargs: object,
     ) -> QueryResult:
         """
         Query the model with input
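Taken together, these base.py changes thread a caller-supplied logger from query() down to _query_impl. A minimal usage sketch, assuming a model instance obtained elsewhere; the get_model factory, model key, and plain-string input are illustrative, not part of this diff:

    import asyncio
    import logging

    async def main() -> None:
        model = get_model("openai/gpt-5.2-pro-2025-12-11")  # hypothetical factory
        app_logger = logging.getLogger("my_app.llm")
        # As of 0.1.6, query() reuses a caller-supplied logger instead of always
        # minting a per-query child logger; providers receive it via _query_impl.
        result = await model.query("Hello!", query_logger=app_logger)
        print(result)

    asyncio.run(main())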
model_library/base/delegate_only.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from typing_extensions import override
@@ -48,11 +49,14 @@ class DelegateOnly(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         assert self.delegate
 
-        return await self.delegate_query(input, tools=tools, **kwargs)
+        return await self.delegate_query(
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     @override
     async def parse_input(
model_library/base/output.py
@@ -9,9 +9,7 @@ from pydantic import BaseModel, Field, computed_field, field_validator
 from typing_extensions import override
 
 from model_library.base.input import InputItem, ToolCall
-from model_library.base.utils import (
-    sum_optional,
-)
+from model_library.base.utils import add_optional
 from model_library.utils import truncate_str
 
 
@@ -42,10 +40,14 @@ class QueryResultCost(BaseModel):
     reasoning: float | None = None
     cache_read: float | None = None
     cache_write: float | None = None
+    total_override: float | None = None
 
     @computed_field
     @property
     def total(self) -> float:
+        if self.total_override is not None:
+            return self.total_override
+
         return sum(
             filter(
                 None,
@@ -86,6 +88,16 @@ class QueryResultCost(BaseModel):
             )
         )
 
+    def __add__(self, other: "QueryResultCost") -> "QueryResultCost":
+        return QueryResultCost(
+            input=self.input + other.input,
+            output=self.output + other.output,
+            reasoning=add_optional(self.reasoning, other.reasoning),
+            cache_read=add_optional(self.cache_read, other.cache_read),
+            cache_write=add_optional(self.cache_write, other.cache_write),
+            total_override=add_optional(self.total_override, other.total_override),
+        )
+
     @override
     def __repr__(self):
         use_cents = self.total < 1
@@ -150,18 +162,20 @@ class QueryResultMetadata(BaseModel):
         return QueryResultMetadata(
             in_tokens=self.in_tokens + other.in_tokens,
             out_tokens=self.out_tokens + other.out_tokens,
-            reasoning_tokens=sum_optional(
-                self.reasoning_tokens, other.reasoning_tokens
+            reasoning_tokens=cast(
+                int | None, add_optional(self.reasoning_tokens, other.reasoning_tokens)
             ),
-            cache_read_tokens=sum_optional(
-                self.cache_read_tokens, other.cache_read_tokens
+            cache_read_tokens=cast(
+                int | None,
+                add_optional(self.cache_read_tokens, other.cache_read_tokens),
             ),
-            cache_write_tokens=sum_optional(
-                self.cache_write_tokens, other.cache_write_tokens
+            cache_write_tokens=cast(
+                int | None,
+                add_optional(self.cache_write_tokens, other.cache_write_tokens),
             ),
             duration_seconds=self.default_duration_seconds
             + other.default_duration_seconds,
-            cost=self.cost,
+            cost=cast(QueryResultCost | None, add_optional(self.cost, other.cost)),
         )
 
     @override
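A short sketch of the new cost semantics; the values are illustrative, and it assumes input and output are required float fields and that total sums the component fields, as the code above implies:

    from model_library.base.output import QueryResultCost

    a = QueryResultCost(input=0.010, output=0.020, cache_read=0.001)
    b = QueryResultCost(input=0.005, output=0.010)  # cache fields stay None

    merged = a + b                     # new __add__: None-aware per-field merge
    assert merged.cache_read == 0.001  # None on one side keeps the known value
    assert abs(merged.total - 0.046) < 1e-9

    # total_override short-circuits the computed sum, e.g. when a provider
    # reports one authoritative figure that does not decompose cleanly.
    fixed = QueryResultCost(input=0.0, output=0.0, total_override=1.25)
    assert fixed.total == 1.25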
model_library/base/utils.py
@@ -1,4 +1,4 @@
-from typing import Sequence, cast
+from typing import Sequence, TypeVar, cast
 
 from model_library.base.input import (
     FileBase,
@@ -8,17 +8,39 @@ from model_library.base.input import (
     ToolResult,
 )
 from model_library.utils import truncate_str
+from pydantic import BaseModel
 
+T = TypeVar("T", bound=BaseModel)
 
-def sum_optional(a: int | None, b: int | None) -> int | None:
-    """Sum two optional integers, returning None if both are None.
+
+def add_optional(
+    a: int | float | T | None, b: int | float | T | None
+) -> int | float | T | None:
+    """Add two optional objects, returning None if both are None.
 
     Preserves None to indicate "unknown/not provided" when both inputs are None,
-    otherwise treats None as 0 for summation.
+    otherwise returns the non-None value or their sum.
     """
     if a is None and b is None:
         return None
-    return (a or 0) + (b or 0)
+
+    if a is None or b is None:
+        return a or b
+
+    if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+        return a + b
+
+    # NOTE: Ensure that the subtypes are the same so we can use the __add__ method just from one
+    if type(a) is type(b):
+        add_method = getattr(a, "__add__", None)
+        if add_method is not None:
+            return add_method(b)
+    else:
+        raise ValueError(
+            f"Cannot add {type(a)} and {type(b)} because they are not the same subclass"
+        )
+
+    return None
 
 
 def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
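The behavior of the widened helper, in brief; the last line is an observation about the `a or b` branch, not documented behavior:

    from model_library.base.utils import add_optional

    assert add_optional(None, None) is None  # "unknown" stays unknown
    assert add_optional(3, None) == 3        # a single known value passes through
    assert add_optional(3, 4) == 7           # plain numbers are summed
    # Same-class pydantic models combine via their own __add__ (QueryResultCost
    # above defines one); mixing two different classes raises ValueError.
    assert add_optional(0, None) is None     # falsy zero falls through `a or b`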
model_library/config/all_models.json
@@ -1,4 +1,144 @@
 {
+  "minimax/MiniMax-M2.1": {
+    "company": "MiniMax",
+    "label": "MiniMax-M2.1",
+    "description": null,
+    "release_date": "2025-12-23",
+    "open_source": true,
+    "documentation_url": "https://platform.minimax.io/docs",
+    "properties": {
+      "context_window": 204800,
+      "max_tokens": 131000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.3,
+      "output": 1.2,
+      "cache": {
+        "read": 0.03,
+        "write": 0.375,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 0.95
+    },
+    "provider_endpoint": "MiniMax-M2.1",
+    "provider_name": "minimax",
+    "full_key": "minimax/MiniMax-M2.1",
+    "slug": "minimax_MiniMax-M2.1"
+  },
+  "zai/glm-4.7": {
+    "company": "zAI",
+    "label": "GLM 4.7",
+    "description": "Latest model from ZAI",
+    "release_date": "2025-12-22",
+    "open_source": true,
+    "documentation_url": "https://docs.z.ai/",
+    "properties": {
+      "context_window": 200000,
+      "max_tokens": 128000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.6,
+      "output": 2.2,
+      "cache": {
+        "read": 0.11,
+        "read_discount": 1.0,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 1.0
+    },
+    "provider_endpoint": "glm-4.7",
+    "provider_name": "zai",
+    "full_key": "zai/glm-4.7",
+    "slug": "zai_glm-4.7"
+  },
+  "google/gemini-3-flash-preview": {
+    "company": "Google",
+    "label": "Gemini 3 Flash (12/25)",
+    "description": "Google's newest budget workhorse model",
+    "release_date": "2025-12-17",
+    "open_source": false,
+    "documentation_url": "https://ai.google.dev/gemini-api/docs/models",
+    "properties": {
+      "context_window": 1048576,
+      "max_tokens": 65536,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": true,
+      "videos": true,
+      "files": true,
+      "batch": true,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.5,
+      "output": 3.0,
+      "cache": {
+        "read_discount": 0.1,
+        "write_markup": 1.0
+      },
+      "batch": {
+        "input_discount": 0.5,
+        "output_discount": 0.5
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "reasoning_effort": "high"
+    },
+    "provider_endpoint": "gemini-3-flash-preview",
+    "provider_name": "google",
+    "full_key": "google/gemini-3-flash-preview",
+    "slug": "google_gemini-3-flash-preview"
+  },
   "openai/gpt-5.2-pro-2025-12-11": {
     "company": "OpenAI",
     "label": "GPT 5.2 Pro",
@@ -454,7 +594,8 @@
       }
     ],
     "default_parameters": {
-      "temperature": 1.0
+      "temperature": 1.0,
+      "reasoning_effort": "none"
     },
     "provider_endpoint": "deepseek-v3p2",
     "provider_name": "fireworks",
@@ -15428,7 +15569,7 @@
       "tools": false
     },
     "metadata": {
-      "deprecated": false,
+      "deprecated": true,
       "available_for_everyone": true,
       "available_as_evaluator": false,
       "ignored_for_cost": false
model_library/config/fireworks_models.yaml
@@ -150,6 +150,8 @@ deepseek-models:
       context_window: 160_000
       max_tokens: 20_480
       reasoning_model: false
+    default_parameters:
+      reasoning_effort: "none"
     costs_per_million_token:
       input: 0.56
       output: 1.68
model_library/config/google_models.yaml
@@ -54,6 +54,21 @@ gemini-3-models:
       temperature: 1
       reasoning_effort: "high"
 
+  google/gemini-3-flash-preview:
+    label: Gemini 3 Flash (12/25)
+    description: Google's newest budget workhorse model
+    release_date: 2025-12-17
+    properties:
+      context_window: 1048576
+      max_tokens: 65536
+      reasoning_model: true
+    costs_per_million_token:
+      input: 0.50
+      output: 3.00
+    default_parameters:
+      temperature: 1
+      reasoning_effort: "high"
+
   google/gemini-3-pro-preview:
     label: Gemini 3 Pro (11/25)
     description: Gemini 3 Pro, Google's most powerful model.
model_library/config/minimax_models.yaml
@@ -16,6 +16,24 @@ base-config:
 
 minimax-m2-models:
 
+  minimax/MiniMax-M2.1:
+    label: MiniMax-M2.1
+    release_date: 2025-12-23
+    properties:
+      context_window: 204_800
+      max_tokens: 131_000
+      reasoning_model: true
+      training_cutoff: null
+    default_parameters:
+      temperature: 1.0
+      top_p: 0.95
+    costs_per_million_token:
+      input: 0.30
+      output: 1.20
+      cache:
+        read: 0.03
+        write: 0.375
+
   minimax/MiniMax-M2:
     label: MiniMax-M2
     description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.
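As a sanity check on the MiniMax-M2.1 rates above, per-request cost is a straight application of the per-million-token prices. The token counts below are made up, and billing cached reads at the cache read rate instead of the input rate is an assumption about the billing convention:

    # Rates from the MiniMax-M2.1 entry: $0.30/M input, $1.20/M output, $0.03/M cache read.
    in_tokens, out_tokens, cached = 800_000, 50_000, 200_000
    cost = (
        (in_tokens - cached) / 1e6 * 0.30  # uncached input
        + cached / 1e6 * 0.03              # cache reads
        + out_tokens / 1e6 * 1.20          # output
    )
    print(f"${cost:.3f}")  # $0.246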
model_library/config/perplexity_models.yaml
@@ -46,6 +46,8 @@ perplexity-models:
     label: Sonar Reasoning
     description: Reasoning-focused search model that exposes intermediate thinking for step-by-step answers.
     documentation_url: https://docs.perplexity.ai/models/models/sonar-reasoning
+    metadata:
+      deprecated: true
     properties:
       context_window: 128000
       reasoning_model: true
model_library/config/zai_models.yaml
@@ -18,6 +18,20 @@ base-config:
         write_markup: 1
 
 zai-models:
+  zai/glm-4.7:
+    label: GLM 4.7
+    description: "Latest model from ZAI"
+    release_date: 2025-12-22
+    properties:
+      context_window: 200_000
+      max_tokens: 128_000
+    costs_per_million_token:
+      input: 0.6
+      output: 2.2
+      cache:
+        read: 0.11
+    default_parameters:
+      temperature: 1
   zai/glm-4.5:
     label: GLM 4.5
     description: "z.AI old model"
model_library/exceptions.py
@@ -183,8 +183,8 @@ RETRIABLE_EXCEPTION_CODES = [
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
-    "throttlingexception",  # AWS throttling errors
     "internal server error",
+    "InternalServerError",
 ]
 
 
model_library/providers/ai21labs.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from ai21 import AsyncAI21Client
@@ -137,6 +138,7 @@ class AI21LabsModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         messages: list[ChatMessage] = []
model_library/providers/amazon.py
@@ -3,6 +3,7 @@ import asyncio
 import base64
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast
 
 import boto3
@@ -337,6 +338,7 @@ class AmazonModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.build_body(input, tools=tools, **kwargs)
model_library/providers/anthropic.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from anthropic import AsyncAnthropic
@@ -249,6 +250,8 @@ class AnthropicModel(LLM):
 
     @override
     def get_client(self) -> AsyncAnthropic:
+        if self._delegate_client:
+            return self._delegate_client
         if not AnthropicModel._client:
             headers: dict[str, str] = {}
             AnthropicModel._client = AsyncAnthropic(
@@ -262,16 +265,20 @@ class AnthropicModel(LLM):
     def __init__(
         self,
         model_name: str,
-        provider: Literal["anthropic"] = "anthropic",
+        provider: str = "anthropic",
         *,
         config: LLMConfig | None = None,
+        custom_client: AsyncAnthropic | None = None,
     ):
         super().__init__(model_name, provider, config=config)
 
+        # allow custom client to act as delegate (native)
+        self._delegate_client: AsyncAnthropic | None = custom_client
+
         # https://docs.anthropic.com/en/api/openai-sdk
-        self.delegate: OpenAIModel | None = (
+        self.delegate = (
             None
-            if self.native
+            if self.native or custom_client
             else OpenAIModel(
                 model_name=self.model_name,
                 provider=provider,
@@ -285,7 +292,10 @@ class AnthropicModel(LLM):
         )
 
         # Initialize batch support if enabled
-        self.supports_batch: bool = self.supports_batch and self.native
+        # Disable batch when using custom_client (similar to OpenAI)
+        self.supports_batch: bool = (
+            self.supports_batch and self.native and not custom_client
+        )
         self.batch: LLMBatchMixin | None = (
             AnthropicBatchMixin(self) if self.supports_batch else None
         )
@@ -555,20 +565,36 @@ class AnthropicModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         body = await self.create_body(input, tools=tools, **kwargs)
 
-        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
-        if "sonnet-4-5" in self.model_name:
-            betas.append("context-1m-2025-08-07")
+        client = self.get_client()
 
-        async with self.get_client().beta.messages.stream(
-            **body,
-            betas=betas,
+        # only send betas for the official Anthropic endpoint
+        is_anthropic_endpoint = self._delegate_client is None
+        if not is_anthropic_endpoint:
+            client_base_url = getattr(client, "_base_url", None) or getattr(
+                client, "base_url", None
+            )
+            if client_base_url:
+                is_anthropic_endpoint = "api.anthropic.com" in str(client_base_url)
+
+        stream_kwargs = {**body}
+        if is_anthropic_endpoint:
+            betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+            if "sonnet-4-5" in self.model_name:
+                betas.append("context-1m-2025-08-07")
+            stream_kwargs["betas"] = betas
+
+        async with client.beta.messages.stream(
+            **stream_kwargs,
        ) as stream:  # pyright: ignore[reportAny]
             message = await stream.get_final_message()
             self.logger.info(f"Anthropic Response finished: {message.id}")
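A sketch of the new custom_client hook, pointing AnthropicModel at an Anthropic-compatible gateway. The base URL, key, model name, and provider label are placeholders, and it assumes the model key resolves in the registry:

    from anthropic import AsyncAnthropic
    from model_library.providers.anthropic import AnthropicModel

    gateway = AsyncAnthropic(
        base_url="https://llm-gateway.example.com",  # placeholder endpoint
        api_key="...",
    )
    model = AnthropicModel(
        "claude-sonnet-4-5",    # placeholder model name
        provider="my-gateway",  # provider is now any str, not just "anthropic"
        custom_client=gateway,
    )
    # With a custom client: get_client() returns it, no OpenAI delegate is
    # created, batch support is disabled, and the beta flags (files API,
    # interleaved thinking, 1M context) are only sent when the client's base
    # URL is api.anthropic.com.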