model-library 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/model_library/base/base.py
+++ b/model_library/base/base.py
@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
 from collections.abc import Awaitable
 from pprint import pformat
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Literal,
@@ -43,9 +42,6 @@ from model_library.exceptions import (
 )
 from model_library.utils import truncate_str
 
-if TYPE_CHECKING:
-    from model_library.providers.openai import OpenAIModel
-
 PydanticT = TypeVar("PydanticT", bound=BaseModel)
 
 
@@ -66,7 +62,7 @@ class LLMConfig(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning: bool = False
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -110,7 +106,7 @@ class LLM(ABC):
         self.top_k: int | None = config.top_k
 
         self.reasoning: bool = config.reasoning
-        self.reasoning_effort: str | None = config.reasoning_effort
+        self.reasoning_effort: str | bool | None = config.reasoning_effort
 
         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
@@ -120,7 +116,7 @@ class LLM(ABC):
         self.supports_tools: bool = config.supports_tools
 
         self.native: bool = config.native
-        self.delegate: "OpenAIModel | None" = None
+        self.delegate: "LLM | None" = None
         self.batch: LLMBatchMixin | None = None
 
         if config.provider_config:
@@ -198,11 +194,14 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition] = [],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if not self.delegate:
             raise Exception("Delegate not set")
-        return await self.delegate._query_impl(input, tools=tools, **kwargs)  # pyright: ignore[reportPrivateUsage]
+        return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     async def query(
         self,
@@ -213,6 +212,7 @@ class LLM(ABC):
         # for backwards compatibility
         files: list[FileInput] = [],
         images: list[FileInput] = [],
+        query_logger: logging.Logger | None = None,
         **kwargs: object,
     ) -> QueryResult:
         """
@@ -256,15 +256,18 @@ class LLM(ABC):
         input = [*history, *input]
 
         # unique logger for the query
-        query_id = uuid.uuid4().hex[:14]
-        query_logger = self.logger.getChild(f"query={query_id}")
+        if not query_logger:
+            query_id = uuid.uuid4().hex[:14]
+            query_logger = self.logger.getChild(f"query={query_id}")
 
         query_logger.info(
             "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
         )
 
         async def query_func() -> QueryResult:
-            return await self._query_impl(input, tools=tools, **kwargs)
+            return await self._query_impl(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         async def timed_query() -> tuple[QueryResult, float]:
             return await LLM.timer_wrapper(query_func)
@@ -361,7 +364,8 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
-        **kwargs: object,  # TODO: pass in query logger
+        query_logger: logging.Logger,
+        **kwargs: object,
     ) -> QueryResult:
         """
         Query the model with input
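The thread running through these hunks: `_query_impl` now takes a required `query_logger`, and `query()` only mints its uuid-suffixed child logger when the caller has not supplied one, so a single logger can follow a query through retries and delegate hops. A minimal usage sketch; the concrete model class, model key, and the plain-string input are assumptions for illustration, not taken from this diff:

```python
import asyncio
import logging

from model_library.providers.openai import OpenAIModel  # import path per this diff


async def main() -> None:
    model = OpenAIModel(model_name="gpt-4o")  # hypothetical model key
    app_logger = logging.getLogger("myapp.llm")

    # With query_logger supplied, LLM.query() skips the uuid-based child
    # logger and threads this one down into _query_impl and any delegate.
    result = await model.query("Say hi", query_logger=app_logger)
    print(result)


asyncio.run(main())
```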
--- a/model_library/base/delegate_only.py
+++ b/model_library/base/delegate_only.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from typing_extensions import override
@@ -48,11 +49,14 @@ class DelegateOnly(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         assert self.delegate
 
-        return await self.delegate_query(input, tools=tools, **kwargs)
+        return await self.delegate_query(
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     @override
     async def parse_input(
--- a/model_library/base/output.py
+++ b/model_library/base/output.py
@@ -9,9 +9,7 @@ from pydantic import BaseModel, Field, computed_field, field_validator
 from typing_extensions import override
 
 from model_library.base.input import InputItem, ToolCall
-from model_library.base.utils import (
-    sum_optional,
-)
+from model_library.base.utils import add_optional
 from model_library.utils import truncate_str
 
 
@@ -42,10 +40,14 @@ class QueryResultCost(BaseModel):
     reasoning: float | None = None
     cache_read: float | None = None
     cache_write: float | None = None
+    total_override: float | None = None
 
     @computed_field
     @property
     def total(self) -> float:
+        if self.total_override is not None:
+            return self.total_override
+
         return sum(
             filter(
                 None,
@@ -86,6 +88,16 @@
             )
         )
 
+    def __add__(self, other: "QueryResultCost") -> "QueryResultCost":
+        return QueryResultCost(
+            input=self.input + other.input,
+            output=self.output + other.output,
+            reasoning=add_optional(self.reasoning, other.reasoning),
+            cache_read=add_optional(self.cache_read, other.cache_read),
+            cache_write=add_optional(self.cache_write, other.cache_write),
+            total_override=add_optional(self.total_override, other.total_override),
+        )
+
     @override
     def __repr__(self):
         use_cents = self.total < 1
@@ -150,18 +162,20 @@ class QueryResultMetadata(BaseModel):
         return QueryResultMetadata(
             in_tokens=self.in_tokens + other.in_tokens,
             out_tokens=self.out_tokens + other.out_tokens,
-            reasoning_tokens=sum_optional(
-                self.reasoning_tokens, other.reasoning_tokens
+            reasoning_tokens=cast(
+                int | None, add_optional(self.reasoning_tokens, other.reasoning_tokens)
             ),
-            cache_read_tokens=sum_optional(
-                self.cache_read_tokens, other.cache_read_tokens
+            cache_read_tokens=cast(
+                int | None,
+                add_optional(self.cache_read_tokens, other.cache_read_tokens),
             ),
-            cache_write_tokens=sum_optional(
-                self.cache_write_tokens, other.cache_write_tokens
+            cache_write_tokens=cast(
+                int | None,
+                add_optional(self.cache_write_tokens, other.cache_write_tokens),
             ),
             duration_seconds=self.default_duration_seconds
             + other.default_duration_seconds,
-            cost=self.cost,
+            cost=cast(QueryResultCost | None, add_optional(self.cost, other.cost)),
        )
 
     @override
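Taken together: `QueryResultCost` instances are now additive, which `QueryResultMetadata.__add__` uses to merge `cost` instead of keeping only the left-hand side, and `total_override` lets a provider-supplied total win over the computed sum. A behavior sketch with invented numbers:

```python
from model_library.base.output import QueryResultCost

a = QueryResultCost(input=0.010, output=0.020)
b = QueryResultCost(input=0.005, output=0.001, cache_read=0.002)

merged = a + b  # the new __add__
# input/output are summed; optional fields merge via add_optional,
# so cache_read stays 0.002 rather than collapsing to None.
print(merged.input, merged.output, merged.cache_read)

# total_override short-circuits the computed total
fixed = QueryResultCost(input=0.0, output=0.0, total_override=0.42)
assert fixed.total == 0.42
```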
--- a/model_library/base/utils.py
+++ b/model_library/base/utils.py
@@ -1,4 +1,4 @@
-from typing import Sequence, cast
+from typing import Sequence, TypeVar, cast
 
 from model_library.base.input import (
     FileBase,
@@ -8,17 +8,39 @@ from model_library.base.input import (
     ToolResult,
 )
 from model_library.utils import truncate_str
+from pydantic import BaseModel
 
+T = TypeVar("T", bound=BaseModel)
 
-def sum_optional(a: int | None, b: int | None) -> int | None:
-    """Sum two optional integers, returning None if both are None.
+
+def add_optional(
+    a: int | float | T | None, b: int | float | T | None
+) -> int | float | T | None:
+    """Add two optional objects, returning None if both are None.
 
     Preserves None to indicate "unknown/not provided" when both inputs are None,
-    otherwise treats None as 0 for summation.
+    otherwise returns the non-None value or their sum.
     """
     if a is None and b is None:
         return None
-    return (a or 0) + (b or 0)
+
+    if a is None or b is None:
+        return a or b
+
+    if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+        return a + b
+
+    # NOTE: Ensure that the subtypes are the same so we can use the __add__ method just from one
+    if type(a) is type(b):
+        add_method = getattr(a, "__add__", None)
+        if add_method is not None:
+            return add_method(b)
+    else:
+        raise ValueError(
+            f"Cannot add {type(a)} and {type(b)} because they are not the same subclass"
+        )
+
+    return None
 
 
 def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
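`add_optional` generalizes the old `sum_optional`: numbers still sum, a lone non-None operand passes through, and same-typed pydantic models are combined via their own `__add__` (mixing different subclasses raises). A quick behavior sketch; note that the `a or b` branch maps a falsy-but-present value like `0` to the other operand:

```python
from model_library.base.utils import add_optional

assert add_optional(None, None) is None  # both unknown -> unknown
assert add_optional(3, None) == 3        # lone value passes through
assert add_optional(2, 2.5) == 4.5       # plain numeric sum
assert add_optional(0, None) is None     # quirk: 0 is falsy, so `a or b` yields None
# QueryResultCost + QueryResultCost goes through the model's __add__;
# this is what QueryResultMetadata.__add__ relies on to merge costs.
```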
--- a/model_library/config/all_models.json
+++ b/model_library/config/all_models.json
@@ -1,4 +1,144 @@
 {
+    "minimax/MiniMax-M2.1": {
+        "company": "MiniMax",
+        "label": "MiniMax-M2.1",
+        "description": null,
+        "release_date": "2025-12-23",
+        "open_source": true,
+        "documentation_url": "https://platform.minimax.io/docs",
+        "properties": {
+            "context_window": 204800,
+            "max_tokens": 131000,
+            "training_cutoff": null,
+            "reasoning_model": true
+        },
+        "supports": {
+            "images": false,
+            "files": false,
+            "temperature": true,
+            "tools": true
+        },
+        "metadata": {
+            "deprecated": false,
+            "available_for_everyone": true,
+            "available_as_evaluator": false,
+            "ignored_for_cost": false
+        },
+        "provider_properties": {},
+        "costs_per_million_token": {
+            "input": 0.3,
+            "output": 1.2,
+            "cache": {
+                "read": 0.03,
+                "write": 0.375,
+                "write_markup": 1.0
+            }
+        },
+        "alternative_keys": [],
+        "default_parameters": {
+            "temperature": 1.0,
+            "top_p": 0.95
+        },
+        "provider_endpoint": "MiniMax-M2.1",
+        "provider_name": "minimax",
+        "full_key": "minimax/MiniMax-M2.1",
+        "slug": "minimax_MiniMax-M2.1"
+    },
+    "zai/glm-4.7": {
+        "company": "zAI",
+        "label": "GLM 4.7",
+        "description": "Latest model from ZAI",
+        "release_date": "2025-12-22",
+        "open_source": true,
+        "documentation_url": "https://docs.z.ai/",
+        "properties": {
+            "context_window": 200000,
+            "max_tokens": 128000,
+            "training_cutoff": null,
+            "reasoning_model": true
+        },
+        "supports": {
+            "images": false,
+            "files": false,
+            "temperature": true,
+            "tools": true
+        },
+        "metadata": {
+            "deprecated": false,
+            "available_for_everyone": true,
+            "available_as_evaluator": false,
+            "ignored_for_cost": false
+        },
+        "provider_properties": {},
+        "costs_per_million_token": {
+            "input": 0.6,
+            "output": 2.2,
+            "cache": {
+                "read": 0.11,
+                "read_discount": 1.0,
+                "write_markup": 1.0
+            }
+        },
+        "alternative_keys": [],
+        "default_parameters": {
+            "temperature": 1.0,
+            "top_p": 1.0
+        },
+        "provider_endpoint": "glm-4.7",
+        "provider_name": "zai",
+        "full_key": "zai/glm-4.7",
+        "slug": "zai_glm-4.7"
+    },
+    "google/gemini-3-flash-preview": {
+        "company": "Google",
+        "label": "Gemini 3 Flash (12/25)",
+        "description": "Google's newest budget workhorse model",
+        "release_date": "2025-12-17",
+        "open_source": false,
+        "documentation_url": "https://ai.google.dev/gemini-api/docs/models",
+        "properties": {
+            "context_window": 1048576,
+            "max_tokens": 65536,
+            "training_cutoff": null,
+            "reasoning_model": true
+        },
+        "supports": {
+            "images": true,
+            "videos": true,
+            "files": true,
+            "batch": true,
+            "temperature": true,
+            "tools": true
+        },
+        "metadata": {
+            "deprecated": false,
+            "available_for_everyone": true,
+            "available_as_evaluator": false,
+            "ignored_for_cost": false
+        },
+        "provider_properties": {},
+        "costs_per_million_token": {
+            "input": 0.5,
+            "output": 3.0,
+            "cache": {
+                "read_discount": 0.1,
+                "write_markup": 1.0
+            },
+            "batch": {
+                "input_discount": 0.5,
+                "output_discount": 0.5
+            }
+        },
+        "alternative_keys": [],
+        "default_parameters": {
+            "temperature": 1.0,
+            "reasoning_effort": "high"
+        },
+        "provider_endpoint": "gemini-3-flash-preview",
+        "provider_name": "google",
+        "full_key": "google/gemini-3-flash-preview",
+        "slug": "google_gemini-3-flash-preview"
+    },
     "openai/gpt-5.2-pro-2025-12-11": {
         "company": "OpenAI",
         "label": "GPT 5.2 Pro",
@@ -454,7 +594,8 @@
             }
         ],
         "default_parameters": {
-            "temperature": 1.0
+            "temperature": 1.0,
+            "reasoning_effort": "none"
         },
         "provider_endpoint": "deepseek-v3p2",
         "provider_name": "fireworks",
@@ -15428,7 +15569,7 @@
             "tools": false
         },
         "metadata": {
-            "deprecated": false,
+            "deprecated": true,
             "available_for_everyone": true,
             "available_as_evaluator": false,
             "ignored_for_cost": false
--- a/model_library/config/fireworks_models.yaml
+++ b/model_library/config/fireworks_models.yaml
@@ -150,6 +150,8 @@ deepseek-models:
       context_window: 160_000
       max_tokens: 20_480
       reasoning_model: false
+    default_parameters:
+      reasoning_effort: "none"
     costs_per_million_token:
       input: 0.56
       output: 1.68
--- a/model_library/config/google_models.yaml
+++ b/model_library/config/google_models.yaml
@@ -54,6 +54,21 @@ gemini-3-models:
       temperature: 1
       reasoning_effort: "high"
 
+  google/gemini-3-flash-preview:
+    label: Gemini 3 Flash (12/25)
+    description: Google's newest budget workhorse model
+    release_date: 2025-12-17
+    properties:
+      context_window: 1048576
+      max_tokens: 65536
+      reasoning_model: true
+    costs_per_million_token:
+      input: 0.50
+      output: 3.00
+    default_parameters:
+      temperature: 1
+      reasoning_effort: "high"
+
   google/gemini-3-pro-preview:
     label: Gemini 3 Pro (11/25)
     description: Gemini 3 Pro, Google's most powerful model.
--- a/model_library/config/minimax_models.yaml
+++ b/model_library/config/minimax_models.yaml
@@ -16,6 +16,24 @@ base-config:
 
 minimax-m2-models:
 
+  minimax/MiniMax-M2.1:
+    label: MiniMax-M2.1
+    release_date: 2025-12-23
+    properties:
+      context_window: 204_800
+      max_tokens: 131_000
+      reasoning_model: true
+      training_cutoff: null
+    default_parameters:
+      temperature: 1.0
+      top_p: 0.95
+    costs_per_million_token:
+      input: 0.30
+      output: 1.20
+      cache:
+        read: 0.03
+        write: 0.375
+
   minimax/MiniMax-M2:
     label: MiniMax-M2
     description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.
--- a/model_library/config/perplexity_models.yaml
+++ b/model_library/config/perplexity_models.yaml
@@ -46,6 +46,8 @@ perplexity-models:
     label: Sonar Reasoning
     description: Reasoning-focused search model that exposes intermediate thinking for step-by-step answers.
     documentation_url: https://docs.perplexity.ai/models/models/sonar-reasoning
+    metadata:
+      deprecated: true
     properties:
       context_window: 128000
       reasoning_model: true
--- a/model_library/config/zai_models.yaml
+++ b/model_library/config/zai_models.yaml
@@ -18,6 +18,20 @@ base-config:
       write_markup: 1
 
 zai-models:
+  zai/glm-4.7:
+    label: GLM 4.7
+    description: "Latest model from ZAI"
+    release_date: 2025-12-22
+    properties:
+      context_window: 200_000
+      max_tokens: 128_000
+    costs_per_million_token:
+      input: 0.6
+      output: 2.2
+      cache:
+        read: 0.11
+    default_parameters:
+      temperature: 1
   zai/glm-4.5:
     label: GLM 4.5
     description: "z.AI old model"
--- a/model_library/exceptions.py
+++ b/model_library/exceptions.py
@@ -183,8 +183,8 @@ RETRIABLE_EXCEPTION_CODES = [
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
-    "throttlingexception",  # AWS throttling errors
     "internal server error",
+    "InternalServerError",
 ]
 
 
--- a/model_library/providers/ai21labs.py
+++ b/model_library/providers/ai21labs.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from ai21 import AsyncAI21Client
@@ -137,6 +138,7 @@ class AI21LabsModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         messages: list[ChatMessage] = []
--- a/model_library/providers/amazon.py
+++ b/model_library/providers/amazon.py
@@ -3,6 +3,7 @@ import asyncio
 import base64
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast
 
 import boto3
@@ -337,6 +338,7 @@ class AmazonModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.build_body(input, tools=tools, **kwargs)
--- a/model_library/providers/anthropic.py
+++ b/model_library/providers/anthropic.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from anthropic import AsyncAnthropic
@@ -249,6 +250,8 @@ class AnthropicModel(LLM):
 
     @override
     def get_client(self) -> AsyncAnthropic:
+        if self._delegate_client:
+            return self._delegate_client
         if not AnthropicModel._client:
             headers: dict[str, str] = {}
             AnthropicModel._client = AsyncAnthropic(
@@ -262,16 +265,20 @@ class AnthropicModel(LLM):
     def __init__(
         self,
         model_name: str,
-        provider: Literal["anthropic"] = "anthropic",
+        provider: str = "anthropic",
         *,
         config: LLMConfig | None = None,
+        custom_client: AsyncAnthropic | None = None,
     ):
         super().__init__(model_name, provider, config=config)
 
+        # allow custom client to act as delegate (native)
+        self._delegate_client: AsyncAnthropic | None = custom_client
+
         # https://docs.anthropic.com/en/api/openai-sdk
-        self.delegate: OpenAIModel | None = (
+        self.delegate = (
             None
-            if self.native
+            if self.native or custom_client
             else OpenAIModel(
                 model_name=self.model_name,
                 provider=provider,
@@ -285,7 +292,10 @@ class AnthropicModel(LLM):
         )
 
         # Initialize batch support if enabled
-        self.supports_batch: bool = self.supports_batch and self.native
+        # Disable batch when using custom_client (similar to OpenAI)
+        self.supports_batch: bool = (
+            self.supports_batch and self.native and not custom_client
+        )
         self.batch: LLMBatchMixin | None = (
             AnthropicBatchMixin(self) if self.supports_batch else None
         )
@@ -555,20 +565,36 @@ class AnthropicModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         body = await self.create_body(input, tools=tools, **kwargs)
 
-        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
-        if "sonnet-4-5" in self.model_name:
-            betas.append("context-1m-2025-08-07")
+        client = self.get_client()
 
-        async with self.get_client().beta.messages.stream(
-            **body,
-            betas=betas,
+        # only send betas for the official Anthropic endpoint
+        is_anthropic_endpoint = self._delegate_client is None
+        if not is_anthropic_endpoint:
+            client_base_url = getattr(client, "_base_url", None) or getattr(
+                client, "base_url", None
+            )
+            if client_base_url:
+                is_anthropic_endpoint = "api.anthropic.com" in str(client_base_url)
+
+        stream_kwargs = {**body}
+        if is_anthropic_endpoint:
+            betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+            if "sonnet-4-5" in self.model_name:
+                betas.append("context-1m-2025-08-07")
+            stream_kwargs["betas"] = betas
+
+        async with client.beta.messages.stream(
+            **stream_kwargs,
         ) as stream:  # pyright: ignore[reportAny]
             message = await stream.get_final_message()
             self.logger.info(f"Anthropic Response finished: {message.id}")
--- a/model_library/providers/google/google.py
+++ b/model_library/providers/google/google.py
@@ -1,5 +1,6 @@
 import base64
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from google.genai import Client
@@ -54,6 +55,11 @@ from model_library.exceptions import (
 from model_library.providers.google.batch import GoogleBatchMixin
 from model_library.register_models import register_provider
 from model_library.utils import normalize_tool_result
+import uuid
+
+
+def generate_tool_call_id(tool_name: str) -> str:
+    return str(tool_name + "_" + str(uuid.uuid4()))
 
 
 class GoogleConfig(ProviderConfig):
@@ -328,6 +334,7 @@ class GoogleModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body: dict[str, Any] = await self.create_body(input, tools=tools, **kwargs)
@@ -357,9 +364,10 @@ class GoogleModel(LLM):
 
             call_args = part.function_call.args or {}
             tool_calls.append(
-                # weirdly, id is not required
+                # Weirdly, id is not required. If not provided, we generate one.
                 ToolCall(
-                    id=part.function_call.id or "",
+                    id=part.function_call.id
+                    or generate_tool_call_id(part.function_call.name),
                     name=part.function_call.name,
                    args=call_args,
                 )
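Because Gemini may omit `function_call.id`, the fallback is now a name-prefixed UUID rather than an empty string, which keeps tool calls and their results correlatable across turns. For example:

```python
>>> generate_tool_call_id("get_weather")
'get_weather_1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed'  # UUID suffix varies per call
```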
--- a/model_library/providers/minimax.py
+++ b/model_library/providers/minimax.py
@@ -1,13 +1,12 @@
 from typing import Literal
 
 from model_library import model_library_settings
-from model_library.base import (
-    DelegateOnly,
-    LLMConfig,
-)
-from model_library.providers.openai import OpenAIModel
+from model_library.base import DelegateOnly, LLMConfig
+from model_library.providers.anthropic import AnthropicModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
+from model_library.utils import default_httpx_client
+
+from anthropic import AsyncAnthropic
 
 
 @register_provider("minimax")
@@ -21,13 +20,14 @@ class MinimaxModel(DelegateOnly):
     ):
         super().__init__(model_name, provider, config=config)
 
-        self.delegate = OpenAIModel(
+        self.delegate = AnthropicModel(
             model_name=self.model_name,
             provider=self.provider,
             config=config,
-            custom_client=create_openai_client_with_defaults(
+            custom_client=AsyncAnthropic(
                 api_key=model_library_settings.MINIMAX_API_KEY,
-                base_url="https://api.minimax.io/v1",
+                base_url="https://api.minimax.io/anthropic",
+                http_client=default_httpx_client(),
+                max_retries=1,
             ),
-            use_completions=True,
         )
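MiniMax thus delegates through its Anthropic-compatible endpoint instead of the OpenAI-compatible one, reusing the `custom_client` plumbing added to `AnthropicModel` above. A resolution sketch, with construction details assumed from the surrounding code:

```python
from model_library.providers.minimax import MinimaxModel

# Assumes MINIMAX_API_KEY is configured in model_library_settings.
model = MinimaxModel("MiniMax-M2.1")

assert model.delegate is not None  # an AnthropicModel under the hood
# Query flow: MinimaxModel._query_impl -> delegate_query
#   -> AnthropicModel._query_impl -> https://api.minimax.io/anthropic
```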
--- a/model_library/providers/mistral.py
+++ b/model_library/providers/mistral.py
@@ -1,4 +1,5 @@
 import io
+import logging
 import time
 from collections.abc import Sequence
 from typing import Any, Literal
@@ -171,6 +172,7 @@ class MistralModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         # mistral supports max 8 images, merge extra images into the 8th image
--- a/model_library/providers/openai.py
+++ b/model_library/providers/openai.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from openai import APIConnectionError, AsyncOpenAI
@@ -505,8 +506,11 @@ class OpenAIModel(LLM):
         if self.reasoning:
             del body["max_tokens"]
             body["max_completion_tokens"] = self.max_tokens
-        if self.reasoning_effort:
-            body["reasoning_effort"] = self.reasoning_effort
+
+        # some model endpoints (like `fireworks/deepseek-v3p2`)
+        # require explicitly setting reasoning effort to disable thinking
+        if self.reasoning_effort is not None:
+            body["reasoning_effort"] = self.reasoning_effort
 
         if self.supports_temperature:
             if self.temperature is not None:
@@ -701,8 +705,8 @@ class OpenAIModel(LLM):
 
         if self.reasoning:
             body["reasoning"] = {"summary": "auto"}
-            if self.reasoning_effort:
-                body["reasoning"]["effort"] = self.reasoning_effort
+            if self.reasoning_effort is not None:
+                body["reasoning"]["effort"] = self.reasoning_effort  # type: ignore[reportArgumentType]
 
         if self.supports_temperature:
             if self.temperature is not None:
@@ -722,6 +726,7 @@ class OpenAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.use_completions:
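These `is not None` checks pair with the `str | bool | None` widening in `LLMConfig`: a configured effort is now forwarded whenever it is present, so boolean values such as `False` are no longer silently dropped and a registry entry can pin an explicit effort (the new fireworks/deepseek-v3p2 default is `"none"`) to disable thinking on endpoints that require it. A sketch of the difference; the config construction is illustrative:

```python
from model_library.base import LLMConfig  # import path per this diff

config = LLMConfig(reasoning_effort="none")  # may now also be a bool

# before: body["reasoning_effort"] was set only for truthy values
# after:  it is set for every non-None value, so "none" / False
#         reach the endpoint and turn thinking off explicitly
```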
--- a/model_library/providers/vals.py
+++ b/model_library/providers/vals.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import io
 import json
+import logging
 import random
 import re
 import time
@@ -271,6 +272,7 @@ class DummyAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.create_body(input, tools=tools, **kwargs)
--- a/model_library/providers/xai.py
+++ b/model_library/providers/xai.py
@@ -1,5 +1,6 @@
 import asyncio
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 import grpc
@@ -78,7 +79,7 @@ class XAIModel(LLM):
         super().__init__(model_name, provider, config=config)
 
         # https://docs.x.ai/docs/guides/migration
-        self.delegate: OpenAIModel | None = (
+        self.delegate = (
             None
             if self.native
             else OpenAIModel(
@@ -225,13 +226,16 @@ class XAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.reasoning_effort:
             kwargs["reasoning_effort"] = self.reasoning_effort
 
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         messages: Sequence[Message] = []
         if "system_prompt" in kwargs:
--- a/model_library/register_models.py
+++ b/model_library/register_models.py
@@ -169,7 +169,7 @@ class DefaultParameters(BaseModel):
     temperature: float | None = None
     top_p: float | None = None
     top_k: int | None = None
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
 
 
 class RawModelConfig(BaseModel):
--- a/model_library/registry_utils.py
+++ b/model_library/registry_utils.py
@@ -1,9 +1,11 @@
 from functools import cache
 from pathlib import Path
+from typing import TypedDict
 
 import tiktoken
 
 from model_library.base import LLM, LLMConfig, ProviderConfig
+from model_library.base.output import QueryResultCost, QueryResultMetadata
 from model_library.register_models import (
     CostProperties,
     ModelConfig,
@@ -129,6 +131,64 @@ def get_model_cost(model_str: str) -> CostProperties | None:
     return model_config.costs_per_million_token
 
 
+class TokenDict(TypedDict, total=False):
+    """Token counts for cost calculation."""
+
+    in_tokens: int
+    out_tokens: int
+    reasoning_tokens: int | None
+    cache_read_tokens: int | None
+    cache_write_tokens: int | None
+
+
+async def recompute_cost(
+    model_str: str,
+    tokens: TokenDict,
+) -> QueryResultCost:
+    """
+    Recompute the cost for a model based on token information.
+
+    Uses the model provider's existing _calculate_cost method to ensure
+    provider-specific cost calculations are applied.
+
+    Args:
+        model_str: The model identifier (e.g., "openai/gpt-4o")
+        tokens: Dictionary containing token counts with keys:
+            - in_tokens (required): Number of input tokens
+            - out_tokens (required): Number of output tokens
+            - reasoning_tokens (optional): Number of reasoning tokens
+            - cache_read_tokens (optional): Number of cache read tokens
+            - cache_write_tokens (optional): Number of cache write tokens
+
+    Returns:
+        QueryResultCost with computed costs
+
+    Raises:
+        ValueError: If required token parameters are missing
+        Exception: If model not found in registry or costs not configured
+    """
+    if "in_tokens" not in tokens:
+        raise ValueError("Token dict must contain 'in_tokens'")
+    if "out_tokens" not in tokens:
+        raise ValueError("Token dict must contain 'out_tokens'")
+
+    model = get_registry_model(model_str)
+
+    metadata = QueryResultMetadata(
+        in_tokens=tokens["in_tokens"],
+        out_tokens=tokens["out_tokens"],
+        reasoning_tokens=tokens.get("reasoning_tokens"),
+        cache_read_tokens=tokens.get("cache_read_tokens"),
+        cache_write_tokens=tokens.get("cache_write_tokens"),
+    )
+
+    cost = await model._calculate_cost(metadata)  # type: ignore[arg-type]
+    if cost is None:
+        raise Exception(f"No cost information available for model {model_str}")
+
+    return cost
+
+
 @cache
 def get_provider_names() -> list[str]:
     """Return all provider names in the registry"""
--- a/model_library-0.1.4.dist-info/METADATA
+++ b/model_library-0.1.6.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.4
+Version: 0.1.6
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT
--- a/model_library-0.1.4.dist-info/RECORD
+++ b/model_library-0.1.6.dist-info/RECORD
@@ -1,64 +1,64 @@
 model_library/__init__.py,sha256=AKc_15aklOf-LbcS9z1Xer_moRWNpG6Dh3kqvSQ0nOI,714
-model_library/exceptions.py,sha256=I9wquqj5hE640OfwVjUFtQUuu_potWAejLcOQCpDxIg,8705
+model_library/exceptions.py,sha256=ZHMr6lloXZz4V4Wy1UP8zc1CdUHx6-IS9_rOi6oN45s,8680
 model_library/file_utils.py,sha256=FAZRRtDT8c4Rjfoj64Te3knEHggXAAfRRuS8WLCsSe8,3682
 model_library/logging.py,sha256=McyaPHUk7RkB38-LrfnudrrU1B62ta8wAbbIBwLRmj0,853
 model_library/model_utils.py,sha256=l8oCltGeimMGtnne_3Q1EguVtzCj61UMsLsma-1czwg,753
 model_library/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/register_models.py,sha256=CY3Wd16AcWf7tYu_O2I2_kg_hdvQJFcvyQQA2OUu2SA,13646
-model_library/registry_utils.py,sha256=-ut95Aup5RYrZdv5Aih3bbYhe2vw9V0l4EFyH_1ONsQ,6797
+model_library/register_models.py,sha256=3FeFrcS2qRpAhj9ahXNuZ6jcH5UEks3I_PaT6rPvKgs,13653
+model_library/registry_utils.py,sha256=BVauHcP02Et2maLxowNBbdpGd32cnLz1_zSjDLVJjp0,8843
 model_library/settings.py,sha256=QyeUqzWBpexFi014L_mZkoXP49no3SAQNJRObATXrL8,873
 model_library/utils.py,sha256=T91ACGTc-KtksVyMFspt-vJtR5I-xcO3nVfH6SltmMU,3988
 model_library/base/__init__.py,sha256=TtxCXGUtkEqWZNMMofLPuC4orN7Ja2hemtbtHitt_UA,266
-model_library/base/base.py,sha256=HXxImh2H-GIIiVGNqV7gRPi0HH1KJxB_4ckuKyEqAYo,14139
+model_library/base/base.py,sha256=mvubt5VC1eM8cuLw_RHP04hTgNEcULzIBiJcHqKF--c,14289
 model_library/base/batch.py,sha256=-jd6L0ECc5pkj73zoX2ZYcv_9iQdqxEi1kEilwaXWSA,2895
-model_library/base/delegate_only.py,sha256=V2MzENtvBg0pySKncgE-mfCLBhhRZ0y4BntQwQsxbqU,2111
+model_library/base/delegate_only.py,sha256=YJUvP9k8x2kBsI-0ACNmx1Jx77zdZSBWCMSpx0LAyXE,2213
 model_library/base/input.py,sha256=Nhg8Ril1kFau1DnE8u102JC1l-vxNd-v9e3SjovR-Do,1876
-model_library/base/output.py,sha256=9pQZSOskkLDd_MAuDbYSimrbEcBL6x_3z6nLrPUnCOw,6701
-model_library/base/utils.py,sha256=KJZRVWr38Tik3yNJvTXnBy62ccilzzmSxHZFpQBJMPo,1330
+model_library/base/output.py,sha256=Ak6CJRYqtjYILsSWkfE70fSK3yvP7v_n5NYfysMaIL4,7464
+model_library/base/utils.py,sha256=YGQLPyQgCbfHNBxyTxCvpZNZ-ctEji258IdfMiXUJXs,1962
 model_library/config/README.md,sha256=i8_wHnlI6uHIqWN9fYBkDCglZM2p5ZMVD3SLlxiwUVk,4274
 model_library/config/ai21labs_models.yaml,sha256=ZWHhk1cep2GQIYHqkTS_0152mF3oZg2tSzMPmvfMRSI,2478
 model_library/config/alibaba_models.yaml,sha256=-RLWOwh3ZaCQqjaZ-4Zw0BJNVE6JVHJ8Ggm9gQJZ6QI,2082
-model_library/config/all_models.json,sha256=HuTWNX-noeGfLNoWuzLVjhjXqkFGJX0CgBMt01Ejy3A,529312
+model_library/config/all_models.json,sha256=U-XQrbaWWhjmkawg0Bd9NTxoDN-DT0WPhmDLF6OALR4,533621
 model_library/config/amazon_models.yaml,sha256=HgLmhpfedHCQtkPEviEJCBbAb-dNQPOnVtf4UnwrDds,7654
 model_library/config/anthropic_models.yaml,sha256=bTc_3Oqn4wCdq-dcWcEfmXrPVZjcR8-V6pTao7sGa_E,10475
 model_library/config/cohere_models.yaml,sha256=ZfWrS1K45Hxd5nT_gpP5YGAovJcBIlLNIdaRyE3V-7o,5022
 model_library/config/deepseek_models.yaml,sha256=4CCrf-4UPBgFCrS6CQa3vzNiaYlD4B6dFJFK_kIYBWY,1156
 model_library/config/dummy_model.yaml,sha256=lImYJBtBVJk_jgnLbkuSyOshQphVlYCMkw-UiJIBYhY,877
-model_library/config/fireworks_models.yaml,sha256=BMyQqjEpayNfSVGekzOFNIx7Ng3QOfPtldw5k2msqX0,6269
-model_library/config/google_models.yaml,sha256=n6yPRSVLyKGoJQW7L3UiVmb182zKiYhVLbmiUQDwXiY,16101
+model_library/config/fireworks_models.yaml,sha256=bAlXvjkdt-CnRp66WbfDv2qTrF5UHceRd2pvrsBERMk,6324
+model_library/config/google_models.yaml,sha256=Rg127nsBbHpk62X7WBq2ckdHo0bwYM0NVjF7T2h_1c0,16494
 model_library/config/inception_models.yaml,sha256=YCqfQlkH_pTdHIKee5dP_aRFXw_fTIEQCpUvX2bwO0M,560
 model_library/config/kimi_models.yaml,sha256=AAqse_BCE-lrHkJHIWJVqMtttnZQCa-5Qy5qiLUJjYs,755
-model_library/config/minimax_models.yaml,sha256=IttkpdBrp75J9WZQ0IRE4m4eSfd0LonfcA9OtrzJrMY,873
+model_library/config/minimax_models.yaml,sha256=gWTuTcl1-zyCF6KRuU6DSre2Cw5gXC-TeKV2Qp4StnQ,1263
 model_library/config/mistral_models.yaml,sha256=mYKYSzJl24lUiA_erSkom7nCBxAoeJ57Mi3090q1ArM,5162
 model_library/config/openai_models.yaml,sha256=1lKsTQwsxMMJqXtEoYs3liy6NcaK4p8NN7b-GSFnl8k,25261
-model_library/config/perplexity_models.yaml,sha256=XEvs3fXrsSYjYNHLJuGSlTW7biHMaXpZaW4Q-aVn6wU,2299
+model_library/config/perplexity_models.yaml,sha256=WUDqhLvnB0kQhCCwPv19FYLHozet3m33Spdo6bGff3Q,2336
 model_library/config/together_models.yaml,sha256=BeqRJonYzPvWwoLfkyH0RMRKBYUrCSEQhg_25Nvx97M,23867
 model_library/config/xai_models.yaml,sha256=2KRNNQy3kV-4xxSfhj7Uhp9TZF-S5qPlM8Ef-04zv8Y,7985
-model_library/config/zai_models.yaml,sha256=Esa4P-zc5K1pejQTylKPe-uiH9AnvB_Zn7RB_sAZ5mU,1577
+model_library/config/zai_models.yaml,sha256=lcYMh2FCrLWkKqdCnarRlwDoL3SbutRBNAiMPBUYQiw,1894
 model_library/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/providers/ai21labs.py,sha256=7PnXKl-Fv8KlE95eBv2izbFg1u7utDRQPdWXYVl_-as,5832
+model_library/providers/ai21labs.py,sha256=sqmu9R7owZZQLxOkNV9dhSeZVAlTMDCNFVdxJyJo6UA,5885
 model_library/providers/alibaba.py,sha256=k6LZErV_l9oTFTdKTwyw1SXD509Rl3AqFbN8umCryEE,2941
-model_library/providers/amazon.py,sha256=jRqOYCnxiONlbjT2C0UuFIrFOMU4d-hvLElPp41n5Ds,14015
-model_library/providers/anthropic.py,sha256=6YI04jdDDtDjLS17jThVYlNvLbqd9THrKAtaVTYL6eg,22194
+model_library/providers/amazon.py,sha256=U0tH5mw8dAMDg06BtnVoR-RxYlcAL1-7ZR737sR8xgU,14068
+model_library/providers/anthropic.py,sha256=G94hFkRFTWutq9XYd3321KkPrxVHnR6fs_h2AdkVqx4,23197
 model_library/providers/azure.py,sha256=brQNCED-zHvYjL5K5hdjFBNso6hJZg0HTHNnAgJPPG0,1408
 model_library/providers/cohere.py,sha256=lCBm1PP1l_UOa1pKFMIZM3C0wCv3QWB6UP0-jvjkFa4,1066
 model_library/providers/deepseek.py,sha256=7T4lxDiV5wmWUK7TAKwr332_T6uyXNCOiirZOCCETL0,1159
 model_library/providers/fireworks.py,sha256=w-5mOF5oNzqx_0ijCoTm1lSn2ZHwhp6fURKhV3LEqIc,2309
 model_library/providers/inception.py,sha256=Nrky53iujIM9spAWoNRtoJg2inFiL0li6E75vT3b6V8,1107
 model_library/providers/kimi.py,sha256=zzvcKpZLsM1xPebpLeMxNKTt_FRiLN1rFWrIly7wfXA,1092
-model_library/providers/minimax.py,sha256=HkM601mxTC0tpDGtxLTGq5IwnCfFfHG4EF6l1Bg77K4,1001
-model_library/providers/mistral.py,sha256=9zGYLpkn436ahZ716-5R5AQzn7htwVres1IjP5x5bFw,9745
-model_library/providers/openai.py,sha256=1PNmS-0ERjqLzWS9Prr1_cUpctyEj_xp15XOpl9-IGE,33421
+model_library/providers/minimax.py,sha256=YRtJW2wgiu6KXEBScYETeVMNTfhPvpjL2J-oo0wE_BI,1057
+model_library/providers/mistral.py,sha256=r0PY30kHY-guaSzIEahdp2I45meJzo71Ql97NfkPv-8,9798
+model_library/providers/openai.py,sha256=MMm6K4iewhSpPzEeRhrPRYf_txrpklCrefNHiUly8S8,33665
 model_library/providers/perplexity.py,sha256=eIzzkaZ4ZMlRKFVI9bnwyo91iJkh7aEmJ-0_4OKeAWc,1083
 model_library/providers/together.py,sha256=7Y4QLnX8c_fyXUud-W_C1gidmROQainTgODBwbvFyXQ,2033
-model_library/providers/vals.py,sha256=VLF1rsCR13a_kmtZfboDzJJ64Io_tBFe60vf-0BdYPc,9830
-model_library/providers/xai.py,sha256=oJiMICYLkybHpLv77PmMbi1Xj9IUZmKX3kANksjjFEQ,10828
+model_library/providers/vals.py,sha256=mKaItg_g9RJeaIDhoBu7ksTe42P0MRYFI4X1cla8YC0,9883
+model_library/providers/xai.py,sha256=toSqWBHUaHE000aMdOayAW3-_ZmDUotWEpZ4-X33LuY,10918
 model_library/providers/zai.py,sha256=O_GM6KlJ0fM2wYoxO9xrCWfnpYH7IpoKEzjiD4jB8Kc,1050
 model_library/providers/google/__init__.py,sha256=ypuLVL_QJEQ7C3S47FhC9y4wyawYOdGikAViJmACI0U,115
 model_library/providers/google/batch.py,sha256=4TE90Uo1adi54dVtGcGyUAxw11YExJq-Y4KmkQ-cyHA,9978
-model_library/providers/google/google.py,sha256=s9vky9r5SVNhBvMXcIr0_h0MlKLXwx_tQlZzs57xXYo,16507
-model_library-0.1.4.dist-info/licenses/LICENSE,sha256=x6mf4o7U_wHaaqcfxoU-0R6uYJLbqL_TNuoULP3asaA,1070
-model_library-0.1.4.dist-info/METADATA,sha256=4XPEbWSeOBYYoQ3ZYsdktZSnrDz2YbZixPIW7wTqJfw,6989
-model_library-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-model_library-0.1.4.dist-info/top_level.txt,sha256=HtQYxA_7RP8UT35I6VcUw20L6edI0Zf2t5Ys1uDGVjs,14
-model_library-0.1.4.dist-info/RECORD,,
+model_library/providers/google/google.py,sha256=xmiktN-Z9W1fC1jHUT_m6x5fTpI6-mWpKvbMGg9kgXE,16787
+model_library-0.1.6.dist-info/licenses/LICENSE,sha256=x6mf4o7U_wHaaqcfxoU-0R6uYJLbqL_TNuoULP3asaA,1070
+model_library-0.1.6.dist-info/METADATA,sha256=sNWBOgDqydFI184UERputqhulBz0olrbye-fO7owrCE,6989
+model_library-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+model_library-0.1.6.dist-info/top_level.txt,sha256=HtQYxA_7RP8UT35I6VcUw20L6edI0Zf2t5Ys1uDGVjs,14
+model_library-0.1.6.dist-info/RECORD,,