model-library 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +16 -12
- model_library/base/delegate_only.py +5 -1
- model_library/config/all_models.json +92 -1
- model_library/config/fireworks_models.yaml +2 -0
- model_library/config/minimax_models.yaml +18 -0
- model_library/config/zai_models.yaml +14 -0
- model_library/providers/ai21labs.py +2 -0
- model_library/providers/amazon.py +2 -0
- model_library/providers/anthropic.py +37 -11
- model_library/providers/google/google.py +10 -2
- model_library/providers/minimax.py +10 -10
- model_library/providers/mistral.py +2 -0
- model_library/providers/openai.py +9 -4
- model_library/providers/vals.py +2 -0
- model_library/providers/xai.py +6 -2
- model_library/register_models.py +1 -1
- {model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/METADATA +1 -1
- {model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/RECORD +21 -21
- {model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/WHEEL +0 -0
- {model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/top_level.txt +0 -0
model_library/base/base.py
CHANGED

@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
 from collections.abc import Awaitable
 from pprint import pformat
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Literal,
@@ -43,9 +42,6 @@ from model_library.exceptions import (
 )
 from model_library.utils import truncate_str

-if TYPE_CHECKING:
-    from model_library.providers.openai import OpenAIModel
-
 PydanticT = TypeVar("PydanticT", bound=BaseModel)


@@ -66,7 +62,7 @@ class LLMConfig(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning: bool = False
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -110,7 +106,7 @@ class LLM(ABC):
         self.top_k: int | None = config.top_k

         self.reasoning: bool = config.reasoning
-        self.reasoning_effort: str | None = config.reasoning_effort
+        self.reasoning_effort: str | bool | None = config.reasoning_effort

         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
@@ -120,7 +116,7 @@ class LLM(ABC):
         self.supports_tools: bool = config.supports_tools

         self.native: bool = config.native
-        self.delegate: "OpenAIModel | None" = None
+        self.delegate: "LLM | None" = None
         self.batch: LLMBatchMixin | None = None

         if config.provider_config:
@@ -198,11 +194,14 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition] = [],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if not self.delegate:
             raise Exception("Delegate not set")
-        return await self.delegate._query_impl(input, tools=tools, **kwargs)
+        return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )

     async def query(
         self,
@@ -213,6 +212,7 @@ class LLM(ABC):
         # for backwards compatibility
         files: list[FileInput] = [],
         images: list[FileInput] = [],
+        query_logger: logging.Logger | None = None,
         **kwargs: object,
     ) -> QueryResult:
         """
@@ -256,15 +256,18 @@ class LLM(ABC):
         input = [*history, *input]

         # unique logger for the query
-        query_id = uuid.uuid4().hex[:14]
-        query_logger = self.logger.getChild(f"query={query_id}")
+        if not query_logger:
+            query_id = uuid.uuid4().hex[:14]
+            query_logger = self.logger.getChild(f"query={query_id}")

         query_logger.info(
             "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
         )

         async def query_func() -> QueryResult:
-            return await self._query_impl(input, tools=tools, **kwargs)
+            return await self._query_impl(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )

         async def timed_query() -> tuple[QueryResult, float]:
             return await LLM.timer_wrapper(query_func)
@@ -361,7 +364,8 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
-        **kwargs: object,
+        query_logger: logging.Logger,
+        **kwargs: object,
     ) -> QueryResult:
         """
         Query the model with input

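Taken together, the base.py changes thread a single logger through the whole query path: `query()` now accepts an optional `query_logger` and only mints a uuid-suffixed child logger when the caller does not supply one, and every provider's `_query_impl` (plus `delegate_query`) now takes it as a required keyword. A runnable distillation of the fallback rule; the helper name and logger names here are ours, not the library's:

import logging
import uuid

# distilled from LLM.query above: reuse the caller's logger if given,
# otherwise mint a per-query child logger with a short uuid suffix
def resolve_query_logger(
    base: logging.Logger, query_logger: logging.Logger | None = None
) -> logging.Logger:
    if not query_logger:
        query_id = uuid.uuid4().hex[:14]
        query_logger = base.getChild(f"query={query_id}")
    return query_logger

base = logging.getLogger("model_library.some_model")  # illustrative name
print(resolve_query_logger(base).name)                # ...query=<14 hex chars>
print(resolve_query_logger(base, logging.getLogger("myapp.run42")).name)  # myapp.run42
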
model_library/base/delegate_only.py
CHANGED

@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence

 from typing_extensions import override
@@ -48,11 +49,14 @@ class DelegateOnly(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         assert self.delegate

-        return await self.delegate_query(input, tools=tools, **kwargs)
+        return await self.delegate_query(
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )

     @override
     async def parse_input(

model_library/config/all_models.json
CHANGED

@@ -1,4 +1,94 @@
 {
+  "minimax/MiniMax-M2.1": {
+    "company": "MiniMax",
+    "label": "MiniMax-M2.1",
+    "description": null,
+    "release_date": "2025-12-23",
+    "open_source": true,
+    "documentation_url": "https://platform.minimax.io/docs",
+    "properties": {
+      "context_window": 204800,
+      "max_tokens": 131000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.3,
+      "output": 1.2,
+      "cache": {
+        "read": 0.03,
+        "write": 0.375,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 0.95
+    },
+    "provider_endpoint": "MiniMax-M2.1",
+    "provider_name": "minimax",
+    "full_key": "minimax/MiniMax-M2.1",
+    "slug": "minimax_MiniMax-M2.1"
+  },
+  "zai/glm-4.7": {
+    "company": "zAI",
+    "label": "GLM 4.7",
+    "description": "Latest model from ZAI",
+    "release_date": "2025-12-22",
+    "open_source": true,
+    "documentation_url": "https://docs.z.ai/",
+    "properties": {
+      "context_window": 200000,
+      "max_tokens": 128000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.6,
+      "output": 2.2,
+      "cache": {
+        "read": 0.11,
+        "read_discount": 1.0,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 1.0
+    },
+    "provider_endpoint": "glm-4.7",
+    "provider_name": "zai",
+    "full_key": "zai/glm-4.7",
+    "slug": "zai_glm-4.7"
+  },
   "google/gemini-3-flash-preview": {
     "company": "Google",
     "label": "Gemini 3 Flash (12/25)",
@@ -504,7 +594,8 @@
       }
     ],
     "default_parameters": {
-      "temperature": 1.0
+      "temperature": 1.0,
+      "reasoning_effort": "none"
     },
     "provider_endpoint": "deepseek-v3p2",
     "provider_name": "fireworks",

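Both new entries follow the existing per-million-token cost schema, including the nested cache block. A quick sketch of what the MiniMax-M2.1 numbers mean in practice; the aggregation below is illustrative arithmetic, not a library helper:

# USD per million tokens, from the "minimax/MiniMax-M2.1" entry above
INPUT, OUTPUT, CACHE_READ = 0.3, 1.2, 0.03

def estimated_cost(prompt_tokens: int, cached_tokens: int, output_tokens: int) -> float:
    fresh = prompt_tokens - cached_tokens
    return (fresh * INPUT + cached_tokens * CACHE_READ + output_tokens * OUTPUT) / 1_000_000

# a 100k-token prompt with 80k cache hits and a 2k-token answer:
print(estimated_cost(100_000, 80_000, 2_000))  # 0.0108, i.e. about one cent
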
model_library/config/minimax_models.yaml
CHANGED

@@ -16,6 +16,24 @@ base-config:

 minimax-m2-models:

+  minimax/MiniMax-M2.1:
+    label: MiniMax-M2.1
+    release_date: 2025-12-23
+    properties:
+      context_window: 204_800
+      max_tokens: 131_000
+      reasoning_model: true
+      training_cutoff: null
+    default_parameters:
+      temperature: 1.0
+      top_p: 0.95
+    costs_per_million_token:
+      input: 0.30
+      output: 1.20
+      cache:
+        read: 0.03
+        write: 0.375
+
   minimax/MiniMax-M2:
     label: MiniMax-M2
     description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.

model_library/config/zai_models.yaml
CHANGED

@@ -18,6 +18,20 @@ base-config:
       write_markup: 1

 zai-models:
+  zai/glm-4.7:
+    label: GLM 4.7
+    description: "Latest model from ZAI"
+    release_date: 2025-12-22
+    properties:
+      context_window: 200_000
+      max_tokens: 128_000
+    costs_per_million_token:
+      input: 0.6
+      output: 2.2
+      cache:
+        read: 0.11
+    default_parameters:
+      temperature: 1
   zai/glm-4.5:
     label: GLM 4.5
     description: "z.AI old model"

model_library/providers/ai21labs.py
CHANGED

@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence

 from ai21 import AsyncAI21Client
@@ -137,6 +138,7 @@ class AI21LabsModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         messages: list[ChatMessage] = []

model_library/providers/amazon.py
CHANGED

@@ -3,6 +3,7 @@ import asyncio
 import base64
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast

 import boto3
@@ -337,6 +338,7 @@ class AmazonModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.build_body(input, tools=tools, **kwargs)

model_library/providers/anthropic.py
CHANGED

@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence, cast

 from anthropic import AsyncAnthropic
@@ -249,6 +250,8 @@ class AnthropicModel(LLM):

     @override
     def get_client(self) -> AsyncAnthropic:
+        if self._delegate_client:
+            return self._delegate_client
         if not AnthropicModel._client:
             headers: dict[str, str] = {}
             AnthropicModel._client = AsyncAnthropic(
@@ -262,16 +265,20 @@ class AnthropicModel(LLM):
     def __init__(
         self,
         model_name: str,
-        provider:
+        provider: str = "anthropic",
         *,
         config: LLMConfig | None = None,
+        custom_client: AsyncAnthropic | None = None,
     ):
         super().__init__(model_name, provider, config=config)

+        # allow custom client to act as delegate (native)
+        self._delegate_client: AsyncAnthropic | None = custom_client
+
         # https://docs.anthropic.com/en/api/openai-sdk
-        self.delegate
+        self.delegate = (
             None
-            if self.native
+            if self.native or custom_client
             else OpenAIModel(
                 model_name=self.model_name,
                 provider=provider,
@@ -285,7 +292,10 @@ class AnthropicModel(LLM):
         )

         # Initialize batch support if enabled
-        self.supports_batch: bool = self.supports_batch and self.native
+        # Disable batch when using custom_client (similar to OpenAI)
+        self.supports_batch: bool = (
+            self.supports_batch and self.native and not custom_client
+        )
         self.batch: LLMBatchMixin | None = (
             AnthropicBatchMixin(self) if self.supports_batch else None
         )
@@ -555,20 +565,36 @@ class AnthropicModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )

         body = await self.create_body(input, tools=tools, **kwargs)

-        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
-        if "sonnet-4-5" in self.model_name:
-            betas.append("context-1m-2025-08-07")
+        client = self.get_client()

-        async with self.get_client().beta.messages.stream(
-            **body,
-            betas=betas,
+        # only send betas for the official Anthropic endpoint
+        is_anthropic_endpoint = self._delegate_client is None
+        if not is_anthropic_endpoint:
+            client_base_url = getattr(client, "_base_url", None) or getattr(
+                client, "base_url", None
+            )
+            if client_base_url:
+                is_anthropic_endpoint = "api.anthropic.com" in str(client_base_url)
+
+        stream_kwargs = {**body}
+        if is_anthropic_endpoint:
+            betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+            if "sonnet-4-5" in self.model_name:
+                betas.append("context-1m-2025-08-07")
+            stream_kwargs["betas"] = betas
+
+        async with client.beta.messages.stream(
+            **stream_kwargs,
         ) as stream:  # pyright: ignore[reportAny]
             message = await stream.get_final_message()
             self.logger.info(f"Anthropic Response finished: {message.id}")

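The new custom_client hook is what the MiniMax rewrite below builds on: an AnthropicModel can be pointed at any Anthropic-compatible endpoint, in which case delegation to OpenAIModel, batch support, and the beta flags (which only api.anthropic.com understands) are all switched off. A sketch using the signature above; the key handling is illustrative:

from anthropic import AsyncAnthropic
from model_library.providers.anthropic import AnthropicModel

model = AnthropicModel(
    "MiniMax-M2.1",
    provider="minimax",
    custom_client=AsyncAnthropic(
        api_key="...",  # the third-party provider's key
        base_url="https://api.minimax.io/anthropic",
    ),
)
# model.delegate is None and model.supports_batch is False; no beta headers
# are attached because the client does not target api.anthropic.com
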
model_library/providers/google/google.py
CHANGED

@@ -1,5 +1,6 @@
 import base64
 import io
+import logging
 from typing import Any, Literal, Sequence, cast

 from google.genai import Client
@@ -54,6 +55,11 @@ from model_library.exceptions import (
 from model_library.providers.google.batch import GoogleBatchMixin
 from model_library.register_models import register_provider
 from model_library.utils import normalize_tool_result
+import uuid
+
+
+def generate_tool_call_id(tool_name: str) -> str:
+    return str(tool_name + "_" + str(uuid.uuid4()))


 class GoogleConfig(ProviderConfig):
@@ -328,6 +334,7 @@ class GoogleModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body: dict[str, Any] = await self.create_body(input, tools=tools, **kwargs)
@@ -357,9 +364,10 @@ class GoogleModel(LLM):

                 call_args = part.function_call.args or {}
                 tool_calls.append(
-                    #
+                    # Weirdly, id is not required. If not provided, we generate one.
                     ToolCall(
-                        id=part.function_call.id,
+                        id=part.function_call.id
+                        or generate_tool_call_id(part.function_call.name),
                         name=part.function_call.name,
                         args=call_args,
                     )

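Gemini function-call parts may legally arrive without an id, but downstream tool-result matching needs one, so the provider now backfills a `<tool_name>_<uuid4>` id. For instance:

from model_library.providers.google.google import generate_tool_call_id

print(generate_tool_call_id("get_weather"))
# e.g. get_weather_1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed
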
model_library/providers/minimax.py
CHANGED

@@ -1,13 +1,12 @@
 from typing import Literal

 from model_library import model_library_settings
-from model_library.base import (
-    DelegateOnly,
-    LLMConfig,
-)
-from model_library.providers.openai import OpenAIModel
+from model_library.base import DelegateOnly, LLMConfig
+from model_library.providers.anthropic import AnthropicModel
 from model_library.register_models import register_provider
-from model_library.utils import
+from model_library.utils import default_httpx_client
+
+from anthropic import AsyncAnthropic


 @register_provider("minimax")
@@ -21,13 +20,14 @@ class MinimaxModel(DelegateOnly):
     ):
         super().__init__(model_name, provider, config=config)

-        self.delegate = OpenAIModel(
+        self.delegate = AnthropicModel(
             model_name=self.model_name,
             provider=self.provider,
             config=config,
-            custom_client=
+            custom_client=AsyncAnthropic(
                 api_key=model_library_settings.MINIMAX_API_KEY,
-                base_url="https://api.minimax.io/
+                base_url="https://api.minimax.io/anthropic",
+                http_client=default_httpx_client(),
+                max_retries=1,
             ),
-            use_completions=True,
         )

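Net effect: MiniMax traffic now rides the Anthropic-compatible API instead of the OpenAI-compatible completions surface, while the public interface is unchanged, since MinimaxModel is still a DelegateOnly that forwards every query to its delegate. A sketch assuming direct construction with the arguments from the diff above (the registry may normally do this for you):

from model_library.providers.minimax import MinimaxModel

model = MinimaxModel("MiniMax-M2.1", "minimax")
# model.delegate is an AnthropicModel whose AsyncAnthropic client points at
# https://api.minimax.io/anthropic and authenticates with MINIMAX_API_KEY
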
model_library/providers/mistral.py
CHANGED

@@ -1,4 +1,5 @@
 import io
+import logging
 import time
 from collections.abc import Sequence
 from typing import Any, Literal
@@ -171,6 +172,7 @@ class MistralModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         # mistral supports max 8 images, merge extra images into the 8th image

model_library/providers/openai.py
CHANGED

@@ -2,6 +2,7 @@ from __future__ import annotations

 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast

 from openai import APIConnectionError, AsyncOpenAI
@@ -505,8 +506,11 @@ class OpenAIModel(LLM):
         if self.reasoning:
             del body["max_tokens"]
             body["max_completion_tokens"] = self.max_tokens
-        if self.reasoning_effort:
-            body["reasoning_effort"] = self.reasoning_effort
+
+        # some model endpoints (like `fireworks/deepseek-v3p2`)
+        # require explicitly setting reasoning effort to disable thinking
+        if self.reasoning_effort is not None:
+            body["reasoning_effort"] = self.reasoning_effort

         if self.supports_temperature:
             if self.temperature is not None:
@@ -701,8 +705,8 @@ class OpenAIModel(LLM):

         if self.reasoning:
             body["reasoning"] = {"summary": "auto"}
-            if self.reasoning_effort:
-                body["reasoning"]["effort"] = self.reasoning_effort
+            if self.reasoning_effort is not None:
+                body["reasoning"]["effort"] = self.reasoning_effort  # type: ignore[reportArgumentType]

         if self.supports_temperature:
             if self.temperature is not None:
@@ -722,6 +726,7 @@ class OpenAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.use_completions:

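The reasoning-effort change is subtle: the old truthiness check silently dropped falsy-but-meaningful values, which matters now that reasoning_effort can be "none" (or, with the str | bool widening, False). A distilled, standalone restatement of the new rule; this helper is ours, not library code:

def apply_reasoning_effort(body: dict[str, object], effort: str | bool | None) -> None:
    # only an unset value is skipped; "none"/False are forwarded so that
    # endpoints like fireworks/deepseek-v3p2 can explicitly disable thinking
    if effort is not None:
        body["reasoning_effort"] = effort

body: dict[str, object] = {}
apply_reasoning_effort(body, "none")
assert body == {"reasoning_effort": "none"}  # the old `if effort:` dropped this
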
model_library/providers/vals.py
CHANGED

@@ -2,6 +2,7 @@ from __future__ import annotations

 import io
 import json
+import logging
 import random
 import re
 import time
@@ -271,6 +272,7 @@ class DummyAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.create_body(input, tools=tools, **kwargs)

model_library/providers/xai.py
CHANGED

@@ -1,5 +1,6 @@
 import asyncio
 import io
+import logging
 from typing import Any, Literal, Sequence, cast

 import grpc
@@ -78,7 +79,7 @@ class XAIModel(LLM):
         super().__init__(model_name, provider, config=config)

         # https://docs.x.ai/docs/guides/migration
-        self.delegate
+        self.delegate = (
             None
             if self.native
             else OpenAIModel(
@@ -225,13 +226,16 @@ class XAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.reasoning_effort:
             kwargs["reasoning_effort"] = self.reasoning_effort

         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )

         messages: Sequence[Message] = []
         if "system_prompt" in kwargs:

model_library/register_models.py
CHANGED

@@ -169,7 +169,7 @@ class DefaultParameters(BaseModel):
     temperature: float | None = None
     top_p: float | None = None
     top_k: int | None = None
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None


 class RawModelConfig(BaseModel):

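DefaultParameters mirrors LLMConfig.reasoning_effort, so model configs can now carry a string or boolean effort, as the new fireworks default_parameters entry does. A minimal reproduction of the widened validation (pydantic model copied from the diff above):

from pydantic import BaseModel

class DefaultParameters(BaseModel):
    temperature: float | None = None
    top_p: float | None = None
    top_k: int | None = None
    reasoning_effort: str | bool | None = None

DefaultParameters(reasoning_effort="none")  # e.g. fireworks/deepseek-v3p2
DefaultParameters(reasoning_effort=False)   # boolean form now validates too
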
{model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/RECORD
CHANGED

@@ -4,61 +4,61 @@ model_library/file_utils.py,sha256=FAZRRtDT8c4Rjfoj64Te3knEHggXAAfRRuS8WLCsSe8,3
 model_library/logging.py,sha256=McyaPHUk7RkB38-LrfnudrrU1B62ta8wAbbIBwLRmj0,853
 model_library/model_utils.py,sha256=l8oCltGeimMGtnne_3Q1EguVtzCj61UMsLsma-1czwg,753
 model_library/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/register_models.py,sha256=
+model_library/register_models.py,sha256=3FeFrcS2qRpAhj9ahXNuZ6jcH5UEks3I_PaT6rPvKgs,13653
 model_library/registry_utils.py,sha256=BVauHcP02Et2maLxowNBbdpGd32cnLz1_zSjDLVJjp0,8843
 model_library/settings.py,sha256=QyeUqzWBpexFi014L_mZkoXP49no3SAQNJRObATXrL8,873
 model_library/utils.py,sha256=T91ACGTc-KtksVyMFspt-vJtR5I-xcO3nVfH6SltmMU,3988
 model_library/base/__init__.py,sha256=TtxCXGUtkEqWZNMMofLPuC4orN7Ja2hemtbtHitt_UA,266
-model_library/base/base.py,sha256=
+model_library/base/base.py,sha256=mvubt5VC1eM8cuLw_RHP04hTgNEcULzIBiJcHqKF--c,14289
 model_library/base/batch.py,sha256=-jd6L0ECc5pkj73zoX2ZYcv_9iQdqxEi1kEilwaXWSA,2895
-model_library/base/delegate_only.py,sha256=
+model_library/base/delegate_only.py,sha256=YJUvP9k8x2kBsI-0ACNmx1Jx77zdZSBWCMSpx0LAyXE,2213
 model_library/base/input.py,sha256=Nhg8Ril1kFau1DnE8u102JC1l-vxNd-v9e3SjovR-Do,1876
 model_library/base/output.py,sha256=Ak6CJRYqtjYILsSWkfE70fSK3yvP7v_n5NYfysMaIL4,7464
 model_library/base/utils.py,sha256=YGQLPyQgCbfHNBxyTxCvpZNZ-ctEji258IdfMiXUJXs,1962
 model_library/config/README.md,sha256=i8_wHnlI6uHIqWN9fYBkDCglZM2p5ZMVD3SLlxiwUVk,4274
 model_library/config/ai21labs_models.yaml,sha256=ZWHhk1cep2GQIYHqkTS_0152mF3oZg2tSzMPmvfMRSI,2478
 model_library/config/alibaba_models.yaml,sha256=-RLWOwh3ZaCQqjaZ-4Zw0BJNVE6JVHJ8Ggm9gQJZ6QI,2082
-model_library/config/all_models.json,sha256=
+model_library/config/all_models.json,sha256=U-XQrbaWWhjmkawg0Bd9NTxoDN-DT0WPhmDLF6OALR4,533621
 model_library/config/amazon_models.yaml,sha256=HgLmhpfedHCQtkPEviEJCBbAb-dNQPOnVtf4UnwrDds,7654
 model_library/config/anthropic_models.yaml,sha256=bTc_3Oqn4wCdq-dcWcEfmXrPVZjcR8-V6pTao7sGa_E,10475
 model_library/config/cohere_models.yaml,sha256=ZfWrS1K45Hxd5nT_gpP5YGAovJcBIlLNIdaRyE3V-7o,5022
 model_library/config/deepseek_models.yaml,sha256=4CCrf-4UPBgFCrS6CQa3vzNiaYlD4B6dFJFK_kIYBWY,1156
 model_library/config/dummy_model.yaml,sha256=lImYJBtBVJk_jgnLbkuSyOshQphVlYCMkw-UiJIBYhY,877
-model_library/config/fireworks_models.yaml,sha256=
+model_library/config/fireworks_models.yaml,sha256=bAlXvjkdt-CnRp66WbfDv2qTrF5UHceRd2pvrsBERMk,6324
 model_library/config/google_models.yaml,sha256=Rg127nsBbHpk62X7WBq2ckdHo0bwYM0NVjF7T2h_1c0,16494
 model_library/config/inception_models.yaml,sha256=YCqfQlkH_pTdHIKee5dP_aRFXw_fTIEQCpUvX2bwO0M,560
 model_library/config/kimi_models.yaml,sha256=AAqse_BCE-lrHkJHIWJVqMtttnZQCa-5Qy5qiLUJjYs,755
-model_library/config/minimax_models.yaml,sha256=
+model_library/config/minimax_models.yaml,sha256=gWTuTcl1-zyCF6KRuU6DSre2Cw5gXC-TeKV2Qp4StnQ,1263
 model_library/config/mistral_models.yaml,sha256=mYKYSzJl24lUiA_erSkom7nCBxAoeJ57Mi3090q1ArM,5162
 model_library/config/openai_models.yaml,sha256=1lKsTQwsxMMJqXtEoYs3liy6NcaK4p8NN7b-GSFnl8k,25261
 model_library/config/perplexity_models.yaml,sha256=WUDqhLvnB0kQhCCwPv19FYLHozet3m33Spdo6bGff3Q,2336
 model_library/config/together_models.yaml,sha256=BeqRJonYzPvWwoLfkyH0RMRKBYUrCSEQhg_25Nvx97M,23867
 model_library/config/xai_models.yaml,sha256=2KRNNQy3kV-4xxSfhj7Uhp9TZF-S5qPlM8Ef-04zv8Y,7985
-model_library/config/zai_models.yaml,sha256=
+model_library/config/zai_models.yaml,sha256=lcYMh2FCrLWkKqdCnarRlwDoL3SbutRBNAiMPBUYQiw,1894
 model_library/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/providers/ai21labs.py,sha256=
+model_library/providers/ai21labs.py,sha256=sqmu9R7owZZQLxOkNV9dhSeZVAlTMDCNFVdxJyJo6UA,5885
 model_library/providers/alibaba.py,sha256=k6LZErV_l9oTFTdKTwyw1SXD509Rl3AqFbN8umCryEE,2941
-model_library/providers/amazon.py,sha256=
-model_library/providers/anthropic.py,sha256=
+model_library/providers/amazon.py,sha256=U0tH5mw8dAMDg06BtnVoR-RxYlcAL1-7ZR737sR8xgU,14068
+model_library/providers/anthropic.py,sha256=G94hFkRFTWutq9XYd3321KkPrxVHnR6fs_h2AdkVqx4,23197
 model_library/providers/azure.py,sha256=brQNCED-zHvYjL5K5hdjFBNso6hJZg0HTHNnAgJPPG0,1408
 model_library/providers/cohere.py,sha256=lCBm1PP1l_UOa1pKFMIZM3C0wCv3QWB6UP0-jvjkFa4,1066
 model_library/providers/deepseek.py,sha256=7T4lxDiV5wmWUK7TAKwr332_T6uyXNCOiirZOCCETL0,1159
 model_library/providers/fireworks.py,sha256=w-5mOF5oNzqx_0ijCoTm1lSn2ZHwhp6fURKhV3LEqIc,2309
 model_library/providers/inception.py,sha256=Nrky53iujIM9spAWoNRtoJg2inFiL0li6E75vT3b6V8,1107
 model_library/providers/kimi.py,sha256=zzvcKpZLsM1xPebpLeMxNKTt_FRiLN1rFWrIly7wfXA,1092
-model_library/providers/minimax.py,sha256=
-model_library/providers/mistral.py,sha256=
-model_library/providers/openai.py,sha256=
+model_library/providers/minimax.py,sha256=YRtJW2wgiu6KXEBScYETeVMNTfhPvpjL2J-oo0wE_BI,1057
+model_library/providers/mistral.py,sha256=r0PY30kHY-guaSzIEahdp2I45meJzo71Ql97NfkPv-8,9798
+model_library/providers/openai.py,sha256=MMm6K4iewhSpPzEeRhrPRYf_txrpklCrefNHiUly8S8,33665
 model_library/providers/perplexity.py,sha256=eIzzkaZ4ZMlRKFVI9bnwyo91iJkh7aEmJ-0_4OKeAWc,1083
 model_library/providers/together.py,sha256=7Y4QLnX8c_fyXUud-W_C1gidmROQainTgODBwbvFyXQ,2033
-model_library/providers/vals.py,sha256=
-model_library/providers/xai.py,sha256=
+model_library/providers/vals.py,sha256=mKaItg_g9RJeaIDhoBu7ksTe42P0MRYFI4X1cla8YC0,9883
+model_library/providers/xai.py,sha256=toSqWBHUaHE000aMdOayAW3-_ZmDUotWEpZ4-X33LuY,10918
 model_library/providers/zai.py,sha256=O_GM6KlJ0fM2wYoxO9xrCWfnpYH7IpoKEzjiD4jB8Kc,1050
 model_library/providers/google/__init__.py,sha256=ypuLVL_QJEQ7C3S47FhC9y4wyawYOdGikAViJmACI0U,115
 model_library/providers/google/batch.py,sha256=4TE90Uo1adi54dVtGcGyUAxw11YExJq-Y4KmkQ-cyHA,9978
-model_library/providers/google/google.py,sha256=
-model_library-0.1.5.dist-info/licenses/LICENSE,sha256=
-model_library-0.1.5.dist-info/METADATA,sha256=
-model_library-0.1.5.dist-info/WHEEL,sha256=
-model_library-0.1.5.dist-info/top_level.txt,sha256=
-model_library-0.1.5.dist-info/RECORD,,
+model_library/providers/google/google.py,sha256=xmiktN-Z9W1fC1jHUT_m6x5fTpI6-mWpKvbMGg9kgXE,16787
+model_library-0.1.6.dist-info/licenses/LICENSE,sha256=x6mf4o7U_wHaaqcfxoU-0R6uYJLbqL_TNuoULP3asaA,1070
+model_library-0.1.6.dist-info/METADATA,sha256=sNWBOgDqydFI184UERputqhulBz0olrbye-fO7owrCE,6989
+model_library-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+model_library-0.1.6.dist-info/top_level.txt,sha256=HtQYxA_7RP8UT35I6VcUw20L6edI0Zf2t5Ys1uDGVjs,14
+model_library-0.1.6.dist-info/RECORD,,

{model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/WHEEL
File without changes

{model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/licenses/LICENSE
File without changes

{model_library-0.1.5.dist-info → model_library-0.1.6.dist-info}/top_level.txt
File without changes