model-library 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +2 -0
- model_library/config/alibaba_models.yaml +5 -0
- model_library/config/all_models.json +115 -9
- model_library/config/anthropic_models.yaml +2 -1
- model_library/config/mistral_models.yaml +2 -0
- model_library/config/together_models.yaml +2 -0
- model_library/config/xiaomi_models.yaml +43 -0
- model_library/config/zai_models.yaml +3 -0
- model_library/providers/anthropic.py +35 -8
- model_library/providers/google/google.py +9 -1
- model_library/providers/vercel.py +34 -0
- model_library/providers/xiaomi.py +34 -0
- model_library/register_models.py +5 -0
- model_library/registry_utils.py +11 -4
- model_library/utils.py +4 -7
- {model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/METADATA +1 -1
- {model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/RECORD +20 -17
- {model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/WHEEL +0 -0
- {model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/top_level.txt +0 -0
model_library/base/base.py
CHANGED
@@ -76,6 +76,7 @@ class LLMConfig(BaseModel):
     top_k: int | None = None
     reasoning: bool = False
     reasoning_effort: str | bool | None = None
+    compute_effort: str | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False

@@ -155,6 +156,7 @@ class LLM(ABC):

         self.reasoning: bool = config.reasoning
         self.reasoning_effort: str | bool | None = config.reasoning_effort
+        self.compute_effort: str | None = config.compute_effort

         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
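
The new `compute_effort` field travels from `LLMConfig` onto the `LLM` instance the same way `reasoning_effort` does. A minimal sketch of opting into it from caller code (the field names come from the diff above; the values, and the assumption that the remaining `LLMConfig` fields can stay at their defaults, are illustrative):

    from model_library.base import LLMConfig

    # Hypothetical values; providers only act on compute_effort when their registry
    # entry sets provider_properties.supports_compute_effort (see anthropic.py below).
    config = LLMConfig(
        reasoning=True,
        compute_effort="medium",  # str | None, forwarded as-is to the provider request
    )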

model_library/config/all_models.json
CHANGED

@@ -122,7 +122,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7

@@ -259,7 +262,9 @@
         "write_markup": 1.0
       }
     },
-    "alternative_keys": [
+    "alternative_keys": [
+      "vercel/zai/glm-4.7"
+    ],
     "default_parameters": {
       "temperature": 1.0,
       "top_p": 1.0

@@ -269,6 +274,94 @@
     "full_key": "zai/glm-4.7",
     "slug": "zai_glm-4.7"
   },
+  "vercel/zai/glm-4.7": {
+    "company": "zAI",
+    "label": "GLM 4.7",
+    "description": "Latest model from ZAI",
+    "release_date": "2025-12-22",
+    "open_source": true,
+    "documentation_url": "https://docs.z.ai/",
+    "properties": {
+      "context_window": 200000,
+      "max_tokens": 128000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.6,
+      "output": 2.2,
+      "cache": {
+        "read": 0.11,
+        "read_discount": 1.0,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 1.0
+    },
+    "provider_endpoint": "zai/glm-4.7",
+    "provider_name": "vercel",
+    "full_key": "vercel/zai/glm-4.7",
+    "slug": "vercel_zai_glm-4.7"
+  },
+  "xiaomi/mimo-v2-flash": {
+    "company": "Xiaomi",
+    "label": "MiMo V2 Flash",
+    "description": "MiMo V2 Flash is Xiaomi's Mixture-of-Experts (MoE) language model with 309B total parameters and 15B active parameters. Designed for high-speed reasoning and agentic workflows, it utilizes a novel hybrid attention architecture and Multi-Token Prediction (MTP) to achieve state-of-the-art performance while significantly reducing inference costs.",
+    "release_date": "2025-12-17",
+    "open_source": true,
+    "documentation_url": "https://platform.xiaomimimo.com/#/docs/",
+    "properties": {
+      "context_window": 256000,
+      "max_tokens": 64000,
+      "training_cutoff": "December 2024"
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.1,
+      "output": 0.3,
+      "cache": {
+        "read": 0.01,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 0.3,
+      "top_p": 0.95
+    },
+    "provider_endpoint": "mimo-v2-flash",
+    "provider_name": "xiaomi",
+    "full_key": "xiaomi/mimo-v2-flash",
+    "slug": "xiaomi_mimo-v2-flash"
+  },
   "google/gemini-3-flash-preview": {
     "company": "Google",
     "label": "Gemini 3 Flash (12/25)",

@@ -856,7 +949,9 @@
       "available_as_evaluator": false,
       "ignored_for_cost": false
     },
-    "provider_properties": {
+    "provider_properties": {
+      "supports_compute_effort": true
+    },
     "costs_per_million_token": {
       "input": 5.0,
       "output": 25.0,

@@ -906,7 +1001,9 @@
       "available_as_evaluator": false,
       "ignored_for_cost": false
     },
-    "provider_properties": {
+    "provider_properties": {
+      "supports_compute_effort": true
+    },
     "costs_per_million_token": {
       "input": 5.0,
       "output": 25.0,

@@ -2210,7 +2307,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7

@@ -2244,7 +2344,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7

@@ -2611,7 +2714,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7

@@ -3920,7 +4026,7 @@
       "tools": true
     },
     "metadata": {
-      "deprecated":
+      "deprecated": true,
       "available_for_everyone": true,
       "available_as_evaluator": false,
       "ignored_for_cost": false

@@ -4550,7 +4656,7 @@
       "tools": true
     },
     "metadata": {
-      "deprecated":
+      "deprecated": true,
       "available_for_everyone": true,
       "available_as_evaluator": false,
       "ignored_for_cost": false
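
All of the `costs_per_million_token` blocks above share the same shape: per-million-token USD rates for `input` and `output`, plus an optional `cache` object (`read`, `read_discount`, `write_markup`). A rough sketch of how a per-request cost falls out of those rates (the helper and the token counts are illustrative, not part of the library):

    def estimate_cost(input_tokens: int, output_tokens: int, rates: dict) -> float:
        """Rough cost in USD from per-million-token rates (illustrative only)."""
        return (
            input_tokens / 1_000_000 * rates["input"]
            + output_tokens / 1_000_000 * rates["output"]
        )

    # Using the xiaomi/mimo-v2-flash rates added above ($0.10 in / $0.30 out per 1M tokens):
    estimate_cost(10_000, 2_000, {"input": 0.1, "output": 0.3})  # ≈ $0.0016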

model_library/config/anthropic_models.yaml
CHANGED

@@ -22,7 +22,6 @@ base-config:
   documentation_url: https://docs.claude.com/en/docs/about-claude/models/overview
   default_parameters:
     temperature: 1
-
 claude-4-models:
   base-config:
     supports:

@@ -40,6 +39,8 @@ claude-4-models:
       cache:
         read: 0.5
         write: 6.25
+    provider_properties:
+      supports_compute_effort: true
     alternative_keys:
       - anthropic/claude-opus-4-5-20251101-thinking:
           properties:

model_library/config/xiaomi_models.yaml
ADDED

@@ -0,0 +1,43 @@
+base-config:
+  company: Xiaomi
+  open_source: true
+  documentation_url: https://platform.xiaomimimo.com/#/docs/
+  supports:
+    images: false
+    files: false
+    tools: true
+  metadata:
+    available_as_evaluator: false
+    available_for_everyone: true
+    ignored_for_cost: false
+  properties:
+    training_cutoff: December 2024
+
+xiaomi-models:
+  base-config:
+    properties:
+      context_window: 256000
+    supports:
+      temperature: true
+      top_p: true
+    default_parameters:
+      temperature: 0.3
+      top_p: 0.95
+
+  xiaomi/mimo-v2-flash:
+    label: MiMo V2 Flash
+    description:
+      MiMo V2 Flash is Xiaomi's Mixture-of-Experts (MoE) language model with
+      309B total parameters and 15B active parameters. Designed for high-speed
+      reasoning and agentic workflows, it utilizes a novel hybrid attention
+      architecture and Multi-Token Prediction (MTP) to achieve state-of-the-art
+      performance while significantly reducing inference costs.
+    release_date: 2025-12-17
+    properties:
+      context_window: 256000
+      max_tokens: 64000
+    costs_per_million_token:
+      input: 0.10
+      output: 0.30
+      cache:
+        read: 0.01

model_library/providers/anthropic.py
CHANGED

@@ -23,6 +23,7 @@ from model_library.base import (
     InputItem,
     LLMBatchMixin,
     LLMConfig,
+    ProviderConfig,
     QueryResult,
     QueryResultCost,
     QueryResultMetadata,

@@ -48,6 +49,11 @@ from model_library.utils import (
 )


+class AnthropicConfig(ProviderConfig):
+    supports_compute_effort: bool = False
+    supports_auto_thinking: bool = False
+
+
 class AnthropicBatchMixin(LLMBatchMixin):
     """Batch processing support for Anthropic's Message Batches API."""


@@ -251,6 +257,8 @@ class AnthropicBatchMixin(LLMBatchMixin):

 @register_provider("anthropic")
 class AnthropicModel(LLM):
+    provider_config = AnthropicConfig()
+
     def _get_default_api_key(self) -> str:
         if self.delegate_config:
             return self.delegate_config.api_key.get_secret_value()

@@ -547,13 +555,22 @@ class AnthropicModel(LLM):
         body["max_tokens"] = self.max_tokens

         if self.reasoning:
-
-"
-
-
-
-
-
+            if self.provider_config.supports_auto_thinking:
+                body["thinking"] = {"type": "auto"}
+            else:
+                budget_tokens = kwargs.pop(
+                    "budget_tokens", get_default_budget_tokens(self.max_tokens)
+                )
+                body["thinking"] = {
+                    "type": "enabled",
+                    "budget_tokens": budget_tokens,
+                }
+
+        # effort controls compute allocation for text, tool calls, and thinking. Opus-4.5+
+        # use instead of reasoning_effort with auto_thinking
+        if self.provider_config.supports_compute_effort and self.compute_effort:
+            # default is "high"
+            body["output_config"] = {"effort": self.compute_effort}

         # Thinking models don't support temperature: https://docs.claude.com/en/docs/build-with-claude/extended-thinking#feature-compatibility
         if self.supports_temperature and not self.reasoning:

@@ -599,7 +616,17 @@ class AnthropicModel(LLM):

         stream_kwargs = {**body}
         if is_anthropic_endpoint:
-            betas = ["files-api-2025-04-14"
+            betas = ["files-api-2025-04-14"]
+            if self.provider_config.supports_auto_thinking:
+                betas.extend(
+                    [
+                        "auto-thinking-2026-01-12",
+                        "effort-2025-11-24",
+                        "max-effort-2026-01-24",
+                    ]
+                )
+            else:
+                betas.extend(["interleaved-thinking-2025-05-14"])
             if "sonnet-4-5" in self.model_name:
                 betas.append("context-1m-2025-08-07")
             stream_kwargs["betas"] = betas
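
The net effect of the new `AnthropicConfig` flags is two possible request shapes when `reasoning` is on. A sketch of both (the keys mirror the diff above; `max_tokens` and the token budget are illustrative values, and per the comment in the diff the server-side default for `effort` is "high"):

    # supports_auto_thinking=True (plus compute_effort set on the config):
    body_auto = {
        "max_tokens": 64000,                    # illustrative
        "thinking": {"type": "auto"},
        "output_config": {"effort": "medium"},  # only sent when supports_compute_effort is true
    }

    # supports_auto_thinking=False: an explicit thinking budget is still required.
    body_budgeted = {
        "max_tokens": 64000,                    # illustrative
        "thinking": {"type": "enabled", "budget_tokens": 32000},
    }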

model_library/providers/google/google.py
CHANGED

@@ -361,16 +361,21 @@ class GoogleModel(LLM):
         contents: list[Content | None] = []
         finish_reason: FinishReason | None = None

+        chunks: list[GenerateContentResponse] = []
+
         async for chunk in stream:
+            chunks.append(chunk)
             candidates = chunk.candidates
             if not candidates:
                 continue

             content = candidates[0].content

+            meaningful_content = False
             if content and content.parts:
                 for part in content.parts:
                     if part.function_call:
+                        meaningful_content = True
                         if not part.function_call.name:
                             raise Exception(f"Invalid function call: {part}")


@@ -387,13 +392,15 @@ class GoogleModel(LLM):
                     if not part.text:
                         continue
                     if part.thought:
+                        meaningful_content = True
                         reasoning += part.text
                     else:
+                        meaningful_content = True
                         text += part.text

             if chunk.usage_metadata:
                 metadata = chunk.usage_metadata
-            if content:
+            if content and meaningful_content:
                 contents.append(content)
             if candidates[0].finish_reason:
                 finish_reason = candidates[0].finish_reason

@@ -402,6 +409,7 @@ class GoogleModel(LLM):
             self.logger.error(f"Unexpected finish reason: {finish_reason}")

         if not text and not reasoning and not tool_calls:
+            self.logger.error(f"Chunks: {chunks}")
             raise ModelNoOutputError("Model returned empty response")

         result = QueryResult(
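
The `meaningful_content` flag keeps chunks that only carry usage metadata (no text, thought, or function call) out of `contents`, and the new `chunks` list means an entirely empty stream is logged in full before `ModelNoOutputError` is raised. A condensed sketch of the filtering idea, simplified from the provider code above (not the provider's exact code):

    def chunk_is_meaningful(parts) -> bool:
        # Simplified: a chunk counts only if some part carries a function call,
        # thought text, or plain text; usage-only chunks are skipped.
        return any(p.function_call or p.text for p in parts or [])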

model_library/providers/vercel.py
ADDED

@@ -0,0 +1,34 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.register_models import register_provider
+
+
+@register_provider("vercel")
+class VercelModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["vercel"] = "vercel",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        # https://vercel.com/docs/ai-gateway/sdks-and-apis#quick-start
+        self.init_delegate(
+            config=config,
+            delegate_config=DelegateConfig(
+                base_url="https://ai-gateway.vercel.sh/v1",
+                api_key=SecretStr(model_library_settings.VERCEL_API_KEY),
+            ),
+            use_completions=True,
+            delegate_provider="openai",
+        )
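
`VercelModel` (and `XiaomiModel` below, which follows the same pattern) is a thin `DelegateOnly` wrapper that routes requests through an OpenAI-compatible chat completions endpoint. A hedged usage sketch, assuming `VERCEL_API_KEY` is present in the library settings as the constructor expects:

    from model_library.providers.vercel import VercelModel

    # "zai/glm-4.7" matches the provider_endpoint of the vercel/zai/glm-4.7
    # registry entry added in all_models.json above.
    model = VercelModel("zai/glm-4.7")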

model_library/providers/xiaomi.py
ADDED

@@ -0,0 +1,34 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.register_models import register_provider
+
+
+@register_provider("xiaomi")
+class XiaomiModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["xiaomi"] = "xiaomi",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        # https://platform.xiaomimimo.com/#/docs/quick-start/first-api-call
+        self.init_delegate(
+            config=config,
+            delegate_config=DelegateConfig(
+                base_url="https://api.xiaomimimo.com/v1",
+                api_key=SecretStr(model_library_settings.XIAOMI_API_KEY),
+            ),
+            use_completions=True,
+            delegate_provider="openai",
+        )
model_library/register_models.py
CHANGED

@@ -170,6 +170,7 @@ class DefaultParameters(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning_effort: str | bool | None = None
+    compute_effort: str | bool | None = None


 class RawModelConfig(BaseModel):

@@ -338,6 +339,10 @@ def _register_models() -> ModelRegistry:
         copy.slug = key.replace("/", "_")
         copy.full_key = key
         copy.alternative_keys = []
+        copy.provider_properties = ProviderProperties.model_validate(
+            provider_properties
+        )
+
         registry[key] = copy

     return registry
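
Validating `provider_properties` while copying entries for alternative keys means alias lookups (for example `vercel/zai/glm-4.7` pointing back at `zai/glm-4.7` in the JSON above) carry a typed `ProviderProperties` object rather than a raw mapping. A small sketch of that guarantee, assuming `supports_compute_effort` is a declared field on `ProviderProperties` as the JSON entries imply:

    # pydantic's model_validate accepts a dict or an existing model instance,
    # so the alias copy always ends up with a ProviderProperties object.
    props = ProviderProperties.model_validate({"supports_compute_effort": True})
    assert props.supports_compute_effort is True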
model_library/registry_utils.py
CHANGED

@@ -1,9 +1,8 @@
+import tiktoken
 from functools import cache
 from pathlib import Path
 from typing import TypedDict

-import tiktoken
-
 from model_library.base import (
     LLM,
     LLMConfig,

@@ -235,6 +234,14 @@ def get_model_names(
 )


+"""
+everything below this comment is included for legacy support of caselaw/corpfin custom models.
+@orestes please remove this as part of the migration to a standard CorpFin harness.
+"""
+
+DEFAULT_CONTEXT_WINDOW = 128_000
+
+
 @cache
 def _get_tiktoken_encoder():
     """Get cached tiktoken encoder for consistent tokenization."""

@@ -257,7 +264,7 @@ def auto_trim_document(
         Trimmed document, or original document if trimming isn't needed
     """

-    max_tokens = get_max_document_tokens(model_name)
+    max_tokens = get_max_document_tokens(model_name) or DEFAULT_CONTEXT_WINDOW

     encoding = _get_tiktoken_encoder()
     tokens = encoding.encode(document)

@@ -284,5 +291,5 @@ def get_max_document_tokens(model_name: str, output_buffer: int = 10000) -> int:
     # Import here to avoid circular imports
     from model_library.utils import get_context_window_for_model

-    context_window = get_context_window_for_model(model_name)
+    context_window = get_context_window_for_model(model_name) or DEFAULT_CONTEXT_WINDOW
     return context_window - output_buffer
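
With the context-window lookup now able to return `None`, the two `or DEFAULT_CONTEXT_WINDOW` fallbacks keep document trimming deterministic for models missing from the registry. A rough sketch of the resulting arithmetic (the model name is made up; the 10,000-token buffer is the function's documented default):

    # Unknown model: the lookup yields None, so DEFAULT_CONTEXT_WINDOW (128_000)
    # applies and the document budget becomes 128_000 - 10_000 = 118_000 tokens.
    budget = get_max_document_tokens("example/unregistered-model")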
model_library/utils.py
CHANGED

@@ -1,6 +1,5 @@
 import logging
 from collections.abc import Mapping, Sequence
-
 import httpx
 from anthropic import AsyncAnthropic
 from openai import AsyncOpenAI

@@ -75,16 +74,15 @@ def create_anthropic_client_with_defaults(
     )


-def get_context_window_for_model(model_name: str
+def get_context_window_for_model(model_name: str) -> int | None:
     """
     Get the context window for a model by looking up its configuration from the registry.

     Args:
         model_name: The name of the model in the registry (e.g., "openai/gpt-4o-mini-2024-07-18" or "azure/gpt-4o-mini-2024-07-18")
-        default: Default context window to return if model not found or missing context_window

     Returns:
-        Context window size in tokens
+        Context window size in tokens (or `None` if not found)
     """
     # import here to avoid circular imports
     from model_library.register_models import get_model_registry

@@ -98,7 +96,6 @@ def get_context_window_for_model(model_name: str, default: int = 128_000) -> int
         return model_config.properties.context_window
     else:
         logger.warning(
-            f"Model {model_name} not found in registry or missing context_window
-            f"using default context length of {default}"
+            f"Model {model_name} not found in registry or missing context_window"
         )
-        return
+        return None
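
Callers of `get_context_window_for_model` now choose their own fallback instead of relying on a baked-in default. A short caller-side sketch (the fallback value here simply mirrors the `DEFAULT_CONTEXT_WINDOW` that registry_utils.py adopts):

    window = get_context_window_for_model("openai/gpt-4o-mini-2024-07-18")
    if window is None:
        window = 128_000  # caller-chosen fallback; registry_utils uses DEFAULT_CONTEXT_WINDOW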

{model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/RECORD
CHANGED

@@ -4,12 +4,12 @@ model_library/file_utils.py,sha256=FAZRRtDT8c4Rjfoj64Te3knEHggXAAfRRuS8WLCsSe8,3
 model_library/logging.py,sha256=rZrrVQlEmyZzvKx6nIOR8bKHl49wQIIW5c36Zqcigm4,888
 model_library/model_utils.py,sha256=l8oCltGeimMGtnne_3Q1EguVtzCj61UMsLsma-1czwg,753
 model_library/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/register_models.py,sha256
-model_library/registry_utils.py,sha256=
+model_library/register_models.py,sha256=xSIQDwsIuZNbOPD-EudnvxHXkCu1xnAWyLek_laGntU,13902
+model_library/registry_utils.py,sha256=UmFLG0PBk9x52nX7kXF0OYOiYNJT_RyaEN-JEthKAWk,9804
 model_library/settings.py,sha256=6xD9RADiiLWDk3xafYeCV9_4ZwxA2vYzpul-VP3qQlQ,867
-model_library/utils.py,sha256=
+model_library/utils.py,sha256=NcSyQW6173HjExaNDsEQxtNbXVuW3dj-2x03URGYzSc,3152
 model_library/base/__init__.py,sha256=TtxCXGUtkEqWZNMMofLPuC4orN7Ja2hemtbtHitt_UA,266
-model_library/base/base.py,sha256=
+model_library/base/base.py,sha256=yZkRz2DllCoXZnuJ9t-9oa17jr6_fTpPqRDF919TwyI,20690
 model_library/base/batch.py,sha256=-jd6L0ECc5pkj73zoX2ZYcv_9iQdqxEi1kEilwaXWSA,2895
 model_library/base/delegate_only.py,sha256=LYmci02i3ShQuF_3N0gJsUySVvFB__PEAgTQQryp0nA,4764
 model_library/base/input.py,sha256=JrnvBZ_xLcEmaMjnOfUS6GFV0QWtCGpJq0RQQL2YBG8,1934

@@ -17,10 +17,10 @@ model_library/base/output.py,sha256=z8x3-0a3seRBuNwRDWgzdXjKS5bSASuR01qoRTmDoWI,
 model_library/base/utils.py,sha256=AjVeC41vg8vbGGL0fQmSGtnnTuZ589Xl7ljv0N1Jzd0,3330
 model_library/config/README.md,sha256=i8_wHnlI6uHIqWN9fYBkDCglZM2p5ZMVD3SLlxiwUVk,4274
 model_library/config/ai21labs_models.yaml,sha256=ZWHhk1cep2GQIYHqkTS_0152mF3oZg2tSzMPmvfMRSI,2478
-model_library/config/alibaba_models.yaml,sha256=
-model_library/config/all_models.json,sha256=
+model_library/config/alibaba_models.yaml,sha256=TkGIPVW0cUVfDeCr9Pz5bWt4qUutrUamhJ0ukUz_dPg,1958
+model_library/config/all_models.json,sha256=Nv8EMbF95Qln7fmNxn9iFSrh1FxGNoM1_dvdQxgRe4M,540907
 model_library/config/amazon_models.yaml,sha256=HgLmhpfedHCQtkPEviEJCBbAb-dNQPOnVtf4UnwrDds,7654
-model_library/config/anthropic_models.yaml,sha256=
+model_library/config/anthropic_models.yaml,sha256=P_ij3Y_scivwsPhwk5KcuaBb9DYsNVl5lDHh_R56F5I,10535
 model_library/config/cohere_models.yaml,sha256=ZfWrS1K45Hxd5nT_gpP5YGAovJcBIlLNIdaRyE3V-7o,5022
 model_library/config/deepseek_models.yaml,sha256=4CCrf-4UPBgFCrS6CQa3vzNiaYlD4B6dFJFK_kIYBWY,1156
 model_library/config/dummy_model.yaml,sha256=lImYJBtBVJk_jgnLbkuSyOshQphVlYCMkw-UiJIBYhY,877

@@ -29,17 +29,18 @@ model_library/config/google_models.yaml,sha256=Rg127nsBbHpk62X7WBq2ckdHo0bwYM0NV
 model_library/config/inception_models.yaml,sha256=YCqfQlkH_pTdHIKee5dP_aRFXw_fTIEQCpUvX2bwO0M,560
 model_library/config/kimi_models.yaml,sha256=336jcbMhcKDHFNxzRIYxFTV7v2W_93y2sY-IkiYtkuo,1340
 model_library/config/minimax_models.yaml,sha256=gWTuTcl1-zyCF6KRuU6DSre2Cw5gXC-TeKV2Qp4StnQ,1263
-model_library/config/mistral_models.yaml,sha256=
+model_library/config/mistral_models.yaml,sha256=p5mlzImOBVno_NFk6jsA6jRVUHkc458kSqXuhDn2hsE,5199
 model_library/config/openai_models.yaml,sha256=pKsYFF4TnuWnAJVby6Bw6JFJhr6rbkt75N4VIg40iO4,25061
 model_library/config/perplexity_models.yaml,sha256=WUDqhLvnB0kQhCCwPv19FYLHozet3m33Spdo6bGff3Q,2336
-model_library/config/together_models.yaml,sha256=
+model_library/config/together_models.yaml,sha256=qPtYWZ5kb373vuDCrHjXrfU8oWUP-RjqKvxVqhKMyR0,23904
 model_library/config/xai_models.yaml,sha256=2KRNNQy3kV-4xxSfhj7Uhp9TZF-S5qPlM8Ef-04zv8Y,7985
-model_library/config/
+model_library/config/xiaomi_models.yaml,sha256=G1TnVJ6uar69NY2t5vH2L_Fq1AINoNVNngzgL04T908,1166
+model_library/config/zai_models.yaml,sha256=2i4IXdSpta8uVY0q_OhHIfvHZ4o0jI6nvn1AMh0cqkw,2468
 model_library/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 model_library/providers/ai21labs.py,sha256=xXqgk-bAC6WTZ7rvN9eHoE_IUukaU_qF3eLw-TX8gTY,6356
 model_library/providers/alibaba.py,sha256=0oMgs09BmRknuY5E66OL987xjtIrLHMh2jzSce8UiCY,3226
 model_library/providers/amazon.py,sha256=VvC0-DoZWXx-obMMkw9YLQOwwcMNYnCSIGrMGkun7tI,15589
-model_library/providers/anthropic.py,sha256=
+model_library/providers/anthropic.py,sha256=a5_rO2nBirlc79gFpLkaUlC3dfSnk9crNlebOi_xYyU,26046
 model_library/providers/azure.py,sha256=mEwoGz8ajwI6MW464spPoa36uFsPWaG_4WtwQZWkxuE,1784
 model_library/providers/cohere.py,sha256=9ZGNekBlvubVw8o-emxdIhre_00iq5saENCVaD74Jqc,944
 model_library/providers/deepseek.py,sha256=S7Ud80vKVJMCkovaLEtZEPr1jI_yfOq-R0Ln-jWwqdo,1040

@@ -53,18 +54,20 @@ model_library/providers/openrouter.py,sha256=FRKyeUyCSSQFCvoKvVbKzYJ_SL60jEU-Y6w
 model_library/providers/perplexity.py,sha256=GT7nfu42ATbG7Eu_J8nrIwJMr_BpFpLOaQVOQG4gtRk,961
 model_library/providers/together.py,sha256=M1xAsbBd-41a3yHj2MDrysrIav9pp7eqGYUo16a4L8s,1911
 model_library/providers/vals.py,sha256=lQXaQ6494r0Ujv2B9epLfVNdWk1nkMOZt1LKgAYsXhc,10084
+model_library/providers/vercel.py,sha256=CZOr7ps0WRUQnoLDoD6ICu7X-G7-dHtHDQ_8ciSqiak,951
 model_library/providers/xai.py,sha256=WzAV1WnUrx9UenLGkP77JslKDFVz1T0g25PfCYzMOlw,10687
+model_library/providers/xiaomi.py,sha256=OJKz_dbolWt3lPkllw91Ntx1uoLQL_eM8VOEccN_ZDs,954
 model_library/providers/zai.py,sha256=C-0Q41vEhsb6PIbFIW4kXGSx1-mTuB-czB7Vq1_KBqk,1924
 model_library/providers/google/__init__.py,sha256=ypuLVL_QJEQ7C3S47FhC9y4wyawYOdGikAViJmACI0U,115
 model_library/providers/google/batch.py,sha256=kqe9ponDe9Tkrh_-4kWd5-47wYf4V_XSKXZnrFOzAHc,10187
-model_library/providers/google/google.py,sha256=
+model_library/providers/google/google.py,sha256=mSbzEujS45vexUsDCL7gdRVgv6yZGDqE6-iDt2cGHDI,18766
 model_library/retriers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 model_library/retriers/backoff.py,sha256=vmpzLje51yfvSgAEST_yNrL87_J_0pmI-jwnRek5Ie0,2296
 model_library/retriers/base.py,sha256=zmiGZq4yvJx7aE0FD8rqZuJxZavhKTXLjt58z5TKJNw,6832
 model_library/retriers/token.py,sha256=ypHzuJjEORsv931sjEI1lejRO8Ju_ljSrCGV4pgaa34,16794
 model_library/retriers/utils.py,sha256=fhtQ64aT8mZcwZ8aTXnLRU4HVIf8JQ3CfmArvxZPWvQ,279
-model_library-0.1.
-model_library-0.1.
-model_library-0.1.
-model_library-0.1.
-model_library-0.1.
+model_library-0.1.9.dist-info/licenses/LICENSE,sha256=x6mf4o7U_wHaaqcfxoU-0R6uYJLbqL_TNuoULP3asaA,1070
+model_library-0.1.9.dist-info/METADATA,sha256=1mJTenP1YD-twt90UzMzZNEFRKOM4lVQ3dNve34jvs8,7024
+model_library-0.1.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+model_library-0.1.9.dist-info/top_level.txt,sha256=HtQYxA_7RP8UT35I6VcUw20L6edI0Zf2t5Ys1uDGVjs,14
+model_library-0.1.9.dist-info/RECORD,,

{model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/WHEEL
File without changes

{model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/licenses/LICENSE
File without changes

{model_library-0.1.8.dist-info → model_library-0.1.9.dist-info}/top_level.txt
File without changes