model-library 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +2 -0
- model_library/base/output.py +24 -9
- model_library/base/utils.py +27 -5
- model_library/config/README.md +169 -0
- model_library/config/ai21labs_models.yaml +11 -11
- model_library/config/alibaba_models.yaml +21 -22
- model_library/config/all_models.json +4623 -2599
- model_library/config/amazon_models.yaml +100 -102
- model_library/config/anthropic_models.yaml +43 -52
- model_library/config/cohere_models.yaml +25 -24
- model_library/config/deepseek_models.yaml +28 -25
- model_library/config/dummy_model.yaml +9 -7
- model_library/config/fireworks_models.yaml +86 -56
- model_library/config/google_models.yaml +146 -126
- model_library/config/inception_models.yaml +6 -6
- model_library/config/kimi_models.yaml +13 -14
- model_library/config/minimax_models.yaml +37 -0
- model_library/config/mistral_models.yaml +85 -29
- model_library/config/openai_models.yaml +192 -150
- model_library/config/perplexity_models.yaml +10 -23
- model_library/config/together_models.yaml +115 -104
- model_library/config/xai_models.yaml +47 -79
- model_library/config/zai_models.yaml +23 -15
- model_library/exceptions.py +7 -16
- model_library/providers/amazon.py +32 -17
- model_library/providers/minimax.py +33 -0
- model_library/providers/mistral.py +10 -1
- model_library/providers/openai.py +2 -6
- model_library/register_models.py +36 -36
- model_library/registry_utils.py +78 -16
- model_library/utils.py +2 -2
- {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/METADATA +2 -2
- model_library-0.1.5.dist-info/RECORD +64 -0
- model_library-0.1.3.dist-info/RECORD +0 -61
- {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/WHEEL +0 -0
- {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/top_level.txt +0 -0
model_library/base/base.py
CHANGED
|
@@ -64,6 +64,7 @@ class LLMConfig(BaseModel):
|
|
|
64
64
|
max_tokens: int = DEFAULT_MAX_TOKENS
|
|
65
65
|
temperature: float | None = None
|
|
66
66
|
top_p: float | None = None
|
|
67
|
+
top_k: int | None = None
|
|
67
68
|
reasoning: bool = False
|
|
68
69
|
reasoning_effort: str | None = None
|
|
69
70
|
supports_images: bool = False
|
|
@@ -106,6 +107,7 @@ class LLM(ABC):
|
|
|
106
107
|
self.max_tokens: int = config.max_tokens
|
|
107
108
|
self.temperature: float | None = config.temperature
|
|
108
109
|
self.top_p: float | None = config.top_p
|
|
110
|
+
self.top_k: int | None = config.top_k
|
|
109
111
|
|
|
110
112
|
self.reasoning: bool = config.reasoning
|
|
111
113
|
self.reasoning_effort: str | None = config.reasoning_effort
|
model_library/base/output.py
CHANGED
|
@@ -9,9 +9,7 @@ from pydantic import BaseModel, Field, computed_field, field_validator
|
|
|
9
9
|
from typing_extensions import override
|
|
10
10
|
|
|
11
11
|
from model_library.base.input import InputItem, ToolCall
|
|
12
|
-
from model_library.base.utils import (
|
|
13
|
-
sum_optional,
|
|
14
|
-
)
|
|
12
|
+
from model_library.base.utils import add_optional
|
|
15
13
|
from model_library.utils import truncate_str
|
|
16
14
|
|
|
17
15
|
|
|
@@ -42,10 +40,14 @@ class QueryResultCost(BaseModel):
|
|
|
42
40
|
reasoning: float | None = None
|
|
43
41
|
cache_read: float | None = None
|
|
44
42
|
cache_write: float | None = None
|
|
43
|
+
total_override: float | None = None
|
|
45
44
|
|
|
46
45
|
@computed_field
|
|
47
46
|
@property
|
|
48
47
|
def total(self) -> float:
|
|
48
|
+
if self.total_override is not None:
|
|
49
|
+
return self.total_override
|
|
50
|
+
|
|
49
51
|
return sum(
|
|
50
52
|
filter(
|
|
51
53
|
None,
|
|
@@ -86,6 +88,16 @@ class QueryResultCost(BaseModel):
|
|
|
86
88
|
)
|
|
87
89
|
)
|
|
88
90
|
|
|
91
|
+
def __add__(self, other: "QueryResultCost") -> "QueryResultCost":
|
|
92
|
+
return QueryResultCost(
|
|
93
|
+
input=self.input + other.input,
|
|
94
|
+
output=self.output + other.output,
|
|
95
|
+
reasoning=add_optional(self.reasoning, other.reasoning),
|
|
96
|
+
cache_read=add_optional(self.cache_read, other.cache_read),
|
|
97
|
+
cache_write=add_optional(self.cache_write, other.cache_write),
|
|
98
|
+
total_override=add_optional(self.total_override, other.total_override),
|
|
99
|
+
)
|
|
100
|
+
|
|
89
101
|
@override
|
|
90
102
|
def __repr__(self):
|
|
91
103
|
use_cents = self.total < 1
|
|
@@ -150,17 +162,20 @@ class QueryResultMetadata(BaseModel):
|
|
|
150
162
|
return QueryResultMetadata(
|
|
151
163
|
in_tokens=self.in_tokens + other.in_tokens,
|
|
152
164
|
out_tokens=self.out_tokens + other.out_tokens,
|
|
153
|
-
reasoning_tokens=sum_optional(
|
|
154
|
-
self.reasoning_tokens, other.reasoning_tokens
|
|
165
|
+
reasoning_tokens=cast(
|
|
166
|
+
int | None, add_optional(self.reasoning_tokens, other.reasoning_tokens)
|
|
155
167
|
),
|
|
156
|
-
cache_read_tokens=sum_optional(
|
|
157
|
-
|
|
168
|
+
cache_read_tokens=cast(
|
|
169
|
+
int | None,
|
|
170
|
+
add_optional(self.cache_read_tokens, other.cache_read_tokens),
|
|
158
171
|
),
|
|
159
|
-
cache_write_tokens=sum_optional(
|
|
160
|
-
|
|
172
|
+
cache_write_tokens=cast(
|
|
173
|
+
int | None,
|
|
174
|
+
add_optional(self.cache_write_tokens, other.cache_write_tokens),
|
|
161
175
|
),
|
|
162
176
|
duration_seconds=self.default_duration_seconds
|
|
163
177
|
+ other.default_duration_seconds,
|
|
178
|
+
cost=cast(QueryResultCost | None, add_optional(self.cost, other.cost)),
|
|
164
179
|
)
|
|
165
180
|
|
|
166
181
|
@override
|
model_library/base/utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Sequence, cast
|
|
1
|
+
from typing import Sequence, TypeVar, cast
|
|
2
2
|
|
|
3
3
|
from model_library.base.input import (
|
|
4
4
|
FileBase,
|
|
@@ -8,17 +8,39 @@ from model_library.base.input import (
|
|
|
8
8
|
ToolResult,
|
|
9
9
|
)
|
|
10
10
|
from model_library.utils import truncate_str
|
|
11
|
+
from pydantic import BaseModel
|
|
11
12
|
|
|
13
|
+
T = TypeVar("T", bound=BaseModel)
|
|
12
14
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
+
|
|
16
|
+
def add_optional(
|
|
17
|
+
a: int | float | T | None, b: int | float | T | None
|
|
18
|
+
) -> int | float | T | None:
|
|
19
|
+
"""Add two optional objects, returning None if both are None.
|
|
15
20
|
|
|
16
21
|
Preserves None to indicate "unknown/not provided" when both inputs are None,
|
|
17
|
-
otherwise
|
|
22
|
+
otherwise returns the non-None value or their sum.
|
|
18
23
|
"""
|
|
19
24
|
if a is None and b is None:
|
|
20
25
|
return None
|
|
21
|
-
|
|
26
|
+
|
|
27
|
+
if a is None or b is None:
|
|
28
|
+
return a or b
|
|
29
|
+
|
|
30
|
+
if isinstance(a, (int, float)) and isinstance(b, (int, float)):
|
|
31
|
+
return a + b
|
|
32
|
+
|
|
33
|
+
# NOTE: Ensure that the subtypes are the same so we can use the __add__ method just from one
|
|
34
|
+
if type(a) is type(b):
|
|
35
|
+
add_method = getattr(a, "__add__", None)
|
|
36
|
+
if add_method is not None:
|
|
37
|
+
return add_method(b)
|
|
38
|
+
else:
|
|
39
|
+
raise ValueError(
|
|
40
|
+
f"Cannot add {type(a)} and {type(b)} because they are not the same subclass"
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
return None
|
|
22
44
|
|
|
23
45
|
|
|
24
46
|
def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Model Library Configuration
|
|
2
|
+
|
|
3
|
+
This directory contains YAML configuration files that define all available models in the model-proxy library.
|
|
4
|
+
|
|
5
|
+
## Configuration Structure
|
|
6
|
+
|
|
7
|
+
Each model configuration is organized into distinct sections:
|
|
8
|
+
|
|
9
|
+
### Core Sections
|
|
10
|
+
|
|
11
|
+
#### `properties`
|
|
12
|
+
Model-specific technical characteristics and capabilities:
|
|
13
|
+
- `context_window`: Maximum context window in tokens
|
|
14
|
+
- `max_tokens`: Maximum output tokens the model can generate
|
|
15
|
+
- `training_cutoff`: Training data cutoff date (string or null)
|
|
16
|
+
- `reasoning_model`: Whether the model is a reasoning/thinking model
|
|
17
|
+
|
|
18
|
+
```yaml
|
|
19
|
+
properties:
|
|
20
|
+
context_window: 200_000
|
|
21
|
+
max_tokens: 32_000
|
|
22
|
+
training_cutoff: "2025-03"
|
|
23
|
+
reasoning_model: false
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
#### `supports`
|
|
27
|
+
Feature support flags indicating model capabilities:
|
|
28
|
+
- `images`: Supports image inputs
|
|
29
|
+
- `videos`: Supports video inputs
|
|
30
|
+
- `files`: Supports file inputs
|
|
31
|
+
- `batch`: Supports batch requests
|
|
32
|
+
- `temperature`: Supports temperature parameter
|
|
33
|
+
- `tools`: Supports tool/function calling
|
|
34
|
+
|
|
35
|
+
```yaml
|
|
36
|
+
supports:
|
|
37
|
+
images: true
|
|
38
|
+
files: true
|
|
39
|
+
tools: true
|
|
40
|
+
batch: true
|
|
41
|
+
temperature: true
|
|
42
|
+
videos: false
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
#### `metadata`
|
|
46
|
+
Vals platform-specific metadata for model availability and status:
|
|
47
|
+
- `deprecated`: Model is deprecated and should not be used for new projects
|
|
48
|
+
- `available_for_everyone`: Model is available to all users
|
|
49
|
+
- `available_as_evaluator`: Model can be used as an evaluator
|
|
50
|
+
- `ignored_for_cost`: Exclude from cost calculations
|
|
51
|
+
|
|
52
|
+
```yaml
|
|
53
|
+
metadata:
|
|
54
|
+
deprecated: false
|
|
55
|
+
available_for_everyone: true
|
|
56
|
+
available_as_evaluator: false
|
|
57
|
+
ignored_for_cost: false
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
#### Other Sections
|
|
61
|
+
|
|
62
|
+
- `costs_per_million_token`: Pricing information (input, output, cache, batch, context)
|
|
63
|
+
- `default_parameters`: Default parameter values (temperature, top_p, reasoning_effort)
|
|
64
|
+
- `provider_properties`: Provider-specific configuration options
|
|
65
|
+
- `alternative_keys`: Alternative model identifiers/aliases
|
|
66
|
+
|
|
67
|
+
## Configuration Inheritance
|
|
68
|
+
|
|
69
|
+
Configurations support hierarchical inheritance through `base-config` blocks:
|
|
70
|
+
|
|
71
|
+
### 1. Provider-level base-config
|
|
72
|
+
```yaml
|
|
73
|
+
base-config:
|
|
74
|
+
company: Anthropic
|
|
75
|
+
open_source: false
|
|
76
|
+
supports:
|
|
77
|
+
images: true
|
|
78
|
+
tools: true
|
|
79
|
+
metadata:
|
|
80
|
+
available_for_everyone: true
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### 2. Model-block base-config
|
|
84
|
+
```yaml
|
|
85
|
+
claude-4-models:
|
|
86
|
+
base-config:
|
|
87
|
+
supports:
|
|
88
|
+
temperature: true
|
|
89
|
+
default_parameters:
|
|
90
|
+
temperature: 1
|
|
91
|
+
|
|
92
|
+
anthropic/claude-opus-4-1-20250805:
|
|
93
|
+
# Inherits from both provider and block base-configs
|
|
94
|
+
properties:
|
|
95
|
+
context_window: 200_000
|
|
96
|
+
max_tokens: 32_000
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### 3. Individual model overrides
|
|
100
|
+
Models can override any inherited configuration:
|
|
101
|
+
|
|
102
|
+
```yaml
|
|
103
|
+
anthropic/claude-opus-4-1-20250805:
|
|
104
|
+
properties:
|
|
105
|
+
context_window: 200_000
|
|
106
|
+
max_tokens: 32_000
|
|
107
|
+
metadata:
|
|
108
|
+
available_for_everyone: false # Override base-config
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Alternative Keys
|
|
112
|
+
|
|
113
|
+
Models can define alternative identifiers that map to the same configuration:
|
|
114
|
+
|
|
115
|
+
```yaml
|
|
116
|
+
anthropic/claude-3-5-sonnet-20241022:
|
|
117
|
+
label: Claude 3.5 Sonnet Latest
|
|
118
|
+
properties:
|
|
119
|
+
context_window: 200_000
|
|
120
|
+
max_tokens: 8_192
|
|
121
|
+
alternative_keys:
|
|
122
|
+
- anthropic/claude-3-5-sonnet-latest
|
|
123
|
+
- anthropic/claude-3.5-sonnet-latest
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Alternative keys can also override configuration:
|
|
127
|
+
|
|
128
|
+
```yaml
|
|
129
|
+
alternative_keys:
|
|
130
|
+
- anthropic/claude-opus-4-1-20250805-thinking:
|
|
131
|
+
properties:
|
|
132
|
+
reasoning_model: true
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Generating all_models.json
|
|
136
|
+
|
|
137
|
+
After making changes to any YAML configuration file, regenerate the compiled configuration:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
make config
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
This generates `all_models.json` which is used by the model registry at runtime.
|
|
144
|
+
|
|
145
|
+
## Schema Validation
|
|
146
|
+
|
|
147
|
+
The configuration is validated using Pydantic models defined in `register_models.py`:
|
|
148
|
+
- `Properties` - Model properties
|
|
149
|
+
- `Supports` - Feature support flags
|
|
150
|
+
- `Metadata` - Platform metadata
|
|
151
|
+
- `DefaultParameters` - Default parameter values
|
|
152
|
+
- `CostProperties` - Pricing information
|
|
153
|
+
- `ProviderProperties` - Provider-specific config (dynamically generated)
|
|
154
|
+
|
|
155
|
+
## Migration Notes
|
|
156
|
+
|
|
157
|
+
### Previous Structure (Deprecated)
|
|
158
|
+
The old configuration used `class_properties` which mixed support flags and metadata:
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
# OLD - Do not use
|
|
162
|
+
class_properties:
|
|
163
|
+
supports_images: true
|
|
164
|
+
supports_batch_requests: true
|
|
165
|
+
deprecated: false
|
|
166
|
+
available_for_everyone: true
|
|
167
|
+
properties:
|
|
168
|
+
max_token_output: 32_000
|
|
169
|
+
```
|
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
base-config:
|
|
2
2
|
company: AI21 Labs
|
|
3
3
|
documentation_url: https://www.ai21.com/jamba
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
supports:
|
|
5
|
+
images: false
|
|
6
|
+
files: false
|
|
7
|
+
metadata:
|
|
6
8
|
available_as_evaluator: false
|
|
7
|
-
supports_files: false
|
|
8
9
|
available_for_everyone: false
|
|
9
10
|
ignored_for_cost: false
|
|
10
11
|
properties:
|
|
11
12
|
context_window: null
|
|
12
|
-
|
|
13
|
+
max_tokens: 4096
|
|
13
14
|
training_cutoff: null
|
|
14
15
|
|
|
15
16
|
ai21labs-models:
|
|
16
17
|
base-config:
|
|
17
18
|
open_source: true
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
supports:
|
|
20
|
+
temperature: true
|
|
20
21
|
default_parameters:
|
|
21
22
|
temperature: 0.4
|
|
22
|
-
max_output_tokens: 4096
|
|
23
23
|
|
|
24
24
|
ai21labs/jamba-large-1.7:
|
|
25
25
|
label: Jamba 1.7 Large
|
|
@@ -47,7 +47,7 @@ ai21labs-models:
|
|
|
47
47
|
label: Jamba 1.6 Large
|
|
48
48
|
description: The most powerful and efficient long context model
|
|
49
49
|
release_date: 2025-03-06
|
|
50
|
-
|
|
50
|
+
metadata:
|
|
51
51
|
deprecated: true
|
|
52
52
|
properties:
|
|
53
53
|
context_window: 256_000
|
|
@@ -61,7 +61,7 @@ ai21labs-models:
|
|
|
61
61
|
label: Jamba 1.6 Mini
|
|
62
62
|
description: The most powerful and efficient long context model
|
|
63
63
|
release_date: 2025-03-06
|
|
64
|
-
|
|
64
|
+
metadata:
|
|
65
65
|
deprecated: true
|
|
66
66
|
properties:
|
|
67
67
|
context_window: 256_000
|
|
@@ -77,7 +77,7 @@ ai21labs-models:
|
|
|
77
77
|
release_date: 2024-08-22
|
|
78
78
|
properties:
|
|
79
79
|
context_window: 256_000
|
|
80
|
-
|
|
80
|
+
metadata:
|
|
81
81
|
deprecated: true
|
|
82
82
|
costs_per_million_token:
|
|
83
83
|
input: 2.00
|
|
@@ -91,7 +91,7 @@ ai21labs-models:
|
|
|
91
91
|
release_date: 2024-08-22
|
|
92
92
|
properties:
|
|
93
93
|
context_window: 256_000
|
|
94
|
-
|
|
94
|
+
metadata:
|
|
95
95
|
deprecated: true
|
|
96
96
|
costs_per_million_token:
|
|
97
97
|
input: 0.2
|
|
@@ -3,13 +3,11 @@ qwen-models:
|
|
|
3
3
|
company: Alibaba
|
|
4
4
|
open_source: false
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
supports:
|
|
7
|
+
temperature: true
|
|
8
|
+
metadata:
|
|
7
9
|
available_for_everyone: false
|
|
8
10
|
available_as_evaluator: false
|
|
9
|
-
supports_metadata: true
|
|
10
|
-
supports_files: false
|
|
11
|
-
ignored_for_cost: false
|
|
12
|
-
supports_temperature: true
|
|
13
11
|
default_parameters:
|
|
14
12
|
temperature: 0.7
|
|
15
13
|
properties:
|
|
@@ -21,15 +19,16 @@ qwen-models:
|
|
|
21
19
|
release_date: 2025-09-05
|
|
22
20
|
properties:
|
|
23
21
|
context_window: 262_144
|
|
24
|
-
|
|
22
|
+
max_tokens: 65_536
|
|
25
23
|
training_cutoff: ""
|
|
26
24
|
costs_per_million_token:
|
|
27
25
|
input: 1.2
|
|
28
26
|
output: 6
|
|
29
|
-
|
|
27
|
+
supports:
|
|
28
|
+
images: false
|
|
29
|
+
tools: true
|
|
30
|
+
metadata:
|
|
30
31
|
available_for_everyone: false
|
|
31
|
-
supports_images: false
|
|
32
|
-
supports_tools: true
|
|
33
32
|
|
|
34
33
|
alibaba/qwen3-max-2025-09-23:
|
|
35
34
|
label: Qwen 3 Max 2025-09-23
|
|
@@ -37,16 +36,17 @@ qwen-models:
|
|
|
37
36
|
release_date: 2025-09-23
|
|
38
37
|
properties:
|
|
39
38
|
context_window: 262_144
|
|
40
|
-
|
|
39
|
+
max_tokens: 65_536
|
|
41
40
|
training_cutoff: ""
|
|
42
41
|
reasoning_model: true
|
|
43
42
|
costs_per_million_token:
|
|
44
43
|
input: 1.2
|
|
45
44
|
output: 6
|
|
46
|
-
|
|
45
|
+
supports:
|
|
46
|
+
images: false
|
|
47
|
+
tools: true
|
|
48
|
+
metadata:
|
|
47
49
|
available_for_everyone: false
|
|
48
|
-
supports_images: false
|
|
49
|
-
supports_tools: true
|
|
50
50
|
|
|
51
51
|
alibaba/qwen3-max:
|
|
52
52
|
label: Qwen 3 Max
|
|
@@ -54,7 +54,7 @@ qwen-models:
|
|
|
54
54
|
release_date: 2025-09-23
|
|
55
55
|
properties:
|
|
56
56
|
context_window: 262_144
|
|
57
|
-
|
|
57
|
+
max_tokens: 65_536
|
|
58
58
|
training_cutoff: ""
|
|
59
59
|
reasoning_model: false
|
|
60
60
|
costs_per_million_token:
|
|
@@ -67,12 +67,11 @@ qwen-models:
|
|
|
67
67
|
threshold: 32_000
|
|
68
68
|
input: 2.4
|
|
69
69
|
output: 12
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
supports:
|
|
71
|
+
images: false
|
|
72
|
+
tools: true
|
|
73
|
+
metadata:
|
|
73
74
|
available_for_everyone: false
|
|
74
|
-
supports_images: false
|
|
75
|
-
supports_tools: true
|
|
76
75
|
|
|
77
76
|
alibaba/qwen3-vl-plus-2025-09-23:
|
|
78
77
|
label: Qwen 3 VL Plus
|
|
@@ -81,11 +80,11 @@ qwen-models:
|
|
|
81
80
|
release_date: 2025-09-23
|
|
82
81
|
properties:
|
|
83
82
|
context_window: 262_144
|
|
84
|
-
|
|
83
|
+
max_tokens: 32_768
|
|
85
84
|
training_cutoff: ""
|
|
86
85
|
reasoning_model: false
|
|
87
86
|
costs_per_million_token:
|
|
88
87
|
input: 0.2
|
|
89
88
|
output: 1.6
|
|
90
|
-
|
|
91
|
-
|
|
89
|
+
supports:
|
|
90
|
+
images: true
|