model-library 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {model_library-0.1.2 → model_library-0.1.4}/PKG-INFO +3 -4
- {model_library-0.1.2 → model_library-0.1.4}/examples/prompt_caching.py +1 -9
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/base.py +13 -6
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/output.py +55 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/utils.py +3 -2
- model_library-0.1.4/model_library/config/README.md +169 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/ai21labs_models.yaml +11 -10
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/alibaba_models.yaml +21 -22
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/all_models.json +4708 -2471
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/amazon_models.yaml +100 -102
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/anthropic_models.yaml +59 -45
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/cohere_models.yaml +25 -24
- model_library-0.1.4/model_library/config/deepseek_models.yaml +52 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/dummy_model.yaml +9 -7
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/fireworks_models.yaml +86 -56
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/google_models.yaml +156 -102
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/inception_models.yaml +6 -6
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/kimi_models.yaml +13 -14
- model_library-0.1.4/model_library/config/minimax_models.yaml +37 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/mistral_models.yaml +85 -29
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/openai_models.yaml +192 -159
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/perplexity_models.yaml +8 -23
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/together_models.yaml +115 -103
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/xai_models.yaml +85 -57
- {model_library-0.1.2 → model_library-0.1.4}/model_library/config/zai_models.yaml +23 -15
- {model_library-0.1.2 → model_library-0.1.4}/model_library/exceptions.py +12 -17
- {model_library-0.1.2 → model_library-0.1.4}/model_library/file_utils.py +1 -1
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/amazon.py +32 -17
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/anthropic.py +2 -6
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/google/google.py +35 -29
- model_library-0.1.4/model_library/providers/minimax.py +33 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/mistral.py +10 -1
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/openai.py +10 -8
- model_library-0.1.4/model_library/providers/together.py +58 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/register_models.py +36 -38
- {model_library-0.1.2 → model_library-0.1.4}/model_library/registry_utils.py +18 -16
- {model_library-0.1.2 → model_library-0.1.4}/model_library/utils.py +2 -2
- {model_library-0.1.2 → model_library-0.1.4}/model_library.egg-info/PKG-INFO +3 -4
- {model_library-0.1.2 → model_library-0.1.4}/model_library.egg-info/SOURCES.txt +3 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library.egg-info/requires.txt +2 -3
- {model_library-0.1.2 → model_library-0.1.4}/pyproject.toml +2 -3
- {model_library-0.1.2 → model_library-0.1.4}/scripts/run_models.py +13 -7
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_retry.py +4 -4
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_retry.py +29 -2
- model_library-0.1.4/uv.lock +2128 -0
- model_library-0.1.2/model_library/config/deepseek_models.yaml +0 -49
- model_library-0.1.2/model_library/providers/together.py +0 -251
- model_library-0.1.2/uv.lock +0 -1895
- {model_library-0.1.2 → model_library-0.1.4}/.gitattributes +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/.github/workflows/publish.yml +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/.github/workflows/style.yaml +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/.github/workflows/test.yaml +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/.github/workflows/typecheck.yml +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/.gitignore +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/LICENSE +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/Makefile +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/README.md +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/README.md +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/advanced/batch.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/advanced/custom_retrier.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/advanced/deep_research.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/advanced/stress.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/advanced/structured_output.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/advanced/web_search.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/basics.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/data/files.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/data/images.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/embeddings.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/files.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/images.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/setup.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/examples/tool_calls.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/batch.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/delegate_only.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/base/input.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/logging.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/model_utils.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/ai21labs.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/alibaba.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/azure.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/cohere.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/deepseek.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/fireworks.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/google/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/google/batch.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/inception.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/kimi.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/perplexity.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/vals.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/xai.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/providers/zai.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/py.typed +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library/settings.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library.egg-info/dependency_links.txt +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/model_library.egg-info/top_level.txt +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/scripts/browse_models.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/scripts/config.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/scripts/publish.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/setup.cfg +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/README.md +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/conftest.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/conftest.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_batch.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_completion.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_files.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_reasoning.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_streaming.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_structured_output.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/integration/test_tools.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/test_helpers.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/conftest.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/providers/__init__.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/providers/test_fireworks_provider.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/providers/test_google_provider.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_batch.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_context_window.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_deep_research.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_perplexity_provider.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_prompt_caching.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_registry.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_streaming.py +0 -0
- {model_library-0.1.2 → model_library-0.1.4}/tests/unit/test_tools.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: model-library
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: Model Library for vals.ai
|
|
5
5
|
Author-email: "Vals AI, Inc." <contact@vals.ai>
|
|
6
6
|
License: MIT
|
|
@@ -15,14 +15,13 @@ Requires-Dist: backoff<3.0,>=2.2.1
|
|
|
15
15
|
Requires-Dist: redis<7.0,>=6.2.0
|
|
16
16
|
Requires-Dist: tiktoken==0.11.0
|
|
17
17
|
Requires-Dist: pillow
|
|
18
|
-
Requires-Dist: openai<
|
|
18
|
+
Requires-Dist: openai<3.0,>=2.0
|
|
19
19
|
Requires-Dist: anthropic<1.0,>=0.57.1
|
|
20
|
-
Requires-Dist: together<2.0,>=1.5.25
|
|
21
20
|
Requires-Dist: mistralai<2.0,>=1.9.10
|
|
22
21
|
Requires-Dist: xai-sdk<2.0,>=1.0.0
|
|
23
22
|
Requires-Dist: ai21<5.0,>=4.0.3
|
|
24
23
|
Requires-Dist: boto3<2.0,>=1.38.27
|
|
25
|
-
Requires-Dist: google-genai[aiohttp]
|
|
24
|
+
Requires-Dist: google-genai[aiohttp]>=1.51.0
|
|
26
25
|
Requires-Dist: google-cloud-storage>=1.26.0
|
|
27
26
|
Dynamic: license-file
|
|
28
27
|
|
|
@@ -44,19 +44,11 @@ async def run(model: LLM) -> None:
|
|
|
44
44
|
|
|
45
45
|
async def query_with_logging(tag: str, question: str) -> None:
|
|
46
46
|
user_prompt = f"{task_spec}\n\nQUESTION: {question}"
|
|
47
|
-
|
|
47
|
+
await model.query(
|
|
48
48
|
input=user_prompt,
|
|
49
49
|
system_prompt=system_prefix,
|
|
50
50
|
tools=tools,
|
|
51
51
|
)
|
|
52
|
-
meta = result.metadata
|
|
53
|
-
model.logger.info(
|
|
54
|
-
"%s -> cache_write=%s cache_read=%s uncached_input=%s",
|
|
55
|
-
tag,
|
|
56
|
-
meta.cache_write_tokens,
|
|
57
|
-
meta.cache_read_tokens,
|
|
58
|
-
meta.in_tokens,
|
|
59
|
-
)
|
|
60
52
|
|
|
61
53
|
await query_with_logging(
|
|
62
54
|
"first_query",
|
|
@@ -64,6 +64,7 @@ class LLMConfig(BaseModel):
|
|
|
64
64
|
max_tokens: int = DEFAULT_MAX_TOKENS
|
|
65
65
|
temperature: float | None = None
|
|
66
66
|
top_p: float | None = None
|
|
67
|
+
top_k: int | None = None
|
|
67
68
|
reasoning: bool = False
|
|
68
69
|
reasoning_effort: str | None = None
|
|
69
70
|
supports_images: bool = False
|
|
@@ -106,6 +107,7 @@ class LLM(ABC):
|
|
|
106
107
|
self.max_tokens: int = config.max_tokens
|
|
107
108
|
self.temperature: float | None = config.temperature
|
|
108
109
|
self.top_p: float | None = config.top_p
|
|
110
|
+
self.top_k: int | None = config.top_k
|
|
109
111
|
|
|
110
112
|
self.reasoning: bool = config.reasoning
|
|
111
113
|
self.reasoning_effort: str | None = config.reasoning_effort
|
|
@@ -218,6 +220,10 @@ class LLM(ABC):
|
|
|
218
220
|
Join input with history
|
|
219
221
|
Log, Time, and Retry
|
|
220
222
|
"""
|
|
223
|
+
|
|
224
|
+
# verbose on debug
|
|
225
|
+
verbose = self.logger.isEnabledFor(logging.DEBUG)
|
|
226
|
+
|
|
221
227
|
# format str input
|
|
222
228
|
if isinstance(input, str):
|
|
223
229
|
input = [TextInput(text=input)]
|
|
@@ -226,11 +232,11 @@ class LLM(ABC):
|
|
|
226
232
|
input = [*files, *images, *input]
|
|
227
233
|
|
|
228
234
|
# format input info
|
|
229
|
-
item_info = f"--- input ({len(input)}): {get_pretty_input_types(input)}\n"
|
|
235
|
+
item_info = (
|
|
236
|
+
f"--- input ({len(input)}): {get_pretty_input_types(input, verbose)}\n"
|
|
237
|
+
)
|
|
230
238
|
if history:
|
|
231
|
-
item_info += (
|
|
232
|
-
f"--- history({len(history)}): {get_pretty_input_types(history)}\n"
|
|
233
|
-
)
|
|
239
|
+
item_info += f"--- history({len(history)}): {get_pretty_input_types(history, verbose)}\n"
|
|
234
240
|
|
|
235
241
|
# format tool info
|
|
236
242
|
tool_results = [t for t in input if isinstance(t, ToolResult)]
|
|
@@ -251,7 +257,7 @@ class LLM(ABC):
|
|
|
251
257
|
|
|
252
258
|
# unique logger for the query
|
|
253
259
|
query_id = uuid.uuid4().hex[:14]
|
|
254
|
-
query_logger =
|
|
260
|
+
query_logger = self.logger.getChild(f"query={query_id}")
|
|
255
261
|
|
|
256
262
|
query_logger.info(
|
|
257
263
|
"Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
|
|
@@ -277,6 +283,7 @@ class LLM(ABC):
|
|
|
277
283
|
output.metadata.cost = await self._calculate_cost(output.metadata)
|
|
278
284
|
|
|
279
285
|
query_logger.info(f"Query completed: {repr(output)}")
|
|
286
|
+
query_logger.debug(output.model_dump(exclude={"history", "raw"}))
|
|
280
287
|
|
|
281
288
|
return output
|
|
282
289
|
|
|
@@ -316,7 +323,7 @@ class LLM(ABC):
|
|
|
316
323
|
)
|
|
317
324
|
|
|
318
325
|
# costs for long context
|
|
319
|
-
total_in = metadata.
|
|
326
|
+
total_in = metadata.total_input_tokens
|
|
320
327
|
if costs.context and total_in > costs.context.threshold:
|
|
321
328
|
input_cost, output_cost = costs.context.get_costs(
|
|
322
329
|
input_cost,
|
|
@@ -59,6 +59,33 @@ class QueryResultCost(BaseModel):
|
|
|
59
59
|
)
|
|
60
60
|
)
|
|
61
61
|
|
|
62
|
+
@computed_field
|
|
63
|
+
@property
|
|
64
|
+
def total_input(self) -> float:
|
|
65
|
+
return sum(
|
|
66
|
+
filter(
|
|
67
|
+
None,
|
|
68
|
+
[
|
|
69
|
+
self.input,
|
|
70
|
+
self.cache_read,
|
|
71
|
+
self.cache_write,
|
|
72
|
+
],
|
|
73
|
+
)
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
@computed_field
|
|
77
|
+
@property
|
|
78
|
+
def total_output(self) -> float:
|
|
79
|
+
return sum(
|
|
80
|
+
filter(
|
|
81
|
+
None,
|
|
82
|
+
[
|
|
83
|
+
self.output,
|
|
84
|
+
self.reasoning,
|
|
85
|
+
],
|
|
86
|
+
)
|
|
87
|
+
)
|
|
88
|
+
|
|
62
89
|
@override
|
|
63
90
|
def __repr__(self):
|
|
64
91
|
use_cents = self.total < 1
|
|
@@ -92,6 +119,33 @@ class QueryResultMetadata(BaseModel):
|
|
|
92
119
|
def default_duration_seconds(self) -> float:
|
|
93
120
|
return self.duration_seconds or 0
|
|
94
121
|
|
|
122
|
+
@computed_field
|
|
123
|
+
@property
|
|
124
|
+
def total_input_tokens(self) -> int:
|
|
125
|
+
return sum(
|
|
126
|
+
filter(
|
|
127
|
+
None,
|
|
128
|
+
[
|
|
129
|
+
self.in_tokens,
|
|
130
|
+
self.cache_read_tokens,
|
|
131
|
+
self.cache_write_tokens,
|
|
132
|
+
],
|
|
133
|
+
)
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
@computed_field
|
|
137
|
+
@property
|
|
138
|
+
def total_output_tokens(self) -> int:
|
|
139
|
+
return sum(
|
|
140
|
+
filter(
|
|
141
|
+
None,
|
|
142
|
+
[
|
|
143
|
+
self.out_tokens,
|
|
144
|
+
self.reasoning_tokens,
|
|
145
|
+
],
|
|
146
|
+
)
|
|
147
|
+
)
|
|
148
|
+
|
|
95
149
|
def __add__(self, other: "QueryResultMetadata") -> "QueryResultMetadata":
|
|
96
150
|
return QueryResultMetadata(
|
|
97
151
|
in_tokens=self.in_tokens + other.in_tokens,
|
|
@@ -107,6 +161,7 @@ class QueryResultMetadata(BaseModel):
|
|
|
107
161
|
),
|
|
108
162
|
duration_seconds=self.default_duration_seconds
|
|
109
163
|
+ other.default_duration_seconds,
|
|
164
|
+
cost=self.cost,
|
|
110
165
|
)
|
|
111
166
|
|
|
112
167
|
@override
|
|
@@ -21,12 +21,13 @@ def sum_optional(a: int | None, b: int | None) -> int | None:
|
|
|
21
21
|
return (a or 0) + (b or 0)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
def get_pretty_input_types(input: Sequence["InputItem"]) -> str:
|
|
24
|
+
def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
|
|
25
25
|
# for logging
|
|
26
26
|
def process_item(item: "InputItem"):
|
|
27
27
|
match item:
|
|
28
28
|
case TextInput():
|
|
29
|
-
|
|
29
|
+
item_str = repr(item)
|
|
30
|
+
return item_str if verbose else truncate_str(item_str)
|
|
30
31
|
case FileBase(): # FileInput
|
|
31
32
|
return repr(item)
|
|
32
33
|
case ToolResult():
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Model Library Configuration
|
|
2
|
+
|
|
3
|
+
This directory contains YAML configuration files that define all available models in the model-proxy library.
|
|
4
|
+
|
|
5
|
+
## Configuration Structure
|
|
6
|
+
|
|
7
|
+
Each model configuration is organized into distinct sections:
|
|
8
|
+
|
|
9
|
+
### Core Sections
|
|
10
|
+
|
|
11
|
+
#### `properties`
|
|
12
|
+
Model-specific technical characteristics and capabilities:
|
|
13
|
+
- `context_window`: Maximum context window in tokens
|
|
14
|
+
- `max_tokens`: Maximum output tokens the model can generate
|
|
15
|
+
- `training_cutoff`: Training data cutoff date (string or null)
|
|
16
|
+
- `reasoning_model`: Whether the model is a reasoning/thinking model
|
|
17
|
+
|
|
18
|
+
```yaml
|
|
19
|
+
properties:
|
|
20
|
+
context_window: 200_000
|
|
21
|
+
max_tokens: 32_000
|
|
22
|
+
training_cutoff: "2025-03"
|
|
23
|
+
reasoning_model: false
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
#### `supports`
|
|
27
|
+
Feature support flags indicating model capabilities:
|
|
28
|
+
- `images`: Supports image inputs
|
|
29
|
+
- `videos`: Supports video inputs
|
|
30
|
+
- `files`: Supports file inputs
|
|
31
|
+
- `batch`: Supports batch requests
|
|
32
|
+
- `temperature`: Supports temperature parameter
|
|
33
|
+
- `tools`: Supports tool/function calling
|
|
34
|
+
|
|
35
|
+
```yaml
|
|
36
|
+
supports:
|
|
37
|
+
images: true
|
|
38
|
+
files: true
|
|
39
|
+
tools: true
|
|
40
|
+
batch: true
|
|
41
|
+
temperature: true
|
|
42
|
+
videos: false
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
#### `metadata`
|
|
46
|
+
Vals platform-specific metadata for model availability and status:
|
|
47
|
+
- `deprecated`: Model is deprecated and should not be used for new projects
|
|
48
|
+
- `available_for_everyone`: Model is available to all users
|
|
49
|
+
- `available_as_evaluator`: Model can be used as an evaluator
|
|
50
|
+
- `ignored_for_cost`: Exclude from cost calculations
|
|
51
|
+
|
|
52
|
+
```yaml
|
|
53
|
+
metadata:
|
|
54
|
+
deprecated: false
|
|
55
|
+
available_for_everyone: true
|
|
56
|
+
available_as_evaluator: false
|
|
57
|
+
ignored_for_cost: false
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
#### Other Sections
|
|
61
|
+
|
|
62
|
+
- `costs_per_million_token`: Pricing information (input, output, cache, batch, context)
|
|
63
|
+
- `default_parameters`: Default parameter values (temperature, top_p, reasoning_effort)
|
|
64
|
+
- `provider_properties`: Provider-specific configuration options
|
|
65
|
+
- `alternative_keys`: Alternative model identifiers/aliases
|
|
66
|
+
|
|
67
|
+
## Configuration Inheritance
|
|
68
|
+
|
|
69
|
+
Configurations support hierarchical inheritance through `base-config` blocks:
|
|
70
|
+
|
|
71
|
+
### 1. Provider-level base-config
|
|
72
|
+
```yaml
|
|
73
|
+
base-config:
|
|
74
|
+
company: Anthropic
|
|
75
|
+
open_source: false
|
|
76
|
+
supports:
|
|
77
|
+
images: true
|
|
78
|
+
tools: true
|
|
79
|
+
metadata:
|
|
80
|
+
available_for_everyone: true
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### 2. Model-block base-config
|
|
84
|
+
```yaml
|
|
85
|
+
claude-4-models:
|
|
86
|
+
base-config:
|
|
87
|
+
supports:
|
|
88
|
+
temperature: true
|
|
89
|
+
default_parameters:
|
|
90
|
+
temperature: 1
|
|
91
|
+
|
|
92
|
+
anthropic/claude-opus-4-1-20250805:
|
|
93
|
+
# Inherits from both provider and block base-configs
|
|
94
|
+
properties:
|
|
95
|
+
context_window: 200_000
|
|
96
|
+
max_tokens: 32_000
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### 3. Individual model overrides
|
|
100
|
+
Models can override any inherited configuration:
|
|
101
|
+
|
|
102
|
+
```yaml
|
|
103
|
+
anthropic/claude-opus-4-1-20250805:
|
|
104
|
+
properties:
|
|
105
|
+
context_window: 200_000
|
|
106
|
+
max_tokens: 32_000
|
|
107
|
+
metadata:
|
|
108
|
+
available_for_everyone: false # Override base-config
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Alternative Keys
|
|
112
|
+
|
|
113
|
+
Models can define alternative identifiers that map to the same configuration:
|
|
114
|
+
|
|
115
|
+
```yaml
|
|
116
|
+
anthropic/claude-3-5-sonnet-20241022:
|
|
117
|
+
label: Claude 3.5 Sonnet Latest
|
|
118
|
+
properties:
|
|
119
|
+
context_window: 200_000
|
|
120
|
+
max_tokens: 8_192
|
|
121
|
+
alternative_keys:
|
|
122
|
+
- anthropic/claude-3-5-sonnet-latest
|
|
123
|
+
- anthropic/claude-3.5-sonnet-latest
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Alternative keys can also override configuration:
|
|
127
|
+
|
|
128
|
+
```yaml
|
|
129
|
+
alternative_keys:
|
|
130
|
+
- anthropic/claude-opus-4-1-20250805-thinking:
|
|
131
|
+
properties:
|
|
132
|
+
reasoning_model: true
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Generating all_models.json
|
|
136
|
+
|
|
137
|
+
After making changes to any YAML configuration file, regenerate the compiled configuration:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
make config
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
This generates `all_models.json` which is used by the model registry at runtime.
|
|
144
|
+
|
|
145
|
+
## Schema Validation
|
|
146
|
+
|
|
147
|
+
The configuration is validated using Pydantic models defined in `register_models.py`:
|
|
148
|
+
- `Properties` - Model properties
|
|
149
|
+
- `Supports` - Feature support flags
|
|
150
|
+
- `Metadata` - Platform metadata
|
|
151
|
+
- `DefaultParameters` - Default parameter values
|
|
152
|
+
- `CostProperties` - Pricing information
|
|
153
|
+
- `ProviderProperties` - Provider-specific config (dynamically generated)
|
|
154
|
+
|
|
155
|
+
## Migration Notes
|
|
156
|
+
|
|
157
|
+
### Previous Structure (Deprecated)
|
|
158
|
+
The old configuration used `class_properties` which mixed support flags and metadata:
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
# OLD - Do not use
|
|
162
|
+
class_properties:
|
|
163
|
+
supports_images: true
|
|
164
|
+
supports_batch_requests: true
|
|
165
|
+
deprecated: false
|
|
166
|
+
available_for_everyone: true
|
|
167
|
+
properties:
|
|
168
|
+
max_token_output: 32_000
|
|
169
|
+
```
|
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
base-config:
|
|
2
2
|
company: AI21 Labs
|
|
3
3
|
documentation_url: https://www.ai21.com/jamba
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
supports:
|
|
5
|
+
images: false
|
|
6
|
+
files: false
|
|
7
|
+
metadata:
|
|
6
8
|
available_as_evaluator: false
|
|
7
|
-
supports_files: false
|
|
8
9
|
available_for_everyone: false
|
|
9
10
|
ignored_for_cost: false
|
|
10
11
|
properties:
|
|
11
12
|
context_window: null
|
|
12
|
-
|
|
13
|
+
max_tokens: 4096
|
|
13
14
|
training_cutoff: null
|
|
14
15
|
|
|
15
16
|
ai21labs-models:
|
|
16
17
|
base-config:
|
|
17
18
|
open_source: true
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
supports:
|
|
20
|
+
temperature: true
|
|
20
21
|
default_parameters:
|
|
21
22
|
temperature: 0.4
|
|
22
23
|
|
|
@@ -46,7 +47,7 @@ ai21labs-models:
|
|
|
46
47
|
label: Jamba 1.6 Large
|
|
47
48
|
description: The most powerful and efficient long context model
|
|
48
49
|
release_date: 2025-03-06
|
|
49
|
-
|
|
50
|
+
metadata:
|
|
50
51
|
deprecated: true
|
|
51
52
|
properties:
|
|
52
53
|
context_window: 256_000
|
|
@@ -60,7 +61,7 @@ ai21labs-models:
|
|
|
60
61
|
label: Jamba 1.6 Mini
|
|
61
62
|
description: The most powerful and efficient long context model
|
|
62
63
|
release_date: 2025-03-06
|
|
63
|
-
|
|
64
|
+
metadata:
|
|
64
65
|
deprecated: true
|
|
65
66
|
properties:
|
|
66
67
|
context_window: 256_000
|
|
@@ -76,7 +77,7 @@ ai21labs-models:
|
|
|
76
77
|
release_date: 2024-08-22
|
|
77
78
|
properties:
|
|
78
79
|
context_window: 256_000
|
|
79
|
-
|
|
80
|
+
metadata:
|
|
80
81
|
deprecated: true
|
|
81
82
|
costs_per_million_token:
|
|
82
83
|
input: 2.00
|
|
@@ -90,7 +91,7 @@ ai21labs-models:
|
|
|
90
91
|
release_date: 2024-08-22
|
|
91
92
|
properties:
|
|
92
93
|
context_window: 256_000
|
|
93
|
-
|
|
94
|
+
metadata:
|
|
94
95
|
deprecated: true
|
|
95
96
|
costs_per_million_token:
|
|
96
97
|
input: 0.2
|
|
@@ -3,13 +3,11 @@ qwen-models:
|
|
|
3
3
|
company: Alibaba
|
|
4
4
|
open_source: false
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
supports:
|
|
7
|
+
temperature: true
|
|
8
|
+
metadata:
|
|
7
9
|
available_for_everyone: false
|
|
8
10
|
available_as_evaluator: false
|
|
9
|
-
supports_metadata: true
|
|
10
|
-
supports_files: false
|
|
11
|
-
ignored_for_cost: false
|
|
12
|
-
supports_temperature: true
|
|
13
11
|
default_parameters:
|
|
14
12
|
temperature: 0.7
|
|
15
13
|
properties:
|
|
@@ -21,15 +19,16 @@ qwen-models:
|
|
|
21
19
|
release_date: 2025-09-05
|
|
22
20
|
properties:
|
|
23
21
|
context_window: 262_144
|
|
24
|
-
|
|
22
|
+
max_tokens: 65_536
|
|
25
23
|
training_cutoff: ""
|
|
26
24
|
costs_per_million_token:
|
|
27
25
|
input: 1.2
|
|
28
26
|
output: 6
|
|
29
|
-
|
|
27
|
+
supports:
|
|
28
|
+
images: false
|
|
29
|
+
tools: true
|
|
30
|
+
metadata:
|
|
30
31
|
available_for_everyone: false
|
|
31
|
-
supports_images: false
|
|
32
|
-
supports_tools: true
|
|
33
32
|
|
|
34
33
|
alibaba/qwen3-max-2025-09-23:
|
|
35
34
|
label: Qwen 3 Max 2025-09-23
|
|
@@ -37,16 +36,17 @@ qwen-models:
|
|
|
37
36
|
release_date: 2025-09-23
|
|
38
37
|
properties:
|
|
39
38
|
context_window: 262_144
|
|
40
|
-
|
|
39
|
+
max_tokens: 65_536
|
|
41
40
|
training_cutoff: ""
|
|
42
41
|
reasoning_model: true
|
|
43
42
|
costs_per_million_token:
|
|
44
43
|
input: 1.2
|
|
45
44
|
output: 6
|
|
46
|
-
|
|
45
|
+
supports:
|
|
46
|
+
images: false
|
|
47
|
+
tools: true
|
|
48
|
+
metadata:
|
|
47
49
|
available_for_everyone: false
|
|
48
|
-
supports_images: false
|
|
49
|
-
supports_tools: true
|
|
50
50
|
|
|
51
51
|
alibaba/qwen3-max:
|
|
52
52
|
label: Qwen 3 Max
|
|
@@ -54,7 +54,7 @@ qwen-models:
|
|
|
54
54
|
release_date: 2025-09-23
|
|
55
55
|
properties:
|
|
56
56
|
context_window: 262_144
|
|
57
|
-
|
|
57
|
+
max_tokens: 65_536
|
|
58
58
|
training_cutoff: ""
|
|
59
59
|
reasoning_model: false
|
|
60
60
|
costs_per_million_token:
|
|
@@ -67,12 +67,11 @@ qwen-models:
|
|
|
67
67
|
threshold: 32_000
|
|
68
68
|
input: 2.4
|
|
69
69
|
output: 12
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
supports:
|
|
71
|
+
images: false
|
|
72
|
+
tools: true
|
|
73
|
+
metadata:
|
|
73
74
|
available_for_everyone: false
|
|
74
|
-
supports_images: false
|
|
75
|
-
supports_tools: true
|
|
76
75
|
|
|
77
76
|
alibaba/qwen3-vl-plus-2025-09-23:
|
|
78
77
|
label: Qwen 3 VL Plus
|
|
@@ -81,11 +80,11 @@ qwen-models:
|
|
|
81
80
|
release_date: 2025-09-23
|
|
82
81
|
properties:
|
|
83
82
|
context_window: 262_144
|
|
84
|
-
|
|
83
|
+
max_tokens: 32_768
|
|
85
84
|
training_cutoff: ""
|
|
86
85
|
reasoning_model: false
|
|
87
86
|
costs_per_million_token:
|
|
88
87
|
input: 0.2
|
|
89
88
|
output: 1.6
|
|
90
|
-
|
|
91
|
-
|
|
89
|
+
supports:
|
|
90
|
+
images: true
|