superbrain-server 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/analyzers/__pycache__/__init__.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/audio_transcribe.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/caption.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/music_identifier.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/text_analyzer.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/visual_analyze.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/webpage_analyzer.cpython-311.pyc +0 -0
- package/payload/analyzers/__pycache__/youtube_analyzer.cpython-311.pyc +0 -0
- package/payload/api.py +1 -0
- package/payload/config/model_rankings.json +250 -20
- package/payload/config/openrouter_free_models.json +519 -482
- package/payload/core/__pycache__/__init__.cpython-311.pyc +0 -0
- package/payload/core/__pycache__/category_manager.cpython-311.pyc +0 -0
- package/payload/core/__pycache__/database.cpython-311.pyc +0 -0
- package/payload/core/__pycache__/link_checker.cpython-311.pyc +0 -0
- package/payload/core/__pycache__/model_router.cpython-311.pyc +0 -0
- package/payload/core/model_router.py +7 -10
- package/payload/instagram/__pycache__/__init__.cpython-311.pyc +0 -0
- package/payload/instagram/__pycache__/instagram_downloader.cpython-311.pyc +0 -0
- package/payload/instagram/__pycache__/instagram_login.cpython-311.pyc +0 -0
- package/payload/start.py +188 -189
- package/payload/temp/Random cinematic shots/Random cinematic shots..mp4 +0 -0
- package/payload/temp/Random cinematic shots/Random cinematic shots._audio.mp3 +0 -0
- package/payload/temp/Random cinematic shots/Random cinematic shots._thumbnail.jpg +0 -0
- package/payload/temp/Random cinematic shots/info.txt +18 -0
- package/payload/temp/Random cinematic shots._1/Random cinematic shots..mp4 +0 -0
- package/payload/temp/Random cinematic shots._1/Random cinematic shots._audio.mp3 +0 -0
- package/payload/temp/Random cinematic shots._1/Random cinematic shots._thumbnail.jpg +0 -0
- package/payload/temp/Random cinematic shots._1/info.txt +18 -0
- package/payload/temp/Random cinematic shots._2/Random cinematic shots..mp4 +0 -0
- package/payload/temp/Random cinematic shots._2/Random cinematic shots._audio.mp3 +0 -0
- package/payload/temp/Random cinematic shots._2/Random cinematic shots._thumbnail.jpg +0 -0
- package/payload/temp/Random cinematic shots._2/info.txt +18 -0
- package/payload/test_backend.py +241 -0
- package/payload/tests/__init__.py +0 -0
- package/payload/tests/__pycache__/__init__.cpython-311.pyc +0 -0
- package/payload/tests/__pycache__/test_api.cpython-311.pyc +0 -0
- package/payload/tests/__pycache__/test_db.cpython-311.pyc +0 -0
- package/payload/tests/__pycache__/test_sync_code.cpython-311.pyc +0 -0
- package/payload/tests/test_api.py +17 -0
- package/payload/tests/test_db.py +22 -0
- package/payload/tests/test_sync_code.py +65 -0
- package/payload/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- package/payload/utils/__pycache__/db_stats.cpython-311.pyc +0 -0
- package/payload/utils/__pycache__/manage_token.cpython-311.pyc +0 -0
- package/payload/__pycache__/api.cpython-311.pyc +0 -0
- package/payload/__pycache__/main.cpython-311.pyc +0 -0
- package/payload/__pycache__/start.cpython-311.pyc +0 -0
- package/payload/config/.api_keys +0 -3
|
@@ -1,13 +1,230 @@
|
|
|
1
1
|
{
|
|
2
|
-
"cached_at": "2026-
|
|
2
|
+
"cached_at": "2026-04-07T19:16:35.621206",
|
|
3
3
|
"models": [
|
|
4
|
+
{
|
|
5
|
+
"id": "qwen/qwen3.6-plus:free",
|
|
6
|
+
"canonical_slug": "qwen/qwen3.6-plus-04-02",
|
|
7
|
+
"hugging_face_id": "",
|
|
8
|
+
"name": "Qwen: Qwen3.6 Plus (free)",
|
|
9
|
+
"created": 1775133557,
|
|
10
|
+
"description": "Qwen 3.6 Plus builds on a hybrid architecture that combines efficient linear attention with sparse mixture-of-experts routing, enabling strong scalability and high-performance inference. Compared to the 3.5 series, it delivers...",
|
|
11
|
+
"context_length": 1000000,
|
|
12
|
+
"architecture": {
|
|
13
|
+
"modality": "text+image+video->text",
|
|
14
|
+
"input_modalities": [
|
|
15
|
+
"text",
|
|
16
|
+
"image",
|
|
17
|
+
"video"
|
|
18
|
+
],
|
|
19
|
+
"output_modalities": [
|
|
20
|
+
"text"
|
|
21
|
+
],
|
|
22
|
+
"tokenizer": "Qwen3",
|
|
23
|
+
"instruct_type": null
|
|
24
|
+
},
|
|
25
|
+
"pricing": {
|
|
26
|
+
"prompt": "0",
|
|
27
|
+
"completion": "0"
|
|
28
|
+
},
|
|
29
|
+
"top_provider": {
|
|
30
|
+
"context_length": 1000000,
|
|
31
|
+
"max_completion_tokens": 65536,
|
|
32
|
+
"is_moderated": false
|
|
33
|
+
},
|
|
34
|
+
"per_request_limits": null,
|
|
35
|
+
"supported_parameters": [
|
|
36
|
+
"include_reasoning",
|
|
37
|
+
"max_tokens",
|
|
38
|
+
"presence_penalty",
|
|
39
|
+
"reasoning",
|
|
40
|
+
"response_format",
|
|
41
|
+
"seed",
|
|
42
|
+
"structured_outputs",
|
|
43
|
+
"temperature",
|
|
44
|
+
"tool_choice",
|
|
45
|
+
"tools",
|
|
46
|
+
"top_p"
|
|
47
|
+
],
|
|
48
|
+
"default_parameters": {
|
|
49
|
+
"temperature": null,
|
|
50
|
+
"top_p": null,
|
|
51
|
+
"top_k": null,
|
|
52
|
+
"frequency_penalty": null,
|
|
53
|
+
"presence_penalty": null,
|
|
54
|
+
"repetition_penalty": null
|
|
55
|
+
},
|
|
56
|
+
"knowledge_cutoff": null,
|
|
57
|
+
"expiration_date": "2026-04-07",
|
|
58
|
+
"links": {
|
|
59
|
+
"details": "/api/v1/models/qwen/qwen3.6-plus-04-02/endpoints"
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"id": "google/lyria-3-pro-preview",
|
|
64
|
+
"canonical_slug": "google/lyria-3-pro-preview-20260330",
|
|
65
|
+
"hugging_face_id": null,
|
|
66
|
+
"name": "Google: Lyria 3 Pro Preview",
|
|
67
|
+
"created": 1774907286,
|
|
68
|
+
"description": "Full-length songs are priced at $0.08 per song. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate high-quality, 48kHz...",
|
|
69
|
+
"context_length": 1048576,
|
|
70
|
+
"architecture": {
|
|
71
|
+
"modality": "text+image->text+audio",
|
|
72
|
+
"input_modalities": [
|
|
73
|
+
"text",
|
|
74
|
+
"image"
|
|
75
|
+
],
|
|
76
|
+
"output_modalities": [
|
|
77
|
+
"text",
|
|
78
|
+
"audio"
|
|
79
|
+
],
|
|
80
|
+
"tokenizer": "Other",
|
|
81
|
+
"instruct_type": null
|
|
82
|
+
},
|
|
83
|
+
"pricing": {
|
|
84
|
+
"prompt": "0",
|
|
85
|
+
"completion": "0"
|
|
86
|
+
},
|
|
87
|
+
"top_provider": {
|
|
88
|
+
"context_length": 1048576,
|
|
89
|
+
"max_completion_tokens": 65536,
|
|
90
|
+
"is_moderated": false
|
|
91
|
+
},
|
|
92
|
+
"per_request_limits": null,
|
|
93
|
+
"supported_parameters": [
|
|
94
|
+
"max_tokens",
|
|
95
|
+
"response_format",
|
|
96
|
+
"seed",
|
|
97
|
+
"temperature",
|
|
98
|
+
"top_p"
|
|
99
|
+
],
|
|
100
|
+
"default_parameters": {
|
|
101
|
+
"temperature": null,
|
|
102
|
+
"top_p": null,
|
|
103
|
+
"top_k": null,
|
|
104
|
+
"frequency_penalty": null,
|
|
105
|
+
"presence_penalty": null,
|
|
106
|
+
"repetition_penalty": null
|
|
107
|
+
},
|
|
108
|
+
"knowledge_cutoff": null,
|
|
109
|
+
"expiration_date": null,
|
|
110
|
+
"links": {
|
|
111
|
+
"details": "/api/v1/models/google/lyria-3-pro-preview-20260330/endpoints"
|
|
112
|
+
}
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"id": "google/lyria-3-clip-preview",
|
|
116
|
+
"canonical_slug": "google/lyria-3-clip-preview-20260330",
|
|
117
|
+
"hugging_face_id": null,
|
|
118
|
+
"name": "Google: Lyria 3 Clip Preview",
|
|
119
|
+
"created": 1774907255,
|
|
120
|
+
"description": "30 second duration clips are priced at $0.04 per clip. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate...",
|
|
121
|
+
"context_length": 1048576,
|
|
122
|
+
"architecture": {
|
|
123
|
+
"modality": "text+image->text+audio",
|
|
124
|
+
"input_modalities": [
|
|
125
|
+
"text",
|
|
126
|
+
"image"
|
|
127
|
+
],
|
|
128
|
+
"output_modalities": [
|
|
129
|
+
"text",
|
|
130
|
+
"audio"
|
|
131
|
+
],
|
|
132
|
+
"tokenizer": "Other",
|
|
133
|
+
"instruct_type": null
|
|
134
|
+
},
|
|
135
|
+
"pricing": {
|
|
136
|
+
"prompt": "0",
|
|
137
|
+
"completion": "0"
|
|
138
|
+
},
|
|
139
|
+
"top_provider": {
|
|
140
|
+
"context_length": 1048576,
|
|
141
|
+
"max_completion_tokens": 65536,
|
|
142
|
+
"is_moderated": false
|
|
143
|
+
},
|
|
144
|
+
"per_request_limits": null,
|
|
145
|
+
"supported_parameters": [
|
|
146
|
+
"max_tokens",
|
|
147
|
+
"response_format",
|
|
148
|
+
"seed",
|
|
149
|
+
"temperature",
|
|
150
|
+
"top_p"
|
|
151
|
+
],
|
|
152
|
+
"default_parameters": {
|
|
153
|
+
"temperature": null,
|
|
154
|
+
"top_p": null,
|
|
155
|
+
"top_k": null,
|
|
156
|
+
"frequency_penalty": null,
|
|
157
|
+
"presence_penalty": null,
|
|
158
|
+
"repetition_penalty": null
|
|
159
|
+
},
|
|
160
|
+
"knowledge_cutoff": null,
|
|
161
|
+
"expiration_date": null,
|
|
162
|
+
"links": {
|
|
163
|
+
"details": "/api/v1/models/google/lyria-3-clip-preview-20260330/endpoints"
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
{
|
|
167
|
+
"id": "nvidia/nemotron-3-super-120b-a12b:free",
|
|
168
|
+
"canonical_slug": "nvidia/nemotron-3-super-120b-a12b-20230311",
|
|
169
|
+
"hugging_face_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
|
|
170
|
+
"name": "NVIDIA: Nemotron 3 Super (free)",
|
|
171
|
+
"created": 1773245239,
|
|
172
|
+
"description": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...",
|
|
173
|
+
"context_length": 262144,
|
|
174
|
+
"architecture": {
|
|
175
|
+
"modality": "text->text",
|
|
176
|
+
"input_modalities": [
|
|
177
|
+
"text"
|
|
178
|
+
],
|
|
179
|
+
"output_modalities": [
|
|
180
|
+
"text"
|
|
181
|
+
],
|
|
182
|
+
"tokenizer": "Other",
|
|
183
|
+
"instruct_type": null
|
|
184
|
+
},
|
|
185
|
+
"pricing": {
|
|
186
|
+
"prompt": "0",
|
|
187
|
+
"completion": "0"
|
|
188
|
+
},
|
|
189
|
+
"top_provider": {
|
|
190
|
+
"context_length": 262144,
|
|
191
|
+
"max_completion_tokens": 262144,
|
|
192
|
+
"is_moderated": false
|
|
193
|
+
},
|
|
194
|
+
"per_request_limits": null,
|
|
195
|
+
"supported_parameters": [
|
|
196
|
+
"include_reasoning",
|
|
197
|
+
"max_tokens",
|
|
198
|
+
"reasoning",
|
|
199
|
+
"response_format",
|
|
200
|
+
"seed",
|
|
201
|
+
"structured_outputs",
|
|
202
|
+
"temperature",
|
|
203
|
+
"tool_choice",
|
|
204
|
+
"tools",
|
|
205
|
+
"top_p"
|
|
206
|
+
],
|
|
207
|
+
"default_parameters": {
|
|
208
|
+
"temperature": 1,
|
|
209
|
+
"top_p": 0.95,
|
|
210
|
+
"top_k": null,
|
|
211
|
+
"frequency_penalty": null,
|
|
212
|
+
"presence_penalty": null,
|
|
213
|
+
"repetition_penalty": null
|
|
214
|
+
},
|
|
215
|
+
"knowledge_cutoff": null,
|
|
216
|
+
"expiration_date": null,
|
|
217
|
+
"links": {
|
|
218
|
+
"details": "/api/v1/models/nvidia/nemotron-3-super-120b-a12b-20230311/endpoints"
|
|
219
|
+
}
|
|
220
|
+
},
|
|
4
221
|
{
|
|
5
222
|
"id": "openrouter/free",
|
|
6
223
|
"canonical_slug": "openrouter/free",
|
|
7
224
|
"hugging_face_id": "",
|
|
8
225
|
"name": "Free Models Router",
|
|
9
226
|
"created": 1769917427,
|
|
10
|
-
"description": "The simplest way to get free inference. openrouter/free is a router that selects free models at random from the models available on OpenRouter. The router smartly filters for models that
|
|
227
|
+
"description": "The simplest way to get free inference. openrouter/free is a router that selects free models at random from the models available on OpenRouter. The router smartly filters for models that...",
|
|
11
228
|
"context_length": 200000,
|
|
12
229
|
"architecture": {
|
|
13
230
|
"modality": "text+image->text",
|
|
@@ -54,7 +271,11 @@
|
|
|
54
271
|
"top_p": null,
|
|
55
272
|
"frequency_penalty": null
|
|
56
273
|
},
|
|
57
|
-
"
|
|
274
|
+
"knowledge_cutoff": null,
|
|
275
|
+
"expiration_date": null,
|
|
276
|
+
"links": {
|
|
277
|
+
"details": "/api/v1/models/openrouter/free/endpoints"
|
|
278
|
+
}
|
|
58
279
|
},
|
|
59
280
|
{
|
|
60
281
|
"id": "nvidia/nemotron-3-nano-30b-a3b:free",
|
|
@@ -62,7 +283,7 @@
|
|
|
62
283
|
"hugging_face_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
63
284
|
"name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)",
|
|
64
285
|
"created": 1765731275,
|
|
65
|
-
"description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems
|
|
286
|
+
"description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully...",
|
|
66
287
|
"context_length": 256000,
|
|
67
288
|
"architecture": {
|
|
68
289
|
"modality": "text->text",
|
|
@@ -100,7 +321,11 @@
|
|
|
100
321
|
"top_p": null,
|
|
101
322
|
"frequency_penalty": null
|
|
102
323
|
},
|
|
103
|
-
"
|
|
324
|
+
"knowledge_cutoff": null,
|
|
325
|
+
"expiration_date": null,
|
|
326
|
+
"links": {
|
|
327
|
+
"details": "/api/v1/models/nvidia/nemotron-3-nano-30b-a3b/endpoints"
|
|
328
|
+
}
|
|
104
329
|
},
|
|
105
330
|
{
|
|
106
331
|
"id": "qwen/qwen3-next-80b-a3b-instruct:free",
|
|
@@ -108,7 +333,7 @@
|
|
|
108
333
|
"hugging_face_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
|
|
109
334
|
"name": "Qwen: Qwen3 Next 80B A3B Instruct (free)",
|
|
110
335
|
"created": 1757612213,
|
|
111
|
-
"description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without \u201cthinking\u201d traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual
|
|
336
|
+
"description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without \u201cthinking\u201d traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual...",
|
|
112
337
|
"context_length": 262144,
|
|
113
338
|
"architecture": {
|
|
114
339
|
"modality": "text->text",
|
|
@@ -145,7 +370,11 @@
|
|
|
145
370
|
"top_p"
|
|
146
371
|
],
|
|
147
372
|
"default_parameters": {},
|
|
148
|
-
"
|
|
373
|
+
"knowledge_cutoff": "2025-09-30",
|
|
374
|
+
"expiration_date": null,
|
|
375
|
+
"links": {
|
|
376
|
+
"details": "/api/v1/models/qwen/qwen3-next-80b-a3b-instruct-2509/endpoints"
|
|
377
|
+
}
|
|
149
378
|
},
|
|
150
379
|
{
|
|
151
380
|
"id": "stepfun/step-3.5-flash:free",
|
|
@@ -153,7 +382,7 @@
|
|
|
153
382
|
"hugging_face_id": "stepfun-ai/Step-3.5-Flash",
|
|
154
383
|
"name": "StepFun: Step 3.5 Flash (free)",
|
|
155
384
|
"created": 1769728337,
|
|
156
|
-
"description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token
|
|
385
|
+
"description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token....",
|
|
157
386
|
"context_length": 256000,
|
|
158
387
|
"architecture": {
|
|
159
388
|
"modality": "text->text",
|
|
@@ -191,64 +420,11 @@
|
|
|
191
420
|
"top_p": null,
|
|
192
421
|
"frequency_penalty": null
|
|
193
422
|
},
|
|
194
|
-
"
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
"hugging_face_id": "Qwen/Qwen3-VL-30B-A3B-Thinking",
|
|
200
|
-
"name": "Qwen: Qwen3 VL 30B A3B Thinking",
|
|
201
|
-
"created": 1759794479,
|
|
202
|
-
"description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.",
|
|
203
|
-
"context_length": 131072,
|
|
204
|
-
"architecture": {
|
|
205
|
-
"modality": "text+image->text",
|
|
206
|
-
"input_modalities": [
|
|
207
|
-
"text",
|
|
208
|
-
"image"
|
|
209
|
-
],
|
|
210
|
-
"output_modalities": [
|
|
211
|
-
"text"
|
|
212
|
-
],
|
|
213
|
-
"tokenizer": "Qwen3",
|
|
214
|
-
"instruct_type": null
|
|
215
|
-
},
|
|
216
|
-
"pricing": {
|
|
217
|
-
"prompt": "0",
|
|
218
|
-
"completion": "0",
|
|
219
|
-
"request": "0",
|
|
220
|
-
"image": "0",
|
|
221
|
-
"web_search": "0",
|
|
222
|
-
"internal_reasoning": "0"
|
|
223
|
-
},
|
|
224
|
-
"top_provider": {
|
|
225
|
-
"context_length": 131072,
|
|
226
|
-
"max_completion_tokens": 32768,
|
|
227
|
-
"is_moderated": false
|
|
228
|
-
},
|
|
229
|
-
"per_request_limits": null,
|
|
230
|
-
"supported_parameters": [
|
|
231
|
-
"frequency_penalty",
|
|
232
|
-
"include_reasoning",
|
|
233
|
-
"max_tokens",
|
|
234
|
-
"presence_penalty",
|
|
235
|
-
"reasoning",
|
|
236
|
-
"repetition_penalty",
|
|
237
|
-
"response_format",
|
|
238
|
-
"seed",
|
|
239
|
-
"stop",
|
|
240
|
-
"structured_outputs",
|
|
241
|
-
"temperature",
|
|
242
|
-
"tool_choice",
|
|
243
|
-
"tools",
|
|
244
|
-
"top_k",
|
|
245
|
-
"top_p"
|
|
246
|
-
],
|
|
247
|
-
"default_parameters": {
|
|
248
|
-
"temperature": 0.8,
|
|
249
|
-
"top_p": 0.95
|
|
250
|
-
},
|
|
251
|
-
"expiration_date": null
|
|
423
|
+
"knowledge_cutoff": null,
|
|
424
|
+
"expiration_date": null,
|
|
425
|
+
"links": {
|
|
426
|
+
"details": "/api/v1/models/stepfun/step-3.5-flash/endpoints"
|
|
427
|
+
}
|
|
252
428
|
},
|
|
253
429
|
{
|
|
254
430
|
"id": "arcee-ai/trinity-mini:free",
|
|
@@ -256,7 +432,7 @@
|
|
|
256
432
|
"hugging_face_id": "arcee-ai/Trinity-Mini",
|
|
257
433
|
"name": "Arcee AI: Trinity Mini (free)",
|
|
258
434
|
"created": 1764601720,
|
|
259
|
-
"description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function
|
|
435
|
+
"description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function...",
|
|
260
436
|
"context_length": 131072,
|
|
261
437
|
"architecture": {
|
|
262
438
|
"modality": "text->text",
|
|
@@ -296,74 +472,72 @@
|
|
|
296
472
|
"top_p": 0.75,
|
|
297
473
|
"frequency_penalty": null
|
|
298
474
|
},
|
|
299
|
-
"
|
|
475
|
+
"knowledge_cutoff": null,
|
|
476
|
+
"expiration_date": "2026-04-10",
|
|
477
|
+
"links": {
|
|
478
|
+
"details": "/api/v1/models/arcee-ai/trinity-mini-20251201/endpoints"
|
|
479
|
+
}
|
|
300
480
|
},
|
|
301
481
|
{
|
|
302
|
-
"id": "
|
|
303
|
-
"canonical_slug": "
|
|
304
|
-
"hugging_face_id": "
|
|
305
|
-
"name": "
|
|
306
|
-
"created":
|
|
307
|
-
"description": "
|
|
308
|
-
"context_length":
|
|
482
|
+
"id": "nvidia/nemotron-nano-9b-v2:free",
|
|
483
|
+
"canonical_slug": "nvidia/nemotron-nano-9b-v2",
|
|
484
|
+
"hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2",
|
|
485
|
+
"name": "NVIDIA: Nemotron Nano 9B V2 (free)",
|
|
486
|
+
"created": 1757106807,
|
|
487
|
+
"description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and...",
|
|
488
|
+
"context_length": 128000,
|
|
309
489
|
"architecture": {
|
|
310
|
-
"modality": "text
|
|
490
|
+
"modality": "text->text",
|
|
311
491
|
"input_modalities": [
|
|
312
|
-
"text"
|
|
313
|
-
"image"
|
|
492
|
+
"text"
|
|
314
493
|
],
|
|
315
494
|
"output_modalities": [
|
|
316
495
|
"text"
|
|
317
496
|
],
|
|
318
|
-
"tokenizer": "
|
|
497
|
+
"tokenizer": "Other",
|
|
319
498
|
"instruct_type": null
|
|
320
499
|
},
|
|
321
500
|
"pricing": {
|
|
322
501
|
"prompt": "0",
|
|
323
|
-
"completion": "0"
|
|
324
|
-
"request": "0",
|
|
325
|
-
"image": "0",
|
|
326
|
-
"web_search": "0",
|
|
327
|
-
"internal_reasoning": "0"
|
|
502
|
+
"completion": "0"
|
|
328
503
|
},
|
|
329
504
|
"top_provider": {
|
|
330
|
-
"context_length":
|
|
331
|
-
"max_completion_tokens":
|
|
505
|
+
"context_length": 128000,
|
|
506
|
+
"max_completion_tokens": null,
|
|
332
507
|
"is_moderated": false
|
|
333
508
|
},
|
|
334
509
|
"per_request_limits": null,
|
|
335
510
|
"supported_parameters": [
|
|
336
|
-
"frequency_penalty",
|
|
337
511
|
"include_reasoning",
|
|
338
512
|
"max_tokens",
|
|
339
|
-
"presence_penalty",
|
|
340
513
|
"reasoning",
|
|
341
|
-
"repetition_penalty",
|
|
342
514
|
"response_format",
|
|
343
515
|
"seed",
|
|
344
|
-
"stop",
|
|
345
516
|
"structured_outputs",
|
|
346
517
|
"temperature",
|
|
347
518
|
"tool_choice",
|
|
348
519
|
"tools",
|
|
349
|
-
"top_k",
|
|
350
520
|
"top_p"
|
|
351
521
|
],
|
|
352
522
|
"default_parameters": {
|
|
353
|
-
"temperature":
|
|
354
|
-
"top_p":
|
|
523
|
+
"temperature": null,
|
|
524
|
+
"top_p": null,
|
|
355
525
|
"frequency_penalty": null
|
|
356
526
|
},
|
|
357
|
-
"
|
|
527
|
+
"knowledge_cutoff": "2025-03-31",
|
|
528
|
+
"expiration_date": null,
|
|
529
|
+
"links": {
|
|
530
|
+
"details": "/api/v1/models/nvidia/nemotron-nano-9b-v2/endpoints"
|
|
531
|
+
}
|
|
358
532
|
},
|
|
359
533
|
{
|
|
360
|
-
"id": "
|
|
361
|
-
"canonical_slug": "
|
|
362
|
-
"hugging_face_id": "
|
|
363
|
-
"name": "
|
|
364
|
-
"created":
|
|
365
|
-
"description": "
|
|
366
|
-
"context_length":
|
|
534
|
+
"id": "minimax/minimax-m2.5:free",
|
|
535
|
+
"canonical_slug": "minimax/minimax-m2.5-20260211",
|
|
536
|
+
"hugging_face_id": "MiniMaxAI/MiniMax-M2.5",
|
|
537
|
+
"name": "MiniMax: MiniMax M2.5 (free)",
|
|
538
|
+
"created": 1770908502,
|
|
539
|
+
"description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1...",
|
|
540
|
+
"context_length": 196608,
|
|
367
541
|
"architecture": {
|
|
368
542
|
"modality": "text->text",
|
|
369
543
|
"input_modalities": [
|
|
@@ -380,9 +554,9 @@
|
|
|
380
554
|
"completion": "0"
|
|
381
555
|
},
|
|
382
556
|
"top_provider": {
|
|
383
|
-
"context_length":
|
|
384
|
-
"max_completion_tokens":
|
|
385
|
-
"is_moderated":
|
|
557
|
+
"context_length": 196608,
|
|
558
|
+
"max_completion_tokens": 196608,
|
|
559
|
+
"is_moderated": true
|
|
386
560
|
},
|
|
387
561
|
"per_request_limits": null,
|
|
388
562
|
"supported_parameters": [
|
|
@@ -391,18 +565,23 @@
|
|
|
391
565
|
"reasoning",
|
|
392
566
|
"response_format",
|
|
393
567
|
"seed",
|
|
394
|
-
"
|
|
568
|
+
"stop",
|
|
395
569
|
"temperature",
|
|
396
|
-
"
|
|
397
|
-
"tools",
|
|
398
|
-
"top_p"
|
|
570
|
+
"tools"
|
|
399
571
|
],
|
|
400
572
|
"default_parameters": {
|
|
401
|
-
"temperature":
|
|
402
|
-
"top_p":
|
|
403
|
-
"
|
|
404
|
-
|
|
405
|
-
|
|
573
|
+
"temperature": 1,
|
|
574
|
+
"top_p": 0.95,
|
|
575
|
+
"top_k": null,
|
|
576
|
+
"frequency_penalty": null,
|
|
577
|
+
"presence_penalty": null,
|
|
578
|
+
"repetition_penalty": null
|
|
579
|
+
},
|
|
580
|
+
"knowledge_cutoff": null,
|
|
581
|
+
"expiration_date": null,
|
|
582
|
+
"links": {
|
|
583
|
+
"details": "/api/v1/models/minimax/minimax-m2.5-20260211/endpoints"
|
|
584
|
+
}
|
|
406
585
|
},
|
|
407
586
|
{
|
|
408
587
|
"id": "qwen/qwen3-coder:free",
|
|
@@ -410,7 +589,7 @@
|
|
|
410
589
|
"hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
|
411
590
|
"name": "Qwen: Qwen3 Coder 480B A35B (free)",
|
|
412
591
|
"created": 1753230546,
|
|
413
|
-
"description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over
|
|
592
|
+
"description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over...",
|
|
414
593
|
"context_length": 262000,
|
|
415
594
|
"architecture": {
|
|
416
595
|
"modality": "text->text",
|
|
@@ -445,7 +624,11 @@
|
|
|
445
624
|
"top_p"
|
|
446
625
|
],
|
|
447
626
|
"default_parameters": {},
|
|
448
|
-
"
|
|
627
|
+
"knowledge_cutoff": "2025-06-30",
|
|
628
|
+
"expiration_date": null,
|
|
629
|
+
"links": {
|
|
630
|
+
"details": "/api/v1/models/qwen/qwen3-coder-480b-a35b-07-25/endpoints"
|
|
631
|
+
}
|
|
449
632
|
},
|
|
450
633
|
{
|
|
451
634
|
"id": "liquid/lfm-2.5-1.2b-thinking:free",
|
|
@@ -453,7 +636,7 @@
|
|
|
453
636
|
"hugging_face_id": "LiquidAI/LFM2.5-1.2B-Thinking",
|
|
454
637
|
"name": "LiquidAI: LFM2.5-1.2B-Thinking (free)",
|
|
455
638
|
"created": 1768927527,
|
|
456
|
-
"description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG\u2014while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is
|
|
639
|
+
"description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG\u2014while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is...",
|
|
457
640
|
"context_length": 32768,
|
|
458
641
|
"architecture": {
|
|
459
642
|
"modality": "text->text",
|
|
@@ -495,7 +678,11 @@
|
|
|
495
678
|
"top_p": null,
|
|
496
679
|
"frequency_penalty": null
|
|
497
680
|
},
|
|
498
|
-
"
|
|
681
|
+
"knowledge_cutoff": null,
|
|
682
|
+
"expiration_date": null,
|
|
683
|
+
"links": {
|
|
684
|
+
"details": "/api/v1/models/liquid/lfm-2.5-1.2b-thinking-20260120/endpoints"
|
|
685
|
+
}
|
|
499
686
|
},
|
|
500
687
|
{
|
|
501
688
|
"id": "liquid/lfm-2.5-1.2b-instruct:free",
|
|
@@ -543,7 +730,11 @@
|
|
|
543
730
|
"top_p": null,
|
|
544
731
|
"frequency_penalty": null
|
|
545
732
|
},
|
|
546
|
-
"
|
|
733
|
+
"knowledge_cutoff": null,
|
|
734
|
+
"expiration_date": null,
|
|
735
|
+
"links": {
|
|
736
|
+
"details": "/api/v1/models/liquid/lfm-2.5-1.2b-instruct-20260120/endpoints"
|
|
737
|
+
}
|
|
547
738
|
},
|
|
548
739
|
{
|
|
549
740
|
"id": "nvidia/nemotron-nano-12b-v2-vl:free",
|
|
@@ -551,7 +742,7 @@
|
|
|
551
742
|
"hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16",
|
|
552
743
|
"name": "NVIDIA: Nemotron Nano 12B 2 VL (free)",
|
|
553
744
|
"created": 1761675565,
|
|
554
|
-
"description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba\u2019s
|
|
745
|
+
"description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba\u2019s...",
|
|
555
746
|
"context_length": 128000,
|
|
556
747
|
"architecture": {
|
|
557
748
|
"modality": "text+image+video->text",
|
|
@@ -591,16 +782,20 @@
|
|
|
591
782
|
"top_p": null,
|
|
592
783
|
"frequency_penalty": null
|
|
593
784
|
},
|
|
594
|
-
"
|
|
785
|
+
"knowledge_cutoff": null,
|
|
786
|
+
"expiration_date": null,
|
|
787
|
+
"links": {
|
|
788
|
+
"details": "/api/v1/models/nvidia/nemotron-nano-12b-v2-vl/endpoints"
|
|
789
|
+
}
|
|
595
790
|
},
|
|
596
791
|
{
|
|
597
|
-
"id": "
|
|
598
|
-
"canonical_slug": "
|
|
599
|
-
"hugging_face_id": "
|
|
600
|
-
"name": "
|
|
601
|
-
"created":
|
|
602
|
-
"description": "
|
|
603
|
-
"context_length":
|
|
792
|
+
"id": "arcee-ai/trinity-large-preview:free",
|
|
793
|
+
"canonical_slug": "arcee-ai/trinity-large-preview",
|
|
794
|
+
"hugging_face_id": "arcee-ai/Trinity-Large-Preview",
|
|
795
|
+
"name": "Arcee AI: Trinity Large Preview (free)",
|
|
796
|
+
"created": 1769552670,
|
|
797
|
+
"description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. It excels in creative writing,...",
|
|
798
|
+
"context_length": 131000,
|
|
604
799
|
"architecture": {
|
|
605
800
|
"modality": "text->text",
|
|
606
801
|
"input_modalities": [
|
|
@@ -609,124 +804,15 @@
|
|
|
609
804
|
"output_modalities": [
|
|
610
805
|
"text"
|
|
611
806
|
],
|
|
612
|
-
"tokenizer": "
|
|
613
|
-
"instruct_type":
|
|
807
|
+
"tokenizer": "Other",
|
|
808
|
+
"instruct_type": null
|
|
614
809
|
},
|
|
615
810
|
"pricing": {
|
|
616
811
|
"prompt": "0",
|
|
617
|
-
"completion": "0"
|
|
618
|
-
"request": "0",
|
|
619
|
-
"image": "0",
|
|
620
|
-
"web_search": "0",
|
|
621
|
-
"internal_reasoning": "0"
|
|
812
|
+
"completion": "0"
|
|
622
813
|
},
|
|
623
814
|
"top_provider": {
|
|
624
|
-
"context_length":
|
|
625
|
-
"max_completion_tokens": null,
|
|
626
|
-
"is_moderated": false
|
|
627
|
-
},
|
|
628
|
-
"per_request_limits": null,
|
|
629
|
-
"supported_parameters": [
|
|
630
|
-
"frequency_penalty",
|
|
631
|
-
"include_reasoning",
|
|
632
|
-
"logit_bias",
|
|
633
|
-
"max_tokens",
|
|
634
|
-
"min_p",
|
|
635
|
-
"presence_penalty",
|
|
636
|
-
"reasoning",
|
|
637
|
-
"repetition_penalty",
|
|
638
|
-
"response_format",
|
|
639
|
-
"seed",
|
|
640
|
-
"stop",
|
|
641
|
-
"structured_outputs",
|
|
642
|
-
"temperature",
|
|
643
|
-
"tool_choice",
|
|
644
|
-
"tools",
|
|
645
|
-
"top_k",
|
|
646
|
-
"top_p"
|
|
647
|
-
],
|
|
648
|
-
"default_parameters": {
|
|
649
|
-
"temperature": null,
|
|
650
|
-
"top_p": null,
|
|
651
|
-
"frequency_penalty": null
|
|
652
|
-
},
|
|
653
|
-
"expiration_date": null
|
|
654
|
-
},
|
|
655
|
-
{
|
|
656
|
-
"id": "upstage/solar-pro-3:free",
|
|
657
|
-
"canonical_slug": "upstage/solar-pro-3",
|
|
658
|
-
"hugging_face_id": "",
|
|
659
|
-
"name": "Upstage: Solar Pro 3 (free)",
|
|
660
|
-
"created": 1769481200,
|
|
661
|
-
"description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.",
|
|
662
|
-
"context_length": 128000,
|
|
663
|
-
"architecture": {
|
|
664
|
-
"modality": "text->text",
|
|
665
|
-
"input_modalities": [
|
|
666
|
-
"text"
|
|
667
|
-
],
|
|
668
|
-
"output_modalities": [
|
|
669
|
-
"text"
|
|
670
|
-
],
|
|
671
|
-
"tokenizer": "Other",
|
|
672
|
-
"instruct_type": null
|
|
673
|
-
},
|
|
674
|
-
"pricing": {
|
|
675
|
-
"prompt": "0",
|
|
676
|
-
"completion": "0"
|
|
677
|
-
},
|
|
678
|
-
"top_provider": {
|
|
679
|
-
"context_length": 128000,
|
|
680
|
-
"max_completion_tokens": null,
|
|
681
|
-
"is_moderated": false
|
|
682
|
-
},
|
|
683
|
-
"per_request_limits": null,
|
|
684
|
-
"supported_parameters": [
|
|
685
|
-
"include_reasoning",
|
|
686
|
-
"max_tokens",
|
|
687
|
-
"reasoning",
|
|
688
|
-
"response_format",
|
|
689
|
-
"structured_outputs",
|
|
690
|
-
"temperature",
|
|
691
|
-
"tool_choice",
|
|
692
|
-
"tools"
|
|
693
|
-
],
|
|
694
|
-
"default_parameters": {
|
|
695
|
-
"temperature": null,
|
|
696
|
-
"top_p": null,
|
|
697
|
-
"frequency_penalty": null
|
|
698
|
-
},
|
|
699
|
-
"expiration_date": "2026-03-22"
|
|
700
|
-
},
|
|
701
|
-
{
|
|
702
|
-
"id": "arcee-ai/trinity-large-preview:free",
|
|
703
|
-
"canonical_slug": "arcee-ai/trinity-large-preview",
|
|
704
|
-
"hugging_face_id": "arcee-ai/Trinity-Large-Preview",
|
|
705
|
-
"name": "Arcee AI: Trinity Large Preview (free)",
|
|
706
|
-
"created": 1769552670,
|
|
707
|
-
"description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. \n\nIt excels in creative writing, storytelling, role-play, chat scenarios, and real-time voice assistance, better than your average reasoning model usually can. But we\u2019re also introducing some of our newer agentic performance. It was trained to navigate well in agent harnesses like OpenCode, Cline, and Kilo Code, and to handle complex toolchains and long, constraint-filled prompts. \n\nThe architecture natively supports very long context windows up to 512k tokens, with the Preview API currently served at 128k context using 8-bit quantization for practical deployment. Trinity-Large-Preview reflects Arcee\u2019s efficiency-first design philosophy, offering a production-oriented frontier model with open weights and permissive licensing suitable for real-world applications and experimentation.",
|
|
708
|
-
"context_length": 131000,
|
|
709
|
-
"architecture": {
|
|
710
|
-
"modality": "text->text",
|
|
711
|
-
"input_modalities": [
|
|
712
|
-
"text"
|
|
713
|
-
],
|
|
714
|
-
"output_modalities": [
|
|
715
|
-
"text"
|
|
716
|
-
],
|
|
717
|
-
"tokenizer": "Other",
|
|
718
|
-
"instruct_type": null
|
|
719
|
-
},
|
|
720
|
-
"pricing": {
|
|
721
|
-
"prompt": "0",
|
|
722
|
-
"completion": "0",
|
|
723
|
-
"request": "0",
|
|
724
|
-
"image": "0",
|
|
725
|
-
"web_search": "0",
|
|
726
|
-
"internal_reasoning": "0"
|
|
727
|
-
},
|
|
728
|
-
"top_provider": {
|
|
729
|
-
"context_length": 131000,
|
|
815
|
+
"context_length": 131000,
|
|
730
816
|
"max_completion_tokens": null,
|
|
731
817
|
"is_moderated": false
|
|
732
818
|
},
|
|
@@ -743,9 +829,16 @@
|
|
|
743
829
|
"default_parameters": {
|
|
744
830
|
"temperature": 0.8,
|
|
745
831
|
"top_p": 0.8,
|
|
746
|
-
"
|
|
747
|
-
|
|
748
|
-
|
|
832
|
+
"top_k": null,
|
|
833
|
+
"frequency_penalty": null,
|
|
834
|
+
"presence_penalty": null,
|
|
835
|
+
"repetition_penalty": null
|
|
836
|
+
},
|
|
837
|
+
"knowledge_cutoff": null,
|
|
838
|
+
"expiration_date": null,
|
|
839
|
+
"links": {
|
|
840
|
+
"details": "/api/v1/models/arcee-ai/trinity-large-preview/endpoints"
|
|
841
|
+
}
|
|
749
842
|
},
|
|
750
843
|
{
|
|
751
844
|
"id": "meta-llama/llama-3.3-70b-instruct:free",
|
|
@@ -753,8 +846,8 @@
|
|
|
753
846
|
"hugging_face_id": "meta-llama/Llama-3.3-70B-Instruct",
|
|
754
847
|
"name": "Meta: Llama 3.3 70B Instruct (free)",
|
|
755
848
|
"created": 1733506137,
|
|
756
|
-
"description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model
|
|
757
|
-
"context_length":
|
|
849
|
+
"description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model...",
|
|
850
|
+
"context_length": 65536,
|
|
758
851
|
"architecture": {
|
|
759
852
|
"modality": "text->text",
|
|
760
853
|
"input_modalities": [
|
|
@@ -771,16 +864,15 @@
|
|
|
771
864
|
"completion": "0"
|
|
772
865
|
},
|
|
773
866
|
"top_provider": {
|
|
774
|
-
"context_length":
|
|
775
|
-
"max_completion_tokens":
|
|
776
|
-
"is_moderated":
|
|
867
|
+
"context_length": 65536,
|
|
868
|
+
"max_completion_tokens": null,
|
|
869
|
+
"is_moderated": false
|
|
777
870
|
},
|
|
778
871
|
"per_request_limits": null,
|
|
779
872
|
"supported_parameters": [
|
|
780
873
|
"frequency_penalty",
|
|
781
874
|
"max_tokens",
|
|
782
875
|
"presence_penalty",
|
|
783
|
-
"seed",
|
|
784
876
|
"stop",
|
|
785
877
|
"temperature",
|
|
786
878
|
"tool_choice",
|
|
@@ -789,26 +881,29 @@
|
|
|
789
881
|
"top_p"
|
|
790
882
|
],
|
|
791
883
|
"default_parameters": {},
|
|
792
|
-
"
|
|
884
|
+
"knowledge_cutoff": "2023-12-31",
|
|
885
|
+
"expiration_date": null,
|
|
886
|
+
"links": {
|
|
887
|
+
"details": "/api/v1/models/meta-llama/llama-3.3-70b-instruct/endpoints"
|
|
888
|
+
}
|
|
793
889
|
},
|
|
794
890
|
{
|
|
795
|
-
"id": "
|
|
796
|
-
"canonical_slug": "
|
|
797
|
-
"hugging_face_id": "
|
|
798
|
-
"name": "
|
|
799
|
-
"created":
|
|
800
|
-
"description": "
|
|
801
|
-
"context_length":
|
|
891
|
+
"id": "openai/gpt-oss-120b:free",
|
|
892
|
+
"canonical_slug": "openai/gpt-oss-120b",
|
|
893
|
+
"hugging_face_id": "openai/gpt-oss-120b",
|
|
894
|
+
"name": "OpenAI: gpt-oss-120b (free)",
|
|
895
|
+
"created": 1754414231,
|
|
896
|
+
"description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized...",
|
|
897
|
+
"context_length": 131072,
|
|
802
898
|
"architecture": {
|
|
803
|
-
"modality": "text
|
|
899
|
+
"modality": "text->text",
|
|
804
900
|
"input_modalities": [
|
|
805
|
-
"text"
|
|
806
|
-
"image"
|
|
901
|
+
"text"
|
|
807
902
|
],
|
|
808
903
|
"output_modalities": [
|
|
809
904
|
"text"
|
|
810
905
|
],
|
|
811
|
-
"tokenizer": "
|
|
906
|
+
"tokenizer": "GPT",
|
|
812
907
|
"instruct_type": null
|
|
813
908
|
},
|
|
814
909
|
"pricing": {
|
|
@@ -816,37 +911,40 @@
|
|
|
816
911
|
"completion": "0"
|
|
817
912
|
},
|
|
818
913
|
"top_provider": {
|
|
819
|
-
"context_length":
|
|
820
|
-
"max_completion_tokens":
|
|
821
|
-
"is_moderated":
|
|
914
|
+
"context_length": 131072,
|
|
915
|
+
"max_completion_tokens": 131072,
|
|
916
|
+
"is_moderated": true
|
|
822
917
|
},
|
|
823
918
|
"per_request_limits": null,
|
|
824
919
|
"supported_parameters": [
|
|
825
|
-
"
|
|
920
|
+
"include_reasoning",
|
|
826
921
|
"max_tokens",
|
|
827
|
-
"
|
|
828
|
-
"
|
|
922
|
+
"reasoning",
|
|
923
|
+
"seed",
|
|
829
924
|
"stop",
|
|
830
|
-
"structured_outputs",
|
|
831
925
|
"temperature",
|
|
832
926
|
"tool_choice",
|
|
833
|
-
"tools"
|
|
834
|
-
"top_k",
|
|
835
|
-
"top_p"
|
|
927
|
+
"tools"
|
|
836
928
|
],
|
|
837
929
|
"default_parameters": {
|
|
838
|
-
"temperature":
|
|
930
|
+
"temperature": null,
|
|
931
|
+
"top_p": null,
|
|
932
|
+
"frequency_penalty": null
|
|
839
933
|
},
|
|
840
|
-
"
|
|
934
|
+
"knowledge_cutoff": "2024-06-30",
|
|
935
|
+
"expiration_date": null,
|
|
936
|
+
"links": {
|
|
937
|
+
"details": "/api/v1/models/openai/gpt-oss-120b/endpoints"
|
|
938
|
+
}
|
|
841
939
|
},
|
|
842
940
|
{
|
|
843
|
-
"id": "
|
|
844
|
-
"canonical_slug": "
|
|
845
|
-
"hugging_face_id": "
|
|
846
|
-
"name": "
|
|
847
|
-
"created":
|
|
848
|
-
"description": "
|
|
849
|
-
"context_length":
|
|
941
|
+
"id": "openai/gpt-oss-20b:free",
|
|
942
|
+
"canonical_slug": "openai/gpt-oss-20b",
|
|
943
|
+
"hugging_face_id": "openai/gpt-oss-20b",
|
|
944
|
+
"name": "OpenAI: gpt-oss-20b (free)",
|
|
945
|
+
"created": 1754414229,
|
|
946
|
+
"description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for...",
|
|
947
|
+
"context_length": 131072,
|
|
850
948
|
"architecture": {
|
|
851
949
|
"modality": "text->text",
|
|
852
950
|
"input_modalities": [
|
|
@@ -855,87 +953,48 @@
|
|
|
855
953
|
"output_modalities": [
|
|
856
954
|
"text"
|
|
857
955
|
],
|
|
858
|
-
"tokenizer": "
|
|
956
|
+
"tokenizer": "GPT",
|
|
859
957
|
"instruct_type": null
|
|
860
958
|
},
|
|
861
959
|
"pricing": {
|
|
862
960
|
"prompt": "0",
|
|
863
961
|
"completion": "0"
|
|
864
962
|
},
|
|
865
|
-
"top_provider": {
|
|
866
|
-
"context_length": 8192,
|
|
867
|
-
"max_completion_tokens": 2048,
|
|
868
|
-
"is_moderated": false
|
|
869
|
-
},
|
|
870
|
-
"per_request_limits": null,
|
|
871
|
-
"supported_parameters": [
|
|
872
|
-
"frequency_penalty",
|
|
873
|
-
"max_tokens",
|
|
874
|
-
"presence_penalty",
|
|
875
|
-
"response_format",
|
|
876
|
-
"seed",
|
|
877
|
-
"stop",
|
|
878
|
-
"temperature",
|
|
879
|
-
"top_p"
|
|
880
|
-
],
|
|
881
|
-
"default_parameters": {},
|
|
882
|
-
"expiration_date": null
|
|
883
|
-
},
|
|
884
|
-
{
|
|
885
|
-
"id": "google/gemma-3-27b-it:free",
|
|
886
|
-
"canonical_slug": "google/gemma-3-27b-it",
|
|
887
|
-
"hugging_face_id": "google/gemma-3-27b-it",
|
|
888
|
-
"name": "Google: Gemma 3 27B (free)",
|
|
889
|
-
"created": 1741756359,
|
|
890
|
-
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)",
|
|
891
|
-
"context_length": 131072,
|
|
892
|
-
"architecture": {
|
|
893
|
-
"modality": "text+image->text",
|
|
894
|
-
"input_modalities": [
|
|
895
|
-
"text",
|
|
896
|
-
"image"
|
|
897
|
-
],
|
|
898
|
-
"output_modalities": [
|
|
899
|
-
"text"
|
|
900
|
-
],
|
|
901
|
-
"tokenizer": "Gemini",
|
|
902
|
-
"instruct_type": "gemma"
|
|
903
|
-
},
|
|
904
|
-
"pricing": {
|
|
905
|
-
"prompt": "0",
|
|
906
|
-
"completion": "0"
|
|
907
|
-
},
|
|
908
963
|
"top_provider": {
|
|
909
964
|
"context_length": 131072,
|
|
910
|
-
"max_completion_tokens":
|
|
911
|
-
"is_moderated":
|
|
965
|
+
"max_completion_tokens": 131072,
|
|
966
|
+
"is_moderated": true
|
|
912
967
|
},
|
|
913
968
|
"per_request_limits": null,
|
|
914
969
|
"supported_parameters": [
|
|
970
|
+
"include_reasoning",
|
|
915
971
|
"max_tokens",
|
|
916
|
-
"
|
|
972
|
+
"reasoning",
|
|
917
973
|
"seed",
|
|
918
974
|
"stop",
|
|
919
975
|
"temperature",
|
|
920
976
|
"tool_choice",
|
|
921
|
-
"tools"
|
|
922
|
-
"top_p"
|
|
977
|
+
"tools"
|
|
923
978
|
],
|
|
924
979
|
"default_parameters": {
|
|
925
980
|
"temperature": null,
|
|
926
981
|
"top_p": null,
|
|
927
982
|
"frequency_penalty": null
|
|
928
983
|
},
|
|
929
|
-
"
|
|
984
|
+
"knowledge_cutoff": "2024-06-30",
|
|
985
|
+
"expiration_date": null,
|
|
986
|
+
"links": {
|
|
987
|
+
"details": "/api/v1/models/openai/gpt-oss-20b/endpoints"
|
|
988
|
+
}
|
|
930
989
|
},
|
|
931
990
|
{
|
|
932
|
-
"id": "
|
|
933
|
-
"canonical_slug": "
|
|
934
|
-
"hugging_face_id": "
|
|
935
|
-
"name": "
|
|
936
|
-
"created":
|
|
937
|
-
"description": "
|
|
938
|
-
"context_length":
|
|
991
|
+
"id": "meta-llama/llama-3.2-3b-instruct:free",
|
|
992
|
+
"canonical_slug": "meta-llama/llama-3.2-3b-instruct",
|
|
993
|
+
"hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct",
|
|
994
|
+
"name": "Meta: Llama 3.2 3B Instruct (free)",
|
|
995
|
+
"created": 1727222400,
|
|
996
|
+
"description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it...",
|
|
997
|
+
"context_length": 131072,
|
|
939
998
|
"architecture": {
|
|
940
999
|
"modality": "text->text",
|
|
941
1000
|
"input_modalities": [
|
|
@@ -944,45 +1003,43 @@
|
|
|
944
1003
|
"output_modalities": [
|
|
945
1004
|
"text"
|
|
946
1005
|
],
|
|
947
|
-
"tokenizer": "
|
|
948
|
-
"instruct_type": "
|
|
1006
|
+
"tokenizer": "Llama3",
|
|
1007
|
+
"instruct_type": "llama3"
|
|
949
1008
|
},
|
|
950
1009
|
"pricing": {
|
|
951
1010
|
"prompt": "0",
|
|
952
1011
|
"completion": "0"
|
|
953
1012
|
},
|
|
954
1013
|
"top_provider": {
|
|
955
|
-
"context_length":
|
|
1014
|
+
"context_length": 131072,
|
|
956
1015
|
"max_completion_tokens": null,
|
|
957
1016
|
"is_moderated": false
|
|
958
1017
|
},
|
|
959
1018
|
"per_request_limits": null,
|
|
960
1019
|
"supported_parameters": [
|
|
961
1020
|
"frequency_penalty",
|
|
962
|
-
"include_reasoning",
|
|
963
1021
|
"max_tokens",
|
|
964
1022
|
"presence_penalty",
|
|
965
|
-
"reasoning",
|
|
966
|
-
"response_format",
|
|
967
1023
|
"stop",
|
|
968
|
-
"structured_outputs",
|
|
969
1024
|
"temperature",
|
|
970
|
-
"tool_choice",
|
|
971
|
-
"tools",
|
|
972
1025
|
"top_k",
|
|
973
1026
|
"top_p"
|
|
974
1027
|
],
|
|
975
1028
|
"default_parameters": {},
|
|
976
|
-
"
|
|
1029
|
+
"knowledge_cutoff": "2023-12-31",
|
|
1030
|
+
"expiration_date": null,
|
|
1031
|
+
"links": {
|
|
1032
|
+
"details": "/api/v1/models/meta-llama/llama-3.2-3b-instruct/endpoints"
|
|
1033
|
+
}
|
|
977
1034
|
},
|
|
978
1035
|
{
|
|
979
|
-
"id": "
|
|
980
|
-
"canonical_slug": "
|
|
981
|
-
"hugging_face_id": "
|
|
982
|
-
"name": "
|
|
983
|
-
"created":
|
|
984
|
-
"description": "
|
|
985
|
-
"context_length":
|
|
1036
|
+
"id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
|
1037
|
+
"canonical_slug": "venice/uncensored",
|
|
1038
|
+
"hugging_face_id": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition",
|
|
1039
|
+
"name": "Venice: Uncensored (free)",
|
|
1040
|
+
"created": 1752094966,
|
|
1041
|
+
"description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an \u201cuncensored\u201d instruct-tuned LLM, preserving...",
|
|
1042
|
+
"context_length": 32768,
|
|
986
1043
|
"architecture": {
|
|
987
1044
|
"modality": "text->text",
|
|
988
1045
|
"input_modalities": [
|
|
@@ -999,8 +1056,8 @@
|
|
|
999
1056
|
"completion": "0"
|
|
1000
1057
|
},
|
|
1001
1058
|
"top_provider": {
|
|
1002
|
-
"context_length":
|
|
1003
|
-
"max_completion_tokens":
|
|
1059
|
+
"context_length": 32768,
|
|
1060
|
+
"max_completion_tokens": null,
|
|
1004
1061
|
"is_moderated": false
|
|
1005
1062
|
},
|
|
1006
1063
|
"per_request_limits": null,
|
|
@@ -1009,32 +1066,38 @@
|
|
|
1009
1066
|
"max_tokens",
|
|
1010
1067
|
"presence_penalty",
|
|
1011
1068
|
"response_format",
|
|
1012
|
-
"seed",
|
|
1013
1069
|
"stop",
|
|
1070
|
+
"structured_outputs",
|
|
1014
1071
|
"temperature",
|
|
1072
|
+
"top_k",
|
|
1015
1073
|
"top_p"
|
|
1016
1074
|
],
|
|
1017
1075
|
"default_parameters": {},
|
|
1018
|
-
"
|
|
1076
|
+
"knowledge_cutoff": "2024-04-30",
|
|
1077
|
+
"expiration_date": null,
|
|
1078
|
+
"links": {
|
|
1079
|
+
"details": "/api/v1/models/venice/uncensored/endpoints"
|
|
1080
|
+
}
|
|
1019
1081
|
},
|
|
1020
1082
|
{
|
|
1021
|
-
"id": "
|
|
1022
|
-
"canonical_slug": "
|
|
1023
|
-
"hugging_face_id": "
|
|
1024
|
-
"name": "
|
|
1025
|
-
"created":
|
|
1026
|
-
"description": "
|
|
1083
|
+
"id": "google/gemma-3-27b-it:free",
|
|
1084
|
+
"canonical_slug": "google/gemma-3-27b-it",
|
|
1085
|
+
"hugging_face_id": "google/gemma-3-27b-it",
|
|
1086
|
+
"name": "Google: Gemma 3 27B (free)",
|
|
1087
|
+
"created": 1741756359,
|
|
1088
|
+
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
|
|
1027
1089
|
"context_length": 131072,
|
|
1028
1090
|
"architecture": {
|
|
1029
|
-
"modality": "text->text",
|
|
1091
|
+
"modality": "text+image->text",
|
|
1030
1092
|
"input_modalities": [
|
|
1031
|
-
"text"
|
|
1093
|
+
"text",
|
|
1094
|
+
"image"
|
|
1032
1095
|
],
|
|
1033
1096
|
"output_modalities": [
|
|
1034
1097
|
"text"
|
|
1035
1098
|
],
|
|
1036
|
-
"tokenizer": "
|
|
1037
|
-
"instruct_type":
|
|
1099
|
+
"tokenizer": "Gemini",
|
|
1100
|
+
"instruct_type": "gemma"
|
|
1038
1101
|
},
|
|
1039
1102
|
"pricing": {
|
|
1040
1103
|
"prompt": "0",
|
|
@@ -1042,34 +1105,36 @@
|
|
|
1042
1105
|
},
|
|
1043
1106
|
"top_provider": {
|
|
1044
1107
|
"context_length": 131072,
|
|
1045
|
-
"max_completion_tokens":
|
|
1046
|
-
"is_moderated":
|
|
1108
|
+
"max_completion_tokens": 8192,
|
|
1109
|
+
"is_moderated": false
|
|
1047
1110
|
},
|
|
1048
1111
|
"per_request_limits": null,
|
|
1049
1112
|
"supported_parameters": [
|
|
1050
|
-
"include_reasoning",
|
|
1051
1113
|
"max_tokens",
|
|
1052
|
-
"
|
|
1114
|
+
"response_format",
|
|
1053
1115
|
"seed",
|
|
1054
1116
|
"stop",
|
|
1055
1117
|
"temperature",
|
|
1056
|
-
"
|
|
1057
|
-
"tools"
|
|
1118
|
+
"top_p"
|
|
1058
1119
|
],
|
|
1059
1120
|
"default_parameters": {
|
|
1060
1121
|
"temperature": null,
|
|
1061
1122
|
"top_p": null,
|
|
1062
1123
|
"frequency_penalty": null
|
|
1063
1124
|
},
|
|
1064
|
-
"
|
|
1125
|
+
"knowledge_cutoff": "2024-08-31",
|
|
1126
|
+
"expiration_date": null,
|
|
1127
|
+
"links": {
|
|
1128
|
+
"details": "/api/v1/models/google/gemma-3-27b-it/endpoints"
|
|
1129
|
+
}
|
|
1065
1130
|
},
|
|
1066
1131
|
{
|
|
1067
|
-
"id": "
|
|
1068
|
-
"canonical_slug": "
|
|
1069
|
-
"hugging_face_id": "
|
|
1070
|
-
"name": "
|
|
1071
|
-
"created":
|
|
1072
|
-
"description": "
|
|
1132
|
+
"id": "z-ai/glm-4.5-air:free",
|
|
1133
|
+
"canonical_slug": "z-ai/glm-4.5-air",
|
|
1134
|
+
"hugging_face_id": "zai-org/GLM-4.5-Air",
|
|
1135
|
+
"name": "Z.ai: GLM 4.5 Air (free)",
|
|
1136
|
+
"created": 1753471258,
|
|
1137
|
+
"description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter...",
|
|
1073
1138
|
"context_length": 131072,
|
|
1074
1139
|
"architecture": {
|
|
1075
1140
|
"modality": "text->text",
|
|
@@ -1079,7 +1144,7 @@
|
|
|
1079
1144
|
"output_modalities": [
|
|
1080
1145
|
"text"
|
|
1081
1146
|
],
|
|
1082
|
-
"tokenizer": "
|
|
1147
|
+
"tokenizer": "Other",
|
|
1083
1148
|
"instruct_type": null
|
|
1084
1149
|
},
|
|
1085
1150
|
"pricing": {
|
|
@@ -1088,35 +1153,38 @@
|
|
|
1088
1153
|
},
|
|
1089
1154
|
"top_provider": {
|
|
1090
1155
|
"context_length": 131072,
|
|
1091
|
-
"max_completion_tokens":
|
|
1092
|
-
"is_moderated":
|
|
1156
|
+
"max_completion_tokens": 96000,
|
|
1157
|
+
"is_moderated": false
|
|
1093
1158
|
},
|
|
1094
1159
|
"per_request_limits": null,
|
|
1095
1160
|
"supported_parameters": [
|
|
1096
1161
|
"include_reasoning",
|
|
1097
1162
|
"max_tokens",
|
|
1098
1163
|
"reasoning",
|
|
1099
|
-
"seed",
|
|
1100
|
-
"stop",
|
|
1101
1164
|
"temperature",
|
|
1102
1165
|
"tool_choice",
|
|
1103
|
-
"tools"
|
|
1166
|
+
"tools",
|
|
1167
|
+
"top_p"
|
|
1104
1168
|
],
|
|
1105
1169
|
"default_parameters": {
|
|
1106
|
-
"temperature":
|
|
1170
|
+
"temperature": 0.75,
|
|
1107
1171
|
"top_p": null,
|
|
1108
1172
|
"frequency_penalty": null
|
|
1109
1173
|
},
|
|
1110
|
-
"
|
|
1174
|
+
"knowledge_cutoff": "2024-12-31",
|
|
1175
|
+
"expiration_date": null,
|
|
1176
|
+
"links": {
|
|
1177
|
+
"details": "/api/v1/models/z-ai/glm-4.5-air/endpoints"
|
|
1178
|
+
}
|
|
1111
1179
|
},
|
|
1112
1180
|
{
|
|
1113
|
-
"id": "
|
|
1114
|
-
"canonical_slug": "
|
|
1115
|
-
"hugging_face_id": "
|
|
1116
|
-
"name": "
|
|
1117
|
-
"created":
|
|
1118
|
-
"description": "
|
|
1119
|
-
"context_length":
|
|
1181
|
+
"id": "google/gemma-3n-e2b-it:free",
|
|
1182
|
+
"canonical_slug": "google/gemma-3n-e2b-it",
|
|
1183
|
+
"hugging_face_id": "google/gemma-3n-E2B-it",
|
|
1184
|
+
"name": "Google: Gemma 3n 2B (free)",
|
|
1185
|
+
"created": 1752074904,
|
|
1186
|
+
"description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based...",
|
|
1187
|
+
"context_length": 8192,
|
|
1120
1188
|
"architecture": {
|
|
1121
1189
|
"modality": "text->text",
|
|
1122
1190
|
"input_modalities": [
|
|
@@ -1133,74 +1201,78 @@
|
|
|
1133
1201
|
"completion": "0"
|
|
1134
1202
|
},
|
|
1135
1203
|
"top_provider": {
|
|
1136
|
-
"context_length":
|
|
1137
|
-
"max_completion_tokens":
|
|
1204
|
+
"context_length": 8192,
|
|
1205
|
+
"max_completion_tokens": 2048,
|
|
1138
1206
|
"is_moderated": false
|
|
1139
1207
|
},
|
|
1140
1208
|
"per_request_limits": null,
|
|
1141
1209
|
"supported_parameters": [
|
|
1142
|
-
"frequency_penalty",
|
|
1143
1210
|
"max_tokens",
|
|
1144
|
-
"presence_penalty",
|
|
1145
1211
|
"response_format",
|
|
1146
|
-
"
|
|
1147
|
-
"structured_outputs",
|
|
1212
|
+
"seed",
|
|
1148
1213
|
"temperature",
|
|
1149
|
-
"top_k",
|
|
1150
1214
|
"top_p"
|
|
1151
1215
|
],
|
|
1152
1216
|
"default_parameters": {},
|
|
1153
|
-
"
|
|
1217
|
+
"knowledge_cutoff": "2024-08-31",
|
|
1218
|
+
"expiration_date": null,
|
|
1219
|
+
"links": {
|
|
1220
|
+
"details": "/api/v1/models/google/gemma-3n-e2b-it/endpoints"
|
|
1221
|
+
}
|
|
1154
1222
|
},
|
|
1155
1223
|
{
|
|
1156
|
-
"id": "
|
|
1157
|
-
"canonical_slug": "
|
|
1158
|
-
"hugging_face_id": "
|
|
1159
|
-
"name": "
|
|
1160
|
-
"created":
|
|
1161
|
-
"description": "
|
|
1162
|
-
"context_length":
|
|
1224
|
+
"id": "google/gemma-3-4b-it:free",
|
|
1225
|
+
"canonical_slug": "google/gemma-3-4b-it",
|
|
1226
|
+
"hugging_face_id": "google/gemma-3-4b-it",
|
|
1227
|
+
"name": "Google: Gemma 3 4B (free)",
|
|
1228
|
+
"created": 1741905510,
|
|
1229
|
+
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
|
|
1230
|
+
"context_length": 32768,
|
|
1163
1231
|
"architecture": {
|
|
1164
|
-
"modality": "text->text",
|
|
1232
|
+
"modality": "text+image->text",
|
|
1165
1233
|
"input_modalities": [
|
|
1166
|
-
"text"
|
|
1234
|
+
"text",
|
|
1235
|
+
"image"
|
|
1167
1236
|
],
|
|
1168
1237
|
"output_modalities": [
|
|
1169
1238
|
"text"
|
|
1170
1239
|
],
|
|
1171
|
-
"tokenizer": "
|
|
1172
|
-
"instruct_type": "
|
|
1240
|
+
"tokenizer": "Gemini",
|
|
1241
|
+
"instruct_type": "gemma"
|
|
1173
1242
|
},
|
|
1174
1243
|
"pricing": {
|
|
1175
1244
|
"prompt": "0",
|
|
1176
1245
|
"completion": "0"
|
|
1177
1246
|
},
|
|
1178
1247
|
"top_provider": {
|
|
1179
|
-
"context_length":
|
|
1180
|
-
"max_completion_tokens":
|
|
1248
|
+
"context_length": 32768,
|
|
1249
|
+
"max_completion_tokens": 8192,
|
|
1181
1250
|
"is_moderated": false
|
|
1182
1251
|
},
|
|
1183
1252
|
"per_request_limits": null,
|
|
1184
1253
|
"supported_parameters": [
|
|
1185
|
-
"frequency_penalty",
|
|
1186
1254
|
"max_tokens",
|
|
1187
|
-
"
|
|
1255
|
+
"response_format",
|
|
1256
|
+
"seed",
|
|
1188
1257
|
"stop",
|
|
1189
1258
|
"temperature",
|
|
1190
|
-
"top_k",
|
|
1191
1259
|
"top_p"
|
|
1192
1260
|
],
|
|
1193
1261
|
"default_parameters": {},
|
|
1194
|
-
"
|
|
1262
|
+
"knowledge_cutoff": "2024-08-31",
|
|
1263
|
+
"expiration_date": null,
|
|
1264
|
+
"links": {
|
|
1265
|
+
"details": "/api/v1/models/google/gemma-3-4b-it/endpoints"
|
|
1266
|
+
}
|
|
1195
1267
|
},
|
|
1196
1268
|
{
|
|
1197
|
-
"id": "
|
|
1198
|
-
"canonical_slug": "
|
|
1199
|
-
"hugging_face_id": "
|
|
1200
|
-
"name": "
|
|
1201
|
-
"created":
|
|
1202
|
-
"description": "
|
|
1203
|
-
"context_length":
|
|
1269
|
+
"id": "google/gemma-3n-e4b-it:free",
|
|
1270
|
+
"canonical_slug": "google/gemma-3n-e4b-it",
|
|
1271
|
+
"hugging_face_id": "google/gemma-3n-E4B-it",
|
|
1272
|
+
"name": "Google: Gemma 3n 4B (free)",
|
|
1273
|
+
"created": 1747776824,
|
|
1274
|
+
"description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs\u2014including text, visual data, and audio\u2014enabling diverse tasks...",
|
|
1275
|
+
"context_length": 8192,
|
|
1204
1276
|
"architecture": {
|
|
1205
1277
|
"modality": "text->text",
|
|
1206
1278
|
"input_modalities": [
|
|
@@ -1217,54 +1289,8 @@
|
|
|
1217
1289
|
"completion": "0"
|
|
1218
1290
|
},
|
|
1219
1291
|
"top_provider": {
|
|
1220
|
-
"context_length":
|
|
1221
|
-
"max_completion_tokens":
|
|
1222
|
-
"is_moderated": false
|
|
1223
|
-
},
|
|
1224
|
-
"per_request_limits": null,
|
|
1225
|
-
"supported_parameters": [
|
|
1226
|
-
"include_reasoning",
|
|
1227
|
-
"max_tokens",
|
|
1228
|
-
"reasoning",
|
|
1229
|
-
"temperature",
|
|
1230
|
-
"tool_choice",
|
|
1231
|
-
"tools",
|
|
1232
|
-
"top_p"
|
|
1233
|
-
],
|
|
1234
|
-
"default_parameters": {
|
|
1235
|
-
"temperature": 0.75,
|
|
1236
|
-
"top_p": null,
|
|
1237
|
-
"frequency_penalty": null
|
|
1238
|
-
},
|
|
1239
|
-
"expiration_date": null
|
|
1240
|
-
},
|
|
1241
|
-
{
|
|
1242
|
-
"id": "google/gemma-3-4b-it:free",
|
|
1243
|
-
"canonical_slug": "google/gemma-3-4b-it",
|
|
1244
|
-
"hugging_face_id": "google/gemma-3-4b-it",
|
|
1245
|
-
"name": "Google: Gemma 3 4B (free)",
|
|
1246
|
-
"created": 1741905510,
|
|
1247
|
-
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.",
|
|
1248
|
-
"context_length": 32768,
|
|
1249
|
-
"architecture": {
|
|
1250
|
-
"modality": "text+image->text",
|
|
1251
|
-
"input_modalities": [
|
|
1252
|
-
"text",
|
|
1253
|
-
"image"
|
|
1254
|
-
],
|
|
1255
|
-
"output_modalities": [
|
|
1256
|
-
"text"
|
|
1257
|
-
],
|
|
1258
|
-
"tokenizer": "Gemini",
|
|
1259
|
-
"instruct_type": "gemma"
|
|
1260
|
-
},
|
|
1261
|
-
"pricing": {
|
|
1262
|
-
"prompt": "0",
|
|
1263
|
-
"completion": "0"
|
|
1264
|
-
},
|
|
1265
|
-
"top_provider": {
|
|
1266
|
-
"context_length": 32768,
|
|
1267
|
-
"max_completion_tokens": 8192,
|
|
1292
|
+
"context_length": 8192,
|
|
1293
|
+
"max_completion_tokens": 2048,
|
|
1268
1294
|
"is_moderated": false
|
|
1269
1295
|
},
|
|
1270
1296
|
"per_request_limits": null,
|
|
@@ -1272,12 +1298,15 @@
|
|
|
1272
1298
|
"max_tokens",
|
|
1273
1299
|
"response_format",
|
|
1274
1300
|
"seed",
|
|
1275
|
-
"stop",
|
|
1276
1301
|
"temperature",
|
|
1277
1302
|
"top_p"
|
|
1278
1303
|
],
|
|
1279
1304
|
"default_parameters": {},
|
|
1280
|
-
"
|
|
1305
|
+
"knowledge_cutoff": "2024-08-31",
|
|
1306
|
+
"expiration_date": null,
|
|
1307
|
+
"links": {
|
|
1308
|
+
"details": "/api/v1/models/google/gemma-3n-e4b-it/endpoints"
|
|
1309
|
+
}
|
|
1281
1310
|
},
|
|
1282
1311
|
{
|
|
1283
1312
|
"id": "google/gemma-3-12b-it:free",
|
|
@@ -1285,7 +1314,7 @@
|
|
|
1285
1314
|
"hugging_face_id": "google/gemma-3-12b-it",
|
|
1286
1315
|
"name": "Google: Gemma 3 12B (free)",
|
|
1287
1316
|
"created": 1741902625,
|
|
1288
|
-
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities
|
|
1317
|
+
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
|
|
1289
1318
|
"context_length": 32768,
|
|
1290
1319
|
"architecture": {
|
|
1291
1320
|
"modality": "text+image->text",
|
|
@@ -1317,7 +1346,11 @@
|
|
|
1317
1346
|
"top_p"
|
|
1318
1347
|
],
|
|
1319
1348
|
"default_parameters": {},
|
|
1320
|
-
"
|
|
1349
|
+
"knowledge_cutoff": "2024-08-31",
|
|
1350
|
+
"expiration_date": null,
|
|
1351
|
+
"links": {
|
|
1352
|
+
"details": "/api/v1/models/google/gemma-3-12b-it/endpoints"
|
|
1353
|
+
}
|
|
1321
1354
|
},
|
|
1322
1355
|
{
|
|
1323
1356
|
"id": "nousresearch/hermes-3-llama-3.1-405b:free",
|
|
@@ -1325,7 +1358,7 @@
|
|
|
1325
1358
|
"hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-405B",
|
|
1326
1359
|
"name": "Nous: Hermes 3 405B Instruct (free)",
|
|
1327
1360
|
"created": 1723766400,
|
|
1328
|
-
"description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the
|
|
1361
|
+
"description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...",
|
|
1329
1362
|
"context_length": 131072,
|
|
1330
1363
|
"architecture": {
|
|
1331
1364
|
"modality": "text->text",
|
|
@@ -1358,7 +1391,11 @@
|
|
|
1358
1391
|
"top_p"
|
|
1359
1392
|
],
|
|
1360
1393
|
"default_parameters": {},
|
|
1361
|
-
"
|
|
1394
|
+
"knowledge_cutoff": "2023-12-31",
|
|
1395
|
+
"expiration_date": null,
|
|
1396
|
+
"links": {
|
|
1397
|
+
"details": "/api/v1/models/nousresearch/hermes-3-llama-3.1-405b/endpoints"
|
|
1398
|
+
}
|
|
1362
1399
|
}
|
|
1363
1400
|
]
|
|
1364
1401
|
}
|