superbrain-server 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +1 -1
  2. package/payload/analyzers/__pycache__/__init__.cpython-311.pyc +0 -0
  3. package/payload/analyzers/__pycache__/audio_transcribe.cpython-311.pyc +0 -0
  4. package/payload/analyzers/__pycache__/caption.cpython-311.pyc +0 -0
  5. package/payload/analyzers/__pycache__/music_identifier.cpython-311.pyc +0 -0
  6. package/payload/analyzers/__pycache__/text_analyzer.cpython-311.pyc +0 -0
  7. package/payload/analyzers/__pycache__/visual_analyze.cpython-311.pyc +0 -0
  8. package/payload/analyzers/__pycache__/webpage_analyzer.cpython-311.pyc +0 -0
  9. package/payload/analyzers/__pycache__/youtube_analyzer.cpython-311.pyc +0 -0
  10. package/payload/api.py +1 -0
  11. package/payload/config/model_rankings.json +250 -20
  12. package/payload/config/openrouter_free_models.json +519 -482
  13. package/payload/core/__pycache__/__init__.cpython-311.pyc +0 -0
  14. package/payload/core/__pycache__/category_manager.cpython-311.pyc +0 -0
  15. package/payload/core/__pycache__/database.cpython-311.pyc +0 -0
  16. package/payload/core/__pycache__/link_checker.cpython-311.pyc +0 -0
  17. package/payload/core/__pycache__/model_router.cpython-311.pyc +0 -0
  18. package/payload/core/model_router.py +7 -10
  19. package/payload/instagram/__pycache__/__init__.cpython-311.pyc +0 -0
  20. package/payload/instagram/__pycache__/instagram_downloader.cpython-311.pyc +0 -0
  21. package/payload/instagram/__pycache__/instagram_login.cpython-311.pyc +0 -0
  22. package/payload/start.py +188 -189
  23. package/payload/temp/Random cinematic shots/Random cinematic shots..mp4 +0 -0
  24. package/payload/temp/Random cinematic shots/Random cinematic shots._audio.mp3 +0 -0
  25. package/payload/temp/Random cinematic shots/Random cinematic shots._thumbnail.jpg +0 -0
  26. package/payload/temp/Random cinematic shots/info.txt +18 -0
  27. package/payload/temp/Random cinematic shots._1/Random cinematic shots..mp4 +0 -0
  28. package/payload/temp/Random cinematic shots._1/Random cinematic shots._audio.mp3 +0 -0
  29. package/payload/temp/Random cinematic shots._1/Random cinematic shots._thumbnail.jpg +0 -0
  30. package/payload/temp/Random cinematic shots._1/info.txt +18 -0
  31. package/payload/temp/Random cinematic shots._2/Random cinematic shots..mp4 +0 -0
  32. package/payload/temp/Random cinematic shots._2/Random cinematic shots._audio.mp3 +0 -0
  33. package/payload/temp/Random cinematic shots._2/Random cinematic shots._thumbnail.jpg +0 -0
  34. package/payload/temp/Random cinematic shots._2/info.txt +18 -0
  35. package/payload/test_backend.py +241 -0
  36. package/payload/tests/__init__.py +0 -0
  37. package/payload/tests/__pycache__/__init__.cpython-311.pyc +0 -0
  38. package/payload/tests/__pycache__/test_api.cpython-311.pyc +0 -0
  39. package/payload/tests/__pycache__/test_db.cpython-311.pyc +0 -0
  40. package/payload/tests/__pycache__/test_sync_code.cpython-311.pyc +0 -0
  41. package/payload/tests/test_api.py +17 -0
  42. package/payload/tests/test_db.py +22 -0
  43. package/payload/tests/test_sync_code.py +65 -0
  44. package/payload/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  45. package/payload/utils/__pycache__/db_stats.cpython-311.pyc +0 -0
  46. package/payload/utils/__pycache__/manage_token.cpython-311.pyc +0 -0
  47. package/payload/__pycache__/api.cpython-311.pyc +0 -0
  48. package/payload/__pycache__/main.cpython-311.pyc +0 -0
  49. package/payload/__pycache__/start.cpython-311.pyc +0 -0
  50. package/payload/config/.api_keys +0 -3
@@ -1,13 +1,230 @@
1
1
  {
2
- "cached_at": "2026-02-28T15:42:27.072508",
2
+ "cached_at": "2026-04-07T19:16:35.621206",
3
3
  "models": [
4
+ {
5
+ "id": "qwen/qwen3.6-plus:free",
6
+ "canonical_slug": "qwen/qwen3.6-plus-04-02",
7
+ "hugging_face_id": "",
8
+ "name": "Qwen: Qwen3.6 Plus (free)",
9
+ "created": 1775133557,
10
+ "description": "Qwen 3.6 Plus builds on a hybrid architecture that combines efficient linear attention with sparse mixture-of-experts routing, enabling strong scalability and high-performance inference. Compared to the 3.5 series, it delivers...",
11
+ "context_length": 1000000,
12
+ "architecture": {
13
+ "modality": "text+image+video->text",
14
+ "input_modalities": [
15
+ "text",
16
+ "image",
17
+ "video"
18
+ ],
19
+ "output_modalities": [
20
+ "text"
21
+ ],
22
+ "tokenizer": "Qwen3",
23
+ "instruct_type": null
24
+ },
25
+ "pricing": {
26
+ "prompt": "0",
27
+ "completion": "0"
28
+ },
29
+ "top_provider": {
30
+ "context_length": 1000000,
31
+ "max_completion_tokens": 65536,
32
+ "is_moderated": false
33
+ },
34
+ "per_request_limits": null,
35
+ "supported_parameters": [
36
+ "include_reasoning",
37
+ "max_tokens",
38
+ "presence_penalty",
39
+ "reasoning",
40
+ "response_format",
41
+ "seed",
42
+ "structured_outputs",
43
+ "temperature",
44
+ "tool_choice",
45
+ "tools",
46
+ "top_p"
47
+ ],
48
+ "default_parameters": {
49
+ "temperature": null,
50
+ "top_p": null,
51
+ "top_k": null,
52
+ "frequency_penalty": null,
53
+ "presence_penalty": null,
54
+ "repetition_penalty": null
55
+ },
56
+ "knowledge_cutoff": null,
57
+ "expiration_date": "2026-04-07",
58
+ "links": {
59
+ "details": "/api/v1/models/qwen/qwen3.6-plus-04-02/endpoints"
60
+ }
61
+ },
62
+ {
63
+ "id": "google/lyria-3-pro-preview",
64
+ "canonical_slug": "google/lyria-3-pro-preview-20260330",
65
+ "hugging_face_id": null,
66
+ "name": "Google: Lyria 3 Pro Preview",
67
+ "created": 1774907286,
68
+ "description": "Full-length songs are priced at $0.08 per song. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate high-quality, 48kHz...",
69
+ "context_length": 1048576,
70
+ "architecture": {
71
+ "modality": "text+image->text+audio",
72
+ "input_modalities": [
73
+ "text",
74
+ "image"
75
+ ],
76
+ "output_modalities": [
77
+ "text",
78
+ "audio"
79
+ ],
80
+ "tokenizer": "Other",
81
+ "instruct_type": null
82
+ },
83
+ "pricing": {
84
+ "prompt": "0",
85
+ "completion": "0"
86
+ },
87
+ "top_provider": {
88
+ "context_length": 1048576,
89
+ "max_completion_tokens": 65536,
90
+ "is_moderated": false
91
+ },
92
+ "per_request_limits": null,
93
+ "supported_parameters": [
94
+ "max_tokens",
95
+ "response_format",
96
+ "seed",
97
+ "temperature",
98
+ "top_p"
99
+ ],
100
+ "default_parameters": {
101
+ "temperature": null,
102
+ "top_p": null,
103
+ "top_k": null,
104
+ "frequency_penalty": null,
105
+ "presence_penalty": null,
106
+ "repetition_penalty": null
107
+ },
108
+ "knowledge_cutoff": null,
109
+ "expiration_date": null,
110
+ "links": {
111
+ "details": "/api/v1/models/google/lyria-3-pro-preview-20260330/endpoints"
112
+ }
113
+ },
114
+ {
115
+ "id": "google/lyria-3-clip-preview",
116
+ "canonical_slug": "google/lyria-3-clip-preview-20260330",
117
+ "hugging_face_id": null,
118
+ "name": "Google: Lyria 3 Clip Preview",
119
+ "created": 1774907255,
120
+ "description": "30 second duration clips are priced at $0.04 per clip. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate...",
121
+ "context_length": 1048576,
122
+ "architecture": {
123
+ "modality": "text+image->text+audio",
124
+ "input_modalities": [
125
+ "text",
126
+ "image"
127
+ ],
128
+ "output_modalities": [
129
+ "text",
130
+ "audio"
131
+ ],
132
+ "tokenizer": "Other",
133
+ "instruct_type": null
134
+ },
135
+ "pricing": {
136
+ "prompt": "0",
137
+ "completion": "0"
138
+ },
139
+ "top_provider": {
140
+ "context_length": 1048576,
141
+ "max_completion_tokens": 65536,
142
+ "is_moderated": false
143
+ },
144
+ "per_request_limits": null,
145
+ "supported_parameters": [
146
+ "max_tokens",
147
+ "response_format",
148
+ "seed",
149
+ "temperature",
150
+ "top_p"
151
+ ],
152
+ "default_parameters": {
153
+ "temperature": null,
154
+ "top_p": null,
155
+ "top_k": null,
156
+ "frequency_penalty": null,
157
+ "presence_penalty": null,
158
+ "repetition_penalty": null
159
+ },
160
+ "knowledge_cutoff": null,
161
+ "expiration_date": null,
162
+ "links": {
163
+ "details": "/api/v1/models/google/lyria-3-clip-preview-20260330/endpoints"
164
+ }
165
+ },
166
+ {
167
+ "id": "nvidia/nemotron-3-super-120b-a12b:free",
168
+ "canonical_slug": "nvidia/nemotron-3-super-120b-a12b-20230311",
169
+ "hugging_face_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
170
+ "name": "NVIDIA: Nemotron 3 Super (free)",
171
+ "created": 1773245239,
172
+ "description": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...",
173
+ "context_length": 262144,
174
+ "architecture": {
175
+ "modality": "text->text",
176
+ "input_modalities": [
177
+ "text"
178
+ ],
179
+ "output_modalities": [
180
+ "text"
181
+ ],
182
+ "tokenizer": "Other",
183
+ "instruct_type": null
184
+ },
185
+ "pricing": {
186
+ "prompt": "0",
187
+ "completion": "0"
188
+ },
189
+ "top_provider": {
190
+ "context_length": 262144,
191
+ "max_completion_tokens": 262144,
192
+ "is_moderated": false
193
+ },
194
+ "per_request_limits": null,
195
+ "supported_parameters": [
196
+ "include_reasoning",
197
+ "max_tokens",
198
+ "reasoning",
199
+ "response_format",
200
+ "seed",
201
+ "structured_outputs",
202
+ "temperature",
203
+ "tool_choice",
204
+ "tools",
205
+ "top_p"
206
+ ],
207
+ "default_parameters": {
208
+ "temperature": 1,
209
+ "top_p": 0.95,
210
+ "top_k": null,
211
+ "frequency_penalty": null,
212
+ "presence_penalty": null,
213
+ "repetition_penalty": null
214
+ },
215
+ "knowledge_cutoff": null,
216
+ "expiration_date": null,
217
+ "links": {
218
+ "details": "/api/v1/models/nvidia/nemotron-3-super-120b-a12b-20230311/endpoints"
219
+ }
220
+ },
4
221
  {
5
222
  "id": "openrouter/free",
6
223
  "canonical_slug": "openrouter/free",
7
224
  "hugging_face_id": "",
8
225
  "name": "Free Models Router",
9
226
  "created": 1769917427,
10
- "description": "The simplest way to get free inference. openrouter/free is a router that selects free models at random from the models available on OpenRouter. The router smartly filters for models that support features needed for your request such as image understanding, tool calling, structured outputs and more. ",
227
+ "description": "The simplest way to get free inference. openrouter/free is a router that selects free models at random from the models available on OpenRouter. The router smartly filters for models that...",
11
228
  "context_length": 200000,
12
229
  "architecture": {
13
230
  "modality": "text+image->text",
@@ -54,7 +271,11 @@
54
271
  "top_p": null,
55
272
  "frequency_penalty": null
56
273
  },
57
- "expiration_date": null
274
+ "knowledge_cutoff": null,
275
+ "expiration_date": null,
276
+ "links": {
277
+ "details": "/api/v1/models/openrouter/free/endpoints"
278
+ }
58
279
  },
59
280
  {
60
281
  "id": "nvidia/nemotron-3-nano-30b-a3b:free",
@@ -62,7 +283,7 @@
62
283
  "hugging_face_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
63
284
  "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)",
64
285
  "created": 1765731275,
65
- "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.",
286
+ "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully...",
66
287
  "context_length": 256000,
67
288
  "architecture": {
68
289
  "modality": "text->text",
@@ -100,7 +321,11 @@
100
321
  "top_p": null,
101
322
  "frequency_penalty": null
102
323
  },
103
- "expiration_date": null
324
+ "knowledge_cutoff": null,
325
+ "expiration_date": null,
326
+ "links": {
327
+ "details": "/api/v1/models/nvidia/nemotron-3-nano-30b-a3b/endpoints"
328
+ }
104
329
  },
105
330
  {
106
331
  "id": "qwen/qwen3-next-80b-a3b-instruct:free",
@@ -108,7 +333,7 @@
108
333
  "hugging_face_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
109
334
  "name": "Qwen: Qwen3 Next 80B A3B Instruct (free)",
110
335
  "created": 1757612213,
111
- "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without \u201cthinking\u201d traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.",
336
+ "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without \u201cthinking\u201d traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual...",
112
337
  "context_length": 262144,
113
338
  "architecture": {
114
339
  "modality": "text->text",
@@ -145,7 +370,11 @@
145
370
  "top_p"
146
371
  ],
147
372
  "default_parameters": {},
148
- "expiration_date": null
373
+ "knowledge_cutoff": "2025-09-30",
374
+ "expiration_date": null,
375
+ "links": {
376
+ "details": "/api/v1/models/qwen/qwen3-next-80b-a3b-instruct-2509/endpoints"
377
+ }
149
378
  },
150
379
  {
151
380
  "id": "stepfun/step-3.5-flash:free",
@@ -153,7 +382,7 @@
153
382
  "hugging_face_id": "stepfun-ai/Step-3.5-Flash",
154
383
  "name": "StepFun: Step 3.5 Flash (free)",
155
384
  "created": 1769728337,
156
- "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. It is a reasoning model that is incredibly speed efficient even at long contexts.",
385
+ "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token....",
157
386
  "context_length": 256000,
158
387
  "architecture": {
159
388
  "modality": "text->text",
@@ -191,64 +420,11 @@
191
420
  "top_p": null,
192
421
  "frequency_penalty": null
193
422
  },
194
- "expiration_date": null
195
- },
196
- {
197
- "id": "qwen/qwen3-vl-30b-a3b-thinking",
198
- "canonical_slug": "qwen/qwen3-vl-30b-a3b-thinking",
199
- "hugging_face_id": "Qwen/Qwen3-VL-30B-A3B-Thinking",
200
- "name": "Qwen: Qwen3 VL 30B A3B Thinking",
201
- "created": 1759794479,
202
- "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.",
203
- "context_length": 131072,
204
- "architecture": {
205
- "modality": "text+image->text",
206
- "input_modalities": [
207
- "text",
208
- "image"
209
- ],
210
- "output_modalities": [
211
- "text"
212
- ],
213
- "tokenizer": "Qwen3",
214
- "instruct_type": null
215
- },
216
- "pricing": {
217
- "prompt": "0",
218
- "completion": "0",
219
- "request": "0",
220
- "image": "0",
221
- "web_search": "0",
222
- "internal_reasoning": "0"
223
- },
224
- "top_provider": {
225
- "context_length": 131072,
226
- "max_completion_tokens": 32768,
227
- "is_moderated": false
228
- },
229
- "per_request_limits": null,
230
- "supported_parameters": [
231
- "frequency_penalty",
232
- "include_reasoning",
233
- "max_tokens",
234
- "presence_penalty",
235
- "reasoning",
236
- "repetition_penalty",
237
- "response_format",
238
- "seed",
239
- "stop",
240
- "structured_outputs",
241
- "temperature",
242
- "tool_choice",
243
- "tools",
244
- "top_k",
245
- "top_p"
246
- ],
247
- "default_parameters": {
248
- "temperature": 0.8,
249
- "top_p": 0.95
250
- },
251
- "expiration_date": null
423
+ "knowledge_cutoff": null,
424
+ "expiration_date": null,
425
+ "links": {
426
+ "details": "/api/v1/models/stepfun/step-3.5-flash/endpoints"
427
+ }
252
428
  },
253
429
  {
254
430
  "id": "arcee-ai/trinity-mini:free",
@@ -256,7 +432,7 @@
256
432
  "hugging_face_id": "arcee-ai/Trinity-Mini",
257
433
  "name": "Arcee AI: Trinity Mini (free)",
258
434
  "created": 1764601720,
259
- "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.",
435
+ "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function...",
260
436
  "context_length": 131072,
261
437
  "architecture": {
262
438
  "modality": "text->text",
@@ -296,74 +472,72 @@
296
472
  "top_p": 0.75,
297
473
  "frequency_penalty": null
298
474
  },
299
- "expiration_date": null
475
+ "knowledge_cutoff": null,
476
+ "expiration_date": "2026-04-10",
477
+ "links": {
478
+ "details": "/api/v1/models/arcee-ai/trinity-mini-20251201/endpoints"
479
+ }
300
480
  },
301
481
  {
302
- "id": "qwen/qwen3-vl-235b-a22b-thinking",
303
- "canonical_slug": "qwen/qwen3-vl-235b-a22b-thinking",
304
- "hugging_face_id": "Qwen/Qwen3-VL-235B-A22B-Thinking",
305
- "name": "Qwen: Qwen3 VL 235B A22B Thinking",
306
- "created": 1758668690,
307
- "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.",
308
- "context_length": 131072,
482
+ "id": "nvidia/nemotron-nano-9b-v2:free",
483
+ "canonical_slug": "nvidia/nemotron-nano-9b-v2",
484
+ "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2",
485
+ "name": "NVIDIA: Nemotron Nano 9B V2 (free)",
486
+ "created": 1757106807,
487
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and...",
488
+ "context_length": 128000,
309
489
  "architecture": {
310
- "modality": "text+image->text",
490
+ "modality": "text->text",
311
491
  "input_modalities": [
312
- "text",
313
- "image"
492
+ "text"
314
493
  ],
315
494
  "output_modalities": [
316
495
  "text"
317
496
  ],
318
- "tokenizer": "Qwen3",
497
+ "tokenizer": "Other",
319
498
  "instruct_type": null
320
499
  },
321
500
  "pricing": {
322
501
  "prompt": "0",
323
- "completion": "0",
324
- "request": "0",
325
- "image": "0",
326
- "web_search": "0",
327
- "internal_reasoning": "0"
502
+ "completion": "0"
328
503
  },
329
504
  "top_provider": {
330
- "context_length": 131072,
331
- "max_completion_tokens": 32768,
505
+ "context_length": 128000,
506
+ "max_completion_tokens": null,
332
507
  "is_moderated": false
333
508
  },
334
509
  "per_request_limits": null,
335
510
  "supported_parameters": [
336
- "frequency_penalty",
337
511
  "include_reasoning",
338
512
  "max_tokens",
339
- "presence_penalty",
340
513
  "reasoning",
341
- "repetition_penalty",
342
514
  "response_format",
343
515
  "seed",
344
- "stop",
345
516
  "structured_outputs",
346
517
  "temperature",
347
518
  "tool_choice",
348
519
  "tools",
349
- "top_k",
350
520
  "top_p"
351
521
  ],
352
522
  "default_parameters": {
353
- "temperature": 0.8,
354
- "top_p": 0.95,
523
+ "temperature": null,
524
+ "top_p": null,
355
525
  "frequency_penalty": null
356
526
  },
357
- "expiration_date": null
527
+ "knowledge_cutoff": "2025-03-31",
528
+ "expiration_date": null,
529
+ "links": {
530
+ "details": "/api/v1/models/nvidia/nemotron-nano-9b-v2/endpoints"
531
+ }
358
532
  },
359
533
  {
360
- "id": "nvidia/nemotron-nano-9b-v2:free",
361
- "canonical_slug": "nvidia/nemotron-nano-9b-v2",
362
- "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2",
363
- "name": "NVIDIA: Nemotron Nano 9B V2 (free)",
364
- "created": 1757106807,
365
- "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
366
- "context_length": 128000,
534
+ "id": "minimax/minimax-m2.5:free",
535
+ "canonical_slug": "minimax/minimax-m2.5-20260211",
536
+ "hugging_face_id": "MiniMaxAI/MiniMax-M2.5",
537
+ "name": "MiniMax: MiniMax M2.5 (free)",
538
+ "created": 1770908502,
539
+ "description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1...",
540
+ "context_length": 196608,
367
541
  "architecture": {
368
542
  "modality": "text->text",
369
543
  "input_modalities": [
@@ -380,9 +554,9 @@
380
554
  "completion": "0"
381
555
  },
382
556
  "top_provider": {
383
- "context_length": 128000,
384
- "max_completion_tokens": null,
385
- "is_moderated": false
557
+ "context_length": 196608,
558
+ "max_completion_tokens": 196608,
559
+ "is_moderated": true
386
560
  },
387
561
  "per_request_limits": null,
388
562
  "supported_parameters": [
@@ -391,18 +565,23 @@
391
565
  "reasoning",
392
566
  "response_format",
393
567
  "seed",
394
- "structured_outputs",
568
+ "stop",
395
569
  "temperature",
396
- "tool_choice",
397
- "tools",
398
- "top_p"
570
+ "tools"
399
571
  ],
400
572
  "default_parameters": {
401
- "temperature": null,
402
- "top_p": null,
403
- "frequency_penalty": null
404
- },
405
- "expiration_date": null
573
+ "temperature": 1,
574
+ "top_p": 0.95,
575
+ "top_k": null,
576
+ "frequency_penalty": null,
577
+ "presence_penalty": null,
578
+ "repetition_penalty": null
579
+ },
580
+ "knowledge_cutoff": null,
581
+ "expiration_date": null,
582
+ "links": {
583
+ "details": "/api/v1/models/minimax/minimax-m2.5-20260211/endpoints"
584
+ }
406
585
  },
407
586
  {
408
587
  "id": "qwen/qwen3-coder:free",
@@ -410,7 +589,7 @@
410
589
  "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
411
590
  "name": "Qwen: Qwen3 Coder 480B A35B (free)",
412
591
  "created": 1753230546,
413
- "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.",
592
+ "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over...",
414
593
  "context_length": 262000,
415
594
  "architecture": {
416
595
  "modality": "text->text",
@@ -445,7 +624,11 @@
445
624
  "top_p"
446
625
  ],
447
626
  "default_parameters": {},
448
- "expiration_date": null
627
+ "knowledge_cutoff": "2025-06-30",
628
+ "expiration_date": null,
629
+ "links": {
630
+ "details": "/api/v1/models/qwen/qwen3-coder-480b-a35b-07-25/endpoints"
631
+ }
449
632
  },
450
633
  {
451
634
  "id": "liquid/lfm-2.5-1.2b-thinking:free",
@@ -453,7 +636,7 @@
453
636
  "hugging_face_id": "LiquidAI/LFM2.5-1.2B-Thinking",
454
637
  "name": "LiquidAI: LFM2.5-1.2B-Thinking (free)",
455
638
  "created": 1768927527,
456
- "description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG\u2014while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is designed to provide higher-quality \u201cthinking\u201d responses in a small 1.2B model.",
639
+ "description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG\u2014while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is...",
457
640
  "context_length": 32768,
458
641
  "architecture": {
459
642
  "modality": "text->text",
@@ -495,7 +678,11 @@
495
678
  "top_p": null,
496
679
  "frequency_penalty": null
497
680
  },
498
- "expiration_date": null
681
+ "knowledge_cutoff": null,
682
+ "expiration_date": null,
683
+ "links": {
684
+ "details": "/api/v1/models/liquid/lfm-2.5-1.2b-thinking-20260120/endpoints"
685
+ }
499
686
  },
500
687
  {
501
688
  "id": "liquid/lfm-2.5-1.2b-instruct:free",
@@ -543,7 +730,11 @@
543
730
  "top_p": null,
544
731
  "frequency_penalty": null
545
732
  },
546
- "expiration_date": null
733
+ "knowledge_cutoff": null,
734
+ "expiration_date": null,
735
+ "links": {
736
+ "details": "/api/v1/models/liquid/lfm-2.5-1.2b-instruct-20260120/endpoints"
737
+ }
547
738
  },
548
739
  {
549
740
  "id": "nvidia/nemotron-nano-12b-v2-vl:free",
@@ -551,7 +742,7 @@
551
742
  "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16",
552
743
  "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)",
553
744
  "created": 1761675565,
554
- "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba\u2019s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores \u2248 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME\u2014surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.",
745
+ "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba\u2019s...",
555
746
  "context_length": 128000,
556
747
  "architecture": {
557
748
  "modality": "text+image+video->text",
@@ -591,16 +782,20 @@
591
782
  "top_p": null,
592
783
  "frequency_penalty": null
593
784
  },
594
- "expiration_date": null
785
+ "knowledge_cutoff": null,
786
+ "expiration_date": null,
787
+ "links": {
788
+ "details": "/api/v1/models/nvidia/nemotron-nano-12b-v2-vl/endpoints"
789
+ }
595
790
  },
596
791
  {
597
- "id": "qwen/qwen3-235b-a22b-thinking-2507",
598
- "canonical_slug": "qwen/qwen3-235b-a22b-thinking-2507",
599
- "hugging_face_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
600
- "name": "Qwen: Qwen3 235B A22B Thinking 2507",
601
- "created": 1753449557,
602
- "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode (</think>) and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.",
603
- "context_length": 131072,
792
+ "id": "arcee-ai/trinity-large-preview:free",
793
+ "canonical_slug": "arcee-ai/trinity-large-preview",
794
+ "hugging_face_id": "arcee-ai/Trinity-Large-Preview",
795
+ "name": "Arcee AI: Trinity Large Preview (free)",
796
+ "created": 1769552670,
797
+ "description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. It excels in creative writing,...",
798
+ "context_length": 131000,
604
799
  "architecture": {
605
800
  "modality": "text->text",
606
801
  "input_modalities": [
@@ -609,124 +804,15 @@
609
804
  "output_modalities": [
610
805
  "text"
611
806
  ],
612
- "tokenizer": "Qwen3",
613
- "instruct_type": "qwen3"
807
+ "tokenizer": "Other",
808
+ "instruct_type": null
614
809
  },
615
810
  "pricing": {
616
811
  "prompt": "0",
617
- "completion": "0",
618
- "request": "0",
619
- "image": "0",
620
- "web_search": "0",
621
- "internal_reasoning": "0"
812
+ "completion": "0"
622
813
  },
623
814
  "top_provider": {
624
- "context_length": 131072,
625
- "max_completion_tokens": null,
626
- "is_moderated": false
627
- },
628
- "per_request_limits": null,
629
- "supported_parameters": [
630
- "frequency_penalty",
631
- "include_reasoning",
632
- "logit_bias",
633
- "max_tokens",
634
- "min_p",
635
- "presence_penalty",
636
- "reasoning",
637
- "repetition_penalty",
638
- "response_format",
639
- "seed",
640
- "stop",
641
- "structured_outputs",
642
- "temperature",
643
- "tool_choice",
644
- "tools",
645
- "top_k",
646
- "top_p"
647
- ],
648
- "default_parameters": {
649
- "temperature": null,
650
- "top_p": null,
651
- "frequency_penalty": null
652
- },
653
- "expiration_date": null
654
- },
655
- {
656
- "id": "upstage/solar-pro-3:free",
657
- "canonical_slug": "upstage/solar-pro-3",
658
- "hugging_face_id": "",
659
- "name": "Upstage: Solar Pro 3 (free)",
660
- "created": 1769481200,
661
- "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.",
662
- "context_length": 128000,
663
- "architecture": {
664
- "modality": "text->text",
665
- "input_modalities": [
666
- "text"
667
- ],
668
- "output_modalities": [
669
- "text"
670
- ],
671
- "tokenizer": "Other",
672
- "instruct_type": null
673
- },
674
- "pricing": {
675
- "prompt": "0",
676
- "completion": "0"
677
- },
678
- "top_provider": {
679
- "context_length": 128000,
680
- "max_completion_tokens": null,
681
- "is_moderated": false
682
- },
683
- "per_request_limits": null,
684
- "supported_parameters": [
685
- "include_reasoning",
686
- "max_tokens",
687
- "reasoning",
688
- "response_format",
689
- "structured_outputs",
690
- "temperature",
691
- "tool_choice",
692
- "tools"
693
- ],
694
- "default_parameters": {
695
- "temperature": null,
696
- "top_p": null,
697
- "frequency_penalty": null
698
- },
699
- "expiration_date": "2026-03-22"
700
- },
701
- {
702
- "id": "arcee-ai/trinity-large-preview:free",
703
- "canonical_slug": "arcee-ai/trinity-large-preview",
704
- "hugging_face_id": "arcee-ai/Trinity-Large-Preview",
705
- "name": "Arcee AI: Trinity Large Preview (free)",
706
- "created": 1769552670,
707
- "description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. \n\nIt excels in creative writing, storytelling, role-play, chat scenarios, and real-time voice assistance, better than your average reasoning model usually can. But we\u2019re also introducing some of our newer agentic performance. It was trained to navigate well in agent harnesses like OpenCode, Cline, and Kilo Code, and to handle complex toolchains and long, constraint-filled prompts. \n\nThe architecture natively supports very long context windows up to 512k tokens, with the Preview API currently served at 128k context using 8-bit quantization for practical deployment. Trinity-Large-Preview reflects Arcee\u2019s efficiency-first design philosophy, offering a production-oriented frontier model with open weights and permissive licensing suitable for real-world applications and experimentation.",
708
- "context_length": 131000,
709
- "architecture": {
710
- "modality": "text->text",
711
- "input_modalities": [
712
- "text"
713
- ],
714
- "output_modalities": [
715
- "text"
716
- ],
717
- "tokenizer": "Other",
718
- "instruct_type": null
719
- },
720
- "pricing": {
721
- "prompt": "0",
722
- "completion": "0",
723
- "request": "0",
724
- "image": "0",
725
- "web_search": "0",
726
- "internal_reasoning": "0"
727
- },
728
- "top_provider": {
729
- "context_length": 131000,
815
+ "context_length": 131000,
730
816
  "max_completion_tokens": null,
731
817
  "is_moderated": false
732
818
  },
@@ -743,9 +829,16 @@
743
829
  "default_parameters": {
744
830
  "temperature": 0.8,
745
831
  "top_p": 0.8,
746
- "frequency_penalty": null
747
- },
748
- "expiration_date": null
832
+ "top_k": null,
833
+ "frequency_penalty": null,
834
+ "presence_penalty": null,
835
+ "repetition_penalty": null
836
+ },
837
+ "knowledge_cutoff": null,
838
+ "expiration_date": null,
839
+ "links": {
840
+ "details": "/api/v1/models/arcee-ai/trinity-large-preview/endpoints"
841
+ }
749
842
  },
750
843
  {
751
844
  "id": "meta-llama/llama-3.3-70b-instruct:free",
@@ -753,8 +846,8 @@
753
846
  "hugging_face_id": "meta-llama/Llama-3.3-70B-Instruct",
754
847
  "name": "Meta: Llama 3.3 70B Instruct (free)",
755
848
  "created": 1733506137,
756
- "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
757
- "context_length": 128000,
849
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model...",
850
+ "context_length": 65536,
758
851
  "architecture": {
759
852
  "modality": "text->text",
760
853
  "input_modalities": [
@@ -771,16 +864,15 @@
771
864
  "completion": "0"
772
865
  },
773
866
  "top_provider": {
774
- "context_length": 128000,
775
- "max_completion_tokens": 128000,
776
- "is_moderated": true
867
+ "context_length": 65536,
868
+ "max_completion_tokens": null,
869
+ "is_moderated": false
777
870
  },
778
871
  "per_request_limits": null,
779
872
  "supported_parameters": [
780
873
  "frequency_penalty",
781
874
  "max_tokens",
782
875
  "presence_penalty",
783
- "seed",
784
876
  "stop",
785
877
  "temperature",
786
878
  "tool_choice",
@@ -789,26 +881,29 @@
789
881
  "top_p"
790
882
  ],
791
883
  "default_parameters": {},
792
- "expiration_date": null
884
+ "knowledge_cutoff": "2023-12-31",
885
+ "expiration_date": null,
886
+ "links": {
887
+ "details": "/api/v1/models/meta-llama/llama-3.3-70b-instruct/endpoints"
888
+ }
793
889
  },
794
890
  {
795
- "id": "mistralai/mistral-small-3.1-24b-instruct:free",
796
- "canonical_slug": "mistralai/mistral-small-3.1-24b-instruct-2503",
797
- "hugging_face_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
798
- "name": "Mistral: Mistral Small 3.1 24B (free)",
799
- "created": 1742238937,
800
- "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)",
801
- "context_length": 128000,
891
+ "id": "openai/gpt-oss-120b:free",
892
+ "canonical_slug": "openai/gpt-oss-120b",
893
+ "hugging_face_id": "openai/gpt-oss-120b",
894
+ "name": "OpenAI: gpt-oss-120b (free)",
895
+ "created": 1754414231,
896
+ "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized...",
897
+ "context_length": 131072,
802
898
  "architecture": {
803
- "modality": "text+image->text",
899
+ "modality": "text->text",
804
900
  "input_modalities": [
805
- "text",
806
- "image"
901
+ "text"
807
902
  ],
808
903
  "output_modalities": [
809
904
  "text"
810
905
  ],
811
- "tokenizer": "Mistral",
906
+ "tokenizer": "GPT",
812
907
  "instruct_type": null
813
908
  },
814
909
  "pricing": {
@@ -816,37 +911,40 @@
816
911
  "completion": "0"
817
912
  },
818
913
  "top_provider": {
819
- "context_length": 128000,
820
- "max_completion_tokens": null,
821
- "is_moderated": false
914
+ "context_length": 131072,
915
+ "max_completion_tokens": 131072,
916
+ "is_moderated": true
822
917
  },
823
918
  "per_request_limits": null,
824
919
  "supported_parameters": [
825
- "frequency_penalty",
920
+ "include_reasoning",
826
921
  "max_tokens",
827
- "presence_penalty",
828
- "response_format",
922
+ "reasoning",
923
+ "seed",
829
924
  "stop",
830
- "structured_outputs",
831
925
  "temperature",
832
926
  "tool_choice",
833
- "tools",
834
- "top_k",
835
- "top_p"
927
+ "tools"
836
928
  ],
837
929
  "default_parameters": {
838
- "temperature": 0.3
930
+ "temperature": null,
931
+ "top_p": null,
932
+ "frequency_penalty": null
839
933
  },
840
- "expiration_date": null
934
+ "knowledge_cutoff": "2024-06-30",
935
+ "expiration_date": null,
936
+ "links": {
937
+ "details": "/api/v1/models/openai/gpt-oss-120b/endpoints"
938
+ }
841
939
  },
842
940
  {
843
- "id": "google/gemma-3n-e2b-it:free",
844
- "canonical_slug": "google/gemma-3n-e2b-it",
845
- "hugging_face_id": "google/gemma-3n-E2B-it",
846
- "name": "Google: Gemma 3n 2B (free)",
847
- "created": 1752074904,
848
- "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. This variant is trained on a diverse corpus including code, math, web, and multimodal data.",
849
- "context_length": 8192,
941
+ "id": "openai/gpt-oss-20b:free",
942
+ "canonical_slug": "openai/gpt-oss-20b",
943
+ "hugging_face_id": "openai/gpt-oss-20b",
944
+ "name": "OpenAI: gpt-oss-20b (free)",
945
+ "created": 1754414229,
946
+ "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for...",
947
+ "context_length": 131072,
850
948
  "architecture": {
851
949
  "modality": "text->text",
852
950
  "input_modalities": [
@@ -855,87 +953,48 @@
855
953
  "output_modalities": [
856
954
  "text"
857
955
  ],
858
- "tokenizer": "Other",
956
+ "tokenizer": "GPT",
859
957
  "instruct_type": null
860
958
  },
861
959
  "pricing": {
862
960
  "prompt": "0",
863
961
  "completion": "0"
864
962
  },
865
- "top_provider": {
866
- "context_length": 8192,
867
- "max_completion_tokens": 2048,
868
- "is_moderated": false
869
- },
870
- "per_request_limits": null,
871
- "supported_parameters": [
872
- "frequency_penalty",
873
- "max_tokens",
874
- "presence_penalty",
875
- "response_format",
876
- "seed",
877
- "stop",
878
- "temperature",
879
- "top_p"
880
- ],
881
- "default_parameters": {},
882
- "expiration_date": null
883
- },
884
- {
885
- "id": "google/gemma-3-27b-it:free",
886
- "canonical_slug": "google/gemma-3-27b-it",
887
- "hugging_face_id": "google/gemma-3-27b-it",
888
- "name": "Google: Gemma 3 27B (free)",
889
- "created": 1741756359,
890
- "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)",
891
- "context_length": 131072,
892
- "architecture": {
893
- "modality": "text+image->text",
894
- "input_modalities": [
895
- "text",
896
- "image"
897
- ],
898
- "output_modalities": [
899
- "text"
900
- ],
901
- "tokenizer": "Gemini",
902
- "instruct_type": "gemma"
903
- },
904
- "pricing": {
905
- "prompt": "0",
906
- "completion": "0"
907
- },
908
963
  "top_provider": {
909
964
  "context_length": 131072,
910
- "max_completion_tokens": 8192,
911
- "is_moderated": false
965
+ "max_completion_tokens": 131072,
966
+ "is_moderated": true
912
967
  },
913
968
  "per_request_limits": null,
914
969
  "supported_parameters": [
970
+ "include_reasoning",
915
971
  "max_tokens",
916
- "response_format",
972
+ "reasoning",
917
973
  "seed",
918
974
  "stop",
919
975
  "temperature",
920
976
  "tool_choice",
921
- "tools",
922
- "top_p"
977
+ "tools"
923
978
  ],
924
979
  "default_parameters": {
925
980
  "temperature": null,
926
981
  "top_p": null,
927
982
  "frequency_penalty": null
928
983
  },
929
- "expiration_date": null
984
+ "knowledge_cutoff": "2024-06-30",
985
+ "expiration_date": null,
986
+ "links": {
987
+ "details": "/api/v1/models/openai/gpt-oss-20b/endpoints"
988
+ }
930
989
  },
931
990
  {
932
- "id": "qwen/qwen3-4b:free",
933
- "canonical_slug": "qwen/qwen3-4b-04-28",
934
- "hugging_face_id": "Qwen/Qwen3-4B",
935
- "name": "Qwen: Qwen3 4B (free)",
936
- "created": 1746031104,
937
- "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks. It introduces a dual-mode architecture\u2014thinking and non-thinking\u2014allowing dynamic switching between high-precision logical reasoning and efficient dialogue generation. This makes it well-suited for multi-turn chat, instruction following, and complex agent workflows.",
938
- "context_length": 40960,
991
+ "id": "meta-llama/llama-3.2-3b-instruct:free",
992
+ "canonical_slug": "meta-llama/llama-3.2-3b-instruct",
993
+ "hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct",
994
+ "name": "Meta: Llama 3.2 3B Instruct (free)",
995
+ "created": 1727222400,
996
+ "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it...",
997
+ "context_length": 131072,
939
998
  "architecture": {
940
999
  "modality": "text->text",
941
1000
  "input_modalities": [
@@ -944,45 +1003,43 @@
944
1003
  "output_modalities": [
945
1004
  "text"
946
1005
  ],
947
- "tokenizer": "Qwen3",
948
- "instruct_type": "qwen3"
1006
+ "tokenizer": "Llama3",
1007
+ "instruct_type": "llama3"
949
1008
  },
950
1009
  "pricing": {
951
1010
  "prompt": "0",
952
1011
  "completion": "0"
953
1012
  },
954
1013
  "top_provider": {
955
- "context_length": 40960,
1014
+ "context_length": 131072,
956
1015
  "max_completion_tokens": null,
957
1016
  "is_moderated": false
958
1017
  },
959
1018
  "per_request_limits": null,
960
1019
  "supported_parameters": [
961
1020
  "frequency_penalty",
962
- "include_reasoning",
963
1021
  "max_tokens",
964
1022
  "presence_penalty",
965
- "reasoning",
966
- "response_format",
967
1023
  "stop",
968
- "structured_outputs",
969
1024
  "temperature",
970
- "tool_choice",
971
- "tools",
972
1025
  "top_k",
973
1026
  "top_p"
974
1027
  ],
975
1028
  "default_parameters": {},
976
- "expiration_date": null
1029
+ "knowledge_cutoff": "2023-12-31",
1030
+ "expiration_date": null,
1031
+ "links": {
1032
+ "details": "/api/v1/models/meta-llama/llama-3.2-3b-instruct/endpoints"
1033
+ }
977
1034
  },
978
1035
  {
979
- "id": "google/gemma-3n-e4b-it:free",
980
- "canonical_slug": "google/gemma-3n-e4b-it",
981
- "hugging_face_id": "google/gemma-3n-E4B-it",
982
- "name": "Google: Gemma 3n 4B (free)",
983
- "created": 1747776824,
984
- "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs\u2014including text, visual data, and audio\u2014enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)",
985
- "context_length": 8192,
1036
+ "id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
1037
+ "canonical_slug": "venice/uncensored",
1038
+ "hugging_face_id": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition",
1039
+ "name": "Venice: Uncensored (free)",
1040
+ "created": 1752094966,
1041
+ "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an \u201cuncensored\u201d instruct-tuned LLM, preserving...",
1042
+ "context_length": 32768,
986
1043
  "architecture": {
987
1044
  "modality": "text->text",
988
1045
  "input_modalities": [
@@ -999,8 +1056,8 @@
999
1056
  "completion": "0"
1000
1057
  },
1001
1058
  "top_provider": {
1002
- "context_length": 8192,
1003
- "max_completion_tokens": 2048,
1059
+ "context_length": 32768,
1060
+ "max_completion_tokens": null,
1004
1061
  "is_moderated": false
1005
1062
  },
1006
1063
  "per_request_limits": null,
@@ -1009,32 +1066,38 @@
1009
1066
  "max_tokens",
1010
1067
  "presence_penalty",
1011
1068
  "response_format",
1012
- "seed",
1013
1069
  "stop",
1070
+ "structured_outputs",
1014
1071
  "temperature",
1072
+ "top_k",
1015
1073
  "top_p"
1016
1074
  ],
1017
1075
  "default_parameters": {},
1018
- "expiration_date": null
1076
+ "knowledge_cutoff": "2024-04-30",
1077
+ "expiration_date": null,
1078
+ "links": {
1079
+ "details": "/api/v1/models/venice/uncensored/endpoints"
1080
+ }
1019
1081
  },
1020
1082
  {
1021
- "id": "openai/gpt-oss-120b:free",
1022
- "canonical_slug": "openai/gpt-oss-120b",
1023
- "hugging_face_id": "openai/gpt-oss-120b",
1024
- "name": "OpenAI: gpt-oss-120b (free)",
1025
- "created": 1754414231,
1026
- "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.",
1083
+ "id": "google/gemma-3-27b-it:free",
1084
+ "canonical_slug": "google/gemma-3-27b-it",
1085
+ "hugging_face_id": "google/gemma-3-27b-it",
1086
+ "name": "Google: Gemma 3 27B (free)",
1087
+ "created": 1741756359,
1088
+ "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
1027
1089
  "context_length": 131072,
1028
1090
  "architecture": {
1029
- "modality": "text->text",
1091
+ "modality": "text+image->text",
1030
1092
  "input_modalities": [
1031
- "text"
1093
+ "text",
1094
+ "image"
1032
1095
  ],
1033
1096
  "output_modalities": [
1034
1097
  "text"
1035
1098
  ],
1036
- "tokenizer": "GPT",
1037
- "instruct_type": null
1099
+ "tokenizer": "Gemini",
1100
+ "instruct_type": "gemma"
1038
1101
  },
1039
1102
  "pricing": {
1040
1103
  "prompt": "0",
@@ -1042,34 +1105,36 @@
1042
1105
  },
1043
1106
  "top_provider": {
1044
1107
  "context_length": 131072,
1045
- "max_completion_tokens": 131072,
1046
- "is_moderated": true
1108
+ "max_completion_tokens": 8192,
1109
+ "is_moderated": false
1047
1110
  },
1048
1111
  "per_request_limits": null,
1049
1112
  "supported_parameters": [
1050
- "include_reasoning",
1051
1113
  "max_tokens",
1052
- "reasoning",
1114
+ "response_format",
1053
1115
  "seed",
1054
1116
  "stop",
1055
1117
  "temperature",
1056
- "tool_choice",
1057
- "tools"
1118
+ "top_p"
1058
1119
  ],
1059
1120
  "default_parameters": {
1060
1121
  "temperature": null,
1061
1122
  "top_p": null,
1062
1123
  "frequency_penalty": null
1063
1124
  },
1064
- "expiration_date": null
1125
+ "knowledge_cutoff": "2024-08-31",
1126
+ "expiration_date": null,
1127
+ "links": {
1128
+ "details": "/api/v1/models/google/gemma-3-27b-it/endpoints"
1129
+ }
1065
1130
  },
1066
1131
  {
1067
- "id": "openai/gpt-oss-20b:free",
1068
- "canonical_slug": "openai/gpt-oss-20b",
1069
- "hugging_face_id": "openai/gpt-oss-20b",
1070
- "name": "OpenAI: gpt-oss-20b (free)",
1071
- "created": 1754414229,
1072
- "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI\u2019s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.",
1132
+ "id": "z-ai/glm-4.5-air:free",
1133
+ "canonical_slug": "z-ai/glm-4.5-air",
1134
+ "hugging_face_id": "zai-org/GLM-4.5-Air",
1135
+ "name": "Z.ai: GLM 4.5 Air (free)",
1136
+ "created": 1753471258,
1137
+ "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter...",
1073
1138
  "context_length": 131072,
1074
1139
  "architecture": {
1075
1140
  "modality": "text->text",
@@ -1079,7 +1144,7 @@
1079
1144
  "output_modalities": [
1080
1145
  "text"
1081
1146
  ],
1082
- "tokenizer": "GPT",
1147
+ "tokenizer": "Other",
1083
1148
  "instruct_type": null
1084
1149
  },
1085
1150
  "pricing": {
@@ -1088,35 +1153,38 @@
1088
1153
  },
1089
1154
  "top_provider": {
1090
1155
  "context_length": 131072,
1091
- "max_completion_tokens": 131072,
1092
- "is_moderated": true
1156
+ "max_completion_tokens": 96000,
1157
+ "is_moderated": false
1093
1158
  },
1094
1159
  "per_request_limits": null,
1095
1160
  "supported_parameters": [
1096
1161
  "include_reasoning",
1097
1162
  "max_tokens",
1098
1163
  "reasoning",
1099
- "seed",
1100
- "stop",
1101
1164
  "temperature",
1102
1165
  "tool_choice",
1103
- "tools"
1166
+ "tools",
1167
+ "top_p"
1104
1168
  ],
1105
1169
  "default_parameters": {
1106
- "temperature": null,
1170
+ "temperature": 0.75,
1107
1171
  "top_p": null,
1108
1172
  "frequency_penalty": null
1109
1173
  },
1110
- "expiration_date": null
1174
+ "knowledge_cutoff": "2024-12-31",
1175
+ "expiration_date": null,
1176
+ "links": {
1177
+ "details": "/api/v1/models/z-ai/glm-4.5-air/endpoints"
1178
+ }
1111
1179
  },
1112
1180
  {
1113
- "id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
1114
- "canonical_slug": "venice/uncensored",
1115
- "hugging_face_id": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition",
1116
- "name": "Venice: Uncensored (free)",
1117
- "created": 1752094966,
1118
- "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an \u201cuncensored\u201d instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.",
1119
- "context_length": 32768,
1181
+ "id": "google/gemma-3n-e2b-it:free",
1182
+ "canonical_slug": "google/gemma-3n-e2b-it",
1183
+ "hugging_face_id": "google/gemma-3n-E2B-it",
1184
+ "name": "Google: Gemma 3n 2B (free)",
1185
+ "created": 1752074904,
1186
+ "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based...",
1187
+ "context_length": 8192,
1120
1188
  "architecture": {
1121
1189
  "modality": "text->text",
1122
1190
  "input_modalities": [
@@ -1133,74 +1201,78 @@
1133
1201
  "completion": "0"
1134
1202
  },
1135
1203
  "top_provider": {
1136
- "context_length": 32768,
1137
- "max_completion_tokens": null,
1204
+ "context_length": 8192,
1205
+ "max_completion_tokens": 2048,
1138
1206
  "is_moderated": false
1139
1207
  },
1140
1208
  "per_request_limits": null,
1141
1209
  "supported_parameters": [
1142
- "frequency_penalty",
1143
1210
  "max_tokens",
1144
- "presence_penalty",
1145
1211
  "response_format",
1146
- "stop",
1147
- "structured_outputs",
1212
+ "seed",
1148
1213
  "temperature",
1149
- "top_k",
1150
1214
  "top_p"
1151
1215
  ],
1152
1216
  "default_parameters": {},
1153
- "expiration_date": null
1217
+ "knowledge_cutoff": "2024-08-31",
1218
+ "expiration_date": null,
1219
+ "links": {
1220
+ "details": "/api/v1/models/google/gemma-3n-e2b-it/endpoints"
1221
+ }
1154
1222
  },
1155
1223
  {
1156
- "id": "meta-llama/llama-3.2-3b-instruct:free",
1157
- "canonical_slug": "meta-llama/llama-3.2-3b-instruct",
1158
- "hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct",
1159
- "name": "Meta: Llama 3.2 3B Instruct (free)",
1160
- "created": 1727222400,
1161
- "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).",
1162
- "context_length": 131072,
1224
+ "id": "google/gemma-3-4b-it:free",
1225
+ "canonical_slug": "google/gemma-3-4b-it",
1226
+ "hugging_face_id": "google/gemma-3-4b-it",
1227
+ "name": "Google: Gemma 3 4B (free)",
1228
+ "created": 1741905510,
1229
+ "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
1230
+ "context_length": 32768,
1163
1231
  "architecture": {
1164
- "modality": "text->text",
1232
+ "modality": "text+image->text",
1165
1233
  "input_modalities": [
1166
- "text"
1234
+ "text",
1235
+ "image"
1167
1236
  ],
1168
1237
  "output_modalities": [
1169
1238
  "text"
1170
1239
  ],
1171
- "tokenizer": "Llama3",
1172
- "instruct_type": "llama3"
1240
+ "tokenizer": "Gemini",
1241
+ "instruct_type": "gemma"
1173
1242
  },
1174
1243
  "pricing": {
1175
1244
  "prompt": "0",
1176
1245
  "completion": "0"
1177
1246
  },
1178
1247
  "top_provider": {
1179
- "context_length": 131072,
1180
- "max_completion_tokens": null,
1248
+ "context_length": 32768,
1249
+ "max_completion_tokens": 8192,
1181
1250
  "is_moderated": false
1182
1251
  },
1183
1252
  "per_request_limits": null,
1184
1253
  "supported_parameters": [
1185
- "frequency_penalty",
1186
1254
  "max_tokens",
1187
- "presence_penalty",
1255
+ "response_format",
1256
+ "seed",
1188
1257
  "stop",
1189
1258
  "temperature",
1190
- "top_k",
1191
1259
  "top_p"
1192
1260
  ],
1193
1261
  "default_parameters": {},
1194
- "expiration_date": null
1262
+ "knowledge_cutoff": "2024-08-31",
1263
+ "expiration_date": null,
1264
+ "links": {
1265
+ "details": "/api/v1/models/google/gemma-3-4b-it/endpoints"
1266
+ }
1195
1267
  },
1196
1268
  {
1197
- "id": "z-ai/glm-4.5-air:free",
1198
- "canonical_slug": "z-ai/glm-4.5-air",
1199
- "hugging_face_id": "zai-org/GLM-4.5-Air",
1200
- "name": "Z.ai: GLM 4.5 Air (free)",
1201
- "created": 1753471258,
1202
- "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)",
1203
- "context_length": 131072,
1269
+ "id": "google/gemma-3n-e4b-it:free",
1270
+ "canonical_slug": "google/gemma-3n-e4b-it",
1271
+ "hugging_face_id": "google/gemma-3n-E4B-it",
1272
+ "name": "Google: Gemma 3n 4B (free)",
1273
+ "created": 1747776824,
1274
+ "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs\u2014including text, visual data, and audio\u2014enabling diverse tasks...",
1275
+ "context_length": 8192,
1204
1276
  "architecture": {
1205
1277
  "modality": "text->text",
1206
1278
  "input_modalities": [
@@ -1217,54 +1289,8 @@
1217
1289
  "completion": "0"
1218
1290
  },
1219
1291
  "top_provider": {
1220
- "context_length": 131072,
1221
- "max_completion_tokens": 96000,
1222
- "is_moderated": false
1223
- },
1224
- "per_request_limits": null,
1225
- "supported_parameters": [
1226
- "include_reasoning",
1227
- "max_tokens",
1228
- "reasoning",
1229
- "temperature",
1230
- "tool_choice",
1231
- "tools",
1232
- "top_p"
1233
- ],
1234
- "default_parameters": {
1235
- "temperature": 0.75,
1236
- "top_p": null,
1237
- "frequency_penalty": null
1238
- },
1239
- "expiration_date": null
1240
- },
1241
- {
1242
- "id": "google/gemma-3-4b-it:free",
1243
- "canonical_slug": "google/gemma-3-4b-it",
1244
- "hugging_face_id": "google/gemma-3-4b-it",
1245
- "name": "Google: Gemma 3 4B (free)",
1246
- "created": 1741905510,
1247
- "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.",
1248
- "context_length": 32768,
1249
- "architecture": {
1250
- "modality": "text+image->text",
1251
- "input_modalities": [
1252
- "text",
1253
- "image"
1254
- ],
1255
- "output_modalities": [
1256
- "text"
1257
- ],
1258
- "tokenizer": "Gemini",
1259
- "instruct_type": "gemma"
1260
- },
1261
- "pricing": {
1262
- "prompt": "0",
1263
- "completion": "0"
1264
- },
1265
- "top_provider": {
1266
- "context_length": 32768,
1267
- "max_completion_tokens": 8192,
1292
+ "context_length": 8192,
1293
+ "max_completion_tokens": 2048,
1268
1294
  "is_moderated": false
1269
1295
  },
1270
1296
  "per_request_limits": null,
@@ -1272,12 +1298,15 @@
1272
1298
  "max_tokens",
1273
1299
  "response_format",
1274
1300
  "seed",
1275
- "stop",
1276
1301
  "temperature",
1277
1302
  "top_p"
1278
1303
  ],
1279
1304
  "default_parameters": {},
1280
- "expiration_date": null
1305
+ "knowledge_cutoff": "2024-08-31",
1306
+ "expiration_date": null,
1307
+ "links": {
1308
+ "details": "/api/v1/models/google/gemma-3n-e4b-it/endpoints"
1309
+ }
1281
1310
  },
1282
1311
  {
1283
1312
  "id": "google/gemma-3-12b-it:free",
@@ -1285,7 +1314,7 @@
1285
1314
  "hugging_face_id": "google/gemma-3-12b-it",
1286
1315
  "name": "Google: Gemma 3 12B (free)",
1287
1316
  "created": 1741902625,
1288
- "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)",
1317
+ "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
1289
1318
  "context_length": 32768,
1290
1319
  "architecture": {
1291
1320
  "modality": "text+image->text",
@@ -1317,7 +1346,11 @@
1317
1346
  "top_p"
1318
1347
  ],
1319
1348
  "default_parameters": {},
1320
- "expiration_date": null
1349
+ "knowledge_cutoff": "2024-08-31",
1350
+ "expiration_date": null,
1351
+ "links": {
1352
+ "details": "/api/v1/models/google/gemma-3-12b-it/endpoints"
1353
+ }
1321
1354
  },
1322
1355
  {
1323
1356
  "id": "nousresearch/hermes-3-llama-3.1-405b:free",
@@ -1325,7 +1358,7 @@
1325
1358
  "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-405B",
1326
1359
  "name": "Nous: Hermes 3 405B Instruct (free)",
1327
1360
  "created": 1723766400,
1328
- "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.",
1361
+ "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...",
1329
1362
  "context_length": 131072,
1330
1363
  "architecture": {
1331
1364
  "modality": "text->text",
@@ -1358,7 +1391,11 @@
1358
1391
  "top_p"
1359
1392
  ],
1360
1393
  "default_parameters": {},
1361
- "expiration_date": null
1394
+ "knowledge_cutoff": "2023-12-31",
1395
+ "expiration_date": null,
1396
+ "links": {
1397
+ "details": "/api/v1/models/nousresearch/hermes-3-llama-3.1-405b/endpoints"
1398
+ }
1362
1399
  }
1363
1400
  ]
1364
1401
  }