recoder-code 1.0.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +4 -0
- package/.claude/commands/commit-push-pr.md +19 -0
- package/.claude/commands/dedupe.md +38 -0
- package/.devcontainer/Dockerfile +91 -0
- package/.devcontainer/devcontainer.json +57 -0
- package/.devcontainer/init-firewall.sh +137 -0
- package/.gitattributes +2 -0
- package/.github/ISSUE_TEMPLATE/bug_report.yml +188 -0
- package/.github/ISSUE_TEMPLATE/config.yml +17 -0
- package/.github/ISSUE_TEMPLATE/documentation.yml +117 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +132 -0
- package/.github/ISSUE_TEMPLATE/model_behavior.yml +220 -0
- package/.github/workflows/auto-close-duplicates.yml +31 -0
- package/.github/workflows/backfill-duplicate-comments.yml +44 -0
- package/.github/workflows/claude-dedupe-issues.yml +80 -0
- package/.github/workflows/claude-issue-triage.yml +106 -0
- package/.github/workflows/claude.yml +37 -0
- package/.github/workflows/issue-opened-dispatch.yml +28 -0
- package/.github/workflows/lock-closed-issues.yml +92 -0
- package/.github/workflows/log-issue-events.yml +40 -0
- package/CHANGELOG.md +646 -0
- package/KILO.md +1273 -0
- package/LICENSE.md +21 -0
- package/README.md +176 -0
- package/SECURITY.md +12 -0
- package/Script/run_devcontainer_claude_code.ps1 +152 -0
- package/api/githubApi.ts +144 -0
- package/babel.config.js +7 -0
- package/cli/.gitkeep +0 -0
- package/cli/auto-close-duplicates.ts +5 -0
- package/cli/configure.js +33 -0
- package/cli/list-models.js +48 -0
- package/cli/run.js +61 -0
- package/cli/set-api-key.js +26 -0
- package/config.json +4 -0
- package/demo.gif +0 -0
- package/examples/gpt-3.5-turbo.js +38 -0
- package/examples/gpt-4.js +38 -0
- package/examples/hooks/bash_command_validator_example.py +83 -0
- package/index.d.ts +3 -0
- package/index.js +62 -0
- package/jest.config.js +6 -0
- package/openapi.yaml +61 -0
- package/package.json +47 -0
- package/scripts/backfill-duplicate-comments.ts +213 -0
- package/tests/api-githubApi.test.ts +30 -0
- package/tests/auto-close-duplicates.test.ts +145 -0
- package/tests/cli-configure.test.ts +88 -0
- package/tests/cli-list-models.test.ts +44 -0
- package/tests/cli-run.test.ts +97 -0
- package/tests/cli-set-api-key.test.ts +54 -0
- package/tests/cli-validate-api-key.test.ts +52 -0
- package/tsconfig.json +18 -0
package/KILO.md
ADDED
@@ -0,0 +1,1273 @@
---
title: Quickstart
subtitle: Get started with OpenRouter
slug: quickstart
headline: OpenRouter Quickstart Guide | Developer Documentation
canonical-url: 'https://openrouter.ai/docs/quickstart'
'og:site_name': OpenRouter Documentation
'og:title': OpenRouter Quickstart Guide
'og:description': >-
  Get started with OpenRouter's unified API for hundreds of AI models. Learn how
  to integrate using OpenAI SDK, direct API calls, or third-party frameworks.
'og:image':
  type: url
  value: >-
    https://openrouter.ai/dynamic-og?pathname=quickstart&title=Quick%20Start&description=Start%20using%20OpenRouter%20API%20in%20minutes%20with%20any%20SDK
'og:image:width': 1200
'og:image:height': 630
'twitter:card': summary_large_image
'twitter:site': '@OpenRouterAI'
noindex: false
nofollow: false
---

OpenRouter provides a unified API that gives you access to hundreds of AI models through a single endpoint, while automatically handling fallbacks and selecting the most cost-effective options. Get started with just a few lines of code using your preferred SDK or framework.

<Tip>
  Looking for information about free models and rate limits? Please see the [FAQ](/docs/faq#how-are-rate-limits-calculated).
</Tip>

In the examples below, the OpenRouter-specific headers are optional. Setting them allows your app to appear on the OpenRouter leaderboards. For detailed information about app attribution, see our [App Attribution guide](/docs/features/app-attribution).

## Using the OpenAI SDK

<CodeGroup>

```python title="Python"
from openai import OpenAI

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key="<OPENROUTER_API_KEY>",
)

completion = client.chat.completions.create(
  extra_headers={
    "HTTP-Referer": "<YOUR_SITE_URL>",  # Optional. Site URL for rankings on openrouter.ai.
    "X-Title": "<YOUR_SITE_NAME>",  # Optional. Site title for rankings on openrouter.ai.
  },
  model="openai/gpt-4o",
  messages=[
    {
      "role": "user",
      "content": "What is the meaning of life?"
    }
  ]
)

print(completion.choices[0].message.content)
```

```typescript title="TypeScript"
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://openrouter.ai/api/v1',
  apiKey: '<OPENROUTER_API_KEY>',
  defaultHeaders: {
    'HTTP-Referer': '<YOUR_SITE_URL>', // Optional. Site URL for rankings on openrouter.ai.
    'X-Title': '<YOUR_SITE_NAME>', // Optional. Site title for rankings on openrouter.ai.
  },
});

async function main() {
  const completion = await openai.chat.completions.create({
    model: 'openai/gpt-4o',
    messages: [
      {
        role: 'user',
        content: 'What is the meaning of life?',
      },
    ],
  });

  console.log(completion.choices[0].message);
}

main();
```

</CodeGroup>

## Using the OpenRouter API directly

<Tip>
  You can use the interactive [Request Builder](/request-builder) to generate OpenRouter API requests in the language of your choice.
</Tip>

<CodeGroup>

```python title="Python"
import requests
import json

response = requests.post(
  url="https://openrouter.ai/api/v1/chat/completions",
  headers={
    "Authorization": "Bearer <OPENROUTER_API_KEY>",
    "HTTP-Referer": "<YOUR_SITE_URL>",  # Optional. Site URL for rankings on openrouter.ai.
    "X-Title": "<YOUR_SITE_NAME>",  # Optional. Site title for rankings on openrouter.ai.
  },
  data=json.dumps({
    "model": "openai/gpt-4o",  # Optional
    "messages": [
      {
        "role": "user",
        "content": "What is the meaning of life?"
      }
    ]
  })
)
```

```typescript title="TypeScript"
fetch('https://openrouter.ai/api/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <OPENROUTER_API_KEY>',
    'HTTP-Referer': '<YOUR_SITE_URL>', // Optional. Site URL for rankings on openrouter.ai.
    'X-Title': '<YOUR_SITE_NAME>', // Optional. Site title for rankings on openrouter.ai.
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'openai/gpt-4o',
    messages: [
      {
        role: 'user',
        content: 'What is the meaning of life?',
      },
    ],
  }),
});
```

```shell title="Shell"
curl https://openrouter.ai/api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
  -d '{
  "model": "openai/gpt-4o",
  "messages": [
    {
      "role": "user",
      "content": "What is the meaning of life?"
    }
  ]
}'
```

</CodeGroup>

The API also supports [streaming](/docs/api-reference/streaming).
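
As a minimal sketch of what that looks like with the OpenAI SDK client configured as above: pass `stream: true` and iterate the chunks as they arrive.

```typescript
import OpenAI from 'openai';

// Minimal streaming sketch, reusing the OpenRouter client setup shown above.
const openai = new OpenAI({
  baseURL: 'https://openrouter.ai/api/v1',
  apiKey: '<OPENROUTER_API_KEY>',
});

async function main() {
  const stream = await openai.chat.completions.create({
    model: 'openai/gpt-4o',
    messages: [{ role: 'user', content: 'What is the meaning of life?' }],
    stream: true, // request incremental chunks instead of one final message
  });

  for await (const chunk of stream) {
    // Each chunk carries a delta with the next slice of generated text.
    process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
  }
}

main();
```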

## Using third-party SDKs

For information about using third-party SDKs and frameworks with OpenRouter, please [see our frameworks documentation](/docs/community/frameworks-overview).

---
title: Model Routing
subtitle: Dynamically route requests to models
headline: Model Routing | Dynamic AI Model Selection and Fallback
canonical-url: 'https://openrouter.ai/docs/features/model-routing'
'og:site_name': OpenRouter Documentation
'og:title': Model Routing - Smart Model Selection and Fallback
'og:description': >-
  Route requests dynamically between AI models. Learn how to use OpenRouter's
  Auto Router and model fallback features for optimal performance and
  reliability.
'og:image':
  type: url
  value: >-
    https://openrouter.ai/dynamic-og?title=Model%20Routing&description=Dynamic%20AI%20model%20selection%20and%20fallbacks
'og:image:width': 1200
'og:image:height': 630
'twitter:card': summary_large_image
'twitter:site': '@OpenRouterAI'
noindex: false
nofollow: false
---

import { API_KEY_REF } from '../../../imports/constants';

OpenRouter provides two options for model routing.

## Auto Router

The [Auto Router](https://openrouter.ai/openrouter/auto) is a special model ID that you can use to choose between selected high-quality models based on your prompt, powered by [NotDiamond](https://www.notdiamond.ai/).

```json
{
  "model": "openrouter/auto",
  ... // Other params
}
```

The resulting generation will have `model` set to the model that was used.

## The `models` parameter

The `models` parameter lets you automatically try other models if the primary model's providers are down, rate-limited, or refuse to reply due to content moderation.

```json
{
  "models": ["anthropic/claude-3.5-sonnet", "gryphe/mythomax-l2-13b"],
  ... // Other params
}
```

If the model you selected returns an error, OpenRouter will try to use the fallback model instead. If the fallback model is down or returns an error, OpenRouter will return that error.

By default, any error can trigger the use of a fallback model, including context length validation errors, moderation flags for filtered models, rate-limiting, and downtime.

Requests are priced using the model that was ultimately used, which will be returned in the `model` attribute of the response body.
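
As a quick sketch (assuming an OpenAI SDK client configured for OpenRouter, as in the section below), you can check that attribute after the call returns:

```typescript
// Sketch: inspect which model ultimately served the request. If a fallback
// from the `models` array was used, `completion.model` names that fallback.
const completion = await openai.chat.completions.create({
  model: 'openai/gpt-4o',
  messages: [{ role: 'user', content: 'Hello' }],
});

console.log(completion.model);
```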

## Using with OpenAI SDK

To use the `models` array with the OpenAI SDK, include it in the `extra_body` parameter. In the example below, gpt-4o will be tried first, and the `models` array will be tried in order as fallbacks.

<Template data={{
  API_KEY_REF,
}}>
<CodeGroup>

```typescript
import OpenAI from 'openai';

const openrouterClient = new OpenAI({
  baseURL: 'https://openrouter.ai/api/v1',
  // API key and headers
});

async function main() {
  // @ts-expect-error
  const completion = await openrouterClient.chat.completions.create({
    model: 'openai/gpt-4o',
    models: ['anthropic/claude-3.5-sonnet', 'gryphe/mythomax-l2-13b'],
    messages: [
      {
        role: 'user',
        content: 'What is the meaning of life?',
      },
    ],
  });
  console.log(completion.choices[0].message);
}

main();
```

```python
from openai import OpenAI

openai_client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key={{API_KEY_REF}},
)

completion = openai_client.chat.completions.create(
  model="openai/gpt-4o",
  extra_body={
    "models": ["anthropic/claude-3.5-sonnet", "gryphe/mythomax-l2-13b"],
  },
  messages=[
    {
      "role": "user",
      "content": "What is the meaning of life?"
    }
  ]
)

print(completion.choices[0].message.content)
```

</CodeGroup>
</Template>
---
title: Provider Routing
subtitle: Route requests to the best provider
headline: Provider Routing | Intelligent Multi-Provider Request Routing
canonical-url: 'https://openrouter.ai/docs/features/provider-routing'
'og:site_name': OpenRouter Documentation
'og:title': Provider Routing - Smart Multi-Provider Request Management
'og:description': >-
  Route AI model requests across multiple providers intelligently. Learn how to
  optimize for cost, performance, and reliability with OpenRouter's provider
  routing.
'og:image':
  type: url
  value: >-
    https://openrouter.ai/dynamic-og?pathname=features/provider-routing&title=Smart%20Routing&description=Optimize%20AI%20requests%20across%20providers%20for%20best%20performance
'og:image:width': 1200
'og:image:height': 630
'twitter:card': summary_large_image
'twitter:site': '@OpenRouterAI'
noindex: false
nofollow: false
---
import { ProviderPreferencesSchema } from '../../../imports/constants';
import { TSFetchCodeBlock } from '../../../imports/TSFetchCodeBlock';
import { ZodToJSONSchemaBlock } from '../../../imports/ZodToJSONSchemaBlock';
import { TermsOfServiceDescriptions } from '../../../imports/TermsOfServiceDescriptions';

OpenRouter routes requests to the best available providers for your model. By default, [requests are load balanced](#price-based-load-balancing-default-strategy) across the top providers to maximize uptime.

You can customize how your requests are routed using the `provider` object in the request body for [Chat Completions](/docs/api-reference/chat-completion) and [Completions](/docs/api-reference/completion).

<Tip>
  For a complete list of valid provider names to use in the API, see the [full provider schema](#json-schema-for-provider-preferences).
</Tip>

The `provider` object can contain the following fields:

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `order` | string[] | - | List of provider slugs to try in order (e.g. `["anthropic", "openai"]`). [Learn more](#ordering-specific-providers) |
| `allow_fallbacks` | boolean | `true` | Whether to allow backup providers when the primary is unavailable. [Learn more](#disabling-fallbacks) |
| `require_parameters` | boolean | `false` | Only use providers that support all parameters in your request. [Learn more](#requiring-providers-to-support-all-parameters) |
| `data_collection` | "allow" \| "deny" | "allow" | Control whether to use providers that may store data. [Learn more](#requiring-providers-to-comply-with-data-policies) |
| `only` | string[] | - | List of provider slugs to allow for this request. [Learn more](#allowing-only-specific-providers) |
| `ignore` | string[] | - | List of provider slugs to skip for this request. [Learn more](#ignoring-providers) |
| `quantizations` | string[] | - | List of quantization levels to filter by (e.g. `["int4", "int8"]`). [Learn more](#quantization) |
| `sort` | string | - | Sort providers by price or throughput (e.g. `"price"` or `"throughput"`). [Learn more](#provider-sorting) |
| `max_price` | object | - | The maximum pricing you want to pay for this request. [Learn more](#maximum-price) |

## Price-Based Load Balancing (Default Strategy)

For each model in your request, OpenRouter's default behavior is to load balance requests across providers, prioritizing price.

If you are more sensitive to throughput than price, you can use the `sort` field to explicitly prioritize throughput.

<Tip>
  When you send a request with `tools` or `tool_choice`, OpenRouter will only route to providers that support tool use. Similarly, if you set a `max_tokens`, then OpenRouter will only route to providers that support a response of that length.
</Tip>

Here is OpenRouter's default load balancing strategy:

1. Prioritize providers that have not seen significant outages in the last 30 seconds.
2. For the stable providers, look at the lowest-cost candidates and select one weighted by the inverse square of the price (example below).
3. Use the remaining providers as fallbacks.

<Note title="A Load Balancing Example">
  Suppose Provider A costs \$1 per million tokens, Provider B costs \$2, and Provider C costs \$3, and Provider B recently saw a few outages.

  - Your request is routed to Provider A. Provider A is 9x more likely than Provider C to be routed to first, because $(1 / 3^2 = 1/9)$ (inverse square of the price).
  - If Provider A fails, then Provider C will be tried next.
  - If Provider C also fails, Provider B will be tried last.
</Note>
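
To make that weighting concrete, here is an illustrative sketch (our own illustration, not OpenRouter's actual implementation) of picking a provider with probability proportional to the inverse square of its price:

```typescript
// Illustration only: choose a provider with probability proportional to
// 1 / price^2, matching the load-balancing description above.
function pickProvider(providers: { name: string; price: number }[]): string {
  const weights = providers.map((p) => 1 / (p.price * p.price));
  const total = weights.reduce((sum, w) => sum + w, 0);
  let r = Math.random() * total;
  for (let i = 0; i < providers.length; i++) {
    r -= weights[i];
    if (r <= 0) return providers[i].name;
  }
  return providers[providers.length - 1].name;
}

// At $1 vs $3 per million tokens, A's weight is 1 and C's is 1/9,
// so A is picked first about 9x as often as C.
console.log(pickProvider([{ name: 'A', price: 1 }, { name: 'C', price: 3 }]));
```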

If you have `sort` or `order` set in your provider preferences, load balancing will be disabled.

## Provider Sorting

As described above, OpenRouter load balances based on price, while taking uptime into account.

If you instead want to _explicitly_ prioritize a particular provider attribute, you can include the `sort` field in the `provider` preferences. Load balancing will be disabled, and the router will try providers in order.

The three sort options are:

- `"price"`: prioritize lowest price
- `"throughput"`: prioritize highest throughput
- `"latency"`: prioritize lowest latency

<TSFetchCodeBlock
  title='Example sorting providers by throughput'
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'meta-llama/llama-3.1-70b-instruct',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      sort: 'throughput',
    },
  }}
/>

To _always_ prioritize low prices, and not apply any load balancing, set `sort` to `"price"`.

To _always_ prioritize low latency, and not apply any load balancing, set `sort` to `"latency"`.

## Nitro Shortcut

You can append `:nitro` to any model slug as a shortcut to sort by throughput. This is exactly equivalent to setting `provider.sort` to `"throughput"`.

<TSFetchCodeBlock
  title='Example using Nitro shortcut'
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'meta-llama/llama-3.1-70b-instruct:nitro',
    messages: [{ role: 'user', content: 'Hello' }],
  }}
/>

## Floor Price Shortcut

You can append `:floor` to any model slug as a shortcut to sort by price. This is exactly equivalent to setting `provider.sort` to `"price"`.

<TSFetchCodeBlock
  title='Example using Floor shortcut'
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'meta-llama/llama-3.1-70b-instruct:floor',
    messages: [{ role: 'user', content: 'Hello' }],
  }}
/>

## Ordering Specific Providers

You can set the providers that OpenRouter will prioritize for your request using the `order` field.

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `order` | string[] | - | List of provider slugs to try in order (e.g. `["anthropic", "openai"]`). |

The router will prioritize providers in this list, and in this order, for the model you're using. If you don't set this field, the router will [load balance](#price-based-load-balancing-default-strategy) across the top providers to maximize uptime.

<Tip>
  You can use the copy button next to provider names on model pages to get the exact provider slug, including any variants like "/turbo". See [Targeting Specific Provider Endpoints](#targeting-specific-provider-endpoints) for details.
</Tip>

OpenRouter will try them one at a time and proceed to other providers if none are operational. If you don't want to allow any other providers, you should [disable fallbacks](#disabling-fallbacks) as well.

### Example: Specifying providers with fallbacks

This example skips over OpenAI (which doesn't host Mixtral), tries Together, and then falls back to the normal list of providers on OpenRouter:

<TSFetchCodeBlock
  title='Example with Fallbacks Enabled'
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'mistralai/mixtral-8x7b-instruct',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      order: ['openai', 'together'],
    },
  }}
/>

### Example: Specifying providers with fallbacks disabled

Here's an example with `allow_fallbacks` set to `false` that skips over OpenAI (which doesn't host Mixtral), tries Together, and then fails if Together fails:

<TSFetchCodeBlock
  title='Example with Fallbacks Disabled'
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'mistralai/mixtral-8x7b-instruct',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      order: ['openai', 'together'],
      allow_fallbacks: false,
    },
  }}
/>

## Targeting Specific Provider Endpoints

Each provider on OpenRouter may host multiple endpoints for the same model, such as a default endpoint and a specialized "turbo" endpoint. To target a specific endpoint, you can use the copy button next to the provider name on the model detail page to obtain the exact provider slug.

For example, DeepInfra offers DeepSeek R1 through multiple endpoints:

- Default endpoint with slug `deepinfra`
- Turbo endpoint with slug `deepinfra/turbo`

By copying the exact provider slug and using it in your request's `order` array, you can ensure your request is routed to the specific endpoint you want:

<TSFetchCodeBlock
  title='Example targeting DeepInfra Turbo endpoint'
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'deepseek/deepseek-r1',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      order: ['deepinfra/turbo'],
      allow_fallbacks: false,
    },
  }}
/>

This approach is especially useful when you want to consistently use a specific variant of a model from a particular provider.

## Requiring Providers to Support All Parameters

You can restrict requests only to providers that support all parameters in your request using the `require_parameters` field.

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `require_parameters` | boolean | `false` | Only use providers that support all parameters in your request. |

With the default routing strategy, providers that don't support all the [LLM parameters](/docs/api-reference/parameters) specified in your request can still receive the request, but will ignore unknown parameters. When you set `require_parameters` to `true`, the request won't even be routed to that provider.

### Example: Excluding providers that don't support JSON formatting

For example, to only use providers that support JSON formatting:

<TSFetchCodeBlock
  uriPath='/api/v1/chat/completions'
  body={{
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      require_parameters: true,
    },
    response_format: { type: 'json_object' },
  }}
/>

## Requiring Providers to Comply with Data Policies

You can restrict requests only to providers that comply with your data policies using the `data_collection` field.

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `data_collection` | "allow" \| "deny" | "allow" | Control whether to use providers that may store data. |

- `allow`: (default) allow providers which store user data non-transiently and may train on it
- `deny`: use only providers which do not collect user data

Some model providers may log prompts, so we display them with a **Data Policy** tag on model pages. This is not a definitive source of third-party data policies, but represents our best knowledge.

<Tip title='Account-Wide Data Policy Filtering'>
  This is also available as an account-wide setting in [your privacy settings](https://openrouter.ai/settings/privacy). You can disable third-party model providers that store inputs for training.
</Tip>

### Example: Excluding providers that don't comply with data policies

To exclude providers that don't comply with your data policies, set `data_collection` to `deny`:

<TSFetchCodeBlock
  uriPath='/api/v1/chat/completions'
  body={{
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      data_collection: 'deny', // or "allow"
    },
  }}
/>

## Disabling Fallbacks

To guarantee that your request is only served by the top (lowest-cost) provider, you can disable fallbacks.

This is combined with the `order` field from [Ordering Specific Providers](#ordering-specific-providers) to restrict the providers that OpenRouter will prioritize to just your chosen list.

<TSFetchCodeBlock
  uriPath='/api/v1/chat/completions'
  body={{
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      allow_fallbacks: false,
    },
  }}
/>

## Allowing Only Specific Providers

You can allow only specific providers for a request by setting the `only` field in the `provider` object.

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `only` | string[] | - | List of provider slugs to allow for this request. |

<Warning>
  Only allowing some providers may significantly reduce fallback options and limit request recovery.
</Warning>

<Tip title="Account-Wide Allowed Providers">
  You can allow providers for all account requests by configuring your [preferences](/settings/preferences). This configuration applies to all API requests and chatroom messages.

  Note that when you allow providers for a specific request, the list of allowed providers is merged with your account-wide allowed providers.
</Tip>

### Example: Allowing Azure for a request calling GPT-4 Omni

Here's an example that will only use Azure for a request calling GPT-4 Omni:

<TSFetchCodeBlock
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'openai/gpt-4o',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      only: ['azure'],
    },
  }}
/>

## Ignoring Providers

You can ignore providers for a request by setting the `ignore` field in the `provider` object.

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `ignore` | string[] | - | List of provider slugs to skip for this request. |

<Warning>
  Ignoring multiple providers may significantly reduce fallback options and limit request recovery.
</Warning>

<Tip title="Account-Wide Ignored Providers">
  You can ignore providers for all account requests by configuring your [preferences](/settings/preferences). This configuration applies to all API requests and chatroom messages.

  Note that when you ignore providers for a specific request, the list of ignored providers is merged with your account-wide ignored providers.
</Tip>

### Example: Ignoring DeepInfra for a request calling Llama 3.3 70b

Here's an example that will ignore DeepInfra for a request calling Llama 3.3 70b:

<TSFetchCodeBlock
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'meta-llama/llama-3.3-70b-instruct',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      ignore: ['deepinfra'],
    },
  }}
/>

## Quantization

Quantization reduces model size and computational requirements while aiming to preserve performance. Most LLMs today use FP16 or BF16 for training and inference, cutting memory requirements in half compared to FP32. Some optimizations use FP8 or quantization to reduce size further (e.g., INT8, INT4).

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `quantizations` | string[] | - | List of quantization levels to filter by (e.g. `["int4", "int8"]`). [Learn more](#quantization) |

<Warning>
  Quantized models may exhibit degraded performance for certain prompts, depending on the method used.
</Warning>

Providers can support various quantization levels for open-weight models.

### Quantization Levels

By default, requests are load-balanced across all available providers, ordered by price. To filter providers by quantization level, specify the `quantizations` field in the `provider` parameter with the following values:

- `int4`: Integer (4 bit)
- `int8`: Integer (8 bit)
- `fp4`: Floating point (4 bit)
- `fp6`: Floating point (6 bit)
- `fp8`: Floating point (8 bit)
- `fp16`: Floating point (16 bit)
- `bf16`: Brain floating point (16 bit)
- `fp32`: Floating point (32 bit)
- `unknown`: Unknown

### Example: Requesting FP8 Quantization

Here's an example that will only use providers that support FP8 quantization:

<TSFetchCodeBlock
  uriPath='/api/v1/chat/completions'
  body={{
    model: 'meta-llama/llama-3.1-8b-instruct',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      quantizations: ['fp8'],
    },
  }}
/>

## Maximum Price

To filter providers by price, specify the `max_price` field in the `provider` parameter with a JSON object specifying the highest provider pricing you will accept.

For example, the value `{"prompt": 1, "completion": 2}` will route to any provider with a price of `<= $1/m` prompt tokens and `<= $2/m` completion tokens.

Some providers support per-request pricing, in which case you can use the `request` attribute of `max_price`. Lastly, `image` is also available, which specifies the max price per image you will accept.

Practically, this field is often combined with a provider `sort` to express, for example, "Use the provider with the highest throughput, as long as it doesn't cost more than `$x/m` tokens."
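
As a sketch of that combination (using the `max_price` shape shown above together with `sort`; the price limits are illustrative):

```typescript
// Sketch: prefer the highest-throughput provider, but only among providers
// charging at most $1/m prompt tokens and $2/m completion tokens.
fetch('https://openrouter.ai/api/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <OPENROUTER_API_KEY>',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'meta-llama/llama-3.1-70b-instruct',
    messages: [{ role: 'user', content: 'Hello' }],
    provider: {
      sort: 'throughput',
      max_price: { prompt: 1, completion: 2 },
    },
  }),
});
```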

## Terms of Service

You can view the terms of service for each provider below. You may not violate the terms of service or policies of third-party providers that power the models on OpenRouter.

<TermsOfServiceDescriptions />

## JSON Schema for Provider Preferences

For a complete list of options, see this JSON schema:

<ZodToJSONSchemaBlock
  title='Provider Preferences Schema'
  schema={ProviderPreferencesSchema}
/>

2 - We will use OpenRouter, not the Claude API.
3 - You should use api.recoder.xyz and engine.recoder.xyz; you will also most likely need to use the OpenRouter API (see the OpenAI SDK example in the Quickstart above).