@providerprotocol/ai 0.0.34 → 0.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +542 -3
- package/dist/anthropic/index.d.ts +2 -1
- package/dist/anthropic/index.js +151 -145
- package/dist/anthropic/index.js.map +1 -1
- package/dist/cerebras/index.d.ts +392 -0
- package/dist/cerebras/index.js +648 -0
- package/dist/cerebras/index.js.map +1 -0
- package/dist/chunk-3GWM5GR3.js +153 -0
- package/dist/chunk-3GWM5GR3.js.map +1 -0
- package/dist/chunk-4OGB7JZA.js +157 -0
- package/dist/chunk-4OGB7JZA.js.map +1 -0
- package/dist/chunk-7DXVRILR.js +49 -0
- package/dist/chunk-7DXVRILR.js.map +1 -0
- package/dist/{chunk-3C7O2RNO.js → chunk-A2IM7PGT.js} +6 -4
- package/dist/{chunk-3C7O2RNO.js.map → chunk-A2IM7PGT.js.map} +1 -1
- package/dist/{chunk-3D6XGGVG.js → chunk-ARVM24K2.js} +2 -2
- package/dist/{chunk-4J6OFUKX.js → chunk-AY55T37A.js} +70 -162
- package/dist/chunk-AY55T37A.js.map +1 -0
- package/dist/{chunk-ILR2D5PN.js → chunk-BRP5XJ6Q.js} +2 -86
- package/dist/chunk-BRP5XJ6Q.js.map +1 -0
- package/dist/chunk-C4JP64VW.js +298 -0
- package/dist/chunk-C4JP64VW.js.map +1 -0
- package/dist/chunk-COS4ON4G.js +111 -0
- package/dist/chunk-COS4ON4G.js.map +1 -0
- package/dist/chunk-ETBFOLQN.js +34 -0
- package/dist/chunk-ETBFOLQN.js.map +1 -0
- package/dist/chunk-HB4ZIH3T.js +31 -0
- package/dist/chunk-HB4ZIH3T.js.map +1 -0
- package/dist/chunk-I53CI6ZZ.js +142 -0
- package/dist/chunk-I53CI6ZZ.js.map +1 -0
- package/dist/chunk-IDZOVWP3.js +29 -0
- package/dist/chunk-IDZOVWP3.js.map +1 -0
- package/dist/chunk-JA3UZALR.js +88 -0
- package/dist/chunk-JA3UZALR.js.map +1 -0
- package/dist/{chunk-WAKD3OO5.js → chunk-N5DX5JW3.js} +31 -31
- package/dist/chunk-N5DX5JW3.js.map +1 -0
- package/dist/chunk-OIEWDFQU.js +97 -0
- package/dist/chunk-OIEWDFQU.js.map +1 -0
- package/dist/{chunk-TOJCZMVU.js → chunk-PMK5LZ5Z.js} +40 -40
- package/dist/chunk-PMK5LZ5Z.js.map +1 -0
- package/dist/chunk-UFFJDYCE.js +94 -0
- package/dist/chunk-UFFJDYCE.js.map +1 -0
- package/dist/chunk-VGKZIGVI.js +222 -0
- package/dist/chunk-VGKZIGVI.js.map +1 -0
- package/dist/chunk-VOEWHQUB.js +31 -0
- package/dist/chunk-VOEWHQUB.js.map +1 -0
- package/dist/{chunk-KUPF5KHT.js → chunk-Y5H7C5J4.js} +2 -2
- package/dist/chunk-ZI67WIQS.js +30 -0
- package/dist/chunk-ZI67WIQS.js.map +1 -0
- package/dist/{embedding-D2BYIehX.d.ts → embedding-CW6SaOOz.d.ts} +1 -1
- package/dist/google/index.d.ts +2 -1
- package/dist/google/index.js +202 -199
- package/dist/google/index.js.map +1 -1
- package/dist/groq/index.d.ts +410 -0
- package/dist/groq/index.js +649 -0
- package/dist/groq/index.js.map +1 -0
- package/dist/http/index.d.ts +3 -2
- package/dist/http/index.js +5 -4
- package/dist/image-stream-C0ciACM2.d.ts +11 -0
- package/dist/index.d.ts +8 -118
- package/dist/index.js +518 -767
- package/dist/index.js.map +1 -1
- package/dist/{llm-BQJZj3cD.d.ts → llm-DwbUK7un.d.ts} +12 -1632
- package/dist/middleware/logging/index.d.ts +76 -0
- package/dist/middleware/logging/index.js +74 -0
- package/dist/middleware/logging/index.js.map +1 -0
- package/dist/middleware/parsed-object/index.d.ts +45 -0
- package/dist/middleware/parsed-object/index.js +73 -0
- package/dist/middleware/parsed-object/index.js.map +1 -0
- package/dist/middleware/pubsub/index.d.ts +104 -0
- package/dist/middleware/pubsub/index.js +230 -0
- package/dist/middleware/pubsub/index.js.map +1 -0
- package/dist/middleware/pubsub/server/express/index.d.ts +52 -0
- package/dist/middleware/pubsub/server/express/index.js +11 -0
- package/dist/middleware/pubsub/server/express/index.js.map +1 -0
- package/dist/middleware/pubsub/server/fastify/index.d.ts +53 -0
- package/dist/middleware/pubsub/server/fastify/index.js +11 -0
- package/dist/middleware/pubsub/server/fastify/index.js.map +1 -0
- package/dist/middleware/pubsub/server/h3/index.d.ts +56 -0
- package/dist/middleware/pubsub/server/h3/index.js +11 -0
- package/dist/middleware/pubsub/server/h3/index.js.map +1 -0
- package/dist/middleware/pubsub/server/index.d.ts +78 -0
- package/dist/middleware/pubsub/server/index.js +34 -0
- package/dist/middleware/pubsub/server/index.js.map +1 -0
- package/dist/middleware/pubsub/server/webapi/index.d.ts +53 -0
- package/dist/middleware/pubsub/server/webapi/index.js +11 -0
- package/dist/middleware/pubsub/server/webapi/index.js.map +1 -0
- package/dist/ollama/index.d.ts +2 -1
- package/dist/ollama/index.js +48 -45
- package/dist/ollama/index.js.map +1 -1
- package/dist/openai/index.d.ts +2 -1
- package/dist/openai/index.js +319 -313
- package/dist/openai/index.js.map +1 -1
- package/dist/openrouter/index.d.ts +2 -1
- package/dist/openrouter/index.js +379 -383
- package/dist/openrouter/index.js.map +1 -1
- package/dist/proxy/index.d.ts +10 -914
- package/dist/proxy/index.js +275 -1007
- package/dist/proxy/index.js.map +1 -1
- package/dist/proxy/server/express/index.d.ts +161 -0
- package/dist/proxy/server/express/index.js +24 -0
- package/dist/proxy/server/express/index.js.map +1 -0
- package/dist/proxy/server/fastify/index.d.ts +162 -0
- package/dist/proxy/server/fastify/index.js +24 -0
- package/dist/proxy/server/fastify/index.js.map +1 -0
- package/dist/proxy/server/h3/index.d.ts +189 -0
- package/dist/proxy/server/h3/index.js +28 -0
- package/dist/proxy/server/h3/index.js.map +1 -0
- package/dist/proxy/server/index.d.ts +151 -0
- package/dist/proxy/server/index.js +48 -0
- package/dist/proxy/server/index.js.map +1 -0
- package/dist/proxy/server/webapi/index.d.ts +278 -0
- package/dist/proxy/server/webapi/index.js +32 -0
- package/dist/proxy/server/webapi/index.js.map +1 -0
- package/dist/responses/index.d.ts +650 -0
- package/dist/responses/index.js +930 -0
- package/dist/responses/index.js.map +1 -0
- package/dist/{retry-8Ch-WWgX.d.ts → retry-YayV42GV.d.ts} +1 -1
- package/dist/stream-CecfVCPO.d.ts +1632 -0
- package/dist/types-C8Gciizr.d.ts +168 -0
- package/dist/utils/index.d.ts +53 -0
- package/dist/utils/index.js +7 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/xai/index.d.ts +2 -1
- package/dist/xai/index.js +310 -310
- package/dist/xai/index.js.map +1 -1
- package/package.json +82 -4
- package/dist/chunk-4J6OFUKX.js.map +0 -1
- package/dist/chunk-ILR2D5PN.js.map +0 -1
- package/dist/chunk-TOJCZMVU.js.map +0 -1
- package/dist/chunk-WAKD3OO5.js.map +0 -1
- /package/dist/{chunk-3D6XGGVG.js.map → chunk-ARVM24K2.js.map} +0 -0
- /package/dist/{chunk-KUPF5KHT.js.map → chunk-Y5H7C5J4.js.map} +0 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
import { d as Provider } from '../llm-DwbUK7un.js';
|
|
2
|
+
import '../stream-CecfVCPO.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* @fileoverview Cerebras Provider Type Definitions
|
|
6
|
+
*
|
|
7
|
+
* This module contains all TypeScript type definitions for the Cerebras provider,
|
|
8
|
+
* including types for the Chat Completions API (OpenAI-compatible).
|
|
9
|
+
*
|
|
10
|
+
* @module providers/cerebras/types
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Tunable parameters for the Cerebras Chat Completions API.
 *
 * Every field is optional and is passed directly to the
 * `/v1/chat/completions` endpoint. The API is OpenAI-compatible and adds
 * extra features such as reasoning controls.
 *
 * @example
 * ```typescript
 * const params: CerebrasLLMParams = {
 *   temperature: 0.7,
 *   max_completion_tokens: 1000,
 *   top_p: 0.9
 * };
 * ```
 */
interface CerebrasLLMParams {
  // ---- Sampling and output length ----

  /** Sampling temperature controlling randomness (0 to 1.5, default: 1.0) */
  temperature?: number;
  /** Nucleus (top-p) sampling threshold (0.0 - 1.0) */
  top_p?: number;
  /** Seed for deterministic sampling */
  seed?: number;
  /** Upper bound on the number of tokens to generate */
  max_completion_tokens?: number;
  /** Custom stop sequences (max 4) */
  stop?: string[];

  // ---- Request metadata and output shape ----

  /** User identifier for tracking */
  user?: string;
  /** Response format for structured output */
  response_format?: CerebrasResponseFormat;

  // ---- Reasoning ----

  /**
   * Reasoning intensity for the gpt-oss-120b model.
   * Controls how much reasoning/thinking the model does.
   */
  reasoning_effort?: 'low' | 'medium' | 'high';
  /**
   * Where reasoning text shows up in the response.
   * - `parsed`: thinking is placed in a separate `reasoning` field
   * - `raw`: thinking is kept in the content inside `<think>...</think>` tags
   * - `hidden`: thinking is removed but still counted in tokens
   * - `none`: the model's default behavior
   */
  reasoning_format?: 'parsed' | 'raw' | 'hidden' | 'none';
  /**
   * Thinking-content switch for the zai-glm-4.7 model.
   * When true, thinking content is removed from the response.
   */
  clear_thinking?: boolean;

  // ---- Tools ----

  /** Tool choice configuration */
  tool_choice?: CerebrasToolChoice;
  /** Whether parallel tool calls are enabled (default: true) */
  parallel_tool_calls?: boolean;

  // ---- Scheduling and latency ----

  /**
   * Service tier used for request prioritization.
   * - `priority`: highest priority (dedicated endpoints only)
   * - `default`: standard production workloads
   * - `auto`: dynamic prioritization
   * - `flex`: lowest priority, overflow/experimental
   */
  service_tier?: 'default' | 'priority' | 'auto' | 'flex';
  /** Maximum queue time in ms (50-20000) */
  queue_threshold?: number;
  /**
   * Predicted output used to reduce latency.
   * Only supported on gpt-oss-120b and zai-glm-4.7.
   */
  prediction?: { type: 'content'; content: string };

  // ---- Log probabilities ----

  /** Whether to return log probabilities of output tokens */
  logprobs?: boolean;
  /** How many of the most likely tokens to return per position (0-20). Requires logprobs=true. */
  top_logprobs?: number;
}
|
|
87
|
+
/**
 * Response format options for structured output.
 *
 * A union discriminated by `type`: `'text'`, `'json_object'`, or
 * `'json_schema'`. Only the `'json_schema'` variant carries a payload, which
 * names the schema and supplies it as a plain object.
 */
type CerebrasResponseFormat =
  | { type: 'text' }
  | { type: 'json_object' }
  | {
      type: 'json_schema';
      json_schema: {
        name: string;
        description?: string;
        schema: Record<string, unknown>;
        strict?: boolean;
      };
    };
|
|
103
|
+
/**
 * Request body for the Cerebras Chat Completions API.
 *
 * Only `model` and `messages` are required. The remaining fields share their
 * names with the options on `CerebrasLLMParams`, plus the streaming switches
 * and the `tools` list.
 */
interface CerebrasRequest {
  // ---- Required ----
  model: string;
  messages: CerebrasMessage[];

  // ---- Streaming ----
  stream?: boolean;
  stream_options?: { include_usage?: boolean };

  // ---- Sampling and output length ----
  temperature?: number;
  top_p?: number;
  seed?: number;
  max_completion_tokens?: number;
  stop?: string[];

  // ---- Request metadata and output shape ----
  user?: string;
  response_format?: CerebrasResponseFormat;

  // ---- Tools ----
  tools?: CerebrasTool[];
  tool_choice?: CerebrasToolChoice;
  parallel_tool_calls?: boolean;

  // ---- Reasoning ----
  reasoning_effort?: 'low' | 'medium' | 'high';
  reasoning_format?: 'parsed' | 'raw' | 'hidden' | 'none';
  clear_thinking?: boolean;

  // ---- Scheduling and latency ----
  // NOTE(review): typed as a plain string here, whereas CerebrasLLMParams
  // restricts service_tier to 'default' | 'priority' | 'auto' | 'flex'.
  service_tier?: string;
  queue_threshold?: number;
  prediction?: { type: 'content'; content: string };

  // ---- Log probabilities ----
  logprobs?: boolean;
  top_logprobs?: number;
}
|
|
135
|
+
/**
 * Any message that can appear in a Cerebras API conversation:
 * system, user, assistant, or tool.
 */
type CerebrasMessage =
  | CerebrasSystemMessage
  | CerebrasUserMessage
  | CerebrasAssistantMessage
  | CerebrasToolMessage;
|
|
139
|
+
/**
 * System message: sets context and instructions.
 */
interface CerebrasSystemMessage {
  /** Fixed tag identifying this message as a system message */
  role: 'system';
  /** Message text */
  content: string;
}
|
|
144
|
+
/**
 * User message whose content is either a plain string or a list of
 * content parts.
 */
interface CerebrasUserMessage {
  /** Fixed tag identifying this message as a user message */
  role: 'user';
  /** Plain text, or an array of `CerebrasUserContent` parts */
  content: string | CerebrasUserContent[];
}
|
|
149
|
+
/**
 * Assistant message: carries the model's response.
 *
 * Everything besides `role` is optional; `content` may also be `null`.
 */
interface CerebrasAssistantMessage {
  /** Fixed tag identifying this message as an assistant message */
  role: 'assistant';
  /** Response text; may be absent or `null` */
  content?: string | null;
  /** Reasoning text, when present */
  reasoning?: string;
  /** Tool calls attached to this message, when present */
  tool_calls?: CerebrasToolCall[];
}
|
|
156
|
+
/**
 * Tool result message: supplies the output of a function call.
 */
interface CerebrasToolMessage {
  /** Fixed tag identifying this message as a tool result */
  role: 'tool';
  /** Identifier of the tool call this result belongs to */
  tool_call_id: string;
  /** Tool output as text */
  content: string;
}
|
|
162
|
+
/**
 * A single content part of a user message.
 * Text is currently the only member of this union.
 */
type CerebrasUserContent = CerebrasTextContent;
|
|
166
|
+
/**
 * Text content part.
 */
interface CerebrasTextContent {
  /** Fixed tag identifying this part as text */
  type: 'text';
  /** The text itself */
  text: string;
}
|
|
171
|
+
/**
 * Shape of a tool call carried by an assistant message.
 */
interface CerebrasToolCall {
  /** Identifier of this tool call */
  id: string;
  /** Fixed tag; `'function'` is the only value this type allows */
  type: 'function';
  /** The function being called: its name and its arguments as a string */
  function: { name: string; arguments: string };
}
|
|
182
|
+
/**
 * A tool definition as sent to the Cerebras API.
 */
interface CerebrasTool {
  /** Fixed tag; `'function'` is the only value this type allows */
  type: 'function';
  /** The function being described */
  function: {
    name: string;
    description: string;
    /** Parameter schema; its top-level `type` is always `'object'` */
    parameters: {
      type: 'object';
      properties: Record<string, unknown>;
      required?: string[];
      additionalProperties?: boolean;
    };
    strict?: boolean;
  };
}
|
|
199
|
+
/**
 * Tool choice options that control function calling behavior.
 *
 * Either one of the keywords `'none'`, `'auto'`, `'required'`, or an object
 * that names a specific function.
 */
type CerebrasToolChoice =
  | 'none'
  | 'auto'
  | 'required'
  | { type: 'function'; function: { name: string } };
|
|
208
|
+
/**
 * Non-streaming response returned by the Cerebras Chat Completions API.
 */
interface CerebrasResponse {
  /** Fixed tag; always `'chat.completion'` for this type */
  object: 'chat.completion';
  id: string;
  model: string;
  created?: number;
  system_fingerprint?: string;
  /** Completion choices */
  choices: CerebrasChoice[];
  /** Token usage statistics (required on this response type) */
  usage: CerebrasUsage;
  /** Time information, when present */
  time_info?: CerebrasTimeInfo;
}
|
|
221
|
+
/**
 * One choice within a completion response.
 */
interface CerebrasChoice {
  /** Index of this choice */
  index: number;
  /** The assistant message for this choice */
  message: CerebrasAssistantMessage;
  /** One of the listed finish reasons, or `null` */
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
}
|
|
227
|
+
/**
 * Time information reported in an API response.
 * Every field is optional. (Units are not stated in this file.)
 */
interface CerebrasTimeInfo {
  total_time?: number;
  queue_time?: number;
  prompt_time?: number;
  completion_time?: number;
}
|
|
234
|
+
/**
 * Token usage statistics reported in an API response.
 *
 * The three totals are always present; the two `*_details` objects are
 * optional breakdowns.
 */
interface CerebrasUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  /** Optional breakdown of prompt tokens */
  prompt_tokens_details?: { cached_tokens?: number };
  /** Optional breakdown of completion tokens by accepted/rejected prediction tokens */
  completion_tokens_details?: {
    accepted_prediction_tokens?: number;
    rejected_prediction_tokens?: number;
  };
}
|
|
247
|
+
/**
 * One chunk of a streaming response from the Cerebras API.
 */
interface CerebrasStreamChunk {
  /** Fixed tag; always `'chat.completion.chunk'` for this type */
  object: 'chat.completion.chunk';
  id: string;
  model: string;
  created?: number;
  system_fingerprint?: string;
  /** Streaming choices, each holding an incremental delta */
  choices: CerebrasStreamChoice[];
  /** Token usage statistics; may be absent or `null` on a chunk */
  usage?: CerebrasUsage | null;
  /** Time information, when present */
  time_info?: CerebrasTimeInfo;
}
|
|
260
|
+
/**
 * One streaming choice, holding incremental content.
 */
interface CerebrasStreamChoice {
  /** Index of this choice */
  index: number;
  /** Incremental content for this choice */
  delta: CerebrasStreamDelta;
  /** One of the listed finish reasons, or `null` */
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
}
|
|
266
|
+
/**
 * Incremental content delta inside a streaming chunk.
 * Every field is optional; `content` and `reasoning` may also be `null`.
 */
interface CerebrasStreamDelta {
  role?: 'assistant';
  /** Incremental response text */
  content?: string | null;
  /** Incremental reasoning text */
  reasoning?: string | null;
  /** Incremental tool call data */
  tool_calls?: CerebrasStreamToolCall[];
}
|
|
273
|
+
/**
 * Incremental tool call data inside a streaming chunk.
 *
 * Only `index` is required; every other field is optional.
 */
interface CerebrasStreamToolCall {
  /** Index of this tool call */
  index: number;
  id?: string;
  type?: 'function';
  /** Function name and/or arguments string, each optional */
  function?: { name?: string; arguments?: string };
}
|
|
283
|
+
/**
 * Cerebras-specific HTTP headers for API requests.
 *
 * An open map from header name to value; a value may be `undefined`.
 */
interface CerebrasHeaders {
  [headerName: string]: string | undefined;
}
|
|
289
|
+
|
|
290
|
+
/**
|
|
291
|
+
* @fileoverview Cerebras Provider Factory
|
|
292
|
+
*
|
|
293
|
+
* This module provides the main Cerebras provider implementation for the
|
|
294
|
+
* OpenAI-compatible Chat Completions API. Cerebras offers extremely fast
|
|
295
|
+
* inference with models like Llama, Qwen, and their reasoning models.
|
|
296
|
+
*
|
|
297
|
+
* @module providers/cerebras
|
|
298
|
+
*/
|
|
299
|
+
/**
 * Configuration options for the Cerebras provider.
 *
 * Intentionally empty: Cerebras currently supports a single API endpoint
 * (Chat Completions), so no additional options are needed.
 */
interface CerebrasProviderOptions {}
|
|
307
|
+
/**
 * The Cerebras provider instance.
 *
 * Use this provider to create model references for Cerebras models like
 * Llama 3.3, Qwen 3, GPT-OSS (reasoning), and other models available on Cerebras.
 *
 * @remarks
 * Production models:
 * - `llama3.1-8b` - Fast Llama 3.1 8B model (~2200 tok/s)
 * - `llama-3.3-70b` - Llama 3.3 70B with tool use (~2100 tok/s)
 * - `qwen-3-32b` - Qwen 3 32B with reasoning support (~2600 tok/s)
 * - `qwen-3-235b-a22b-instruct-2507` - Large Qwen model (~1400 tok/s)
 * - `gpt-oss-120b` - Reasoning model with high performance (~3000 tok/s)
 * - `zai-glm-4.6` - Z.ai GLM model with reasoning (~1000 tok/s)
 * - `zai-glm-4.7` - Z.ai GLM model with reasoning (~1000 tok/s)
 *
 * @example Basic usage
 * ```typescript
 * // NOTE(review): these import paths are relative to the library's source
 * // tree; confirm the published entry points before copying.
 * import { cerebras } from './providers/cerebras';
 * import { llm } from './core/llm';
 *
 * const model = llm({
 *   model: cerebras('llama-3.3-70b'),
 *   params: { max_completion_tokens: 1000 }
 * });
 *
 * const turn = await model.generate('Hello!');
 * console.log(turn.response.text);
 * ```
 *
 * @example With streaming
 * ```typescript
 * // `model` is created as in the basic example. `StreamEventType` must be
 * // imported as well; its module is not shown in this file.
 * const stream = model.stream('Tell me a story');
 *
 * for await (const event of stream) {
 *   if (event.type === StreamEventType.TextDelta) {
 *     process.stdout.write(event.delta.text ?? '');
 *   }
 * }
 *
 * const turn = await stream.turn;
 * console.log('Tokens used:', turn.usage.totalTokens);
 * ```
 *
 * @example With tools
 * ```typescript
 * const calculator = {
 *   name: 'calculate',
 *   description: 'Calculate a math expression',
 *   parameters: {
 *     type: 'object',
 *     properties: {
 *       expression: { type: 'string' }
 *     },
 *     required: ['expression']
 *   },
 *   run: async (params: { expression: string }) => {
 *     // Never pass model-supplied text to eval(): it would execute arbitrary
 *     // code. `evaluateArithmetic` stands in for your own safe parser.
 *     return evaluateArithmetic(params.expression);
 *   }
 * };
 *
 * const model = llm({
 *   model: cerebras('llama-3.3-70b'),
 *   tools: [calculator]
 * });
 *
 * const turn = await model.generate('What is 15 + 27?');
 * ```
 *
 * @example With reasoning
 * ```typescript
 * const model = llm({
 *   model: cerebras('gpt-oss-120b'),
 *   params: {
 *     reasoning_effort: 'high',
 *     reasoning_format: 'parsed'
 *   }
 * });
 *
 * const turn = await model.generate('Solve this complex math problem...');
 * // Reasoning is available in turn.response.metadata.cerebras.reasoning
 * ```
 */
declare const cerebras: Provider<CerebrasProviderOptions>;
|
|
391
|
+
|
|
392
|
+
export { type CerebrasHeaders, type CerebrasLLMParams, type CerebrasMessage, type CerebrasProviderOptions, type CerebrasRequest, type CerebrasResponse, type CerebrasResponseFormat, type CerebrasStreamChunk, type CerebrasTimeInfo, type CerebrasTool, type CerebrasToolCall, type CerebrasToolChoice, type CerebrasUsage, cerebras };
|