@livekit/agents-plugin-openai 0.9.3 → 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +16 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -3
- package/dist/index.js.map +1 -1
- package/dist/llm.cjs +156 -197
- package/dist/llm.cjs.map +1 -1
- package/dist/llm.d.cts +27 -8
- package/dist/llm.d.ts +27 -8
- package/dist/llm.d.ts.map +1 -1
- package/dist/llm.js +164 -188
- package/dist/llm.js.map +1 -1
- package/dist/models.cjs +14 -0
- package/dist/models.cjs.map +1 -1
- package/dist/models.d.cts +11 -6
- package/dist/models.d.ts +11 -6
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +6 -0
- package/dist/models.js.map +1 -1
- package/dist/realtime/api_proto.cjs.map +1 -1
- package/dist/realtime/api_proto.d.cts +15 -0
- package/dist/realtime/api_proto.d.ts +15 -0
- package/dist/realtime/api_proto.d.ts.map +1 -1
- package/dist/realtime/api_proto.js.map +1 -1
- package/dist/realtime/realtime_model.cjs +1057 -820
- package/dist/realtime/realtime_model.cjs.map +1 -1
- package/dist/realtime/realtime_model.d.cts +126 -160
- package/dist/realtime/realtime_model.d.ts +126 -160
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +1067 -825
- package/dist/realtime/realtime_model.js.map +1 -1
- package/dist/tts.cjs +5 -5
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +2 -1
- package/dist/tts.d.ts +2 -1
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +6 -6
- package/dist/tts.js.map +1 -1
- package/package.json +9 -7
- package/src/index.ts +19 -5
- package/src/llm.ts +227 -228
- package/src/models.ts +83 -5
- package/src/realtime/api_proto.ts +15 -1
- package/src/realtime/realtime_model.ts +1305 -996
- package/src/tts.ts +6 -6
package/src/models.ts
CHANGED
|
@@ -3,6 +3,12 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
export type ChatModels =
|
|
6
|
+
| 'gpt-5'
|
|
7
|
+
| 'gpt-5-mini'
|
|
8
|
+
| 'gpt-5-nano'
|
|
9
|
+
| 'gpt-4.1'
|
|
10
|
+
| 'gpt-4.1-mini'
|
|
11
|
+
| 'gpt-4.1-nano'
|
|
6
12
|
| 'gpt-4o'
|
|
7
13
|
| 'gpt-4o-2024-05-13'
|
|
8
14
|
| 'gpt-4o-mini'
|
|
@@ -41,8 +47,26 @@ export type TTSVoices =
|
|
|
41
47
|
| 'nova'
|
|
42
48
|
| 'onyx'
|
|
43
49
|
| 'sage'
|
|
44
|
-
| 'shimmer'
|
|
45
|
-
|
|
50
|
+
| 'shimmer';
|
|
51
|
+
|
|
52
|
+
export type DalleModels = 'dall-e-2' | 'dall-e-3';
|
|
53
|
+
|
|
54
|
+
export type EmbeddingModels =
|
|
55
|
+
| 'text-embedding-ada-002'
|
|
56
|
+
| 'text-embedding-3-small'
|
|
57
|
+
| 'text-embedding-3-large';
|
|
58
|
+
|
|
59
|
+
export type AssistantTools = 'code_interpreter' | 'file_search' | 'function';
|
|
60
|
+
|
|
61
|
+
export type VertexModels =
|
|
62
|
+
| 'google/gemini-2.0-flash-exp'
|
|
63
|
+
| 'google/gemini-1.5-flash'
|
|
64
|
+
| 'google/gemini-1.5-pro'
|
|
65
|
+
| 'google/gemini-1.0-pro-vision'
|
|
66
|
+
| 'google/gemini-1.0-pro-vision-001'
|
|
67
|
+
| 'google/gemini-1.0-pro-002'
|
|
68
|
+
| 'google/gemini-1.0-pro-001'
|
|
69
|
+
| 'google/gemini-1.0-pro';
|
|
46
70
|
|
|
47
71
|
// adapters for OpenAI-compatible LLMs, TTSs, STTs
|
|
48
72
|
|
|
@@ -50,7 +74,16 @@ export type TelnyxChatModels =
|
|
|
50
74
|
| 'meta-llama/Meta-Llama-3.1-8B-Instruct'
|
|
51
75
|
| 'meta-llama/Meta-Llama-3.1-70B-Instruct';
|
|
52
76
|
|
|
53
|
-
export type CerebrasChatModels =
|
|
77
|
+
export type CerebrasChatModels =
|
|
78
|
+
| 'llama3.1-8b'
|
|
79
|
+
| 'llama-3.3-70b'
|
|
80
|
+
| 'llama-4-scout-17b-16e-instruct'
|
|
81
|
+
| 'llama-4-maverick-17b-128e-instruct'
|
|
82
|
+
| 'qwen-3-32b'
|
|
83
|
+
| 'qwen-3-235b-a22b-instruct-2507'
|
|
84
|
+
| 'qwen-3-235b-a22b-thinking-2507'
|
|
85
|
+
| 'qwen-3-coder-480b'
|
|
86
|
+
| 'gpt-oss-120b';
|
|
54
87
|
|
|
55
88
|
export type PerplexityChatModels =
|
|
56
89
|
| 'llama-3.1-sonar-small-128k-online'
|
|
@@ -62,7 +95,6 @@ export type PerplexityChatModels =
|
|
|
62
95
|
|
|
63
96
|
export type GroqChatModels =
|
|
64
97
|
| 'llama-3.1-405b-reasoning'
|
|
65
|
-
| 'llama-3.1-70b-versatile'
|
|
66
98
|
| 'llama-3.1-8b-instant'
|
|
67
99
|
| 'llama-3.3-70b-versatile'
|
|
68
100
|
| 'llama3-groq-70b-8192-tool-use-preview'
|
|
@@ -82,6 +114,40 @@ export type GroqAudioModels =
|
|
|
82
114
|
export type DeepSeekChatModels = 'deepseek-coder' | 'deepseek-chat';
|
|
83
115
|
|
|
84
116
|
export type TogetherChatModels =
|
|
117
|
+
| 'Austism/chronos-hermes-13b'
|
|
118
|
+
| 'Gryphe/MythoMax-L2-13b'
|
|
119
|
+
| 'NousResearch/Nous-Capybara-7B-V1p9'
|
|
120
|
+
| 'NousResearch/Nous-Hermes-2-Mistral-7B-DPO'
|
|
121
|
+
| 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO'
|
|
122
|
+
| 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT'
|
|
123
|
+
| 'NousResearch/Nous-Hermes-2-Yi-34B'
|
|
124
|
+
| 'NousResearch/Nous-Hermes-Llama2-13b'
|
|
125
|
+
| 'NousResearch/Nous-Hermes-llama-2-7b'
|
|
126
|
+
| 'Open-Orca/Mistral-7B-OpenOrca'
|
|
127
|
+
| 'Qwen/Qwen1.5-0.5B-Chat'
|
|
128
|
+
| 'Qwen/Qwen1.5-1.8B-Chat'
|
|
129
|
+
| 'Qwen/Qwen1.5-110B-Chat'
|
|
130
|
+
| 'Qwen/Qwen1.5-14B-Chat'
|
|
131
|
+
| 'Qwen/Qwen1.5-32B-Chat'
|
|
132
|
+
| 'Qwen/Qwen1.5-4B-Chat'
|
|
133
|
+
| 'Qwen/Qwen1.5-72B-Chat'
|
|
134
|
+
| 'Qwen/Qwen1.5-7B-Chat'
|
|
135
|
+
| 'Qwen/Qwen2-72B-Instruct'
|
|
136
|
+
| 'Snowflake/snowflake-arctic-instruct'
|
|
137
|
+
| 'Undi95/ReMM-SLERP-L2-13B'
|
|
138
|
+
| 'Undi95/Toppy-M-7B'
|
|
139
|
+
| 'WizardLM/WizardLM-13B-V1.2'
|
|
140
|
+
| 'allenai/OLMo-7B'
|
|
141
|
+
| 'allenai/OLMo-7B-Instruct'
|
|
142
|
+
| 'allenai/OLMo-7B-Twin-2T'
|
|
143
|
+
| 'codellama/CodeLlama-13b-Instruct-hf'
|
|
144
|
+
| 'codellama/CodeLlama-34b-Instruct-hf'
|
|
145
|
+
| 'codellama/CodeLlama-70b-Instruct-hf'
|
|
146
|
+
| 'codellama/CodeLlama-7b-Instruct-hf'
|
|
147
|
+
| 'cognitivecomputations/dolphin-2.5-mixtral-8x7b'
|
|
148
|
+
| 'databricks/dbrx-instruct'
|
|
149
|
+
| 'deepseek-ai/deepseek-coder-33b-instruct'
|
|
150
|
+
| 'deepseek-ai/deepseek-llm-67b-chat'
|
|
85
151
|
| 'garage-bAInd/Platypus2-70B-instruct'
|
|
86
152
|
| 'google/gemma-2-27b-it'
|
|
87
153
|
| 'google/gemma-2-9b-it'
|
|
@@ -101,6 +167,7 @@ export type TogetherChatModels =
|
|
|
101
167
|
| 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
|
|
102
168
|
| 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo'
|
|
103
169
|
| 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
|
|
170
|
+
| 'meta-llama/Llama-3.3-70B-Instruct-Turbo'
|
|
104
171
|
| 'mistralai/Mistral-7B-Instruct-v0.1'
|
|
105
172
|
| 'mistralai/Mistral-7B-Instruct-v0.2'
|
|
106
173
|
| 'mistralai/Mistral-7B-Instruct-v0.3'
|
|
@@ -127,10 +194,21 @@ export type OctoChatModels =
|
|
|
127
194
|
| 'mixtral-8x7b-instruct'
|
|
128
195
|
| 'wizardlm-2-8x22bllamaguard-2-7b';
|
|
129
196
|
|
|
130
|
-
export type XAIChatModels =
|
|
197
|
+
export type XAIChatModels =
|
|
198
|
+
| 'grok-3'
|
|
199
|
+
| 'grok-3-fast'
|
|
200
|
+
| 'grok-3-mini'
|
|
201
|
+
| 'grok-3-mini-fast'
|
|
202
|
+
| 'grok-2-vision-1212'
|
|
203
|
+
| 'grok-2-image-1212'
|
|
204
|
+
| 'grok-2-1212';
|
|
131
205
|
|
|
132
206
|
export type MetaChatModels =
|
|
133
207
|
| 'Llama-4-Scout-17B-16E-Instruct-FP8'
|
|
134
208
|
| 'Llama-4-Maverick-17B-128E-Instruct-FP8'
|
|
135
209
|
| 'Llama-3.3-70B-Instruct'
|
|
136
210
|
| 'Llama-3.3-8B-Instruct';
|
|
211
|
+
|
|
212
|
+
export function supportsReasoningEffort(model: ChatModels | string): boolean {
|
|
213
|
+
return model === 'gpt-5' || model === 'gpt-5-mini' || model === 'gpt-5-nano';
|
|
214
|
+
}
|
|
package/src/realtime/api_proto.ts
CHANGED
|
@@ -84,7 +84,6 @@ export interface Tool {
|
|
|
84
84
|
type: 'object';
|
|
85
85
|
properties: {
|
|
86
86
|
[prop: string]: {
|
|
87
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
88
87
|
[prop: string]: any;
|
|
89
88
|
};
|
|
90
89
|
};
|
|
@@ -97,6 +96,8 @@ export type TurnDetectionType = {
|
|
|
97
96
|
threshold?: number; // 0.0 to 1.0, default: 0.5
|
|
98
97
|
prefix_padding_ms?: number; // default: 300
|
|
99
98
|
silence_duration_ms?: number; // default: 200
|
|
99
|
+
create_response?: boolean; // default: true
|
|
100
|
+
interrupt_response?: boolean; // default: true
|
|
100
101
|
};
|
|
101
102
|
|
|
102
103
|
export type InputAudioTranscription = {
|
|
@@ -228,6 +229,7 @@ export interface ModelUsage {
|
|
|
228
229
|
cached_tokens_details: {
|
|
229
230
|
text_tokens: number;
|
|
230
231
|
audio_tokens: number;
|
|
232
|
+
image_tokens: number;
|
|
231
233
|
};
|
|
232
234
|
};
|
|
233
235
|
output_token_details: {
|
|
@@ -243,6 +245,7 @@ export interface ResponseResource {
|
|
|
243
245
|
status_details: ResponseStatusDetails;
|
|
244
246
|
output: ItemResource[];
|
|
245
247
|
usage?: ModelUsage;
|
|
248
|
+
metadata?: Record<string, string>;
|
|
246
249
|
}
|
|
247
250
|
|
|
248
251
|
// Client Events
|
|
@@ -254,6 +257,7 @@ interface BaseClientEvent {
|
|
|
254
257
|
export interface SessionUpdateEvent extends BaseClientEvent {
|
|
255
258
|
type: 'session.update';
|
|
256
259
|
session: Partial<{
|
|
260
|
+
model: Model;
|
|
257
261
|
modalities: ['text', 'audio'] | ['text'];
|
|
258
262
|
instructions: string;
|
|
259
263
|
voice: Voice;
|
|
@@ -265,6 +269,7 @@ export interface SessionUpdateEvent extends BaseClientEvent {
|
|
|
265
269
|
tool_choice: ToolChoice;
|
|
266
270
|
temperature: number;
|
|
267
271
|
max_response_output_tokens?: number | 'inf';
|
|
272
|
+
speed?: number;
|
|
268
273
|
}>;
|
|
269
274
|
}
|
|
270
275
|
|
|
@@ -282,24 +287,28 @@ export interface InputAudioBufferClearEvent extends BaseClientEvent {
|
|
|
282
287
|
}
|
|
283
288
|
|
|
284
289
|
export interface UserItemCreate {
|
|
290
|
+
id: string;
|
|
285
291
|
type: 'message';
|
|
286
292
|
role: 'user';
|
|
287
293
|
content: (InputTextContent | InputAudioContent)[];
|
|
288
294
|
}
|
|
289
295
|
|
|
290
296
|
export interface AssistantItemCreate {
|
|
297
|
+
id: string;
|
|
291
298
|
type: 'message';
|
|
292
299
|
role: 'assistant';
|
|
293
300
|
content: TextContent[];
|
|
294
301
|
}
|
|
295
302
|
|
|
296
303
|
export interface SystemItemCreate {
|
|
304
|
+
id: string;
|
|
297
305
|
type: 'message';
|
|
298
306
|
role: 'system';
|
|
299
307
|
content: InputTextContent[];
|
|
300
308
|
}
|
|
301
309
|
|
|
302
310
|
export interface FunctionCallOutputItemCreate {
|
|
311
|
+
id: string;
|
|
303
312
|
type: 'function_call_output';
|
|
304
313
|
call_id: string;
|
|
305
314
|
output: string;
|
|
@@ -340,6 +349,7 @@ export interface ResponseCreateEvent extends BaseClientEvent {
|
|
|
340
349
|
tool_choice: ToolChoice;
|
|
341
350
|
temperature: number;
|
|
342
351
|
max_output_tokens: number | 'inf';
|
|
352
|
+
metadata?: Record<string, string>;
|
|
343
353
|
}>;
|
|
344
354
|
}
|
|
345
355
|
|
|
@@ -412,6 +422,7 @@ export interface InputAudioBufferSpeechStoppedEvent extends BaseServerEvent {
|
|
|
412
422
|
|
|
413
423
|
export interface ConversationItemCreatedEvent extends BaseServerEvent {
|
|
414
424
|
type: 'conversation.item.created';
|
|
425
|
+
previous_item_id: string;
|
|
415
426
|
item: ItemResource;
|
|
416
427
|
}
|
|
417
428
|
|
|
@@ -482,6 +493,7 @@ export interface ResponseContentPartAddedEvent extends BaseServerEvent {
|
|
|
482
493
|
export interface ResponseContentPartDoneEvent extends BaseServerEvent {
|
|
483
494
|
type: 'response.content_part.done';
|
|
484
495
|
response_id: string;
|
|
496
|
+
item_id: string;
|
|
485
497
|
output_index: number;
|
|
486
498
|
content_index: number;
|
|
487
499
|
part: ContentPart;
|
|
@@ -506,6 +518,7 @@ export interface ResponseTextDoneEvent extends BaseServerEvent {
|
|
|
506
518
|
export interface ResponseAudioTranscriptDeltaEvent extends BaseServerEvent {
|
|
507
519
|
type: 'response.audio_transcript.delta';
|
|
508
520
|
response_id: string;
|
|
521
|
+
item_id: string;
|
|
509
522
|
output_index: number;
|
|
510
523
|
content_index: number;
|
|
511
524
|
delta: string;
|
|
@@ -522,6 +535,7 @@ export interface ResponseAudioTranscriptDoneEvent extends BaseServerEvent {
|
|
|
522
535
|
export interface ResponseAudioDeltaEvent extends BaseServerEvent {
|
|
523
536
|
type: 'response.audio.delta';
|
|
524
537
|
response_id: string;
|
|
538
|
+
item_id: string;
|
|
525
539
|
output_index: number;
|
|
526
540
|
content_index: number;
|
|
527
541
|
delta: AudioBase64Bytes;
|