@livekit/agents-plugin-openai 0.9.3 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/index.cjs +16 -5
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +4 -4
  4. package/dist/index.d.ts +4 -4
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +14 -3
  7. package/dist/index.js.map +1 -1
  8. package/dist/llm.cjs +156 -197
  9. package/dist/llm.cjs.map +1 -1
  10. package/dist/llm.d.cts +27 -8
  11. package/dist/llm.d.ts +27 -8
  12. package/dist/llm.d.ts.map +1 -1
  13. package/dist/llm.js +164 -188
  14. package/dist/llm.js.map +1 -1
  15. package/dist/models.cjs +14 -0
  16. package/dist/models.cjs.map +1 -1
  17. package/dist/models.d.cts +11 -6
  18. package/dist/models.d.ts +11 -6
  19. package/dist/models.d.ts.map +1 -1
  20. package/dist/models.js +6 -0
  21. package/dist/models.js.map +1 -1
  22. package/dist/realtime/api_proto.cjs.map +1 -1
  23. package/dist/realtime/api_proto.d.cts +15 -0
  24. package/dist/realtime/api_proto.d.ts +15 -0
  25. package/dist/realtime/api_proto.d.ts.map +1 -1
  26. package/dist/realtime/api_proto.js.map +1 -1
  27. package/dist/realtime/realtime_model.cjs +1057 -820
  28. package/dist/realtime/realtime_model.cjs.map +1 -1
  29. package/dist/realtime/realtime_model.d.cts +126 -160
  30. package/dist/realtime/realtime_model.d.ts +126 -160
  31. package/dist/realtime/realtime_model.d.ts.map +1 -1
  32. package/dist/realtime/realtime_model.js +1067 -825
  33. package/dist/realtime/realtime_model.js.map +1 -1
  34. package/dist/tts.cjs +5 -5
  35. package/dist/tts.cjs.map +1 -1
  36. package/dist/tts.d.cts +2 -1
  37. package/dist/tts.d.ts +2 -1
  38. package/dist/tts.d.ts.map +1 -1
  39. package/dist/tts.js +6 -6
  40. package/dist/tts.js.map +1 -1
  41. package/package.json +9 -7
  42. package/src/index.ts +19 -5
  43. package/src/llm.ts +227 -228
  44. package/src/models.ts +83 -5
  45. package/src/realtime/api_proto.ts +15 -1
  46. package/src/realtime/realtime_model.ts +1305 -996
  47. package/src/tts.ts +6 -6
package/src/models.ts CHANGED
@@ -3,6 +3,12 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  export type ChatModels =
6
+ | 'gpt-5'
7
+ | 'gpt-5-mini'
8
+ | 'gpt-5-nano'
9
+ | 'gpt-4.1'
10
+ | 'gpt-4.1-mini'
11
+ | 'gpt-4.1-nano'
6
12
  | 'gpt-4o'
7
13
  | 'gpt-4o-2024-05-13'
8
14
  | 'gpt-4o-mini'
@@ -41,8 +47,26 @@ export type TTSVoices =
41
47
  | 'nova'
42
48
  | 'onyx'
43
49
  | 'sage'
44
- | 'shimmer'
45
- | 'verse';
50
+ | 'shimmer';
51
+
52
+ export type DalleModels = 'dall-e-2' | 'dall-e-3';
53
+
54
+ export type EmbeddingModels =
55
+ | 'text-embedding-ada-002'
56
+ | 'text-embedding-3-small'
57
+ | 'text-embedding-3-large';
58
+
59
+ export type AssistantTools = 'code_interpreter' | 'file_search' | 'function';
60
+
61
+ export type VertexModels =
62
+ | 'google/gemini-2.0-flash-exp'
63
+ | 'google/gemini-1.5-flash'
64
+ | 'google/gemini-1.5-pro'
65
+ | 'google/gemini-1.0-pro-vision'
66
+ | 'google/gemini-1.0-pro-vision-001'
67
+ | 'google/gemini-1.0-pro-002'
68
+ | 'google/gemini-1.0-pro-001'
69
+ | 'google/gemini-1.0-pro';
46
70
 
47
71
  // adapters for OpenAI-compatible LLMs, TTSs, STTs
48
72
 
@@ -50,7 +74,16 @@ export type TelnyxChatModels =
50
74
  | 'meta-llama/Meta-Llama-3.1-8B-Instruct'
51
75
  | 'meta-llama/Meta-Llama-3.1-70B-Instruct';
52
76
 
53
- export type CerebrasChatModels = 'llama3.1-8b' | 'llama3.1-70b';
77
+ export type CerebrasChatModels =
78
+ | 'llama3.1-8b'
79
+ | 'llama-3.3-70b'
80
+ | 'llama-4-scout-17b-16e-instruct'
81
+ | 'llama-4-maverick-17b-128e-instruct'
82
+ | 'qwen-3-32b'
83
+ | 'qwen-3-235b-a22b-instruct-2507'
84
+ | 'qwen-3-235b-a22b-thinking-2507'
85
+ | 'qwen-3-coder-480b'
86
+ | 'gpt-oss-120b';
54
87
 
55
88
  export type PerplexityChatModels =
56
89
  | 'llama-3.1-sonar-small-128k-online'
@@ -62,7 +95,6 @@ export type PerplexityChatModels =
62
95
 
63
96
  export type GroqChatModels =
64
97
  | 'llama-3.1-405b-reasoning'
65
- | 'llama-3.1-70b-versatile'
66
98
  | 'llama-3.1-8b-instant'
67
99
  | 'llama-3.3-70b-versatile'
68
100
  | 'llama3-groq-70b-8192-tool-use-preview'
@@ -82,6 +114,40 @@ export type GroqAudioModels =
82
114
  export type DeepSeekChatModels = 'deepseek-coder' | 'deepseek-chat';
83
115
 
84
116
  export type TogetherChatModels =
117
+ | 'Austism/chronos-hermes-13b'
118
+ | 'Gryphe/MythoMax-L2-13b'
119
+ | 'NousResearch/Nous-Capybara-7B-V1p9'
120
+ | 'NousResearch/Nous-Hermes-2-Mistral-7B-DPO'
121
+ | 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO'
122
+ | 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT'
123
+ | 'NousResearch/Nous-Hermes-2-Yi-34B'
124
+ | 'NousResearch/Nous-Hermes-Llama2-13b'
125
+ | 'NousResearch/Nous-Hermes-llama-2-7b'
126
+ | 'Open-Orca/Mistral-7B-OpenOrca'
127
+ | 'Qwen/Qwen1.5-0.5B-Chat'
128
+ | 'Qwen/Qwen1.5-1.8B-Chat'
129
+ | 'Qwen/Qwen1.5-110B-Chat'
130
+ | 'Qwen/Qwen1.5-14B-Chat'
131
+ | 'Qwen/Qwen1.5-32B-Chat'
132
+ | 'Qwen/Qwen1.5-4B-Chat'
133
+ | 'Qwen/Qwen1.5-72B-Chat'
134
+ | 'Qwen/Qwen1.5-7B-Chat'
135
+ | 'Qwen/Qwen2-72B-Instruct'
136
+ | 'Snowflake/snowflake-arctic-instruct'
137
+ | 'Undi95/ReMM-SLERP-L2-13B'
138
+ | 'Undi95/Toppy-M-7B'
139
+ | 'WizardLM/WizardLM-13B-V1.2'
140
+ | 'allenai/OLMo-7B'
141
+ | 'allenai/OLMo-7B-Instruct'
142
+ | 'allenai/OLMo-7B-Twin-2T'
143
+ | 'codellama/CodeLlama-13b-Instruct-hf'
144
+ | 'codellama/CodeLlama-34b-Instruct-hf'
145
+ | 'codellama/CodeLlama-70b-Instruct-hf'
146
+ | 'codellama/CodeLlama-7b-Instruct-hf'
147
+ | 'cognitivecomputations/dolphin-2.5-mixtral-8x7b'
148
+ | 'databricks/dbrx-instruct'
149
+ | 'deepseek-ai/deepseek-coder-33b-instruct'
150
+ | 'deepseek-ai/deepseek-llm-67b-chat'
85
151
  | 'garage-bAInd/Platypus2-70B-instruct'
86
152
  | 'google/gemma-2-27b-it'
87
153
  | 'google/gemma-2-9b-it'
@@ -101,6 +167,7 @@ export type TogetherChatModels =
101
167
  | 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
102
168
  | 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo'
103
169
  | 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
170
+ | 'meta-llama/Llama-3.3-70B-Instruct-Turbo'
104
171
  | 'mistralai/Mistral-7B-Instruct-v0.1'
105
172
  | 'mistralai/Mistral-7B-Instruct-v0.2'
106
173
  | 'mistralai/Mistral-7B-Instruct-v0.3'
@@ -127,10 +194,21 @@ export type OctoChatModels =
127
194
  | 'mixtral-8x7b-instruct'
128
195
  | 'wizardlm-2-8x22bllamaguard-2-7b';
129
196
 
130
- export type XAIChatModels = 'grok-2' | 'grok-2-mini' | 'grok-2-mini-public' | 'grok-2-public';
197
+ export type XAIChatModels =
198
+ | 'grok-3'
199
+ | 'grok-3-fast'
200
+ | 'grok-3-mini'
201
+ | 'grok-3-mini-fast'
202
+ | 'grok-2-vision-1212'
203
+ | 'grok-2-image-1212'
204
+ | 'grok-2-1212';
131
205
 
132
206
  export type MetaChatModels =
133
207
  | 'Llama-4-Scout-17B-16E-Instruct-FP8'
134
208
  | 'Llama-4-Maverick-17B-128E-Instruct-FP8'
135
209
  | 'Llama-3.3-70B-Instruct'
136
210
  | 'Llama-3.3-8B-Instruct';
211
+
212
+ export function supportsReasoningEffort(model: ChatModels | string): boolean {
213
+ return model === 'gpt-5' || model === 'gpt-5-mini' || model === 'gpt-5-nano';
214
+ }
package/src/realtime/api_proto.ts CHANGED
@@ -84,7 +84,6 @@ export interface Tool {
84
84
  type: 'object';
85
85
  properties: {
86
86
  [prop: string]: {
87
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
88
87
  [prop: string]: any;
89
88
  };
90
89
  };
@@ -97,6 +96,8 @@ export type TurnDetectionType = {
97
96
  threshold?: number; // 0.0 to 1.0, default: 0.5
98
97
  prefix_padding_ms?: number; // default: 300
99
98
  silence_duration_ms?: number; // default: 200
99
+ create_response?: boolean; // default: true
100
+ interrupt_response?: boolean; // default: true
100
101
  };
101
102
 
102
103
  export type InputAudioTranscription = {
@@ -228,6 +229,7 @@ export interface ModelUsage {
228
229
  cached_tokens_details: {
229
230
  text_tokens: number;
230
231
  audio_tokens: number;
232
+ image_tokens: number;
231
233
  };
232
234
  };
233
235
  output_token_details: {
@@ -243,6 +245,7 @@ export interface ResponseResource {
243
245
  status_details: ResponseStatusDetails;
244
246
  output: ItemResource[];
245
247
  usage?: ModelUsage;
248
+ metadata?: Record<string, string>;
246
249
  }
247
250
 
248
251
  // Client Events
@@ -254,6 +257,7 @@ interface BaseClientEvent {
254
257
  export interface SessionUpdateEvent extends BaseClientEvent {
255
258
  type: 'session.update';
256
259
  session: Partial<{
260
+ model: Model;
257
261
  modalities: ['text', 'audio'] | ['text'];
258
262
  instructions: string;
259
263
  voice: Voice;
@@ -265,6 +269,7 @@ export interface SessionUpdateEvent extends BaseClientEvent {
265
269
  tool_choice: ToolChoice;
266
270
  temperature: number;
267
271
  max_response_output_tokens?: number | 'inf';
272
+ speed?: number;
268
273
  }>;
269
274
  }
270
275
 
@@ -282,24 +287,28 @@ export interface InputAudioBufferClearEvent extends BaseClientEvent {
282
287
  }
283
288
 
284
289
  export interface UserItemCreate {
290
+ id: string;
285
291
  type: 'message';
286
292
  role: 'user';
287
293
  content: (InputTextContent | InputAudioContent)[];
288
294
  }
289
295
 
290
296
  export interface AssistantItemCreate {
297
+ id: string;
291
298
  type: 'message';
292
299
  role: 'assistant';
293
300
  content: TextContent[];
294
301
  }
295
302
 
296
303
  export interface SystemItemCreate {
304
+ id: string;
297
305
  type: 'message';
298
306
  role: 'system';
299
307
  content: InputTextContent[];
300
308
  }
301
309
 
302
310
  export interface FunctionCallOutputItemCreate {
311
+ id: string;
303
312
  type: 'function_call_output';
304
313
  call_id: string;
305
314
  output: string;
@@ -340,6 +349,7 @@ export interface ResponseCreateEvent extends BaseClientEvent {
340
349
  tool_choice: ToolChoice;
341
350
  temperature: number;
342
351
  max_output_tokens: number | 'inf';
352
+ metadata?: Record<string, string>;
343
353
  }>;
344
354
  }
345
355
 
@@ -412,6 +422,7 @@ export interface InputAudioBufferSpeechStoppedEvent extends BaseServerEvent {
412
422
 
413
423
  export interface ConversationItemCreatedEvent extends BaseServerEvent {
414
424
  type: 'conversation.item.created';
425
+ previous_item_id: string;
415
426
  item: ItemResource;
416
427
  }
417
428
 
@@ -482,6 +493,7 @@ export interface ResponseContentPartAddedEvent extends BaseServerEvent {
482
493
  export interface ResponseContentPartDoneEvent extends BaseServerEvent {
483
494
  type: 'response.content_part.done';
484
495
  response_id: string;
496
+ item_id: string;
485
497
  output_index: number;
486
498
  content_index: number;
487
499
  part: ContentPart;
@@ -506,6 +518,7 @@ export interface ResponseTextDoneEvent extends BaseServerEvent {
506
518
  export interface ResponseAudioTranscriptDeltaEvent extends BaseServerEvent {
507
519
  type: 'response.audio_transcript.delta';
508
520
  response_id: string;
521
+ item_id: string;
509
522
  output_index: number;
510
523
  content_index: number;
511
524
  delta: string;
@@ -522,6 +535,7 @@ export interface ResponseAudioTranscriptDoneEvent extends BaseServerEvent {
522
535
  export interface ResponseAudioDeltaEvent extends BaseServerEvent {
523
536
  type: 'response.audio.delta';
524
537
  response_id: string;
538
+ item_id: string;
525
539
  output_index: number;
526
540
  content_index: number;
527
541
  delta: AudioBase64Bytes;