@livekit/agents 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.cjs +104 -0
  9. package/dist/inference/api_protos.cjs.map +1 -0
  10. package/dist/inference/api_protos.d.cts +222 -0
  11. package/dist/inference/api_protos.d.ts +222 -0
  12. package/dist/inference/api_protos.d.ts.map +1 -0
  13. package/dist/inference/api_protos.js +70 -0
  14. package/dist/inference/api_protos.js.map +1 -0
  15. package/dist/inference/index.cjs +56 -0
  16. package/dist/inference/index.cjs.map +1 -0
  17. package/dist/inference/index.d.cts +9 -0
  18. package/dist/inference/index.d.ts +9 -0
  19. package/dist/inference/index.d.ts.map +1 -0
  20. package/dist/inference/index.js +16 -0
  21. package/dist/inference/index.js.map +1 -0
  22. package/dist/inference/llm.cjs +315 -0
  23. package/dist/inference/llm.cjs.map +1 -0
  24. package/dist/inference/llm.d.cts +92 -0
  25. package/dist/inference/llm.d.ts +92 -0
  26. package/dist/inference/llm.d.ts.map +1 -0
  27. package/dist/inference/llm.js +286 -0
  28. package/dist/inference/llm.js.map +1 -0
  29. package/dist/inference/stt.cjs +305 -0
  30. package/dist/inference/stt.cjs.map +1 -0
  31. package/dist/inference/stt.d.cts +79 -0
  32. package/dist/inference/stt.d.ts +79 -0
  33. package/dist/inference/stt.d.ts.map +1 -0
  34. package/dist/inference/stt.js +284 -0
  35. package/dist/inference/stt.js.map +1 -0
  36. package/dist/inference/tts.cjs +317 -0
  37. package/dist/inference/tts.cjs.map +1 -0
  38. package/dist/inference/tts.d.cts +75 -0
  39. package/dist/inference/tts.d.ts +75 -0
  40. package/dist/inference/tts.d.ts.map +1 -0
  41. package/dist/inference/tts.js +299 -0
  42. package/dist/inference/tts.js.map +1 -0
  43. package/dist/inference/utils.cjs +76 -0
  44. package/dist/inference/utils.cjs.map +1 -0
  45. package/dist/inference/utils.d.cts +5 -0
  46. package/dist/inference/utils.d.ts +5 -0
  47. package/dist/inference/utils.d.ts.map +1 -0
  48. package/dist/inference/utils.js +51 -0
  49. package/dist/inference/utils.js.map +1 -0
  50. package/dist/tts/tts.cjs +1 -1
  51. package/dist/tts/tts.cjs.map +1 -1
  52. package/dist/tts/tts.js +1 -1
  53. package/dist/tts/tts.js.map +1 -1
  54. package/dist/utils.cjs +11 -0
  55. package/dist/utils.cjs.map +1 -1
  56. package/dist/utils.d.cts +1 -0
  57. package/dist/utils.d.ts +1 -0
  58. package/dist/utils.d.ts.map +1 -1
  59. package/dist/utils.js +10 -0
  60. package/dist/utils.js.map +1 -1
  61. package/dist/voice/agent.cjs +16 -3
  62. package/dist/voice/agent.cjs.map +1 -1
  63. package/dist/voice/agent.d.cts +4 -3
  64. package/dist/voice/agent.d.ts +4 -3
  65. package/dist/voice/agent.d.ts.map +1 -1
  66. package/dist/voice/agent.js +20 -3
  67. package/dist/voice/agent.js.map +1 -1
  68. package/dist/voice/agent_session.cjs +16 -3
  69. package/dist/voice/agent_session.cjs.map +1 -1
  70. package/dist/voice/agent_session.d.cts +4 -3
  71. package/dist/voice/agent_session.d.ts +4 -3
  72. package/dist/voice/agent_session.d.ts.map +1 -1
  73. package/dist/voice/agent_session.js +20 -3
  74. package/dist/voice/agent_session.js.map +1 -1
  75. package/dist/voice/room_io/_input.cjs.map +1 -1
  76. package/dist/voice/room_io/_input.d.ts.map +1 -1
  77. package/dist/voice/room_io/_input.js +1 -0
  78. package/dist/voice/room_io/_input.js.map +1 -1
  79. package/dist/worker.cjs.map +1 -1
  80. package/dist/worker.d.ts.map +1 -1
  81. package/dist/worker.js +1 -1
  82. package/dist/worker.js.map +1 -1
  83. package/package.json +3 -2
  84. package/src/index.ts +2 -1
  85. package/src/inference/api_protos.ts +82 -0
  86. package/src/inference/index.ts +12 -0
  87. package/src/inference/llm.ts +485 -0
  88. package/src/inference/stt.ts +414 -0
  89. package/src/inference/tts.ts +421 -0
  90. package/src/inference/utils.ts +66 -0
  91. package/src/tts/tts.ts +1 -1
  92. package/src/utils.ts +11 -0
  93. package/src/voice/agent.ts +30 -6
  94. package/src/voice/agent_session.ts +29 -6
  95. package/src/voice/room_io/_input.ts +1 -1
  96. package/src/worker.ts +2 -7
package/src/inference/llm.ts
@@ -0,0 +1,485 @@
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+ //
+ // SPDX-License-Identifier: Apache-2.0
+ import OpenAI from 'openai';
+ import {
+   APIConnectionError,
+   APIStatusError,
+   APITimeoutError,
+   DEFAULT_API_CONNECT_OPTIONS,
+   toError,
+ } from '../index.js';
+ import * as llm from '../llm/index.js';
+ import type { APIConnectOptions } from '../types.js';
+ import { type AnyModels, createAccessToken } from './utils.js';
+
+ export type AzureModels =
+   // | "azure/gpt-5"
+   // | "azure/gpt-5-mini"
+   // | "azure/gpt-5-nano"
+   'azure/gpt-4.1' | 'azure/gpt-4.1-mini' | 'azure/gpt-4.1-nano';
+ // | "azure/gpt-4o"
+ // | "azure/gpt-4o-mini"
+
+ // https://inference-docs.cerebras.ai/models/overview
+ export type CerebrasModels =
+   // production models
+   | 'cerebras/llama3.1-8b'
+   | 'cerebras/llama-3.3-70b'
+   | 'cerebras/llama-4-scout-17b-16e-instruct'
+   | 'cerebras/gpt-oss-120b'
+   | 'cerebras/qwen-3-32b'
+   // preview models
+   | 'cerebras/llama-4-maverick-17b-128e-instruct'
+   | 'cerebras/qwen-3-235b-a22b-instruct-2507';
+
+ // https://console.groq.com/docs/models
+ export type GroqModels =
+   // production models
+   | 'groq/llama-3.1-8b-instant'
+   | 'groq/llama-3.3-70b-versatile'
+   | 'groq/openai/gpt-oss-120b'
+   | 'groq/openai/gpt-oss-20b'
+   // preview models
+   | 'groq/meta-llama/llama-4-maverick-17b-128e-instruct'
+   | 'groq/meta-llama/llama-4-scout-17b-16e-instruct'
+   | 'groq/qwen/qwen3-32b';
+
+ // https://www.baseten.co/library/tag/llms
+ export type BasetenModels =
+   | 'baseten/deepseek-ai/DeepSeek-V3-0324'
+   | 'baseten/meta-llama/Llama-4-Scout-17B-16E-Instruct'
+   | 'baseten/meta-llama/Llama-4-Maverick-17B-128E-Instruct'
+   | 'baseten/moonshotai/Kimi-K2-Instruct'
+   | 'baseten/openai/gpt-oss-120b'
+   | 'baseten/Qwen/Qwen3-235B-A22B-Instruct-2507';
+
+ export interface AzureOptions {
+   top_p?: number;
+ }
+
+ export interface CerebrasOptions {
+   top_p?: number;
+ }
+
+ export interface GroqOptions {
+   top_p?: number;
+ }
+
+ export interface BasetenOptions {
+   top_p?: number;
+ }
+
+ export type LLMModels = AzureModels | CerebrasModels | GroqModels | BasetenModels | AnyModels;
+
+ export type LLMOptions<T extends LLMModels> = T extends AzureModels
+   ? AzureOptions
+   : T extends CerebrasModels
+     ? CerebrasOptions
+     : T extends GroqModels
+       ? GroqOptions
+       : T extends BasetenModels
+         ? BasetenOptions
+         : Record<string, unknown>;
+
+ export type Verbosity = 'low' | 'medium' | 'high';
+ const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
+
+ export interface InferenceLLMOptions<TModel extends LLMModels> {
+   model: TModel;
+   temperature?: number;
+   parallelToolCalls?: boolean;
+   toolChoice?: llm.ToolChoice;
+   maxCompletionTokens?: number;
+   baseURL: string;
+   apiKey: string;
+   apiSecret: string;
+   verbosity?: Verbosity;
+   extraKwargs: Record<string, any>; // eslint-disable-line @typescript-eslint/no-explicit-any
+ }
+
+ export interface GatewayOptions {
+   apiKey: string;
+   apiSecret: string;
+ }
+
+ export class LLM<TModel extends LLMModels> extends llm.LLM {
+   private client: OpenAI;
+   private opts: InferenceLLMOptions<TModel>;
+
+   constructor(opts: {
+     model: TModel;
+     temperature?: number;
+     parallelToolCalls?: boolean;
+     toolChoice?: llm.ToolChoice;
+     maxCompletionTokens?: number;
+     baseURL?: string;
+     apiKey?: string;
+     apiSecret?: string;
+     maxRetries?: number;
+     timeout?: number;
+     verbosity?: Verbosity;
+     extraKwargs?: LLMOptions<TModel>;
+   }) {
+     super();
+
+     const {
+       model,
+       temperature,
+       parallelToolCalls,
+       toolChoice,
+       maxCompletionTokens,
+       baseURL,
+       apiKey,
+       apiSecret,
+       maxRetries,
+       timeout,
+       verbosity,
+       extraKwargs,
+     } = opts;
+
+     const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
+     const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
+     if (!lkApiKey) {
+       throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
+     }
+
+     const lkApiSecret =
+       apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;
+     if (!lkApiSecret) {
+       throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');
+     }
+
+     this.opts = {
+       model,
+       temperature,
+       parallelToolCalls,
+       toolChoice,
+       verbosity,
+       maxCompletionTokens,
+       baseURL: lkBaseURL,
+       apiKey: lkApiKey,
+       apiSecret: lkApiSecret,
+       extraKwargs: extraKwargs || {},
+     };
+
+     this.client = new OpenAI({
+       baseURL: this.opts.baseURL,
+       maxRetries: maxRetries || 0,
+       timeout: timeout || 15000,
+     });
+   }
+
+   label(): string {
+     return 'inference.LLM';
+   }
+
+   get model(): string {
+     return this.opts.model;
+   }
+
+   chat({
+     chatCtx,
+     toolCtx,
+     connOptions = DEFAULT_API_CONNECT_OPTIONS,
+     parallelToolCalls,
+     toolChoice,
+     // TODO(AJS-270): Add response_format parameter support
+     extraKwargs,
+   }: {
+     chatCtx: llm.ChatContext;
+     toolCtx?: llm.ToolContext;
+     connOptions?: APIConnectOptions;
+     parallelToolCalls?: boolean;
+     toolChoice?: llm.ToolChoice;
+     // TODO(AJS-270): Add responseFormat parameter
+     extraKwargs?: Record<string, unknown>;
+   }): LLMStream<TModel> {
+     let extras: Record<string, unknown> = { ...(extraKwargs || {}) };
+
+     if (this.opts.maxCompletionTokens !== undefined) {
+       extras.max_completion_tokens = this.opts.maxCompletionTokens;
+     }
+     if (this.opts.temperature !== undefined) {
+       extras.temperature = this.opts.temperature;
+     }
+     if (this.opts.verbosity !== undefined) {
+       extras.verbosity = this.opts.verbosity;
+     }
+
+     parallelToolCalls =
+       parallelToolCalls !== undefined ? parallelToolCalls : this.opts.parallelToolCalls;
+     if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) {
+       extras.parallel_tool_calls = parallelToolCalls;
+     }
+
+     toolChoice = toolChoice !== undefined ? toolChoice : this.opts.toolChoice;
+     if (toolChoice) {
+       extras.tool_choice = toolChoice;
+     }
+
+     // TODO(AJS-270): Add response_format support here
+
+     extras = { ...extras, ...this.opts.extraKwargs };
+
+     return new LLMStream(this, {
+       model: this.opts.model,
+       providerFmt: 'openai',
+       client: this.client,
+       chatCtx,
+       toolCtx,
+       connOptions,
+       extraKwargs: extras,
+       gatewayOptions: {
+         apiKey: this.opts.apiKey,
+         apiSecret: this.opts.apiSecret,
+       },
+     });
+   }
+ }
+
+ export class LLMStream<TModel extends LLMModels> extends llm.LLMStream {
+   private model: TModel;
+   private providerFmt: llm.ProviderFormat;
+   private client: OpenAI;
+   private extraKwargs: Record<string, unknown>;
+
+   private gatewayOptions?: GatewayOptions;
+   private toolCallId?: string;
+   private toolIndex?: number;
+   private fncName?: string;
+   private fncRawArguments?: string;
+
+   constructor(
+     llm: LLM<TModel>,
+     {
+       model,
+       providerFmt,
+       client,
+       chatCtx,
+       toolCtx,
+       gatewayOptions,
+       connOptions,
+       extraKwargs,
+     }: {
+       model: TModel;
+       providerFmt: llm.ProviderFormat;
+       client: OpenAI;
+       chatCtx: llm.ChatContext;
+       toolCtx?: llm.ToolContext;
+       gatewayOptions?: GatewayOptions;
+       connOptions: APIConnectOptions;
+       extraKwargs: Record<string, any>;
+     },
+   ) {
+     super(llm, { chatCtx, toolCtx, connOptions });
+     this.client = client;
+     this.gatewayOptions = gatewayOptions;
+     this.providerFmt = providerFmt;
+     this.extraKwargs = extraKwargs;
+     this.model = model;
+   }
+
+   protected async run(): Promise<void> {
+     // the current function call we're waiting to fully complete (args are streamed)
+     // (defined inside the run method to make sure the state is reset for each run/attempt)
+     let retryable = true;
+     this.toolCallId = this.fncName = this.fncRawArguments = this.toolIndex = undefined;
+
+     try {
+       const messages = (await this.chatCtx.toProviderFormat(
+         this.providerFmt,
+       )) as OpenAI.ChatCompletionMessageParam[];
+
+       const tools = this.toolCtx
+         ? Object.entries(this.toolCtx).map(([name, func]) => ({
+             type: 'function' as const,
+             function: {
+               name,
+               description: func.description,
+               parameters: llm.toJsonSchema(
+                 func.parameters,
+               ) as unknown as OpenAI.Chat.Completions.ChatCompletionTool['function']['parameters'],
+             },
+           }))
+         : undefined;
+
+       const requestExtras: Record<string, unknown> = { ...this.extraKwargs };
+       if (!tools) {
+         delete requestExtras.tool_choice;
+       }
+
+       // Dynamically set the access token for the LiveKit Agent Gateway API
+       if (this.gatewayOptions) {
+         this.client.apiKey = await createAccessToken(
+           this.gatewayOptions.apiKey,
+           this.gatewayOptions.apiSecret,
+         );
+       }
+
+       const stream = await this.client.chat.completions.create(
+         {
+           model: this.model,
+           messages,
+           tools,
+           stream: true,
+           stream_options: { include_usage: true },
+           ...requestExtras,
+         },
+         {
+           timeout: this.connOptions.timeoutMs,
+         },
+       );
+
+       for await (const chunk of stream) {
+         for (const choice of chunk.choices) {
+           if (this.abortController.signal.aborted) {
+             break;
+           }
+           const chatChunk = this.parseChoice(chunk.id, choice);
+           if (chatChunk) {
+             retryable = false;
+             this.queue.put(chatChunk);
+           }
+         }
+
+         if (chunk.usage) {
+           const usage = chunk.usage;
+           retryable = false;
+           this.queue.put({
+             id: chunk.id,
+             usage: {
+               completionTokens: usage.completion_tokens,
+               promptTokens: usage.prompt_tokens,
+               promptCachedTokens: usage.prompt_tokens_details?.cached_tokens || 0,
+               totalTokens: usage.total_tokens,
+             },
+           });
+         }
+       }
+     } catch (error) {
+       if (error instanceof OpenAI.APIConnectionTimeoutError) {
+         throw new APITimeoutError({ options: { retryable } });
+       } else if (error instanceof OpenAI.APIError) {
+         throw new APIStatusError({
+           message: error.message,
+           options: {
+             statusCode: error.status,
+             body: error.error,
+             requestId: error.request_id,
+             retryable,
+           },
+         });
+       } else {
+         throw new APIConnectionError({
+           message: toError(error).message,
+           options: { retryable },
+         });
+       }
+     } finally {
+       this.queue.close();
+     }
+   }
+
+   private parseChoice(
+     id: string,
+     choice: OpenAI.ChatCompletionChunk.Choice,
+   ): llm.ChatChunk | undefined {
+     const delta = choice.delta;
+
+     // https://github.com/livekit/agents/issues/688
+     // the delta can be None when using Azure OpenAI (content filtering)
+     if (delta === undefined) return undefined;
+
+     if (delta.tool_calls) {
+       // check if we have functions to call
+       for (const tool of delta.tool_calls) {
+         if (!tool.function) {
+           continue; // oai may add other tools in the future
+         }
+
+         /**
+          * The way OpenAI streams tool calls is a bit tricky.
+          *
+          * For any new tool call, it first emits a delta tool call with an id and function name;
+          * the rest of the delta chunks will only stream the remaining arguments string,
+          * until a new tool call is started or the tool call is finished.
+          * See below for an example.
+          *
+          * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)
+          * [ChoiceDeltaToolCall(index=0, id='call_LaVeHWUHpef9K1sd5UO8TtLg', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
+          * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "P', name=None), type=None)]
+          * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='aris}', name=None), type=None)]
+          * [ChoiceDeltaToolCall(index=1, id='call_ThU4OmMdQXnnVmpXGOCknXIB', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
+          * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "T', name=None), type=None)]
+          * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='okyo', name=None), type=None)]
+          * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=None), finish_reason='tool_calls', index=0, logprobs=None)
+          */
+         let callChunk: llm.ChatChunk | undefined;
+         // If we have a previous tool call and this is a new one, emit the previous
+         if (this.toolCallId && tool.id && tool.index !== this.toolIndex) {
+           callChunk = this.createRunningToolCallChunk(id, delta);
+           this.toolCallId = this.fncName = this.fncRawArguments = undefined;
+         }
+
+         // Start or continue building the current tool call
+         if (tool.function.name) {
+           this.toolIndex = tool.index;
+           this.toolCallId = tool.id;
+           this.fncName = tool.function.name;
+           this.fncRawArguments = tool.function.arguments || '';
+         } else if (tool.function.arguments) {
+           this.fncRawArguments = (this.fncRawArguments || '') + tool.function.arguments;
+         }
+
+         if (callChunk) {
+           return callChunk;
+         }
+       }
+     }
+
+     // If we're done with tool calls, emit the final one
+     if (
+       choice.finish_reason &&
+       ['tool_calls', 'stop'].includes(choice.finish_reason) &&
+       this.toolCallId !== undefined
+     ) {
+       const callChunk = this.createRunningToolCallChunk(id, delta);
+       this.toolCallId = this.fncName = this.fncRawArguments = undefined;
+       return callChunk;
+     }
+
+     // Regular content message
+     if (!delta.content) {
+       return undefined;
+     }
+
+     return {
+       id,
+       delta: {
+         role: 'assistant',
+         content: delta.content,
+       },
+     };
+   }
+
+   private createRunningToolCallChunk(
+     id: string,
+     delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
+   ): llm.ChatChunk {
+     return {
+       id,
+       delta: {
+         role: 'assistant',
+         content: delta.content || undefined,
+         toolCalls: [
+           llm.FunctionCall.create({
+             callId: this.toolCallId || '',
+             name: this.fncName || '',
+             args: this.fncRawArguments || '',
+           }),
+         ],
+       },
+     };
+   }
+ }
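
For orientation, here is a minimal usage sketch of the LLM class added in this file; it is not part of the package diff. It assumes the new inference module is exported from the package root (the src/index.ts change in this release suggests an added export, though the exact name is not visible in this hunk) and that LIVEKIT_API_KEY / LIVEKIT_API_SECRET are set, since the constructor falls back to those environment variables.

// Hypothetical usage sketch: the export path and surrounding wiring are assumptions.
import { inference } from '@livekit/agents';

// Model IDs are routed through the LiveKit Agent Gateway (DEFAULT_BASE_URL above);
// apiKey/apiSecret fall back to LIVEKIT_API_KEY / LIVEKIT_API_SECRET when omitted.
const gatewayLLM = new inference.LLM({
  model: 'groq/llama-3.3-70b-versatile',
  temperature: 0.7,
  maxCompletionTokens: 512,
});

// chat() returns an LLMStream that yields ChatChunk deltas, including tool calls
// accumulated from the streamed arguments by parseChoice():
// const stream = gatewayLLM.chat({ chatCtx });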