@livekit/agents 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.cjs +104 -0
  9. package/dist/inference/api_protos.cjs.map +1 -0
  10. package/dist/inference/api_protos.d.cts +222 -0
  11. package/dist/inference/api_protos.d.ts +222 -0
  12. package/dist/inference/api_protos.d.ts.map +1 -0
  13. package/dist/inference/api_protos.js +70 -0
  14. package/dist/inference/api_protos.js.map +1 -0
  15. package/dist/inference/index.cjs +56 -0
  16. package/dist/inference/index.cjs.map +1 -0
  17. package/dist/inference/index.d.cts +8 -0
  18. package/dist/inference/index.d.ts +8 -0
  19. package/dist/inference/index.d.ts.map +1 -0
  20. package/dist/inference/index.js +23 -0
  21. package/dist/inference/index.js.map +1 -0
  22. package/dist/inference/llm.cjs +301 -0
  23. package/dist/inference/llm.cjs.map +1 -0
  24. package/dist/inference/llm.d.cts +107 -0
  25. package/dist/inference/llm.d.ts +107 -0
  26. package/dist/inference/llm.d.ts.map +1 -0
  27. package/dist/inference/llm.js +272 -0
  28. package/dist/inference/llm.js.map +1 -0
  29. package/dist/inference/stt.cjs +313 -0
  30. package/dist/inference/stt.cjs.map +1 -0
  31. package/dist/inference/stt.d.cts +87 -0
  32. package/dist/inference/stt.d.ts +87 -0
  33. package/dist/inference/stt.d.ts.map +1 -0
  34. package/dist/inference/stt.js +292 -0
  35. package/dist/inference/stt.js.map +1 -0
  36. package/dist/inference/tts.cjs +324 -0
  37. package/dist/inference/tts.cjs.map +1 -0
  38. package/dist/inference/tts.d.cts +77 -0
  39. package/dist/inference/tts.d.ts +77 -0
  40. package/dist/inference/tts.d.ts.map +1 -0
  41. package/dist/inference/tts.js +306 -0
  42. package/dist/inference/tts.js.map +1 -0
  43. package/dist/inference/utils.cjs +76 -0
  44. package/dist/inference/utils.cjs.map +1 -0
  45. package/dist/inference/utils.d.cts +5 -0
  46. package/dist/inference/utils.d.ts +5 -0
  47. package/dist/inference/utils.d.ts.map +1 -0
  48. package/dist/inference/utils.js +51 -0
  49. package/dist/inference/utils.js.map +1 -0
  50. package/dist/llm/remote_chat_context.cjs.map +1 -1
  51. package/dist/llm/remote_chat_context.d.cts +2 -0
  52. package/dist/llm/remote_chat_context.d.ts +2 -0
  53. package/dist/llm/remote_chat_context.d.ts.map +1 -1
  54. package/dist/llm/remote_chat_context.js.map +1 -1
  55. package/dist/tts/tts.cjs +1 -1
  56. package/dist/tts/tts.cjs.map +1 -1
  57. package/dist/tts/tts.js +1 -1
  58. package/dist/tts/tts.js.map +1 -1
  59. package/dist/utils.cjs +11 -0
  60. package/dist/utils.cjs.map +1 -1
  61. package/dist/utils.d.cts +1 -0
  62. package/dist/utils.d.ts +1 -0
  63. package/dist/utils.d.ts.map +1 -1
  64. package/dist/utils.js +10 -0
  65. package/dist/utils.js.map +1 -1
  66. package/dist/voice/agent.cjs +16 -3
  67. package/dist/voice/agent.cjs.map +1 -1
  68. package/dist/voice/agent.d.cts +5 -3
  69. package/dist/voice/agent.d.ts +5 -3
  70. package/dist/voice/agent.d.ts.map +1 -1
  71. package/dist/voice/agent.js +20 -3
  72. package/dist/voice/agent.js.map +1 -1
  73. package/dist/voice/agent_activity.cjs +4 -2
  74. package/dist/voice/agent_activity.cjs.map +1 -1
  75. package/dist/voice/agent_activity.d.ts.map +1 -1
  76. package/dist/voice/agent_activity.js +4 -2
  77. package/dist/voice/agent_activity.js.map +1 -1
  78. package/dist/voice/agent_session.cjs +16 -3
  79. package/dist/voice/agent_session.cjs.map +1 -1
  80. package/dist/voice/agent_session.d.cts +4 -3
  81. package/dist/voice/agent_session.d.ts +4 -3
  82. package/dist/voice/agent_session.d.ts.map +1 -1
  83. package/dist/voice/agent_session.js +20 -3
  84. package/dist/voice/agent_session.js.map +1 -1
  85. package/dist/voice/events.cjs +2 -0
  86. package/dist/voice/events.cjs.map +1 -1
  87. package/dist/voice/events.d.cts +4 -1
  88. package/dist/voice/events.d.ts +4 -1
  89. package/dist/voice/events.d.ts.map +1 -1
  90. package/dist/voice/events.js +2 -0
  91. package/dist/voice/events.js.map +1 -1
  92. package/dist/voice/generation.cjs.map +1 -1
  93. package/dist/voice/generation.d.cts +1 -0
  94. package/dist/voice/generation.d.ts +1 -0
  95. package/dist/voice/generation.d.ts.map +1 -1
  96. package/dist/voice/generation.js.map +1 -1
  97. package/dist/voice/room_io/_input.cjs.map +1 -1
  98. package/dist/voice/room_io/_input.d.ts.map +1 -1
  99. package/dist/voice/room_io/_input.js +1 -0
  100. package/dist/voice/room_io/_input.js.map +1 -1
  101. package/dist/voice/room_io/_output.cjs +1 -1
  102. package/dist/voice/room_io/_output.cjs.map +1 -1
  103. package/dist/voice/room_io/_output.d.cts +1 -0
  104. package/dist/voice/room_io/_output.d.ts +1 -0
  105. package/dist/voice/room_io/_output.d.ts.map +1 -1
  106. package/dist/voice/room_io/_output.js +1 -1
  107. package/dist/voice/room_io/_output.js.map +1 -1
  108. package/dist/voice/room_io/room_io.cjs +1 -1
  109. package/dist/voice/room_io/room_io.cjs.map +1 -1
  110. package/dist/voice/room_io/room_io.d.cts +20 -0
  111. package/dist/voice/room_io/room_io.d.ts +20 -0
  112. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  113. package/dist/voice/room_io/room_io.js +1 -1
  114. package/dist/voice/room_io/room_io.js.map +1 -1
  115. package/dist/voice/transcription/synchronizer.cjs +1 -1
  116. package/dist/voice/transcription/synchronizer.cjs.map +1 -1
  117. package/dist/voice/transcription/synchronizer.d.cts +1 -0
  118. package/dist/voice/transcription/synchronizer.d.ts +1 -0
  119. package/dist/voice/transcription/synchronizer.d.ts.map +1 -1
  120. package/dist/voice/transcription/synchronizer.js +1 -1
  121. package/dist/voice/transcription/synchronizer.js.map +1 -1
  122. package/dist/worker.cjs +3 -3
  123. package/dist/worker.cjs.map +1 -1
  124. package/dist/worker.d.cts +3 -0
  125. package/dist/worker.d.ts +3 -0
  126. package/dist/worker.d.ts.map +1 -1
  127. package/dist/worker.js +4 -4
  128. package/dist/worker.js.map +1 -1
  129. package/package.json +3 -2
  130. package/src/index.ts +2 -1
  131. package/src/inference/api_protos.ts +82 -0
  132. package/src/inference/index.ts +32 -0
  133. package/src/inference/llm.ts +464 -0
  134. package/src/inference/stt.ts +444 -0
  135. package/src/inference/tts.ts +432 -0
  136. package/src/inference/utils.ts +66 -0
  137. package/src/llm/remote_chat_context.ts +2 -2
  138. package/src/tts/tts.ts +1 -1
  139. package/src/utils.ts +11 -0
  140. package/src/voice/agent.ts +31 -7
  141. package/src/voice/agent_activity.ts +2 -0
  142. package/src/voice/agent_session.ts +30 -6
  143. package/src/voice/events.ts +6 -0
  144. package/src/voice/generation.ts +1 -1
  145. package/src/voice/room_io/_input.ts +1 -1
  146. package/src/voice/room_io/_output.ts +1 -1
  147. package/src/voice/room_io/room_io.ts +21 -2
  148. package/src/voice/transcription/synchronizer.ts +1 -1
  149. package/src/worker.ts +5 -10
package/src/index.ts CHANGED
@@ -10,6 +10,7 @@
10
10
  * @packageDocumentation
11
11
  */
12
12
  import * as cli from './cli.js';
13
+ import * as inference from './inference/index.js';
13
14
  import * as ipc from './ipc/index.js';
14
15
  import * as llm from './llm/index.js';
15
16
  import * as metrics from './metrics/index.js';
@@ -33,4 +34,4 @@ export * from './vad.js';
33
34
  export * from './version.js';
34
35
  export * from './worker.js';
35
36
 
36
- export { cli, ipc, llm, metrics, stream, stt, tokenize, tts, voice };
37
+ export { cli, inference, ipc, llm, metrics, stream, stt, tokenize, tts, voice };
@@ -0,0 +1,82 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { z } from 'zod';
5
+
6
+ export const ttsSessionCreateEventSchema = z.object({
7
+ type: z.literal('session.create'),
8
+ sample_rate: z.string(),
9
+ encoding: z.string(),
10
+ model: z.string().optional(),
11
+ voice: z.string().optional(),
12
+ language: z.string().optional(),
13
+ extra: z.record(z.string(), z.unknown()),
14
+ transcript: z.string().optional(),
15
+ });
16
+
17
+ export const ttsInputTranscriptEventSchema = z.object({
18
+ type: z.literal('input_transcript'),
19
+ transcript: z.string(),
20
+ });
21
+
22
+ export const ttsSessionFlushEventSchema = z.object({
23
+ type: z.literal('session.flush'),
24
+ });
25
+
26
+ export const ttsSessionCloseEventSchema = z.object({
27
+ type: z.literal('session.close'),
28
+ });
29
+
30
+ export const ttsSessionCreatedEventSchema = z.object({
31
+ type: z.literal('session.created'),
32
+ session_id: z.string(),
33
+ });
34
+
35
+ export const ttsOutputAudioEventSchema = z.object({
36
+ type: z.literal('output_audio'),
37
+ audio: z.string(),
38
+ session_id: z.string(),
39
+ });
40
+
41
+ export const ttsDoneEventSchema = z.object({
42
+ type: z.literal('done'),
43
+ session_id: z.string(),
44
+ });
45
+
46
+ export const ttsSessionClosedEventSchema = z.object({
47
+ type: z.literal('session.closed'),
48
+ session_id: z.string(),
49
+ });
50
+
51
+ export const ttsErrorEventSchema = z.object({
52
+ type: z.literal('error'),
53
+ message: z.string(),
54
+ session_id: z.string(),
55
+ });
56
+
57
+ export const ttsClientEventSchema = z.discriminatedUnion('type', [
58
+ ttsSessionCreateEventSchema,
59
+ ttsInputTranscriptEventSchema,
60
+ ttsSessionFlushEventSchema,
61
+ ttsSessionCloseEventSchema,
62
+ ]);
63
+
64
+ export const ttsServerEventSchema = z.discriminatedUnion('type', [
65
+ ttsSessionCreatedEventSchema,
66
+ ttsOutputAudioEventSchema,
67
+ ttsDoneEventSchema,
68
+ ttsSessionClosedEventSchema,
69
+ ttsErrorEventSchema,
70
+ ]);
71
+
72
+ export type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;
73
+ export type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;
74
+ export type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;
75
+ export type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;
76
+ export type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;
77
+ export type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;
78
+ export type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;
79
+ export type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;
80
+ export type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;
81
+ export type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;
82
+ export type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;
@@ -0,0 +1,32 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import * as llm from './llm.js';
5
+ import * as stt from './stt.js';
6
+ import * as tts from './tts.js';
7
+
8
+ export {
9
+ LLM,
10
+ LLMStream,
11
+ type ChatCompletionOptions,
12
+ type GatewayOptions,
13
+ type InferenceLLMOptions,
14
+ type LLMModels,
15
+ } from './llm.js';
16
+
17
+ export {
18
+ STT,
19
+ type STTLanguages,
20
+ type STTModels,
21
+ type ModelWithLanguage as STTModelString,
22
+ type STTOptions,
23
+ } from './stt.js';
24
+
25
+ export {
26
+ TTS,
27
+ type TTSModels,
28
+ type ModelWithVoice as TTSModelString,
29
+ type TTSOptions,
30
+ } from './tts.js';
31
+
32
+ export { llm, stt, tts };
@@ -0,0 +1,464 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import OpenAI from 'openai';
5
+ import {
6
+ APIConnectionError,
7
+ APIStatusError,
8
+ APITimeoutError,
9
+ DEFAULT_API_CONNECT_OPTIONS,
10
+ toError,
11
+ } from '../index.js';
12
+ import * as llm from '../llm/index.js';
13
+ import type { APIConnectOptions } from '../types.js';
14
+ import { type AnyString, createAccessToken } from './utils.js';
15
+
16
// Default LiveKit Agent Gateway endpoint; overridable via the `baseURL`
// constructor option or the LIVEKIT_INFERENCE_URL environment variable.
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';

// Model identifiers routable through LiveKit Cloud Inference, grouped by
// upstream provider. The union stays open via `AnyString` below.
export type OpenAIModels =
  | 'openai/gpt-5'
  | 'openai/gpt-5-mini'
  | 'openai/gpt-5-nano'
  | 'openai/gpt-4.1'
  | 'openai/gpt-4.1-mini'
  | 'openai/gpt-4.1-nano'
  | 'openai/gpt-4o'
  | 'openai/gpt-4o-mini'
  | 'openai/gpt-oss-120b';

export type GoogleModels = 'google/gemini-2.0-flash-lite';

export type QwenModels = 'qwen/qwen3-235b-a22b-instruct';

export type KimiModels = 'moonshotai/kimi-k2-instruct';

export type DeepSeekModels = 'deepseek-ai/deepseek-v3';

// Local shorthands for the OpenAI SDK request parameter types used below.
type ChatCompletionPredictionContentParam = OpenAI.Chat.Completions.ChatCompletionPredictionContent;
type WebSearchOptions = OpenAI.Chat.Completions.ChatCompletionCreateParams.WebSearchOptions;
type ToolChoice = OpenAI.Chat.Completions.ChatCompletionCreateParams['tool_choice'];
type Verbosity = 'low' | 'medium' | 'high';

/**
 * Options forwarded to the chat completions request.
 *
 * Keys use the wire-level snake_case names of the OpenAI Chat Completions
 * API; the open index signature (`Record<string, unknown>`) lets callers pass
 * provider-specific options that are not modeled here.
 */
export interface ChatCompletionOptions extends Record<string, unknown> {
  frequency_penalty?: number;
  logit_bias?: Record<string, number>;
  logprobs?: boolean;
  max_completion_tokens?: number;
  max_tokens?: number;
  metadata?: Record<string, string>;
  modalities?: Array<'text' | 'audio'>;
  n?: number;
  parallel_tool_calls?: boolean;
  prediction?: ChatCompletionPredictionContentParam | null;
  presence_penalty?: number;
  prompt_cache_key?: string;
  reasoning_effort?: 'minimal' | 'low' | 'medium' | 'high';
  safety_identifier?: string;
  seed?: number;
  service_tier?: 'auto' | 'default' | 'flex' | 'scale' | 'priority';
  stop?: string | string[];
  store?: boolean;
  temperature?: number;
  top_logprobs?: number;
  top_p?: number;
  user?: string;
  verbosity?: Verbosity;
  web_search_options?: WebSearchOptions;

  // livekit-typed arguments
  tool_choice?: ToolChoice;
  // TODO(brian): support response format
  // response_format?: OpenAI.Chat.Completions.ChatCompletionCreateParams['response_format']
}

// Any known model id, or an arbitrary string for models not listed above.
export type LLMModels =
  | OpenAIModels
  | GoogleModels
  | QwenModels
  | KimiModels
  | DeepSeekModels
  | AnyString;

/** Fully-resolved configuration stored by the inference LLM after env fallbacks. */
export interface InferenceLLMOptions {
  model: LLMModels;
  // Optional upstream provider hint, sent as the X-LiveKit-Inference-Provider header.
  provider?: string;
  baseURL: string;
  apiKey: string;
  apiSecret: string;
  // Constructor-level defaults merged into every chat() call.
  modelOptions: ChatCompletionOptions;
}

/** LiveKit credentials used to mint a gateway access token per request. */
export interface GatewayOptions {
  apiKey: string;
  apiSecret: string;
}
95
+
96
+ /**
97
+ * Livekit Cloud Inference LLM
98
+ */
99
+ export class LLM extends llm.LLM {
100
+ private client: OpenAI;
101
+ private opts: InferenceLLMOptions;
102
+
103
+ constructor(opts: {
104
+ model: LLMModels;
105
+ provider?: string;
106
+ baseURL?: string;
107
+ apiKey?: string;
108
+ apiSecret?: string;
109
+ modelOptions?: InferenceLLMOptions['modelOptions'];
110
+ }) {
111
+ super();
112
+
113
+ const { model, provider, baseURL, apiKey, apiSecret, modelOptions } = opts;
114
+
115
+ const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
116
+ const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
117
+ if (!lkApiKey) {
118
+ throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
119
+ }
120
+
121
+ const lkApiSecret =
122
+ apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;
123
+ if (!lkApiSecret) {
124
+ throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');
125
+ }
126
+
127
+ this.opts = {
128
+ model,
129
+ provider,
130
+ baseURL: lkBaseURL,
131
+ apiKey: lkApiKey,
132
+ apiSecret: lkApiSecret,
133
+ modelOptions: modelOptions || {},
134
+ };
135
+
136
+ this.client = new OpenAI({
137
+ baseURL: this.opts.baseURL,
138
+ apiKey: '', // leave a temporary empty string to avoid OpenAI complain about missing key
139
+ timeout: 15000,
140
+ });
141
+ }
142
+
143
+ label(): string {
144
+ return 'inference.LLM';
145
+ }
146
+
147
+ get model(): string {
148
+ return this.opts.model;
149
+ }
150
+
151
+ static fromModelString(modelString: string): LLM {
152
+ return new LLM({ model: modelString });
153
+ }
154
+
155
+ chat({
156
+ chatCtx,
157
+ toolCtx,
158
+ connOptions = DEFAULT_API_CONNECT_OPTIONS,
159
+ parallelToolCalls,
160
+ toolChoice,
161
+ // TODO(AJS-270): Add response_format parameter support
162
+ extraKwargs,
163
+ }: {
164
+ chatCtx: llm.ChatContext;
165
+ toolCtx?: llm.ToolContext;
166
+ connOptions?: APIConnectOptions;
167
+ parallelToolCalls?: boolean;
168
+ toolChoice?: llm.ToolChoice;
169
+ // TODO(AJS-270): Add responseFormat parameter
170
+ extraKwargs?: Record<string, unknown>;
171
+ }): LLMStream {
172
+ let modelOptions: Record<string, unknown> = { ...(extraKwargs || {}) };
173
+
174
+ parallelToolCalls =
175
+ parallelToolCalls !== undefined
176
+ ? parallelToolCalls
177
+ : this.opts.modelOptions.parallel_tool_calls;
178
+
179
+ if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) {
180
+ modelOptions.parallel_tool_calls = parallelToolCalls;
181
+ }
182
+
183
+ toolChoice = toolChoice !== undefined ? toolChoice : this.opts.modelOptions.tool_choice;
184
+ if (toolChoice) {
185
+ modelOptions.tool_choice = toolChoice;
186
+ }
187
+
188
+ // TODO(AJS-270): Add response_format support here
189
+
190
+ modelOptions = { ...modelOptions, ...this.opts.modelOptions };
191
+
192
+ return new LLMStream(this, {
193
+ model: this.opts.model,
194
+ provider: this.opts.provider,
195
+ client: this.client,
196
+ chatCtx,
197
+ toolCtx,
198
+ connOptions,
199
+ modelOptions,
200
+ gatewayOptions: {
201
+ apiKey: this.opts.apiKey,
202
+ apiSecret: this.opts.apiSecret,
203
+ },
204
+ });
205
+ }
206
+ }
207
+
208
/**
 * Streaming chat-completion response from the LiveKit Agent Gateway.
 *
 * Consumes the OpenAI-compatible SSE stream, reassembles incrementally
 * streamed tool calls, and pushes llm.ChatChunk items (plus a final usage
 * chunk) onto the queue consumed by the framework.
 */
export class LLMStream extends llm.LLMStream {
  private model: LLMModels;
  private provider?: string;
  private providerFmt: llm.ProviderFormat;
  private client: OpenAI;
  private modelOptions: Record<string, unknown>;

  private gatewayOptions?: GatewayOptions;
  // Assembly state for the tool call currently being streamed; reset at the
  // start of every run() attempt (see parseChoice for the streaming shape).
  private toolCallId?: string;
  private toolIndex?: number;
  private fncName?: string;
  private fncRawArguments?: string;

  constructor(
    llm: LLM,
    {
      model,
      provider,
      client,
      chatCtx,
      toolCtx,
      gatewayOptions,
      connOptions,
      modelOptions,
      providerFmt,
    }: {
      model: LLMModels;
      provider?: string;
      client: OpenAI;
      chatCtx: llm.ChatContext;
      toolCtx?: llm.ToolContext;
      gatewayOptions?: GatewayOptions;
      connOptions: APIConnectOptions;
      // NOTE(review): typed `any` here but stored as Record<string, unknown>
      // below — consider aligning the two.
      modelOptions: Record<string, any>;
      providerFmt?: llm.ProviderFormat;
    },
  ) {
    super(llm, { chatCtx, toolCtx, connOptions });
    this.client = client;
    this.gatewayOptions = gatewayOptions;
    this.provider = provider;
    this.providerFmt = providerFmt || 'openai';
    this.modelOptions = modelOptions;
    this.model = model;
  }

  /**
   * Executes one request/stream attempt: builds the request from the chat
   * context and tool context, streams chunks into the queue, and maps SDK
   * errors onto the framework's API error types. `retryable` flips to false
   * once any chunk has been emitted, so partial responses are not retried.
   */
  protected async run(): Promise<void> {
    // current function call that we're waiting for full completion (args are streamed)
    // (defined inside the run method to make sure the state is reset for each run/attempt)
    let retryable = true;
    this.toolCallId = this.fncName = this.fncRawArguments = this.toolIndex = undefined;

    try {
      const messages = (await this.chatCtx.toProviderFormat(
        this.providerFmt,
      )) as OpenAI.ChatCompletionMessageParam[];

      // Convert the tool context into OpenAI function-tool declarations.
      const tools = this.toolCtx
        ? Object.entries(this.toolCtx).map(([name, func]) => ({
            type: 'function' as const,
            function: {
              name,
              description: func.description,
              parameters: llm.toJsonSchema(
                func.parameters,
              ) as unknown as OpenAI.Chat.Completions.ChatCompletionTool['function']['parameters'],
            },
          }))
        : undefined;

      const requestOptions: Record<string, unknown> = { ...this.modelOptions };
      if (!tools) {
        // tool_choice is invalid without tools.
        // NOTE(review): parallel_tool_calls from modelOptions is not stripped
        // here — confirm the gateway tolerates it on tool-less requests.
        delete requestOptions.tool_choice;
      }

      // Dynamically set the access token for the LiveKit Agent Gateway API
      if (this.gatewayOptions) {
        this.client.apiKey = await createAccessToken(
          this.gatewayOptions.apiKey,
          this.gatewayOptions.apiSecret,
        );
      }

      // Ask the gateway to route to a specific upstream provider, if set.
      if (this.provider) {
        const extraHeaders = requestOptions.extra_headers
          ? (requestOptions.extra_headers as Record<string, string>)
          : {};
        extraHeaders['X-LiveKit-Inference-Provider'] = this.provider;
        requestOptions.extra_headers = extraHeaders;
      }

      const stream = await this.client.chat.completions.create(
        {
          model: this.model,
          messages,
          tools,
          stream: true,
          // include_usage makes the final chunk carry token counts.
          stream_options: { include_usage: true },
          ...requestOptions,
        },
        {
          timeout: this.connOptions.timeoutMs,
        },
      );

      for await (const chunk of stream) {
        for (const choice of chunk.choices) {
          if (this.abortController.signal.aborted) {
            break;
          }
          const chatChunk = this.parseChoice(chunk.id, choice);
          if (chatChunk) {
            // Once anything has been forwarded, a retry would duplicate output.
            retryable = false;
            this.queue.put(chatChunk);
          }
        }

        // Usage arrives on a trailing chunk when include_usage is set.
        if (chunk.usage) {
          const usage = chunk.usage;
          retryable = false;
          this.queue.put({
            id: chunk.id,
            usage: {
              completionTokens: usage.completion_tokens,
              promptTokens: usage.prompt_tokens,
              promptCachedTokens: usage.prompt_tokens_details?.cached_tokens || 0,
              totalTokens: usage.total_tokens,
            },
          });
        }
      }
    } catch (error) {
      // Map OpenAI SDK errors onto the framework's error hierarchy, carrying
      // the retryable flag computed above.
      if (error instanceof OpenAI.APIConnectionTimeoutError) {
        throw new APITimeoutError({ options: { retryable } });
      } else if (error instanceof OpenAI.APIError) {
        throw new APIStatusError({
          message: error.message,
          options: {
            statusCode: error.status,
            body: error.error,
            requestId: error.request_id,
            retryable,
          },
        });
      } else {
        throw new APIConnectionError({
          message: toError(error).message,
          options: { retryable },
        });
      }
    } finally {
      this.queue.close();
    }
  }

  /**
   * Translates one streamed choice into a ChatChunk, buffering partial tool
   * calls until they are complete. Returns undefined when the delta carries
   * nothing to forward yet.
   */
  private parseChoice(
    id: string,
    choice: OpenAI.ChatCompletionChunk.Choice,
  ): llm.ChatChunk | undefined {
    const delta = choice.delta;

    // https://github.com/livekit/agents/issues/688
    // the delta can be None when using Azure OpenAI (content filtering)
    if (delta === undefined) return undefined;

    if (delta.tool_calls) {
      // check if we have functions to call
      for (const tool of delta.tool_calls) {
        if (!tool.function) {
          continue; // oai may add other tools in the future
        }

        /**
         * The way OpenAI streams tool calls is a bit tricky.
         *
         * For any new tool call, it first emits a delta tool call with id, and function name,
         * the rest of the delta chunks will only stream the remaining arguments string,
         * until a new tool call is started or the tool call is finished.
         * See below for an example.
         *
         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)
         * [ChoiceDeltaToolCall(index=0, id='call_LaVeHWUHpef9K1sd5UO8TtLg', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "P', name=None), type=None)]
         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='aris}', name=None), type=None)]
         * [ChoiceDeltaToolCall(index=1, id='call_ThU4OmMdQXnnVmpXGOCknXIB', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "T', name=None), type=None)]
         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='okyo', name=None), type=None)]
         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=None), finish_reason='tool_calls', index=0, logprobs=None)
         */
        let callChunk: llm.ChatChunk | undefined;
        // If we have a previous tool call and this is a new one, emit the previous
        if (this.toolCallId && tool.id && tool.index !== this.toolIndex) {
          callChunk = this.createRunningToolCallChunk(id, delta);
          this.toolCallId = this.fncName = this.fncRawArguments = undefined;
        }

        // Start or continue building the current tool call
        if (tool.function.name) {
          this.toolIndex = tool.index;
          this.toolCallId = tool.id;
          this.fncName = tool.function.name;
          this.fncRawArguments = tool.function.arguments || '';
        } else if (tool.function.arguments) {
          this.fncRawArguments = (this.fncRawArguments || '') + tool.function.arguments;
        }

        if (callChunk) {
          return callChunk;
        }
      }
    }

    // If we're done with tool calls, emit the final one
    if (
      choice.finish_reason &&
      ['tool_calls', 'stop'].includes(choice.finish_reason) &&
      this.toolCallId !== undefined
    ) {
      const callChunk = this.createRunningToolCallChunk(id, delta);
      this.toolCallId = this.fncName = this.fncRawArguments = undefined;
      return callChunk;
    }

    // Regular content message
    if (!delta.content) {
      return undefined;
    }

    return {
      id,
      delta: {
        role: 'assistant',
        content: delta.content,
      },
    };
  }

  /**
   * Wraps the currently buffered tool call (id, name, raw JSON argument
   * string) into a ChatChunk, together with any content on this delta.
   */
  private createRunningToolCallChunk(
    id: string,
    delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
  ): llm.ChatChunk {
    return {
      id,
      delta: {
        role: 'assistant',
        content: delta.content || undefined,
        toolCalls: [
          llm.FunctionCall.create({
            callId: this.toolCallId || '',
            name: this.fncName || '',
            args: this.fncRawArguments || '',
          }),
        ],
      },
    };
  }
}