@livekit/agents-plugin-openai 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/llm.ts ADDED
@@ -0,0 +1,670 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { llm, log } from '@livekit/agents';
5
+ import { randomUUID } from 'node:crypto';
6
+ import { AzureOpenAI, OpenAI } from 'openai';
7
+ import sharp from 'sharp';
8
+ import type {
9
+ CerebrasChatModels,
10
+ ChatModels,
11
+ DeepSeekChatModels,
12
+ GroqChatModels,
13
+ OctoChatModels,
14
+ PerplexityChatModels,
15
+ TelnyxChatModels,
16
+ TogetherChatModels,
17
+ XAIChatModels,
18
+ } from './models.js';
19
+
20
/**
 * Options used to configure an OpenAI-compatible {@link LLM} instance.
 */
export interface LLMOptions {
  /** Model name sent with each chat completion request. */
  model: string | ChatModels;
  /** API key; provider factory methods fall back to provider-specific environment variables. */
  apiKey?: string;
  /** Endpoint override — used by the OpenAI-compatible vendor factories (Groq, Cerebras, …). */
  baseURL?: string;
  /** End-user identifier forwarded to the API as the `user` request field. */
  user?: string;
  /** Sampling temperature forwarded to the API. */
  temperature?: number;
  /** Pre-configured client; when set, `apiKey`/`baseURL` are not used to build a new client. */
  client?: OpenAI;
}
28
+
29
// Defaults for the plain OpenAI constructor: gpt-4o with the API key read
// from the environment at module-load time.
const defaultLLMOptions: LLMOptions = {
  model: 'gpt-4o',
  apiKey: process.env.OPENAI_API_KEY,
};
33
+
34
// Defaults for LLM.withAzure.
// NOTE(review): this reads $AZURE_API_KEY, while the withAzure docstring says
// the key is inferred from $AZURE_OPENAI_API_KEY — confirm which variable is
// intended.
const defaultAzureLLMOptions: LLMOptions = {
  model: 'gpt-4o',
  apiKey: process.env.AZURE_API_KEY,
};
38
+
39
+ export class LLM extends llm.LLM {
40
+ #opts: LLMOptions;
41
+ #client: OpenAI;
42
+
43
+ /**
44
+ * Create a new instance of OpenAI LLM.
45
+ *
46
+ * @remarks
47
+ * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the
48
+ * `OPENAI_API_KEY` environmental variable.
49
+ */
50
+ constructor(opts: Partial<LLMOptions> = defaultLLMOptions) {
51
+ super();
52
+
53
+ this.#opts = { ...defaultLLMOptions, ...opts };
54
+ if (this.#opts.apiKey === undefined) {
55
+ throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');
56
+ }
57
+
58
+ this.#client =
59
+ this.#opts.client ||
60
+ new OpenAI({
61
+ baseURL: opts.baseURL,
62
+ apiKey: opts.apiKey,
63
+ });
64
+ }
65
+
66
+ /**
67
+ * Create a new instance of OpenAI LLM with Azure.
68
+ *
69
+ * @remarks
70
+ * This automatically infers the following arguments from their corresponding environment variables if they are not provided:
71
+ * - `apiKey` from `AZURE_OPENAI_API_KEY`
72
+ * - `organization` from `OPENAI_ORG_ID`
73
+ * - `project` from `OPENAI_PROJECT_ID`
74
+ * - `azureAdToken` from `AZURE_OPENAI_AD_TOKEN`
75
+ * - `apiVersion` from `OPENAI_API_VERSION`
76
+ * - `azureEndpoint` from `AZURE_OPENAI_ENDPOINT`
77
+ */
78
+ static withAzure(
79
+ opts: {
80
+ model: string | ChatModels;
81
+ azureEndpoint?: string;
82
+ azureDeployment?: string;
83
+ apiVersion?: string;
84
+ apiKey?: string;
85
+ azureAdToken?: string;
86
+ azureAdTokenProvider?: () => Promise<string>;
87
+ organization?: string;
88
+ project?: string;
89
+ baseURL?: string;
90
+ user?: string;
91
+ temperature?: number;
92
+ } = defaultAzureLLMOptions,
93
+ ): LLM {
94
+ opts = { ...defaultLLMOptions, ...opts };
95
+ if (opts.apiKey === undefined) {
96
+ throw new Error('Azure API key is required, whether as an argument or as $AZURE_API_KEY');
97
+ }
98
+
99
+ return new LLM({
100
+ temperature: opts.temperature,
101
+ user: opts.user,
102
+ client: new AzureOpenAI(opts),
103
+ });
104
+ }
105
+
106
+ /**
107
+ * Create a new instance of Cerebras LLM.
108
+ *
109
+ * @remarks
110
+ * `apiKey` must be set to your Cerebras API key, either using the argument or by setting the
111
+ * `CEREBRAS_API_KEY` environmental variable.
112
+ */
113
+ static withCerebras(
114
+ opts: Partial<{
115
+ model: string | CerebrasChatModels;
116
+ apiKey?: string;
117
+ baseURL?: string;
118
+ user?: string;
119
+ temperature?: number;
120
+ client: OpenAI;
121
+ }> = {},
122
+ ): LLM {
123
+ opts.apiKey = opts.apiKey || process.env.CEREBRAS_API_KEY;
124
+ if (opts.apiKey === undefined) {
125
+ throw new Error(
126
+ 'Cerebras API key is required, whether as an argument or as $CEREBRAS_API_KEY',
127
+ );
128
+ }
129
+
130
+ return new LLM({
131
+ model: 'llama3.1-8b',
132
+ baseURL: 'https://api.cerebras.ai/v1',
133
+ ...opts,
134
+ });
135
+ }
136
+
137
+ /**
138
+ * Create a new instance of Fireworks LLM.
139
+ *
140
+ * @remarks
141
+ * `apiKey` must be set to your Fireworks API key, either using the argument or by setting the
142
+ * `FIREWORKS_API_KEY` environmental variable.
143
+ */
144
+ static withFireworks(opts: Partial<LLMOptions> = {}): LLM {
145
+ opts.apiKey = opts.apiKey || process.env.FIREWORKS_API_KEY;
146
+ if (opts.apiKey === undefined) {
147
+ throw new Error(
148
+ 'Fireworks API key is required, whether as an argument or as $FIREWORKS_API_KEY',
149
+ );
150
+ }
151
+
152
+ return new LLM({
153
+ model: 'accounts/fireworks/models/llama-v3p1-70b-instruct',
154
+ baseURL: 'https://api.fireworks.ai/inference/v1',
155
+ ...opts,
156
+ });
157
+ }
158
+
159
+ /**
160
+ * Create a new instance of xAI LLM.
161
+ *
162
+ * @remarks
163
+ * `apiKey` must be set to your xAI API key, either using the argument or by setting the
164
+ * `XAI_API_KEY` environmental variable.
165
+ */
166
+ static withXAI(
167
+ opts: Partial<{
168
+ model: string | XAIChatModels;
169
+ apiKey?: string;
170
+ baseURL?: string;
171
+ user?: string;
172
+ temperature?: number;
173
+ client: OpenAI;
174
+ }> = {},
175
+ ): LLM {
176
+ opts.apiKey = opts.apiKey || process.env.XAI_API_KEY;
177
+ if (opts.apiKey === undefined) {
178
+ throw new Error('xAI API key is required, whether as an argument or as $XAI_API_KEY');
179
+ }
180
+
181
+ return new LLM({
182
+ model: 'grok-2-public',
183
+ baseURL: 'https://api.x.ai/v1',
184
+ ...opts,
185
+ });
186
+ }
187
+
188
+ /**
189
+ * Create a new instance of Groq LLM.
190
+ *
191
+ * @remarks
192
+ * `apiKey` must be set to your Groq API key, either using the argument or by setting the
193
+ * `GROQ_API_KEY` environmental variable.
194
+ */
195
+ static withGroq(
196
+ opts: Partial<{
197
+ model: string | GroqChatModels;
198
+ apiKey?: string;
199
+ baseURL?: string;
200
+ user?: string;
201
+ temperature?: number;
202
+ client: OpenAI;
203
+ }> = {},
204
+ ): LLM {
205
+ opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;
206
+ if (opts.apiKey === undefined) {
207
+ throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');
208
+ }
209
+
210
+ return new LLM({
211
+ model: 'llama3-8b-8192',
212
+ baseURL: 'https://api.groq.com/openai/v1',
213
+ ...opts,
214
+ });
215
+ }
216
+
217
+ /**
218
+ * Create a new instance of DeepSeek LLM.
219
+ *
220
+ * @remarks
221
+ * `apiKey` must be set to your DeepSeek API key, either using the argument or by setting the
222
+ * `DEEPSEEK_API_KEY` environmental variable.
223
+ */
224
+ static withDeepSeek(
225
+ opts: Partial<{
226
+ model: string | DeepSeekChatModels;
227
+ apiKey?: string;
228
+ baseURL?: string;
229
+ user?: string;
230
+ temperature?: number;
231
+ client: OpenAI;
232
+ }> = {},
233
+ ): LLM {
234
+ opts.apiKey = opts.apiKey || process.env.DEEPSEEK_API_KEY;
235
+ if (opts.apiKey === undefined) {
236
+ throw new Error(
237
+ 'DeepSeek API key is required, whether as an argument or as $DEEPSEEK_API_KEY',
238
+ );
239
+ }
240
+
241
+ return new LLM({
242
+ model: 'deepseek-chat',
243
+ baseURL: 'https://api.deepseek.com/v1',
244
+ ...opts,
245
+ });
246
+ }
247
+
248
+ /**
249
+ * Create a new instance of OctoAI LLM.
250
+ *
251
+ * @remarks
252
+ * `apiKey` must be set to your OctoAI API key, either using the argument or by setting the
253
+ * `OCTOAI_TOKEN` environmental variable.
254
+ */
255
+ static withOcto(
256
+ opts: Partial<{
257
+ model: string | OctoChatModels;
258
+ apiKey?: string;
259
+ baseURL?: string;
260
+ user?: string;
261
+ temperature?: number;
262
+ client: OpenAI;
263
+ }> = {},
264
+ ): LLM {
265
+ opts.apiKey = opts.apiKey || process.env.OCTOAI_TOKEN;
266
+ if (opts.apiKey === undefined) {
267
+ throw new Error('OctoAI API key is required, whether as an argument or as $OCTOAI_TOKEN');
268
+ }
269
+
270
+ return new LLM({
271
+ model: 'llama-2-13b-chat',
272
+ baseURL: 'https://text.octoai.run/v1',
273
+ ...opts,
274
+ });
275
+ }
276
+
277
+ /** Create a new instance of Ollama LLM. */
278
+ static withOllama(
279
+ opts: Partial<{
280
+ model: string;
281
+ baseURL?: string;
282
+ temperature?: number;
283
+ client: OpenAI;
284
+ }> = {},
285
+ ): LLM {
286
+ return new LLM({
287
+ model: 'llama-2-13b-chat',
288
+ baseURL: 'https://text.octoai.run/v1',
289
+ apiKey: 'ollama',
290
+ ...opts,
291
+ });
292
+ }
293
+
294
+ /**
295
+ * Create a new instance of PerplexityAI LLM.
296
+ *
297
+ * @remarks
298
+ * `apiKey` must be set to your PerplexityAI API key, either using the argument or by setting the
299
+ * `PERPLEXITY_API_KEY` environmental variable.
300
+ */
301
+ static withPerplexity(
302
+ opts: Partial<{
303
+ model: string | PerplexityChatModels;
304
+ apiKey?: string;
305
+ baseURL?: string;
306
+ user?: string;
307
+ temperature?: number;
308
+ client: OpenAI;
309
+ }> = {},
310
+ ): LLM {
311
+ opts.apiKey = opts.apiKey || process.env.PERPLEXITY_API_KEY;
312
+ if (opts.apiKey === undefined) {
313
+ throw new Error(
314
+ 'PerplexityAI API key is required, whether as an argument or as $PERPLEXITY_API_KEY',
315
+ );
316
+ }
317
+
318
+ return new LLM({
319
+ model: 'llama-3.1-sonar-small-128k-chat',
320
+ baseURL: 'https://api.perplexity.ai',
321
+ ...opts,
322
+ });
323
+ }
324
+
325
+ /**
326
+ * Create a new instance of TogetherAI LLM.
327
+ *
328
+ * @remarks
329
+ * `apiKey` must be set to your TogetherAI API key, either using the argument or by setting the
330
+ * `TOGETHER_API_KEY` environmental variable.
331
+ */
332
+ static withTogether(
333
+ opts: Partial<{
334
+ model: string | TogetherChatModels;
335
+ apiKey?: string;
336
+ baseURL?: string;
337
+ user?: string;
338
+ temperature?: number;
339
+ client: OpenAI;
340
+ }> = {},
341
+ ): LLM {
342
+ opts.apiKey = opts.apiKey || process.env.TOGETHER_API_KEY;
343
+ if (opts.apiKey === undefined) {
344
+ throw new Error(
345
+ 'TogetherAI API key is required, whether as an argument or as $TOGETHER_API_KEY',
346
+ );
347
+ }
348
+
349
+ return new LLM({
350
+ model: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
351
+ baseURL: 'https://api.together.xyz/v1',
352
+ ...opts,
353
+ });
354
+ }
355
+
356
+ /**
357
+ * Create a new instance of Telnyx LLM.
358
+ *
359
+ * @remarks
360
+ * `apiKey` must be set to your Telnyx API key, either using the argument or by setting the
361
+ * `TELNYX_API_KEY` environmental variable.
362
+ */
363
+ static withTelnyx(
364
+ opts: Partial<{
365
+ model: string | TelnyxChatModels;
366
+ apiKey?: string;
367
+ baseURL?: string;
368
+ user?: string;
369
+ temperature?: number;
370
+ client: OpenAI;
371
+ }> = {},
372
+ ): LLM {
373
+ opts.apiKey = opts.apiKey || process.env.TELNYX_API_KEY;
374
+ if (opts.apiKey === undefined) {
375
+ throw new Error('Telnyx API key is required, whether as an argument or as $TELNYX_API_KEY');
376
+ }
377
+
378
+ return new LLM({
379
+ model: 'meta-llama/Meta-Llama-3.1-70B-Instruct',
380
+ baseURL: 'https://api.telnyx.com/v2/ai',
381
+ ...opts,
382
+ });
383
+ }
384
+
385
+ chat({
386
+ chatCtx,
387
+ fncCtx,
388
+ temperature,
389
+ n,
390
+ parallelToolCalls,
391
+ }: {
392
+ chatCtx: llm.ChatContext;
393
+ fncCtx?: llm.FunctionContext | undefined;
394
+ temperature?: number | undefined;
395
+ n?: number | undefined;
396
+ parallelToolCalls?: boolean | undefined;
397
+ }): LLMStream {
398
+ temperature = temperature || this.#opts.temperature;
399
+
400
+ return new LLMStream(
401
+ this.#client,
402
+ chatCtx,
403
+ fncCtx,
404
+ this.#opts,
405
+ parallelToolCalls,
406
+ temperature,
407
+ n,
408
+ );
409
+ }
410
+ }
411
+
412
+ export class LLMStream extends llm.LLMStream {
413
+ #toolCallId?: string;
414
+ #fncName?: string;
415
+ #fncRawArguments?: string;
416
+ #client: OpenAI;
417
+ #logger = log();
418
+ #id = randomUUID();
419
+
420
+ constructor(
421
+ client: OpenAI,
422
+ chatCtx: llm.ChatContext,
423
+ fncCtx: llm.FunctionContext | undefined,
424
+ opts: LLMOptions,
425
+ parallelToolCalls?: boolean,
426
+ temperature?: number,
427
+ n?: number,
428
+ ) {
429
+ super(chatCtx, fncCtx);
430
+ this.#client = client;
431
+ this.#run(opts, n, parallelToolCalls, temperature);
432
+ }
433
+
434
+ async #run(opts: LLMOptions, n?: number, parallelToolCalls?: boolean, temperature?: number) {
435
+ const tools = this.fncCtx
436
+ ? Object.entries(this.fncCtx).map(([name, func]) => ({
437
+ type: 'function' as const,
438
+ function: {
439
+ name,
440
+ description: func.description,
441
+ parameters: llm.oaiParams(func.parameters),
442
+ },
443
+ }))
444
+ : undefined;
445
+
446
+ try {
447
+ const stream = await this.#client.chat.completions.create({
448
+ model: opts.model,
449
+ user: opts.user,
450
+ n,
451
+ messages: await Promise.all(
452
+ this.chatCtx.messages.map(async (m) => await buildMessage(m, this.#id)),
453
+ ),
454
+ temperature: temperature || opts.temperature,
455
+ stream_options: { include_usage: true },
456
+ stream: true,
457
+ tools,
458
+ parallel_tool_calls: this.fncCtx && parallelToolCalls,
459
+ });
460
+
461
+ for await (const chunk of stream) {
462
+ for (const choice of chunk.choices) {
463
+ const chatChunk = this.#parseChoice(chunk.id, choice);
464
+ if (chatChunk) {
465
+ this.queue.put(chatChunk);
466
+ }
467
+
468
+ if (chunk.usage) {
469
+ const usage = chunk.usage;
470
+ this.queue.put({
471
+ requestId: chunk.id,
472
+ choices: [],
473
+ usage: {
474
+ completionTokens: usage.completion_tokens,
475
+ promptTokens: usage.prompt_tokens,
476
+ totalTokens: usage.total_tokens,
477
+ },
478
+ });
479
+ }
480
+ }
481
+ }
482
+ } finally {
483
+ this.queue.close();
484
+ }
485
+ }
486
+
487
+ #parseChoice(id: string, choice: OpenAI.ChatCompletionChunk.Choice): llm.ChatChunk | undefined {
488
+ const delta = choice.delta;
489
+
490
+ if (delta.tool_calls) {
491
+ // check if we have functions to calls
492
+ for (const tool of delta.tool_calls) {
493
+ if (!tool.function) {
494
+ continue; // oai may add other tools in the future
495
+ }
496
+
497
+ if (tool.function.name) {
498
+ this.#toolCallId = tool.id;
499
+ this.#fncName = tool.function.name;
500
+ this.#fncRawArguments = tool.function.arguments || '';
501
+ } else if (tool.function.arguments) {
502
+ this.#fncRawArguments += tool.function.arguments;
503
+ }
504
+
505
+ if (this.#toolCallId && tool.id && tool.id !== this.#toolCallId) {
506
+ return this.#tryBuildFunction(id, choice);
507
+ }
508
+ }
509
+ }
510
+
511
+ if (
512
+ choice.finish_reason &&
513
+ ['tool_calls', 'stop'].includes(choice.finish_reason) &&
514
+ this.#toolCallId
515
+ ) {
516
+ // we're done with the tool calls, run the last one
517
+ return this.#tryBuildFunction(id, choice);
518
+ }
519
+
520
+ return {
521
+ requestId: id,
522
+ choices: [
523
+ {
524
+ delta: { content: delta.content || undefined, role: llm.ChatRole.ASSISTANT },
525
+ index: choice.index,
526
+ },
527
+ ],
528
+ };
529
+ }
530
+
531
+ #tryBuildFunction(
532
+ id: string,
533
+ choice: OpenAI.ChatCompletionChunk.Choice,
534
+ ): llm.ChatChunk | undefined {
535
+ if (!this.fncCtx) {
536
+ this.#logger.warn('oai stream tried to run function without function context');
537
+ return undefined;
538
+ }
539
+
540
+ if (!this.#toolCallId) {
541
+ this.#logger.warn('oai stream tried to run function but toolCallId is not set');
542
+ return undefined;
543
+ }
544
+
545
+ if (!this.#fncRawArguments || !this.#fncName) {
546
+ this.#logger.warn('oai stream tried to run function but rawArguments or fncName are not set');
547
+ return undefined;
548
+ }
549
+
550
+ const functionInfo = llm.oaiBuildFunctionInfo(
551
+ this.fncCtx,
552
+ this.#toolCallId,
553
+ this.#fncName,
554
+ this.#fncRawArguments,
555
+ );
556
+ this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
557
+ this._functionCalls.push(functionInfo);
558
+
559
+ return {
560
+ requestId: id,
561
+ choices: [
562
+ {
563
+ delta: {
564
+ content: choice.delta.content || undefined,
565
+ role: llm.ChatRole.ASSISTANT,
566
+ toolCalls: this._functionCalls,
567
+ },
568
+ index: choice.index,
569
+ },
570
+ ],
571
+ };
572
+ }
573
+ }
574
+
575
+ const buildMessage = async (msg: llm.ChatMessage, cacheKey: any) => {
576
+ const oaiMsg: Partial<OpenAI.ChatCompletionMessageParam> = {};
577
+
578
+ switch (msg.role) {
579
+ case llm.ChatRole.SYSTEM:
580
+ oaiMsg.role = 'system';
581
+ break;
582
+ case llm.ChatRole.USER:
583
+ oaiMsg.role = 'user';
584
+ break;
585
+ case llm.ChatRole.ASSISTANT:
586
+ oaiMsg.role = 'assistant';
587
+ break;
588
+ case llm.ChatRole.TOOL:
589
+ oaiMsg.role = 'tool';
590
+ if (oaiMsg.role === 'tool') {
591
+ oaiMsg.tool_call_id = msg.toolCallId;
592
+ }
593
+ break;
594
+ }
595
+
596
+ if (typeof msg.content === 'string') {
597
+ oaiMsg.content = msg.content;
598
+ } else if (
599
+ ((c?: llm.ChatContent | llm.ChatContent[]): c is llm.ChatContent[] => {
600
+ return (c as llm.ChatContent[]).length !== undefined;
601
+ })(msg.content)
602
+ ) {
603
+ oaiMsg.content = (await Promise.all(
604
+ msg.content.map(async (c) => {
605
+ if (typeof c === 'string') {
606
+ return { type: 'text', text: c };
607
+ } else if (
608
+ // typescript type guard for determining ChatAudio vs ChatImage
609
+ ((c: llm.ChatAudio | llm.ChatImage): c is llm.ChatImage => {
610
+ return (c as llm.ChatImage).image !== undefined;
611
+ })(c)
612
+ ) {
613
+ return await buildImageContent(c, cacheKey);
614
+ } else {
615
+ throw new Error('ChatAudio is not supported');
616
+ }
617
+ }),
618
+ )) as OpenAI.ChatCompletionContentPart[];
619
+ }
620
+
621
+ // make sure to provide when function has been called inside the context
622
+ // (+ raw_arguments)
623
+ if (msg.toolCalls && oaiMsg.role === 'assistant') {
624
+ oaiMsg.tool_calls = Object.entries(msg.toolCalls).map(([name, func]) => ({
625
+ id: func.toolCallId,
626
+ type: 'function' as const,
627
+ function: {
628
+ name: name,
629
+ arguments: func.rawParams,
630
+ },
631
+ }));
632
+ }
633
+
634
+ return oaiMsg as OpenAI.ChatCompletionMessageParam;
635
+ };
636
+
637
+ const buildImageContent = async (image: llm.ChatImage, cacheKey: any) => {
638
+ if (typeof image.image === 'string') {
639
+ // image url
640
+ return {
641
+ type: 'image_url',
642
+ image_url: {
643
+ url: image.image,
644
+ detail: 'auto',
645
+ },
646
+ };
647
+ } else {
648
+ if (!image.cache[cacheKey]) {
649
+ // inside our internal implementation, we allow to put extra metadata to
650
+ // each ChatImage (avoid to reencode each time we do a chatcompletion request)
651
+ let encoded = sharp(image.image.data);
652
+
653
+ if (image.inferenceHeight && image.inferenceHeight) {
654
+ encoded = encoded.resize(image.inferenceWidth, image.inferenceHeight);
655
+ }
656
+
657
+ image.cache[cacheKey] = await encoded
658
+ .jpeg()
659
+ .toBuffer()
660
+ .then((buffer) => buffer.toString('utf-8'));
661
+ }
662
+
663
+ return {
664
+ type: 'image_url',
665
+ image_url: {
666
+ url: `data:image/jpeg;base64,${image.cache[cacheKey]}`,
667
+ },
668
+ };
669
+ }
670
+ };