@livekit/agents-plugin-openai 1.0.6 → 2.0.0

package/src/llm.ts CHANGED
@@ -2,14 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import type { APIConnectOptions } from '@livekit/agents';
-import {
-  APIConnectionError,
-  APIStatusError,
-  APITimeoutError,
-  DEFAULT_API_CONNECT_OPTIONS,
-  llm,
-  toError,
-} from '@livekit/agents';
+import { DEFAULT_API_CONNECT_OPTIONS, inference, llm } from '@livekit/agents';
 import { AzureOpenAI, OpenAI } from 'openai';
 import type {
   CerebrasChatModels,
@@ -491,230 +484,17 @@ export class LLM extends llm.LLM {
       extras.tool_choice = toolChoice;
     }

-    return new LLMStream(this, {
-      model: this.#opts.model,
+    return new LLMStream(this as unknown as inference.LLM<inference.AzureModels>, {
+      model: this.#opts.model as inference.AzureModels,
       providerFmt: this.#providerFmt,
       client: this.#client,
       chatCtx,
       toolCtx,
       connOptions,
       extraKwargs: extras,
+      gatewayOptions: undefined, // OpenAI plugin doesn't use gateway authentication
     });
   }
 }

-export class LLMStream extends llm.LLMStream {
-  #toolCallId?: string;
-  #fncName?: string;
-  #fncRawArguments?: string;
-  #toolIndex?: number;
-  #client: OpenAI;
-  #providerFmt: llm.ProviderFormat;
-  #extraKwargs: Record<string, any>;
-  private model: string | ChatModels;
-
-  constructor(
-    llm: LLM,
-    {
-      model,
-      providerFmt,
-      client,
-      chatCtx,
-      toolCtx,
-      connOptions,
-      extraKwargs,
-    }: {
-      model: string | ChatModels;
-      providerFmt: llm.ProviderFormat;
-      client: OpenAI;
-      chatCtx: llm.ChatContext;
-      toolCtx?: llm.ToolContext;
-      connOptions: APIConnectOptions;
-      extraKwargs: Record<string, any>;
-    },
-  ) {
-    super(llm, { chatCtx, toolCtx, connOptions });
-    this.#client = client;
-    this.#providerFmt = providerFmt;
-    this.#extraKwargs = extraKwargs;
-    this.model = model;
-  }
-
-  protected async run(): Promise<void> {
-    let retryable = true;
-    try {
-      const messages = (await this.chatCtx.toProviderFormat(
-        this.#providerFmt,
-      )) as OpenAI.ChatCompletionMessageParam[];
-
-      const tools = this.toolCtx
-        ? Object.entries(this.toolCtx).map(([name, func]) => ({
-            type: 'function' as const,
-            function: {
-              name,
-              description: func.description,
-              parameters: llm.toJsonSchema(
-                func.parameters,
-              ) as unknown as OpenAI.Chat.Completions.ChatCompletionTool['function']['parameters'],
-            },
-          }))
-        : undefined;
-
-      const stream = await this.#client.chat.completions.create({
-        model: this.model,
-        messages,
-        tools,
-        stream: true,
-        stream_options: { include_usage: true },
-        ...this.#extraKwargs,
-      });
-
-      for await (const chunk of stream) {
-        for (const choice of chunk.choices) {
-          if (this.abortController.signal.aborted) {
-            break;
-          }
-          const chatChunk = this.#parseChoice(chunk.id, choice);
-          if (chatChunk) {
-            retryable = false;
-            this.queue.put(chatChunk);
-          }
-        }
-
-        if (chunk.usage) {
-          const usage = chunk.usage;
-          retryable = false;
-          this.queue.put({
-            id: chunk.id,
-            usage: {
-              completionTokens: usage.completion_tokens,
-              promptTokens: usage.prompt_tokens,
-              promptCachedTokens: usage.prompt_tokens_details?.cached_tokens || 0,
-              totalTokens: usage.total_tokens,
-            },
-          });
-        }
-      }
-    } catch (error) {
-      if (error instanceof OpenAI.APIConnectionTimeoutError) {
-        throw new APITimeoutError({ options: { retryable } });
-      } else if (error instanceof OpenAI.APIError) {
-        throw new APIStatusError({
-          message: error.message,
-          options: {
-            statusCode: error.status,
-            body: error.error,
-            requestId: error.request_id,
-            retryable,
-          },
-        });
-      } else {
-        throw new APIConnectionError({
-          message: toError(error).message,
-          options: { retryable },
-        });
-      }
-    } finally {
-      this.queue.close();
-    }
-  }
-
-  #parseChoice(id: string, choice: OpenAI.ChatCompletionChunk.Choice): llm.ChatChunk | undefined {
-    const delta = choice.delta;
-
-    // https://github.com/livekit/agents/issues/688
-    // the delta can be None when using Azure OpenAI (content filtering)
-    if (delta === undefined) return undefined;
-
-    if (delta.tool_calls) {
-      // check if we have functions to calls
-      for (const tool of delta.tool_calls) {
-        if (!tool.function) {
-          continue; // oai may add other tools in the future
-        }
-
-        /**
-         * The way OpenAI streams tool calls is a bit tricky.
-         *
-         * For any new tool call, it first emits a delta tool call with id, and function name,
-         * the rest of the delta chunks will only stream the remaining arguments string,
-         * until a new tool call is started or the tool call is finished.
-         * See below for an example.
-         *
-         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)
-         * [ChoiceDeltaToolCall(index=0, id='call_LaVeHWUHpef9K1sd5UO8TtLg', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
-         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "P', name=None), type=None)]
-         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='aris}', name=None), type=None)]
-         * [ChoiceDeltaToolCall(index=1, id='call_ThU4OmMdQXnnVmpXGOCknXIB', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
-         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "T', name=None), type=None)]
-         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='okyo', name=None), type=None)]
-         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=None), finish_reason='tool_calls', index=0, logprobs=None)
-         */
-        let callChunk: llm.ChatChunk | undefined;
-        // If we have a previous tool call and this is a new one, emit the previous
-        if (this.#toolCallId && tool.id && tool.index !== this.#toolIndex) {
-          callChunk = this.#createRunningToolCallChunk(id, delta);
-          this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
-        }
-
-        // Start or continue building the current tool call
-        if (tool.function.name) {
-          this.#toolIndex = tool.index;
-          this.#toolCallId = tool.id;
-          this.#fncName = tool.function.name;
-          this.#fncRawArguments = tool.function.arguments || '';
-        } else if (tool.function.arguments) {
-          this.#fncRawArguments = (this.#fncRawArguments || '') + tool.function.arguments;
-        }
-
-        if (callChunk) {
-          return callChunk;
-        }
-      }
-    }
-
-    // If we're done with tool calls, emit the final one
-    if (
-      choice.finish_reason &&
-      ['tool_calls', 'stop'].includes(choice.finish_reason) &&
-      this.#toolCallId !== undefined
-    ) {
-      const callChunk = this.#createRunningToolCallChunk(id, delta);
-      this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
-      return callChunk;
-    }
-
-    // Regular content message
-    if (!delta.content) {
-      return undefined;
-    }
-
-    return {
-      id,
-      delta: {
-        role: 'assistant',
-        content: delta.content,
-      },
-    };
-  }
-
-  #createRunningToolCallChunk(
-    id: string,
-    delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
-  ): llm.ChatChunk {
-    return {
-      id,
-      delta: {
-        role: 'assistant',
-        content: delta.content || undefined,
-        toolCalls: [
-          llm.FunctionCall.create({
-            callId: this.#toolCallId!,
-            name: this.#fncName || '',
-            args: this.#fncRawArguments || '',
-          }),
-        ],
-      },
-    };
-  }
-}
+export class LLMStream extends inference.LLMStream<inference.AzureModels> {}
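The removed `#parseChoice` logic above dealt with how the OpenAI API streams tool calls: each new call first arrives with an `id` and function name, and later chunks carry only argument fragments for the same `index`. For reference, a minimal standalone sketch of that accumulation pattern written against the `openai` SDK directly — the model name, prompt, and tool definition are placeholders, not taken from the plugin, and the shared `inference.LLMStream` now handles this internally:

```ts
import { OpenAI } from 'openai';

// Standalone sketch: accumulate streamed tool calls by index, mirroring the
// pattern the plugin previously implemented in #parseChoice.
async function collectToolCalls(client: OpenAI) {
  const stream = await client.chat.completions.create({
    model: 'gpt-4o-mini', // placeholder model
    stream: true,
    messages: [{ role: 'user', content: 'What is the weather in Paris and Tokyo?' }],
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather',
          parameters: { type: 'object', properties: { location: { type: 'string' } } },
        },
      },
    ],
  });

  // index -> partially assembled tool call
  const calls = new Map<number, { id?: string; name?: string; args: string }>();

  for await (const chunk of stream) {
    for (const tc of chunk.choices[0]?.delta.tool_calls ?? []) {
      const entry = calls.get(tc.index) ?? { args: '' };
      if (tc.id) entry.id = tc.id; // only present on the first chunk of a call
      if (tc.function?.name) entry.name = tc.function.name;
      if (tc.function?.arguments) entry.args += tc.function.arguments; // argument fragments
      calls.set(tc.index, entry);
    }
  }
  return [...calls.values()];
}
```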
@@ -25,6 +25,9 @@ import { AudioFrame, combineAudioFrames } from '@livekit/rtc-node';
 import { type MessageEvent, WebSocket } from 'ws';
 import * as api_proto from './api_proto.js';

+// if LK_OPENAI_DEBUG convert it to a number, otherwise set it to 0
+const lkOaiDebug = process.env.LK_OPENAI_DEBUG ? Number(process.env.LK_OPENAI_DEBUG) : 0;
+
 const SAMPLE_RATE = 24000;
 const NUM_CHANNELS = 1;
 const BASE_URL = 'https://api.openai.com/v1';
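The new `lkOaiDebug` flag gates all of the verbose realtime logging in the hunks below. Because the value is coerced with `Number()`, only a non-zero numeric value turns it on (e.g. `LK_OPENAI_DEBUG=1`); a rough sketch of that behaviour — the env var name comes from this diff, the `parseDebug` helper is only illustrative:

```ts
// Illustrative only: how the LK_OPENAI_DEBUG gate behaves for representative values.
// parseDebug mirrors the plugin's `process.env.LK_OPENAI_DEBUG ? Number(...) : 0` expression.
const parseDebug = (value?: string): number => (value ? Number(value) : 0);

console.log(Boolean(parseDebug(undefined))); // false – unset, debug logging stays off
console.log(Boolean(parseDebug('0')));       // false – Number('0') is 0
console.log(Boolean(parseDebug('1')));       // true  – any non-zero number enables it
console.log(Boolean(parseDebug('true')));    // false – Number('true') is NaN, which is falsy
```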
@@ -640,11 +643,8 @@ export class RealtimeSession extends llm.RealtimeSession {
     } as api_proto.ConversationItemTruncateEvent);
   }

-  /// Truncates the data field of the event to the specified maxLength to avoid overwhelming logs
-  /// with large amounts of base64 audio data.
-  #loggableEvent(
+  private loggableEvent(
     event: api_proto.ClientEvent | api_proto.ServerEvent,
-    maxLength: number = 30,
   ): Record<string, unknown> {
     const untypedEvent: Record<string, unknown> = {};
     for (const [key, value] of Object.entries(event)) {
@@ -654,18 +654,14 @@ export class RealtimeSession extends llm.RealtimeSession {
     }

     if (untypedEvent.audio && typeof untypedEvent.audio === 'string') {
-      const truncatedData =
-        untypedEvent.audio.slice(0, maxLength) + (untypedEvent.audio.length > maxLength ? '…' : '');
-      return { ...untypedEvent, audio: truncatedData };
+      return { ...untypedEvent, audio: '...' };
     }
     if (
       untypedEvent.delta &&
       typeof untypedEvent.delta === 'string' &&
       event.type === 'response.audio.delta'
     ) {
-      const truncatedDelta =
-        untypedEvent.delta.slice(0, maxLength) + (untypedEvent.delta.length > maxLength ? '…' : '');
-      return { ...untypedEvent, delta: truncatedDelta };
+      return { ...untypedEvent, delta: '...' };
     }
     return untypedEvent;
   }
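With the `maxLength` parameter gone, `loggableEvent` now replaces bulky base64 payloads outright rather than truncating them. A sketch of the resulting behaviour as a standalone helper — the `audio` and `delta` field names come from the diff, while the helper and sample payload are illustrative, not part of the plugin's API:

```ts
// Illustrative helper mirroring the new redaction behaviour: base64 audio
// payloads are replaced with '...' instead of being truncated to 30 chars.
function redactForLog(event: Record<string, unknown>): Record<string, unknown> {
  if (typeof event.audio === 'string') {
    return { ...event, audio: '...' };
  }
  if (typeof event.delta === 'string' && event.type === 'response.audio.delta') {
    return { ...event, delta: '...' };
  }
  return event;
}

// Logs as { type: 'input_audio_buffer.append', audio: '...' }
console.log(redactForLog({ type: 'input_audio_buffer.append', audio: 'UklGRi4AAABXQVZF' }));
```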
@@ -699,7 +695,9 @@ export class RealtimeSession extends llm.RealtimeSession {
       azureDeployment: this.oaiRealtimeModel._options.azureDeployment,
     });

-    this.#logger.debug(`Connecting to OpenAI Realtime API at ${url}`);
+    if (lkOaiDebug) {
+      this.#logger.debug(`Connecting to OpenAI Realtime API at ${url}`);
+    }

     return new Promise((resolve, reject) => {
       const ws = new WebSocket(url, { headers });
@@ -849,8 +847,8 @@ export class RealtimeSession extends llm.RealtimeSession {
         break;
       }

-      if (event.type !== 'input_audio_buffer.append') {
-        this.#logger.debug(`(client) -> ${JSON.stringify(this.#loggableEvent(event))}`);
+      if (lkOaiDebug) {
+        this.#logger.debug(this.loggableEvent(event), `(client) -> ${event.type}`);
       }

       this.emit('openai_client_event_queued', event);
@@ -876,7 +874,9 @@ export class RealtimeSession extends llm.RealtimeSession {
       const event: api_proto.ServerEvent = JSON.parse(message.data as string);

       this.emit('openai_server_event_received', event);
-      this.#logger.debug(`(server) <- ${JSON.stringify(this.#loggableEvent(event))}`);
+      if (lkOaiDebug) {
+        this.#logger.debug(this.loggableEvent(event), `(server) <- ${event.type}`);
+      }

       switch (event.type) {
         case 'input_audio_buffer.speech_started':
@@ -931,7 +931,9 @@ export class RealtimeSession extends llm.RealtimeSession {
           this.handleError(event);
           break;
         default:
-          this.#logger.debug(`unhandled event: ${event.type}`);
+          if (lkOaiDebug) {
+            this.#logger.debug(`unhandled event: ${event.type}`);
+          }
           break;
       }
     };