@livekit/agents-plugin-openai 1.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/llm.cjs +5 -157
- package/dist/llm.cjs.map +1 -1
- package/dist/llm.d.cts +2 -14
- package/dist/llm.d.ts +2 -14
- package/dist/llm.d.ts.map +1 -1
- package/dist/llm.js +6 -165
- package/dist/llm.js.map +1 -1
- package/dist/realtime/realtime_model.cjs +15 -12
- package/dist/realtime/realtime_model.cjs.map +1 -1
- package/dist/realtime/realtime_model.d.cts +1 -0
- package/dist/realtime/realtime_model.d.ts +1 -0
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +15 -12
- package/dist/realtime/realtime_model.js.map +1 -1
- package/package.json +5 -5
- package/src/llm.ts +6 -226
- package/src/realtime/realtime_model.ts +17 -15
package/src/llm.ts
CHANGED
@@ -2,14 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import type { APIConnectOptions } from '@livekit/agents';
-import {
-  APIConnectionError,
-  APIStatusError,
-  APITimeoutError,
-  DEFAULT_API_CONNECT_OPTIONS,
-  llm,
-  toError,
-} from '@livekit/agents';
+import { DEFAULT_API_CONNECT_OPTIONS, inference, llm } from '@livekit/agents';
 import { AzureOpenAI, OpenAI } from 'openai';
 import type {
   CerebrasChatModels,
@@ -118,7 +111,7 @@ export class LLM extends llm.LLM {
       temperature?: number;
     } = defaultAzureLLMOptions,
   ): LLM {
-    opts = { ...
+    opts = { ...defaultAzureLLMOptions, ...opts };
     if (opts.apiKey === undefined) {
       throw new Error('Azure API key is required, whether as an argument or as $AZURE_API_KEY');
     }
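
Note: the hunk above now spreads caller-supplied options over defaultAzureLLMOptions before validating the key. A minimal sketch of calling the Azure factory (LLM.withAzure in this plugin) after this change; the model value is a placeholder, and any option besides temperature and apiKey (both visible in the hunk) is an assumption rather than something this diff confirms:

import { LLM } from '@livekit/agents-plugin-openai';

// Caller options win over defaultAzureLLMOptions; anything omitted falls
// back to the defaults. If apiKey is neither passed nor available as
// $AZURE_API_KEY, withAzure throws the Error shown in the hunk.
const azureLLM = LLM.withAzure({
  model: 'gpt-4o-mini', // placeholder model/deployment name, not a plugin default
  temperature: 0.4,
});
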
@@ -491,230 +484,17 @@ export class LLM extends llm.LLM {
       extras.tool_choice = toolChoice;
     }
 
-    return new LLMStream(this, {
-      model: this.#opts.model,
+    return new LLMStream(this as unknown as inference.LLM<inference.AzureModels>, {
+      model: this.#opts.model as inference.AzureModels,
       providerFmt: this.#providerFmt,
       client: this.#client,
       chatCtx,
       toolCtx,
       connOptions,
       extraKwargs: extras,
+      gatewayOptions: undefined, // OpenAI plugin doesn't use gateway authentication
     });
   }
 }
 
-export class LLMStream extends llm.LLMStream {
-  #toolCallId?: string;
-  #fncName?: string;
-  #fncRawArguments?: string;
-  #toolIndex?: number;
-  #client: OpenAI;
-  #providerFmt: llm.ProviderFormat;
-  #extraKwargs: Record<string, any>;
-  private model: string | ChatModels;
-
-  constructor(
-    llm: LLM,
-    {
-      model,
-      providerFmt,
-      client,
-      chatCtx,
-      toolCtx,
-      connOptions,
-      extraKwargs,
-    }: {
-      model: string | ChatModels;
-      providerFmt: llm.ProviderFormat;
-      client: OpenAI;
-      chatCtx: llm.ChatContext;
-      toolCtx?: llm.ToolContext;
-      connOptions: APIConnectOptions;
-      extraKwargs: Record<string, any>;
-    },
-  ) {
-    super(llm, { chatCtx, toolCtx, connOptions });
-    this.#client = client;
-    this.#providerFmt = providerFmt;
-    this.#extraKwargs = extraKwargs;
-    this.model = model;
-  }
-
-  protected async run(): Promise<void> {
-    let retryable = true;
-    try {
-      const messages = (await this.chatCtx.toProviderFormat(
-        this.#providerFmt,
-      )) as OpenAI.ChatCompletionMessageParam[];
-
-      const tools = this.toolCtx
-        ? Object.entries(this.toolCtx).map(([name, func]) => ({
-            type: 'function' as const,
-            function: {
-              name,
-              description: func.description,
-              parameters: llm.toJsonSchema(
-                func.parameters,
-              ) as unknown as OpenAI.Chat.Completions.ChatCompletionTool['function']['parameters'],
-            },
-          }))
-        : undefined;
-
-      const stream = await this.#client.chat.completions.create({
-        model: this.model,
-        messages,
-        tools,
-        stream: true,
-        stream_options: { include_usage: true },
-        ...this.#extraKwargs,
-      });
-
-      for await (const chunk of stream) {
-        for (const choice of chunk.choices) {
-          if (this.abortController.signal.aborted) {
-            break;
-          }
-          const chatChunk = this.#parseChoice(chunk.id, choice);
-          if (chatChunk) {
-            retryable = false;
-            this.queue.put(chatChunk);
-          }
-        }
-
-        if (chunk.usage) {
-          const usage = chunk.usage;
-          retryable = false;
-          this.queue.put({
-            id: chunk.id,
-            usage: {
-              completionTokens: usage.completion_tokens,
-              promptTokens: usage.prompt_tokens,
-              promptCachedTokens: usage.prompt_tokens_details?.cached_tokens || 0,
-              totalTokens: usage.total_tokens,
-            },
-          });
-        }
-      }
-    } catch (error) {
-      if (error instanceof OpenAI.APIConnectionTimeoutError) {
-        throw new APITimeoutError({ options: { retryable } });
-      } else if (error instanceof OpenAI.APIError) {
-        throw new APIStatusError({
-          message: error.message,
-          options: {
-            statusCode: error.status,
-            body: error.error,
-            requestId: error.request_id,
-            retryable,
-          },
-        });
-      } else {
-        throw new APIConnectionError({
-          message: toError(error).message,
-          options: { retryable },
-        });
-      }
-    } finally {
-      this.queue.close();
-    }
-  }
-
-  #parseChoice(id: string, choice: OpenAI.ChatCompletionChunk.Choice): llm.ChatChunk | undefined {
-    const delta = choice.delta;
-
-    // https://github.com/livekit/agents/issues/688
-    // the delta can be None when using Azure OpenAI (content filtering)
-    if (delta === undefined) return undefined;
-
-    if (delta.tool_calls) {
-      // check if we have functions to calls
-      for (const tool of delta.tool_calls) {
-        if (!tool.function) {
-          continue; // oai may add other tools in the future
-        }
-
-        /**
-         * The way OpenAI streams tool calls is a bit tricky.
-         *
-         * For any new tool call, it first emits a delta tool call with id, and function name,
-         * the rest of the delta chunks will only stream the remaining arguments string,
-         * until a new tool call is started or the tool call is finished.
-         * See below for an example.
-         *
-         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)
-         * [ChoiceDeltaToolCall(index=0, id='call_LaVeHWUHpef9K1sd5UO8TtLg', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
-         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "P', name=None), type=None)]
-         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='aris}', name=None), type=None)]
-         * [ChoiceDeltaToolCall(index=1, id='call_ThU4OmMdQXnnVmpXGOCknXIB', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
-         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "T', name=None), type=None)]
-         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='okyo', name=None), type=None)]
-         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=None), finish_reason='tool_calls', index=0, logprobs=None)
-         */
-        let callChunk: llm.ChatChunk | undefined;
-        // If we have a previous tool call and this is a new one, emit the previous
-        if (this.#toolCallId && tool.id && tool.index !== this.#toolIndex) {
-          callChunk = this.#createRunningToolCallChunk(id, delta);
-          this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
-        }
-
-        // Start or continue building the current tool call
-        if (tool.function.name) {
-          this.#toolIndex = tool.index;
-          this.#toolCallId = tool.id;
-          this.#fncName = tool.function.name;
-          this.#fncRawArguments = tool.function.arguments || '';
-        } else if (tool.function.arguments) {
-          this.#fncRawArguments = (this.#fncRawArguments || '') + tool.function.arguments;
-        }
-
-        if (callChunk) {
-          return callChunk;
-        }
-      }
-    }
-
-    // If we're done with tool calls, emit the final one
-    if (
-      choice.finish_reason &&
-      ['tool_calls', 'stop'].includes(choice.finish_reason) &&
-      this.#toolCallId !== undefined
-    ) {
-      const callChunk = this.#createRunningToolCallChunk(id, delta);
-      this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
-      return callChunk;
-    }
-
-    // Regular content message
-    if (!delta.content) {
-      return undefined;
-    }
-
-    return {
-      id,
-      delta: {
-        role: 'assistant',
-        content: delta.content,
-      },
-    };
-  }
-
-  #createRunningToolCallChunk(
-    id: string,
-    delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
-  ): llm.ChatChunk {
-    return {
-      id,
-      delta: {
-        role: 'assistant',
-        content: delta.content || undefined,
-        toolCalls: [
-          llm.FunctionCall.create({
-            callId: this.#toolCallId!,
-            name: this.#fncName || '',
-            args: this.#fncRawArguments || '',
-          }),
-        ],
-      },
-    };
-  }
-}
+export class LLMStream extends inference.LLMStream<inference.AzureModels> {}
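
Note: with 2.0.0 the hand-rolled streaming class deleted above collapses into the shared inference.LLMStream from @livekit/agents, constructed with gatewayOptions: undefined (per the source comment, this plugin does not use gateway authentication). For reference, a standalone sketch of the streamed tool-call accumulation the removed #parseChoice performed, written directly against the openai SDK; the model string, prompt, and collectToolCalls name are illustrative only:

import OpenAI from 'openai';

async function collectToolCalls(client: OpenAI): Promise<{ id: string; name: string; args: string }[]> {
  const stream = await client.chat.completions.create({
    model: 'gpt-4o-mini', // placeholder
    messages: [{ role: 'user', content: 'Weather in Paris and Tokyo?' }],
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather',
          description: 'Get the weather for a location',
          parameters: {
            type: 'object',
            properties: { location: { type: 'string' } },
            required: ['location'],
          },
        },
      },
    ],
    stream: true,
  });

  // OpenAI sends each call's id and name exactly once; later chunks carry
  // only argument fragments for the most recent call, until a new call
  // starts or finish_reason arrives.
  const calls: { id: string; name: string; args: string }[] = [];
  for await (const chunk of stream) {
    for (const choice of chunk.choices) {
      for (const tool of choice.delta?.tool_calls ?? []) {
        if (!tool.function) continue; // ignore non-function tools
        if (tool.function.name) {
          // a new tool call begins: record its id and name
          calls.push({ id: tool.id ?? '', name: tool.function.name, args: tool.function.arguments ?? '' });
        } else if (tool.function.arguments) {
          // continuation: append the raw JSON fragment to the current call
          const last = calls[calls.length - 1];
          if (last) last.args += tool.function.arguments;
        }
      }
    }
  }
  return calls; // each args field is a raw JSON string; parse after the stream ends
}
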
package/src/realtime/realtime_model.ts
CHANGED
@@ -25,6 +25,9 @@ import { AudioFrame, combineAudioFrames } from '@livekit/rtc-node';
 import { type MessageEvent, WebSocket } from 'ws';
 import * as api_proto from './api_proto.js';
 
+// if LK_OPENAI_DEBUG convert it to a number, otherwise set it to 0
+const lkOaiDebug = process.env.LK_OPENAI_DEBUG ? Number(process.env.LK_OPENAI_DEBUG) : 0;
+
 const SAMPLE_RATE = 24000;
 const NUM_CHANNELS = 1;
 const BASE_URL = 'https://api.openai.com/v1';
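
Note: every logging change below in this file hinges on this new module-level flag. A short sketch of its semantics, reusing the shipped line verbatim; the behavior follows purely from standard Number() coercion:

// Read once at module load. Unset, '', or '0' all yield 0 (disabled);
// '1' or any other non-zero numeric string enables debug logging; a
// non-numeric value coerces to NaN, which is falsy, so it also disables.
const lkOaiDebug = process.env.LK_OPENAI_DEBUG ? Number(process.env.LK_OPENAI_DEBUG) : 0;

if (lkOaiDebug) {
  console.debug('OpenAI realtime debug logging enabled');
}

In practice, setting LK_OPENAI_DEBUG=1 in the agent process environment restores the per-event client/server logs that 1.x largely emitted without a guard.
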
@@ -640,11 +643,8 @@ export class RealtimeSession extends llm.RealtimeSession {
     } as api_proto.ConversationItemTruncateEvent);
   }
 
-
-  /// with large amounts of base64 audio data.
-  #loggableEvent(
+  private loggableEvent(
     event: api_proto.ClientEvent | api_proto.ServerEvent,
-    maxLength: number = 30,
   ): Record<string, unknown> {
     const untypedEvent: Record<string, unknown> = {};
     for (const [key, value] of Object.entries(event)) {
@@ -654,18 +654,14 @@ export class RealtimeSession extends llm.RealtimeSession {
     }
 
     if (untypedEvent.audio && typeof untypedEvent.audio === 'string') {
-      const truncatedData =
-        untypedEvent.audio.slice(0, maxLength) + (untypedEvent.audio.length > maxLength ? '…' : '');
-      return { ...untypedEvent, audio: truncatedData };
+      return { ...untypedEvent, audio: '...' };
     }
     if (
       untypedEvent.delta &&
       typeof untypedEvent.delta === 'string' &&
       event.type === 'response.audio.delta'
     ) {
-      const truncatedDelta =
-        untypedEvent.delta.slice(0, maxLength) + (untypedEvent.delta.length > maxLength ? '…' : '');
-      return { ...untypedEvent, delta: truncatedDelta };
+      return { ...untypedEvent, delta: '...' };
     }
     return untypedEvent;
   }
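
Note: loggableEvent (renamed from #loggableEvent and now a private method) drops the maxLength parameter and replaces base64 audio payloads and response.audio.delta deltas with the literal string '...' instead of a 30-character prefix. A minimal sketch of the new redaction; sampleEvent is illustrative, not a real api_proto event:

// Payloads are redacted wholesale rather than sliced to a prefix, so log
// lines stay small no matter how much audio the event carries.
function loggable(event: Record<string, unknown>): Record<string, unknown> {
  if (event.audio && typeof event.audio === 'string') {
    return { ...event, audio: '...' };
  }
  return event;
}

const sampleEvent = { type: 'input_audio_buffer.append', audio: 'UklGRiQAAABXQVZF' }; // imagine kilobytes of base64
console.log(loggable(sampleEvent)); // { type: 'input_audio_buffer.append', audio: '...' }
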
@@ -699,7 +695,9 @@ export class RealtimeSession extends llm.RealtimeSession {
       azureDeployment: this.oaiRealtimeModel._options.azureDeployment,
     });
 
-    this.#logger.debug(`Connecting to OpenAI Realtime API at ${url}`);
+    if (lkOaiDebug) {
+      this.#logger.debug(`Connecting to OpenAI Realtime API at ${url}`);
+    }
 
     return new Promise((resolve, reject) => {
       const ws = new WebSocket(url, { headers });
@@ -849,8 +847,8 @@ export class RealtimeSession extends llm.RealtimeSession {
         break;
       }
 
-      if (
-        this.#logger.debug(`(client) -> ${
+      if (lkOaiDebug) {
+        this.#logger.debug(this.loggableEvent(event), `(client) -> ${event.type}`);
       }
 
       this.emit('openai_client_event_queued', event);
@@ -876,7 +874,9 @@ export class RealtimeSession extends llm.RealtimeSession {
       const event: api_proto.ServerEvent = JSON.parse(message.data as string);
 
       this.emit('openai_server_event_received', event);
-      this.#logger.debug(this.#loggableEvent(event), `(server) <- ${event.type}`);
+      if (lkOaiDebug) {
+        this.#logger.debug(this.loggableEvent(event), `(server) <- ${event.type}`);
+      }
 
       switch (event.type) {
         case 'input_audio_buffer.speech_started':
@@ -931,7 +931,9 @@ export class RealtimeSession extends llm.RealtimeSession {
           this.handleError(event);
           break;
         default:
-          this.#logger.debug(`unhandled event: ${event.type}`);
+          if (lkOaiDebug) {
+            this.#logger.debug(`unhandled event: ${event.type}`);
+          }
           break;
       }
     };