@livekit/agents-plugin-openai 1.0.31 → 1.0.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/realtime/api_proto.cjs.map +1 -1
- package/dist/realtime/api_proto.d.cts +50 -12
- package/dist/realtime/api_proto.d.ts +50 -12
- package/dist/realtime/api_proto.d.ts.map +1 -1
- package/dist/realtime/api_proto.js.map +1 -1
- package/dist/realtime/index.cjs +19 -0
- package/dist/realtime/index.cjs.map +1 -1
- package/dist/realtime/index.d.cts +1 -0
- package/dist/realtime/index.d.ts +1 -0
- package/dist/realtime/index.d.ts.map +1 -1
- package/dist/realtime/index.js +4 -0
- package/dist/realtime/index.js.map +1 -1
- package/dist/realtime/realtime_model.cjs +69 -33
- package/dist/realtime/realtime_model.cjs.map +1 -1
- package/dist/realtime/realtime_model.d.cts +14 -6
- package/dist/realtime/realtime_model.d.ts +14 -6
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +69 -33
- package/dist/realtime/realtime_model.js.map +1 -1
- package/dist/realtime/realtime_model_beta.cjs +1300 -0
- package/dist/realtime/realtime_model_beta.cjs.map +1 -0
- package/dist/realtime/realtime_model_beta.d.cts +165 -0
- package/dist/realtime/realtime_model_beta.d.ts +165 -0
- package/dist/realtime/realtime_model_beta.d.ts.map +1 -0
- package/dist/realtime/realtime_model_beta.js +1280 -0
- package/dist/realtime/realtime_model_beta.js.map +1 -0
- package/package.json +5 -5
- package/src/realtime/api_proto.ts +76 -17
- package/src/realtime/index.ts +1 -0
- package/src/realtime/realtime_model.ts +86 -49
- package/src/realtime/realtime_model_beta.ts +1665 -0
|
@@ -52,7 +52,6 @@ class CreateResponseHandle {
|
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
54
|
const DEFAULT_FIRST_RETRY_INTERVAL_MS = 100;
|
|
55
|
-
const DEFAULT_TEMPERATURE = 0.8;
|
|
56
55
|
const DEFAULT_TURN_DETECTION = {
|
|
57
56
|
type: "semantic_vad",
|
|
58
57
|
eagerness: "medium",
|
|
@@ -78,14 +77,15 @@ const DEFAULT_MAX_SESSION_DURATION = 20 * 60 * 1e3;
|
|
|
78
77
|
const DEFAULT_REALTIME_MODEL_OPTIONS = {
|
|
79
78
|
model: "gpt-realtime",
|
|
80
79
|
voice: "marin",
|
|
81
|
-
temperature: DEFAULT_TEMPERATURE,
|
|
82
80
|
inputAudioTranscription: DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
|
|
81
|
+
inputAudioNoiseReduction: void 0,
|
|
83
82
|
turnDetection: DEFAULT_TURN_DETECTION,
|
|
84
83
|
toolChoice: DEFAULT_TOOL_CHOICE,
|
|
85
84
|
maxResponseOutputTokens: DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS,
|
|
86
85
|
maxSessionDuration: DEFAULT_MAX_SESSION_DURATION,
|
|
87
86
|
connOptions: import_agents.DEFAULT_API_CONNECT_OPTIONS,
|
|
88
|
-
modalities: ["text", "audio"]
|
|
87
|
+
modalities: ["text", "audio"],
|
|
88
|
+
tracing: void 0
|
|
89
89
|
};
|
|
90
90
|
class RealtimeModel extends import_agents.llm.RealtimeModel {
|
|
91
91
|
sampleRate = api_proto.SAMPLE_RATE;
|
|
@@ -94,6 +94,9 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
|
|
|
94
94
|
outFrameSize = api_proto.OUT_FRAME_SIZE;
|
|
95
95
|
/* @internal */
|
|
96
96
|
_options;
|
|
97
|
+
get model() {
|
|
98
|
+
return this._options.model;
|
|
99
|
+
}
|
|
97
100
|
constructor(options = {}) {
|
|
98
101
|
const modalities = options.modalities || DEFAULT_REALTIME_MODEL_OPTIONS.modalities;
|
|
99
102
|
super({
|
|
@@ -146,11 +149,10 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
|
|
|
146
149
|
* @param baseURL - Base URL for the API endpoint. If undefined, constructed from the azure_endpoint.
|
|
147
150
|
* @param voice - Voice setting for audio outputs. Defaults to "alloy".
|
|
148
151
|
* @param inputAudioTranscription - Options for transcribing input audio. Defaults to @see DEFAULT_INPUT_AUDIO_TRANSCRIPTION.
|
|
152
|
+
* @param inputAudioNoiseReduction - Options for noise reduction. Defaults to undefined.
|
|
149
153
|
* @param turnDetection - Options for server-based voice activity detection (VAD). Defaults to @see DEFAULT_SERVER_VAD_OPTIONS.
|
|
150
|
-
* @param temperature - Sampling temperature for response generation. Defaults to @see DEFAULT_TEMPERATURE.
|
|
151
154
|
* @param speed - Speed of the audio output. Defaults to 1.0.
|
|
152
|
-
* @param
|
|
153
|
-
* @param maxSessionDuration - Maximum duration of the session in milliseconds. Defaults to @see DEFAULT_MAX_SESSION_DURATION.
|
|
155
|
+
* @param tracing - Tracing configuration. Defaults to undefined.
|
|
154
156
|
*
|
|
155
157
|
* @returns A RealtimeModel instance configured for Azure OpenAI Service.
|
|
156
158
|
*
|
|
@@ -164,10 +166,13 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
|
|
|
164
166
|
entraToken,
|
|
165
167
|
baseURL,
|
|
166
168
|
voice = "alloy",
|
|
169
|
+
temperature,
|
|
170
|
+
// eslint-disable-line @typescript-eslint/no-unused-vars
|
|
167
171
|
inputAudioTranscription = AZURE_DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
|
|
172
|
+
inputAudioNoiseReduction,
|
|
168
173
|
turnDetection = AZURE_DEFAULT_TURN_DETECTION,
|
|
169
|
-
|
|
170
|
-
|
|
174
|
+
speed,
|
|
175
|
+
tracing
|
|
171
176
|
}) {
|
|
172
177
|
apiKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
|
|
173
178
|
if (!apiKey && !entraToken) {
|
|
@@ -193,9 +198,10 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
|
|
|
193
198
|
return new RealtimeModel({
|
|
194
199
|
voice,
|
|
195
200
|
inputAudioTranscription,
|
|
201
|
+
inputAudioNoiseReduction,
|
|
196
202
|
turnDetection,
|
|
197
|
-
temperature,
|
|
198
203
|
speed,
|
|
204
|
+
tracing,
|
|
199
205
|
apiKey,
|
|
200
206
|
azureDeployment,
|
|
201
207
|
apiVersion,
|
|
@@ -272,24 +278,31 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
272
278
|
this.messageChannel.put(command);
|
|
273
279
|
}
|
|
274
280
|
createSessionUpdateEvent() {
|
|
275
|
-
const
|
|
281
|
+
const audioFormat = { type: "audio/pcm", rate: SAMPLE_RATE };
|
|
282
|
+
const modality = this.oaiRealtimeModel._options.modalities.includes("audio") ? "audio" : "text";
|
|
276
283
|
return {
|
|
277
284
|
type: "session.update",
|
|
278
285
|
session: {
|
|
286
|
+
type: "realtime",
|
|
279
287
|
model: this.oaiRealtimeModel._options.model,
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
+
output_modalities: [modality],
|
|
289
|
+
audio: {
|
|
290
|
+
input: {
|
|
291
|
+
format: audioFormat,
|
|
292
|
+
noise_reduction: this.oaiRealtimeModel._options.inputAudioNoiseReduction,
|
|
293
|
+
transcription: this.oaiRealtimeModel._options.inputAudioTranscription,
|
|
294
|
+
turn_detection: this.oaiRealtimeModel._options.turnDetection
|
|
295
|
+
},
|
|
296
|
+
output: {
|
|
297
|
+
format: audioFormat,
|
|
298
|
+
speed: this.oaiRealtimeModel._options.speed,
|
|
299
|
+
voice: this.oaiRealtimeModel._options.voice
|
|
300
|
+
}
|
|
301
|
+
},
|
|
302
|
+
max_output_tokens: this.oaiRealtimeModel._options.maxResponseOutputTokens === Infinity ? "inf" : this.oaiRealtimeModel._options.maxResponseOutputTokens,
|
|
288
303
|
tool_choice: toOaiToolChoice(this.oaiRealtimeModel._options.toolChoice),
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
instructions: this.instructions,
|
|
292
|
-
speed: this.oaiRealtimeModel._options.speed
|
|
304
|
+
tracing: this.oaiRealtimeModel._options.tracing,
|
|
305
|
+
instructions: this.instructions
|
|
293
306
|
}
|
|
294
307
|
};
|
|
295
308
|
}
|
|
@@ -405,6 +418,7 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
405
418
|
return {
|
|
406
419
|
type: "session.update",
|
|
407
420
|
session: {
|
|
421
|
+
type: "realtime",
|
|
408
422
|
model: this.oaiRealtimeModel._options.model,
|
|
409
423
|
tools: oaiTools
|
|
410
424
|
},
|
|
@@ -416,6 +430,7 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
416
430
|
this.sendEvent({
|
|
417
431
|
type: "session.update",
|
|
418
432
|
session: {
|
|
433
|
+
type: "realtime",
|
|
419
434
|
instructions: _instructions
|
|
420
435
|
},
|
|
421
436
|
event_id: eventId
|
|
@@ -423,7 +438,9 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
423
438
|
this.instructions = _instructions;
|
|
424
439
|
}
|
|
425
440
|
updateOptions({ toolChoice }) {
|
|
426
|
-
const options = {
|
|
441
|
+
const options = {
|
|
442
|
+
type: "realtime"
|
|
443
|
+
};
|
|
427
444
|
this.oaiRealtimeModel._options.toolChoice = toolChoice;
|
|
428
445
|
options.tool_choice = toOaiToolChoice(toolChoice);
|
|
429
446
|
this.sendEvent({
|
|
@@ -522,8 +539,12 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
522
539
|
throw new Error("Microsoft API key or entraToken is required");
|
|
523
540
|
}
|
|
524
541
|
} else {
|
|
542
|
+
if (!this.oaiRealtimeModel._options.apiKey) {
|
|
543
|
+
throw new Error(
|
|
544
|
+
"OpenAI API key is required but not set. Check OPENAI_API_KEY environment variable."
|
|
545
|
+
);
|
|
546
|
+
}
|
|
525
547
|
headers.Authorization = `Bearer ${this.oaiRealtimeModel._options.apiKey}`;
|
|
526
|
-
headers["OpenAI-Beta"] = "realtime=v1";
|
|
527
548
|
}
|
|
528
549
|
const url = processBaseURL({
|
|
529
550
|
baseURL: this.oaiRealtimeModel._options.baseURL,
|
|
@@ -690,6 +711,7 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
690
711
|
case "response.output_item.added":
|
|
691
712
|
this.handleResponseOutputItemAdded(event);
|
|
692
713
|
break;
|
|
714
|
+
case "conversation.item.added":
|
|
693
715
|
case "conversation.item.created":
|
|
694
716
|
this.handleConversationItemCreated(event);
|
|
695
717
|
break;
|
|
@@ -708,21 +730,27 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
708
730
|
case "response.content_part.done":
|
|
709
731
|
this.handleResponseContentPartDone(event);
|
|
710
732
|
break;
|
|
733
|
+
case "response.output_text.delta":
|
|
711
734
|
case "response.text.delta":
|
|
712
735
|
this.handleResponseTextDelta(event);
|
|
713
736
|
break;
|
|
737
|
+
case "response.output_text.done":
|
|
714
738
|
case "response.text.done":
|
|
715
739
|
this.handleResponseTextDone(event);
|
|
716
740
|
break;
|
|
741
|
+
case "response.output_audio_transcript.delta":
|
|
717
742
|
case "response.audio_transcript.delta":
|
|
718
743
|
this.handleResponseAudioTranscriptDelta(event);
|
|
719
744
|
break;
|
|
745
|
+
case "response.output_audio.delta":
|
|
720
746
|
case "response.audio.delta":
|
|
721
747
|
this.handleResponseAudioDelta(event);
|
|
722
748
|
break;
|
|
749
|
+
case "response.output_audio_transcript.done":
|
|
723
750
|
case "response.audio_transcript.done":
|
|
724
751
|
this.handleResponseAudioTranscriptDone(event);
|
|
725
752
|
break;
|
|
753
|
+
case "response.output_audio.done":
|
|
726
754
|
case "response.audio.done":
|
|
727
755
|
this.handleResponseAudioDone(event);
|
|
728
756
|
break;
|
|
@@ -798,7 +826,8 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
798
826
|
const generationEv = {
|
|
799
827
|
messageStream: this.currentGeneration.messageChannel.stream(),
|
|
800
828
|
functionStream: this.currentGeneration.functionChannel.stream(),
|
|
801
|
-
userInitiated: false
|
|
829
|
+
userInitiated: false,
|
|
830
|
+
responseId: event.response.id
|
|
802
831
|
};
|
|
803
832
|
const clientEventId = (_a = event.response.metadata) == null ? void 0 : _a.client_event_id;
|
|
804
833
|
if (clientEventId) {
|
|
@@ -918,11 +947,12 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
918
947
|
this.#logger.warn(`itemGeneration not found for itemId=${itemId}`);
|
|
919
948
|
return;
|
|
920
949
|
}
|
|
921
|
-
|
|
950
|
+
const isTextType = itemType === "text" || itemType === "output_text";
|
|
951
|
+
if (isTextType && this.oaiRealtimeModel.capabilities.audioOutput) {
|
|
922
952
|
this.#logger.warn("Text response received from OpenAI Realtime API in audio modality.");
|
|
923
953
|
}
|
|
924
954
|
if (!itemGeneration.modalities.done) {
|
|
925
|
-
const modalityResult =
|
|
955
|
+
const modalityResult = isTextType ? ["text"] : ["audio", "text"];
|
|
926
956
|
itemGeneration.modalities.resolve(modalityResult);
|
|
927
957
|
}
|
|
928
958
|
if (this.currentGeneration._firstTokenTimestamp === void 0) {
|
|
@@ -930,6 +960,9 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
930
960
|
}
|
|
931
961
|
}
|
|
932
962
|
handleResponseContentPartDone(event) {
|
|
963
|
+
if (!event.part) {
|
|
964
|
+
return;
|
|
965
|
+
}
|
|
933
966
|
if (event.part.type !== "text") {
|
|
934
967
|
return;
|
|
935
968
|
}
|
|
@@ -1020,11 +1053,13 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
1020
1053
|
if (!item.call_id || !item.name || !item.arguments) {
|
|
1021
1054
|
throw new Error("item is not a function call");
|
|
1022
1055
|
}
|
|
1023
|
-
this.currentGeneration.functionChannel.write(
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1056
|
+
this.currentGeneration.functionChannel.write(
|
|
1057
|
+
import_agents.llm.FunctionCall.create({
|
|
1058
|
+
callId: item.call_id,
|
|
1059
|
+
name: item.name,
|
|
1060
|
+
args: item.arguments
|
|
1061
|
+
})
|
|
1062
|
+
);
|
|
1028
1063
|
} else if (itemType === "message") {
|
|
1029
1064
|
const itemGeneration = this.currentGeneration.messages.get(itemId);
|
|
1030
1065
|
if (!itemGeneration) {
|
|
@@ -1157,7 +1192,8 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
|
|
|
1157
1192
|
const generation_ev = {
|
|
1158
1193
|
messageStream: this.currentGeneration.messageChannel.stream(),
|
|
1159
1194
|
functionStream: this.currentGeneration.functionChannel.stream(),
|
|
1160
|
-
userInitiated: false
|
|
1195
|
+
userInitiated: false,
|
|
1196
|
+
responseId
|
|
1161
1197
|
};
|
|
1162
1198
|
const handle = this.responseCreatedFutures[responseId];
|
|
1163
1199
|
if (handle) {
|