@livekit/agents-plugin-openai 1.0.40 → 1.0.42

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import type { metrics } from '@livekit/agents';
5
4
  import {
6
5
  type APIConnectOptions,
7
6
  APIConnectionError,
@@ -11,11 +10,14 @@ import {
11
10
  Future,
12
11
  Queue,
13
12
  Task,
13
+ type TimedString,
14
14
  cancelAndWait,
15
+ createTimedString,
15
16
  delay,
16
17
  isAPIError,
17
18
  llm,
18
19
  log,
20
+ type metrics,
19
21
  shortuuid,
20
22
  stream,
21
23
  } from '@livekit/agents';
@@ -61,7 +63,7 @@ interface RealtimeOptions {
61
63
 
62
64
  interface MessageGeneration {
63
65
  messageId: string;
64
- textChannel: stream.StreamChannel<string>;
66
+ textChannel: stream.StreamChannel<string | TimedString>;
65
67
  audioChannel: stream.StreamChannel<AudioFrame>;
66
68
  audioTranscript: string;
67
69
  modalities: Future<('text' | 'audio')[]>;
@@ -1107,7 +1109,7 @@ export class RealtimeSession extends llm.RealtimeSession {
1107
1109
  const modalitiesFut = new Future<Modality[]>();
1108
1110
  const itemGeneration: MessageGeneration = {
1109
1111
  messageId: itemId,
1110
- textChannel: stream.createStreamChannel<string>(),
1112
+ textChannel: stream.createStreamChannel<string | TimedString>(),
1111
1113
  audioChannel: stream.createStreamChannel<AudioFrame>(),
1112
1114
  audioTranscript: '',
1113
1115
  modalities: modalitiesFut,
@@ -1274,16 +1276,19 @@ export class RealtimeSession extends llm.RealtimeSession {
1274
1276
  }
1275
1277
 
1276
1278
  const itemId = event.item_id;
1277
- const delta = event.delta;
1278
1279
 
1279
- // TODO (shubhra): add timed string support
1280
+ // When start_time is provided, wrap the delta in a TimedString for aligned transcripts
1281
+ let delta: string | TimedString = event.delta;
1282
+ if (event.start_time !== undefined) {
1283
+ delta = createTimedString({ text: event.delta, startTime: event.start_time });
1284
+ }
1280
1285
 
1281
1286
  const itemGeneration = this.currentGeneration.messages.get(itemId);
1282
1287
  if (!itemGeneration) {
1283
1288
  throw new Error('itemGeneration is not set');
1284
1289
  } else {
1285
1290
  itemGeneration.textChannel.write(delta);
1286
- itemGeneration.audioTranscript += delta;
1291
+ itemGeneration.audioTranscript += event.delta;
1287
1292
  }
1288
1293
  }
1289
1294
 
package/src/tts.ts CHANGED
@@ -43,7 +43,7 @@ export class TTS extends tts.TTS {
43
43
  super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });
44
44
 
45
45
  this.#opts = { ...defaultTTSOptions, ...opts };
46
- if (this.#opts.apiKey === undefined) {
46
+ if (this.#opts.apiKey === undefined && !this.#opts.client) {
47
47
  throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');
48
48
  }
49
49