@livekit/agents-plugin-openai 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/llm.ts CHANGED
@@ -398,6 +398,7 @@ export class LLM extends llm.LLM {
     temperature = temperature || this.#opts.temperature;
 
     return new LLMStream(
+      this,
       this.#client,
       chatCtx,
       fncCtx,
@@ -416,8 +417,10 @@ export class LLMStream extends llm.LLMStream {
   #client: OpenAI;
   #logger = log();
   #id = randomUUID();
+  label = 'openai.LLMStream';
 
   constructor(
+    llm: LLM,
     client: OpenAI,
     chatCtx: llm.ChatContext,
     fncCtx: llm.FunctionContext | undefined,
@@ -426,7 +429,7 @@ export class LLMStream extends llm.LLMStream {
     temperature?: number,
     n?: number,
   ) {
-    super(chatCtx, fncCtx);
+    super(llm, chatCtx, fncCtx);
     this.#client = client;
     this.#run(opts, n, parallelToolCalls, temperature);
   }
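The stream now carries a back-reference to the LLM that created it, plus a label for log and metrics attribution. Caller code is unaffected, since the plugin passes this internally. A minimal consumption sketch, assuming the 0.x chat() options object and ChatChunk shape:

import { llm } from '@livekit/agents';
import { LLM } from '@livekit/agents-plugin-openai';

const model = new LLM({ model: 'gpt-4o-mini' });
const chatCtx = new llm.ChatContext().append({
  role: llm.ChatRole.USER,
  text: 'Say hello.',
});

// chat() constructs the LLMStream, passing `this` so the framework can
// attribute the stream (label 'openai.LLMStream') back to its parent LLM.
const stream = model.chat({ chatCtx });
for await (const chunk of stream) {
  process.stdout.write(chunk.choices[0]?.delta.content ?? '');
}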
package/src/models.ts CHANGED
@@ -53,6 +53,7 @@ export type GroqChatModels =
   | 'llama-3.1-405b-reasoning'
   | 'llama-3.1-70b-versatile'
   | 'llama-3.1-8b-instant'
+  | 'llama-3.3-70b-versatile'
   | 'llama3-groq-70b-8192-tool-use-preview'
   | 'llama3-groq-8b-8192-tool-use-preview'
   | 'llama-guard-3-8b'
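The only change here is a new Groq model id. A sketch of selecting it, assuming the plugin's LLM.withGroq() helper (alongside the other provider helpers) and GROQ_API_KEY in the environment:

import { LLM } from '@livekit/agents-plugin-openai';

// 'llama-3.3-70b-versatile' now typechecks as a GroqChatModels member.
const groqLLM = LLM.withGroq({ model: 'llama-3.3-70b-versatile' });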
package/src/realtime/realtime_model.ts CHANGED
@@ -8,6 +8,7 @@ import {
   llm,
   log,
   mergeFrames,
+  metrics,
   multimodal,
 } from '@livekit/agents';
 import { AudioFrame } from '@livekit/rtc-node';
@@ -40,6 +41,8 @@ export interface RealtimeResponse {
   usage: api_proto.ModelUsage | null;
   output: RealtimeOutput[];
   doneFut: Future;
+  createdTimestamp: number;
+  firstTokenTimestamp?: number;
 }
 
 export interface RealtimeOutput {
@@ -932,6 +935,7 @@ export class RealtimeSession extends multimodal.RealtimeSession {
       usage: null,
       output: [],
       doneFut: doneFut,
+      createdTimestamp: Date.now(),
     };
     this.#pendingResponses[newResponse.id] = newResponse;
     this.emit('response_created', newResponse);
@@ -946,7 +950,70 @@ export class RealtimeSession extends multimodal.RealtimeSession {
     response.usage = responseData.usage ?? null;
     this.#pendingResponses[responseId] = response;
     response.doneFut.resolve();
+
+    let metricsError: Error | undefined;
+    let cancelled = false;
+    switch (response.status) {
+      case 'failed': {
+        if (response.statusDetails.type !== 'failed') break;
+        const err = response.statusDetails.error;
+        metricsError = new metrics.MultimodalLLMError({
+          type: response.statusDetails.type,
+          code: err?.code,
+          message: err?.message,
+        });
+        this.#logger
+          .child({ code: err?.code, error: err?.message })
+          .error('response generation failed');
+        break;
+      }
+      case 'incomplete': {
+        if (response.statusDetails.type !== 'incomplete') break;
+        const reason = response.statusDetails.reason;
+        metricsError = new metrics.MultimodalLLMError({
+          type: response.statusDetails.type,
+          reason,
+        });
+        this.#logger.child({ reason }).error('response generation incomplete');
+        break;
+      }
+      case 'cancelled': {
+        cancelled = true;
+        break;
+      }
+    }
     this.emit('response_done', response);
+
+    let ttft: number | undefined;
+    if (response.firstTokenTimestamp) {
+      ttft = response.firstTokenTimestamp - response.createdTimestamp;
+    }
+    const duration = Date.now() - response.createdTimestamp;
+
+    const usage = response.usage;
+    const metric: metrics.MultimodalLLMMetrics = {
+      timestamp: response.createdTimestamp,
+      requestId: response.id,
+      ttft: ttft!,
+      duration,
+      cancelled,
+      label: this.constructor.name,
+      completionTokens: usage?.output_tokens || 0,
+      promptTokens: usage?.input_tokens || 0,
+      totalTokens: usage?.total_tokens || 0,
+      tokensPerSecond: ((usage?.output_tokens || 0) / duration) * 1000,
+      error: metricsError,
+      inputTokenDetails: {
+        cachedTokens: usage?.input_token_details.cached_tokens || 0,
+        textTokens: usage?.input_token_details.text_tokens || 0,
+        audioTokens: usage?.input_token_details.audio_tokens || 0,
+      },
+      outputTokenDetails: {
+        textTokens: usage?.output_token_details.text_tokens || 0,
+        audioTokens: usage?.output_token_details.audio_tokens || 0,
+      },
+    };
+    this.emit('metrics_collected', metric);
   }
 
   #handleResponseOutputItemAdded(event: api_proto.ResponseOutputItemAddedEvent): void {
@@ -1062,6 +1129,7 @@ export class RealtimeSession extends multimodal.RealtimeSession {
       toolCalls: [],
     };
     output?.content.push(newContent);
+    response!.firstTokenTimestamp = Date.now();
     this.emit('response_content_added', newContent);
   }
 
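Each response is now stamped with createdTimestamp at creation and firstTokenTimestamp when its first content part arrives, and the response-done handler emits a metrics_collected event derived from the two. Note that ttft is asserted non-null (ttft!) yet remains undefined for responses that never produced content. A sketch of consuming the event, assuming `session` is an active RealtimeSession (an EventEmitter-style object, per the emit() calls above):

import { metrics } from '@livekit/agents';

session.on('metrics_collected', (m: metrics.MultimodalLLMMetrics) => {
  console.log(
    `response ${m.requestId}: ttft=${m.ttft}ms, duration=${m.duration}ms, ` +
      `${m.totalTokens} tokens (${m.tokensPerSecond.toFixed(1)} tok/s)`,
  );
});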
package/src/stt.ts CHANGED
@@ -25,6 +25,7 @@ const defaultSTTOptions: STTOptions = {
 export class STT extends stt.STT {
   #opts: STTOptions;
   #client: OpenAI;
+  label = 'openai.STT';
 
   /**
    * Create a new instance of OpenAI STT.
@@ -108,7 +109,7 @@ export class STT extends stt.STT {
     return Buffer.concat([header, Buffer.from(frame.data.buffer)]);
   }
 
-  async recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent> {
+  async _recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent> {
     const config = this.#sanitizeOptions(language);
     buffer = mergeFrames(buffer);
     const file = new File([this.#createWav(buffer)], 'audio.wav', { type: 'audio/wav' });
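recognize() becomes _recognize(), which suggests the stt.STT base class now owns the public entry point and wraps the provider implementation (the new label field points the same way, toward centralized metrics and logging). Callers should be unaffected; a sketch, assuming the public wrapper keeps the old signature:

import { STT } from '@livekit/agents-plugin-openai';

const sttEngine = new STT();
// audioBuffer: an AudioBuffer of captured frames (assumed available).
const event = await sttEngine.recognize(audioBuffer);
console.log(event.alternatives[0]?.text);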
package/src/tts.ts CHANGED
@@ -2,6 +2,8 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import { AudioByteStream, tts } from '@livekit/agents';
+import type { AudioFrame } from '@livekit/rtc-node';
+import { randomUUID } from 'crypto';
 import { OpenAI } from 'openai';
 import type { TTSModels, TTSVoices } from './models.js';
 
@@ -27,6 +29,7 @@ const defaultTTSOptions: TTSOptions = {
 export class TTS extends tts.TTS {
   #opts: TTSOptions;
   #client: OpenAI;
+  label = 'openai.TTS';
 
   /**
    * Create a new instance of OpenAI TTS.
@@ -57,6 +60,8 @@ export class TTS extends tts.TTS {
 
   synthesize(text: string): ChunkedStream {
     return new ChunkedStream(
+      this,
+      text,
       this.#client.audio.speech.create({
         input: text,
         model: this.#opts.model,
@@ -73,25 +78,34 @@ export class TTS extends tts.TTS {
 }
 
 export class ChunkedStream extends tts.ChunkedStream {
+  label = 'openai.ChunkedStream';
+
   // set Promise<T> to any because OpenAI returns an annoying Response type
-  constructor(stream: Promise<any>) {
-    super();
+  constructor(tts: TTS, text: string, stream: Promise<any>) {
+    super(text, tts);
     this.#run(stream);
   }
 
   async #run(stream: Promise<Response>) {
     const buffer = await stream.then((r) => r.arrayBuffer());
-    const requestId = crypto.randomUUID();
+    const requestId = randomUUID();
     const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);
     const frames = audioByteStream.write(buffer);
 
+    let lastFrame: AudioFrame | undefined;
+    const sendLastFrame = (segmentId: string, final: boolean) => {
+      if (lastFrame) {
+        this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+        lastFrame = undefined;
+      }
+    };
+
     for (const frame of frames) {
-      this.queue.put({
-        frame,
-        requestId,
-        segmentId: requestId,
-      });
+      sendLastFrame(requestId, false);
+      lastFrame = frame;
     }
+    sendLastFrame(requestId, true);
+
     this.queue.close();
   }
 }
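The rewritten #run holds each frame back by one iteration so the last frame can be flagged final: true when the segment ends (it also swaps the Web Crypto crypto.randomUUID() for Node's randomUUID). The hold-back-one pattern in isolation:

// Emit each item one step late so the final item can carry final=true,
// mirroring sendLastFrame() above.
function* markLast<T>(items: Iterable<T>): Generator<{ item: T; final: boolean }> {
  let pending: T | undefined;
  let hasPending = false;
  for (const item of items) {
    if (hasPending) yield { item: pending as T, final: false };
    pending = item;
    hasPending = true;
  }
  if (hasPending) yield { item: pending as T, final: true };
}

// [...markLast(['a', 'b'])]
// => [{ item: 'a', final: false }, { item: 'b', final: true }]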