@livekit/agents-plugin-openai 1.0.35 → 1.0.36

Files changed (46)
  1. package/dist/llm.cjs +2 -2
  2. package/dist/llm.cjs.map +1 -1
  3. package/dist/llm.js +2 -2
  4. package/dist/llm.js.map +1 -1
  5. package/dist/realtime/api_proto.cjs.map +1 -1
  6. package/dist/realtime/api_proto.d.cts +6 -2
  7. package/dist/realtime/api_proto.d.ts +6 -2
  8. package/dist/realtime/api_proto.d.ts.map +1 -1
  9. package/dist/realtime/api_proto.js.map +1 -1
  10. package/dist/realtime/realtime_model.cjs +16 -30
  11. package/dist/realtime/realtime_model.cjs.map +1 -1
  12. package/dist/realtime/realtime_model.d.cts +3 -1
  13. package/dist/realtime/realtime_model.d.ts +3 -1
  14. package/dist/realtime/realtime_model.d.ts.map +1 -1
  15. package/dist/realtime/realtime_model.js +14 -29
  16. package/dist/realtime/realtime_model.js.map +1 -1
  17. package/dist/realtime/realtime_model.test.cjs +106 -0
  18. package/dist/realtime/realtime_model.test.cjs.map +1 -0
  19. package/dist/realtime/realtime_model.test.d.cts +2 -0
  20. package/dist/realtime/realtime_model.test.d.ts +2 -0
  21. package/dist/realtime/realtime_model.test.d.ts.map +1 -0
  22. package/dist/realtime/realtime_model.test.js +105 -0
  23. package/dist/realtime/realtime_model.test.js.map +1 -0
  24. package/dist/realtime/realtime_model_beta.cjs +0 -26
  25. package/dist/realtime/realtime_model_beta.cjs.map +1 -1
  26. package/dist/realtime/realtime_model_beta.d.cts +0 -1
  27. package/dist/realtime/realtime_model_beta.d.ts +0 -1
  28. package/dist/realtime/realtime_model_beta.d.ts.map +1 -1
  29. package/dist/realtime/realtime_model_beta.js +0 -26
  30. package/dist/realtime/realtime_model_beta.js.map +1 -1
  31. package/dist/stt.cjs +2 -2
  32. package/dist/stt.cjs.map +1 -1
  33. package/dist/stt.js +2 -2
  34. package/dist/stt.js.map +1 -1
  35. package/dist/tts.cjs +2 -2
  36. package/dist/tts.cjs.map +1 -1
  37. package/dist/tts.js +2 -2
  38. package/dist/tts.js.map +1 -1
  39. package/package.json +7 -7
  40. package/src/llm.ts +2 -2
  41. package/src/realtime/api_proto.ts +12 -2
  42. package/src/realtime/realtime_model.test.ts +129 -0
  43. package/src/realtime/realtime_model.ts +28 -36
  44. package/src/realtime/realtime_model_beta.ts +2 -31
  45. package/src/stt.ts +2 -2
  46. package/src/tts.ts +2 -2
package/dist/tts.js CHANGED
@@ -27,8 +27,8 @@ class TTS extends tts.TTS {
       throw new Error("OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY");
     }
     this.#client = this.#opts.client || new OpenAI({
-      baseURL: opts.baseURL,
-      apiKey: opts.apiKey
+      baseURL: this.#opts.baseURL,
+      apiKey: this.#opts.apiKey
     });
   }
   updateOptions(opts) {
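This is the substantive fix in this release: the OpenAI client is now built from the merged `this.#opts` instead of the raw constructor argument, so values supplied by defaults (most importantly the API key read from `$OPENAI_API_KEY`) actually reach the client. The same two-line change appears in `llm.ts`, `stt.ts`, and `tts.ts` below. A minimal sketch of the failure mode, using a simplified options shape rather than the plugin's real types:

```ts
// Simplified reproduction of the constructor logic in tts.ts.
const defaults = { apiKey: process.env.OPENAI_API_KEY, model: 'tts-1' };
const opts: { apiKey?: string; model?: string } = { model: 'gpt-4o-mini-tts' };

// The constructor merges defaults into a private field...
const merged = { ...defaults, ...opts };
// ...and the API-key guard validates the *merged* options:
if (merged.apiKey === undefined) throw new Error('OpenAI API key is required');

// 1.0.35 built the client from the raw argument, whose apiKey is undefined here;
// 1.0.36 builds it from the merged options, which include the env default.
console.log(opts.apiKey); // undefined
console.log(merged.apiKey); // value of $OPENAI_API_KEY
```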
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n private abortController = new AbortController();\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create(\n {\n input: text,\n model: this.#opts.model,\n voice: this.#opts.voice,\n instructions: this.#opts.instructions,\n response_format: 'pcm',\n speed: this.#opts.speed,\n },\n { signal: abortSignal },\n ),\n connOptions,\n abortSignal,\n );\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on OpenAI TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'openai.ChunkedStream';\n private stream: Promise<any>;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(\n tts: TTS,\n text: string,\n stream: Promise<any>,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.stream = stream;\n }\n\n protected async run() {\n try {\n const buffer = await this.stream.then((r) => r.arrayBuffer());\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);\n const frames = audioByteStream.write(buffer);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n } catch (error) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n throw error;\n } finally {\n 
this.queue.close();\n }\n }\n}\n"],"mappings":"AAGA,SAAiC,iBAAiB,WAAW,WAAW;AAExE,SAAS,cAAc;AAGvB,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAS9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM,wBAAwB,qBAAqB,EAAE,WAAW,MAAM,CAAC;AAEvE,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACL;AAAA,EAEA,cAAc,MAAyE;AACrF,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI;AAAA,MACT;AAAA,MACA;AAAA,MACA,KAAK,QAAQ,MAAM,OAAO;AAAA,QACxB;AAAA,UACE,OAAO;AAAA,UACP,OAAO,KAAK,MAAM;AAAA,UAClB,OAAO,KAAK,MAAM;AAAA,UAClB,cAAc,KAAK,MAAM;AAAA,UACzB,iBAAiB;AAAA,UACjB,OAAO,KAAK,MAAM;AAAA,QACpB;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,QACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAgB,MAAM;AACpB,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC;AAC5D,YAAM,YAAY,UAAU;AAC5B,YAAM,kBAAkB,IAAI,gBAAgB,wBAAwB,mBAAmB;AACvF,YAAM,SAAS,gBAAgB,MAAM,MAAM;AAE3C,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,iBAAW,SAAS,QAAQ;AAC1B,sBAAc,WAAW,KAAK;AAC9B,oBAAY;AAAA,MACd;AACA,oBAAc,WAAW,IAAI;AAE7B,WAAK,MAAM,MAAM;AAAA,IACnB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n private abortController = new AbortController();\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: this.#opts.baseURL,\n apiKey: this.#opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create(\n {\n input: text,\n model: this.#opts.model,\n voice: this.#opts.voice,\n instructions: this.#opts.instructions,\n response_format: 'pcm',\n speed: this.#opts.speed,\n },\n { signal: abortSignal },\n ),\n connOptions,\n abortSignal,\n );\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on OpenAI TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'openai.ChunkedStream';\n private stream: Promise<any>;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(\n tts: TTS,\n text: string,\n stream: Promise<any>,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.stream = stream;\n }\n\n protected async run() {\n try {\n const buffer = await this.stream.then((r) => r.arrayBuffer());\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);\n const frames = audioByteStream.write(buffer);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n } catch (error) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n throw error;\n } finally {\n 
this.queue.close();\n }\n }\n}\n"],"mappings":"AAGA,SAAiC,iBAAiB,WAAW,WAAW;AAExE,SAAS,cAAc;AAGvB,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAS9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM,wBAAwB,qBAAqB,EAAE,WAAW,MAAM,CAAC;AAEvE,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK,MAAM;AAAA,MACpB,QAAQ,KAAK,MAAM;AAAA,IACrB,CAAC;AAAA,EACL;AAAA,EAEA,cAAc,MAAyE;AACrF,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI;AAAA,MACT;AAAA,MACA;AAAA,MACA,KAAK,QAAQ,MAAM,OAAO;AAAA,QACxB;AAAA,UACE,OAAO;AAAA,UACP,OAAO,KAAK,MAAM;AAAA,UAClB,OAAO,KAAK,MAAM;AAAA,UAClB,cAAc,KAAK,MAAM;AAAA,UACzB,iBAAiB;AAAA,UACjB,OAAO,KAAK,MAAM;AAAA,QACpB;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,QACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAgB,MAAM;AACpB,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC;AAC5D,YAAM,YAAY,UAAU;AAC5B,YAAM,kBAAkB,IAAI,gBAAgB,wBAAwB,mBAAmB;AACvF,YAAM,SAAS,gBAAgB,MAAM,MAAM;AAE3C,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,iBAAW,SAAS,QAAQ;AAC1B,sBAAc,WAAW,KAAK;AAC9B,oBAAY;AAAA,MACd;AACA,oBAAc,WAAW,IAAI;AAE7B,WAAK,MAAM,MAAM;AAAA,IACnB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents-plugin-openai",
-  "version": "1.0.35",
+  "version": "1.0.36",
   "description": "OpenAI plugin for LiveKit Node Agents",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -25,14 +25,14 @@
     "README.md"
   ],
   "devDependencies": {
-    "@livekit/rtc-node": "^0.13.22",
+    "@livekit/rtc-node": "^0.13.24",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
-    "@livekit/agents": "1.0.35",
-    "@livekit/agents-plugin-silero": "1.0.35",
-    "@livekit/agents-plugins-test": "1.0.35"
+    "@livekit/agents": "1.0.36",
+    "@livekit/agents-plugin-silero": "1.0.36",
+    "@livekit/agents-plugins-test": "1.0.36"
   },
   "dependencies": {
     "@livekit/mutex": "^1.1.1",
@@ -40,8 +40,8 @@
     "ws": "^8.18.0"
   },
   "peerDependencies": {
-    "@livekit/rtc-node": "^0.13.22",
-    "@livekit/agents": "1.0.35"
+    "@livekit/rtc-node": "^0.13.24",
+    "@livekit/agents": "1.0.36"
   },
   "scripts": {
     "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/llm.ts CHANGED
@@ -73,8 +73,8 @@ export class LLM extends llm.LLM {
     this.#client =
       this.#opts.client ||
       new OpenAI({
-        baseURL: opts.baseURL,
-        apiKey: opts.apiKey,
+        baseURL: this.#opts.baseURL,
+        apiKey: this.#opts.apiKey,
       });
   }
 
package/src/realtime/api_proto.ts CHANGED
@@ -167,13 +167,23 @@ export interface TextContent {
   text: string;
 }
 
+export interface OutputTextContent {
+  type: 'output_text';
+  text: string;
+}
+
 export interface AudioContent {
   type: 'audio';
   audio: AudioBase64Bytes;
   transcript: string;
 }
 
-export type Content = InputTextContent | InputAudioContent | TextContent | AudioContent;
+export type Content =
+  | InputTextContent
+  | InputAudioContent
+  | TextContent
+  | OutputTextContent
+  | AudioContent;
 export type ContentPart = {
   type: 'text' | 'audio' | 'output_text' | 'output_audio'; // GA: output_text/output_audio
   audio?: AudioBase64Bytes;
@@ -202,7 +212,7 @@ export interface UserItem extends BaseItem {
 export interface AssistantItem extends BaseItem {
   type: 'message';
   role: 'assistant';
-  content: (TextContent | AudioContent)[];
+  content: (TextContent | OutputTextContent | AudioContent)[];
 }
 
 export interface FunctionCallItem extends BaseItem {
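The `output_text` addition tracks the GA Realtime API, where assistant-authored text uses `output_text` while user and system text remains `input_text`. A small illustration of the two variants as typed by this file (the literal values are made up):

```ts
import type * as api_proto from './api_proto.js';

// Assistant-authored text in the GA protocol:
const assistantText: api_proto.OutputTextContent = {
  type: 'output_text',
  text: 'Hello, how can I help you?',
};

// User/system text is unchanged:
const userText: api_proto.InputTextContent = {
  type: 'input_text',
  text: 'What is the weather like?',
};
```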
package/src/realtime/realtime_model.test.ts ADDED
@@ -0,0 +1,129 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { llm } from '@livekit/agents';
+import { describe, expect, it } from 'vitest';
+import type * as api_proto from './api_proto.js';
+import { livekitItemToOpenAIItem } from './realtime_model.js';
+
+describe('livekitItemToOpenAIItem', () => {
+  describe('message items', () => {
+    it('should use output_text type for assistant messages', () => {
+      const assistantMessage = new llm.ChatMessage({
+        role: 'assistant',
+        content: 'Hello, how can I help you?',
+        id: 'test-assistant-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(assistantMessage) as api_proto.AssistantItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('assistant');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('output_text');
+      expect((content as api_proto.OutputTextContent).text).toBe('Hello, how can I help you?');
+    });
+
+    it('should use input_text type for user messages', () => {
+      const userMessage = new llm.ChatMessage({
+        role: 'user',
+        content: 'What is the weather like?',
+        id: 'test-user-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(userMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('user');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+      expect((content as api_proto.InputTextContent).text).toBe('What is the weather like?');
+    });
+
+    it('should use input_text type for system messages', () => {
+      const systemMessage = new llm.ChatMessage({
+        role: 'system',
+        content: 'You are a helpful assistant.',
+        id: 'test-system-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(systemMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('system');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+    });
+
+    it('should convert developer role to system role', () => {
+      const developerMessage = new llm.ChatMessage({
+        role: 'developer',
+        content: 'System instructions.',
+        id: 'test-developer-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(developerMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('system');
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+    });
+
+    it('should handle multiple content items for assistant', () => {
+      const multiContentMessage = new llm.ChatMessage({
+        role: 'assistant',
+        content: ['First part.', 'Second part.'],
+        id: 'test-multi-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(multiContentMessage) as api_proto.AssistantItem;
+
+      expect(result.content).toHaveLength(2);
+      const content0 = result.content[0]!;
+      const content1 = result.content[1]!;
+      expect(content0.type).toBe('output_text');
+      expect(content1.type).toBe('output_text');
+    });
+  });
+
+  describe('function_call items', () => {
+    it('should convert function call items correctly', () => {
+      const functionCall = new llm.FunctionCall({
+        callId: 'call-123',
+        name: 'get_weather',
+        args: '{"location": "San Francisco"}',
+        id: 'test-func-call',
+      });
+
+      const result = livekitItemToOpenAIItem(functionCall) as api_proto.FunctionCallItem;
+
+      expect(result.type).toBe('function_call');
+      expect(result.id).toBe('test-func-call');
+      expect(result.call_id).toBe('call-123');
+      expect(result.name).toBe('get_weather');
+      expect(result.arguments).toBe('{"location": "San Francisco"}');
+    });
+  });
+
+  describe('function_call_output items', () => {
+    it('should convert function call output items correctly', () => {
+      const functionOutput = new llm.FunctionCallOutput({
+        callId: 'call-123',
+        output: 'The weather in San Francisco is sunny.',
+        isError: false,
+        id: 'test-func-output',
+      });
+
+      const result = livekitItemToOpenAIItem(functionOutput) as api_proto.FunctionCallOutputItem;
+
+      expect(result.type).toBe('function_call_output');
+      expect(result.id).toBe('test-func-output');
+      expect(result.call_id).toBe('call-123');
+      expect(result.output).toBe('The weather in San Francisco is sunny.');
+    });
+  });
+});
package/src/realtime/realtime_model.ts CHANGED
@@ -381,6 +381,10 @@ export class RealtimeSession extends llm.RealtimeSession {
   private itemCreateFutures: { [id: string]: Future } = {};
   private itemDeleteFutures: { [id: string]: Future } = {};
 
+  // Track items that have real server-side audio (created in current session, not restored)
+  // Items restored after reconnection are text-only and cannot be truncated
+  private audioCapableItemIds: Set<string> = new Set();
+
   private updateChatCtxLock = new Mutex();
   private updateFuncCtxLock = new Mutex();
 
@@ -673,7 +677,12 @@
     modalities?: Modality[];
     audioTranscript?: string;
   }): Promise<void> {
-    if (!_options.modalities || _options.modalities.includes('audio')) {
+    // Check if modalities include audio AND the item has real server-side audio
+    // Items restored after reconnection are text-only and cannot be truncated
+    const hasAudioModality = !_options.modalities || _options.modalities.includes('audio');
+    const hasServerSideAudio = this.audioCapableItemIds.has(_options.messageId);
+
+    if (hasAudioModality && hasServerSideAudio) {
       this.sendEvent({
         type: 'conversation.item.truncate',
         content_index: 0,
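The old condition only looked at the requested modalities, so after a reconnect (where the session replays chat history as text-only items) the plugin could send `conversation.item.truncate` for an item that has no server-side audio. A sketch of the new gate, restated as a standalone predicate for clarity (`canTruncate` is illustrative, not a function in the plugin):

```ts
// Truncation is attempted only for items that (a) were generated with the
// audio modality and (b) still have real audio on the server in this session.
function canTruncate(
  modalities: string[] | undefined,
  messageId: string,
  audioCapableItemIds: Set<string>,
): boolean {
  const hasAudioModality = !modalities || modalities.includes('audio');
  const hasServerSideAudio = audioCapableItemIds.has(messageId);
  return hasAudioModality && hasServerSideAudio;
}

// A restored item was never added to the set, so no truncate event is sent:
canTruncate(['audio', 'text'], 'restored-item', new Set()); // false
// An item generated with audio in the current session passes:
canTruncate(['audio', 'text'], 'live-item', new Set(['live-item'])); // true
```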
@@ -811,6 +820,9 @@
     }
     this.itemDeleteFutures = {};
 
+    // Clear audio-capable item tracking - restored items are text-only on the server
+    this.audioCapableItemIds.clear();
+
     const events: api_proto.ClientEvent[] = [];
 
     // options and instructions
@@ -1169,16 +1181,11 @@
       throw new Error('item.type is not set');
     }
 
-    if (!event.response_id) {
-      throw new Error('response_id is not set');
-    }
-
     const itemType = event.item.type;
-    const responseId = event.response_id;
 
     if (itemType !== 'message') {
-      // emit immediately if it's not a message, otherwise wait response.content_part.added
-      this.resolveGeneration(responseId);
+      // non-message items (e.g. function calls) don't need additional handling here
+      // the generation event was already emitted in handleResponseCreated
       this.textModeRecoveryRetries = 0;
       return;
     }
@@ -1236,6 +1243,9 @@
       throw new Error('item_id is not set');
     }
 
+    // Clean up audio-capable tracking for deleted items
+    this.audioCapableItemIds.delete(event.item_id);
+
     try {
       this.remoteChatCtx.delete(event.item_id);
     } catch (error) {
@@ -1302,6 +1312,11 @@
     if (!itemGeneration.modalities.done) {
       const modalityResult: Modality[] = isTextType ? ['text'] : ['audio', 'text'];
       itemGeneration.modalities.resolve(modalityResult);
+
+      // Track items with real server-side audio for truncation eligibility
+      if (!isTextType) {
+        this.audioCapableItemIds.add(itemId);
+      }
     }
 
     if (this.currentGeneration._firstTokenTimestamp === undefined) {
@@ -1598,33 +1613,10 @@
 
     return handle;
   }
-
-  private resolveGeneration(responseId: string): void {
-    if (!this.currentGeneration) {
-      throw new Error('currentGeneration is not set');
-    }
-
-    const generation_ev = {
-      messageStream: this.currentGeneration.messageChannel.stream(),
-      functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false,
-      responseId,
-    } as llm.GenerationCreatedEvent;
-
-    const handle = this.responseCreatedFutures[responseId];
-    if (handle) {
-      delete this.responseCreatedFutures[responseId];
-      generation_ev.userInitiated = true;
-      if (handle.doneFut.done) {
-        this.#logger.warn({ responseId }, 'response received after timeout');
-      } else {
-        handle.doneFut.resolve(generation_ev);
-      }
-    }
-  }
 }
 
-function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
+/** @internal Exported for testing purposes */
+export function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
   switch (item.type) {
     case 'function_call':
       return {
@@ -1647,9 +1639,9 @@ function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
       for (const c of item.content) {
         if (typeof c === 'string') {
           contentList.push({
-            type: role === 'assistant' ? 'text' : 'input_text',
+            type: role === 'assistant' ? 'output_text' : 'input_text',
             text: c,
-          } as api_proto.InputTextContent);
+          } as api_proto.InputTextContent | api_proto.OutputTextContent);
         } else if (c.type === 'image_content') {
           // not supported for now
           continue;
@@ -1668,7 +1660,7 @@ function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
         type: 'message',
         role,
         content: contentList,
-      } as api_proto.UserItem;
+      } as api_proto.UserItem | api_proto.AssistantItem | api_proto.SystemItem;
     default:
       throw new Error(`Unsupported item type: ${(item as any).type}`);
   }
package/src/realtime/realtime_model_beta.ts CHANGED
@@ -1090,16 +1090,11 @@ export class RealtimeSession extends llm.RealtimeSession {
       throw new Error('item.type is not set');
     }
 
-    if (!event.response_id) {
-      throw new Error('response_id is not set');
-    }
-
     const itemType = event.item.type;
-    const responseId = event.response_id;
 
     if (itemType !== 'message') {
-      // emit immediately if it's not a message, otherwise wait response.content_part.added
-      this.resolveGeneration(responseId);
+      // non-message items (e.g. function calls) don't need additional handling here
+      // the generation event was already emitted in handleResponseCreated
       this.textModeRecoveryRetries = 0;
      return;
     }
@@ -1518,30 +1513,6 @@ export class RealtimeSession extends llm.RealtimeSession {
 
     return handle;
   }
-
-  private resolveGeneration(responseId: string): void {
-    if (!this.currentGeneration) {
-      throw new Error('currentGeneration is not set');
-    }
-
-    const generation_ev = {
-      messageStream: this.currentGeneration.messageChannel.stream(),
-      functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false,
-      responseId,
-    } as llm.GenerationCreatedEvent;
-
-    const handle = this.responseCreatedFutures[responseId];
-    if (handle) {
-      delete this.responseCreatedFutures[responseId];
-      generation_ev.userInitiated = true;
-      if (handle.doneFut.done) {
-        this.#logger.warn({ responseId }, 'response received after timeout');
-      } else {
-        handle.doneFut.resolve(generation_ev);
-      }
-    }
-  }
 }
 
 function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
package/src/stt.ts CHANGED
@@ -46,8 +46,8 @@ export class STT extends stt.STT {
     this.#client =
       this.#opts.client ||
       new OpenAI({
-        baseURL: opts.baseURL,
-        apiKey: opts.apiKey,
+        baseURL: this.#opts.baseURL,
+        apiKey: this.#opts.apiKey,
       });
   }
 
package/src/tts.ts CHANGED
@@ -50,8 +50,8 @@ export class TTS extends tts.TTS {
     this.#client =
       this.#opts.client ||
       new OpenAI({
-        baseURL: opts.baseURL,
-        apiKey: opts.apiKey,
+        baseURL: this.#opts.baseURL,
+        apiKey: this.#opts.apiKey,
       });
   }
 