@livekit/agents-plugin-openai 1.0.35 → 1.0.36

Files changed (46)
  1. package/dist/llm.cjs +2 -2
  2. package/dist/llm.cjs.map +1 -1
  3. package/dist/llm.js +2 -2
  4. package/dist/llm.js.map +1 -1
  5. package/dist/realtime/api_proto.cjs.map +1 -1
  6. package/dist/realtime/api_proto.d.cts +6 -2
  7. package/dist/realtime/api_proto.d.ts +6 -2
  8. package/dist/realtime/api_proto.d.ts.map +1 -1
  9. package/dist/realtime/api_proto.js.map +1 -1
  10. package/dist/realtime/realtime_model.cjs +16 -30
  11. package/dist/realtime/realtime_model.cjs.map +1 -1
  12. package/dist/realtime/realtime_model.d.cts +3 -1
  13. package/dist/realtime/realtime_model.d.ts +3 -1
  14. package/dist/realtime/realtime_model.d.ts.map +1 -1
  15. package/dist/realtime/realtime_model.js +14 -29
  16. package/dist/realtime/realtime_model.js.map +1 -1
  17. package/dist/realtime/realtime_model.test.cjs +106 -0
  18. package/dist/realtime/realtime_model.test.cjs.map +1 -0
  19. package/dist/realtime/realtime_model.test.d.cts +2 -0
  20. package/dist/realtime/realtime_model.test.d.ts +2 -0
  21. package/dist/realtime/realtime_model.test.d.ts.map +1 -0
  22. package/dist/realtime/realtime_model.test.js +105 -0
  23. package/dist/realtime/realtime_model.test.js.map +1 -0
  24. package/dist/realtime/realtime_model_beta.cjs +0 -26
  25. package/dist/realtime/realtime_model_beta.cjs.map +1 -1
  26. package/dist/realtime/realtime_model_beta.d.cts +0 -1
  27. package/dist/realtime/realtime_model_beta.d.ts +0 -1
  28. package/dist/realtime/realtime_model_beta.d.ts.map +1 -1
  29. package/dist/realtime/realtime_model_beta.js +0 -26
  30. package/dist/realtime/realtime_model_beta.js.map +1 -1
  31. package/dist/stt.cjs +2 -2
  32. package/dist/stt.cjs.map +1 -1
  33. package/dist/stt.js +2 -2
  34. package/dist/stt.js.map +1 -1
  35. package/dist/tts.cjs +2 -2
  36. package/dist/tts.cjs.map +1 -1
  37. package/dist/tts.js +2 -2
  38. package/dist/tts.js.map +1 -1
  39. package/package.json +7 -7
  40. package/src/llm.ts +2 -2
  41. package/src/realtime/api_proto.ts +12 -2
  42. package/src/realtime/realtime_model.test.ts +129 -0
  43. package/src/realtime/realtime_model.ts +28 -36
  44. package/src/realtime/realtime_model_beta.ts +2 -31
  45. package/src/stt.ts +2 -2
  46. package/src/tts.ts +2 -2
package/dist/tts.js CHANGED
@@ -27,8 +27,8 @@ class TTS extends tts.TTS {
       throw new Error("OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY");
     }
     this.#client = this.#opts.client || new OpenAI({
-      baseURL: opts.baseURL,
-      apiKey: opts.apiKey
+      baseURL: this.#opts.baseURL,
+      apiKey: this.#opts.apiKey
     });
   }
   updateOptions(opts) {
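This is the substantive fix in this release: the OpenAI client is now built from the merged `this.#opts` instead of the raw constructor argument, so values supplied by defaults (most importantly the API key read from `$OPENAI_API_KEY`) actually reach the client. The same two-line change appears in `llm.ts`, `stt.ts`, and `tts.ts` below. A minimal sketch of the failure mode, using a simplified options shape rather than the plugin's real types:

```ts
// Simplified reproduction of the constructor logic in tts.ts.
const defaults = { apiKey: process.env.OPENAI_API_KEY, model: 'tts-1' };
const opts: { apiKey?: string; model?: string } = { model: 'gpt-4o-mini-tts' };

// The constructor merges defaults into a private field...
const merged = { ...defaults, ...opts };
// ...and the API-key guard validates the *merged* options:
if (merged.apiKey === undefined) throw new Error('OpenAI API key is required');

// 1.0.35 built the client from the raw argument, whose apiKey is undefined here;
// 1.0.36 builds it from the merged options, which include the env default.
console.log(opts.apiKey); // undefined
console.log(merged.apiKey); // value of $OPENAI_API_KEY
```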
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n private abortController = new AbortController();\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create(\n {\n input: text,\n model: this.#opts.model,\n voice: this.#opts.voice,\n instructions: this.#opts.instructions,\n response_format: 'pcm',\n speed: this.#opts.speed,\n },\n { signal: abortSignal },\n ),\n connOptions,\n abortSignal,\n );\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on OpenAI TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'openai.ChunkedStream';\n private stream: Promise<any>;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(\n tts: TTS,\n text: string,\n stream: Promise<any>,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.stream = stream;\n }\n\n protected async run() {\n try {\n const buffer = await this.stream.then((r) => r.arrayBuffer());\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);\n const frames = audioByteStream.write(buffer);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n } catch (error) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n throw error;\n } finally {\n 
this.queue.close();\n }\n }\n}\n"],"mappings":"AAGA,SAAiC,iBAAiB,WAAW,WAAW;AAExE,SAAS,cAAc;AAGvB,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAS9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM,wBAAwB,qBAAqB,EAAE,WAAW,MAAM,CAAC;AAEvE,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACL;AAAA,EAEA,cAAc,MAAyE;AACrF,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI;AAAA,MACT;AAAA,MACA;AAAA,MACA,KAAK,QAAQ,MAAM,OAAO;AAAA,QACxB;AAAA,UACE,OAAO;AAAA,UACP,OAAO,KAAK,MAAM;AAAA,UAClB,OAAO,KAAK,MAAM;AAAA,UAClB,cAAc,KAAK,MAAM;AAAA,UACzB,iBAAiB;AAAA,UACjB,OAAO,KAAK,MAAM;AAAA,QACpB;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,QACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAgB,MAAM;AACpB,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC;AAC5D,YAAM,YAAY,UAAU;AAC5B,YAAM,kBAAkB,IAAI,gBAAgB,wBAAwB,mBAAmB;AACvF,YAAM,SAAS,gBAAgB,MAAM,MAAM;AAE3C,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,iBAAW,SAAS,QAAQ;AAC1B,sBAAc,WAAW,KAAK;AAC9B,oBAAY;AAAA,MACd;AACA,oBAAc,WAAW,IAAI;AAE7B,WAAK,MAAM,MAAM;AAAA,IACnB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n private abortController = new AbortController();\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: this.#opts.baseURL,\n apiKey: this.#opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create(\n {\n input: text,\n model: this.#opts.model,\n voice: this.#opts.voice,\n instructions: this.#opts.instructions,\n response_format: 'pcm',\n speed: this.#opts.speed,\n },\n { signal: abortSignal },\n ),\n connOptions,\n abortSignal,\n );\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on OpenAI TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'openai.ChunkedStream';\n private stream: Promise<any>;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(\n tts: TTS,\n text: string,\n stream: Promise<any>,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.stream = stream;\n }\n\n protected async run() {\n try {\n const buffer = await this.stream.then((r) => r.arrayBuffer());\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);\n const frames = audioByteStream.write(buffer);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n } catch (error) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n throw error;\n } finally {\n 
this.queue.close();\n }\n }\n}\n"],"mappings":"AAGA,SAAiC,iBAAiB,WAAW,WAAW;AAExE,SAAS,cAAc;AAGvB,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAS9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM,wBAAwB,qBAAqB,EAAE,WAAW,MAAM,CAAC;AAEvE,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK,MAAM;AAAA,MACpB,QAAQ,KAAK,MAAM;AAAA,IACrB,CAAC;AAAA,EACL;AAAA,EAEA,cAAc,MAAyE;AACrF,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI;AAAA,MACT;AAAA,MACA;AAAA,MACA,KAAK,QAAQ,MAAM,OAAO;AAAA,QACxB;AAAA,UACE,OAAO;AAAA,UACP,OAAO,KAAK,MAAM;AAAA,UAClB,OAAO,KAAK,MAAM;AAAA,UAClB,cAAc,KAAK,MAAM;AAAA,UACzB,iBAAiB;AAAA,UACjB,OAAO,KAAK,MAAM;AAAA,QACpB;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,QACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAgB,MAAM;AACpB,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC;AAC5D,YAAM,YAAY,UAAU;AAC5B,YAAM,kBAAkB,IAAI,gBAAgB,wBAAwB,mBAAmB;AACvF,YAAM,SAAS,gBAAgB,MAAM,MAAM;AAE3C,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,iBAAW,SAAS,QAAQ;AAC1B,sBAAc,WAAW,KAAK;AAC9B,oBAAY;AAAA,MACd;AACA,oBAAc,WAAW,IAAI;AAE7B,WAAK,MAAM,MAAM;AAAA,IACnB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents-plugin-openai",
-  "version": "1.0.35",
+  "version": "1.0.36",
   "description": "OpenAI plugin for LiveKit Node Agents",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -25,14 +25,14 @@
     "README.md"
   ],
   "devDependencies": {
-    "@livekit/rtc-node": "^0.13.22",
+    "@livekit/rtc-node": "^0.13.24",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
-    "@livekit/agents": "1.0.35",
-    "@livekit/agents-plugin-silero": "1.0.35",
-    "@livekit/agents-plugins-test": "1.0.35"
+    "@livekit/agents": "1.0.36",
+    "@livekit/agents-plugin-silero": "1.0.36",
+    "@livekit/agents-plugins-test": "1.0.36"
   },
   "dependencies": {
     "@livekit/mutex": "^1.1.1",
@@ -40,8 +40,8 @@
     "ws": "^8.18.0"
   },
   "peerDependencies": {
-    "@livekit/rtc-node": "^0.13.22",
-    "@livekit/agents": "1.0.35"
+    "@livekit/rtc-node": "^0.13.24",
+    "@livekit/agents": "1.0.36"
   },
   "scripts": {
     "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/llm.ts CHANGED
@@ -73,8 +73,8 @@ export class LLM extends llm.LLM {
     this.#client =
       this.#opts.client ||
       new OpenAI({
-        baseURL: opts.baseURL,
-        apiKey: opts.apiKey,
+        baseURL: this.#opts.baseURL,
+        apiKey: this.#opts.apiKey,
       });
   }
 
package/src/realtime/api_proto.ts CHANGED
@@ -167,13 +167,23 @@ export interface TextContent {
   text: string;
 }
 
+export interface OutputTextContent {
+  type: 'output_text';
+  text: string;
+}
+
 export interface AudioContent {
   type: 'audio';
   audio: AudioBase64Bytes;
   transcript: string;
 }
 
-export type Content = InputTextContent | InputAudioContent | TextContent | AudioContent;
+export type Content =
+  | InputTextContent
+  | InputAudioContent
+  | TextContent
+  | OutputTextContent
+  | AudioContent;
 export type ContentPart = {
   type: 'text' | 'audio' | 'output_text' | 'output_audio'; // GA: output_text/output_audio
   audio?: AudioBase64Bytes;
@@ -202,7 +212,7 @@ export interface UserItem extends BaseItem {
 export interface AssistantItem extends BaseItem {
   type: 'message';
   role: 'assistant';
-  content: (TextContent | AudioContent)[];
+  content: (TextContent | OutputTextContent | AudioContent)[];
 }
 
 export interface FunctionCallItem extends BaseItem {
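The `output_text` addition tracks the GA Realtime API, where assistant-authored text uses `output_text` while user and system text remains `input_text`. A small illustration of the two variants as typed by this file (the literal values are made up):

```ts
import type * as api_proto from './api_proto.js';

// Assistant-authored text in the GA protocol:
const assistantText: api_proto.OutputTextContent = {
  type: 'output_text',
  text: 'Hello, how can I help you?',
};

// User/system text is unchanged:
const userText: api_proto.InputTextContent = {
  type: 'input_text',
  text: 'What is the weather like?',
};
```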
package/src/realtime/realtime_model.test.ts ADDED
@@ -0,0 +1,129 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { llm } from '@livekit/agents';
+import { describe, expect, it } from 'vitest';
+import type * as api_proto from './api_proto.js';
+import { livekitItemToOpenAIItem } from './realtime_model.js';
+
+describe('livekitItemToOpenAIItem', () => {
+  describe('message items', () => {
+    it('should use output_text type for assistant messages', () => {
+      const assistantMessage = new llm.ChatMessage({
+        role: 'assistant',
+        content: 'Hello, how can I help you?',
+        id: 'test-assistant-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(assistantMessage) as api_proto.AssistantItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('assistant');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('output_text');
+      expect((content as api_proto.OutputTextContent).text).toBe('Hello, how can I help you?');
+    });
+
+    it('should use input_text type for user messages', () => {
+      const userMessage = new llm.ChatMessage({
+        role: 'user',
+        content: 'What is the weather like?',
+        id: 'test-user-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(userMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('user');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+      expect((content as api_proto.InputTextContent).text).toBe('What is the weather like?');
+    });
+
+    it('should use input_text type for system messages', () => {
+      const systemMessage = new llm.ChatMessage({
+        role: 'system',
+        content: 'You are a helpful assistant.',
+        id: 'test-system-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(systemMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('system');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+    });
+
+    it('should convert developer role to system role', () => {
+      const developerMessage = new llm.ChatMessage({
+        role: 'developer',
+        content: 'System instructions.',
+        id: 'test-developer-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(developerMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('system');
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+    });
+
+    it('should handle multiple content items for assistant', () => {
+      const multiContentMessage = new llm.ChatMessage({
+        role: 'assistant',
+        content: ['First part.', 'Second part.'],
+        id: 'test-multi-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(multiContentMessage) as api_proto.AssistantItem;
+
+      expect(result.content).toHaveLength(2);
+      const content0 = result.content[0]!;
+      const content1 = result.content[1]!;
+      expect(content0.type).toBe('output_text');
+      expect(content1.type).toBe('output_text');
+    });
+  });
+
+  describe('function_call items', () => {
+    it('should convert function call items correctly', () => {
+      const functionCall = new llm.FunctionCall({
+        callId: 'call-123',
+        name: 'get_weather',
+        args: '{"location": "San Francisco"}',
+        id: 'test-func-call',
+      });
+
+      const result = livekitItemToOpenAIItem(functionCall) as api_proto.FunctionCallItem;
+
+      expect(result.type).toBe('function_call');
+      expect(result.id).toBe('test-func-call');
+      expect(result.call_id).toBe('call-123');
+      expect(result.name).toBe('get_weather');
+      expect(result.arguments).toBe('{"location": "San Francisco"}');
+    });
+  });
+
+  describe('function_call_output items', () => {
+    it('should convert function call output items correctly', () => {
+      const functionOutput = new llm.FunctionCallOutput({
+        callId: 'call-123',
+        output: 'The weather in San Francisco is sunny.',
+        isError: false,
+        id: 'test-func-output',
+      });
+
+      const result = livekitItemToOpenAIItem(functionOutput) as api_proto.FunctionCallOutputItem;
+
+      expect(result.type).toBe('function_call_output');
+      expect(result.id).toBe('test-func-output');
+      expect(result.call_id).toBe('call-123');
+      expect(result.output).toBe('The weather in San Francisco is sunny.');
+    });
+  });
+});
package/src/realtime/realtime_model.ts CHANGED
@@ -381,6 +381,10 @@ export class RealtimeSession extends llm.RealtimeSession {
   private itemCreateFutures: { [id: string]: Future } = {};
   private itemDeleteFutures: { [id: string]: Future } = {};
 
+  // Track items that have real server-side audio (created in current session, not restored)
+  // Items restored after reconnection are text-only and cannot be truncated
+  private audioCapableItemIds: Set<string> = new Set();
+
   private updateChatCtxLock = new Mutex();
   private updateFuncCtxLock = new Mutex();
 
@@ -673,7 +677,12 @@
     modalities?: Modality[];
     audioTranscript?: string;
   }): Promise<void> {
-    if (!_options.modalities || _options.modalities.includes('audio')) {
+    // Check if modalities include audio AND the item has real server-side audio
+    // Items restored after reconnection are text-only and cannot be truncated
+    const hasAudioModality = !_options.modalities || _options.modalities.includes('audio');
+    const hasServerSideAudio = this.audioCapableItemIds.has(_options.messageId);
+
+    if (hasAudioModality && hasServerSideAudio) {
       this.sendEvent({
         type: 'conversation.item.truncate',
         content_index: 0,
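The old condition only looked at the requested modalities, so after a reconnect (where the session replays chat history as text-only items) the plugin could send `conversation.item.truncate` for an item that has no server-side audio. A sketch of the new gate, restated as a standalone predicate for clarity (`canTruncate` is illustrative, not a function in the plugin):

```ts
// Truncation is attempted only for items that (a) were generated with the
// audio modality and (b) still have real audio on the server in this session.
function canTruncate(
  modalities: string[] | undefined,
  messageId: string,
  audioCapableItemIds: Set<string>,
): boolean {
  const hasAudioModality = !modalities || modalities.includes('audio');
  const hasServerSideAudio = audioCapableItemIds.has(messageId);
  return hasAudioModality && hasServerSideAudio;
}

// A restored item was never added to the set, so no truncate event is sent:
canTruncate(['audio', 'text'], 'restored-item', new Set()); // false
// An item generated with audio in the current session passes:
canTruncate(['audio', 'text'], 'live-item', new Set(['live-item'])); // true
```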
@@ -811,6 +820,9 @@
     }
     this.itemDeleteFutures = {};
 
+    // Clear audio-capable item tracking - restored items are text-only on the server
+    this.audioCapableItemIds.clear();
+
     const events: api_proto.ClientEvent[] = [];
 
     // options and instructions
@@ -1169,16 +1181,11 @@
       throw new Error('item.type is not set');
     }
 
-    if (!event.response_id) {
-      throw new Error('response_id is not set');
-    }
-
     const itemType = event.item.type;
-    const responseId = event.response_id;
 
     if (itemType !== 'message') {
-      // emit immediately if it's not a message, otherwise wait response.content_part.added
-      this.resolveGeneration(responseId);
+      // non-message items (e.g. function calls) don't need additional handling here
+      // the generation event was already emitted in handleResponseCreated
       this.textModeRecoveryRetries = 0;
       return;
     }
@@ -1236,6 +1243,9 @@
       throw new Error('item_id is not set');
     }
 
+    // Clean up audio-capable tracking for deleted items
+    this.audioCapableItemIds.delete(event.item_id);
+
     try {
       this.remoteChatCtx.delete(event.item_id);
     } catch (error) {
@@ -1302,6 +1312,11 @@
     if (!itemGeneration.modalities.done) {
       const modalityResult: Modality[] = isTextType ? ['text'] : ['audio', 'text'];
       itemGeneration.modalities.resolve(modalityResult);
+
+      // Track items with real server-side audio for truncation eligibility
+      if (!isTextType) {
+        this.audioCapableItemIds.add(itemId);
+      }
     }
 
     if (this.currentGeneration._firstTokenTimestamp === undefined) {
@@ -1598,33 +1613,10 @@
 
     return handle;
   }
-
-  private resolveGeneration(responseId: string): void {
-    if (!this.currentGeneration) {
-      throw new Error('currentGeneration is not set');
-    }
-
-    const generation_ev = {
-      messageStream: this.currentGeneration.messageChannel.stream(),
-      functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false,
-      responseId,
-    } as llm.GenerationCreatedEvent;
-
-    const handle = this.responseCreatedFutures[responseId];
-    if (handle) {
-      delete this.responseCreatedFutures[responseId];
-      generation_ev.userInitiated = true;
-      if (handle.doneFut.done) {
-        this.#logger.warn({ responseId }, 'response received after timeout');
-      } else {
-        handle.doneFut.resolve(generation_ev);
-      }
-    }
-  }
 }
 
-function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
+/** @internal Exported for testing purposes */
+export function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
   switch (item.type) {
     case 'function_call':
       return {
@@ -1647,9 +1639,9 @@ function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
       for (const c of item.content) {
         if (typeof c === 'string') {
           contentList.push({
-            type: role === 'assistant' ? 'text' : 'input_text',
+            type: role === 'assistant' ? 'output_text' : 'input_text',
             text: c,
-          } as api_proto.InputTextContent);
+          } as api_proto.InputTextContent | api_proto.OutputTextContent);
         } else if (c.type === 'image_content') {
           // not supported for now
           continue;
@@ -1668,7 +1660,7 @@ function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
         type: 'message',
         role,
         content: contentList,
-      } as api_proto.UserItem;
+      } as api_proto.UserItem | api_proto.AssistantItem | api_proto.SystemItem;
     default:
       throw new Error(`Unsupported item type: ${(item as any).type}`);
   }
package/src/realtime/realtime_model_beta.ts CHANGED
@@ -1090,16 +1090,11 @@ export class RealtimeSession extends llm.RealtimeSession {
       throw new Error('item.type is not set');
     }
 
-    if (!event.response_id) {
-      throw new Error('response_id is not set');
-    }
-
     const itemType = event.item.type;
-    const responseId = event.response_id;
 
     if (itemType !== 'message') {
-      // emit immediately if it's not a message, otherwise wait response.content_part.added
-      this.resolveGeneration(responseId);
+      // non-message items (e.g. function calls) don't need additional handling here
+      // the generation event was already emitted in handleResponseCreated
       this.textModeRecoveryRetries = 0;
      return;
     }
@@ -1518,30 +1513,6 @@ export class RealtimeSession extends llm.RealtimeSession {
 
     return handle;
   }
-
-  private resolveGeneration(responseId: string): void {
-    if (!this.currentGeneration) {
-      throw new Error('currentGeneration is not set');
-    }
-
-    const generation_ev = {
-      messageStream: this.currentGeneration.messageChannel.stream(),
-      functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false,
-      responseId,
-    } as llm.GenerationCreatedEvent;
-
-    const handle = this.responseCreatedFutures[responseId];
-    if (handle) {
-      delete this.responseCreatedFutures[responseId];
-      generation_ev.userInitiated = true;
-      if (handle.doneFut.done) {
-        this.#logger.warn({ responseId }, 'response received after timeout');
-      } else {
-        handle.doneFut.resolve(generation_ev);
-      }
-    }
-  }
 }
 
 function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
package/src/stt.ts CHANGED
@@ -46,8 +46,8 @@ export class STT extends stt.STT {
     this.#client =
       this.#opts.client ||
       new OpenAI({
-        baseURL: opts.baseURL,
-        apiKey: opts.apiKey,
+        baseURL: this.#opts.baseURL,
+        apiKey: this.#opts.apiKey,
       });
   }
 
package/src/tts.ts CHANGED
@@ -50,8 +50,8 @@ export class TTS extends tts.TTS {
     this.#client =
       this.#opts.client ||
       new OpenAI({
-        baseURL: opts.baseURL,
-        apiKey: opts.apiKey,
+        baseURL: this.#opts.baseURL,
+        apiKey: this.#opts.apiKey,
       });
   }
 