@livekit/agents-plugin-openai 1.0.35 → 1.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/llm.cjs +2 -2
- package/dist/llm.cjs.map +1 -1
- package/dist/llm.js +2 -2
- package/dist/llm.js.map +1 -1
- package/dist/realtime/api_proto.cjs.map +1 -1
- package/dist/realtime/api_proto.d.cts +6 -2
- package/dist/realtime/api_proto.d.ts +6 -2
- package/dist/realtime/api_proto.d.ts.map +1 -1
- package/dist/realtime/api_proto.js.map +1 -1
- package/dist/realtime/realtime_model.cjs +16 -30
- package/dist/realtime/realtime_model.cjs.map +1 -1
- package/dist/realtime/realtime_model.d.cts +3 -1
- package/dist/realtime/realtime_model.d.ts +3 -1
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +14 -29
- package/dist/realtime/realtime_model.js.map +1 -1
- package/dist/realtime/realtime_model.test.cjs +106 -0
- package/dist/realtime/realtime_model.test.cjs.map +1 -0
- package/dist/realtime/realtime_model.test.d.cts +2 -0
- package/dist/realtime/realtime_model.test.d.ts +2 -0
- package/dist/realtime/realtime_model.test.d.ts.map +1 -0
- package/dist/realtime/realtime_model.test.js +105 -0
- package/dist/realtime/realtime_model.test.js.map +1 -0
- package/dist/realtime/realtime_model_beta.cjs +0 -26
- package/dist/realtime/realtime_model_beta.cjs.map +1 -1
- package/dist/realtime/realtime_model_beta.d.cts +0 -1
- package/dist/realtime/realtime_model_beta.d.ts +0 -1
- package/dist/realtime/realtime_model_beta.d.ts.map +1 -1
- package/dist/realtime/realtime_model_beta.js +0 -26
- package/dist/realtime/realtime_model_beta.js.map +1 -1
- package/dist/stt.cjs +2 -2
- package/dist/stt.cjs.map +1 -1
- package/dist/stt.js +2 -2
- package/dist/stt.js.map +1 -1
- package/dist/tts.cjs +2 -2
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.js +2 -2
- package/dist/tts.js.map +1 -1
- package/package.json +7 -7
- package/src/llm.ts +2 -2
- package/src/realtime/api_proto.ts +12 -2
- package/src/realtime/realtime_model.test.ts +129 -0
- package/src/realtime/realtime_model.ts +28 -36
- package/src/realtime/realtime_model_beta.ts +2 -31
- package/src/stt.ts +2 -2
- package/src/tts.ts +2 -2
package/dist/tts.js
CHANGED
@@ -27,8 +27,8 @@ class TTS extends tts.TTS {
       throw new Error("OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY");
     }
     this.#client = this.#opts.client || new OpenAI({
-      baseURL: opts.baseURL,
-      apiKey: opts.apiKey
+      baseURL: this.#opts.baseURL,
+      apiKey: this.#opts.apiKey
     });
   }
   updateOptions(opts) {
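Why this small change matters: the constructor merges the caller's `opts` over `defaultTTSOptions` (which pulls `apiKey` from `$OPENAI_API_KEY`) into `this.#opts`, but the OpenAI client was built from the raw `opts`. A minimal sketch of the failure mode, using simplified names rather than the real class:

    // Sketch only: distills the constructor pattern from the diff above.
    interface Opts { apiKey?: string; baseURL?: string; }
    const defaults: Opts = { apiKey: process.env.OPENAI_API_KEY };

    function clientConfig(opts: Partial<Opts> = defaults): Opts {
      const merged: Opts = { ...defaults, ...opts };
      // Pre-1.0.36: reading `opts` here meant clientConfig({ baseURL: '...' })
      // yielded apiKey === undefined even with $OPENAI_API_KEY set, because the
      // merged-in default never reached the client. Reading `merged` fixes that.
      return { baseURL: merged.baseURL, apiKey: merged.apiKey };
    }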
package/dist/tts.js.map
CHANGED
@@ -1 +1 @@
(single-line sourcemap, elided for readability: the embedded `sourcesContent` for src/tts.ts changes in lockstep with the fix above — `baseURL: this.#opts.baseURL` / `apiKey: this.#opts.apiKey` replace the raw `opts` reads — and the `mappings` string is regenerated to match)
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents-plugin-openai",
-  "version": "1.0.35",
+  "version": "1.0.36",
   "description": "OpenAI plugin for LiveKit Node Agents",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -25,14 +25,14 @@
     "README.md"
   ],
   "devDependencies": {
-    "@livekit/rtc-node": "^0.13.
+    "@livekit/rtc-node": "^0.13.24",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
-    "@livekit/agents": "1.0.
-    "@livekit/agents-plugin-silero": "1.0.
-    "@livekit/agents-plugins-test": "1.0.
+    "@livekit/agents": "1.0.36",
+    "@livekit/agents-plugin-silero": "1.0.36",
+    "@livekit/agents-plugins-test": "1.0.36"
   },
   "dependencies": {
     "@livekit/mutex": "^1.1.1",
@@ -40,8 +40,8 @@
     "ws": "^8.18.0"
   },
   "peerDependencies": {
-    "@livekit/rtc-node": "^0.13.
-    "@livekit/agents": "1.0.
+    "@livekit/rtc-node": "^0.13.24",
+    "@livekit/agents": "1.0.36"
   },
   "scripts": {
     "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/realtime/api_proto.ts
CHANGED
@@ -167,13 +167,23 @@ export interface TextContent {
   text: string;
 }
 
+export interface OutputTextContent {
+  type: 'output_text';
+  text: string;
+}
+
 export interface AudioContent {
   type: 'audio';
   audio: AudioBase64Bytes;
   transcript: string;
 }
 
-export type Content =
+export type Content =
+  | InputTextContent
+  | InputAudioContent
+  | TextContent
+  | OutputTextContent
+  | AudioContent;
 export type ContentPart = {
   type: 'text' | 'audio' | 'output_text' | 'output_audio'; // GA: output_text/output_audio
   audio?: AudioBase64Bytes;
@@ -202,7 +212,7 @@ export interface UserItem extends BaseItem {
 export interface AssistantItem extends BaseItem {
   type: 'message';
   role: 'assistant';
-  content: (TextContent | AudioContent)[];
+  content: (TextContent | OutputTextContent | AudioContent)[];
 }
 
 export interface FunctionCallItem extends BaseItem {
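With `OutputTextContent` added, assistant items can carry GA-style `output_text` parts while user and system items keep `input_text`. Roughly, the intended shapes look like this (fields abbreviated and ids made up; `BaseItem` may require more fields than shown, so treat this as illustrative):

    // Illustrative item literals mirroring the widened types above.
    const assistantItem = {
      id: 'item_abc',
      type: 'message',
      role: 'assistant',
      content: [{ type: 'output_text', text: 'Hi there!' }], // valid via OutputTextContent
    };
    const userItem = {
      id: 'item_def',
      type: 'message',
      role: 'user',
      content: [{ type: 'input_text', text: 'Hello?' }],
    };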
package/src/realtime/realtime_model.test.ts
ADDED
@@ -0,0 +1,129 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { llm } from '@livekit/agents';
+import { describe, expect, it } from 'vitest';
+import type * as api_proto from './api_proto.js';
+import { livekitItemToOpenAIItem } from './realtime_model.js';
+
+describe('livekitItemToOpenAIItem', () => {
+  describe('message items', () => {
+    it('should use output_text type for assistant messages', () => {
+      const assistantMessage = new llm.ChatMessage({
+        role: 'assistant',
+        content: 'Hello, how can I help you?',
+        id: 'test-assistant-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(assistantMessage) as api_proto.AssistantItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('assistant');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('output_text');
+      expect((content as api_proto.OutputTextContent).text).toBe('Hello, how can I help you?');
+    });
+
+    it('should use input_text type for user messages', () => {
+      const userMessage = new llm.ChatMessage({
+        role: 'user',
+        content: 'What is the weather like?',
+        id: 'test-user-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(userMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('user');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+      expect((content as api_proto.InputTextContent).text).toBe('What is the weather like?');
+    });
+
+    it('should use input_text type for system messages', () => {
+      const systemMessage = new llm.ChatMessage({
+        role: 'system',
+        content: 'You are a helpful assistant.',
+        id: 'test-system-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(systemMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('system');
+      expect(result.content).toHaveLength(1);
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+    });
+
+    it('should convert developer role to system role', () => {
+      const developerMessage = new llm.ChatMessage({
+        role: 'developer',
+        content: 'System instructions.',
+        id: 'test-developer-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(developerMessage) as api_proto.UserItem;
+
+      expect(result.type).toBe('message');
+      expect(result.role).toBe('system');
+      const content = result.content[0]!;
+      expect(content.type).toBe('input_text');
+    });
+
+    it('should handle multiple content items for assistant', () => {
+      const multiContentMessage = new llm.ChatMessage({
+        role: 'assistant',
+        content: ['First part.', 'Second part.'],
+        id: 'test-multi-msg',
+      });
+
+      const result = livekitItemToOpenAIItem(multiContentMessage) as api_proto.AssistantItem;
+
+      expect(result.content).toHaveLength(2);
+      const content0 = result.content[0]!;
+      const content1 = result.content[1]!;
+      expect(content0.type).toBe('output_text');
+      expect(content1.type).toBe('output_text');
+    });
+  });
+
+  describe('function_call items', () => {
+    it('should convert function call items correctly', () => {
+      const functionCall = new llm.FunctionCall({
+        callId: 'call-123',
+        name: 'get_weather',
+        args: '{"location": "San Francisco"}',
+        id: 'test-func-call',
+      });
+
+      const result = livekitItemToOpenAIItem(functionCall) as api_proto.FunctionCallItem;
+
+      expect(result.type).toBe('function_call');
+      expect(result.id).toBe('test-func-call');
+      expect(result.call_id).toBe('call-123');
+      expect(result.name).toBe('get_weather');
+      expect(result.arguments).toBe('{"location": "San Francisco"}');
+    });
+  });
+
+  describe('function_call_output items', () => {
+    it('should convert function call output items correctly', () => {
+      const functionOutput = new llm.FunctionCallOutput({
+        callId: 'call-123',
+        output: 'The weather in San Francisco is sunny.',
+        isError: false,
+        id: 'test-func-output',
+      });
+
+      const result = livekitItemToOpenAIItem(functionOutput) as api_proto.FunctionCallOutputItem;
+
+      expect(result.type).toBe('function_call_output');
+      expect(result.id).toBe('test-func-output');
+      expect(result.call_id).toBe('call-123');
+      expect(result.output).toBe('The weather in San Francisco is sunny.');
+    });
+  });
+});
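Assuming the plugin's standard vitest setup (the suite imports from `vitest` directly), these tests run with something like `pnpm vitest run src/realtime/realtime_model.test.ts` from the package directory.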
package/src/realtime/realtime_model.ts
CHANGED
@@ -381,6 +381,10 @@ export class RealtimeSession extends llm.RealtimeSession {
   private itemCreateFutures: { [id: string]: Future } = {};
   private itemDeleteFutures: { [id: string]: Future } = {};
 
+  // Track items that have real server-side audio (created in current session, not restored)
+  // Items restored after reconnection are text-only and cannot be truncated
+  private audioCapableItemIds: Set<string> = new Set();
+
   private updateChatCtxLock = new Mutex();
   private updateFuncCtxLock = new Mutex();
 
@@ -673,7 +677,12 @@ export class RealtimeSession extends llm.RealtimeSession {
     modalities?: Modality[];
     audioTranscript?: string;
   }): Promise<void> {
-    if
+    // Check if modalities include audio AND the item has real server-side audio
+    // Items restored after reconnection are text-only and cannot be truncated
+    const hasAudioModality = !_options.modalities || _options.modalities.includes('audio');
+    const hasServerSideAudio = this.audioCapableItemIds.has(_options.messageId);
+
+    if (hasAudioModality && hasServerSideAudio) {
       this.sendEvent({
         type: 'conversation.item.truncate',
         content_index: 0,
@@ -811,6 +820,9 @@ export class RealtimeSession extends llm.RealtimeSession {
     }
     this.itemDeleteFutures = {};
 
+    // Clear audio-capable item tracking - restored items are text-only on the server
+    this.audioCapableItemIds.clear();
+
     const events: api_proto.ClientEvent[] = [];
 
     // options and instructions
@@ -1169,16 +1181,11 @@ export class RealtimeSession extends llm.RealtimeSession {
       throw new Error('item.type is not set');
     }
 
-    if (!event.response_id) {
-      throw new Error('response_id is not set');
-    }
-
     const itemType = event.item.type;
-    const responseId = event.response_id;
 
     if (itemType !== 'message') {
-      //
-
+      // non-message items (e.g. function calls) don't need additional handling here
+      // the generation event was already emitted in handleResponseCreated
       this.textModeRecoveryRetries = 0;
       return;
     }
@@ -1236,6 +1243,9 @@ export class RealtimeSession extends llm.RealtimeSession {
       throw new Error('item_id is not set');
     }
 
+    // Clean up audio-capable tracking for deleted items
+    this.audioCapableItemIds.delete(event.item_id);
+
     try {
       this.remoteChatCtx.delete(event.item_id);
     } catch (error) {
@@ -1302,6 +1312,11 @@ export class RealtimeSession extends llm.RealtimeSession {
     if (!itemGeneration.modalities.done) {
       const modalityResult: Modality[] = isTextType ? ['text'] : ['audio', 'text'];
       itemGeneration.modalities.resolve(modalityResult);
+
+      // Track items with real server-side audio for truncation eligibility
+      if (!isTextType) {
+        this.audioCapableItemIds.add(itemId);
+      }
     }
 
     if (this.currentGeneration._firstTokenTimestamp === undefined) {
@@ -1598,33 +1613,10 @@ export class RealtimeSession extends llm.RealtimeSession {
 
     return handle;
   }
-
-  private resolveGeneration(responseId: string): void {
-    if (!this.currentGeneration) {
-      throw new Error('currentGeneration is not set');
-    }
-
-    const generation_ev = {
-      messageStream: this.currentGeneration.messageChannel.stream(),
-      functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false,
-      responseId,
-    } as llm.GenerationCreatedEvent;
-
-    const handle = this.responseCreatedFutures[responseId];
-    if (handle) {
-      delete this.responseCreatedFutures[responseId];
-      generation_ev.userInitiated = true;
-      if (handle.doneFut.done) {
-        this.#logger.warn({ responseId }, 'response received after timeout');
-      } else {
-        handle.doneFut.resolve(generation_ev);
-      }
-    }
-  }
 }
 
-function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
+/** @internal Exported for testing purposes */
+export function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
   switch (item.type) {
     case 'function_call':
       return {
@@ -1647,9 +1639,9 @@ function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
     for (const c of item.content) {
       if (typeof c === 'string') {
         contentList.push({
-          type: role === 'assistant' ? '
+          type: role === 'assistant' ? 'output_text' : 'input_text',
           text: c,
-        } as api_proto.InputTextContent);
+        } as api_proto.InputTextContent | api_proto.OutputTextContent);
       } else if (c.type === 'image_content') {
         // not supported for now
         continue;
@@ -1668,7 +1660,7 @@ function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
         type: 'message',
         role,
         content: contentList,
-      } as api_proto.UserItem;
+      } as api_proto.UserItem | api_proto.AssistantItem | api_proto.SystemItem;
     default:
       throw new Error(`Unsupported item type: ${(item as any).type}`);
   }
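Taken together, the `audioCapableItemIds` edits form a small lifecycle: mark an item once the server actually generates audio for it, unmark it on deletion, clear everything when the session state is restored (restored items are text-only), and gate `conversation.item.truncate` on the mark. A standalone sketch of that lifecycle — names follow the diff, but this is not the actual RealtimeSession code:

    // Sketch of the truncation-eligibility tracking added above.
    class AudioTruncationTracker {
      private audioCapableItemIds = new Set<string>();

      markAudioGenerated(itemId: string): void {
        this.audioCapableItemIds.add(itemId); // real server-side audio exists
      }

      onItemDeleted(itemId: string): void {
        this.audioCapableItemIds.delete(itemId);
      }

      onSessionRestore(): void {
        this.audioCapableItemIds.clear(); // restored items are text-only
      }

      canTruncate(itemId: string, modalities?: string[]): boolean {
        const hasAudioModality = !modalities || modalities.includes('audio');
        return hasAudioModality && this.audioCapableItemIds.has(itemId);
      }
    }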
package/src/realtime/realtime_model_beta.ts
CHANGED
@@ -1090,16 +1090,11 @@ export class RealtimeSession extends llm.RealtimeSession {
       throw new Error('item.type is not set');
     }
 
-    if (!event.response_id) {
-      throw new Error('response_id is not set');
-    }
-
     const itemType = event.item.type;
-    const responseId = event.response_id;
 
     if (itemType !== 'message') {
-      //
-
+      // non-message items (e.g. function calls) don't need additional handling here
+      // the generation event was already emitted in handleResponseCreated
       this.textModeRecoveryRetries = 0;
       return;
     }
@@ -1518,30 +1513,6 @@ export class RealtimeSession extends llm.RealtimeSession {
 
     return handle;
   }
-
-  private resolveGeneration(responseId: string): void {
-    if (!this.currentGeneration) {
-      throw new Error('currentGeneration is not set');
-    }
-
-    const generation_ev = {
-      messageStream: this.currentGeneration.messageChannel.stream(),
-      functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false,
-      responseId,
-    } as llm.GenerationCreatedEvent;
-
-    const handle = this.responseCreatedFutures[responseId];
-    if (handle) {
-      delete this.responseCreatedFutures[responseId];
-      generation_ev.userInitiated = true;
-      if (handle.doneFut.done) {
-        this.#logger.warn({ responseId }, 'response received after timeout');
-      } else {
-        handle.doneFut.resolve(generation_ev);
-      }
-    }
-  }
 }
 
 function livekitItemToOpenAIItem(item: llm.ChatItem): api_proto.ItemResource {
package/src/stt.ts
CHANGED