@livekit/agents-plugin-openai 1.0.24 → 1.0.27
This diff compares the published contents of two versions of this package, as released to their public registry, and is provided for informational purposes only.
- package/dist/stt.cjs +6 -9
- package/dist/stt.cjs.map +1 -1
- package/dist/stt.d.cts +1 -3
- package/dist/stt.d.ts +1 -3
- package/dist/stt.d.ts.map +1 -1
- package/dist/stt.js +6 -9
- package/dist/stt.js.map +1 -1
- package/dist/tts.cjs +44 -26
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +5 -3
- package/dist/tts.d.ts +5 -3
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +44 -26
- package/dist/tts.js.map +1 -1
- package/package.json +5 -5
- package/src/stt.ts +6 -10
- package/src/tts.ts +58 -29
package/dist/stt.cjs
CHANGED
@@ -33,7 +33,6 @@ class STT extends import_agents.stt.STT {
   #opts;
   #client;
   label = "openai.STT";
-  abortController = new AbortController();
   /**
    * Create a new instance of OpenAI STT.
    *
@@ -117,10 +116,11 @@ class STT extends import_agents.stt.STT {
     header.writeUInt32LE(frame.data.byteLength, 40);
     return Buffer.concat([header, Buffer.from(frame.data.buffer)]);
   }
-  async _recognize(buffer,
-    const config = this.#sanitizeOptions(
+  async _recognize(buffer, abortSignal) {
+    const config = this.#sanitizeOptions();
     buffer = (0, import_agents.mergeFrames)(buffer);
-    const
+    const wavBuffer = this.#createWav(buffer);
+    const file = new File([new Uint8Array(wavBuffer)], "audio.wav", { type: "audio/wav" });
     const resp = await this.#client.audio.transcriptions.create(
       {
         file,
@@ -130,7 +130,7 @@ class STT extends import_agents.stt.STT {
         response_format: "json"
       },
       {
-        signal:
+        signal: abortSignal
       }
     );
     return {
@@ -138,7 +138,7 @@ class STT extends import_agents.stt.STT {
       alternatives: [
         {
           text: resp.text || "",
-          language: language || "",
+          language: config.language || "",
           startTime: 0,
           endTime: 0,
           confidence: 0
@@ -150,9 +150,6 @@ class STT extends import_agents.stt.STT {
   stream() {
     throw new Error("Streaming is not supported on OpenAI STT");
   }
-  async close() {
-    this.abortController.abort();
-  }
 }
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
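To illustrate the new `_recognize` shape above: a minimal sketch with a caller-owned signal. It is ordinarily invoked by the framework's recognize flow rather than called directly, and the timeout helper here is purely hypothetical, not part of the plugin.

```typescript
import type { AudioBuffer } from '@livekit/agents';
import { STT } from '@livekit/agents-plugin-openai';

const stt = new STT(); // reads OPENAI_API_KEY from the environment

// Hypothetical helper: abort the Whisper request if it takes longer than `ms`.
async function recognizeWithTimeout(buffer: AudioBuffer, ms: number) {
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), ms);
  try {
    // The signal is forwarded to client.audio.transcriptions.create(..., { signal }).
    return await stt._recognize(buffer, ac.signal);
  } finally {
    clearTimeout(timer);
  }
}
```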
package/dist/stt.cjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n\n /**\n * Create a new instance of OpenAI STT.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({ streaming: false, interimResults: false });\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n /**\n * Create a new instance of Groq STT.\n *\n * @remarks\n * `apiKey` must be set to your Groq API key, either using the argument or by setting the\n * `GROQ_API_KEY` environment variable.\n */\n static withGroq(\n opts: Partial<{\n model: string | GroqAudioModels;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://api.groq.com/openai/v1',\n ...opts,\n });\n }\n\n /**\n * Create a new instance of OVHcloud AI Endpoints STT.\n *\n * @remarks\n * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the\n * `OVHCLOUD_API_KEY` environment variable.\n */\n static withOVHcloud(\n opts: Partial<{\n model: string;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error(\n 'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',\n );\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',\n ...opts,\n });\n }\n\n #sanitizeOptions(language?: string): STTOptions {\n if (language) {\n return { ...this.#opts, language };\n } else {\n return this.#opts;\n }\n }\n\n #createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n 
header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(16, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n return Buffer.concat([header, Buffer.from(frame.data.buffer)]);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const config = this.#sanitizeOptions();\n buffer = mergeFrames(buffer);\n const wavBuffer = this.#createWav(buffer);\n const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });\n\n const resp = await this.#client.audio.transcriptions.create(\n {\n file,\n model: this.#opts.model,\n language: config.language,\n prompt: config.prompt,\n response_format: 'json',\n },\n {\n signal: abortSignal,\n },\n );\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n text: resp.text || '',\n language: config.language || '',\n startTime: 0,\n endTime: 0,\n confidence: 0,\n },\n ],\n };\n }\n\n /** This method throws an error; streaming is unsupported on OpenAI STT. */\n stream(): stt.SpeechStream {\n throw new Error('Streaming is not supported on OpenAI STT');\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAmD;AAEnD,oBAAuB;AAavB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,OAAO;AACT;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,mBAAmB;AACzD,UAAM,EAAE,WAAW,OAAO,gBAAgB,MAAM,CAAC;AAEjD,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,qBAAO;AAAA,MACT,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACL;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,SACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI,MAAM,sEAAsE;AAAA,IACxF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,aACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,UAA+B;AAC9C,QAAI,UAAU;AACZ,aAAO,EAAE,GAAG,KAAK,OAAO,SAAS;AAAA,IACnC,OAAO;AACL,aAAO,KAAK;AAAA,IACd;AAAA,EACF;AAAA,EAEA,WAAW,OAA2B;AACpC,UAAM,gBAAgB;AACtB,UAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,UAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,UAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,cAAc,GAAG,EAAE;AAC1B,WAAO,cAAc,MAAM,UAAU,EAAE;AACvC,WAAO,cAAc,MAAM,YAAY,EAAE;AACzC,WAAO,cAAc,UAAU,EAAE;AACjC,WAAO,cAAc,YAAY,EAAE;AACnC,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAC9C,WAAO,OAAO,OAAO,CAAC,QAAQ,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,CAAC;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,SAAS,KAAK,iBAAiB;AACrC,iBAAS,2BAAY,MAAM;AAC3B,UAAM,YAAY,KAAK,WAAW,MAAM;AACxC,UAAM,OAAO,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,aAAa,EAAE,MAAM,YAAY,CAAC;AAErF,UAAM,OAAO,MAAM,KAAK,QAAQ,MAAM,eAAe;AAAA,MACnD;AAAA,QACE;AAAA,QACA,OAAO,KAAK,MAAM;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,QAAQ,OAAO;AAAA,QACf,iBAAiB;AAAA,MACnB;AAAA,MACA;AAAA,QACE,QAAQ;AAAA,MACV;AAAA,IACF;AAEA,WAAO;AAAA,MACL,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,cAAc;AAAA,QACZ;AAAA,UACE
,MAAM,KAAK,QAAQ;AAAA,UACnB,UAAU,OAAO,YAAY;AAAA,UAC7B,WAAW;AAAA,UACX,SAAS;AAAA,UACT,YAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAA2B;AACzB,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AACF;","names":[]}
package/dist/stt.d.cts
CHANGED
@@ -13,7 +13,6 @@ export interface STTOptions {
 export declare class STT extends stt.STT {
     #private;
     label: string;
-    private abortController;
     /**
      * Create a new instance of OpenAI STT.
      *
@@ -52,9 +51,8 @@ export declare class STT extends stt.STT {
         language: string;
         detectLanguage: boolean;
     }>): STT;
-    _recognize(buffer: AudioBuffer,
+    _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent>;
     /** This method throws an error; streaming is unsupported on OpenAI STT. */
     stream(): stt.SpeechStream;
-    close(): Promise<void>;
 }
 //# sourceMappingURL=stt.d.ts.map
package/dist/stt.d.ts
CHANGED
@@ -13,7 +13,6 @@ export interface STTOptions {
 export declare class STT extends stt.STT {
     #private;
     label: string;
-    private abortController;
     /**
      * Create a new instance of OpenAI STT.
      *
@@ -52,9 +51,8 @@ export declare class STT extends stt.STT {
         language: string;
         detectLanguage: boolean;
     }>): STT;
-    _recognize(buffer: AudioBuffer,
+    _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent>;
     /** This method throws an error; streaming is unsupported on OpenAI STT. */
     stream(): stt.SpeechStream;
-    close(): Promise<void>;
 }
 //# sourceMappingURL=stt.d.ts.map
package/dist/stt.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,WAAW,EAAe,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAErE,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAElE,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,aAAa,GAAG,MAAM,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AASD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAgB;
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,WAAW,EAAe,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAErE,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAElE,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,aAAa,GAAG,MAAM,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AASD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAgB;IAErB;;;;;;OAMG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAgBzD;;;;;;OAMG;IACH,MAAM,CAAC,QAAQ,CACb,IAAI,GAAE,OAAO,CAAC;QACZ,KAAK,EAAE,MAAM,GAAG,eAAe,CAAC;QAChC,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,cAAc,EAAE,OAAO,CAAC;KACzB,CAAM,GACN,GAAG;IAaN;;;;;;OAMG;IACH,MAAM,CAAC,YAAY,CACjB,IAAI,GAAE,OAAO,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,cAAc,EAAE,OAAO,CAAC;KACzB,CAAM,GACN,GAAG;IA6CA,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAiC1F,2EAA2E;IAC3E,MAAM,IAAI,GAAG,CAAC,YAAY;CAG3B"}
package/dist/stt.js
CHANGED
@@ -10,7 +10,6 @@ class STT extends stt.STT {
   #opts;
   #client;
   label = "openai.STT";
-  abortController = new AbortController();
   /**
    * Create a new instance of OpenAI STT.
    *
@@ -94,10 +93,11 @@ class STT extends stt.STT {
     header.writeUInt32LE(frame.data.byteLength, 40);
     return Buffer.concat([header, Buffer.from(frame.data.buffer)]);
   }
-  async _recognize(buffer,
-    const config = this.#sanitizeOptions(
+  async _recognize(buffer, abortSignal) {
+    const config = this.#sanitizeOptions();
     buffer = mergeFrames(buffer);
-    const
+    const wavBuffer = this.#createWav(buffer);
+    const file = new File([new Uint8Array(wavBuffer)], "audio.wav", { type: "audio/wav" });
     const resp = await this.#client.audio.transcriptions.create(
       {
         file,
@@ -107,7 +107,7 @@ class STT extends stt.STT {
         response_format: "json"
       },
       {
-        signal:
+        signal: abortSignal
       }
     );
     return {
@@ -115,7 +115,7 @@ class STT extends stt.STT {
       alternatives: [
         {
           text: resp.text || "",
-          language: language || "",
+          language: config.language || "",
           startTime: 0,
           endTime: 0,
           confidence: 0
@@ -127,9 +127,6 @@ class STT extends stt.STT {
   stream() {
     throw new Error("Streaming is not supported on OpenAI STT");
   }
-  async close() {
-    this.abortController.abort();
-  }
 }
 export {
   STT
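For context on the unchanged `#createWav` helper that `_recognize` now feeds into a `File`: it writes the canonical 44-byte RIFF/WAVE header for 16-bit PCM. A standalone sketch of that header layout, reassembled from the source embedded in the maps in this diff; the function name is ours.

```typescript
// 44-byte RIFF/WAVE header for uncompressed 16-bit PCM, mirroring #createWav.
// Buffer is the Node.js global.
function wavHeader(sampleRate: number, channels: number, dataBytes: number): Buffer {
  const bitsPerSample = 16;
  const byteRate = (sampleRate * channels * bitsPerSample) / 8; // bytes per second
  const blockAlign = (channels * bitsPerSample) / 8; // bytes per sample frame
  const header = Buffer.alloc(44);
  header.write('RIFF', 0);
  header.writeUInt32LE(36 + dataBytes, 4); // file size minus the 8-byte RIFF preamble
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // format tag: 1 = PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write('data', 36);
  header.writeUInt32LE(dataBytes, 40);
  return header;
}
```

For 24 kHz mono 16-bit audio this yields a byteRate of 48000 and a blockAlign of 2.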
package/dist/stt.js.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n\n /**\n * Create a new instance of OpenAI STT.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({ streaming: false, interimResults: false });\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n /**\n * Create a new instance of Groq STT.\n *\n * @remarks\n * `apiKey` must be set to your Groq API key, either using the argument or by setting the\n * `GROQ_API_KEY` environment variable.\n */\n static withGroq(\n opts: Partial<{\n model: string | GroqAudioModels;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://api.groq.com/openai/v1',\n ...opts,\n });\n }\n\n /**\n * Create a new instance of OVHcloud AI Endpoints STT.\n *\n * @remarks\n * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the\n * `OVHCLOUD_API_KEY` environment variable.\n */\n static withOVHcloud(\n opts: Partial<{\n model: string;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error(\n 'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',\n );\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',\n ...opts,\n });\n }\n\n #sanitizeOptions(language?: string): STTOptions {\n if (language) {\n return { ...this.#opts, language };\n } else {\n return this.#opts;\n }\n }\n\n #createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n 
header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(16, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n return Buffer.concat([header, Buffer.from(frame.data.buffer)]);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const config = this.#sanitizeOptions();\n buffer = mergeFrames(buffer);\n const wavBuffer = this.#createWav(buffer);\n const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });\n\n const resp = await this.#client.audio.transcriptions.create(\n {\n file,\n model: this.#opts.model,\n language: config.language,\n prompt: config.prompt,\n response_format: 'json',\n },\n {\n signal: abortSignal,\n },\n );\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n text: resp.text || '',\n language: config.language || '',\n startTime: 0,\n endTime: 0,\n confidence: 0,\n },\n ],\n };\n }\n\n /** This method throws an error; streaming is unsupported on OpenAI STT. */\n stream(): stt.SpeechStream {\n throw new Error('Streaming is not supported on OpenAI STT');\n }\n}\n"],"mappings":"AAGA,SAA2B,aAAa,WAAW;AAEnD,SAAS,cAAc;AAavB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,mBAAmB;AACzD,UAAM,EAAE,WAAW,OAAO,gBAAgB,MAAM,CAAC;AAEjD,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACL;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,SACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI,MAAM,sEAAsE;AAAA,IACxF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,aACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,UAA+B;AAC9C,QAAI,UAAU;AACZ,aAAO,EAAE,GAAG,KAAK,OAAO,SAAS;AAAA,IACnC,OAAO;AACL,aAAO,KAAK;AAAA,IACd;AAAA,EACF;AAAA,EAEA,WAAW,OAA2B;AACpC,UAAM,gBAAgB;AACtB,UAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,UAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,UAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,cAAc,GAAG,EAAE;AAC1B,WAAO,cAAc,MAAM,UAAU,EAAE;AACvC,WAAO,cAAc,MAAM,YAAY,EAAE;AACzC,WAAO,cAAc,UAAU,EAAE;AACjC,WAAO,cAAc,YAAY,EAAE;AACnC,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAC9C,WAAO,OAAO,OAAO,CAAC,QAAQ,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,CAAC;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,SAAS,KAAK,iBAAiB;AACrC,aAAS,YAAY,MAAM;AAC3B,UAAM,YAAY,KAAK,WAAW,MAAM;AACxC,UAAM,OAAO,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,aAAa,EAAE,MAAM,YAAY,CAAC;AAErF,UAAM,OAAO,MAAM,KAAK,QAAQ,MAAM,eAAe;AAAA,MACnD;AAAA,QACE;AAAA,QACA,OAAO,KAAK,MAAM;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,QAAQ,OAAO;AAAA,QACf,iBAAiB;AAAA,MACnB;AAAA,MACA;AAAA,QACE,QAAQ;AAAA,MACV;AAAA,IACF;AAEA,WAAO;AAAA,MACL,MAAM,IAAI,gBAAgB;AAAA,MAC1B,cAAc;AAAA,QACZ;AAAA,UACE,MAAM,KAAK,QAAQ;AAAA,UACnB,UAAU,OAAO
,YAAY;AAAA,UAC7B,WAAW;AAAA,UACX,SAAS;AAAA,UACT,YAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAA2B;AACzB,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AACF;","names":[]}
package/dist/tts.cjs
CHANGED
@@ -36,6 +36,7 @@ class TTS extends import_agents.tts.TTS {
   #opts;
   #client;
   label = "openai.TTS";
+  abortController = new AbortController();
   /**
    * Create a new instance of OpenAI TTS.
    *
@@ -57,50 +58,67 @@ class TTS extends import_agents.tts.TTS {
   updateOptions(opts) {
     this.#opts = { ...this.#opts, ...opts };
   }
-  synthesize(text) {
+  synthesize(text, connOptions, abortSignal) {
     return new ChunkedStream(
       this,
       text,
-      this.#client.audio.speech.create(
-
-
-
-
-
-
-
+      this.#client.audio.speech.create(
+        {
+          input: text,
+          model: this.#opts.model,
+          voice: this.#opts.voice,
+          instructions: this.#opts.instructions,
+          response_format: "pcm",
+          speed: this.#opts.speed
+        },
+        { signal: abortSignal }
+      ),
+      connOptions,
+      abortSignal
     );
   }
   stream() {
     throw new Error("Streaming is not supported on OpenAI TTS");
   }
+  async close() {
+    this.abortController.abort();
+  }
 }
 class ChunkedStream extends import_agents.tts.ChunkedStream {
   label = "openai.ChunkedStream";
   stream;
   // set Promise<T> to any because OpenAI returns an annoying Response type
-  constructor(tts2, text, stream) {
-    super(text, tts2);
+  constructor(tts2, text, stream, connOptions, abortSignal) {
+    super(text, tts2, connOptions, abortSignal);
     this.stream = stream;
   }
   async run() {
-
-
-
-
-
-
-
-
-
+    try {
+      const buffer = await this.stream.then((r) => r.arrayBuffer());
+      const requestId = (0, import_agents.shortuuid)();
+      const audioByteStream = new import_agents.AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);
+      const frames = audioByteStream.write(buffer);
+      let lastFrame;
+      const sendLastFrame = (segmentId, final) => {
+        if (lastFrame) {
+          this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+          lastFrame = void 0;
+        }
+      };
+      for (const frame of frames) {
+        sendLastFrame(requestId, false);
+        lastFrame = frame;
+      }
+      sendLastFrame(requestId, true);
+      this.queue.close();
+    } catch (error) {
+      if (error instanceof Error && error.name === "AbortError") {
+        return;
       }
-
-
-
-      lastFrame = frame;
+      throw error;
+    } finally {
+      this.queue.close();
     }
-    sendLastFrame(requestId, true);
-    this.queue.close();
   }
 }
 // Annotate the CommonJS export names for ESM import in node:
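A minimal sketch of the widened `synthesize()` surface, assuming only the signature shown in this diff; both new parameters are optional, so existing one-argument callers keep working unchanged.

```typescript
import { TTS } from '@livekit/agents-plugin-openai';

const tts = new TTS({ voice: 'alloy' }); // reads OPENAI_API_KEY from the environment

const ac = new AbortController();
// connOptions (APIConnectOptions) is omitted here; the signal is passed straight
// through to client.audio.speech.create(..., { signal: abortSignal }).
const stream = tts.synthesize('Hello from LiveKit', undefined, ac.signal);

// Aborting mid-request rejects the pending speech call with an AbortError,
// which the new ChunkedStream.run() treats as a quiet cancellation.
// ac.abort();
```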
package/dist/tts.cjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(text: string): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create({\n
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n private abortController = new AbortController();\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create(\n {\n input: text,\n model: this.#opts.model,\n voice: this.#opts.voice,\n instructions: this.#opts.instructions,\n response_format: 'pcm',\n speed: this.#opts.speed,\n },\n { signal: abortSignal },\n ),\n connOptions,\n abortSignal,\n );\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on OpenAI TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'openai.ChunkedStream';\n private stream: Promise<any>;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(\n tts: TTS,\n text: string,\n stream: Promise<any>,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.stream = stream;\n }\n\n protected async run() {\n try {\n const buffer = await this.stream.then((r) => r.arrayBuffer());\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);\n const frames = audioByteStream.write(buffer);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n } catch (error) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n throw error;\n } finally {\n 
this.queue.close();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAwE;AAExE,oBAAuB;AAGvB,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAS9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM,wBAAwB,qBAAqB,EAAE,WAAW,MAAM,CAAC;AAEvE,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,qBAAO;AAAA,MACT,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACL;AAAA,EAEA,cAAc,MAAyE;AACrF,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI;AAAA,MACT;AAAA,MACA;AAAA,MACA,KAAK,QAAQ,MAAM,OAAO;AAAA,QACxB;AAAA,UACE,OAAO;AAAA,UACP,OAAO,KAAK,MAAM;AAAA,UAClB,OAAO,KAAK,MAAM;AAAA,UAClB,cAAc,KAAK,MAAM;AAAA,UACzB,iBAAiB;AAAA,UACjB,OAAO,KAAK,MAAM;AAAA,QACpB;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,QACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAgB,MAAM;AACpB,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC;AAC5D,YAAM,gBAAY,yBAAU;AAC5B,YAAM,kBAAkB,IAAI,8BAAgB,wBAAwB,mBAAmB;AACvF,YAAM,SAAS,gBAAgB,MAAM,MAAM;AAE3C,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,iBAAW,SAAS,QAAQ;AAC1B,sBAAc,WAAW,KAAK;AAC9B,oBAAY;AAAA,MACd;AACA,oBAAc,WAAW,IAAI;AAE7B,WAAK,MAAM,MAAM;AAAA,IACnB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
package/dist/tts.d.cts
CHANGED
@@ -1,4 +1,4 @@
-import { tts } from '@livekit/agents';
+import { type APIConnectOptions, tts } from '@livekit/agents';
 import { OpenAI } from 'openai';
 import type { TTSModels, TTSVoices } from './models.js';
 export interface TTSOptions {
@@ -13,6 +13,7 @@ export interface TTSOptions {
 export declare class TTS extends tts.TTS {
     #private;
     label: string;
+    private abortController;
     /**
      * Create a new instance of OpenAI TTS.
      *
@@ -26,13 +27,14 @@ export declare class TTS extends tts.TTS {
         voice?: TTSVoices;
         speed?: number;
     }): void;
-    synthesize(text: string): ChunkedStream;
+    synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
     stream(): tts.SynthesizeStream;
+    close(): Promise<void>;
 }
 export declare class ChunkedStream extends tts.ChunkedStream {
     label: string;
     private stream;
-    constructor(tts: TTS, text: string, stream: Promise<any
+    constructor(tts: TTS, text: string, stream: Promise<any>, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
     protected run(): Promise<void>;
 }
 //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import { tts } from '@livekit/agents';
+import { type APIConnectOptions, tts } from '@livekit/agents';
 import { OpenAI } from 'openai';
 import type { TTSModels, TTSVoices } from './models.js';
 export interface TTSOptions {
@@ -13,6 +13,7 @@ export interface TTSOptions {
 export declare class TTS extends tts.TTS {
     #private;
     label: string;
+    private abortController;
     /**
      * Create a new instance of OpenAI TTS.
      *
@@ -26,13 +27,14 @@ export declare class TTS extends tts.TTS {
         voice?: TTSVoices;
         speed?: number;
     }): void;
-    synthesize(text: string): ChunkedStream;
+    synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
     stream(): tts.SynthesizeStream;
+    close(): Promise<void>;
 }
 export declare class ChunkedStream extends tts.ChunkedStream {
     label: string;
     private stream;
-    constructor(tts: TTS, text: string, stream: Promise<any
+    constructor(tts: TTS, text: string, stream: Promise<any>, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
     protected run(): Promise<void>;
 }
 //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA8B,GAAG,EAAE,MAAM,iBAAiB,CAAC;
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,iBAAiB,EAA8B,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAE1F,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAKxD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,KAAK,EAAE,SAAS,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AASD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAgB;IACrB,OAAO,CAAC,eAAe,CAAyB;IAEhD;;;;;;OAMG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAgBzD,aAAa,CAAC,IAAI,EAAE;QAAE,KAAK,CAAC,EAAE,SAAS,GAAG,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,SAAS,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE;IAIrF,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAoBhB,MAAM,IAAI,GAAG,CAAC,gBAAgB;IAIxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;IAClD,KAAK,SAA0B;IAC/B,OAAO,CAAC,MAAM,CAAe;gBAI3B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,EACpB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAMX,GAAG;CA+BpB"}
package/dist/tts.js
CHANGED
@@ -12,6 +12,7 @@ class TTS extends tts.TTS {
   #opts;
   #client;
   label = "openai.TTS";
+  abortController = new AbortController();
   /**
    * Create a new instance of OpenAI TTS.
    *
@@ -33,50 +34,67 @@ class TTS extends tts.TTS {
   updateOptions(opts) {
     this.#opts = { ...this.#opts, ...opts };
   }
-  synthesize(text) {
+  synthesize(text, connOptions, abortSignal) {
     return new ChunkedStream(
       this,
       text,
-      this.#client.audio.speech.create(
-
-
-
-
-
-
-
+      this.#client.audio.speech.create(
+        {
+          input: text,
+          model: this.#opts.model,
+          voice: this.#opts.voice,
+          instructions: this.#opts.instructions,
+          response_format: "pcm",
+          speed: this.#opts.speed
+        },
+        { signal: abortSignal }
+      ),
+      connOptions,
+      abortSignal
    );
  }
  stream() {
    throw new Error("Streaming is not supported on OpenAI TTS");
  }
+  async close() {
+    this.abortController.abort();
+  }
 }
 class ChunkedStream extends tts.ChunkedStream {
   label = "openai.ChunkedStream";
   stream;
   // set Promise<T> to any because OpenAI returns an annoying Response type
-  constructor(tts2, text, stream) {
-    super(text, tts2);
+  constructor(tts2, text, stream, connOptions, abortSignal) {
+    super(text, tts2, connOptions, abortSignal);
    this.stream = stream;
  }
  async run() {
-
-
-
-
-
-
-
-
-
+    try {
+      const buffer = await this.stream.then((r) => r.arrayBuffer());
+      const requestId = shortuuid();
+      const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);
+      const frames = audioByteStream.write(buffer);
+      let lastFrame;
+      const sendLastFrame = (segmentId, final) => {
+        if (lastFrame) {
+          this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+          lastFrame = void 0;
+        }
+      };
+      for (const frame of frames) {
+        sendLastFrame(requestId, false);
+        lastFrame = frame;
+      }
+      sendLastFrame(requestId, true);
+      this.queue.close();
+    } catch (error) {
+      if (error instanceof Error && error.name === "AbortError") {
+        return;
      }
-
-
-
-      lastFrame = frame;
+      throw error;
+    } finally {
+      this.queue.close();
    }
-    sendLastFrame(requestId, true);
-    this.queue.close();
  }
 }
 export {
package/dist/tts.js.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(text: string): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create({\n
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { TTSModels, TTSVoices } from './models.js';\n\nconst OPENAI_TTS_SAMPLE_RATE = 24000;\nconst OPENAI_TTS_CHANNELS = 1;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n voice: TTSVoices;\n speed: number;\n instructions?: string;\n baseURL?: string;\n client?: OpenAI;\n apiKey?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n model: 'tts-1',\n voice: 'alloy',\n speed: 1,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: OpenAI;\n label = 'openai.TTS';\n private abortController = new AbortController();\n\n /**\n * Create a new instance of OpenAI TTS.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = defaultTTSOptions) {\n super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false });\n\n this.#opts = { ...defaultTTSOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: opts.baseURL,\n apiKey: opts.apiKey,\n });\n }\n\n updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(\n this,\n text,\n this.#client.audio.speech.create(\n {\n input: text,\n model: this.#opts.model,\n voice: this.#opts.voice,\n instructions: this.#opts.instructions,\n response_format: 'pcm',\n speed: this.#opts.speed,\n },\n { signal: abortSignal },\n ),\n connOptions,\n abortSignal,\n );\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on OpenAI TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'openai.ChunkedStream';\n private stream: Promise<any>;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(\n tts: TTS,\n text: string,\n stream: Promise<any>,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.stream = stream;\n }\n\n protected async run() {\n try {\n const buffer = await this.stream.then((r) => r.arrayBuffer());\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);\n const frames = audioByteStream.write(buffer);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n } catch (error) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n throw error;\n } finally {\n 
this.queue.close();\n }\n }\n}\n"],"mappings":"AAGA,SAAiC,iBAAiB,WAAW,WAAW;AAExE,SAAS,cAAc;AAGvB,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAS9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM,wBAAwB,qBAAqB,EAAE,WAAW,MAAM,CAAC;AAEvE,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACL;AAAA,EAEA,cAAc,MAAyE;AACrF,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI;AAAA,MACT;AAAA,MACA;AAAA,MACA,KAAK,QAAQ,MAAM,OAAO;AAAA,QACxB;AAAA,UACE,OAAO;AAAA,UACP,OAAO,KAAK,MAAM;AAAA,UAClB,OAAO,KAAK,MAAM;AAAA,UAClB,cAAc,KAAK,MAAM;AAAA,UACzB,iBAAiB;AAAA,UACjB,OAAO,KAAK,MAAM;AAAA,QACpB;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,QACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAgB,MAAM;AACpB,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC;AAC5D,YAAM,YAAY,UAAU;AAC5B,YAAM,kBAAkB,IAAI,gBAAgB,wBAAwB,mBAAmB;AACvF,YAAM,SAAS,gBAAgB,MAAM,MAAM;AAE3C,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,iBAAW,SAAS,QAAQ;AAC1B,sBAAc,WAAW,KAAK;AAC9B,oBAAY;AAAA,MACd;AACA,oBAAc,WAAW,IAAI;AAE7B,WAAK,MAAM,MAAM;AAAA,IACnB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents-plugin-openai",
-  "version": "1.0.24",
+  "version": "1.0.27",
   "description": "OpenAI plugin for LiveKit Node Agents",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -30,9 +30,9 @@
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
-    "@livekit/agents": "1.0.
-    "@livekit/agents-
-    "@livekit/agents-
+    "@livekit/agents": "1.0.27",
+    "@livekit/agents-plugin-silero": "1.0.27",
+    "@livekit/agents-plugins-test": "1.0.27"
   },
   "dependencies": {
     "@livekit/mutex": "^1.1.1",
@@ -41,7 +41,7 @@
   },
   "peerDependencies": {
     "@livekit/rtc-node": "^0.13.12",
-    "@livekit/agents": "1.0.
+    "@livekit/agents": "1.0.27"
   },
   "scripts": {
     "build": "tsup --onSuccess \"pnpm build:types\"",
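Note the exact (unranged) pins: the `@livekit/agents` dev and peer dependencies, along with the sibling plugin packages, are held in lockstep with the plugin's own version, so core and plugins are always released and upgraded together.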
package/src/stt.ts
CHANGED
@@ -27,7 +27,6 @@ export class STT extends stt.STT {
   #opts: STTOptions;
   #client: OpenAI;
   label = 'openai.STT';
-  private abortController = new AbortController();

   /**
    * Create a new instance of OpenAI STT.
@@ -142,10 +141,11 @@ export class STT extends stt.STT {
     return Buffer.concat([header, Buffer.from(frame.data.buffer)]);
   }

-  async _recognize(buffer: AudioBuffer,
-    const config = this.#sanitizeOptions(
+  async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {
+    const config = this.#sanitizeOptions();
     buffer = mergeFrames(buffer);
-    const
+    const wavBuffer = this.#createWav(buffer);
+    const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });

     const resp = await this.#client.audio.transcriptions.create(
       {
@@ -156,7 +156,7 @@ export class STT extends stt.STT {
         response_format: 'json',
       },
       {
-        signal:
+        signal: abortSignal,
       },
     );

@@ -165,7 +165,7 @@ export class STT extends stt.STT {
       alternatives: [
         {
           text: resp.text || '',
-          language: language || '',
+          language: config.language || '',
           startTime: 0,
           endTime: 0,
           confidence: 0,
@@ -178,8 +178,4 @@ export class STT extends stt.STT {
   stream(): stt.SpeechStream {
     throw new Error('Streaming is not supported on OpenAI STT');
   }
-
-  async close(): Promise<void> {
-    this.abortController.abort();
-  }
 }
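One nuance worth calling out in the hunks above: the old `_recognize` echoed its (now removed) `language` parameter into the result, while the new code reads `config.language`, which `#sanitizeOptions` resolves from the constructor options ('en' by default). A toy reduction of that merge logic, with our own names; since `_recognize` now calls `#sanitizeOptions()` with no argument, the override branch is currently unused.

```typescript
interface Opts {
  language: string;
}

// Mirrors #sanitizeOptions: an explicit language overrides the configured one.
function sanitizeOptions(base: Opts, language?: string): Opts {
  return language ? { ...base, language } : base;
}

console.log(sanitizeOptions({ language: 'en' }));       // { language: 'en' }
console.log(sanitizeOptions({ language: 'en' }, 'fr')); // { language: 'fr' }
```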
package/src/tts.ts
CHANGED
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-import { AudioByteStream, shortuuid, tts } from '@livekit/agents';
+import { type APIConnectOptions, AudioByteStream, shortuuid, tts } from '@livekit/agents';
 import type { AudioFrame } from '@livekit/rtc-node';
 import { OpenAI } from 'openai';
 import type { TTSModels, TTSVoices } from './models.js';
@@ -30,6 +30,7 @@ export class TTS extends tts.TTS {
   #opts: TTSOptions;
   #client: OpenAI;
   label = 'openai.TTS';
+  private abortController = new AbortController();

   /**
    * Create a new instance of OpenAI TTS.
@@ -58,24 +59,37 @@ export class TTS extends tts.TTS {
     this.#opts = { ...this.#opts, ...opts };
   }

-  synthesize(
+  synthesize(
+    text: string,
+    connOptions?: APIConnectOptions,
+    abortSignal?: AbortSignal,
+  ): ChunkedStream {
     return new ChunkedStream(
       this,
       text,
-      this.#client.audio.speech.create(
-
-
-
-
-
-
-
+      this.#client.audio.speech.create(
+        {
+          input: text,
+          model: this.#opts.model,
+          voice: this.#opts.voice,
+          instructions: this.#opts.instructions,
+          response_format: 'pcm',
+          speed: this.#opts.speed,
+        },
+        { signal: abortSignal },
+      ),
+      connOptions,
+      abortSignal,
     );
   }

   stream(): tts.SynthesizeStream {
     throw new Error('Streaming is not supported on OpenAI TTS');
   }
+
+  async close(): Promise<void> {
+    this.abortController.abort();
+  }
 }

 export class ChunkedStream extends tts.ChunkedStream {
@@ -83,31 +97,46 @@ export class ChunkedStream extends tts.ChunkedStream {
   private stream: Promise<any>;

   // set Promise<T> to any because OpenAI returns an annoying Response type
-  constructor(
-
+  constructor(
+    tts: TTS,
+    text: string,
+    stream: Promise<any>,
+    connOptions?: APIConnectOptions,
+    abortSignal?: AbortSignal,
+  ) {
+    super(text, tts, connOptions, abortSignal);
     this.stream = stream;
   }

   protected async run() {
-
-
-
-
-
-
-
-
-
-
+    try {
+      const buffer = await this.stream.then((r) => r.arrayBuffer());
+      const requestId = shortuuid();
+      const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);
+      const frames = audioByteStream.write(buffer);
+
+      let lastFrame: AudioFrame | undefined;
+      const sendLastFrame = (segmentId: string, final: boolean) => {
+        if (lastFrame) {
+          this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+          lastFrame = undefined;
+        }
+      };
+
+      for (const frame of frames) {
+        sendLastFrame(requestId, false);
+        lastFrame = frame;
       }
-
+      sendLastFrame(requestId, true);

-
-
-
+      this.queue.close();
+    } catch (error) {
+      if (error instanceof Error && error.name === 'AbortError') {
+        return;
+      }
+      throw error;
+    } finally {
+      this.queue.close();
     }
-    sendLastFrame(requestId, true);
-
-    this.queue.close();
   }
 }