@livekit/agents-plugin-openai 1.0.37 → 1.0.39
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- package/dist/stt.cjs +1 -1
- package/dist/stt.cjs.map +1 -1
- package/dist/stt.js +1 -1
- package/dist/stt.js.map +1 -1
- package/package.json +5 -5
- package/src/stt.ts +1 -1
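The substantive change in this release is a one-line edit to the `STT` constructor (mirrored in the compiled `dist/` outputs and their source maps): the `super()` call now also declares `alignedTranscript: false` alongside `streaming: false` and `interimResults: false`, telling the agents framework that this backend produces neither streaming results nor word-aligned transcripts. As a minimal sketch — assuming the base `stt.STT` class exposes the flags passed to `super()` through a `capabilities` property, which is not shown in this diff — a caller could branch on them like this:

```ts
import { STT } from '@livekit/agents-plugin-openai';

// Requires OPENAI_API_KEY in the environment (or pass { apiKey }).
const whisper = new STT({ model: 'whisper-1' });

// Hypothetical capability check; assumes the flags handed to super()
// surface as a `capabilities` object on the instance.
if (!whisper.capabilities.streaming) {
  // Non-streaming backend: buffer audio and recognize per utterance
  // instead of calling stream(), which throws for this plugin.
}
```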
package/dist/stt.cjs
CHANGED
@@ -41,7 +41,7 @@ class STT extends import_agents.stt.STT {
    * `OPENAI_API_KEY` environment variable.
    */
   constructor(opts = defaultSTTOptions) {
-    super({ streaming: false, interimResults: false });
+    super({ streaming: false, interimResults: false, alignedTranscript: false });
     this.#opts = { ...defaultSTTOptions, ...opts };
     if (this.#opts.apiKey === void 0) {
       throw new Error("OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY");
package/dist/stt.cjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n\n /**\n * Create a new instance of OpenAI STT.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({ streaming: false, interimResults: false });\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: this.#opts.baseURL,\n apiKey: this.#opts.apiKey,\n });\n }\n\n /**\n * Create a new instance of Groq STT.\n *\n * @remarks\n * `apiKey` must be set to your Groq API key, either using the argument or by setting the\n * `GROQ_API_KEY` environment variable.\n */\n static withGroq(\n opts: Partial<{\n model: string | GroqAudioModels;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://api.groq.com/openai/v1',\n ...opts,\n });\n }\n\n /**\n * Create a new instance of OVHcloud AI Endpoints STT.\n *\n * @remarks\n * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the\n * `OVHCLOUD_API_KEY` environment variable.\n */\n static withOVHcloud(\n opts: Partial<{\n model: string;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error(\n 'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',\n );\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',\n ...opts,\n });\n }\n\n #sanitizeOptions(language?: string): STTOptions {\n if (language) {\n return { ...this.#opts, language };\n } else {\n return this.#opts;\n }\n }\n\n #createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n 
header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(16, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n return Buffer.concat([header, Buffer.from(frame.data.buffer)]);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const config = this.#sanitizeOptions();\n buffer = mergeFrames(buffer);\n const wavBuffer = this.#createWav(buffer);\n const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });\n\n const resp = await this.#client.audio.transcriptions.create(\n {\n file,\n model: this.#opts.model,\n language: config.language,\n prompt: config.prompt,\n response_format: 'json',\n },\n {\n signal: abortSignal,\n },\n );\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n text: resp.text || '',\n language: config.language || '',\n startTime: 0,\n endTime: 0,\n confidence: 0,\n },\n ],\n };\n }\n\n /** This method throws an error; streaming is unsupported on OpenAI STT. */\n stream(): stt.SpeechStream {\n throw new Error('Streaming is not supported on OpenAI STT');\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAmD;AAEnD,oBAAuB;AAavB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,OAAO;AACT;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,mBAAmB;AACzD,UAAM,EAAE,WAAW,OAAO,gBAAgB,MAAM,CAAC;
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n\n /**\n * Create a new instance of OpenAI STT.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({ streaming: false, interimResults: false, alignedTranscript: false });\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: this.#opts.baseURL,\n apiKey: this.#opts.apiKey,\n });\n }\n\n /**\n * Create a new instance of Groq STT.\n *\n * @remarks\n * `apiKey` must be set to your Groq API key, either using the argument or by setting the\n * `GROQ_API_KEY` environment variable.\n */\n static withGroq(\n opts: Partial<{\n model: string | GroqAudioModels;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://api.groq.com/openai/v1',\n ...opts,\n });\n }\n\n /**\n * Create a new instance of OVHcloud AI Endpoints STT.\n *\n * @remarks\n * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the\n * `OVHCLOUD_API_KEY` environment variable.\n */\n static withOVHcloud(\n opts: Partial<{\n model: string;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error(\n 'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',\n );\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',\n ...opts,\n });\n }\n\n #sanitizeOptions(language?: string): STTOptions {\n if (language) {\n return { ...this.#opts, language };\n } else {\n return this.#opts;\n }\n }\n\n #createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n 
header.writeUInt16LE(1, 20);\n header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(16, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n return Buffer.concat([header, Buffer.from(frame.data.buffer)]);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const config = this.#sanitizeOptions();\n buffer = mergeFrames(buffer);\n const wavBuffer = this.#createWav(buffer);\n const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });\n\n const resp = await this.#client.audio.transcriptions.create(\n {\n file,\n model: this.#opts.model,\n language: config.language,\n prompt: config.prompt,\n response_format: 'json',\n },\n {\n signal: abortSignal,\n },\n );\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n text: resp.text || '',\n language: config.language || '',\n startTime: 0,\n endTime: 0,\n confidence: 0,\n },\n ],\n };\n }\n\n /** This method throws an error; streaming is unsupported on OpenAI STT. */\n stream(): stt.SpeechStream {\n throw new Error('Streaming is not supported on OpenAI STT');\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAmD;AAEnD,oBAAuB;AAavB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,OAAO;AACT;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,mBAAmB;AACzD,UAAM,EAAE,WAAW,OAAO,gBAAgB,OAAO,mBAAmB,MAAM,CAAC;AAE3E,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,qBAAO;AAAA,MACT,SAAS,KAAK,MAAM;AAAA,MACpB,QAAQ,KAAK,MAAM;AAAA,IACrB,CAAC;AAAA,EACL;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,SACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI,MAAM,sEAAsE;AAAA,IACxF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,aACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,UAA+B;AAC9C,QAAI,UAAU;AACZ,aAAO,EAAE,GAAG,KAAK,OAAO,SAAS;AAAA,IACnC,OAAO;AACL,aAAO,KAAK;AAAA,IACd;AAAA,EACF;AAAA,EAEA,WAAW,OAA2B;AACpC,UAAM,gBAAgB;AACtB,UAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,UAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,UAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,cAAc,GAAG,EAAE;AAC1B,WAAO,cAAc,MAAM,UAAU,EAAE;AACvC,WAAO,cAAc,MAAM,YAAY,EAAE;AACzC,WAAO,cAAc,UAAU,EAAE;AACjC,WAAO,cAAc,YAAY,EAAE;AACnC,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAC9C,WAAO,OAAO,OAAO,CAAC,QAAQ,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,CAAC;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,SAAS,KAAK,iBAAiB;AACrC,iBAAS,2BAAY,MAAM;AAC3B,UAAM,YAAY,KAAK,WAAW,MAAM;AACxC,UAAM,OAAO,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,aAAa,EAAE,MAAM,YAAY,CAAC;AAErF,UAAM,OAAO,MAAM,KAAK,QAAQ,MAAM,eAAe;AAAA,MACnD;AAAA,QACE;AAAA,QACA,OAAO,KAAK,MAAM;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,QAAQ,OAAO;AAAA,QACf,iBAAiB;AAAA,MACnB;AAAA,MACA;AAAA,QACE,QAAQ;AAAA,MACV;AAAA,IACF;AAEA,WAAO;AAAA,MAC
L,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,cAAc;AAAA,QACZ;AAAA,UACE,MAAM,KAAK,QAAQ;AAAA,UACnB,UAAU,OAAO,YAAY;AAAA,UAC7B,WAAW;AAAA,UACX,SAAS;AAAA,UACT,YAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAA2B;AACzB,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AACF;","names":[]}
package/dist/stt.js
CHANGED
@@ -18,7 +18,7 @@ class STT extends stt.STT {
    * `OPENAI_API_KEY` environment variable.
    */
   constructor(opts = defaultSTTOptions) {
-    super({ streaming: false, interimResults: false });
+    super({ streaming: false, interimResults: false, alignedTranscript: false });
     this.#opts = { ...defaultSTTOptions, ...opts };
     if (this.#opts.apiKey === void 0) {
       throw new Error("OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY");
package/dist/stt.js.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n\n /**\n * Create a new instance of OpenAI STT.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({ streaming: false, interimResults: false });\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: this.#opts.baseURL,\n apiKey: this.#opts.apiKey,\n });\n }\n\n /**\n * Create a new instance of Groq STT.\n *\n * @remarks\n * `apiKey` must be set to your Groq API key, either using the argument or by setting the\n * `GROQ_API_KEY` environment variable.\n */\n static withGroq(\n opts: Partial<{\n model: string | GroqAudioModels;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://api.groq.com/openai/v1',\n ...opts,\n });\n }\n\n /**\n * Create a new instance of OVHcloud AI Endpoints STT.\n *\n * @remarks\n * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the\n * `OVHCLOUD_API_KEY` environment variable.\n */\n static withOVHcloud(\n opts: Partial<{\n model: string;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error(\n 'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',\n );\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',\n ...opts,\n });\n }\n\n #sanitizeOptions(language?: string): STTOptions {\n if (language) {\n return { ...this.#opts, language };\n } else {\n return this.#opts;\n }\n }\n\n #createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n 
header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(16, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n return Buffer.concat([header, Buffer.from(frame.data.buffer)]);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const config = this.#sanitizeOptions();\n buffer = mergeFrames(buffer);\n const wavBuffer = this.#createWav(buffer);\n const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });\n\n const resp = await this.#client.audio.transcriptions.create(\n {\n file,\n model: this.#opts.model,\n language: config.language,\n prompt: config.prompt,\n response_format: 'json',\n },\n {\n signal: abortSignal,\n },\n );\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n text: resp.text || '',\n language: config.language || '',\n startTime: 0,\n endTime: 0,\n confidence: 0,\n },\n ],\n };\n }\n\n /** This method throws an error; streaming is unsupported on OpenAI STT. */\n stream(): stt.SpeechStream {\n throw new Error('Streaming is not supported on OpenAI STT');\n }\n}\n"],"mappings":"AAGA,SAA2B,aAAa,WAAW;AAEnD,SAAS,cAAc;AAavB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,mBAAmB;AACzD,UAAM,EAAE,WAAW,OAAO,gBAAgB,MAAM,CAAC;
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, mergeFrames, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { OpenAI } from 'openai';\nimport type { GroqAudioModels, WhisperModels } from './models.js';\n\nexport interface STTOptions {\n apiKey?: string;\n language: string;\n prompt?: string;\n detectLanguage: boolean;\n model: WhisperModels | string;\n baseURL?: string;\n client?: OpenAI;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.OPENAI_API_KEY,\n language: 'en',\n detectLanguage: false,\n model: 'whisper-1',\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #client: OpenAI;\n label = 'openai.STT';\n\n /**\n * Create a new instance of OpenAI STT.\n *\n * @remarks\n * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the\n * `OPENAI_API_KEY` environment variable.\n */\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({ streaming: false, interimResults: false, alignedTranscript: false });\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n if (this.#opts.apiKey === undefined) {\n throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');\n }\n\n this.#client =\n this.#opts.client ||\n new OpenAI({\n baseURL: this.#opts.baseURL,\n apiKey: this.#opts.apiKey,\n });\n }\n\n /**\n * Create a new instance of Groq STT.\n *\n * @remarks\n * `apiKey` must be set to your Groq API key, either using the argument or by setting the\n * `GROQ_API_KEY` environment variable.\n */\n static withGroq(\n opts: Partial<{\n model: string | GroqAudioModels;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.GROQ_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://api.groq.com/openai/v1',\n ...opts,\n });\n }\n\n /**\n * Create a new instance of OVHcloud AI Endpoints STT.\n *\n * @remarks\n * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the\n * `OVHCLOUD_API_KEY` environment variable.\n */\n static withOVHcloud(\n opts: Partial<{\n model: string;\n apiKey?: string;\n baseURL?: string;\n client: OpenAI;\n language: string;\n detectLanguage: boolean;\n }> = {},\n ): STT {\n opts.apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;\n if (opts.apiKey === undefined) {\n throw new Error(\n 'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',\n );\n }\n\n return new STT({\n model: 'whisper-large-v3-turbo',\n baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',\n ...opts,\n });\n }\n\n #sanitizeOptions(language?: string): STTOptions {\n if (language) {\n return { ...this.#opts, language };\n } else {\n return this.#opts;\n }\n }\n\n #createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n 
header.writeUInt16LE(1, 20);\n header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(16, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n return Buffer.concat([header, Buffer.from(frame.data.buffer)]);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const config = this.#sanitizeOptions();\n buffer = mergeFrames(buffer);\n const wavBuffer = this.#createWav(buffer);\n const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });\n\n const resp = await this.#client.audio.transcriptions.create(\n {\n file,\n model: this.#opts.model,\n language: config.language,\n prompt: config.prompt,\n response_format: 'json',\n },\n {\n signal: abortSignal,\n },\n );\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n text: resp.text || '',\n language: config.language || '',\n startTime: 0,\n endTime: 0,\n confidence: 0,\n },\n ],\n };\n }\n\n /** This method throws an error; streaming is unsupported on OpenAI STT. */\n stream(): stt.SpeechStream {\n throw new Error('Streaming is not supported on OpenAI STT');\n }\n}\n"],"mappings":"AAGA,SAA2B,aAAa,WAAW;AAEnD,SAAS,cAAc;AAavB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,OAAO;AACT;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,mBAAmB;AACzD,UAAM,EAAE,WAAW,OAAO,gBAAgB,OAAO,mBAAmB,MAAM,CAAC;AAE3E,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC7C,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI,MAAM,0EAA0E;AAAA,IAC5F;AAEA,SAAK,UACH,KAAK,MAAM,UACX,IAAI,OAAO;AAAA,MACT,SAAS,KAAK,MAAM;AAAA,MACpB,QAAQ,KAAK,MAAM;AAAA,IACrB,CAAC;AAAA,EACL;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,SACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI,MAAM,sEAAsE;AAAA,IACxF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,aACL,OAOK,CAAC,GACD;AACL,SAAK,SAAS,KAAK,UAAU,QAAQ,IAAI;AACzC,QAAI,KAAK,WAAW,QAAW;AAC7B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,WAAO,IAAI,IAAI;AAAA,MACb,OAAO;AAAA,MACP,SAAS;AAAA,MACT,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,UAA+B;AAC9C,QAAI,UAAU;AACZ,aAAO,EAAE,GAAG,KAAK,OAAO,SAAS;AAAA,IACnC,OAAO;AACL,aAAO,KAAK;AAAA,IACd;AAAA,EACF;AAAA,EAEA,WAAW,OAA2B;AACpC,UAAM,gBAAgB;AACtB,UAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,UAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,UAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,WAAO,MAAM,QAAQ,CAAC;AACtB,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,cAAc,GAAG,EAAE;AAC1B,WAAO,cAAc,MAAM,UAAU,EAAE;AACvC,WAAO,cAAc,MAAM,YAAY,EAAE;AACzC,WAAO,cAAc,UAAU,EAAE;AACjC,WAAO,cAAc,YAAY,EAAE;AACnC,WAAO,cAAc,IAAI,EAAE;AAC3B,WAAO,MAAM,QAAQ,EAAE;AACvB,WAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAC9C,WAAO,OAAO,OAAO,CAAC,QAAQ,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,CAAC;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,SAAS,KAAK,iBAAiB;AACrC,aAAS,YAAY,MAAM;AAC3B,UAAM,YAAY,KAAK,WAAW,MAAM;AACxC,UAAM,OAAO,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,aAAa,EAAE,MAAM,YAAY,CAAC;AAErF,UAAM,OAAO,MAAM,KAAK,QAAQ,MAAM,eAAe;AAAA,MACnD;AAAA,QACE;AAAA,QACA,OAAO,KAAK,MAAM;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,QAAQ,OAAO;AAAA,QACf,iBAAiB;AAAA,MACnB;AAAA,MACA;AAAA,QACE,QAAQ;AAAA,MACV;AAAA,IACF;AAEA,WAAO;AAAA,MACL,MAAM,IAAI,gBAAgB;AAAA,MAC1B,cAAc;
AAAA,QACZ;AAAA,UACE,MAAM,KAAK,QAAQ;AAAA,UACnB,UAAU,OAAO,YAAY;AAAA,UAC7B,WAAW;AAAA,UACX,SAAS;AAAA,UACT,YAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAA2B;AACzB,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AACF;","names":[]}
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents-plugin-openai",
-  "version": "1.0.37",
+  "version": "1.0.39",
   "description": "OpenAI plugin for LiveKit Node Agents",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -30,9 +30,9 @@
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
-    "@livekit/agents": "1.0.37",
-    "@livekit/agents-plugin-silero": "1.0.37",
-    "@livekit/agents-plugins-test": "1.0.37"
+    "@livekit/agents": "1.0.39",
+    "@livekit/agents-plugin-silero": "1.0.39",
+    "@livekit/agents-plugins-test": "1.0.39"
   },
   "dependencies": {
     "@livekit/mutex": "^1.1.1",
@@ -41,7 +41,7 @@
   },
   "peerDependencies": {
     "@livekit/rtc-node": "^0.13.24",
-    "@livekit/agents": "1.0.37"
+    "@livekit/agents": "1.0.39"
   },
   "scripts": {
     "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/stt.ts
CHANGED
@@ -36,7 +36,7 @@ export class STT extends stt.STT {
    * `OPENAI_API_KEY` environment variable.
    */
   constructor(opts: Partial<STTOptions> = defaultSTTOptions) {
-    super({ streaming: false, interimResults: false });
+    super({ streaming: false, interimResults: false, alignedTranscript: false });
 
     this.#opts = { ...defaultSTTOptions, ...opts };
     if (this.#opts.apiKey === undefined) {