@livekit/agents-plugin-google 1.0.24 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/beta/gemini_tts.cjs +9 -5
- package/dist/beta/gemini_tts.cjs.map +1 -1
- package/dist/beta/gemini_tts.d.cts +3 -3
- package/dist/beta/gemini_tts.d.ts +3 -3
- package/dist/beta/gemini_tts.d.ts.map +1 -1
- package/dist/beta/gemini_tts.js +9 -5
- package/dist/beta/gemini_tts.js.map +1 -1
- package/package.json +5 -5
- package/src/beta/gemini_tts.ts +18 -4
package/dist/beta/gemini_tts.cjs
CHANGED
|
@@ -92,8 +92,8 @@ class TTS extends import_agents.tts.TTS {
|
|
|
92
92
|
};
|
|
93
93
|
this.#client = new import_genai.GoogleGenAI(clientOptions);
|
|
94
94
|
}
|
|
95
|
-
synthesize(text) {
|
|
96
|
-
return new ChunkedStream(text, this);
|
|
95
|
+
synthesize(text, connOptions, abortSignal) {
|
|
96
|
+
return new ChunkedStream(text, this, connOptions, abortSignal);
|
|
97
97
|
}
|
|
98
98
|
/**
|
|
99
99
|
* Update the TTS options.
|
|
@@ -118,8 +118,8 @@ class TTS extends import_agents.tts.TTS {
|
|
|
118
118
|
class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
119
119
|
#tts;
|
|
120
120
|
label = "google.gemini.ChunkedStream";
|
|
121
|
-
constructor(inputText, tts2) {
|
|
122
|
-
super(inputText, tts2);
|
|
121
|
+
constructor(inputText, tts2, connOptions, abortSignal) {
|
|
122
|
+
super(inputText, tts2, connOptions, abortSignal);
|
|
123
123
|
this.#tts = tts2;
|
|
124
124
|
}
|
|
125
125
|
async run() {
|
|
@@ -133,7 +133,8 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
133
133
|
voiceName: this.#tts.opts.voiceName
|
|
134
134
|
}
|
|
135
135
|
}
|
|
136
|
-
}
|
|
136
|
+
},
|
|
137
|
+
abortSignal: this.abortSignal
|
|
137
138
|
};
|
|
138
139
|
let inputText = this.inputText;
|
|
139
140
|
if (this.#tts.opts.instructions) {
|
|
@@ -156,6 +157,9 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
156
157
|
await this.#processResponse(response, bstream, requestId);
|
|
157
158
|
}
|
|
158
159
|
} catch (error) {
|
|
160
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
159
163
|
if ((0, import_agents.isAPIError)(error)) throw error;
|
|
160
164
|
const err = error;
|
|
161
165
|
if (err.code && err.code >= 400 && err.code < 500) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/beta/gemini_tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as types from '@google/genai';\nimport { GoogleGenAI } from '@google/genai';\nimport {\n APIConnectionError,\n APIStatusError,\n AudioByteStream,\n isAPIError,\n shortuuid,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\n\nexport type GeminiTTSModels = 'gemini-2.5-flash-preview-tts' | 'gemini-2.5-pro-preview-tts';\n\nexport type GeminiVoices =\n | 'Zephyr'\n | 'Puck'\n | 'Charon'\n | 'Kore'\n | 'Fenrir'\n | 'Leda'\n | 'Orus'\n | 'Aoede'\n | 'Callirrhoe'\n | 'Autonoe'\n | 'Enceladus'\n | 'Iapetus'\n | 'Umbriel'\n | 'Algieba'\n | 'Despina'\n | 'Erinome'\n | 'Algenib'\n | 'Rasalgethi'\n | 'Laomedeia'\n | 'Achernar'\n | 'Alnilam'\n | 'Schedar'\n | 'Gacrux'\n | 'Pulcherrima'\n | 'Achird'\n | 'Zubenelgenubi'\n | 'Vindemiatrix'\n | 'Sadachbia'\n | 'Sadaltager'\n | 'Sulafat';\n\nconst DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-preview-tts';\nconst DEFAULT_VOICE: GeminiVoices = 'Kore';\nconst DEFAULT_SAMPLE_RATE = 24000; // not configurable\nconst NUM_CHANNELS = 1;\nconst DEFAULT_INSTRUCTIONS = \"Say the text with a proper tone, don't omit or add any words\";\n\nexport interface TTSOptions {\n model: GeminiTTSModels | string;\n voiceName: GeminiVoices | string;\n vertexai: boolean;\n project?: string;\n location?: string;\n instructions?: string;\n}\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: GoogleGenAI;\n label = 'google.gemini.TTS';\n\n /**\n * Create a new instance of Gemini TTS.\n *\n * Environment Requirements:\n * - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.\n * - For Google Gemini API: Set the `apiKey` argument or the `GOOGLE_API_KEY` environment variable.\n *\n * @param opts - Configuration options for Gemini TTS\n */\n constructor({\n 
model = DEFAULT_MODEL,\n voiceName = DEFAULT_VOICE,\n apiKey,\n vertexai,\n project,\n location,\n instructions,\n }: Partial<TTSOptions & { apiKey: string }> = {}) {\n super(DEFAULT_SAMPLE_RATE, NUM_CHANNELS, { streaming: false });\n\n const gcpProject: string | undefined = project || process.env.GOOGLE_CLOUD_PROJECT;\n const gcpLocation: string | undefined =\n location || process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';\n const useVertexai = vertexai ?? process.env.GOOGLE_GENAI_USE_VERTEXAI === 'true';\n const geminiApiKey = apiKey || process.env.GOOGLE_API_KEY;\n\n let finalProject: string | undefined = gcpProject;\n let finalLocation: string | undefined = gcpLocation;\n let finalApiKey: string | undefined = geminiApiKey;\n\n if (useVertexai) {\n if (!finalProject) {\n throw new APIConnectionError({\n message:\n 'Project ID is required for Vertex AI. Set via project option or GOOGLE_CLOUD_PROJECT environment variable',\n });\n }\n finalApiKey = undefined;\n } else {\n finalProject = undefined;\n finalLocation = undefined;\n if (!finalApiKey) {\n throw new APIConnectionError({\n message:\n 'API key is required for Google API either via apiKey or GOOGLE_API_KEY environment variable',\n });\n }\n }\n\n this.#opts = {\n model,\n voiceName,\n vertexai: useVertexai,\n project: finalProject,\n location: finalLocation,\n instructions: instructions ?? DEFAULT_INSTRUCTIONS,\n };\n\n const clientOptions: types.GoogleGenAIOptions = useVertexai\n ? 
{\n vertexai: true,\n project: finalProject,\n location: finalLocation,\n }\n : {\n apiKey: finalApiKey,\n };\n\n this.#client = new GoogleGenAI(clientOptions);\n }\n\n synthesize(text: string): ChunkedStream {\n return new ChunkedStream(text, this);\n }\n\n /**\n * Update the TTS options.\n *\n * @param opts - Options to update\n */\n updateOptions(opts: { voiceName?: GeminiVoices | string }) {\n if (opts.voiceName !== undefined) {\n this.#opts.voiceName = opts.voiceName;\n }\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on Gemini TTS');\n }\n\n get opts(): TTSOptions {\n return this.#opts;\n }\n\n get client(): GoogleGenAI {\n return this.#client;\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n #tts: TTS;\n label = 'google.gemini.ChunkedStream';\n\n constructor(inputText: string, tts: TTS) {\n super(inputText, tts);\n this.#tts = tts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels);\n\n const config: types.GenerateContentConfig = {\n responseModalities: ['AUDIO'],\n speechConfig: {\n voiceConfig: {\n prebuiltVoiceConfig: {\n voiceName: this.#tts.opts.voiceName,\n },\n },\n },\n };\n\n let inputText = this.inputText;\n if (this.#tts.opts.instructions) {\n inputText = `${this.#tts.opts.instructions}:\\n\"${inputText}\"`;\n }\n\n const contents: types.Content[] = [\n {\n role: 'user',\n parts: [{ text: inputText }],\n },\n ];\n\n const responseStream = await this.#tts.client.models.generateContentStream({\n model: this.#tts.opts.model,\n contents,\n config,\n });\n\n try {\n for await (const response of responseStream) {\n await this.#processResponse(response, bstream, requestId);\n }\n } catch (error: unknown) {\n if (isAPIError(error)) throw error;\n\n const err = error as {\n code?: number;\n message?: string;\n status?: string;\n type?: string;\n };\n\n if (err.code && err.code >= 400 && err.code < 500) 
{\n if (err.code === 429) {\n throw new APIStatusError({\n message: `Gemini TTS: Rate limit error - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: 429,\n retryable: true,\n },\n });\n } else {\n throw new APIStatusError({\n message: `Gemini TTS: Client error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: false,\n },\n });\n }\n }\n\n if (err.code && err.code >= 500) {\n throw new APIStatusError({\n message: `Gemini TTS: Server error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: true,\n },\n });\n }\n\n throw new APIConnectionError({\n message: `Gemini TTS: Connection error - ${err.message || 'Unknown error'}`,\n options: { retryable: true },\n });\n } finally {\n this.queue.close();\n }\n }\n\n async #processResponse(\n response: types.GenerateContentResponse,\n bstream: AudioByteStream,\n requestId: string,\n ) {\n if (!response.candidates || response.candidates.length === 0) {\n return;\n }\n\n const candidate = response.candidates[0];\n if (!candidate || !candidate.content?.parts) {\n return;\n }\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (final: boolean) => {\n if (lastFrame) {\n this.queue.put({\n requestId,\n frame: lastFrame,\n segmentId: requestId,\n final,\n });\n lastFrame = undefined;\n }\n };\n\n for (const part of candidate.content.parts) {\n if (part.inlineData?.data && part.inlineData.mimeType?.startsWith('audio/')) {\n const audioBuffer = Buffer.from(part.inlineData.data, 'base64');\n\n for (const frame of bstream.write(audioBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n }\n }\n\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n\n sendLastFrame(true);\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,mBAA4B;AAC5B,oBAOO;AAqCP,MAAM,gBAAiC;AACvC,MAAM,gBAA8B;AACpC,MAAM,sBAAsB;AAC5B,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAWtB,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,YAAY;AAAA,IACV,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAA8C,CAAC,GAAG;AAChD,UAAM,qBAAqB,cAAc,EAAE,WAAW,MAAM,CAAC;AAE7D,UAAM,aAAiC,WAAW,QAAQ,IAAI;AAC9D,UAAM,cACJ,YAAY,QAAQ,IAAI,yBAAyB;AACnD,UAAM,cAAc,YAAY,QAAQ,IAAI,8BAA8B;AAC1E,UAAM,eAAe,UAAU,QAAQ,IAAI;AAE3C,QAAI,eAAmC;AACvC,QAAI,gBAAoC;AACxC,QAAI,cAAkC;AAEtC,QAAI,aAAa;AACf,UAAI,CAAC,cAAc;AACjB,cAAM,IAAI,iCAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AACA,oBAAc;AAAA,IAChB,OAAO;AACL,qBAAe;AACf,sBAAgB;AAChB,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI,iCAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,cAAc,gBAAgB;AAAA,IAChC;AAEA,UAAM,gBAA0C,cAC5C;AAAA,MACE,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,IACA;AAAA,MACE,QAAQ;AAAA,IACV;AAEJ,SAAK,UAAU,IAAI,yBAAY,aAAa;AAAA,EAC9C;AAAA,EAEA,WAAW,MAA6B;AACtC,WAAO,IAAI,cAAc,MAAM,IAAI;AAAA,EACrC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6C;AACzD,QAAI,KAAK,cAAc,QAAW;AAChC,WAAK,MAAM,YAAY,KAAK;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,WAAmBA,MAAU;AACvC,UAAM,WAAWA,IAAG;AACpB,SAAK,OAAOA;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAE/E,UAAM,SAAsC;AAAA,MAC1C,oBAAoB,CAAC,OAAO;AAAA,MAC5B,cAAc;AAAA,QACZ,aAAa;AAAA,UACX,qBAAqB;AAAA,YACnB,WAAW,KAAK,KAAK,KAAK;AAAA,UAC5B;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,QAAI,YAAY,KAAK;AACrB,QAAI,KAAK,KAAK,KAAK,cAAc;AAC/B,kBAAY,GAAG,KAAK,KAAK,KAAK,YAAY;AAAA,GAAO,SAAS;AAAA,IAC5D;A
AEA,UAAM,WAA4B;AAAA,MAChC;AAAA,QACE,MAAM;AAAA,QACN,OAAO,CAAC,EAAE,MAAM,UAAU,CAAC;AAAA,MAC7B;AAAA,IACF;AAEA,UAAM,iBAAiB,MAAM,KAAK,KAAK,OAAO,OAAO,sBAAsB;AAAA,MACzE,OAAO,KAAK,KAAK,KAAK;AAAA,MACtB;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI;AACF,uBAAiB,YAAY,gBAAgB;AAC3C,cAAM,KAAK,iBAAiB,UAAU,SAAS,SAAS;AAAA,MAC1D;AAAA,IACF,SAAS,OAAgB;AACvB,cAAI,0BAAW,KAAK,EAAG,OAAM;AAE7B,YAAM,MAAM;AAOZ,UAAI,IAAI,QAAQ,IAAI,QAAQ,OAAO,IAAI,OAAO,KAAK;AACjD,YAAI,IAAI,SAAS,KAAK;AACpB,gBAAM,IAAI,6BAAe;AAAA,YACvB,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,YACzE,SAAS;AAAA,cACP,YAAY;AAAA,cACZ,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH,OAAO;AACL,gBAAM,IAAI,6BAAe;AAAA,YACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,YACnF,SAAS;AAAA,cACP,YAAY,IAAI;AAAA,cAChB,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAC/B,cAAM,IAAI,6BAAe;AAAA,UACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,UACnF,SAAS;AAAA,YACP,YAAY,IAAI;AAAA,YAChB,WAAW;AAAA,UACb;AAAA,QACF,CAAC;AAAA,MACH;AAEA,YAAM,IAAI,iCAAmB;AAAA,QAC3B,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,QACzE,SAAS,EAAE,WAAW,KAAK;AAAA,MAC7B,CAAC;AAAA,IACH,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AAAA,EAEA,MAAM,iBACJ,UACA,SACA,WACA;AA3QJ;AA4QI,QAAI,CAAC,SAAS,cAAc,SAAS,WAAW,WAAW,GAAG;AAC5D;AAAA,IACF;AAEA,UAAM,YAAY,SAAS,WAAW,CAAC;AACvC,QAAI,CAAC,aAAa,GAAC,eAAU,YAAV,mBAAmB,QAAO;AAC3C;AAAA,IACF;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAAC,UAAmB;AACxC,UAAI,WAAW;AACb,aAAK,MAAM,IAAI;AAAA,UACb;AAAA,UACA,OAAO;AAAA,UACP,WAAW;AAAA,UACX;AAAA,QACF,CAAC;AACD,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,QAAQ,UAAU,QAAQ,OAAO;AAC1C,YAAI,UAAK,eAAL,mBAAiB,WAAQ,UAAK,WAAW,aAAhB,mBAA0B,WAAW,YAAW;AAC3E,cAAM,cAAc,OAAO,KAAK,KAAK,WAAW,MAAM,QAAQ;AAE9D,mBAAW,SAAS,QAAQ,MAAM,WAAW,GAAG;AAC9C,wBAAc,KAAK;AACnB,sBAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,oBAAc,KAAK;AACnB,kBAAY;AAAA,IACd;AAEA,kBAAc,IAAI;AAAA,EACpB;AACF;","names":["tts"]}
|
|
1
|
+
{"version":3,"sources":["../../src/beta/gemini_tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as types from '@google/genai';\nimport { GoogleGenAI } from '@google/genai';\nimport {\n type APIConnectOptions,\n APIConnectionError,\n APIStatusError,\n AudioByteStream,\n isAPIError,\n shortuuid,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\n\nexport type GeminiTTSModels = 'gemini-2.5-flash-preview-tts' | 'gemini-2.5-pro-preview-tts';\n\nexport type GeminiVoices =\n | 'Zephyr'\n | 'Puck'\n | 'Charon'\n | 'Kore'\n | 'Fenrir'\n | 'Leda'\n | 'Orus'\n | 'Aoede'\n | 'Callirrhoe'\n | 'Autonoe'\n | 'Enceladus'\n | 'Iapetus'\n | 'Umbriel'\n | 'Algieba'\n | 'Despina'\n | 'Erinome'\n | 'Algenib'\n | 'Rasalgethi'\n | 'Laomedeia'\n | 'Achernar'\n | 'Alnilam'\n | 'Schedar'\n | 'Gacrux'\n | 'Pulcherrima'\n | 'Achird'\n | 'Zubenelgenubi'\n | 'Vindemiatrix'\n | 'Sadachbia'\n | 'Sadaltager'\n | 'Sulafat';\n\nconst DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-preview-tts';\nconst DEFAULT_VOICE: GeminiVoices = 'Kore';\nconst DEFAULT_SAMPLE_RATE = 24000; // not configurable\nconst NUM_CHANNELS = 1;\nconst DEFAULT_INSTRUCTIONS = \"Say the text with a proper tone, don't omit or add any words\";\n\nexport interface TTSOptions {\n model: GeminiTTSModels | string;\n voiceName: GeminiVoices | string;\n vertexai: boolean;\n project?: string;\n location?: string;\n instructions?: string;\n}\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: GoogleGenAI;\n label = 'google.gemini.TTS';\n\n /**\n * Create a new instance of Gemini TTS.\n *\n * Environment Requirements:\n * - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.\n * - For Google Gemini API: Set the `apiKey` argument or the `GOOGLE_API_KEY` environment variable.\n *\n * @param opts - Configuration options for Gemini 
TTS\n */\n constructor({\n model = DEFAULT_MODEL,\n voiceName = DEFAULT_VOICE,\n apiKey,\n vertexai,\n project,\n location,\n instructions,\n }: Partial<TTSOptions & { apiKey: string }> = {}) {\n super(DEFAULT_SAMPLE_RATE, NUM_CHANNELS, { streaming: false });\n\n const gcpProject: string | undefined = project || process.env.GOOGLE_CLOUD_PROJECT;\n const gcpLocation: string | undefined =\n location || process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';\n const useVertexai = vertexai ?? process.env.GOOGLE_GENAI_USE_VERTEXAI === 'true';\n const geminiApiKey = apiKey || process.env.GOOGLE_API_KEY;\n\n let finalProject: string | undefined = gcpProject;\n let finalLocation: string | undefined = gcpLocation;\n let finalApiKey: string | undefined = geminiApiKey;\n\n if (useVertexai) {\n if (!finalProject) {\n throw new APIConnectionError({\n message:\n 'Project ID is required for Vertex AI. Set via project option or GOOGLE_CLOUD_PROJECT environment variable',\n });\n }\n finalApiKey = undefined;\n } else {\n finalProject = undefined;\n finalLocation = undefined;\n if (!finalApiKey) {\n throw new APIConnectionError({\n message:\n 'API key is required for Google API either via apiKey or GOOGLE_API_KEY environment variable',\n });\n }\n }\n\n this.#opts = {\n model,\n voiceName,\n vertexai: useVertexai,\n project: finalProject,\n location: finalLocation,\n instructions: instructions ?? DEFAULT_INSTRUCTIONS,\n };\n\n const clientOptions: types.GoogleGenAIOptions = useVertexai\n ? 
{\n vertexai: true,\n project: finalProject,\n location: finalLocation,\n }\n : {\n apiKey: finalApiKey,\n };\n\n this.#client = new GoogleGenAI(clientOptions);\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(text, this, connOptions, abortSignal);\n }\n\n /**\n * Update the TTS options.\n *\n * @param opts - Options to update\n */\n updateOptions(opts: { voiceName?: GeminiVoices | string }) {\n if (opts.voiceName !== undefined) {\n this.#opts.voiceName = opts.voiceName;\n }\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on Gemini TTS');\n }\n\n get opts(): TTSOptions {\n return this.#opts;\n }\n\n get client(): GoogleGenAI {\n return this.#client;\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n #tts: TTS;\n label = 'google.gemini.ChunkedStream';\n\n constructor(\n inputText: string,\n tts: TTS,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(inputText, tts, connOptions, abortSignal);\n this.#tts = tts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels);\n\n const config: types.GenerateContentConfig = {\n responseModalities: ['AUDIO'],\n speechConfig: {\n voiceConfig: {\n prebuiltVoiceConfig: {\n voiceName: this.#tts.opts.voiceName,\n },\n },\n },\n abortSignal: this.abortSignal,\n };\n\n let inputText = this.inputText;\n if (this.#tts.opts.instructions) {\n inputText = `${this.#tts.opts.instructions}:\\n\"${inputText}\"`;\n }\n\n const contents: types.Content[] = [\n {\n role: 'user',\n parts: [{ text: inputText }],\n },\n ];\n\n const responseStream = await this.#tts.client.models.generateContentStream({\n model: this.#tts.opts.model,\n contents,\n config,\n });\n\n try {\n for await (const response of responseStream) {\n await this.#processResponse(response, bstream, requestId);\n 
}\n } catch (error: unknown) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n if (isAPIError(error)) throw error;\n\n const err = error as {\n code?: number;\n message?: string;\n status?: string;\n type?: string;\n };\n\n if (err.code && err.code >= 400 && err.code < 500) {\n if (err.code === 429) {\n throw new APIStatusError({\n message: `Gemini TTS: Rate limit error - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: 429,\n retryable: true,\n },\n });\n } else {\n throw new APIStatusError({\n message: `Gemini TTS: Client error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: false,\n },\n });\n }\n }\n\n if (err.code && err.code >= 500) {\n throw new APIStatusError({\n message: `Gemini TTS: Server error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: true,\n },\n });\n }\n\n throw new APIConnectionError({\n message: `Gemini TTS: Connection error - ${err.message || 'Unknown error'}`,\n options: { retryable: true },\n });\n } finally {\n this.queue.close();\n }\n }\n\n async #processResponse(\n response: types.GenerateContentResponse,\n bstream: AudioByteStream,\n requestId: string,\n ) {\n if (!response.candidates || response.candidates.length === 0) {\n return;\n }\n\n const candidate = response.candidates[0];\n if (!candidate || !candidate.content?.parts) {\n return;\n }\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (final: boolean) => {\n if (lastFrame) {\n this.queue.put({\n requestId,\n frame: lastFrame,\n segmentId: requestId,\n final,\n });\n lastFrame = undefined;\n }\n };\n\n for (const part of candidate.content.parts) {\n if (part.inlineData?.data && part.inlineData.mimeType?.startsWith('audio/')) {\n const audioBuffer = Buffer.from(part.inlineData.data, 'base64');\n\n for (const frame of bstream.write(audioBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n }\n }\n\n for 
(const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n\n sendLastFrame(true);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,mBAA4B;AAC5B,oBAQO;AAqCP,MAAM,gBAAiC;AACvC,MAAM,gBAA8B;AACpC,MAAM,sBAAsB;AAC5B,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAWtB,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,YAAY;AAAA,IACV,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAA8C,CAAC,GAAG;AAChD,UAAM,qBAAqB,cAAc,EAAE,WAAW,MAAM,CAAC;AAE7D,UAAM,aAAiC,WAAW,QAAQ,IAAI;AAC9D,UAAM,cACJ,YAAY,QAAQ,IAAI,yBAAyB;AACnD,UAAM,cAAc,YAAY,QAAQ,IAAI,8BAA8B;AAC1E,UAAM,eAAe,UAAU,QAAQ,IAAI;AAE3C,QAAI,eAAmC;AACvC,QAAI,gBAAoC;AACxC,QAAI,cAAkC;AAEtC,QAAI,aAAa;AACf,UAAI,CAAC,cAAc;AACjB,cAAM,IAAI,iCAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AACA,oBAAc;AAAA,IAChB,OAAO;AACL,qBAAe;AACf,sBAAgB;AAChB,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI,iCAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,cAAc,gBAAgB;AAAA,IAChC;AAEA,UAAM,gBAA0C,cAC5C;AAAA,MACE,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,IACA;AAAA,MACE,QAAQ;AAAA,IACV;AAEJ,SAAK,UAAU,IAAI,yBAAY,aAAa;AAAA,EAC9C;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,aAAa,WAAW;AAAA,EAC/D;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6C;AACzD,QAAI,KAAK,cAAc,QAAW;AAChC,WAAK,MAAM,YAAY,KAAK;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD;AAAA,EACA,QAAQ;AAAA,EAER,YACE,WACAA,MACA,aACA,aACA;AACA,UAAM,WAAWA,MAAK,aAAa,WAAW;AAC9C,SAAK,OAAOA;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAE/E,UAAM,SAAsC;AAAA,MAC1C,oBAAoB,CAAC,OAAO;AAAA,MAC5B,cAAc;AAAA,QACZ,aAAa;AAAA,UACX,qBAAqB;AAAA,YACnB,WAAW,KAAK,KAAK,KAAK;AAAA,UAC5
B;AAAA,QACF;AAAA,MACF;AAAA,MACA,aAAa,KAAK;AAAA,IACpB;AAEA,QAAI,YAAY,KAAK;AACrB,QAAI,KAAK,KAAK,KAAK,cAAc;AAC/B,kBAAY,GAAG,KAAK,KAAK,KAAK,YAAY;AAAA,GAAO,SAAS;AAAA,IAC5D;AAEA,UAAM,WAA4B;AAAA,MAChC;AAAA,QACE,MAAM;AAAA,QACN,OAAO,CAAC,EAAE,MAAM,UAAU,CAAC;AAAA,MAC7B;AAAA,IACF;AAEA,UAAM,iBAAiB,MAAM,KAAK,KAAK,OAAO,OAAO,sBAAsB;AAAA,MACzE,OAAO,KAAK,KAAK,KAAK;AAAA,MACtB;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI;AACF,uBAAiB,YAAY,gBAAgB;AAC3C,cAAM,KAAK,iBAAiB,UAAU,SAAS,SAAS;AAAA,MAC1D;AAAA,IACF,SAAS,OAAgB;AACvB,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,cAAI,0BAAW,KAAK,EAAG,OAAM;AAE7B,YAAM,MAAM;AAOZ,UAAI,IAAI,QAAQ,IAAI,QAAQ,OAAO,IAAI,OAAO,KAAK;AACjD,YAAI,IAAI,SAAS,KAAK;AACpB,gBAAM,IAAI,6BAAe;AAAA,YACvB,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,YACzE,SAAS;AAAA,cACP,YAAY;AAAA,cACZ,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH,OAAO;AACL,gBAAM,IAAI,6BAAe;AAAA,YACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,YACnF,SAAS;AAAA,cACP,YAAY,IAAI;AAAA,cAChB,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAC/B,cAAM,IAAI,6BAAe;AAAA,UACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,UACnF,SAAS;AAAA,YACP,YAAY,IAAI;AAAA,YAChB,WAAW;AAAA,UACb;AAAA,QACF,CAAC;AAAA,MACH;AAEA,YAAM,IAAI,iCAAmB;AAAA,QAC3B,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,QACzE,SAAS,EAAE,WAAW,KAAK;AAAA,MAC7B,CAAC;AAAA,IACH,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AAAA,EAEA,MAAM,iBACJ,UACA,SACA,WACA;AAzRJ;AA0RI,QAAI,CAAC,SAAS,cAAc,SAAS,WAAW,WAAW,GAAG;AAC5D;AAAA,IACF;AAEA,UAAM,YAAY,SAAS,WAAW,CAAC;AACvC,QAAI,CAAC,aAAa,GAAC,eAAU,YAAV,mBAAmB,QAAO;AAC3C;AAAA,IACF;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAAC,UAAmB;AACxC,UAAI,WAAW;AACb,aAAK,MAAM,IAAI;AAAA,UACb;AAAA,UACA,OAAO;AAAA,UACP,WAAW;AAAA,UACX;AAAA,QACF,CAAC;AACD,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,QAAQ,UAAU,QAAQ,OAAO;AAC1C,YAAI,UAAK,eAAL,mBAAiB,WAAQ,UAAK,WAAW,aAAhB,mBAA0B,WAAW,YAAW;AAC3E,cAAM,cAAc,OAAO,KAAK,KAAK,WAAW,MAAM,QAAQ;AAE9D,mBAAW,SAAS,QAAQ,MAAM,WAAW,GAAG;AAC9C,wBAAc,KAAK;AACnB,sBAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,oBAAc,KAAK;A
ACnB,kBAAY;AAAA,IACd;AAEA,kBAAc,IAAI;AAAA,EACpB;AACF;","names":["tts"]}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { GoogleGenAI } from '@google/genai';
|
|
2
|
-
import { tts } from '@livekit/agents';
|
|
2
|
+
import { type APIConnectOptions, tts } from '@livekit/agents';
|
|
3
3
|
export type GeminiTTSModels = 'gemini-2.5-flash-preview-tts' | 'gemini-2.5-pro-preview-tts';
|
|
4
4
|
export type GeminiVoices = 'Zephyr' | 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Orus' | 'Aoede' | 'Callirrhoe' | 'Autonoe' | 'Enceladus' | 'Iapetus' | 'Umbriel' | 'Algieba' | 'Despina' | 'Erinome' | 'Algenib' | 'Rasalgethi' | 'Laomedeia' | 'Achernar' | 'Alnilam' | 'Schedar' | 'Gacrux' | 'Pulcherrima' | 'Achird' | 'Zubenelgenubi' | 'Vindemiatrix' | 'Sadachbia' | 'Sadaltager' | 'Sulafat';
|
|
5
5
|
export interface TTSOptions {
|
|
@@ -25,7 +25,7 @@ export declare class TTS extends tts.TTS {
|
|
|
25
25
|
constructor({ model, voiceName, apiKey, vertexai, project, location, instructions, }?: Partial<TTSOptions & {
|
|
26
26
|
apiKey: string;
|
|
27
27
|
}>);
|
|
28
|
-
synthesize(text: string): ChunkedStream;
|
|
28
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
|
|
29
29
|
/**
|
|
30
30
|
* Update the TTS options.
|
|
31
31
|
*
|
|
@@ -41,7 +41,7 @@ export declare class TTS extends tts.TTS {
|
|
|
41
41
|
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
42
42
|
#private;
|
|
43
43
|
label: string;
|
|
44
|
-
constructor(inputText: string, tts: TTS);
|
|
44
|
+
constructor(inputText: string, tts: TTS, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
|
|
45
45
|
protected run(): Promise<void>;
|
|
46
46
|
}
|
|
47
47
|
//# sourceMappingURL=gemini_tts.d.ts.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { GoogleGenAI } from '@google/genai';
|
|
2
|
-
import { tts } from '@livekit/agents';
|
|
2
|
+
import { type APIConnectOptions, tts } from '@livekit/agents';
|
|
3
3
|
export type GeminiTTSModels = 'gemini-2.5-flash-preview-tts' | 'gemini-2.5-pro-preview-tts';
|
|
4
4
|
export type GeminiVoices = 'Zephyr' | 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Orus' | 'Aoede' | 'Callirrhoe' | 'Autonoe' | 'Enceladus' | 'Iapetus' | 'Umbriel' | 'Algieba' | 'Despina' | 'Erinome' | 'Algenib' | 'Rasalgethi' | 'Laomedeia' | 'Achernar' | 'Alnilam' | 'Schedar' | 'Gacrux' | 'Pulcherrima' | 'Achird' | 'Zubenelgenubi' | 'Vindemiatrix' | 'Sadachbia' | 'Sadaltager' | 'Sulafat';
|
|
5
5
|
export interface TTSOptions {
|
|
@@ -25,7 +25,7 @@ export declare class TTS extends tts.TTS {
|
|
|
25
25
|
constructor({ model, voiceName, apiKey, vertexai, project, location, instructions, }?: Partial<TTSOptions & {
|
|
26
26
|
apiKey: string;
|
|
27
27
|
}>);
|
|
28
|
-
synthesize(text: string): ChunkedStream;
|
|
28
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
|
|
29
29
|
/**
|
|
30
30
|
* Update the TTS options.
|
|
31
31
|
*
|
|
@@ -41,7 +41,7 @@ export declare class TTS extends tts.TTS {
|
|
|
41
41
|
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
42
42
|
#private;
|
|
43
43
|
label: string;
|
|
44
|
-
constructor(inputText: string, tts: TTS);
|
|
44
|
+
constructor(inputText: string, tts: TTS, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
|
|
45
45
|
protected run(): Promise<void>;
|
|
46
46
|
}
|
|
47
47
|
//# sourceMappingURL=gemini_tts.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gemini_tts.d.ts","sourceRoot":"","sources":["../../src/beta/gemini_tts.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,OAAO,
|
|
1
|
+
{"version":3,"file":"gemini_tts.d.ts","sourceRoot":"","sources":["../../src/beta/gemini_tts.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,OAAO,EACL,KAAK,iBAAiB,EAMtB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAGzB,MAAM,MAAM,eAAe,GAAG,8BAA8B,GAAG,4BAA4B,CAAC;AAE5F,MAAM,MAAM,YAAY,GACpB,QAAQ,GACR,MAAM,GACN,QAAQ,GACR,MAAM,GACN,QAAQ,GACR,MAAM,GACN,MAAM,GACN,OAAO,GACP,YAAY,GACZ,SAAS,GACT,WAAW,GACX,SAAS,GACT,SAAS,GACT,SAAS,GACT,SAAS,GACT,SAAS,GACT,SAAS,GACT,YAAY,GACZ,WAAW,GACX,UAAU,GACV,SAAS,GACT,SAAS,GACT,QAAQ,GACR,aAAa,GACb,QAAQ,GACR,eAAe,GACf,cAAc,GACd,WAAW,GACX,YAAY,GACZ,SAAS,CAAC;AAQd,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,eAAe,GAAG,MAAM,CAAC;IAChC,SAAS,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,QAAQ,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAuB;IAE5B;;;;;;;;OAQG;gBACS,EACV,KAAqB,EACrB,SAAyB,EACzB,MAAM,EACN,QAAQ,EACR,OAAO,EACP,QAAQ,EACR,YAAY,GACb,GAAE,OAAO,CAAC,UAAU,GAAG;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAM;IAsDhD,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE;QAAE,SAAS,CAAC,EAAE,YAAY,GAAG,MAAM,CAAA;KAAE;IAMzD,MAAM,IAAI,GAAG,CAAC,gBAAgB;IAI9B,IAAI,IAAI,IAAI,UAAU,CAErB;IAED,IAAI,MAAM,IAAI,WAAW,CAExB;CACF;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAElD,KAAK,SAAiC;gBAGpC,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,GAAG,EACR,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAMX,GAAG;CAuIpB"}
|
package/dist/beta/gemini_tts.js
CHANGED
|
@@ -75,8 +75,8 @@ class TTS extends tts.TTS {
|
|
|
75
75
|
};
|
|
76
76
|
this.#client = new GoogleGenAI(clientOptions);
|
|
77
77
|
}
|
|
78
|
-
synthesize(text) {
|
|
79
|
-
return new ChunkedStream(text, this);
|
|
78
|
+
synthesize(text, connOptions, abortSignal) {
|
|
79
|
+
return new ChunkedStream(text, this, connOptions, abortSignal);
|
|
80
80
|
}
|
|
81
81
|
/**
|
|
82
82
|
* Update the TTS options.
|
|
@@ -101,8 +101,8 @@ class TTS extends tts.TTS {
|
|
|
101
101
|
class ChunkedStream extends tts.ChunkedStream {
|
|
102
102
|
#tts;
|
|
103
103
|
label = "google.gemini.ChunkedStream";
|
|
104
|
-
constructor(inputText, tts2) {
|
|
105
|
-
super(inputText, tts2);
|
|
104
|
+
constructor(inputText, tts2, connOptions, abortSignal) {
|
|
105
|
+
super(inputText, tts2, connOptions, abortSignal);
|
|
106
106
|
this.#tts = tts2;
|
|
107
107
|
}
|
|
108
108
|
async run() {
|
|
@@ -116,7 +116,8 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
116
116
|
voiceName: this.#tts.opts.voiceName
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
|
-
}
|
|
119
|
+
},
|
|
120
|
+
abortSignal: this.abortSignal
|
|
120
121
|
};
|
|
121
122
|
let inputText = this.inputText;
|
|
122
123
|
if (this.#tts.opts.instructions) {
|
|
@@ -139,6 +140,9 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
139
140
|
await this.#processResponse(response, bstream, requestId);
|
|
140
141
|
}
|
|
141
142
|
} catch (error) {
|
|
143
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
144
|
+
return;
|
|
145
|
+
}
|
|
142
146
|
if (isAPIError(error)) throw error;
|
|
143
147
|
const err = error;
|
|
144
148
|
if (err.code && err.code >= 400 && err.code < 500) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/beta/gemini_tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as types from '@google/genai';\nimport { GoogleGenAI } from '@google/genai';\nimport {\n APIConnectionError,\n APIStatusError,\n AudioByteStream,\n isAPIError,\n shortuuid,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\n\nexport type GeminiTTSModels = 'gemini-2.5-flash-preview-tts' | 'gemini-2.5-pro-preview-tts';\n\nexport type GeminiVoices =\n | 'Zephyr'\n | 'Puck'\n | 'Charon'\n | 'Kore'\n | 'Fenrir'\n | 'Leda'\n | 'Orus'\n | 'Aoede'\n | 'Callirrhoe'\n | 'Autonoe'\n | 'Enceladus'\n | 'Iapetus'\n | 'Umbriel'\n | 'Algieba'\n | 'Despina'\n | 'Erinome'\n | 'Algenib'\n | 'Rasalgethi'\n | 'Laomedeia'\n | 'Achernar'\n | 'Alnilam'\n | 'Schedar'\n | 'Gacrux'\n | 'Pulcherrima'\n | 'Achird'\n | 'Zubenelgenubi'\n | 'Vindemiatrix'\n | 'Sadachbia'\n | 'Sadaltager'\n | 'Sulafat';\n\nconst DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-preview-tts';\nconst DEFAULT_VOICE: GeminiVoices = 'Kore';\nconst DEFAULT_SAMPLE_RATE = 24000; // not configurable\nconst NUM_CHANNELS = 1;\nconst DEFAULT_INSTRUCTIONS = \"Say the text with a proper tone, don't omit or add any words\";\n\nexport interface TTSOptions {\n model: GeminiTTSModels | string;\n voiceName: GeminiVoices | string;\n vertexai: boolean;\n project?: string;\n location?: string;\n instructions?: string;\n}\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: GoogleGenAI;\n label = 'google.gemini.TTS';\n\n /**\n * Create a new instance of Gemini TTS.\n *\n * Environment Requirements:\n * - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.\n * - For Google Gemini API: Set the `apiKey` argument or the `GOOGLE_API_KEY` environment variable.\n *\n * @param opts - Configuration options for Gemini TTS\n */\n constructor({\n 
model = DEFAULT_MODEL,\n voiceName = DEFAULT_VOICE,\n apiKey,\n vertexai,\n project,\n location,\n instructions,\n }: Partial<TTSOptions & { apiKey: string }> = {}) {\n super(DEFAULT_SAMPLE_RATE, NUM_CHANNELS, { streaming: false });\n\n const gcpProject: string | undefined = project || process.env.GOOGLE_CLOUD_PROJECT;\n const gcpLocation: string | undefined =\n location || process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';\n const useVertexai = vertexai ?? process.env.GOOGLE_GENAI_USE_VERTEXAI === 'true';\n const geminiApiKey = apiKey || process.env.GOOGLE_API_KEY;\n\n let finalProject: string | undefined = gcpProject;\n let finalLocation: string | undefined = gcpLocation;\n let finalApiKey: string | undefined = geminiApiKey;\n\n if (useVertexai) {\n if (!finalProject) {\n throw new APIConnectionError({\n message:\n 'Project ID is required for Vertex AI. Set via project option or GOOGLE_CLOUD_PROJECT environment variable',\n });\n }\n finalApiKey = undefined;\n } else {\n finalProject = undefined;\n finalLocation = undefined;\n if (!finalApiKey) {\n throw new APIConnectionError({\n message:\n 'API key is required for Google API either via apiKey or GOOGLE_API_KEY environment variable',\n });\n }\n }\n\n this.#opts = {\n model,\n voiceName,\n vertexai: useVertexai,\n project: finalProject,\n location: finalLocation,\n instructions: instructions ?? DEFAULT_INSTRUCTIONS,\n };\n\n const clientOptions: types.GoogleGenAIOptions = useVertexai\n ? 
{\n vertexai: true,\n project: finalProject,\n location: finalLocation,\n }\n : {\n apiKey: finalApiKey,\n };\n\n this.#client = new GoogleGenAI(clientOptions);\n }\n\n synthesize(text: string): ChunkedStream {\n return new ChunkedStream(text, this);\n }\n\n /**\n * Update the TTS options.\n *\n * @param opts - Options to update\n */\n updateOptions(opts: { voiceName?: GeminiVoices | string }) {\n if (opts.voiceName !== undefined) {\n this.#opts.voiceName = opts.voiceName;\n }\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on Gemini TTS');\n }\n\n get opts(): TTSOptions {\n return this.#opts;\n }\n\n get client(): GoogleGenAI {\n return this.#client;\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n #tts: TTS;\n label = 'google.gemini.ChunkedStream';\n\n constructor(inputText: string, tts: TTS) {\n super(inputText, tts);\n this.#tts = tts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels);\n\n const config: types.GenerateContentConfig = {\n responseModalities: ['AUDIO'],\n speechConfig: {\n voiceConfig: {\n prebuiltVoiceConfig: {\n voiceName: this.#tts.opts.voiceName,\n },\n },\n },\n };\n\n let inputText = this.inputText;\n if (this.#tts.opts.instructions) {\n inputText = `${this.#tts.opts.instructions}:\\n\"${inputText}\"`;\n }\n\n const contents: types.Content[] = [\n {\n role: 'user',\n parts: [{ text: inputText }],\n },\n ];\n\n const responseStream = await this.#tts.client.models.generateContentStream({\n model: this.#tts.opts.model,\n contents,\n config,\n });\n\n try {\n for await (const response of responseStream) {\n await this.#processResponse(response, bstream, requestId);\n }\n } catch (error: unknown) {\n if (isAPIError(error)) throw error;\n\n const err = error as {\n code?: number;\n message?: string;\n status?: string;\n type?: string;\n };\n\n if (err.code && err.code >= 400 && err.code < 500) 
{\n if (err.code === 429) {\n throw new APIStatusError({\n message: `Gemini TTS: Rate limit error - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: 429,\n retryable: true,\n },\n });\n } else {\n throw new APIStatusError({\n message: `Gemini TTS: Client error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: false,\n },\n });\n }\n }\n\n if (err.code && err.code >= 500) {\n throw new APIStatusError({\n message: `Gemini TTS: Server error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: true,\n },\n });\n }\n\n throw new APIConnectionError({\n message: `Gemini TTS: Connection error - ${err.message || 'Unknown error'}`,\n options: { retryable: true },\n });\n } finally {\n this.queue.close();\n }\n }\n\n async #processResponse(\n response: types.GenerateContentResponse,\n bstream: AudioByteStream,\n requestId: string,\n ) {\n if (!response.candidates || response.candidates.length === 0) {\n return;\n }\n\n const candidate = response.candidates[0];\n if (!candidate || !candidate.content?.parts) {\n return;\n }\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (final: boolean) => {\n if (lastFrame) {\n this.queue.put({\n requestId,\n frame: lastFrame,\n segmentId: requestId,\n final,\n });\n lastFrame = undefined;\n }\n };\n\n for (const part of candidate.content.parts) {\n if (part.inlineData?.data && part.inlineData.mimeType?.startsWith('audio/')) {\n const audioBuffer = Buffer.from(part.inlineData.data, 'base64');\n\n for (const frame of bstream.write(audioBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n }\n }\n\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n\n sendLastFrame(true);\n 
}\n}\n"],"mappings":"AAIA,SAAS,mBAAmB;AAC5B;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAqCP,MAAM,gBAAiC;AACvC,MAAM,gBAA8B;AACpC,MAAM,sBAAsB;AAC5B,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAWtB,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,YAAY;AAAA,IACV,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAA8C,CAAC,GAAG;AAChD,UAAM,qBAAqB,cAAc,EAAE,WAAW,MAAM,CAAC;AAE7D,UAAM,aAAiC,WAAW,QAAQ,IAAI;AAC9D,UAAM,cACJ,YAAY,QAAQ,IAAI,yBAAyB;AACnD,UAAM,cAAc,YAAY,QAAQ,IAAI,8BAA8B;AAC1E,UAAM,eAAe,UAAU,QAAQ,IAAI;AAE3C,QAAI,eAAmC;AACvC,QAAI,gBAAoC;AACxC,QAAI,cAAkC;AAEtC,QAAI,aAAa;AACf,UAAI,CAAC,cAAc;AACjB,cAAM,IAAI,mBAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AACA,oBAAc;AAAA,IAChB,OAAO;AACL,qBAAe;AACf,sBAAgB;AAChB,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI,mBAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,cAAc,gBAAgB;AAAA,IAChC;AAEA,UAAM,gBAA0C,cAC5C;AAAA,MACE,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,IACA;AAAA,MACE,QAAQ;AAAA,IACV;AAEJ,SAAK,UAAU,IAAI,YAAY,aAAa;AAAA,EAC9C;AAAA,EAEA,WAAW,MAA6B;AACtC,WAAO,IAAI,cAAc,MAAM,IAAI;AAAA,EACrC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6C;AACzD,QAAI,KAAK,cAAc,QAAW;AAChC,WAAK,MAAM,YAAY,KAAK;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,WAAmBA,MAAU;AACvC,UAAM,WAAWA,IAAG;AACpB,SAAK,OAAOA;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAE/E,UAAM,SAAsC;AAAA,MAC1C,oBAAoB,CAAC,OAAO;AAAA,MAC5B,cAAc;AAAA,QACZ,aAAa;AAAA,UACX,qBAAqB;AAAA,YACnB,WAAW,KAAK,KAAK,KAAK;AAAA,UAC5B;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,QAAI,YAAY,KAAK;AACrB,QAAI,KAAK,KAAK,KAAK,cAAc;AAC/B,kBAAY,GAAG,KAAK,KAAK,KAAK,YAAY;AAAA,GAAO,S
AAS;AAAA,IAC5D;AAEA,UAAM,WAA4B;AAAA,MAChC;AAAA,QACE,MAAM;AAAA,QACN,OAAO,CAAC,EAAE,MAAM,UAAU,CAAC;AAAA,MAC7B;AAAA,IACF;AAEA,UAAM,iBAAiB,MAAM,KAAK,KAAK,OAAO,OAAO,sBAAsB;AAAA,MACzE,OAAO,KAAK,KAAK,KAAK;AAAA,MACtB;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI;AACF,uBAAiB,YAAY,gBAAgB;AAC3C,cAAM,KAAK,iBAAiB,UAAU,SAAS,SAAS;AAAA,MAC1D;AAAA,IACF,SAAS,OAAgB;AACvB,UAAI,WAAW,KAAK,EAAG,OAAM;AAE7B,YAAM,MAAM;AAOZ,UAAI,IAAI,QAAQ,IAAI,QAAQ,OAAO,IAAI,OAAO,KAAK;AACjD,YAAI,IAAI,SAAS,KAAK;AACpB,gBAAM,IAAI,eAAe;AAAA,YACvB,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,YACzE,SAAS;AAAA,cACP,YAAY;AAAA,cACZ,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH,OAAO;AACL,gBAAM,IAAI,eAAe;AAAA,YACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,YACnF,SAAS;AAAA,cACP,YAAY,IAAI;AAAA,cAChB,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAC/B,cAAM,IAAI,eAAe;AAAA,UACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,UACnF,SAAS;AAAA,YACP,YAAY,IAAI;AAAA,YAChB,WAAW;AAAA,UACb;AAAA,QACF,CAAC;AAAA,MACH;AAEA,YAAM,IAAI,mBAAmB;AAAA,QAC3B,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,QACzE,SAAS,EAAE,WAAW,KAAK;AAAA,MAC7B,CAAC;AAAA,IACH,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AAAA,EAEA,MAAM,iBACJ,UACA,SACA,WACA;AA3QJ;AA4QI,QAAI,CAAC,SAAS,cAAc,SAAS,WAAW,WAAW,GAAG;AAC5D;AAAA,IACF;AAEA,UAAM,YAAY,SAAS,WAAW,CAAC;AACvC,QAAI,CAAC,aAAa,GAAC,eAAU,YAAV,mBAAmB,QAAO;AAC3C;AAAA,IACF;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAAC,UAAmB;AACxC,UAAI,WAAW;AACb,aAAK,MAAM,IAAI;AAAA,UACb;AAAA,UACA,OAAO;AAAA,UACP,WAAW;AAAA,UACX;AAAA,QACF,CAAC;AACD,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,QAAQ,UAAU,QAAQ,OAAO;AAC1C,YAAI,UAAK,eAAL,mBAAiB,WAAQ,UAAK,WAAW,aAAhB,mBAA0B,WAAW,YAAW;AAC3E,cAAM,cAAc,OAAO,KAAK,KAAK,WAAW,MAAM,QAAQ;AAE9D,mBAAW,SAAS,QAAQ,MAAM,WAAW,GAAG;AAC9C,wBAAc,KAAK;AACnB,sBAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,oBAAc,KAAK;AACnB,kBAAY;AAAA,IACd;AAEA,kBAAc,IAAI;AAAA,EACpB;AACF;","names":["tts"]}
|
|
1
|
+
{"version":3,"sources":["../../src/beta/gemini_tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as types from '@google/genai';\nimport { GoogleGenAI } from '@google/genai';\nimport {\n type APIConnectOptions,\n APIConnectionError,\n APIStatusError,\n AudioByteStream,\n isAPIError,\n shortuuid,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\n\nexport type GeminiTTSModels = 'gemini-2.5-flash-preview-tts' | 'gemini-2.5-pro-preview-tts';\n\nexport type GeminiVoices =\n | 'Zephyr'\n | 'Puck'\n | 'Charon'\n | 'Kore'\n | 'Fenrir'\n | 'Leda'\n | 'Orus'\n | 'Aoede'\n | 'Callirrhoe'\n | 'Autonoe'\n | 'Enceladus'\n | 'Iapetus'\n | 'Umbriel'\n | 'Algieba'\n | 'Despina'\n | 'Erinome'\n | 'Algenib'\n | 'Rasalgethi'\n | 'Laomedeia'\n | 'Achernar'\n | 'Alnilam'\n | 'Schedar'\n | 'Gacrux'\n | 'Pulcherrima'\n | 'Achird'\n | 'Zubenelgenubi'\n | 'Vindemiatrix'\n | 'Sadachbia'\n | 'Sadaltager'\n | 'Sulafat';\n\nconst DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-preview-tts';\nconst DEFAULT_VOICE: GeminiVoices = 'Kore';\nconst DEFAULT_SAMPLE_RATE = 24000; // not configurable\nconst NUM_CHANNELS = 1;\nconst DEFAULT_INSTRUCTIONS = \"Say the text with a proper tone, don't omit or add any words\";\n\nexport interface TTSOptions {\n model: GeminiTTSModels | string;\n voiceName: GeminiVoices | string;\n vertexai: boolean;\n project?: string;\n location?: string;\n instructions?: string;\n}\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n #client: GoogleGenAI;\n label = 'google.gemini.TTS';\n\n /**\n * Create a new instance of Gemini TTS.\n *\n * Environment Requirements:\n * - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.\n * - For Google Gemini API: Set the `apiKey` argument or the `GOOGLE_API_KEY` environment variable.\n *\n * @param opts - Configuration options for Gemini 
TTS\n */\n constructor({\n model = DEFAULT_MODEL,\n voiceName = DEFAULT_VOICE,\n apiKey,\n vertexai,\n project,\n location,\n instructions,\n }: Partial<TTSOptions & { apiKey: string }> = {}) {\n super(DEFAULT_SAMPLE_RATE, NUM_CHANNELS, { streaming: false });\n\n const gcpProject: string | undefined = project || process.env.GOOGLE_CLOUD_PROJECT;\n const gcpLocation: string | undefined =\n location || process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';\n const useVertexai = vertexai ?? process.env.GOOGLE_GENAI_USE_VERTEXAI === 'true';\n const geminiApiKey = apiKey || process.env.GOOGLE_API_KEY;\n\n let finalProject: string | undefined = gcpProject;\n let finalLocation: string | undefined = gcpLocation;\n let finalApiKey: string | undefined = geminiApiKey;\n\n if (useVertexai) {\n if (!finalProject) {\n throw new APIConnectionError({\n message:\n 'Project ID is required for Vertex AI. Set via project option or GOOGLE_CLOUD_PROJECT environment variable',\n });\n }\n finalApiKey = undefined;\n } else {\n finalProject = undefined;\n finalLocation = undefined;\n if (!finalApiKey) {\n throw new APIConnectionError({\n message:\n 'API key is required for Google API either via apiKey or GOOGLE_API_KEY environment variable',\n });\n }\n }\n\n this.#opts = {\n model,\n voiceName,\n vertexai: useVertexai,\n project: finalProject,\n location: finalLocation,\n instructions: instructions ?? DEFAULT_INSTRUCTIONS,\n };\n\n const clientOptions: types.GoogleGenAIOptions = useVertexai\n ? 
{\n vertexai: true,\n project: finalProject,\n location: finalLocation,\n }\n : {\n apiKey: finalApiKey,\n };\n\n this.#client = new GoogleGenAI(clientOptions);\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(text, this, connOptions, abortSignal);\n }\n\n /**\n * Update the TTS options.\n *\n * @param opts - Options to update\n */\n updateOptions(opts: { voiceName?: GeminiVoices | string }) {\n if (opts.voiceName !== undefined) {\n this.#opts.voiceName = opts.voiceName;\n }\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on Gemini TTS');\n }\n\n get opts(): TTSOptions {\n return this.#opts;\n }\n\n get client(): GoogleGenAI {\n return this.#client;\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n #tts: TTS;\n label = 'google.gemini.ChunkedStream';\n\n constructor(\n inputText: string,\n tts: TTS,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(inputText, tts, connOptions, abortSignal);\n this.#tts = tts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels);\n\n const config: types.GenerateContentConfig = {\n responseModalities: ['AUDIO'],\n speechConfig: {\n voiceConfig: {\n prebuiltVoiceConfig: {\n voiceName: this.#tts.opts.voiceName,\n },\n },\n },\n abortSignal: this.abortSignal,\n };\n\n let inputText = this.inputText;\n if (this.#tts.opts.instructions) {\n inputText = `${this.#tts.opts.instructions}:\\n\"${inputText}\"`;\n }\n\n const contents: types.Content[] = [\n {\n role: 'user',\n parts: [{ text: inputText }],\n },\n ];\n\n const responseStream = await this.#tts.client.models.generateContentStream({\n model: this.#tts.opts.model,\n contents,\n config,\n });\n\n try {\n for await (const response of responseStream) {\n await this.#processResponse(response, bstream, requestId);\n 
}\n } catch (error: unknown) {\n if (error instanceof Error && error.name === 'AbortError') {\n return;\n }\n if (isAPIError(error)) throw error;\n\n const err = error as {\n code?: number;\n message?: string;\n status?: string;\n type?: string;\n };\n\n if (err.code && err.code >= 400 && err.code < 500) {\n if (err.code === 429) {\n throw new APIStatusError({\n message: `Gemini TTS: Rate limit error - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: 429,\n retryable: true,\n },\n });\n } else {\n throw new APIStatusError({\n message: `Gemini TTS: Client error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: false,\n },\n });\n }\n }\n\n if (err.code && err.code >= 500) {\n throw new APIStatusError({\n message: `Gemini TTS: Server error (${err.code}) - ${err.message || 'Unknown error'}`,\n options: {\n statusCode: err.code,\n retryable: true,\n },\n });\n }\n\n throw new APIConnectionError({\n message: `Gemini TTS: Connection error - ${err.message || 'Unknown error'}`,\n options: { retryable: true },\n });\n } finally {\n this.queue.close();\n }\n }\n\n async #processResponse(\n response: types.GenerateContentResponse,\n bstream: AudioByteStream,\n requestId: string,\n ) {\n if (!response.candidates || response.candidates.length === 0) {\n return;\n }\n\n const candidate = response.candidates[0];\n if (!candidate || !candidate.content?.parts) {\n return;\n }\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (final: boolean) => {\n if (lastFrame) {\n this.queue.put({\n requestId,\n frame: lastFrame,\n segmentId: requestId,\n final,\n });\n lastFrame = undefined;\n }\n };\n\n for (const part of candidate.content.parts) {\n if (part.inlineData?.data && part.inlineData.mimeType?.startsWith('audio/')) {\n const audioBuffer = Buffer.from(part.inlineData.data, 'base64');\n\n for (const frame of bstream.write(audioBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n }\n }\n\n for 
(const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n\n sendLastFrame(true);\n }\n}\n"],"mappings":"AAIA,SAAS,mBAAmB;AAC5B;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAqCP,MAAM,gBAAiC;AACvC,MAAM,gBAA8B;AACpC,MAAM,sBAAsB;AAC5B,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAWtB,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,YAAY;AAAA,IACV,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAA8C,CAAC,GAAG;AAChD,UAAM,qBAAqB,cAAc,EAAE,WAAW,MAAM,CAAC;AAE7D,UAAM,aAAiC,WAAW,QAAQ,IAAI;AAC9D,UAAM,cACJ,YAAY,QAAQ,IAAI,yBAAyB;AACnD,UAAM,cAAc,YAAY,QAAQ,IAAI,8BAA8B;AAC1E,UAAM,eAAe,UAAU,QAAQ,IAAI;AAE3C,QAAI,eAAmC;AACvC,QAAI,gBAAoC;AACxC,QAAI,cAAkC;AAEtC,QAAI,aAAa;AACf,UAAI,CAAC,cAAc;AACjB,cAAM,IAAI,mBAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AACA,oBAAc;AAAA,IAChB,OAAO;AACL,qBAAe;AACf,sBAAgB;AAChB,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI,mBAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,cAAc,gBAAgB;AAAA,IAChC;AAEA,UAAM,gBAA0C,cAC5C;AAAA,MACE,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,IACA;AAAA,MACE,QAAQ;AAAA,IACV;AAEJ,SAAK,UAAU,IAAI,YAAY,aAAa;AAAA,EAC9C;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,aAAa,WAAW;AAAA,EAC/D;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6C;AACzD,QAAI,KAAK,cAAc,QAAW;AAChC,WAAK,MAAM,YAAY,KAAK;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD;AAAA,EACA,QAAQ;AAAA,EAER,YACE,WACAA,MACA,aACA,aACA;AACA,UAAM,WAAWA,MAAK,aAAa,WAAW;AAC9C,SAAK,OAAOA;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAE/E,UAAM,SAAsC;AAAA,MAC1C,oBAAoB,CAAC,OAAO;AAAA,MAC5B,cAAc;AAAA,QACZ,aAAa;AAAA,UACX,qBAAqB;AAAA,YACnB,WAAW,KAAK,KAA
K,KAAK;AAAA,UAC5B;AAAA,QACF;AAAA,MACF;AAAA,MACA,aAAa,KAAK;AAAA,IACpB;AAEA,QAAI,YAAY,KAAK;AACrB,QAAI,KAAK,KAAK,KAAK,cAAc;AAC/B,kBAAY,GAAG,KAAK,KAAK,KAAK,YAAY;AAAA,GAAO,SAAS;AAAA,IAC5D;AAEA,UAAM,WAA4B;AAAA,MAChC;AAAA,QACE,MAAM;AAAA,QACN,OAAO,CAAC,EAAE,MAAM,UAAU,CAAC;AAAA,MAC7B;AAAA,IACF;AAEA,UAAM,iBAAiB,MAAM,KAAK,KAAK,OAAO,OAAO,sBAAsB;AAAA,MACzE,OAAO,KAAK,KAAK,KAAK;AAAA,MACtB;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI;AACF,uBAAiB,YAAY,gBAAgB;AAC3C,cAAM,KAAK,iBAAiB,UAAU,SAAS,SAAS;AAAA,MAC1D;AAAA,IACF,SAAS,OAAgB;AACvB,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,UAAI,WAAW,KAAK,EAAG,OAAM;AAE7B,YAAM,MAAM;AAOZ,UAAI,IAAI,QAAQ,IAAI,QAAQ,OAAO,IAAI,OAAO,KAAK;AACjD,YAAI,IAAI,SAAS,KAAK;AACpB,gBAAM,IAAI,eAAe;AAAA,YACvB,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,YACzE,SAAS;AAAA,cACP,YAAY;AAAA,cACZ,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH,OAAO;AACL,gBAAM,IAAI,eAAe;AAAA,YACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,YACnF,SAAS;AAAA,cACP,YAAY,IAAI;AAAA,cAChB,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAC/B,cAAM,IAAI,eAAe;AAAA,UACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,UACnF,SAAS;AAAA,YACP,YAAY,IAAI;AAAA,YAChB,WAAW;AAAA,UACb;AAAA,QACF,CAAC;AAAA,MACH;AAEA,YAAM,IAAI,mBAAmB;AAAA,QAC3B,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,QACzE,SAAS,EAAE,WAAW,KAAK;AAAA,MAC7B,CAAC;AAAA,IACH,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AAAA,EAEA,MAAM,iBACJ,UACA,SACA,WACA;AAzRJ;AA0RI,QAAI,CAAC,SAAS,cAAc,SAAS,WAAW,WAAW,GAAG;AAC5D;AAAA,IACF;AAEA,UAAM,YAAY,SAAS,WAAW,CAAC;AACvC,QAAI,CAAC,aAAa,GAAC,eAAU,YAAV,mBAAmB,QAAO;AAC3C;AAAA,IACF;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAAC,UAAmB;AACxC,UAAI,WAAW;AACb,aAAK,MAAM,IAAI;AAAA,UACb;AAAA,UACA,OAAO;AAAA,UACP,WAAW;AAAA,UACX;AAAA,QACF,CAAC;AACD,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,QAAQ,UAAU,QAAQ,OAAO;AAC1C,YAAI,UAAK,eAAL,mBAAiB,WAAQ,UAAK,WAAW,aAAhB,mBAA0B,WAAW,YAAW;AAC3E,cAAM,cAAc,OAAO,KAAK,KAAK,WAAW,MAAM,QAAQ;AAE9D,mBAAW,SAAS,QAAQ,MAAM,WAAW,GAAG;AAC9C,wBAAc,KAAK;AACnB,sBAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,
oBAAc,KAAK;AACnB,kBAAY;AAAA,IACd;AAEA,kBAAc,IAAI;AAAA,EACpB;AACF;","names":["tts"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-google",
|
|
3
|
-
"version": "1.0.24",
|
|
3
|
+
"version": "1.0.27",
|
|
4
4
|
"description": "Google Gemini plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -29,9 +29,9 @@
|
|
|
29
29
|
"@microsoft/api-extractor": "^7.35.0",
|
|
30
30
|
"tsup": "^8.3.5",
|
|
31
31
|
"typescript": "^5.0.0",
|
|
32
|
-
"@livekit/agents": "1.0.24",
|
|
33
|
-
"@livekit/agents-plugin-openai": "1.0.24",
|
|
34
|
-
"@livekit/agents-plugins-test": "1.0.24"
|
|
32
|
+
"@livekit/agents": "1.0.27",
|
|
33
|
+
"@livekit/agents-plugin-openai": "1.0.27",
|
|
34
|
+
"@livekit/agents-plugins-test": "1.0.27"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"@google/genai": "^1.13.0",
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
},
|
|
42
42
|
"peerDependencies": {
|
|
43
43
|
"@livekit/rtc-node": "^0.13.12",
|
|
44
|
-
"@livekit/agents": "1.0.24"
|
|
44
|
+
"@livekit/agents": "1.0.27"
|
|
45
45
|
},
|
|
46
46
|
"scripts": {
|
|
47
47
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/beta/gemini_tts.ts
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import type * as types from '@google/genai';
|
|
5
5
|
import { GoogleGenAI } from '@google/genai';
|
|
6
6
|
import {
|
|
7
|
+
type APIConnectOptions,
|
|
7
8
|
APIConnectionError,
|
|
8
9
|
APIStatusError,
|
|
9
10
|
AudioByteStream,
|
|
@@ -138,8 +139,12 @@ export class TTS extends tts.TTS {
|
|
|
138
139
|
this.#client = new GoogleGenAI(clientOptions);
|
|
139
140
|
}
|
|
140
141
|
|
|
141
|
-
synthesize(text: string): ChunkedStream {
|
|
142
|
-
return new ChunkedStream(text, this);
|
|
142
|
+
synthesize(
|
|
143
|
+
text: string,
|
|
144
|
+
connOptions?: APIConnectOptions,
|
|
145
|
+
abortSignal?: AbortSignal,
|
|
146
|
+
): ChunkedStream {
|
|
147
|
+
return new ChunkedStream(text, this, connOptions, abortSignal);
|
|
143
148
|
}
|
|
144
149
|
|
|
145
150
|
/**
|
|
@@ -170,8 +175,13 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
170
175
|
#tts: TTS;
|
|
171
176
|
label = 'google.gemini.ChunkedStream';
|
|
172
177
|
|
|
173
|
-
constructor(inputText: string, tts: TTS) {
|
|
174
|
-
super(inputText, tts);
|
|
178
|
+
constructor(
|
|
179
|
+
inputText: string,
|
|
180
|
+
tts: TTS,
|
|
181
|
+
connOptions?: APIConnectOptions,
|
|
182
|
+
abortSignal?: AbortSignal,
|
|
183
|
+
) {
|
|
184
|
+
super(inputText, tts, connOptions, abortSignal);
|
|
175
185
|
this.#tts = tts;
|
|
176
186
|
}
|
|
177
187
|
|
|
@@ -188,6 +198,7 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
188
198
|
},
|
|
189
199
|
},
|
|
190
200
|
},
|
|
201
|
+
abortSignal: this.abortSignal,
|
|
191
202
|
};
|
|
192
203
|
|
|
193
204
|
let inputText = this.inputText;
|
|
@@ -213,6 +224,9 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
213
224
|
await this.#processResponse(response, bstream, requestId);
|
|
214
225
|
}
|
|
215
226
|
} catch (error: unknown) {
|
|
227
|
+
if (error instanceof Error && error.name === 'AbortError') {
|
|
228
|
+
return;
|
|
229
|
+
}
|
|
216
230
|
if (isAPIError(error)) throw error;
|
|
217
231
|
|
|
218
232
|
const err = error as {
|