@livekit/agents-plugin-google 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99):
  1. package/LICENSE +201 -0
  2. package/README.md +89 -0
  3. package/dist/beta/gemini_tts.cjs +239 -0
  4. package/dist/beta/gemini_tts.cjs.map +1 -0
  5. package/dist/beta/gemini_tts.d.cts +47 -0
  6. package/dist/beta/gemini_tts.d.ts +47 -0
  7. package/dist/beta/gemini_tts.d.ts.map +1 -0
  8. package/dist/beta/gemini_tts.js +221 -0
  9. package/dist/beta/gemini_tts.js.map +1 -0
  10. package/dist/beta/gemini_tts.test.cjs +9 -0
  11. package/dist/beta/gemini_tts.test.cjs.map +1 -0
  12. package/dist/beta/gemini_tts.test.d.cts +2 -0
  13. package/dist/beta/gemini_tts.test.d.ts +2 -0
  14. package/dist/beta/gemini_tts.test.d.ts.map +1 -0
  15. package/dist/beta/gemini_tts.test.js +8 -0
  16. package/dist/beta/gemini_tts.test.js.map +1 -0
  17. package/dist/beta/index.cjs +42 -0
  18. package/dist/beta/index.cjs.map +1 -0
  19. package/dist/beta/index.d.cts +3 -0
  20. package/dist/beta/index.d.ts +3 -0
  21. package/dist/beta/index.d.ts.map +1 -0
  22. package/dist/beta/index.js +7 -0
  23. package/dist/beta/index.js.map +1 -0
  24. package/dist/beta/realtime/api_proto.cjs +17 -0
  25. package/dist/beta/realtime/api_proto.cjs.map +1 -0
  26. package/dist/beta/realtime/api_proto.d.cts +26 -0
  27. package/dist/beta/realtime/api_proto.d.ts +26 -0
  28. package/dist/beta/realtime/api_proto.d.ts.map +1 -0
  29. package/dist/beta/realtime/api_proto.js +1 -0
  30. package/dist/beta/realtime/api_proto.js.map +1 -0
  31. package/dist/beta/realtime/index.cjs +29 -0
  32. package/dist/beta/realtime/index.cjs.map +1 -0
  33. package/dist/beta/realtime/index.d.cts +3 -0
  34. package/dist/beta/realtime/index.d.ts +3 -0
  35. package/dist/beta/realtime/index.d.ts.map +1 -0
  36. package/dist/beta/realtime/index.js +5 -0
  37. package/dist/beta/realtime/index.js.map +1 -0
  38. package/dist/beta/realtime/realtime_api.cjs +993 -0
  39. package/dist/beta/realtime/realtime_api.cjs.map +1 -0
  40. package/dist/beta/realtime/realtime_api.d.cts +267 -0
  41. package/dist/beta/realtime/realtime_api.d.ts +267 -0
  42. package/dist/beta/realtime/realtime_api.d.ts.map +1 -0
  43. package/dist/beta/realtime/realtime_api.js +974 -0
  44. package/dist/beta/realtime/realtime_api.js.map +1 -0
  45. package/dist/index.cjs +58 -0
  46. package/dist/index.cjs.map +1 -0
  47. package/dist/index.d.cts +4 -0
  48. package/dist/index.d.ts +4 -0
  49. package/dist/index.d.ts.map +1 -0
  50. package/dist/index.js +20 -0
  51. package/dist/index.js.map +1 -0
  52. package/dist/llm.cjs +381 -0
  53. package/dist/llm.cjs.map +1 -0
  54. package/dist/llm.d.cts +82 -0
  55. package/dist/llm.d.ts +82 -0
  56. package/dist/llm.d.ts.map +1 -0
  57. package/dist/llm.js +362 -0
  58. package/dist/llm.js.map +1 -0
  59. package/dist/llm.test.cjs +8 -0
  60. package/dist/llm.test.cjs.map +1 -0
  61. package/dist/llm.test.d.cts +2 -0
  62. package/dist/llm.test.d.ts +2 -0
  63. package/dist/llm.test.d.ts.map +1 -0
  64. package/dist/llm.test.js +7 -0
  65. package/dist/llm.test.js.map +1 -0
  66. package/dist/models.cjs +17 -0
  67. package/dist/models.cjs.map +1 -0
  68. package/dist/models.d.cts +5 -0
  69. package/dist/models.d.ts +5 -0
  70. package/dist/models.d.ts.map +1 -0
  71. package/dist/models.js +1 -0
  72. package/dist/models.js.map +1 -0
  73. package/dist/tools.cjs +17 -0
  74. package/dist/tools.cjs.map +1 -0
  75. package/dist/tools.d.cts +3 -0
  76. package/dist/tools.d.ts +3 -0
  77. package/dist/tools.d.ts.map +1 -0
  78. package/dist/tools.js +1 -0
  79. package/dist/tools.js.map +1 -0
  80. package/dist/utils.cjs +137 -0
  81. package/dist/utils.cjs.map +1 -0
  82. package/dist/utils.d.cts +14 -0
  83. package/dist/utils.d.ts +14 -0
  84. package/dist/utils.d.ts.map +1 -0
  85. package/dist/utils.js +112 -0
  86. package/dist/utils.js.map +1 -0
  87. package/package.json +56 -0
  88. package/src/beta/gemini_tts.test.ts +11 -0
  89. package/src/beta/gemini_tts.ts +309 -0
  90. package/src/beta/index.ts +6 -0
  91. package/src/beta/realtime/api_proto.ts +41 -0
  92. package/src/beta/realtime/index.ts +5 -0
  93. package/src/beta/realtime/realtime_api.ts +1440 -0
  94. package/src/index.ts +20 -0
  95. package/src/llm.test.ts +10 -0
  96. package/src/llm.ts +463 -0
  97. package/src/models.ts +100 -0
  98. package/src/tools.ts +6 -0
  99. package/src/utils.ts +157 -0
@@ -0,0 +1,309 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type * as types from '@google/genai';
5
+ import { GoogleGenAI } from '@google/genai';
6
+ import {
7
+ APIConnectionError,
8
+ APIStatusError,
9
+ AudioByteStream,
10
+ isAPIError,
11
+ shortuuid,
12
+ tts,
13
+ } from '@livekit/agents';
14
+ import type { AudioFrame } from '@livekit/rtc-node';
15
+
16
/** Model identifiers enumerated by this module for Gemini text-to-speech. */
export type GeminiTTSModels =
  | 'gemini-2.5-flash-preview-tts'
  | 'gemini-2.5-pro-preview-tts';
17
+
18
/**
 * Prebuilt voice names enumerated by this module for Gemini TTS.
 *
 * The value is forwarded as the `voiceName` of the request's prebuilt voice
 * configuration.
 */
export type GeminiVoices =
  | 'Zephyr'
  | 'Puck'
  | 'Charon'
  | 'Kore'
  | 'Fenrir'
  | 'Leda'
  | 'Orus'
  | 'Aoede'
  | 'Callirrhoe'
  | 'Autonoe'
  | 'Enceladus'
  | 'Iapetus'
  | 'Umbriel'
  | 'Algieba'
  | 'Despina'
  | 'Erinome'
  | 'Algenib'
  | 'Rasalgethi'
  | 'Laomedeia'
  | 'Achernar'
  | 'Alnilam'
  | 'Schedar'
  | 'Gacrux'
  | 'Pulcherrima'
  | 'Achird'
  | 'Zubenelgenubi'
  | 'Vindemiatrix'
  | 'Sadachbia'
  | 'Sadaltager'
  | 'Sulafat';
49
+
50
/** Model used when the caller does not pass `model`. */
const DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-preview-tts';

/** Voice used when the caller does not pass `voiceName`. */
const DEFAULT_VOICE: GeminiVoices = 'Kore';

/** Sample rate (Hz) the TTS is constructed with; not configurable. */
const DEFAULT_SAMPLE_RATE = 24_000;

/** Channel count the TTS is constructed with. */
const NUM_CHANNELS = 1;

/** Instruction prefix applied to the input text when the caller supplies none. */
const DEFAULT_INSTRUCTIONS = "Say the text with a proper tone, don't omit or add any words";
55
+
56
/** Resolved configuration held by a Gemini {@link TTS} instance. */
export interface TTSOptions {
  /** Model identifier sent with every synthesis request. */
  model: GeminiTTSModels | string;

  /** Prebuilt voice name placed in the request's speech configuration. */
  voiceName: GeminiVoices | string;

  /** `true` when the client is configured for Vertex AI rather than an API key. */
  vertexai: boolean;

  /** Google Cloud project ID; only populated when `vertexai` is `true`. */
  project?: string;

  /** Google Cloud location; only populated when `vertexai` is `true`. */
  location?: string;

  /** Instruction text prefixed to the input before it is sent to the model. */
  instructions?: string;
}
64
+
65
/**
 * Gemini text-to-speech engine built on the `@google/genai` client.
 *
 * The base class is initialised with `DEFAULT_SAMPLE_RATE`, `NUM_CHANNELS` and
 * `streaming: false`; text is synthesised one request at a time through
 * {@link TTS.synthesize}.
 */
export class TTS extends tts.TTS {
  #opts: TTSOptions;
  #client: GoogleGenAI;
  label = 'google.gemini.TTS';

  /**
   * Create a new instance of Gemini TTS.
   *
   * Environment Requirements:
   * - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.
   * - For Google Gemini API: Set the `apiKey` argument or the `GOOGLE_API_KEY` environment variable.
   *
   * Fallbacks read from the environment: `GOOGLE_GENAI_USE_VERTEXAI` (must be
   * the string `'true'`), `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`
   * (defaulting to `us-central1`) and `GOOGLE_API_KEY`.
   *
   * @param init - Configuration options for Gemini TTS
   * @throws APIConnectionError when Vertex AI is selected without a project ID,
   *   or when the Gemini API is selected without an API key.
   */
  constructor(init: Partial<TTSOptions & { apiKey: string }> = {}) {
    super(DEFAULT_SAMPLE_RATE, NUM_CHANNELS, { streaming: false });

    const {
      model = DEFAULT_MODEL,
      voiceName = DEFAULT_VOICE,
      apiKey,
      vertexai,
      project,
      location,
      instructions,
    } = init;

    // An explicit option wins; otherwise the env flag must be exactly 'true'.
    const useVertexai = vertexai ?? (process.env.GOOGLE_GENAI_USE_VERTEXAI === 'true');

    let resolvedProject: string | undefined;
    let resolvedLocation: string | undefined;
    let clientOptions: types.GoogleGenAIOptions;

    if (useVertexai) {
      // Vertex AI authenticates without an API key, so none is forwarded.
      resolvedProject = project || process.env.GOOGLE_CLOUD_PROJECT;
      if (!resolvedProject) {
        throw new APIConnectionError({
          message:
            'Project ID is required for Vertex AI. Set via project option or GOOGLE_CLOUD_PROJECT environment variable',
        });
      }
      resolvedLocation = location || process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';
      clientOptions = {
        vertexai: true,
        project: resolvedProject,
        location: resolvedLocation,
      };
    } else {
      // Gemini API mode: project and location stay undefined.
      const resolvedApiKey = apiKey || process.env.GOOGLE_API_KEY;
      if (!resolvedApiKey) {
        throw new APIConnectionError({
          message:
            'API key is required for Google API either via apiKey or GOOGLE_API_KEY environment variable',
        });
      }
      clientOptions = { apiKey: resolvedApiKey };
    }

    this.#opts = {
      model,
      voiceName,
      vertexai: useVertexai,
      project: resolvedProject,
      location: resolvedLocation,
      instructions: instructions ?? DEFAULT_INSTRUCTIONS,
    };

    this.#client = new GoogleGenAI(clientOptions);
  }

  /**
   * Start synthesising `text` as a single chunked request.
   *
   * @param text - Text to speak
   * @returns A stream bound to this TTS instance
   */
  synthesize(text: string): ChunkedStream {
    return new ChunkedStream(text, this);
  }

  /**
   * Update the TTS options.
   *
   * @param opts - Options to update; only `voiceName` is applied, and only when defined
   */
  updateOptions(opts: { voiceName?: GeminiVoices | string }) {
    const { voiceName } = opts;
    if (voiceName === undefined) {
      return;
    }
    this.#opts.voiceName = voiceName;
  }

  /**
   * Streaming input is not available for this engine.
   *
   * @throws Error always
   */
  stream(): tts.SynthesizeStream {
    throw new Error('Streaming is not supported on Gemini TTS');
  }

  /** Options currently in effect for this instance. */
  get opts(): TTSOptions {
    return this.#opts;
  }

  /** Underlying `@google/genai` client. */
  get client(): GoogleGenAI {
    return this.#client;
  }
}
168
+
169
/**
 * Synthesises one piece of text with Gemini TTS and pushes the resulting
 * audio frames onto the stream's queue.
 *
 * Every frame carries the same `requestId` / `segmentId`; exactly one frame,
 * the last of the whole response stream, is marked `final`.
 */
export class ChunkedStream extends tts.ChunkedStream {
  #tts: TTS;
  label = 'google.gemini.ChunkedStream';

  /**
   * @param inputText - Text to synthesise
   * @param tts - Owning TTS instance supplying client, options and audio format
   */
  constructor(inputText: string, tts: TTS) {
    super(inputText, tts);
    this.#tts = tts;
  }

  /**
   * Issue the generate-content request, convert the streamed inline audio into
   * frames and enqueue them. The queue is always closed on exit.
   *
   * @throws APIStatusError for failures that carry an HTTP status (429 and 5xx
   *   are retryable, other 4xx are not)
   * @throws APIConnectionError for any other failure (retryable)
   */
  protected async run() {
    const requestId = shortuuid();
    const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels);

    const config: types.GenerateContentConfig = {
      responseModalities: ['AUDIO'],
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: {
            voiceName: this.#tts.opts.voiceName,
          },
        },
      },
    };

    let inputText = this.inputText;
    if (this.#tts.opts.instructions) {
      inputText = `${this.#tts.opts.instructions}:\n"${inputText}"`;
    }

    const contents: types.Content[] = [
      {
        role: 'user',
        parts: [{ text: inputText }],
      },
    ];

    // One frame is always held back so that the very last frame of the whole
    // stream, and only that one, can be tagged `final`.
    let lastFrame: AudioFrame | undefined;
    const sendLastFrame = (final: boolean) => {
      if (lastFrame) {
        this.queue.put({
          requestId,
          frame: lastFrame,
          segmentId: requestId,
          final,
        });
        lastFrame = undefined;
      }
    };
    const enqueue = (frames: Iterable<AudioFrame>) => {
      for (const frame of frames) {
        sendLastFrame(false);
        lastFrame = frame;
      }
    };

    try {
      // The request is issued inside the try block so that request-time
      // failures are mapped like mid-stream failures and the queue is closed.
      const responseStream = await this.#tts.client.models.generateContentStream({
        model: this.#tts.opts.model,
        contents,
        config,
      });

      for await (const response of responseStream) {
        for (const audio of this.#extractAudio(response)) {
          enqueue(bstream.write(audio));
        }
      }

      // Flush once, after the last response: flushing per response would emit
      // the partially filled buffer mid-stream as a short frame.
      enqueue(bstream.flush());
      sendLastFrame(true);
    } catch (error: unknown) {
      this.#throwAsAPIError(error);
    } finally {
      this.queue.close();
    }
  }

  /**
   * Decode the inline audio payloads of the first candidate in `response`.
   *
   * @param response - One item of the streamed generate-content response
   * @returns Decoded audio buffers in part order; empty when the response has
   *   no candidate, no parts, or no `audio/*` inline data
   */
  #extractAudio(response: types.GenerateContentResponse): Buffer[] {
    const parts = response.candidates?.[0]?.content?.parts ?? [];
    const chunks: Buffer[] = [];
    for (const part of parts) {
      const inline = part.inlineData;
      if (inline?.data && inline.mimeType?.startsWith('audio/')) {
        chunks.push(Buffer.from(inline.data, 'base64'));
      }
    }
    return chunks;
  }

  /**
   * Re-throw `error` as one of the framework's API error types.
   *
   * @param error - Value caught while requesting or consuming the stream
   * @throws the original error when it already is an API error; otherwise
   *   APIStatusError or APIConnectionError as described on `run`
   */
  #throwAsAPIError(error: unknown): never {
    if (isAPIError(error)) throw error;

    const err = (error ?? {}) as {
      code?: unknown;
      message?: string;
      status?: unknown;
    };

    // NOTE(review): the `@google/genai` ApiError is understood to expose the HTTP
    // status as a numeric `status`; `code` is still honoured for error shapes
    // that use that name. Confirm against the SDK version in use.
    const statusCode =
      typeof err.code === 'number'
        ? err.code
        : typeof err.status === 'number'
          ? err.status
          : undefined;
    const detail = err.message || 'Unknown error';

    if (statusCode && statusCode >= 400 && statusCode < 500) {
      if (statusCode === 429) {
        throw new APIStatusError({
          message: `Gemini TTS: Rate limit error - ${detail}`,
          options: {
            statusCode: 429,
            retryable: true,
          },
        });
      }
      throw new APIStatusError({
        message: `Gemini TTS: Client error (${statusCode}) - ${detail}`,
        options: {
          statusCode,
          retryable: false,
        },
      });
    }

    if (statusCode && statusCode >= 500) {
      throw new APIStatusError({
        message: `Gemini TTS: Server error (${statusCode}) - ${detail}`,
        options: {
          statusCode,
          retryable: true,
        },
      });
    }

    throw new APIConnectionError({
      message: `Gemini TTS: Connection error - ${detail}`,
      options: { retryable: true },
    });
  }
}
@@ -0,0 +1,6 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ export { TTS, type GeminiTTSModels, type GeminiVoices, type TTSOptions } from './gemini_tts.js';
6
+ export * as realtime from './realtime/index.js';
@@ -0,0 +1,41 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type * as types from '@google/genai';
5
+
6
/** Model identifiers accepted for the Google Live API. */
export type LiveAPIModels =
  | 'gemini-2.0-flash-exp'
  // models supported on Gemini API
  | 'gemini-2.0-flash-live-001'
  | 'gemini-2.5-flash-preview-native-audio-dialog'
  | 'gemini-2.5-flash-exp-native-audio-thinking-dialog';
15
+
16
/** Voice names available for the Google Realtime API. */
export type Voice =
  | 'Puck'
  | 'Charon'
  | 'Kore'
  | 'Fenrir'
  | 'Aoede'
  | 'Leda'
  | 'Orus'
  | 'Zephyr';
20
+
21
/**
 * Every client event shape, discriminated by `type`; `value` carries the
 * matching `@google/genai` payload.
 */
export type ClientEvents =
  | { type: 'content'; value: types.LiveClientContent }
  | { type: 'realtime_input'; value: types.LiveClientRealtimeInput }
  | { type: 'tool_response'; value: types.LiveClientToolResponse }
  | { type: 'function_response'; value: types.FunctionResponse };
@@ -0,0 +1,5 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ export type { ClientEvents, LiveAPIModels, Voice } from './api_proto.js';
5
+ export { RealtimeModel } from './realtime_api.js';