kimaki 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -129,7 +129,7 @@ async function run({ restart, addChannels }) {
129
129
  '2. Click "Reset Token" to generate a new bot token (in case of errors try again)\n' +
130
130
  "3. Copy the token (you won't be able to see it again!)", 'Step 3: Get Bot Token');
131
131
  const tokenInput = await password({
132
- message: 'Enter your Discord Bot Token (will be hidden):',
132
+ message: 'Enter your Discord Bot Token (from "Bot" section - click "Reset Token" if needed):',
133
133
  validate(value) {
134
134
  if (!value)
135
135
  return 'Bot token is required';
@@ -142,6 +142,24 @@ async function run({ restart, addChannels }) {
142
142
  process.exit(0);
143
143
  }
144
144
  token = tokenInput;
145
+ note(`You can get a Gemini api Key at https://aistudio.google.com/apikey`, `Gemini API Key`);
146
+ const geminiApiKey = await password({
147
+ message: 'Enter your Gemini API Key for voice channels and audio transcription (optional, press Enter to skip):',
148
+ validate(value) {
149
+ if (value && value.length < 10)
150
+ return 'Invalid API key format';
151
+ return undefined;
152
+ },
153
+ });
154
+ if (isCancel(geminiApiKey)) {
155
+ cancel('Setup cancelled');
156
+ process.exit(0);
157
+ }
158
+ // Store API key in database
159
+ if (geminiApiKey) {
160
+ db.prepare('INSERT OR REPLACE INTO bot_api_keys (app_id, gemini_api_key) VALUES (?, ?)').run(appId, geminiApiKey || null);
161
+ note('API key saved successfully', 'API Key Stored');
162
+ }
145
163
  note(`Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`, 'Step 4: Install Bot to Server');
146
164
  const installed = await text({
147
165
  message: 'Press Enter AFTER you have installed the bot in your server:',
@@ -78,7 +78,7 @@ async function createUserAudioLogStream(guildId, channelId) {
78
78
  }
79
79
  }
80
80
  // Set up voice handling for a connection (called once per connection)
81
- async function setupVoiceHandling({ connection, guildId, channelId, }) {
81
+ async function setupVoiceHandling({ connection, guildId, channelId, appId, }) {
82
82
  voiceLogger.log(`Setting up voice handling for guild ${guildId}, channel ${channelId}`);
83
83
  // Check if this voice channel has an associated directory
84
84
  const channelDirRow = getDatabase()
@@ -98,11 +98,17 @@ async function setupVoiceHandling({ connection, guildId, channelId, }) {
98
98
  }
99
99
  // Create user audio stream for debugging
100
100
  voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId);
101
+ // Get API keys from database
102
+ const apiKeys = getDatabase()
103
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
104
+ .get(appId);
101
105
  // Create GenAI worker
102
106
  const genAiWorker = await createGenAIWorker({
103
107
  directory,
104
108
  guildId,
105
109
  channelId,
110
+ appId,
111
+ geminiApiKey: apiKeys?.gemini_api_key,
106
112
  systemMessage: dedent `
107
113
  You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.
108
114
 
@@ -347,6 +353,13 @@ export function getDatabase() {
347
353
  channel_type TEXT NOT NULL,
348
354
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
349
355
  )
356
+ `);
357
+ db.exec(`
358
+ CREATE TABLE IF NOT EXISTS bot_api_keys (
359
+ app_id TEXT PRIMARY KEY,
360
+ gemini_api_key TEXT,
361
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
362
+ )
350
363
  `);
351
364
  }
352
365
  return db;
@@ -458,7 +471,7 @@ async function waitForServer(port, maxAttempts = 30) {
458
471
  }
459
472
  throw new Error(`Server did not start on port ${port} after ${maxAttempts} seconds`);
460
473
  }
461
- async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, }) {
474
+ async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, appId, }) {
462
475
  const audioAttachment = Array.from(message.attachments.values()).find((attachment) => attachment.contentType?.startsWith('audio/'));
463
476
  if (!audioAttachment)
464
477
  return null;
@@ -488,9 +501,20 @@ async function processVoiceAttachment({ message, thread, projectDirectory, isNew
488
501
  voiceLogger.log(`Could not get project tree:`, e);
489
502
  }
490
503
  }
504
+ // Get Gemini API key from database if appId is provided
505
+ let geminiApiKey;
506
+ if (appId) {
507
+ const apiKeys = getDatabase()
508
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
509
+ .get(appId);
510
+ if (apiKeys?.gemini_api_key) {
511
+ geminiApiKey = apiKeys.gemini_api_key;
512
+ }
513
+ }
491
514
  const transcription = await transcribeAudio({
492
515
  audio: audioBuffer,
493
516
  prompt: transcriptionPrompt,
517
+ geminiApiKey,
494
518
  });
495
519
  voiceLogger.log(`Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`);
496
520
  // Update thread name with transcribed content only for new threads
@@ -1233,6 +1257,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
1233
1257
  message,
1234
1258
  thread,
1235
1259
  projectDirectory,
1260
+ appId: currentAppId,
1236
1261
  });
1237
1262
  if (transcription) {
1238
1263
  messageContent = transcription;
@@ -1291,6 +1316,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
1291
1316
  thread,
1292
1317
  projectDirectory,
1293
1318
  isNewThread: true,
1319
+ appId: currentAppId,
1294
1320
  });
1295
1321
  if (transcription) {
1296
1322
  messageContent = transcription;
@@ -1651,6 +1677,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
1651
1677
  connection,
1652
1678
  guildId: newState.guild.id,
1653
1679
  channelId: voiceChannel.id,
1680
+ appId: currentAppId,
1654
1681
  });
1655
1682
  // Handle connection state changes
1656
1683
  connection.on(VoiceConnectionStatus.Disconnected, async () => {
@@ -98,6 +98,8 @@ export function createGenAIWorker(options) {
98
98
  systemMessage: options.systemMessage,
99
99
  guildId: options.guildId,
100
100
  channelId: options.channelId,
101
+ appId: options.appId,
102
+ geminiApiKey: options.geminiApiKey,
101
103
  };
102
104
  worker.postMessage(initMessage);
103
105
  });
@@ -210,6 +210,7 @@ parentPort.on('message', async (message) => {
210
210
  session = await startGenAiSession({
211
211
  tools,
212
212
  systemMessage: message.systemMessage,
213
+ geminiApiKey: message.geminiApiKey,
213
214
  onAssistantAudioChunk({ data }) {
214
215
  // Write to audio log if enabled
215
216
  if (audioLogStream && !audioLogStream.destroyed) {
package/dist/genai.js CHANGED
@@ -68,7 +68,7 @@ function defaultAudioChunkHandler({ data, mimeType, }) {
68
68
  const buffer = convertToWav(audioParts, mimeType);
69
69
  saveBinaryFile(fileName, buffer);
70
70
  }
71
- export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, } = {}) {
71
+ export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, geminiApiKey, } = {}) {
72
72
  let session = undefined;
73
73
  const callableTools = [];
74
74
  let isAssistantSpeaking = false;
@@ -161,8 +161,13 @@ export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStar
161
161
  }
162
162
  }
163
163
  }
164
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
165
+ if (!apiKey) {
166
+ genaiLogger.error('No Gemini API key provided');
167
+ throw new Error('Gemini API key is required for voice interactions');
168
+ }
164
169
  const ai = new GoogleGenAI({
165
- apiKey: process.env.GEMINI_API_KEY,
170
+ apiKey,
166
171
  });
167
172
  const model = 'models/gemini-2.5-flash-live-preview';
168
173
  session = await ai.live.connect({
package/dist/voice.js CHANGED
@@ -1,25 +1,60 @@
1
- import { openai } from '@ai-sdk/openai';
2
- import { experimental_transcribe as transcribe } from 'ai';
1
+ import { GoogleGenAI } from '@google/genai';
3
2
  import { createLogger } from './logger.js';
4
3
  const voiceLogger = createLogger('VOICE');
5
- export async function transcribeAudio({ audio, prompt, language, temperature, }) {
4
+ export async function transcribeAudio({ audio, prompt, language, temperature, geminiApiKey, }) {
6
5
  try {
7
- const result = await transcribe({
8
- model: openai.transcription('whisper-1'),
9
- audio,
10
- ...(prompt || language || temperature !== undefined
11
- ? {
12
- providerOptions: {
13
- openai: {
14
- ...(prompt && { prompt }),
15
- ...(language && { language }),
16
- ...(temperature !== undefined && { temperature }),
6
+ // Use provided API key or fall back to environment variable
7
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
8
+ if (!apiKey) {
9
+ throw new Error('Gemini API key is required for audio transcription');
10
+ }
11
+ // Initialize Google Generative AI
12
+ const genAI = new GoogleGenAI({ apiKey });
13
+ // Convert audio to base64 string if it's not already
14
+ let audioBase64;
15
+ if (typeof audio === 'string') {
16
+ audioBase64 = audio;
17
+ }
18
+ else if (audio instanceof Buffer) {
19
+ audioBase64 = audio.toString('base64');
20
+ }
21
+ else if (audio instanceof Uint8Array) {
22
+ audioBase64 = Buffer.from(audio).toString('base64');
23
+ }
24
+ else if (audio instanceof ArrayBuffer) {
25
+ audioBase64 = Buffer.from(audio).toString('base64');
26
+ }
27
+ else {
28
+ throw new Error('Invalid audio format');
29
+ }
30
+ // Build the transcription prompt
31
+ let transcriptionPrompt = `Please transcribe this audio file accurately. Here is some relevant information and filenames that may be present in the audio:\n<context>\n${prompt}\n</context>\n`;
32
+ if (language) {
33
+ transcriptionPrompt += `\nThe audio is in ${language}.`;
34
+ }
35
+ // Create the content with audio using the inline data format
36
+ const response = await genAI.models.generateContent({
37
+ model: 'gemini-2.5-flash',
38
+ contents: [
39
+ {
40
+ parts: [
41
+ { text: transcriptionPrompt },
42
+ {
43
+ inlineData: {
44
+ data: audioBase64,
45
+ mimeType: 'audio/mpeg',
46
+ },
17
47
  },
18
- },
48
+ ],
49
+ },
50
+ ],
51
+ config: temperature !== undefined
52
+ ? {
53
+ temperature,
19
54
  }
20
- : {}),
55
+ : undefined,
21
56
  });
22
- return result.text;
57
+ return response.text || '';
23
58
  }
24
59
  catch (error) {
25
60
  voiceLogger.error('Failed to transcribe audio:', error);
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "kimaki",
3
3
  "module": "index.ts",
4
4
  "type": "module",
5
- "version": "0.1.3",
5
+ "version": "0.1.4",
6
6
  "repository": "https://github.com/remorses/kimaki",
7
7
  "bin": "bin.js",
8
8
  "files": [
@@ -19,7 +19,7 @@
19
19
  "tsx": "^4.20.5"
20
20
  },
21
21
  "dependencies": {
22
- "@ai-sdk/openai": "^2.0.23",
22
+ "@ai-sdk/google": "^2.0.16",
23
23
  "@clack/prompts": "^0.11.0",
24
24
  "@discordjs/opus": "^0.10.0",
25
25
  "@discordjs/voice": "^0.19.0",
package/src/cli.ts CHANGED
@@ -223,9 +223,9 @@ async function run({ restart, addChannels }: CliOptions) {
223
223
  "3. Copy the token (you won't be able to see it again!)",
224
224
  'Step 3: Get Bot Token',
225
225
  )
226
-
227
226
  const tokenInput = await password({
228
- message: 'Enter your Discord Bot Token (will be hidden):',
227
+ message:
228
+ 'Enter your Discord Bot Token (from "Bot" section - click "Reset Token" if needed):',
229
229
  validate(value) {
230
230
  if (!value) return 'Bot token is required'
231
231
  if (value.length < 50) return 'Invalid token format (too short)'
@@ -238,6 +238,33 @@ async function run({ restart, addChannels }: CliOptions) {
238
238
  }
239
239
  token = tokenInput
240
240
 
241
+ note(
242
+ `You can get a Gemini api Key at https://aistudio.google.com/apikey`,
243
+ `Gemini API Key`,
244
+ )
245
+
246
+ const geminiApiKey = await password({
247
+ message:
248
+ 'Enter your Gemini API Key for voice channels and audio transcription (optional, press Enter to skip):',
249
+ validate(value) {
250
+ if (value && value.length < 10) return 'Invalid API key format'
251
+ return undefined
252
+ },
253
+ })
254
+
255
+ if (isCancel(geminiApiKey)) {
256
+ cancel('Setup cancelled')
257
+ process.exit(0)
258
+ }
259
+
260
+ // Store API key in database
261
+ if (geminiApiKey) {
262
+ db.prepare(
263
+ 'INSERT OR REPLACE INTO bot_api_keys (app_id, gemini_api_key) VALUES (?, ?)',
264
+ ).run(appId, geminiApiKey || null)
265
+ note('API key saved successfully', 'API Key Stored')
266
+ }
267
+
241
268
  note(
242
269
  `Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`,
243
270
  'Step 4: Install Bot to Server',
package/src/discordBot.ts CHANGED
@@ -152,10 +152,12 @@ async function setupVoiceHandling({
152
152
  connection,
153
153
  guildId,
154
154
  channelId,
155
+ appId,
155
156
  }: {
156
157
  connection: VoiceConnection
157
158
  guildId: string
158
159
  channelId: string
160
+ appId: string
159
161
  }) {
160
162
  voiceLogger.log(
161
163
  `Setting up voice handling for guild ${guildId}, channel ${channelId}`,
@@ -188,11 +190,18 @@ async function setupVoiceHandling({
188
190
  // Create user audio stream for debugging
189
191
  voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId)
190
192
 
193
+ // Get API keys from database
194
+ const apiKeys = getDatabase()
195
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
196
+ .get(appId) as { gemini_api_key: string | null } | undefined
197
+
191
198
  // Create GenAI worker
192
199
  const genAiWorker = await createGenAIWorker({
193
200
  directory,
194
201
  guildId,
195
202
  channelId,
203
+ appId,
204
+ geminiApiKey: apiKeys?.gemini_api_key,
196
205
  systemMessage: dedent`
197
206
  You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.
198
207
 
@@ -480,6 +489,14 @@ export function getDatabase(): Database.Database {
480
489
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
481
490
  )
482
491
  `)
492
+
493
+ db.exec(`
494
+ CREATE TABLE IF NOT EXISTS bot_api_keys (
495
+ app_id TEXT PRIMARY KEY,
496
+ gemini_api_key TEXT,
497
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
498
+ )
499
+ `)
483
500
  }
484
501
 
485
502
  return db
@@ -614,11 +631,13 @@ async function processVoiceAttachment({
614
631
  thread,
615
632
  projectDirectory,
616
633
  isNewThread = false,
634
+ appId,
617
635
  }: {
618
636
  message: Message
619
637
  thread: ThreadChannel
620
638
  projectDirectory?: string
621
639
  isNewThread?: boolean
640
+ appId?: string
622
641
  }): Promise<string | null> {
623
642
  const audioAttachment = Array.from(message.attachments.values()).find(
624
643
  (attachment) => attachment.contentType?.startsWith('audio/'),
@@ -660,9 +679,22 @@ async function processVoiceAttachment({
660
679
  }
661
680
  }
662
681
 
682
+ // Get Gemini API key from database if appId is provided
683
+ let geminiApiKey: string | undefined
684
+ if (appId) {
685
+ const apiKeys = getDatabase()
686
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
687
+ .get(appId) as { gemini_api_key: string | null } | undefined
688
+
689
+ if (apiKeys?.gemini_api_key) {
690
+ geminiApiKey = apiKeys.gemini_api_key
691
+ }
692
+ }
693
+
663
694
  const transcription = await transcribeAudio({
664
695
  audio: audioBuffer,
665
696
  prompt: transcriptionPrompt,
697
+ geminiApiKey,
666
698
  })
667
699
 
668
700
  voiceLogger.log(
@@ -1635,6 +1667,7 @@ export async function startDiscordBot({
1635
1667
  message,
1636
1668
  thread,
1637
1669
  projectDirectory,
1670
+ appId: currentAppId,
1638
1671
  })
1639
1672
  if (transcription) {
1640
1673
  messageContent = transcription
@@ -1727,6 +1760,7 @@ export async function startDiscordBot({
1727
1760
  thread,
1728
1761
  projectDirectory,
1729
1762
  isNewThread: true,
1763
+ appId: currentAppId,
1730
1764
  })
1731
1765
  if (transcription) {
1732
1766
  messageContent = transcription
@@ -2224,6 +2258,7 @@ export async function startDiscordBot({
2224
2258
  connection,
2225
2259
  guildId: newState.guild.id,
2226
2260
  channelId: voiceChannel.id,
2261
+ appId: currentAppId!,
2227
2262
  })
2228
2263
 
2229
2264
  // Handle connection state changes
@@ -11,6 +11,8 @@ export interface GenAIWorkerOptions {
11
11
  systemMessage?: string
12
12
  guildId: string
13
13
  channelId: string
14
+ appId: string
15
+ geminiApiKey?: string | null
14
16
  onAssistantOpusPacket: (packet: ArrayBuffer) => void
15
17
  onAssistantStartSpeaking?: () => void
16
18
  onAssistantStopSpeaking?: () => void
@@ -146,6 +148,8 @@ export function createGenAIWorker(
146
148
  systemMessage: options.systemMessage,
147
149
  guildId: options.guildId,
148
150
  channelId: options.channelId,
151
+ appId: options.appId,
152
+ geminiApiKey: options.geminiApiKey,
149
153
  }
150
154
  worker.postMessage(initMessage)
151
155
  })
@@ -271,6 +271,7 @@ parentPort.on('message', async (message: WorkerInMessage) => {
271
271
  session = await startGenAiSession({
272
272
  tools,
273
273
  systemMessage: message.systemMessage,
274
+ geminiApiKey: message.geminiApiKey,
274
275
  onAssistantAudioChunk({ data }) {
275
276
  // Write to audio log if enabled
276
277
  if (audioLogStream && !audioLogStream.destroyed) {
package/src/genai.ts CHANGED
@@ -113,6 +113,7 @@ export async function startGenAiSession({
113
113
  onAssistantInterruptSpeaking,
114
114
  systemMessage,
115
115
  tools,
116
+ geminiApiKey,
116
117
  }: {
117
118
  onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
118
119
  onAssistantStartSpeaking?: () => void
@@ -120,6 +121,7 @@ export async function startGenAiSession({
120
121
  onAssistantInterruptSpeaking?: () => void
121
122
  systemMessage?: string
122
123
  tools?: Record<string, AITool<any, any>>
124
+ geminiApiKey?: string | null
123
125
  } = {}) {
124
126
  let session: Session | undefined = undefined
125
127
  const callableTools: Array<CallableTool & { name: string }> = []
@@ -242,8 +244,15 @@ export async function startGenAiSession({
242
244
  }
243
245
  }
244
246
 
247
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
248
+
249
+ if (!apiKey) {
250
+ genaiLogger.error('No Gemini API key provided')
251
+ throw new Error('Gemini API key is required for voice interactions')
252
+ }
253
+
245
254
  const ai = new GoogleGenAI({
246
- apiKey: process.env.GEMINI_API_KEY,
255
+ apiKey,
247
256
  })
248
257
 
249
258
  const model = 'models/gemini-2.5-flash-live-preview'
package/src/voice.ts CHANGED
@@ -1,5 +1,4 @@
1
- import { openai } from '@ai-sdk/openai'
2
- import { experimental_transcribe as transcribe } from 'ai'
1
+ import { GoogleGenAI } from '@google/genai'
3
2
  import { createLogger } from './logger.js'
4
3
 
5
4
  const voiceLogger = createLogger('VOICE')
@@ -9,30 +8,70 @@ export async function transcribeAudio({
9
8
  prompt,
10
9
  language,
11
10
  temperature,
11
+ geminiApiKey,
12
12
  }: {
13
13
  audio: Buffer | Uint8Array | ArrayBuffer | string
14
14
  prompt?: string
15
15
  language?: string
16
16
  temperature?: number
17
+ geminiApiKey?: string
17
18
  }): Promise<string> {
18
19
  try {
19
- const result = await transcribe({
20
- model: openai.transcription('whisper-1'),
21
- audio,
22
- ...(prompt || language || temperature !== undefined
23
- ? {
24
- providerOptions: {
25
- openai: {
26
- ...(prompt && { prompt }),
27
- ...(language && { language }),
28
- ...(temperature !== undefined && { temperature }),
20
+ // Use provided API key or fall back to environment variable
21
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
22
+
23
+ if (!apiKey) {
24
+ throw new Error('Gemini API key is required for audio transcription')
25
+ }
26
+
27
+ // Initialize Google Generative AI
28
+ const genAI = new GoogleGenAI({ apiKey })
29
+
30
+ // Convert audio to base64 string if it's not already
31
+ let audioBase64: string
32
+ if (typeof audio === 'string') {
33
+ audioBase64 = audio
34
+ } else if (audio instanceof Buffer) {
35
+ audioBase64 = audio.toString('base64')
36
+ } else if (audio instanceof Uint8Array) {
37
+ audioBase64 = Buffer.from(audio).toString('base64')
38
+ } else if (audio instanceof ArrayBuffer) {
39
+ audioBase64 = Buffer.from(audio).toString('base64')
40
+ } else {
41
+ throw new Error('Invalid audio format')
42
+ }
43
+
44
+ // Build the transcription prompt
45
+ let transcriptionPrompt = `Please transcribe this audio file accurately. Here is some relevant information and filenames that may be present in the audio:\n<context>\n${prompt}\n</context>\n`
46
+ if (language) {
47
+ transcriptionPrompt += `\nThe audio is in ${language}.`
48
+ }
49
+
50
+ // Create the content with audio using the inline data format
51
+ const response = await genAI.models.generateContent({
52
+ model: 'gemini-2.5-flash',
53
+ contents: [
54
+ {
55
+ parts: [
56
+ { text: transcriptionPrompt },
57
+ {
58
+ inlineData: {
59
+ data: audioBase64,
60
+ mimeType: 'audio/mpeg',
29
61
  },
30
62
  },
31
- }
32
- : {}),
63
+ ],
64
+ },
65
+ ],
66
+ config:
67
+ temperature !== undefined
68
+ ? {
69
+ temperature,
70
+ }
71
+ : undefined,
33
72
  })
34
73
 
35
- return result.text
74
+ return response.text || ''
36
75
  } catch (error) {
37
76
  voiceLogger.error('Failed to transcribe audio:', error)
38
77
  throw new Error(
@@ -8,6 +8,8 @@ export type WorkerInMessage =
8
8
  systemMessage?: string
9
9
  guildId: string
10
10
  channelId: string
11
+ appId: string
12
+ geminiApiKey?: string | null
11
13
  }
12
14
  | {
13
15
  type: 'sendRealtimeInput'