kimaki 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { cac } from 'cac';
3
- import { intro, outro, text, password, note, cancel, isCancel, log, multiselect, spinner, } from '@clack/prompts';
4
- import { generateBotInstallUrl } from './utils.js';
3
+ import { intro, outro, text, password, note, cancel, isCancel, confirm, log, multiselect, spinner, } from '@clack/prompts';
4
+ import { deduplicateByKey, generateBotInstallUrl } from './utils.js';
5
5
  import { getChannelsWithDescriptions, createDiscordClient, getDatabase, startDiscordBot, initializeOpencodeForDirectory, } from './discordBot.js';
6
6
  import { Events, ChannelType, REST, Routes, SlashCommandBuilder, } from 'discord.js';
7
7
  import path from 'node:path';
@@ -74,7 +74,6 @@ async function ensureKimakiCategory(guild) {
74
74
  }
75
75
  async function run({ restart, addChannels }) {
76
76
  const forceSetup = Boolean(restart);
77
- const shouldAddChannels = Boolean(addChannels);
78
77
  intro('🤖 Discord Bot Setup');
79
78
  const db = getDatabase();
80
79
  let appId;
@@ -82,6 +81,7 @@ async function run({ restart, addChannels }) {
82
81
  const existingBot = db
83
82
  .prepare('SELECT app_id, token FROM bot_tokens ORDER BY created_at DESC LIMIT 1')
84
83
  .get();
84
+ const shouldAddChannels = !existingBot?.token || forceSetup || Boolean(addChannels);
85
85
  if (existingBot && !forceSetup) {
86
86
  appId = existingBot.app_id;
87
87
  token = existingBot.token;
@@ -112,10 +112,24 @@ async function run({ restart, addChannels }) {
112
112
  }
113
113
  appId = appIdInput;
114
114
  note('1. Go to the "Bot" section in the left sidebar\n' +
115
- '2. Click "Reset Token" to generate a new bot token\n' +
116
- "3. Copy the token (you won't be able to see it again!)", 'Step 2: Get Bot Token');
115
+ '2. Scroll down to "Privileged Gateway Intents"\n' +
116
+ '3. Enable these intents by toggling them ON:\n' +
117
+ ' • SERVER MEMBERS INTENT\n' +
118
+ ' • MESSAGE CONTENT INTENT\n' +
119
+ '4. Click "Save Changes" at the bottom', 'Step 2: Enable Required Intents');
120
+ const intentsConfirmed = await text({
121
+ message: 'Press Enter after enabling both intents:',
122
+ placeholder: 'Enter',
123
+ });
124
+ if (isCancel(intentsConfirmed)) {
125
+ cancel('Setup cancelled');
126
+ process.exit(0);
127
+ }
128
+ note('1. Still in the "Bot" section\n' +
129
+ '2. Click "Reset Token" to generate a new bot token (in case of errors try again)\n' +
130
+ "3. Copy the token (you won't be able to see it again!)", 'Step 3: Get Bot Token');
117
131
  const tokenInput = await password({
118
- message: 'Enter your Discord Bot Token (will be hidden):',
132
+ message: 'Enter your Discord Bot Token (from "Bot" section - click "Reset Token" if needed):',
119
133
  validate(value) {
120
134
  if (!value)
121
135
  return 'Bot token is required';
@@ -128,16 +142,29 @@ async function run({ restart, addChannels }) {
128
142
  process.exit(0);
129
143
  }
130
144
  token = tokenInput;
131
- db.prepare('INSERT OR REPLACE INTO bot_tokens (app_id, token) VALUES (?, ?)').run(appId, token);
132
- note('Token saved to database', 'Credentials Stored');
133
- note(`Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`, 'Step 3: Install Bot to Server');
134
- const installed = await text({
135
- message: 'Press Enter AFTER you have installed the bot in your server:',
136
- placeholder: 'Press Enter to continue',
137
- validate() {
145
+ note(`You can get a Gemini api Key at https://aistudio.google.com/apikey`, `Gemini API Key`);
146
+ const geminiApiKey = await password({
147
+ message: 'Enter your Gemini API Key for voice channels and audio transcription (optional, press Enter to skip):',
148
+ validate(value) {
149
+ if (value && value.length < 10)
150
+ return 'Invalid API key format';
138
151
  return undefined;
139
152
  },
140
153
  });
154
+ if (isCancel(geminiApiKey)) {
155
+ cancel('Setup cancelled');
156
+ process.exit(0);
157
+ }
158
+ // Store API key in database
159
+ if (geminiApiKey) {
160
+ db.prepare('INSERT OR REPLACE INTO bot_api_keys (app_id, gemini_api_key) VALUES (?, ?)').run(appId, geminiApiKey || null);
161
+ note('API key saved successfully', 'API Key Stored');
162
+ }
163
+ note(`Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`, 'Step 4: Install Bot to Server');
164
+ const installed = await text({
165
+ message: 'Press Enter AFTER you have installed the bot in your server:',
166
+ placeholder: 'Enter',
167
+ });
141
168
  if (isCancel(installed)) {
142
169
  cancel('Setup cancelled');
143
170
  process.exit(0);
@@ -172,6 +199,7 @@ async function run({ restart, addChannels }) {
172
199
  cliLogger.error('Error: ' + (error instanceof Error ? error.message : String(error)));
173
200
  process.exit(EXIT_NO_RESTART);
174
201
  }
202
+ db.prepare('INSERT OR REPLACE INTO bot_tokens (app_id, token) VALUES (?, ?)').run(appId, token);
175
203
  for (const { guild, channels } of kimakiChannels) {
176
204
  for (const channel of channels) {
177
205
  if (channel.kimakiDirectory) {
@@ -216,12 +244,16 @@ async function run({ restart, addChannels }) {
216
244
  discordClient.destroy();
217
245
  process.exit(EXIT_NO_RESTART);
218
246
  }
219
- const existingDirs = kimakiChannels.flatMap(({ channels }) => channels.map((ch) => ch.kimakiDirectory).filter(Boolean));
220
- const availableProjects = projects.filter((project) => !existingDirs.includes(project.worktree));
247
+ const existingDirs = kimakiChannels.flatMap(({ channels }) => channels
248
+ .filter((ch) => ch.kimakiDirectory && ch.kimakiApp === appId)
249
+ .map((ch) => ch.kimakiDirectory)
250
+ .filter(Boolean));
251
+ const availableProjects = deduplicateByKey(projects.filter((project) => !existingDirs.includes(project.worktree)), (x) => x.worktree);
221
252
  if (availableProjects.length === 0) {
222
253
  note('All OpenCode projects already have Discord channels', 'No New Projects');
223
254
  }
224
- if (shouldAddChannels && availableProjects.length > 0) {
255
+ if ((!existingDirs?.length && availableProjects.length > 0) ||
256
+ shouldAddChannels) {
225
257
  const selectedProjects = await multiselect({
226
258
  message: 'Select projects to create Discord channels for:',
227
259
  options: availableProjects.map((project) => ({
@@ -262,7 +294,7 @@ async function run({ restart, addChannels }) {
262
294
  if (!project)
263
295
  continue;
264
296
  const baseName = path.basename(project.worktree);
265
- const channelName = `kimaki-${baseName}`
297
+ const channelName = `${baseName}`
266
298
  .toLowerCase()
267
299
  .replace(/[^a-z0-9-]/g, '-')
268
300
  .slice(0, 100);
package/dist/discordBot.js CHANGED
@@ -78,7 +78,7 @@ async function createUserAudioLogStream(guildId, channelId) {
78
78
  }
79
79
  }
80
80
  // Set up voice handling for a connection (called once per connection)
81
- async function setupVoiceHandling({ connection, guildId, channelId, }) {
81
+ async function setupVoiceHandling({ connection, guildId, channelId, appId, }) {
82
82
  voiceLogger.log(`Setting up voice handling for guild ${guildId}, channel ${channelId}`);
83
83
  // Check if this voice channel has an associated directory
84
84
  const channelDirRow = getDatabase()
@@ -98,11 +98,17 @@ async function setupVoiceHandling({ connection, guildId, channelId, }) {
98
98
  }
99
99
  // Create user audio stream for debugging
100
100
  voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId);
101
+ // Get API keys from database
102
+ const apiKeys = getDatabase()
103
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
104
+ .get(appId);
101
105
  // Create GenAI worker
102
106
  const genAiWorker = await createGenAIWorker({
103
107
  directory,
104
108
  guildId,
105
109
  channelId,
110
+ appId,
111
+ geminiApiKey: apiKeys?.gemini_api_key,
106
112
  systemMessage: dedent `
107
113
  You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.
108
114
 
@@ -227,14 +233,16 @@ async function setupVoiceHandling({ connection, guildId, channelId, }) {
227
233
  .on('data', (frame) => {
228
234
  // Check if a newer speaking session has started
229
235
  if (currentSessionCount !== speakingSessionCount) {
230
- voiceLogger.log(`Skipping audio frame from session ${currentSessionCount} because newer session ${speakingSessionCount} has started`);
236
+ // voiceLogger.log(
237
+ // `Skipping audio frame from session ${currentSessionCount} because newer session ${speakingSessionCount} has started`,
238
+ // )
231
239
  return;
232
240
  }
233
241
  if (!voiceData.genAiWorker) {
234
242
  voiceLogger.warn(`[VOICE] Received audio frame but no GenAI worker active for guild ${guildId}`);
235
243
  return;
236
244
  }
237
- voiceLogger.debug('User audio chunk length', frame.length);
245
+ // voiceLogger.debug('User audio chunk length', frame.length)
238
246
  // Write to PCM file if stream exists
239
247
  voiceData.userAudioStream?.write(frame);
240
248
  // stream incrementally — low latency
@@ -345,6 +353,13 @@ export function getDatabase() {
345
353
  channel_type TEXT NOT NULL,
346
354
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
347
355
  )
356
+ `);
357
+ db.exec(`
358
+ CREATE TABLE IF NOT EXISTS bot_api_keys (
359
+ app_id TEXT PRIMARY KEY,
360
+ gemini_api_key TEXT,
361
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
362
+ )
348
363
  `);
349
364
  }
350
365
  return db;
@@ -456,7 +471,7 @@ async function waitForServer(port, maxAttempts = 30) {
456
471
  }
457
472
  throw new Error(`Server did not start on port ${port} after ${maxAttempts} seconds`);
458
473
  }
459
- async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, }) {
474
+ async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, appId, }) {
460
475
  const audioAttachment = Array.from(message.attachments.values()).find((attachment) => attachment.contentType?.startsWith('audio/'));
461
476
  if (!audioAttachment)
462
477
  return null;
@@ -486,9 +501,20 @@ async function processVoiceAttachment({ message, thread, projectDirectory, isNew
486
501
  voiceLogger.log(`Could not get project tree:`, e);
487
502
  }
488
503
  }
504
+ // Get Gemini API key from database if appId is provided
505
+ let geminiApiKey;
506
+ if (appId) {
507
+ const apiKeys = getDatabase()
508
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
509
+ .get(appId);
510
+ if (apiKeys?.gemini_api_key) {
511
+ geminiApiKey = apiKeys.gemini_api_key;
512
+ }
513
+ }
489
514
  const transcription = await transcribeAudio({
490
515
  audio: audioBuffer,
491
516
  prompt: transcriptionPrompt,
517
+ geminiApiKey,
492
518
  });
493
519
  voiceLogger.log(`Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`);
494
520
  // Update thread name with transcribed content only for new threads
@@ -1231,6 +1257,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
1231
1257
  message,
1232
1258
  thread,
1233
1259
  projectDirectory,
1260
+ appId: currentAppId,
1234
1261
  });
1235
1262
  if (transcription) {
1236
1263
  messageContent = transcription;
@@ -1289,6 +1316,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
1289
1316
  thread,
1290
1317
  projectDirectory,
1291
1318
  isNewThread: true,
1319
+ appId: currentAppId,
1292
1320
  });
1293
1321
  if (transcription) {
1294
1322
  messageContent = transcription;
@@ -1649,6 +1677,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
1649
1677
  connection,
1650
1678
  guildId: newState.guild.id,
1651
1679
  channelId: voiceChannel.id,
1680
+ appId: currentAppId,
1652
1681
  });
1653
1682
  // Handle connection state changes
1654
1683
  connection.on(VoiceConnectionStatus.Disconnected, async () => {
@@ -98,6 +98,8 @@ export function createGenAIWorker(options) {
98
98
  systemMessage: options.systemMessage,
99
99
  guildId: options.guildId,
100
100
  channelId: options.channelId,
101
+ appId: options.appId,
102
+ geminiApiKey: options.geminiApiKey,
101
103
  };
102
104
  worker.postMessage(initMessage);
103
105
  });
@@ -210,6 +210,7 @@ parentPort.on('message', async (message) => {
210
210
  session = await startGenAiSession({
211
211
  tools,
212
212
  systemMessage: message.systemMessage,
213
+ geminiApiKey: message.geminiApiKey,
213
214
  onAssistantAudioChunk({ data }) {
214
215
  // Write to audio log if enabled
215
216
  if (audioLogStream && !audioLogStream.destroyed) {
package/dist/genai.js CHANGED
@@ -68,7 +68,7 @@ function defaultAudioChunkHandler({ data, mimeType, }) {
68
68
  const buffer = convertToWav(audioParts, mimeType);
69
69
  saveBinaryFile(fileName, buffer);
70
70
  }
71
- export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, } = {}) {
71
+ export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, geminiApiKey, } = {}) {
72
72
  let session = undefined;
73
73
  const callableTools = [];
74
74
  let isAssistantSpeaking = false;
@@ -161,8 +161,13 @@ export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStar
161
161
  }
162
162
  }
163
163
  }
164
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
165
+ if (!apiKey) {
166
+ genaiLogger.error('No Gemini API key provided');
167
+ throw new Error('Gemini API key is required for voice interactions');
168
+ }
164
169
  const ai = new GoogleGenAI({
165
- apiKey: process.env.GEMINI_API_KEY,
170
+ apiKey,
166
171
  });
167
172
  const model = 'models/gemini-2.5-flash-live-preview';
168
173
  session = await ai.live.connect({
package/dist/utils.js CHANGED
@@ -28,25 +28,14 @@ export function generateBotInstallUrl({ clientId, permissions = [
28
28
  }
29
29
  return url.toString();
30
30
  }
31
- function getRequiredBotPermissions() {
32
- return [
33
- PermissionsBitField.Flags.ViewChannel,
34
- PermissionsBitField.Flags.ManageChannels,
35
- PermissionsBitField.Flags.SendMessages,
36
- PermissionsBitField.Flags.SendMessagesInThreads,
37
- PermissionsBitField.Flags.CreatePublicThreads,
38
- PermissionsBitField.Flags.ManageThreads,
39
- PermissionsBitField.Flags.ReadMessageHistory,
40
- PermissionsBitField.Flags.AddReactions,
41
- PermissionsBitField.Flags.ManageMessages,
42
- PermissionsBitField.Flags.UseExternalEmojis,
43
- PermissionsBitField.Flags.AttachFiles,
44
- PermissionsBitField.Flags.Connect,
45
- PermissionsBitField.Flags.Speak,
46
- ];
47
- }
48
- function getPermissionNames() {
49
- const permissions = getRequiredBotPermissions();
50
- const permissionsBitField = new PermissionsBitField(permissions);
51
- return permissionsBitField.toArray();
31
+ export function deduplicateByKey(arr, keyFn) {
32
+ const seen = new Set();
33
+ return arr.filter(item => {
34
+ const key = keyFn(item);
35
+ if (seen.has(key)) {
36
+ return false;
37
+ }
38
+ seen.add(key);
39
+ return true;
40
+ });
52
41
  }
package/dist/voice.js CHANGED
@@ -1,25 +1,60 @@
1
- import { openai } from '@ai-sdk/openai';
2
- import { experimental_transcribe as transcribe } from 'ai';
1
+ import { GoogleGenAI } from '@google/genai';
3
2
  import { createLogger } from './logger.js';
4
3
  const voiceLogger = createLogger('VOICE');
5
- export async function transcribeAudio({ audio, prompt, language, temperature, }) {
4
+ export async function transcribeAudio({ audio, prompt, language, temperature, geminiApiKey, }) {
6
5
  try {
7
- const result = await transcribe({
8
- model: openai.transcription('whisper-1'),
9
- audio,
10
- ...(prompt || language || temperature !== undefined
11
- ? {
12
- providerOptions: {
13
- openai: {
14
- ...(prompt && { prompt }),
15
- ...(language && { language }),
16
- ...(temperature !== undefined && { temperature }),
6
+ // Use provided API key or fall back to environment variable
7
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
8
+ if (!apiKey) {
9
+ throw new Error('Gemini API key is required for audio transcription');
10
+ }
11
+ // Initialize Google Generative AI
12
+ const genAI = new GoogleGenAI({ apiKey });
13
+ // Convert audio to base64 string if it's not already
14
+ let audioBase64;
15
+ if (typeof audio === 'string') {
16
+ audioBase64 = audio;
17
+ }
18
+ else if (audio instanceof Buffer) {
19
+ audioBase64 = audio.toString('base64');
20
+ }
21
+ else if (audio instanceof Uint8Array) {
22
+ audioBase64 = Buffer.from(audio).toString('base64');
23
+ }
24
+ else if (audio instanceof ArrayBuffer) {
25
+ audioBase64 = Buffer.from(audio).toString('base64');
26
+ }
27
+ else {
28
+ throw new Error('Invalid audio format');
29
+ }
30
+ // Build the transcription prompt
31
+ let transcriptionPrompt = `Please transcribe this audio file accurately. Here is some relevant information and filenames that may be present in the audio:\n<context>\n${prompt}\n</context>\n`;
32
+ if (language) {
33
+ transcriptionPrompt += `\nThe audio is in ${language}.`;
34
+ }
35
+ // Create the content with audio using the inline data format
36
+ const response = await genAI.models.generateContent({
37
+ model: 'gemini-2.5-flash',
38
+ contents: [
39
+ {
40
+ parts: [
41
+ { text: transcriptionPrompt },
42
+ {
43
+ inlineData: {
44
+ data: audioBase64,
45
+ mimeType: 'audio/mpeg',
46
+ },
17
47
  },
18
- },
48
+ ],
49
+ },
50
+ ],
51
+ config: temperature !== undefined
52
+ ? {
53
+ temperature,
19
54
  }
20
- : {}),
55
+ : undefined,
21
56
  });
22
- return result.text;
57
+ return response.text || '';
23
58
  }
24
59
  catch (error) {
25
60
  voiceLogger.error('Failed to transcribe audio:', error);
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "kimaki",
3
3
  "module": "index.ts",
4
4
  "type": "module",
5
- "version": "0.1.2",
5
+ "version": "0.1.4",
6
6
  "repository": "https://github.com/remorses/kimaki",
7
7
  "bin": "bin.js",
8
8
  "files": [
@@ -19,7 +19,7 @@
19
19
  "tsx": "^4.20.5"
20
20
  },
21
21
  "dependencies": {
22
- "@ai-sdk/openai": "^2.0.23",
22
+ "@ai-sdk/google": "^2.0.16",
23
23
  "@clack/prompts": "^0.11.0",
24
24
  "@discordjs/opus": "^0.10.0",
25
25
  "@discordjs/voice": "^0.19.0",
package/src/cli.ts CHANGED
@@ -8,11 +8,12 @@ import {
8
8
  note,
9
9
  cancel,
10
10
  isCancel,
11
+ confirm,
11
12
  log,
12
13
  multiselect,
13
14
  spinner,
14
15
  } from '@clack/prompts'
15
- import { generateBotInstallUrl } from './utils.js'
16
+ import { deduplicateByKey, generateBotInstallUrl } from './utils.js'
16
17
  import {
17
18
  getChannelsWithDescriptions,
18
19
  createDiscordClient,
@@ -138,7 +139,6 @@ async function ensureKimakiCategory(guild: Guild): Promise<CategoryChannel> {
138
139
 
139
140
  async function run({ restart, addChannels }: CliOptions) {
140
141
  const forceSetup = Boolean(restart)
141
- const shouldAddChannels = Boolean(addChannels)
142
142
 
143
143
  intro('🤖 Discord Bot Setup')
144
144
 
@@ -152,6 +152,9 @@ async function run({ restart, addChannels }: CliOptions) {
152
152
  )
153
153
  .get() as { app_id: string; token: string } | undefined
154
154
 
155
+ const shouldAddChannels =
156
+ !existingBot?.token || forceSetup || Boolean(addChannels)
157
+
155
158
  if (existingBot && !forceSetup) {
156
159
  appId = existingBot.app_id
157
160
  token = existingBot.token
@@ -196,13 +199,33 @@ async function run({ restart, addChannels }: CliOptions) {
196
199
 
197
200
  note(
198
201
  '1. Go to the "Bot" section in the left sidebar\n' +
199
- '2. Click "Reset Token" to generate a new bot token\n' +
200
- "3. Copy the token (you won't be able to see it again!)",
201
- 'Step 2: Get Bot Token',
202
+ '2. Scroll down to "Privileged Gateway Intents"\n' +
203
+ '3. Enable these intents by toggling them ON:\n' +
204
+ ' • SERVER MEMBERS INTENT\n' +
205
+ ' • MESSAGE CONTENT INTENT\n' +
206
+ '4. Click "Save Changes" at the bottom',
207
+ 'Step 2: Enable Required Intents',
202
208
  )
203
209
 
210
+ const intentsConfirmed = await text({
211
+ message: 'Press Enter after enabling both intents:',
212
+ placeholder: 'Enter',
213
+ })
214
+
215
+ if (isCancel(intentsConfirmed)) {
216
+ cancel('Setup cancelled')
217
+ process.exit(0)
218
+ }
219
+
220
+ note(
221
+ '1. Still in the "Bot" section\n' +
222
+ '2. Click "Reset Token" to generate a new bot token (in case of errors try again)\n' +
223
+ "3. Copy the token (you won't be able to see it again!)",
224
+ 'Step 3: Get Bot Token',
225
+ )
204
226
  const tokenInput = await password({
205
- message: 'Enter your Discord Bot Token (will be hidden):',
227
+ message:
228
+ 'Enter your Discord Bot Token (from "Bot" section - click "Reset Token" if needed):',
206
229
  validate(value) {
207
230
  if (!value) return 'Bot token is required'
208
231
  if (value.length < 50) return 'Invalid token format (too short)'
@@ -215,23 +238,41 @@ async function run({ restart, addChannels }: CliOptions) {
215
238
  }
216
239
  token = tokenInput
217
240
 
218
- db.prepare(
219
- 'INSERT OR REPLACE INTO bot_tokens (app_id, token) VALUES (?, ?)',
220
- ).run(appId, token)
241
+ note(
242
+ `You can get a Gemini api Key at https://aistudio.google.com/apikey`,
243
+ `Gemini API Key`,
244
+ )
221
245
 
222
- note('Token saved to database', 'Credentials Stored')
246
+ const geminiApiKey = await password({
247
+ message:
248
+ 'Enter your Gemini API Key for voice channels and audio transcription (optional, press Enter to skip):',
249
+ validate(value) {
250
+ if (value && value.length < 10) return 'Invalid API key format'
251
+ return undefined
252
+ },
253
+ })
254
+
255
+ if (isCancel(geminiApiKey)) {
256
+ cancel('Setup cancelled')
257
+ process.exit(0)
258
+ }
259
+
260
+ // Store API key in database
261
+ if (geminiApiKey) {
262
+ db.prepare(
263
+ 'INSERT OR REPLACE INTO bot_api_keys (app_id, gemini_api_key) VALUES (?, ?)',
264
+ ).run(appId, geminiApiKey || null)
265
+ note('API key saved successfully', 'API Key Stored')
266
+ }
223
267
 
224
268
  note(
225
269
  `Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`,
226
- 'Step 3: Install Bot to Server',
270
+ 'Step 4: Install Bot to Server',
227
271
  )
228
272
 
229
273
  const installed = await text({
230
274
  message: 'Press Enter AFTER you have installed the bot in your server:',
231
- placeholder: 'Press Enter to continue',
232
- validate() {
233
- return undefined
234
- },
275
+ placeholder: 'Enter',
235
276
  })
236
277
 
237
278
  if (isCancel(installed)) {
@@ -282,6 +323,9 @@ async function run({ restart, addChannels }: CliOptions) {
282
323
  )
283
324
  process.exit(EXIT_NO_RESTART)
284
325
  }
326
+ db.prepare(
327
+ 'INSERT OR REPLACE INTO bot_tokens (app_id, token) VALUES (?, ?)',
328
+ ).run(appId, token)
285
329
 
286
330
  for (const { guild, channels } of kimakiChannels) {
287
331
  for (const channel of channels) {
@@ -350,11 +394,15 @@ async function run({ restart, addChannels }: CliOptions) {
350
394
  }
351
395
 
352
396
  const existingDirs = kimakiChannels.flatMap(({ channels }) =>
353
- channels.map((ch) => ch.kimakiDirectory).filter(Boolean),
397
+ channels
398
+ .filter((ch) => ch.kimakiDirectory && ch.kimakiApp === appId)
399
+ .map((ch) => ch.kimakiDirectory)
400
+ .filter(Boolean),
354
401
  )
355
402
 
356
- const availableProjects = projects.filter(
357
- (project) => !existingDirs.includes(project.worktree),
403
+ const availableProjects = deduplicateByKey(
404
+ projects.filter((project) => !existingDirs.includes(project.worktree)),
405
+ (x) => x.worktree,
358
406
  )
359
407
 
360
408
  if (availableProjects.length === 0) {
@@ -364,7 +412,10 @@ async function run({ restart, addChannels }: CliOptions) {
364
412
  )
365
413
  }
366
414
 
367
- if (shouldAddChannels && availableProjects.length > 0) {
415
+ if (
416
+ (!existingDirs?.length && availableProjects.length > 0) ||
417
+ shouldAddChannels
418
+ ) {
368
419
  const selectedProjects = await multiselect({
369
420
  message: 'Select projects to create Discord channels for:',
370
421
  options: availableProjects.map((project) => ({
@@ -410,7 +461,7 @@ async function run({ restart, addChannels }: CliOptions) {
410
461
  if (!project) continue
411
462
 
412
463
  const baseName = path.basename(project.worktree)
413
- const channelName = `kimaki-${baseName}`
464
+ const channelName = `${baseName}`
414
465
  .toLowerCase()
415
466
  .replace(/[^a-z0-9-]/g, '-')
416
467
  .slice(0, 100)
package/src/discordBot.ts CHANGED
@@ -152,10 +152,12 @@ async function setupVoiceHandling({
152
152
  connection,
153
153
  guildId,
154
154
  channelId,
155
+ appId,
155
156
  }: {
156
157
  connection: VoiceConnection
157
158
  guildId: string
158
159
  channelId: string
160
+ appId: string
159
161
  }) {
160
162
  voiceLogger.log(
161
163
  `Setting up voice handling for guild ${guildId}, channel ${channelId}`,
@@ -188,11 +190,18 @@ async function setupVoiceHandling({
188
190
  // Create user audio stream for debugging
189
191
  voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId)
190
192
 
193
+ // Get API keys from database
194
+ const apiKeys = getDatabase()
195
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
196
+ .get(appId) as { gemini_api_key: string | null } | undefined
197
+
191
198
  // Create GenAI worker
192
199
  const genAiWorker = await createGenAIWorker({
193
200
  directory,
194
201
  guildId,
195
202
  channelId,
203
+ appId,
204
+ geminiApiKey: apiKeys?.gemini_api_key,
196
205
  systemMessage: dedent`
197
206
  You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.
198
207
 
@@ -334,9 +343,9 @@ async function setupVoiceHandling({
334
343
  .on('data', (frame: Buffer) => {
335
344
  // Check if a newer speaking session has started
336
345
  if (currentSessionCount !== speakingSessionCount) {
337
- voiceLogger.log(
338
- `Skipping audio frame from session ${currentSessionCount} because newer session ${speakingSessionCount} has started`,
339
- )
346
+ // voiceLogger.log(
347
+ // `Skipping audio frame from session ${currentSessionCount} because newer session ${speakingSessionCount} has started`,
348
+ // )
340
349
  return
341
350
  }
342
351
 
@@ -346,7 +355,7 @@ async function setupVoiceHandling({
346
355
  )
347
356
  return
348
357
  }
349
- voiceLogger.debug('User audio chunk length', frame.length)
358
+ // voiceLogger.debug('User audio chunk length', frame.length)
350
359
 
351
360
  // Write to PCM file if stream exists
352
361
  voiceData.userAudioStream?.write(frame)
@@ -480,6 +489,14 @@ export function getDatabase(): Database.Database {
480
489
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
481
490
  )
482
491
  `)
492
+
493
+ db.exec(`
494
+ CREATE TABLE IF NOT EXISTS bot_api_keys (
495
+ app_id TEXT PRIMARY KEY,
496
+ gemini_api_key TEXT,
497
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
498
+ )
499
+ `)
483
500
  }
484
501
 
485
502
  return db
@@ -614,11 +631,13 @@ async function processVoiceAttachment({
614
631
  thread,
615
632
  projectDirectory,
616
633
  isNewThread = false,
634
+ appId,
617
635
  }: {
618
636
  message: Message
619
637
  thread: ThreadChannel
620
638
  projectDirectory?: string
621
639
  isNewThread?: boolean
640
+ appId?: string
622
641
  }): Promise<string | null> {
623
642
  const audioAttachment = Array.from(message.attachments.values()).find(
624
643
  (attachment) => attachment.contentType?.startsWith('audio/'),
@@ -660,9 +679,22 @@ async function processVoiceAttachment({
660
679
  }
661
680
  }
662
681
 
682
+ // Get Gemini API key from database if appId is provided
683
+ let geminiApiKey: string | undefined
684
+ if (appId) {
685
+ const apiKeys = getDatabase()
686
+ .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
687
+ .get(appId) as { gemini_api_key: string | null } | undefined
688
+
689
+ if (apiKeys?.gemini_api_key) {
690
+ geminiApiKey = apiKeys.gemini_api_key
691
+ }
692
+ }
693
+
663
694
  const transcription = await transcribeAudio({
664
695
  audio: audioBuffer,
665
696
  prompt: transcriptionPrompt,
697
+ geminiApiKey,
666
698
  })
667
699
 
668
700
  voiceLogger.log(
@@ -1635,6 +1667,7 @@ export async function startDiscordBot({
1635
1667
  message,
1636
1668
  thread,
1637
1669
  projectDirectory,
1670
+ appId: currentAppId,
1638
1671
  })
1639
1672
  if (transcription) {
1640
1673
  messageContent = transcription
@@ -1727,6 +1760,7 @@ export async function startDiscordBot({
1727
1760
  thread,
1728
1761
  projectDirectory,
1729
1762
  isNewThread: true,
1763
+ appId: currentAppId,
1730
1764
  })
1731
1765
  if (transcription) {
1732
1766
  messageContent = transcription
@@ -1950,7 +1984,7 @@ export async function startDiscordBot({
1950
1984
  return ''
1951
1985
  })
1952
1986
  .filter((t) => t.trim())
1953
-
1987
+
1954
1988
  const userText = userTexts.join('\n\n')
1955
1989
  if (userText) {
1956
1990
  // Escape backticks in user messages to prevent formatting issues
@@ -2224,6 +2258,7 @@ export async function startDiscordBot({
2224
2258
  connection,
2225
2259
  guildId: newState.guild.id,
2226
2260
  channelId: voiceChannel.id,
2261
+ appId: currentAppId!,
2227
2262
  })
2228
2263
 
2229
2264
  // Handle connection state changes
@@ -11,6 +11,8 @@ export interface GenAIWorkerOptions {
11
11
  systemMessage?: string
12
12
  guildId: string
13
13
  channelId: string
14
+ appId: string
15
+ geminiApiKey?: string | null
14
16
  onAssistantOpusPacket: (packet: ArrayBuffer) => void
15
17
  onAssistantStartSpeaking?: () => void
16
18
  onAssistantStopSpeaking?: () => void
@@ -146,6 +148,8 @@ export function createGenAIWorker(
146
148
  systemMessage: options.systemMessage,
147
149
  guildId: options.guildId,
148
150
  channelId: options.channelId,
151
+ appId: options.appId,
152
+ geminiApiKey: options.geminiApiKey,
149
153
  }
150
154
  worker.postMessage(initMessage)
151
155
  })
@@ -271,6 +271,7 @@ parentPort.on('message', async (message: WorkerInMessage) => {
271
271
  session = await startGenAiSession({
272
272
  tools,
273
273
  systemMessage: message.systemMessage,
274
+ geminiApiKey: message.geminiApiKey,
274
275
  onAssistantAudioChunk({ data }) {
275
276
  // Write to audio log if enabled
276
277
  if (audioLogStream && !audioLogStream.destroyed) {
package/src/genai.ts CHANGED
@@ -113,6 +113,7 @@ export async function startGenAiSession({
113
113
  onAssistantInterruptSpeaking,
114
114
  systemMessage,
115
115
  tools,
116
+ geminiApiKey,
116
117
  }: {
117
118
  onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
118
119
  onAssistantStartSpeaking?: () => void
@@ -120,6 +121,7 @@ export async function startGenAiSession({
120
121
  onAssistantInterruptSpeaking?: () => void
121
122
  systemMessage?: string
122
123
  tools?: Record<string, AITool<any, any>>
124
+ geminiApiKey?: string | null
123
125
  } = {}) {
124
126
  let session: Session | undefined = undefined
125
127
  const callableTools: Array<CallableTool & { name: string }> = []
@@ -242,8 +244,15 @@ export async function startGenAiSession({
242
244
  }
243
245
  }
244
246
 
247
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
248
+
249
+ if (!apiKey) {
250
+ genaiLogger.error('No Gemini API key provided')
251
+ throw new Error('Gemini API key is required for voice interactions')
252
+ }
253
+
245
254
  const ai = new GoogleGenAI({
246
- apiKey: process.env.GEMINI_API_KEY,
255
+ apiKey,
247
256
  })
248
257
 
249
258
  const model = 'models/gemini-2.5-flash-live-preview'
package/src/utils.ts CHANGED
@@ -48,26 +48,15 @@ export function generateBotInstallUrl({
48
48
  return url.toString()
49
49
  }
50
50
 
51
- function getRequiredBotPermissions(): bigint[] {
52
- return [
53
- PermissionsBitField.Flags.ViewChannel,
54
- PermissionsBitField.Flags.ManageChannels,
55
- PermissionsBitField.Flags.SendMessages,
56
- PermissionsBitField.Flags.SendMessagesInThreads,
57
- PermissionsBitField.Flags.CreatePublicThreads,
58
- PermissionsBitField.Flags.ManageThreads,
59
- PermissionsBitField.Flags.ReadMessageHistory,
60
- PermissionsBitField.Flags.AddReactions,
61
- PermissionsBitField.Flags.ManageMessages,
62
- PermissionsBitField.Flags.UseExternalEmojis,
63
- PermissionsBitField.Flags.AttachFiles,
64
- PermissionsBitField.Flags.Connect,
65
- PermissionsBitField.Flags.Speak,
66
- ]
67
- }
68
51
 
69
- function getPermissionNames(): string[] {
70
- const permissions = getRequiredBotPermissions()
71
- const permissionsBitField = new PermissionsBitField(permissions)
72
- return permissionsBitField.toArray()
52
+ export function deduplicateByKey<T, K>(arr: T[], keyFn: (item: T) => K): T[] {
53
+ const seen = new Set<K>()
54
+ return arr.filter(item => {
55
+ const key = keyFn(item)
56
+ if (seen.has(key)) {
57
+ return false
58
+ }
59
+ seen.add(key)
60
+ return true
61
+ })
73
62
  }
package/src/voice.ts CHANGED
@@ -1,5 +1,4 @@
1
- import { openai } from '@ai-sdk/openai'
2
- import { experimental_transcribe as transcribe } from 'ai'
1
+ import { GoogleGenAI } from '@google/genai'
3
2
  import { createLogger } from './logger.js'
4
3
 
5
4
  const voiceLogger = createLogger('VOICE')
@@ -9,30 +8,70 @@ export async function transcribeAudio({
9
8
  prompt,
10
9
  language,
11
10
  temperature,
11
+ geminiApiKey,
12
12
  }: {
13
13
  audio: Buffer | Uint8Array | ArrayBuffer | string
14
14
  prompt?: string
15
15
  language?: string
16
16
  temperature?: number
17
+ geminiApiKey?: string
17
18
  }): Promise<string> {
18
19
  try {
19
- const result = await transcribe({
20
- model: openai.transcription('whisper-1'),
21
- audio,
22
- ...(prompt || language || temperature !== undefined
23
- ? {
24
- providerOptions: {
25
- openai: {
26
- ...(prompt && { prompt }),
27
- ...(language && { language }),
28
- ...(temperature !== undefined && { temperature }),
20
+ // Use provided API key or fall back to environment variable
21
+ const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
22
+
23
+ if (!apiKey) {
24
+ throw new Error('Gemini API key is required for audio transcription')
25
+ }
26
+
27
+ // Initialize Google Generative AI
28
+ const genAI = new GoogleGenAI({ apiKey })
29
+
30
+ // Convert audio to base64 string if it's not already
31
+ let audioBase64: string
32
+ if (typeof audio === 'string') {
33
+ audioBase64 = audio
34
+ } else if (audio instanceof Buffer) {
35
+ audioBase64 = audio.toString('base64')
36
+ } else if (audio instanceof Uint8Array) {
37
+ audioBase64 = Buffer.from(audio).toString('base64')
38
+ } else if (audio instanceof ArrayBuffer) {
39
+ audioBase64 = Buffer.from(audio).toString('base64')
40
+ } else {
41
+ throw new Error('Invalid audio format')
42
+ }
43
+
44
+ // Build the transcription prompt
45
+ let transcriptionPrompt = `Please transcribe this audio file accurately. Here is some relevant information and filenames that may be present in the audio:\n<context>\n${prompt}\n</context>\n`
46
+ if (language) {
47
+ transcriptionPrompt += `\nThe audio is in ${language}.`
48
+ }
49
+
50
+ // Create the content with audio using the inline data format
51
+ const response = await genAI.models.generateContent({
52
+ model: 'gemini-2.5-flash',
53
+ contents: [
54
+ {
55
+ parts: [
56
+ { text: transcriptionPrompt },
57
+ {
58
+ inlineData: {
59
+ data: audioBase64,
60
+ mimeType: 'audio/mpeg',
29
61
  },
30
62
  },
31
- }
32
- : {}),
63
+ ],
64
+ },
65
+ ],
66
+ config:
67
+ temperature !== undefined
68
+ ? {
69
+ temperature,
70
+ }
71
+ : undefined,
33
72
  })
34
73
 
35
- return result.text
74
+ return response.text || ''
36
75
  } catch (error) {
37
76
  voiceLogger.error('Failed to transcribe audio:', error)
38
77
  throw new Error(
@@ -8,6 +8,8 @@ export type WorkerInMessage =
8
8
  systemMessage?: string
9
9
  guildId: string
10
10
  channelId: string
11
+ appId: string
12
+ geminiApiKey?: string | null
11
13
  }
12
14
  | {
13
15
  type: 'sendRealtimeInput'