kimaki 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +19 -1
- package/dist/discordBot.js +45 -4
- package/dist/genai-worker-wrapper.js +2 -0
- package/dist/genai-worker.js +1 -0
- package/dist/genai.js +7 -2
- package/dist/voice.js +51 -16
- package/package.json +2 -2
- package/src/cli.ts +29 -2
- package/src/discordBot.ts +54 -2
- package/src/genai-worker-wrapper.ts +4 -0
- package/src/genai-worker.ts +1 -0
- package/src/genai.ts +10 -1
- package/src/voice.ts +54 -15
- package/src/worker-types.ts +2 -0
package/dist/cli.js
CHANGED
|
@@ -129,7 +129,7 @@ async function run({ restart, addChannels }) {
|
|
|
129
129
|
'2. Click "Reset Token" to generate a new bot token (in case of errors try again)\n' +
|
|
130
130
|
"3. Copy the token (you won't be able to see it again!)", 'Step 3: Get Bot Token');
|
|
131
131
|
const tokenInput = await password({
|
|
132
|
-
message: 'Enter your Discord Bot Token (
|
|
132
|
+
message: 'Enter your Discord Bot Token (from "Bot" section - click "Reset Token" if needed):',
|
|
133
133
|
validate(value) {
|
|
134
134
|
if (!value)
|
|
135
135
|
return 'Bot token is required';
|
|
@@ -142,6 +142,24 @@ async function run({ restart, addChannels }) {
|
|
|
142
142
|
process.exit(0);
|
|
143
143
|
}
|
|
144
144
|
token = tokenInput;
|
|
145
|
+
note(`You can get a Gemini api Key at https://aistudio.google.com/apikey`, `Gemini API Key`);
|
|
146
|
+
const geminiApiKey = await password({
|
|
147
|
+
message: 'Enter your Gemini API Key for voice channels and audio transcription (optional, press Enter to skip):',
|
|
148
|
+
validate(value) {
|
|
149
|
+
if (value && value.length < 10)
|
|
150
|
+
return 'Invalid API key format';
|
|
151
|
+
return undefined;
|
|
152
|
+
},
|
|
153
|
+
});
|
|
154
|
+
if (isCancel(geminiApiKey)) {
|
|
155
|
+
cancel('Setup cancelled');
|
|
156
|
+
process.exit(0);
|
|
157
|
+
}
|
|
158
|
+
// Store API key in database
|
|
159
|
+
if (geminiApiKey) {
|
|
160
|
+
db.prepare('INSERT OR REPLACE INTO bot_api_keys (app_id, gemini_api_key) VALUES (?, ?)').run(appId, geminiApiKey || null);
|
|
161
|
+
note('API key saved successfully', 'API Key Stored');
|
|
162
|
+
}
|
|
145
163
|
note(`Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`, 'Step 4: Install Bot to Server');
|
|
146
164
|
const installed = await text({
|
|
147
165
|
message: 'Press Enter AFTER you have installed the bot in your server:',
|
package/dist/discordBot.js
CHANGED
|
@@ -8,6 +8,7 @@ import { spawn, exec } from 'node:child_process';
|
|
|
8
8
|
import fs, { createWriteStream } from 'node:fs';
|
|
9
9
|
import { mkdir } from 'node:fs/promises';
|
|
10
10
|
import net from 'node:net';
|
|
11
|
+
import os from 'node:os';
|
|
11
12
|
import path from 'node:path';
|
|
12
13
|
import { promisify } from 'node:util';
|
|
13
14
|
import { PassThrough, Transform } from 'node:stream';
|
|
@@ -78,7 +79,7 @@ async function createUserAudioLogStream(guildId, channelId) {
|
|
|
78
79
|
}
|
|
79
80
|
}
|
|
80
81
|
// Set up voice handling for a connection (called once per connection)
|
|
81
|
-
async function setupVoiceHandling({ connection, guildId, channelId, }) {
|
|
82
|
+
async function setupVoiceHandling({ connection, guildId, channelId, appId, }) {
|
|
82
83
|
voiceLogger.log(`Setting up voice handling for guild ${guildId}, channel ${channelId}`);
|
|
83
84
|
// Check if this voice channel has an associated directory
|
|
84
85
|
const channelDirRow = getDatabase()
|
|
@@ -98,11 +99,17 @@ async function setupVoiceHandling({ connection, guildId, channelId, }) {
|
|
|
98
99
|
}
|
|
99
100
|
// Create user audio stream for debugging
|
|
100
101
|
voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId);
|
|
102
|
+
// Get API keys from database
|
|
103
|
+
const apiKeys = getDatabase()
|
|
104
|
+
.prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
|
|
105
|
+
.get(appId);
|
|
101
106
|
// Create GenAI worker
|
|
102
107
|
const genAiWorker = await createGenAIWorker({
|
|
103
108
|
directory,
|
|
104
109
|
guildId,
|
|
105
110
|
channelId,
|
|
111
|
+
appId,
|
|
112
|
+
geminiApiKey: apiKeys?.gemini_api_key,
|
|
106
113
|
systemMessage: dedent `
|
|
107
114
|
You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.
|
|
108
115
|
|
|
@@ -316,7 +323,17 @@ export function frameMono16khz() {
|
|
|
316
323
|
}
|
|
317
324
|
export function getDatabase() {
|
|
318
325
|
if (!db) {
|
|
319
|
-
|
|
326
|
+
// Create ~/.kimaki directory if it doesn't exist
|
|
327
|
+
const kimakiDir = path.join(os.homedir(), '.kimaki');
|
|
328
|
+
try {
|
|
329
|
+
fs.mkdirSync(kimakiDir, { recursive: true });
|
|
330
|
+
}
|
|
331
|
+
catch (error) {
|
|
332
|
+
dbLogger.error('Failed to create ~/.kimaki directory:', error);
|
|
333
|
+
}
|
|
334
|
+
const dbPath = path.join(kimakiDir, 'discord-sessions.db');
|
|
335
|
+
dbLogger.log(`Opening database at: ${dbPath}`);
|
|
336
|
+
db = new Database(dbPath);
|
|
320
337
|
// Initialize tables
|
|
321
338
|
db.exec(`
|
|
322
339
|
CREATE TABLE IF NOT EXISTS thread_sessions (
|
|
@@ -347,6 +364,13 @@ export function getDatabase() {
|
|
|
347
364
|
channel_type TEXT NOT NULL,
|
|
348
365
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
349
366
|
)
|
|
367
|
+
`);
|
|
368
|
+
db.exec(`
|
|
369
|
+
CREATE TABLE IF NOT EXISTS bot_api_keys (
|
|
370
|
+
app_id TEXT PRIMARY KEY,
|
|
371
|
+
gemini_api_key TEXT,
|
|
372
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
373
|
+
)
|
|
350
374
|
`);
|
|
351
375
|
}
|
|
352
376
|
return db;
|
|
@@ -458,7 +482,7 @@ async function waitForServer(port, maxAttempts = 30) {
|
|
|
458
482
|
}
|
|
459
483
|
throw new Error(`Server did not start on port ${port} after ${maxAttempts} seconds`);
|
|
460
484
|
}
|
|
461
|
-
async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, }) {
|
|
485
|
+
async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, appId, }) {
|
|
462
486
|
const audioAttachment = Array.from(message.attachments.values()).find((attachment) => attachment.contentType?.startsWith('audio/'));
|
|
463
487
|
if (!audioAttachment)
|
|
464
488
|
return null;
|
|
@@ -488,9 +512,20 @@ async function processVoiceAttachment({ message, thread, projectDirectory, isNew
|
|
|
488
512
|
voiceLogger.log(`Could not get project tree:`, e);
|
|
489
513
|
}
|
|
490
514
|
}
|
|
515
|
+
// Get Gemini API key from database if appId is provided
|
|
516
|
+
let geminiApiKey;
|
|
517
|
+
if (appId) {
|
|
518
|
+
const apiKeys = getDatabase()
|
|
519
|
+
.prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
|
|
520
|
+
.get(appId);
|
|
521
|
+
if (apiKeys?.gemini_api_key) {
|
|
522
|
+
geminiApiKey = apiKeys.gemini_api_key;
|
|
523
|
+
}
|
|
524
|
+
}
|
|
491
525
|
const transcription = await transcribeAudio({
|
|
492
526
|
audio: audioBuffer,
|
|
493
527
|
prompt: transcriptionPrompt,
|
|
528
|
+
geminiApiKey,
|
|
494
529
|
});
|
|
495
530
|
voiceLogger.log(`Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`);
|
|
496
531
|
// Update thread name with transcribed content only for new threads
|
|
@@ -1233,6 +1268,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
|
|
|
1233
1268
|
message,
|
|
1234
1269
|
thread,
|
|
1235
1270
|
projectDirectory,
|
|
1271
|
+
appId: currentAppId,
|
|
1236
1272
|
});
|
|
1237
1273
|
if (transcription) {
|
|
1238
1274
|
messageContent = transcription;
|
|
@@ -1291,6 +1327,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
|
|
|
1291
1327
|
thread,
|
|
1292
1328
|
projectDirectory,
|
|
1293
1329
|
isNewThread: true,
|
|
1330
|
+
appId: currentAppId,
|
|
1294
1331
|
});
|
|
1295
1332
|
if (transcription) {
|
|
1296
1333
|
messageContent = transcription;
|
|
@@ -1651,6 +1688,7 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
|
|
|
1651
1688
|
connection,
|
|
1652
1689
|
guildId: newState.guild.id,
|
|
1653
1690
|
channelId: voiceChannel.id,
|
|
1691
|
+
appId: currentAppId,
|
|
1654
1692
|
});
|
|
1655
1693
|
// Handle connection state changes
|
|
1656
1694
|
connection.on(VoiceConnectionStatus.Disconnected, async () => {
|
|
@@ -1721,7 +1759,10 @@ export async function startDiscordBot({ token, appId, discordClient, }) {
|
|
|
1721
1759
|
}
|
|
1722
1760
|
opencodeServers.clear();
|
|
1723
1761
|
discordLogger.log('Closing database...');
|
|
1724
|
-
|
|
1762
|
+
if (db) {
|
|
1763
|
+
db.close();
|
|
1764
|
+
db = null;
|
|
1765
|
+
}
|
|
1725
1766
|
discordLogger.log('Destroying Discord client...');
|
|
1726
1767
|
discordClient.destroy();
|
|
1727
1768
|
discordLogger.log('Cleanup complete, exiting.');
|
|
package/dist/genai-worker-wrapper.js
CHANGED
|
@@ -98,6 +98,8 @@ export function createGenAIWorker(options) {
|
|
|
98
98
|
systemMessage: options.systemMessage,
|
|
99
99
|
guildId: options.guildId,
|
|
100
100
|
channelId: options.channelId,
|
|
101
|
+
appId: options.appId,
|
|
102
|
+
geminiApiKey: options.geminiApiKey,
|
|
101
103
|
};
|
|
102
104
|
worker.postMessage(initMessage);
|
|
103
105
|
});
|
package/dist/genai-worker.js
CHANGED
|
@@ -210,6 +210,7 @@ parentPort.on('message', async (message) => {
|
|
|
210
210
|
session = await startGenAiSession({
|
|
211
211
|
tools,
|
|
212
212
|
systemMessage: message.systemMessage,
|
|
213
|
+
geminiApiKey: message.geminiApiKey,
|
|
213
214
|
onAssistantAudioChunk({ data }) {
|
|
214
215
|
// Write to audio log if enabled
|
|
215
216
|
if (audioLogStream && !audioLogStream.destroyed) {
|
package/dist/genai.js
CHANGED
|
@@ -68,7 +68,7 @@ function defaultAudioChunkHandler({ data, mimeType, }) {
|
|
|
68
68
|
const buffer = convertToWav(audioParts, mimeType);
|
|
69
69
|
saveBinaryFile(fileName, buffer);
|
|
70
70
|
}
|
|
71
|
-
export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, } = {}) {
|
|
71
|
+
export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, geminiApiKey, } = {}) {
|
|
72
72
|
let session = undefined;
|
|
73
73
|
const callableTools = [];
|
|
74
74
|
let isAssistantSpeaking = false;
|
|
@@ -161,8 +161,13 @@ export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStar
|
|
|
161
161
|
}
|
|
162
162
|
}
|
|
163
163
|
}
|
|
164
|
+
const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
|
|
165
|
+
if (!apiKey) {
|
|
166
|
+
genaiLogger.error('No Gemini API key provided');
|
|
167
|
+
throw new Error('Gemini API key is required for voice interactions');
|
|
168
|
+
}
|
|
164
169
|
const ai = new GoogleGenAI({
|
|
165
|
-
apiKey
|
|
170
|
+
apiKey,
|
|
166
171
|
});
|
|
167
172
|
const model = 'models/gemini-2.5-flash-live-preview';
|
|
168
173
|
session = await ai.live.connect({
|
package/dist/voice.js
CHANGED
|
@@ -1,25 +1,60 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { experimental_transcribe as transcribe } from 'ai';
|
|
1
|
+
import { GoogleGenAI } from '@google/genai';
|
|
3
2
|
import { createLogger } from './logger.js';
|
|
4
3
|
const voiceLogger = createLogger('VOICE');
|
|
5
|
-
export async function transcribeAudio({ audio, prompt, language, temperature, }) {
|
|
4
|
+
export async function transcribeAudio({ audio, prompt, language, temperature, geminiApiKey, }) {
|
|
6
5
|
try {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
6
|
+
// Use provided API key or fall back to environment variable
|
|
7
|
+
const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
|
|
8
|
+
if (!apiKey) {
|
|
9
|
+
throw new Error('Gemini API key is required for audio transcription');
|
|
10
|
+
}
|
|
11
|
+
// Initialize Google Generative AI
|
|
12
|
+
const genAI = new GoogleGenAI({ apiKey });
|
|
13
|
+
// Convert audio to base64 string if it's not already
|
|
14
|
+
let audioBase64;
|
|
15
|
+
if (typeof audio === 'string') {
|
|
16
|
+
audioBase64 = audio;
|
|
17
|
+
}
|
|
18
|
+
else if (audio instanceof Buffer) {
|
|
19
|
+
audioBase64 = audio.toString('base64');
|
|
20
|
+
}
|
|
21
|
+
else if (audio instanceof Uint8Array) {
|
|
22
|
+
audioBase64 = Buffer.from(audio).toString('base64');
|
|
23
|
+
}
|
|
24
|
+
else if (audio instanceof ArrayBuffer) {
|
|
25
|
+
audioBase64 = Buffer.from(audio).toString('base64');
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
throw new Error('Invalid audio format');
|
|
29
|
+
}
|
|
30
|
+
// Build the transcription prompt
|
|
31
|
+
let transcriptionPrompt = `Please transcribe this audio file accurately. Here is some relevant information and filenames that may be present in the audio:\n<context>\n${prompt}\n</context>\n`;
|
|
32
|
+
if (language) {
|
|
33
|
+
transcriptionPrompt += `\nThe audio is in ${language}.`;
|
|
34
|
+
}
|
|
35
|
+
// Create the content with audio using the inline data format
|
|
36
|
+
const response = await genAI.models.generateContent({
|
|
37
|
+
model: 'gemini-2.5-flash',
|
|
38
|
+
contents: [
|
|
39
|
+
{
|
|
40
|
+
parts: [
|
|
41
|
+
{ text: transcriptionPrompt },
|
|
42
|
+
{
|
|
43
|
+
inlineData: {
|
|
44
|
+
data: audioBase64,
|
|
45
|
+
mimeType: 'audio/mpeg',
|
|
46
|
+
},
|
|
17
47
|
},
|
|
18
|
-
|
|
48
|
+
],
|
|
49
|
+
},
|
|
50
|
+
],
|
|
51
|
+
config: temperature !== undefined
|
|
52
|
+
? {
|
|
53
|
+
temperature,
|
|
19
54
|
}
|
|
20
|
-
:
|
|
55
|
+
: undefined,
|
|
21
56
|
});
|
|
22
|
-
return
|
|
57
|
+
return response.text || '';
|
|
23
58
|
}
|
|
24
59
|
catch (error) {
|
|
25
60
|
voiceLogger.error('Failed to transcribe audio:', error);
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "kimaki",
|
|
3
3
|
"module": "index.ts",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"version": "0.1.3",
|
|
5
|
+
"version": "0.1.5",
|
|
6
6
|
"repository": "https://github.com/remorses/kimaki",
|
|
7
7
|
"bin": "bin.js",
|
|
8
8
|
"files": [
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"tsx": "^4.20.5"
|
|
20
20
|
},
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@ai-sdk/
|
|
22
|
+
"@ai-sdk/google": "^2.0.16",
|
|
23
23
|
"@clack/prompts": "^0.11.0",
|
|
24
24
|
"@discordjs/opus": "^0.10.0",
|
|
25
25
|
"@discordjs/voice": "^0.19.0",
|
package/src/cli.ts
CHANGED
|
@@ -223,9 +223,9 @@ async function run({ restart, addChannels }: CliOptions) {
|
|
|
223
223
|
"3. Copy the token (you won't be able to see it again!)",
|
|
224
224
|
'Step 3: Get Bot Token',
|
|
225
225
|
)
|
|
226
|
-
|
|
227
226
|
const tokenInput = await password({
|
|
228
|
-
message:
|
|
227
|
+
message:
|
|
228
|
+
'Enter your Discord Bot Token (from "Bot" section - click "Reset Token" if needed):',
|
|
229
229
|
validate(value) {
|
|
230
230
|
if (!value) return 'Bot token is required'
|
|
231
231
|
if (value.length < 50) return 'Invalid token format (too short)'
|
|
@@ -238,6 +238,33 @@ async function run({ restart, addChannels }: CliOptions) {
|
|
|
238
238
|
}
|
|
239
239
|
token = tokenInput
|
|
240
240
|
|
|
241
|
+
note(
|
|
242
|
+
`You can get a Gemini api Key at https://aistudio.google.com/apikey`,
|
|
243
|
+
`Gemini API Key`,
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
const geminiApiKey = await password({
|
|
247
|
+
message:
|
|
248
|
+
'Enter your Gemini API Key for voice channels and audio transcription (optional, press Enter to skip):',
|
|
249
|
+
validate(value) {
|
|
250
|
+
if (value && value.length < 10) return 'Invalid API key format'
|
|
251
|
+
return undefined
|
|
252
|
+
},
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
if (isCancel(geminiApiKey)) {
|
|
256
|
+
cancel('Setup cancelled')
|
|
257
|
+
process.exit(0)
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// Store API key in database
|
|
261
|
+
if (geminiApiKey) {
|
|
262
|
+
db.prepare(
|
|
263
|
+
'INSERT OR REPLACE INTO bot_api_keys (app_id, gemini_api_key) VALUES (?, ?)',
|
|
264
|
+
).run(appId, geminiApiKey || null)
|
|
265
|
+
note('API key saved successfully', 'API Key Stored')
|
|
266
|
+
}
|
|
267
|
+
|
|
241
268
|
note(
|
|
242
269
|
`Bot install URL:\n${generateBotInstallUrl({ clientId: appId })}\n\nYou MUST install the bot in your Discord server before continuing.`,
|
|
243
270
|
'Step 4: Install Bot to Server',
|
package/src/discordBot.ts
CHANGED
|
@@ -34,6 +34,7 @@ import { spawn, exec, type ChildProcess } from 'node:child_process'
|
|
|
34
34
|
import fs, { createWriteStream } from 'node:fs'
|
|
35
35
|
import { mkdir } from 'node:fs/promises'
|
|
36
36
|
import net from 'node:net'
|
|
37
|
+
import os from 'node:os'
|
|
37
38
|
import path from 'node:path'
|
|
38
39
|
import { promisify } from 'node:util'
|
|
39
40
|
import { PassThrough, Transform, type TransformCallback } from 'node:stream'
|
|
@@ -152,10 +153,12 @@ async function setupVoiceHandling({
|
|
|
152
153
|
connection,
|
|
153
154
|
guildId,
|
|
154
155
|
channelId,
|
|
156
|
+
appId,
|
|
155
157
|
}: {
|
|
156
158
|
connection: VoiceConnection
|
|
157
159
|
guildId: string
|
|
158
160
|
channelId: string
|
|
161
|
+
appId: string
|
|
159
162
|
}) {
|
|
160
163
|
voiceLogger.log(
|
|
161
164
|
`Setting up voice handling for guild ${guildId}, channel ${channelId}`,
|
|
@@ -188,11 +191,18 @@ async function setupVoiceHandling({
|
|
|
188
191
|
// Create user audio stream for debugging
|
|
189
192
|
voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId)
|
|
190
193
|
|
|
194
|
+
// Get API keys from database
|
|
195
|
+
const apiKeys = getDatabase()
|
|
196
|
+
.prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
|
|
197
|
+
.get(appId) as { gemini_api_key: string | null } | undefined
|
|
198
|
+
|
|
191
199
|
// Create GenAI worker
|
|
192
200
|
const genAiWorker = await createGenAIWorker({
|
|
193
201
|
directory,
|
|
194
202
|
guildId,
|
|
195
203
|
channelId,
|
|
204
|
+
appId,
|
|
205
|
+
geminiApiKey: apiKeys?.gemini_api_key,
|
|
196
206
|
systemMessage: dedent`
|
|
197
207
|
You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.
|
|
198
208
|
|
|
@@ -444,7 +454,19 @@ export function frameMono16khz(): Transform {
|
|
|
444
454
|
|
|
445
455
|
export function getDatabase(): Database.Database {
|
|
446
456
|
if (!db) {
|
|
447
|
-
|
|
457
|
+
// Create ~/.kimaki directory if it doesn't exist
|
|
458
|
+
const kimakiDir = path.join(os.homedir(), '.kimaki')
|
|
459
|
+
|
|
460
|
+
try {
|
|
461
|
+
fs.mkdirSync(kimakiDir, { recursive: true })
|
|
462
|
+
} catch (error) {
|
|
463
|
+
dbLogger.error('Failed to create ~/.kimaki directory:', error)
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
const dbPath = path.join(kimakiDir, 'discord-sessions.db')
|
|
467
|
+
|
|
468
|
+
dbLogger.log(`Opening database at: ${dbPath}`)
|
|
469
|
+
db = new Database(dbPath)
|
|
448
470
|
|
|
449
471
|
// Initialize tables
|
|
450
472
|
db.exec(`
|
|
@@ -480,6 +502,14 @@ export function getDatabase(): Database.Database {
|
|
|
480
502
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
481
503
|
)
|
|
482
504
|
`)
|
|
505
|
+
|
|
506
|
+
db.exec(`
|
|
507
|
+
CREATE TABLE IF NOT EXISTS bot_api_keys (
|
|
508
|
+
app_id TEXT PRIMARY KEY,
|
|
509
|
+
gemini_api_key TEXT,
|
|
510
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
511
|
+
)
|
|
512
|
+
`)
|
|
483
513
|
}
|
|
484
514
|
|
|
485
515
|
return db
|
|
@@ -614,11 +644,13 @@ async function processVoiceAttachment({
|
|
|
614
644
|
thread,
|
|
615
645
|
projectDirectory,
|
|
616
646
|
isNewThread = false,
|
|
647
|
+
appId,
|
|
617
648
|
}: {
|
|
618
649
|
message: Message
|
|
619
650
|
thread: ThreadChannel
|
|
620
651
|
projectDirectory?: string
|
|
621
652
|
isNewThread?: boolean
|
|
653
|
+
appId?: string
|
|
622
654
|
}): Promise<string | null> {
|
|
623
655
|
const audioAttachment = Array.from(message.attachments.values()).find(
|
|
624
656
|
(attachment) => attachment.contentType?.startsWith('audio/'),
|
|
@@ -660,9 +692,22 @@ async function processVoiceAttachment({
|
|
|
660
692
|
}
|
|
661
693
|
}
|
|
662
694
|
|
|
695
|
+
// Get Gemini API key from database if appId is provided
|
|
696
|
+
let geminiApiKey: string | undefined
|
|
697
|
+
if (appId) {
|
|
698
|
+
const apiKeys = getDatabase()
|
|
699
|
+
.prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
|
|
700
|
+
.get(appId) as { gemini_api_key: string | null } | undefined
|
|
701
|
+
|
|
702
|
+
if (apiKeys?.gemini_api_key) {
|
|
703
|
+
geminiApiKey = apiKeys.gemini_api_key
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
|
|
663
707
|
const transcription = await transcribeAudio({
|
|
664
708
|
audio: audioBuffer,
|
|
665
709
|
prompt: transcriptionPrompt,
|
|
710
|
+
geminiApiKey,
|
|
666
711
|
})
|
|
667
712
|
|
|
668
713
|
voiceLogger.log(
|
|
@@ -1502,6 +1547,7 @@ export async function startDiscordBot({
|
|
|
1502
1547
|
discordLogger.log(`Bot Application ID (provided): ${currentAppId}`)
|
|
1503
1548
|
}
|
|
1504
1549
|
|
|
1550
|
+
|
|
1505
1551
|
// List all guilds and channels that belong to this bot
|
|
1506
1552
|
for (const guild of c.guilds.cache.values()) {
|
|
1507
1553
|
discordLogger.log(`${guild.name} (${guild.id})`)
|
|
@@ -1635,6 +1681,7 @@ export async function startDiscordBot({
|
|
|
1635
1681
|
message,
|
|
1636
1682
|
thread,
|
|
1637
1683
|
projectDirectory,
|
|
1684
|
+
appId: currentAppId,
|
|
1638
1685
|
})
|
|
1639
1686
|
if (transcription) {
|
|
1640
1687
|
messageContent = transcription
|
|
@@ -1727,6 +1774,7 @@ export async function startDiscordBot({
|
|
|
1727
1774
|
thread,
|
|
1728
1775
|
projectDirectory,
|
|
1729
1776
|
isNewThread: true,
|
|
1777
|
+
appId: currentAppId,
|
|
1730
1778
|
})
|
|
1731
1779
|
if (transcription) {
|
|
1732
1780
|
messageContent = transcription
|
|
@@ -2224,6 +2272,7 @@ export async function startDiscordBot({
|
|
|
2224
2272
|
connection,
|
|
2225
2273
|
guildId: newState.guild.id,
|
|
2226
2274
|
channelId: voiceChannel.id,
|
|
2275
|
+
appId: currentAppId!,
|
|
2227
2276
|
})
|
|
2228
2277
|
|
|
2229
2278
|
// Handle connection state changes
|
|
@@ -2313,7 +2362,10 @@ export async function startDiscordBot({
|
|
|
2313
2362
|
opencodeServers.clear()
|
|
2314
2363
|
|
|
2315
2364
|
discordLogger.log('Closing database...')
|
|
2316
|
-
|
|
2365
|
+
if (db) {
|
|
2366
|
+
db.close()
|
|
2367
|
+
db = null
|
|
2368
|
+
}
|
|
2317
2369
|
|
|
2318
2370
|
discordLogger.log('Destroying Discord client...')
|
|
2319
2371
|
discordClient.destroy()
|
|
package/src/genai-worker-wrapper.ts
CHANGED
|
@@ -11,6 +11,8 @@ export interface GenAIWorkerOptions {
|
|
|
11
11
|
systemMessage?: string
|
|
12
12
|
guildId: string
|
|
13
13
|
channelId: string
|
|
14
|
+
appId: string
|
|
15
|
+
geminiApiKey?: string | null
|
|
14
16
|
onAssistantOpusPacket: (packet: ArrayBuffer) => void
|
|
15
17
|
onAssistantStartSpeaking?: () => void
|
|
16
18
|
onAssistantStopSpeaking?: () => void
|
|
@@ -146,6 +148,8 @@ export function createGenAIWorker(
|
|
|
146
148
|
systemMessage: options.systemMessage,
|
|
147
149
|
guildId: options.guildId,
|
|
148
150
|
channelId: options.channelId,
|
|
151
|
+
appId: options.appId,
|
|
152
|
+
geminiApiKey: options.geminiApiKey,
|
|
149
153
|
}
|
|
150
154
|
worker.postMessage(initMessage)
|
|
151
155
|
})
|
package/src/genai-worker.ts
CHANGED
|
@@ -271,6 +271,7 @@ parentPort.on('message', async (message: WorkerInMessage) => {
|
|
|
271
271
|
session = await startGenAiSession({
|
|
272
272
|
tools,
|
|
273
273
|
systemMessage: message.systemMessage,
|
|
274
|
+
geminiApiKey: message.geminiApiKey,
|
|
274
275
|
onAssistantAudioChunk({ data }) {
|
|
275
276
|
// Write to audio log if enabled
|
|
276
277
|
if (audioLogStream && !audioLogStream.destroyed) {
|
package/src/genai.ts
CHANGED
|
@@ -113,6 +113,7 @@ export async function startGenAiSession({
|
|
|
113
113
|
onAssistantInterruptSpeaking,
|
|
114
114
|
systemMessage,
|
|
115
115
|
tools,
|
|
116
|
+
geminiApiKey,
|
|
116
117
|
}: {
|
|
117
118
|
onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
|
|
118
119
|
onAssistantStartSpeaking?: () => void
|
|
@@ -120,6 +121,7 @@ export async function startGenAiSession({
|
|
|
120
121
|
onAssistantInterruptSpeaking?: () => void
|
|
121
122
|
systemMessage?: string
|
|
122
123
|
tools?: Record<string, AITool<any, any>>
|
|
124
|
+
geminiApiKey?: string | null
|
|
123
125
|
} = {}) {
|
|
124
126
|
let session: Session | undefined = undefined
|
|
125
127
|
const callableTools: Array<CallableTool & { name: string }> = []
|
|
@@ -242,8 +244,15 @@ export async function startGenAiSession({
|
|
|
242
244
|
}
|
|
243
245
|
}
|
|
244
246
|
|
|
247
|
+
const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
|
|
248
|
+
|
|
249
|
+
if (!apiKey) {
|
|
250
|
+
genaiLogger.error('No Gemini API key provided')
|
|
251
|
+
throw new Error('Gemini API key is required for voice interactions')
|
|
252
|
+
}
|
|
253
|
+
|
|
245
254
|
const ai = new GoogleGenAI({
|
|
246
|
-
apiKey
|
|
255
|
+
apiKey,
|
|
247
256
|
})
|
|
248
257
|
|
|
249
258
|
const model = 'models/gemini-2.5-flash-live-preview'
|
package/src/voice.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { experimental_transcribe as transcribe } from 'ai'
|
|
1
|
+
import { GoogleGenAI } from '@google/genai'
|
|
3
2
|
import { createLogger } from './logger.js'
|
|
4
3
|
|
|
5
4
|
const voiceLogger = createLogger('VOICE')
|
|
@@ -9,30 +8,70 @@ export async function transcribeAudio({
|
|
|
9
8
|
prompt,
|
|
10
9
|
language,
|
|
11
10
|
temperature,
|
|
11
|
+
geminiApiKey,
|
|
12
12
|
}: {
|
|
13
13
|
audio: Buffer | Uint8Array | ArrayBuffer | string
|
|
14
14
|
prompt?: string
|
|
15
15
|
language?: string
|
|
16
16
|
temperature?: number
|
|
17
|
+
geminiApiKey?: string
|
|
17
18
|
}): Promise<string> {
|
|
18
19
|
try {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
20
|
+
// Use provided API key or fall back to environment variable
|
|
21
|
+
const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
|
|
22
|
+
|
|
23
|
+
if (!apiKey) {
|
|
24
|
+
throw new Error('Gemini API key is required for audio transcription')
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// Initialize Google Generative AI
|
|
28
|
+
const genAI = new GoogleGenAI({ apiKey })
|
|
29
|
+
|
|
30
|
+
// Convert audio to base64 string if it's not already
|
|
31
|
+
let audioBase64: string
|
|
32
|
+
if (typeof audio === 'string') {
|
|
33
|
+
audioBase64 = audio
|
|
34
|
+
} else if (audio instanceof Buffer) {
|
|
35
|
+
audioBase64 = audio.toString('base64')
|
|
36
|
+
} else if (audio instanceof Uint8Array) {
|
|
37
|
+
audioBase64 = Buffer.from(audio).toString('base64')
|
|
38
|
+
} else if (audio instanceof ArrayBuffer) {
|
|
39
|
+
audioBase64 = Buffer.from(audio).toString('base64')
|
|
40
|
+
} else {
|
|
41
|
+
throw new Error('Invalid audio format')
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Build the transcription prompt
|
|
45
|
+
let transcriptionPrompt = `Please transcribe this audio file accurately. Here is some relevant information and filenames that may be present in the audio:\n<context>\n${prompt}\n</context>\n`
|
|
46
|
+
if (language) {
|
|
47
|
+
transcriptionPrompt += `\nThe audio is in ${language}.`
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Create the content with audio using the inline data format
|
|
51
|
+
const response = await genAI.models.generateContent({
|
|
52
|
+
model: 'gemini-2.5-flash',
|
|
53
|
+
contents: [
|
|
54
|
+
{
|
|
55
|
+
parts: [
|
|
56
|
+
{ text: transcriptionPrompt },
|
|
57
|
+
{
|
|
58
|
+
inlineData: {
|
|
59
|
+
data: audioBase64,
|
|
60
|
+
mimeType: 'audio/mpeg',
|
|
29
61
|
},
|
|
30
62
|
},
|
|
31
|
-
|
|
32
|
-
|
|
63
|
+
],
|
|
64
|
+
},
|
|
65
|
+
],
|
|
66
|
+
config:
|
|
67
|
+
temperature !== undefined
|
|
68
|
+
? {
|
|
69
|
+
temperature,
|
|
70
|
+
}
|
|
71
|
+
: undefined,
|
|
33
72
|
})
|
|
34
73
|
|
|
35
|
-
return
|
|
74
|
+
return response.text || ''
|
|
36
75
|
} catch (error) {
|
|
37
76
|
voiceLogger.error('Failed to transcribe audio:', error)
|
|
38
77
|
throw new Error(
|