shuvmaki 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/bin.js +70 -0
  2. package/dist/ai-tool-to-genai.js +210 -0
  3. package/dist/ai-tool-to-genai.test.js +267 -0
  4. package/dist/channel-management.js +97 -0
  5. package/dist/cli.js +709 -0
  6. package/dist/commands/abort.js +78 -0
  7. package/dist/commands/add-project.js +98 -0
  8. package/dist/commands/agent.js +152 -0
  9. package/dist/commands/ask-question.js +183 -0
  10. package/dist/commands/create-new-project.js +78 -0
  11. package/dist/commands/fork.js +186 -0
  12. package/dist/commands/model.js +313 -0
  13. package/dist/commands/permissions.js +126 -0
  14. package/dist/commands/queue.js +129 -0
  15. package/dist/commands/resume.js +145 -0
  16. package/dist/commands/session.js +142 -0
  17. package/dist/commands/share.js +80 -0
  18. package/dist/commands/types.js +2 -0
  19. package/dist/commands/undo-redo.js +161 -0
  20. package/dist/commands/user-command.js +145 -0
  21. package/dist/database.js +184 -0
  22. package/dist/discord-bot.js +384 -0
  23. package/dist/discord-utils.js +217 -0
  24. package/dist/escape-backticks.test.js +410 -0
  25. package/dist/format-tables.js +96 -0
  26. package/dist/format-tables.test.js +418 -0
  27. package/dist/genai-worker-wrapper.js +109 -0
  28. package/dist/genai-worker.js +297 -0
  29. package/dist/genai.js +232 -0
  30. package/dist/interaction-handler.js +144 -0
  31. package/dist/logger.js +51 -0
  32. package/dist/markdown.js +310 -0
  33. package/dist/markdown.test.js +262 -0
  34. package/dist/message-formatting.js +273 -0
  35. package/dist/message-formatting.test.js +73 -0
  36. package/dist/openai-realtime.js +228 -0
  37. package/dist/opencode.js +216 -0
  38. package/dist/session-handler.js +580 -0
  39. package/dist/system-message.js +61 -0
  40. package/dist/tools.js +356 -0
  41. package/dist/utils.js +85 -0
  42. package/dist/voice-handler.js +541 -0
  43. package/dist/voice.js +314 -0
  44. package/dist/worker-types.js +4 -0
  45. package/dist/xml.js +92 -0
  46. package/dist/xml.test.js +32 -0
  47. package/package.json +60 -0
  48. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  49. package/src/__snapshots__/compact-session-context.md +47 -0
  50. package/src/ai-tool-to-genai.test.ts +296 -0
  51. package/src/ai-tool-to-genai.ts +255 -0
  52. package/src/channel-management.ts +161 -0
  53. package/src/cli.ts +1010 -0
  54. package/src/commands/abort.ts +94 -0
  55. package/src/commands/add-project.ts +139 -0
  56. package/src/commands/agent.ts +201 -0
  57. package/src/commands/ask-question.ts +276 -0
  58. package/src/commands/create-new-project.ts +111 -0
  59. package/src/commands/fork.ts +257 -0
  60. package/src/commands/model.ts +402 -0
  61. package/src/commands/permissions.ts +146 -0
  62. package/src/commands/queue.ts +181 -0
  63. package/src/commands/resume.ts +230 -0
  64. package/src/commands/session.ts +184 -0
  65. package/src/commands/share.ts +96 -0
  66. package/src/commands/types.ts +25 -0
  67. package/src/commands/undo-redo.ts +213 -0
  68. package/src/commands/user-command.ts +178 -0
  69. package/src/database.ts +220 -0
  70. package/src/discord-bot.ts +513 -0
  71. package/src/discord-utils.ts +282 -0
  72. package/src/escape-backticks.test.ts +447 -0
  73. package/src/format-tables.test.ts +440 -0
  74. package/src/format-tables.ts +110 -0
  75. package/src/genai-worker-wrapper.ts +160 -0
  76. package/src/genai-worker.ts +366 -0
  77. package/src/genai.ts +321 -0
  78. package/src/interaction-handler.ts +187 -0
  79. package/src/logger.ts +57 -0
  80. package/src/markdown.test.ts +358 -0
  81. package/src/markdown.ts +365 -0
  82. package/src/message-formatting.test.ts +81 -0
  83. package/src/message-formatting.ts +340 -0
  84. package/src/openai-realtime.ts +363 -0
  85. package/src/opencode.ts +277 -0
  86. package/src/session-handler.ts +758 -0
  87. package/src/system-message.ts +62 -0
  88. package/src/tools.ts +428 -0
  89. package/src/utils.ts +118 -0
  90. package/src/voice-handler.ts +760 -0
  91. package/src/voice.ts +432 -0
  92. package/src/worker-types.ts +66 -0
  93. package/src/xml.test.ts +37 -0
  94. package/src/xml.ts +121 -0
@@ -0,0 +1,760 @@
1
+ // Discord voice channel connection and audio stream handler.
2
+ // Manages joining/leaving voice channels, captures user audio, resamples to 16kHz,
3
+ // and routes audio to the GenAI worker for real-time voice assistant interactions.
4
+
5
+ import {
6
+ VoiceConnectionStatus,
7
+ EndBehaviorType,
8
+ joinVoiceChannel,
9
+ entersState,
10
+ type VoiceConnection,
11
+ } from '@discordjs/voice'
12
+ import { exec } from 'node:child_process'
13
+ import fs, { createWriteStream } from 'node:fs'
14
+ import { mkdir } from 'node:fs/promises'
15
+ import path from 'node:path'
16
+ import { promisify } from 'node:util'
17
+ import { Transform, type TransformCallback } from 'node:stream'
18
+ import * as prism from 'prism-media'
19
+ import dedent from 'string-dedent'
20
+ import {
21
+ PermissionsBitField,
22
+ Events,
23
+ type Client,
24
+ type Message,
25
+ type ThreadChannel,
26
+ type VoiceChannel,
27
+ type VoiceState,
28
+ } from 'discord.js'
29
+ import { createGenAIWorker, type GenAIWorker } from './genai-worker-wrapper.js'
30
+ import { getDatabase } from './database.js'
31
+ import { sendThreadMessage, escapeDiscordFormatting, SILENT_MESSAGE_FLAGS } from './discord-utils.js'
32
+ import { transcribeAudio } from './voice.js'
33
+ import { createLogger } from './logger.js'
34
+
35
/** Logger for this module, created with the 'VOICE' label. */
const voiceLogger = createLogger('VOICE')

/**
 * State kept for a guild while the bot holds a voice connection there.
 */
export type VoiceConnectionData = {
  /** The @discordjs/voice connection for the guild. */
  connection: VoiceConnection
  /** Optional raw-PCM log of captured user audio; assigned by setupVoiceHandling. */
  userAudioStream?: fs.WriteStream
  /** Optional GenAI worker that receives the user's audio; assigned by setupVoiceHandling. */
  genAiWorker?: GenAIWorker
}

/** Voice state registry. Entries are looked up and removed by guild id. */
export const voiceConnections: Map<string, VoiceConnectionData> = new Map()
44
+
45
/**
 * Converts 48 kHz, 2-channel, 16-bit little-endian PCM into 16 kHz mono PCM.
 *
 * Every third stereo frame is kept (plain decimation, no filtering) and its
 * left/right samples are averaged into one rounded 16-bit sample.
 *
 * @param buffer Interleaved stereo PCM (4 bytes per frame).
 * @returns A new buffer holding `floor(frames / 3)` mono samples (2 bytes each).
 */
export function convertToMono16k(buffer: Buffer): Buffer {
  const SAMPLE_BYTES = 2
  const CHANNELS_IN = 2
  const DECIMATION = 48000 / 16000
  const stereoFrameBytes = CHANNELS_IN * SAMPLE_BYTES

  const framesIn = buffer.length / stereoFrameBytes
  const framesOut = Math.floor(framesIn / DECIMATION)
  const mono = Buffer.alloc(framesOut * SAMPLE_BYTES)

  let writeAt = 0
  for (let n = 0; n < framesOut; n++, writeAt += SAMPLE_BYTES) {
    const readAt = Math.floor(n * DECIMATION) * stereoFrameBytes

    // Leave the (zero-initialised) output sample untouched if a full stereo
    // frame is not available at this position.
    if (readAt + 3 >= buffer.length) continue

    const left = buffer.readInt16LE(readAt)
    const right = buffer.readInt16LE(readAt + 2)
    mono.writeInt16LE(Math.round((left + right) / 2), writeAt)
  }

  return mono
}
70
+
71
/**
 * Opens a debug log file for the user's captured audio.
 *
 * Only active when the `DEBUG` environment variable is set. The file is
 * created under `<cwd>/discord-audio-logs/<guildId>/<channelId>/` and named
 * `user_<ISO timestamp>.16.pcm`.
 *
 * @param guildId Guild id, used as a directory component.
 * @param channelId Voice channel id, used as a directory component.
 * @returns The write stream, or `undefined` when debugging is off or the
 *          directory could not be created.
 */
export async function createUserAudioLogStream(
  guildId: string,
  channelId: string,
): Promise<fs.WriteStream | undefined> {
  if (!process.env.DEBUG) return undefined

  // ':' and '.' are replaced so the timestamp is usable in a file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
  const audioDir = path.join(
    process.cwd(),
    'discord-audio-logs',
    guildId,
    channelId,
  )

  try {
    await mkdir(audioDir, { recursive: true })

    const inputFileName = `user_${timestamp}.16.pcm`
    const inputFilePath = path.join(audioDir, inputFileName)
    const inputAudioStream = createWriteStream(inputFilePath)

    // createWriteStream reports open/write failures asynchronously through the
    // 'error' event, which the surrounding try/catch cannot see. Without a
    // listener such a failure would surface as an uncaught exception, so a
    // broken debug log must only be logged, never take the bot down.
    inputAudioStream.on('error', (error) => {
      voiceLogger.error(`User audio log stream error (${inputFilePath}):`, error)
    })

    voiceLogger.log(`Created user audio log: ${inputFilePath}`)

    return inputAudioStream
  } catch (error) {
    voiceLogger.error('Failed to create audio log directory:', error)
    return undefined
  }
}
99
+
100
/**
 * Builds a byte-stream Transform that re-chunks its input into fixed-size
 * frames of 3200 bytes (100 ms of 16 kHz, mono, 16-bit PCM).
 *
 * Incoming chunks of any size are accumulated; each time a full frame is
 * available it is pushed downstream. Bytes that do not fill a frame are held
 * for the next chunk and are discarded when the stream is flushed.
 *
 * @returns A new Transform instance with its own internal buffer.
 */
export function frameMono16khz(): Transform {
  const FRAME_MS = 100
  const SAMPLE_RATE = 16_000
  const CHANNELS = 1
  const SAMPLE_BYTES = 2
  const FRAME_BYTES = (FRAME_MS * SAMPLE_RATE * CHANNELS * SAMPLE_BYTES) / 1000

  // Bytes received so far that do not yet make up a whole frame.
  let pending: Buffer = Buffer.alloc(0)

  return new Transform({
    readableObjectMode: false,
    writableObjectMode: false,

    transform(chunk: Buffer, _enc: BufferEncoding, done: TransformCallback) {
      const data = pending.length > 0 ? Buffer.concat([pending, chunk]) : chunk
      const wholeFrames = Math.floor(data.length / FRAME_BYTES)

      for (let k = 0; k < wholeFrames; k++) {
        this.push(data.subarray(k * FRAME_BYTES, (k + 1) * FRAME_BYTES))
      }

      const consumed = wholeFrames * FRAME_BYTES
      pending = consumed === data.length ? Buffer.alloc(0) : data.subarray(consumed)

      done()
    },

    flush(done: TransformCallback) {
      // A trailing partial frame is dropped rather than emitted.
      pending = Buffer.alloc(0)
      done()
    },
  })
}
138
+
139
/**
 * Wires a ready voice connection to a GenAI worker.
 *
 * Steps:
 *  1. Look up the project directory mapped to the voice channel; bail out if
 *     there is none or if no entry exists in `voiceConnections` for the guild.
 *  2. Optionally open the debug audio log (see createUserAudioLogStream).
 *  3. Create the GenAI worker with callbacks that play assistant Opus packets
 *     on the connection, relay tool-call results back to the worker, and
 *     report worker errors to the text channel sharing the same directory.
 *  4. Stop any previously registered worker, greet the user, and store the
 *     new worker on the guild's voice data.
 *  5. Subscribe to users' audio: Opus -> 48 kHz stereo PCM -> 16 kHz mono ->
 *     100 ms frames -> base64 `audio/pcm;rate=16000` realtime input.
 *
 * Only the most recently started "speaking session" forwards audio; frames
 * from an older session are dropped and do not send `audioStreamEnd`.
 *
 * @param connection Voice connection to play on and receive from.
 * @param guildId Guild id; key into `voiceConnections`.
 * @param channelId Voice channel id used for the directory lookup.
 * @param appId Application id used to look up the Gemini API key.
 * @param discordClient Client used to fetch the text channel for error reports.
 */
export async function setupVoiceHandling({
  connection,
  guildId,
  channelId,
  appId,
  discordClient,
}: {
  connection: VoiceConnection
  guildId: string
  channelId: string
  appId: string
  discordClient: Client
}) {
  voiceLogger.log(
    `Setting up voice handling for guild ${guildId}, channel ${channelId}`,
  )

  const channelDirRow = getDatabase()
    .prepare(
      'SELECT directory FROM channel_directories WHERE channel_id = ? AND channel_type = ?',
    )
    .get(channelId, 'voice') as { directory: string } | undefined

  if (!channelDirRow) {
    voiceLogger.log(
      `Voice channel ${channelId} has no associated directory, skipping setup`,
    )
    return
  }

  const directory = channelDirRow.directory
  voiceLogger.log(`Found directory for voice channel: ${directory}`)

  const voiceData = voiceConnections.get(guildId)
  if (!voiceData) {
    voiceLogger.error(`No voice data found for guild ${guildId}`)
    return
  }

  voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId)

  const apiKeys = getDatabase()
    .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
    .get(appId) as { gemini_api_key: string | null } | undefined

  // The callbacks below need the worker they belong to, but the worker only
  // exists once createGenAIWorker resolves. Going through this reference (set
  // right after creation) means a callback fired early is skipped instead of
  // throwing a ReferenceError on the not-yet-initialised `const`.
  let activeWorker: GenAIWorker | undefined

  const genAiWorker = await createGenAIWorker({
    directory,
    guildId,
    channelId,
    appId,
    geminiApiKey: apiKeys?.gemini_api_key,
    systemMessage: dedent`
      You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.

      You should talk like Jarvis, British accent, satirical, joking and calm. Be short and concise. Speak fast.

      After tool calls give a super short summary of the assistant message, you should say what the assistant message writes.

      Before starting a new session ask for confirmation if it is not clear if the user finished describing it. ask "message ready, send?"

      NEVER repeat the whole tool call parameters or message.

      Your job is to manage many opencode agent chat instances. Opencode is the agent used to write the code, it is similar to Claude Code.

      For everything the user asks it is implicit that the user is asking for you to proxy the requests to opencode sessions.

      You can
      - start new chats on a given project
      - read the chats to report progress to the user
      - submit messages to the chat
      - list files for a given projects, so you can translate imprecise user prompts to precise messages that mention filename paths using @

      Common patterns
      - to get the last session use the listChats tool
      - when user asks you to do something you submit a new session to do it. it's implicit that you proxy requests to the agents chat!
      - when you submit a session assume the session will take a minute or 2 to complete the task

      Rules
      - never spell files by mentioning dots, letters, etc. instead give a brief description of the filename
      - NEVER spell hashes or IDs
      - never read session ids or other ids

      Your voice is calm and monotone, NEVER excited and goofy. But you speak without jargon or bs and do veiled short jokes.
      You speak like you knew something other don't. You are cool and cold.
    `,
    onAssistantOpusPacket(packet) {
      if (connection.state.status !== VoiceConnectionStatus.Ready) {
        voiceLogger.log('Skipping packet: connection not ready')
        return
      }

      try {
        connection.setSpeaking(true)
        connection.playOpusPacket(Buffer.from(packet))
      } catch (error) {
        voiceLogger.error('Error sending packet:', error)
      }
    },
    onAssistantStartSpeaking() {
      voiceLogger.log('Assistant started speaking')
      connection.setSpeaking(true)
    },
    onAssistantStopSpeaking() {
      voiceLogger.log('Assistant stopped speaking (natural finish)')
      connection.setSpeaking(false)
    },
    onAssistantInterruptSpeaking() {
      voiceLogger.log('Assistant interrupted while speaking')
      activeWorker?.interrupt()
      connection.setSpeaking(false)
    },
    onToolCallCompleted(params) {
      const text = params.error
        ? `<systemMessage>\nThe coding agent encountered an error while processing session ${params.sessionId}: ${params.error?.message || String(params.error)}\n</systemMessage>`
        : `<systemMessage>\nThe coding agent finished working on session ${params.sessionId}\n\nHere's what the assistant wrote:\n${params.markdown}\n</systemMessage>`

      activeWorker?.sendTextInput(text)
    },
    async onError(error) {
      voiceLogger.error('GenAI worker error:', error)
      // Find the text channel that is mapped to the same directory as this
      // voice channel so the error can be shown to the user.
      const textChannelRow = getDatabase()
        .prepare(
          `SELECT cd2.channel_id FROM channel_directories cd1
          JOIN channel_directories cd2 ON cd1.directory = cd2.directory
          WHERE cd1.channel_id = ? AND cd1.channel_type = 'voice' AND cd2.channel_type = 'text'`,
        )
        .get(channelId) as { channel_id: string } | undefined

      if (textChannelRow) {
        try {
          const textChannel = await discordClient.channels.fetch(
            textChannelRow.channel_id,
          )
          if (textChannel?.isTextBased() && 'send' in textChannel) {
            await textChannel.send({ content: `⚠️ Voice session error: ${error}`, flags: SILENT_MESSAGE_FLAGS })
          }
        } catch (e) {
          voiceLogger.error('Failed to send error to text channel:', e)
        }
      }
    },
  })
  activeWorker = genAiWorker

  if (voiceData.genAiWorker) {
    voiceLogger.log('Stopping existing GenAI worker before creating new one')
    await voiceData.genAiWorker.stop()
  }

  genAiWorker.sendTextInput(
    `<systemMessage>\nsay "Hello boss, how we doing today?"\n</systemMessage>`,
  )

  voiceData.genAiWorker = genAiWorker

  const receiver = connection.receiver

  // Drop listeners left over from an earlier setup on this receiver.
  receiver.speaking.removeAllListeners('start')

  // Monotonic counter; the pipeline whose session equals this value is the
  // only one allowed to forward audio.
  let speakingSessionCount = 0

  // One decode pipeline per live receive stream. `receiver.subscribe` can hand
  // back the same still-open stream when a user resumes speaking before the
  // 500 ms silence timeout; piping it into a fresh decoder chain each time
  // would decode every packet once per stacked pipeline until the stream ends.
  type ReceiveStream = ReturnType<typeof receiver.subscribe>
  const activePipelines = new Map<
    string,
    { stream: ReceiveStream; session: number }
  >()

  receiver.speaking.on('start', (userId) => {
    voiceLogger.log(`User ${userId} started speaking`)

    speakingSessionCount++
    const currentSessionCount = speakingSessionCount
    voiceLogger.log(`Speaking session ${currentSessionCount} started`)

    const audioStream = receiver.subscribe(userId, {
      end: { behavior: EndBehaviorType.AfterSilence, duration: 500 },
    })

    const existing = activePipelines.get(userId)
    if (
      existing &&
      existing.stream === audioStream &&
      !audioStream.destroyed &&
      !audioStream.readableEnded
    ) {
      // Same live stream as before: promote the existing pipeline to the
      // current session instead of attaching a duplicate.
      existing.session = currentSessionCount
      return
    }

    const pipelineState = { stream: audioStream, session: currentSessionCount }
    activePipelines.set(userId, pipelineState)
    audioStream.once('close', () => {
      if (activePipelines.get(userId) === pipelineState) {
        activePipelines.delete(userId)
      }
    })

    const decoder = new prism.opus.Decoder({
      rate: 48000,
      channels: 2,
      frameSize: 960,
    })

    decoder.on('error', (error) => {
      voiceLogger.error(`Opus decoder error for user ${userId}:`, error)
    })

    const downsampleTransform = new Transform({
      transform(chunk: Buffer, _encoding, callback) {
        try {
          const downsampled = convertToMono16k(chunk)
          callback(null, downsampled)
        } catch (error) {
          callback(error as Error)
        }
      },
    })

    const framer = frameMono16khz()

    const pipeline = audioStream
      .pipe(decoder)
      .pipe(downsampleTransform)
      .pipe(framer)

    pipeline
      .on('data', (frame: Buffer) => {
        // A newer speaking session has taken over; drop this frame.
        if (pipelineState.session !== speakingSessionCount) {
          return
        }

        if (!voiceData.genAiWorker) {
          voiceLogger.warn(
            `[VOICE] Received audio frame but no GenAI worker active for guild ${guildId}`,
          )
          return
        }

        voiceData.userAudioStream?.write(frame)

        voiceData.genAiWorker.sendRealtimeInput({
          audio: {
            mimeType: 'audio/pcm;rate=16000',
            data: frame.toString('base64'),
          },
        })
      })
      .on('end', () => {
        if (pipelineState.session === speakingSessionCount) {
          voiceLogger.log(
            `User ${userId} stopped speaking (session ${pipelineState.session})`,
          )
          voiceData.genAiWorker?.sendRealtimeInput({
            audioStreamEnd: true,
          })
        } else {
          voiceLogger.log(
            `User ${userId} stopped speaking (session ${pipelineState.session}), but skipping audioStreamEnd because newer session ${speakingSessionCount} exists`,
          )
        }
      })
      .on('error', (error) => {
        voiceLogger.error(`Pipeline error for user ${userId}:`, error)
      })

    // `.pipe()` does not forward errors, so each stage gets its own listener.
    audioStream.on('error', (error) => {
      voiceLogger.error(`Audio stream error for user ${userId}:`, error)
    })

    downsampleTransform.on('error', (error) => {
      voiceLogger.error(`Downsample transform error for user ${userId}:`, error)
    })

    framer.on('error', (error) => {
      voiceLogger.error(`Framer error for user ${userId}:`, error)
    })
  })
}
391
+
392
/**
 * Tears down everything tracked for a guild's voice session: stops the GenAI
 * worker, closes the debug audio log, destroys the voice connection and
 * removes the guild's entry from `voiceConnections`.
 *
 * Safe to call when nothing is tracked for the guild (no-op) and safe to
 * re-enter while a cleanup for the same guild is already running.
 *
 * @param guildId Guild whose voice session should be cleaned up.
 */
export async function cleanupVoiceConnection(guildId: string) {
  const voiceData = voiceConnections.get(guildId)
  if (!voiceData) return

  voiceLogger.log(`Starting cleanup for guild ${guildId}`)

  // Take ownership of the resources before touching them. Destroying the
  // connection below fires its Destroyed handler, which calls this function
  // again; with the fields already cleared that nested call has nothing left
  // to stop or close, so the worker and stream are shut down exactly once.
  const { connection, genAiWorker, userAudioStream } = voiceData
  voiceData.genAiWorker = undefined
  voiceData.userAudioStream = undefined

  try {
    if (genAiWorker) {
      voiceLogger.log(`Stopping GenAI worker...`)
      await genAiWorker.stop()
      voiceLogger.log(`GenAI worker stopped`)
    }

    if (userAudioStream) {
      voiceLogger.log(`Closing user audio stream...`)
      await new Promise<void>((resolve) => {
        // Do not wait forever for the file to flush; give up after 2 s.
        const giveUp = setTimeout(resolve, 2000)
        userAudioStream.end(() => {
          clearTimeout(giveUp)
          voiceLogger.log('User audio stream closed')
          resolve()
        })
      })
    }
  } catch (error) {
    voiceLogger.error(`Error during cleanup for guild ${guildId}:`, error)
  } finally {
    // Always leave the channel, even if stopping the worker or closing the
    // log failed; otherwise the bot would stay connected but untracked.
    try {
      if (connection.state.status !== VoiceConnectionStatus.Destroyed) {
        voiceLogger.log(`Destroying voice connection...`)
        connection.destroy()
      }
    } catch (error) {
      voiceLogger.error(`Error during cleanup for guild ${guildId}:`, error)
    }

    // Only remove the entry this cleanup was started for, so a connection
    // registered for the guild in the meantime is not dropped by mistake.
    if (voiceConnections.get(guildId) === voiceData) {
      voiceConnections.delete(guildId)
    }
  }

  voiceLogger.log(`Cleanup complete for guild ${guildId}`)
}
430
+
431
/**
 * Transcribes the first audio attachment of a Discord message and posts the
 * transcription into the thread.
 *
 * When `projectDirectory` is given, the project's tracked file tree is added
 * to the transcription prompt so file names are transcribed accurately. When
 * `isNewThread` is true the thread is renamed to the first 80 characters of
 * the transcription (best effort, waits at most 2 s).
 *
 * @param message Message that may carry an audio attachment.
 * @param thread Thread used for progress, error and result messages.
 * @param projectDirectory Optional project root used for context.
 * @param isNewThread Whether the thread was just created and should be renamed.
 * @param appId Optional application id used to look up the Gemini API key.
 * @param currentSessionContext Passed through to `transcribeAudio`.
 * @param lastSessionContext Passed through to `transcribeAudio`.
 * @returns The transcription, or `null` when the message has no audio
 *          attachment or the download/transcription failed (the failure is
 *          reported in the thread).
 */
export async function processVoiceAttachment({
  message,
  thread,
  projectDirectory,
  isNewThread = false,
  appId,
  currentSessionContext,
  lastSessionContext,
}: {
  message: Message
  thread: ThreadChannel
  projectDirectory?: string
  isNewThread?: boolean
  appId?: string
  currentSessionContext?: string
  lastSessionContext?: string
}): Promise<string | null> {
  const audioAttachment = Array.from(message.attachments.values()).find(
    (attachment) => attachment.contentType?.startsWith('audio/'),
  )

  if (!audioAttachment) return null

  voiceLogger.log(
    `Detected audio attachment: ${audioAttachment.name} (${audioAttachment.contentType})`,
  )

  await sendThreadMessage(thread, '🎤 Transcribing voice message...')

  // Download the attachment. A non-2xx response must not be treated as audio
  // (its body would be an error page), and a network failure is reported in
  // the thread like a transcription failure instead of rejecting the call.
  let audioBuffer: Buffer
  try {
    const audioResponse = await fetch(audioAttachment.url)
    if (!audioResponse.ok) {
      throw new Error(
        `HTTP ${audioResponse.status} ${audioResponse.statusText}`.trim(),
      )
    }
    audioBuffer = Buffer.from(await audioResponse.arrayBuffer())
  } catch (error) {
    const errMsg = error instanceof Error ? error.message : String(error)
    voiceLogger.error(`Failed to download voice attachment:`, error)
    await sendThreadMessage(
      thread,
      `⚠️ Could not download voice message: ${errMsg}`,
    )
    return null
  }

  voiceLogger.log(`Downloaded ${audioBuffer.length} bytes, transcribing...`)

  let transcriptionPrompt = 'Discord voice message transcription'

  if (projectDirectory) {
    try {
      voiceLogger.log(`Getting project file tree from ${projectDirectory}`)
      const execAsync = promisify(exec)
      // Fixed command string; only the working directory varies.
      const { stdout } = await execAsync('git ls-files | tree --fromfile -a', {
        cwd: projectDirectory,
      })
      const result = stdout

      if (result) {
        transcriptionPrompt = `Discord voice message transcription. Project file structure:\n${result}\n\nPlease transcribe file names and paths accurately based on this context.`
        voiceLogger.log(`Added project context to transcription prompt`)
      }
    } catch (e) {
      // Context is optional: fall back to the plain prompt.
      voiceLogger.log(`Could not get project tree:`, e)
    }
  }

  let geminiApiKey: string | undefined
  if (appId) {
    const apiKeys = getDatabase()
      .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
      .get(appId) as { gemini_api_key: string | null } | undefined

    if (apiKeys?.gemini_api_key) {
      geminiApiKey = apiKeys.gemini_api_key
    }
  }

  let transcription: string
  try {
    transcription = await transcribeAudio({
      audio: audioBuffer,
      prompt: transcriptionPrompt,
      geminiApiKey,
      directory: projectDirectory,
      currentSessionContext,
      lastSessionContext,
    })
  } catch (error) {
    const errMsg = error instanceof Error ? error.message : String(error)
    voiceLogger.error(`Transcription failed:`, error)
    await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`)
    return null
  }

  voiceLogger.log(
    `Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`,
  )

  if (isNewThread) {
    const threadName = transcription.replace(/\s+/g, ' ').trim().slice(0, 80)
    if (threadName) {
      // Renaming is best effort: wait at most 2 s so a slow rename does not
      // hold up the transcription message.
      let renameTimer: ReturnType<typeof setTimeout> | undefined
      try {
        const renamed = await Promise.race([
          thread.setName(threadName).then(() => true),
          new Promise<boolean>((resolve) => {
            renameTimer = setTimeout(() => resolve(false), 2000)
          }),
        ])
        if (renamed) {
          voiceLogger.log(`Updated thread name to: "${threadName}"`)
        } else {
          voiceLogger.log(
            `Thread rename to "${threadName}" still pending after 2s, continuing`,
          )
        }
      } catch (e) {
        voiceLogger.log(`Could not update thread name:`, e)
      } finally {
        clearTimeout(renameTimer)
      }
    }
  }

  await sendThreadMessage(
    thread,
    `📝 **Transcribed message:** ${escapeDiscordFormatting(transcription)}`,
  )
  return transcription
}
538
+
539
/** Guild member as exposed by a voice state, without the `null` case. */
type VoiceStateMember = NonNullable<VoiceState['member']>

/**
 * Evaluates whether a member may control the voice bot: guild owner,
 * Administrator, Manage Server permission, or a role named "kimaki"
 * (case-insensitive).
 */
function getVoicePrivileges(member: VoiceStateMember, ownerId: string) {
  const isOwner = member.id === ownerId
  const isAdmin = member.permissions.has(
    PermissionsBitField.Flags.Administrator,
  )
  const canManageServer = member.permissions.has(
    PermissionsBitField.Flags.ManageGuild,
  )
  const hasKimakiRole = member.roles.cache.some(
    (role) => role.name.toLowerCase() === 'kimaki',
  )
  return {
    isOwner,
    isAdmin,
    isPrivileged: isOwner || isAdmin || canManageServer || hasKimakiRole,
  }
}

/** True if the channel holds a privileged, non-bot member other than `excludedMemberId`. */
function hasOtherPrivilegedMembers(
  channel: VoiceChannel,
  excludedMemberId: string,
  ownerId: string,
): boolean {
  return channel.members.some((m) => {
    if (m.id === excludedMemberId || m.user.bot) return false
    return getVoicePrivileges(m, ownerId).isPrivileged
  })
}

/**
 * Registers the VoiceStateUpdate listener that makes the bot follow
 * privileged users in and out of voice channels.
 *
 * For updates caused by a privileged member (see getVoicePrivileges):
 *  - member left voice: if the bot is in that channel and no other privileged
 *    member remains there, the guild's voice session is cleaned up;
 *  - member moved channels: if the bot was in the old channel it follows the
 *    member, unless another privileged member is still in the old channel, in
 *    which case the bot stays;
 *  - otherwise, when the member is in a voice channel: the bot joins it, or
 *    moves an existing live connection to it, then sets up voice handling
 *    and connection lifecycle listeners.
 *
 * @param discordClient Client to attach the listener to.
 * @param appId Application id forwarded to setupVoiceHandling.
 */
export function registerVoiceStateHandler({
  discordClient,
  appId,
}: {
  discordClient: Client
  appId: string
}) {
  discordClient.on(Events.VoiceStateUpdate, async (oldState: VoiceState, newState: VoiceState) => {
    try {
      const member = newState.member || oldState.member
      if (!member) return

      const guild = newState.guild || oldState.guild
      const { isOwner, isAdmin, isPrivileged } = getVoicePrivileges(
        member,
        guild.ownerId,
      )

      // Voice updates from unprivileged members are ignored entirely.
      if (!isPrivileged) {
        return
      }

      // Member left voice altogether.
      if (oldState.channelId !== null && newState.channelId === null) {
        voiceLogger.log(
          `Admin user ${member.user.tag} left voice channel: ${oldState.channel?.name}`,
        )

        const guildId = guild.id
        const voiceData = voiceConnections.get(guildId)

        if (
          voiceData &&
          voiceData.connection.joinConfig.channelId === oldState.channelId
        ) {
          const voiceChannel = oldState.channel as VoiceChannel
          if (!voiceChannel) return

          if (
            !hasOtherPrivilegedMembers(voiceChannel, member.id, guild.ownerId)
          ) {
            voiceLogger.log(
              `No other admins in channel, bot leaving voice channel in guild: ${guild.name}`,
            )

            await cleanupVoiceConnection(guildId)
          } else {
            voiceLogger.log(
              `Other admins still in channel, bot staying in voice channel`,
            )
          }
        }
        return
      }

      // Member moved from one voice channel to another.
      if (
        oldState.channelId !== null &&
        newState.channelId !== null &&
        oldState.channelId !== newState.channelId
      ) {
        voiceLogger.log(
          `Admin user ${member.user.tag} moved from ${oldState.channel?.name} to ${newState.channel?.name}`,
        )

        const guildId = guild.id
        const voiceData = voiceConnections.get(guildId)

        if (
          voiceData &&
          voiceData.connection.joinConfig.channelId === oldState.channelId
        ) {
          const oldVoiceChannel = oldState.channel as VoiceChannel
          if (oldVoiceChannel) {
            if (
              !hasOtherPrivilegedMembers(
                oldVoiceChannel,
                member.id,
                guild.ownerId,
              )
            ) {
              voiceLogger.log(
                `Following admin to new channel: ${newState.channel?.name}`,
              )
              const voiceChannel = newState.channel as VoiceChannel
              if (voiceChannel) {
                voiceData.connection.rejoin({
                  channelId: voiceChannel.id,
                  selfDeaf: false,
                  selfMute: false,
                })
              }
            } else {
              voiceLogger.log(
                `Other admins still in old channel, bot staying put`,
              )
              // Stop here: falling through to the join logic below would
              // move the existing connection to the new channel after all.
              // NOTE(review): a later non-move update (e.g. mute toggle) from
              // a privileged member in another channel still reaches that
              // logic and can move the bot — confirm whether that is intended.
              return
            }
          }
        }
      }

      if (oldState.channelId === null && newState.channelId !== null) {
        voiceLogger.log(
          `Admin user ${member.user.tag} (Owner: ${isOwner}, Admin: ${isAdmin}) joined voice channel: ${newState.channel?.name}`,
        )
      }

      if (newState.channelId === null) return

      const voiceChannel = newState.channel as VoiceChannel
      if (!voiceChannel) return

      // Reuse a live connection if the guild already has one.
      const existingVoiceData = voiceConnections.get(newState.guild.id)
      if (
        existingVoiceData &&
        existingVoiceData.connection.state.status !==
          VoiceConnectionStatus.Destroyed
      ) {
        voiceLogger.log(
          `Bot already connected to a voice channel in guild ${newState.guild.name}`,
        )

        if (
          existingVoiceData.connection.joinConfig.channelId !== voiceChannel.id
        ) {
          voiceLogger.log(
            `Moving bot from channel ${existingVoiceData.connection.joinConfig.channelId} to ${voiceChannel.id}`,
          )
          existingVoiceData.connection.rejoin({
            channelId: voiceChannel.id,
            selfDeaf: false,
            selfMute: false,
          })
        }
        return
      }

      try {
        voiceLogger.log(
          `Attempting to join voice channel: ${voiceChannel.name} (${voiceChannel.id})`,
        )

        const connection = joinVoiceChannel({
          channelId: voiceChannel.id,
          guildId: newState.guild.id,
          adapterCreator: newState.guild.voiceAdapterCreator,
          selfDeaf: false,
          debug: true,
          daveEncryption: false,
          selfMute: false,
        })

        // Registered before the connection is ready so setupVoiceHandling
        // can find the entry.
        voiceConnections.set(newState.guild.id, { connection })

        await entersState(connection, VoiceConnectionStatus.Ready, 30_000)
        voiceLogger.log(
          `Successfully joined voice channel: ${voiceChannel.name} in guild: ${newState.guild.name}`,
        )

        await setupVoiceHandling({
          connection,
          guildId: newState.guild.id,
          channelId: voiceChannel.id,
          appId,
          discordClient,
        })

        connection.on(VoiceConnectionStatus.Disconnected, async () => {
          voiceLogger.log(
            `Disconnected from voice channel in guild: ${newState.guild.name}`,
          )
          try {
            // Give the connection 5 s to start reconnecting on its own.
            await Promise.race([
              entersState(connection, VoiceConnectionStatus.Signalling, 5_000),
              entersState(connection, VoiceConnectionStatus.Connecting, 5_000),
            ])
            voiceLogger.log(`Reconnecting to voice channel`)
          } catch (error) {
            voiceLogger.log(`Failed to reconnect, destroying connection`)
            connection.destroy()
            voiceConnections.delete(newState.guild.id)
          }
        })

        connection.on(VoiceConnectionStatus.Destroyed, async () => {
          voiceLogger.log(
            `Connection destroyed for guild: ${newState.guild.name}`,
          )
          await cleanupVoiceConnection(newState.guild.id)
        })

        connection.on('error', (error) => {
          voiceLogger.error(
            `Connection error in guild ${newState.guild.name}:`,
            error,
          )
        })
      } catch (error) {
        voiceLogger.error(`Failed to join voice channel:`, error)
        await cleanupVoiceConnection(newState.guild.id)
      }
    } catch (error) {
      voiceLogger.error('Error in voice state update handler:', error)
    }
  })
}