shuvmaki 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin.js +70 -0
- package/dist/ai-tool-to-genai.js +210 -0
- package/dist/ai-tool-to-genai.test.js +267 -0
- package/dist/channel-management.js +97 -0
- package/dist/cli.js +709 -0
- package/dist/commands/abort.js +78 -0
- package/dist/commands/add-project.js +98 -0
- package/dist/commands/agent.js +152 -0
- package/dist/commands/ask-question.js +183 -0
- package/dist/commands/create-new-project.js +78 -0
- package/dist/commands/fork.js +186 -0
- package/dist/commands/model.js +313 -0
- package/dist/commands/permissions.js +126 -0
- package/dist/commands/queue.js +129 -0
- package/dist/commands/resume.js +145 -0
- package/dist/commands/session.js +142 -0
- package/dist/commands/share.js +80 -0
- package/dist/commands/types.js +2 -0
- package/dist/commands/undo-redo.js +161 -0
- package/dist/commands/user-command.js +145 -0
- package/dist/database.js +184 -0
- package/dist/discord-bot.js +384 -0
- package/dist/discord-utils.js +217 -0
- package/dist/escape-backticks.test.js +410 -0
- package/dist/format-tables.js +96 -0
- package/dist/format-tables.test.js +418 -0
- package/dist/genai-worker-wrapper.js +109 -0
- package/dist/genai-worker.js +297 -0
- package/dist/genai.js +232 -0
- package/dist/interaction-handler.js +144 -0
- package/dist/logger.js +51 -0
- package/dist/markdown.js +310 -0
- package/dist/markdown.test.js +262 -0
- package/dist/message-formatting.js +273 -0
- package/dist/message-formatting.test.js +73 -0
- package/dist/openai-realtime.js +228 -0
- package/dist/opencode.js +216 -0
- package/dist/session-handler.js +580 -0
- package/dist/system-message.js +61 -0
- package/dist/tools.js +356 -0
- package/dist/utils.js +85 -0
- package/dist/voice-handler.js +541 -0
- package/dist/voice.js +314 -0
- package/dist/worker-types.js +4 -0
- package/dist/xml.js +92 -0
- package/dist/xml.test.js +32 -0
- package/package.json +60 -0
- package/src/__snapshots__/compact-session-context-no-system.md +35 -0
- package/src/__snapshots__/compact-session-context.md +47 -0
- package/src/ai-tool-to-genai.test.ts +296 -0
- package/src/ai-tool-to-genai.ts +255 -0
- package/src/channel-management.ts +161 -0
- package/src/cli.ts +1010 -0
- package/src/commands/abort.ts +94 -0
- package/src/commands/add-project.ts +139 -0
- package/src/commands/agent.ts +201 -0
- package/src/commands/ask-question.ts +276 -0
- package/src/commands/create-new-project.ts +111 -0
- package/src/commands/fork.ts +257 -0
- package/src/commands/model.ts +402 -0
- package/src/commands/permissions.ts +146 -0
- package/src/commands/queue.ts +181 -0
- package/src/commands/resume.ts +230 -0
- package/src/commands/session.ts +184 -0
- package/src/commands/share.ts +96 -0
- package/src/commands/types.ts +25 -0
- package/src/commands/undo-redo.ts +213 -0
- package/src/commands/user-command.ts +178 -0
- package/src/database.ts +220 -0
- package/src/discord-bot.ts +513 -0
- package/src/discord-utils.ts +282 -0
- package/src/escape-backticks.test.ts +447 -0
- package/src/format-tables.test.ts +440 -0
- package/src/format-tables.ts +110 -0
- package/src/genai-worker-wrapper.ts +160 -0
- package/src/genai-worker.ts +366 -0
- package/src/genai.ts +321 -0
- package/src/interaction-handler.ts +187 -0
- package/src/logger.ts +57 -0
- package/src/markdown.test.ts +358 -0
- package/src/markdown.ts +365 -0
- package/src/message-formatting.test.ts +81 -0
- package/src/message-formatting.ts +340 -0
- package/src/openai-realtime.ts +363 -0
- package/src/opencode.ts +277 -0
- package/src/session-handler.ts +758 -0
- package/src/system-message.ts +62 -0
- package/src/tools.ts +428 -0
- package/src/utils.ts +118 -0
- package/src/voice-handler.ts +760 -0
- package/src/voice.ts +432 -0
- package/src/worker-types.ts +66 -0
- package/src/xml.test.ts +37 -0
- package/src/xml.ts +121 -0
|
@@ -0,0 +1,760 @@
|
|
|
1
|
+
// Discord voice channel connection and audio stream handler.
|
|
2
|
+
// Manages joining/leaving voice channels, captures user audio, resamples to 16kHz,
|
|
3
|
+
// and routes audio to the GenAI worker for real-time voice assistant interactions.
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
VoiceConnectionStatus,
|
|
7
|
+
EndBehaviorType,
|
|
8
|
+
joinVoiceChannel,
|
|
9
|
+
entersState,
|
|
10
|
+
type VoiceConnection,
|
|
11
|
+
} from '@discordjs/voice'
|
|
12
|
+
import { exec } from 'node:child_process'
|
|
13
|
+
import fs, { createWriteStream } from 'node:fs'
|
|
14
|
+
import { mkdir } from 'node:fs/promises'
|
|
15
|
+
import path from 'node:path'
|
|
16
|
+
import { promisify } from 'node:util'
|
|
17
|
+
import { Transform, type TransformCallback } from 'node:stream'
|
|
18
|
+
import * as prism from 'prism-media'
|
|
19
|
+
import dedent from 'string-dedent'
|
|
20
|
+
import {
|
|
21
|
+
PermissionsBitField,
|
|
22
|
+
Events,
|
|
23
|
+
type Client,
|
|
24
|
+
type Message,
|
|
25
|
+
type ThreadChannel,
|
|
26
|
+
type VoiceChannel,
|
|
27
|
+
type VoiceState,
|
|
28
|
+
} from 'discord.js'
|
|
29
|
+
import { createGenAIWorker, type GenAIWorker } from './genai-worker-wrapper.js'
|
|
30
|
+
import { getDatabase } from './database.js'
|
|
31
|
+
import { sendThreadMessage, escapeDiscordFormatting, SILENT_MESSAGE_FLAGS } from './discord-utils.js'
|
|
32
|
+
import { transcribeAudio } from './voice.js'
|
|
33
|
+
import { createLogger } from './logger.js'
|
|
34
|
+
|
|
35
|
+
/** Logger for this module, created with the 'VOICE' label. */
const voiceLogger = createLogger('VOICE')

/**
 * State kept for a guild's voice connection.
 */
export type VoiceConnectionData = {
  /** The @discordjs/voice connection the bot holds in the guild. */
  connection: VoiceConnection
  /** GenAI worker attached by `setupVoiceHandling`; absent until setup has run. */
  genAiWorker?: GenAIWorker
  /** Debug log stream for the framed user audio; only created when `DEBUG` is set. */
  userAudioStream?: fs.WriteStream
}

/** Registry of voice connection state, keyed by guild id. */
export const voiceConnections = new Map<string, VoiceConnectionData>()
|
|
44
|
+
|
|
45
|
+
/**
 * Converts 48 kHz stereo signed 16-bit little-endian PCM into 16 kHz mono.
 *
 * Every third stereo frame is kept (plain decimation, no filtering) and its
 * left/right samples are averaged into one mono sample. Trailing bytes that do
 * not make up a full group of three stereo frames are ignored.
 *
 * @param buffer Interleaved 48 kHz stereo s16le PCM.
 * @returns A new buffer holding 16 kHz mono s16le PCM.
 */
export function convertToMono16k(buffer: Buffer): Buffer {
  const BYTES_PER_SAMPLE = 2
  const STEREO_FRAME_BYTES = 2 * BYTES_PER_SAMPLE
  const DECIMATION_FACTOR = 48_000 / 16_000
  // One output sample is produced for every 12 input bytes (3 stereo frames).
  const INPUT_STRIDE_BYTES = DECIMATION_FACTOR * STEREO_FRAME_BYTES

  const monoSampleCount = Math.floor(buffer.length / INPUT_STRIDE_BYTES)
  const mono = Buffer.alloc(monoSampleCount * BYTES_PER_SAMPLE)

  let readOffset = 0
  for (
    let writeOffset = 0;
    writeOffset < mono.length;
    writeOffset += BYTES_PER_SAMPLE
  ) {
    const left = buffer.readInt16LE(readOffset)
    const right = buffer.readInt16LE(readOffset + BYTES_PER_SAMPLE)
    mono.writeInt16LE(Math.round((left + right) / 2), writeOffset)
    readOffset += INPUT_STRIDE_BYTES
  }

  return mono
}
|
|
70
|
+
|
|
71
|
+
/**
 * Opens a debug log file for the framed user audio of one voice channel.
 *
 * Does nothing unless the `DEBUG` environment variable is set. The file is
 * created under `<cwd>/discord-audio-logs/<guildId>/<channelId>/` and named
 * `user_<timestamp>.16.pcm`.
 *
 * @param guildId Guild the voice channel belongs to (used as a directory name).
 * @param channelId Voice channel id (used as a directory name).
 * @returns The write stream, or `undefined` when debugging is off or the
 *   directory could not be created.
 */
export async function createUserAudioLogStream(
  guildId: string,
  channelId: string,
): Promise<fs.WriteStream | undefined> {
  if (!process.env.DEBUG) return undefined

  // ':' and '.' are replaced so the ISO timestamp is usable in a file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
  const audioDir = path.join(
    process.cwd(),
    'discord-audio-logs',
    guildId,
    channelId,
  )

  try {
    await mkdir(audioDir, { recursive: true })

    const inputFileName = `user_${timestamp}.16.pcm`
    const inputFilePath = path.join(audioDir, inputFileName)
    const inputAudioStream = createWriteStream(inputFilePath)

    // The file is opened asynchronously, so open/write failures surface as an
    // 'error' event rather than an exception the surrounding try/catch can
    // see. Without a listener that event would crash the process.
    inputAudioStream.on('error', (error) => {
      voiceLogger.error(`User audio log stream error (${inputFilePath}):`, error)
    })

    voiceLogger.log(`Created user audio log: ${inputFilePath}`)

    return inputAudioStream
  } catch (error) {
    voiceLogger.error('Failed to create audio log directory:', error)
    return undefined
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Builds a Transform that re-chunks a byte stream into fixed-size frames.
 *
 * Each emitted frame is exactly 3200 bytes, i.e. 100 ms of 16 kHz mono 16-bit
 * PCM. Bytes that do not yet fill a frame are carried over to the next chunk;
 * whatever is still pending when the stream ends is discarded.
 *
 * @returns A binary-mode Transform emitting 3200-byte frames.
 */
export function frameMono16khz(): Transform {
  const FRAME_DURATION_MS = 100
  const SAMPLE_RATE_HZ = 16_000
  const CHANNELS = 1
  const BYTES_PER_SAMPLE = 2
  const FRAME_BYTES =
    (FRAME_DURATION_MS * SAMPLE_RATE_HZ * CHANNELS * BYTES_PER_SAMPLE) / 1000

  // Bytes received so far that have not been emitted as part of a frame.
  let pending: Buffer = Buffer.alloc(0)

  return new Transform({
    readableObjectMode: false,
    writableObjectMode: false,

    transform(chunk: Buffer, _enc: BufferEncoding, cb: TransformCallback) {
      pending = pending.length > 0 ? Buffer.concat([pending, chunk]) : chunk

      // Peel complete frames off the front; the remainder waits for more data.
      while (pending.length >= FRAME_BYTES) {
        this.push(pending.subarray(0, FRAME_BYTES))
        pending = pending.subarray(FRAME_BYTES)
      }

      cb()
    },

    flush(cb: TransformCallback) {
      // A trailing partial frame is dropped rather than emitted short.
      pending = Buffer.alloc(0)
      cb()
    },
  })
}
|
|
138
|
+
|
|
139
|
+
/**
 * Attaches a GenAI voice assistant to an already-joined voice connection.
 *
 * Steps:
 * 1. Look up the project directory mapped to the voice channel; bail out if
 *    the channel has none or the guild has no entry in `voiceConnections`.
 * 2. Stop any worker / close any debug audio log left from a previous setup.
 * 3. Create a new GenAI worker whose callbacks play assistant audio on the
 *    connection and forward tool-call results back to the assistant.
 * 4. Subscribe to users starting to speak and pipe their audio through
 *    Opus decode -> 16 kHz mono downsample -> 100 ms framing -> worker.
 *
 * @param connection Voice connection to play assistant audio on and receive from.
 * @param guildId Guild id; key into `voiceConnections`.
 * @param channelId Voice channel id used for the directory lookup.
 * @param appId Bot application id used to look up the Gemini API key.
 * @param discordClient Client used to report worker errors to the linked text channel.
 */
export async function setupVoiceHandling({
  connection,
  guildId,
  channelId,
  appId,
  discordClient,
}: {
  connection: VoiceConnection
  guildId: string
  channelId: string
  appId: string
  discordClient: Client
}) {
  voiceLogger.log(
    `Setting up voice handling for guild ${guildId}, channel ${channelId}`,
  )

  const channelDirRow = getDatabase()
    .prepare(
      'SELECT directory FROM channel_directories WHERE channel_id = ? AND channel_type = ?',
    )
    .get(channelId, 'voice') as { directory: string } | undefined

  if (!channelDirRow) {
    voiceLogger.log(
      `Voice channel ${channelId} has no associated directory, skipping setup`,
    )
    return
  }

  const directory = channelDirRow.directory
  voiceLogger.log(`Found directory for voice channel: ${directory}`)

  const voiceData = voiceConnections.get(guildId)
  if (!voiceData) {
    voiceLogger.error(`No voice data found for guild ${guildId}`)
    return
  }

  // Tear down what a previous setup left behind BEFORE creating replacements,
  // so two workers never run side by side and the old log file is closed
  // instead of being leaked when the field is overwritten below.
  if (voiceData.genAiWorker) {
    voiceLogger.log('Stopping existing GenAI worker before creating new one')
    const previousWorker = voiceData.genAiWorker
    voiceData.genAiWorker = undefined
    await previousWorker.stop()
  }

  if (voiceData.userAudioStream) {
    voiceData.userAudioStream.end()
    voiceData.userAudioStream = undefined
  }

  voiceData.userAudioStream = await createUserAudioLogStream(guildId, channelId)

  const apiKeys = getDatabase()
    .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
    .get(appId) as { gemini_api_key: string | null } | undefined

  // Callbacks reach the worker through this binding. Referencing the
  // `genAiWorker` const from inside its own initializer would throw a
  // ReferenceError if a callback fired before `createGenAIWorker` resolved.
  let activeWorker: GenAIWorker | undefined

  const genAiWorker = await createGenAIWorker({
    directory,
    guildId,
    channelId,
    appId,
    geminiApiKey: apiKeys?.gemini_api_key,
    systemMessage: dedent`
      You are Kimaki, an AI similar to Jarvis: you help your user (an engineer) controlling his coding agent, just like Jarvis controls Ironman armor and machines. Speak fast.

      You should talk like Jarvis, British accent, satirical, joking and calm. Be short and concise. Speak fast.

      After tool calls give a super short summary of the assistant message, you should say what the assistant message writes.

      Before starting a new session ask for confirmation if it is not clear if the user finished describing it. ask "message ready, send?"

      NEVER repeat the whole tool call parameters or message.

      Your job is to manage many opencode agent chat instances. Opencode is the agent used to write the code, it is similar to Claude Code.

      For everything the user asks it is implicit that the user is asking for you to proxy the requests to opencode sessions.

      You can
      - start new chats on a given project
      - read the chats to report progress to the user
      - submit messages to the chat
      - list files for a given projects, so you can translate imprecise user prompts to precise messages that mention filename paths using @

      Common patterns
      - to get the last session use the listChats tool
      - when user asks you to do something you submit a new session to do it. it's implicit that you proxy requests to the agents chat!
      - when you submit a session assume the session will take a minute or 2 to complete the task

      Rules
      - never spell files by mentioning dots, letters, etc. instead give a brief description of the filename
      - NEVER spell hashes or IDs
      - never read session ids or other ids

      Your voice is calm and monotone, NEVER excited and goofy. But you speak without jargon or bs and do veiled short jokes.
      You speak like you knew something other don't. You are cool and cold.
    `,
    onAssistantOpusPacket(packet) {
      if (connection.state.status !== VoiceConnectionStatus.Ready) {
        voiceLogger.log('Skipping packet: connection not ready')
        return
      }

      try {
        connection.setSpeaking(true)
        connection.playOpusPacket(Buffer.from(packet))
      } catch (error) {
        voiceLogger.error('Error sending packet:', error)
      }
    },
    onAssistantStartSpeaking() {
      voiceLogger.log('Assistant started speaking')
      connection.setSpeaking(true)
    },
    onAssistantStopSpeaking() {
      voiceLogger.log('Assistant stopped speaking (natural finish)')
      connection.setSpeaking(false)
    },
    onAssistantInterruptSpeaking() {
      voiceLogger.log('Assistant interrupted while speaking')
      activeWorker?.interrupt()
      connection.setSpeaking(false)
    },
    onToolCallCompleted(params) {
      // Feed the coding agent's result (or failure) back to the assistant as
      // a system message so it can summarise it to the user.
      const text = params.error
        ? `<systemMessage>\nThe coding agent encountered an error while processing session ${params.sessionId}: ${params.error?.message || String(params.error)}\n</systemMessage>`
        : `<systemMessage>\nThe coding agent finished working on session ${params.sessionId}\n\nHere's what the assistant wrote:\n${params.markdown}\n</systemMessage>`

      activeWorker?.sendTextInput(text)
    },
    async onError(error) {
      voiceLogger.error('GenAI worker error:', error)

      // Find the text channel mapped to the same directory as this voice
      // channel so the error can be surfaced to the user.
      const textChannelRow = getDatabase()
        .prepare(
          `SELECT cd2.channel_id FROM channel_directories cd1
           JOIN channel_directories cd2 ON cd1.directory = cd2.directory
           WHERE cd1.channel_id = ? AND cd1.channel_type = 'voice' AND cd2.channel_type = 'text'`,
        )
        .get(channelId) as { channel_id: string } | undefined

      if (textChannelRow) {
        try {
          const textChannel = await discordClient.channels.fetch(
            textChannelRow.channel_id,
          )
          if (textChannel?.isTextBased() && 'send' in textChannel) {
            await textChannel.send({ content: `⚠️ Voice session error: ${error}`, flags: SILENT_MESSAGE_FLAGS })
          }
        } catch (e) {
          voiceLogger.error('Failed to send error to text channel:', e)
        }
      }
    },
  })
  activeWorker = genAiWorker

  genAiWorker.sendTextInput(
    `<systemMessage>\nsay "Hello boss, how we doing today?"\n</systemMessage>`,
  )

  voiceData.genAiWorker = genAiWorker

  const receiver = connection.receiver

  // Drop 'start' listeners from any earlier setup on this connection.
  receiver.speaking.removeAllListeners('start')

  // Incremented on every 'start' event. A pipeline only forwards audio while
  // it belongs to the most recent speaking session.
  let speakingSessionCount = 0

  receiver.speaking.on('start', (userId) => {
    voiceLogger.log(`User ${userId} started speaking`)

    speakingSessionCount++
    const currentSessionCount = speakingSessionCount
    voiceLogger.log(`Speaking session ${currentSessionCount} started`)

    const audioStream = receiver.subscribe(userId, {
      end: { behavior: EndBehaviorType.AfterSilence, duration: 500 },
    })

    const decoder = new prism.opus.Decoder({
      rate: 48000,
      channels: 2,
      frameSize: 960,
    })

    decoder.on('error', (error) => {
      voiceLogger.error(`Opus decoder error for user ${userId}:`, error)
    })

    const downsampleTransform = new Transform({
      transform(chunk: Buffer, _encoding, callback) {
        try {
          const downsampled = convertToMono16k(chunk)
          callback(null, downsampled)
        } catch (error) {
          callback(error as Error)
        }
      },
    })

    const framer = frameMono16khz()

    const pipeline = audioStream
      .pipe(decoder)
      .pipe(downsampleTransform)
      .pipe(framer)

    pipeline
      .on('data', (frame: Buffer) => {
        // A newer speaking session has started; ignore this older pipeline.
        if (currentSessionCount !== speakingSessionCount) {
          return
        }

        if (!voiceData.genAiWorker) {
          voiceLogger.warn(
            `[VOICE] Received audio frame but no GenAI worker active for guild ${guildId}`,
          )
          return
        }

        voiceData.userAudioStream?.write(frame)

        voiceData.genAiWorker.sendRealtimeInput({
          audio: {
            mimeType: 'audio/pcm;rate=16000',
            data: frame.toString('base64'),
          },
        })
      })
      .on('end', () => {
        if (currentSessionCount === speakingSessionCount) {
          voiceLogger.log(
            `User ${userId} stopped speaking (session ${currentSessionCount})`,
          )
          voiceData.genAiWorker?.sendRealtimeInput({
            audioStreamEnd: true,
          })
        } else {
          voiceLogger.log(
            `User ${userId} stopped speaking (session ${currentSessionCount}), but skipping audioStreamEnd because newer session ${speakingSessionCount} exists`,
          )
        }
      })
      .on('error', (error) => {
        voiceLogger.error(`Pipeline error for user ${userId}:`, error)
      })

    audioStream.on('error', (error) => {
      voiceLogger.error(`Audio stream error for user ${userId}:`, error)
    })

    downsampleTransform.on('error', (error) => {
      voiceLogger.error(`Downsample transform error for user ${userId}:`, error)
    })

    framer.on('error', (error) => {
      voiceLogger.error(`Framer error for user ${userId}:`, error)
    })
  })
}
|
|
391
|
+
|
|
392
|
+
/** Connection records whose cleanup is currently running. */
const cleanupsInFlight = new WeakSet<VoiceConnectionData>()

/**
 * Releases everything held for a guild's voice connection: stops the GenAI
 * worker, closes the debug audio log (waiting at most 2 seconds), destroys
 * the voice connection and removes the guild's entry from `voiceConnections`.
 *
 * Errors are logged, not thrown; the registry entry is removed either way.
 * Calling this while a cleanup for the same record is already running is a
 * no-op.
 *
 * @param guildId Guild whose voice connection should be torn down.
 */
export async function cleanupVoiceConnection(guildId: string) {
  const voiceData = voiceConnections.get(guildId)
  if (!voiceData) return

  // `connection.destroy()` below triggers the `Destroyed` listener registered
  // in `registerVoiceStateHandler`, which calls this function again while the
  // entry is still registered. Without this guard the worker would be stopped
  // and the log stream ended twice.
  if (cleanupsInFlight.has(voiceData)) return
  cleanupsInFlight.add(voiceData)

  voiceLogger.log(`Starting cleanup for guild ${guildId}`)

  try {
    if (voiceData.genAiWorker) {
      voiceLogger.log(`Stopping GenAI worker...`)
      await voiceData.genAiWorker.stop()
      voiceLogger.log(`GenAI worker stopped`)
    }

    const userAudioStream = voiceData.userAudioStream
    if (userAudioStream) {
      voiceLogger.log(`Closing user audio stream...`)
      await new Promise<void>((resolve) => {
        // Do not hang cleanup forever if the stream never finishes.
        const fallbackTimer = setTimeout(resolve, 2000)
        userAudioStream.end(() => {
          clearTimeout(fallbackTimer)
          voiceLogger.log('User audio stream closed')
          resolve()
        })
      })
    }

    if (
      voiceData.connection.state.status !== VoiceConnectionStatus.Destroyed
    ) {
      voiceLogger.log(`Destroying voice connection...`)
      voiceData.connection.destroy()
    }

    voiceLogger.log(`Cleanup complete for guild ${guildId}`)
  } catch (error) {
    voiceLogger.error(`Error during cleanup for guild ${guildId}:`, error)
  } finally {
    // Only remove the entry we cleaned up; a new connection for the same guild
    // may have been registered while we were awaiting above.
    if (voiceConnections.get(guildId) === voiceData) {
      voiceConnections.delete(guildId)
    }
    cleanupsInFlight.delete(voiceData)
  }
}
|
|
430
|
+
|
|
431
|
+
/**
 * Transcribes the first audio attachment of a Discord message and posts the
 * transcription into the thread.
 *
 * When `projectDirectory` is given, the project's tracked-file tree is added
 * to the transcription prompt so file names are transcribed accurately. When
 * `isNewThread` is true the thread is renamed to the first 80 characters of
 * the transcription (best effort, waits at most 2 seconds).
 *
 * @param message Message whose attachments are inspected.
 * @param thread Thread that receives progress, error and result messages.
 * @param projectDirectory Optional project root used for prompt context.
 * @param isNewThread Whether to rename the thread after the transcription.
 * @param appId Optional bot application id used to look up a Gemini API key.
 * @param currentSessionContext Optional context forwarded to `transcribeAudio`.
 * @param lastSessionContext Optional context forwarded to `transcribeAudio`.
 * @returns The transcription, or `null` when the message has no audio
 *   attachment, the download returned a non-2xx status, or transcription failed.
 */
export async function processVoiceAttachment({
  message,
  thread,
  projectDirectory,
  isNewThread = false,
  appId,
  currentSessionContext,
  lastSessionContext,
}: {
  message: Message
  thread: ThreadChannel
  projectDirectory?: string
  isNewThread?: boolean
  appId?: string
  currentSessionContext?: string
  lastSessionContext?: string
}): Promise<string | null> {
  const audioAttachment = Array.from(message.attachments.values()).find(
    (attachment) => attachment.contentType?.startsWith('audio/'),
  )

  if (!audioAttachment) return null

  voiceLogger.log(
    `Detected audio attachment: ${audioAttachment.name} (${audioAttachment.contentType})`,
  )

  await sendThreadMessage(thread, '🎤 Transcribing voice message...')

  const audioResponse = await fetch(audioAttachment.url)
  if (!audioResponse.ok) {
    // fetch() resolves on HTTP errors too; without this check the error body
    // would be handed to the transcriber as if it were audio.
    const reason = `HTTP ${audioResponse.status} ${audioResponse.statusText}`.trim()
    voiceLogger.error(`Failed to download audio attachment: ${reason}`)
    await sendThreadMessage(
      thread,
      `⚠️ Failed to download voice message: ${reason}`,
    )
    return null
  }
  const audioBuffer = Buffer.from(await audioResponse.arrayBuffer())

  voiceLogger.log(`Downloaded ${audioBuffer.length} bytes, transcribing...`)

  let transcriptionPrompt = 'Discord voice message transcription'

  if (projectDirectory) {
    try {
      voiceLogger.log(`Getting project file tree from ${projectDirectory}`)
      const execAsync = promisify(exec)
      // Requires both `git` and `tree`; failure only costs the extra context.
      const { stdout } = await execAsync('git ls-files | tree --fromfile -a', {
        cwd: projectDirectory,
      })
      const result = stdout

      if (result) {
        transcriptionPrompt = `Discord voice message transcription. Project file structure:\n${result}\n\nPlease transcribe file names and paths accurately based on this context.`
        voiceLogger.log(`Added project context to transcription prompt`)
      }
    } catch (e) {
      voiceLogger.log(`Could not get project tree:`, e)
    }
  }

  let geminiApiKey: string | undefined
  if (appId) {
    const apiKeys = getDatabase()
      .prepare('SELECT gemini_api_key FROM bot_api_keys WHERE app_id = ?')
      .get(appId) as { gemini_api_key: string | null } | undefined

    if (apiKeys?.gemini_api_key) {
      geminiApiKey = apiKeys.gemini_api_key
    }
  }

  let transcription: string
  try {
    transcription = await transcribeAudio({
      audio: audioBuffer,
      prompt: transcriptionPrompt,
      geminiApiKey,
      directory: projectDirectory,
      currentSessionContext,
      lastSessionContext,
    })
  } catch (error) {
    const errMsg = error instanceof Error ? error.message : String(error)
    voiceLogger.error(`Transcription failed:`, error)
    await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`)
    return null
  }

  voiceLogger.log(
    `Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`,
  )

  if (isNewThread) {
    const threadName = transcription.replace(/\s+/g, ' ').trim().slice(0, 80)
    if (threadName) {
      let renameTimer: ReturnType<typeof setTimeout> | undefined
      try {
        // Renaming is best effort: give up waiting after 2 seconds so a slow
        // rename does not delay posting the transcription.
        const renamed = await Promise.race([
          thread.setName(threadName).then(() => true),
          new Promise<boolean>((resolve) => {
            renameTimer = setTimeout(() => resolve(false), 2000)
          }),
        ])
        if (renamed) {
          voiceLogger.log(`Updated thread name to: "${threadName}"`)
        } else {
          voiceLogger.log(
            `Thread rename to "${threadName}" still pending after 2000ms, continuing`,
          )
        }
      } catch (e) {
        voiceLogger.log(`Could not update thread name:`, e)
      } finally {
        clearTimeout(renameTimer)
      }
    }
  }

  await sendThreadMessage(
    thread,
    `📝 **Transcribed message:** ${escapeDiscordFormatting(transcription)}`,
  )
  return transcription
}
|
|
538
|
+
|
|
539
|
+
/**
 * Registers the `VoiceStateUpdate` listener that makes the bot follow
 * privileged members in and out of voice channels.
 *
 * A member is privileged when they own the guild, have the Administrator or
 * Manage Server permission, or hold a role named "kimaki" (case-insensitive).
 * Updates from everyone else are ignored.
 *
 * - Privileged member leaves the bot's channel: the bot disconnects unless
 *   another privileged, non-bot member is still there.
 * - Privileged member moves out of the bot's channel: the bot follows unless
 *   another privileged, non-bot member stays behind.
 * - Otherwise, when the member is in a channel: the bot joins it, or moves
 *   there if it is already connected elsewhere in the guild.
 *
 * @param discordClient Client to attach the listener to.
 * @param appId Bot application id, forwarded to `setupVoiceHandling`.
 */
export function registerVoiceStateHandler({
  discordClient,
  appId,
}: {
  discordClient: Client
  appId: string
}) {
  type Member = NonNullable<VoiceState['member']>

  /** True when the member may summon or keep the bot in a voice channel. */
  const isPrivileged = (m: Member, ownerId: string): boolean =>
    m.id === ownerId ||
    m.permissions.has(PermissionsBitField.Flags.Administrator) ||
    m.permissions.has(PermissionsBitField.Flags.ManageGuild) ||
    m.roles.cache.some((role) => role.name.toLowerCase() === 'kimaki')

  /** True when a privileged, non-bot member other than `excludedId` is in the channel. */
  const hasOtherPrivilegedMembers = (
    channel: VoiceChannel,
    excludedId: string,
    ownerId: string,
  ): boolean =>
    channel.members.some((m) => {
      if (m.id === excludedId || m.user.bot) return false
      return isPrivileged(m, ownerId)
    })

  discordClient.on(Events.VoiceStateUpdate, async (oldState: VoiceState, newState: VoiceState) => {
    try {
      const member = newState.member || oldState.member
      if (!member) return

      const guild = newState.guild || oldState.guild
      // Kept as separate values because the join log line reports them.
      const isOwner = member.id === guild.ownerId
      const isAdmin = member.permissions.has(
        PermissionsBitField.Flags.Administrator,
      )

      if (!isPrivileged(member, guild.ownerId)) {
        return
      }

      // --- Member left voice entirely ---
      if (oldState.channelId !== null && newState.channelId === null) {
        voiceLogger.log(
          `Admin user ${member.user.tag} left voice channel: ${oldState.channel?.name}`,
        )

        const guildId = guild.id
        const voiceData = voiceConnections.get(guildId)

        if (
          voiceData &&
          voiceData.connection.joinConfig.channelId === oldState.channelId
        ) {
          const voiceChannel = oldState.channel as VoiceChannel
          if (!voiceChannel) return

          const hasOtherAdmins = hasOtherPrivilegedMembers(
            voiceChannel,
            member.id,
            guild.ownerId,
          )

          if (!hasOtherAdmins) {
            voiceLogger.log(
              `No other admins in channel, bot leaving voice channel in guild: ${guild.name}`,
            )

            await cleanupVoiceConnection(guildId)
          } else {
            voiceLogger.log(
              `Other admins still in channel, bot staying in voice channel`,
            )
          }
        }
        return
      }

      // --- Member moved between channels ---
      if (
        oldState.channelId !== null &&
        newState.channelId !== null &&
        oldState.channelId !== newState.channelId
      ) {
        voiceLogger.log(
          `Admin user ${member.user.tag} moved from ${oldState.channel?.name} to ${newState.channel?.name}`,
        )

        const guildId = guild.id
        const voiceData = voiceConnections.get(guildId)

        if (
          voiceData &&
          voiceData.connection.joinConfig.channelId === oldState.channelId
        ) {
          const oldVoiceChannel = oldState.channel as VoiceChannel
          if (oldVoiceChannel) {
            const hasOtherAdmins = hasOtherPrivilegedMembers(
              oldVoiceChannel,
              member.id,
              guild.ownerId,
            )

            if (!hasOtherAdmins) {
              voiceLogger.log(
                `Following admin to new channel: ${newState.channel?.name}`,
              )
              const voiceChannel = newState.channel as VoiceChannel
              if (voiceChannel) {
                voiceData.connection.rejoin({
                  channelId: voiceChannel.id,
                  selfDeaf: false,
                  selfMute: false,
                })
              }
            } else {
              voiceLogger.log(
                `Other admins still in old channel, bot staying put`,
              )
              // Stop here: falling through to the join logic below would see
              // the bot connected to a different channel than the member's
              // new one and move it anyway, contradicting this decision.
              return
            }
          }
        }
      }

      if (oldState.channelId === null && newState.channelId !== null) {
        voiceLogger.log(
          `Admin user ${member.user.tag} (Owner: ${isOwner}, Admin: ${isAdmin}) joined voice channel: ${newState.channel?.name}`,
        )
      }

      if (newState.channelId === null) return

      const voiceChannel = newState.channel as VoiceChannel
      if (!voiceChannel) return

      // --- Bot already has a live connection in this guild ---
      const existingVoiceData = voiceConnections.get(newState.guild.id)
      if (
        existingVoiceData &&
        existingVoiceData.connection.state.status !==
          VoiceConnectionStatus.Destroyed
      ) {
        voiceLogger.log(
          `Bot already connected to a voice channel in guild ${newState.guild.name}`,
        )

        if (
          existingVoiceData.connection.joinConfig.channelId !== voiceChannel.id
        ) {
          voiceLogger.log(
            `Moving bot from channel ${existingVoiceData.connection.joinConfig.channelId} to ${voiceChannel.id}`,
          )
          existingVoiceData.connection.rejoin({
            channelId: voiceChannel.id,
            selfDeaf: false,
            selfMute: false,
          })
        }
        return
      }

      // --- Fresh join ---
      try {
        voiceLogger.log(
          `Attempting to join voice channel: ${voiceChannel.name} (${voiceChannel.id})`,
        )

        const connection = joinVoiceChannel({
          channelId: voiceChannel.id,
          guildId: newState.guild.id,
          adapterCreator: newState.guild.voiceAdapterCreator,
          selfDeaf: false,
          debug: true,
          daveEncryption: false,
          selfMute: false,
        })

        // Registered before the connection is ready so that
        // `setupVoiceHandling` and later state updates can find it.
        voiceConnections.set(newState.guild.id, { connection })

        await entersState(connection, VoiceConnectionStatus.Ready, 30_000)
        voiceLogger.log(
          `Successfully joined voice channel: ${voiceChannel.name} in guild: ${newState.guild.name}`,
        )

        await setupVoiceHandling({
          connection,
          guildId: newState.guild.id,
          channelId: voiceChannel.id,
          appId,
          discordClient,
        })

        connection.on(VoiceConnectionStatus.Disconnected, async () => {
          voiceLogger.log(
            `Disconnected from voice channel in guild: ${newState.guild.name}`,
          )
          try {
            // Give the connection 5 seconds to start reconnecting on its own.
            await Promise.race([
              entersState(connection, VoiceConnectionStatus.Signalling, 5_000),
              entersState(connection, VoiceConnectionStatus.Connecting, 5_000),
            ])
            voiceLogger.log(`Reconnecting to voice channel`)
          } catch (error) {
            voiceLogger.log(`Failed to reconnect, destroying connection`)
            connection.destroy()
            voiceConnections.delete(newState.guild.id)
          }
        })

        connection.on(VoiceConnectionStatus.Destroyed, async () => {
          voiceLogger.log(
            `Connection destroyed for guild: ${newState.guild.name}`,
          )
          await cleanupVoiceConnection(newState.guild.id)
        })

        connection.on('error', (error) => {
          voiceLogger.error(
            `Connection error in guild ${newState.guild.name}:`,
            error,
          )
        })
      } catch (error) {
        voiceLogger.error(`Failed to join voice channel:`, error)
        await cleanupVoiceConnection(newState.guild.id)
      }
    } catch (error) {
      voiceLogger.error('Error in voice state update handler:', error)
    }
  })
}
|