kimaki 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,152 @@
1
+ import { Worker } from 'node:worker_threads'
2
+ import type { WorkerInMessage, WorkerOutMessage } from './worker-types.js'
3
+ import type { Tool as AITool } from 'ai'
4
+ import { createLogger } from './logger.js'
5
+
6
+ const genaiWorkerLogger = createLogger('GENAI WORKER')
7
+ const genaiWrapperLogger = createLogger('GENAI WORKER WRAPPER')
8
+
9
/**
 * Settings and event callbacks supplied when spawning the GenAI worker.
 *
 * `directory`, `systemMessage`, `guildId` and `channelId` are forwarded to the
 * worker in its `init` message; the callbacks are invoked on the calling
 * thread whenever the worker posts the matching message.
 */
export interface GenAIWorkerOptions {
  /** Forwarded as-is in the worker's `init` message. */
  directory: string
  /** Optional system message forwarded in the `init` message. */
  systemMessage?: string
  /** Forwarded as-is in the worker's `init` message. */
  guildId: string
  /** Forwarded as-is in the worker's `init` message. */
  channelId: string
  /** Called for every `assistantOpusPacket` message with its packet payload. */
  onAssistantOpusPacket: (packet: ArrayBuffer) => void
  /** Called when the worker posts `assistantStartSpeaking`. */
  onAssistantStartSpeaking?: () => void
  /** Called when the worker posts `assistantStopSpeaking`. */
  onAssistantStopSpeaking?: () => void
  /** Called when the worker posts `assistantInterruptSpeaking`. */
  onAssistantInterruptSpeaking?: () => void
  /** Called with the payload of every `toolCallCompleted` message. */
  onToolCallCompleted?: (params: {
    sessionId: string
    messageId: string
    data?: any
    error?: any
    markdown?: string
  }) => void
  /** Called with the error text of every `error` message from the worker. */
  onError?: (error: string) => void
}
27
+
28
/**
 * Handle for a running GenAI worker; each method posts one command message
 * to the worker thread.
 */
export interface GenAIWorker {
  /** Posts a `sendRealtimeInput` command carrying the given audio fields. */
  sendRealtimeInput(params: {
    audio?: { mimeType: string; data: string }
    audioStreamEnd?: boolean
  }): void
  /** Posts a `sendTextInput` command carrying `text`. */
  sendTextInput(text: string): void
  /** Posts an `interrupt` command. */
  interrupt(): void
  /**
   * Posts a `stop` command and resolves once the worker has exited or has
   * been force-terminated.
   */
  stop(): Promise<void>
}
37
+
38
/**
 * Spawns the GenAI worker thread and wires its messages to the callbacks in
 * `options`.
 *
 * The `init` message is posted right away. The returned promise resolves with
 * a {@link GenAIWorker} handle once the worker answers with `ready`.
 *
 * The promise rejects when, before `ready`, the worker emits an `error`
 * event, reports an `error` message, or exits. Without this a failed startup
 * would leave the caller awaiting forever.
 *
 * @param options - Init parameters and event callbacks.
 * @returns A promise for the command handle of the running worker.
 */
export function createGenAIWorker(
  options: GenAIWorkerOptions,
): Promise<GenAIWorker> {
  return new Promise((resolve, reject) => {
    const worker = new Worker(
      new URL('../dist/genai-worker.js', import.meta.url),
    )

    // Lifecycle flags shared by the event handlers and the handle below.
    let ready = false
    let exited = false

    const post = (message: WorkerInMessage): void => {
      worker.postMessage(message)
    }

    // Startup failed: settle the caller's promise and make sure the thread
    // does not linger, since the caller never receives a handle to stop it.
    const failStartup = (error: Error): void => {
      reject(error)
      if (!exited) {
        worker.terminate().catch((terminateError: unknown) => {
          genaiWrapperLogger.error(
            'Failed to terminate worker:',
            terminateError,
          )
        })
      }
    }

    const stop = async (): Promise<void> => {
      genaiWrapperLogger.log('Stopping worker...')

      // Nothing to wait for when the thread is already gone.
      if (exited) return

      // Send stop message to trigger graceful shutdown
      post({ type: 'stop' })

      // Wait for the worker to exit on its own, force-terminating after 5s.
      await new Promise<void>((resolveStop) => {
        let settled = false

        function onExit(code: number): void {
          if (settled) return
          settled = true
          // Clear the fallback timer so it cannot keep the event loop alive.
          clearTimeout(forceTimer)
          genaiWrapperLogger.log(
            `[GENAI WORKER WRAPPER] Worker exited with code ${code}`,
          )
          resolveStop()
        }

        const forceTimer = setTimeout(() => {
          if (settled) return
          settled = true
          worker.off('exit', onExit)
          genaiWrapperLogger.log(
            '[GENAI WORKER WRAPPER] Worker did not exit gracefully, terminating...',
          )
          worker
            .terminate()
            .then(
              () => {
                genaiWrapperLogger.log('Worker terminated')
              },
              (error: unknown) => {
                genaiWrapperLogger.error('Failed to terminate worker:', error)
              },
            )
            // Resolve either way so stop() can never hang on a failed terminate.
            .then(resolveStop)
        }, 5000)

        worker.once('exit', onExit)
      })
    }

    const handle: GenAIWorker = {
      sendRealtimeInput({ audio, audioStreamEnd }) {
        post({ type: 'sendRealtimeInput', audio, audioStreamEnd })
      },
      sendTextInput(text) {
        post({ type: 'sendTextInput', text })
      },
      interrupt() {
        post({ type: 'interrupt' })
      },
      stop,
    }

    // Handle messages from worker
    worker.on('message', (message: WorkerOutMessage) => {
      switch (message.type) {
        case 'assistantOpusPacket':
          options.onAssistantOpusPacket(message.packet)
          break
        case 'assistantStartSpeaking':
          options.onAssistantStartSpeaking?.()
          break
        case 'assistantStopSpeaking':
          options.onAssistantStopSpeaking?.()
          break
        case 'assistantInterruptSpeaking':
          options.onAssistantInterruptSpeaking?.()
          break
        case 'toolCallCompleted':
          options.onToolCallCompleted?.(message)
          break
        case 'error':
          genaiWorkerLogger.error('Error:', message.error)
          options.onError?.(message.error)
          // An error reported before `ready` means initialization failed.
          if (!ready) {
            failStartup(new Error(message.error))
          }
          break
        case 'ready':
          genaiWorkerLogger.log('Ready')
          ready = true
          resolve(handle)
          break
      }
    })

    // Handle worker errors
    worker.on('error', (error) => {
      genaiWorkerLogger.error('Worker error:', error)
      if (ready) {
        // The promise is already settled; surface the crash via onError.
        options.onError?.(
          error instanceof Error ? error.message : String(error),
        )
      } else {
        reject(error)
      }
    })

    worker.on('exit', (code) => {
      exited = true
      if (code !== 0) {
        genaiWorkerLogger.error(`Worker stopped with exit code ${code}`)
      }
      if (!ready) {
        // No-op when an earlier error already rejected the promise.
        reject(
          new Error(`GenAI worker exited with code ${code} before it was ready`),
        )
      }
    })

    // Send initialization message
    post({
      type: 'init',
      directory: options.directory,
      systemMessage: options.systemMessage,
      guildId: options.guildId,
      channelId: options.channelId,
    })
  })
}
@@ -0,0 +1,361 @@
1
+ import { parentPort, threadId } from 'node:worker_threads'
2
+ import { createWriteStream, type WriteStream } from 'node:fs'
3
+ import { mkdir } from 'node:fs/promises'
4
+ import path from 'node:path'
5
+ import { Resampler } from '@purinton/resampler'
6
+ import * as prism from 'prism-media'
7
+ import { startGenAiSession } from './genai.js'
8
+ import { getTools } from './tools.js'
9
+ import type { WorkerInMessage, WorkerOutMessage } from './worker-types.js'
10
+ import type { Session } from '@google/genai'
11
+ import { createLogger } from './logger.js'
12
+
13
// Refuse to load outside a worker thread: without a parent port there is
// nobody to exchange messages with.
if (parentPort === null) {
  throw new Error('This module must be run as a worker thread')
}

// Logger tagged with this worker's thread id.
const workerLogger = createLogger(`WORKER ${threadId}`)
workerLogger.log('GenAI worker started')
19
+
20
/**
 * Posts an `error` message carrying `error` to the main thread.
 * Declared ahead of the global handlers that use it; does nothing when
 * there is no parent port.
 */
function sendError(error: string) {
  if (!parentPort) return

  const payload = { type: 'error', error } satisfies WorkerOutMessage
  parentPort.postMessage(payload)
}
29
+
30
// Last-resort handlers for failures nothing else in the worker caught.

// An uncaught exception is logged, reported to the parent and then ends
// the worker right away with exit code 1.
process.on('uncaughtException', (err) => {
  workerLogger.error('Uncaught exception in worker:', err)
  sendError(`Worker crashed: ${err.message}`)
  process.exit(1)
})

// An unhandled rejection is logged and reported to the parent; the worker
// keeps running.
process.on('unhandledRejection', (reason, rejectedPromise) => {
  workerLogger.error(
    'Unhandled rejection in worker:',
    reason,
    'at promise:',
    rejectedPromise,
  )
  sendError(`Worker unhandled rejection: ${reason}`)
})
47
+
48
// Audio configuration
const AUDIO_CONFIG = {
  inputSampleRate: 24000, // GenAI output
  inputChannels: 1,
  outputSampleRate: 48000, // Discord expects
  outputChannels: 2,
  opusFrameSize: 960, // 20ms at 48kHz
}

// Converts the assistant's PCM from the input format to the output format.
const resampler = new Resampler({
  inRate: AUDIO_CONFIG.inputSampleRate,
  outRate: AUDIO_CONFIG.outputSampleRate,
  inChannels: AUDIO_CONFIG.inputChannels,
  outChannels: AUDIO_CONFIG.outputChannels,
  volume: 1,
  filterWindow: 8,
})

// Encodes the resampled PCM into Opus frames.
const opusEncoder = new prism.opus.Encoder({
  rate: AUDIO_CONFIG.outputSampleRate,
  channels: AUDIO_CONFIG.outputChannels,
  frameSize: AUDIO_CONFIG.opusFrameSize,
})

// Feed the resampler output into the encoder.
// No error listener is chained here on purpose: pipe() returns its
// destination, so `.pipe(opusEncoder).on('error', ...)` would only add a
// second listener on the encoder. That listener reported every encoder error
// to the parent twice and bypassed the corrupted-data filter in the
// dedicated encoder error handler registered further down in this module.
resampler.pipe(opusEncoder)
78
+
79
// Encoded Opus packets waiting to be forwarded, plus the timer that drains
// them one at a time.
const opusPacketQueue: Buffer[] = []
let packetInterval: NodeJS.Timeout | null = null

/**
 * Starts the 20ms timer that forwards one queued Opus packet per tick to the
 * main thread. Calling it while the timer is already running does nothing.
 */
function startPacketSending() {
  if (packetInterval !== null) return

  const forwardNextPacket = (): void => {
    const next = opusPacketQueue.shift()
    if (next === undefined) return

    // Copy exactly this packet's bytes into a standalone ArrayBuffer so its
    // ownership can be transferred with the message.
    const begin = next.byteOffset
    const transferable = next.buffer.slice(
      begin,
      begin + next.byteLength,
    ) as ArrayBuffer

    const outgoing = {
      type: 'assistantOpusPacket',
      packet: transferable,
    } satisfies WorkerOutMessage

    parentPort!.postMessage(outgoing, [transferable])
  }

  packetInterval = setInterval(forwardNextPacket, 20)
}
106
+
107
/**
 * Stops the packet forwarding timer, if one is running, and discards every
 * packet still waiting in the queue.
 */
function stopPacketSending() {
  const activeTimer = packetInterval
  packetInterval = null
  if (activeTimer !== null) {
    clearInterval(activeTimer)
  }

  opusPacketQueue.splice(0, opusPacketQueue.length)
}
114
+
115
// Active GenAI session together with its stop function; null until `init`
// has completed and again after cleanup.
let session: { session: Session; stop: () => void } | null = null

// Debug log file receiving the assistant audio; null when logging is off.
let audioLogStream: WriteStream | null = null

/**
 * Opens a debug log file for the assistant's audio.
 *
 * Logging is only enabled when the `DEBUG` environment variable is set. The
 * file is created under `<cwd>/discord-audio-logs/<guildId>/<channelId>/`
 * and named after the current timestamp.
 *
 * @returns The write stream, or `null` when logging is disabled or the log
 *   file could not be set up.
 */
async function createAssistantAudioLogStream(
  guildId: string,
  channelId: string,
): Promise<WriteStream | null> {
  const loggingEnabled = Boolean(process.env.DEBUG)
  if (!loggingEnabled) return null

  // ':' and '.' are replaced so the ISO timestamp is usable in a file name.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-')
  const logDir = path.join(
    process.cwd(),
    'discord-audio-logs',
    guildId,
    channelId,
  )

  try {
    await mkdir(logDir, { recursive: true })

    // The file receives the assistant audio exactly as handed to the worker
    // (original note: 24kHz mono s16le PCM).
    const logPath = path.join(logDir, `assistant_${stamp}.24.pcm`)
    const logStream = createWriteStream(logPath)

    // A failing log file must never take the worker down; just report it.
    logStream.on('error', (streamError) => {
      workerLogger.error(`Assistant audio log stream error:`, streamError)
    })

    workerLogger.log(`Created assistant audio log: ${logPath}`)
    return logStream
  } catch (setupError) {
    workerLogger.error(`Failed to create audio log directory:`, setupError)
    return null
  }
}
157
+
158
// Every encoded Opus packet is queued for the packet forwarding timer.
opusEncoder.on('data', (encoded: Buffer) => {
  opusPacketQueue.push(encoded)
})

// Log when either stage of the audio pipeline ends.
opusEncoder.on('end', () => {
  workerLogger.log('Opus encoder stream ended')
})

resampler.on('end', () => {
  workerLogger.log('Resampler stream ended')
})

// Resampler failures are logged and reported to the main thread.
resampler.on('error', (err: any) => {
  workerLogger.error(`Resampler error:`, err)
  sendError(`Resampler error: ${err.message}`)
})

// Encoder failures are logged; corrupted-data errors only produce a warning,
// everything else is also reported to the main thread.
opusEncoder.on('error', (err: any) => {
  workerLogger.error(`Encoder error:`, err)

  const isCorruptedData = err.message?.includes(
    'The compressed data passed is corrupted',
  )
  if (isCorruptedData) {
    workerLogger.warn('Received corrupted audio data in opus encoder')
    return
  }

  sendError(`Encoder error: ${err.message}`)
})
187
+
188
/**
 * Runs `end`, then resolves as soon as `end` invokes its completion callback
 * or `timeoutMs` has elapsed, whichever happens first. The fallback timer is
 * cleared on completion so it cannot keep the event loop alive.
 */
function endWithTimeout(
  end: (done: () => void) => void,
  timeoutMs: number,
  onTimeout?: () => void,
): Promise<void> {
  return new Promise<void>((resolve) => {
    let settled = false

    const settle = (): void => {
      if (settled) return
      settled = true
      clearTimeout(fallbackTimer)
      resolve()
    }

    const fallbackTimer = setTimeout(() => {
      if (settled) return
      onTimeout?.()
      settle()
    }, timeoutMs)

    end(settle)
  })
}

/**
 * Releases everything the worker holds, in order: the packet forwarding
 * timer and queue, the GenAI session, the audio log stream, and finally the
 * encoder and resampler streams.
 *
 * Every stream shutdown is bounded by a timeout, and a failure while closing
 * the audio log is only logged, so the later steps always run.
 */
async function cleanupAsync(): Promise<void> {
  workerLogger.log(`Starting async cleanup`)

  stopPacketSending()

  if (session) {
    workerLogger.log(`Stopping GenAI session`)
    session.stop()
    session = null
  }

  // Wait for audio log stream to finish writing
  const logStream = audioLogStream
  if (logStream) {
    workerLogger.log(`Closing assistant audio log stream`)
    await endWithTimeout(
      (done) => {
        // A broken debug log must not abort the rest of the cleanup.
        logStream.once('error', (error) => {
          workerLogger.error(`Error closing assistant audio log stream:`, error)
          done()
        })
        logStream.end(() => {
          workerLogger.log(`Assistant audio log stream closed`)
          done()
        })
      },
      3000,
      () => {
        workerLogger.log(`Audio stream close timeout, continuing`)
      },
    )
    audioLogStream = null
  }

  // Detach the encoder from the resampler before ending either stream.
  resampler.unpipe(opusEncoder)

  // End the encoder stream
  await endWithTimeout((done) => {
    opusEncoder.end(() => {
      workerLogger.log(`Opus encoder ended`)
      done()
    })
  }, 1000)

  // End the resampler stream
  await endWithTimeout((done) => {
    resampler.end(() => {
      workerLogger.log(`Resampler ended`)
      done()
    })
  }, 1000)

  workerLogger.log(`Async cleanup complete`)
}
242
+
243
// Dispatches the commands posted by the main thread. Any error thrown while
// handling a command is logged and reported back through sendError.
parentPort.on('message', async (message: WorkerInMessage) => {
  try {
    switch (message.type) {
      case 'init': {
        const { directory, guildId, channelId, systemMessage } = message
        workerLogger.log(`Initializing with directory:`, directory)

        // Debug audio log (null unless enabled), then the packet timer.
        audioLogStream = await createAssistantAudioLogStream(guildId, channelId)
        startPacketSending()

        // Tools for the directory; completed tool calls go to the parent.
        const { tools } = await getTools({
          directory,
          onMessageCompleted: (params) => {
            parentPort!.postMessage({
              type: 'toolCallCompleted',
              ...params,
            } satisfies WorkerOutMessage)
          },
        })

        session = await startGenAiSession({
          tools,
          systemMessage,
          onAssistantAudioChunk({ data }) {
            // Mirror the chunk into the debug log while it is still usable.
            const logStream = audioLogStream
            if (logStream && !logStream.destroyed) {
              logStream.write(data, (writeError) => {
                if (writeError) {
                  workerLogger.error('Error writing to audio log:', writeError)
                }
              })
            }

            // Feed the chunk into the resampler -> encoder pipeline.
            if (resampler.destroyed) return
            resampler.write(data, (writeError) => {
              if (!writeError) return
              workerLogger.error('Error writing to resampler:', writeError)
              sendError(`Failed to process audio: ${writeError.message}`)
            })
          },
          onAssistantStartSpeaking() {
            parentPort!.postMessage({
              type: 'assistantStartSpeaking',
            } satisfies WorkerOutMessage)
          },
          onAssistantStopSpeaking() {
            parentPort!.postMessage({
              type: 'assistantStopSpeaking',
            } satisfies WorkerOutMessage)
          },
          onAssistantInterruptSpeaking() {
            parentPort!.postMessage({
              type: 'assistantInterruptSpeaking',
            } satisfies WorkerOutMessage)
          },
        })

        // Tell the main thread that commands can now be sent.
        parentPort!.postMessage({
          type: 'ready',
        } satisfies WorkerOutMessage)
        break
      }

      case 'sendRealtimeInput': {
        if (session) {
          session.session.sendRealtimeInput({
            audio: message.audio,
            audioStreamEnd: message.audioStreamEnd,
          })
        } else {
          sendError('Session not initialized')
        }
        break
      }

      case 'sendTextInput': {
        if (session) {
          session.session.sendRealtimeInput({
            text: message.text,
          })
        } else {
          sendError('Session not initialized')
        }
        break
      }

      case 'interrupt': {
        workerLogger.log(`Interrupting playback`)
        // Drop everything that has been encoded but not yet forwarded.
        opusPacketQueue.length = 0
        break
      }

      case 'stop': {
        workerLogger.log(`Stopping worker`)
        await cleanupAsync()
        // NOTE(review): the explicit process.exit(0) here is disabled, so the
        // worker does not end itself after cleanup; presumably the parent is
        // expected to terminate it. Confirm this is intended.
        // process.exit(0)
        break
      }
    }
  } catch (caught) {
    workerLogger.error(`Error handling message:`, caught)
    const text =
      caught instanceof Error ? caught.message : 'Unknown error in worker'
    sendError(text)
  }
})