@brianli/kimaki 0.4.72-brianli.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. package/bin.js +2 -0
  2. package/dist/ai-tool-to-genai.js +233 -0
  3. package/dist/ai-tool-to-genai.test.js +267 -0
  4. package/dist/ai-tool.js +6 -0
  5. package/dist/bin.js +87 -0
  6. package/dist/bot-token.js +121 -0
  7. package/dist/bot-token.test.js +134 -0
  8. package/dist/channel-management.js +101 -0
  9. package/dist/cli-parsing.test.js +89 -0
  10. package/dist/cli.js +2529 -0
  11. package/dist/commands/abort.js +82 -0
  12. package/dist/commands/action-buttons.js +257 -0
  13. package/dist/commands/add-project.js +114 -0
  14. package/dist/commands/agent.js +291 -0
  15. package/dist/commands/ask-question.js +223 -0
  16. package/dist/commands/compact.js +120 -0
  17. package/dist/commands/context-usage.js +140 -0
  18. package/dist/commands/create-new-project.js +118 -0
  19. package/dist/commands/diff.js +128 -0
  20. package/dist/commands/file-upload.js +275 -0
  21. package/dist/commands/fork.js +217 -0
  22. package/dist/commands/gemini-apikey.js +70 -0
  23. package/dist/commands/login.js +490 -0
  24. package/dist/commands/mention-mode.js +51 -0
  25. package/dist/commands/merge-worktree.js +124 -0
  26. package/dist/commands/model.js +694 -0
  27. package/dist/commands/permissions.js +163 -0
  28. package/dist/commands/queue.js +217 -0
  29. package/dist/commands/remove-project.js +115 -0
  30. package/dist/commands/restart-opencode-server.js +116 -0
  31. package/dist/commands/resume.js +159 -0
  32. package/dist/commands/run-command.js +79 -0
  33. package/dist/commands/session-id.js +78 -0
  34. package/dist/commands/session.js +192 -0
  35. package/dist/commands/share.js +80 -0
  36. package/dist/commands/types.js +2 -0
  37. package/dist/commands/undo-redo.js +159 -0
  38. package/dist/commands/unset-model.js +152 -0
  39. package/dist/commands/upgrade.js +42 -0
  40. package/dist/commands/user-command.js +148 -0
  41. package/dist/commands/verbosity.js +60 -0
  42. package/dist/commands/worktree-settings.js +50 -0
  43. package/dist/commands/worktree.js +299 -0
  44. package/dist/condense-memory.js +33 -0
  45. package/dist/config.js +110 -0
  46. package/dist/database.js +1050 -0
  47. package/dist/db.js +159 -0
  48. package/dist/db.test.js +49 -0
  49. package/dist/discord-api.js +28 -0
  50. package/dist/discord-auth.js +231 -0
  51. package/dist/discord-auth.test.js +80 -0
  52. package/dist/discord-bot.js +997 -0
  53. package/dist/discord-utils.js +560 -0
  54. package/dist/discord-utils.test.js +115 -0
  55. package/dist/errors.js +167 -0
  56. package/dist/escape-backticks.test.js +429 -0
  57. package/dist/format-tables.js +122 -0
  58. package/dist/format-tables.test.js +199 -0
  59. package/dist/forum-sync/config.js +79 -0
  60. package/dist/forum-sync/discord-operations.js +154 -0
  61. package/dist/forum-sync/index.js +5 -0
  62. package/dist/forum-sync/markdown.js +117 -0
  63. package/dist/forum-sync/sync-to-discord.js +417 -0
  64. package/dist/forum-sync/sync-to-files.js +190 -0
  65. package/dist/forum-sync/types.js +53 -0
  66. package/dist/forum-sync/watchers.js +307 -0
  67. package/dist/gateway-consumer.js +232 -0
  68. package/dist/gateway-consumer.test.js +18 -0
  69. package/dist/genai-worker-wrapper.js +111 -0
  70. package/dist/genai-worker.js +311 -0
  71. package/dist/genai.js +232 -0
  72. package/dist/generated/browser.js +17 -0
  73. package/dist/generated/client.js +35 -0
  74. package/dist/generated/commonInputTypes.js +10 -0
  75. package/dist/generated/enums.js +30 -0
  76. package/dist/generated/internal/class.js +41 -0
  77. package/dist/generated/internal/prismaNamespace.js +239 -0
  78. package/dist/generated/internal/prismaNamespaceBrowser.js +209 -0
  79. package/dist/generated/models/bot_api_keys.js +1 -0
  80. package/dist/generated/models/bot_tokens.js +1 -0
  81. package/dist/generated/models/channel_agents.js +1 -0
  82. package/dist/generated/models/channel_directories.js +1 -0
  83. package/dist/generated/models/channel_mention_mode.js +1 -0
  84. package/dist/generated/models/channel_models.js +1 -0
  85. package/dist/generated/models/channel_verbosity.js +1 -0
  86. package/dist/generated/models/channel_worktrees.js +1 -0
  87. package/dist/generated/models/forum_sync_configs.js +1 -0
  88. package/dist/generated/models/global_models.js +1 -0
  89. package/dist/generated/models/ipc_requests.js +1 -0
  90. package/dist/generated/models/part_messages.js +1 -0
  91. package/dist/generated/models/scheduled_tasks.js +1 -0
  92. package/dist/generated/models/session_agents.js +1 -0
  93. package/dist/generated/models/session_models.js +1 -0
  94. package/dist/generated/models/session_start_sources.js +1 -0
  95. package/dist/generated/models/thread_sessions.js +1 -0
  96. package/dist/generated/models/thread_worktrees.js +1 -0
  97. package/dist/generated/models.js +1 -0
  98. package/dist/heap-monitor.js +95 -0
  99. package/dist/hrana-server.js +416 -0
  100. package/dist/hrana-server.test.js +368 -0
  101. package/dist/image-utils.js +112 -0
  102. package/dist/interaction-handler.js +327 -0
  103. package/dist/ipc-polling.js +251 -0
  104. package/dist/kimaki-digital-twin.e2e.test.js +165 -0
  105. package/dist/limit-heading-depth.js +25 -0
  106. package/dist/limit-heading-depth.test.js +105 -0
  107. package/dist/logger.js +160 -0
  108. package/dist/markdown.js +342 -0
  109. package/dist/markdown.test.js +253 -0
  110. package/dist/message-formatting.js +433 -0
  111. package/dist/message-formatting.test.js +73 -0
  112. package/dist/openai-realtime.js +228 -0
  113. package/dist/opencode-plugin-loading.e2e.test.js +91 -0
  114. package/dist/opencode-plugin.js +536 -0
  115. package/dist/opencode-plugin.test.js +98 -0
  116. package/dist/opencode.js +409 -0
  117. package/dist/privacy-sanitizer.js +105 -0
  118. package/dist/runtime-mode.js +51 -0
  119. package/dist/runtime-mode.test.js +115 -0
  120. package/dist/sentry.js +127 -0
  121. package/dist/session-handler/state.js +151 -0
  122. package/dist/session-handler.js +1874 -0
  123. package/dist/session-search.js +100 -0
  124. package/dist/session-search.test.js +40 -0
  125. package/dist/startup-service.js +153 -0
  126. package/dist/system-message.js +499 -0
  127. package/dist/task-runner.js +282 -0
  128. package/dist/task-schedule.js +191 -0
  129. package/dist/task-schedule.test.js +71 -0
  130. package/dist/thinking-utils.js +35 -0
  131. package/dist/thread-message-queue.e2e.test.js +781 -0
  132. package/dist/tools.js +359 -0
  133. package/dist/unnest-code-blocks.js +136 -0
  134. package/dist/unnest-code-blocks.test.js +641 -0
  135. package/dist/upgrade.js +114 -0
  136. package/dist/utils.js +109 -0
  137. package/dist/voice-handler.js +606 -0
  138. package/dist/voice.js +304 -0
  139. package/dist/voice.test.js +187 -0
  140. package/dist/wait-session.js +94 -0
  141. package/dist/worker-types.js +4 -0
  142. package/dist/worktree-utils.js +727 -0
  143. package/dist/xml.js +92 -0
  144. package/dist/xml.test.js +32 -0
  145. package/package.json +82 -0
  146. package/schema.prisma +246 -0
  147. package/skills/batch/SKILL.md +87 -0
  148. package/skills/critique/SKILL.md +129 -0
  149. package/skills/errore/SKILL.md +589 -0
  150. package/skills/goke/.prettierrc +5 -0
  151. package/skills/goke/CHANGELOG.md +40 -0
  152. package/skills/goke/LICENSE +21 -0
  153. package/skills/goke/README.md +666 -0
  154. package/skills/goke/SKILL.md +458 -0
  155. package/skills/goke/package.json +43 -0
  156. package/skills/goke/src/__test__/coerce.test.ts +411 -0
  157. package/skills/goke/src/__test__/index.test.ts +1798 -0
  158. package/skills/goke/src/__test__/types.test-d.ts +111 -0
  159. package/skills/goke/src/coerce.ts +547 -0
  160. package/skills/goke/src/goke.ts +1362 -0
  161. package/skills/goke/src/index.ts +16 -0
  162. package/skills/goke/src/mri.ts +164 -0
  163. package/skills/goke/tsconfig.json +15 -0
  164. package/skills/jitter/EDITOR.md +219 -0
  165. package/skills/jitter/EXPORT-INTERNALS.md +309 -0
  166. package/skills/jitter/SKILL.md +158 -0
  167. package/skills/jitter/jitter-clipboard.json +1042 -0
  168. package/skills/jitter/package.json +14 -0
  169. package/skills/jitter/tsconfig.json +15 -0
  170. package/skills/jitter/utils/actions.ts +212 -0
  171. package/skills/jitter/utils/export.ts +114 -0
  172. package/skills/jitter/utils/index.ts +141 -0
  173. package/skills/jitter/utils/snapshot.ts +154 -0
  174. package/skills/jitter/utils/traverse.ts +246 -0
  175. package/skills/jitter/utils/types.ts +279 -0
  176. package/skills/jitter/utils/wait.ts +133 -0
  177. package/skills/playwriter/SKILL.md +31 -0
  178. package/skills/security-review/SKILL.md +208 -0
  179. package/skills/simplify/SKILL.md +58 -0
  180. package/skills/termcast/SKILL.md +945 -0
  181. package/skills/tuistory/SKILL.md +250 -0
  182. package/skills/zustand-centralized-state/SKILL.md +582 -0
  183. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  184. package/src/__snapshots__/compact-session-context.md +41 -0
  185. package/src/__snapshots__/first-session-no-info.md +17 -0
  186. package/src/__snapshots__/first-session-with-info.md +23 -0
  187. package/src/__snapshots__/session-1.md +17 -0
  188. package/src/__snapshots__/session-2.md +5871 -0
  189. package/src/__snapshots__/session-3.md +17 -0
  190. package/src/__snapshots__/session-with-tools.md +5871 -0
  191. package/src/ai-tool-to-genai.test.ts +296 -0
  192. package/src/ai-tool-to-genai.ts +282 -0
  193. package/src/ai-tool.ts +39 -0
  194. package/src/bin.ts +108 -0
  195. package/src/bot-token.test.ts +171 -0
  196. package/src/bot-token.ts +159 -0
  197. package/src/channel-management.ts +172 -0
  198. package/src/cli-parsing.test.ts +132 -0
  199. package/src/cli.ts +3605 -0
  200. package/src/commands/abort.ts +112 -0
  201. package/src/commands/action-buttons.ts +376 -0
  202. package/src/commands/add-project.ts +152 -0
  203. package/src/commands/agent.ts +404 -0
  204. package/src/commands/ask-question.ts +330 -0
  205. package/src/commands/compact.ts +157 -0
  206. package/src/commands/context-usage.ts +199 -0
  207. package/src/commands/create-new-project.ts +179 -0
  208. package/src/commands/diff.ts +165 -0
  209. package/src/commands/file-upload.ts +389 -0
  210. package/src/commands/fork.ts +320 -0
  211. package/src/commands/gemini-apikey.ts +104 -0
  212. package/src/commands/login.ts +634 -0
  213. package/src/commands/mention-mode.ts +77 -0
  214. package/src/commands/merge-worktree.ts +177 -0
  215. package/src/commands/model.ts +961 -0
  216. package/src/commands/permissions.ts +261 -0
  217. package/src/commands/queue.ts +296 -0
  218. package/src/commands/remove-project.ts +155 -0
  219. package/src/commands/restart-opencode-server.ts +162 -0
  220. package/src/commands/resume.ts +242 -0
  221. package/src/commands/run-command.ts +123 -0
  222. package/src/commands/session-id.ts +109 -0
  223. package/src/commands/session.ts +250 -0
  224. package/src/commands/share.ts +106 -0
  225. package/src/commands/types.ts +25 -0
  226. package/src/commands/undo-redo.ts +221 -0
  227. package/src/commands/unset-model.ts +189 -0
  228. package/src/commands/upgrade.ts +52 -0
  229. package/src/commands/user-command.ts +193 -0
  230. package/src/commands/verbosity.ts +88 -0
  231. package/src/commands/worktree-settings.ts +79 -0
  232. package/src/commands/worktree.ts +431 -0
  233. package/src/condense-memory.ts +36 -0
  234. package/src/config.ts +148 -0
  235. package/src/database.ts +1530 -0
  236. package/src/db.test.ts +60 -0
  237. package/src/db.ts +190 -0
  238. package/src/discord-api.ts +35 -0
  239. package/src/discord-bot.ts +1316 -0
  240. package/src/discord-utils.test.ts +132 -0
  241. package/src/discord-utils.ts +767 -0
  242. package/src/errors.ts +213 -0
  243. package/src/escape-backticks.test.ts +469 -0
  244. package/src/format-tables.test.ts +223 -0
  245. package/src/format-tables.ts +145 -0
  246. package/src/forum-sync/config.ts +92 -0
  247. package/src/forum-sync/discord-operations.ts +241 -0
  248. package/src/forum-sync/index.ts +9 -0
  249. package/src/forum-sync/markdown.ts +176 -0
  250. package/src/forum-sync/sync-to-discord.ts +595 -0
  251. package/src/forum-sync/sync-to-files.ts +294 -0
  252. package/src/forum-sync/types.ts +175 -0
  253. package/src/forum-sync/watchers.ts +454 -0
  254. package/src/genai-worker-wrapper.ts +164 -0
  255. package/src/genai-worker.ts +386 -0
  256. package/src/genai.ts +321 -0
  257. package/src/generated/browser.ts +109 -0
  258. package/src/generated/client.ts +131 -0
  259. package/src/generated/commonInputTypes.ts +512 -0
  260. package/src/generated/enums.ts +46 -0
  261. package/src/generated/internal/class.ts +362 -0
  262. package/src/generated/internal/prismaNamespace.ts +2251 -0
  263. package/src/generated/internal/prismaNamespaceBrowser.ts +308 -0
  264. package/src/generated/models/bot_api_keys.ts +1288 -0
  265. package/src/generated/models/bot_tokens.ts +1577 -0
  266. package/src/generated/models/channel_agents.ts +1256 -0
  267. package/src/generated/models/channel_directories.ts +2104 -0
  268. package/src/generated/models/channel_mention_mode.ts +1300 -0
  269. package/src/generated/models/channel_models.ts +1288 -0
  270. package/src/generated/models/channel_verbosity.ts +1224 -0
  271. package/src/generated/models/channel_worktrees.ts +1308 -0
  272. package/src/generated/models/forum_sync_configs.ts +1452 -0
  273. package/src/generated/models/global_models.ts +1288 -0
  274. package/src/generated/models/ipc_requests.ts +1485 -0
  275. package/src/generated/models/part_messages.ts +1302 -0
  276. package/src/generated/models/scheduled_tasks.ts +2320 -0
  277. package/src/generated/models/session_agents.ts +1086 -0
  278. package/src/generated/models/session_models.ts +1114 -0
  279. package/src/generated/models/session_start_sources.ts +1408 -0
  280. package/src/generated/models/thread_sessions.ts +1599 -0
  281. package/src/generated/models/thread_worktrees.ts +1352 -0
  282. package/src/generated/models.ts +29 -0
  283. package/src/heap-monitor.ts +121 -0
  284. package/src/hrana-server.test.ts +428 -0
  285. package/src/hrana-server.ts +547 -0
  286. package/src/image-utils.ts +149 -0
  287. package/src/interaction-handler.ts +461 -0
  288. package/src/ipc-polling.ts +325 -0
  289. package/src/kimaki-digital-twin.e2e.test.ts +201 -0
  290. package/src/limit-heading-depth.test.ts +116 -0
  291. package/src/limit-heading-depth.ts +26 -0
  292. package/src/logger.ts +203 -0
  293. package/src/markdown.test.ts +360 -0
  294. package/src/markdown.ts +410 -0
  295. package/src/message-formatting.test.ts +81 -0
  296. package/src/message-formatting.ts +549 -0
  297. package/src/openai-realtime.ts +362 -0
  298. package/src/opencode-plugin-loading.e2e.test.ts +112 -0
  299. package/src/opencode-plugin.test.ts +108 -0
  300. package/src/opencode-plugin.ts +652 -0
  301. package/src/opencode.ts +554 -0
  302. package/src/privacy-sanitizer.ts +142 -0
  303. package/src/schema.sql +158 -0
  304. package/src/sentry.ts +137 -0
  305. package/src/session-handler/state.ts +232 -0
  306. package/src/session-handler.ts +2668 -0
  307. package/src/session-search.test.ts +50 -0
  308. package/src/session-search.ts +148 -0
  309. package/src/startup-service.ts +200 -0
  310. package/src/system-message.ts +568 -0
  311. package/src/task-runner.ts +425 -0
  312. package/src/task-schedule.test.ts +84 -0
  313. package/src/task-schedule.ts +287 -0
  314. package/src/thinking-utils.ts +61 -0
  315. package/src/thread-message-queue.e2e.test.ts +997 -0
  316. package/src/tools.ts +432 -0
  317. package/src/unnest-code-blocks.test.ts +679 -0
  318. package/src/unnest-code-blocks.ts +168 -0
  319. package/src/upgrade.ts +127 -0
  320. package/src/utils.ts +145 -0
  321. package/src/voice-handler.ts +852 -0
  322. package/src/voice.test.ts +219 -0
  323. package/src/voice.ts +444 -0
  324. package/src/wait-session.ts +147 -0
  325. package/src/worker-types.ts +64 -0
  326. package/src/worktree-utils.ts +988 -0
  327. package/src/xml.test.ts +38 -0
  328. package/src/xml.ts +121 -0
@@ -0,0 +1,386 @@
1
+ // Worker thread for GenAI voice processing.
2
+ // Runs in a separate thread to handle audio encoding/decoding without blocking.
3
+ // Resamples 24kHz GenAI output to 48kHz stereo Opus packets for Discord.
4
+
5
+ import { parentPort, threadId } from 'node:worker_threads'
6
+ import { createWriteStream, type WriteStream } from 'node:fs'
7
+ import path from 'node:path'
8
+ import * as errore from 'errore'
9
+ import { Resampler } from '@purinton/resampler'
10
+ import * as prism from 'prism-media'
11
+ import { startGenAiSession } from './genai.js'
12
+ import type { Session } from '@google/genai'
13
+ import { getTools } from './tools.js'
14
+ import { mkdir } from 'node:fs/promises'
15
+ import type { WorkerInMessage, WorkerOutMessage } from './worker-types.js'
16
+ import { createLogger, formatErrorWithStack, LogPrefix } from './logger.js'
17
+ import { initSentry, notifyError } from './sentry.js'
18
+
19
// Everything below talks to the main thread through parentPort, so refuse to
// load when this module is not running as a worker.
if (parentPort === null) {
  throw new Error('This module must be run as a worker thread')
}

// Logger tagged with the thread id so output from different workers can be told apart.
const workerLogger = createLogger(`${LogPrefix.WORKER}_${threadId}`)
workerLogger.log('GenAI worker started')

// Sentry is initialized per thread; KIMAKI_SENTRY_DSN is inherited from the parent.
initSentry()
28
+
29
/**
 * Posts an `error` message carrying `error` to the main thread.
 * Declared ahead of the global process handlers so they can call it.
 * Does nothing when there is no parent port.
 */
function sendError(error: string) {
  parentPort?.postMessage({
    type: 'error',
    error,
  } satisfies WorkerOutMessage)
}
38
+
39
// Global safety net for synchronous failures: log, report to Sentry,
// notify the main thread, then exit with a failure code.
process.on('uncaughtException', (err) => {
  workerLogger.error('Uncaught exception in worker:', err)
  void notifyError(err, 'Uncaught exception in GenAI worker')
  sendError(`Worker crashed: ${err.message}`)
  process.exit(1)
})
46
+
47
// Global safety net for rejected promises nobody awaited. Unlike the
// uncaughtException handler this one does not exit.
process.on('unhandledRejection', (reason, promise) => {
  const description = formatErrorWithStack(reason)
  workerLogger.error(
    'Unhandled rejection in worker:',
    description,
    'at promise:',
    promise,
  )

  // Non-Error rejection reasons are wrapped in an Error built from the formatted text.
  let reported: Error
  if (reason instanceof Error) {
    reported = reason
  } else {
    reported = new Error(description)
  }

  void notifyError(reported, 'Unhandled rejection in GenAI worker')
  sendError(`Worker unhandled rejection: ${description}`)
})
62
+
63
// Audio formats on both ends of the pipeline.
const AUDIO_CONFIG = {
  // What GenAI produces
  inputSampleRate: 24000,
  inputChannels: 1,
  // What Discord expects
  outputSampleRate: 48000,
  outputChannels: 2,
  // Samples per Opus frame: 20ms at 48kHz
  opusFrameSize: 960,
}
71
+
72
// Build the two processing stages from AUDIO_CONFIG.

// Stage 1: converts the input rate/channel layout to the output one.
const resamplerOptions = {
  inRate: AUDIO_CONFIG.inputSampleRate,
  outRate: AUDIO_CONFIG.outputSampleRate,
  inChannels: AUDIO_CONFIG.inputChannels,
  outChannels: AUDIO_CONFIG.outputChannels,
  volume: 1,
  filterWindow: 8,
}
const resampler = new Resampler(resamplerOptions)

// Stage 2: encodes the resampled PCM into Opus frames.
const opusEncoderOptions = {
  rate: AUDIO_CONFIG.outputSampleRate,
  channels: AUDIO_CONFIG.outputChannels,
  frameSize: AUDIO_CONFIG.opusFrameSize,
}
const opusEncoder = new prism.opus.Encoder(opusEncoderOptions)
87
+
88
// Feed resampled PCM into the Opus encoder.
//
// No 'error' listener is chained onto the pipe() call: Node's pipe() returns
// the destination stream, so such a listener is just a second 'error' listener
// on opusEncoder. It reported every encoder error twice (once mislabeled as a
// pipe error) and ignored the corrupted-data filter. Failures of either stream
// are reported by the dedicated resampler/opusEncoder 'error' handlers
// registered later in this file.
resampler.pipe(opusEncoder)
94
+
95
// Encoded packets waiting to be forwarded, and the timer that drains them.
const opusPacketQueue: Buffer[] = []
let packetInterval: NodeJS.Timeout | null = null

/**
 * Starts the 20ms timer that forwards one queued Opus packet per tick to the
 * main thread. Calling it while the timer is already running is a no-op.
 */
function startPacketSending() {
  if (packetInterval !== null) return

  const forwardNextPacket = () => {
    const next = opusPacketQueue.shift()
    if (next === undefined) return

    // Copy exactly this packet's bytes into a standalone ArrayBuffer so it
    // can be handed over in the transfer list.
    const start = next.byteOffset
    const transferable = next.buffer.slice(
      start,
      start + next.byteLength,
    ) as ArrayBuffer

    const outgoing = {
      type: 'assistantOpusPacket',
      packet: transferable,
    } satisfies WorkerOutMessage

    // Second argument transfers ownership of the buffer to the receiver
    parentPort!.postMessage(outgoing, [transferable])
  }

  packetInterval = setInterval(forwardNextPacket, 20)
}
122
+
123
/**
 * Cancels the packet timer if it is running and discards every packet that is
 * still queued.
 */
function stopPacketSending() {
  if (packetInterval !== null) {
    clearInterval(packetInterval)
    packetInterval = null
  }
  // Empty the queue in place so existing references see the cleared array
  opusPacketQueue.splice(0, opusPacketQueue.length)
}
130
+
131
// Active GenAI session together with its stop function; null when no session is running.
let session: { session: Session; stop: () => void } | null = null

// Debug log file for the assistant's audio; null when no log is open.
let audioLogStream: WriteStream | null = null
136
+
137
/**
 * Opens a debug log file for the assistant's audio under
 * `<cwd>/discord-audio-logs/<guildId>/<channelId>/`.
 *
 * @param guildId directory segment for the guild
 * @param channelId directory segment for the channel
 * @returns the write stream, or `null` when the DEBUG environment variable is
 *   not set or the directory could not be created
 */
async function createAssistantAudioLogStream(
  guildId: string,
  channelId: string,
): Promise<WriteStream | null> {
  if (!process.env.DEBUG) {
    return null
  }

  // ISO timestamp with ':' and '.' replaced by '-' for use in the file name
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
  const audioDir = path.join(
    process.cwd(),
    'discord-audio-logs',
    guildId,
    channelId,
  )

  // tryAsync hands back the failure as a value instead of throwing
  const mkdirResult = await errore.tryAsync({
    try: () => mkdir(audioDir, { recursive: true }),
    catch: (e) => e as Error,
  })
  if (mkdirResult instanceof Error) {
    workerLogger.error(
      `Failed to create audio log directory:`,
      mkdirResult.message,
    )
    return null
  }

  // Assistant audio is logged as 24kHz mono s16le PCM
  const logFilePath = path.join(audioDir, `assistant_${timestamp}.24.pcm`)
  const logStream = createWriteStream(logFilePath)

  // Without an 'error' listener a failing stream would crash the worker
  logStream.on('error', (error) => {
    workerLogger.error(`Assistant audio log stream error:`, error)
  })

  workerLogger.log(`Created assistant audio log: ${logFilePath}`)
  return logStream
}
178
+
179
// Queue every encoded Opus packet; the packet timer forwards them to the main thread.
opusEncoder.on('data', (packet: Buffer) => {
  opusPacketQueue.push(packet)
})

// Log when either stage of the pipeline ends
opusEncoder.on('end', () => {
  workerLogger.log('Opus encoder stream ended')
})

resampler.on('end', () => {
  workerLogger.log('Resampler stream ended')
})

// Error handlers. The emitted value is typed `unknown`, so the message is
// extracted defensively: reading `.message` off a nullish value would throw
// inside the handler and escalate to the uncaughtException handler, which
// exits the worker.
resampler.on('error', (error: unknown) => {
  workerLogger.error(`Resampler error:`, error)
  void notifyError(error, 'GenAI worker resampler error')
  const errMsg = error instanceof Error ? error.message : String(error)
  sendError(`Resampler error: ${errMsg}`)
})

opusEncoder.on('error', (error: unknown) => {
  workerLogger.error(`Encoder error:`, error)
  const errMsg = error instanceof Error ? error.message : String(error)
  // Corrupted input data is only logged as a warning; it is neither sent to
  // Sentry nor forwarded to the main thread.
  if (errMsg.includes('The compressed data passed is corrupted')) {
    workerLogger.warn('Received corrupted audio data in opus encoder')
  } else {
    void notifyError(error, 'GenAI worker encoder error')
    sendError(`Encoder error: ${errMsg}`)
  }
})
211
+
212
/**
 * Ends `stream` and resolves when its end callback runs or after `timeoutMs`,
 * whichever comes first. Never rejects.
 *
 * @param stream anything with a Node-style `end(callback)` method
 * @param timeoutMs upper bound on how long to wait for the end callback
 * @param onEnded invoked when the end callback runs
 * @param onTimeout invoked only when the timeout wins
 */
function endStreamWithTimeout(
  stream: { end(callback: () => void): unknown },
  timeoutMs: number,
  onEnded: () => void,
  onTimeout?: () => void,
): Promise<void> {
  return new Promise<void>((resolve) => {
    const timer = setTimeout(() => {
      onTimeout?.()
      resolve()
    }, timeoutMs)

    stream.end(() => {
      // Cancel the fallback so it cannot log a stale timeout or linger as a
      // pending timer after a clean close.
      clearTimeout(timer)
      onEnded()
      resolve()
    })
  })
}

/**
 * Shuts the worker's resources down in order: packet timer and queue, GenAI
 * session, audio log stream, then the encoder and resampler streams.
 * Each stream gets a bounded wait so a stuck stream cannot hang the cleanup.
 */
async function cleanupAsync(): Promise<void> {
  workerLogger.log(`Starting async cleanup`)

  stopPacketSending()

  if (session) {
    workerLogger.log(`Stopping GenAI session`)
    session.stop()
    session = null
  }

  // Wait (at most 3s) for the audio log stream to finish writing
  if (audioLogStream) {
    workerLogger.log(`Closing assistant audio log stream`)
    // Detach the stream from module state first so nothing writes to it while
    // it is ending.
    const logStream = audioLogStream
    audioLogStream = null
    // A stream error no longer rejects here: rejecting skipped the encoder and
    // resampler shutdown below. Errors are still logged by the 'error'
    // listener attached when the stream was created.
    await endStreamWithTimeout(
      logStream,
      3000,
      () => {
        workerLogger.log(`Assistant audio log stream closed`)
      },
      () => {
        workerLogger.log(`Audio stream close timeout, continuing`)
      },
    )
  }

  // Disconnect the stages, then end the encoder before the resampler
  resampler.unpipe(opusEncoder)

  await endStreamWithTimeout(opusEncoder, 1000, () => {
    workerLogger.log(`Opus encoder ended`)
  })

  await endStreamWithTimeout(resampler, 1000, () => {
    workerLogger.log(`Resampler ended`)
  })

  workerLogger.log(`Async cleanup complete`)
}
266
+
267
// Handle messages from the main thread.
//
// Supported message types: 'init', 'sendRealtimeInput', 'sendTextInput',
// 'interrupt' and 'stop'. Every command runs inside one try/catch so a failure
// is logged, reported to Sentry and forwarded to the main thread instead of
// surfacing as an unhandled rejection.
parentPort.on('message', async (message: WorkerInMessage) => {
  try {
    switch (message.type) {
      case 'init': {
        workerLogger.log(`Initializing with directory:`, message.directory)

        // Optional debug log for the assistant's audio (null when disabled)
        audioLogStream = await createAssistantAudioLogStream(
          message.guildId,
          message.channelId,
        )

        // Begin draining the Opus packet queue
        startPacketSending()

        // Tools for the directory; completed tool calls are relayed to the main thread
        const { tools } = await getTools({
          directory: message.directory,
          onMessageCompleted: (params) => {
            parentPort!.postMessage({
              type: 'toolCallCompleted',
              ...params,
            } satisfies WorkerOutMessage)
          },
        })

        // Start the GenAI session
        session = await startGenAiSession({
          tools,
          systemMessage: message.systemMessage,
          geminiApiKey: message.geminiApiKey,
          onAssistantAudioChunk({ data }) {
            // Mirror the chunk into the audio log if one is open
            if (audioLogStream && !audioLogStream.destroyed) {
              audioLogStream.write(data, (err) => {
                if (err) {
                  workerLogger.error('Error writing to audio log:', err)
                }
              })
            }

            // Feed the PCM chunk into the resampler, which is piped to the Opus encoder
            if (!resampler.destroyed) {
              resampler.write(data, (err) => {
                if (err) {
                  workerLogger.error('Error writing to resampler:', err)
                  sendError(`Failed to process audio: ${err.message}`)
                }
              })
            }
          },
          onAssistantStartSpeaking() {
            parentPort!.postMessage({
              type: 'assistantStartSpeaking',
            } satisfies WorkerOutMessage)
          },
          onAssistantStopSpeaking() {
            parentPort!.postMessage({
              type: 'assistantStopSpeaking',
            } satisfies WorkerOutMessage)
          },
          onAssistantInterruptSpeaking() {
            parentPort!.postMessage({
              type: 'assistantInterruptSpeaking',
            } satisfies WorkerOutMessage)
          },
        })

        // Notify the main thread that the worker is ready
        parentPort!.postMessage({
          type: 'ready',
        } satisfies WorkerOutMessage)
        break
      }

      case 'sendRealtimeInput': {
        if (!session) {
          sendError('Session not initialized')
          return
        }
        session.session.sendRealtimeInput({
          audio: message.audio,
          audioStreamEnd: message.audioStreamEnd,
        })
        break
      }

      case 'sendTextInput': {
        if (!session) {
          sendError('Session not initialized')
          return
        }
        session.session.sendRealtimeInput({
          text: message.text,
        })
        break
      }

      case 'interrupt': {
        workerLogger.log(`Interrupting playback`)
        // Drop everything queued; the packet timer itself keeps running
        opusPacketQueue.length = 0
        break
      }

      case 'stop': {
        workerLogger.log(`Stopping worker`)
        await cleanupAsync()
        // The explicit exit is intentionally left disabled:
        // process.exit(0)
        break
      }
    }
  } catch (error) {
    workerLogger.error(`Error handling message:`, error)
    // Report to Sentry as every other error path in this worker does
    void notifyError(error, 'GenAI worker message handler error')
    sendError(
      error instanceof Error ? error.message : 'Unknown error in worker',
    )
  }
})
package/src/genai.ts ADDED
@@ -0,0 +1,321 @@
1
+ // Google GenAI Live session manager for real-time voice interactions.
2
+ // Establishes bidirectional audio streaming with Gemini, handles tool calls,
3
+ // and manages the assistant's audio output for Discord voice channels.
4
+
5
+ import {
6
+ GoogleGenAI,
7
+ LiveServerMessage,
8
+ MediaResolution,
9
+ Modality,
10
+ Session,
11
+ } from '@google/genai'
12
+ import type { CallableTool } from '@google/genai'
13
+ import { writeFile } from 'fs'
14
+ import type { AnyTool } from './ai-tool.js'
15
+
16
+ import { createLogger, LogPrefix } from './logger.js'
17
+ import { aiToolToCallableTool } from './ai-tool-to-genai.js'
18
+
19
+ const genaiLogger = createLogger(LogPrefix.GENAI)
20
+
21
+ const audioParts: Buffer[] = []
22
+
23
/**
 * Asynchronously writes `content` to `fileName` and logs the outcome.
 * Write failures are logged, never thrown to the caller.
 */
function saveBinaryFile(fileName: string, content: Buffer) {
  // The encoding option only affects string data; it is kept for parity with
  // the call this replaces.
  writeFile(fileName, content, { encoding: 'utf8' }, (failure) => {
    if (!failure) {
      genaiLogger.log(`Appending stream content to file ${fileName}.`)
      return
    }
    genaiLogger.error(`Error writing file ${fileName}:`, failure)
  })
}
32
+
33
/** PCM format parameters used to fill in the `fmt ` section of a WAV header. */
interface WavConversionOptions {
  /** Samples per second, per channel. */
  sampleRate: number
  /** Number of audio channels. */
  numChannels: number
  /** Bit depth of a single sample. */
  bitsPerSample: number
}
38
+
39
/**
 * Wraps raw PCM chunks in a WAV container.
 *
 * The chunks are joined in order and prefixed with a 44-byte header whose
 * format fields are derived from `mimeType`.
 */
function convertToWav(rawData: Buffer[], mimeType: string) {
  const pcm = Buffer.concat(rawData)
  // The joined length equals the sum of the individual chunk lengths.
  const header = createWavHeader(pcm.length, parseMimeType(mimeType))
  return Buffer.concat([header, pcm])
}
47
+
48
/**
 * Derives WAV format parameters from an audio MIME type such as
 * `audio/L16;rate=24000` or `audio/pcm;rate=16000`.
 *
 * - A subtype of the form `L<bits>` sets the bit depth.
 * - A `rate=<hz>` parameter sets the sample rate.
 * - Anything missing or unparsable keeps its default, so the result is always
 *   a fully populated, numeric `WavConversionOptions`.
 *
 * @param mimeType MIME type string, optionally with `;key=value` parameters.
 * @returns Channel count (always 1), sample rate and bit depth.
 */
function parseMimeType(mimeType: string): WavConversionOptions {
  // NOTE(review): 24 kHz is presumably the rate of Gemini Live audio output;
  // it is only used when the MIME type carries no usable `rate` parameter.
  const defaultSampleRate = 24000

  const [fileType = '', ...params] = mimeType.split(';').map((s) => s.trim())
  const format = fileType.split('/')[1] ?? ''

  const options: WavConversionOptions = {
    numChannels: 1,
    sampleRate: defaultSampleRate,
    bitsPerSample: 16,
  }

  // MIME subtypes are case-insensitive, so accept both `L16` and `l16`.
  if (/^l/i.test(format)) {
    const bits = parseInt(format.slice(1), 10)
    if (Number.isInteger(bits) && bits > 0) {
      options.bitsPerSample = bits
    }
  }

  for (const param of params) {
    const [key, value] = param.split('=').map((s) => s.trim())
    if (key?.toLowerCase() !== 'rate') {
      continue
    }
    const rate = parseInt(value ?? '', 10)
    // Ignore empty/garbage values instead of propagating NaN into the header.
    if (Number.isInteger(rate) && rate > 0) {
      options.sampleRate = rate
    }
  }

  return options
}
73
+
74
/**
 * Builds the canonical 44-byte RIFF/WAVE header for uncompressed PCM audio.
 *
 * Layout reference: http://soundfile.sapp.org/doc/WaveFormat
 *
 * @param dataLength Size in bytes of the PCM payload that follows the header.
 * @param options Channel count, sample rate and bit depth of the payload.
 * @returns A new 44-byte Buffer containing the header.
 */
function createWavHeader(dataLength: number, options: WavConversionOptions) {
  const header = Buffer.alloc(44)
  const byteRate =
    (options.sampleRate * options.numChannels * options.bitsPerSample) / 8
  const blockAlign = (options.numChannels * options.bitsPerSample) / 8

  // Fields are written back to back; the cursor tracks the next free offset.
  let cursor = 0
  const tag = (text: string) => {
    cursor += header.write(text, cursor)
  }
  const u16 = (value: number) => {
    cursor = header.writeUInt16LE(value, cursor)
  }
  const u32 = (value: number) => {
    cursor = header.writeUInt32LE(value, cursor)
  }

  tag('RIFF') // ChunkID
  u32(36 + dataLength) // ChunkSize
  tag('WAVE') // Format
  tag('fmt ') // Subchunk1ID
  u32(16) // Subchunk1Size (PCM)
  u16(1) // AudioFormat (1 = PCM)
  u16(options.numChannels) // NumChannels
  u32(options.sampleRate) // SampleRate
  u32(byteRate) // ByteRate
  u16(blockAlign) // BlockAlign
  u16(options.bitsPerSample) // BitsPerSample
  tag('data') // Subchunk2ID
  u32(dataLength) // Subchunk2Size

  return header
}
99
+
100
/**
 * Fallback audio sink used when the caller supplies no chunk handler.
 *
 * Appends the chunk to the module-level `audioParts` list, then rewrites
 * `audio.wav` with everything collected so far, so the file is regenerated
 * in full on every chunk.
 */
function defaultAudioChunkHandler(chunk: { data: Buffer; mimeType: string }) {
  audioParts.push(chunk.data)
  saveBinaryFile('audio.wav', convertToWav(audioParts, chunk.mimeType))
}
112
+
113
/**
 * Opens a Gemini Live session that answers with audio and can invoke tools.
 *
 * Incoming server messages are inspected for tool calls, audio/text/file
 * parts, transcriptions, interruption and turn completion; the optional
 * callbacks are invoked accordingly.
 *
 * @param options.onAssistantAudioChunk Receives each decoded audio chunk with
 *   its MIME type. Defaults to `defaultAudioChunkHandler`.
 * @param options.onAssistantStartSpeaking Called on the first audio chunk
 *   after the assistant was last idle.
 * @param options.onAssistantStopSpeaking Called when a turn completes while
 *   the assistant was speaking.
 * @param options.onAssistantInterruptSpeaking Called when the server reports
 *   an interruption while the assistant was speaking.
 * @param options.systemMessage System instruction text (empty when omitted).
 * @param options.tools AI SDK tools, converted to GenAI callable tools.
 * @param options.geminiApiKey API key; falls back to
 *   `process.env.GEMINI_API_KEY`.
 * @returns The connected session and a `stop` function that closes it.
 * @throws Error when no API key is available.
 */
export async function startGenAiSession({
  onAssistantAudioChunk,
  onAssistantStartSpeaking,
  onAssistantStopSpeaking,
  onAssistantInterruptSpeaking,
  systemMessage,
  tools,
  geminiApiKey,
}: {
  onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
  onAssistantStartSpeaking?: () => void
  onAssistantStopSpeaking?: () => void
  onAssistantInterruptSpeaking?: () => void
  systemMessage?: string
  tools?: Record<string, AnyTool>
  geminiApiKey?: string | null
} = {}) {
  let session: Session | undefined = undefined
  const callableTools: Array<CallableTool & { name: string }> = []
  let isAssistantSpeaking = false

  const audioChunkHandler = onAssistantAudioChunk || defaultAudioChunkHandler

  // Convert AI SDK tools to GenAI CallableTools
  if (tools) {
    for (const [name, tool] of Object.entries(tools)) {
      callableTools.push(aiToolToCallableTool(tool, name))
    }
  }

  function handleModelTurn(message: LiveServerMessage) {
    if (message.toolCall) {
      genaiLogger.log('Tool call:', message.toolCall)

      // Handle tool calls
      const functionCalls = message.toolCall.functionCalls
      if (functionCalls && callableTools.length > 0) {
        for (const tool of callableTools) {
          // Only involve tools that are actually named in this request.
          if (!functionCalls.some((x) => x.name === tool.name)) {
            continue
          }
          tool
            .callTool(functionCalls)
            .then((parts) => {
              const functionResponses = parts
                .filter((part) => part.functionResponse)
                .map((part) => ({
                  response: part.functionResponse!.response as Record<
                    string,
                    unknown
                  >,
                  id: part.functionResponse!.id,
                  name: part.functionResponse!.name,
                }))

              // `session` is cleared by stop(); responses arriving after that
              // are dropped.
              if (functionResponses.length > 0 && session) {
                session.sendToolResponse({ functionResponses })
                genaiLogger.log(
                  'client-toolResponse: ' +
                    JSON.stringify({ functionResponses }),
                )
              }
            })
            .catch((error) => {
              genaiLogger.error('Error handling tool calls:', error)
            })
        }
      }
    }
    if (message.serverContent?.modelTurn?.parts) {
      for (const part of message.serverContent.modelTurn.parts) {
        if (part?.fileData) {
          genaiLogger.log(`File: ${part?.fileData.fileUri}`)
        }

        if (part?.inlineData) {
          const inlineData = part.inlineData
          if (
            !inlineData.mimeType ||
            !inlineData.mimeType.startsWith('audio/')
          ) {
            genaiLogger.log(
              'Skipping non-audio inlineData:',
              inlineData.mimeType,
            )
            continue
          }

          // Mark the assistant as speaking on the first audio chunk. The flag
          // is tracked even when no start callback was supplied, so the
          // stop/interrupt callbacks still fire for this turn.
          if (!isAssistantSpeaking) {
            isAssistantSpeaking = true
            onAssistantStartSpeaking?.()
          }

          const buffer = Buffer.from(inlineData.data ?? '', 'base64')
          audioChunkHandler({
            data: buffer,
            mimeType: inlineData.mimeType,
          })
        }

        if (part?.text) {
          genaiLogger.log('Text:', part.text)
        }
      }
    }
    // Handle input transcription (user's audio transcription)
    if (message.serverContent?.inputTranscription?.text) {
      genaiLogger.log(
        '[user transcription]',
        message.serverContent.inputTranscription.text,
      )
    }

    // Handle output transcription (model's audio transcription)
    if (message.serverContent?.outputTranscription?.text) {
      genaiLogger.log(
        '[assistant transcription]',
        message.serverContent.outputTranscription.text,
      )
    }
    if (message.serverContent?.interrupted) {
      genaiLogger.log('Assistant was interrupted')
      // Always reset the flag so the next turn can report "start" again,
      // regardless of which callbacks the caller provided.
      if (isAssistantSpeaking) {
        isAssistantSpeaking = false
        onAssistantInterruptSpeaking?.()
      }
    }
    if (message.serverContent?.turnComplete) {
      genaiLogger.log('Assistant turn complete')
      if (isAssistantSpeaking) {
        isAssistantSpeaking = false
        onAssistantStopSpeaking?.()
      }
    }
  }

  const apiKey = geminiApiKey || process.env.GEMINI_API_KEY

  if (!apiKey) {
    genaiLogger.error('No Gemini API key provided')
    throw new Error('Gemini API key is required for voice interactions')
  }

  const ai = new GoogleGenAI({
    apiKey,
  })

  const model = 'gemini-2.5-flash-native-audio-preview-12-2025'

  session = await ai.live.connect({
    model,
    callbacks: {
      onopen: function () {
        genaiLogger.debug('Opened')
      },
      onmessage: function (message: LiveServerMessage) {
        // genaiLogger.log(message)
        // A failure while handling one message must not break the stream.
        try {
          handleModelTurn(message)
        } catch (error) {
          genaiLogger.error('Error handling turn:', error)
        }
      },
      onerror: function (e: ErrorEvent) {
        genaiLogger.debug('Error:', e.message)
      },
      onclose: function (e: CloseEvent) {
        genaiLogger.debug('Close:', e.reason)
      },
    },
    config: {
      tools: callableTools,
      responseModalities: [Modality.AUDIO],
      mediaResolution: MediaResolution.MEDIA_RESOLUTION_MEDIUM,
      inputAudioTranscription: {}, // transcribes your input speech
      outputAudioTranscription: {}, // transcribes the model's spoken audio
      systemInstruction: {
        parts: [
          {
            text: systemMessage || '',
          },
        ],
      },
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: {
            voiceName: 'Charon', // Orus also not bad
          },
        },
      },
      contextWindowCompression: {
        triggerTokens: '25600',

        slidingWindow: { targetTokens: '12800' },
      },
    },
  })

  return {
    session,
    stop: () => {
      // Clear the shared reference first so late tool responses are not sent
      // to a session that is being closed.
      const currentSession = session
      session = undefined
      currentSession?.close()
    },
  }
}