@brianli/kimaki 0.4.72-brianli.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. package/bin.js +2 -0
  2. package/dist/ai-tool-to-genai.js +233 -0
  3. package/dist/ai-tool-to-genai.test.js +267 -0
  4. package/dist/ai-tool.js +6 -0
  5. package/dist/bin.js +87 -0
  6. package/dist/bot-token.js +121 -0
  7. package/dist/bot-token.test.js +134 -0
  8. package/dist/channel-management.js +101 -0
  9. package/dist/cli-parsing.test.js +89 -0
  10. package/dist/cli.js +2529 -0
  11. package/dist/commands/abort.js +82 -0
  12. package/dist/commands/action-buttons.js +257 -0
  13. package/dist/commands/add-project.js +114 -0
  14. package/dist/commands/agent.js +291 -0
  15. package/dist/commands/ask-question.js +223 -0
  16. package/dist/commands/compact.js +120 -0
  17. package/dist/commands/context-usage.js +140 -0
  18. package/dist/commands/create-new-project.js +118 -0
  19. package/dist/commands/diff.js +128 -0
  20. package/dist/commands/file-upload.js +275 -0
  21. package/dist/commands/fork.js +217 -0
  22. package/dist/commands/gemini-apikey.js +70 -0
  23. package/dist/commands/login.js +490 -0
  24. package/dist/commands/mention-mode.js +51 -0
  25. package/dist/commands/merge-worktree.js +124 -0
  26. package/dist/commands/model.js +694 -0
  27. package/dist/commands/permissions.js +163 -0
  28. package/dist/commands/queue.js +217 -0
  29. package/dist/commands/remove-project.js +115 -0
  30. package/dist/commands/restart-opencode-server.js +116 -0
  31. package/dist/commands/resume.js +159 -0
  32. package/dist/commands/run-command.js +79 -0
  33. package/dist/commands/session-id.js +78 -0
  34. package/dist/commands/session.js +192 -0
  35. package/dist/commands/share.js +80 -0
  36. package/dist/commands/types.js +2 -0
  37. package/dist/commands/undo-redo.js +159 -0
  38. package/dist/commands/unset-model.js +152 -0
  39. package/dist/commands/upgrade.js +42 -0
  40. package/dist/commands/user-command.js +148 -0
  41. package/dist/commands/verbosity.js +60 -0
  42. package/dist/commands/worktree-settings.js +50 -0
  43. package/dist/commands/worktree.js +299 -0
  44. package/dist/condense-memory.js +33 -0
  45. package/dist/config.js +110 -0
  46. package/dist/database.js +1050 -0
  47. package/dist/db.js +159 -0
  48. package/dist/db.test.js +49 -0
  49. package/dist/discord-api.js +28 -0
  50. package/dist/discord-auth.js +231 -0
  51. package/dist/discord-auth.test.js +80 -0
  52. package/dist/discord-bot.js +997 -0
  53. package/dist/discord-utils.js +560 -0
  54. package/dist/discord-utils.test.js +115 -0
  55. package/dist/errors.js +167 -0
  56. package/dist/escape-backticks.test.js +429 -0
  57. package/dist/format-tables.js +122 -0
  58. package/dist/format-tables.test.js +199 -0
  59. package/dist/forum-sync/config.js +79 -0
  60. package/dist/forum-sync/discord-operations.js +154 -0
  61. package/dist/forum-sync/index.js +5 -0
  62. package/dist/forum-sync/markdown.js +117 -0
  63. package/dist/forum-sync/sync-to-discord.js +417 -0
  64. package/dist/forum-sync/sync-to-files.js +190 -0
  65. package/dist/forum-sync/types.js +53 -0
  66. package/dist/forum-sync/watchers.js +307 -0
  67. package/dist/gateway-consumer.js +232 -0
  68. package/dist/gateway-consumer.test.js +18 -0
  69. package/dist/genai-worker-wrapper.js +111 -0
  70. package/dist/genai-worker.js +311 -0
  71. package/dist/genai.js +232 -0
  72. package/dist/generated/browser.js +17 -0
  73. package/dist/generated/client.js +35 -0
  74. package/dist/generated/commonInputTypes.js +10 -0
  75. package/dist/generated/enums.js +30 -0
  76. package/dist/generated/internal/class.js +41 -0
  77. package/dist/generated/internal/prismaNamespace.js +239 -0
  78. package/dist/generated/internal/prismaNamespaceBrowser.js +209 -0
  79. package/dist/generated/models/bot_api_keys.js +1 -0
  80. package/dist/generated/models/bot_tokens.js +1 -0
  81. package/dist/generated/models/channel_agents.js +1 -0
  82. package/dist/generated/models/channel_directories.js +1 -0
  83. package/dist/generated/models/channel_mention_mode.js +1 -0
  84. package/dist/generated/models/channel_models.js +1 -0
  85. package/dist/generated/models/channel_verbosity.js +1 -0
  86. package/dist/generated/models/channel_worktrees.js +1 -0
  87. package/dist/generated/models/forum_sync_configs.js +1 -0
  88. package/dist/generated/models/global_models.js +1 -0
  89. package/dist/generated/models/ipc_requests.js +1 -0
  90. package/dist/generated/models/part_messages.js +1 -0
  91. package/dist/generated/models/scheduled_tasks.js +1 -0
  92. package/dist/generated/models/session_agents.js +1 -0
  93. package/dist/generated/models/session_models.js +1 -0
  94. package/dist/generated/models/session_start_sources.js +1 -0
  95. package/dist/generated/models/thread_sessions.js +1 -0
  96. package/dist/generated/models/thread_worktrees.js +1 -0
  97. package/dist/generated/models.js +1 -0
  98. package/dist/heap-monitor.js +95 -0
  99. package/dist/hrana-server.js +416 -0
  100. package/dist/hrana-server.test.js +368 -0
  101. package/dist/image-utils.js +112 -0
  102. package/dist/interaction-handler.js +327 -0
  103. package/dist/ipc-polling.js +251 -0
  104. package/dist/kimaki-digital-twin.e2e.test.js +165 -0
  105. package/dist/limit-heading-depth.js +25 -0
  106. package/dist/limit-heading-depth.test.js +105 -0
  107. package/dist/logger.js +160 -0
  108. package/dist/markdown.js +342 -0
  109. package/dist/markdown.test.js +253 -0
  110. package/dist/message-formatting.js +433 -0
  111. package/dist/message-formatting.test.js +73 -0
  112. package/dist/openai-realtime.js +228 -0
  113. package/dist/opencode-plugin-loading.e2e.test.js +91 -0
  114. package/dist/opencode-plugin.js +536 -0
  115. package/dist/opencode-plugin.test.js +98 -0
  116. package/dist/opencode.js +409 -0
  117. package/dist/privacy-sanitizer.js +105 -0
  118. package/dist/runtime-mode.js +51 -0
  119. package/dist/runtime-mode.test.js +115 -0
  120. package/dist/sentry.js +127 -0
  121. package/dist/session-handler/state.js +151 -0
  122. package/dist/session-handler.js +1874 -0
  123. package/dist/session-search.js +100 -0
  124. package/dist/session-search.test.js +40 -0
  125. package/dist/startup-service.js +153 -0
  126. package/dist/system-message.js +499 -0
  127. package/dist/task-runner.js +282 -0
  128. package/dist/task-schedule.js +191 -0
  129. package/dist/task-schedule.test.js +71 -0
  130. package/dist/thinking-utils.js +35 -0
  131. package/dist/thread-message-queue.e2e.test.js +781 -0
  132. package/dist/tools.js +359 -0
  133. package/dist/unnest-code-blocks.js +136 -0
  134. package/dist/unnest-code-blocks.test.js +641 -0
  135. package/dist/upgrade.js +114 -0
  136. package/dist/utils.js +109 -0
  137. package/dist/voice-handler.js +606 -0
  138. package/dist/voice.js +304 -0
  139. package/dist/voice.test.js +187 -0
  140. package/dist/wait-session.js +94 -0
  141. package/dist/worker-types.js +4 -0
  142. package/dist/worktree-utils.js +727 -0
  143. package/dist/xml.js +92 -0
  144. package/dist/xml.test.js +32 -0
  145. package/package.json +82 -0
  146. package/schema.prisma +246 -0
  147. package/skills/batch/SKILL.md +87 -0
  148. package/skills/critique/SKILL.md +129 -0
  149. package/skills/errore/SKILL.md +589 -0
  150. package/skills/goke/.prettierrc +5 -0
  151. package/skills/goke/CHANGELOG.md +40 -0
  152. package/skills/goke/LICENSE +21 -0
  153. package/skills/goke/README.md +666 -0
  154. package/skills/goke/SKILL.md +458 -0
  155. package/skills/goke/package.json +43 -0
  156. package/skills/goke/src/__test__/coerce.test.ts +411 -0
  157. package/skills/goke/src/__test__/index.test.ts +1798 -0
  158. package/skills/goke/src/__test__/types.test-d.ts +111 -0
  159. package/skills/goke/src/coerce.ts +547 -0
  160. package/skills/goke/src/goke.ts +1362 -0
  161. package/skills/goke/src/index.ts +16 -0
  162. package/skills/goke/src/mri.ts +164 -0
  163. package/skills/goke/tsconfig.json +15 -0
  164. package/skills/jitter/EDITOR.md +219 -0
  165. package/skills/jitter/EXPORT-INTERNALS.md +309 -0
  166. package/skills/jitter/SKILL.md +158 -0
  167. package/skills/jitter/jitter-clipboard.json +1042 -0
  168. package/skills/jitter/package.json +14 -0
  169. package/skills/jitter/tsconfig.json +15 -0
  170. package/skills/jitter/utils/actions.ts +212 -0
  171. package/skills/jitter/utils/export.ts +114 -0
  172. package/skills/jitter/utils/index.ts +141 -0
  173. package/skills/jitter/utils/snapshot.ts +154 -0
  174. package/skills/jitter/utils/traverse.ts +246 -0
  175. package/skills/jitter/utils/types.ts +279 -0
  176. package/skills/jitter/utils/wait.ts +133 -0
  177. package/skills/playwriter/SKILL.md +31 -0
  178. package/skills/security-review/SKILL.md +208 -0
  179. package/skills/simplify/SKILL.md +58 -0
  180. package/skills/termcast/SKILL.md +945 -0
  181. package/skills/tuistory/SKILL.md +250 -0
  182. package/skills/zustand-centralized-state/SKILL.md +582 -0
  183. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  184. package/src/__snapshots__/compact-session-context.md +41 -0
  185. package/src/__snapshots__/first-session-no-info.md +17 -0
  186. package/src/__snapshots__/first-session-with-info.md +23 -0
  187. package/src/__snapshots__/session-1.md +17 -0
  188. package/src/__snapshots__/session-2.md +5871 -0
  189. package/src/__snapshots__/session-3.md +17 -0
  190. package/src/__snapshots__/session-with-tools.md +5871 -0
  191. package/src/ai-tool-to-genai.test.ts +296 -0
  192. package/src/ai-tool-to-genai.ts +282 -0
  193. package/src/ai-tool.ts +39 -0
  194. package/src/bin.ts +108 -0
  195. package/src/bot-token.test.ts +171 -0
  196. package/src/bot-token.ts +159 -0
  197. package/src/channel-management.ts +172 -0
  198. package/src/cli-parsing.test.ts +132 -0
  199. package/src/cli.ts +3605 -0
  200. package/src/commands/abort.ts +112 -0
  201. package/src/commands/action-buttons.ts +376 -0
  202. package/src/commands/add-project.ts +152 -0
  203. package/src/commands/agent.ts +404 -0
  204. package/src/commands/ask-question.ts +330 -0
  205. package/src/commands/compact.ts +157 -0
  206. package/src/commands/context-usage.ts +199 -0
  207. package/src/commands/create-new-project.ts +179 -0
  208. package/src/commands/diff.ts +165 -0
  209. package/src/commands/file-upload.ts +389 -0
  210. package/src/commands/fork.ts +320 -0
  211. package/src/commands/gemini-apikey.ts +104 -0
  212. package/src/commands/login.ts +634 -0
  213. package/src/commands/mention-mode.ts +77 -0
  214. package/src/commands/merge-worktree.ts +177 -0
  215. package/src/commands/model.ts +961 -0
  216. package/src/commands/permissions.ts +261 -0
  217. package/src/commands/queue.ts +296 -0
  218. package/src/commands/remove-project.ts +155 -0
  219. package/src/commands/restart-opencode-server.ts +162 -0
  220. package/src/commands/resume.ts +242 -0
  221. package/src/commands/run-command.ts +123 -0
  222. package/src/commands/session-id.ts +109 -0
  223. package/src/commands/session.ts +250 -0
  224. package/src/commands/share.ts +106 -0
  225. package/src/commands/types.ts +25 -0
  226. package/src/commands/undo-redo.ts +221 -0
  227. package/src/commands/unset-model.ts +189 -0
  228. package/src/commands/upgrade.ts +52 -0
  229. package/src/commands/user-command.ts +193 -0
  230. package/src/commands/verbosity.ts +88 -0
  231. package/src/commands/worktree-settings.ts +79 -0
  232. package/src/commands/worktree.ts +431 -0
  233. package/src/condense-memory.ts +36 -0
  234. package/src/config.ts +148 -0
  235. package/src/database.ts +1530 -0
  236. package/src/db.test.ts +60 -0
  237. package/src/db.ts +190 -0
  238. package/src/discord-api.ts +35 -0
  239. package/src/discord-bot.ts +1316 -0
  240. package/src/discord-utils.test.ts +132 -0
  241. package/src/discord-utils.ts +767 -0
  242. package/src/errors.ts +213 -0
  243. package/src/escape-backticks.test.ts +469 -0
  244. package/src/format-tables.test.ts +223 -0
  245. package/src/format-tables.ts +145 -0
  246. package/src/forum-sync/config.ts +92 -0
  247. package/src/forum-sync/discord-operations.ts +241 -0
  248. package/src/forum-sync/index.ts +9 -0
  249. package/src/forum-sync/markdown.ts +176 -0
  250. package/src/forum-sync/sync-to-discord.ts +595 -0
  251. package/src/forum-sync/sync-to-files.ts +294 -0
  252. package/src/forum-sync/types.ts +175 -0
  253. package/src/forum-sync/watchers.ts +454 -0
  254. package/src/genai-worker-wrapper.ts +164 -0
  255. package/src/genai-worker.ts +386 -0
  256. package/src/genai.ts +321 -0
  257. package/src/generated/browser.ts +109 -0
  258. package/src/generated/client.ts +131 -0
  259. package/src/generated/commonInputTypes.ts +512 -0
  260. package/src/generated/enums.ts +46 -0
  261. package/src/generated/internal/class.ts +362 -0
  262. package/src/generated/internal/prismaNamespace.ts +2251 -0
  263. package/src/generated/internal/prismaNamespaceBrowser.ts +308 -0
  264. package/src/generated/models/bot_api_keys.ts +1288 -0
  265. package/src/generated/models/bot_tokens.ts +1577 -0
  266. package/src/generated/models/channel_agents.ts +1256 -0
  267. package/src/generated/models/channel_directories.ts +2104 -0
  268. package/src/generated/models/channel_mention_mode.ts +1300 -0
  269. package/src/generated/models/channel_models.ts +1288 -0
  270. package/src/generated/models/channel_verbosity.ts +1224 -0
  271. package/src/generated/models/channel_worktrees.ts +1308 -0
  272. package/src/generated/models/forum_sync_configs.ts +1452 -0
  273. package/src/generated/models/global_models.ts +1288 -0
  274. package/src/generated/models/ipc_requests.ts +1485 -0
  275. package/src/generated/models/part_messages.ts +1302 -0
  276. package/src/generated/models/scheduled_tasks.ts +2320 -0
  277. package/src/generated/models/session_agents.ts +1086 -0
  278. package/src/generated/models/session_models.ts +1114 -0
  279. package/src/generated/models/session_start_sources.ts +1408 -0
  280. package/src/generated/models/thread_sessions.ts +1599 -0
  281. package/src/generated/models/thread_worktrees.ts +1352 -0
  282. package/src/generated/models.ts +29 -0
  283. package/src/heap-monitor.ts +121 -0
  284. package/src/hrana-server.test.ts +428 -0
  285. package/src/hrana-server.ts +547 -0
  286. package/src/image-utils.ts +149 -0
  287. package/src/interaction-handler.ts +461 -0
  288. package/src/ipc-polling.ts +325 -0
  289. package/src/kimaki-digital-twin.e2e.test.ts +201 -0
  290. package/src/limit-heading-depth.test.ts +116 -0
  291. package/src/limit-heading-depth.ts +26 -0
  292. package/src/logger.ts +203 -0
  293. package/src/markdown.test.ts +360 -0
  294. package/src/markdown.ts +410 -0
  295. package/src/message-formatting.test.ts +81 -0
  296. package/src/message-formatting.ts +549 -0
  297. package/src/openai-realtime.ts +362 -0
  298. package/src/opencode-plugin-loading.e2e.test.ts +112 -0
  299. package/src/opencode-plugin.test.ts +108 -0
  300. package/src/opencode-plugin.ts +652 -0
  301. package/src/opencode.ts +554 -0
  302. package/src/privacy-sanitizer.ts +142 -0
  303. package/src/schema.sql +158 -0
  304. package/src/sentry.ts +137 -0
  305. package/src/session-handler/state.ts +232 -0
  306. package/src/session-handler.ts +2668 -0
  307. package/src/session-search.test.ts +50 -0
  308. package/src/session-search.ts +148 -0
  309. package/src/startup-service.ts +200 -0
  310. package/src/system-message.ts +568 -0
  311. package/src/task-runner.ts +425 -0
  312. package/src/task-schedule.test.ts +84 -0
  313. package/src/task-schedule.ts +287 -0
  314. package/src/thinking-utils.ts +61 -0
  315. package/src/thread-message-queue.e2e.test.ts +997 -0
  316. package/src/tools.ts +432 -0
  317. package/src/unnest-code-blocks.test.ts +679 -0
  318. package/src/unnest-code-blocks.ts +168 -0
  319. package/src/upgrade.ts +127 -0
  320. package/src/utils.ts +145 -0
  321. package/src/voice-handler.ts +852 -0
  322. package/src/voice.test.ts +219 -0
  323. package/src/voice.ts +444 -0
  324. package/src/wait-session.ts +147 -0
  325. package/src/worker-types.ts +64 -0
  326. package/src/worktree-utils.ts +988 -0
  327. package/src/xml.test.ts +38 -0
  328. package/src/xml.ts +121 -0
package/dist/voice.js ADDED
@@ -0,0 +1,304 @@
1
+ // Audio transcription service using AI SDK providers.
2
+ // Both providers use LanguageModelV3 (chat model) with audio file parts + tool calling,
3
+ // so we can pass full context (file tree, session info) for better word recognition.
4
+ // - OpenAI: gpt-4o-audio-preview via .chat() (Chat Completions API). MUST use .chat()
5
+ // because the default Responses API doesn't support audio file parts. The Chat
6
+ // Completions handler converts audio/mpeg file parts to input_audio format.
7
+ // - Gemini: gemini-2.5-flash natively accepts audio file parts in chat.
8
+ // Calls model.doGenerate() directly without the `ai` npm package.
9
+ // Uses errore for type-safe error handling.
10
+ import { createGoogleGenerativeAI } from '@ai-sdk/google';
11
+ import { createOpenAI } from '@ai-sdk/openai';
12
+ import { Readable } from 'node:stream';
13
+ import prism from 'prism-media';
14
+ import * as errore from 'errore';
15
+ import { createLogger, LogPrefix } from './logger.js';
16
+ import { ApiKeyMissingError, InvalidAudioFormatError, TranscriptionError, EmptyTranscriptionError, NoResponseContentError, NoToolResponseError, } from './errors.js';
17
+ const voiceLogger = createLogger(LogPrefix.VOICE);
18
// Media types that can be sent to OpenAI's input_audio unchanged (mp3 and wav only).
// Audio in any other format (OGG Opus, etc) has to be converted before sending.
const OPENAI_SUPPORTED_AUDIO_TYPES = new Set(['audio/mpeg', 'audio/mp3', 'audio/wav']);
25
/**
 * Decode OGG Opus audio into a WAV buffer with prism-media (already installed for
 * Discord voice), so no ffmpeg is needed — decoding uses @discordjs/opus native bindings.
 *
 * Stages: OGG buffer → OggDemuxer → Opus Decoder → raw PCM → 44-byte WAV header + PCM.
 * The output is always described as 48 kHz, mono, 16-bit PCM.
 *
 * The returned promise never rejects: a demux or decode failure resolves with a
 * TranscriptionError value instead.
 *
 * @param input OGG Opus audio data fed into the pipeline via Readable.from
 * @returns Promise resolving to the WAV Buffer, or to a TranscriptionError
 */
export function convertOggToWav(input) {
    const SAMPLE_RATE = 48000;
    const CHANNELS = 1;
    return new Promise((settle) => {
        // Builds a stream 'error' listener that settles with a labelled error value.
        const failWith = (label) => (err) => {
            settle(new TranscriptionError({
                reason: `${label}: ${err.message}`,
                cause: err,
            }));
        };
        const oggDemuxer = new prism.opus.OggDemuxer();
        const opusDecoder = new prism.opus.Decoder({
            rate: SAMPLE_RATE,
            channels: CHANNELS,
            frameSize: 960,
        });
        const decodedChunks = [];
        opusDecoder.on('data', (chunk) => {
            decodedChunks.push(chunk);
        });
        opusDecoder.on('end', () => {
            // All PCM has arrived: prepend a header describing it and hand back the WAV.
            const pcm = Buffer.concat(decodedChunks);
            const header = createWavHeader({
                dataLength: pcm.length,
                sampleRate: SAMPLE_RATE,
                numChannels: CHANNELS,
                bitsPerSample: 16,
            });
            settle(Buffer.concat([header, pcm]));
        });
        opusDecoder.on('error', failWith('Opus decode failed'));
        oggDemuxer.on('error', failWith('OGG demux failed'));
        Readable.from(input).pipe(oggDemuxer).pipe(opusDecoder);
    });
}
67
/**
 * Build the 44-byte RIFF/WAVE header that precedes raw PCM sample data.
 *
 * @param dataLength    size of the PCM payload in bytes
 * @param sampleRate    samples per second
 * @param numChannels   number of audio channels
 * @param bitsPerSample bit depth of one sample
 * @returns a 44-byte Buffer containing the header only (no sample data)
 */
function createWavHeader({ dataLength, sampleRate, numChannels, bitsPerSample, }) {
    const HEADER_SIZE = 44;
    const bytesPerFrame = (numChannels * bitsPerSample) / 8;
    const bytesPerSecond = (sampleRate * numChannels * bitsPerSample) / 8;
    const header = Buffer.alloc(HEADER_SIZE);
    // Fields are written strictly in order, so a running cursor replaces fixed offsets.
    let cursor = 0;
    const tag = (fourCc) => {
        cursor += header.write(fourCc, cursor);
    };
    const u16 = (value) => {
        cursor = header.writeUInt16LE(value, cursor);
    };
    const u32 = (value) => {
        cursor = header.writeUInt32LE(value, cursor);
    };
    tag('RIFF');
    u32(36 + dataLength); // RIFF chunk size: remaining header bytes + payload
    tag('WAVE');
    tag('fmt ');
    u32(16); // size of the fmt sub-chunk
    u16(1); // audio format field (1)
    u16(numChannels);
    u32(sampleRate);
    u32(bytesPerSecond);
    u16(bytesPerFrame);
    u16(bitsPerSample);
    tag('data');
    u32(dataLength);
    return header;
}
86
// Function-tool definition passed to the model. The model is expected to return its
// result by calling this tool; `transcription` is required, `queueMessage` is optional.
const transcriptionTool = (() => {
    const transcription = {
        type: 'string',
        description: 'The final transcription of the audio. MUST be non-empty. If audio is unclear, transcribe your best interpretation. If silent, use "[inaudible audio]".',
    };
    const queueMessage = {
        type: 'boolean',
        description: 'Set to true ONLY if the user explicitly says "queue this message", "queue this", or similar phrasing indicating they want this message queued instead of sent immediately. If not mentioned, omit or set to false.',
    };
    return {
        type: 'function',
        name: 'transcriptionResult',
        description: 'MANDATORY: You MUST call this tool to complete the task. This is the ONLY way to return results - text responses are ignored. Call this with your transcription, even if imperfect. An imperfect transcription is better than none.',
        inputSchema: {
            type: 'object',
            properties: { transcription, queueMessage },
            required: ['transcription'],
        },
    };
})();
105
/**
 * Extract the transcription result from a doGenerate content array.
 *
 * Resolution order:
 *  1. A 'tool-call' part named 'transcriptionResult' — its JSON-string `input` is parsed
 *     and `transcription` (trimmed) plus `queueMessage` (true only when strictly `true`)
 *     are returned.
 *  2. Otherwise the first 'text' part, if it is non-blank (queueMessage is false).
 *
 * This function never throws for bad model output; failures are returned as values:
 *  - TranscriptionError       when the tool-call input is not valid JSON, or when no
 *                             usable part exists in a non-empty content array
 *  - EmptyTranscriptionError  when the tool call carries no (or a blank) transcription
 *  - NoResponseContentError   when the content array is empty
 *
 * @param content array of content parts returned by the model
 * @returns `{ transcription, queueMessage }` or one of the error values above
 */
export function extractTranscription(content) {
    const toolCall = content.find((c) => c.type === 'tool-call' && c.toolName === 'transcriptionResult');
    if (toolCall) {
        // toolCall.input is a JSON string in LanguageModelV3
        let args = {};
        if (typeof toolCall.input === 'string') {
            let parsed;
            try {
                parsed = JSON.parse(toolCall.input);
            }
            catch (err) {
                // Malformed arguments from the model must surface as an error value,
                // not as an exception escaping to the caller.
                return new TranscriptionError({
                    reason: `Invalid tool call arguments: ${err instanceof Error ? err.message : String(err)}`,
                    cause: err,
                });
            }
            // JSON such as `null` or `42` parses fine but has no properties to read.
            if (parsed !== null && typeof parsed === 'object') {
                args = parsed;
            }
        }
        const transcription = (typeof args.transcription === 'string' ? args.transcription : '').trim();
        const queueMessage = args.queueMessage === true;
        voiceLogger.log(`Transcription result received: "${transcription.slice(0, 100)}..."${queueMessage ? ' [QUEUE]' : ''}`);
        if (!transcription) {
            return new EmptyTranscriptionError();
        }
        return { transcription, queueMessage };
    }
    // Fall back to text content if no tool call
    const textPart = content.find((c) => c.type === 'text');
    if (textPart && textPart.type === 'text' && textPart.text.trim()) {
        voiceLogger.log(`No tool call but got text: "${textPart.text.trim().slice(0, 100)}..."`);
        return { transcription: textPart.text.trim(), queueMessage: false };
    }
    if (content.length === 0) {
        return new NoResponseContentError();
    }
    return new TranscriptionError({
        reason: 'Model did not produce a transcription',
    });
}
141
/**
 * Perform a single transcription request against a language model.
 *
 * Sends one user message (prompt text + the audio as a file part), forces the
 * 'transcriptionResult' tool, and passes the response content to extractTranscription.
 *
 * @param model       object exposing `doGenerate(options)`
 * @param prompt      instruction text placed before the audio
 * @param audioBase64 audio payload, base64-encoded
 * @param mediaType   media type of the audio payload
 * @param temperature sampling temperature forwarded to the model
 * @returns whatever extractTranscription returns, or a TranscriptionError when the
 *          API call itself fails
 */
async function runTranscriptionOnce({ model, prompt, audioBase64, mediaType, temperature, }) {
    const audioPart = { type: 'file', data: audioBase64, mediaType };
    const userMessage = {
        role: 'user',
        content: [{ type: 'text', text: prompt }, audioPart],
    };
    const callOptions = {
        prompt: [userMessage],
        temperature,
        maxOutputTokens: 2048,
        tools: [transcriptionTool],
        toolChoice: { type: 'tool', toolName: 'transcriptionResult' },
        providerOptions: {
            google: {
                thinkingConfig: { thinkingBudget: 1024 },
            },
        },
    };
    // doGenerate returns a PromiseLike; Promise.resolve turns it into a real Promise for errore.
    const outcome = await errore.tryAsync({
        try: () => Promise.resolve(model.doGenerate(callOptions)),
        catch: (e) => new TranscriptionError({
            reason: `API call failed: ${String(e)}`,
            cause: e,
        }),
    });
    return outcome instanceof TranscriptionError
        ? outcome
        : extractTranscription(outcome.content);
}
179
/**
 * Create a LanguageModelV3 for transcription.
 * Both providers use chat models that accept audio file parts, so we get full
 * context (prompt, session info, tool calling) for better word recognition.
 *
 * OpenAI: must use .chat() to get the Chat Completions API model, because the
 * default callable (Responses API) doesn't support audio file parts.
 * Gemini: language models natively accept audio in chat.
 *
 * Provider resolution: an explicit `provider` wins; otherwise a string key starting
 * with 'sk-' selects OpenAI and anything else (including a missing key) selects Gemini.
 *
 * @param apiKey   API key forwarded to the provider factory
 * @param provider optional explicit provider, 'openai' or 'gemini'
 * @returns 'gpt-4o-audio-preview' chat model for OpenAI, 'gemini-2.5-flash' otherwise
 */
export function createTranscriptionModel({ apiKey, provider, }) {
    // Guard the prefix sniffing: calling .startsWith on a missing/non-string key would
    // throw a TypeError instead of falling back to the default provider.
    const looksLikeOpenAiKey = typeof apiKey === 'string' && apiKey.startsWith('sk-');
    const resolvedProvider = provider || (looksLikeOpenAiKey ? 'openai' : 'gemini');
    if (resolvedProvider === 'openai') {
        const openai = createOpenAI({ apiKey });
        return openai.chat('gpt-4o-audio-preview');
    }
    const google = createGoogleGenerativeAI({ apiKey });
    return google('gemini-2.5-flash');
}
197
/**
 * Normalize the accepted audio inputs into a Node Buffer.
 * Strings are treated as base64; typed-array views contribute their raw bytes.
 */
function toAudioBuffer(audio) {
    if (typeof audio === 'string') {
        return Buffer.from(audio, 'base64');
    }
    if (Buffer.isBuffer(audio)) {
        return audio;
    }
    if (audio instanceof ArrayBuffer) {
        return Buffer.from(new Uint8Array(audio));
    }
    if (ArrayBuffer.isView(audio)) {
        // Buffer.from(typedArray) copies *element values* truncated to bytes, which
        // corrupts e.g. Int16Array PCM. Wrap the underlying bytes instead.
        return Buffer.from(audio.buffer, audio.byteOffset, audio.byteLength);
    }
    return Buffer.from(audio);
}
/**
 * Render the optional <session_context> prompt section, or '' when no context is given.
 */
function buildSessionContextSection({ currentSessionContext, lastSessionContext }) {
    const sessionContextParts = [];
    if (lastSessionContext) {
        sessionContextParts.push(`<last_session>
${lastSessionContext}
</last_session>`);
    }
    if (currentSessionContext) {
        sessionContextParts.push(`<current_session>
${currentSessionContext}
</current_session>`);
    }
    return sessionContextParts.length > 0
        ? `\n<session_context>
${sessionContextParts.join('\n\n')}
</session_context>`
        : '';
}
/**
 * Transcribe an audio clip with a chat model and return the structured result.
 *
 * Steps: resolve API key and provider → normalize the audio to a Buffer → for OpenAI,
 * convert media types outside OPENAI_SUPPORTED_AUDIO_TYPES to WAV → build the
 * transcription prompt (language hint, rules, file tree, session context) → run one
 * model call via runTranscriptionOnce.
 *
 * Errors are returned as values, not thrown.
 *
 * @param audio                 base64 string, Buffer, ArrayBuffer, typed-array view, or byte array
 * @param prompt                text embedded inside <file_tree> in the model prompt
 * @param language              optional language name added as a hint
 * @param temperature           sampling temperature, defaults to 0.3
 * @param apiKey                API key; falls back to OPENAI_API_KEY, then GEMINI_API_KEY
 * @param model                 optional pre-built model; skips model creation
 * @param provider              optional explicit provider, 'openai' or 'gemini'
 * @param mediaType             media type of `audio`, defaults to 'audio/mpeg'
 * @param currentSessionContext optional context text for the current session
 * @param lastSessionContext    optional context text for the previous session
 * @returns the runTranscriptionOnce result, or ApiKeyMissingError (no model and no key),
 *          InvalidAudioFormatError (empty audio), or the conversion error from
 *          convertOggToWav
 */
export async function transcribeAudio({ audio, prompt, language, temperature, apiKey: apiKeyParam, model, provider, mediaType: mediaTypeParam, currentSessionContext, lastSessionContext, }) {
    const apiKey = apiKeyParam || process.env.OPENAI_API_KEY || process.env.GEMINI_API_KEY;
    if (!model && !apiKey) {
        return new ApiKeyMissingError({ service: 'OpenAI or Gemini' });
    }
    const resolvedProvider = (() => {
        if (provider) {
            return provider;
        }
        if (apiKey) {
            return apiKey.startsWith('sk-') ? 'openai' : 'gemini';
        }
        return 'gemini';
    })();
    const languageModel = model || createTranscriptionModel({ apiKey: apiKey, provider: resolvedProvider });
    // Convert audio to Buffer for potential format conversion
    const audioBuffer = toAudioBuffer(audio);
    if (audioBuffer.length === 0) {
        return new InvalidAudioFormatError();
    }
    let mediaType = mediaTypeParam || 'audio/mpeg';
    let finalAudioBase64 = audioBuffer.toString('base64');
    // OpenAI input_audio only supports mp3/wav. Convert OGG Opus (Discord voice) to WAV.
    if (resolvedProvider === 'openai' && !OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)) {
        voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`);
        const converted = await convertOggToWav(audioBuffer);
        if (converted instanceof Error) {
            return converted;
        }
        finalAudioBase64 = converted.toString('base64');
        mediaType = 'audio/wav';
    }
    const languageHint = language ? `The audio is in ${language}.\n\n` : '';
    const sessionContextSection = buildSessionContextSection({ currentSessionContext, lastSessionContext });
    // A missing prompt must not be interpolated as the literal text "undefined".
    const fileTree = prompt ?? '';
    const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).

CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
- The transcriptionResult tool is the ONLY way to return results
- Text responses are completely ignored - only tool calls work
- You MUST call transcriptionResult even if you run out of tool calls
- Always call transcriptionResult with your best approximation of what was said
- DO NOT end without calling transcriptionResult

This is a software development environment. The speaker is giving instructions to an AI coding assistant. Expect:
- File paths, function names, CLI commands, package names, API endpoints

RULES:
- NEVER change the meaning or intent of the user's message. Your job is ONLY to transcribe, not to respond or answer.
- If the user asks a question, keep it as a question. Do NOT answer it. Do NOT rephrase it as a statement.
- Only fix grammar, punctuation, and markdown formatting. Preserve the original content faithfully.
- If audio is unclear, transcribe your best interpretation, even with strong accents. Always provide an approximation.
- If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
- The session context below is ONLY for understanding technical terms, file names, and function names. It may contain previous transcriptions — NEVER copy or reuse them. Always transcribe fresh from the current audio.

QUEUE DETECTION:
- If the user says "queue this message", "queue this", "add this to the queue", or similar phrasing indicating they want the message queued instead of sent immediately, set queueMessage to true.
- Remove the queue instruction from the transcription text itself — only include the actual message content.
- Example: "Queue this message. Fix the login bug in auth.ts" → transcription: "Fix the login bug in auth.ts", queueMessage: true
- If removing the queue phrase would leave empty content (user only said "queue this" with nothing else), keep the full spoken text as the transcription — never return an empty transcription.
- If no queue intent is detected, omit queueMessage or set it to false.

Common corrections (apply without tool calls):
- "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"

Project file structure:
<file_tree>
${fileTree}
</file_tree>
${sessionContextSection}

REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.

Note: "critique" is a CLI tool for showing diffs in the browser.`;
    return runTranscriptionOnce({
        model: languageModel,
        prompt: transcriptionPrompt,
        audioBase64: finalAudioBase64,
        mediaType,
        temperature: temperature ?? 0.3,
    });
}
@@ -0,0 +1,187 @@
1
+ // Tests for voice transcription using AI SDK provider (LanguageModelV3).
2
+ // Uses the example audio files at scripts/example-audio.{mp3,ogg}.
3
+ import { describe, test, expect } from 'vitest';
4
+ import fs from 'node:fs';
5
+ import path from 'node:path';
6
+ import { transcribeAudio, convertOggToWav } from './voice.js';
7
+ import { extractTranscription } from './voice.js';
// Unit tests for extractTranscription: each case feeds a hand-built list of
// model response content parts and checks the parsed result or returned error.
describe('extractTranscription', () => {
    // Builds a one-element content list holding a single `transcriptionResult`
    // tool call whose `input` is the JSON-encoded payload.
    const toolCallContent = (payload) => [
        {
            type: 'tool-call',
            toolCallId: 'call_1',
            toolName: 'transcriptionResult',
            input: JSON.stringify(payload),
        },
    ];
    test('extracts transcription from tool call', () => {
        const extracted = extractTranscription(toolCallContent({ transcription: 'hello world' }));
        expect(extracted).toMatchInlineSnapshot(`
          {
            "queueMessage": false,
            "transcription": "hello world",
          }
        `);
    });
    test('extracts queueMessage: true from tool call', () => {
        const extracted = extractTranscription(toolCallContent({
            transcription: 'Fix the login bug in auth.ts',
            queueMessage: true,
        }));
        expect(extracted).toMatchInlineSnapshot(`
          {
            "queueMessage": true,
            "transcription": "Fix the login bug in auth.ts",
          }
        `);
    });
    test('queueMessage defaults to false when omitted', () => {
        const extracted = extractTranscription(toolCallContent({ transcription: 'regular message' }));
        expect(extracted).not.toBeInstanceOf(Error);
        expect(extracted.queueMessage).toBe(false);
    });
    test('falls back to text when no tool call', () => {
        const textOnlyContent = [{ type: 'text', text: 'fallback text response' }];
        const extracted = extractTranscription(textOnlyContent);
        expect(extracted).toMatchInlineSnapshot(`
          {
            "queueMessage": false,
            "transcription": "fallback text response",
          }
        `);
    });
    test('returns NoResponseContentError for empty content', () => {
        const failure = extractTranscription([]);
        expect(failure).toBeInstanceOf(Error);
        expect(failure.message).toMatchInlineSnapshot(`"No response content from model"`);
    });
    test('returns EmptyTranscriptionError for empty transcription string', () => {
        // A whitespace-only transcription is expected to be reported as empty.
        const failure = extractTranscription(toolCallContent({ transcription: ' ' }));
        expect(failure).toBeInstanceOf(Error);
        expect(failure.message).toMatchInlineSnapshot(`"Model returned empty transcription"`);
    });
    test('returns TranscriptionError when content has no tool call or text', () => {
        const reasoningOnlyContent = [{ type: 'reasoning', text: 'thinking about it' }];
        const failure = extractTranscription(reasoningOnlyContent);
        expect(failure).toBeInstanceOf(Error);
        expect(failure.message).toMatchInlineSnapshot(`"Transcription failed: Model did not produce a transcription"`);
    });
});
// Integration tests that call the real provider APIs. Each test needs an API
// key in the environment and an audio fixture under ../scripts. When either is
// missing the test is reported as SKIPPED (via test.skipIf) instead of
// returning early, which previously made it show up as a passing test even
// though nothing was exercised.
describe('transcribeAudio with real API', () => {
    const scriptsDir = path.join(import.meta.dirname, '..', 'scripts');
    const mp3Path = path.join(scriptsDir, 'example-audio.mp3');
    const oggPath = path.join(scriptsDir, 'example-audio.ogg');
    const geminiKey = process.env.GEMINI_API_KEY;
    const openaiKey = process.env.OPENAI_API_KEY;
    // True when a test's prerequisites (API key + fixture file) are not available.
    const cannotRun = (apiKey, fixturePath) => !apiKey || !fs.existsSync(fixturePath);
    // Runs one transcription against the fixture and checks that a non-empty
    // transcription (not an Error value) came back. Extra fields in `request`
    // (apiKey, provider, mediaType) are forwarded to transcribeAudio unchanged.
    async function expectTranscription({ label, fixturePath, ...request }) {
        const result = await transcribeAudio({
            audio: fs.readFileSync(fixturePath),
            prompt: 'test project',
            ...request,
        });
        expect(result).not.toBeInstanceOf(Error);
        const { transcription } = result;
        expect(transcription.length).toBeGreaterThan(0);
        console.log(`${label} transcription:`, result);
    }
    test.skipIf(cannotRun(geminiKey, mp3Path))('transcribes with Gemini', { timeout: 30_000 }, async () => {
        await expectTranscription({
            label: 'Gemini',
            fixturePath: mp3Path,
            apiKey: geminiKey,
            provider: 'gemini',
        });
    });
    test.skipIf(cannotRun(openaiKey, mp3Path))('transcribes with OpenAI', { timeout: 30_000 }, async () => {
        await expectTranscription({
            label: 'OpenAI',
            fixturePath: mp3Path,
            apiKey: openaiKey,
            provider: 'openai',
        });
    });
    test.skipIf(cannotRun(openaiKey, oggPath))('transcribes OGG with OpenAI (converts to WAV)', { timeout: 30_000 }, async () => {
        await expectTranscription({
            label: 'OpenAI OGG',
            fixturePath: oggPath,
            apiKey: openaiKey,
            provider: 'openai',
            mediaType: 'audio/ogg',
        });
    });
});
// Checks that convertOggToWav turns the OGG fixture into a buffer with a WAV
// (RIFF/WAVE) header and some payload. When the fixture is absent the test is
// reported as SKIPPED (via test.skipIf) rather than returning early and being
// counted as a pass.
describe('convertOggToWav', () => {
    const oggPath = path.join(import.meta.dirname, '..', 'scripts', 'example-audio.ogg');
    test.skipIf(!fs.existsSync(oggPath))('converts OGG Opus to valid WAV', async () => {
        const ogg = fs.readFileSync(oggPath);
        const wav = await convertOggToWav(ogg);
        expect(wav).toBeInstanceOf(Buffer);
        // WAV header: bytes 0-3 are "RIFF", bytes 8-11 are "WAVE"
        expect(wav.subarray(0, 4).toString()).toBe('RIFF');
        expect(wav.subarray(8, 12).toString()).toBe('WAVE');
        // Must be larger than just the header (44 bytes)
        expect(wav.length).toBeGreaterThan(44);
        console.log(`Converted OGG (${ogg.length} bytes) to WAV (${wav.length} bytes)`);
    });
});
@@ -0,0 +1,94 @@
1
+ // Wait utilities for polling session completion.
2
+ // Used by `kimaki send --wait` to block until a session finishes,
3
+ // then output the session markdown to stdout.
4
+ import { getThreadSession } from './database.js';
5
+ import { initializeOpencodeForDirectory } from './opencode.js';
6
+ import { ShareMarkdown } from './markdown.js';
7
+ import { createLogger, LogPrefix } from './logger.js';
8
+ const waitLogger = createLogger(LogPrefix.SESSION);
/**
 * Poll `getThreadSession(threadId)` until it yields a truthy session ID.
 *
 * The lookup is always attempted at least once (so `timeoutMs: 0` means
 * "check once, do not wait"), and the pause between attempts is clamped to the
 * time left, so the call does not overrun `timeoutMs` by a whole poll interval.
 * A final lookup is made when the deadline is reached before giving up.
 *
 * @param {object} options
 * @param {string} options.threadId - Thread whose session mapping is awaited.
 * @param {number} [options.timeoutMs=120000] - Maximum time to keep polling.
 * @returns {Promise<string>} The resolved session ID.
 * @throws {Error} When no session ID appears before the timeout elapses.
 */
export async function waitForSessionId({ threadId, timeoutMs = 120_000, }) {
    const pollIntervalMs = 2_000;
    const deadline = Date.now() + timeoutMs;
    while (true) {
        const sessionId = await getThreadSession(threadId);
        if (sessionId) {
            waitLogger.log(`Session ID resolved: ${sessionId}`);
            return sessionId;
        }
        const remainingMs = deadline - Date.now();
        // Written as a negated `>` so a NaN timeout stops the loop instead of spinning.
        if (!(remainingMs > 0)) {
            break;
        }
        await new Promise((resolve) => {
            setTimeout(resolve, Math.min(pollIntervalMs, remainingMs));
        });
    }
    throw new Error(`Timed out waiting for session ID (thread: ${threadId}, timeout: ${timeoutMs}ms)`);
}
/**
 * Poll the OpenCode client until the session's most recent assistant message
 * has a truthy `info.time.completed`, then resolve.
 *
 * The message list is always fetched at least once (so `timeoutMs: 0` means
 * "check once, do not wait"), and the pause between polls is clamped to the
 * time left so the call does not overrun `timeoutMs` by a whole poll interval.
 * A response without `data` is treated as an empty message list.
 *
 * NOTE(review): only the last assistant message is inspected, so a session
 * that already holds a completed assistant message from an earlier turn
 * resolves on the first poll — confirm callers expect that for reused sessions.
 *
 * @param {object} options
 * @param {string} options.projectDirectory - Passed to `initializeOpencodeForDirectory`.
 * @param {string} options.sessionId - Session to watch.
 * @param {number} [options.timeoutMs=1800000] - Maximum time to keep polling (default 30 minutes).
 * @returns {Promise<void>}
 * @throws {Error} When the client cannot be initialized (original error kept as
 *   `cause`) or the session does not complete before the timeout elapses.
 */
export async function waitForSessionComplete({ projectDirectory, sessionId, timeoutMs = 30 * 60 * 1000, }) {
    const pollIntervalMs = 3_000;
    // The time budget starts before client initialization, as in the original.
    const deadline = Date.now() + timeoutMs;
    const getClient = await initializeOpencodeForDirectory(projectDirectory);
    if (getClient instanceof Error) {
        throw new Error(`Failed to connect to OpenCode server: ${getClient.message}`, {
            cause: getClient,
        });
    }
    while (true) {
        const messagesResponse = await getClient().session.messages({
            sessionID: sessionId,
        });
        const messages = messagesResponse.data || [];
        // Find the last assistant message (search a reversed copy; `messages` is not mutated)
        const lastAssistant = [...messages]
            .reverse()
            .find((m) => m.info.role === 'assistant');
        if (lastAssistant?.info.time.completed) {
            waitLogger.log(`Session ${sessionId} completed`);
            return;
        }
        const remainingMs = deadline - Date.now();
        // Written as a negated `>` so a NaN timeout stops the loop instead of spinning.
        if (!(remainingMs > 0)) {
            break;
        }
        await new Promise((resolve) => {
            setTimeout(resolve, Math.min(pollIntervalMs, remainingMs));
        });
    }
    throw new Error(`Timed out waiting for session completion (session: ${sessionId}, timeout: ${timeoutMs}ms)`);
}
/**
 * Run the full wait flow for a thread and print the finished session.
 *
 * Steps, in order: resolve the thread's session ID, wait for that session to
 * complete, obtain an OpenCode client for the project directory, render the
 * session with ShareMarkdown, and write the rendered text to stdout.
 *
 * @param {object} options
 * @param {string} options.threadId - Thread whose session is awaited.
 * @param {string} options.projectDirectory - Passed to the OpenCode initializer.
 * @param {number} [options.sessionIdTimeoutMs] - Forwarded to waitForSessionId as `timeoutMs`.
 * @param {number} [options.completionTimeoutMs] - Forwarded to waitForSessionComplete as `timeoutMs`.
 * @returns {Promise<void>}
 * @throws {Error} When either wait step throws, the client cannot be
 *   initialized, or markdown generation returns an Error value (the underlying
 *   error is attached as `cause` in the latter two cases).
 */
export async function waitAndOutputSession({ threadId, projectDirectory, sessionIdTimeoutMs, completionTimeoutMs, }) {
    waitLogger.log('Waiting for session ID...');
    const resolvedSessionId = await waitForSessionId({ threadId, timeoutMs: sessionIdTimeoutMs });
    waitLogger.log(`Waiting for session ${resolvedSessionId} to complete...`);
    await waitForSessionComplete({
        projectDirectory,
        sessionId: resolvedSessionId,
        timeoutMs: completionTimeoutMs,
    });
    waitLogger.log('Generating session output...');
    const clientFactory = await initializeOpencodeForDirectory(projectDirectory);
    if (clientFactory instanceof Error) {
        const connectionError = new Error(`Failed to connect to OpenCode server: ${clientFactory.message}`, { cause: clientFactory });
        throw connectionError;
    }
    const renderer = new ShareMarkdown(clientFactory());
    const rendered = await renderer.generate({ sessionID: resolvedSessionId });
    if (rendered instanceof Error) {
        const renderError = new Error(`Failed to generate session markdown: ${rendered.message}`, { cause: rendered });
        throw renderError;
    }
    process.stdout.write(rendered);
}
@@ -0,0 +1,4 @@
1
+ // Type definitions for worker thread message passing.
2
+ // Defines the protocol between main thread and GenAI worker for
3
+ // audio streaming, tool calls, and session lifecycle management.
4
+ export {};