kimaki 0.4.76 → 0.4.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/dist/adapter-rest-boundary.test.js +34 -0
  2. package/dist/agent-model.e2e.test.js +2 -20
  3. package/dist/cli.js +50 -13
  4. package/dist/commands/channel-ref.js +16 -0
  5. package/dist/commands/diff.js +20 -85
  6. package/dist/commands/merge-worktree.js +5 -17
  7. package/dist/commands/new-worktree.js +5 -9
  8. package/dist/commands/permissions.js +77 -11
  9. package/dist/commands/resume.js +5 -9
  10. package/dist/commands/screenshare.js +295 -0
  11. package/dist/commands/session.js +6 -17
  12. package/dist/critique-utils.js +95 -0
  13. package/dist/diff-patch-plugin.js +314 -0
  14. package/dist/discord-bot.js +19 -14
  15. package/dist/discord-js-import-boundary.test.js +62 -0
  16. package/dist/discord-utils.js +44 -0
  17. package/dist/event-stream-real-capture.e2e.test.js +2 -20
  18. package/dist/gateway-proxy.e2e.test.js +2 -5
  19. package/dist/generated/cloudflare/browser.js +17 -0
  20. package/dist/generated/cloudflare/client.js +34 -0
  21. package/dist/generated/cloudflare/commonInputTypes.js +10 -0
  22. package/dist/generated/cloudflare/enums.js +48 -0
  23. package/dist/generated/cloudflare/internal/class.js +47 -0
  24. package/dist/generated/cloudflare/internal/prismaNamespace.js +252 -0
  25. package/dist/generated/cloudflare/internal/prismaNamespaceBrowser.js +222 -0
  26. package/dist/generated/cloudflare/internal/query_compiler_fast_bg.js +135 -0
  27. package/dist/generated/cloudflare/models/bot_api_keys.js +1 -0
  28. package/dist/generated/cloudflare/models/bot_tokens.js +1 -0
  29. package/dist/generated/cloudflare/models/channel_agents.js +1 -0
  30. package/dist/generated/cloudflare/models/channel_directories.js +1 -0
  31. package/dist/generated/cloudflare/models/channel_mention_mode.js +1 -0
  32. package/dist/generated/cloudflare/models/channel_models.js +1 -0
  33. package/dist/generated/cloudflare/models/channel_verbosity.js +1 -0
  34. package/dist/generated/cloudflare/models/channel_worktrees.js +1 -0
  35. package/dist/generated/cloudflare/models/forum_sync_configs.js +1 -0
  36. package/dist/generated/cloudflare/models/global_models.js +1 -0
  37. package/dist/generated/cloudflare/models/ipc_requests.js +1 -0
  38. package/dist/generated/cloudflare/models/part_messages.js +1 -0
  39. package/dist/generated/cloudflare/models/scheduled_tasks.js +1 -0
  40. package/dist/generated/cloudflare/models/session_agents.js +1 -0
  41. package/dist/generated/cloudflare/models/session_events.js +1 -0
  42. package/dist/generated/cloudflare/models/session_models.js +1 -0
  43. package/dist/generated/cloudflare/models/session_start_sources.js +1 -0
  44. package/dist/generated/cloudflare/models/thread_sessions.js +1 -0
  45. package/dist/generated/cloudflare/models/thread_worktrees.js +1 -0
  46. package/dist/generated/cloudflare/models.js +1 -0
  47. package/dist/generated/node/browser.js +17 -0
  48. package/dist/generated/node/client.js +37 -0
  49. package/dist/generated/node/commonInputTypes.js +10 -0
  50. package/dist/generated/node/enums.js +48 -0
  51. package/dist/generated/node/internal/class.js +49 -0
  52. package/dist/generated/node/internal/prismaNamespace.js +252 -0
  53. package/dist/generated/node/internal/prismaNamespaceBrowser.js +222 -0
  54. package/dist/generated/node/models/bot_api_keys.js +1 -0
  55. package/dist/generated/node/models/bot_tokens.js +1 -0
  56. package/dist/generated/node/models/channel_agents.js +1 -0
  57. package/dist/generated/node/models/channel_directories.js +1 -0
  58. package/dist/generated/node/models/channel_mention_mode.js +1 -0
  59. package/dist/generated/node/models/channel_models.js +1 -0
  60. package/dist/generated/node/models/channel_verbosity.js +1 -0
  61. package/dist/generated/node/models/channel_worktrees.js +1 -0
  62. package/dist/generated/node/models/forum_sync_configs.js +1 -0
  63. package/dist/generated/node/models/global_models.js +1 -0
  64. package/dist/generated/node/models/ipc_requests.js +1 -0
  65. package/dist/generated/node/models/part_messages.js +1 -0
  66. package/dist/generated/node/models/scheduled_tasks.js +1 -0
  67. package/dist/generated/node/models/session_agents.js +1 -0
  68. package/dist/generated/node/models/session_events.js +1 -0
  69. package/dist/generated/node/models/session_models.js +1 -0
  70. package/dist/generated/node/models/session_start_sources.js +1 -0
  71. package/dist/generated/node/models/thread_sessions.js +1 -0
  72. package/dist/generated/node/models/thread_worktrees.js +1 -0
  73. package/dist/generated/node/models.js +1 -0
  74. package/dist/interaction-handler.js +10 -0
  75. package/dist/kimaki-digital-twin.e2e.test.js +2 -20
  76. package/dist/message-flags-boundary.test.js +54 -0
  77. package/dist/message-formatting.js +3 -62
  78. package/dist/onboarding-tutorial-plugin.js +1 -1
  79. package/dist/opencode-command.js +129 -0
  80. package/dist/opencode-command.test.js +48 -0
  81. package/dist/opencode-interrupt-plugin.js +19 -1
  82. package/dist/opencode-interrupt-plugin.test.js +0 -5
  83. package/dist/opencode-plugin-loading.e2e.test.js +9 -20
  84. package/dist/opencode-plugin.js +4 -4
  85. package/dist/opencode.js +150 -27
  86. package/dist/patch-text-parser.js +97 -0
  87. package/dist/platform/components-v2.js +20 -0
  88. package/dist/platform/discord-adapter.js +1440 -0
  89. package/dist/platform/discord-routes.js +31 -0
  90. package/dist/platform/message-flags.js +8 -0
  91. package/dist/platform/platform-value.js +41 -0
  92. package/dist/platform/slack-adapter.js +872 -0
  93. package/dist/platform/slack-markdown.js +169 -0
  94. package/dist/platform/types.js +4 -0
  95. package/dist/queue-advanced-e2e-setup.js +265 -0
  96. package/dist/queue-advanced-footer.e2e.test.js +173 -0
  97. package/dist/queue-advanced-model-switch.e2e.test.js +299 -0
  98. package/dist/queue-advanced-permissions-typing.e2e.test.js +73 -1
  99. package/dist/runtime-lifecycle.e2e.test.js +2 -20
  100. package/dist/session-handler/event-stream-state.js +5 -0
  101. package/dist/session-handler/event-stream-state.test.js +6 -2
  102. package/dist/session-handler/thread-session-runtime.js +32 -2
  103. package/dist/system-message.js +26 -23
  104. package/dist/test-utils.js +16 -0
  105. package/dist/thread-message-queue.e2e.test.js +2 -20
  106. package/dist/utils.js +3 -1
  107. package/dist/voice-message.e2e.test.js +2 -20
  108. package/dist/voice.js +122 -9
  109. package/dist/voice.test.js +17 -2
  110. package/dist/websockify.js +69 -0
  111. package/dist/worktree-lifecycle.e2e.test.js +308 -0
  112. package/package.json +4 -2
  113. package/skills/critique/SKILL.md +17 -0
  114. package/skills/egaki/SKILL.md +35 -0
  115. package/skills/event-sourcing-state/SKILL.md +252 -0
  116. package/skills/goke/SKILL.md +1 -0
  117. package/skills/npm-package/SKILL.md +21 -2
  118. package/skills/playwriter/SKILL.md +1 -1
  119. package/skills/x-articles/SKILL.md +554 -0
  120. package/src/agent-model.e2e.test.ts +4 -19
  121. package/src/cli.ts +60 -13
  122. package/src/commands/diff.ts +25 -99
  123. package/src/commands/merge-worktree.ts +5 -21
  124. package/src/commands/new-worktree.ts +5 -11
  125. package/src/commands/permissions.ts +100 -15
  126. package/src/commands/resume.ts +5 -12
  127. package/src/commands/screenshare.ts +354 -0
  128. package/src/commands/session.ts +6 -23
  129. package/src/critique-utils.ts +139 -0
  130. package/src/discord-bot.ts +20 -15
  131. package/src/discord-utils.ts +53 -0
  132. package/src/event-stream-real-capture.e2e.test.ts +4 -20
  133. package/src/gateway-proxy.e2e.test.ts +2 -5
  134. package/src/interaction-handler.ts +15 -0
  135. package/src/kimaki-digital-twin.e2e.test.ts +2 -21
  136. package/src/message-formatting.ts +3 -68
  137. package/src/onboarding-tutorial-plugin.ts +1 -1
  138. package/src/opencode-command.test.ts +70 -0
  139. package/src/opencode-command.ts +188 -0
  140. package/src/opencode-interrupt-plugin.test.ts +0 -5
  141. package/src/opencode-interrupt-plugin.ts +34 -1
  142. package/src/opencode-plugin-loading.e2e.test.ts +25 -35
  143. package/src/opencode-plugin.ts +5 -4
  144. package/src/opencode.ts +199 -32
  145. package/src/patch-text-parser.ts +107 -0
  146. package/src/queue-advanced-e2e-setup.ts +273 -0
  147. package/src/queue-advanced-footer.e2e.test.ts +211 -0
  148. package/src/queue-advanced-model-switch.e2e.test.ts +383 -0
  149. package/src/queue-advanced-permissions-typing.e2e.test.ts +92 -0
  150. package/src/runtime-lifecycle.e2e.test.ts +4 -19
  151. package/src/session-handler/event-stream-state.test.ts +6 -2
  152. package/src/session-handler/event-stream-state.ts +5 -0
  153. package/src/session-handler/thread-session-runtime.ts +45 -2
  154. package/src/system-message.ts +26 -23
  155. package/src/test-utils.ts +17 -0
  156. package/src/thread-message-queue.e2e.test.ts +2 -20
  157. package/src/utils.ts +3 -1
  158. package/src/voice-message.e2e.test.ts +3 -20
  159. package/src/voice.test.ts +26 -2
  160. package/src/voice.ts +147 -9
  161. package/src/websockify.ts +101 -0
  162. package/src/worktree-lifecycle.e2e.test.ts +391 -0
package/src/system-message.ts CHANGED
@@ -67,7 +67,7 @@ this feature by restarting kimaki with the \`--no-critique\` flag.
67
67
 
68
68
  ### reviewing diffs with AI
69
69
 
70
- \`critique review --web\` generates an AI-powered review of a diff and uploads it as a shareable URL.
70
+ \`bunx critique review --web\` generates an AI-powered review of a diff and uploads it as a shareable URL.
71
71
  It spawns a separate opencode session that analyzes the diff, groups related changes, and produces
72
72
  a structured review with explanations, diagrams, and suggestions. This is useful when the user
73
73
  asks you to explain or review a diff — the output is much richer than a plain diff URL.
@@ -109,7 +109,7 @@ The command prints a preview URL when done — share that URL with the user.
109
109
  const KIMAKI_TUNNEL_INSTRUCTIONS = `
110
110
  ## running dev servers with tunnel access
111
111
 
112
- ALWAYS use \`kimaki tunnel\` when starting any dev server. NEVER run \`pnpm dev\`, \`npm run dev\`, or any dev server command without wrapping it in \`kimaki tunnel\`. The user is on Discord, not at the terminal — localhost URLs are useless to them. They need a tunnel URL to access the site.
112
+ ALWAYS use \`kimaki tunnel\` when starting any dev server. NEVER run \`pnpm dev\`, \`npm run dev\`, or any dev server command without wrapping it in \`kimaki tunnel\`. Always invoke Kimaki directly as \`kimaki\`, never via \`npx\` or \`bunx\`. The user is on Discord, not at the terminal — localhost URLs are useless to them. They need a tunnel URL to access the site.
113
113
 
114
114
  Use \`tmux\` to run the tunnel + dev server combo in the background so it persists across commands.
115
115
 
@@ -134,7 +134,7 @@ Use random tunnel IDs by default. Only pass \`-t\` when exposing a service that
134
134
  tmux new-session -d -s myapp-dev
135
135
 
136
136
  # Run the dev server with kimaki tunnel inside the session
137
- tmux send-keys -t myapp-dev "npx kimaki tunnel -p 3000 -- pnpm dev" Enter
137
+ tmux send-keys -t myapp-dev "kimaki tunnel -p 3000 -- pnpm dev" Enter
138
138
  \`\`\`
139
139
 
140
140
  ### getting the tunnel URL
@@ -149,15 +149,15 @@ tmux capture-pane -t myapp-dev -p | grep -i "tunnel"
149
149
  \`\`\`bash
150
150
  # Next.js project
151
151
  tmux new-session -d -s projectname-nextjs-dev-3000
152
- tmux send-keys -t nextjs-dev "npx kimaki tunnel -p 3000 -- pnpm dev" Enter
152
+ tmux send-keys -t nextjs-dev "kimaki tunnel -p 3000 -- pnpm dev" Enter
153
153
 
154
154
  # Vite project on port 5173
155
155
  tmux new-session -d -s vite-dev-5173
156
- tmux send-keys -t vite-dev "npx kimaki tunnel -p 5173 -- pnpm dev" Enter
156
+ tmux send-keys -t vite-dev "kimaki tunnel -p 5173 -- pnpm dev" Enter
157
157
 
158
158
  # Custom tunnel ID (only for intentionally public-safe services)
159
159
  tmux new-session -d -s holocron-dev
160
- tmux send-keys -t holocron-dev "npx kimaki tunnel -p 3000 -t holocron -- pnpm dev" Enter
160
+ tmux send-keys -t holocron-dev "kimaki tunnel -p 3000 -t holocron -- pnpm dev" Enter
161
161
  \`\`\`
162
162
 
163
163
  ### stopping the dev server
@@ -285,7 +285,7 @@ If there are internal kimaki issues (sessions not responding, bot errors, unexpe
285
285
 
286
286
  To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
287
287
 
288
- npx -y kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
288
+ kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
289
289
 
290
290
  ## requesting files from the user
291
291
 
@@ -295,7 +295,7 @@ To ask the user to upload files from their device, use the \`kimaki_file_upload\
295
295
 
296
296
  To archive the current Discord thread (hide it from sidebar) and stop the session, run:
297
297
 
298
- npx -y kimaki session archive --session ${sessionId}
298
+ kimaki session archive --session ${sessionId}
299
299
 
300
300
  Only do this when the user explicitly asks to close or archive the thread, and only after your final message.
301
301
 
@@ -303,7 +303,7 @@ Only do this when the user explicitly asks to close or archive the thread, and o
303
303
 
304
304
  To search for Discord users in a guild (needed for mentions like <@userId>), run:
305
305
 
306
- npx -y kimaki user list --guild ${guildId || '<guildId>'} --query "username"
306
+ kimaki user list --guild ${guildId || '<guildId>'} --query "username"
307
307
 
308
308
  This returns user IDs you can use for Discord mentions.
309
309
  ${
@@ -313,7 +313,7 @@ ${
313
313
 
314
314
  To start a new thread/session in this channel programmatically, run:
315
315
 
316
- npx -y kimaki send --channel ${channelId} --prompt "your prompt here"${agentFlag}${username ? ` --user "${username}"` : ''}
316
+ kimaki send --channel ${channelId} --prompt "your prompt here"${agentFlag}${username ? ` --user "${username}"` : ''}
317
317
 
318
318
  You can use this to "spawn" parallel helper sessions like teammates: start new threads with focused prompts, then come back and collect the results.
319
319
 
@@ -321,23 +321,23 @@ IMPORTANT: NEVER use \`--worktree\` unless the user explicitly asks for a worktr
321
321
 
322
322
  To send a prompt to an existing thread instead of creating a new one:
323
323
 
324
- npx -y kimaki send --thread <thread_id> --prompt "follow-up prompt"
324
+ kimaki send --thread <thread_id> --prompt "follow-up prompt"
325
325
 
326
326
  Use this when you already have the Discord thread ID.
327
327
 
328
328
  To send to the thread associated with a known session:
329
329
 
330
- npx -y kimaki send --session <session_id> --prompt "follow-up prompt"
330
+ kimaki send --session <session_id> --prompt "follow-up prompt"
331
331
 
332
332
  Use this when you have the OpenCode session ID.
333
333
 
334
334
  Use --notify-only to create a notification thread without starting an AI session:
335
335
 
336
- npx -y kimaki send --channel ${channelId} --prompt "User cancelled subscription" --notify-only
336
+ kimaki send --channel ${channelId} --prompt "User cancelled subscription" --notify-only
337
337
 
338
338
  Use --worktree to create a git worktree for the session (ONLY when the user explicitly asks for a worktree):
339
339
 
340
- npx -y kimaki send --channel ${channelId} --prompt "Add dark mode support" --worktree dark-mode${agentFlag}${username ? ` --user "${username}"` : ''}
340
+ kimaki send --channel ${channelId} --prompt "Add dark mode support" --worktree dark-mode${agentFlag}${username ? ` --user "${username}"` : ''}
341
341
 
342
342
  Important:
343
343
  - NEVER use \`--worktree\` unless the user explicitly requests a worktree. Most tasks should use normal threads without worktrees.
@@ -347,18 +347,21 @@ Important:
347
347
 
348
348
  Use --agent to specify which agent to use for the session:
349
349
 
350
- npx -y kimaki send --channel ${channelId} --prompt "Plan the refactor of the auth module" --agent plan${username ? ` --user "${username}"` : ''}
350
+ kimaki send --channel ${channelId} --prompt "Plan the refactor of the auth module" --agent plan${username ? ` --user "${username}"` : ''}
351
351
  ${agents && agents.length > 0 ? `
352
352
  Available agents:
353
353
  ${agents.map((a) => { return `- \`${a.name}\`${a.name === currentAgent ? ' (current)' : ''}${a.description ? `: ${a.description}` : ''}` }).join('\n')}
354
354
  ` : ''}
355
+ ## switching agents in the current session
356
+
357
+ The user can switch the active agent mid-session using the Discord slash command \`/<agentname>-agent\`. For example if you are in plan mode and the user asks you to edit files, tell them to run \`/build-agent\` to switch to the build agent first.
355
358
 
356
359
  ## scheduled sends and task management
357
360
 
358
361
  Use \`--send-at\` to schedule a one-time or recurring task:
359
362
 
360
- npx -y kimaki send --channel ${channelId} --prompt "Reminder: review open PRs" --send-at "2026-03-01T09:00:00Z"
361
- npx -y kimaki send --channel ${channelId} --prompt "Run weekly test suite and summarize failures" --send-at "0 9 * * 1"
363
+ kimaki send --channel ${channelId} --prompt "Reminder: review open PRs" --send-at "2026-03-01T09:00:00Z"
364
+ kimaki send --channel ${channelId} --prompt "Run weekly test suite and summarize failures" --send-at "0 9 * * 1"
362
365
 
363
366
  When using a date for \`--send-at\`, it must be UTC in ISO format ending with \`Z\`.
364
367
 
@@ -390,13 +393,13 @@ kimaki task delete <id>
390
393
 
391
394
  Use case patterns:
392
395
  - Reminder flows: create deadline reminders in this channel with one-time \`--send-at\`; mention only if action is required.
393
- - Proactive reminders: when you encounter time-sensitive information during your work (e.g. creating an API key that expires in 90 days, a certificate with an expiration date, a trial period ending, a deadline mentioned in code comments), proactively schedule a \`--notify-only\` reminder before the expiration so the user gets notified in time. For example, if you generate an API key expiring on 2026-06-01, schedule a reminder a few days before: \`npx -y kimaki send --channel ${channelId} --prompt "Reminder: <@${userId || 'USER_ID'}> the API key created on 2026-03-01 expires on 2026-06-01. Renew it before it breaks production." --send-at "2026-05-28T09:00:00Z" --notify-only\`. Always tell the user you scheduled the reminder so they know.
396
+ - Proactive reminders: when you encounter time-sensitive information during your work (e.g. creating an API key that expires in 90 days, a certificate with an expiration date, a trial period ending, a deadline mentioned in code comments), proactively schedule a \`--notify-only\` reminder before the expiration so the user gets notified in time. For example, if you generate an API key expiring on 2026-06-01, schedule a reminder a few days before: \`kimaki send --channel ${channelId} --prompt "Reminder: <@${userId || 'USER_ID'}> the API key created on 2026-03-01 expires on 2026-06-01. Renew it before it breaks production." --send-at "2026-05-28T09:00:00Z" --notify-only\`. Always tell the user you scheduled the reminder so they know.
394
397
  - Weekly QA: schedule "run full test suite, inspect failures, post summary, and mention ${username ? `@${username}` : '@username'} only when failures require review".
395
398
  - Weekly benchmark automation: schedule a benchmark prompt that runs model evals, writes JSON outputs in the repo, commits results, and mentions only for regressions.
396
399
  - Recurring maintenance: use cron \`--send-at\` for repetitive tasks like rotating secrets, checking dependency updates, running security audits, or cleaning up stale branches. Example: \`--send-at "0 9 1 * *"\` to run on the 1st of every month.
397
400
  - Thread reminders: when the user says "remind me about this in 2 hours" (or any duration), use \`--send-at\` with \`--thread\` to resurface the current thread. Compute the future UTC time and send a mention so Discord shows a notification:
398
401
 
399
- npx -y kimaki send --session ${sessionId} --prompt "Reminder: <@${userId || 'USER_ID'}> you asked to be reminded about this thread." --send-at "<future_UTC_time>" --notify-only
402
+ kimaki send --session ${sessionId} --prompt "Reminder: <@${userId || 'USER_ID'}> you asked to be reminded about this thread." --send-at "<future_UTC_time>" --notify-only
400
403
 
401
404
  Replace \`<future_UTC_time>\` with the computed UTC ISO timestamp. The \`--notify-only\` flag creates just a notification message without starting a new AI session. The \`<@userId>\` mention ensures the user gets a Discord notification.
402
405
 
@@ -411,7 +414,7 @@ ONLY create worktrees when the user explicitly asks for one. Never proactively u
411
414
  When the user asks to "create a worktree" or "make a worktree", they mean you should use the kimaki CLI to create it. Do NOT use raw \`git worktree add\` commands. Instead use:
412
415
 
413
416
  \`\`\`bash
414
- npx -y kimaki send --channel ${channelId} --prompt "your task description" --worktree worktree-name${agentFlag}${username ? ` --user "${username}"` : ''}
417
+ kimaki send --channel ${channelId} --prompt "your task description" --worktree worktree-name${agentFlag}${username ? ` --user "${username}"` : ''}
415
418
  \`\`\`
416
419
 
417
420
  This creates a new Discord thread with an isolated git worktree and starts a session in it. The worktree name should be kebab-case and descriptive of the task.
@@ -429,7 +432,7 @@ This is useful for automation (cron jobs, GitHub webhooks, n8n, etc.)
429
432
  When you are approaching the **context window limit** or the user explicitly asks to **handoff to a new thread**, use the \`kimaki send\` command to start a fresh session with context:
430
433
 
431
434
  \`\`\`bash
432
- npx -y kimaki send --channel ${channelId} --prompt "Continuing from previous session: <summary of current task and state>"${agentFlag}${username ? ` --user "${username}"` : ''}
435
+ kimaki send --channel ${channelId} --prompt "Continuing from previous session: <summary of current task and state>"${agentFlag}${username ? ` --user "${username}"` : ''}
433
436
  \`\`\`
434
437
 
435
438
  The command automatically handles long prompts (over 2000 chars) by sending them as file attachments.
@@ -513,10 +516,10 @@ If your Bash tool timeout triggers anyway, fall back to reading the session outp
513
516
 
514
517
  \`\`\`bash
515
518
  # Start a session and wait for it to finish
516
- npx -y kimaki send --channel <channel_id> --prompt "Fix the auth bug" --wait
519
+ kimaki send --channel <channel_id> --prompt "Fix the auth bug" --wait
517
520
 
518
521
  # Send to an existing thread and wait
519
- npx -y kimaki send --thread <thread_id> --prompt "Run the tests" --wait
522
+ kimaki send --thread <thread_id> --prompt "Run the tests" --wait
520
523
  \`\`\`
521
524
 
522
525
  The command exits with the session markdown on stdout once the model finishes responding.
package/src/test-utils.ts CHANGED
@@ -8,6 +8,23 @@
8
8
  // a new server only if no existing client is available.
9
9
 
10
10
  import type { APIMessage } from 'discord.js'
11
+
12
+ /**
13
+ * Deterministic port from a string key (channel ID, test file name, etc.).
14
+ * Uses a hash to pick a stable port in range 53000-54999, avoiding overlap
15
+ * with queue-advanced tests (51000-52999) and getLockPort (30000-39999).
16
+ * Replaces the old TOCTOU-prone pattern of binding port 0, reading the
17
+ * assigned port, closing, then rebinding — which races under parallel vitest.
18
+ */
19
+ export function chooseLockPort({ key }: { key: string }): number {
20
+ let hash = 0
21
+ for (let i = 0; i < key.length; i++) {
22
+ const char = key.charCodeAt(i)
23
+ hash = (hash << 5) - hash + char
24
+ hash |= 0
25
+ }
26
+ return 53_000 + (Math.abs(hash) % 2_000)
27
+ }
11
28
  import type { DigitalDiscord } from 'discord-digital-twin/src'
12
29
  import {
13
30
  getOpencodeClient,
package/src/thread-message-queue.e2e.test.ts CHANGED
@@ -10,7 +10,6 @@
10
10
  // so vitest can parallelize across files.
11
11
 
12
12
  import fs from 'node:fs'
13
- import net from 'node:net'
14
13
  import path from 'node:path'
15
14
  import url from 'node:url'
16
15
  import { describe, beforeAll, afterAll, test, expect } from 'vitest'
@@ -37,6 +36,7 @@ import {
37
36
  import { startHranaServer, stopHranaServer } from './hrana-server.js'
38
37
  import { initializeOpencodeForDirectory, stopOpencodeServer } from './opencode.js'
39
38
  import {
39
+ chooseLockPort,
40
40
  cleanupTestSessions,
41
41
  waitForFooterMessage,
42
42
  waitForBotMessageContaining,
@@ -60,24 +60,6 @@ function createRunDirectories() {
60
60
  return { root, dataDir, projectDirectory }
61
61
  }
62
62
 
63
- function chooseLockPort(): Promise<number> {
64
- return new Promise((resolve, reject) => {
65
- const server = net.createServer()
66
- server.listen(0, () => {
67
- const address = server.address()
68
- if (!address || typeof address === 'string') {
69
- server.close()
70
- reject(new Error('Failed to resolve lock port'))
71
- return
72
- }
73
- const port = address.port
74
- server.close(() => {
75
- resolve(port)
76
- })
77
- })
78
- })
79
- }
80
-
81
63
  function createDiscordJsClient({ restUrl }: { restUrl: string }) {
82
64
  return new Client({
83
65
  intents: [
@@ -272,7 +254,7 @@ e2eTest('thread message queue ordering', () => {
272
254
  beforeAll(async () => {
273
255
  testStartTime = Date.now()
274
256
  directories = createRunDirectories()
275
- const lockPort = await chooseLockPort()
257
+ const lockPort = chooseLockPort({ key: TEXT_CHANNEL_ID })
276
258
 
277
259
  process.env['KIMAKI_LOCK_PORT'] = String(lockPort)
278
260
  setDataDir(directories.dataDir)
package/src/utils.ts CHANGED
@@ -107,11 +107,13 @@ export function generateDiscordInstallUrlForBot({
107
107
  // This initiates the better-auth OAuth flow with clientId/clientSecret
108
108
  // as additionalData, which better-auth stores in its verification table
109
109
  // and recovers after Discord redirects back to the callback.
110
+ // Use a kimaki-specific callback field name to avoid ambiguity with
111
+ // better-auth's own callbackURL state field.
110
112
  const url = new URL(`${KIMAKI_WEBSITE_URL}/discord-install`)
111
113
  url.searchParams.set('clientId', clientId)
112
114
  url.searchParams.set('clientSecret', clientSecret)
113
115
  if (gatewayCallbackUrl) {
114
- url.searchParams.set('callbackUrl', gatewayCallbackUrl)
116
+ url.searchParams.set('kimakiCallbackUrl', gatewayCallbackUrl)
115
117
  }
116
118
  return url.toString()
117
119
  }
package/src/voice-message.e2e.test.ts CHANGED
@@ -9,7 +9,7 @@
9
9
  // transitions (via getThreadState from the zustand store).
10
10
 
11
11
  import fs from 'node:fs'
12
- import net from 'node:net'
12
+
13
13
  import path from 'node:path'
14
14
  import url from 'node:url'
15
15
  import { describe, beforeAll, afterAll, beforeEach, test, expect } from 'vitest'
@@ -33,6 +33,7 @@ import { startHranaServer, stopHranaServer } from './hrana-server.js'
33
33
  import { initializeOpencodeForDirectory, getOpencodeClient, stopOpencodeServer } from './opencode.js'
34
34
  import type { Part, Message } from '@opencode-ai/sdk/v2'
35
35
  import {
36
+ chooseLockPort,
36
37
  cleanupTestSessions,
37
38
  waitForFooterMessage,
38
39
  waitForBotMessageContaining,
@@ -56,24 +57,6 @@ function createRunDirectories() {
56
57
  return { root, dataDir, projectDirectory }
57
58
  }
58
59
 
59
- function chooseLockPort(): Promise<number> {
60
- return new Promise((resolve, reject) => {
61
- const server = net.createServer()
62
- server.listen(0, () => {
63
- const address = server.address()
64
- if (!address || typeof address === 'string') {
65
- server.close()
66
- reject(new Error('Failed to resolve lock port'))
67
- return
68
- }
69
- const port = address.port
70
- server.close(() => {
71
- resolve(port)
72
- })
73
- })
74
- })
75
- }
76
-
77
60
  function createDiscordJsClient({ restUrl }: { restUrl: string }) {
78
61
  return new Client({
79
62
  intents: [
@@ -306,7 +289,7 @@ e2eTest('voice message handling', () => {
306
289
  beforeAll(async () => {
307
290
  testStartTime = Date.now()
308
291
  directories = createRunDirectories()
309
- const lockPort = await chooseLockPort()
292
+ const lockPort = chooseLockPort({ key: TEXT_CHANNEL_ID })
310
293
 
311
294
  process.env['KIMAKI_LOCK_PORT'] = String(lockPort)
312
295
  setDataDir(directories.dataDir)
package/src/voice.test.ts CHANGED
@@ -4,8 +4,32 @@
4
4
  import { describe, test, expect } from 'vitest'
5
5
  import fs from 'node:fs'
6
6
  import path from 'node:path'
7
- import { transcribeAudio, convertOggToWav } from './voice.js'
8
- import { extractTranscription } from './voice.js'
7
+ import {
8
+ transcribeAudio,
9
+ convertOggToWav,
10
+ extractTranscription,
11
+ normalizeAudioMediaType,
12
+ getOpenAIAudioConversionStrategy,
13
+ } from './voice.js'
14
+
15
+ describe('audio media type routing', () => {
16
+ test('normalizes m4a aliases to audio/mp4', () => {
17
+ expect(normalizeAudioMediaType('audio/x-m4a')).toMatchInlineSnapshot('"audio/mp4"')
18
+ expect(normalizeAudioMediaType('audio/m4a')).toMatchInlineSnapshot('"audio/mp4"')
19
+ })
20
+
21
+ test('keeps non-m4a media types unchanged', () => {
22
+ expect(normalizeAudioMediaType('audio/ogg')).toMatchInlineSnapshot('"audio/ogg"')
23
+ expect(normalizeAudioMediaType('audio/wav')).toMatchInlineSnapshot('"audio/wav"')
24
+ })
25
+
26
+ test('converts ogg only when mime is actual ogg/opus', () => {
27
+ expect(getOpenAIAudioConversionStrategy('audio/ogg')).toMatchInlineSnapshot('"convert-ogg-to-wav"')
28
+ expect(getOpenAIAudioConversionStrategy('audio/opus')).toMatchInlineSnapshot('"convert-ogg-to-wav"')
29
+ expect(getOpenAIAudioConversionStrategy('audio/mp4')).toMatchInlineSnapshot('"convert-m4a-to-wav"')
30
+ expect(getOpenAIAudioConversionStrategy('audio/mpeg')).toMatchInlineSnapshot('"none"')
31
+ })
32
+ })
9
33
 
10
34
  describe('extractTranscription', () => {
11
35
  test('extracts transcription from tool call', () => {
package/src/voice.ts CHANGED
@@ -38,8 +38,49 @@ const OPENAI_SUPPORTED_AUDIO_TYPES = new Set([
38
38
  'audio/mpeg',
39
39
  'audio/mp3',
40
40
  'audio/wav',
41
+ 'audio/x-wav',
41
42
  ])
42
43
 
44
+ const OGG_AUDIO_TYPES = new Set([
45
+ 'audio/ogg',
46
+ 'audio/opus',
47
+ ])
48
+
49
+ const M4A_AUDIO_TYPES = new Set([
50
+ 'audio/mp4',
51
+ 'audio/m4a',
52
+ 'audio/x-m4a',
53
+ ])
54
+
55
+ export function normalizeAudioMediaType(mediaType: string): string {
56
+ const normalized = mediaType.trim().toLowerCase()
57
+ if (normalized === 'audio/x-m4a' || normalized === 'audio/m4a') {
58
+ return 'audio/mp4'
59
+ }
60
+ return normalized
61
+ }
62
+
63
+ type OpenAIAudioConversionStrategy =
64
+ | 'none'
65
+ | 'convert-ogg-to-wav'
66
+ | 'convert-m4a-to-wav'
67
+ | 'unsupported'
68
+
69
+ export function getOpenAIAudioConversionStrategy(
70
+ mediaType: string,
71
+ ): OpenAIAudioConversionStrategy {
72
+ if (OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)) {
73
+ return 'none'
74
+ }
75
+ if (OGG_AUDIO_TYPES.has(mediaType)) {
76
+ return 'convert-ogg-to-wav'
77
+ }
78
+ if (M4A_AUDIO_TYPES.has(mediaType)) {
79
+ return 'convert-m4a-to-wav'
80
+ }
81
+ return 'unsupported'
82
+ }
83
+
43
84
  /**
44
85
  * Convert OGG Opus audio to WAV using prism-media (already installed for Discord voice).
45
86
  * Pipeline: OGG buffer → OggDemuxer → Opus Decoder → PCM → WAV (with header).
@@ -93,6 +134,89 @@ export function convertOggToWav(input: Buffer): Promise<TranscriptionError | Buf
93
134
  })
94
135
  }
95
136
 
137
/**
 * Convert M4A/MP4 audio to WAV using prism-media FFmpeg wrapper.
 * This depends on an ffmpeg binary available in PATH.
 *
 * The input buffer is piped into ffmpeg, decoded to 48 kHz mono signed 16-bit
 * little-endian PCM, and prefixed with a matching WAV header.
 *
 * Failures are reported as values: the promise resolves with either the WAV
 * buffer or a TranscriptionError. A missing ffmpeg binary gets a dedicated,
 * actionable error message.
 *
 * NOTE(review): the input is read from a non-seekable pipe; MP4 files whose
 * index (moov atom) sits at the end of the file may fail to decode this way —
 * confirm against real recordings.
 */
export function convertM4aToWav(input: Buffer): Promise<TranscriptionError | Buffer> {
  return new Promise((resolve) => {
    // Turn any ffmpeg failure into a TranscriptionError, using a dedicated
    // message when the failure looks like a missing ffmpeg binary.
    const toTranscriptionError = (err: Error): TranscriptionError => {
      const lower = err.message.toLowerCase()
      const isMissingFfmpeg =
        lower.includes('ffmpeg') &&
        (lower.includes('not found') ||
          lower.includes('enoent') ||
          lower.includes('spawn'))
      if (isMissingFfmpeg) {
        return new TranscriptionError({
          reason:
            'M4A transcription with OpenAI requires ffmpeg to be installed and available in PATH',
          cause: err,
        })
      }
      return new TranscriptionError({
        reason: `M4A decode failed: ${err.message}`,
        cause: err,
      })
    }

    // Constructing the transcoder starts ffmpeg. If that throws synchronously
    // (for example when no ffmpeg binary can be located), the throw would
    // reject this promise and bypass the errors-as-values contract, so it is
    // caught here and converted like any other ffmpeg failure.
    const transcoder = (() => {
      try {
        return new prism.FFmpeg({
          args: [
            '-analyzeduration',
            '0',
            '-loglevel',
            '0',
            '-f',
            'mp4',
            '-i',
            'pipe:0',
            '-f',
            's16le',
            '-acodec',
            'pcm_s16le',
            '-ac',
            '1',
            '-ar',
            '48000',
            'pipe:1',
          ],
        })
      } catch (err) {
        return toTranscriptionError(err instanceof Error ? err : new Error(String(err)))
      }
    })()

    if (transcoder instanceof TranscriptionError) {
      resolve(transcoder)
      return
    }

    const pcmChunks: Buffer[] = []

    transcoder.on('data', (chunk: Buffer) => {
      pcmChunks.push(chunk)
    })

    transcoder.on('end', () => {
      const pcmData = Buffer.concat(pcmChunks)
      if (pcmData.length === 0) {
        resolve(
          new TranscriptionError({
            reason: 'FFmpeg conversion produced empty audio output',
          }),
        )
        return
      }

      // Header parameters must match the ffmpeg output args above
      // (-ar 48000, -ac 1, pcm_s16le).
      const wavHeader = createWavHeader({
        dataLength: pcmData.length,
        sampleRate: 48000,
        numChannels: 1,
        bitsPerSample: 16,
      })
      resolve(Buffer.concat([wavHeader, pcmData]))
    })

    transcoder.on('error', (err: Error) => {
      resolve(toTranscriptionError(err))
    })

    Readable.from(input).pipe(transcoder)
  })
}
219
+
96
220
  function createWavHeader({
97
221
  dataLength,
98
222
  sampleRate,
@@ -359,18 +483,32 @@ export async function transcribeAudio({
359
483
  return new InvalidAudioFormatError()
360
484
  }
361
485
 
362
- let mediaType = mediaTypeParam || 'audio/mpeg'
486
+ let mediaType = normalizeAudioMediaType(mediaTypeParam || 'audio/mpeg')
363
487
  let finalAudioBase64 = audioBuffer.toString('base64')
364
488
 
365
- // OpenAI input_audio only supports mp3/wav. Convert OGG Opus (Discord voice) to WAV.
366
- if (resolvedProvider === 'openai' && !OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)) {
367
- voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
368
- const converted = await convertOggToWav(audioBuffer)
369
- if (converted instanceof Error) {
370
- return converted
489
+ // OpenAI input_audio supports only a subset of audio formats.
490
+ // Convert based on MIME so OGG conversion runs only for real OGG/Opus inputs.
491
+ if (resolvedProvider === 'openai') {
492
+ const conversionStrategy = getOpenAIAudioConversionStrategy(mediaType)
493
+ if (conversionStrategy === 'convert-ogg-to-wav') {
494
+ voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
495
+ const converted = await convertOggToWav(audioBuffer)
496
+ if (converted instanceof Error) {
497
+ return converted
498
+ }
499
+ finalAudioBase64 = converted.toString('base64')
500
+ mediaType = 'audio/wav'
501
+ } else if (conversionStrategy === 'convert-m4a-to-wav') {
502
+ voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
503
+ const converted = await convertM4aToWav(audioBuffer)
504
+ if (converted instanceof Error) {
505
+ return converted
506
+ }
507
+ finalAudioBase64 = converted.toString('base64')
508
+ mediaType = 'audio/wav'
509
+ } else if (conversionStrategy === 'unsupported') {
510
+ return new InvalidAudioFormatError()
371
511
  }
372
- finalAudioBase64 = converted.toString('base64')
373
- mediaType = 'audio/wav'
374
512
  }
375
513
 
376
514
  const languageHint = language ? `The audio is in ${language}.\n\n` : ''
@@ -0,0 +1,101 @@
1
+ // In-process WebSocket-to-TCP bridge (websockify replacement).
2
+ // Accepts WebSocket connections and pipes raw bytes to/from a TCP target.
3
+ // Used by /screenshare to bridge noVNC (WebSocket) to a VNC server (TCP).
4
+ // Supports the 'binary' subprotocol required by noVNC.
5
+
6
+ import { WebSocketServer, WebSocket } from 'ws'
7
+ import net from 'node:net'
8
+ import { createLogger } from './logger.js'
9
+
10
+ const logger = createLogger('SCREEN')
11
+
12
/** Settings accepted by startWebsockify. */
type WebsockifyOptions = {
  /** Port the WebSocket server listens on; pass 0 to have one auto-assigned. */
  wsPort: number
  /** Host of the TCP target that WebSocket traffic is forwarded to. */
  tcpHost: string
  /** Port of the TCP target that WebSocket traffic is forwarded to. */
  tcpPort: number
}

/** Handle for a running bridge, as resolved by startWebsockify. */
type WebsockifyInstance = {
  /** The underlying WebSocket server. */
  wss: WebSocketServer
  /** Port the server is actually listening on (relevant when wsPort was 0). */
  port: number
  /** Closes every connected client and then the server itself. */
  close: () => void
}
27
+
28
/**
 * Start a WebSocket server that bridges each incoming connection to a new TCP
 * connection on `tcpHost:tcpPort`, forwarding bytes in both directions.
 *
 * @param options.wsPort Port for the WebSocket server (0 = auto-assign).
 * @param options.tcpHost TCP target host.
 * @param options.tcpPort TCP target port.
 * @returns A promise that resolves once the server is listening, with the
 *   server, the resolved port, and a `close` function. It rejects with
 *   "Websockify failed to start" if the server errors before listening.
 */
export function startWebsockify({
  wsPort,
  tcpHost,
  tcpPort,
}: WebsockifyOptions): Promise<WebsockifyInstance> {
  return new Promise((resolve, reject) => {
    // Tracks whether startup already succeeded, so later server errors are
    // logged instead of being passed to an already-settled promise.
    let listening = false

    const wss = new WebSocketServer({
      port: wsPort,
      // noVNC negotiates the 'binary' subprotocol
      handleProtocols: (protocols) => {
        if (protocols.has('binary')) {
          return 'binary'
        }
        return false
      },
    })

    wss.on('listening', () => {
      listening = true
      const addr = wss.address()
      // address() yields an object with the bound port for TCP servers; fall
      // back to the requested port otherwise.
      const port = typeof addr === 'object' && addr ? addr.port : wsPort
      logger.log(`Websockify listening on port ${port} → ${tcpHost}:${tcpPort}`)
      resolve({
        wss,
        port,
        close: () => {
          for (const client of wss.clients) {
            client.close()
          }
          wss.close()
        },
      })
    })

    wss.on('error', (err) => {
      if (listening) {
        // The promise is already resolved; rejecting would be a silent no-op.
        logger.error('Websockify server error:', err)
        return
      }
      reject(new Error('Websockify failed to start', { cause: err }))
    })

    wss.on('connection', (ws) => {
      // One dedicated TCP connection per WebSocket client.
      const tcp = net.createConnection(tcpPort, tcpHost, () => {
        logger.log(`TCP connection established to ${tcpHost}:${tcpPort}`)
      })

      // TCP → WebSocket
      tcp.on('data', (data) => {
        if (ws.readyState === WebSocket.OPEN) {
          ws.send(data)
        }
      })

      // WebSocket → TCP
      ws.on('message', (data: Buffer) => {
        if (!tcp.destroyed) {
          tcp.write(data)
        }
      })

      // Tear down the peer whenever either side closes or errors.
      ws.on('close', () => {
        tcp.destroy()
      })

      ws.on('error', (err) => {
        logger.error('WebSocket error:', err)
        tcp.destroy()
      })

      tcp.on('close', () => {
        ws.close()
      })

      tcp.on('error', (err) => {
        logger.error('TCP connection error:', err)
        ws.close()
      })
    })
  })
}