kimaki 0.4.76 → 0.4.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/dist/adapter-rest-boundary.test.js +34 -0
  2. package/dist/agent-model.e2e.test.js +2 -20
  3. package/dist/cli.js +50 -13
  4. package/dist/commands/channel-ref.js +16 -0
  5. package/dist/commands/diff.js +20 -85
  6. package/dist/commands/merge-worktree.js +5 -17
  7. package/dist/commands/new-worktree.js +5 -9
  8. package/dist/commands/permissions.js +77 -11
  9. package/dist/commands/resume.js +5 -9
  10. package/dist/commands/screenshare.js +295 -0
  11. package/dist/commands/session.js +6 -17
  12. package/dist/critique-utils.js +95 -0
  13. package/dist/diff-patch-plugin.js +314 -0
  14. package/dist/discord-bot.js +19 -14
  15. package/dist/discord-js-import-boundary.test.js +62 -0
  16. package/dist/discord-utils.js +44 -0
  17. package/dist/event-stream-real-capture.e2e.test.js +2 -20
  18. package/dist/gateway-proxy.e2e.test.js +2 -5
  19. package/dist/generated/cloudflare/browser.js +17 -0
  20. package/dist/generated/cloudflare/client.js +34 -0
  21. package/dist/generated/cloudflare/commonInputTypes.js +10 -0
  22. package/dist/generated/cloudflare/enums.js +48 -0
  23. package/dist/generated/cloudflare/internal/class.js +47 -0
  24. package/dist/generated/cloudflare/internal/prismaNamespace.js +252 -0
  25. package/dist/generated/cloudflare/internal/prismaNamespaceBrowser.js +222 -0
  26. package/dist/generated/cloudflare/internal/query_compiler_fast_bg.js +135 -0
  27. package/dist/generated/cloudflare/models/bot_api_keys.js +1 -0
  28. package/dist/generated/cloudflare/models/bot_tokens.js +1 -0
  29. package/dist/generated/cloudflare/models/channel_agents.js +1 -0
  30. package/dist/generated/cloudflare/models/channel_directories.js +1 -0
  31. package/dist/generated/cloudflare/models/channel_mention_mode.js +1 -0
  32. package/dist/generated/cloudflare/models/channel_models.js +1 -0
  33. package/dist/generated/cloudflare/models/channel_verbosity.js +1 -0
  34. package/dist/generated/cloudflare/models/channel_worktrees.js +1 -0
  35. package/dist/generated/cloudflare/models/forum_sync_configs.js +1 -0
  36. package/dist/generated/cloudflare/models/global_models.js +1 -0
  37. package/dist/generated/cloudflare/models/ipc_requests.js +1 -0
  38. package/dist/generated/cloudflare/models/part_messages.js +1 -0
  39. package/dist/generated/cloudflare/models/scheduled_tasks.js +1 -0
  40. package/dist/generated/cloudflare/models/session_agents.js +1 -0
  41. package/dist/generated/cloudflare/models/session_events.js +1 -0
  42. package/dist/generated/cloudflare/models/session_models.js +1 -0
  43. package/dist/generated/cloudflare/models/session_start_sources.js +1 -0
  44. package/dist/generated/cloudflare/models/thread_sessions.js +1 -0
  45. package/dist/generated/cloudflare/models/thread_worktrees.js +1 -0
  46. package/dist/generated/cloudflare/models.js +1 -0
  47. package/dist/generated/node/browser.js +17 -0
  48. package/dist/generated/node/client.js +37 -0
  49. package/dist/generated/node/commonInputTypes.js +10 -0
  50. package/dist/generated/node/enums.js +48 -0
  51. package/dist/generated/node/internal/class.js +49 -0
  52. package/dist/generated/node/internal/prismaNamespace.js +252 -0
  53. package/dist/generated/node/internal/prismaNamespaceBrowser.js +222 -0
  54. package/dist/generated/node/models/bot_api_keys.js +1 -0
  55. package/dist/generated/node/models/bot_tokens.js +1 -0
  56. package/dist/generated/node/models/channel_agents.js +1 -0
  57. package/dist/generated/node/models/channel_directories.js +1 -0
  58. package/dist/generated/node/models/channel_mention_mode.js +1 -0
  59. package/dist/generated/node/models/channel_models.js +1 -0
  60. package/dist/generated/node/models/channel_verbosity.js +1 -0
  61. package/dist/generated/node/models/channel_worktrees.js +1 -0
  62. package/dist/generated/node/models/forum_sync_configs.js +1 -0
  63. package/dist/generated/node/models/global_models.js +1 -0
  64. package/dist/generated/node/models/ipc_requests.js +1 -0
  65. package/dist/generated/node/models/part_messages.js +1 -0
  66. package/dist/generated/node/models/scheduled_tasks.js +1 -0
  67. package/dist/generated/node/models/session_agents.js +1 -0
  68. package/dist/generated/node/models/session_events.js +1 -0
  69. package/dist/generated/node/models/session_models.js +1 -0
  70. package/dist/generated/node/models/session_start_sources.js +1 -0
  71. package/dist/generated/node/models/thread_sessions.js +1 -0
  72. package/dist/generated/node/models/thread_worktrees.js +1 -0
  73. package/dist/generated/node/models.js +1 -0
  74. package/dist/interaction-handler.js +10 -0
  75. package/dist/kimaki-digital-twin.e2e.test.js +2 -20
  76. package/dist/message-flags-boundary.test.js +54 -0
  77. package/dist/message-formatting.js +3 -62
  78. package/dist/onboarding-tutorial-plugin.js +1 -1
  79. package/dist/opencode-command.js +129 -0
  80. package/dist/opencode-command.test.js +48 -0
  81. package/dist/opencode-interrupt-plugin.js +19 -1
  82. package/dist/opencode-interrupt-plugin.test.js +0 -5
  83. package/dist/opencode-plugin-loading.e2e.test.js +9 -20
  84. package/dist/opencode-plugin.js +4 -4
  85. package/dist/opencode.js +150 -27
  86. package/dist/patch-text-parser.js +97 -0
  87. package/dist/platform/components-v2.js +20 -0
  88. package/dist/platform/discord-adapter.js +1440 -0
  89. package/dist/platform/discord-routes.js +31 -0
  90. package/dist/platform/message-flags.js +8 -0
  91. package/dist/platform/platform-value.js +41 -0
  92. package/dist/platform/slack-adapter.js +872 -0
  93. package/dist/platform/slack-markdown.js +169 -0
  94. package/dist/platform/types.js +4 -0
  95. package/dist/queue-advanced-e2e-setup.js +265 -0
  96. package/dist/queue-advanced-footer.e2e.test.js +173 -0
  97. package/dist/queue-advanced-model-switch.e2e.test.js +299 -0
  98. package/dist/queue-advanced-permissions-typing.e2e.test.js +73 -1
  99. package/dist/runtime-lifecycle.e2e.test.js +2 -20
  100. package/dist/session-handler/event-stream-state.js +5 -0
  101. package/dist/session-handler/event-stream-state.test.js +6 -2
  102. package/dist/session-handler/thread-session-runtime.js +32 -2
  103. package/dist/system-message.js +26 -23
  104. package/dist/test-utils.js +16 -0
  105. package/dist/thread-message-queue.e2e.test.js +2 -20
  106. package/dist/utils.js +3 -1
  107. package/dist/voice-message.e2e.test.js +2 -20
  108. package/dist/voice.js +122 -9
  109. package/dist/voice.test.js +17 -2
  110. package/dist/websockify.js +69 -0
  111. package/dist/worktree-lifecycle.e2e.test.js +308 -0
  112. package/package.json +4 -2
  113. package/skills/critique/SKILL.md +17 -0
  114. package/skills/egaki/SKILL.md +35 -0
  115. package/skills/event-sourcing-state/SKILL.md +252 -0
  116. package/skills/goke/SKILL.md +1 -0
  117. package/skills/npm-package/SKILL.md +21 -2
  118. package/skills/playwriter/SKILL.md +1 -1
  119. package/skills/x-articles/SKILL.md +554 -0
  120. package/src/agent-model.e2e.test.ts +4 -19
  121. package/src/cli.ts +60 -13
  122. package/src/commands/diff.ts +25 -99
  123. package/src/commands/merge-worktree.ts +5 -21
  124. package/src/commands/new-worktree.ts +5 -11
  125. package/src/commands/permissions.ts +100 -15
  126. package/src/commands/resume.ts +5 -12
  127. package/src/commands/screenshare.ts +354 -0
  128. package/src/commands/session.ts +6 -23
  129. package/src/critique-utils.ts +139 -0
  130. package/src/discord-bot.ts +20 -15
  131. package/src/discord-utils.ts +53 -0
  132. package/src/event-stream-real-capture.e2e.test.ts +4 -20
  133. package/src/gateway-proxy.e2e.test.ts +2 -5
  134. package/src/interaction-handler.ts +15 -0
  135. package/src/kimaki-digital-twin.e2e.test.ts +2 -21
  136. package/src/message-formatting.ts +3 -68
  137. package/src/onboarding-tutorial-plugin.ts +1 -1
  138. package/src/opencode-command.test.ts +70 -0
  139. package/src/opencode-command.ts +188 -0
  140. package/src/opencode-interrupt-plugin.test.ts +0 -5
  141. package/src/opencode-interrupt-plugin.ts +34 -1
  142. package/src/opencode-plugin-loading.e2e.test.ts +25 -35
  143. package/src/opencode-plugin.ts +5 -4
  144. package/src/opencode.ts +199 -32
  145. package/src/patch-text-parser.ts +107 -0
  146. package/src/queue-advanced-e2e-setup.ts +273 -0
  147. package/src/queue-advanced-footer.e2e.test.ts +211 -0
  148. package/src/queue-advanced-model-switch.e2e.test.ts +383 -0
  149. package/src/queue-advanced-permissions-typing.e2e.test.ts +92 -0
  150. package/src/runtime-lifecycle.e2e.test.ts +4 -19
  151. package/src/session-handler/event-stream-state.test.ts +6 -2
  152. package/src/session-handler/event-stream-state.ts +5 -0
  153. package/src/session-handler/thread-session-runtime.ts +45 -2
  154. package/src/system-message.ts +26 -23
  155. package/src/test-utils.ts +17 -0
  156. package/src/thread-message-queue.e2e.test.ts +2 -20
  157. package/src/utils.ts +3 -1
  158. package/src/voice-message.e2e.test.ts +3 -20
  159. package/src/voice.test.ts +26 -2
  160. package/src/voice.ts +147 -9
  161. package/src/websockify.ts +101 -0
  162. package/src/worktree-lifecycle.e2e.test.ts +391 -0
@@ -65,7 +65,7 @@ this feature by restarting kimaki with the \`--no-critique\` flag.
65
65
 
66
66
  ### reviewing diffs with AI
67
67
 
68
- \`critique review --web\` generates an AI-powered review of a diff and uploads it as a shareable URL.
68
+ \`bunx critique review --web\` generates an AI-powered review of a diff and uploads it as a shareable URL.
69
69
  It spawns a separate opencode session that analyzes the diff, groups related changes, and produces
70
70
  a structured review with explanations, diagrams, and suggestions. This is useful when the user
71
71
  asks you to explain or review a diff — the output is much richer than a plain diff URL.
@@ -106,7 +106,7 @@ The command prints a preview URL when done — share that URL with the user.
106
106
  const KIMAKI_TUNNEL_INSTRUCTIONS = `
107
107
  ## running dev servers with tunnel access
108
108
 
109
- ALWAYS use \`kimaki tunnel\` when starting any dev server. NEVER run \`pnpm dev\`, \`npm run dev\`, or any dev server command without wrapping it in \`kimaki tunnel\`. The user is on Discord, not at the terminal — localhost URLs are useless to them. They need a tunnel URL to access the site.
109
+ ALWAYS use \`kimaki tunnel\` when starting any dev server. NEVER run \`pnpm dev\`, \`npm run dev\`, or any dev server command without wrapping it in \`kimaki tunnel\`. Always invoke Kimaki directly as \`kimaki\`, never via \`npx\` or \`bunx\`. The user is on Discord, not at the terminal — localhost URLs are useless to them. They need a tunnel URL to access the site.
110
110
 
111
111
  Use \`tmux\` to run the tunnel + dev server combo in the background so it persists across commands.
112
112
 
@@ -131,7 +131,7 @@ Use random tunnel IDs by default. Only pass \`-t\` when exposing a service that
131
131
  tmux new-session -d -s myapp-dev
132
132
 
133
133
  # Run the dev server with kimaki tunnel inside the session
134
- tmux send-keys -t myapp-dev "npx kimaki tunnel -p 3000 -- pnpm dev" Enter
134
+ tmux send-keys -t myapp-dev "kimaki tunnel -p 3000 -- pnpm dev" Enter
135
135
  \`\`\`
136
136
 
137
137
  ### getting the tunnel URL
@@ -146,15 +146,15 @@ tmux capture-pane -t myapp-dev -p | grep -i "tunnel"
146
146
  \`\`\`bash
147
147
  # Next.js project
148
148
  tmux new-session -d -s projectname-nextjs-dev-3000
149
- tmux send-keys -t nextjs-dev "npx kimaki tunnel -p 3000 -- pnpm dev" Enter
149
+ tmux send-keys -t projectname-nextjs-dev-3000 "kimaki tunnel -p 3000 -- pnpm dev" Enter
150
150
 
151
151
  # Vite project on port 5173
152
152
  tmux new-session -d -s vite-dev-5173
153
- tmux send-keys -t vite-dev "npx kimaki tunnel -p 5173 -- pnpm dev" Enter
153
+ tmux send-keys -t vite-dev "kimaki tunnel -p 5173 -- pnpm dev" Enter
154
154
 
155
155
  # Custom tunnel ID (only for intentionally public-safe services)
156
156
  tmux new-session -d -s holocron-dev
157
- tmux send-keys -t holocron-dev "npx kimaki tunnel -p 3000 -t holocron -- pnpm dev" Enter
157
+ tmux send-keys -t holocron-dev "kimaki tunnel -p 3000 -t holocron -- pnpm dev" Enter
158
158
  \`\`\`
159
159
 
160
160
  ### stopping the dev server
@@ -217,7 +217,7 @@ If there are internal kimaki issues (sessions not responding, bot errors, unexpe
217
217
 
218
218
  To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
219
219
 
220
- npx -y kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
220
+ kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
221
221
 
222
222
  ## requesting files from the user
223
223
 
@@ -227,7 +227,7 @@ To ask the user to upload files from their device, use the \`kimaki_file_upload\
227
227
 
228
228
  To archive the current Discord thread (hide it from sidebar) and stop the session, run:
229
229
 
230
- npx -y kimaki session archive --session ${sessionId}
230
+ kimaki session archive --session ${sessionId}
231
231
 
232
232
  Only do this when the user explicitly asks to close or archive the thread, and only after your final message.
233
233
 
@@ -235,7 +235,7 @@ Only do this when the user explicitly asks to close or archive the thread, and o
235
235
 
236
236
  To search for Discord users in a guild (needed for mentions like <@userId>), run:
237
237
 
238
- npx -y kimaki user list --guild ${guildId || '<guildId>'} --query "username"
238
+ kimaki user list --guild ${guildId || '<guildId>'} --query "username"
239
239
 
240
240
  This returns user IDs you can use for Discord mentions.
241
241
  ${channelId
@@ -244,7 +244,7 @@ ${channelId
244
244
 
245
245
  To start a new thread/session in this channel programmatically, run:
246
246
 
247
- npx -y kimaki send --channel ${channelId} --prompt "your prompt here"${agentFlag}${username ? ` --user "${username}"` : ''}
247
+ kimaki send --channel ${channelId} --prompt "your prompt here"${agentFlag}${username ? ` --user "${username}"` : ''}
248
248
 
249
249
  You can use this to "spawn" parallel helper sessions like teammates: start new threads with focused prompts, then come back and collect the results.
250
250
 
@@ -252,23 +252,23 @@ IMPORTANT: NEVER use \`--worktree\` unless the user explicitly asks for a worktr
252
252
 
253
253
  To send a prompt to an existing thread instead of creating a new one:
254
254
 
255
- npx -y kimaki send --thread <thread_id> --prompt "follow-up prompt"
255
+ kimaki send --thread <thread_id> --prompt "follow-up prompt"
256
256
 
257
257
  Use this when you already have the Discord thread ID.
258
258
 
259
259
  To send to the thread associated with a known session:
260
260
 
261
- npx -y kimaki send --session <session_id> --prompt "follow-up prompt"
261
+ kimaki send --session <session_id> --prompt "follow-up prompt"
262
262
 
263
263
  Use this when you have the OpenCode session ID.
264
264
 
265
265
  Use --notify-only to create a notification thread without starting an AI session:
266
266
 
267
- npx -y kimaki send --channel ${channelId} --prompt "User cancelled subscription" --notify-only
267
+ kimaki send --channel ${channelId} --prompt "User cancelled subscription" --notify-only
268
268
 
269
269
  Use --worktree to create a git worktree for the session (ONLY when the user explicitly asks for a worktree):
270
270
 
271
- npx -y kimaki send --channel ${channelId} --prompt "Add dark mode support" --worktree dark-mode${agentFlag}${username ? ` --user "${username}"` : ''}
271
+ kimaki send --channel ${channelId} --prompt "Add dark mode support" --worktree dark-mode${agentFlag}${username ? ` --user "${username}"` : ''}
272
272
 
273
273
  Important:
274
274
  - NEVER use \`--worktree\` unless the user explicitly requests a worktree. Most tasks should use normal threads without worktrees.
@@ -278,18 +278,21 @@ Important:
278
278
 
279
279
  Use --agent to specify which agent to use for the session:
280
280
 
281
- npx -y kimaki send --channel ${channelId} --prompt "Plan the refactor of the auth module" --agent plan${username ? ` --user "${username}"` : ''}
281
+ kimaki send --channel ${channelId} --prompt "Plan the refactor of the auth module" --agent plan${username ? ` --user "${username}"` : ''}
282
282
  ${agents && agents.length > 0 ? `
283
283
  Available agents:
284
284
  ${agents.map((a) => { return `- \`${a.name}\`${a.name === currentAgent ? ' (current)' : ''}${a.description ? `: ${a.description}` : ''}`; }).join('\n')}
285
285
  ` : ''}
286
+ ## switching agents in the current session
287
+
288
+ The user can switch the active agent mid-session using the Discord slash command \`/<agentname>-agent\`. For example if you are in plan mode and the user asks you to edit files, tell them to run \`/build-agent\` to switch to the build agent first.
286
289
 
287
290
  ## scheduled sends and task management
288
291
 
289
292
  Use \`--send-at\` to schedule a one-time or recurring task:
290
293
 
291
- npx -y kimaki send --channel ${channelId} --prompt "Reminder: review open PRs" --send-at "2026-03-01T09:00:00Z"
292
- npx -y kimaki send --channel ${channelId} --prompt "Run weekly test suite and summarize failures" --send-at "0 9 * * 1"
294
+ kimaki send --channel ${channelId} --prompt "Reminder: review open PRs" --send-at "2026-03-01T09:00:00Z"
295
+ kimaki send --channel ${channelId} --prompt "Run weekly test suite and summarize failures" --send-at "0 9 * * 1"
293
296
 
294
297
  When using a date for \`--send-at\`, it must be UTC in ISO format ending with \`Z\`.
295
298
 
@@ -321,13 +324,13 @@ kimaki task delete <id>
321
324
 
322
325
  Use case patterns:
323
326
  - Reminder flows: create deadline reminders in this channel with one-time \`--send-at\`; mention only if action is required.
324
- - Proactive reminders: when you encounter time-sensitive information during your work (e.g. creating an API key that expires in 90 days, a certificate with an expiration date, a trial period ending, a deadline mentioned in code comments), proactively schedule a \`--notify-only\` reminder before the expiration so the user gets notified in time. For example, if you generate an API key expiring on 2026-06-01, schedule a reminder a few days before: \`npx -y kimaki send --channel ${channelId} --prompt "Reminder: <@${userId || 'USER_ID'}> the API key created on 2026-03-01 expires on 2026-06-01. Renew it before it breaks production." --send-at "2026-05-28T09:00:00Z" --notify-only\`. Always tell the user you scheduled the reminder so they know.
327
+ - Proactive reminders: when you encounter time-sensitive information during your work (e.g. creating an API key that expires in 90 days, a certificate with an expiration date, a trial period ending, a deadline mentioned in code comments), proactively schedule a \`--notify-only\` reminder before the expiration so the user gets notified in time. For example, if you generate an API key expiring on 2026-06-01, schedule a reminder a few days before: \`kimaki send --channel ${channelId} --prompt "Reminder: <@${userId || 'USER_ID'}> the API key created on 2026-03-01 expires on 2026-06-01. Renew it before it breaks production." --send-at "2026-05-28T09:00:00Z" --notify-only\`. Always tell the user you scheduled the reminder so they know.
325
328
  - Weekly QA: schedule "run full test suite, inspect failures, post summary, and mention ${username ? `@${username}` : '@username'} only when failures require review".
326
329
  - Weekly benchmark automation: schedule a benchmark prompt that runs model evals, writes JSON outputs in the repo, commits results, and mentions only for regressions.
327
330
  - Recurring maintenance: use cron \`--send-at\` for repetitive tasks like rotating secrets, checking dependency updates, running security audits, or cleaning up stale branches. Example: \`--send-at "0 9 1 * *"\` to run on the 1st of every month.
328
331
  - Thread reminders: when the user says "remind me about this in 2 hours" (or any duration), use \`--send-at\` with \`--thread\` to resurface the current thread. Compute the future UTC time and send a mention so Discord shows a notification:
329
332
 
330
- npx -y kimaki send --session ${sessionId} --prompt "Reminder: <@${userId || 'USER_ID'}> you asked to be reminded about this thread." --send-at "<future_UTC_time>" --notify-only
333
+ kimaki send --session ${sessionId} --prompt "Reminder: <@${userId || 'USER_ID'}> you asked to be reminded about this thread." --send-at "<future_UTC_time>" --notify-only
331
334
 
332
335
  Replace \`<future_UTC_time>\` with the computed UTC ISO timestamp. The \`--notify-only\` flag creates just a notification message without starting a new AI session. The \`<@userId>\` mention ensures the user gets a Discord notification.
333
336
 
@@ -342,7 +345,7 @@ ONLY create worktrees when the user explicitly asks for one. Never proactively u
342
345
  When the user asks to "create a worktree" or "make a worktree", they mean you should use the kimaki CLI to create it. Do NOT use raw \`git worktree add\` commands. Instead use:
343
346
 
344
347
  \`\`\`bash
345
- npx -y kimaki send --channel ${channelId} --prompt "your task description" --worktree worktree-name${agentFlag}${username ? ` --user "${username}"` : ''}
348
+ kimaki send --channel ${channelId} --prompt "your task description" --worktree worktree-name${agentFlag}${username ? ` --user "${username}"` : ''}
346
349
  \`\`\`
347
350
 
348
351
  This creates a new Discord thread with an isolated git worktree and starts a session in it. The worktree name should be kebab-case and descriptive of the task.
@@ -360,7 +363,7 @@ This is useful for automation (cron jobs, GitHub webhooks, n8n, etc.)
360
363
  When you are approaching the **context window limit** or the user explicitly asks to **handoff to a new thread**, use the \`kimaki send\` command to start a fresh session with context:
361
364
 
362
365
  \`\`\`bash
363
- npx -y kimaki send --channel ${channelId} --prompt "Continuing from previous session: <summary of current task and state>"${agentFlag}${username ? ` --user "${username}"` : ''}
366
+ kimaki send --channel ${channelId} --prompt "Continuing from previous session: <summary of current task and state>"${agentFlag}${username ? ` --user "${username}"` : ''}
364
367
  \`\`\`
365
368
 
366
369
  The command automatically handles long prompts (over 2000 chars) by sending them as file attachments.
@@ -444,10 +447,10 @@ If your Bash tool timeout triggers anyway, fall back to reading the session outp
444
447
 
445
448
  \`\`\`bash
446
449
  # Start a session and wait for it to finish
447
- npx -y kimaki send --channel <channel_id> --prompt "Fix the auth bug" --wait
450
+ kimaki send --channel <channel_id> --prompt "Fix the auth bug" --wait
448
451
 
449
452
  # Send to an existing thread and wait
450
- npx -y kimaki send --thread <thread_id> --prompt "Run the tests" --wait
453
+ kimaki send --thread <thread_id> --prompt "Run the tests" --wait
451
454
  \`\`\`
452
455
 
453
456
  The command exits with the session markdown on stdout once the model finishes responding.
@@ -6,6 +6,22 @@
6
6
  // Prefers using the existing opencode client (already running server) to avoid
7
7
  // spawning a new server process during teardown. Falls back to initializing
8
8
  // a new server only if no existing client is available.
9
/**
 * Deterministic port from a string key (channel ID, test file name, etc.).
 *
 * Folds the key's UTF-16 code units into a signed 32-bit hash
 * (`hash * 31 + codeUnit`, wrapped) and maps it into the half-open range
 * `[basePort, basePort + portCount)`. With the defaults this is a stable
 * port in 53000-54999, avoiding overlap with queue-advanced tests
 * (51000-52999) and getLockPort (30000-39999).
 *
 * Replaces the old TOCTOU-prone pattern of binding port 0, reading the
 * assigned port, closing, then rebinding — which races under parallel vitest.
 *
 * Different keys can still hash to the same port; the function only
 * guarantees that the same key always yields the same port.
 *
 * @param {object} options
 * @param {string} options.key - Value to hash; the empty string maps to `basePort`.
 * @param {number} [options.basePort=53000] - First port of the range.
 * @param {number} [options.portCount=2000] - Number of ports in the range.
 * @returns {number} A port in `[basePort, basePort + portCount)`.
 * @throws {RangeError} If `portCount` is not a positive integer.
 */
export function chooseLockPort({ key, basePort = 53_000, portCount = 2_000 }) {
    if (!Number.isInteger(portCount) || portCount <= 0) {
        throw new RangeError(`portCount must be a positive integer, got ${portCount}`);
    }
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
        // (hash << 5) - hash is hash * 31; `| 0` wraps the sum to signed 32-bit.
        hash = ((hash << 5) - hash + key.charCodeAt(i)) | 0;
    }
    // The hash may be negative after wrapping, so take its magnitude first.
    return basePort + (Math.abs(hash) % portCount);
}
9
25
  import { getOpencodeClient, initializeOpencodeForDirectory, } from './opencode.js';
10
26
  import { getThreadState, } from './session-handler/thread-runtime-state.js';
11
27
  /**
@@ -9,7 +9,6 @@
9
9
  // If total duration of a file exceeds ~10s, split into a new test file
10
10
  // so vitest can parallelize across files.
11
11
  import fs from 'node:fs';
12
- import net from 'node:net';
13
12
  import path from 'node:path';
14
13
  import url from 'node:url';
15
14
  import { describe, beforeAll, afterAll, test, expect } from 'vitest';
@@ -22,7 +21,7 @@ import { startDiscordBot } from './discord-bot.js';
22
21
  import { setBotToken, initDatabase, closeDatabase, setChannelDirectory, setChannelVerbosity, getChannelVerbosity, } from './database.js';
23
22
  import { startHranaServer, stopHranaServer } from './hrana-server.js';
24
23
  import { initializeOpencodeForDirectory, stopOpencodeServer } from './opencode.js';
25
- import { cleanupTestSessions, waitForFooterMessage, waitForBotMessageContaining, waitForMessageById, waitForBotMessageCount, waitForBotReplyAfterUserMessage, waitForThreadState, } from './test-utils.js';
24
+ import { chooseLockPort, cleanupTestSessions, waitForFooterMessage, waitForBotMessageContaining, waitForMessageById, waitForBotMessageCount, waitForBotReplyAfterUserMessage, waitForThreadState, } from './test-utils.js';
26
25
  const e2eTest = describe;
27
26
  function createRunDirectories() {
28
27
  const root = path.resolve(process.cwd(), 'tmp', 'thread-queue-e2e');
@@ -32,23 +31,6 @@ function createRunDirectories() {
32
31
  fs.mkdirSync(projectDirectory, { recursive: true });
33
32
  return { root, dataDir, projectDirectory };
34
33
  }
35
- function chooseLockPort() {
36
- return new Promise((resolve, reject) => {
37
- const server = net.createServer();
38
- server.listen(0, () => {
39
- const address = server.address();
40
- if (!address || typeof address === 'string') {
41
- server.close();
42
- reject(new Error('Failed to resolve lock port'));
43
- return;
44
- }
45
- const port = address.port;
46
- server.close(() => {
47
- resolve(port);
48
- });
49
- });
50
- });
51
- }
52
34
  function createDiscordJsClient({ restUrl }) {
53
35
  return new Client({
54
36
  intents: [
@@ -233,7 +215,7 @@ e2eTest('thread message queue ordering', () => {
233
215
  beforeAll(async () => {
234
216
  testStartTime = Date.now();
235
217
  directories = createRunDirectories();
236
- const lockPort = await chooseLockPort();
218
+ const lockPort = chooseLockPort({ key: TEXT_CHANNEL_ID });
237
219
  process.env['KIMAKI_LOCK_PORT'] = String(lockPort);
238
220
  setDataDir(directories.dataDir);
239
221
  previousDefaultVerbosity = store.getState().defaultVerbosity;
package/dist/utils.js CHANGED
@@ -58,11 +58,13 @@ export function generateDiscordInstallUrlForBot({ appId, mode, clientId, clientS
58
58
  // This initiates the better-auth OAuth flow with clientId/clientSecret
59
59
  // as additionalData, which better-auth stores in its verification table
60
60
  // and recovers after Discord redirects back to the callback.
61
+ // Use a kimaki-specific callback field name to avoid ambiguity with
62
+ // better-auth's own callbackURL state field.
61
63
  const url = new URL(`${KIMAKI_WEBSITE_URL}/discord-install`);
62
64
  url.searchParams.set('clientId', clientId);
63
65
  url.searchParams.set('clientSecret', clientSecret);
64
66
  if (gatewayCallbackUrl) {
65
- url.searchParams.set('callbackUrl', gatewayCallbackUrl);
67
+ url.searchParams.set('kimakiCallbackUrl', gatewayCallbackUrl);
66
68
  }
67
69
  return url.toString();
68
70
  }
@@ -8,7 +8,6 @@
8
8
  // Tests assert on both Discord messages (via digital twin) and session state
9
9
  // transitions (via getThreadState from the zustand store).
10
10
  import fs from 'node:fs';
11
- import net from 'node:net';
12
11
  import path from 'node:path';
13
12
  import url from 'node:url';
14
13
  import { describe, beforeAll, afterAll, beforeEach, test, expect } from 'vitest';
@@ -21,7 +20,7 @@ import { startDiscordBot } from './discord-bot.js';
21
20
  import { setBotToken, initDatabase, closeDatabase, setChannelDirectory, setChannelVerbosity, } from './database.js';
22
21
  import { startHranaServer, stopHranaServer } from './hrana-server.js';
23
22
  import { initializeOpencodeForDirectory, getOpencodeClient, stopOpencodeServer } from './opencode.js';
24
- import { cleanupTestSessions, waitForFooterMessage, waitForBotMessageContaining, waitForThreadState, } from './test-utils.js';
23
+ import { chooseLockPort, cleanupTestSessions, waitForFooterMessage, waitForBotMessageContaining, waitForThreadState, } from './test-utils.js';
25
24
  import { getThreadState } from './session-handler/thread-runtime-state.js';
26
25
  const e2eTest = describe;
27
26
  // ── Helpers ──────────────────────────────────────────────────────
@@ -33,23 +32,6 @@ function createRunDirectories() {
33
32
  fs.mkdirSync(projectDirectory, { recursive: true });
34
33
  return { root, dataDir, projectDirectory };
35
34
  }
36
- function chooseLockPort() {
37
- return new Promise((resolve, reject) => {
38
- const server = net.createServer();
39
- server.listen(0, () => {
40
- const address = server.address();
41
- if (!address || typeof address === 'string') {
42
- server.close();
43
- reject(new Error('Failed to resolve lock port'));
44
- return;
45
- }
46
- const port = address.port;
47
- server.close(() => {
48
- resolve(port);
49
- });
50
- });
51
- });
52
- }
53
35
  function createDiscordJsClient({ restUrl }) {
54
36
  return new Client({
55
37
  intents: [
@@ -245,7 +227,7 @@ e2eTest('voice message handling', () => {
245
227
  beforeAll(async () => {
246
228
  testStartTime = Date.now();
247
229
  directories = createRunDirectories();
248
- const lockPort = await chooseLockPort();
230
+ const lockPort = chooseLockPort({ key: TEXT_CHANNEL_ID });
249
231
  process.env['KIMAKI_LOCK_PORT'] = String(lockPort);
250
232
  setDataDir(directories.dataDir);
251
233
  previousDefaultVerbosity = store.getState().defaultVerbosity;
package/dist/voice.js CHANGED
@@ -21,7 +21,36 @@ const OPENAI_SUPPORTED_AUDIO_TYPES = new Set([
21
21
  'audio/mpeg',
22
22
  'audio/mp3',
23
23
  'audio/wav',
24
+ 'audio/x-wav',
24
25
  ]);
26
+ const OGG_AUDIO_TYPES = new Set([
27
+ 'audio/ogg',
28
+ 'audio/opus',
29
+ ]);
30
+ const M4A_AUDIO_TYPES = new Set([
31
+ 'audio/mp4',
32
+ 'audio/m4a',
33
+ 'audio/x-m4a',
34
+ ]);
35
/**
 * Canonicalize an audio media type so it can be compared by exact match.
 *
 * Any parameters after the first `;` (for example `; codecs=opus`) are
 * dropped, surrounding whitespace is trimmed and the result is lowercased.
 * The M4A aliases `audio/x-m4a` and `audio/m4a` are folded into `audio/mp4`;
 * every other type is returned as its lowercased type/subtype.
 *
 * @param {string} mediaType - Media type string, optionally with parameters.
 * @returns {string} The canonical lowercase type/subtype.
 */
export function normalizeAudioMediaType(mediaType) {
    // Only the type/subtype matters for format detection; a trailing
    // parameter list would otherwise defeat exact-match lookups.
    const [essence] = mediaType.split(';');
    const normalized = essence.trim().toLowerCase();
    if (normalized === 'audio/x-m4a' || normalized === 'audio/m4a') {
        return 'audio/mp4';
    }
    return normalized;
}
42
/**
 * Pick the conversion step for audio of the given media type.
 *
 * The type is looked up by exact Set membership, in order, and the first
 * match wins:
 *   - OPENAI_SUPPORTED_AUDIO_TYPES -> 'none'
 *   - OGG_AUDIO_TYPES              -> 'convert-ogg-to-wav'
 *   - M4A_AUDIO_TYPES              -> 'convert-m4a-to-wav'
 * Anything else yields 'unsupported'. No trimming or case folding happens
 * here, so the argument must already be in the form stored in those Sets.
 *
 * @param {string} mediaType - Media type to classify.
 * @returns {'none' | 'convert-ogg-to-wav' | 'convert-m4a-to-wav' | 'unsupported'}
 */
export function getOpenAIAudioConversionStrategy(mediaType) {
    const strategiesByTypeSet = [
        [OPENAI_SUPPORTED_AUDIO_TYPES, 'none'],
        [OGG_AUDIO_TYPES, 'convert-ogg-to-wav'],
        [M4A_AUDIO_TYPES, 'convert-m4a-to-wav'],
    ];
    const matched = strategiesByTypeSet.find(([typeSet]) => typeSet.has(mediaType));
    return matched ? matched[1] : 'unsupported';
}
25
54
  /**
26
55
  * Convert OGG Opus audio to WAV using prism-media (already installed for Discord voice).
27
56
  * Pipeline: OGG buffer → OggDemuxer → Opus Decoder → PCM → WAV (with header).
@@ -64,6 +93,74 @@ export function convertOggToWav(input) {
64
93
  Readable.from(input).pipe(demuxer).pipe(decoder);
65
94
  });
66
95
  }
96
/**
 * Convert M4A/MP4 audio to WAV using the prism-media FFmpeg wrapper.
 * This depends on an ffmpeg binary available in PATH.
 *
 * Output is 48 kHz, mono, 16-bit PCM with a WAV header prepended.
 *
 * Failures are reported as values, never as rejections: the returned promise
 * always resolves, either with the WAV buffer or with a TranscriptionError.
 * This includes the case where constructing the FFmpeg wrapper throws
 * synchronously (e.g. prism-media's "FFmpeg/avconv not found!"), which would
 * otherwise reject the promise and bypass the caller's `instanceof Error` check.
 *
 * @param {Buffer} input - Encoded M4A/MP4 audio.
 * @returns {Promise<Buffer | TranscriptionError>}
 */
export function convertM4aToWav(input) {
    const sampleRate = 48000;
    const numChannels = 1;
    const bitsPerSample = 16;
    // Map any ffmpeg failure to a TranscriptionError, singling out a missing binary.
    const toTranscriptionError = (thrown) => {
        const err = thrown instanceof Error ? thrown : new Error(String(thrown));
        const lower = err.message.toLowerCase();
        const isMissingFfmpeg = lower.includes('ffmpeg') &&
            (lower.includes('not found') ||
                lower.includes('enoent') ||
                lower.includes('spawn'));
        if (isMissingFfmpeg) {
            return new TranscriptionError({
                reason: 'M4A transcription with OpenAI requires ffmpeg to be installed and available in PATH',
                cause: err,
            });
        }
        return new TranscriptionError({
            reason: `M4A decode failed: ${err.message}`,
            cause: err,
        });
    };
    return new Promise((resolve) => {
        let transcoder;
        try {
            // NOTE(review): input is demuxed as mp4 from a non-seekable pipe; files
            // whose index sits at the end may not decode this way — confirm with
            // real voice attachments.
            transcoder = new prism.FFmpeg({
                args: [
                    '-analyzeduration',
                    '0',
                    '-loglevel',
                    '0',
                    '-f',
                    'mp4',
                    '-i',
                    'pipe:0',
                    '-f',
                    's16le',
                    '-acodec',
                    'pcm_s16le',
                    '-ac',
                    String(numChannels),
                    '-ar',
                    String(sampleRate),
                    'pipe:1',
                ],
            });
        }
        catch (err) {
            // The wrapper locates/spawns ffmpeg in its constructor and throws when
            // it cannot; convert that into the same error value as async failures.
            resolve(toTranscriptionError(err));
            return;
        }
        const pcmChunks = [];
        transcoder.on('data', (chunk) => {
            pcmChunks.push(chunk);
        });
        transcoder.on('end', () => {
            const pcmData = Buffer.concat(pcmChunks);
            if (pcmData.length === 0) {
                resolve(new TranscriptionError({
                    reason: 'FFmpeg conversion produced empty audio output',
                }));
                return;
            }
            const wavHeader = createWavHeader({
                dataLength: pcmData.length,
                sampleRate,
                numChannels,
                bitsPerSample,
            });
            resolve(Buffer.concat([wavHeader, pcmData]));
        });
        transcoder.on('error', (err) => {
            resolve(toTranscriptionError(err));
        });
        Readable.from(input).pipe(transcoder);
    });
}
67
164
  function createWavHeader({ dataLength, sampleRate, numChannels, bitsPerSample, }) {
68
165
  const byteRate = (sampleRate * numChannels * bitsPerSample) / 8;
69
166
  const blockAlign = (numChannels * bitsPerSample) / 8;
@@ -225,17 +322,33 @@ export async function transcribeAudio({ audio, prompt, language, temperature, ap
225
322
  if (audioBuffer.length === 0) {
226
323
  return new InvalidAudioFormatError();
227
324
  }
228
- let mediaType = mediaTypeParam || 'audio/mpeg';
325
+ let mediaType = normalizeAudioMediaType(mediaTypeParam || 'audio/mpeg');
229
326
  let finalAudioBase64 = audioBuffer.toString('base64');
230
- // OpenAI input_audio only supports mp3/wav. Convert OGG Opus (Discord voice) to WAV.
231
- if (resolvedProvider === 'openai' && !OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)) {
232
- voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`);
233
- const converted = await convertOggToWav(audioBuffer);
234
- if (converted instanceof Error) {
235
- return converted;
327
+ // OpenAI input_audio supports only a subset of audio formats.
328
+ // Convert based on MIME so OGG conversion runs only for real OGG/Opus inputs.
329
+ if (resolvedProvider === 'openai') {
330
+ const conversionStrategy = getOpenAIAudioConversionStrategy(mediaType);
331
+ if (conversionStrategy === 'convert-ogg-to-wav') {
332
+ voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`);
333
+ const converted = await convertOggToWav(audioBuffer);
334
+ if (converted instanceof Error) {
335
+ return converted;
336
+ }
337
+ finalAudioBase64 = converted.toString('base64');
338
+ mediaType = 'audio/wav';
339
+ }
340
+ else if (conversionStrategy === 'convert-m4a-to-wav') {
341
+ voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`);
342
+ const converted = await convertM4aToWav(audioBuffer);
343
+ if (converted instanceof Error) {
344
+ return converted;
345
+ }
346
+ finalAudioBase64 = converted.toString('base64');
347
+ mediaType = 'audio/wav';
348
+ }
349
+ else if (conversionStrategy === 'unsupported') {
350
+ return new InvalidAudioFormatError();
236
351
  }
237
- finalAudioBase64 = converted.toString('base64');
238
- mediaType = 'audio/wav';
239
352
  }
240
353
  const languageHint = language ? `The audio is in ${language}.\n\n` : '';
241
354
  // build session context section
@@ -3,8 +3,23 @@
3
3
  import { describe, test, expect } from 'vitest';
4
4
  import fs from 'node:fs';
5
5
  import path from 'node:path';
6
- import { transcribeAudio, convertOggToWav } from './voice.js';
7
- import { extractTranscription } from './voice.js';
6
+ import { transcribeAudio, convertOggToWav, extractTranscription, normalizeAudioMediaType, getOpenAIAudioConversionStrategy, } from './voice.js';
7
// Covers the MIME helpers that decide how audio is prepared for OpenAI:
// alias normalization and the per-type conversion strategy.
describe('audio media type routing', () => {
    test('normalizes m4a aliases to audio/mp4', () => {
        const fromXM4a = normalizeAudioMediaType('audio/x-m4a');
        const fromM4a = normalizeAudioMediaType('audio/m4a');
        expect(fromXM4a).toMatchInlineSnapshot('"audio/mp4"');
        expect(fromM4a).toMatchInlineSnapshot('"audio/mp4"');
    });
    test('keeps non-m4a media types unchanged', () => {
        const ogg = normalizeAudioMediaType('audio/ogg');
        const wav = normalizeAudioMediaType('audio/wav');
        expect(ogg).toMatchInlineSnapshot('"audio/ogg"');
        expect(wav).toMatchInlineSnapshot('"audio/wav"');
    });
    test('converts ogg only when mime is actual ogg/opus', () => {
        const strategyFor = getOpenAIAudioConversionStrategy;
        // Only the two OGG/Opus types take the OGG path.
        expect(strategyFor('audio/ogg')).toMatchInlineSnapshot('"convert-ogg-to-wav"');
        expect(strategyFor('audio/opus')).toMatchInlineSnapshot('"convert-ogg-to-wav"');
        // MP4 audio goes through the M4A path; MP3 needs no conversion.
        expect(strategyFor('audio/mp4')).toMatchInlineSnapshot('"convert-m4a-to-wav"');
        expect(strategyFor('audio/mpeg')).toMatchInlineSnapshot('"none"');
    });
});
8
23
  describe('extractTranscription', () => {
9
24
  test('extracts transcription from tool call', () => {
10
25
  const result = extractTranscription([
@@ -0,0 +1,69 @@
1
+ // In-process WebSocket-to-TCP bridge (websockify replacement).
2
+ // Accepts WebSocket connections and pipes raw bytes to/from a TCP target.
3
+ // Used by /screenshare to bridge noVNC (WebSocket) to a VNC server (TCP).
4
+ // Supports the 'binary' subprotocol required by noVNC.
5
+ import { WebSocketServer, WebSocket } from 'ws';
6
+ import net from 'node:net';
7
+ import { createLogger } from './logger.js';
8
+ const logger = createLogger('SCREEN');
9
/**
 * Pipe one accepted WebSocket to a fresh TCP connection, in both directions.
 * Closing or erroring on either side tears down the other.
 */
function bridgeWebSocketToTcp({ ws, tcpHost, tcpPort }) {
    const tcp = net.createConnection(tcpPort, tcpHost, () => {
        logger.log(`TCP connection established to ${tcpHost}:${tcpPort}`);
    });
    // TCP → WebSocket: drop data once the WebSocket is no longer open.
    tcp.on('data', (data) => {
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(data);
        }
    });
    // WebSocket → TCP: drop data once the socket has been destroyed.
    ws.on('message', (data) => {
        if (!tcp.destroyed) {
            tcp.write(data);
        }
    });
    ws.on('close', () => {
        tcp.destroy();
    });
    ws.on('error', (err) => {
        logger.error('WebSocket error:', err);
        tcp.destroy();
    });
    tcp.on('close', () => {
        ws.close();
    });
    tcp.on('error', (err) => {
        logger.error('TCP connection error:', err);
        ws.close();
    });
}
/**
 * Start a WebSocket server that bridges every connection to a TCP target.
 *
 * @param {object} options
 * @param {number} options.wsPort - Port for the WebSocket server to listen on.
 * @param {string} options.tcpHost - Host of the TCP target.
 * @param {number} options.tcpPort - Port of the TCP target.
 * @returns {Promise<{ wss: WebSocketServer, port: number, close: () => void }>}
 *   Resolves once the server is listening. `port` is the port reported by the
 *   server's address (falling back to `wsPort`); `close` closes every connected
 *   client and then the server. Rejects with "Websockify failed to start" if
 *   the server errors before it is listening.
 */
export function startWebsockify({ wsPort, tcpHost, tcpPort, }) {
    return new Promise((resolve, reject) => {
        let listening = false;
        const wss = new WebSocketServer({
            port: wsPort,
            // noVNC negotiates the 'binary' subprotocol
            handleProtocols: (protocols) => {
                if (protocols.has('binary')) {
                    return 'binary';
                }
                return false;
            },
        });
        wss.on('listening', () => {
            listening = true;
            const addr = wss.address();
            const port = typeof addr === 'object' && addr ? addr.port : wsPort;
            logger.log(`Websockify listening on port ${port} → ${tcpHost}:${tcpPort}`);
            resolve({
                wss,
                port,
                close: () => {
                    for (const client of wss.clients) {
                        client.close();
                    }
                    wss.close();
                },
            });
        });
        wss.on('error', (err) => {
            if (listening) {
                // The promise is already settled, so reject() would be a no-op;
                // log instead of silently dropping the error.
                logger.error('Websockify server error:', err);
                return;
            }
            reject(new Error('Websockify failed to start', { cause: err }));
        });
        wss.on('connection', (ws) => {
            bridgeWebSocketToTcp({ ws, tcpHost, tcpPort });
        });
    });
}