npm - kimaki - Versions diffs - 0.4.76 → 0.4.78 - Mend

kimaki 0.4.76 → 0.4.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162) hide show

package/dist/adapter-rest-boundary.test.js +34 -0
package/dist/agent-model.e2e.test.js +2 -20
package/dist/cli.js +50 -13
package/dist/commands/channel-ref.js +16 -0
package/dist/commands/diff.js +20 -85
package/dist/commands/merge-worktree.js +5 -17
package/dist/commands/new-worktree.js +5 -9
package/dist/commands/permissions.js +77 -11
package/dist/commands/resume.js +5 -9
package/dist/commands/screenshare.js +295 -0
package/dist/commands/session.js +6 -17
package/dist/critique-utils.js +95 -0
package/dist/diff-patch-plugin.js +314 -0
package/dist/discord-bot.js +19 -14
package/dist/discord-js-import-boundary.test.js +62 -0
package/dist/discord-utils.js +44 -0
package/dist/event-stream-real-capture.e2e.test.js +2 -20
package/dist/gateway-proxy.e2e.test.js +2 -5
package/dist/generated/cloudflare/browser.js +17 -0
package/dist/generated/cloudflare/client.js +34 -0
package/dist/generated/cloudflare/commonInputTypes.js +10 -0
package/dist/generated/cloudflare/enums.js +48 -0
package/dist/generated/cloudflare/internal/class.js +47 -0
package/dist/generated/cloudflare/internal/prismaNamespace.js +252 -0
package/dist/generated/cloudflare/internal/prismaNamespaceBrowser.js +222 -0
package/dist/generated/cloudflare/internal/query_compiler_fast_bg.js +135 -0
package/dist/generated/cloudflare/models/bot_api_keys.js +1 -0
package/dist/generated/cloudflare/models/bot_tokens.js +1 -0
package/dist/generated/cloudflare/models/channel_agents.js +1 -0
package/dist/generated/cloudflare/models/channel_directories.js +1 -0
package/dist/generated/cloudflare/models/channel_mention_mode.js +1 -0
package/dist/generated/cloudflare/models/channel_models.js +1 -0
package/dist/generated/cloudflare/models/channel_verbosity.js +1 -0
package/dist/generated/cloudflare/models/channel_worktrees.js +1 -0
package/dist/generated/cloudflare/models/forum_sync_configs.js +1 -0
package/dist/generated/cloudflare/models/global_models.js +1 -0
package/dist/generated/cloudflare/models/ipc_requests.js +1 -0
package/dist/generated/cloudflare/models/part_messages.js +1 -0
package/dist/generated/cloudflare/models/scheduled_tasks.js +1 -0
package/dist/generated/cloudflare/models/session_agents.js +1 -0
package/dist/generated/cloudflare/models/session_events.js +1 -0
package/dist/generated/cloudflare/models/session_models.js +1 -0
package/dist/generated/cloudflare/models/session_start_sources.js +1 -0
package/dist/generated/cloudflare/models/thread_sessions.js +1 -0
package/dist/generated/cloudflare/models/thread_worktrees.js +1 -0
package/dist/generated/cloudflare/models.js +1 -0
package/dist/generated/node/browser.js +17 -0
package/dist/generated/node/client.js +37 -0
package/dist/generated/node/commonInputTypes.js +10 -0
package/dist/generated/node/enums.js +48 -0
package/dist/generated/node/internal/class.js +49 -0
package/dist/generated/node/internal/prismaNamespace.js +252 -0
package/dist/generated/node/internal/prismaNamespaceBrowser.js +222 -0
package/dist/generated/node/models/bot_api_keys.js +1 -0
package/dist/generated/node/models/bot_tokens.js +1 -0
package/dist/generated/node/models/channel_agents.js +1 -0
package/dist/generated/node/models/channel_directories.js +1 -0
package/dist/generated/node/models/channel_mention_mode.js +1 -0
package/dist/generated/node/models/channel_models.js +1 -0
package/dist/generated/node/models/channel_verbosity.js +1 -0
package/dist/generated/node/models/channel_worktrees.js +1 -0
package/dist/generated/node/models/forum_sync_configs.js +1 -0
package/dist/generated/node/models/global_models.js +1 -0
package/dist/generated/node/models/ipc_requests.js +1 -0
package/dist/generated/node/models/part_messages.js +1 -0
package/dist/generated/node/models/scheduled_tasks.js +1 -0
package/dist/generated/node/models/session_agents.js +1 -0
package/dist/generated/node/models/session_events.js +1 -0
package/dist/generated/node/models/session_models.js +1 -0
package/dist/generated/node/models/session_start_sources.js +1 -0
package/dist/generated/node/models/thread_sessions.js +1 -0
package/dist/generated/node/models/thread_worktrees.js +1 -0
package/dist/generated/node/models.js +1 -0
package/dist/interaction-handler.js +10 -0
package/dist/kimaki-digital-twin.e2e.test.js +2 -20
package/dist/message-flags-boundary.test.js +54 -0
package/dist/message-formatting.js +3 -62
package/dist/onboarding-tutorial-plugin.js +1 -1
package/dist/opencode-command.js +129 -0
package/dist/opencode-command.test.js +48 -0
package/dist/opencode-interrupt-plugin.js +19 -1
package/dist/opencode-interrupt-plugin.test.js +0 -5
package/dist/opencode-plugin-loading.e2e.test.js +9 -20
package/dist/opencode-plugin.js +4 -4
package/dist/opencode.js +150 -27
package/dist/patch-text-parser.js +97 -0
package/dist/platform/components-v2.js +20 -0
package/dist/platform/discord-adapter.js +1440 -0
package/dist/platform/discord-routes.js +31 -0
package/dist/platform/message-flags.js +8 -0
package/dist/platform/platform-value.js +41 -0
package/dist/platform/slack-adapter.js +872 -0
package/dist/platform/slack-markdown.js +169 -0
package/dist/platform/types.js +4 -0
package/dist/queue-advanced-e2e-setup.js +265 -0
package/dist/queue-advanced-footer.e2e.test.js +173 -0
package/dist/queue-advanced-model-switch.e2e.test.js +299 -0
package/dist/queue-advanced-permissions-typing.e2e.test.js +73 -1
package/dist/runtime-lifecycle.e2e.test.js +2 -20
package/dist/session-handler/event-stream-state.js +5 -0
package/dist/session-handler/event-stream-state.test.js +6 -2
package/dist/session-handler/thread-session-runtime.js +32 -2
package/dist/system-message.js +26 -23
package/dist/test-utils.js +16 -0
package/dist/thread-message-queue.e2e.test.js +2 -20
package/dist/utils.js +3 -1
package/dist/voice-message.e2e.test.js +2 -20
package/dist/voice.js +122 -9
package/dist/voice.test.js +17 -2
package/dist/websockify.js +69 -0
package/dist/worktree-lifecycle.e2e.test.js +308 -0
package/package.json +4 -2
package/skills/critique/SKILL.md +17 -0
package/skills/egaki/SKILL.md +35 -0
package/skills/event-sourcing-state/SKILL.md +252 -0
package/skills/goke/SKILL.md +1 -0
package/skills/npm-package/SKILL.md +21 -2
package/skills/playwriter/SKILL.md +1 -1
package/skills/x-articles/SKILL.md +554 -0
package/src/agent-model.e2e.test.ts +4 -19
package/src/cli.ts +60 -13
package/src/commands/diff.ts +25 -99
package/src/commands/merge-worktree.ts +5 -21
package/src/commands/new-worktree.ts +5 -11
package/src/commands/permissions.ts +100 -15
package/src/commands/resume.ts +5 -12
package/src/commands/screenshare.ts +354 -0
package/src/commands/session.ts +6 -23
package/src/critique-utils.ts +139 -0
package/src/discord-bot.ts +20 -15
package/src/discord-utils.ts +53 -0
package/src/event-stream-real-capture.e2e.test.ts +4 -20
package/src/gateway-proxy.e2e.test.ts +2 -5
package/src/interaction-handler.ts +15 -0
package/src/kimaki-digital-twin.e2e.test.ts +2 -21
package/src/message-formatting.ts +3 -68
package/src/onboarding-tutorial-plugin.ts +1 -1
package/src/opencode-command.test.ts +70 -0
package/src/opencode-command.ts +188 -0
package/src/opencode-interrupt-plugin.test.ts +0 -5
package/src/opencode-interrupt-plugin.ts +34 -1
package/src/opencode-plugin-loading.e2e.test.ts +25 -35
package/src/opencode-plugin.ts +5 -4
package/src/opencode.ts +199 -32
package/src/patch-text-parser.ts +107 -0
package/src/queue-advanced-e2e-setup.ts +273 -0
package/src/queue-advanced-footer.e2e.test.ts +211 -0
package/src/queue-advanced-model-switch.e2e.test.ts +383 -0
package/src/queue-advanced-permissions-typing.e2e.test.ts +92 -0
package/src/runtime-lifecycle.e2e.test.ts +4 -19
package/src/session-handler/event-stream-state.test.ts +6 -2
package/src/session-handler/event-stream-state.ts +5 -0
package/src/session-handler/thread-session-runtime.ts +45 -2
package/src/system-message.ts +26 -23
package/src/test-utils.ts +17 -0
package/src/thread-message-queue.e2e.test.ts +2 -20
package/src/utils.ts +3 -1
package/src/voice-message.e2e.test.ts +3 -20
package/src/voice.test.ts +26 -2
package/src/voice.ts +147 -9
package/src/websockify.ts +101 -0
package/src/worktree-lifecycle.e2e.test.ts +391 -0

package/src/system-message.ts CHANGED Viewed

@@ -67,7 +67,7 @@ this feature by restarting kimaki with the \`--no-critique\` flag.
 ### reviewing diffs with AI
-\`critique review --web\` generates an AI-powered review of a diff and uploads it as a shareable URL.
+\`bunx critique review --web\` generates an AI-powered review of a diff and uploads it as a shareable URL.
 It spawns a separate opencode session that analyzes the diff, groups related changes, and produces
 a structured review with explanations, diagrams, and suggestions. This is useful when the user
 asks you to explain or review a diff — the output is much richer than a plain diff URL.
@@ -109,7 +109,7 @@ The command prints a preview URL when done — share that URL with the user.
 const KIMAKI_TUNNEL_INSTRUCTIONS = `
 ## running dev servers with tunnel access
-ALWAYS use \`kimaki tunnel\` when starting any dev server. NEVER run \`pnpm dev\`, \`npm run dev\`, or any dev server command without wrapping it in \`kimaki tunnel\`. The user is on Discord, not at the terminal — localhost URLs are useless to them. They need a tunnel URL to access the site.
+ALWAYS use \`kimaki tunnel\` when starting any dev server. NEVER run \`pnpm dev\`, \`npm run dev\`, or any dev server command without wrapping it in \`kimaki tunnel\`. Always invoke Kimaki directly as \`kimaki\`, never via \`npx\` or \`bunx\`. The user is on Discord, not at the terminal — localhost URLs are useless to them. They need a tunnel URL to access the site.
 Use \`tmux\` to run the tunnel + dev server combo in the background so it persists across commands.
@@ -134,7 +134,7 @@ Use random tunnel IDs by default. Only pass \`-t\` when exposing a service that
 tmux new-session -d -s myapp-dev
 # Run the dev server with kimaki tunnel inside the session
-tmux send-keys -t myapp-dev "npx kimaki tunnel -p 3000 -- pnpm dev" Enter
+tmux send-keys -t myapp-dev "kimaki tunnel -p 3000 -- pnpm dev" Enter
 \`\`\`
 ### getting the tunnel URL
@@ -149,15 +149,15 @@ tmux capture-pane -t myapp-dev -p | grep -i "tunnel"
 \`\`\`bash
 # Next.js project
 tmux new-session -d -s projectname-nextjs-dev-3000
-tmux send-keys -t nextjs-dev "npx kimaki tunnel -p 3000 -- pnpm dev" Enter
+tmux send-keys -t nextjs-dev "kimaki tunnel -p 3000 -- pnpm dev" Enter
 # Vite project on port 5173
 tmux new-session -d -s vite-dev-5173
-tmux send-keys -t vite-dev "npx kimaki tunnel -p 5173 -- pnpm dev" Enter
+tmux send-keys -t vite-dev "kimaki tunnel -p 5173 -- pnpm dev" Enter
 # Custom tunnel ID (only for intentionally public-safe services)
 tmux new-session -d -s holocron-dev
-tmux send-keys -t holocron-dev "npx kimaki tunnel -p 3000 -t holocron -- pnpm dev" Enter
+tmux send-keys -t holocron-dev "kimaki tunnel -p 3000 -t holocron -- pnpm dev" Enter
 \`\`\`
 ### stopping the dev server
@@ -285,7 +285,7 @@ If there are internal kimaki issues (sessions not responding, bot errors, unexpe
 To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
-npx -y kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
+kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
 ## requesting files from the user
@@ -295,7 +295,7 @@ To ask the user to upload files from their device, use the \`kimaki_file_upload\
 To archive the current Discord thread (hide it from sidebar) and stop the session, run:
-npx -y kimaki session archive --session ${sessionId}
+kimaki session archive --session ${sessionId}
 Only do this when the user explicitly asks to close or archive the thread, and only after your final message.
@@ -303,7 +303,7 @@ Only do this when the user explicitly asks to close or archive the thread, and o
 To search for Discord users in a guild (needed for mentions like <@userId>), run:
-npx -y kimaki user list --guild ${guildId || '<guildId>'} --query "username"
+kimaki user list --guild ${guildId || '<guildId>'} --query "username"
 This returns user IDs you can use for Discord mentions.
 ${
@@ -313,7 +313,7 @@ ${
 To start a new thread/session in this channel pro-grammatically, run:
-npx -y kimaki send --channel ${channelId} --prompt "your prompt here"${agentFlag}${username ? ` --user "${username}"` : ''}
+kimaki send --channel ${channelId} --prompt "your prompt here"${agentFlag}${username ? ` --user "${username}"` : ''}
 You can use this to "spawn" parallel helper sessions like teammates: start new threads with focused prompts, then come back and collect the results.
@@ -321,23 +321,23 @@ IMPORTANT: NEVER use \`--worktree\` unless the user explicitly asks for a worktr
 To send a prompt to an existing thread instead of creating a new one:
-npx -y kimaki send --thread <thread_id> --prompt "follow-up prompt"
+kimaki send --thread <thread_id> --prompt "follow-up prompt"
 Use this when you already have the Discord thread ID.
 To send to the thread associated with a known session:
-npx -y kimaki send --session <session_id> --prompt "follow-up prompt"
+kimaki send --session <session_id> --prompt "follow-up prompt"
 Use this when you have the OpenCode session ID.
 Use --notify-only to create a notification thread without starting an AI session:
-npx -y kimaki send --channel ${channelId} --prompt "User cancelled subscription" --notify-only
+kimaki send --channel ${channelId} --prompt "User cancelled subscription" --notify-only
 Use --worktree to create a git worktree for the session (ONLY when the user explicitly asks for a worktree):
-npx -y kimaki send --channel ${channelId} --prompt "Add dark mode support" --worktree dark-mode${agentFlag}${username ? ` --user "${username}"` : ''}
+kimaki send --channel ${channelId} --prompt "Add dark mode support" --worktree dark-mode${agentFlag}${username ? ` --user "${username}"` : ''}
 Important:
 - NEVER use \`--worktree\` unless the user explicitly requests a worktree. Most tasks should use normal threads without worktrees.
@@ -347,18 +347,21 @@ Important:
 Use --agent to specify which agent to use for the session:
-npx -y kimaki send --channel ${channelId} --prompt "Plan the refactor of the auth module" --agent plan${username ? ` --user "${username}"` : ''}
+kimaki send --channel ${channelId} --prompt "Plan the refactor of the auth module" --agent plan${username ? ` --user "${username}"` : ''}
 ${agents && agents.length > 0 ? `
 Available agents:
 ${agents.map((a) => { return `- \`${a.name}\`${a.name === currentAgent ? ' (current)' : ''}${a.description ? `: ${a.description}` : ''}` }).join('\n')}
 ` : ''}
+## switching agents in the current session
+The user can switch the active agent mid-session using the Discord slash command \`/<agentname>-agent\`. For example if you are in plan mode and the user asks you to edit files, tell them to run \`/build-agent\` to switch to the build agent first.
 ## scheduled sends and task management
 Use \`--send-at\` to schedule a one-time or recurring task:
-npx -y kimaki send --channel ${channelId} --prompt "Reminder: review open PRs" --send-at "2026-03-01T09:00:00Z"
-npx -y kimaki send --channel ${channelId} --prompt "Run weekly test suite and summarize failures" --send-at "0 9 * * 1"
+kimaki send --channel ${channelId} --prompt "Reminder: review open PRs" --send-at "2026-03-01T09:00:00Z"
+kimaki send --channel ${channelId} --prompt "Run weekly test suite and summarize failures" --send-at "0 9 * * 1"
 When using a date for \`--send-at\`, it must be UTC in ISO format ending with \`Z\`.
@@ -390,13 +393,13 @@ kimaki task delete <id>
 Use case patterns:
 - Reminder flows: create deadline reminders in this channel with one-time \`--send-at\`; mention only if action is required.
-- Proactive reminders: when you encounter time-sensitive information during your work (e.g. creating an API key that expires in 90 days, a certificate with an expiration date, a trial period ending, a deadline mentioned in code comments), proactively schedule a \`--notify-only\` reminder before the expiration so the user gets notified in time. For example, if you generate an API key expiring on 2026-06-01, schedule a reminder a few days before: \`npx -y kimaki send --channel ${channelId} --prompt "Reminder: <@${userId || 'USER_ID'}> the API key created on 2026-03-01 expires on 2026-06-01. Renew it before it breaks production." --send-at "2026-05-28T09:00:00Z" --notify-only\`. Always tell the user you scheduled the reminder so they know.
+- Proactive reminders: when you encounter time-sensitive information during your work (e.g. creating an API key that expires in 90 days, a certificate with an expiration date, a trial period ending, a deadline mentioned in code comments), proactively schedule a \`--notify-only\` reminder before the expiration so the user gets notified in time. For example, if you generate an API key expiring on 2026-06-01, schedule a reminder a few days before: \`kimaki send --channel ${channelId} --prompt "Reminder: <@${userId || 'USER_ID'}> the API key created on 2026-03-01 expires on 2026-06-01. Renew it before it breaks production." --send-at "2026-05-28T09:00:00Z" --notify-only\`. Always tell the user you scheduled the reminder so they know.
 - Weekly QA: schedule "run full test suite, inspect failures, post summary, and mention ${username ? `@${username}` : '@username'} only when failures require review".
 - Weekly benchmark automation: schedule a benchmark prompt that runs model evals, writes JSON outputs in the repo, commits results, and mentions only for regressions.
 - Recurring maintenance: use cron \`--send-at\` for repetitive tasks like rotating secrets, checking dependency updates, running security audits, or cleaning up stale branches. Example: \`--send-at "0 9 1 * *"\` to run on the 1st of every month.
 - Thread reminders: when the user says "remind me about this in 2 hours" (or any duration), use \`--send-at\` with \`--thread\` to resurface the current thread. Compute the future UTC time and send a mention so Discord shows a notification:
-npx -y kimaki send --session ${sessionId} --prompt "Reminder: <@${userId || 'USER_ID'}> you asked to be reminded about this thread." --send-at "<future_UTC_time>" --notify-only
+kimaki send --session ${sessionId} --prompt "Reminder: <@${userId || 'USER_ID'}> you asked to be reminded about this thread." --send-at "<future_UTC_time>" --notify-only
 Replace \`<future_UTC_time>\` with the computed UTC ISO timestamp. The \`--notify-only\` flag creates just a notification message without starting a new AI session. The \`<@userId>\` mention ensures the user gets a Discord notification.
@@ -411,7 +414,7 @@ ONLY create worktrees when the user explicitly asks for one. Never proactively u
 When the user asks to "create a worktree" or "make a worktree", they mean you should use the kimaki CLI to create it. Do NOT use raw \`git worktree add\` commands. Instead use:
 \`\`\`bash
-npx -y kimaki send --channel ${channelId} --prompt "your task description" --worktree worktree-name${agentFlag}${username ? ` --user "${username}"` : ''}
+kimaki send --channel ${channelId} --prompt "your task description" --worktree worktree-name${agentFlag}${username ? ` --user "${username}"` : ''}
 \`\`\`
 This creates a new Discord thread with an isolated git worktree and starts a session in it. The worktree name should be kebab-case and descriptive of the task.
@@ -429,7 +432,7 @@ This is useful for automation (cron jobs, GitHub webhooks, n8n, etc.)
 When you are approaching the **context window limit** or the user explicitly asks to **handoff to a new thread**, use the \`kimaki send\` command to start a fresh session with context:
 \`\`\`bash
-npx -y kimaki send --channel ${channelId} --prompt "Continuing from previous session: <summary of current task and state>"${agentFlag}${username ? ` --user "${username}"` : ''}
+kimaki send --channel ${channelId} --prompt "Continuing from previous session: <summary of current task and state>"${agentFlag}${username ? ` --user "${username}"` : ''}
 \`\`\`
 The command automatically handles long prompts (over 2000 chars) by sending them as file attachments.
@@ -513,10 +516,10 @@ If your Bash tool timeout triggers anyway, fall back to reading the session outp
 \`\`\`bash
 # Start a session and wait for it to finish
-npx -y kimaki send --channel <channel_id> --prompt "Fix the auth bug" --wait
+kimaki send --channel <channel_id> --prompt "Fix the auth bug" --wait
 # Send to an existing thread and wait
-npx -y kimaki send --thread <thread_id> --prompt "Run the tests" --wait
+kimaki send --thread <thread_id> --prompt "Run the tests" --wait
 \`\`\`
 The command exits with the session markdown on stdout once the model finishes responding.

package/src/test-utils.ts CHANGED Viewed

@@ -8,6 +8,23 @@
 // a new server only if no existing client is available.
 import type { APIMessage } from 'discord.js'
+/**
+ * Deterministic port from a string key (channel ID, test file name, etc.).
+ * Uses a hash to pick a stable port in range 53000-54999, avoiding overlap
+ * with queue-advanced tests (51000-52999) and getLockPort (30000-39999).
+ * Replaces the old TOCTOU-prone pattern of binding port 0, reading the
+ * assigned port, closing, then rebinding — which races under parallel vitest.
+ */
+export function chooseLockPort({ key }: { key: string }): number {
+  let hash = 0
+  for (let i = 0; i < key.length; i++) {
+    const char = key.charCodeAt(i)
+    hash = (hash << 5) - hash + char
+    hash |= 0
+  }
+  return 53_000 + (Math.abs(hash) % 2_000)
+}
 import type { DigitalDiscord } from 'discord-digital-twin/src'
 import {
   getOpencodeClient,

package/src/thread-message-queue.e2e.test.ts CHANGED Viewed

@@ -10,7 +10,6 @@
 // so vitest can parallelize across files.
 import fs from 'node:fs'
-import net from 'node:net'
 import path from 'node:path'
 import url from 'node:url'
 import { describe, beforeAll, afterAll, test, expect } from 'vitest'
@@ -37,6 +36,7 @@ import {
 import { startHranaServer, stopHranaServer } from './hrana-server.js'
 import { initializeOpencodeForDirectory, stopOpencodeServer } from './opencode.js'
 import {
+  chooseLockPort,
   cleanupTestSessions,
   waitForFooterMessage,
   waitForBotMessageContaining,
@@ -60,24 +60,6 @@ function createRunDirectories() {
   return { root, dataDir, projectDirectory }
 }
-function chooseLockPort(): Promise<number> {
-  return new Promise((resolve, reject) => {
-    const server = net.createServer()
-    server.listen(0, () => {
-      const address = server.address()
-      if (!address || typeof address === 'string') {
-        server.close()
-        reject(new Error('Failed to resolve lock port'))
-        return
-      }
-      const port = address.port
-      server.close(() => {
-        resolve(port)
-      })
-    })
-  })
-}
 function createDiscordJsClient({ restUrl }: { restUrl: string }) {
   return new Client({
     intents: [
@@ -272,7 +254,7 @@ e2eTest('thread message queue ordering', () => {
   beforeAll(async () => {
     testStartTime = Date.now()
     directories = createRunDirectories()
-    const lockPort = await chooseLockPort()
+    const lockPort = chooseLockPort({ key: TEXT_CHANNEL_ID })
     process.env['KIMAKI_LOCK_PORT'] = String(lockPort)
     setDataDir(directories.dataDir)

package/src/utils.ts CHANGED Viewed

@@ -107,11 +107,13 @@ export function generateDiscordInstallUrlForBot({
   // This initiates the better-auth OAuth flow with clientId/clientSecret
   // as additionalData, which better-auth stores in its verification table
   // and recovers after Discord redirects back to the callback.
+  // Use a kimaki-specific callback field name to avoid ambiguity with
+  // better-auth's own callbackURL state field.
   const url = new URL(`${KIMAKI_WEBSITE_URL}/discord-install`)
   url.searchParams.set('clientId', clientId)
   url.searchParams.set('clientSecret', clientSecret)
   if (gatewayCallbackUrl) {
-    url.searchParams.set('callbackUrl', gatewayCallbackUrl)
+    url.searchParams.set('kimakiCallbackUrl', gatewayCallbackUrl)
   }
   return url.toString()
 }

package/src/voice-message.e2e.test.ts CHANGED Viewed

@@ -9,7 +9,7 @@
 // transitions (via getThreadState from the zustand store).
 import fs from 'node:fs'
-import net from 'node:net'
 import path from 'node:path'
 import url from 'node:url'
 import { describe, beforeAll, afterAll, beforeEach, test, expect } from 'vitest'
@@ -33,6 +33,7 @@ import { startHranaServer, stopHranaServer } from './hrana-server.js'
 import { initializeOpencodeForDirectory, getOpencodeClient, stopOpencodeServer } from './opencode.js'
 import type { Part, Message } from '@opencode-ai/sdk/v2'
 import {
+  chooseLockPort,
   cleanupTestSessions,
   waitForFooterMessage,
   waitForBotMessageContaining,
@@ -56,24 +57,6 @@ function createRunDirectories() {
   return { root, dataDir, projectDirectory }
 }
-function chooseLockPort(): Promise<number> {
-  return new Promise((resolve, reject) => {
-    const server = net.createServer()
-    server.listen(0, () => {
-      const address = server.address()
-      if (!address || typeof address === 'string') {
-        server.close()
-        reject(new Error('Failed to resolve lock port'))
-        return
-      }
-      const port = address.port
-      server.close(() => {
-        resolve(port)
-      })
-    })
-  })
-}
 function createDiscordJsClient({ restUrl }: { restUrl: string }) {
   return new Client({
     intents: [
@@ -306,7 +289,7 @@ e2eTest('voice message handling', () => {
   beforeAll(async () => {
     testStartTime = Date.now()
     directories = createRunDirectories()
-    const lockPort = await chooseLockPort()
+    const lockPort = chooseLockPort({ key: TEXT_CHANNEL_ID })
     process.env['KIMAKI_LOCK_PORT'] = String(lockPort)
     setDataDir(directories.dataDir)

package/src/voice.test.ts CHANGED Viewed

@@ -4,8 +4,32 @@
 import { describe, test, expect } from 'vitest'
 import fs from 'node:fs'
 import path from 'node:path'
-import { transcribeAudio, convertOggToWav } from './voice.js'
-import { extractTranscription } from './voice.js'
+import {
+  transcribeAudio,
+  convertOggToWav,
+  extractTranscription,
+  normalizeAudioMediaType,
+  getOpenAIAudioConversionStrategy,
+} from './voice.js'
+describe('audio media type routing', () => {
+  test('normalizes m4a aliases to audio/mp4', () => {
+    expect(normalizeAudioMediaType('audio/x-m4a')).toMatchInlineSnapshot('"audio/mp4"')
+    expect(normalizeAudioMediaType('audio/m4a')).toMatchInlineSnapshot('"audio/mp4"')
+  })
+  test('keeps non-m4a media types unchanged', () => {
+    expect(normalizeAudioMediaType('audio/ogg')).toMatchInlineSnapshot('"audio/ogg"')
+    expect(normalizeAudioMediaType('audio/wav')).toMatchInlineSnapshot('"audio/wav"')
+  })
+  test('converts ogg only when mime is actual ogg/opus', () => {
+    expect(getOpenAIAudioConversionStrategy('audio/ogg')).toMatchInlineSnapshot('"convert-ogg-to-wav"')
+    expect(getOpenAIAudioConversionStrategy('audio/opus')).toMatchInlineSnapshot('"convert-ogg-to-wav"')
+    expect(getOpenAIAudioConversionStrategy('audio/mp4')).toMatchInlineSnapshot('"convert-m4a-to-wav"')
+    expect(getOpenAIAudioConversionStrategy('audio/mpeg')).toMatchInlineSnapshot('"none"')
+  })
+})
 describe('extractTranscription', () => {
   test('extracts transcription from tool call', () => {

package/src/voice.ts CHANGED Viewed

@@ -38,8 +38,49 @@ const OPENAI_SUPPORTED_AUDIO_TYPES = new Set([
   'audio/mpeg',
   'audio/mp3',
   'audio/wav',
+  'audio/x-wav',
 ])
+const OGG_AUDIO_TYPES = new Set([
+  'audio/ogg',
+  'audio/opus',
+])
+const M4A_AUDIO_TYPES = new Set([
+  'audio/mp4',
+  'audio/m4a',
+  'audio/x-m4a',
+])
+export function normalizeAudioMediaType(mediaType: string): string {
+  const normalized = mediaType.trim().toLowerCase()
+  if (normalized === 'audio/x-m4a' || normalized === 'audio/m4a') {
+    return 'audio/mp4'
+  }
+  return normalized
+}
+type OpenAIAudioConversionStrategy =
+  | 'none'
+  | 'convert-ogg-to-wav'
+  | 'convert-m4a-to-wav'
+  | 'unsupported'
+export function getOpenAIAudioConversionStrategy(
+  mediaType: string,
+): OpenAIAudioConversionStrategy {
+  if (OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)) {
+    return 'none'
+  }
+  if (OGG_AUDIO_TYPES.has(mediaType)) {
+    return 'convert-ogg-to-wav'
+  }
+  if (M4A_AUDIO_TYPES.has(mediaType)) {
+    return 'convert-m4a-to-wav'
+  }
+  return 'unsupported'
+}
 /**
  * Convert OGG Opus audio to WAV using prism-media (already installed for Discord voice).
  * Pipeline: OGG buffer → OggDemuxer → Opus Decoder → PCM → WAV (with header).
@@ -93,6 +134,89 @@ export function convertOggToWav(input: Buffer): Promise<TranscriptionError | Buf
   })
 }
+/**
+ * Convert M4A/MP4 audio to WAV using prism-media FFmpeg wrapper.
+ * This depends on an ffmpeg binary available in PATH.
+ */
+export function convertM4aToWav(input: Buffer): Promise<TranscriptionError | Buffer> {
+  return new Promise((resolve) => {
+    const pcmChunks: Buffer[] = []
+    const transcoder = new prism.FFmpeg({
+      args: [
+        '-analyzeduration',
+        '0',
+        '-loglevel',
+        '0',
+        '-f',
+        'mp4',
+        '-i',
+        'pipe:0',
+        '-f',
+        's16le',
+        '-acodec',
+        'pcm_s16le',
+        '-ac',
+        '1',
+        '-ar',
+        '48000',
+        'pipe:1',
+      ],
+    })
+    transcoder.on('data', (chunk: Buffer) => {
+      pcmChunks.push(chunk)
+    })
+    transcoder.on('end', () => {
+      const pcmData = Buffer.concat(pcmChunks)
+      if (pcmData.length === 0) {
+        resolve(
+          new TranscriptionError({
+            reason: 'FFmpeg conversion produced empty audio output',
+          }),
+        )
+        return
+      }
+      const wavHeader = createWavHeader({
+        dataLength: pcmData.length,
+        sampleRate: 48000,
+        numChannels: 1,
+        bitsPerSample: 16,
+      })
+      resolve(Buffer.concat([wavHeader, pcmData]))
+    })
+    transcoder.on('error', (err: Error) => {
+      const lower = err.message.toLowerCase()
+      const isMissingFfmpeg =
+        lower.includes('ffmpeg') &&
+        (lower.includes('not found') ||
+          lower.includes('enoent') ||
+          lower.includes('spawn'))
+      if (isMissingFfmpeg) {
+        resolve(
+          new TranscriptionError({
+            reason:
+              'M4A transcription with OpenAI requires ffmpeg to be installed and available in PATH',
+            cause: err,
+          }),
+        )
+        return
+      }
+      resolve(
+        new TranscriptionError({
+          reason: `M4A decode failed: ${err.message}`,
+          cause: err,
+        }),
+      )
+    })
+    Readable.from(input).pipe(transcoder)
+  })
+}
 function createWavHeader({
   dataLength,
   sampleRate,
@@ -359,18 +483,32 @@ export async function transcribeAudio({
     return new InvalidAudioFormatError()
   }
-  let mediaType = mediaTypeParam || 'audio/mpeg'
+  let mediaType = normalizeAudioMediaType(mediaTypeParam || 'audio/mpeg')
   let finalAudioBase64 = audioBuffer.toString('base64')
-  // OpenAI input_audio only supports mp3/wav. Convert OGG Opus (Discord voice) to WAV.
-  if (resolvedProvider === 'openai' && !OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)) {
-    voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
-    const converted = await convertOggToWav(audioBuffer)
-    if (converted instanceof Error) {
-      return converted
+  // OpenAI input_audio supports only a subset of audio formats.
+  // Convert based on MIME so OGG conversion runs only for real OGG/Opus inputs.
+  if (resolvedProvider === 'openai') {
+    const conversionStrategy = getOpenAIAudioConversionStrategy(mediaType)
+    if (conversionStrategy === 'convert-ogg-to-wav') {
+      voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
+      const converted = await convertOggToWav(audioBuffer)
+      if (converted instanceof Error) {
+        return converted
+      }
+      finalAudioBase64 = converted.toString('base64')
+      mediaType = 'audio/wav'
+    } else if (conversionStrategy === 'convert-m4a-to-wav') {
+      voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
+      const converted = await convertM4aToWav(audioBuffer)
+      if (converted instanceof Error) {
+        return converted
+      }
+      finalAudioBase64 = converted.toString('base64')
+      mediaType = 'audio/wav'
+    } else if (conversionStrategy === 'unsupported') {
+      return new InvalidAudioFormatError()
     }
-    finalAudioBase64 = converted.toString('base64')
-    mediaType = 'audio/wav'
   }
   const languageHint = language ? `The audio is in ${language}.\n\n` : ''

package/src/websockify.ts ADDED Viewed

@@ -0,0 +1,101 @@
+// In-process WebSocket-to-TCP bridge (websockify replacement).
+// Accepts WebSocket connections and pipes raw bytes to/from a TCP target.
+// Used by /screenshare to bridge noVNC (WebSocket) to a VNC server (TCP).
+// Supports the 'binary' subprotocol required by noVNC.
+import { WebSocketServer, WebSocket } from 'ws'
+import net from 'node:net'
+import { createLogger } from './logger.js'
+const logger = createLogger('SCREEN')
+type WebsockifyOptions = {
+  /** Port for the WebSocket server (0 = auto-assign) */
+  wsPort: number
+  /** TCP target host */
+  tcpHost: string
+  /** TCP target port */
+  tcpPort: number
+}
+type WebsockifyInstance = {
+  wss: WebSocketServer
+  /** Resolved port (useful when wsPort=0) */
+  port: number
+  close: () => void
+}
+export function startWebsockify({
+  wsPort,
+  tcpHost,
+  tcpPort,
+}: WebsockifyOptions): Promise<WebsockifyInstance> {
+  return new Promise((resolve, reject) => {
+    const wss = new WebSocketServer({
+      port: wsPort,
+      // noVNC negotiates the 'binary' subprotocol
+      handleProtocols: (protocols) => {
+        if (protocols.has('binary')) {
+          return 'binary'
+        }
+        return false
+      },
+    })
+    wss.on('listening', () => {
+      const addr = wss.address()
+      const port = typeof addr === 'object' && addr ? addr.port : wsPort
+      logger.log(`Websockify listening on port ${port} → ${tcpHost}:${tcpPort}`)
+      resolve({
+        wss,
+        port,
+        close: () => {
+          for (const client of wss.clients) {
+            client.close()
+          }
+          wss.close()
+        },
+      })
+    })
+    wss.on('error', (err) => {
+      reject(new Error('Websockify failed to start', { cause: err }))
+    })
+    wss.on('connection', (ws) => {
+      const tcp = net.createConnection(tcpPort, tcpHost, () => {
+        logger.log(`TCP connection established to ${tcpHost}:${tcpPort}`)
+      })
+      tcp.on('data', (data) => {
+        if (ws.readyState === WebSocket.OPEN) {
+          ws.send(data)
+        }
+      })
+      ws.on('message', (data: Buffer) => {
+        if (!tcp.destroyed) {
+          tcp.write(data)
+        }
+      })
+      ws.on('close', () => {
+        tcp.destroy()
+      })
+      ws.on('error', (err) => {
+        logger.error('WebSocket error:', err)
+        tcp.destroy()
+      })
+      tcp.on('close', () => {
+        ws.close()
+      })
+      tcp.on('error', (err) => {
+        logger.error('TCP connection error:', err)
+        ws.close()
+      })
+    })
+  })
+}