npm - kimaki - Versions diffs - 0.4.84 → 0.4.86 - Mend

kimaki 0.4.84 → 0.4.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/commands/ask-question.js +22 -8
package/dist/discord-bot.js +29 -9
package/dist/external-opencode-sync.js +11 -11
package/dist/queue-advanced-question.e2e.test.js +108 -34
package/dist/session-handler/thread-session-runtime.js +7 -1
package/package.json +3 -3
package/src/commands/ask-question.ts +23 -8
package/src/discord-bot.ts +30 -9
package/src/external-opencode-sync.ts +11 -13
package/src/queue-advanced-question.e2e.test.ts +129 -35
package/src/session-handler/thread-session-runtime.ts +8 -1

package/dist/commands/ask-question.js CHANGED Viewed

@@ -11,6 +11,11 @@ const logger = createLogger(LogPrefix.ASK_QUESTION);
 // TTL prevents unbounded growth if user never answers a question.
 const QUESTION_CONTEXT_TTL_MS = 10 * 60 * 1000;
 export const pendingQuestionContexts = new Map();
+export function hasPendingQuestionForThread(threadId) {
+    return [...pendingQuestionContexts.values()].some((ctx) => {
+        return ctx.thread.id === threadId;
+    });
+}
 /**
  * Show dropdown menus for question tool input.
  * Sends one message per question with the dropdown directly under the question text.
@@ -205,13 +210,21 @@ export function parseAskUserQuestionTool(part) {
     return input;
 }
 /**
- * Cancel a pending question for a thread (e.g., when user sends a new message).
- * Sends the user's message as the answer to OpenCode so the model sees their actual response.
+ * Cancel a pending question for a thread.
+ *
+ * Two modes depending on whether `userMessage` is provided:
+ *
+ * - `cancelPendingQuestion(threadId)` — cleanup only. Removes the context
+ *   without replying to OpenCode. Use when the session is being torn down
+ *   or aborted separately. Note that `''` is NOT this mode: an empty string
+ *   is still sent as an (empty) reply. Returns 'no-pending' in both
+ *   "found+cleaned" and "nothing found" cases.
  *
- * Returns 'replied' if the question was answered successfully (caller should NOT
- * enqueue the user message as a new prompt — it was consumed as the answer).
- * Returns 'reply-failed' if reply failed (context kept pending so TTL can retry).
- * Returns 'no-pending' if no question was pending for this thread.
+ * - `cancelPendingQuestion(threadId, text)` — reply path. Sends the text as
+ *   the tool answer so the model sees the user's response. The caller should
+ *   NOT also enqueue the message as a new prompt.
+ *   Returns 'replied' on success, 'reply-failed' if the reply call fails
+ *   (context kept pending so TTL can retry).
  */
 export async function cancelPendingQuestion(threadId, userMessage) {
     // Find pending question for this thread
@@ -228,8 +241,9 @@ export async function cancelPendingQuestion(threadId, userMessage) {
         return 'no-pending';
     }
     // undefined means teardown/cleanup — just remove context, don't reply.
-    // The session is already being torn down. Empty string '' is a valid
-    // user message (attachment-only, voice, etc.) and must still go through.
+    // The session is already being torn down or the caller wants to dismiss
+    // the question without providing an answer. Empty string '' is NOT
+    // cleanup: it is still sent as a reply (voice/attachment-only messages).
     if (userMessage === undefined) {
         pendingQuestionContexts.delete(contextHash);
         return 'no-pending';

package/dist/discord-bot.js CHANGED Viewed

@@ -12,7 +12,7 @@ import { getTextAttachments, resolveMentions, } from './message-formatting.js';
 import { isVoiceAttachment } from './voice-attachment.js';
 import { preprocessExistingThreadMessage, preprocessNewThreadMessage, } from './message-preprocessing.js';
 import { cancelPendingActionButtons } from './commands/action-buttons.js';
-import { cancelPendingQuestion } from './commands/ask-question.js';
+import { cancelPendingQuestion, hasPendingQuestionForThread } from './commands/ask-question.js';
 import { cancelPendingFileUpload } from './commands/file-upload.js';
 import { cancelPendingPermission } from './commands/permissions.js';
 import { cancelHtmlActionsForThread } from './html-actions.js';
@@ -185,7 +185,11 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
         await setupHandlers(discordClient);
     }
     else {
-        discordClient.once(Events.ClientReady, setupHandlers);
+        discordClient.once(Events.ClientReady, (readyClient) => {
+            void setupHandlers(readyClient).catch((error) => {
+                discordLogger.error(`[GATEWAY] ClientReady handler failed: ${formatErrorWithStack(error)}`);
+            });
+        });
     }
     discordClient.on(Events.Error, (error) => {
         discordLogger.error('[GATEWAY] Client error:', formatErrorWithStack(error));
@@ -434,9 +438,6 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
                     appId: currentAppId,
                 });
                 // Cancel interactive UI when a real user sends a message.
-                // If a question was pending and answered with the user's text,
-                // early-return: the message was consumed as the question answer
-                // and must NOT also be sent as a new prompt (causes abort loops).
                 if (!message.author.bot && !isCliInjectedPrompt) {
                     cancelPendingActionButtons(thread.id);
                     cancelHtmlActionsForThread(thread.id);
@@ -446,11 +447,30 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
                             reason: 'user sent a new message while permission was pending',
                         });
                     }
-                    const questionResult = await cancelPendingQuestion(thread.id, message.content);
-                    void cancelPendingFileUpload(thread.id);
-                    if (questionResult === 'replied') {
-                        return;
+                    // For text messages: pass the content as the question answer so the
+                    // model sees the user's response. The early return prevents the message
+                    // from also being sent as a new prompt (duplicate).
+                    // For voice/image messages: message.content is "" (audio is in
+                    // attachments, transcription happens later). Replying with "" and
+                    // then returning early would lose the content entirely. Instead, still
+                    // reply with "" to unblock OpenCode's question.waitForReply (without a
+                    // reply the next promptAsync immediately fails with MessageAbortedError),
+                    // but do NOT return early: the voice message flows through normal
+                    // preprocessing, where it is transcribed and queued as the next user
+                    // message after the model finishes responding to the empty answer.
+                    if (message.content.trim().length > 0) {
+                        const questionResult = await cancelPendingQuestion(thread.id, message.content);
+                        if (questionResult === 'replied') {
+                            void cancelPendingFileUpload(thread.id);
+                            return;
+                        }
+                    }
+                    else if (hasPendingQuestionForThread(thread.id)) {
+                        // Reply empty to unblock the question tool — no early return so
+                        // the voice/image message continues through to enqueueIncoming.
+                        await cancelPendingQuestion(thread.id, '');
                     }
+                    void cancelPendingFileUpload(thread.id);
                 }
                 // Expensive pre-processing (voice transcription, context fetch,
                 // attachment download) runs inside the runtime's serialized

package/dist/external-opencode-sync.js CHANGED Viewed

@@ -19,7 +19,7 @@ function isSyntheticTextPart(part) {
     return candidate.synthetic === true;
 }
 function parseDiscordOriginMetadata(text) {
-    const match = text.match(/^<discord-user\s+([^>]+)\s*\/>$/);
+    const match = text.match(/<discord-user\s+([^>]+)\s*\/>/);
     if (!match?.[1]) {
         return null;
     }
@@ -42,17 +42,17 @@ function parseDiscordOriginMetadata(text) {
     };
 }
 function getDiscordOriginMetadataFromMessage({ message, }) {
-    const syntheticTexts = message.parts.flatMap((part) => {
-        if (part.type !== 'text') {
-            return [];
-        }
-        if (!isSyntheticTextPart(part)) {
-            return [];
-        }
-        return [part.text || ''];
+    const textParts = message.parts.filter((p) => {
+        return p.type === 'text';
     });
-    for (const text of syntheticTexts) {
-        const metadata = parseDiscordOriginMetadata(text);
+    // Synthetic parts first (normal promptAsync path), then non-synthetic
+    // (session.command() path where the tag is embedded in arguments text).
+    const sorted = [
+        ...textParts.filter((p) => { return isSyntheticTextPart(p); }),
+        ...textParts.filter((p) => { return !isSyntheticTextPart(p); }),
+    ];
+    for (const part of sorted) {
+        const metadata = parseDiscordOriginMetadata(part.text || '');
         if (metadata) {
             return metadata;
         }

package/dist/queue-advanced-question.e2e.test.js CHANGED Viewed

@@ -1,11 +1,14 @@
 // E2e test for question tool: user text message during pending question should
-// be consumed as the answer and NOT also sent as a duplicate promptAsync.
-// Reproduces the bug from commit a4dfb01 where the same message was sent twice.
-import { describe, test, expect } from 'vitest';
+// be sent to OpenCode as the question answer and NOT also enqueued as a new
+// prompt. Voice/attachment-only messages (empty content) instead send an empty
+// reply to unblock the question, then get transcribed and enqueued normally.
+import { describe, test, expect, afterEach } from 'vitest';
 import { setupQueueAdvancedSuite, TEST_USER_ID, } from './queue-advanced-e2e-setup.js';
 import { waitForBotMessageContaining, waitForFooterMessage, } from './test-utils.js';
 import { pendingQuestionContexts } from './commands/ask-question.js';
+import { store } from './store.js';
 const TEXT_CHANNEL_ID = '200000000000001007';
+const VOICE_CHANNEL_ID = '200000000000001017';
 async function waitForPendingQuestion({ threadId, timeoutMs, }) {
     const start = Date.now();
     while (Date.now() - start < timeoutMs) {
@@ -36,14 +39,22 @@ async function waitForNoPendingQuestion({ threadId, timeoutMs, }) {
     }
     throw new Error('Timed out waiting for question context cleanup');
 }
-describe('queue advanced: question tool text answer', () => {
+function setDeterministicTranscription(config) {
+    store.setState({
+        test: { deterministicTranscription: config },
+    });
+}
+describe('queue advanced: question tool answer', () => {
     const ctx = setupQueueAdvancedSuite({
         channelId: TEXT_CHANNEL_ID,
         channelName: 'qa-question-e2e',
         dirName: 'qa-question-e2e',
         username: 'queue-question-tester',
     });
-    test('user text message answers pending question without sending duplicate prompt', async () => {
+    afterEach(() => {
+        setDeterministicTranscription(null);
+    });
+    test('user text message dismisses pending question and enqueues as normal prompt', async () => {
         await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
             content: 'QUESTION_TEXT_ANSWER_MARKER',
         });
@@ -69,32 +80,17 @@ describe('queue advanced: question tool text answer', () => {
         });
         // User sends a text message while question is pending.
         // This should:
-        // 1. Answer the question via cancelPendingQuestion (consumed as answer)
-        // 2. NOT also send as a new promptAsync (the fix)
-        // 3. Clean up the pending question context
+        // 1. Be sent to OpenCode as the answer to the pending question
+        // 2. Clean up the pending question context
+        // 3. NOT also be enqueued as a new prompt (no second question dropdown)
         await th.user(TEST_USER_ID).sendMessage({
             content: 'my text answer',
         });
-        // Pending question context should be cleaned up after answer
+        // Pending question context should be cleaned up
         await waitForNoPendingQuestion({
             threadId: thread.id,
             timeoutMs: 4_000,
         });
-        // Wait for second question dropdown (from question-answer followup —
-        // OpenCode calls LLM again with same prompt after question tool completes,
-        // deterministic matcher fires question tool again). This is expected.
-        // Poll for it instead of sleeping.
-        const start = Date.now();
-        while (Date.now() - start < 4_000) {
-            const msgs = await th.getMessages();
-            const questionMsgs = msgs.filter((m) => {
-                return m.content.includes('Which option do you prefer?');
-            });
-            if (questionMsgs.length >= 2) {
-                break;
-            }
-            await new Promise((r) => { setTimeout(r, 50); });
-        }
         const timeline = await th.text({ showInteractions: true });
         expect(timeline).toMatchInlineSnapshot(`
         "--- from: user (queue-question-tester)
@@ -103,18 +99,96 @@ describe('queue advanced: question tool text answer', () => {
         **Pick one**
         Which option do you prefer?
         --- from: user (queue-question-tester)
-        my text answer
-        --- from: assistant (TestBot)
-        **Pick one**
-        Which option do you prefer?"
+        my text answer"
       `);
-        // The user's "my text answer" message must appear in the thread
+        // The user's message must appear in Discord
         expect(timeline).toContain('my text answer');
-        // Key regression assertion: without the fix, the user's text message
-        // is ALSO sent as a duplicate promptAsync which triggers a THIRD question
-        // dropdown. With the fix, only 2 dropdowns appear (initial + followup
-        // from question answer). Count occurrences of "Which option do you prefer?"
+        // Only 1 question dropdown — text message was consumed as the answer,
+        // no duplicate prompt was sent (which would trigger a second dropdown).
         const questionCount = (timeline.match(/Which option do you prefer\?/g) || []).length;
-        expect(questionCount).toBe(2);
+        expect(questionCount).toBe(1);
+    }, 20_000);
+});
+describe('queue advanced: voice message during pending question', () => {
+    const ctx = setupQueueAdvancedSuite({
+        channelId: VOICE_CHANNEL_ID,
+        channelName: 'qa-question-voice-e2e',
+        dirName: 'qa-question-voice-e2e',
+        username: 'queue-question-tester',
+    });
+    afterEach(() => {
+        setDeterministicTranscription(null);
+    });
+    test('voice message during pending question dismisses question and transcribes normally', async () => {
+        // This is the exact bug scenario: user sends a voice message while a
+        // question dropdown is pending. Voice messages have empty message.content
+        // (audio is in attachments, transcription happens later). The old code
+        // passed "" as the question answer and consumed the message — the voice
+        // content was completely lost.
+        await ctx.discord.channel(VOICE_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
+            content: 'QUESTION_TEXT_ANSWER_MARKER',
+        });
+        const thread = await ctx.discord.channel(VOICE_CHANNEL_ID).waitForThread({
+            timeout: 4_000,
+            predicate: (t) => {
+                return t.name === 'QUESTION_TEXT_ANSWER_MARKER';
+            },
+        });
+        const th = ctx.discord.thread(thread.id);
+        // Wait for the question dropdown to appear
+        await waitForPendingQuestion({
+            threadId: thread.id,
+            timeoutMs: 4_000,
+        });
+        await waitForBotMessageContaining({
+            discord: ctx.discord,
+            threadId: thread.id,
+            text: 'Which option do you prefer?',
+            timeout: 4_000,
+        });
+        // Send a voice message while the question is pending.
+        // message.content is "" for voice messages — only the attachment exists.
+        setDeterministicTranscription({
+            transcription: 'I want option Alpha please',
+            queueMessage: false,
+        });
+        await th.user(TEST_USER_ID).sendVoiceMessage();
+        // Question context should be cleaned up (empty reply sent to unblock OpenCode)
+        await waitForNoPendingQuestion({
+            threadId: thread.id,
+            timeoutMs: 4_000,
+        });
+        // Voice content should be transcribed and appear as the next user message,
+        // processed after the model responds to the empty question answer.
+        await waitForBotMessageContaining({
+            discord: ctx.discord,
+            threadId: thread.id,
+            text: 'I want option Alpha please',
+            timeout: 4_000,
+        });
+        await waitForFooterMessage({
+            discord: ctx.discord,
+            threadId: thread.id,
+            timeout: 4_000,
+            afterMessageIncludes: 'I want option Alpha please',
+            afterAuthorId: ctx.discord.botUserId,
+        });
+        const timeline = await th.text({ showInteractions: true });
+        expect(timeline).toMatchInlineSnapshot(`
+        "--- from: user (queue-question-tester)
+        QUESTION_TEXT_ANSWER_MARKER
+        --- from: assistant (TestBot)
+        **Pick one**
+        Which option do you prefer?
+        --- from: user (queue-question-tester)
+        [attachment: voice-message.ogg]
+        --- from: assistant (TestBot)
+        🎤 Transcribing voice message...
+        📝 **Transcribed message:** I want option Alpha please
+        ⬥ ok
+        *project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
+      `);
+        // Voice content must be present as a real transcribed message, not lost
+        expect(timeline).toContain('I want option Alpha please');
     }, 20_000);
 });

package/dist/session-handler/thread-session-runtime.js CHANGED Viewed

@@ -2667,12 +2667,18 @@ export class ThreadSessionRuntime {
         if (input.command) {
             const queuedCommand = input.command;
             const commandSignal = AbortSignal.timeout(30_000);
+            // session.command() only accepts FilePart in parts, not text parts.
+            // Append <discord-user /> tag to arguments so external sync can
+            // detect this message came from Discord (same tag as promptAsync).
+            const discordTag = input.username
+                ? `\n<discord-user name="${input.username}" />`
+                : '';
             const commandResponse = await errore.tryAsync(() => {
                 return getClient().session.command({
                     sessionID: session.id,
                     directory: this.sdkDirectory,
                     command: queuedCommand.name,
-                    arguments: queuedCommand.arguments,
+                    arguments: queuedCommand.arguments + discordTag,
                     agent: earlyAgentPreference,
                     ...variantField,
                 }, { signal: commandSignal });

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "kimaki",
   "module": "index.ts",
   "type": "module",
-  "version": "0.4.84",
+  "version": "0.4.86",
   "repository": "https://github.com/remorses/kimaki",
   "bin": "bin.js",
   "files": [
@@ -67,8 +67,8 @@
     "zod": "^4.3.6",
     "zustand": "^5.0.11",
     "errore": "^0.14.1",
-    "libsqlproxy": "^0.1.0",
-    "traforo": "^0.2.0"
+    "traforo": "^0.2.0",
+    "libsqlproxy": "^0.1.0"
   },
   "optionalDependencies": {
     "@discordjs/opus": "^0.10.0",

package/src/commands/ask-question.ts CHANGED Viewed

@@ -49,6 +49,12 @@ type PendingQuestionContext = {
 const QUESTION_CONTEXT_TTL_MS = 10 * 60 * 1000
 export const pendingQuestionContexts = new Map<string, PendingQuestionContext>()
+export function hasPendingQuestionForThread(threadId: string): boolean {
+  return [...pendingQuestionContexts.values()].some((ctx) => {
+    return ctx.thread.id === threadId
+  })
+}
 /**
  * Show dropdown menus for question tool input.
  * Sends one message per question with the dropdown directly under the question text.
@@ -311,13 +317,21 @@ export function parseAskUserQuestionTool(part: {
 }
 /**
- * Cancel a pending question for a thread (e.g., when user sends a new message).
- * Sends the user's message as the answer to OpenCode so the model sees their actual response.
+ * Cancel a pending question for a thread.
+ *
+ * Two modes depending on whether `userMessage` is provided:
+ *
+ * - `cancelPendingQuestion(threadId)` — cleanup only. Removes the context
+ *   without replying to OpenCode. Use when the session is being torn down
+ *   or aborted separately. Note that `''` is NOT this mode: an empty string
+ *   is still sent as an (empty) reply. Returns 'no-pending' in both
+ *   "found+cleaned" and "nothing found" cases.
  *
- * Returns 'replied' if the question was answered successfully (caller should NOT
- * enqueue the user message as a new prompt — it was consumed as the answer).
- * Returns 'reply-failed' if reply failed (context kept pending so TTL can retry).
- * Returns 'no-pending' if no question was pending for this thread.
+ * - `cancelPendingQuestion(threadId, text)` — reply path. Sends the text as
+ *   the tool answer so the model sees the user's response. The caller should
+ *   NOT also enqueue the message as a new prompt.
+ *   Returns 'replied' on success, 'reply-failed' if the reply call fails
+ *   (context kept pending so TTL can retry).
  */
 export async function cancelPendingQuestion(
   threadId: string,
@@ -339,8 +353,9 @@ export async function cancelPendingQuestion(
   }
   // undefined means teardown/cleanup — just remove context, don't reply.
-  // The session is already being torn down. Empty string '' is a valid
-  // user message (attachment-only, voice, etc.) and must still go through.
+  // The session is already being torn down or the caller wants to dismiss
+  // the question without providing an answer. Empty string '' is NOT
+  // cleanup: it is still sent as a reply (voice/attachment-only messages).
   if (userMessage === undefined) {
     pendingQuestionContexts.delete(contextHash)
     return 'no-pending'

package/src/discord-bot.ts CHANGED Viewed

@@ -46,7 +46,7 @@ import {
   preprocessNewThreadMessage,
 } from './message-preprocessing.js'
 import { cancelPendingActionButtons } from './commands/action-buttons.js'
-import { cancelPendingQuestion, type CancelQuestionResult } from './commands/ask-question.js'
+import { cancelPendingQuestion, hasPendingQuestionForThread } from './commands/ask-question.js'
 import { cancelPendingFileUpload } from './commands/file-upload.js'
 import { cancelPendingPermission } from './commands/permissions.js'
 import { cancelHtmlActionsForThread } from './html-actions.js'
@@ -316,7 +316,13 @@ export async function startDiscordBot({
   if (discordClient.isReady()) {
     await setupHandlers(discordClient)
   } else {
-    discordClient.once(Events.ClientReady, setupHandlers)
+    discordClient.once(Events.ClientReady, (readyClient) => {
+      void setupHandlers(readyClient).catch((error) => {
+        discordLogger.error(
+          `[GATEWAY] ClientReady handler failed: ${formatErrorWithStack(error)}`,
+        )
+      })
+    })
   }
   discordClient.on(Events.Error, (error) => {
@@ -624,9 +630,6 @@ export async function startDiscordBot({
         })
         // Cancel interactive UI when a real user sends a message.
-        // If a question was pending and answered with the user's text,
-        // early-return: the message was consumed as the question answer
-        // and must NOT also be sent as a new prompt (causes abort loops).
         if (!message.author.bot && !isCliInjectedPrompt) {
           cancelPendingActionButtons(thread.id)
           cancelHtmlActionsForThread(thread.id)
@@ -636,11 +639,29 @@ export async function startDiscordBot({
               reason: 'user sent a new message while permission was pending',
             })
           }
-          const questionResult = await cancelPendingQuestion(thread.id, message.content)
-          void cancelPendingFileUpload(thread.id)
-          if (questionResult === 'replied') {
-            return
+          // For text messages: pass the content as the question answer so the
+          // model sees the user's response. The early return prevents the message
+          // from also being sent as a new prompt (duplicate).
+          // For voice/image messages: message.content is "" (audio is in
+          // attachments, transcription happens later). Replying with "" and
+          // then returning early would lose the content entirely. Instead, still
+          // reply with "" to unblock OpenCode's question.waitForReply (without a
+          // reply the next promptAsync immediately fails with MessageAbortedError),
+          // but do NOT return early: the voice message flows through normal
+          // preprocessing, where it is transcribed and queued as the next user
+          // message after the model finishes responding to the empty answer.
+          if (message.content.trim().length > 0) {
+            const questionResult = await cancelPendingQuestion(thread.id, message.content)
+            if (questionResult === 'replied') {
+              void cancelPendingFileUpload(thread.id)
+              return
+            }
+          } else if (hasPendingQuestionForThread(thread.id)) {
+            // Reply empty to unblock the question tool — no early return so
+            // the voice/image message continues through to enqueueIncoming.
+            await cancelPendingQuestion(thread.id, '')
           }
+          void cancelPendingFileUpload(thread.id)
         }
         // Expensive pre-processing (voice transcription, context fetch,

package/src/external-opencode-sync.ts CHANGED Viewed

@@ -86,7 +86,7 @@ function isSyntheticTextPart(part: Extract<Part, { type: 'text' }>): boolean {
 }
 function parseDiscordOriginMetadata(text: string): DiscordOriginMetadata | null {
-  const match = text.match(/^<discord-user\s+([^>]+)\s*\/>$/)
+  const match = text.match(/<discord-user\s+([^>]+)\s*\/>/)
   if (!match?.[1]) {
     return null
   }
@@ -117,23 +117,21 @@ function getDiscordOriginMetadataFromMessage({
 }: {
   message: SessionMessageLike
 }): DiscordOriginMetadata | null {
-  const syntheticTexts = message.parts.flatMap((part) => {
-    if (part.type !== 'text') {
-      return [] as string[]
-    }
-    if (!isSyntheticTextPart(part)) {
-      return [] as string[]
-    }
-    return [part.text || '']
+  const textParts = message.parts.filter((p): p is Extract<typeof p, { type: 'text' }> => {
+    return p.type === 'text'
   })
-  for (const text of syntheticTexts) {
-    const metadata = parseDiscordOriginMetadata(text)
+  // Synthetic parts first (normal promptAsync path), then non-synthetic
+  // (session.command() path where the tag is embedded in arguments text).
+  const sorted = [
+    ...textParts.filter((p) => { return isSyntheticTextPart(p) }),
+    ...textParts.filter((p) => { return !isSyntheticTextPart(p) }),
+  ]
+  for (const part of sorted) {
+    const metadata = parseDiscordOriginMetadata(part.text || '')
     if (metadata) {
       return metadata
     }
   }
   return null
 }

package/src/queue-advanced-question.e2e.test.ts CHANGED Viewed

@@ -1,8 +1,9 @@
 // E2e test for question tool: user text message during pending question should
-// be consumed as the answer and NOT also sent as a duplicate promptAsync.
-// Reproduces the bug from commit a4dfb01 where the same message was sent twice.
+// be sent to OpenCode as the question answer and NOT also enqueued as a new
+// prompt. Voice/attachment-only messages (empty content) instead send an empty
+// reply to unblock the question, then get transcribed and enqueued normally.
-import { describe, test, expect } from 'vitest'
+import { describe, test, expect, afterEach } from 'vitest'
 import {
   setupQueueAdvancedSuite,
   TEST_USER_ID,
@@ -12,8 +13,10 @@ import {
   waitForFooterMessage,
 } from './test-utils.js'
 import { pendingQuestionContexts } from './commands/ask-question.js'
+import { store, type DeterministicTranscriptionConfig } from './store.js'
 const TEXT_CHANNEL_ID = '200000000000001007'
+const VOICE_CHANNEL_ID = '200000000000001017'
 async function waitForPendingQuestion({
   threadId,
@@ -59,7 +62,13 @@ async function waitForNoPendingQuestion({
   throw new Error('Timed out waiting for question context cleanup')
 }
-describe('queue advanced: question tool text answer', () => {
+function setDeterministicTranscription(config: DeterministicTranscriptionConfig | null) {
+  store.setState({
+    test: { deterministicTranscription: config },
+  })
+}
+describe('queue advanced: question tool answer', () => {
   const ctx = setupQueueAdvancedSuite({
     channelId: TEXT_CHANNEL_ID,
     channelName: 'qa-question-e2e',
@@ -67,8 +76,12 @@ describe('queue advanced: question tool text answer', () => {
     username: 'queue-question-tester',
   })
+  afterEach(() => {
+    setDeterministicTranscription(null)
+  })
   test(
-    'user text message answers pending question without sending duplicate prompt',
+    'user text message dismisses pending question and enqueues as normal prompt',
     async () => {
       await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
         content: 'QUESTION_TEXT_ANSWER_MARKER',
@@ -100,35 +113,19 @@ describe('queue advanced: question tool text answer', () => {
       // User sends a text message while question is pending.
       // This should:
-      // 1. Answer the question via cancelPendingQuestion (consumed as answer)
-      // 2. NOT also send as a new promptAsync (the fix)
-      // 3. Clean up the pending question context
+      // 1. Be sent to OpenCode as the answer to the pending question
+      // 2. Clean up the pending question context
+      // 3. NOT also be enqueued as a new prompt (no second question dropdown)
       await th.user(TEST_USER_ID).sendMessage({
         content: 'my text answer',
       })
-      // Pending question context should be cleaned up after answer
+      // Pending question context should be cleaned up
       await waitForNoPendingQuestion({
         threadId: thread.id,
         timeoutMs: 4_000,
       })
-      // Wait for second question dropdown (from question-answer followup —
-      // OpenCode calls LLM again with same prompt after question tool completes,
-      // deterministic matcher fires question tool again). This is expected.
-      // Poll for it instead of sleeping.
-      const start = Date.now()
-      while (Date.now() - start < 4_000) {
-        const msgs = await th.getMessages()
-        const questionMsgs = msgs.filter((m) => {
-          return m.content.includes('Which option do you prefer?')
-        })
-        if (questionMsgs.length >= 2) {
-          break
-        }
-        await new Promise<void>((r) => { setTimeout(r, 50) })
-      }
       const timeline = await th.text({ showInteractions: true })
       expect(timeline).toMatchInlineSnapshot(`
         "--- from: user (queue-question-tester)
@@ -137,21 +134,118 @@ describe('queue advanced: question tool text answer', () => {
         **Pick one**
         Which option do you prefer?
         --- from: user (queue-question-tester)
-        my text answer
-        --- from: assistant (TestBot)
-        **Pick one**
-        Which option do you prefer?"
+        my text answer"
       `)
-      // The user's "my text answer" message must appear in the thread
+      // The user's message must appear in Discord
       expect(timeline).toContain('my text answer')
-      // Key regression assertion: without the fix, the user's text message
-      // is ALSO sent as a duplicate promptAsync which triggers a THIRD question
-      // dropdown. With the fix, only 2 dropdowns appear (initial + followup
-      // from question answer). Count occurrences of "Which option do you prefer?"
+      // Only 1 question dropdown — the text message dismissed the question (it was
+      // not consumed as the answer) and must not trigger a second dropdown.
       const questionCount = (timeline.match(/Which option do you prefer\?/g) || []).length
-      expect(questionCount).toBe(2)
+      expect(questionCount).toBe(1)
+    },
+    20_000,
+  )
+})
+describe('queue advanced: voice message during pending question', () => {
+  const ctx = setupQueueAdvancedSuite({
+    channelId: VOICE_CHANNEL_ID,
+    channelName: 'qa-question-voice-e2e',
+    dirName: 'qa-question-voice-e2e',
+    username: 'queue-question-tester',
+  })
+  afterEach(() => {
+    setDeterministicTranscription(null)
+  })
+  test(
+    'voice message during pending question dismisses question and transcribes normally',
+    async () => {
+      // This is the exact bug scenario: user sends a voice message while a
+      // question dropdown is pending. Voice messages have empty message.content
+      // (audio is in attachments, transcription happens later). The old code
+      // passed "" as the question answer and consumed the message — the voice
+      // content was completely lost.
+      await ctx.discord.channel(VOICE_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
+        content: 'QUESTION_TEXT_ANSWER_MARKER',
+      })
+      const thread = await ctx.discord.channel(VOICE_CHANNEL_ID).waitForThread({
+        timeout: 4_000,
+        predicate: (t) => {
+          return t.name === 'QUESTION_TEXT_ANSWER_MARKER'
+        },
+      })
+      const th = ctx.discord.thread(thread.id)
+      // Wait for the question dropdown to appear
+      await waitForPendingQuestion({
+        threadId: thread.id,
+        timeoutMs: 4_000,
+      })
+      await waitForBotMessageContaining({
+        discord: ctx.discord,
+        threadId: thread.id,
+        text: 'Which option do you prefer?',
+        timeout: 4_000,
+      })
+      // Send a voice message while the question is pending.
+      // message.content is "" for voice messages — only the attachment exists.
+      setDeterministicTranscription({
+        transcription: 'I want option Alpha please',
+        queueMessage: false,
+      })
+      await th.user(TEST_USER_ID).sendVoiceMessage()
+      // Question context should be cleaned up (empty reply sent to unblock OpenCode)
+      await waitForNoPendingQuestion({
+        threadId: thread.id,
+        timeoutMs: 4_000,
+      })
+      // Voice content should be transcribed and appear as the next user message,
+      // processed after the model responds to the empty question answer.
+      await waitForBotMessageContaining({
+        discord: ctx.discord,
+        threadId: thread.id,
+        text: 'I want option Alpha please',
+        timeout: 4_000,
+      })
+      await waitForFooterMessage({
+        discord: ctx.discord,
+        threadId: thread.id,
+        timeout: 4_000,
+        afterMessageIncludes: 'I want option Alpha please',
+        afterAuthorId: ctx.discord.botUserId,
+      })
+      const timeline = await th.text({ showInteractions: true })
+      expect(timeline).toMatchInlineSnapshot(`
+        "--- from: user (queue-question-tester)
+        QUESTION_TEXT_ANSWER_MARKER
+        --- from: assistant (TestBot)
+        **Pick one**
+        Which option do you prefer?
+        --- from: user (queue-question-tester)
+        [attachment: voice-message.ogg]
+        --- from: assistant (TestBot)
+        🎤 Transcribing voice message...
+        📝 **Transcribed message:** I want option Alpha please
+        ⬥ ok
+        *project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
+      `)
+      // Voice content must be present as a real transcribed message, not lost
+      expect(timeline).toContain('I want option Alpha please')
     },
     20_000,
   )

package/src/session-handler/thread-session-runtime.ts CHANGED Viewed

@@ -3510,13 +3510,20 @@ export class ThreadSessionRuntime {
     if (input.command) {
       const queuedCommand = input.command
       const commandSignal = AbortSignal.timeout(30_000)
+      // session.command() only accepts FilePart in parts, not text parts.
+      // Append <discord-user /> tag to arguments so external sync can
+      // detect this message came from Discord (same tag as promptAsync).
+      const discordTag = input.username
+        ? `\n<discord-user name="${input.username}" />`
+        : ''
       const commandResponse = await errore.tryAsync(() => {
         return getClient().session.command(
           {
             sessionID: session.id,
             directory: this.sdkDirectory,
             command: queuedCommand.name,
-            arguments: queuedCommand.arguments,
+            arguments: queuedCommand.arguments + discordTag,
             agent: earlyAgentPreference,
             ...variantField,
           },