kimaki 0.4.84 → 0.4.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,11 @@ const logger = createLogger(LogPrefix.ASK_QUESTION);
11
11
  // TTL prevents unbounded growth if user never answers a question.
12
12
  const QUESTION_CONTEXT_TTL_MS = 10 * 60 * 1000;
13
13
  export const pendingQuestionContexts = new Map();
14
+ export function hasPendingQuestionForThread(threadId) {
15
+ return [...pendingQuestionContexts.values()].some((ctx) => {
16
+ return ctx.thread.id === threadId;
17
+ });
18
+ }
14
19
  /**
15
20
  * Show dropdown menus for question tool input.
16
21
  * Sends one message per question with the dropdown directly under the question text.
@@ -205,13 +210,21 @@ export function parseAskUserQuestionTool(part) {
205
210
  return input;
206
211
  }
207
212
  /**
208
- * Cancel a pending question for a thread (e.g., when user sends a new message).
209
- * Sends the user's message as the answer to OpenCode so the model sees their actual response.
213
+ * Cancel a pending question for a thread.
214
+ *
215
+ * Two modes depending on whether `userMessage` is provided:
216
+ *
217
+ * - `cancelPendingQuestion(threadId)` — cleanup only. Removes the context
218
+ * without replying to OpenCode. Use when aborting the blocked session
219
+ * separately (e.g. voice/attachment messages whose content needs
220
+ * transcription first). Returns 'no-pending' in both "found+cleaned" and
221
+ * "nothing found" cases.
210
222
  *
211
- * Returns 'replied' if the question was answered successfully (caller should NOT
212
- * enqueue the user message as a new prompt — it was consumed as the answer).
213
- * Returns 'reply-failed' if reply failed (context kept pending so TTL can retry).
214
- * Returns 'no-pending' if no question was pending for this thread.
223
+ * - `cancelPendingQuestion(threadId, text)` — reply path. Sends the text as
224
+ * the tool answer so the model sees the user's response. The caller should
225
+ * NOT also enqueue the message as a new prompt.
226
+ * Returns 'replied' on success, 'reply-failed' if the reply call fails
227
+ * (context kept pending so TTL can retry).
215
228
  */
216
229
  export async function cancelPendingQuestion(threadId, userMessage) {
217
230
  // Find pending question for this thread
@@ -228,8 +241,9 @@ export async function cancelPendingQuestion(threadId, userMessage) {
228
241
  return 'no-pending';
229
242
  }
230
243
  // undefined means teardown/cleanup — just remove context, don't reply.
231
- // The session is already being torn down. Empty string '' is a valid
232
- // user message (attachment-only, voice, etc.) and must still go through.
244
+ // The session is already being torn down or the caller wants to dismiss
245
+ // the question without providing an answer (e.g. voice/attachment-only
246
+ // messages where content needs transcription before it can be an answer).
233
247
  if (userMessage === undefined) {
234
248
  pendingQuestionContexts.delete(contextHash);
235
249
  return 'no-pending';
@@ -12,7 +12,7 @@ import { getTextAttachments, resolveMentions, } from './message-formatting.js';
12
12
  import { isVoiceAttachment } from './voice-attachment.js';
13
13
  import { preprocessExistingThreadMessage, preprocessNewThreadMessage, } from './message-preprocessing.js';
14
14
  import { cancelPendingActionButtons } from './commands/action-buttons.js';
15
- import { cancelPendingQuestion } from './commands/ask-question.js';
15
+ import { cancelPendingQuestion, hasPendingQuestionForThread } from './commands/ask-question.js';
16
16
  import { cancelPendingFileUpload } from './commands/file-upload.js';
17
17
  import { cancelPendingPermission } from './commands/permissions.js';
18
18
  import { cancelHtmlActionsForThread } from './html-actions.js';
@@ -185,7 +185,11 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
185
185
  await setupHandlers(discordClient);
186
186
  }
187
187
  else {
188
- discordClient.once(Events.ClientReady, setupHandlers);
188
+ discordClient.once(Events.ClientReady, (readyClient) => {
189
+ void setupHandlers(readyClient).catch((error) => {
190
+ discordLogger.error(`[GATEWAY] ClientReady handler failed: ${formatErrorWithStack(error)}`);
191
+ });
192
+ });
189
193
  }
190
194
  discordClient.on(Events.Error, (error) => {
191
195
  discordLogger.error('[GATEWAY] Client error:', formatErrorWithStack(error));
@@ -434,9 +438,6 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
434
438
  appId: currentAppId,
435
439
  });
436
440
  // Cancel interactive UI when a real user sends a message.
437
- // If a question was pending and answered with the user's text,
438
- // early-return: the message was consumed as the question answer
439
- // and must NOT also be sent as a new prompt (causes abort loops).
440
441
  if (!message.author.bot && !isCliInjectedPrompt) {
441
442
  cancelPendingActionButtons(thread.id);
442
443
  cancelHtmlActionsForThread(thread.id);
@@ -446,11 +447,30 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
446
447
  reason: 'user sent a new message while permission was pending',
447
448
  });
448
449
  }
449
- const questionResult = await cancelPendingQuestion(thread.id, message.content);
450
- void cancelPendingFileUpload(thread.id);
451
- if (questionResult === 'replied') {
452
- return;
450
+ // For text messages: pass the content as the question answer so the
451
+ // model sees the user's response. The early return prevents the message
452
+ // from also being sent as a new prompt (duplicate).
453
+ // For voice/image messages: message.content is "" (audio is in
454
+ // attachments, transcription happens later). Passing "" as the answer
455
+ // loses the content entirely. Instead, reply with "" to properly
456
+ // unblock OpenCode's question.waitForReply (without a reply the next
457
+ // promptAsync immediately fails with MessageAbortedError), then let
458
+ // the voice message flow through normal preprocessing — it gets
459
+ // transcribed and queued as the next user message after the model
460
+ // finishes responding to the empty answer.
461
+ if (message.content.trim().length > 0) {
462
+ const questionResult = await cancelPendingQuestion(thread.id, message.content);
463
+ if (questionResult === 'replied') {
464
+ void cancelPendingFileUpload(thread.id);
465
+ return;
466
+ }
467
+ }
468
+ else if (hasPendingQuestionForThread(thread.id)) {
469
+ // Reply empty to unblock the question tool — no early return so
470
+ // the voice/image message continues through to enqueueIncoming.
471
+ await cancelPendingQuestion(thread.id, '');
453
472
  }
473
+ void cancelPendingFileUpload(thread.id);
454
474
  }
455
475
  // Expensive pre-processing (voice transcription, context fetch,
456
476
  // attachment download) runs inside the runtime's serialized
@@ -19,7 +19,7 @@ function isSyntheticTextPart(part) {
19
19
  return candidate.synthetic === true;
20
20
  }
21
21
  function parseDiscordOriginMetadata(text) {
22
- const match = text.match(/^<discord-user\s+([^>]+)\s*\/>$/);
22
+ const match = text.match(/<discord-user\s+([^>]+)\s*\/>/);
23
23
  if (!match?.[1]) {
24
24
  return null;
25
25
  }
@@ -42,17 +42,17 @@ function parseDiscordOriginMetadata(text) {
42
42
  };
43
43
  }
44
44
  function getDiscordOriginMetadataFromMessage({ message, }) {
45
- const syntheticTexts = message.parts.flatMap((part) => {
46
- if (part.type !== 'text') {
47
- return [];
48
- }
49
- if (!isSyntheticTextPart(part)) {
50
- return [];
51
- }
52
- return [part.text || ''];
45
+ const textParts = message.parts.filter((p) => {
46
+ return p.type === 'text';
53
47
  });
54
- for (const text of syntheticTexts) {
55
- const metadata = parseDiscordOriginMetadata(text);
48
+ // Synthetic parts first (normal promptAsync path), then non-synthetic
49
+ // (session.command() path where the tag is embedded in arguments text).
50
+ const sorted = [
51
+ ...textParts.filter((p) => { return isSyntheticTextPart(p); }),
52
+ ...textParts.filter((p) => { return !isSyntheticTextPart(p); }),
53
+ ];
54
+ for (const part of sorted) {
55
+ const metadata = parseDiscordOriginMetadata(part.text || '');
56
56
  if (metadata) {
57
57
  return metadata;
58
58
  }
@@ -1,11 +1,14 @@
1
1
  // E2e test for question tool: user text message during pending question should
2
- // be consumed as the answer and NOT also sent as a duplicate promptAsync.
3
- // Reproduces the bug from commit a4dfb01 where the same message was sent twice.
4
- import { describe, test, expect } from 'vitest';
2
+ // dismiss the question (abort), then enqueue as a normal user prompt.
3
+ // The user's message must appear as a real user message in the thread, not
4
+ // get consumed as a tool result answer (which lost voice/image content).
5
+ import { describe, test, expect, afterEach } from 'vitest';
5
6
  import { setupQueueAdvancedSuite, TEST_USER_ID, } from './queue-advanced-e2e-setup.js';
6
7
  import { waitForBotMessageContaining, waitForFooterMessage, } from './test-utils.js';
7
8
  import { pendingQuestionContexts } from './commands/ask-question.js';
9
+ import { store } from './store.js';
8
10
  const TEXT_CHANNEL_ID = '200000000000001007';
11
+ const VOICE_CHANNEL_ID = '200000000000001017';
9
12
  async function waitForPendingQuestion({ threadId, timeoutMs, }) {
10
13
  const start = Date.now();
11
14
  while (Date.now() - start < timeoutMs) {
@@ -36,14 +39,22 @@ async function waitForNoPendingQuestion({ threadId, timeoutMs, }) {
36
39
  }
37
40
  throw new Error('Timed out waiting for question context cleanup');
38
41
  }
39
- describe('queue advanced: question tool text answer', () => {
42
+ function setDeterministicTranscription(config) {
43
+ store.setState({
44
+ test: { deterministicTranscription: config },
45
+ });
46
+ }
47
+ describe('queue advanced: question tool answer', () => {
40
48
  const ctx = setupQueueAdvancedSuite({
41
49
  channelId: TEXT_CHANNEL_ID,
42
50
  channelName: 'qa-question-e2e',
43
51
  dirName: 'qa-question-e2e',
44
52
  username: 'queue-question-tester',
45
53
  });
46
- test('user text message answers pending question without sending duplicate prompt', async () => {
54
+ afterEach(() => {
55
+ setDeterministicTranscription(null);
56
+ });
57
+ test('user text message dismisses pending question and enqueues as normal prompt', async () => {
47
58
  await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
48
59
  content: 'QUESTION_TEXT_ANSWER_MARKER',
49
60
  });
@@ -69,32 +80,17 @@ describe('queue advanced: question tool text answer', () => {
69
80
  });
70
81
  // User sends a text message while question is pending.
71
82
  // This should:
72
- // 1. Answer the question via cancelPendingQuestion (consumed as answer)
73
- // 2. NOT also send as a new promptAsync (the fix)
74
- // 3. Clean up the pending question context
83
+ // 1. Dismiss the pending question (cleanup context)
84
+ // 2. Abort the blocked session so OpenCode unblocks
85
+ // 3. Enqueue the message as a normal user prompt (not consumed as answer)
75
86
  await th.user(TEST_USER_ID).sendMessage({
76
87
  content: 'my text answer',
77
88
  });
78
- // Pending question context should be cleaned up after answer
89
+ // Pending question context should be cleaned up
79
90
  await waitForNoPendingQuestion({
80
91
  threadId: thread.id,
81
92
  timeoutMs: 4_000,
82
93
  });
83
- // Wait for second question dropdown (from question-answer followup —
84
- // OpenCode calls LLM again with same prompt after question tool completes,
85
- // deterministic matcher fires question tool again). This is expected.
86
- // Poll for it instead of sleeping.
87
- const start = Date.now();
88
- while (Date.now() - start < 4_000) {
89
- const msgs = await th.getMessages();
90
- const questionMsgs = msgs.filter((m) => {
91
- return m.content.includes('Which option do you prefer?');
92
- });
93
- if (questionMsgs.length >= 2) {
94
- break;
95
- }
96
- await new Promise((r) => { setTimeout(r, 50); });
97
- }
98
94
  const timeline = await th.text({ showInteractions: true });
99
95
  expect(timeline).toMatchInlineSnapshot(`
100
96
  "--- from: user (queue-question-tester)
@@ -103,18 +99,96 @@ describe('queue advanced: question tool text answer', () => {
103
99
  **Pick one**
104
100
  Which option do you prefer?
105
101
  --- from: user (queue-question-tester)
106
- my text answer
107
- --- from: assistant (TestBot)
108
- **Pick one**
109
- Which option do you prefer?"
102
+ my text answer"
110
103
  `);
111
- // The user's "my text answer" message must appear in the thread
104
+ // The user's message must appear in Discord
112
105
  expect(timeline).toContain('my text answer');
113
- // Key regression assertion: without the fix, the user's text message
114
- // is ALSO sent as a duplicate promptAsync which triggers a THIRD question
115
- // dropdown. With the fix, only 2 dropdowns appear (initial + followup
116
- // from question answer). Count occurrences of "Which option do you prefer?"
106
+ // Only 1 question dropdown — text message was consumed as the answer,
107
+ // no duplicate prompt was sent (which would trigger a second dropdown).
117
108
  const questionCount = (timeline.match(/Which option do you prefer\?/g) || []).length;
118
- expect(questionCount).toBe(2);
109
+ expect(questionCount).toBe(1);
110
+ }, 20_000);
111
+ });
112
+ describe('queue advanced: voice message during pending question', () => {
113
+ const ctx = setupQueueAdvancedSuite({
114
+ channelId: VOICE_CHANNEL_ID,
115
+ channelName: 'qa-question-voice-e2e',
116
+ dirName: 'qa-question-voice-e2e',
117
+ username: 'queue-question-tester',
118
+ });
119
+ afterEach(() => {
120
+ setDeterministicTranscription(null);
121
+ });
122
+ test('voice message during pending question dismisses question and transcribes normally', async () => {
123
+ // This is the exact bug scenario: user sends a voice message while a
124
+ // question dropdown is pending. Voice messages have empty message.content
125
+ // (audio is in attachments, transcription happens later). The old code
126
+ // passed "" as the question answer and consumed the message — the voice
127
+ // content was completely lost.
128
+ await ctx.discord.channel(VOICE_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
129
+ content: 'QUESTION_TEXT_ANSWER_MARKER',
130
+ });
131
+ const thread = await ctx.discord.channel(VOICE_CHANNEL_ID).waitForThread({
132
+ timeout: 4_000,
133
+ predicate: (t) => {
134
+ return t.name === 'QUESTION_TEXT_ANSWER_MARKER';
135
+ },
136
+ });
137
+ const th = ctx.discord.thread(thread.id);
138
+ // Wait for the question dropdown to appear
139
+ await waitForPendingQuestion({
140
+ threadId: thread.id,
141
+ timeoutMs: 4_000,
142
+ });
143
+ await waitForBotMessageContaining({
144
+ discord: ctx.discord,
145
+ threadId: thread.id,
146
+ text: 'Which option do you prefer?',
147
+ timeout: 4_000,
148
+ });
149
+ // Send a voice message while the question is pending.
150
+ // message.content is "" for voice messages — only the attachment exists.
151
+ setDeterministicTranscription({
152
+ transcription: 'I want option Alpha please',
153
+ queueMessage: false,
154
+ });
155
+ await th.user(TEST_USER_ID).sendVoiceMessage();
156
+ // Question context should be cleaned up (empty reply sent to unblock OpenCode)
157
+ await waitForNoPendingQuestion({
158
+ threadId: thread.id,
159
+ timeoutMs: 4_000,
160
+ });
161
+ // Voice content should be transcribed and appear as the next user message,
162
+ // processed after the model responds to the empty question answer.
163
+ await waitForBotMessageContaining({
164
+ discord: ctx.discord,
165
+ threadId: thread.id,
166
+ text: 'I want option Alpha please',
167
+ timeout: 4_000,
168
+ });
169
+ await waitForFooterMessage({
170
+ discord: ctx.discord,
171
+ threadId: thread.id,
172
+ timeout: 4_000,
173
+ afterMessageIncludes: 'I want option Alpha please',
174
+ afterAuthorId: ctx.discord.botUserId,
175
+ });
176
+ const timeline = await th.text({ showInteractions: true });
177
+ expect(timeline).toMatchInlineSnapshot(`
178
+ "--- from: user (queue-question-tester)
179
+ QUESTION_TEXT_ANSWER_MARKER
180
+ --- from: assistant (TestBot)
181
+ **Pick one**
182
+ Which option do you prefer?
183
+ --- from: user (queue-question-tester)
184
+ [attachment: voice-message.ogg]
185
+ --- from: assistant (TestBot)
186
+ 🎤 Transcribing voice message...
187
+ 📝 **Transcribed message:** I want option Alpha please
188
+ ⬥ ok
189
+ *project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
190
+ `);
191
+ // Voice content must be present as a real transcribed message, not lost
192
+ expect(timeline).toContain('I want option Alpha please');
119
193
  }, 20_000);
120
194
  });
@@ -2667,12 +2667,18 @@ export class ThreadSessionRuntime {
2667
2667
  if (input.command) {
2668
2668
  const queuedCommand = input.command;
2669
2669
  const commandSignal = AbortSignal.timeout(30_000);
2670
+ // session.command() only accepts FilePart in parts, not text parts.
2671
+ // Append <discord-user /> tag to arguments so external sync can
2672
+ // detect this message came from Discord (same tag as promptAsync).
2673
+ const discordTag = input.username
2674
+ ? `\n<discord-user name="${input.username}" />`
2675
+ : '';
2670
2676
  const commandResponse = await errore.tryAsync(() => {
2671
2677
  return getClient().session.command({
2672
2678
  sessionID: session.id,
2673
2679
  directory: this.sdkDirectory,
2674
2680
  command: queuedCommand.name,
2675
- arguments: queuedCommand.arguments,
2681
+ arguments: queuedCommand.arguments + discordTag,
2676
2682
  agent: earlyAgentPreference,
2677
2683
  ...variantField,
2678
2684
  }, { signal: commandSignal });
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "kimaki",
3
3
  "module": "index.ts",
4
4
  "type": "module",
5
- "version": "0.4.84",
5
+ "version": "0.4.86",
6
6
  "repository": "https://github.com/remorses/kimaki",
7
7
  "bin": "bin.js",
8
8
  "files": [
@@ -67,8 +67,8 @@
67
67
  "zod": "^4.3.6",
68
68
  "zustand": "^5.0.11",
69
69
  "errore": "^0.14.1",
70
- "libsqlproxy": "^0.1.0",
71
- "traforo": "^0.2.0"
70
+ "traforo": "^0.2.0",
71
+ "libsqlproxy": "^0.1.0"
72
72
  },
73
73
  "optionalDependencies": {
74
74
  "@discordjs/opus": "^0.10.0",
@@ -49,6 +49,12 @@ type PendingQuestionContext = {
49
49
  const QUESTION_CONTEXT_TTL_MS = 10 * 60 * 1000
50
50
  export const pendingQuestionContexts = new Map<string, PendingQuestionContext>()
51
51
 
52
+ export function hasPendingQuestionForThread(threadId: string): boolean {
53
+ return [...pendingQuestionContexts.values()].some((ctx) => {
54
+ return ctx.thread.id === threadId
55
+ })
56
+ }
57
+
52
58
  /**
53
59
  * Show dropdown menus for question tool input.
54
60
  * Sends one message per question with the dropdown directly under the question text.
@@ -311,13 +317,21 @@ export function parseAskUserQuestionTool(part: {
311
317
  }
312
318
 
313
319
  /**
314
- * Cancel a pending question for a thread (e.g., when user sends a new message).
315
- * Sends the user's message as the answer to OpenCode so the model sees their actual response.
320
+ * Cancel a pending question for a thread.
321
+ *
322
+ * Two modes depending on whether `userMessage` is provided:
323
+ *
324
+ * - `cancelPendingQuestion(threadId)` — cleanup only. Removes the context
325
+ * without replying to OpenCode. Use when aborting the blocked session
326
+ * separately (e.g. voice/attachment messages whose content needs
327
+ * transcription first). Returns 'no-pending' in both "found+cleaned" and
328
+ * "nothing found" cases.
316
329
  *
317
- * Returns 'replied' if the question was answered successfully (caller should NOT
318
- * enqueue the user message as a new prompt — it was consumed as the answer).
319
- * Returns 'reply-failed' if reply failed (context kept pending so TTL can retry).
320
- * Returns 'no-pending' if no question was pending for this thread.
330
+ * - `cancelPendingQuestion(threadId, text)` — reply path. Sends the text as
331
+ * the tool answer so the model sees the user's response. The caller should
332
+ * NOT also enqueue the message as a new prompt.
333
+ * Returns 'replied' on success, 'reply-failed' if the reply call fails
334
+ * (context kept pending so TTL can retry).
321
335
  */
322
336
  export async function cancelPendingQuestion(
323
337
  threadId: string,
@@ -339,8 +353,9 @@ export async function cancelPendingQuestion(
339
353
  }
340
354
 
341
355
  // undefined means teardown/cleanup — just remove context, don't reply.
342
- // The session is already being torn down. Empty string '' is a valid
343
- // user message (attachment-only, voice, etc.) and must still go through.
356
+ // The session is already being torn down or the caller wants to dismiss
357
+ // the question without providing an answer (e.g. voice/attachment-only
358
+ // messages where content needs transcription before it can be an answer).
344
359
  if (userMessage === undefined) {
345
360
  pendingQuestionContexts.delete(contextHash)
346
361
  return 'no-pending'
@@ -46,7 +46,7 @@ import {
46
46
  preprocessNewThreadMessage,
47
47
  } from './message-preprocessing.js'
48
48
  import { cancelPendingActionButtons } from './commands/action-buttons.js'
49
- import { cancelPendingQuestion, type CancelQuestionResult } from './commands/ask-question.js'
49
+ import { cancelPendingQuestion, hasPendingQuestionForThread } from './commands/ask-question.js'
50
50
  import { cancelPendingFileUpload } from './commands/file-upload.js'
51
51
  import { cancelPendingPermission } from './commands/permissions.js'
52
52
  import { cancelHtmlActionsForThread } from './html-actions.js'
@@ -316,7 +316,13 @@ export async function startDiscordBot({
316
316
  if (discordClient.isReady()) {
317
317
  await setupHandlers(discordClient)
318
318
  } else {
319
- discordClient.once(Events.ClientReady, setupHandlers)
319
+ discordClient.once(Events.ClientReady, (readyClient) => {
320
+ void setupHandlers(readyClient).catch((error) => {
321
+ discordLogger.error(
322
+ `[GATEWAY] ClientReady handler failed: ${formatErrorWithStack(error)}`,
323
+ )
324
+ })
325
+ })
320
326
  }
321
327
 
322
328
  discordClient.on(Events.Error, (error) => {
@@ -624,9 +630,6 @@ export async function startDiscordBot({
624
630
  })
625
631
 
626
632
  // Cancel interactive UI when a real user sends a message.
627
- // If a question was pending and answered with the user's text,
628
- // early-return: the message was consumed as the question answer
629
- // and must NOT also be sent as a new prompt (causes abort loops).
630
633
  if (!message.author.bot && !isCliInjectedPrompt) {
631
634
  cancelPendingActionButtons(thread.id)
632
635
  cancelHtmlActionsForThread(thread.id)
@@ -636,11 +639,29 @@ export async function startDiscordBot({
636
639
  reason: 'user sent a new message while permission was pending',
637
640
  })
638
641
  }
639
- const questionResult = await cancelPendingQuestion(thread.id, message.content)
640
- void cancelPendingFileUpload(thread.id)
641
- if (questionResult === 'replied') {
642
- return
642
+ // For text messages: pass the content as the question answer so the
643
+ // model sees the user's response. The early return prevents the message
644
+ // from also being sent as a new prompt (duplicate).
645
+ // For voice/image messages: message.content is "" (audio is in
646
+ // attachments, transcription happens later). Passing "" as the answer
647
+ // loses the content entirely. Instead, reply with "" to properly
648
+ // unblock OpenCode's question.waitForReply (without a reply the next
649
+ // promptAsync immediately fails with MessageAbortedError), then let
650
+ // the voice message flow through normal preprocessing — it gets
651
+ // transcribed and queued as the next user message after the model
652
+ // finishes responding to the empty answer.
653
+ if (message.content.trim().length > 0) {
654
+ const questionResult = await cancelPendingQuestion(thread.id, message.content)
655
+ if (questionResult === 'replied') {
656
+ void cancelPendingFileUpload(thread.id)
657
+ return
658
+ }
659
+ } else if (hasPendingQuestionForThread(thread.id)) {
660
+ // Reply empty to unblock the question tool — no early return so
661
+ // the voice/image message continues through to enqueueIncoming.
662
+ await cancelPendingQuestion(thread.id, '')
643
663
  }
664
+ void cancelPendingFileUpload(thread.id)
644
665
  }
645
666
 
646
667
  // Expensive pre-processing (voice transcription, context fetch,
@@ -86,7 +86,7 @@ function isSyntheticTextPart(part: Extract<Part, { type: 'text' }>): boolean {
86
86
  }
87
87
 
88
88
  function parseDiscordOriginMetadata(text: string): DiscordOriginMetadata | null {
89
- const match = text.match(/^<discord-user\s+([^>]+)\s*\/>$/)
89
+ const match = text.match(/<discord-user\s+([^>]+)\s*\/>/)
90
90
  if (!match?.[1]) {
91
91
  return null
92
92
  }
@@ -117,23 +117,21 @@ function getDiscordOriginMetadataFromMessage({
117
117
  }: {
118
118
  message: SessionMessageLike
119
119
  }): DiscordOriginMetadata | null {
120
- const syntheticTexts = message.parts.flatMap((part) => {
121
- if (part.type !== 'text') {
122
- return [] as string[]
123
- }
124
- if (!isSyntheticTextPart(part)) {
125
- return [] as string[]
126
- }
127
- return [part.text || '']
120
+ const textParts = message.parts.filter((p): p is Extract<typeof p, { type: 'text' }> => {
121
+ return p.type === 'text'
128
122
  })
129
-
130
- for (const text of syntheticTexts) {
131
- const metadata = parseDiscordOriginMetadata(text)
123
+ // Synthetic parts first (normal promptAsync path), then non-synthetic
124
+ // (session.command() path where the tag is embedded in arguments text).
125
+ const sorted = [
126
+ ...textParts.filter((p) => { return isSyntheticTextPart(p) }),
127
+ ...textParts.filter((p) => { return !isSyntheticTextPart(p) }),
128
+ ]
129
+ for (const part of sorted) {
130
+ const metadata = parseDiscordOriginMetadata(part.text || '')
132
131
  if (metadata) {
133
132
  return metadata
134
133
  }
135
134
  }
136
-
137
135
  return null
138
136
  }
139
137
 
@@ -1,8 +1,9 @@
1
1
  // E2e test for question tool: user text message during pending question should
2
- // be consumed as the answer and NOT also sent as a duplicate promptAsync.
3
- // Reproduces the bug from commit a4dfb01 where the same message was sent twice.
2
+ // dismiss the question (abort), then enqueue as a normal user prompt.
3
+ // The user's message must appear as a real user message in the thread, not
4
+ // get consumed as a tool result answer (which lost voice/image content).
4
5
 
5
- import { describe, test, expect } from 'vitest'
6
+ import { describe, test, expect, afterEach } from 'vitest'
6
7
  import {
7
8
  setupQueueAdvancedSuite,
8
9
  TEST_USER_ID,
@@ -12,8 +13,10 @@ import {
12
13
  waitForFooterMessage,
13
14
  } from './test-utils.js'
14
15
  import { pendingQuestionContexts } from './commands/ask-question.js'
16
+ import { store, type DeterministicTranscriptionConfig } from './store.js'
15
17
 
16
18
  const TEXT_CHANNEL_ID = '200000000000001007'
19
+ const VOICE_CHANNEL_ID = '200000000000001017'
17
20
 
18
21
  async function waitForPendingQuestion({
19
22
  threadId,
@@ -59,7 +62,13 @@ async function waitForNoPendingQuestion({
59
62
  throw new Error('Timed out waiting for question context cleanup')
60
63
  }
61
64
 
62
- describe('queue advanced: question tool text answer', () => {
65
+ function setDeterministicTranscription(config: DeterministicTranscriptionConfig | null) {
66
+ store.setState({
67
+ test: { deterministicTranscription: config },
68
+ })
69
+ }
70
+
71
+ describe('queue advanced: question tool answer', () => {
63
72
  const ctx = setupQueueAdvancedSuite({
64
73
  channelId: TEXT_CHANNEL_ID,
65
74
  channelName: 'qa-question-e2e',
@@ -67,8 +76,12 @@ describe('queue advanced: question tool text answer', () => {
67
76
  username: 'queue-question-tester',
68
77
  })
69
78
 
79
+ afterEach(() => {
80
+ setDeterministicTranscription(null)
81
+ })
82
+
70
83
  test(
71
- 'user text message answers pending question without sending duplicate prompt',
84
+ 'user text message dismisses pending question and enqueues as normal prompt',
72
85
  async () => {
73
86
  await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
74
87
  content: 'QUESTION_TEXT_ANSWER_MARKER',
@@ -100,35 +113,19 @@ describe('queue advanced: question tool text answer', () => {
100
113
 
101
114
  // User sends a text message while question is pending.
102
115
  // This should:
103
- // 1. Answer the question via cancelPendingQuestion (consumed as answer)
104
- // 2. NOT also send as a new promptAsync (the fix)
105
- // 3. Clean up the pending question context
116
+ // 1. Dismiss the pending question (cleanup context)
117
+ // 2. Abort the blocked session so OpenCode unblocks
118
+ // 3. Enqueue the message as a normal user prompt (not consumed as answer)
106
119
  await th.user(TEST_USER_ID).sendMessage({
107
120
  content: 'my text answer',
108
121
  })
109
122
 
110
- // Pending question context should be cleaned up after answer
123
+ // Pending question context should be cleaned up
111
124
  await waitForNoPendingQuestion({
112
125
  threadId: thread.id,
113
126
  timeoutMs: 4_000,
114
127
  })
115
128
 
116
- // Wait for second question dropdown (from question-answer followup —
117
- // OpenCode calls LLM again with same prompt after question tool completes,
118
- // deterministic matcher fires question tool again). This is expected.
119
- // Poll for it instead of sleeping.
120
- const start = Date.now()
121
- while (Date.now() - start < 4_000) {
122
- const msgs = await th.getMessages()
123
- const questionMsgs = msgs.filter((m) => {
124
- return m.content.includes('Which option do you prefer?')
125
- })
126
- if (questionMsgs.length >= 2) {
127
- break
128
- }
129
- await new Promise<void>((r) => { setTimeout(r, 50) })
130
- }
131
-
132
129
  const timeline = await th.text({ showInteractions: true })
133
130
  expect(timeline).toMatchInlineSnapshot(`
134
131
  "--- from: user (queue-question-tester)
@@ -137,21 +134,118 @@ describe('queue advanced: question tool text answer', () => {
137
134
  **Pick one**
138
135
  Which option do you prefer?
139
136
  --- from: user (queue-question-tester)
140
- my text answer
141
- --- from: assistant (TestBot)
142
- **Pick one**
143
- Which option do you prefer?"
137
+ my text answer"
144
138
  `)
145
139
 
146
- // The user's "my text answer" message must appear in the thread
140
+ // The user's message must appear in Discord
147
141
  expect(timeline).toContain('my text answer')
148
142
 
149
- // Key regression assertion: without the fix, the user's text message
150
- // is ALSO sent as a duplicate promptAsync which triggers a THIRD question
151
- // dropdown. With the fix, only 2 dropdowns appear (initial + followup
152
- // from question answer). Count occurrences of "Which option do you prefer?"
143
+ // Only 1 question dropdown should appear: the pending question was dismissed
144
+ // and the text message was enqueued as a normal prompt (no second dropdown).
153
145
  const questionCount = (timeline.match(/Which option do you prefer\?/g) || []).length
154
- expect(questionCount).toBe(2)
146
+ expect(questionCount).toBe(1)
147
+ },
148
+ 20_000,
149
+ )
150
+
151
+ })
152
+
153
+ describe('queue advanced: voice message during pending question', () => {
154
+ const ctx = setupQueueAdvancedSuite({
155
+ channelId: VOICE_CHANNEL_ID,
156
+ channelName: 'qa-question-voice-e2e',
157
+ dirName: 'qa-question-voice-e2e',
158
+ username: 'queue-question-tester',
159
+ })
160
+
161
+ afterEach(() => {
162
+ setDeterministicTranscription(null)
163
+ })
164
+
165
+ test(
166
+ 'voice message during pending question dismisses question and transcribes normally',
167
+ async () => {
168
+ // This is the exact bug scenario: user sends a voice message while a
169
+ // question dropdown is pending. Voice messages have empty message.content
170
+ // (audio is in attachments, transcription happens later). The old code
171
+ // passed "" as the question answer and consumed the message — the voice
172
+ // content was completely lost.
173
+ await ctx.discord.channel(VOICE_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
174
+ content: 'QUESTION_TEXT_ANSWER_MARKER',
175
+ })
176
+
177
+ const thread = await ctx.discord.channel(VOICE_CHANNEL_ID).waitForThread({
178
+ timeout: 4_000,
179
+ predicate: (t) => {
180
+ return t.name === 'QUESTION_TEXT_ANSWER_MARKER'
181
+ },
182
+ })
183
+
184
+ const th = ctx.discord.thread(thread.id)
185
+
186
+ // Wait for the question dropdown to appear
187
+ await waitForPendingQuestion({
188
+ threadId: thread.id,
189
+ timeoutMs: 4_000,
190
+ })
191
+
192
+ await waitForBotMessageContaining({
193
+ discord: ctx.discord,
194
+ threadId: thread.id,
195
+ text: 'Which option do you prefer?',
196
+ timeout: 4_000,
197
+ })
198
+
199
+ // Send a voice message while the question is pending.
200
+ // message.content is "" for voice messages — only the attachment exists.
201
+ setDeterministicTranscription({
202
+ transcription: 'I want option Alpha please',
203
+ queueMessage: false,
204
+ })
205
+
206
+ await th.user(TEST_USER_ID).sendVoiceMessage()
207
+
208
+ // Question context should be cleaned up (no reply is sent; the blocked session is aborted separately)
209
+ await waitForNoPendingQuestion({
210
+ threadId: thread.id,
211
+ timeoutMs: 4_000,
212
+ })
213
+
214
+ // Voice content should be transcribed and appear as the next user message,
215
+ // processed as a normal prompt once the pending question has been dismissed.
216
+ await waitForBotMessageContaining({
217
+ discord: ctx.discord,
218
+ threadId: thread.id,
219
+ text: 'I want option Alpha please',
220
+ timeout: 4_000,
221
+ })
222
+
223
+ await waitForFooterMessage({
224
+ discord: ctx.discord,
225
+ threadId: thread.id,
226
+ timeout: 4_000,
227
+ afterMessageIncludes: 'I want option Alpha please',
228
+ afterAuthorId: ctx.discord.botUserId,
229
+ })
230
+
231
+ const timeline = await th.text({ showInteractions: true })
232
+ expect(timeline).toMatchInlineSnapshot(`
233
+ "--- from: user (queue-question-tester)
234
+ QUESTION_TEXT_ANSWER_MARKER
235
+ --- from: assistant (TestBot)
236
+ **Pick one**
237
+ Which option do you prefer?
238
+ --- from: user (queue-question-tester)
239
+ [attachment: voice-message.ogg]
240
+ --- from: assistant (TestBot)
241
+ 🎤 Transcribing voice message...
242
+ 📝 **Transcribed message:** I want option Alpha please
243
+ ⬥ ok
244
+ *project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
245
+ `)
246
+
247
+ // Voice content must be present as a real transcribed message, not lost
248
+ expect(timeline).toContain('I want option Alpha please')
155
249
  },
156
250
  20_000,
157
251
  )
@@ -3510,13 +3510,20 @@ export class ThreadSessionRuntime {
3510
3510
  if (input.command) {
3511
3511
  const queuedCommand = input.command
3512
3512
  const commandSignal = AbortSignal.timeout(30_000)
3513
+ // session.command() only accepts FilePart in parts, not text parts.
3514
+ // Append <discord-user /> tag to arguments so external sync can
3515
+ // detect this message came from Discord (same tag as promptAsync).
3516
+ const discordTag = input.username
3517
+ ? `\n<discord-user name="${input.username}" />`
3518
+ : ''
3513
3519
  const commandResponse = await errore.tryAsync(() => {
3514
3520
  return getClient().session.command(
3515
3521
  {
3516
3522
  sessionID: session.id,
3523
+
3517
3524
  directory: this.sdkDirectory,
3518
3525
  command: queuedCommand.name,
3519
- arguments: queuedCommand.arguments,
3526
+ arguments: queuedCommand.arguments + discordTag,
3520
3527
  agent: earlyAgentPreference,
3521
3528
  ...variantField,
3522
3529
  },