kimaki 0.4.84 → 0.4.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ask-question.js +22 -8
- package/dist/discord-bot.js +29 -9
- package/dist/external-opencode-sync.js +11 -11
- package/dist/queue-advanced-question.e2e.test.js +108 -34
- package/dist/session-handler/thread-session-runtime.js +7 -1
- package/package.json +3 -3
- package/src/commands/ask-question.ts +23 -8
- package/src/discord-bot.ts +30 -9
- package/src/external-opencode-sync.ts +11 -13
- package/src/queue-advanced-question.e2e.test.ts +129 -35
- package/src/session-handler/thread-session-runtime.ts +8 -1
package/dist/commands/ask-question.js
CHANGED
|
@@ -11,6 +11,11 @@ const logger = createLogger(LogPrefix.ASK_QUESTION);
|
|
|
11
11
|
// TTL prevents unbounded growth if user never answers a question.
|
|
12
12
|
const QUESTION_CONTEXT_TTL_MS = 10 * 60 * 1000;
|
|
13
13
|
export const pendingQuestionContexts = new Map();
|
|
14
|
+
export function hasPendingQuestionForThread(threadId) {
|
|
15
|
+
return [...pendingQuestionContexts.values()].some((ctx) => {
|
|
16
|
+
return ctx.thread.id === threadId;
|
|
17
|
+
});
|
|
18
|
+
}
|
|
14
19
|
/**
|
|
15
20
|
* Show dropdown menus for question tool input.
|
|
16
21
|
* Sends one message per question with the dropdown directly under the question text.
|
|
@@ -205,13 +210,21 @@ export function parseAskUserQuestionTool(part) {
|
|
|
205
210
|
return input;
|
|
206
211
|
}
|
|
207
212
|
/**
|
|
208
|
-
* Cancel a pending question for a thread
|
|
209
|
-
*
|
|
213
|
+
* Cancel a pending question for a thread.
|
|
214
|
+
*
|
|
215
|
+
* Two modes depending on whether `userMessage` is provided:
|
|
216
|
+
*
|
|
217
|
+
* - `cancelPendingQuestion(threadId)` — cleanup only. Removes the context
|
|
218
|
+
* without replying to OpenCode. Use when aborting the blocked session
|
|
219
|
+
* separately (e.g. voice/attachment messages whose content needs
|
|
220
|
+
* transcription first). Returns 'no-pending' in both "found+cleaned" and
|
|
221
|
+
* "nothing found" cases.
|
|
210
222
|
*
|
|
211
|
-
*
|
|
212
|
-
*
|
|
213
|
-
*
|
|
214
|
-
*
|
|
223
|
+
* - `cancelPendingQuestion(threadId, text)` — reply path. Sends the text as
|
|
224
|
+
* the tool answer so the model sees the user's response. The caller should
|
|
225
|
+
* NOT also enqueue the message as a new prompt.
|
|
226
|
+
* Returns 'replied' on success, 'reply-failed' if the reply call fails
|
|
227
|
+
* (context kept pending so TTL can retry).
|
|
215
228
|
*/
|
|
216
229
|
export async function cancelPendingQuestion(threadId, userMessage) {
|
|
217
230
|
// Find pending question for this thread
|
|
@@ -228,8 +241,9 @@ export async function cancelPendingQuestion(threadId, userMessage) {
|
|
|
228
241
|
return 'no-pending';
|
|
229
242
|
}
|
|
230
243
|
// undefined means teardown/cleanup — just remove context, don't reply.
|
|
231
|
-
// The session is already being torn down
|
|
232
|
-
//
|
|
244
|
+
// The session is already being torn down or the caller wants to dismiss
|
|
245
|
+
// the question without providing an answer (e.g. voice/attachment-only
|
|
246
|
+
// messages where content needs transcription before it can be an answer).
|
|
233
247
|
if (userMessage === undefined) {
|
|
234
248
|
pendingQuestionContexts.delete(contextHash);
|
|
235
249
|
return 'no-pending';
|
package/dist/discord-bot.js
CHANGED
|
@@ -12,7 +12,7 @@ import { getTextAttachments, resolveMentions, } from './message-formatting.js';
|
|
|
12
12
|
import { isVoiceAttachment } from './voice-attachment.js';
|
|
13
13
|
import { preprocessExistingThreadMessage, preprocessNewThreadMessage, } from './message-preprocessing.js';
|
|
14
14
|
import { cancelPendingActionButtons } from './commands/action-buttons.js';
|
|
15
|
-
import { cancelPendingQuestion } from './commands/ask-question.js';
|
|
15
|
+
import { cancelPendingQuestion, hasPendingQuestionForThread } from './commands/ask-question.js';
|
|
16
16
|
import { cancelPendingFileUpload } from './commands/file-upload.js';
|
|
17
17
|
import { cancelPendingPermission } from './commands/permissions.js';
|
|
18
18
|
import { cancelHtmlActionsForThread } from './html-actions.js';
|
|
@@ -185,7 +185,11 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
|
|
|
185
185
|
await setupHandlers(discordClient);
|
|
186
186
|
}
|
|
187
187
|
else {
|
|
188
|
-
discordClient.once(Events.ClientReady,
|
|
188
|
+
discordClient.once(Events.ClientReady, (readyClient) => {
|
|
189
|
+
void setupHandlers(readyClient).catch((error) => {
|
|
190
|
+
discordLogger.error(`[GATEWAY] ClientReady handler failed: ${formatErrorWithStack(error)}`);
|
|
191
|
+
});
|
|
192
|
+
});
|
|
189
193
|
}
|
|
190
194
|
discordClient.on(Events.Error, (error) => {
|
|
191
195
|
discordLogger.error('[GATEWAY] Client error:', formatErrorWithStack(error));
|
|
@@ -434,9 +438,6 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
|
|
|
434
438
|
appId: currentAppId,
|
|
435
439
|
});
|
|
436
440
|
// Cancel interactive UI when a real user sends a message.
|
|
437
|
-
// If a question was pending and answered with the user's text,
|
|
438
|
-
// early-return: the message was consumed as the question answer
|
|
439
|
-
// and must NOT also be sent as a new prompt (causes abort loops).
|
|
440
441
|
if (!message.author.bot && !isCliInjectedPrompt) {
|
|
441
442
|
cancelPendingActionButtons(thread.id);
|
|
442
443
|
cancelHtmlActionsForThread(thread.id);
|
|
@@ -446,11 +447,30 @@ export async function startDiscordBot({ token, appId, discordClient, useWorktree
|
|
|
446
447
|
reason: 'user sent a new message while permission was pending',
|
|
447
448
|
});
|
|
448
449
|
}
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
450
|
+
// For text messages: pass the content as the question answer so the
|
|
451
|
+
// model sees the user's response. The early return prevents the message
|
|
452
|
+
// from also being sent as a new prompt (duplicate).
|
|
453
|
+
// For voice/image messages: message.content is "" (audio is in
|
|
454
|
+
// attachments, transcription happens later). Passing "" as the answer
|
|
455
|
+
// loses the content entirely. Instead, reply with "" to properly
|
|
456
|
+
// unblock OpenCode's question.waitForReply (without a reply the next
|
|
457
|
+
// promptAsync immediately fails with MessageAbortedError), then let
|
|
458
|
+
// the voice message flow through normal preprocessing — it gets
|
|
459
|
+
// transcribed and queued as the next user message after the model
|
|
460
|
+
// finishes responding to the empty answer.
|
|
461
|
+
if (message.content.trim().length > 0) {
|
|
462
|
+
const questionResult = await cancelPendingQuestion(thread.id, message.content);
|
|
463
|
+
if (questionResult === 'replied') {
|
|
464
|
+
void cancelPendingFileUpload(thread.id);
|
|
465
|
+
return;
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
else if (hasPendingQuestionForThread(thread.id)) {
|
|
469
|
+
// Reply empty to unblock the question tool — no early return so
|
|
470
|
+
// the voice/image message continues through to enqueueIncoming.
|
|
471
|
+
await cancelPendingQuestion(thread.id, '');
|
|
453
472
|
}
|
|
473
|
+
void cancelPendingFileUpload(thread.id);
|
|
454
474
|
}
|
|
455
475
|
// Expensive pre-processing (voice transcription, context fetch,
|
|
456
476
|
// attachment download) runs inside the runtime's serialized
|
package/dist/external-opencode-sync.js
CHANGED
|
@@ -19,7 +19,7 @@ function isSyntheticTextPart(part) {
|
|
|
19
19
|
return candidate.synthetic === true;
|
|
20
20
|
}
|
|
21
21
|
function parseDiscordOriginMetadata(text) {
|
|
22
|
-
const match = text.match(
|
|
22
|
+
const match = text.match(/<discord-user\s+([^>]+)\s*\/>/);
|
|
23
23
|
if (!match?.[1]) {
|
|
24
24
|
return null;
|
|
25
25
|
}
|
|
@@ -42,17 +42,17 @@ function parseDiscordOriginMetadata(text) {
|
|
|
42
42
|
};
|
|
43
43
|
}
|
|
44
44
|
function getDiscordOriginMetadataFromMessage({ message, }) {
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
return [];
|
|
48
|
-
}
|
|
49
|
-
if (!isSyntheticTextPart(part)) {
|
|
50
|
-
return [];
|
|
51
|
-
}
|
|
52
|
-
return [part.text || ''];
|
|
45
|
+
const textParts = message.parts.filter((p) => {
|
|
46
|
+
return p.type === 'text';
|
|
53
47
|
});
|
|
54
|
-
|
|
55
|
-
|
|
48
|
+
// Synthetic parts first (normal promptAsync path), then non-synthetic
|
|
49
|
+
// (session.command() path where the tag is embedded in arguments text).
|
|
50
|
+
const sorted = [
|
|
51
|
+
...textParts.filter((p) => { return isSyntheticTextPart(p); }),
|
|
52
|
+
...textParts.filter((p) => { return !isSyntheticTextPart(p); }),
|
|
53
|
+
];
|
|
54
|
+
for (const part of sorted) {
|
|
55
|
+
const metadata = parseDiscordOriginMetadata(part.text || '');
|
|
56
56
|
if (metadata) {
|
|
57
57
|
return metadata;
|
|
58
58
|
}
|
package/dist/queue-advanced-question.e2e.test.js
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
// E2e test for question tool: user text message during pending question should
|
|
2
|
-
//
|
|
3
|
-
//
|
|
4
|
-
|
|
2
|
+
// dismiss the question (abort), then enqueue as a normal user prompt.
|
|
3
|
+
// The user's message must appear as a real user message in the thread, not
|
|
4
|
+
// get consumed as a tool result answer (which lost voice/image content).
|
|
5
|
+
import { describe, test, expect, afterEach } from 'vitest';
|
|
5
6
|
import { setupQueueAdvancedSuite, TEST_USER_ID, } from './queue-advanced-e2e-setup.js';
|
|
6
7
|
import { waitForBotMessageContaining, waitForFooterMessage, } from './test-utils.js';
|
|
7
8
|
import { pendingQuestionContexts } from './commands/ask-question.js';
|
|
9
|
+
import { store } from './store.js';
|
|
8
10
|
const TEXT_CHANNEL_ID = '200000000000001007';
|
|
11
|
+
const VOICE_CHANNEL_ID = '200000000000001017';
|
|
9
12
|
async function waitForPendingQuestion({ threadId, timeoutMs, }) {
|
|
10
13
|
const start = Date.now();
|
|
11
14
|
while (Date.now() - start < timeoutMs) {
|
|
@@ -36,14 +39,22 @@ async function waitForNoPendingQuestion({ threadId, timeoutMs, }) {
|
|
|
36
39
|
}
|
|
37
40
|
throw new Error('Timed out waiting for question context cleanup');
|
|
38
41
|
}
|
|
39
|
-
|
|
42
|
+
function setDeterministicTranscription(config) {
|
|
43
|
+
store.setState({
|
|
44
|
+
test: { deterministicTranscription: config },
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
describe('queue advanced: question tool answer', () => {
|
|
40
48
|
const ctx = setupQueueAdvancedSuite({
|
|
41
49
|
channelId: TEXT_CHANNEL_ID,
|
|
42
50
|
channelName: 'qa-question-e2e',
|
|
43
51
|
dirName: 'qa-question-e2e',
|
|
44
52
|
username: 'queue-question-tester',
|
|
45
53
|
});
|
|
46
|
-
|
|
54
|
+
afterEach(() => {
|
|
55
|
+
setDeterministicTranscription(null);
|
|
56
|
+
});
|
|
57
|
+
test('user text message dismisses pending question and enqueues as normal prompt', async () => {
|
|
47
58
|
await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
48
59
|
content: 'QUESTION_TEXT_ANSWER_MARKER',
|
|
49
60
|
});
|
|
@@ -69,32 +80,17 @@ describe('queue advanced: question tool text answer', () => {
|
|
|
69
80
|
});
|
|
70
81
|
// User sends a text message while question is pending.
|
|
71
82
|
// This should:
|
|
72
|
-
// 1.
|
|
73
|
-
// 2.
|
|
74
|
-
// 3.
|
|
83
|
+
// 1. Dismiss the pending question (cleanup context)
|
|
84
|
+
// 2. Abort the blocked session so OpenCode unblocks
|
|
85
|
+
// 3. Enqueue the message as a normal user prompt (not consumed as answer)
|
|
75
86
|
await th.user(TEST_USER_ID).sendMessage({
|
|
76
87
|
content: 'my text answer',
|
|
77
88
|
});
|
|
78
|
-
// Pending question context should be cleaned up
|
|
89
|
+
// Pending question context should be cleaned up
|
|
79
90
|
await waitForNoPendingQuestion({
|
|
80
91
|
threadId: thread.id,
|
|
81
92
|
timeoutMs: 4_000,
|
|
82
93
|
});
|
|
83
|
-
// Wait for second question dropdown (from question-answer followup —
|
|
84
|
-
// OpenCode calls LLM again with same prompt after question tool completes,
|
|
85
|
-
// deterministic matcher fires question tool again). This is expected.
|
|
86
|
-
// Poll for it instead of sleeping.
|
|
87
|
-
const start = Date.now();
|
|
88
|
-
while (Date.now() - start < 4_000) {
|
|
89
|
-
const msgs = await th.getMessages();
|
|
90
|
-
const questionMsgs = msgs.filter((m) => {
|
|
91
|
-
return m.content.includes('Which option do you prefer?');
|
|
92
|
-
});
|
|
93
|
-
if (questionMsgs.length >= 2) {
|
|
94
|
-
break;
|
|
95
|
-
}
|
|
96
|
-
await new Promise((r) => { setTimeout(r, 50); });
|
|
97
|
-
}
|
|
98
94
|
const timeline = await th.text({ showInteractions: true });
|
|
99
95
|
expect(timeline).toMatchInlineSnapshot(`
|
|
100
96
|
"--- from: user (queue-question-tester)
|
|
@@ -103,18 +99,96 @@ describe('queue advanced: question tool text answer', () => {
|
|
|
103
99
|
**Pick one**
|
|
104
100
|
Which option do you prefer?
|
|
105
101
|
--- from: user (queue-question-tester)
|
|
106
|
-
my text answer
|
|
107
|
-
--- from: assistant (TestBot)
|
|
108
|
-
**Pick one**
|
|
109
|
-
Which option do you prefer?"
|
|
102
|
+
my text answer"
|
|
110
103
|
`);
|
|
111
|
-
// The user's
|
|
104
|
+
// The user's message must appear in Discord
|
|
112
105
|
expect(timeline).toContain('my text answer');
|
|
113
|
-
//
|
|
114
|
-
//
|
|
115
|
-
// dropdown. With the fix, only 2 dropdowns appear (initial + followup
|
|
116
|
-
// from question answer). Count occurrences of "Which option do you prefer?"
|
|
106
|
+
// Only 1 question dropdown — text message was consumed as the answer,
|
|
107
|
+
// no duplicate prompt was sent (which would trigger a second dropdown).
|
|
117
108
|
const questionCount = (timeline.match(/Which option do you prefer\?/g) || []).length;
|
|
118
|
-
expect(questionCount).toBe(2);
|
|
109
|
+
expect(questionCount).toBe(1);
|
|
110
|
+
}, 20_000);
|
|
111
|
+
});
|
|
112
|
+
describe('queue advanced: voice message during pending question', () => {
|
|
113
|
+
const ctx = setupQueueAdvancedSuite({
|
|
114
|
+
channelId: VOICE_CHANNEL_ID,
|
|
115
|
+
channelName: 'qa-question-voice-e2e',
|
|
116
|
+
dirName: 'qa-question-voice-e2e',
|
|
117
|
+
username: 'queue-question-tester',
|
|
118
|
+
});
|
|
119
|
+
afterEach(() => {
|
|
120
|
+
setDeterministicTranscription(null);
|
|
121
|
+
});
|
|
122
|
+
test('voice message during pending question dismisses question and transcribes normally', async () => {
|
|
123
|
+
// This is the exact bug scenario: user sends a voice message while a
|
|
124
|
+
// question dropdown is pending. Voice messages have empty message.content
|
|
125
|
+
// (audio is in attachments, transcription happens later). The old code
|
|
126
|
+
// passed "" as the question answer and consumed the message — the voice
|
|
127
|
+
// content was completely lost.
|
|
128
|
+
await ctx.discord.channel(VOICE_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
129
|
+
content: 'QUESTION_TEXT_ANSWER_MARKER',
|
|
130
|
+
});
|
|
131
|
+
const thread = await ctx.discord.channel(VOICE_CHANNEL_ID).waitForThread({
|
|
132
|
+
timeout: 4_000,
|
|
133
|
+
predicate: (t) => {
|
|
134
|
+
return t.name === 'QUESTION_TEXT_ANSWER_MARKER';
|
|
135
|
+
},
|
|
136
|
+
});
|
|
137
|
+
const th = ctx.discord.thread(thread.id);
|
|
138
|
+
// Wait for the question dropdown to appear
|
|
139
|
+
await waitForPendingQuestion({
|
|
140
|
+
threadId: thread.id,
|
|
141
|
+
timeoutMs: 4_000,
|
|
142
|
+
});
|
|
143
|
+
await waitForBotMessageContaining({
|
|
144
|
+
discord: ctx.discord,
|
|
145
|
+
threadId: thread.id,
|
|
146
|
+
text: 'Which option do you prefer?',
|
|
147
|
+
timeout: 4_000,
|
|
148
|
+
});
|
|
149
|
+
// Send a voice message while the question is pending.
|
|
150
|
+
// message.content is "" for voice messages — only the attachment exists.
|
|
151
|
+
setDeterministicTranscription({
|
|
152
|
+
transcription: 'I want option Alpha please',
|
|
153
|
+
queueMessage: false,
|
|
154
|
+
});
|
|
155
|
+
await th.user(TEST_USER_ID).sendVoiceMessage();
|
|
156
|
+
// Question context should be cleaned up (empty reply sent to unblock OpenCode)
|
|
157
|
+
await waitForNoPendingQuestion({
|
|
158
|
+
threadId: thread.id,
|
|
159
|
+
timeoutMs: 4_000,
|
|
160
|
+
});
|
|
161
|
+
// Voice content should be transcribed and appear as the next user message,
|
|
162
|
+
// processed after the model responds to the empty question answer.
|
|
163
|
+
await waitForBotMessageContaining({
|
|
164
|
+
discord: ctx.discord,
|
|
165
|
+
threadId: thread.id,
|
|
166
|
+
text: 'I want option Alpha please',
|
|
167
|
+
timeout: 4_000,
|
|
168
|
+
});
|
|
169
|
+
await waitForFooterMessage({
|
|
170
|
+
discord: ctx.discord,
|
|
171
|
+
threadId: thread.id,
|
|
172
|
+
timeout: 4_000,
|
|
173
|
+
afterMessageIncludes: 'I want option Alpha please',
|
|
174
|
+
afterAuthorId: ctx.discord.botUserId,
|
|
175
|
+
});
|
|
176
|
+
const timeline = await th.text({ showInteractions: true });
|
|
177
|
+
expect(timeline).toMatchInlineSnapshot(`
|
|
178
|
+
"--- from: user (queue-question-tester)
|
|
179
|
+
QUESTION_TEXT_ANSWER_MARKER
|
|
180
|
+
--- from: assistant (TestBot)
|
|
181
|
+
**Pick one**
|
|
182
|
+
Which option do you prefer?
|
|
183
|
+
--- from: user (queue-question-tester)
|
|
184
|
+
[attachment: voice-message.ogg]
|
|
185
|
+
--- from: assistant (TestBot)
|
|
186
|
+
🎤 Transcribing voice message...
|
|
187
|
+
📝 **Transcribed message:** I want option Alpha please
|
|
188
|
+
⬥ ok
|
|
189
|
+
*project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
|
|
190
|
+
`);
|
|
191
|
+
// Voice content must be present as a real transcribed message, not lost
|
|
192
|
+
expect(timeline).toContain('I want option Alpha please');
|
|
119
193
|
}, 20_000);
|
|
120
194
|
});
|
package/dist/session-handler/thread-session-runtime.js
CHANGED
|
@@ -2667,12 +2667,18 @@ export class ThreadSessionRuntime {
|
|
|
2667
2667
|
if (input.command) {
|
|
2668
2668
|
const queuedCommand = input.command;
|
|
2669
2669
|
const commandSignal = AbortSignal.timeout(30_000);
|
|
2670
|
+
// session.command() only accepts FilePart in parts, not text parts.
|
|
2671
|
+
// Append <discord-user /> tag to arguments so external sync can
|
|
2672
|
+
// detect this message came from Discord (same tag as promptAsync).
|
|
2673
|
+
const discordTag = input.username
|
|
2674
|
+
? `\n<discord-user name="${input.username}" />`
|
|
2675
|
+
: '';
|
|
2670
2676
|
const commandResponse = await errore.tryAsync(() => {
|
|
2671
2677
|
return getClient().session.command({
|
|
2672
2678
|
sessionID: session.id,
|
|
2673
2679
|
directory: this.sdkDirectory,
|
|
2674
2680
|
command: queuedCommand.name,
|
|
2675
|
-
arguments: queuedCommand.arguments,
|
|
2681
|
+
arguments: queuedCommand.arguments + discordTag,
|
|
2676
2682
|
agent: earlyAgentPreference,
|
|
2677
2683
|
...variantField,
|
|
2678
2684
|
}, { signal: commandSignal });
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "kimaki",
|
|
3
3
|
"module": "index.ts",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"version": "0.4.84",
|
|
5
|
+
"version": "0.4.86",
|
|
6
6
|
"repository": "https://github.com/remorses/kimaki",
|
|
7
7
|
"bin": "bin.js",
|
|
8
8
|
"files": [
|
|
@@ -67,8 +67,8 @@
|
|
|
67
67
|
"zod": "^4.3.6",
|
|
68
68
|
"zustand": "^5.0.11",
|
|
69
69
|
"errore": "^0.14.1",
|
|
70
|
-
"
|
|
71
|
-
"
|
|
70
|
+
"traforo": "^0.2.0",
|
|
71
|
+
"libsqlproxy": "^0.1.0"
|
|
72
72
|
},
|
|
73
73
|
"optionalDependencies": {
|
|
74
74
|
"@discordjs/opus": "^0.10.0",
|
package/src/commands/ask-question.ts
CHANGED
|
@@ -49,6 +49,12 @@ type PendingQuestionContext = {
|
|
|
49
49
|
const QUESTION_CONTEXT_TTL_MS = 10 * 60 * 1000
|
|
50
50
|
export const pendingQuestionContexts = new Map<string, PendingQuestionContext>()
|
|
51
51
|
|
|
52
|
+
export function hasPendingQuestionForThread(threadId: string): boolean {
|
|
53
|
+
return [...pendingQuestionContexts.values()].some((ctx) => {
|
|
54
|
+
return ctx.thread.id === threadId
|
|
55
|
+
})
|
|
56
|
+
}
|
|
57
|
+
|
|
52
58
|
/**
|
|
53
59
|
* Show dropdown menus for question tool input.
|
|
54
60
|
* Sends one message per question with the dropdown directly under the question text.
|
|
@@ -311,13 +317,21 @@ export function parseAskUserQuestionTool(part: {
|
|
|
311
317
|
}
|
|
312
318
|
|
|
313
319
|
/**
|
|
314
|
-
* Cancel a pending question for a thread
|
|
315
|
-
*
|
|
320
|
+
* Cancel a pending question for a thread.
|
|
321
|
+
*
|
|
322
|
+
* Two modes depending on whether `userMessage` is provided:
|
|
323
|
+
*
|
|
324
|
+
* - `cancelPendingQuestion(threadId)` — cleanup only. Removes the context
|
|
325
|
+
* without replying to OpenCode. Use when aborting the blocked session
|
|
326
|
+
* separately (e.g. voice/attachment messages whose content needs
|
|
327
|
+
* transcription first). Returns 'no-pending' in both "found+cleaned" and
|
|
328
|
+
* "nothing found" cases.
|
|
316
329
|
*
|
|
317
|
-
*
|
|
318
|
-
*
|
|
319
|
-
*
|
|
320
|
-
*
|
|
330
|
+
* - `cancelPendingQuestion(threadId, text)` — reply path. Sends the text as
|
|
331
|
+
* the tool answer so the model sees the user's response. The caller should
|
|
332
|
+
* NOT also enqueue the message as a new prompt.
|
|
333
|
+
* Returns 'replied' on success, 'reply-failed' if the reply call fails
|
|
334
|
+
* (context kept pending so TTL can retry).
|
|
321
335
|
*/
|
|
322
336
|
export async function cancelPendingQuestion(
|
|
323
337
|
threadId: string,
|
|
@@ -339,8 +353,9 @@ export async function cancelPendingQuestion(
|
|
|
339
353
|
}
|
|
340
354
|
|
|
341
355
|
// undefined means teardown/cleanup — just remove context, don't reply.
|
|
342
|
-
// The session is already being torn down
|
|
343
|
-
//
|
|
356
|
+
// The session is already being torn down or the caller wants to dismiss
|
|
357
|
+
// the question without providing an answer (e.g. voice/attachment-only
|
|
358
|
+
// messages where content needs transcription before it can be an answer).
|
|
344
359
|
if (userMessage === undefined) {
|
|
345
360
|
pendingQuestionContexts.delete(contextHash)
|
|
346
361
|
return 'no-pending'
|
package/src/discord-bot.ts
CHANGED
|
@@ -46,7 +46,7 @@ import {
|
|
|
46
46
|
preprocessNewThreadMessage,
|
|
47
47
|
} from './message-preprocessing.js'
|
|
48
48
|
import { cancelPendingActionButtons } from './commands/action-buttons.js'
|
|
49
|
-
import { cancelPendingQuestion,
|
|
49
|
+
import { cancelPendingQuestion, hasPendingQuestionForThread } from './commands/ask-question.js'
|
|
50
50
|
import { cancelPendingFileUpload } from './commands/file-upload.js'
|
|
51
51
|
import { cancelPendingPermission } from './commands/permissions.js'
|
|
52
52
|
import { cancelHtmlActionsForThread } from './html-actions.js'
|
|
@@ -316,7 +316,13 @@ export async function startDiscordBot({
|
|
|
316
316
|
if (discordClient.isReady()) {
|
|
317
317
|
await setupHandlers(discordClient)
|
|
318
318
|
} else {
|
|
319
|
-
discordClient.once(Events.ClientReady,
|
|
319
|
+
discordClient.once(Events.ClientReady, (readyClient) => {
|
|
320
|
+
void setupHandlers(readyClient).catch((error) => {
|
|
321
|
+
discordLogger.error(
|
|
322
|
+
`[GATEWAY] ClientReady handler failed: ${formatErrorWithStack(error)}`,
|
|
323
|
+
)
|
|
324
|
+
})
|
|
325
|
+
})
|
|
320
326
|
}
|
|
321
327
|
|
|
322
328
|
discordClient.on(Events.Error, (error) => {
|
|
@@ -624,9 +630,6 @@ export async function startDiscordBot({
|
|
|
624
630
|
})
|
|
625
631
|
|
|
626
632
|
// Cancel interactive UI when a real user sends a message.
|
|
627
|
-
// If a question was pending and answered with the user's text,
|
|
628
|
-
// early-return: the message was consumed as the question answer
|
|
629
|
-
// and must NOT also be sent as a new prompt (causes abort loops).
|
|
630
633
|
if (!message.author.bot && !isCliInjectedPrompt) {
|
|
631
634
|
cancelPendingActionButtons(thread.id)
|
|
632
635
|
cancelHtmlActionsForThread(thread.id)
|
|
@@ -636,11 +639,29 @@ export async function startDiscordBot({
|
|
|
636
639
|
reason: 'user sent a new message while permission was pending',
|
|
637
640
|
})
|
|
638
641
|
}
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
642
|
+
// For text messages: pass the content as the question answer so the
|
|
643
|
+
// model sees the user's response. The early return prevents the message
|
|
644
|
+
// from also being sent as a new prompt (duplicate).
|
|
645
|
+
// For voice/image messages: message.content is "" (audio is in
|
|
646
|
+
// attachments, transcription happens later). Passing "" as the answer
|
|
647
|
+
// loses the content entirely. Instead, reply with "" to properly
|
|
648
|
+
// unblock OpenCode's question.waitForReply (without a reply the next
|
|
649
|
+
// promptAsync immediately fails with MessageAbortedError), then let
|
|
650
|
+
// the voice message flow through normal preprocessing — it gets
|
|
651
|
+
// transcribed and queued as the next user message after the model
|
|
652
|
+
// finishes responding to the empty answer.
|
|
653
|
+
if (message.content.trim().length > 0) {
|
|
654
|
+
const questionResult = await cancelPendingQuestion(thread.id, message.content)
|
|
655
|
+
if (questionResult === 'replied') {
|
|
656
|
+
void cancelPendingFileUpload(thread.id)
|
|
657
|
+
return
|
|
658
|
+
}
|
|
659
|
+
} else if (hasPendingQuestionForThread(thread.id)) {
|
|
660
|
+
// Reply empty to unblock the question tool — no early return so
|
|
661
|
+
// the voice/image message continues through to enqueueIncoming.
|
|
662
|
+
await cancelPendingQuestion(thread.id, '')
|
|
643
663
|
}
|
|
664
|
+
void cancelPendingFileUpload(thread.id)
|
|
644
665
|
}
|
|
645
666
|
|
|
646
667
|
// Expensive pre-processing (voice transcription, context fetch,
|
package/src/external-opencode-sync.ts
CHANGED
|
@@ -86,7 +86,7 @@ function isSyntheticTextPart(part: Extract<Part, { type: 'text' }>): boolean {
|
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
function parseDiscordOriginMetadata(text: string): DiscordOriginMetadata | null {
|
|
89
|
-
const match = text.match(
|
|
89
|
+
const match = text.match(/<discord-user\s+([^>]+)\s*\/>/)
|
|
90
90
|
if (!match?.[1]) {
|
|
91
91
|
return null
|
|
92
92
|
}
|
|
@@ -117,23 +117,21 @@ function getDiscordOriginMetadataFromMessage({
|
|
|
117
117
|
}: {
|
|
118
118
|
message: SessionMessageLike
|
|
119
119
|
}): DiscordOriginMetadata | null {
|
|
120
|
-
const
|
|
121
|
-
|
|
122
|
-
return [] as string[]
|
|
123
|
-
}
|
|
124
|
-
if (!isSyntheticTextPart(part)) {
|
|
125
|
-
return [] as string[]
|
|
126
|
-
}
|
|
127
|
-
return [part.text || '']
|
|
120
|
+
const textParts = message.parts.filter((p): p is Extract<typeof p, { type: 'text' }> => {
|
|
121
|
+
return p.type === 'text'
|
|
128
122
|
})
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
123
|
+
// Synthetic parts first (normal promptAsync path), then non-synthetic
|
|
124
|
+
// (session.command() path where the tag is embedded in arguments text).
|
|
125
|
+
const sorted = [
|
|
126
|
+
...textParts.filter((p) => { return isSyntheticTextPart(p) }),
|
|
127
|
+
...textParts.filter((p) => { return !isSyntheticTextPart(p) }),
|
|
128
|
+
]
|
|
129
|
+
for (const part of sorted) {
|
|
130
|
+
const metadata = parseDiscordOriginMetadata(part.text || '')
|
|
132
131
|
if (metadata) {
|
|
133
132
|
return metadata
|
|
134
133
|
}
|
|
135
134
|
}
|
|
136
|
-
|
|
137
135
|
return null
|
|
138
136
|
}
|
|
139
137
|
|
package/src/queue-advanced-question.e2e.test.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
// E2e test for question tool: user text message during pending question should
|
|
2
|
-
//
|
|
3
|
-
//
|
|
2
|
+
// dismiss the question (abort), then enqueue as a normal user prompt.
|
|
3
|
+
// The user's message must appear as a real user message in the thread, not
|
|
4
|
+
// get consumed as a tool result answer (which lost voice/image content).
|
|
4
5
|
|
|
5
|
-
import { describe, test, expect } from 'vitest'
|
|
6
|
+
import { describe, test, expect, afterEach } from 'vitest'
|
|
6
7
|
import {
|
|
7
8
|
setupQueueAdvancedSuite,
|
|
8
9
|
TEST_USER_ID,
|
|
@@ -12,8 +13,10 @@ import {
|
|
|
12
13
|
waitForFooterMessage,
|
|
13
14
|
} from './test-utils.js'
|
|
14
15
|
import { pendingQuestionContexts } from './commands/ask-question.js'
|
|
16
|
+
import { store, type DeterministicTranscriptionConfig } from './store.js'
|
|
15
17
|
|
|
16
18
|
const TEXT_CHANNEL_ID = '200000000000001007'
|
|
19
|
+
const VOICE_CHANNEL_ID = '200000000000001017'
|
|
17
20
|
|
|
18
21
|
async function waitForPendingQuestion({
|
|
19
22
|
threadId,
|
|
@@ -59,7 +62,13 @@ async function waitForNoPendingQuestion({
|
|
|
59
62
|
throw new Error('Timed out waiting for question context cleanup')
|
|
60
63
|
}
|
|
61
64
|
|
|
62
|
-
|
|
65
|
+
function setDeterministicTranscription(config: DeterministicTranscriptionConfig | null) {
|
|
66
|
+
store.setState({
|
|
67
|
+
test: { deterministicTranscription: config },
|
|
68
|
+
})
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
describe('queue advanced: question tool answer', () => {
|
|
63
72
|
const ctx = setupQueueAdvancedSuite({
|
|
64
73
|
channelId: TEXT_CHANNEL_ID,
|
|
65
74
|
channelName: 'qa-question-e2e',
|
|
@@ -67,8 +76,12 @@ describe('queue advanced: question tool text answer', () => {
|
|
|
67
76
|
username: 'queue-question-tester',
|
|
68
77
|
})
|
|
69
78
|
|
|
79
|
+
afterEach(() => {
|
|
80
|
+
setDeterministicTranscription(null)
|
|
81
|
+
})
|
|
82
|
+
|
|
70
83
|
test(
|
|
71
|
-
'user text message
|
|
84
|
+
'user text message dismisses pending question and enqueues as normal prompt',
|
|
72
85
|
async () => {
|
|
73
86
|
await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
74
87
|
content: 'QUESTION_TEXT_ANSWER_MARKER',
|
|
@@ -100,35 +113,19 @@ describe('queue advanced: question tool text answer', () => {
|
|
|
100
113
|
|
|
101
114
|
// User sends a text message while question is pending.
|
|
102
115
|
// This should:
|
|
103
|
-
// 1.
|
|
104
|
-
// 2.
|
|
105
|
-
// 3.
|
|
116
|
+
// 1. Dismiss the pending question (cleanup context)
|
|
117
|
+
// 2. Abort the blocked session so OpenCode unblocks
|
|
118
|
+
// 3. Enqueue the message as a normal user prompt (not consumed as answer)
|
|
106
119
|
await th.user(TEST_USER_ID).sendMessage({
|
|
107
120
|
content: 'my text answer',
|
|
108
121
|
})
|
|
109
122
|
|
|
110
|
-
// Pending question context should be cleaned up
|
|
123
|
+
// Pending question context should be cleaned up
|
|
111
124
|
await waitForNoPendingQuestion({
|
|
112
125
|
threadId: thread.id,
|
|
113
126
|
timeoutMs: 4_000,
|
|
114
127
|
})
|
|
115
128
|
|
|
116
|
-
// Wait for second question dropdown (from question-answer followup —
|
|
117
|
-
// OpenCode calls LLM again with same prompt after question tool completes,
|
|
118
|
-
// deterministic matcher fires question tool again). This is expected.
|
|
119
|
-
// Poll for it instead of sleeping.
|
|
120
|
-
const start = Date.now()
|
|
121
|
-
while (Date.now() - start < 4_000) {
|
|
122
|
-
const msgs = await th.getMessages()
|
|
123
|
-
const questionMsgs = msgs.filter((m) => {
|
|
124
|
-
return m.content.includes('Which option do you prefer?')
|
|
125
|
-
})
|
|
126
|
-
if (questionMsgs.length >= 2) {
|
|
127
|
-
break
|
|
128
|
-
}
|
|
129
|
-
await new Promise<void>((r) => { setTimeout(r, 50) })
|
|
130
|
-
}
|
|
131
|
-
|
|
132
129
|
const timeline = await th.text({ showInteractions: true })
|
|
133
130
|
expect(timeline).toMatchInlineSnapshot(`
|
|
134
131
|
"--- from: user (queue-question-tester)
|
|
@@ -137,21 +134,118 @@ describe('queue advanced: question tool text answer', () => {
|
|
|
137
134
|
**Pick one**
|
|
138
135
|
Which option do you prefer?
|
|
139
136
|
--- from: user (queue-question-tester)
|
|
140
|
-
my text answer
|
|
141
|
-
--- from: assistant (TestBot)
|
|
142
|
-
**Pick one**
|
|
143
|
-
Which option do you prefer?"
|
|
137
|
+
my text answer"
|
|
144
138
|
`)
|
|
145
139
|
|
|
146
|
-
// The user's
|
|
140
|
+
// The user's message must appear in Discord
|
|
147
141
|
expect(timeline).toContain('my text answer')
|
|
148
142
|
|
|
149
|
-
//
|
|
150
|
-
//
|
|
151
|
-
// dropdown. With the fix, only 2 dropdowns appear (initial + followup
|
|
152
|
-
// from question answer). Count occurrences of "Which option do you prefer?"
|
|
143
|
+
// Only 1 question dropdown — text message was consumed as the answer,
|
|
144
|
+
// no duplicate prompt was sent (which would trigger a second dropdown).
|
|
153
145
|
const questionCount = (timeline.match(/Which option do you prefer\?/g) || []).length
|
|
154
|
-
expect(questionCount).toBe(2)
|
|
146
|
+
expect(questionCount).toBe(1)
|
|
147
|
+
},
|
|
148
|
+
20_000,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
})
|
|
152
|
+
|
|
153
|
+
describe('queue advanced: voice message during pending question', () => {
|
|
154
|
+
const ctx = setupQueueAdvancedSuite({
|
|
155
|
+
channelId: VOICE_CHANNEL_ID,
|
|
156
|
+
channelName: 'qa-question-voice-e2e',
|
|
157
|
+
dirName: 'qa-question-voice-e2e',
|
|
158
|
+
username: 'queue-question-tester',
|
|
159
|
+
})
|
|
160
|
+
|
|
161
|
+
afterEach(() => {
|
|
162
|
+
setDeterministicTranscription(null)
|
|
163
|
+
})
|
|
164
|
+
|
|
165
|
+
test(
|
|
166
|
+
'voice message during pending question dismisses question and transcribes normally',
|
|
167
|
+
async () => {
|
|
168
|
+
// This is the exact bug scenario: user sends a voice message while a
|
|
169
|
+
// question dropdown is pending. Voice messages have empty message.content
|
|
170
|
+
// (audio is in attachments, transcription happens later). The old code
|
|
171
|
+
// passed "" as the question answer and consumed the message — the voice
|
|
172
|
+
// content was completely lost.
|
|
173
|
+
await ctx.discord.channel(VOICE_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
174
|
+
content: 'QUESTION_TEXT_ANSWER_MARKER',
|
|
175
|
+
})
|
|
176
|
+
|
|
177
|
+
const thread = await ctx.discord.channel(VOICE_CHANNEL_ID).waitForThread({
|
|
178
|
+
timeout: 4_000,
|
|
179
|
+
predicate: (t) => {
|
|
180
|
+
return t.name === 'QUESTION_TEXT_ANSWER_MARKER'
|
|
181
|
+
},
|
|
182
|
+
})
|
|
183
|
+
|
|
184
|
+
const th = ctx.discord.thread(thread.id)
|
|
185
|
+
|
|
186
|
+
// Wait for the question dropdown to appear
|
|
187
|
+
await waitForPendingQuestion({
|
|
188
|
+
threadId: thread.id,
|
|
189
|
+
timeoutMs: 4_000,
|
|
190
|
+
})
|
|
191
|
+
|
|
192
|
+
await waitForBotMessageContaining({
|
|
193
|
+
discord: ctx.discord,
|
|
194
|
+
threadId: thread.id,
|
|
195
|
+
text: 'Which option do you prefer?',
|
|
196
|
+
timeout: 4_000,
|
|
197
|
+
})
|
|
198
|
+
|
|
199
|
+
// Send a voice message while the question is pending.
|
|
200
|
+
// message.content is "" for voice messages — only the attachment exists.
|
|
201
|
+
setDeterministicTranscription({
|
|
202
|
+
transcription: 'I want option Alpha please',
|
|
203
|
+
queueMessage: false,
|
|
204
|
+
})
|
|
205
|
+
|
|
206
|
+
await th.user(TEST_USER_ID).sendVoiceMessage()
|
|
207
|
+
|
|
208
|
+
// Question context should be cleaned up (empty reply sent to unblock OpenCode)
|
|
209
|
+
await waitForNoPendingQuestion({
|
|
210
|
+
threadId: thread.id,
|
|
211
|
+
timeoutMs: 4_000,
|
|
212
|
+
})
|
|
213
|
+
|
|
214
|
+
// Voice content should be transcribed and appear as the next user message,
|
|
215
|
+
// processed after the model responds to the empty question answer.
|
|
216
|
+
await waitForBotMessageContaining({
|
|
217
|
+
discord: ctx.discord,
|
|
218
|
+
threadId: thread.id,
|
|
219
|
+
text: 'I want option Alpha please',
|
|
220
|
+
timeout: 4_000,
|
|
221
|
+
})
|
|
222
|
+
|
|
223
|
+
await waitForFooterMessage({
|
|
224
|
+
discord: ctx.discord,
|
|
225
|
+
threadId: thread.id,
|
|
226
|
+
timeout: 4_000,
|
|
227
|
+
afterMessageIncludes: 'I want option Alpha please',
|
|
228
|
+
afterAuthorId: ctx.discord.botUserId,
|
|
229
|
+
})
|
|
230
|
+
|
|
231
|
+
const timeline = await th.text({ showInteractions: true })
|
|
232
|
+
expect(timeline).toMatchInlineSnapshot(`
|
|
233
|
+
"--- from: user (queue-question-tester)
|
|
234
|
+
QUESTION_TEXT_ANSWER_MARKER
|
|
235
|
+
--- from: assistant (TestBot)
|
|
236
|
+
**Pick one**
|
|
237
|
+
Which option do you prefer?
|
|
238
|
+
--- from: user (queue-question-tester)
|
|
239
|
+
[attachment: voice-message.ogg]
|
|
240
|
+
--- from: assistant (TestBot)
|
|
241
|
+
🎤 Transcribing voice message...
|
|
242
|
+
📝 **Transcribed message:** I want option Alpha please
|
|
243
|
+
⬥ ok
|
|
244
|
+
*project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
|
|
245
|
+
`)
|
|
246
|
+
|
|
247
|
+
// Voice content must be present as a real transcribed message, not lost
|
|
248
|
+
expect(timeline).toContain('I want option Alpha please')
|
|
155
249
|
},
|
|
156
250
|
20_000,
|
|
157
251
|
)
|
|
@@ -3510,13 +3510,20 @@ export class ThreadSessionRuntime {
|
|
|
3510
3510
|
if (input.command) {
|
|
3511
3511
|
const queuedCommand = input.command
|
|
3512
3512
|
const commandSignal = AbortSignal.timeout(30_000)
|
|
3513
|
+
// session.command() only accepts FilePart in parts, not text parts.
|
|
3514
|
+
// Append <discord-user /> tag to arguments so external sync can
|
|
3515
|
+
// detect this message came from Discord (same tag as promptAsync).
|
|
3516
|
+
const discordTag = input.username
|
|
3517
|
+
? `\n<discord-user name="${input.username}" />`
|
|
3518
|
+
: ''
|
|
3513
3519
|
const commandResponse = await errore.tryAsync(() => {
|
|
3514
3520
|
return getClient().session.command(
|
|
3515
3521
|
{
|
|
3516
3522
|
sessionID: session.id,
|
|
3523
|
+
|
|
3517
3524
|
directory: this.sdkDirectory,
|
|
3518
3525
|
command: queuedCommand.name,
|
|
3519
|
-
arguments: queuedCommand.arguments,
|
|
3526
|
+
arguments: queuedCommand.arguments + discordTag,
|
|
3520
3527
|
agent: earlyAgentPreference,
|
|
3521
3528
|
...variantField,
|
|
3522
3529
|
},
|