@openclaw/discord 2026.5.7 → 2026.5.10-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/dist/{access-CHY9FK3X.js → access-R4X7pYt4.js} +1 -1
  2. package/dist/action-runtime-api.js +1 -1
  3. package/dist/{allow-list-ek-1hMKN.js → allow-list-4wnZg5P9.js} +2 -6
  4. package/dist/api.js +18 -16
  5. package/dist/{approval-handler.runtime-BBZRYAGs.js → approval-handler.runtime-B5BCBw4D.js} +3 -4
  6. package/dist/{approval-native-CBdZsAR7.js → approval-native-CrCc3b4u.js} +2 -2
  7. package/dist/{approval-shared-Ck6TxKgo.js → approval-shared-Ckk65xSG.js} +1 -1
  8. package/dist/{audit-CCJ0h49k.js → audit-BRBe8xFv.js} +4 -4
  9. package/dist/{channel-UXGa9PGc.js → channel-DTBEGHsT.js} +53 -31
  10. package/dist/{channel-actions-Br29_1nE.js → channel-actions-CHgoSpPF.js} +27 -1
  11. package/dist/{channel-actions.runtime-ChmNUig1.js → channel-actions.runtime-E5Plubuu.js} +5 -5
  12. package/dist/channel-config-api.js +1 -1
  13. package/dist/channel-plugin-api.js +1 -1
  14. package/dist/{channel.setup-D_xyQu_h.js → channel.setup-EQXuueIk.js} +3 -3
  15. package/dist/chunk-DYl-_5RL.js +179 -0
  16. package/dist/{components-D5LnN7ZQ.js → components-BJOShoIU.js} +2 -3
  17. package/dist/{config-schema-Cc953rAs.js → config-schema-BGn5I3_3.js} +46 -1
  18. package/dist/contract-api.js +7 -7
  19. package/dist/{conversation-identity-DHhS0ez3.js → conversation-identity-BPQm_uhV.js} +1 -1
  20. package/dist/{directory-config-DoETeOOx.js → directory-config-JBaFwbAQ.js} +1 -1
  21. package/dist/directory-contract-api.js +1 -1
  22. package/dist/{directory-live-DJ0V5asB.js → directory-live-B4kW11z5.js} +2 -2
  23. package/dist/{discord-eZlimVfW.js → discord-DtHMmZqJ.js} +58 -15
  24. package/dist/{doctor-Bo-yifB3.js → doctor-CPV4Ch3V.js} +3 -3
  25. package/dist/{doctor-contract-Bso46EOQ.js → doctor-contract-BGjjFBdq.js} +1 -1
  26. package/dist/doctor-contract-api.js +1 -1
  27. package/dist/{handle-action.guild-admin-sJiQymg8.js → handle-action.guild-admin-DSeMCHvR.js} +6 -3
  28. package/dist/{inbound-context-CRylwjg0.js → inbound-context-BAwOn5Iq.js} +1 -1
  29. package/dist/manager.runtime-DGvCDWI5.js +2082 -0
  30. package/dist/{message-handler-C9Ohf-ea.js → message-handler-BWHSteQf.js} +7 -7
  31. package/dist/{message-handler.preflight-BrvazsYn.js → message-handler.preflight-q7ose9Ta.js} +31 -42
  32. package/dist/{message-handler.process-CEnzuLiN.js → message-handler.process-BN8bE7AE.js} +81 -64
  33. package/dist/{message-utils-9kaGF59d.js → message-utils-DEjCwCRz.js} +2 -2
  34. package/dist/normalize-Cu94FOqy.js +58 -0
  35. package/dist/{outbound-adapter-DNsTVJfH.js → outbound-adapter-Bt8FL3yO.js} +78 -17
  36. package/dist/{outbound-session-route-DK9qkPgP.js → outbound-session-route-BaJRt05p.js} +1 -1
  37. package/dist/{pluralkit-OFss_pIy.js → pluralkit-CUfobeQu.js} +1 -1
  38. package/dist/{preflight-audio-CRmUxxuM.js → preflight-audio-BcsH127L.js} +1 -1
  39. package/dist/{preview-streaming-Cc_oeIPP.js → preview-streaming-DCPAe24T.js} +1 -0
  40. package/dist/{probe.runtime-CMgUDax3.js → probe.runtime-4Q9GD7FG.js} +1 -1
  41. package/dist/{provider-CuOh6z_b.js → provider-DBvvDP9A.js} +185 -225
  42. package/dist/{provider-session.runtime-CCESIHVo.js → provider-session.runtime-DepAOR1U.js} +3 -3
  43. package/dist/provider.runtime-BlbOt97W.js +2 -0
  44. package/dist/{reply-delivery-D9aKHtDH.js → reply-delivery-vHSqaKKo.js} +9 -6
  45. package/dist/{resolve-allowlist-common-_e1cWOb3.js → resolve-allowlist-common-ZcVU4OMA.js} +1 -1
  46. package/dist/{resolve-channels-kyuvrXJg.js → resolve-channels-D9Dubj_y.js} +3 -3
  47. package/dist/{resolve-users-CAwh4EBq.js → resolve-users-DlzKsLQe.js} +2 -2
  48. package/dist/route-resolution-foWW8_D1.js +268 -0
  49. package/dist/runtime-api.actions.js +2 -2
  50. package/dist/runtime-api.js +25 -25
  51. package/dist/runtime-api.lookup.js +6 -6
  52. package/dist/runtime-api.monitor-CsetE9pd.js +6 -0
  53. package/dist/runtime-api.monitor.js +7 -7
  54. package/dist/runtime-api.send.js +5 -5
  55. package/dist/runtime-api.threads.js +5 -5
  56. package/dist/{runtime-D8alY00g.js → runtime-oXRGbcb5.js} +7 -7
  57. package/dist/runtime-setter-api.js +1 -1
  58. package/dist/secret-contract-api.js +1 -1
  59. package/dist/{security-audit-BtRd_VhN.js → security-audit-BQ_sGK3J.js} +1 -1
  60. package/dist/security-audit-contract-api.js +1 -1
  61. package/dist/{security-audit.runtime-Dm1LW9KX.js → security-audit.runtime-B7Gmz2DX.js} +1 -1
  62. package/dist/security-contract-api.js +1 -1
  63. package/dist/{send-8S_HKJpQ.js → send-BSBMchf8.js} +83 -57
  64. package/dist/{send.components-A42c_5tQ.js → send.components-BoEhxys4.js} +22 -15
  65. package/dist/{send.outbound-D3tonSz8.js → send.outbound-B2DK2loZ.js} +22 -11
  66. package/dist/send.receipt-BAZw2Zsz.js +35 -0
  67. package/dist/{send.shared-BQGiUPvZ.js → send.shared-DzwpAEMP.js} +26 -7
  68. package/dist/session-key-api.js +1 -1
  69. package/dist/setup-plugin-api.js +1 -1
  70. package/dist/{shared-BEW4H3bj.js → shared-CBsrEG5q.js} +8 -8
  71. package/dist/{shared-interactive-KgJjCqnB.js → shared-interactive-D39V9Wtn.js} +3 -3
  72. package/dist/{subagent-hooks-T0LPLh4H.js → subagent-hooks-BDy1pJD5.js} +2 -2
  73. package/dist/subagent-hooks-api.js +1 -1
  74. package/dist/{system-events-_fzSG--3.js → system-events-BeVImqnV.js} +1 -1
  75. package/dist/target-parsing-D-H7nnh2.js +51 -0
  76. package/dist/target-resolver-DzOelJEt.js +85 -0
  77. package/dist/targets-Dj-qyUaE.js +3 -0
  78. package/dist/test-api.js +4 -4
  79. package/dist/{thread-bindings-CMpZjP50.js → thread-bindings-DUzBL8jL.js} +7 -7
  80. package/dist/{thread-bindings.discord-api-CwWGoyei.js → thread-bindings.discord-api-dpSDYFKX.js} +7 -7
  81. package/dist/{thread-bindings.manager-BtxfLfWf.js → thread-bindings.manager-DTuwm8Tk.js} +5 -6
  82. package/dist/{thread-bindings.session-updates-jcZSiRPI.js → thread-bindings.session-updates-BhcGQTJb.js} +1 -1
  83. package/dist/{threading-BMmpA2JR.js → threading-CTUnZtNi.js} +4 -5
  84. package/dist/timeouts.js +1 -1
  85. package/dist/{typing-Bw6NKWLZ.js → typing-BYeVSviE.js} +2 -3
  86. package/openclaw.plugin.json +222 -1
  87. package/package.json +5 -5
  88. package/dist/manager.runtime-Cug1PoeZ.js +0 -1025
  89. package/dist/provider.runtime-B68g3qLv.js +0 -2
  90. package/dist/rolldown-runtime-C3SqQTfK.js +0 -28
  91. package/dist/route-resolution-BWErj5Cn.js +0 -236
  92. package/dist/runtime-api.monitor-DzkCxeBL.js +0 -6
  93. package/dist/target-resolver-DA84_xbt.js +0 -365
  94. package/dist/targets-FwL1BPTU.js +0 -2
  95. /package/dist/{agent-components.runtime-Dof1YMSz.js → agent-components.runtime-zT8qPsnM.js} +0 -0
  96. /package/dist/{audit-core-CejGc3hO.js → audit-core-CG37TsZm.js} +0 -0
  97. /package/dist/{channel-access-DFIQqbYm.js → channel-access-BL-wemES.js} +0 -0
  98. /package/dist/{doctor-shared-Cqvfgv9m.js → doctor-shared-D_QLzu30.js} +0 -0
  99. /package/dist/{mentions-BPZUaFk7.js → mentions-f806C7MB.js} +0 -0
  100. /package/dist/{preflight-audio.runtime-DPVbpZid.js → preflight-audio.runtime-fXnUxxBa.js} +0 -0
  101. /package/dist/{probe-E80IMT1X.js → probe-CjO53qsc.js} +0 -0
  102. /package/dist/{runtime-K9RT6Egn.js → runtime-BqCoo-zp.js} +0 -0
  103. /package/dist/{secret-config-contract-5S9U9pjx.js → secret-config-contract-BCQNNS7N.js} +0 -0
  104. /package/dist/{security-contract-BE8rsdPq.js → security-contract-DrbQqyyW.js} +0 -0
  105. /package/dist/{security-doctor-DiilN216.js → security-doctor-BJH5YIGL.js} +0 -0
  106. /package/dist/{sender-identity-BGUfyvOC.js → sender-identity-CDOVm5Dk.js} +0 -0
  107. /package/dist/{session-contract-CuW9Nlxg.js → session-contract-D871HDFG.js} +0 -0
  108. /package/dist/{session-key-normalization-B5La-jFM.js → session-key-normalization-BQNCS1gu.js} +0 -0
  109. /package/dist/{thread-bindings.state-WU4duXKY.js → thread-bindings.state-DwgDYzBM.js} +0 -0
  110. /package/dist/{timeouts-CdsmBWWs.js → timeouts-C3FYXWJX.js} +0 -0
@@ -0,0 +1,2082 @@
1
+ import { c as ResumedListener, s as ReadyListener, t as discord_exports } from "./discord-DtHMmZqJ.js";
2
+ import { c as resolveDiscordAccountAllowFrom } from "./accounts-CaHGiVB4.js";
3
+ import { t as parseDiscordTarget } from "./target-parsing-D-H7nnh2.js";
4
+ import { n as formatDiscordUserTag } from "./format-D8TsaXxW.js";
5
+ import { a as normalizeDiscordSlug, m as resolveDiscordOwnerAccess } from "./allow-list-4wnZg5P9.js";
6
+ import { t as formatMention } from "./mentions-f806C7MB.js";
7
+ import { t as getDiscordRuntime } from "./runtime-BqCoo-zp.js";
8
+ import { t as buildDiscordGroupSystemPrompt } from "./inbound-context-BAwOn5Iq.js";
9
+ import { n as resolveDiscordVoiceEnabled, t as authorizeDiscordVoiceIngress } from "./access-R4X7pYt4.js";
10
+ import { createRequire } from "node:module";
11
+ import { escapeRegExp, normalizeOptionalString, stripInlineDirectiveTagsForDisplay } from "openclaw/plugin-sdk/text-runtime";
12
+ import { resolveAgentRoute } from "openclaw/plugin-sdk/routing";
13
+ import path from "node:path";
14
+ import { createSubsystemLogger, logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env";
15
+ import fs from "node:fs/promises";
16
+ import { resolvePreferredOpenClawTmpDir, tempWorkspace } from "openclaw/plugin-sdk/temp-path";
17
+ import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
18
+ import { agentCommandFromIngress, getTtsProvider, resolveAgentDir, resolveTtsConfig, resolveTtsPrefsPath } from "openclaw/plugin-sdk/agent-runtime";
19
+ import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ, buildRealtimeVoiceAgentConsultChatMessage, buildRealtimeVoiceAgentConsultPolicyInstructions, buildRealtimeVoiceAgentConsultWorkingResponse, createRealtimeVoiceAgentTalkbackQueue, createRealtimeVoiceBridgeSession, resamplePcm, resolveConfiguredRealtimeVoiceProvider, resolveRealtimeVoiceAgentConsultToolPolicy, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow } from "openclaw/plugin-sdk/realtime-voice";
20
+ import { PassThrough, Readable } from "node:stream";
21
+ import { parseTtsDirectives } from "openclaw/plugin-sdk/speech";
22
+ //#region extensions/discord/src/voice/audio.ts
23
// PCM layout shared by the voice audio helpers in this region.
const BIT_DEPTH = 16;
const CHANNELS = 2;
const SAMPLE_RATE = 48000;
// CommonJS-style loader so the opus packages can be required lazily from this ES module.
const require = createRequire(import.meta.url);
// Set once the "no usable opus decoder" warning has been emitted, so it is reported a single time.
let warnedOpusMissing = false;
// Holds the sentinel "unresolved" until the first lookup, then the chosen factory or null.
let cachedOpusDecoderFactory = "unresolved";
29
/**
 * Prepends a 44-byte RIFF/WAVE header to raw PCM data.
 *
 * The header fields are derived from the module constants SAMPLE_RATE,
 * CHANNELS and BIT_DEPTH.
 *
 * @param {Buffer} pcm - Raw PCM payload.
 * @returns {Buffer} A new buffer holding the header followed by `pcm`.
 */
function buildWavBuffer(pcm) {
  const HEADER_BYTES = 44;
  const bytesPerFrame = (CHANNELS * BIT_DEPTH) / 8;
  const bytesPerSecond = SAMPLE_RATE * bytesPerFrame;
  const wavHeader = Buffer.alloc(HEADER_BYTES);

  // RIFF chunk descriptor; the size field excludes the first 8 bytes.
  wavHeader.write("RIFF", 0);
  wavHeader.writeUInt32LE(HEADER_BYTES - 8 + pcm.length, 4);
  wavHeader.write("WAVE", 8);

  // "fmt " sub-chunk: 16-byte body, format tag 1.
  wavHeader.write("fmt ", 12);
  wavHeader.writeUInt32LE(16, 16);
  wavHeader.writeUInt16LE(1, 20);
  wavHeader.writeUInt16LE(CHANNELS, 22);
  wavHeader.writeUInt32LE(SAMPLE_RATE, 24);
  wavHeader.writeUInt32LE(bytesPerSecond, 28);
  wavHeader.writeUInt16LE(bytesPerFrame, 32);
  wavHeader.writeUInt16LE(BIT_DEPTH, 34);

  // "data" sub-chunk header, followed by the payload itself.
  wavHeader.write("data", 36);
  wavHeader.writeUInt32LE(pcm.length, 40);

  return Buffer.concat([wavHeader, pcm]);
}
48
/**
 * Probes the known opus packages in priority order and returns the first
 * factory whose `load()` succeeds.
 *
 * Each candidate is probed by actually constructing a decoder; the probe
 * instance is discarded. When every candidate fails, `params.onWarn` is
 * called with the collected failure messages, but only the first time.
 *
 * @param {{ onWarn: (message: string) => void }} params
 * @returns {{ name: string, load: () => unknown } | null} The usable factory, or null.
 */
function resolveOpusDecoderFactory(params) {
  const candidates = [
    {
      name: "opusscript",
      load() {
        const OpusScript = require("opusscript");
        return new OpusScript(SAMPLE_RATE, CHANNELS, OpusScript.Application.AUDIO);
      }
    },
    {
      name: "@discordjs/opus",
      load() {
        const { OpusEncoder } = require("@discordjs/opus");
        return new OpusEncoder(SAMPLE_RATE, CHANNELS);
      }
    }
  ];

  const failures = [];
  for (const candidate of candidates) {
    try {
      candidate.load();
      return candidate;
    } catch (err) {
      failures.push(`${candidate.name}: ${formatErrorMessage(err)}`);
    }
  }

  if (!warnedOpusMissing) {
    warnedOpusMissing = true;
    params.onWarn(`discord voice: no usable opus decoder available (${failures.join("; ")}); cannot decode voice audio`);
  }
  return null;
}
74
/**
 * Returns the memoized opus decoder factory, resolving it on first use.
 *
 * A null result (no decoder available) is cached as well, so the lookup
 * runs at most once.
 *
 * @param {{ onWarn: (message: string) => void }} params - Forwarded to the resolver.
 * @returns {object | null} The cached factory, or null when none is usable.
 */
function getOrCreateOpusDecoderFactory(params) {
  if (cachedOpusDecoderFactory === "unresolved") {
    cachedOpusDecoderFactory = resolveOpusDecoderFactory(params);
  }
  return cachedOpusDecoderFactory;
}
79
/**
 * Builds a fresh decoder instance from the cached factory.
 *
 * @param {{ onWarn: (message: string) => void }} params - Forwarded to the factory lookup.
 * @returns {{ decoder: unknown, name: string } | null} A new decoder with the
 *   name of the package that produced it, or null when no factory is available.
 */
function createOpusDecoder(params) {
  const factory = getOrCreateOpusDecoderFactory(params);
  return factory ? { decoder: factory.load(), name: factory.name } : null;
}
87
/**
 * Decodes every opus packet from `stream` and returns the concatenated PCM.
 *
 * Chunks that are not non-empty Buffers are skipped. A failure while
 * iterating or decoding stops the loop; it is logged only when verbose
 * logging is enabled, and whatever was decoded so far is still returned.
 *
 * @param {AsyncIterable<unknown> | Iterable<unknown>} stream - Source of opus packets.
 * @param {{ onWarn: Function, onVerbose: (message: string) => void }} params
 * @returns {Promise<Buffer>} Decoded PCM, or an empty buffer when nothing was decoded.
 */
async function decodeOpusStream(stream, params) {
  const opus = createOpusDecoder({ onWarn: params.onWarn });
  if (!opus) return Buffer.alloc(0);
  params.onVerbose(`opus decoder: ${opus.name}`);

  const pcmParts = [];
  try {
    for await (const packet of stream) {
      if (!Buffer.isBuffer(packet) || packet.length === 0) continue;
      const pcm = opus.decoder.decode(packet);
      if (pcm?.length > 0) pcmParts.push(Buffer.from(pcm));
    }
  } catch (err) {
    if (shouldLogVerbose()) logVerbose(`discord voice: opus decode failed: ${formatErrorMessage(err)}`);
  }

  if (pcmParts.length === 0) return Buffer.alloc(0);
  return Buffer.concat(pcmParts);
}
103
/**
 * Decodes opus packets from `stream` one at a time, handing each non-empty
 * decoded PCM buffer to `params.onChunk`.
 *
 * Chunks that are not non-empty Buffers are skipped. A failure while
 * iterating, decoding, or inside `onChunk` ends the loop and is logged only
 * when verbose logging is enabled.
 *
 * @param {AsyncIterable<unknown> | Iterable<unknown>} stream - Source of opus packets.
 * @param {{ onWarn: Function, onVerbose: (message: string) => void, onChunk: (pcm: Buffer) => void }} params
 * @returns {Promise<void>}
 */
async function decodeOpusStreamChunks(stream, params) {
  const opus = createOpusDecoder({ onWarn: params.onWarn });
  if (!opus) return;
  params.onVerbose(`opus decoder: ${opus.name}`);

  try {
    for await (const packet of stream) {
      if (!Buffer.isBuffer(packet) || packet.length === 0) continue;
      const pcm = opus.decoder.decode(packet);
      if (pcm?.length > 0) params.onChunk(Buffer.from(pcm));
    }
  } catch (err) {
    if (shouldLogVerbose()) logVerbose(`discord voice: opus decode failed: ${formatErrorMessage(err)}`);
  }
}
117
/**
 * Downmixes interleaved 16-bit stereo PCM to mono by averaging each
 * left/right pair, then resamples the result from SAMPLE_RATE to 24000 Hz.
 *
 * Trailing bytes that do not form a complete 4-byte stereo frame are ignored.
 *
 * @param {Buffer} pcm - Interleaved little-endian 16-bit stereo samples.
 * @returns {Buffer} The output of `resamplePcm`, or an empty buffer when
 *   `pcm` holds no complete frame.
 */
function convertDiscordPcm48kStereoToRealtimePcm24kMono(pcm) {
  const STEREO_FRAME_BYTES = 4;
  const MONO_SAMPLE_BYTES = 2;
  const totalFrames = Math.floor(pcm.length / STEREO_FRAME_BYTES);
  if (totalFrames === 0) return Buffer.alloc(0);

  const mono = Buffer.alloc(totalFrames * MONO_SAMPLE_BYTES);
  let readAt = 0;
  let writeAt = 0;
  while (readAt < totalFrames * STEREO_FRAME_BYTES) {
    const leftSample = pcm.readInt16LE(readAt);
    const rightSample = pcm.readInt16LE(readAt + 2);
    mono.writeInt16LE(Math.round((leftSample + rightSample) / 2), writeAt);
    readAt += STEREO_FRAME_BYTES;
    writeAt += MONO_SAMPLE_BYTES;
  }
  return resamplePcm(mono, SAMPLE_RATE, 24e3);
}
129
/**
 * Resamples mono 16-bit PCM from 24000 Hz to SAMPLE_RATE, then duplicates
 * every sample into both channels of an interleaved stereo buffer.
 *
 * @param {Buffer} pcm - Little-endian 16-bit mono samples.
 * @returns {Buffer} Interleaved stereo PCM, or an empty buffer when the
 *   resampled data holds no complete sample.
 */
function convertRealtimePcm24kMonoToDiscordPcm48kStereo(pcm) {
  const upsampled = resamplePcm(pcm, 24e3, SAMPLE_RATE);
  const totalSamples = Math.floor(upsampled.length / 2);
  if (totalSamples === 0) return Buffer.alloc(0);

  const interleaved = Buffer.alloc(totalSamples * 4);
  let writeAt = 0;
  for (let readAt = 0; readAt < totalSamples * 2; readAt += 2) {
    const value = upsampled.readInt16LE(readAt);
    // Same value on the left and right channel.
    interleaved.writeInt16LE(value, writeAt);
    interleaved.writeInt16LE(value, writeAt + 2);
    writeAt += 4;
  }
  return interleaved;
}
142
/**
 * Estimates the playback length of a PCM buffer from its byte size, using
 * the module constants BIT_DEPTH, CHANNELS and SAMPLE_RATE.
 *
 * @param {Buffer} pcm - Raw PCM data.
 * @returns {number} Duration in seconds; 0 when the frame size is not positive.
 */
function estimateDurationSeconds(pcm) {
  const frameBytes = (BIT_DEPTH / 8) * CHANNELS;
  if (frameBytes > 0) {
    return pcm.length / (frameBytes * SAMPLE_RATE);
  }
  return 0;
}
147
/**
 * Writes `pcm` as `segment.wav` into a fresh temp workspace and schedules
 * that workspace directory for later cleanup.
 *
 * @param {Buffer} pcm - Raw PCM data to wrap in a WAV container.
 * @returns {Promise<{ path: string, durationSeconds: number }>} The value
 *   returned by the workspace write, plus the estimated duration.
 */
async function writeVoiceWavFile(pcm) {
  const rootDir = resolvePreferredOpenClawTmpDir();
  const workspace = await tempWorkspace({ rootDir, prefix: "discord-voice-" });
  const filePath = await workspace.write("segment.wav", buildWavBuffer(pcm));
  // The caller receives the path; the directory is removed later by the scheduled cleanup.
  scheduleTempCleanup(workspace.dir);
  const durationSeconds = estimateDurationSeconds(pcm);
  return { path: filePath, durationSeconds };
}
160
/**
 * Schedules a recursive, forced removal of `tempDir` after `delayMs`.
 *
 * The timer is unref'ed so it does not keep the process alive. Removal
 * failures are logged only when verbose logging is enabled.
 *
 * @param {string} tempDir - Directory to delete.
 * @param {number} [delayMs] - Delay before deletion; defaults to 30 minutes.
 * @returns {void}
 */
function scheduleTempCleanup(tempDir, delayMs = 1800 * 1e3) {
  const removeTempDir = async () => {
    try {
      await fs.rm(tempDir, { recursive: true, force: true });
    } catch (err) {
      if (shouldLogVerbose()) logVerbose(`discord voice: temp cleanup failed for ${tempDir}: ${formatErrorMessage(err)}`);
    }
  };
  const cleanupTimer = setTimeout(() => {
    removeTempDir();
  }, delayMs);
  cleanupTimer.unref();
}
170
+ //#endregion
171
+ //#region extensions/discord/src/voice/capture-state.ts
172
/**
 * Creates an empty per-connection capture-tracking state.
 *
 * @returns {{
 *   activeSpeakers: Set<string>,
 *   activeCaptureStreams: Map<string, { generation: number, stream: object }>,
 *   captureFinalizeTimers: Map<string, { generation: number, timer: object }>,
 *   captureGenerations: Map<string, number>
 * }} Fresh, empty collections keyed by user id.
 */
function createVoiceCaptureState() {
  const activeSpeakers = new Set();
  const activeCaptureStreams = new Map();
  const captureFinalizeTimers = new Map();
  const captureGenerations = new Map();
  return { activeSpeakers, activeCaptureStreams, captureFinalizeTimers, captureGenerations };
}
180
/**
 * Tears down all capture tracking: cancels every pending finalize timer,
 * destroys every active capture stream, and empties all four collections.
 *
 * @param {object} state - State object as created by createVoiceCaptureState.
 * @returns {void}
 */
function stopVoiceCaptureState(state) {
  const { captureFinalizeTimers, activeCaptureStreams, captureGenerations, activeSpeakers } = state;

  captureFinalizeTimers.forEach((scheduled) => {
    clearTimeout(scheduled.timer);
  });
  captureFinalizeTimers.clear();

  activeCaptureStreams.forEach((capture) => {
    capture.stream.destroy();
  });
  activeCaptureStreams.clear();

  captureGenerations.clear();
  activeSpeakers.clear();
}
188
/**
 * Looks up the active capture record for a user.
 *
 * @param {object} state - Capture state.
 * @param {string} userId - User whose capture is requested.
 * @returns {{ generation: number, stream: object } | undefined} The record, if any.
 */
function getActiveVoiceCapture(state, userId) {
  const { activeCaptureStreams } = state;
  return activeCaptureStreams.get(userId);
}
191
/**
 * Reports whether the user is currently in the active-speakers set.
 *
 * @param {object} state - Capture state.
 * @param {string} userId - User to check.
 * @returns {boolean} True when the user is marked as an active speaker.
 */
function isVoiceCaptureActive(state, userId) {
  const { activeSpeakers } = state;
  return activeSpeakers.has(userId);
}
194
/**
 * Cancels and forgets the pending finalize timer for a user.
 *
 * @param {object} state - Capture state.
 * @param {string} userId - User whose timer should be cleared.
 * @param {number} [generation] - When given, the timer is cleared only if it
 *   was scheduled for this capture generation.
 * @returns {boolean} True when a timer was cleared; false when none exists
 *   or its generation does not match.
 */
function clearVoiceCaptureFinalizeTimer(state, userId, generation) {
  const pending = state.captureFinalizeTimers.get(userId);
  if (!pending) return false;

  const hasGenerationFilter = generation !== undefined;
  if (hasGenerationFilter && pending.generation !== generation) return false;

  clearTimeout(pending.timer);
  state.captureFinalizeTimers.delete(userId);
  return true;
}
201
/**
 * Registers a new capture for a user and returns its generation number.
 *
 * The generation is one higher than the user's previous generation (starting
 * at 1). The user is marked as an active speaker and the stream is stored
 * under the new generation.
 *
 * @param {object} state - Capture state.
 * @param {string} userId - User who started speaking.
 * @param {object} stream - Capture stream to track.
 * @returns {number} The generation assigned to this capture.
 */
function beginVoiceCapture(state, userId, stream) {
  // Cancel whatever finalize timer is still pending for this user. Any such
  // timer was scheduled under an earlier generation, so filtering the clear
  // by the new generation could never match and would leave the stale timer
  // and its map entry behind.
  clearVoiceCaptureFinalizeTimer(state, userId);

  const generation = (state.captureGenerations.get(userId) ?? 0) + 1;
  state.captureGenerations.set(userId, generation);
  state.activeSpeakers.add(userId);
  state.activeCaptureStreams.set(userId, { generation, stream });
  return generation;
}
212
/**
 * Ends the capture identified by `generation` for a user.
 *
 * The matching finalize timer (if any) is cleared first. The capture record
 * and active-speaker flag are removed only when the currently stored capture
 * still belongs to that generation.
 *
 * @param {object} state - Capture state.
 * @param {string} userId - User whose capture finished.
 * @param {number} generation - Generation returned by beginVoiceCapture.
 * @returns {boolean} True when the capture was removed; false when a
 *   different (or no) capture is stored for the user.
 */
function finishVoiceCapture(state, userId, generation) {
  clearVoiceCaptureFinalizeTimer(state, userId, generation);

  const current = state.activeCaptureStreams.get(userId);
  const ownsCurrentCapture = current?.generation === generation;
  if (!ownsCurrentCapture) return false;

  state.activeCaptureStreams.delete(userId);
  state.activeSpeakers.delete(userId);
  return true;
}
219
/**
 * Arms a timer that finalizes the user's current capture after `delayMs`.
 *
 * Any timer already pending for the same generation is replaced. When the
 * timer fires and the stored capture still has the same generation, the
 * capture is removed from the state, `onFinalize` is invoked with it, and
 * its stream is destroyed. If a different capture has taken its place, the
 * timer does nothing.
 *
 * @param {{ state: object, userId: string, delayMs: number, onFinalize?: (capture: object) => void }} params
 * @returns {boolean} False when the user has no active capture, true otherwise.
 */
function scheduleVoiceCaptureFinalize(params) {
  const { state, userId, delayMs, onFinalize } = params;
  const capture = state.activeCaptureStreams.get(userId);
  if (!capture) return false;

  clearVoiceCaptureFinalizeTimer(state, userId, capture.generation);

  const finalizeIfStillCurrent = () => {
    const current = state.activeCaptureStreams.get(userId);
    const superseded = !current || current.generation !== capture.generation;
    if (superseded) return;

    state.captureFinalizeTimers.delete(userId);
    state.activeCaptureStreams.delete(userId);
    state.activeSpeakers.delete(userId);
    onFinalize?.(current);
    current.stream.destroy();
  };

  const timer = setTimeout(finalizeIfStillCurrent, delayMs);
  state.captureFinalizeTimers.set(userId, { generation: capture.generation, timer });
  return true;
}
239
/**
 * Resolves the speaker and authorizes their voice input for the entry's
 * guild/channel.
 *
 * When `entry.guildName` is empty it is fetched via `params.fetchGuildName`
 * and stored back on the entry.
 *
 * @param {object} params - Carries `entry`, `userId`, `cfg`, `discordConfig`,
 *   `ownerAllowFrom`, `fetchGuildName` and `speakerContext`.
 * @returns {Promise<{ extraSystemPrompt: unknown, senderIsOwner: unknown, speakerLabel: unknown } | null>}
 *   Context for the agent turn, or null when authorization is denied.
 */
async function resolveDiscordVoiceIngressContext(params) {
  const { entry, userId } = params;
  if (!entry.guildName) {
    entry.guildName = await params.fetchGuildName(entry.guildId);
  }

  const speaker = await params.speakerContext.resolveContext(entry.guildId, userId);
  const identity = await params.speakerContext.resolveIdentity(entry.guildId, userId);

  const { guildName, guildId, channelId, channelName } = entry;
  const channelSlug = channelName ? normalizeDiscordSlug(channelName) : "";
  const channelLabel = formatMention({ channelId });
  const sender = { id: identity.id, name: identity.name, tag: identity.tag };

  const access = await authorizeDiscordVoiceIngress({
    cfg: params.cfg,
    discordConfig: params.discordConfig,
    guildName,
    guildId,
    channelId,
    channelName,
    channelSlug,
    channelLabel,
    memberRoleIds: identity.memberRoleIds,
    ownerAllowFrom: params.ownerAllowFrom,
    sender
  });
  if (!access.ok) return null;

  return {
    extraSystemPrompt: buildDiscordGroupSystemPrompt(access.channelConfig),
    senderIsOwner: speaker.senderIsOwner,
    speakerLabel: speaker.label
  };
}
268
/**
 * Runs one agent turn for a voice message and collects the reply text.
 *
 * Uses `params.context` when provided; otherwise resolves (and authorizes)
 * the ingress context first. The agent is invoked with `deliver: false`, and
 * the non-blank string `text` fields of its payloads are joined with
 * newlines.
 *
 * @param {object} params - Carries `message`, `entry`, `runtime`, `toolsAllow`,
 *   `discordConfig`, an optional `context`, and the fields needed to resolve one.
 * @returns {Promise<{ context: object, text: string } | null>} The context
 *   used plus the trimmed reply text, or null when no context is available.
 */
async function runDiscordVoiceAgentTurn(params) {
  let context = params.context;
  if (context === null || context === undefined) {
    context = await resolveDiscordVoiceIngressContext({
      entry: params.entry,
      userId: params.userId,
      cfg: params.cfg,
      discordConfig: params.discordConfig,
      ownerAllowFrom: params.ownerAllowFrom,
      fetchGuildName: params.fetchGuildName,
      speakerContext: params.speakerContext
    });
  }
  if (!context) return null;

  const voiceModel = normalizeOptionalString(params.discordConfig.voice?.model);
  const agentRequest = {
    message: params.message,
    sessionKey: params.entry.route.sessionKey,
    agentId: params.entry.route.agentId,
    messageChannel: "discord",
    messageProvider: "discord-voice",
    extraSystemPrompt: context.extraSystemPrompt,
    senderIsOwner: context.senderIsOwner,
    // A model override is only allowed when a voice model is configured.
    allowModelOverride: Boolean(voiceModel),
    model: voiceModel,
    toolsAllow: params.toolsAllow,
    deliver: false
  };
  const result = await agentCommandFromIngress(agentRequest, params.runtime);

  const payloadTexts = (result.payloads ?? []).map((payload) => payload.text);
  const spokenTexts = payloadTexts.filter((text) => typeof text === "string" && text.trim());
  return {
    context,
    text: spokenTexts.join("\n").trim()
  };
}
297
+ //#endregion
298
+ //#region extensions/discord/src/voice/prompt.ts
299
+ const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = [
300
+ "You are OpenClaw's Discord voice interface in a live voice channel.",
301
+ "Discord voice reply requirements:",
302
+ "- Return only the concise text that should be spoken aloud in the voice channel.",
303
+ "- Treat the transcript as speech-to-text from a live conversation; repair obvious transcription artifacts and ignore repeated partial fragments caused by voice buffering.",
304
+ "- If the transcript is garbled, incomplete, or missing the user's intent, ask one brief clarifying question instead of guessing.",
305
+ "- If the request needs deeper reasoning, current information, or tools, use the available tools before answering.",
306
+ "- Do not call the tts tool; Discord voice will synthesize and play the returned text.",
307
+ "- Do not reply with NO_REPLY unless no spoken response is appropriate.",
308
+ "- Keep the response brief, natural, and conversational. Prefer one to three short sentences.",
309
+ "- Avoid markdown tables, code fences, citations, and visual formatting unless the user explicitly asks for something that cannot be spoken naturally."
310
+ ].join("\n");
311
/**
 * Builds the agent prompt for a voice transcript: the spoken-output contract,
 * a blank line, then the trimmed transcript. When a non-blank speaker label
 * is given, the transcript is preceded by a line naming the speaker.
 *
 * @param {string} transcript - Speech-to-text output.
 * @param {string} [speakerLabel] - Display label of the speaker.
 * @returns {string} The complete prompt text.
 */
function formatVoiceIngressPrompt(transcript, speakerLabel) {
  const body = transcript.trim();
  const label = speakerLabel?.trim();
  let transcriptSection = body;
  if (label) {
    transcriptSection = [`Voice transcript from speaker "${label}":`, body].join("\n");
  }
  return [DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, transcriptSection].join("\n\n");
}
316
+ //#endregion
317
+ //#region extensions/discord/src/voice/sdk-runtime.ts
318
// Memoized "@discordjs/voice" module; stays null until the first successful load.
let cachedDiscordVoiceSdk = null;

/**
 * Loads "@discordjs/voice" through a CommonJS-style require on first use and
 * returns the cached module afterwards.
 *
 * @returns {object} The "@discordjs/voice" module exports.
 */
function loadDiscordVoiceSdk() {
  if (!cachedDiscordVoiceSdk) {
    const requireFromHere = createRequire(import.meta.url);
    cachedDiscordVoiceSdk = requireFromHere("@discordjs/voice");
  }
  return cachedDiscordVoiceSdk;
}
324
// Default durations for the voice subsystem, all in milliseconds.
const CAPTURE_FINALIZE_GRACE_MS = 2.5 * 1000;
const VOICE_CONNECT_READY_TIMEOUT_MS = 30 * 1000;
const VOICE_RECONNECT_GRACE_MS = 15 * 1000;
const PLAYBACK_READY_TIMEOUT_MS = 60 * 1000;
const SPEAKING_READY_TIMEOUT_MS = 60 * 1000;
329
/**
 * Normalizes a configured timeout to a whole, positive number of milliseconds.
 *
 * @param {unknown} value - Configured timeout; may be missing or invalid.
 * @param {number} fallbackMs - Returned when `value` is not usable.
 * @returns {number} `value` rounded down to an integer, or `fallbackMs` when
 *   `value` is not a finite number or rounds down to zero or less.
 */
function resolveVoiceTimeoutMs(value, fallbackMs) {
  if (typeof value !== "number" || !Number.isFinite(value)) return fallbackMs;
  // Validate after flooring: a fraction such as 0.5 is positive but would
  // otherwise be truncated to a 0 ms timeout.
  const wholeMs = Math.floor(value);
  return wholeMs > 0 ? wholeMs : fallbackMs;
}
333
/**
 * Sends a message to the verbose log with the "discord voice: " prefix.
 *
 * @param {string} message - Text to log.
 * @returns {void}
 */
function logVoiceVerbose(message) {
  const prefixed = `discord voice: ${message}`;
  logVerbose(prefixed);
}
336
/**
 * Tells whether a channel type is one of the two guild voice kinds
 * (regular voice or stage voice).
 *
 * @param {unknown} type - Channel type value to test.
 * @returns {boolean} True for GuildVoice or GuildStageVoice.
 */
function isVoiceChannel(type) {
  const { ChannelType } = discord_exports;
  if (type === ChannelType.GuildVoice) return true;
  return type === ChannelType.GuildStageVoice;
}
339
+ //#endregion
340
+ //#region extensions/discord/src/voice/realtime.ts
341
// Subsystem logger for the realtime voice code in this region.
const logger$2 = createSubsystemLogger("discord/voice");

// Durations, in milliseconds.
const DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS = 350;
const DISCORD_REALTIME_RECENT_AGENT_PROXY_CONSULT_TTL_MS = 15 * 1000;
const DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;

// Size limits.
const DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT = 32;
const DISCORD_REALTIME_RECENT_AGENT_PROXY_CONSULT_LIMIT = 16;
const DISCORD_REALTIME_LOG_PREVIEW_CHARS = 500;
const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;

// Spoken when an agent consult fails.
const DISCORD_REALTIME_FALLBACK_TEXT = "I hit an error while checking that. Please try again.";
351
/**
 * Produces a single-line preview of `text` for log output.
 *
 * Whitespace runs are collapsed to one space and the result is trimmed.
 * Text longer than DISCORD_REALTIME_LOG_PREVIEW_CHARS is cut at that length
 * and suffixed with "...".
 *
 * @param {string} text - Text to preview.
 * @returns {string} The collapsed, possibly truncated preview.
 */
function formatRealtimeLogPreview(text) {
  const collapsed = text.replace(/\s+/g, " ").trim();
  const fits = collapsed.length <= DISCORD_REALTIME_LOG_PREVIEW_CHARS;
  return fits ? collapsed : `${collapsed.slice(0, DISCORD_REALTIME_LOG_PREVIEW_CHARS)}...`;
}
356
/**
 * Maps a realtime provider event to an interruption log line.
 *
 * Only a handful of client and server events relate to interrupting the
 * model; every other event yields `undefined`.
 *
 * @param {{ direction: string, type: string, detail?: string }} event
 * @returns {string | undefined} The log line, or undefined for unrelated events.
 */
function formatRealtimeInterruptionLog(event) {
  const detail = event.detail ? ` ${event.detail}` : "";

  if (event.direction === "client") {
    switch (event.type) {
      case "response.cancel":
        return `discord voice: realtime model interrupt requested ${event.direction}:${event.type}${detail}`;
      case "conversation.item.truncate.skipped":
        return `discord voice: realtime model interrupt ignored ${event.direction}:${event.type}${detail}`;
      case "conversation.item.truncate":
        return `discord voice: realtime model audio truncated ${event.direction}:${event.type}${detail}`;
      default:
        return undefined;
    }
  }

  if (event.direction === "server") {
    const confirmed =
      event.type === "response.cancelled" ||
      (event.type === "response.done" && event.detail?.includes("status=cancelled"));
    if (confirmed) {
      return `discord voice: realtime model interrupt confirmed ${event.direction}:${event.type}${detail}`;
    }
    // The cancel request arrived after the response had already ended.
    const raced = event.type === "error" && event.detail === "Cancellation failed: no active response found";
    if (raced) {
      return `discord voice: realtime model interrupt raced ${event.direction}:${event.type}${detail}`;
    }
  }

  return undefined;
}
369
/**
 * Reads a non-blank string setting from a provider config object.
 *
 * A missing config object is tolerated, matching readProviderConfigBoolean,
 * instead of throwing on the property access.
 *
 * @param {object | null | undefined} config - Provider config, possibly absent.
 * @param {string} key - Setting name.
 * @returns {string | undefined} The trimmed value, or undefined when the
 *   config is absent or the setting is not a non-blank string.
 */
function readProviderConfigString(config, key) {
  const value = config?.[key];
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
373
/**
 * Reads a boolean setting from a provider config object.
 *
 * @param {object | null | undefined} config - Provider config, possibly absent.
 * @param {string} key - Setting name.
 * @returns {boolean | undefined} The value when it is a real boolean,
 *   otherwise undefined.
 */
function readProviderConfigBoolean(config, key) {
  const candidate = config?.[key];
  if (typeof candidate === "boolean") return candidate;
  return undefined;
}
377
/**
 * Picks the effective voice mode from the voice config.
 *
 * @param {{ mode?: string } | null | undefined} voice - Voice config, possibly absent.
 * @returns {"stt-tts" | "bidi" | "agent-proxy"} The configured mode when it
 *   is "stt-tts" or "bidi"; "agent-proxy" for anything else.
 */
function resolveDiscordVoiceMode(voice) {
  switch (voice?.mode) {
    case "stt-tts":
      return "stt-tts";
    case "bidi":
      return "bidi";
    default:
      return "agent-proxy";
  }
}
382
/**
 * Tells whether a voice mode is one of the realtime modes
 * ("agent-proxy" or "bidi").
 *
 * @param {unknown} mode - Voice mode to test.
 * @returns {boolean} True for "agent-proxy" and "bidi".
 */
function isDiscordRealtimeVoiceMode(mode) {
  switch (mode) {
    case "agent-proxy":
    case "bidi":
      return true;
    default:
      return false;
  }
}
385
/**
 * Tells whether a voice mode is exactly "agent-proxy".
 *
 * @param {unknown} mode - Voice mode to test.
 * @returns {boolean} True only for "agent-proxy".
 */
function isDiscordAgentProxyVoiceMode(mode) {
  const AGENT_PROXY_MODE = "agent-proxy";
  return mode === AGENT_PROXY_MODE;
}
388
/**
 * Resolves the provider-level `interruptResponseOnInputAudio` setting.
 *
 * @param {{ realtimeConfig?: { providers?: object }, providerId: string }} params
 * @returns {boolean} The configured boolean for the given provider, or true
 *   when it is not set to a boolean.
 */
function resolveDiscordRealtimeInterruptResponseOnInputAudio(params) {
  const { realtimeConfig, providerId } = params;
  const providerSettings = realtimeConfig?.providers?.[providerId];
  const configured = readProviderConfigBoolean(providerSettings, "interruptResponseOnInputAudio");
  return configured ?? true;
}
392
/**
 * Resolves whether barge-in is enabled for the realtime session.
 *
 * An explicit boolean `realtimeConfig.bargeIn` wins; otherwise the result
 * follows the provider's interrupt-on-input-audio setting.
 *
 * @param {{ realtimeConfig?: { bargeIn?: unknown }, providerId: string }} params
 * @returns {boolean} The effective barge-in setting.
 */
function resolveDiscordRealtimeBargeIn(params) {
  const explicit = params.realtimeConfig?.bargeIn;
  return typeof explicit === "boolean"
    ? explicit
    : resolveDiscordRealtimeInterruptResponseOnInputAudio(params);
}
397
/**
 * Builds the message instructing the realtime model to speak `text`
 * verbatim, without calling any tool.
 *
 * The answer is embedded as a JSON string literal after "Answer: ".
 *
 * @param {string} text - Exact answer to be spoken.
 * @returns {string} A four-line instruction message.
 */
function buildDiscordSpeakExactUserMessage(text) {
  const lines = [];
  lines.push("Internal OpenClaw voice playback result.");
  lines.push("Do not call openclaw_agent_consult or any other tool for this message.");
  lines.push("Speak this exact OpenClaw answer to the Discord voice channel, without adding, removing, or rephrasing words.");
  lines.push(`Answer: ${JSON.stringify(text)}`);
  return lines.join("\n");
}
405
/**
 * Tells whether the character at `quoteIndex` is escaped, i.e. preceded by
 * an odd number of consecutive backslashes.
 *
 * @param {string} text - Text being scanned.
 * @param {number} quoteIndex - Index of the quote character.
 * @returns {boolean} True when the quote is backslash-escaped.
 */
function isEscapedQuote(text, quoteIndex) {
  let cursor = quoteIndex - 1;
  while (cursor >= 0 && text[cursor] === "\\") {
    cursor -= 1;
  }
  const precedingBackslashes = quoteIndex - 1 - cursor;
  return precedingBackslashes % 2 === 1;
}
410
/**
 * Extracts the first JSON string literal that follows `label` in `text`.
 *
 * The literal starts at the first double quote after the label and ends at
 * the next unescaped double quote.
 *
 * @param {string} text - Text to search.
 * @param {string} label - Marker preceding the JSON string.
 * @returns {string | undefined} The decoded string, or undefined when the
 *   label, an opening quote, or a closing quote is missing, or the literal
 *   is not valid JSON.
 */
function readJsonStringAfterLabel(text, label) {
  const labelAt = text.indexOf(label);
  if (labelAt < 0) return undefined;

  const openQuoteAt = text.indexOf("\"", labelAt + label.length);
  if (openQuoteAt < 0) return undefined;

  // Advance to the first quote that is not backslash-escaped.
  let closeQuoteAt = openQuoteAt + 1;
  while (closeQuoteAt < text.length && (text[closeQuoteAt] !== "\"" || isEscapedQuote(text, closeQuoteAt))) {
    closeQuoteAt += 1;
  }
  if (closeQuoteAt >= text.length) return undefined;

  try {
    const decoded = JSON.parse(text.slice(openQuoteAt, closeQuoteAt + 1));
    return typeof decoded === "string" ? decoded : undefined;
  } catch {
    return undefined;
  }
}
425
/**
 * Gathers the string-valued text fields from consult tool arguments.
 *
 * @param {unknown} args - Tool arguments: an object, a bare string, or
 *   anything else.
 * @returns {string[]} For an object, the string values of `question`,
 *   `prompt`, `query`, `task`, `context` and `responseStyle`, in that order;
 *   for a string, a one-element array; otherwise an empty array.
 */
function collectRealtimeConsultArgStrings(args) {
  if (typeof args === "string") return [args];
  if (!args || typeof args !== "object") return [];

  const textFields = ["question", "prompt", "query", "task", "context", "responseStyle"];
  return textFields
    .map((field) => args[field])
    .filter((fieldValue) => typeof fieldValue === "string");
}
441
/**
 * Recovers the exact answer text from consult arguments that are really an
 * echoed "speak this exact answer" instruction.
 *
 * @param {unknown} args - Consult tool arguments.
 * @returns {string | undefined} The embedded answer, or undefined when the
 *   arguments contain neither marker phrase or no parsable answer string.
 */
function extractDiscordExactSpeechConsultText(args) {
  const combined = collectRealtimeConsultArgStrings(args).join("\n");
  const isExactSpeechRequest =
    combined.includes("Speak this exact OpenClaw answer") ||
    combined.includes("Speak the provided exact answer verbatim");
  if (!isExactSpeechRequest) return undefined;

  const answer = readJsonStringAfterLabel(combined, "Answer:");
  return answer ?? readJsonStringAfterLabel(combined, "Provided answer text:");
}
446
/**
 * Normalizes text for loose comparison: lower-cased, whitespace runs
 * collapsed to a single space, and trimmed.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeRealtimeConsultMatchText(text) {
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ");
  return collapsed.trim();
}
449
/**
 * Decides whether a consult message refers to a pending question.
 *
 * Both texts are normalized first; they match when either one contains the
 * other. Blank input on either side never matches.
 *
 * @param {string} consultMessage - Text taken from the consult request.
 * @param {string} question - Pending question to compare against.
 * @returns {boolean} True when the normalized texts overlap by containment.
 */
function matchesPendingAgentProxyQuestion(consultMessage, question) {
  const consultText = normalizeRealtimeConsultMatchText(consultMessage);
  const questionText = normalizeRealtimeConsultMatchText(question);
  if (!consultText || !questionText) return false;

  if (consultText.includes(questionText)) return true;
  return questionText.includes(consultText);
}
455
/**
 * One realtime voice session for a Discord voice connection.
 *
 * Bridges Discord speaker audio to a realtime voice provider, plays provider audio back
 * through the entry's audio player, and routes agent consults (tool calls, forced
 * consults, talkback) through `params.runAgentTurn`.
 */
var DiscordRealtimeVoiceSession = class {
  /**
   * @param {object} params Session parameters. This class reads `entry`, `mode`, `cfg`,
   *   `discordConfig` and `runAgentTurn` from it.
   */
  constructor(params) {
    this.params = params;
    this.bridge = null;
    this.outputStream = null;
    this.stopped = false;
    this.consultToolPolicy = "safe-read-only";
    this.consultPolicy = "auto";
    this.pendingAgentProxyConsultContexts = [];
    this.recentAgentProxyConsultContexts = [];
    this.pendingSpeakerTurns = [];
    this.outputAudioTimestampMs = 0;
    this.outputAudioDiscordBytes = 0;
    this.outputAudioRealtimeBytes = 0;
    this.outputAudioChunks = 0;
    this.outputStreamEnding = false;
    this.queuedExactSpeechMessages = [];
    this.exactSpeechResponseActive = false;
    this.exactSpeechAudioStarted = false;
    // Kept as a stable reference so close() can unregister exactly this listener.
    this.playerIdleHandler = () => {
      this.resetOutputStream("player-idle");
      this.completeExactSpeechResponse("player-idle");
    };
    this.talkback = createRealtimeVoiceAgentTalkbackQueue({
      debounceMs: this.realtimeConfig?.debounceMs ?? DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS,
      isStopped: () => this.stopped,
      logger: logger$2,
      logPrefix: "[discord] realtime agent",
      responseStyle: "Brief, natural spoken answer for a Discord voice channel.",
      fallbackText: DISCORD_REALTIME_FALLBACK_TEXT,
      consult: async ({ question, responseStyle, metadata }) => {
        const context = isDiscordRealtimeSpeakerContext(metadata) ? metadata : void 0;
        return { text: await this.runAgentTurn({
          context,
          message: formatVoiceIngressPrompt([question, responseStyle ? `Spoken style: ${responseStyle}` : void 0].filter(Boolean).join("\n\n"), context?.speakerLabel ?? "Discord voice speaker")
        }) };
      },
      deliver: (text) => this.enqueueExactSpeechMessage(text)
    });
  }

  /**
   * Resolves the realtime provider, creates the bridge session, registers the player
   * idle listener and awaits the bridge connection.
   */
  async connect() {
    const resolved = resolveConfiguredRealtimeVoiceProvider({
      configuredProviderId: this.realtimeConfig?.provider,
      providerConfigs: buildProviderConfigs(this.realtimeConfig),
      providerConfigOverrides: buildProviderConfigOverrides(this.realtimeConfig),
      cfg: this.params.cfg,
      defaultModel: this.realtimeConfig?.model,
      noRegisteredProviderMessage: "No configured realtime voice provider registered"
    });
    const isAgentProxy = isDiscordAgentProxyVoiceMode(this.params.mode);
    const defaultToolPolicy = isAgentProxy ? "owner" : "safe-read-only";
    const toolPolicy = resolveRealtimeVoiceAgentConsultToolPolicy(this.realtimeConfig?.toolPolicy, defaultToolPolicy);
    this.consultToolPolicy = toolPolicy;
    this.consultToolsAllow = resolveRealtimeVoiceAgentConsultToolsAllow(toolPolicy);
    const consultPolicy = this.realtimeConfig?.consultPolicy ?? (isAgentProxy ? "always" : "auto");
    this.consultPolicy = consultPolicy;
    const usesRealtimeAgentHandoff = this.params.mode === "bidi" || toolPolicy !== "none";
    // Agent-proxy sessions with consultPolicy "always" do not let the provider auto-respond.
    const autoRespondToAudio = !isAgentProxy || consultPolicy !== "always";
    const interruptResponseOnInputAudio = resolveDiscordRealtimeInterruptResponseOnInputAudio({
      realtimeConfig: this.realtimeConfig,
      providerId: resolved.provider.id
    });
    const instructions = buildDiscordRealtimeInstructions({
      mode: this.params.mode,
      instructions: this.realtimeConfig?.instructions,
      toolPolicy,
      consultPolicy
    });
    this.bridge = createRealtimeVoiceBridgeSession({
      provider: resolved.provider,
      providerConfig: resolved.providerConfig,
      audioFormat: REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
      instructions,
      autoRespondToAudio,
      interruptResponseOnInputAudio,
      markStrategy: "ack-immediately",
      tools: usesRealtimeAgentHandoff ? resolveRealtimeVoiceAgentConsultTools(toolPolicy) : [],
      audioSink: {
        isOpen: () => !this.stopped,
        sendAudio: (audio) => this.sendOutputAudio(audio),
        clearAudio: () => this.clearOutputAudio("provider-clear-audio")
      },
      onTranscript: (role, text, isFinal) => {
        if (isFinal && text.trim()) logger$2.info(`discord voice: realtime ${role} transcript (${text.length} chars): ${formatRealtimeLogPreview(text)}`);
        // Only final user transcripts in agent-proxy mode trigger a consult.
        if (!isFinal || role !== "user" || !isDiscordAgentProxyVoiceMode(this.params.mode)) return;
        if (usesRealtimeAgentHandoff) {
          this.scheduleForcedAgentProxyConsult(text);
          return;
        }
        this.talkback.enqueue(text, this.consumePendingSpeakerContext());
      },
      onToolCall: (event, session) => this.handleToolCall(event, session),
      onEvent: (event) => {
        const detail = event.detail ? ` ${event.detail}` : "";
        logVoiceVerbose(`realtime ${event.direction}:${event.type}${detail}`);
        if (event.direction === "server" && (event.type === "response.done" || event.type === "response.cancelled")) {
          // A response that finished without producing audio must still release the exact-speech slot.
          if (this.exactSpeechResponseActive && !this.exactSpeechAudioStarted) this.completeExactSpeechResponse(event.type);
          this.finishOutputAudioStream(event.type);
        }
        const interruptionLog = formatRealtimeInterruptionLog(event);
        if (interruptionLog) logger$2.info(interruptionLog);
      },
      onError: (error) => logger$2.warn(`discord voice: realtime error: ${formatErrorMessage(error)}`),
      onClose: (reason) => logVoiceVerbose(`realtime closed: ${reason}`)
    });
    const resolvedModel = readProviderConfigString(resolved.providerConfig, "model") ?? resolved.provider.defaultModel;
    const resolvedVoice = readProviderConfigString(resolved.providerConfig, "voice");
    logger$2.info(`discord voice: realtime bridge starting mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"} consultPolicy=${consultPolicy} toolPolicy=${toolPolicy} autoRespond=${autoRespondToAudio} interruptResponse=${interruptResponseOnInputAudio} bargeIn=${resolveDiscordRealtimeBargeIn({
      realtimeConfig: this.realtimeConfig,
      providerId: resolved.provider.id
    })} minBargeInAudioEndMs=${resolveDiscordRealtimeMinBargeInAudioEndMs(this.realtimeConfig)}`);
    const voiceSdk = loadDiscordVoiceSdk();
    this.params.entry.player.on(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
    await this.bridge.connect();
    logger$2.info(`discord voice: realtime bridge ready mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"}`);
  }

  /** Stops the session: clears timers and queues, stops playback, closes the bridge, removes the idle listener. */
  close() {
    this.stopped = true;
    this.talkback.close();
    this.clearForcedConsultTimers();
    this.pendingAgentProxyConsultContexts = [];
    this.recentAgentProxyConsultContexts = [];
    this.pendingSpeakerTurns.length = 0;
    this.queuedExactSpeechMessages = [];
    this.exactSpeechResponseActive = false;
    this.exactSpeechAudioStarted = false;
    this.clearOutputAudio("session-close");
    this.bridge?.close();
    this.bridge = null;
    const voiceSdk = loadDiscordVoiceSdk();
    this.params.entry.player.off(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
  }

  /**
   * Opens a speaker turn and returns handles to feed audio into it and to close it.
   * @param {object} context Speaker context; stored on the turn together with `userId`.
   * @param {string} userId Discord user id of the speaker.
   * @returns {{ sendInputAudio: Function, close: Function }}
   */
  beginSpeakerTurn(context, userId) {
    const turn = {
      context: {
        ...context,
        userId
      },
      hasAudio: false,
      inputDiscordBytes: 0,
      inputRealtimeBytes: 0,
      inputChunks: 0,
      interruptedPlayback: false,
      closed: false,
      startedAt: Date.now()
    };
    this.pendingSpeakerTurns.push(turn);
    logger$2.info(`discord voice: realtime speaker turn opened guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} user=${userId} speaker=${context.speakerLabel} owner=${context.senderIsOwner} pendingTurns=${this.pendingSpeakerTurns.length}`);
    this.prunePendingSpeakerTurns();
    return {
      sendInputAudio: (discordPcm48kStereo) => this.sendInputAudioForTurn(turn, discordPcm48kStereo),
      close: () => {
        // Log before flipping `closed`; logSpeakerTurnClosed is a no-op for already-closed turns.
        this.logSpeakerTurnClosed(turn);
        turn.closed = true;
        this.prunePendingSpeakerTurns();
      }
    };
  }

  /** Converts one Discord PCM chunk for a turn, triggers barge-in if output is playing, and forwards it to the bridge. */
  sendInputAudioForTurn(turn, discordPcm48kStereo) {
    if (!this.bridge || this.stopped) return;
    turn.hasAudio = true;
    const realtimePcm = convertDiscordPcm48kStereoToRealtimePcm24kMono(discordPcm48kStereo);
    if (realtimePcm.length > 0) {
      turn.inputDiscordBytes += discordPcm48kStereo.length;
      turn.inputRealtimeBytes += realtimePcm.length;
      turn.inputChunks += 1;
      turn.lastAudioAt = Date.now();
      if (turn.inputChunks === 1) logger$2.info(`discord voice: realtime input audio started guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} user=${turn.context.userId} speaker=${turn.context.speakerLabel} discordBytes=${discordPcm48kStereo.length} realtimeBytes=${realtimePcm.length} outputAudioMs=${Math.floor(this.outputAudioTimestampMs)} outputActive=${this.isOutputAudioActive()}`);
      const outputActive = this.hasInterruptibleOutputAudio();
      // Each turn interrupts playback at most once.
      if (!turn.interruptedPlayback && this.isBargeInEnabled() && outputActive) {
        turn.interruptedPlayback = true;
        logVoiceVerbose(`realtime barge-in from active speaker audio: guild ${this.params.entry.guildId} channel ${this.params.entry.channelId} user ${turn.context.userId}`);
        logger$2.info(`discord voice: realtime barge-in detected source=active-speaker-audio guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} user=${turn.context.userId} speaker=${turn.context.speakerLabel} outputAudioMs=${Math.floor(this.outputAudioTimestampMs)} outputActive=${this.isOutputAudioActive()} discordBytes=${discordPcm48kStereo.length} realtimeBytes=${realtimePcm.length}`);
        this.handleBargeIn("active-speaker-audio");
      }
      this.bridge.sendAudio(realtimePcm);
    }
  }

  /** Asks the bridge to handle a barge-in, unless barge-in is disabled or no output audio is interruptible. */
  handleBargeIn(reason = "barge-in") {
    if (!this.isBargeInEnabled()) {
      logger$2.info(`discord voice: realtime barge-in ignored reason=${reason} bargeIn=false guild=${this.params.entry.guildId} channel=${this.params.entry.channelId}`);
      return;
    }
    if (!this.hasInterruptibleOutputAudio()) {
      logger$2.info(`discord voice: realtime barge-in ignored reason=${reason} outputActive=false guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} playbackChunks=${this.outputAudioChunks}`);
      return;
    }
    logger$2.info(`discord voice: realtime barge-in requested reason=${reason} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} outputAudioMs=${Math.floor(this.outputAudioTimestampMs)} outputActive=${this.isOutputAudioActive()} playbackChunks=${this.outputAudioChunks}`);
    this.bridge?.handleBargeIn({ audioPlaybackActive: true });
  }

  /** Resolves the barge-in setting for the configured provider id (falls back to "openai"). */
  isBargeInEnabled() {
    const providerId = this.realtimeConfig?.provider ?? "openai";
    return resolveDiscordRealtimeBargeIn({
      realtimeConfig: this.realtimeConfig,
      providerId
    });
  }

  /** Syncs the media timestamp to the bridge, then reports whether any output audio is active or accounted for. */
  hasInterruptibleOutputAudio() {
    this.syncOutputAudioTimestamp();
    return this.isOutputAudioActive() || this.outputAudioChunks > 0 || this.outputAudioTimestampMs > 0;
  }

  /** Realtime voice configuration from the Discord config, if present. */
  get realtimeConfig() {
    return this.params.discordConfig.voice?.realtime;
  }

  /** Converts one provider PCM chunk to Discord PCM, writes it to the output stream and updates playback stats. */
  sendOutputAudio(realtimePcm24kMono) {
    const discordPcm = convertRealtimePcm24kMonoToDiscordPcm48kStereo(realtimePcm24kMono);
    if (discordPcm.length === 0) return;
    this.syncOutputAudioTimestamp();
    const stream = this.ensureOutputStream();
    if (this.exactSpeechResponseActive) this.exactSpeechAudioStarted = true;
    stream.write(discordPcm);
    this.outputAudioDiscordBytes += discordPcm.length;
    this.outputAudioRealtimeBytes += realtimePcm24kMono.length;
    this.outputAudioChunks += 1;
    this.outputAudioTimestampMs += pcm16MonoDurationMs(realtimePcm24kMono, REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ.sampleRateHz);
  }

  /** Returns the current output stream, or creates a new PassThrough and starts playing it on the entry's player. */
  ensureOutputStream() {
    if (this.outputStream && !this.outputStream.destroyed) return this.outputStream;
    const voiceSdk = loadDiscordVoiceSdk();
    const stream = new PassThrough();
    this.outputStream = stream;
    this.outputAudioStartedAt = Date.now();
    stream.once("close", () => {
      // Ignore close events from streams that have already been replaced or reset.
      if (this.outputStream === stream) {
        this.logOutputAudioStopped("stream-close");
        this.outputStream = null;
        this.resetOutputAudioStats();
        this.completeExactSpeechResponse("stream-close");
      }
    });
    const resource = voiceSdk.createAudioResource(stream, { inputType: voiceSdk.StreamType.Raw });
    this.params.entry.player.play(resource);
    const realtimeConfig = this.realtimeConfig;
    logger$2.info(`discord voice: realtime audio playback started guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} mode=${this.params.mode} model=${realtimeConfig?.model ?? "provider-default"} voice=${realtimeConfig?.voice ?? "provider-default"}`);
    return stream;
  }

  /** Drops the output stream and force-stops the player. */
  clearOutputAudio(reason = "clear") {
    this.resetOutputStream(reason);
    this.params.entry.player.stop(true);
  }

  /** Logs the stop, detaches and destroys the current output stream, and zeroes playback stats. */
  resetOutputStream(reason = "reset") {
    const stream = this.outputStream;
    this.logOutputAudioStopped(reason);
    this.outputStream = null;
    this.resetOutputAudioStats();
    stream?.end();
    stream?.destroy();
  }

  /** Ends (without destroying) the current output stream once; repeated calls while ending are ignored. */
  finishOutputAudioStream(reason) {
    const stream = this.outputStream;
    if (!stream || stream.destroyed || this.outputStreamEnding) return;
    this.outputStreamEnding = true;
    logger$2.info(`discord voice: realtime audio playback finishing reason=${reason} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} audioMs=${Math.floor(this.outputAudioTimestampMs)} chunks=${this.outputAudioChunks}`);
    stream.end();
  }

  /** Sends `text` to be spoken verbatim now, or queues it while another exact-speech response or output audio is active. */
  enqueueExactSpeechMessage(text) {
    if (this.stopped || !text.trim()) return;
    if (this.exactSpeechResponseActive || this.hasInterruptibleOutputAudio()) {
      this.queuedExactSpeechMessages.push(text);
      logger$2.info(`discord voice: realtime exact speech queued guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} queued=${this.queuedExactSpeechMessages.length} outputAudioMs=${Math.floor(this.outputAudioTimestampMs)} outputActive=${this.isOutputAudioActive()}`);
      return;
    }
    this.sendExactSpeechMessage(text);
  }

  /** Marks an exact-speech response as active and sends the speak-exact user message to the bridge. */
  sendExactSpeechMessage(text) {
    if (this.stopped || !text.trim()) return;
    this.exactSpeechResponseActive = true;
    this.exactSpeechAudioStarted = false;
    this.bridge?.sendUserMessage(buildDiscordSpeakExactUserMessage(text));
  }

  /** Clears the exact-speech flags and tries to send the next queued message. */
  completeExactSpeechResponse(reason) {
    if (!this.exactSpeechResponseActive && this.queuedExactSpeechMessages.length === 0) return;
    this.exactSpeechResponseActive = false;
    this.exactSpeechAudioStarted = false;
    this.drainQueuedExactSpeechMessages(reason);
  }

  /** Sends the oldest queued exact-speech message when nothing is active or playing. */
  drainQueuedExactSpeechMessages(reason) {
    if (this.stopped || this.exactSpeechResponseActive || this.queuedExactSpeechMessages.length === 0 || this.hasInterruptibleOutputAudio()) return;
    const next = this.queuedExactSpeechMessages.shift();
    if (!next) return;
    logger$2.info(`discord voice: realtime exact speech dequeued reason=${reason} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} queued=${this.queuedExactSpeechMessages.length}`);
    this.sendExactSpeechMessage(next);
  }

  /** Logs playback totals if a stream exists or any audio was accounted for. */
  logOutputAudioStopped(reason) {
    const audioMs = Math.floor(this.outputAudioTimestampMs);
    const chunks = this.outputAudioChunks;
    const discordBytes = this.outputAudioDiscordBytes;
    const realtimeBytes = this.outputAudioRealtimeBytes;
    const elapsedMs = this.outputAudioStartedAt ? Date.now() - this.outputAudioStartedAt : 0;
    if (this.outputStream || chunks > 0 || audioMs > 0) logger$2.info(`discord voice: realtime audio playback stopped reason=${reason} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} audioMs=${audioMs} elapsedMs=${elapsedMs} chunks=${chunks} discordBytes=${discordBytes} realtimeBytes=${realtimeBytes}`);
  }

  /** Zeroes all playback counters and clears the "ending" flag. */
  resetOutputAudioStats() {
    this.outputAudioTimestampMs = 0;
    this.outputAudioDiscordBytes = 0;
    this.outputAudioRealtimeBytes = 0;
    this.outputAudioChunks = 0;
    this.outputAudioStartedAt = void 0;
    this.outputStreamEnding = false;
  }

  /** Pushes the floored playback timestamp to the bridge, if one exists. */
  syncOutputAudioTimestamp() {
    this.bridge?.setMediaTimestamp(Math.floor(this.outputAudioTimestampMs));
  }

  /** True when a live output stream exists or at least one chunk has been written since the last reset. */
  isOutputAudioActive() {
    return Boolean(this.outputStream && !this.outputStream.destroyed) || this.outputAudioChunks > 0;
  }

  /** Logs a summary for a turn that is about to be closed; no-op if it is already closed. */
  logSpeakerTurnClosed(turn) {
    if (turn.closed) return;
    const elapsedMs = Date.now() - turn.startedAt;
    const sinceLastAudioMs = turn.lastAudioAt ? Date.now() - turn.lastAudioAt : void 0;
    logger$2.info(`discord voice: realtime speaker turn closed guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} user=${turn.context.userId} speaker=${turn.context.speakerLabel} owner=${turn.context.senderIsOwner} hasAudio=${turn.hasAudio} chunks=${turn.inputChunks} discordBytes=${turn.inputDiscordBytes} realtimeBytes=${turn.inputRealtimeBytes} elapsedMs=${elapsedMs}${sinceLastAudioMs === void 0 ? "" : ` sinceLastAudioMs=${sinceLastAudioMs}`} interruptedPlayback=${turn.interruptedPlayback}`);
  }

  /**
   * Handles a provider tool call. Only the agent-consult tool is served; the result
   * (or an error) is always reported back through `session.submitToolResult`.
   */
  handleToolCall(event, session) {
    // callId always ends up truthy, so log lines below use it directly.
    const callId = event.callId || event.itemId || "unknown";
    if (event.name !== REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
      session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
      return;
    }
    if (this.consultToolPolicy === "none") {
      session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
      return;
    }
    const exactSpeechText = extractDiscordExactSpeechConsultText(event.args);
    if (exactSpeechText !== void 0) {
      logger$2.info(`discord voice: realtime exact speech consult bypassed call=${callId} answerChars=${exactSpeechText.length}`);
      session.submitToolResult(callId, { text: exactSpeechText });
      return;
    }
    const consultMessage = buildRealtimeVoiceAgentConsultChatMessage(event.args);
    logger$2.info(`discord voice: realtime consult requested call=${callId} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} question=${formatRealtimeLogPreview(consultMessage)}`);
    if (session.bridge.supportsToolResultContinuation) session.submitToolResult(callId, buildRealtimeVoiceAgentConsultWorkingResponse("speaker"), { willContinue: true });
    // 1) Prefer a context captured by a scheduled forced consult (this also cancels its timer).
    const pendingConsultContext = this.consumeAgentProxyConsultContext(consultMessage);
    if (pendingConsultContext) this.addRecentAgentProxyConsultQuestion(pendingConsultContext.recent, consultMessage);
    let context = pendingConsultContext?.context;
    let recent = pendingConsultContext?.recent;
    // 2) Otherwise try to reuse or join a recent consult for the same question.
    if (!context) {
      const recentConsult = this.findRecentAgentProxyConsultContext(consultMessage);
      if (recentConsult) {
        if (this.hasPendingSpeakerAudioContext()) {
          logger$2.info(`discord voice: realtime consult matched recent agent result but newer speaker audio is pending call=${callId} speaker=${recentConsult.context.speakerLabel} owner=${recentConsult.context.senderIsOwner}`);
          session.submitToolResult(callId, { error: "Discord speaker context changed before this realtime consult completed" });
          return;
        }
        if (this.submitRecentAgentProxyConsultResult(callId, recentConsult, session)) return;
      }
    }
    // 3) Otherwise fall back to the oldest pending speaker turn that carried audio.
    if (!context) {
      context = this.consumePendingSpeakerContext();
      if (context) recent = this.rememberRecentAgentProxyConsultContext(consultMessage, context);
    }
    if (!context) {
      logger$2.warn(`discord voice: realtime consult has no speaker context call=${callId}`);
      session.submitToolResult(callId, { error: "No Discord speaker context available" });
      return;
    }
    const promise = this.runAgentTurn({
      context,
      message: consultMessage
    });
    if (recent) this.setRecentAgentProxyConsultPromise(recent, promise);
    promise.then((text) => {
      logger$2.info(`discord voice: realtime consult answer (${text.length} chars) voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel} owner=${context.senderIsOwner}: ${formatRealtimeLogPreview(text)}`);
      session.submitToolResult(callId, { text });
    }).catch((error) => {
      logger$2.warn(`discord voice: realtime consult failed call=${callId}: ${formatErrorMessage(error)}`);
      session.submitToolResult(callId, { error: formatErrorMessage(error) });
    });
  }

  /**
   * Runs one agent turn through `params.runAgentTurn`.
   * @returns {Promise<string>} The agent text, or "" when no speaker context is given.
   */
  async runAgentTurn(params) {
    const context = params.context;
    if (!context) return "";
    return this.params.runAgentTurn({
      context,
      message: params.message,
      toolsAllow: this.consultToolsAllow,
      userId: context.userId
    });
  }

  /**
   * When consultPolicy is "always", arms a fallback timer that forces an agent consult
   * for a final user transcript unless a matching tool call consumes it first.
   */
  scheduleForcedAgentProxyConsult(transcript) {
    if (this.consultPolicy !== "always") return;
    const question = transcript.trim();
    if (!question) return;
    const context = this.consumePendingSpeakerContext();
    if (!context) {
      const recent = this.findRecentAgentProxyConsultContext(question);
      if (recent) {
        logVoiceVerbose(`realtime forced agent consult skipped (already delegated): guild ${this.params.entry.guildId} channel ${this.params.entry.channelId} speaker ${recent.context.userId}`);
        return;
      }
      logger$2.warn("discord voice: realtime forced agent consult has no speaker context");
      return;
    }
    const pending = {
      context,
      question,
      recent: this.rememberRecentAgentProxyConsultContext(question, context)
    };
    this.pendingAgentProxyConsultContexts.push(pending);
    pending.timer = setTimeout(() => {
      pending.timer = void 0;
      // runForcedAgentProxyConsult is async and only guards the agent turn itself with
      // try/catch; attach a handler so any other failure is logged instead of becoming
      // an unhandled promise rejection.
      this.runForcedAgentProxyConsult(pending).catch((error) => {
        logger$2.warn(`discord voice: realtime forced agent consult crashed: ${formatErrorMessage(error)}`);
      });
    }, DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS);
    pending.timer.unref?.();
  }

  /** Cancels the fallback timers of all pending forced consults. */
  clearForcedConsultTimers() {
    for (const pending of this.pendingAgentProxyConsultContexts) this.clearForcedConsultTimer(pending);
  }

  /** Cancels one pending forced consult's timer, if armed. */
  clearForcedConsultTimer(pending) {
    if (!pending.timer) return;
    clearTimeout(pending.timer);
    pending.timer = void 0;
  }

  /**
   * Removes and returns the pending forced consult for a tool-call message. A single
   * pending entry is taken unconditionally; with several, the question must match.
   */
  consumeAgentProxyConsultContext(consultMessage) {
    let pending;
    if (this.pendingAgentProxyConsultContexts.length === 1) pending = this.pendingAgentProxyConsultContexts.shift();
    else if (this.pendingAgentProxyConsultContexts.length > 1) {
      const index = this.pendingAgentProxyConsultContexts.findIndex((candidate) => matchesPendingAgentProxyQuestion(consultMessage, candidate.question));
      if (index < 0) return;
      pending = this.pendingAgentProxyConsultContexts.splice(index, 1)[0];
    }
    if (!pending) return;
    this.clearForcedConsultTimer(pending);
    return pending;
  }

  /** Cancels a pending forced consult's timer and removes it from the pending list. */
  removePendingAgentProxyConsultContext(pending) {
    this.clearForcedConsultTimer(pending);
    const index = this.pendingAgentProxyConsultContexts.indexOf(pending);
    if (index >= 0) this.pendingAgentProxyConsultContexts.splice(index, 1);
  }

  /**
   * Runs the forced consult: interrupts any playing output, runs the agent turn and
   * speaks the answer verbatim (or the fallback text on failure).
   */
  async runForcedAgentProxyConsult(pending) {
    this.removePendingAgentProxyConsultContext(pending);
    const { context, question } = pending;
    if (this.stopped) return;
    const startedAt = Date.now();
    logger$2.info(`discord voice: realtime forced agent consult starting chars=${question.length} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel} owner=${context.senderIsOwner}`);
    if (this.hasInterruptibleOutputAudio()) {
      logger$2.info(`discord voice: realtime barge-in requested reason=forced-agent-consult guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} outputAudioMs=${Math.floor(this.outputAudioTimestampMs)} outputActive=${this.isOutputAudioActive()} playbackChunks=${this.outputAudioChunks} force=true`);
      this.bridge?.handleBargeIn({
        audioPlaybackActive: true,
        force: true
      });
      this.clearOutputAudio("forced-agent-consult");
    }
    // Late tool calls for the same question are answered with "already_delivered".
    pending.recent.handledByForcedPlayback = true;
    try {
      const promise = this.runAgentTurn({
        context,
        message: [question, "Context: The realtime model produced a final user transcript without calling openclaw_agent_consult. OpenClaw is forcing the consult because consultPolicy is always."].join("\n\n")
      });
      this.setRecentAgentProxyConsultPromise(pending.recent, promise);
      const text = await promise;
      logger$2.info(`discord voice: realtime forced agent consult answer (${text.length} chars) elapsedMs=${Date.now() - startedAt} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId}: ${formatRealtimeLogPreview(text)}`);
      if (text.trim()) this.enqueueExactSpeechMessage(text);
    } catch (error) {
      logger$2.warn(`discord voice: realtime forced agent consult failed elapsedMs=${Date.now() - startedAt}: ${formatErrorMessage(error)}`);
      this.enqueueExactSpeechMessage(DISCORD_REALTIME_FALLBACK_TEXT);
    }
  }

  /** Removes and returns the context of the first pending speaker turn that carried audio. */
  consumePendingSpeakerContext() {
    this.prunePendingSpeakerTurns();
    this.expireClosedSpeakerTurnsBeforeLaterAudio();
    const index = this.pendingSpeakerTurns.findIndex((turn) => turn.hasAudio);
    if (index < 0) return;
    const [turn] = this.pendingSpeakerTurns.splice(index, 1);
    this.prunePendingSpeakerTurns();
    return turn?.context;
  }

  /** True when any pending speaker turn (after pruning/expiry) carried audio. */
  hasPendingSpeakerAudioContext() {
    this.prunePendingSpeakerTurns();
    this.expireClosedSpeakerTurnsBeforeLaterAudio();
    return this.pendingSpeakerTurns.some((turn) => turn.hasAudio);
  }

  /** Drops closed turns without audio, then trims the list to the configured limit (closed turns first). */
  prunePendingSpeakerTurns() {
    for (let index = this.pendingSpeakerTurns.length - 1; index >= 0; index -= 1) {
      const turn = this.pendingSpeakerTurns[index];
      if (turn?.closed && !turn.hasAudio) this.pendingSpeakerTurns.splice(index, 1);
    }
    while (this.pendingSpeakerTurns.length > DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT) {
      const completedIndex = this.pendingSpeakerTurns.findIndex((turn) => turn.closed);
      // No closed turn available: evict the oldest entry instead.
      this.pendingSpeakerTurns.splice(Math.max(completedIndex, 0), 1);
    }
  }

  /** Removes closed audio turns that are followed by a later turn with audio. */
  expireClosedSpeakerTurnsBeforeLaterAudio() {
    let hasLaterAudio = false;
    // Walk newest-to-oldest so `hasLaterAudio` describes turns after the current index.
    for (let index = this.pendingSpeakerTurns.length - 1; index >= 0; index -= 1) {
      const turn = this.pendingSpeakerTurns[index];
      if (!turn?.hasAudio) continue;
      if (turn.closed && hasLaterAudio) {
        this.pendingSpeakerTurns.splice(index, 1);
        continue;
      }
      hasLaterAudio = true;
    }
  }

  /** Records a new recent-consult entry for `question`/`context` and returns it. */
  rememberRecentAgentProxyConsultContext(question, context) {
    this.pruneRecentAgentProxyConsultContexts();
    const recent = {
      context,
      createdAt: Date.now(),
      questions: [question]
    };
    this.recentAgentProxyConsultContexts.push(recent);
    this.pruneRecentAgentProxyConsultContexts();
    return recent;
  }

  /** Adds `question` to a recent-consult entry unless an equivalent question is already recorded. */
  addRecentAgentProxyConsultQuestion(recent, question) {
    if (recent.questions.some((candidate) => matchesPendingAgentProxyQuestion(question, candidate))) return;
    recent.questions.push(question);
  }

  /** Attaches the in-flight agent promise to a recent-consult entry and records its settled outcome. */
  setRecentAgentProxyConsultPromise(recent, promise) {
    recent.promise = promise;
    promise.then((text) => {
      recent.result = {
        status: "fulfilled",
        text
      };
    }).catch((error) => {
      recent.result = {
        status: "rejected",
        error: formatErrorMessage(error)
      };
    });
  }

  /** Returns the newest recent-consult entry with a question matching `consultMessage`, if any. */
  findRecentAgentProxyConsultContext(consultMessage) {
    this.pruneRecentAgentProxyConsultContexts();
    for (let index = this.recentAgentProxyConsultContexts.length - 1; index >= 0; index -= 1) {
      const recent = this.recentAgentProxyConsultContexts[index];
      if (recent?.questions.some((question) => matchesPendingAgentProxyQuestion(consultMessage, question))) return recent;
    }
  }

  /**
   * Answers a tool call from a recent consult entry, reusing its settled result or
   * joining its in-flight promise.
   * @returns {boolean} false when the entry has neither a result nor a promise.
   */
  submitRecentAgentProxyConsultResult(callId, recent, session) {
    const submitAlreadyDelivered = () => {
      session.submitToolResult(callId, {
        status: "already_delivered",
        message: "OpenClaw already delivered this answer to Discord voice."
      }, { suppressResponse: true });
    };
    const submitResult = (result) => {
      if (recent.handledByForcedPlayback) {
        submitAlreadyDelivered();
        return;
      }
      if (result.status === "fulfilled") {
        session.submitToolResult(callId, { text: result.text });
        return;
      }
      session.submitToolResult(callId, { error: result.error });
    };
    if (recent.result) {
      logger$2.info(`discord voice: realtime consult reused recent agent result call=${callId || "unknown"} speaker=${recent.context.speakerLabel} owner=${recent.context.senderIsOwner}`);
      submitResult(recent.result);
      return true;
    }
    if (!recent.promise) return false;
    logger$2.info(`discord voice: realtime consult joined in-flight agent result call=${callId || "unknown"} speaker=${recent.context.speakerLabel} owner=${recent.context.senderIsOwner}`);
    if (recent.handledByForcedPlayback) {
      // The forced consult speaks the answer itself; only acknowledge once it settles.
      recent.promise.then(submitAlreadyDelivered, submitAlreadyDelivered);
      return true;
    }
    recent.promise.then((text) => session.submitToolResult(callId, { text })).catch((error) => session.submitToolResult(callId, { error: formatErrorMessage(error) }));
    return true;
  }

  /** Drops recent-consult entries older than the TTL, then trims the list to the configured limit (oldest first). */
  pruneRecentAgentProxyConsultContexts() {
    const minCreatedAt = Date.now() - DISCORD_REALTIME_RECENT_AGENT_PROXY_CONSULT_TTL_MS;
    for (let index = this.recentAgentProxyConsultContexts.length - 1; index >= 0; index -= 1) {
      const recent = this.recentAgentProxyConsultContexts[index];
      if (recent && recent.createdAt < minCreatedAt) this.recentAgentProxyConsultContexts.splice(index, 1);
    }
    while (this.recentAgentProxyConsultContexts.length > DISCORD_REALTIME_RECENT_AGENT_PROXY_CONSULT_LIMIT) this.recentAgentProxyConsultContexts.shift();
  }
};
1025
/**
 * Type guard for a Discord realtime speaker context.
 * @param {unknown} value Candidate value (e.g. talkback metadata).
 * @returns {boolean} true when `value` is an object with a string `userId`, a boolean
 *   `senderIsOwner` and a string `speakerLabel`.
 */
function isDiscordRealtimeSpeakerContext(value) {
  if (!value) return false;
  if (typeof value !== "object") return false;
  if (typeof value.userId !== "string") return false;
  if (typeof value.senderIsOwner !== "boolean") return false;
  return typeof value.speakerLabel === "string";
}
1028
/**
 * Duration in milliseconds of a mono PCM16 buffer.
 * @param {{ length: number }} audio Audio buffer; only its byte length is read.
 * @param {number} sampleRate Samples per second.
 * @returns {number} 0 for an empty buffer or a non-positive sample rate.
 */
function pcm16MonoDurationMs(audio, sampleRate) {
  const byteLength = audio.length;
  if (byteLength === 0) return 0;
  if (sampleRate <= 0) return 0;
  const sampleCount = byteLength / REALTIME_PCM16_BYTES_PER_SAMPLE;
  return sampleCount * 1e3 / sampleRate;
}
1032
/**
 * Shallow-copies the per-provider config map from the realtime config.
 * @param {object | undefined} realtimeConfig
 * @returns {object | undefined} A copy of `providers`, or undefined when it is missing or empty.
 */
function buildProviderConfigs(realtimeConfig) {
  const providers = realtimeConfig?.providers;
  if (!providers || Object.keys(providers).length === 0) return void 0;
  return { ...providers };
}
1036
/**
 * Collects provider config overrides from the top-level realtime settings.
 * @param {object | undefined} realtimeConfig
 * @returns {object | undefined} Object with any of `model`, `voice`,
 *   `minBargeInAudioEndMs`, or undefined when none is set.
 */
function buildProviderConfigOverrides(realtimeConfig) {
  const overrides = {};
  if (realtimeConfig?.model) overrides.model = realtimeConfig.model;
  if (realtimeConfig?.voice) overrides.voice = realtimeConfig.voice;
  // A numeric 0 is a valid override, so test the type rather than truthiness.
  if (typeof realtimeConfig?.minBargeInAudioEndMs === "number") overrides.minBargeInAudioEndMs = realtimeConfig.minBargeInAudioEndMs;
  return Object.keys(overrides).length > 0 ? overrides : void 0;
}
1044
/**
 * Returns the configured `minBargeInAudioEndMs` when it is a number, otherwise the default.
 * @param {object | undefined} realtimeConfig
 * @returns {number}
 */
function resolveDiscordRealtimeMinBargeInAudioEndMs(realtimeConfig) {
  const configured = realtimeConfig?.minBargeInAudioEndMs;
  if (typeof configured === "number") return configured;
  return DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS;
}
1047
/**
 * Builds the system instructions sent to the realtime voice provider.
 *
 * Sections are joined with blank lines. Empty sections are dropped in every mode, so
 * missing consult-policy instructions (or an empty custom base) never leave a stray
 * leading or trailing separator.
 *
 * @param {{ mode: string, instructions?: string, toolPolicy: string, consultPolicy: string }} params
 * @returns {string}
 */
function buildDiscordRealtimeInstructions(params) {
  const base = params.instructions ?? ["You are OpenClaw's Discord voice interface.", "Keep spoken replies concise, natural, and suitable for a live Discord voice channel."].join("\n");
  const isAgentProxy = isDiscordAgentProxyVoiceMode(params.mode);
  const consultPolicyInstructions = buildRealtimeVoiceAgentConsultPolicyInstructions({
    toolPolicy: params.toolPolicy,
    consultPolicy: params.consultPolicy
  });
  // Extra guidance that only applies when the voice model fronts the OpenClaw agent.
  const agentProxySections = isAgentProxy ? [
    "Mode: OpenClaw agent proxy.",
    "You are the realtime voice surface for the same OpenClaw agent the user can message directly.",
    "Do not mention a backend, supervisor, helper, or separate system. Present the result as your own work.",
    "Delegate substantive requests, actions, tool work, current facts, memory, workspace context, and user-specific context with openclaw_agent_consult.",
    "Do not block, refuse, or downscope at the voice layer. Delegate to OpenClaw and treat its result as authoritative.",
    "Answer directly only for greetings, acknowledgements, brief latency tests, or filler while waiting.",
    "When OpenClaw sends an internal exact answer to speak, do not call tools. Say only that answer."
  ] : [];
  return [base, ...agentProxySections, consultPolicyInstructions].filter(Boolean).join("\n\n");
}
1068
+ //#endregion
1069
+ //#region extensions/discord/src/voice/receive-recovery.ts
1070
// A decrypt failure arriving more than this many ms after the previous one restarts the count.
const DECRYPT_FAILURE_WINDOW_MS = 30000;
// Number of counted failures at which recovery is requested.
const DECRYPT_FAILURE_RECONNECT_THRESHOLD = 3;
// Substrings looked for in formatted receive-error messages.
const DECRYPT_FAILURE_MARKER = "DecryptionFailed(";
const DAVE_PASSTHROUGH_DISABLED_MARKER = "UnencryptedWhenPassthroughDisabled";
1074
/**
 * Creates a fresh receive-recovery tracker.
 *
 * @returns {{decryptFailureCount: number, lastDecryptFailureAt: number, decryptRecoveryInFlight: boolean}}
 *   A new object with zeroed counters and no recovery in flight.
 */
function createVoiceReceiveRecoveryState() {
	const initialState = {
		decryptFailureCount: 0,
		lastDecryptFailureAt: 0,
		decryptRecoveryInFlight: false
	};
	return initialState;
}
1081
/**
 * Tells whether a receive error looks like an abort.
 *
 * @param {*} err - Any thrown value.
 * @returns {boolean} True for an object whose string `name` is "AbortError" or
 *   whose string `message` contains one of the abort phrases; false for
 *   non-objects and null.
 */
function isAbortLikeReceiveError(err) {
	if (typeof err !== "object" || err === null) return false;
	// Only string-valued fields are considered; anything else counts as "".
	const readString = (key) => key in err && typeof err[key] === "string" ? err[key] : "";
	if (readString("name") === "AbortError") return true;
	const message = readString("message");
	return ["The operation was aborted", "aborted"].some((phrase) => message.includes(phrase));
}
1087
/**
 * Classifies a voice receive error by inspecting its formatted message.
 *
 * @param {*} err - The error raised while receiving voice data.
 * @returns {{message: string, isAbortLike: boolean, shouldAttemptPassthrough: boolean, countsAsDecryptFailure: boolean}}
 *   `shouldAttemptPassthrough` is set when the message contains the
 *   passthrough-disabled marker; `countsAsDecryptFailure` is set for that case
 *   and for messages containing the decrypt-failure marker.
 */
function analyzeVoiceReceiveError(err) {
	const message = formatErrorMessage(err);
	const passthroughDisabled = message.includes(DAVE_PASSTHROUGH_DISABLED_MARKER);
	const decryptFailed = message.includes(DECRYPT_FAILURE_MARKER);
	return {
		message,
		isAbortLike: isAbortLikeReceiveError(err),
		shouldAttemptPassthrough: passthroughDisabled,
		countsAsDecryptFailure: decryptFailed || passthroughDisabled
	};
}
1097
/**
 * Records one decrypt failure on `state` and decides whether recovery should start.
 *
 * The count restarts when the previous failure is older than
 * DECRYPT_FAILURE_WINDOW_MS. Recovery is requested once the count reaches
 * DECRYPT_FAILURE_RECONNECT_THRESHOLD and no recovery is already in flight; in
 * that case the in-flight flag is set and the counters are reset.
 *
 * @param {object} state - Tracker from createVoiceReceiveRecoveryState (mutated).
 * @param {number} [now=Date.now()] - Timestamp of this failure.
 * @returns {{firstFailure: boolean, shouldRecover: boolean}} `firstFailure` is
 *   true when this failure is the first one counted in the current window.
 */
function noteVoiceDecryptFailure(state, now = Date.now()) {
	const windowExpired = now - state.lastDecryptFailureAt > DECRYPT_FAILURE_WINDOW_MS;
	if (windowExpired) state.decryptFailureCount = 0;
	state.lastDecryptFailureAt = now;
	state.decryptFailureCount += 1;
	const firstFailure = state.decryptFailureCount === 1;
	const thresholdReached = state.decryptFailureCount >= DECRYPT_FAILURE_RECONNECT_THRESHOLD;
	const shouldRecover = thresholdReached && !state.decryptRecoveryInFlight;
	if (shouldRecover) {
		state.decryptRecoveryInFlight = true;
		resetVoiceReceiveRecoveryState(state);
	}
	return {
		firstFailure,
		shouldRecover
	};
}
1113
/**
 * Zeroes the decrypt-failure counter and timestamp on `state`.
 * The `decryptRecoveryInFlight` flag is left untouched.
 *
 * @param {object} state - Receive-recovery tracker (mutated in place).
 */
function resetVoiceReceiveRecoveryState(state) {
	Object.assign(state, {
		decryptFailureCount: 0,
		lastDecryptFailureAt: 0
	});
}
1117
/**
 * Marks decrypt recovery as finished by clearing the in-flight flag, which
 * allows noteVoiceDecryptFailure to request another recovery later.
 *
 * @param {object} state - Receive-recovery tracker (mutated in place).
 */
function finishVoiceDecryptRecovery(state) {
	Object.assign(state, { decryptRecoveryInFlight: false });
}
1120
/**
 * Turns on passthrough mode on the connection's DAVE session, if one is reachable.
 *
 * Nothing is attempted unless the voice connection status is Ready, the
 * networking state has code Ready or Resuming, and that state exposes
 * `dave.session`.
 *
 * @param {object} params
 * @param {object} params.target - Supplies `connection`, `guildId`, `channelId`.
 * @param {object} params.sdk - Supplies `VoiceConnectionStatus` and `NetworkingStatusCode`.
 * @param {string} params.reason - Included in the log lines.
 * @param {number} params.expirySeconds - Passed to `setPassthroughMode`.
 * @param {(message: string) => void} params.onVerbose - Called on success.
 * @param {(message: string) => void} params.onWarn - Called when enabling throws.
 * @returns {boolean} True only when `setPassthroughMode` was called and the
 *   success log completed without throwing.
 */
function enableDaveReceivePassthrough(params) {
	const { target, sdk, reason, expirySeconds, onVerbose, onWarn } = params;
	const networkingState = target.connection.state.networking?.state;
	if (target.connection.state.status !== sdk.VoiceConnectionStatus.Ready) return false;
	if (!networkingState) return false;
	const networkingUsable = networkingState.code === sdk.NetworkingStatusCode.Ready || networkingState.code === sdk.NetworkingStatusCode.Resuming;
	if (!networkingUsable) return false;
	const daveSession = networkingState.dave?.session;
	if (!daveSession) return false;
	try {
		daveSession.setPassthroughMode(true, expirySeconds);
		onVerbose(`enabled DAVE receive passthrough: guild ${target.guildId} channel ${target.channelId} expiry=${expirySeconds}s reason=${reason}`);
		return true;
	} catch (err) {
		// Failure is reported through onWarn and returned as false, not thrown.
		onWarn(`discord voice: failed to enable DAVE passthrough guild=${target.guildId} channel=${target.channelId} reason=${reason}: ${formatErrorMessage(err)}`);
		return false;
	}
}
1135
+ //#endregion
1136
+ //#region extensions/discord/src/voice/sanitize.ts
1137
/**
 * Matches runs of pictographic emoji, including variation selectors,
 * zero-width joiners and skin-tone modifiers that follow a pictograph.
 */
const SPEECH_EMOJI_RE = /(?:\p{Extended_Pictographic}(?:\uFE0F|\u200D|\p{Extended_Pictographic}|\p{Emoji_Modifier})*)+/gu;
/**
 * Removes emoji from text that is about to be spoken and tidies the
 * whitespace left behind.
 *
 * @param {string} text - Text to clean.
 * @returns {string} Text with each emoji run replaced by a space, whitespace
 *   before `?!.,:;` removed, repeated spaces/tabs collapsed, spaces around
 *   newlines removed, and both ends trimmed.
 */
function stripEmojiForSpeech(text) {
	const withoutEmoji = text.replace(SPEECH_EMOJI_RE, " ");
	const punctuationTightened = withoutEmoji.replace(/\s+([?!.,:;])/g, "$1");
	const spacesCollapsed = punctuationTightened.replace(/[ \t]{2,}/g, " ");
	const newlinesTidied = spacesCollapsed.replace(/ *\n */g, "\n");
	return newlinesTidied.trim();
}
1141
/**
 * Prepares an agent reply for text-to-speech.
 *
 * Inline directive tags are stripped first; a leading "<speakerLabel>:" prefix
 * (case-insensitive) is removed when a label is given; emoji are stripped last.
 *
 * @param {string} text - Raw reply text.
 * @param {string} [speakerLabel] - Label whose leading "label:" prefix is removed.
 * @returns {string} Speakable text; "" when nothing remains after removing directive tags.
 */
function sanitizeVoiceReplyTextForSpeech(text, speakerLabel) {
	const withoutDirectives = stripInlineDirectiveTagsForDisplay(text).text.trim();
	if (!withoutDirectives) return "";
	const label = speakerLabel?.trim();
	if (!label) return stripEmojiForSpeech(withoutDirectives);
	// The label is escaped so it is matched literally inside the pattern.
	const labelPrefix = new RegExp(`^${escapeRegExp(label)}\\s*:\\s*`, "i");
	return stripEmojiForSpeech(withoutDirectives.replace(labelPrefix, "").trim());
}
1151
+ //#endregion
1152
+ //#region extensions/discord/src/voice/tts.ts
1153
/**
 * Layers a TTS override on top of a base TTS config.
 *
 * Top-level keys are merged shallowly with the override winning;
 * `modelOverrides` and each entry of `providers` are merged one level deeper so
 * an override can change a single field without discarding the rest.
 *
 * @param {object} base - Base TTS config.
 * @param {object} [override] - Override; when falsy, `base` is returned as-is.
 * @returns {object} `base` itself (no override) or a new merged object. A
 *   merged `providers` key is only written when at least one provider id exists.
 */
function mergeTtsConfig(base, override) {
	if (!override) return base;
	const baseProviders = base.providers ?? {};
	const overrideProviders = override.providers ?? {};
	const providerIds = new Set([...Object.keys(baseProviders), ...Object.keys(overrideProviders)]);
	const mergedProviders = Object.fromEntries(Array.from(providerIds, (providerId) => [providerId, {
		...baseProviders[providerId],
		...overrideProviders[providerId]
	}]));
	const merged = {
		...base,
		...override,
		modelOverrides: {
			...base.modelOverrides,
			...override.modelOverrides
		}
	};
	if (providerIds.size > 0) merged.providers = mergedProviders;
	return merged;
}
1175
/**
 * Produces the config and resolved TTS settings to use for voice replies.
 *
 * @param {object} params
 * @param {object} params.cfg - Full config; its `messages.tts` is the base.
 * @param {object} [params.override] - Voice-specific TTS override.
 * @returns {{cfg: object, resolved: *}} Without an override, the original `cfg`
 *   and its resolved TTS config. With one, a shallow copy of `cfg` whose
 *   `messages.tts` is the merged config, plus the resolution of that copy.
 */
function resolveVoiceTtsConfig(params) {
	const { cfg: sourceCfg, override } = params;
	if (!override) return {
		cfg: sourceCfg,
		resolved: resolveTtsConfig(sourceCfg)
	};
	const mergedTts = mergeTtsConfig(sourceCfg.messages?.tts ?? {}, override);
	// Copy instead of mutating: the caller's config object must stay untouched.
	const cfg = {
		...sourceCfg,
		messages: {
			...(sourceCfg.messages ?? {}),
			tts: mergedTts
		}
	};
	return {
		cfg,
		resolved: resolveTtsConfig(cfg)
	};
}
1194
/**
 * Transcribes a recorded voice segment through the Discord runtime's
 * media-understanding service.
 *
 * @param {object} params
 * @param {string} params.filePath - Audio file to transcribe (sent with mime "audio/wav").
 * @param {object} params.cfg - Config forwarded to the transcriber.
 * @param {string} params.agentId - Agent whose directory is resolved for the call.
 * @returns {Promise<*>} The transcript text passed through normalizeOptionalString.
 */
async function transcribeVoiceAudio(params) {
	const { mediaUnderstanding } = getDiscordRuntime();
	const transcription = await mediaUnderstanding.transcribeAudioFile({
		filePath: params.filePath,
		cfg: params.cfg,
		agentDir: resolveAgentDir(params.cfg, params.agentId),
		mime: "audio/wav"
	});
	return normalizeOptionalString(transcription.text);
}
1202
/**
 * Turns an agent reply into playable audio.
 *
 * TTS directives are parsed out of the reply, the remaining text is sanitized
 * for speech, and streaming synthesis is tried first when the runtime offers
 * `textToSpeechStream`. File-based synthesis is used when streaming is
 * unavailable or does not yield a successful result with an audio stream.
 *
 * @param {object} params
 * @param {object} params.cfg - Full config.
 * @param {object} [params.override] - Voice-specific TTS override.
 * @param {string} params.replyText - Agent reply, possibly containing TTS directives.
 * @param {string} [params.speakerLabel] - Label stripped from the start of the reply.
 * @returns {Promise<object>} One of
 *   `{status: "empty"}`,
 *   `{status: "failed", error}`,
 *   `{status: "ok", mode: "stream", audioStream, release, speakText}`,
 *   `{status: "ok", mode: "file", audioPath, speakText}`.
 */
async function synthesizeVoiceReplyAudio(params) {
	const { cfg: ttsCfg, resolved: ttsConfig } = resolveVoiceTtsConfig({
		cfg: params.cfg,
		override: params.override
	});
	const directive = parseTtsDirectives(params.replyText, ttsConfig.modelOverrides, {
		cfg: ttsCfg,
		providerConfigs: ttsConfig.providerConfigs,
		preferredProviderId: getTtsProvider(ttsConfig, resolveTtsPrefsPath(ttsConfig))
	});
	// A directive-supplied ttsText takes precedence over the cleaned reply text.
	const rawSpeakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
	const speakText = sanitizeVoiceReplyTextForSpeech(rawSpeakText, params.speakerLabel);
	if (!speakText) return { status: "empty" };
	const runtime = getDiscordRuntime();
	const baseRequest = {
		text: speakText,
		cfg: ttsCfg,
		channel: "discord",
		overrides: directive.overrides
	};
	const streamResult = await runtime.tts.textToSpeechStream?.({
		...baseRequest,
		disableFallback: true
	});
	if (streamResult?.success && streamResult.audioStream) return {
		status: "ok",
		mode: "stream",
		audioStream: streamResult.audioStream,
		release: streamResult.release,
		speakText
	};
	const fileResult = await runtime.tts.textToSpeech({ ...baseRequest });
	const fileUsable = fileResult.success && fileResult.audioPath;
	if (!fileUsable) return {
		status: "failed",
		error: fileResult.error ?? "unknown error"
	};
	return {
		status: "ok",
		mode: "file",
		audioPath: fileResult.audioPath,
		speakText
	};
}
1246
+ //#endregion
1247
+ //#region extensions/discord/src/voice/segment.ts
1248
/** Maximum number of transcript characters included in a log line. */
const VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS = 500;
/** Logger for voice segment processing. */
const logger$1 = createSubsystemLogger("discord/voice");
/**
 * Builds a single-line, length-capped preview of a transcript for logging.
 *
 * @param {string} text - Transcript text.
 * @returns {string} Text with whitespace runs collapsed to one space and
 *   trimmed; when longer than the cap, the first 500 characters plus "...".
 */
function formatVoiceTranscriptLogPreview(text) {
	const singleLine = text.replace(/\s+/g, " ").trim();
	const needsTruncation = singleLine.length > VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS;
	return needsTruncation ? `${singleLine.slice(0, VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS)}...` : singleLine;
}
1255
/**
 * Runs one captured voice segment through the speech-to-text, agent and
 * text-to-speech pipeline, then queues the spoken reply for playback.
 *
 * Each stage can end the pipeline early; the early exits are logged and the
 * function resolves without a value in every case.
 *
 * @param {object} params
 * @param {object} params.entry - Voice session entry (guild/channel ids, route, player).
 * @param {string} params.wavPath - Path of the recorded audio file to transcribe.
 * @param {string} params.userId - Discord user id of the speaker.
 * @param {number} params.durationSeconds - Segment length, used for logging.
 * @param {object} params.cfg - Full config.
 * @param {object} params.discordConfig - Discord config (`voice.tts` is the TTS override).
 * @param {*} params.runtime - Forwarded to the agent turn.
 * @param {*} params.ownerAllowFrom - Forwarded to ingress resolution and the agent turn.
 * @param {*} params.fetchGuildName - Forwarded to ingress resolution and the agent turn.
 * @param {*} params.speakerContext - Forwarded to ingress resolution and the agent turn.
 * @param {(entry: object, task: () => Promise<void>) => void} params.enqueuePlayback
 *   Receives the playback task.
 * @returns {Promise<void>}
 */
async function processDiscordVoiceSegment(params) {
	const { entry, wavPath, userId, durationSeconds } = params;
	// Log-location fragments are rebuilt on every use so they read the entry at log time.
	const where = () => `guild ${entry.guildId} channel ${entry.channelId}`;
	const whereWithUser = () => `${where()} user ${userId}`;
	logVoiceVerbose(`segment processing (${durationSeconds.toFixed(2)}s): ${where()}`);

	const ingress = await resolveDiscordVoiceIngressContext({
		entry,
		userId,
		cfg: params.cfg,
		discordConfig: params.discordConfig,
		ownerAllowFrom: params.ownerAllowFrom,
		fetchGuildName: params.fetchGuildName,
		speakerContext: params.speakerContext
	});
	if (!ingress) {
		logVoiceVerbose(`segment unauthorized: ${whereWithUser()}`);
		return;
	}

	const transcript = await transcribeVoiceAudio({
		cfg: params.cfg,
		agentId: entry.route.agentId,
		filePath: wavPath
	});
	if (!transcript) {
		logVoiceVerbose(`transcription empty: ${whereWithUser()}`);
		return;
	}
	logVoiceVerbose(`transcription ok (${transcript.length} chars): ${where()}`);
	logVoiceVerbose(`transcript from ${ingress.speakerLabel} (${userId}) in ${where()}: ${formatVoiceTranscriptLogPreview(transcript)}`);

	const turn = await runDiscordVoiceAgentTurn({
		entry,
		userId,
		message: formatVoiceIngressPrompt(transcript, ingress.speakerLabel),
		cfg: params.cfg,
		discordConfig: params.discordConfig,
		runtime: params.runtime,
		context: ingress,
		ownerAllowFrom: params.ownerAllowFrom,
		fetchGuildName: params.fetchGuildName,
		speakerContext: params.speakerContext
	});
	if (!turn) {
		logVoiceVerbose(`segment unauthorized before agent turn: ${whereWithUser()}`);
		return;
	}
	const replyText = turn.text;
	if (!replyText) {
		logVoiceVerbose(`reply empty: ${whereWithUser()}`);
		return;
	}
	logVoiceVerbose(`reply ok (${replyText.length} chars): ${where()}`);

	const voiceReplyAudio = await synthesizeVoiceReplyAudio({
		cfg: params.cfg,
		override: params.discordConfig.voice?.tts,
		replyText,
		speakerLabel: ingress.speakerLabel
	});
	switch (voiceReplyAudio.status) {
		case "empty":
			logVoiceVerbose(`tts skipped (empty): ${whereWithUser()}`);
			return;
		case "failed":
			logger$1.warn(`discord voice: TTS failed: ${voiceReplyAudio.error ?? "unknown error"}`);
			return;
		default: break;
	}
	logVoiceVerbose(`tts ok (${voiceReplyAudio.speakText.length} chars): ${where()}`);

	params.enqueuePlayback(entry, async () => {
		const voiceSdk = loadDiscordVoiceSdk();
		const isStream = voiceReplyAudio.mode === "stream";
		const releaseAudioStream = isStream ? voiceReplyAudio.release : void 0;
		try {
			let resource;
			if (isStream) {
				logVoiceVerbose(`playback start: ${where()} stream`);
				resource = voiceSdk.createAudioResource(Readable.fromWeb(voiceReplyAudio.audioStream));
			} else {
				logVoiceVerbose(`playback start: ${where()} file ${path.basename(voiceReplyAudio.audioPath)}`);
				resource = voiceSdk.createAudioResource(voiceReplyAudio.audioPath);
			}
			entry.player.play(resource);
			// Both waits are best-effort: a rejection or timeout is ignored.
			await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Playing, PLAYBACK_READY_TIMEOUT_MS).catch(() => void 0);
			await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Idle, SPEAKING_READY_TIMEOUT_MS).catch(() => void 0);
			logVoiceVerbose(`playback done: ${where()}`);
		} finally {
			// Streamed audio may carry a release hook; it runs however playback ends.
			await releaseAudioStream?.();
		}
	});
}
1341
+ //#endregion
1342
+ //#region extensions/discord/src/voice/speaker-context.ts
1343
/** How long a resolved speaker context stays cached (60 000 ms). */
const SPEAKER_CONTEXT_CACHE_TTL_MS = 6e4;
/**
 * Resolves who is speaking in a voice channel (display label, username, tag,
 * owner status) and caches the result per guild/user pair for
 * SPEAKER_CONTEXT_CACHE_TTL_MS.
 */
var DiscordVoiceSpeakerContextResolver = class {
	/**
	 * @param {object} params
	 * @param {object} params.client - Must provide `fetchMember(guildId, userId)`
	 *   and `fetchUser(userId)`.
	 * @param {*} params.ownerAllowFrom - Allow-list passed to resolveDiscordOwnerAccess.
	 */
	constructor(params) {
		this.params = params;
		this.cache = /* @__PURE__ */ new Map();
	}
	/**
	 * Returns the speaker context for a user, from cache when still fresh.
	 *
	 * @param {string} guildId
	 * @param {string} userId
	 * @returns {Promise<{id: string, label: string, name: (string|undefined), tag: (string|undefined), senderIsOwner: boolean}>}
	 */
	async resolveContext(guildId, userId) {
		const cached = this.getCachedContext(guildId, userId);
		if (cached) return cached;
		const identity = await this.resolveIdentity(guildId, userId);
		const context = {
			id: identity.id,
			label: identity.label,
			name: identity.name,
			tag: identity.tag,
			senderIsOwner: this.resolveIsOwner(identity)
		};
		this.setCachedContext(guildId, userId, context);
		return context;
	}
	/**
	 * Looks the user up as a guild member, then as a plain user, and finally
	 * falls back to the bare id. Lookup failures are deliberately swallowed so
	 * an identity is always returned.
	 *
	 * @param {string} guildId
	 * @param {string} userId
	 * @returns {Promise<{id: string, label: string, name?: string, tag?: string, memberRoleIds: string[]}>}
	 */
	async resolveIdentity(guildId, userId) {
		try {
			const member = await this.params.client.fetchMember(guildId, userId);
			const username = member.user?.username ?? void 0;
			return {
				id: userId,
				label: member.nickname ?? member.user?.globalName ?? username ?? userId,
				name: username,
				tag: member.user ? formatDiscordUserTag(member.user) : void 0,
				// Roles may be id strings or objects with a string `id`; anything else is dropped.
				memberRoleIds: Array.isArray(member.roles) ? member.roles.map((role) => typeof role === "string" ? role : typeof role?.id === "string" ? role.id : "").filter(Boolean) : []
			};
		} catch {
			try {
				const user = await this.params.client.fetchUser(userId);
				const username = user.username ?? void 0;
				return {
					id: userId,
					label: user.globalName ?? username ?? userId,
					name: username,
					tag: formatDiscordUserTag(user),
					memberRoleIds: []
				};
			} catch {
				return {
					id: userId,
					label: userId,
					memberRoleIds: []
				};
			}
		}
	}
	/**
	 * Checks the identity against the owner allow-list. Name matching is disabled.
	 *
	 * @param {{id: string, name?: string, tag?: string}} identity
	 * @returns {boolean}
	 */
	resolveIsOwner(identity) {
		return resolveDiscordOwnerAccess({
			allowFrom: this.params.ownerAllowFrom,
			sender: {
				id: identity.id,
				name: identity.name,
				tag: identity.tag
			},
			allowNameMatching: false
		}).ownerAllowed;
	}
	/** Cache key for one guild/user pair. */
	resolveCacheKey(guildId, userId) {
		return `${guildId}:${userId}`;
	}
	/**
	 * Reads a fresh cache entry as a new context object. An expired entry is
	 * deleted and reported as a miss.
	 *
	 * @returns {object|undefined}
	 */
	getCachedContext(guildId, userId) {
		const key = this.resolveCacheKey(guildId, userId);
		const cached = this.cache.get(key);
		if (!cached) return;
		if (cached.expiresAt <= Date.now()) {
			this.cache.delete(key);
			return;
		}
		return {
			id: cached.id,
			label: cached.label,
			name: cached.name,
			tag: cached.tag,
			senderIsOwner: cached.senderIsOwner
		};
	}
	/**
	 * Stores a context with a fresh expiry. Expired entries for other speakers
	 * are evicted first so the cache cannot grow without bound when users speak
	 * once and never return.
	 */
	setCachedContext(guildId, userId, context) {
		const now = Date.now();
		this.pruneExpiredContexts(now);
		this.cache.set(this.resolveCacheKey(guildId, userId), {
			...context,
			expiresAt: now + SPEAKER_CONTEXT_CACHE_TTL_MS
		});
	}
	/**
	 * Deletes every cache entry whose expiry is at or before `now`.
	 *
	 * @param {number} [now=Date.now()]
	 */
	pruneExpiredContexts(now = Date.now()) {
		for (const [key, cached] of this.cache) if (cached.expiresAt <= now) this.cache.delete(key);
	}
};
1432
+ //#endregion
1433
+ //#region extensions/discord/src/voice/manager.ts
1434
/** Logger for the Discord voice manager. */
const logger = createSubsystemLogger("discord/voice");
/** Maximum number of characters of free text included in a voice log line. */
const VOICE_LOG_PREVIEW_CHARS = 500;
/**
 * Flattens text to one line and caps its length for logging.
 *
 * @param {string} text - Text to preview.
 * @returns {string} Whitespace-collapsed, trimmed text; when longer than the
 *   cap, the first 500 characters followed by "...".
 */
function formatVoiceLogPreview(text) {
	const flattened = text.replace(/\s+/g, " ").trim();
	if (flattened.length > VOICE_LOG_PREVIEW_CHARS) {
		return `${flattened.slice(0, VOICE_LOG_PREVIEW_CHARS)}...`;
	}
	return flattened;
}
1441
/**
 * Reports whether a voice connection is in the SDK's Destroyed status.
 *
 * @param {object} connection - Voice connection exposing `state.status`.
 * @param {object} voiceSdk - Voice SDK exposing `VoiceConnectionStatus.Destroyed`.
 * @returns {boolean}
 */
function isVoiceConnectionDestroyed(connection, voiceSdk) {
	const { status } = connection.state;
	return status === voiceSdk.VoiceConnectionStatus.Destroyed;
}
1444
/**
 * Destroys a voice connection without letting a failure propagate.
 *
 * A connection already in the Destroyed status is skipped. An error thrown by
 * `destroy()` is logged verbosely when its message says the connection has
 * "already been destroyed", and as a warning otherwise.
 *
 * @param {object} params
 * @param {object} params.connection - Connection to destroy.
 * @param {object} params.voiceSdk - Voice SDK used for the status check.
 * @param {string} params.reason - Why the connection is being destroyed (for logs).
 */
function destroyVoiceConnectionSafely(params) {
	const { connection, voiceSdk, reason } = params;
	if (isVoiceConnectionDestroyed(connection, voiceSdk)) {
		logVoiceVerbose(`destroy skipped: ${reason}; connection already destroyed`);
		return;
	}
	try {
		connection.destroy();
	} catch (err) {
		const message = formatErrorMessage(err);
		const alreadyDestroyed = message.includes("already been destroyed");
		if (alreadyDestroyed) logVoiceVerbose(`destroy skipped: ${reason}; ${message}`);
		else logger.warn(`discord voice: destroy failed: ${reason}: ${message}`);
	}
}
1460
/**
 * Kicks off the manager's auto-join in the background. A rejection is logged
 * as a warning instead of being left unhandled.
 *
 * @param {object} manager - Object with an `autoJoin()` method returning a promise.
 */
function startAutoJoin(manager) {
	const reportFailure = (err) => logger.warn(`discord voice: autoJoin failed: ${formatErrorMessage(err)}`);
	manager.autoJoin().catch(reportFailure);
}
1463
/**
 * Resolves the agent routes for a voice session.
 *
 * The voice route is always resolved for the session channel. When
 * `voiceConfig.agentSession.mode` is "target", the agent route is resolved for
 * the configured target instead; otherwise the voice route doubles as the agent route.
 *
 * @param {object} params
 * @param {object} params.cfg - Full config.
 * @param {string} params.accountId - Discord account id.
 * @param {string} params.guildId - Guild of the voice channel.
 * @param {string} params.sessionChannelId - Channel id used for the voice route.
 * @param {object} [params.voiceConfig] - Voice config carrying `agentSession`.
 * @returns {{route: object, voiceRoute: object, agentSessionMode: ("voice"|"target"), agentSessionTarget: (string|undefined)}}
 * @throws {Error} When mode is "target" and the target is missing, blank, or cannot be parsed.
 */
function resolveDiscordVoiceAgentRoute(params) {
	const routeForPeer = (peer) => resolveAgentRoute({
		cfg: params.cfg,
		channel: "discord",
		accountId: params.accountId,
		guildId: params.guildId,
		peer
	});
	const voiceRoute = routeForPeer({
		kind: "channel",
		id: params.sessionChannelId
	});
	const agentSession = params.voiceConfig?.agentSession;
	if (agentSession?.mode === "target") {
		const target = agentSession.target?.trim();
		if (!target) throw new Error("channels.discord.voice.agentSession.target is required when mode is \"target\"");
		const parsed = parseDiscordTarget(target, { defaultKind: "channel" });
		if (!parsed) throw new Error(`Invalid Discord voice agent session target "${target}"`);
		return {
			// A user target routes as a direct peer; everything else as a channel peer.
			route: routeForPeer({
				kind: parsed.kind === "user" ? "direct" : "channel",
				id: parsed.id
			}),
			voiceRoute,
			agentSessionMode: "target",
			agentSessionTarget: parsed.normalized
		};
	}
	return {
		route: voiceRoute,
		voiceRoute,
		agentSessionMode: "voice",
		agentSessionTarget: void 0
	};
}
1501
+ var DiscordVoiceManager$1 = class {
1502
+ constructor(params) {
1503
+ this.params = params;
1504
+ this.sessions = /* @__PURE__ */ new Map();
1505
+ this.autoJoinTask = null;
1506
+ this.botUserId = params.botUserId;
1507
+ this.voiceEnabled = resolveDiscordVoiceEnabled(params.discordConfig.voice);
1508
+ this.ownerAllowFrom = resolveDiscordAccountAllowFrom({
1509
+ cfg: params.cfg,
1510
+ accountId: params.accountId
1511
+ }) ?? params.discordConfig.allowFrom ?? params.discordConfig.dm?.allowFrom ?? [];
1512
+ this.speakerContext = new DiscordVoiceSpeakerContextResolver({
1513
+ client: params.client,
1514
+ ownerAllowFrom: this.ownerAllowFrom
1515
+ });
1516
+ }
1517
+ setBotUserId(id) {
1518
+ if (id) this.botUserId = id;
1519
+ }
1520
+ isEnabled() {
1521
+ return this.voiceEnabled;
1522
+ }
1523
+ async autoJoin() {
1524
+ if (!this.voiceEnabled) return;
1525
+ if (this.autoJoinTask) return this.autoJoinTask;
1526
+ this.autoJoinTask = (async () => {
1527
+ const entries = this.params.discordConfig.voice?.autoJoin ?? [];
1528
+ const entriesByGuild = /* @__PURE__ */ new Map();
1529
+ const duplicateGuilds = /* @__PURE__ */ new Set();
1530
+ for (const entry of entries) {
1531
+ const guildId = entry.guildId.trim();
1532
+ const channelId = entry.channelId.trim();
1533
+ if (!guildId || !channelId) continue;
1534
+ if (entriesByGuild.has(guildId)) duplicateGuilds.add(guildId);
1535
+ entriesByGuild.set(guildId, {
1536
+ guildId,
1537
+ channelId
1538
+ });
1539
+ }
1540
+ logVoiceVerbose(`autoJoin: ${entries.length} entries, ${entriesByGuild.size} guilds`);
1541
+ for (const guildId of duplicateGuilds) {
1542
+ const selected = entriesByGuild.get(guildId);
1543
+ if (selected) logger.warn(`discord voice: autoJoin has multiple entries for guild ${guildId}; using channel ${selected.channelId}`);
1544
+ }
1545
+ for (const entry of entriesByGuild.values()) {
1546
+ logVoiceVerbose(`autoJoin: joining guild ${entry.guildId} channel ${entry.channelId}`);
1547
+ await this.join({
1548
+ guildId: entry.guildId,
1549
+ channelId: entry.channelId
1550
+ });
1551
+ }
1552
+ })().finally(() => {
1553
+ this.autoJoinTask = null;
1554
+ });
1555
+ return this.autoJoinTask;
1556
+ }
1557
+ status() {
1558
+ return Array.from(this.sessions.values()).map((session) => ({
1559
+ ok: true,
1560
+ message: `connected: guild ${session.guildId} channel ${session.channelId}`,
1561
+ guildId: session.guildId,
1562
+ channelId: session.channelId
1563
+ }));
1564
+ }
1565
+ async join(params) {
1566
+ if (!this.voiceEnabled) return {
1567
+ ok: false,
1568
+ message: "Discord voice is disabled (channels.discord.voice.enabled)."
1569
+ };
1570
+ const guildId = params.guildId.trim();
1571
+ const channelId = params.channelId.trim();
1572
+ if (!guildId || !channelId) return {
1573
+ ok: false,
1574
+ message: "Missing guildId or channelId."
1575
+ };
1576
+ logVoiceVerbose(`join requested: guild ${guildId} channel ${channelId}`);
1577
+ const existing = this.sessions.get(guildId);
1578
+ if (existing && existing.channelId === channelId) {
1579
+ logVoiceVerbose(`join: already connected to guild ${guildId} channel ${channelId}`);
1580
+ return {
1581
+ ok: true,
1582
+ message: `Already connected to ${formatMention({ channelId })}.`,
1583
+ guildId,
1584
+ channelId
1585
+ };
1586
+ }
1587
+ if (existing) {
1588
+ logVoiceVerbose(`join: replacing existing session for guild ${guildId}`);
1589
+ await this.leave({ guildId });
1590
+ }
1591
+ const channelInfo = await this.params.client.fetchChannel(channelId).catch(() => null);
1592
+ if (!channelInfo || "type" in channelInfo && !isVoiceChannel(channelInfo.type)) return {
1593
+ ok: false,
1594
+ message: `Channel ${channelId} is not a voice channel.`
1595
+ };
1596
+ const channelGuildId = "guildId" in channelInfo ? channelInfo.guildId : void 0;
1597
+ if (channelGuildId && channelGuildId !== guildId) return {
1598
+ ok: false,
1599
+ message: "Voice channel is not in this guild."
1600
+ };
1601
+ const voicePlugin = this.params.client.getPlugin("voice");
1602
+ if (!voicePlugin) return {
1603
+ ok: false,
1604
+ message: "Discord voice plugin is not available."
1605
+ };
1606
+ const voiceConfig = this.params.discordConfig.voice;
1607
+ const voiceMode = resolveDiscordVoiceMode(voiceConfig);
1608
+ const adapterCreator = voicePlugin.getGatewayAdapterCreator(guildId);
1609
+ const daveEncryption = voiceConfig?.daveEncryption;
1610
+ const decryptionFailureTolerance = voiceConfig?.decryptionFailureTolerance;
1611
+ const connectReadyTimeoutMs = resolveVoiceTimeoutMs(voiceConfig?.connectTimeoutMs, VOICE_CONNECT_READY_TIMEOUT_MS);
1612
+ const reconnectGraceMs = resolveVoiceTimeoutMs(voiceConfig?.reconnectGraceMs, VOICE_RECONNECT_GRACE_MS);
1613
+ logVoiceVerbose(`join: DAVE settings encryption=${daveEncryption === false ? "off" : "on"} tolerance=${decryptionFailureTolerance ?? "default"} connectTimeout=${connectReadyTimeoutMs}ms reconnectGrace=${reconnectGraceMs}ms`);
1614
+ const voiceSdk = loadDiscordVoiceSdk();
1615
+ const existingEntry = this.sessions.get(guildId);
1616
+ if (existingEntry) {
1617
+ existingEntry.stop();
1618
+ this.sessions.delete(guildId);
1619
+ }
1620
+ const staleConnection = voiceSdk.getVoiceConnection(guildId);
1621
+ if (staleConnection) destroyVoiceConnectionSafely({
1622
+ connection: staleConnection,
1623
+ voiceSdk,
1624
+ reason: `stale connection before join guild ${guildId}`
1625
+ });
1626
+ const connection = voiceSdk.joinVoiceChannel({
1627
+ channelId,
1628
+ guildId,
1629
+ adapterCreator,
1630
+ selfDeaf: false,
1631
+ selfMute: false,
1632
+ daveEncryption,
1633
+ decryptionFailureTolerance
1634
+ });
1635
+ try {
1636
+ await voiceSdk.entersState(connection, voiceSdk.VoiceConnectionStatus.Ready, connectReadyTimeoutMs);
1637
+ logVoiceVerbose(`join: connected to guild ${guildId} channel ${channelId}`);
1638
+ } catch (err) {
1639
+ logger.warn(`discord voice: join failed before ready: guild ${guildId} channel ${channelId} timeout=${connectReadyTimeoutMs}ms error=${formatErrorMessage(err)}`);
1640
+ destroyVoiceConnectionSafely({
1641
+ connection,
1642
+ voiceSdk,
1643
+ reason: `failed join cleanup guild ${guildId} channel ${channelId}`
1644
+ });
1645
+ return {
1646
+ ok: false,
1647
+ message: `Failed to join voice channel: ${formatErrorMessage(err)}`
1648
+ };
1649
+ }
1650
+ const sessionChannelId = channelInfo?.id ?? channelId;
1651
+ if (sessionChannelId !== channelId) logVoiceVerbose(`join: using session channel ${sessionChannelId} for voice channel ${channelId}`);
1652
+ let routeInfo;
1653
+ try {
1654
+ routeInfo = resolveDiscordVoiceAgentRoute({
1655
+ cfg: this.params.cfg,
1656
+ accountId: this.params.accountId,
1657
+ guildId,
1658
+ sessionChannelId,
1659
+ voiceConfig
1660
+ });
1661
+ } catch (err) {
1662
+ destroyVoiceConnectionSafely({
1663
+ connection,
1664
+ voiceSdk,
1665
+ reason: `voice agent session route failed guild ${guildId} channel ${channelId}`
1666
+ });
1667
+ return {
1668
+ ok: false,
1669
+ message: `Failed to resolve Discord voice agent session: ${formatErrorMessage(err)}`,
1670
+ guildId,
1671
+ channelId
1672
+ };
1673
+ }
1674
+ const { route, voiceRoute, agentSessionMode, agentSessionTarget } = routeInfo;
1675
+ logger.info(`discord voice: joining guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} agentSessionMode=${agentSessionMode}${agentSessionTarget ? ` agentSessionTarget=${agentSessionTarget}` : ""} voiceModel=${voiceConfig?.model ?? "route-default"} realtimeProvider=${voiceConfig?.realtime?.provider ?? "auto"} realtimeModel=${voiceConfig?.realtime?.model ?? "provider-default"} realtimeVoice=${voiceConfig?.realtime?.voice ?? "provider-default"}`);
1676
+ const player = voiceSdk.createAudioPlayer();
1677
+ connection.subscribe(player);
1678
+ let speakingHandler;
1679
+ let speakingEndHandler;
1680
+ let disconnectedHandler;
1681
+ let destroyedHandler;
1682
+ let playerErrorHandler;
1683
+ let stopped = false;
1684
+ const clearSessionIfCurrent = () => {
1685
+ if (this.sessions.get(guildId)?.connection === connection) this.sessions.delete(guildId);
1686
+ };
1687
+ const stopEntry = (entry, options) => {
1688
+ if (stopped) return;
1689
+ stopped = true;
1690
+ if (speakingHandler) connection.receiver.speaking.off("start", speakingHandler);
1691
+ if (speakingEndHandler) connection.receiver.speaking.off("end", speakingEndHandler);
1692
+ stopVoiceCaptureState(entry.capture);
1693
+ if (disconnectedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Disconnected, disconnectedHandler);
1694
+ if (destroyedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
1695
+ if (playerErrorHandler) player.off("error", playerErrorHandler);
1696
+ entry.realtime?.close();
1697
+ entry.realtime = void 0;
1698
+ player.stop();
1699
+ if (options.destroyConnection) destroyVoiceConnectionSafely({
1700
+ connection,
1701
+ voiceSdk,
1702
+ reason: options.reason
1703
+ });
1704
+ };
1705
+ const entry = {
1706
+ guildId,
1707
+ guildName: channelInfo && "guild" in channelInfo && channelInfo.guild && typeof channelInfo.guild.name === "string" ? channelInfo.guild.name : void 0,
1708
+ channelId,
1709
+ channelName: channelInfo && "name" in channelInfo && typeof channelInfo.name === "string" ? channelInfo.name : void 0,
1710
+ sessionChannelId,
1711
+ voiceSessionKey: voiceRoute.sessionKey,
1712
+ route,
1713
+ connection,
1714
+ player,
1715
+ playbackQueue: Promise.resolve(),
1716
+ processingQueue: Promise.resolve(),
1717
+ capture: createVoiceCaptureState(),
1718
+ receiveRecovery: createVoiceReceiveRecoveryState(),
1719
+ stop: () => {
1720
+ stopEntry(entry, {
1721
+ destroyConnection: true,
1722
+ reason: `stop guild ${guildId} channel ${channelId}`
1723
+ });
1724
+ }
1725
+ };
1726
+ if (voiceMode !== "stt-tts") {
1727
+ entry.realtime = new DiscordRealtimeVoiceSession({
1728
+ cfg: this.params.cfg,
1729
+ discordConfig: this.params.discordConfig,
1730
+ entry,
1731
+ mode: voiceMode,
1732
+ runAgentTurn: ({ context, message, toolsAllow, userId }) => this.runDiscordRealtimeAgentTurn({
1733
+ context,
1734
+ entry,
1735
+ message,
1736
+ toolsAllow,
1737
+ userId
1738
+ })
1739
+ });
1740
+ try {
1741
+ await entry.realtime.connect();
1742
+ } catch (err) {
1743
+ entry.realtime.close();
1744
+ destroyVoiceConnectionSafely({
1745
+ connection,
1746
+ voiceSdk,
1747
+ reason: `realtime setup failed guild ${guildId} channel ${channelId}`
1748
+ });
1749
+ return {
1750
+ ok: false,
1751
+ message: `Failed to start Discord realtime voice: ${formatErrorMessage(err)}`,
1752
+ guildId,
1753
+ channelId
1754
+ };
1755
+ }
1756
+ }
1757
+ speakingHandler = (userId) => {
1758
+ this.handleSpeakingStart(entry, userId).catch((err) => {
1759
+ logger.warn(`discord voice: capture failed: ${formatErrorMessage(err)}`);
1760
+ });
1761
+ };
1762
+ speakingEndHandler = (userId) => {
1763
+ this.scheduleCaptureFinalize(entry, userId, "speaker end");
1764
+ };
1765
+ disconnectedHandler = async () => {
1766
+ try {
1767
+ logVoiceVerbose(`disconnected: attempting recovery guild ${guildId} channel ${channelId} grace=${reconnectGraceMs}ms`);
1768
+ await Promise.race([voiceSdk.entersState(connection, voiceSdk.VoiceConnectionStatus.Signalling, reconnectGraceMs), voiceSdk.entersState(connection, voiceSdk.VoiceConnectionStatus.Connecting, reconnectGraceMs)]);
1769
+ logVoiceVerbose(`disconnected: recovery started guild ${guildId} channel ${channelId}`);
1770
+ } catch (err) {
1771
+ logger.warn(`discord voice: disconnect recovery failed: guild ${guildId} channel ${channelId} timeout=${reconnectGraceMs}ms error=${formatErrorMessage(err)}; destroying connection`);
1772
+ clearSessionIfCurrent();
1773
+ stopEntry(entry, {
1774
+ destroyConnection: true,
1775
+ reason: `disconnect recovery failed guild ${guildId} channel ${channelId}`
1776
+ });
1777
+ }
1778
+ };
1779
+ destroyedHandler = () => {
1780
+ clearSessionIfCurrent();
1781
+ stopEntry(entry, {
1782
+ destroyConnection: false,
1783
+ reason: `destroyed guild ${guildId} channel ${channelId}`
1784
+ });
1785
+ };
1786
+ playerErrorHandler = (err) => {
1787
+ logger.warn(`discord voice: playback error: ${formatErrorMessage(err)}`);
1788
+ };
1789
+ this.enableDaveReceivePassthrough(entry, "post-join warmup", 30);
1790
+ connection.receiver.speaking.on("start", speakingHandler);
1791
+ connection.receiver.speaking.on("end", speakingEndHandler);
1792
+ connection.on(voiceSdk.VoiceConnectionStatus.Disconnected, disconnectedHandler);
1793
+ connection.on(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
1794
+ player.on("error", playerErrorHandler);
1795
+ this.sessions.set(guildId, entry);
1796
+ logger.info(`discord voice: joined guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} voiceModel=${voiceConfig?.model ?? "route-default"}`);
1797
+ return {
1798
+ ok: true,
1799
+ message: `Joined ${formatMention({ channelId })}.`,
1800
+ guildId,
1801
+ channelId
1802
+ };
1803
+ }
1804
+ async leave(params) {
1805
+ const guildId = params.guildId.trim();
1806
+ logVoiceVerbose(`leave requested: guild ${guildId} channel ${params.channelId ?? "current"}`);
1807
+ const entry = this.sessions.get(guildId);
1808
+ if (!entry) return {
1809
+ ok: false,
1810
+ message: "Not connected to a voice channel."
1811
+ };
1812
+ if (params.channelId && params.channelId !== entry.channelId) return {
1813
+ ok: false,
1814
+ message: "Not connected to that voice channel."
1815
+ };
1816
+ entry.stop();
1817
+ this.sessions.delete(guildId);
1818
+ logVoiceVerbose(`leave: disconnected from guild ${guildId} channel ${entry.channelId}`);
1819
+ return {
1820
+ ok: true,
1821
+ message: `Left ${formatMention({ channelId: entry.channelId })}.`,
1822
+ guildId,
1823
+ channelId: entry.channelId
1824
+ };
1825
+ }
1826
+ async destroy() {
1827
+ for (const entry of this.sessions.values()) entry.stop();
1828
+ this.sessions.clear();
1829
+ }
1830
+ enqueueProcessing(entry, task) {
1831
+ entry.processingQueue = entry.processingQueue.then(task).catch((err) => logger.warn(`discord voice: processing failed: ${formatErrorMessage(err)}`));
1832
+ }
1833
+ enqueuePlayback(entry, task) {
1834
+ entry.playbackQueue = entry.playbackQueue.then(task).catch((err) => logger.warn(`discord voice: playback failed: ${formatErrorMessage(err)}`));
1835
+ }
1836
+ clearCaptureFinalizeTimer(entry, userId, generation) {
1837
+ return clearVoiceCaptureFinalizeTimer(entry.capture, userId, generation);
1838
+ }
1839
+ scheduleCaptureFinalize(entry, userId, reason) {
1840
+ const graceMs = resolveVoiceTimeoutMs(this.params.discordConfig.voice?.captureSilenceGraceMs, CAPTURE_FINALIZE_GRACE_MS);
1841
+ scheduleVoiceCaptureFinalize({
1842
+ state: entry.capture,
1843
+ userId,
1844
+ delayMs: graceMs,
1845
+ onFinalize: () => {
1846
+ logVoiceVerbose(`capture finalize: guild ${entry.guildId} channel ${entry.channelId} user ${userId} reason=${reason} grace=${graceMs}ms`);
1847
+ }
1848
+ });
1849
+ }
1850
+ async handleSpeakingStart(entry, userId) {
1851
+ if (!userId) return;
1852
+ if (this.botUserId && userId === this.botUserId) return;
1853
+ if (isVoiceCaptureActive(entry.capture, userId)) {
1854
+ const activeCapture = getActiveVoiceCapture(entry.capture, userId);
1855
+ const extended = activeCapture ? this.clearCaptureFinalizeTimer(entry, userId, activeCapture.generation) : false;
1856
+ logVoiceVerbose(`capture start ignored (already active): guild ${entry.guildId} channel ${entry.channelId} user ${userId}${extended ? " (finalize canceled)" : ""}`);
1857
+ return;
1858
+ }
1859
+ logVoiceVerbose(`capture start: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1860
+ const voiceSdk = loadDiscordVoiceSdk();
1861
+ const voiceMode = resolveDiscordVoiceMode(this.params.discordConfig.voice);
1862
+ const realtime = entry.realtime && isDiscordRealtimeVoiceMode(voiceMode) ? entry.realtime : void 0;
1863
+ if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing && !realtime) {
1864
+ logVoiceVerbose(`capture ignored during playback: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1865
+ return;
1866
+ }
1867
+ const realtimeIngress = realtime ? await this.resolveDiscordVoiceIngressContext(entry, userId) : void 0;
1868
+ if (realtime && !realtimeIngress) {
1869
+ logVoiceVerbose(`realtime capture unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1870
+ return;
1871
+ }
1872
+ if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing && realtime) {
1873
+ if (!realtime.isBargeInEnabled()) {
1874
+ logger.info(`discord voice: realtime capture ignored during playback (barge-in disabled): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1875
+ return;
1876
+ }
1877
+ logVoiceVerbose(`realtime barge-in: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1878
+ logger.info(`discord voice: realtime barge-in detected source=speaker-start guild=${entry.guildId} channel=${entry.channelId} user=${userId} playerStatus=${entry.player.state.status}`);
1879
+ realtime.handleBargeIn("speaker-start");
1880
+ }
1881
+ this.enableDaveReceivePassthrough(entry, `speaker ${userId} start`, 15);
1882
+ const stream = entry.connection.receiver.subscribe(userId, { end: { behavior: voiceSdk.EndBehaviorType.Manual } });
1883
+ const generation = beginVoiceCapture(entry.capture, userId, stream);
1884
+ let streamAborted = false;
1885
+ stream.on("error", (err) => {
1886
+ streamAborted = analyzeVoiceReceiveError(err).isAbortLike;
1887
+ this.handleReceiveError(entry, err);
1888
+ });
1889
+ try {
1890
+ if (realtime && realtimeIngress) {
1891
+ const turn = realtime.beginSpeakerTurn(realtimeIngress, userId);
1892
+ try {
1893
+ await this.processRealtimeAudioCapture({
1894
+ entry,
1895
+ stream,
1896
+ turn
1897
+ });
1898
+ } finally {
1899
+ turn.close();
1900
+ }
1901
+ return;
1902
+ }
1903
+ const pcm = await decodeOpusStream(stream, {
1904
+ onVerbose: logVoiceVerbose,
1905
+ onWarn: (message) => logger.warn(message)
1906
+ });
1907
+ if (pcm.length === 0) {
1908
+ logVoiceVerbose(`capture empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1909
+ return;
1910
+ }
1911
+ this.resetDecryptFailureState(entry);
1912
+ const { path: wavPath, durationSeconds } = await writeVoiceWavFile(pcm);
1913
+ if (durationSeconds < (streamAborted ? .2 : .35)) {
1914
+ logVoiceVerbose(`capture too short (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1915
+ return;
1916
+ }
1917
+ logVoiceVerbose(`capture ready (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1918
+ this.enqueueProcessing(entry, async () => {
1919
+ await this.processSegment({
1920
+ entry,
1921
+ wavPath,
1922
+ userId,
1923
+ durationSeconds
1924
+ });
1925
+ });
1926
+ } finally {
1927
+ finishVoiceCapture(entry.capture, userId, generation);
1928
+ }
1929
+ }
1930
+ async processRealtimeAudioCapture(params) {
1931
+ const { entry, stream, turn } = params;
1932
+ let resetReceiveRecovery = false;
1933
+ await decodeOpusStreamChunks(stream, {
1934
+ onChunk: (pcm) => {
1935
+ if (!resetReceiveRecovery && pcm.length > 0) {
1936
+ resetReceiveRecovery = true;
1937
+ this.resetDecryptFailureState(entry);
1938
+ }
1939
+ turn.sendInputAudio(pcm);
1940
+ },
1941
+ onVerbose: logVoiceVerbose,
1942
+ onWarn: (message) => logger.warn(message)
1943
+ });
1944
+ }
1945
+ async resolveDiscordVoiceIngressContext(entry, userId) {
1946
+ return await resolveDiscordVoiceIngressContext({
1947
+ entry,
1948
+ userId,
1949
+ cfg: this.params.cfg,
1950
+ discordConfig: this.params.discordConfig,
1951
+ ownerAllowFrom: this.ownerAllowFrom,
1952
+ fetchGuildName: async (guildId) => {
1953
+ const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
1954
+ return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
1955
+ },
1956
+ speakerContext: this.speakerContext
1957
+ });
1958
+ }
1959
+ async runDiscordRealtimeAgentTurn(params) {
1960
+ const { context, entry, message, toolsAllow, userId } = params;
1961
+ logger.info(`discord voice: agent turn start guild=${entry.guildId} channel=${entry.channelId} voiceSession=${entry.voiceSessionKey} supervisorSession=${entry.route.sessionKey} agent=${entry.route.agentId} user=${userId} speaker=${context.speakerLabel} owner=${context.senderIsOwner} model=${this.params.discordConfig.voice?.model ?? "route-default"} message=${formatVoiceLogPreview(message)}`);
1962
+ const turn = await runDiscordVoiceAgentTurn({
1963
+ entry,
1964
+ userId,
1965
+ message,
1966
+ cfg: this.params.cfg,
1967
+ discordConfig: this.params.discordConfig,
1968
+ runtime: this.params.runtime,
1969
+ context,
1970
+ toolsAllow,
1971
+ ownerAllowFrom: this.ownerAllowFrom,
1972
+ fetchGuildName: async (guildId) => {
1973
+ const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
1974
+ return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
1975
+ },
1976
+ speakerContext: this.speakerContext
1977
+ });
1978
+ if (!turn) {
1979
+ logVoiceVerbose(`realtime agent unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1980
+ return "";
1981
+ }
1982
+ logger.info(`discord voice: agent turn answer (${turn.text.length} chars) guild=${entry.guildId} channel=${entry.channelId} voiceSession=${entry.voiceSessionKey} supervisorSession=${entry.route.sessionKey} agent=${entry.route.agentId}: ${formatVoiceLogPreview(turn.text)}`);
1983
+ return turn.text;
1984
+ }
1985
+ async processSegment(params) {
1986
+ await processDiscordVoiceSegment({
1987
+ ...params,
1988
+ cfg: this.params.cfg,
1989
+ discordConfig: this.params.discordConfig,
1990
+ ownerAllowFrom: this.ownerAllowFrom,
1991
+ runtime: this.params.runtime,
1992
+ speakerContext: this.speakerContext,
1993
+ fetchGuildName: async (guildId) => {
1994
+ const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
1995
+ return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
1996
+ },
1997
+ enqueuePlayback: (entry, task) => {
1998
+ this.enqueuePlayback(entry, task);
1999
+ }
2000
+ });
2001
+ }
2002
+ handleReceiveError(entry, err) {
2003
+ const analysis = analyzeVoiceReceiveError(err);
2004
+ if (analysis.isAbortLike && !analysis.countsAsDecryptFailure) {
2005
+ logVoiceVerbose(`receive stream ended: ${analysis.message}`);
2006
+ return;
2007
+ }
2008
+ logger.warn(`discord voice: receive error: ${analysis.message}`);
2009
+ if (analysis.shouldAttemptPassthrough) this.enableDaveReceivePassthrough(entry, "receive decrypt error", 15);
2010
+ if (!analysis.countsAsDecryptFailure) return;
2011
+ const decryptFailure = noteVoiceDecryptFailure(entry.receiveRecovery);
2012
+ if (decryptFailure.firstFailure) logger.warn("discord voice: DAVE decrypt failures detected; voice receive may be unstable (upstream: discordjs/discord.js#11419)");
2013
+ if (!decryptFailure.shouldRecover) return;
2014
+ this.recoverFromDecryptFailures(entry).catch((recoverErr) => logger.warn(`discord voice: decrypt recovery failed: ${formatErrorMessage(recoverErr)}`)).finally(() => {
2015
+ finishVoiceDecryptRecovery(entry.receiveRecovery);
2016
+ });
2017
+ }
2018
+ enableDaveReceivePassthrough(entry, reason, expirySeconds) {
2019
+ const voiceSdk = loadDiscordVoiceSdk();
2020
+ return enableDaveReceivePassthrough({
2021
+ target: {
2022
+ guildId: entry.guildId,
2023
+ channelId: entry.channelId,
2024
+ connection: entry.connection
2025
+ },
2026
+ sdk: {
2027
+ VoiceConnectionStatus: { Ready: voiceSdk.VoiceConnectionStatus.Ready },
2028
+ NetworkingStatusCode: {
2029
+ Ready: voiceSdk.NetworkingStatusCode.Ready,
2030
+ Resuming: voiceSdk.NetworkingStatusCode.Resuming
2031
+ }
2032
+ },
2033
+ reason,
2034
+ expirySeconds,
2035
+ onVerbose: logVoiceVerbose,
2036
+ onWarn: (message) => logger.warn(message)
2037
+ });
2038
+ }
2039
+ resetDecryptFailureState(entry) {
2040
+ resetVoiceReceiveRecoveryState(entry.receiveRecovery);
2041
+ }
2042
+ async recoverFromDecryptFailures(entry) {
2043
+ const active = this.sessions.get(entry.guildId);
2044
+ if (!active || active.connection !== entry.connection) return;
2045
+ logger.warn(`discord voice: repeated decrypt failures; attempting rejoin for guild ${entry.guildId} channel ${entry.channelId}`);
2046
+ const leaveResult = await this.leave({ guildId: entry.guildId });
2047
+ if (!leaveResult.ok) {
2048
+ logger.warn(`discord voice: decrypt recovery leave failed: ${leaveResult.message}`);
2049
+ return;
2050
+ }
2051
+ const result = await this.join({
2052
+ guildId: entry.guildId,
2053
+ channelId: entry.channelId
2054
+ });
2055
+ if (!result.ok) logger.warn(`discord voice: rejoin after decrypt failures failed: ${result.message}`);
2056
+ }
2057
+ };
2058
/**
 * Listener that starts voice auto-join for its manager whenever it is invoked.
 * Presumably triggered by the gateway "ready" event via `ReadyListener` — confirm
 * against the base class.
 */
var DiscordVoiceReadyListener$1 = class extends ReadyListener {
  /**
   * @param {object} manager - Voice manager handed to `startAutoJoin`.
   */
  constructor(manager) {
    super();
    this.manager = manager;
  }

  /**
   * Start auto-join; the event payload and client are not used.
   * @returns {Promise<void>}
   */
  async handle(_data, _client) {
    const { manager } = this;
    startAutoJoin(manager);
  }
};
2067
/**
 * Listener that starts voice auto-join for its manager whenever it is invoked.
 * Presumably triggered by the gateway "resumed" event via `ResumedListener` —
 * confirm against the base class.
 */
var DiscordVoiceResumedListener$1 = class extends ResumedListener {
  /**
   * @param {object} manager - Voice manager handed to `startAutoJoin`.
   */
  constructor(manager) {
    super();
    this.manager = manager;
  }

  /**
   * Start auto-join; the event payload and client are not used.
   * @returns {Promise<void>}
   */
  async handle(_data, _client) {
    const { manager } = this;
    startAutoJoin(manager);
  }
};
2076
+ //#endregion
2077
+ //#region extensions/discord/src/voice/manager.runtime.ts
2078
// Runtime-facing aliases: each class is an empty subclass of the implementation
// class defined earlier in this bundle (the `$1`-suffixed binding).
var DiscordVoiceManager = class extends DiscordVoiceManager$1 {
};
var DiscordVoiceReadyListener = class extends DiscordVoiceReadyListener$1 {
};
var DiscordVoiceResumedListener = class extends DiscordVoiceResumedListener$1 {
};
//#endregion
export {
  DiscordVoiceManager,
  DiscordVoiceReadyListener,
  DiscordVoiceResumedListener
};