@openclaw/discord 2026.5.7 → 2026.5.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/dist/{access-CHY9FK3X.js → access-Lk7H_e7y.js} +1 -1
  2. package/dist/action-runtime-api.js +1 -1
  3. package/dist/{allow-list-ek-1hMKN.js → allow-list-Dtho5Hww.js} +1 -1
  4. package/dist/api.js +19 -17
  5. package/dist/{approval-handler.runtime-BBZRYAGs.js → approval-handler.runtime-CqvKEAqO.js} +3 -3
  6. package/dist/{approval-native-CBdZsAR7.js → approval-native-W5mtrjYy.js} +2 -2
  7. package/dist/{approval-shared-Ck6TxKgo.js → approval-shared-D0FVj8b_.js} +1 -1
  8. package/dist/{audit-CCJ0h49k.js → audit-CrCPCVFG.js} +3 -3
  9. package/dist/{channel-UXGa9PGc.js → channel-CRTWY5R-.js} +53 -31
  10. package/dist/{channel-actions-Br29_1nE.js → channel-actions-BRgtLo8F.js} +27 -1
  11. package/dist/{channel-actions.runtime-ChmNUig1.js → channel-actions.runtime-gosyu06g.js} +5 -5
  12. package/dist/channel-config-api.js +1 -1
  13. package/dist/channel-plugin-api.js +1 -1
  14. package/dist/{channel.setup-D_xyQu_h.js → channel.setup-BQlXR1Gt.js} +3 -3
  15. package/dist/chunk-BDgKaWaV.js +179 -0
  16. package/dist/{components-D5LnN7ZQ.js → components-BWdIitIb.js} +2 -2
  17. package/dist/{config-schema-Cc953rAs.js → config-schema-D1DUqXws.js} +46 -1
  18. package/dist/contract-api.js +7 -7
  19. package/dist/{conversation-identity-DHhS0ez3.js → conversation-identity-6hcTVj3L.js} +1 -1
  20. package/dist/directory-contract-api.js +1 -1
  21. package/dist/{directory-live-DJ0V5asB.js → directory-live-D3kbmVAu.js} +3 -3
  22. package/dist/{discord-eZlimVfW.js → discord-Du6FnKAq.js} +30 -13
  23. package/dist/{doctor-Bo-yifB3.js → doctor-Ys2-q5bc.js} +3 -3
  24. package/dist/{doctor-contract-Bso46EOQ.js → doctor-contract-DSB2zzJA.js} +1 -1
  25. package/dist/doctor-contract-api.js +1 -1
  26. package/dist/{handle-action.guild-admin-sJiQymg8.js → handle-action.guild-admin-LM6kZ6zE.js} +6 -3
  27. package/dist/{inbound-context-CRylwjg0.js → inbound-context-BobVUBqo.js} +1 -1
  28. package/dist/{manager.runtime-Cug1PoeZ.js → manager.runtime-C_cPd048.js} +802 -134
  29. package/dist/{mentions-BPZUaFk7.js → mentions-CiPUID82.js} +1 -1
  30. package/dist/{message-handler-C9Ohf-ea.js → message-handler-C2ZQV7ZQ.js} +7 -7
  31. package/dist/{message-handler.preflight-BrvazsYn.js → message-handler.preflight-B5hN7RpX.js} +15 -15
  32. package/dist/{message-handler.process-CEnzuLiN.js → message-handler.process-Dns8D7t2.js} +82 -65
  33. package/dist/{message-utils-9kaGF59d.js → message-utils-B3uf0_3D.js} +2 -2
  34. package/dist/normalize-DBcng6RL.js +58 -0
  35. package/dist/{outbound-adapter-DNsTVJfH.js → outbound-adapter-jP0OgIyW.js} +78 -17
  36. package/dist/{outbound-session-route-DK9qkPgP.js → outbound-session-route-BaJRt05p.js} +1 -1
  37. package/dist/{preflight-audio-CRmUxxuM.js → preflight-audio-6J0vFNtu.js} +1 -1
  38. package/dist/{preview-streaming-Cc_oeIPP.js → preview-streaming-CXrFDP2T.js} +1 -0
  39. package/dist/{probe-E80IMT1X.js → probe-91lU5eh8.js} +1 -1
  40. package/dist/{probe.runtime-CMgUDax3.js → probe.runtime-DV37RDCU.js} +1 -1
  41. package/dist/{provider-CuOh6z_b.js → provider-D_QVXvp8.js} +53 -50
  42. package/dist/{provider-session.runtime-CCESIHVo.js → provider-session.runtime-CcPDguh6.js} +3 -3
  43. package/dist/provider.runtime-O7G03kik.js +2 -0
  44. package/dist/{reply-delivery-D9aKHtDH.js → reply-delivery-DKTZ6HkK.js} +10 -7
  45. package/dist/{resolve-allowlist-common-_e1cWOb3.js → resolve-allowlist-common-LhuVITjh.js} +2 -2
  46. package/dist/{resolve-channels-kyuvrXJg.js → resolve-channels-DjpVRJdT.js} +3 -3
  47. package/dist/{resolve-users-CAwh4EBq.js → resolve-users-DrZYxZSX.js} +2 -2
  48. package/dist/{route-resolution-BWErj5Cn.js → route-resolution-CYRPDKY4.js} +3 -3
  49. package/dist/{runtime-D8alY00g.js → runtime-BHAwVXEa.js} +7 -7
  50. package/dist/runtime-api.actions.js +2 -2
  51. package/dist/runtime-api.js +25 -25
  52. package/dist/runtime-api.lookup.js +6 -6
  53. package/dist/runtime-api.monitor--iuvLjPX.js +6 -0
  54. package/dist/runtime-api.monitor.js +7 -7
  55. package/dist/runtime-api.send.js +5 -5
  56. package/dist/runtime-api.threads.js +5 -5
  57. package/dist/runtime-setter-api.js +1 -1
  58. package/dist/secret-contract-api.js +1 -1
  59. package/dist/{security-audit-BtRd_VhN.js → security-audit-D8IaFuCm.js} +1 -1
  60. package/dist/security-audit-contract-api.js +1 -1
  61. package/dist/{security-audit.runtime-Dm1LW9KX.js → security-audit.runtime-DO1398sV.js} +1 -1
  62. package/dist/security-contract-api.js +1 -1
  63. package/dist/{send-8S_HKJpQ.js → send-C3peGbWO.js} +83 -56
  64. package/dist/{send.components-A42c_5tQ.js → send.components-Dsk3IzS_.js} +22 -14
  65. package/dist/{send.outbound-D3tonSz8.js → send.outbound-CXyInQ3c.js} +22 -11
  66. package/dist/send.receipt-DXimpUGs.js +35 -0
  67. package/dist/{send.shared-BQGiUPvZ.js → send.shared-BydEWvYg.js} +26 -7
  68. package/dist/{sender-identity-BGUfyvOC.js → sender-identity-DVJCrDxs.js} +1 -1
  69. package/dist/session-key-api.js +1 -1
  70. package/dist/setup-plugin-api.js +1 -1
  71. package/dist/{shared-BEW4H3bj.js → shared-BWD6uc0p.js} +8 -8
  72. package/dist/{shared-interactive-KgJjCqnB.js → shared-interactive-BZCU0ZJ8.js} +2 -2
  73. package/dist/{subagent-hooks-T0LPLh4H.js → subagent-hooks-DtbWOdAK.js} +1 -1
  74. package/dist/subagent-hooks-api.js +1 -1
  75. package/dist/{system-events-_fzSG--3.js → system-events-Dsc8MFYs.js} +2 -2
  76. package/dist/target-parsing-D-H7nnh2.js +51 -0
  77. package/dist/target-resolver-D7t8FMhO.js +85 -0
  78. package/dist/targets-DzAZIwg4.js +3 -0
  79. package/dist/test-api.js +4 -4
  80. package/dist/{thread-bindings-CMpZjP50.js → thread-bindings-B90pBWJg.js} +6 -6
  81. package/dist/{thread-bindings.discord-api-CwWGoyei.js → thread-bindings.discord-api-Dvf789Je.js} +7 -7
  82. package/dist/{thread-bindings.manager-BtxfLfWf.js → thread-bindings.manager-AwZ5Vble.js} +5 -5
  83. package/dist/{thread-bindings.session-updates-jcZSiRPI.js → thread-bindings.session-updates-B_AcsXSf.js} +1 -1
  84. package/dist/{threading-BMmpA2JR.js → threading-Dku_cGS8.js} +4 -4
  85. package/dist/timeouts.js +1 -1
  86. package/dist/{typing-Bw6NKWLZ.js → typing-BIjMmOZB.js} +2 -2
  87. package/openclaw.plugin.json +222 -1
  88. package/package.json +5 -5
  89. package/dist/provider.runtime-B68g3qLv.js +0 -2
  90. package/dist/runtime-api.monitor-DzkCxeBL.js +0 -6
  91. package/dist/target-resolver-DA84_xbt.js +0 -365
  92. package/dist/targets-FwL1BPTU.js +0 -2
  93. /package/dist/{agent-components.runtime-Dof1YMSz.js → agent-components.runtime-caj0h9y4.js} +0 -0
  94. /package/dist/{api-DzNBVTto.js → api-DaTujGTe.js} +0 -0
  95. /package/dist/{audit-core-CejGc3hO.js → audit-core-CIvZE94W.js} +0 -0
  96. /package/dist/{channel-access-DFIQqbYm.js → channel-access-CXAdcM95.js} +0 -0
  97. /package/dist/{directory-cache-D93eSrpB.js → directory-cache-D3l2v2_L.js} +0 -0
  98. /package/dist/{directory-config-DoETeOOx.js → directory-config-S2852QcC.js} +0 -0
  99. /package/dist/{doctor-shared-Cqvfgv9m.js → doctor-shared-DtOaqWzy.js} +0 -0
  100. /package/dist/{format-D8TsaXxW.js → format-XDPCvGK4.js} +0 -0
  101. /package/dist/{pluralkit-OFss_pIy.js → pluralkit-EXKKni07.js} +0 -0
  102. /package/dist/{preflight-audio.runtime-DPVbpZid.js → preflight-audio.runtime-3hiRefuj.js} +0 -0
  103. /package/dist/{runtime-K9RT6Egn.js → runtime-EoVRXYxX.js} +0 -0
  104. /package/dist/{secret-config-contract-5S9U9pjx.js → secret-config-contract-BA9jVaKx.js} +0 -0
  105. /package/dist/{security-contract-BE8rsdPq.js → security-contract-B7i5uqn6.js} +0 -0
  106. /package/dist/{security-doctor-DiilN216.js → security-doctor-2SGSPibT.js} +0 -0
  107. /package/dist/{session-contract-CuW9Nlxg.js → session-contract-DwjfF970.js} +0 -0
  108. /package/dist/{session-key-normalization-B5La-jFM.js → session-key-normalization-DvI2OlyS.js} +0 -0
  109. /package/dist/{thread-bindings.state-WU4duXKY.js → thread-bindings.state-CBwtFsbB.js} +0 -0
  110. /package/dist/{timeouts-CdsmBWWs.js → timeouts-BxAzVpSG.js} +0 -0
@@ -1,21 +1,23 @@
1
1
  import { c as resolveDiscordAccountAllowFrom } from "./accounts-CaHGiVB4.js";
2
- import { c as ResumedListener, s as ReadyListener, t as discord_exports } from "./discord-eZlimVfW.js";
3
- import { n as formatDiscordUserTag } from "./format-D8TsaXxW.js";
4
- import { a as normalizeDiscordSlug, m as resolveDiscordOwnerAccess } from "./allow-list-ek-1hMKN.js";
5
- import { t as formatMention } from "./mentions-BPZUaFk7.js";
6
- import { t as getDiscordRuntime } from "./runtime-K9RT6Egn.js";
7
- import { t as buildDiscordGroupSystemPrompt } from "./inbound-context-CRylwjg0.js";
8
- import { n as resolveDiscordVoiceEnabled, t as authorizeDiscordVoiceIngress } from "./access-CHY9FK3X.js";
2
+ import { t as parseDiscordTarget } from "./target-parsing-D-H7nnh2.js";
3
+ import { c as ResumedListener, s as ReadyListener, t as discord_exports } from "./discord-Du6FnKAq.js";
4
+ import { n as formatDiscordUserTag } from "./format-XDPCvGK4.js";
5
+ import { a as normalizeDiscordSlug, m as resolveDiscordOwnerAccess } from "./allow-list-Dtho5Hww.js";
6
+ import { t as formatMention } from "./mentions-CiPUID82.js";
7
+ import { t as getDiscordRuntime } from "./runtime-EoVRXYxX.js";
8
+ import { t as buildDiscordGroupSystemPrompt } from "./inbound-context-BobVUBqo.js";
9
+ import { n as resolveDiscordVoiceEnabled, t as authorizeDiscordVoiceIngress } from "./access-Lk7H_e7y.js";
9
10
  import { createRequire } from "node:module";
10
- import { normalizeOptionalString, stripInlineDirectiveTagsForDisplay } from "openclaw/plugin-sdk/text-runtime";
11
+ import { escapeRegExp, normalizeOptionalString, stripInlineDirectiveTagsForDisplay } from "openclaw/plugin-sdk/text-runtime";
11
12
  import { resolveAgentRoute } from "openclaw/plugin-sdk/routing";
12
- import { randomUUID } from "node:crypto";
13
- import fs from "node:fs/promises";
14
13
  import path from "node:path";
15
14
  import { createSubsystemLogger, logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env";
16
- import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
15
+ import fs from "node:fs/promises";
16
+ import { resolvePreferredOpenClawTmpDir, tempWorkspace } from "openclaw/plugin-sdk/temp-path";
17
17
  import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
18
18
  import { agentCommandFromIngress, getTtsProvider, resolveAgentDir, resolveTtsConfig, resolveTtsPrefsPath } from "openclaw/plugin-sdk/agent-runtime";
19
+ import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ, buildRealtimeVoiceAgentConsultChatMessage, buildRealtimeVoiceAgentConsultPolicyInstructions, buildRealtimeVoiceAgentConsultWorkingResponse, createRealtimeVoiceAgentTalkbackQueue, createRealtimeVoiceBridgeSession, resamplePcm, resolveConfiguredRealtimeVoiceProvider, resolveRealtimeVoiceAgentConsultToolPolicy, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow } from "openclaw/plugin-sdk/realtime-voice";
20
+ import { PassThrough, Readable } from "node:stream";
19
21
  import { parseTtsDirectives } from "openclaw/plugin-sdk/speech";
20
22
  //#region extensions/discord/src/voice/audio.ts
21
23
  const require = createRequire(import.meta.url);
@@ -98,17 +100,58 @@ async function decodeOpusStream(stream, params) {
98
100
  }
99
101
  return chunks.length > 0 ? Buffer.concat(chunks) : Buffer.alloc(0);
100
102
  }
// decodeOpusStreamChunks(stream, params): chunk-by-chunk Opus decode.
// Iterates `stream` with for-await, decodes each non-empty Buffer chunk with the
// decoder returned by createOpusDecoder, and passes every non-empty decoded
// result to params.onChunk as a Buffer. Resolves to undefined.
// params.onWarn is forwarded to createOpusDecoder; params.onVerbose receives the
// selected decoder's name; params.onChunk receives each decoded Buffer.
103
+ async function decodeOpusStreamChunks(stream, params) {
104
+ const selected = createOpusDecoder({ onWarn: params.onWarn });
// No decoder could be created: return without reading the stream.
105
+ if (!selected) return;
106
+ params.onVerbose(`opus decoder: ${selected.name}`);
107
+ try {
108
+ for await (const chunk of stream) {
// Skip falsy, non-Buffer, or zero-length chunks.
109
+ if (!chunk || !(chunk instanceof Buffer) || chunk.length === 0) continue;
110
+ const decoded = selected.decoder.decode(chunk);
111
+ if (decoded && decoded.length > 0) params.onChunk(Buffer.from(decoded));
112
+ }
// NOTE(review): any error from the stream, the decoder, or onChunk ends the loop
// and is logged only when verbose logging is enabled; the caller is not informed.
113
+ } catch (err) {
114
+ if (shouldLogVerbose()) logVerbose(`discord voice: opus decode failed: ${formatErrorMessage(err)}`);
115
+ }
116
+ }
// convertDiscordPcm48kStereoToRealtimePcm24kMono(pcm): stereo -> mono downmix, then resample.
// Reads `pcm` as 4-byte frames of two little-endian int16 samples (left, right),
// writes the rounded average of each pair into a mono buffer, and returns
// resamplePcm(mono, SAMPLE_RATE, 24e3). Returns an empty Buffer when `pcm` holds
// less than one whole frame; trailing bytes that do not fill a frame are ignored.
// SAMPLE_RATE is defined outside this view; the function name suggests 48 kHz — confirm.
117
+ function convertDiscordPcm48kStereoToRealtimePcm24kMono(pcm) {
118
+ const frameCount = Math.floor(pcm.length / 4);
119
+ if (frameCount === 0) return Buffer.alloc(0);
120
+ const mono48k = Buffer.alloc(frameCount * 2);
121
+ for (let frame = 0; frame < frameCount; frame += 1) {
122
+ const offset = frame * 4;
123
+ const left = pcm.readInt16LE(offset);
124
+ const right = pcm.readInt16LE(offset + 2);
125
+ mono48k.writeInt16LE(Math.round((left + right) / 2), frame * 2);
126
+ }
127
+ return resamplePcm(mono48k, SAMPLE_RATE, 24e3);
128
+ }
// convertRealtimePcm24kMonoToDiscordPcm48kStereo(pcm): resample, then mono -> stereo.
// Calls resamplePcm(pcm, 24e3, SAMPLE_RATE), then reads the result as little-endian
// int16 samples and writes each sample twice (two channels, 4 bytes per output frame).
// Returns an empty Buffer when the resampled data holds no whole sample.
// SAMPLE_RATE is defined outside this view; the function name suggests 48 kHz — confirm.
129
+ function convertRealtimePcm24kMonoToDiscordPcm48kStereo(pcm) {
130
+ const mono48k = resamplePcm(pcm, 24e3, SAMPLE_RATE);
131
+ const sampleCount = Math.floor(mono48k.length / 2);
132
+ if (sampleCount === 0) return Buffer.alloc(0);
133
+ const stereo = Buffer.alloc(sampleCount * 4);
134
+ for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
135
+ const sample = mono48k.readInt16LE(sampleIndex * 2);
136
+ const offset = sampleIndex * 4;
// Same sample goes to both channel slots of the frame.
137
+ stereo.writeInt16LE(sample, offset);
138
+ stereo.writeInt16LE(sample, offset + 2);
139
+ }
140
+ return stereo;
141
+ }
// estimateDurationSeconds(pcm): pcm.length divided by (BIT_DEPTH / 8 * CHANNELS * SAMPLE_RATE).
// Returns 0 when the per-frame byte size computes to zero or less.
// Note: the local `bytesPerSample` already includes CHANNELS, so it is the byte
// size of one multi-channel frame. BIT_DEPTH, CHANNELS and SAMPLE_RATE are
// defined outside this view.
101
142
  function estimateDurationSeconds(pcm) {
102
143
  const bytesPerSample = BIT_DEPTH / 8 * CHANNELS;
103
144
  if (bytesPerSample <= 0) return 0;
104
145
  return pcm.length / (bytesPerSample * SAMPLE_RATE);
105
146
  }
106
147
  async function writeVoiceWavFile(pcm) {
107
- const tempDir = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "discord-voice-"));
108
- const filePath = path.join(tempDir, `segment-${randomUUID()}.wav`);
148
+ const workspace = await tempWorkspace({
149
+ rootDir: resolvePreferredOpenClawTmpDir(),
150
+ prefix: "discord-voice-"
151
+ });
109
152
  const wav = buildWavBuffer(pcm);
110
- await fs.writeFile(filePath, wav);
111
- scheduleTempCleanup(tempDir);
153
+ const filePath = await workspace.write("segment.wav", wav);
154
+ scheduleTempCleanup(workspace.dir);
112
155
  return {
113
156
  path: filePath,
114
157
  durationSeconds: estimateDurationSeconds(pcm)
@@ -193,6 +236,447 @@ function scheduleVoiceCaptureFinalize(params) {
193
236
  });
194
237
  return true;
195
238
  }
// resolveDiscordVoiceIngressContext(params): resolves the speaker and authorizes voice ingress.
// Reads params.entry, params.userId, params.cfg, params.discordConfig,
// params.ownerAllowFrom, params.fetchGuildName and params.speakerContext.
// Resolves to null when authorizeDiscordVoiceIngress reports !ok; otherwise to
// { extraSystemPrompt, senderIsOwner, speakerLabel }.
// Side effect: fills in entry.guildName on the passed entry when it is missing.
239
+ async function resolveDiscordVoiceIngressContext(params) {
240
+ const { entry, userId } = params;
241
+ if (!entry.guildName) entry.guildName = await params.fetchGuildName(entry.guildId);
// NOTE(review): these two lookups are awaited one after the other and both run
// before the access check; whether they could run concurrently depends on
// speakerContext internals not shown here.
242
+ const speaker = await params.speakerContext.resolveContext(entry.guildId, userId);
243
+ const speakerIdentity = await params.speakerContext.resolveIdentity(entry.guildId, userId);
244
+ const access = await authorizeDiscordVoiceIngress({
245
+ cfg: params.cfg,
246
+ discordConfig: params.discordConfig,
247
+ guildName: entry.guildName,
248
+ guildId: entry.guildId,
249
+ channelId: entry.channelId,
250
+ channelName: entry.channelName,
// Slug is the normalized channel name, or "" when the entry has no channel name.
251
+ channelSlug: entry.channelName ? normalizeDiscordSlug(entry.channelName) : "",
252
+ channelLabel: formatMention({ channelId: entry.channelId }),
253
+ memberRoleIds: speakerIdentity.memberRoleIds,
254
+ ownerAllowFrom: params.ownerAllowFrom,
255
+ sender: {
256
+ id: speakerIdentity.id,
257
+ name: speakerIdentity.name,
258
+ tag: speakerIdentity.tag
259
+ }
260
+ });
261
+ if (!access.ok) return null;
262
+ return {
263
+ extraSystemPrompt: buildDiscordGroupSystemPrompt(access.channelConfig),
264
+ senderIsOwner: speaker.senderIsOwner,
265
+ speakerLabel: speaker.label
266
+ };
267
+ }
// runDiscordVoiceAgentTurn(params): runs one agent turn for a voice message.
// Uses params.context when provided; otherwise resolves it through
// resolveDiscordVoiceIngressContext. Resolves to null when no context is
// available, otherwise to { context, text }, where text is the agent's string
// payload texts joined with "\n" and trimmed (may be "").
268
+ async function runDiscordVoiceAgentTurn(params) {
269
+ const context = params.context ?? await resolveDiscordVoiceIngressContext({
270
+ entry: params.entry,
271
+ userId: params.userId,
272
+ cfg: params.cfg,
273
+ discordConfig: params.discordConfig,
274
+ ownerAllowFrom: params.ownerAllowFrom,
275
+ fetchGuildName: params.fetchGuildName,
276
+ speakerContext: params.speakerContext
277
+ });
278
+ if (!context) return null;
// A model override is allowed only when discordConfig.voice.model normalizes to a value.
279
+ const voiceModel = normalizeOptionalString(params.discordConfig.voice?.model);
280
+ return {
281
+ context,
282
+ text: ((await agentCommandFromIngress({
283
+ message: params.message,
284
+ sessionKey: params.entry.route.sessionKey,
285
+ agentId: params.entry.route.agentId,
286
+ messageChannel: "discord",
287
+ messageProvider: "discord-voice",
288
+ extraSystemPrompt: context.extraSystemPrompt,
289
+ senderIsOwner: context.senderIsOwner,
290
+ allowModelOverride: Boolean(voiceModel),
291
+ model: voiceModel,
292
+ toolsAllow: params.toolsAllow,
293
+ deliver: false
// Missing payloads are treated as []; non-string and whitespace-only texts are dropped.
294
+ }, params.runtime)).payloads ?? []).map((payload) => payload.text).filter((text) => typeof text === "string" && text.trim()).join("\n").trim()
295
+ };
296
+ }
297
+ //#endregion
298
+ //#region extensions/discord/src/voice/prompt.ts
// DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT: fixed instruction text, built by joining the
// lines below with "\n". formatVoiceIngressPrompt places it ahead of the transcript.
299
+ const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = [
300
+ "You are OpenClaw's Discord voice interface in a live voice channel.",
301
+ "Discord voice reply requirements:",
302
+ "- Return only the concise text that should be spoken aloud in the voice channel.",
303
+ "- Treat the transcript as speech-to-text from a live conversation; repair obvious transcription artifacts and ignore repeated partial fragments caused by voice buffering.",
304
+ "- If the transcript is garbled, incomplete, or missing the user's intent, ask one brief clarifying question instead of guessing.",
305
+ "- If the request needs deeper reasoning, current information, or tools, use the available tools before answering.",
306
+ "- Do not call the tts tool; Discord voice will synthesize and play the returned text.",
307
+ "- Do not reply with NO_REPLY unless no spoken response is appropriate.",
308
+ "- Keep the response brief, natural, and conversational. Prefer one to three short sentences.",
309
+ "- Avoid markdown tables, code fences, citations, and visual formatting unless the user explicitly asks for something that cannot be spoken naturally."
310
+ ].join("\n");
// formatVoiceIngressPrompt(transcript, speakerLabel): builds the agent prompt text.
// Returns DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, a blank line, then the trimmed
// transcript. When speakerLabel trims to a non-empty string, the transcript is
// preceded by a 'Voice transcript from speaker "<label>":' line.
// `transcript` must be a string (it is trimmed unconditionally); speakerLabel may be nullish.
311
+ function formatVoiceIngressPrompt(transcript, speakerLabel) {
312
+ const cleanedTranscript = transcript.trim();
313
+ const cleanedLabel = speakerLabel?.trim();
314
+ return [DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, cleanedLabel ? [`Voice transcript from speaker "${cleanedLabel}":`, cleanedTranscript].join("\n") : cleanedTranscript].join("\n\n");
315
+ }
316
+ //#endregion
317
+ //#region extensions/discord/src/voice/sdk-runtime.ts
// loadDiscordVoiceSdk(): loads "@discordjs/voice" on first call through a require
// function created from import.meta.url, stores the module in
// cachedDiscordVoiceSdk, and returns the stored module on later calls.
318
+ let cachedDiscordVoiceSdk = null;
319
+ function loadDiscordVoiceSdk() {
320
+ if (cachedDiscordVoiceSdk) return cachedDiscordVoiceSdk;
321
+ cachedDiscordVoiceSdk = createRequire(import.meta.url)("@discordjs/voice");
322
+ return cachedDiscordVoiceSdk;
323
+ }
// Voice timing constants; the _MS suffix indicates milliseconds
// (2500, 30000, 15000, 60000, 60000). Their consumers are outside this view.
324
+ const CAPTURE_FINALIZE_GRACE_MS = 2500;
325
+ const VOICE_CONNECT_READY_TIMEOUT_MS = 3e4;
326
+ const VOICE_RECONNECT_GRACE_MS = 15e3;
327
+ const PLAYBACK_READY_TIMEOUT_MS = 6e4;
328
+ const SPEAKING_READY_TIMEOUT_MS = 6e4;
// resolveVoiceTimeoutMs(value, fallbackMs): returns Math.floor(value) when `value`
// is a finite number greater than 0; otherwise returns fallbackMs unchanged.
// NOTE(review): a value strictly between 0 and 1 passes the guard and floors to 0 —
// confirm callers tolerate a zero timeout.
329
+ function resolveVoiceTimeoutMs(value, fallbackMs) {
330
+ if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) return fallbackMs;
331
+ return Math.floor(value);
332
+ }
// logVoiceVerbose(message): forwards `message` to logVerbose with a "discord voice: " prefix.
333
+ function logVoiceVerbose(message) {
334
+ logVerbose(`discord voice: ${message}`);
335
+ }
// isVoiceChannel(type): true when `type` equals ChannelType.GuildVoice or
// ChannelType.GuildStageVoice from discord_exports; false for anything else.
336
+ function isVoiceChannel(type) {
337
+ return type === discord_exports.ChannelType.GuildVoice || type === discord_exports.ChannelType.GuildStageVoice;
338
+ }
339
+ //#endregion
340
+ //#region extensions/discord/src/voice/realtime.ts
// Realtime voice module state: a subsystem logger named "discord/voice" plus defaults.
// TALKBACK_DEBOUNCE_MS is the fallback debounce for the talkback queue when
// realtimeConfig.debounceMs is unset; FALLBACK_TEXT is passed to that queue as
// fallbackText; LOG_PREVIEW_CHARS caps formatRealtimeLogPreview output.
// PENDING_SPEAKER_CONTEXT_LIMIT and DEFAULT_MIN_BARGE_IN_AUDIO_END_MS are not
// used within this view — presumably consumed further down the file; verify there.
341
+ const logger$2 = createSubsystemLogger("discord/voice");
342
+ const DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS = 350;
343
+ const DISCORD_REALTIME_FALLBACK_TEXT = "I hit an error while checking that. Please try again.";
344
+ const DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT = 32;
345
+ const DISCORD_REALTIME_LOG_PREVIEW_CHARS = 500;
346
+ const DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
// formatRealtimeLogPreview(text): collapses every whitespace run in `text` to a
// single space and trims the result. Output longer than
// DISCORD_REALTIME_LOG_PREVIEW_CHARS is cut to that many characters and suffixed
// with "..." (so the returned string can exceed the limit by three characters).
347
+ function formatRealtimeLogPreview(text) {
348
+ const oneLine = text.replace(/\s+/g, " ").trim();
349
+ if (oneLine.length <= DISCORD_REALTIME_LOG_PREVIEW_CHARS) return oneLine;
350
+ return `${oneLine.slice(0, DISCORD_REALTIME_LOG_PREVIEW_CHARS)}...`;
351
+ }
// formatRealtimeInterruptionLog(event): maps selected realtime events to a log line.
// Matches on event.direction ("client" / "server"), event.type and, for two server
// cases, event.detail. Returns the formatted string for a recognised event and
// undefined (implicit fall-through) for every other event.
352
+ function formatRealtimeInterruptionLog(event) {
// Detail, when present, is appended after a single space.
353
+ const detail = event.detail ? ` ${event.detail}` : "";
354
+ if (event.direction === "client") {
355
+ if (event.type === "response.cancel") return `discord voice: realtime model interrupt requested ${event.direction}:${event.type}${detail}`;
356
+ if (event.type === "conversation.item.truncate.skipped") return `discord voice: realtime model interrupt ignored ${event.direction}:${event.type}${detail}`;
357
+ if (event.type === "conversation.item.truncate") return `discord voice: realtime model audio truncated ${event.direction}:${event.type}${detail}`;
358
+ }
359
+ if (event.direction === "server") {
360
+ if (event.type === "response.cancelled") return `discord voice: realtime model interrupt confirmed ${event.direction}:${event.type}${detail}`;
361
+ if (event.type === "response.done" && event.detail?.includes("status=cancelled")) return `discord voice: realtime model interrupt confirmed ${event.direction}:${event.type}${detail}`;
// Exact-match on the error detail text; any other wording is not treated as a race.
362
+ if (event.type === "error" && event.detail === "Cancellation failed: no active response found") return `discord voice: realtime model interrupt raced ${event.direction}:${event.type}${detail}`;
363
+ }
364
+ }
// readProviderConfigString(config, key): returns config[key] trimmed when it is a
// string with non-whitespace content; otherwise undefined.
// NOTE(review): `config[key]` is accessed without optional chaining, so a
// null/undefined `config` throws here, whereas readProviderConfigBoolean uses
// `config?.[key]` — confirm callers always pass an object.
365
+ function readProviderConfigString(config, key) {
366
+ const value = config[key];
367
+ return typeof value === "string" && value.trim() ? value.trim() : void 0;
368
+ }
// readProviderConfigBoolean(config, key): returns config[key] when it is a boolean;
// otherwise undefined. A null/undefined `config` is tolerated via optional chaining.
369
+ function readProviderConfigBoolean(config, key) {
370
+ const value = config?.[key];
371
+ return typeof value === "boolean" ? value : void 0;
372
+ }
// resolveDiscordVoiceMode(voice): returns voice.mode when it is "stt-tts" or "bidi";
// every other value, including a missing `voice` or mode, yields "agent-proxy".
373
+ function resolveDiscordVoiceMode(voice) {
374
+ const mode = voice?.mode;
375
+ if (mode === "stt-tts" || mode === "bidi") return mode;
376
+ return "agent-proxy";
377
+ }
// isDiscordRealtimeVoiceMode(mode): true for "agent-proxy" and "bidi", false otherwise.
378
+ function isDiscordRealtimeVoiceMode(mode) {
379
+ return mode === "agent-proxy" || mode === "bidi";
380
+ }
// isDiscordAgentProxyVoiceMode(mode): true only when mode is exactly "agent-proxy".
381
+ function isDiscordAgentProxyVoiceMode(mode) {
382
+ return mode === "agent-proxy";
383
+ }
// resolveDiscordRealtimeInterruptResponseOnInputAudio(params): reads the boolean
// "interruptResponseOnInputAudio" from params.realtimeConfig.providers[params.providerId]
// and returns it; defaults to true when the provider entry or the flag is absent
// or not a boolean.
384
+ function resolveDiscordRealtimeInterruptResponseOnInputAudio(params) {
385
+ const providerConfig = params.realtimeConfig?.providers?.[params.providerId];
386
+ return readProviderConfigBoolean(providerConfig, "interruptResponseOnInputAudio") ?? true;
387
+ }
// resolveDiscordRealtimeBargeIn(params): returns params.realtimeConfig.bargeIn when
// it is a boolean; otherwise falls back to
// resolveDiscordRealtimeInterruptResponseOnInputAudio(params).
388
+ function resolveDiscordRealtimeBargeIn(params) {
389
+ const configured = params.realtimeConfig?.bargeIn;
390
+ if (typeof configured === "boolean") return configured;
391
+ return resolveDiscordRealtimeInterruptResponseOnInputAudio(params);
392
+ }
// buildDiscordSpeakExactUserMessage(text): returns a two-line message — a fixed
// "speak this exact answer" instruction followed by "Answer: " and `text`
// serialized with JSON.stringify (so it is quoted and escaped).
393
+ function buildDiscordSpeakExactUserMessage(text) {
394
+ return ["Speak this exact OpenClaw answer to the Discord voice channel, without adding, removing, or rephrasing words.", `Answer: ${JSON.stringify(text)}`].join("\n");
395
+ }
396
+ var DiscordRealtimeVoiceSession = class {
397
+ constructor(params) {
398
+ this.params = params;
399
+ this.bridge = null;
400
+ this.outputStream = null;
401
+ this.stopped = false;
402
+ this.consultToolPolicy = "safe-read-only";
403
+ this.pendingSpeakerTurns = [];
404
+ this.playerIdleHandler = () => {
405
+ this.resetOutputStream();
406
+ };
407
+ this.talkback = createRealtimeVoiceAgentTalkbackQueue({
408
+ debounceMs: this.realtimeConfig?.debounceMs ?? DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS,
409
+ isStopped: () => this.stopped,
410
+ logger: logger$2,
411
+ logPrefix: "[discord] realtime agent",
412
+ responseStyle: "Brief, natural spoken answer for a Discord voice channel.",
413
+ fallbackText: DISCORD_REALTIME_FALLBACK_TEXT,
414
+ consult: async ({ question, responseStyle, metadata }) => {
415
+ const context = isDiscordRealtimeSpeakerContext(metadata) ? metadata : void 0;
416
+ return { text: await this.runAgentTurn({
417
+ context,
418
+ message: formatVoiceIngressPrompt([question, responseStyle ? `Spoken style: ${responseStyle}` : void 0].filter(Boolean).join("\n\n"), context?.speakerLabel ?? "Discord voice speaker")
419
+ }) };
420
+ },
421
+ deliver: (text) => this.bridge?.sendUserMessage(buildDiscordSpeakExactUserMessage(text))
422
+ });
423
+ }
424
+ async connect() {
425
+ const resolved = resolveConfiguredRealtimeVoiceProvider({
426
+ configuredProviderId: this.realtimeConfig?.provider,
427
+ providerConfigs: buildProviderConfigs(this.realtimeConfig),
428
+ providerConfigOverrides: buildProviderConfigOverrides(this.realtimeConfig),
429
+ cfg: this.params.cfg,
430
+ defaultModel: this.realtimeConfig?.model,
431
+ noRegisteredProviderMessage: "No configured realtime voice provider registered"
432
+ });
433
+ const toolPolicy = resolveRealtimeVoiceAgentConsultToolPolicy(this.realtimeConfig?.toolPolicy, "safe-read-only");
434
+ this.consultToolPolicy = toolPolicy;
435
+ this.consultToolsAllow = resolveRealtimeVoiceAgentConsultToolsAllow(toolPolicy);
436
+ const consultPolicy = this.realtimeConfig?.consultPolicy ?? "auto";
437
+ const interruptResponseOnInputAudio = resolveDiscordRealtimeInterruptResponseOnInputAudio({
438
+ realtimeConfig: this.realtimeConfig,
439
+ providerId: resolved.provider.id
440
+ });
441
+ const instructions = buildDiscordRealtimeInstructions({
442
+ mode: this.params.mode,
443
+ instructions: this.realtimeConfig?.instructions,
444
+ toolPolicy,
445
+ consultPolicy
446
+ });
447
+ this.bridge = createRealtimeVoiceBridgeSession({
448
+ provider: resolved.provider,
449
+ providerConfig: resolved.providerConfig,
450
+ audioFormat: REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
451
+ instructions,
452
+ autoRespondToAudio: this.params.mode === "bidi",
453
+ interruptResponseOnInputAudio,
454
+ markStrategy: "ack-immediately",
455
+ tools: this.params.mode === "bidi" ? resolveRealtimeVoiceAgentConsultTools(toolPolicy) : [],
456
+ audioSink: {
457
+ isOpen: () => !this.stopped,
458
+ sendAudio: (audio) => this.sendOutputAudio(audio),
459
+ clearAudio: () => this.clearOutputAudio()
460
+ },
461
+ onTranscript: (role, text, isFinal) => {
462
+ if (isFinal && text.trim()) logger$2.info(`discord voice: realtime ${role} transcript (${text.length} chars): ${formatRealtimeLogPreview(text)}`);
463
+ if (!isFinal || role !== "user" || !isDiscordAgentProxyVoiceMode(this.params.mode)) return;
464
+ this.talkback.enqueue(text, this.consumePendingSpeakerContext());
465
+ },
466
+ onToolCall: (event, session) => this.handleToolCall(event, session),
467
+ onEvent: (event) => {
468
+ const detail = event.detail ? ` ${event.detail}` : "";
469
+ logVoiceVerbose(`realtime ${event.direction}:${event.type}${detail}`);
470
+ const interruptionLog = formatRealtimeInterruptionLog(event);
471
+ if (interruptionLog) logger$2.info(interruptionLog);
472
+ },
473
+ onError: (error) => logger$2.warn(`discord voice: realtime error: ${formatErrorMessage(error)}`),
474
+ onClose: (reason) => logVoiceVerbose(`realtime closed: ${reason}`)
475
+ });
476
+ const resolvedModel = readProviderConfigString(resolved.providerConfig, "model") ?? resolved.provider.defaultModel;
477
+ const resolvedVoice = readProviderConfigString(resolved.providerConfig, "voice");
478
+ logger$2.info(`discord voice: realtime bridge starting mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"} consultPolicy=${consultPolicy} toolPolicy=${toolPolicy} autoRespond=${this.params.mode === "bidi"} interruptResponse=${interruptResponseOnInputAudio} bargeIn=${resolveDiscordRealtimeBargeIn({
479
+ realtimeConfig: this.realtimeConfig,
480
+ providerId: resolved.provider.id
481
+ })} minBargeInAudioEndMs=${resolveDiscordRealtimeMinBargeInAudioEndMs(this.realtimeConfig)}`);
482
+ const voiceSdk = loadDiscordVoiceSdk();
483
+ this.params.entry.player.on(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
484
+ await this.bridge.connect();
485
+ logger$2.info(`discord voice: realtime bridge ready mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"}`);
486
+ }
487
+ close() {
488
+ this.stopped = true;
489
+ this.talkback.close();
490
+ this.pendingSpeakerTurns.length = 0;
491
+ this.clearOutputAudio();
492
+ this.bridge?.close();
493
+ this.bridge = null;
494
+ const voiceSdk = loadDiscordVoiceSdk();
495
+ this.params.entry.player.off(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
496
+ }
497
+ beginSpeakerTurn(context, userId) {
498
+ const turn = {
499
+ context: {
500
+ ...context,
501
+ userId
502
+ },
503
+ hasAudio: false,
504
+ interruptedPlayback: false,
505
+ closed: false
506
+ };
507
+ this.pendingSpeakerTurns.push(turn);
508
+ this.prunePendingSpeakerTurns();
509
+ return {
510
+ sendInputAudio: (discordPcm48kStereo) => this.sendInputAudioForTurn(turn, discordPcm48kStereo),
511
+ close: () => {
512
+ turn.closed = true;
513
+ this.prunePendingSpeakerTurns();
514
+ }
515
+ };
516
+ }
517
+ sendInputAudioForTurn(turn, discordPcm48kStereo) {
518
+ if (!this.bridge || this.stopped) return;
519
+ turn.hasAudio = true;
520
+ const realtimePcm = convertDiscordPcm48kStereoToRealtimePcm24kMono(discordPcm48kStereo);
521
+ if (realtimePcm.length > 0) {
522
+ if (!turn.interruptedPlayback && this.isBargeInEnabled()) {
523
+ turn.interruptedPlayback = true;
524
+ logVoiceVerbose(`realtime barge-in from active speaker audio: guild ${this.params.entry.guildId} channel ${this.params.entry.channelId} user ${turn.context.userId}`);
525
+ this.handleBargeIn();
526
+ }
527
+ this.bridge.sendAudio(realtimePcm);
528
+ }
529
+ }
530
+ handleBargeIn() {
531
+ if (!this.isBargeInEnabled()) return;
532
+ this.bridge?.handleBargeIn({ audioPlaybackActive: true });
533
+ }
534
+ isBargeInEnabled() {
535
+ const providerId = this.realtimeConfig?.provider ?? "openai";
536
+ return resolveDiscordRealtimeBargeIn({
537
+ realtimeConfig: this.realtimeConfig,
538
+ providerId
539
+ });
540
+ }
541
+ get realtimeConfig() {
542
+ return this.params.discordConfig.voice?.realtime;
543
+ }
544
+ sendOutputAudio(realtimePcm24kMono) {
545
+ const discordPcm = convertRealtimePcm24kMonoToDiscordPcm48kStereo(realtimePcm24kMono);
546
+ if (discordPcm.length === 0) return;
547
+ this.ensureOutputStream().write(discordPcm);
548
+ }
549
+ ensureOutputStream() {
550
+ if (this.outputStream && !this.outputStream.destroyed) return this.outputStream;
551
+ const voiceSdk = loadDiscordVoiceSdk();
552
+ const stream = new PassThrough();
553
+ this.outputStream = stream;
554
+ stream.once("close", () => {
555
+ if (this.outputStream === stream) this.outputStream = null;
556
+ });
557
+ const resource = voiceSdk.createAudioResource(stream, { inputType: voiceSdk.StreamType.Raw });
558
+ this.params.entry.player.play(resource);
559
+ const realtimeConfig = this.realtimeConfig;
560
+ logger$2.info(`discord voice: realtime audio playback started guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} mode=${this.params.mode} model=${realtimeConfig?.model ?? "provider-default"} voice=${realtimeConfig?.voice ?? "provider-default"}`);
561
+ return stream;
562
+ }
563
+ clearOutputAudio() {
564
+ this.resetOutputStream();
565
+ this.params.entry.player.stop(true);
566
+ }
567
+ resetOutputStream() {
568
+ const stream = this.outputStream;
569
+ this.outputStream = null;
570
+ stream?.end();
571
+ stream?.destroy();
572
+ }
573
+ handleToolCall(event, session) {
574
+ const callId = event.callId || event.itemId;
575
+ if (this.params.mode !== "bidi") {
576
+ session.submitToolResult(callId, { error: `Tool "${event.name}" is only available in bidi Discord voice mode` });
577
+ return;
578
+ }
579
+ if (event.name !== REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
580
+ session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
581
+ return;
582
+ }
583
+ if (this.consultToolPolicy === "none") {
584
+ session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
585
+ return;
586
+ }
587
+ const consultMessage = buildRealtimeVoiceAgentConsultChatMessage(event.args);
588
+ logger$2.info(`discord voice: realtime consult requested call=${callId || "unknown"} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} question=${formatRealtimeLogPreview(consultMessage)}`);
589
+ if (session.bridge.supportsToolResultContinuation) session.submitToolResult(callId, buildRealtimeVoiceAgentConsultWorkingResponse("speaker"), { willContinue: true });
590
+ const context = this.consumePendingSpeakerContext();
591
+ if (!context) {
592
+ logger$2.warn(`discord voice: realtime consult has no speaker context call=${callId || "unknown"}`);
593
+ session.submitToolResult(callId, { error: "No Discord speaker context available" });
594
+ return;
595
+ }
596
+ this.runAgentTurn({
597
+ context,
598
+ message: consultMessage
599
+ }).then((text) => {
600
+ logger$2.info(`discord voice: realtime consult answer (${text.length} chars) voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel} owner=${context.senderIsOwner}: ${formatRealtimeLogPreview(text)}`);
601
+ session.submitToolResult(callId, { text });
602
+ }).catch((error) => {
603
+ logger$2.warn(`discord voice: realtime consult failed call=${callId || "unknown"}: ${formatErrorMessage(error)}`);
604
+ session.submitToolResult(callId, { error: formatErrorMessage(error) });
605
+ });
606
+ }
607
+ async runAgentTurn(params) {
608
+ const context = params.context;
609
+ if (!context) return "";
610
+ return this.params.runAgentTurn({
611
+ context,
612
+ message: params.message,
613
+ toolsAllow: this.params.mode === "bidi" ? this.consultToolsAllow : void 0,
614
+ userId: context.userId
615
+ });
616
+ }
617
+ consumePendingSpeakerContext() {
618
+ this.prunePendingSpeakerTurns();
619
+ this.expireClosedSpeakerTurnsBeforeLaterAudio();
620
+ const index = this.pendingSpeakerTurns.findIndex((turn) => turn.hasAudio);
621
+ if (index < 0) return;
622
+ const [turn] = this.pendingSpeakerTurns.splice(index, 1);
623
+ this.prunePendingSpeakerTurns();
624
+ return turn?.context;
625
+ }
626
+ prunePendingSpeakerTurns() {
627
+ for (let index = this.pendingSpeakerTurns.length - 1; index >= 0; index -= 1) {
628
+ const turn = this.pendingSpeakerTurns[index];
629
+ if (turn?.closed && !turn.hasAudio) this.pendingSpeakerTurns.splice(index, 1);
630
+ }
631
+ while (this.pendingSpeakerTurns.length > DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT) {
632
+ const completedIndex = this.pendingSpeakerTurns.findIndex((turn) => turn.closed);
633
+ this.pendingSpeakerTurns.splice(Math.max(completedIndex, 0), 1);
634
+ }
635
+ }
636
+ expireClosedSpeakerTurnsBeforeLaterAudio() {
637
+ let hasLaterAudio = false;
638
+ for (let index = this.pendingSpeakerTurns.length - 1; index >= 0; index -= 1) {
639
+ const turn = this.pendingSpeakerTurns[index];
640
+ if (!turn?.hasAudio) continue;
641
+ if (turn.closed && hasLaterAudio) {
642
+ this.pendingSpeakerTurns.splice(index, 1);
643
+ continue;
644
+ }
645
+ hasLaterAudio = true;
646
+ }
647
+ }
648
+ };
649
+ function isDiscordRealtimeSpeakerContext(value) {
650
+ return Boolean(value) && typeof value === "object" && typeof value.userId === "string" && typeof value.senderIsOwner === "boolean" && typeof value.speakerLabel === "string";
651
+ }
652
+ function buildProviderConfigs(realtimeConfig) {
653
+ const configs = realtimeConfig?.providers;
654
+ return configs && Object.keys(configs).length > 0 ? { ...configs } : void 0;
655
+ }
656
+ function buildProviderConfigOverrides(realtimeConfig) {
657
+ const overrides = {
658
+ ...realtimeConfig?.model ? { model: realtimeConfig.model } : {},
659
+ ...realtimeConfig?.voice ? { voice: realtimeConfig.voice } : {},
660
+ ...typeof realtimeConfig?.minBargeInAudioEndMs === "number" ? { minBargeInAudioEndMs: realtimeConfig.minBargeInAudioEndMs } : {}
661
+ };
662
+ return Object.keys(overrides).length > 0 ? overrides : void 0;
663
+ }
664
+ function resolveDiscordRealtimeMinBargeInAudioEndMs(realtimeConfig) {
665
+ return typeof realtimeConfig?.minBargeInAudioEndMs === "number" ? realtimeConfig.minBargeInAudioEndMs : DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS;
666
+ }
667
+ function buildDiscordRealtimeInstructions(params) {
668
+ const base = params.instructions ?? ["You are OpenClaw's Discord voice interface.", "Keep spoken replies concise, natural, and suitable for a live Discord voice channel."].join("\n");
669
+ if (isDiscordAgentProxyVoiceMode(params.mode)) return [
670
+ base,
671
+ "Mode: OpenClaw agent proxy.",
672
+ "Use audio input only to transcribe the speaker. Do not answer user speech by yourself.",
673
+ "When OpenClaw sends an exact answer to speak, say only that answer."
674
+ ].join("\n\n");
675
+ return [base, buildRealtimeVoiceAgentConsultPolicyInstructions({
676
+ toolPolicy: params.toolPolicy,
677
+ consultPolicy: params.consultPolicy
678
+ })].filter(Boolean).join("\n\n");
679
+ }
196
680
  //#endregion
197
681
  //#region extensions/discord/src/voice/receive-recovery.ts
198
682
  const DECRYPT_FAILURE_WINDOW_MS = 3e4;
@@ -261,53 +745,8 @@ function enableDaveReceivePassthrough(params) {
261
745
  }
262
746
  }
263
747
  //#endregion
264
- //#region extensions/discord/src/voice/sdk-runtime.ts
265
- let cachedDiscordVoiceSdk = null;
266
- function loadDiscordVoiceSdk() {
267
- if (cachedDiscordVoiceSdk) return cachedDiscordVoiceSdk;
268
- cachedDiscordVoiceSdk = createRequire(import.meta.url)("@discordjs/voice");
269
- return cachedDiscordVoiceSdk;
270
- }
271
- //#endregion
272
- //#region extensions/discord/src/voice/prompt.ts
273
- const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = [
274
- "You are OpenClaw's Discord voice interface in a live voice channel.",
275
- "Discord voice reply requirements:",
276
- "- Return only the concise text that should be spoken aloud in the voice channel.",
277
- "- Treat the transcript as speech-to-text from a live conversation; repair obvious transcription artifacts and ignore repeated partial fragments caused by voice buffering.",
278
- "- If the transcript is garbled, incomplete, or missing the user's intent, ask one brief clarifying question instead of guessing.",
279
- "- If the request needs deeper reasoning, current information, or tools, use the available tools before answering.",
280
- "- Do not call the tts tool; Discord voice will synthesize and play the returned text.",
281
- "- Do not reply with NO_REPLY unless no spoken response is appropriate.",
282
- "- Keep the response brief, natural, and conversational. Prefer one to three short sentences.",
283
- "- Avoid markdown tables, code fences, citations, and visual formatting unless the user explicitly asks for something that cannot be spoken naturally."
284
- ].join("\n");
285
- function formatVoiceIngressPrompt(transcript, speakerLabel) {
286
- const cleanedTranscript = transcript.trim();
287
- const cleanedLabel = speakerLabel?.trim();
288
- return [DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, cleanedLabel ? [`Voice transcript from speaker "${cleanedLabel}":`, cleanedTranscript].join("\n") : cleanedTranscript].join("\n\n");
289
- }
290
- const CAPTURE_FINALIZE_GRACE_MS = 2500;
291
- const VOICE_CONNECT_READY_TIMEOUT_MS = 3e4;
292
- const VOICE_RECONNECT_GRACE_MS = 15e3;
293
- const PLAYBACK_READY_TIMEOUT_MS = 6e4;
294
- const SPEAKING_READY_TIMEOUT_MS = 6e4;
295
- function resolveVoiceTimeoutMs(value, fallbackMs) {
296
- if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) return fallbackMs;
297
- return Math.floor(value);
298
- }
299
- function logVoiceVerbose(message) {
300
- logVerbose(`discord voice: ${message}`);
301
- }
302
- function isVoiceChannel(type) {
303
- return type === discord_exports.ChannelType.GuildVoice || type === discord_exports.ChannelType.GuildStageVoice;
304
- }
305
- //#endregion
306
748
  //#region extensions/discord/src/voice/sanitize.ts
307
749
  const SPEECH_EMOJI_RE = /(?:\p{Extended_Pictographic}(?:\uFE0F|\u200D|\p{Extended_Pictographic}|\p{Emoji_Modifier})*)+/gu;
308
- function escapeRegExp(value) {
309
- return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
310
- }
311
750
  function stripEmojiForSpeech(text) {
312
751
  return text.replace(SPEECH_EMOJI_RE, " ").replace(/\s+([?!.,:;])/g, "$1").replace(/[ \t]{2,}/g, " ").replace(/ *\n */g, "\n").trim();
313
752
  }
@@ -384,7 +823,22 @@ async function synthesizeVoiceReplyAudio(params) {
384
823
  });
385
824
  const speakText = sanitizeVoiceReplyTextForSpeech(directive.overrides.ttsText ?? directive.cleanedText.trim(), params.speakerLabel);
386
825
  if (!speakText) return { status: "empty" };
387
- const result = await getDiscordRuntime().tts.textToSpeech({
826
+ const runtime = getDiscordRuntime();
827
+ const streamResult = await runtime.tts.textToSpeechStream?.({
828
+ text: speakText,
829
+ cfg: ttsCfg,
830
+ channel: "discord",
831
+ overrides: directive.overrides,
832
+ disableFallback: true
833
+ });
834
+ if (streamResult?.success && streamResult.audioStream) return {
835
+ status: "ok",
836
+ mode: "stream",
837
+ audioStream: streamResult.audioStream,
838
+ release: streamResult.release,
839
+ speakText
840
+ };
841
+ const result = await runtime.tts.textToSpeech({
388
842
  text: speakText,
389
843
  cfg: ttsCfg,
390
844
  channel: "discord",
@@ -396,39 +850,34 @@ async function synthesizeVoiceReplyAudio(params) {
396
850
  };
397
851
  return {
398
852
  status: "ok",
853
+ mode: "file",
399
854
  audioPath: result.audioPath,
400
855
  speakText
401
856
  };
402
857
  }
403
858
  //#endregion
404
859
  //#region extensions/discord/src/voice/segment.ts
405
- const DISCORD_VOICE_MESSAGE_PROVIDER = "discord-voice";
860
+ const VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS = 500;
406
861
  const logger$1 = createSubsystemLogger("discord/voice");
862
+ function formatVoiceTranscriptLogPreview(text) {
863
+ const oneLine = text.replace(/\s+/g, " ").trim();
864
+ if (oneLine.length <= VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS) return oneLine;
865
+ return `${oneLine.slice(0, VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS)}...`;
866
+ }
407
867
  async function processDiscordVoiceSegment(params) {
408
868
  const { entry, wavPath, userId, durationSeconds } = params;
409
869
  logVoiceVerbose(`segment processing (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId}`);
410
- if (!entry.guildName) entry.guildName = await params.fetchGuildName(entry.guildId);
411
- const speaker = await params.speakerContext.resolveContext(entry.guildId, userId);
412
- const speakerIdentity = await params.speakerContext.resolveIdentity(entry.guildId, userId);
413
- const access = await authorizeDiscordVoiceIngress({
870
+ const ingress = await resolveDiscordVoiceIngressContext({
871
+ entry,
872
+ userId,
414
873
  cfg: params.cfg,
415
874
  discordConfig: params.discordConfig,
416
- guildName: entry.guildName,
417
- guildId: entry.guildId,
418
- channelId: entry.channelId,
419
- channelName: entry.channelName,
420
- channelSlug: entry.channelName ? normalizeDiscordSlug(entry.channelName) : "",
421
- channelLabel: formatMention({ channelId: entry.channelId }),
422
- memberRoleIds: speakerIdentity.memberRoleIds,
423
875
  ownerAllowFrom: params.ownerAllowFrom,
424
- sender: {
425
- id: speakerIdentity.id,
426
- name: speakerIdentity.name,
427
- tag: speakerIdentity.tag
428
- }
876
+ fetchGuildName: params.fetchGuildName,
877
+ speakerContext: params.speakerContext
429
878
  });
430
- if (!access.ok) {
431
- logVoiceVerbose(`segment unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId} reason=${access.message}`);
879
+ if (!ingress) {
880
+ logVoiceVerbose(`segment unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
432
881
  return;
433
882
  }
434
883
  const transcript = await transcribeVoiceAudio({
@@ -441,21 +890,24 @@ async function processDiscordVoiceSegment(params) {
441
890
  return;
442
891
  }
443
892
  logVoiceVerbose(`transcription ok (${transcript.length} chars): guild ${entry.guildId} channel ${entry.channelId}`);
444
- const prompt = formatVoiceIngressPrompt(transcript, speaker.label);
445
- const extraSystemPrompt = buildDiscordGroupSystemPrompt(access.channelConfig);
446
- const modelOverride = normalizeOptionalString(params.discordConfig.voice?.model);
447
- const replyText = ((await agentCommandFromIngress({
448
- message: prompt,
449
- sessionKey: entry.route.sessionKey,
450
- agentId: entry.route.agentId,
451
- messageChannel: "discord",
452
- messageProvider: DISCORD_VOICE_MESSAGE_PROVIDER,
453
- extraSystemPrompt,
454
- senderIsOwner: speaker.senderIsOwner,
455
- allowModelOverride: Boolean(modelOverride),
456
- model: modelOverride,
457
- deliver: false
458
- }, params.runtime)).payloads ?? []).map((payload) => payload.text).filter((text) => typeof text === "string" && text.trim()).join("\n").trim();
893
+ logVoiceVerbose(`transcript from ${ingress.speakerLabel} (${userId}) in guild ${entry.guildId} channel ${entry.channelId}: ${formatVoiceTranscriptLogPreview(transcript)}`);
894
+ const turn = await runDiscordVoiceAgentTurn({
895
+ entry,
896
+ userId,
897
+ message: formatVoiceIngressPrompt(transcript, ingress.speakerLabel),
898
+ cfg: params.cfg,
899
+ discordConfig: params.discordConfig,
900
+ runtime: params.runtime,
901
+ context: ingress,
902
+ ownerAllowFrom: params.ownerAllowFrom,
903
+ fetchGuildName: params.fetchGuildName,
904
+ speakerContext: params.speakerContext
905
+ });
906
+ if (!turn) {
907
+ logVoiceVerbose(`segment unauthorized before agent turn: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
908
+ return;
909
+ }
910
+ const replyText = turn.text;
459
911
  if (!replyText) {
460
912
  logVoiceVerbose(`reply empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
461
913
  return;
@@ -465,7 +917,7 @@ async function processDiscordVoiceSegment(params) {
465
917
  cfg: params.cfg,
466
918
  override: params.discordConfig.voice?.tts,
467
919
  replyText,
468
- speakerLabel: speaker.label
920
+ speakerLabel: ingress.speakerLabel
469
921
  });
470
922
  if (voiceReplyAudio.status === "empty") {
471
923
  logVoiceVerbose(`tts skipped (empty): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
@@ -477,13 +929,25 @@ async function processDiscordVoiceSegment(params) {
477
929
  }
478
930
  logVoiceVerbose(`tts ok (${voiceReplyAudio.speakText.length} chars): guild ${entry.guildId} channel ${entry.channelId}`);
479
931
  params.enqueuePlayback(entry, async () => {
480
- logVoiceVerbose(`playback start: guild ${entry.guildId} channel ${entry.channelId} file ${path.basename(voiceReplyAudio.audioPath)}`);
481
932
  const voiceSdk = loadDiscordVoiceSdk();
482
- const resource = voiceSdk.createAudioResource(voiceReplyAudio.audioPath);
483
- entry.player.play(resource);
484
- await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Playing, PLAYBACK_READY_TIMEOUT_MS).catch(() => void 0);
485
- await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Idle, SPEAKING_READY_TIMEOUT_MS).catch(() => void 0);
486
- logVoiceVerbose(`playback done: guild ${entry.guildId} channel ${entry.channelId}`);
933
+ const releaseAudioStream = voiceReplyAudio.mode === "stream" ? voiceReplyAudio.release : void 0;
934
+ try {
935
+ if (voiceReplyAudio.mode === "stream") {
936
+ logVoiceVerbose(`playback start: guild ${entry.guildId} channel ${entry.channelId} stream`);
937
+ const nodeStream = Readable.fromWeb(voiceReplyAudio.audioStream);
938
+ const resource = voiceSdk.createAudioResource(nodeStream);
939
+ entry.player.play(resource);
940
+ } else {
941
+ logVoiceVerbose(`playback start: guild ${entry.guildId} channel ${entry.channelId} file ${path.basename(voiceReplyAudio.audioPath)}`);
942
+ const resource = voiceSdk.createAudioResource(voiceReplyAudio.audioPath);
943
+ entry.player.play(resource);
944
+ }
945
+ await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Playing, PLAYBACK_READY_TIMEOUT_MS).catch(() => void 0);
946
+ await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Idle, SPEAKING_READY_TIMEOUT_MS).catch(() => void 0);
947
+ logVoiceVerbose(`playback done: guild ${entry.guildId} channel ${entry.channelId}`);
948
+ } finally {
949
+ await releaseAudioStream?.();
950
+ }
487
951
  });
488
952
  }
489
953
  //#endregion
@@ -580,6 +1044,12 @@ var DiscordVoiceSpeakerContextResolver = class {
580
1044
  //#endregion
581
1045
  //#region extensions/discord/src/voice/manager.ts
582
1046
  const logger = createSubsystemLogger("discord/voice");
1047
+ const VOICE_LOG_PREVIEW_CHARS = 500;
1048
+ function formatVoiceLogPreview(text) {
1049
+ const oneLine = text.replace(/\s+/g, " ").trim();
1050
+ if (oneLine.length <= VOICE_LOG_PREVIEW_CHARS) return oneLine;
1051
+ return `${oneLine.slice(0, VOICE_LOG_PREVIEW_CHARS)}...`;
1052
+ }
583
1053
  function isVoiceConnectionDestroyed(connection, voiceSdk) {
584
1054
  return connection.state.status === voiceSdk.VoiceConnectionStatus.Destroyed;
585
1055
  }
@@ -602,6 +1072,44 @@ function destroyVoiceConnectionSafely(params) {
602
1072
  function startAutoJoin(manager) {
603
1073
  manager.autoJoin().catch((err) => logger.warn(`discord voice: autoJoin failed: ${formatErrorMessage(err)}`));
604
1074
  }
1075
+ function resolveDiscordVoiceAgentRoute(params) {
1076
+ const voiceRoute = resolveAgentRoute({
1077
+ cfg: params.cfg,
1078
+ channel: "discord",
1079
+ accountId: params.accountId,
1080
+ guildId: params.guildId,
1081
+ peer: {
1082
+ kind: "channel",
1083
+ id: params.sessionChannelId
1084
+ }
1085
+ });
1086
+ const agentSession = params.voiceConfig?.agentSession;
1087
+ if (agentSession?.mode !== "target") return {
1088
+ route: voiceRoute,
1089
+ voiceRoute,
1090
+ agentSessionMode: "voice",
1091
+ agentSessionTarget: void 0
1092
+ };
1093
+ const target = agentSession.target?.trim();
1094
+ if (!target) throw new Error("channels.discord.voice.agentSession.target is required when mode is \"target\"");
1095
+ const parsed = parseDiscordTarget(target, { defaultKind: "channel" });
1096
+ if (!parsed) throw new Error(`Invalid Discord voice agent session target "${target}"`);
1097
+ return {
1098
+ route: resolveAgentRoute({
1099
+ cfg: params.cfg,
1100
+ channel: "discord",
1101
+ accountId: params.accountId,
1102
+ guildId: params.guildId,
1103
+ peer: {
1104
+ kind: parsed.kind === "user" ? "direct" : "channel",
1105
+ id: parsed.id
1106
+ }
1107
+ }),
1108
+ voiceRoute,
1109
+ agentSessionMode: "target",
1110
+ agentSessionTarget: parsed.normalized
1111
+ };
1112
+ }
605
1113
  var DiscordVoiceManager$1 = class {
606
1114
  constructor(params) {
607
1115
  this.params = params;
@@ -629,17 +1137,25 @@ var DiscordVoiceManager$1 = class {
629
1137
  if (this.autoJoinTask) return this.autoJoinTask;
630
1138
  this.autoJoinTask = (async () => {
631
1139
  const entries = this.params.discordConfig.voice?.autoJoin ?? [];
632
- logVoiceVerbose(`autoJoin: ${entries.length} entries`);
633
- const seenGuilds = /* @__PURE__ */ new Set();
1140
+ const entriesByGuild = /* @__PURE__ */ new Map();
1141
+ const duplicateGuilds = /* @__PURE__ */ new Set();
634
1142
  for (const entry of entries) {
635
1143
  const guildId = entry.guildId.trim();
636
- if (!guildId) continue;
637
- if (seenGuilds.has(guildId)) {
638
- logger.warn(`discord voice: autoJoin has multiple entries for guild ${guildId}; skipping`);
639
- continue;
640
- }
641
- seenGuilds.add(guildId);
642
- logVoiceVerbose(`autoJoin: joining guild ${guildId} channel ${entry.channelId}`);
1144
+ const channelId = entry.channelId.trim();
1145
+ if (!guildId || !channelId) continue;
1146
+ if (entriesByGuild.has(guildId)) duplicateGuilds.add(guildId);
1147
+ entriesByGuild.set(guildId, {
1148
+ guildId,
1149
+ channelId
1150
+ });
1151
+ }
1152
+ logVoiceVerbose(`autoJoin: ${entries.length} entries, ${entriesByGuild.size} guilds`);
1153
+ for (const guildId of duplicateGuilds) {
1154
+ const selected = entriesByGuild.get(guildId);
1155
+ if (selected) logger.warn(`discord voice: autoJoin has multiple entries for guild ${guildId}; using channel ${selected.channelId}`);
1156
+ }
1157
+ for (const entry of entriesByGuild.values()) {
1158
+ logVoiceVerbose(`autoJoin: joining guild ${entry.guildId} channel ${entry.channelId}`);
643
1159
  await this.join({
644
1160
  guildId: entry.guildId,
645
1161
  channelId: entry.channelId
@@ -700,6 +1216,7 @@ var DiscordVoiceManager$1 = class {
700
1216
  message: "Discord voice plugin is not available."
701
1217
  };
702
1218
  const voiceConfig = this.params.discordConfig.voice;
1219
+ const voiceMode = resolveDiscordVoiceMode(voiceConfig);
703
1220
  const adapterCreator = voicePlugin.getGatewayAdapterCreator(guildId);
704
1221
  const daveEncryption = voiceConfig?.daveEncryption;
705
1222
  const decryptionFailureTolerance = voiceConfig?.decryptionFailureTolerance;
@@ -744,16 +1261,30 @@ var DiscordVoiceManager$1 = class {
744
1261
  }
745
1262
  const sessionChannelId = channelInfo?.id ?? channelId;
746
1263
  if (sessionChannelId !== channelId) logVoiceVerbose(`join: using session channel ${sessionChannelId} for voice channel ${channelId}`);
747
- const route = resolveAgentRoute({
748
- cfg: this.params.cfg,
749
- channel: "discord",
750
- accountId: this.params.accountId,
751
- guildId,
752
- peer: {
753
- kind: "channel",
754
- id: sessionChannelId
755
- }
756
- });
1264
+ let routeInfo;
1265
+ try {
1266
+ routeInfo = resolveDiscordVoiceAgentRoute({
1267
+ cfg: this.params.cfg,
1268
+ accountId: this.params.accountId,
1269
+ guildId,
1270
+ sessionChannelId,
1271
+ voiceConfig
1272
+ });
1273
+ } catch (err) {
1274
+ destroyVoiceConnectionSafely({
1275
+ connection,
1276
+ voiceSdk,
1277
+ reason: `voice agent session route failed guild ${guildId} channel ${channelId}`
1278
+ });
1279
+ return {
1280
+ ok: false,
1281
+ message: `Failed to resolve Discord voice agent session: ${formatErrorMessage(err)}`,
1282
+ guildId,
1283
+ channelId
1284
+ };
1285
+ }
1286
+ const { route, voiceRoute, agentSessionMode, agentSessionTarget } = routeInfo;
1287
+ logger.info(`discord voice: joining guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} agentSessionMode=${agentSessionMode}${agentSessionTarget ? ` agentSessionTarget=${agentSessionTarget}` : ""} voiceModel=${voiceConfig?.model ?? "route-default"} realtimeProvider=${voiceConfig?.realtime?.provider ?? "auto"} realtimeModel=${voiceConfig?.realtime?.model ?? "provider-default"} realtimeVoice=${voiceConfig?.realtime?.voice ?? "provider-default"}`);
757
1288
  const player = voiceSdk.createAudioPlayer();
758
1289
  connection.subscribe(player);
759
1290
  let speakingHandler;
@@ -761,15 +1292,35 @@ var DiscordVoiceManager$1 = class {
761
1292
  let disconnectedHandler;
762
1293
  let destroyedHandler;
763
1294
  let playerErrorHandler;
1295
+ let stopped = false;
764
1296
  const clearSessionIfCurrent = () => {
765
1297
  if (this.sessions.get(guildId)?.connection === connection) this.sessions.delete(guildId);
766
1298
  };
1299
+ const stopEntry = (entry, options) => {
1300
+ if (stopped) return;
1301
+ stopped = true;
1302
+ if (speakingHandler) connection.receiver.speaking.off("start", speakingHandler);
1303
+ if (speakingEndHandler) connection.receiver.speaking.off("end", speakingEndHandler);
1304
+ stopVoiceCaptureState(entry.capture);
1305
+ if (disconnectedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Disconnected, disconnectedHandler);
1306
+ if (destroyedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
1307
+ if (playerErrorHandler) player.off("error", playerErrorHandler);
1308
+ entry.realtime?.close();
1309
+ entry.realtime = void 0;
1310
+ player.stop();
1311
+ if (options.destroyConnection) destroyVoiceConnectionSafely({
1312
+ connection,
1313
+ voiceSdk,
1314
+ reason: options.reason
1315
+ });
1316
+ };
767
1317
  const entry = {
768
1318
  guildId,
769
1319
  guildName: channelInfo && "guild" in channelInfo && channelInfo.guild && typeof channelInfo.guild.name === "string" ? channelInfo.guild.name : void 0,
770
1320
  channelId,
771
1321
  channelName: channelInfo && "name" in channelInfo && typeof channelInfo.name === "string" ? channelInfo.name : void 0,
772
1322
  sessionChannelId,
1323
+ voiceSessionKey: voiceRoute.sessionKey,
773
1324
  route,
774
1325
  connection,
775
1326
  player,
@@ -778,20 +1329,43 @@ var DiscordVoiceManager$1 = class {
778
1329
  capture: createVoiceCaptureState(),
779
1330
  receiveRecovery: createVoiceReceiveRecoveryState(),
780
1331
  stop: () => {
781
- if (speakingHandler) connection.receiver.speaking.off("start", speakingHandler);
782
- if (speakingEndHandler) connection.receiver.speaking.off("end", speakingEndHandler);
783
- stopVoiceCaptureState(entry.capture);
784
- if (disconnectedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Disconnected, disconnectedHandler);
785
- if (destroyedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
786
- if (playerErrorHandler) player.off("error", playerErrorHandler);
787
- player.stop();
1332
+ stopEntry(entry, {
1333
+ destroyConnection: true,
1334
+ reason: `stop guild ${guildId} channel ${channelId}`
1335
+ });
1336
+ }
1337
+ };
1338
+ if (voiceMode !== "stt-tts") {
1339
+ entry.realtime = new DiscordRealtimeVoiceSession({
1340
+ cfg: this.params.cfg,
1341
+ discordConfig: this.params.discordConfig,
1342
+ entry,
1343
+ mode: voiceMode,
1344
+ runAgentTurn: ({ context, message, toolsAllow, userId }) => this.runDiscordRealtimeAgentTurn({
1345
+ context,
1346
+ entry,
1347
+ message,
1348
+ toolsAllow,
1349
+ userId
1350
+ })
1351
+ });
1352
+ try {
1353
+ await entry.realtime.connect();
1354
+ } catch (err) {
1355
+ entry.realtime.close();
788
1356
  destroyVoiceConnectionSafely({
789
1357
  connection,
790
1358
  voiceSdk,
791
- reason: `stop guild ${guildId} channel ${channelId}`
1359
+ reason: `realtime setup failed guild ${guildId} channel ${channelId}`
792
1360
  });
1361
+ return {
1362
+ ok: false,
1363
+ message: `Failed to start Discord realtime voice: ${formatErrorMessage(err)}`,
1364
+ guildId,
1365
+ channelId
1366
+ };
793
1367
  }
794
- };
1368
+ }
795
1369
  speakingHandler = (userId) => {
796
1370
  this.handleSpeakingStart(entry, userId).catch((err) => {
797
1371
  logger.warn(`discord voice: capture failed: ${formatErrorMessage(err)}`);
@@ -808,15 +1382,18 @@ var DiscordVoiceManager$1 = class {
808
1382
  } catch (err) {
809
1383
  logger.warn(`discord voice: disconnect recovery failed: guild ${guildId} channel ${channelId} timeout=${reconnectGraceMs}ms error=${formatErrorMessage(err)}; destroying connection`);
810
1384
  clearSessionIfCurrent();
811
- destroyVoiceConnectionSafely({
812
- connection,
813
- voiceSdk,
1385
+ stopEntry(entry, {
1386
+ destroyConnection: true,
814
1387
  reason: `disconnect recovery failed guild ${guildId} channel ${channelId}`
815
1388
  });
816
1389
  }
817
1390
  };
818
1391
  destroyedHandler = () => {
819
1392
  clearSessionIfCurrent();
1393
+ stopEntry(entry, {
1394
+ destroyConnection: false,
1395
+ reason: `destroyed guild ${guildId} channel ${channelId}`
1396
+ });
820
1397
  };
821
1398
  playerErrorHandler = (err) => {
822
1399
  logger.warn(`discord voice: playback error: ${formatErrorMessage(err)}`);
@@ -828,6 +1405,7 @@ var DiscordVoiceManager$1 = class {
828
1405
  connection.on(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
829
1406
  player.on("error", playerErrorHandler);
830
1407
  this.sessions.set(guildId, entry);
1408
+ logger.info(`discord voice: joined guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} voiceModel=${voiceConfig?.model ?? "route-default"}`);
831
1409
  return {
832
1410
  ok: true,
833
1411
  message: `Joined ${formatMention({ channelId })}.`,
@@ -892,8 +1470,26 @@ var DiscordVoiceManager$1 = class {
892
1470
  }
893
1471
  logVoiceVerbose(`capture start: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
894
1472
  const voiceSdk = loadDiscordVoiceSdk();
1473
+ const voiceMode = resolveDiscordVoiceMode(this.params.discordConfig.voice);
1474
+ const realtime = entry.realtime && isDiscordRealtimeVoiceMode(voiceMode) ? entry.realtime : void 0;
1475
+ if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing && !realtime) {
1476
+ logVoiceVerbose(`capture ignored during playback: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1477
+ return;
1478
+ }
1479
+ const realtimeIngress = realtime ? await this.resolveDiscordVoiceIngressContext(entry, userId) : void 0;
1480
+ if (realtime && !realtimeIngress) {
1481
+ logVoiceVerbose(`realtime capture unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1482
+ return;
1483
+ }
1484
+ if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing && realtime) {
1485
+ if (!realtime.isBargeInEnabled()) {
1486
+ logger.info(`discord voice: realtime capture ignored during playback (barge-in disabled): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1487
+ return;
1488
+ }
1489
+ logVoiceVerbose(`realtime barge-in: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1490
+ realtime.handleBargeIn();
1491
+ }
895
1492
  this.enableDaveReceivePassthrough(entry, `speaker ${userId} start`, 15);
896
- if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing) entry.player.stop(true);
897
1493
  const stream = entry.connection.receiver.subscribe(userId, { end: { behavior: voiceSdk.EndBehaviorType.Manual } });
898
1494
  const generation = beginVoiceCapture(entry.capture, userId, stream);
899
1495
  let streamAborted = false;
@@ -902,6 +1498,19 @@ var DiscordVoiceManager$1 = class {
902
1498
  this.handleReceiveError(entry, err);
903
1499
  });
904
1500
  try {
1501
+ if (realtime && realtimeIngress) {
1502
+ const turn = realtime.beginSpeakerTurn(realtimeIngress, userId);
1503
+ try {
1504
+ await this.processRealtimeAudioCapture({
1505
+ entry,
1506
+ stream,
1507
+ turn
1508
+ });
1509
+ } finally {
1510
+ turn.close();
1511
+ }
1512
+ return;
1513
+ }
905
1514
  const pcm = await decodeOpusStream(stream, {
906
1515
  onVerbose: logVoiceVerbose,
907
1516
  onWarn: (message) => logger.warn(message)
@@ -929,6 +1538,61 @@ var DiscordVoiceManager$1 = class {
929
1538
  finishVoiceCapture(entry.capture, userId, generation);
930
1539
  }
931
1540
  }
1541
+ async processRealtimeAudioCapture(params) {
1542
+ const { entry, stream, turn } = params;
1543
+ let resetReceiveRecovery = false;
1544
+ await decodeOpusStreamChunks(stream, {
1545
+ onChunk: (pcm) => {
1546
+ if (!resetReceiveRecovery && pcm.length > 0) {
1547
+ resetReceiveRecovery = true;
1548
+ this.resetDecryptFailureState(entry);
1549
+ }
1550
+ turn.sendInputAudio(pcm);
1551
+ },
1552
+ onVerbose: logVoiceVerbose,
1553
+ onWarn: (message) => logger.warn(message)
1554
+ });
1555
+ }
1556
+ async resolveDiscordVoiceIngressContext(entry, userId) {
1557
+ return await resolveDiscordVoiceIngressContext({
1558
+ entry,
1559
+ userId,
1560
+ cfg: this.params.cfg,
1561
+ discordConfig: this.params.discordConfig,
1562
+ ownerAllowFrom: this.ownerAllowFrom,
1563
+ fetchGuildName: async (guildId) => {
1564
+ const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
1565
+ return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
1566
+ },
1567
+ speakerContext: this.speakerContext
1568
+ });
1569
+ }
1570
+ async runDiscordRealtimeAgentTurn(params) {
1571
+ const { context, entry, message, toolsAllow, userId } = params;
1572
+ logger.info(`discord voice: agent turn start guild=${entry.guildId} channel=${entry.channelId} voiceSession=${entry.voiceSessionKey} supervisorSession=${entry.route.sessionKey} agent=${entry.route.agentId} user=${userId} speaker=${context.speakerLabel} owner=${context.senderIsOwner} model=${this.params.discordConfig.voice?.model ?? "route-default"} message=${formatVoiceLogPreview(message)}`);
1573
+ const turn = await runDiscordVoiceAgentTurn({
1574
+ entry,
1575
+ userId,
1576
+ message,
1577
+ cfg: this.params.cfg,
1578
+ discordConfig: this.params.discordConfig,
1579
+ runtime: this.params.runtime,
1580
+ context,
1581
+ toolsAllow,
1582
+ ownerAllowFrom: this.ownerAllowFrom,
1583
+ fetchGuildName: async (guildId) => {
1584
+ const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
1585
+ return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
1586
+ },
1587
+ speakerContext: this.speakerContext
1588
+ });
1589
+ if (!turn) {
1590
+ logVoiceVerbose(`realtime agent unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1591
+ return "";
1592
+ }
1593
+ logger.info(`discord voice: agent turn answer (${turn.text.length} chars) guild=${entry.guildId} channel=${entry.channelId} voiceSession=${entry.voiceSessionKey} supervisorSession=${entry.route.sessionKey} agent=${entry.route.agentId}: ${formatVoiceLogPreview(turn.text)}`);
1594
+ return turn.text;
1595
+ }
932
1596
  async processSegment(params) {
933
1597
  await processDiscordVoiceSegment({
934
1598
  ...params,
@@ -948,6 +1612,10 @@ var DiscordVoiceManager$1 = class {
948
1612
  }
949
1613
  handleReceiveError(entry, err) {
950
1614
  const analysis = analyzeVoiceReceiveError(err);
1615
+ if (analysis.isAbortLike && !analysis.countsAsDecryptFailure) {
1616
+ logVoiceVerbose(`receive stream ended: ${analysis.message}`);
1617
+ return;
1618
+ }
951
1619
  logger.warn(`discord voice: receive error: ${analysis.message}`);
952
1620
  if (analysis.shouldAttemptPassthrough) this.enableDaveReceivePassthrough(entry, "receive decrypt error", 15);
953
1621
  if (!analysis.countsAsDecryptFailure) return;